Diffstat
-rw-r--r--  .mailmap | 23
-rw-r--r--  CREDITS | 9
-rw-r--r--  Documentation/ABI/testing/sysfs-devices-power | 2
-rw-r--r--  Documentation/ABI/testing/sysfs-devices-system-cpu | 1
-rw-r--r--  Documentation/ABI/testing/sysfs-driver-ufs | 2
-rw-r--r--  Documentation/ABI/testing/sysfs-edac-scrub | 16
-rw-r--r--  Documentation/ABI/testing/sysfs-fs-erofs | 10
-rw-r--r--  Documentation/admin-guide/cgroup-v2.rst | 6
-rw-r--r--  Documentation/admin-guide/cifs/usage.rst | 2
-rw-r--r--  Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst | 4
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 13
-rw-r--r--  Documentation/arch/arm64/booting.rst | 2
-rw-r--r--  Documentation/arch/x86/mds.rst | 8
-rw-r--r--  Documentation/block/ublk.rst | 77
-rw-r--r--  Documentation/bpf/map_hash.rst | 8
-rw-r--r--  Documentation/bpf/map_lru_hash_update.dot | 6
-rw-r--r--  Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml | 3
-rw-r--r--  Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml | 4
-rw-r--r--  Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml | 24
-rw-r--r--  Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml | 3
-rw-r--r--  Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml | 6
-rw-r--r--  Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml | 2
-rw-r--r--  Documentation/devicetree/bindings/input/elan,ekth6915.yaml | 12
-rw-r--r--  Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml | 2
-rw-r--r--  Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml | 2
-rw-r--r--  Documentation/devicetree/bindings/media/allwinner,sun6i-a31-mipi-csi2.yaml | 2
-rw-r--r--  Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml | 2
-rw-r--r--  Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml | 3
-rw-r--r--  Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml | 2
-rw-r--r--  Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml | 2
-rw-r--r--  Documentation/devicetree/bindings/pmem/pmem-region.txt | 65
-rw-r--r--  Documentation/devicetree/bindings/pmem/pmem-region.yaml | 48
-rw-r--r--  Documentation/devicetree/bindings/serial/8250.yaml | 2
-rw-r--r--  Documentation/devicetree/bindings/serial/altera_jtaguart.txt | 5
-rw-r--r--  Documentation/devicetree/bindings/serial/altera_uart.txt | 8
-rw-r--r--  Documentation/devicetree/bindings/serial/altr,juart-1.0.yaml | 19
-rw-r--r--  Documentation/devicetree/bindings/serial/altr,uart-1.0.yaml | 25
-rw-r--r--  Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml | 2
-rw-r--r--  Documentation/filesystems/locking.rst | 4
-rw-r--r--  Documentation/filesystems/porting.rst | 27
-rw-r--r--  Documentation/filesystems/proc.rst | 4
-rw-r--r--  Documentation/filesystems/propagate_umount.txt | 484
-rw-r--r--  Documentation/filesystems/vfs.rst | 11
-rw-r--r--  Documentation/gpu/nouveau.rst | 2
-rw-r--r--  Documentation/hwmon/ina238.rst | 2
-rw-r--r--  Documentation/netlink/genetlink-legacy.yaml | 15
-rw-r--r--  Documentation/netlink/genetlink.yaml | 17
-rw-r--r--  Documentation/netlink/netlink-raw.yaml | 18
-rw-r--r--  Documentation/netlink/specs/devlink.yaml | 8
-rw-r--r--  Documentation/netlink/specs/dpll.yaml | 2
-rw-r--r--  Documentation/netlink/specs/ethtool.yaml | 9
-rw-r--r--  Documentation/netlink/specs/fou.yaml | 36
-rw-r--r--  Documentation/netlink/specs/mptcp_pm.yaml | 8
-rw-r--r--  Documentation/netlink/specs/nfsd.yaml | 4
-rw-r--r--  Documentation/netlink/specs/ovpn.yaml | 153
-rw-r--r--  Documentation/netlink/specs/ovs_flow.yaml | 6
-rw-r--r--  Documentation/netlink/specs/rt-link.yaml | 4
-rw-r--r--  Documentation/netlink/specs/tc.yaml | 4
-rw-r--r--  Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst | 2
-rw-r--r--  Documentation/networking/tls.rst | 4
-rw-r--r--  Documentation/process/embargoed-hardware-issues.rst | 1
-rw-r--r--  Documentation/process/maintainer-netdev.rst | 2
-rw-r--r--  Documentation/security/credentials.rst | 2
-rw-r--r--  Documentation/sound/codecs/cs35l56.rst | 24
-rw-r--r--  Documentation/translations/zh_CN/security/credentials.rst | 2
-rw-r--r--  Documentation/virt/kvm/api.rst | 73
-rw-r--r--  Documentation/virt/kvm/review-checklist.rst | 95
-rw-r--r--  Documentation/virt/kvm/x86/intel-tdx.rst | 15
-rw-r--r--  Documentation/wmi/acpi-interface.rst | 14
-rw-r--r--  MAINTAINERS | 204
-rw-r--r--  Makefile | 5
-rw-r--r--  arch/alpha/include/asm/param.h | 12
-rw-r--r--  arch/alpha/include/asm/pgtable.h | 2
-rw-r--r--  arch/alpha/include/uapi/asm/param.h | 9
-rw-r--r--  arch/arc/include/asm/arcregs.h | 2
-rw-r--r--  arch/arc/include/asm/atomic.h | 4
-rw-r--r--  arch/arc/include/asm/atomic64-arcv2.h | 15
-rw-r--r--  arch/arc/include/asm/bitops.h | 4
-rw-r--r--  arch/arc/include/asm/bug.h | 4
-rw-r--r--  arch/arc/include/asm/cache.h | 4
-rw-r--r--  arch/arc/include/asm/current.h | 4
-rw-r--r--  arch/arc/include/asm/dsp-impl.h | 2
-rw-r--r--  arch/arc/include/asm/dsp.h | 4
-rw-r--r--  arch/arc/include/asm/dwarf.h | 4
-rw-r--r--  arch/arc/include/asm/entry.h | 4
-rw-r--r--  arch/arc/include/asm/irqflags-arcv2.h | 4
-rw-r--r--  arch/arc/include/asm/irqflags-compact.h | 4
-rw-r--r--  arch/arc/include/asm/jump_label.h | 4
-rw-r--r--  arch/arc/include/asm/linkage.h | 6
-rw-r--r--  arch/arc/include/asm/mmu-arcv2.h | 4
-rw-r--r--  arch/arc/include/asm/mmu.h | 2
-rw-r--r--  arch/arc/include/asm/page.h | 4
-rw-r--r--  arch/arc/include/asm/pgtable-bits-arcv2.h | 6
-rw-r--r--  arch/arc/include/asm/pgtable-levels.h | 4
-rw-r--r--  arch/arc/include/asm/pgtable.h | 4
-rw-r--r--  arch/arc/include/asm/processor.h | 4
-rw-r--r--  arch/arc/include/asm/ptrace.h | 4
-rw-r--r--  arch/arc/include/asm/switch_to.h | 2
-rw-r--r--  arch/arc/include/asm/thread_info.h | 4
-rw-r--r--  arch/arc/include/uapi/asm/ptrace.h | 4
-rw-r--r--  arch/arc/kernel/unwind.c | 11
-rw-r--r--  arch/arm/Kconfig | 2
-rw-r--r--  arch/arm/Makefile | 2
-rw-r--r--  arch/arm/boot/dts/allwinner/sun8i-v3s.dtsi | 2
-rw-r--r--  arch/arm/include/asm/pgtable.h | 2
-rw-r--r--  arch/arm64/Kconfig | 1
-rw-r--r--  arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi | 6
-rw-r--r--  arch/arm64/boot/dts/allwinner/sun55i-a527-cubie-a5e.dts | 4
-rw-r--r--  arch/arm64/boot/dts/allwinner/sun55i-t527-avaota-a1.dts | 4
-rw-r--r--  arch/arm64/boot/dts/apple/spi1-nvram.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/apple/t8103-j293.dts | 2
-rw-r--r--  arch/arm64/boot/dts/apple/t8103-jxxx.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/apple/t8103.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/apple/t8112-j493.dts | 2
-rw-r--r--  arch/arm64/boot/dts/apple/t8112.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi | 3
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi | 1
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts | 2
-rw-r--r--  arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts | 20
-rw-r--r--  arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts | 12
-rw-r--r--  arch/arm64/boot/dts/freescale/imx95.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 2
-rw-r--r--  arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi | 1
-rw-r--r--  arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi | 23
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts | 1
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts | 3
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts | 28
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3576.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi | 20
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi | 1
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi | 5
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts | 1
-rw-r--r--  arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi | 35
-rw-r--r--  arch/arm64/configs/defconfig | 3
-rw-r--r--  arch/arm64/include/asm/assembler.h | 5
-rw-r--r--  arch/arm64/include/asm/el2_setup.h | 19
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h | 62
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 39
-rw-r--r--  arch/arm64/include/asm/pgtable.h | 2
-rw-r--r--  arch/arm64/kernel/Makefile | 3
-rw-r--r--  arch/arm64/kernel/cpufeature.c | 57
-rw-r--r--  arch/arm64/kernel/efi.c | 11
-rw-r--r--  arch/arm64/kernel/entry.S | 6
-rw-r--r--  arch/arm64/kernel/process.c | 9
-rw-r--r--  arch/arm64/kernel/ptrace.c | 2
-rw-r--r--  arch/arm64/kernel/smp.c | 2
-rw-r--r--  arch/arm64/kvm/arch_timer.c | 18
-rw-r--r--  arch/arm64/kvm/arm.c | 19
-rw-r--r--  arch/arm64/kvm/debug.c | 4
-rw-r--r--  arch/arm64/kvm/fpsimd.c | 30
-rw-r--r--  arch/arm64/kvm/hyp/exception.c | 4
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/switch.h | 151
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 6
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/hyp-main.c | 9
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/mem_protect.c | 20
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/switch.c | 59
-rw-r--r--  arch/arm64/kvm/hyp/vhe/switch.c | 111
-rw-r--r--  arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 48
-rw-r--r--  arch/arm64/kvm/nested.c | 28
-rw-r--r--  arch/arm64/kvm/pmu-emul.c | 24
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 62
-rw-r--r--  arch/arm64/kvm/sys_regs.h | 4
-rw-r--r--  arch/arm64/kvm/vgic/vgic-v3-nested.c | 95
-rw-r--r--  arch/arm64/lib/crypto/poly1305-glue.c | 4
-rw-r--r--  arch/arm64/mm/fault.c | 30
-rw-r--r--  arch/arm64/mm/mmu.c | 3
-rw-r--r--  arch/arm64/mm/proc.S | 1
-rw-r--r--  arch/csky/include/asm/pgtable.h | 2
-rw-r--r--  arch/hexagon/include/asm/pgtable.h | 2
-rw-r--r--  arch/loongarch/include/asm/Kbuild | 1
-rw-r--r--  arch/loongarch/include/asm/addrspace.h | 8
-rw-r--r--  arch/loongarch/include/asm/alternative-asm.h | 4
-rw-r--r--  arch/loongarch/include/asm/alternative.h | 4
-rw-r--r--  arch/loongarch/include/asm/asm-extable.h | 6
-rw-r--r--  arch/loongarch/include/asm/asm.h | 8
-rw-r--r--  arch/loongarch/include/asm/cpu.h | 4
-rw-r--r--  arch/loongarch/include/asm/ftrace.h | 4
-rw-r--r--  arch/loongarch/include/asm/gpr-num.h | 6
-rw-r--r--  arch/loongarch/include/asm/irqflags.h | 4
-rw-r--r--  arch/loongarch/include/asm/jump_label.h | 4
-rw-r--r--  arch/loongarch/include/asm/kasan.h | 2
-rw-r--r--  arch/loongarch/include/asm/loongarch.h | 16
-rw-r--r--  arch/loongarch/include/asm/orc_types.h | 4
-rw-r--r--  arch/loongarch/include/asm/page.h | 4
-rw-r--r--  arch/loongarch/include/asm/pgtable-bits.h | 4
-rw-r--r--  arch/loongarch/include/asm/pgtable.h | 6
-rw-r--r--  arch/loongarch/include/asm/prefetch.h | 2
-rw-r--r--  arch/loongarch/include/asm/smp.h | 2
-rw-r--r--  arch/loongarch/include/asm/thread_info.h | 4
-rw-r--r--  arch/loongarch/include/asm/types.h | 2
-rw-r--r--  arch/loongarch/include/asm/unwind_hints.h | 6
-rw-r--r--  arch/loongarch/include/asm/vdso/arch_data.h | 4
-rw-r--r--  arch/loongarch/include/asm/vdso/getrandom.h | 4
-rw-r--r--  arch/loongarch/include/asm/vdso/gettimeofday.h | 4
-rw-r--r--  arch/loongarch/include/asm/vdso/processor.h | 4
-rw-r--r--  arch/loongarch/include/asm/vdso/vdso.h | 4
-rw-r--r--  arch/loongarch/include/asm/vdso/vsyscall.h | 4
-rw-r--r--  arch/loongarch/kernel/acpi.c | 1
-rw-r--r--  arch/loongarch/kernel/alternative.c | 1
-rw-r--r--  arch/loongarch/kernel/efi.c | 12
-rw-r--r--  arch/loongarch/kernel/elf.c | 1
-rw-r--r--  arch/loongarch/kernel/kfpu.c | 1
-rw-r--r--  arch/loongarch/kernel/paravirt.c | 1
-rw-r--r--  arch/loongarch/kernel/time.c | 2
-rw-r--r--  arch/loongarch/kernel/traps.c | 1
-rw-r--r--  arch/loongarch/kernel/unwind_guess.c | 1
-rw-r--r--  arch/loongarch/kernel/unwind_orc.c | 3
-rw-r--r--  arch/loongarch/kernel/unwind_prologue.c | 1
-rw-r--r--  arch/loongarch/kvm/intc/eiointc.c | 89
-rw-r--r--  arch/loongarch/lib/crc32-loongarch.c | 1
-rw-r--r--  arch/loongarch/lib/csum.c | 1
-rw-r--r--  arch/loongarch/mm/ioremap.c | 4
-rw-r--r--  arch/loongarch/pci/pci.c | 1
-rw-r--r--  arch/m68k/include/asm/mcf_pgtable.h | 2
-rw-r--r--  arch/m68k/include/asm/motorola_pgtable.h | 2
-rw-r--r--  arch/m68k/include/asm/sun3_pgtable.h | 2
-rw-r--r--  arch/microblaze/include/asm/pgtable.h | 2
-rw-r--r--  arch/mips/include/asm/pgtable.h | 4
-rw-r--r--  arch/nios2/include/asm/pgtable.h | 2
-rw-r--r--  arch/openrisc/include/asm/pgtable.h | 2
-rw-r--r--  arch/parisc/include/asm/pgtable.h | 2
-rw-r--r--  arch/powerpc/boot/dts/microwatt.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/mpc8315erdb.dts | 10
-rw-r--r--  arch/powerpc/include/asm/book3s/32/pgtable.h | 2
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable.h | 2
-rw-r--r--  arch/powerpc/include/asm/nohash/pgtable.h | 2
-rw-r--r--  arch/powerpc/include/asm/ppc_asm.h | 2
-rw-r--r--  arch/powerpc/include/uapi/asm/ioctls.h | 8
-rw-r--r--  arch/powerpc/kernel/eeh.c | 2
-rw-r--r--  arch/powerpc/kernel/vdso/Makefile | 2
-rw-r--r--  arch/powerpc/platforms/book3s/vas-api.c | 9
-rw-r--r--  arch/powerpc/platforms/cell/spufs/inode.c | 49
-rw-r--r--  arch/powerpc/platforms/powernv/memtrace.c | 8
-rw-r--r--  arch/riscv/Kconfig | 6
-rw-r--r--  arch/riscv/include/asm/kvm_aia.h | 4
-rw-r--r--  arch/riscv/include/asm/kvm_host.h | 3
-rw-r--r--  arch/riscv/include/asm/pgtable.h | 3
-rw-r--r--  arch/riscv/include/asm/runtime-const.h | 2
-rw-r--r--  arch/riscv/include/asm/uaccess.h | 7
-rw-r--r--  arch/riscv/include/asm/vdso/getrandom.h | 2
-rw-r--r--  arch/riscv/include/asm/vector.h | 12
-rw-r--r--  arch/riscv/kernel/cpu_ops_sbi.c | 6
-rw-r--r--  arch/riscv/kernel/ftrace.c | 18
-rw-r--r--  arch/riscv/kernel/setup.c | 1
-rw-r--r--  arch/riscv/kernel/traps.c | 10
-rw-r--r--  arch/riscv/kernel/traps_misaligned.c | 6
-rw-r--r--  arch/riscv/kernel/vdso/vdso.lds.S | 2
-rw-r--r--  arch/riscv/kernel/vendor_extensions/sifive.c | 2
-rw-r--r--  arch/riscv/kvm/aia.c | 51
-rw-r--r--  arch/riscv/kvm/aia_imsic.c | 45
-rw-r--r--  arch/riscv/kvm/vcpu.c | 10
-rw-r--r--  arch/riscv/kvm/vcpu_sbi_replace.c | 8
-rw-r--r--  arch/riscv/kvm/vcpu_timer.c | 16
-rwxr-xr-x  arch/riscv/tools/relocs_check.sh | 4
-rw-r--r--  arch/s390/crypto/sha1_s390.c | 2
-rw-r--r--  arch/s390/crypto/sha512_s390.c | 3
-rw-r--r--  arch/s390/include/asm/pgtable.h | 2
-rw-r--r--  arch/s390/include/asm/ptrace.h | 2
-rw-r--r--  arch/s390/net/bpf_jit_comp.c | 10
-rw-r--r--  arch/s390/pci/pci_event.c | 59
-rw-r--r--  arch/sh/include/asm/pgtable_32.h | 2
-rw-r--r--  arch/sparc/include/asm/pgtable_32.h | 2
-rw-r--r--  arch/sparc/include/asm/pgtable_64.h | 2
-rw-r--r--  arch/um/drivers/ubd_user.c | 2
-rw-r--r--  arch/um/drivers/vector_kern.c | 42
-rw-r--r--  arch/um/drivers/vfio_kern.c | 14
-rw-r--r--  arch/um/include/asm/Kbuild | 1
-rw-r--r--  arch/um/include/asm/pgtable.h | 2
-rw-r--r--  arch/x86/Kconfig | 13
-rw-r--r--  arch/x86/coco/sev/Makefile | 3
-rw-r--r--  arch/x86/coco/sev/core.c | 22
-rw-r--r--  arch/x86/entry/entry.S | 8
-rw-r--r--  arch/x86/events/intel/core.c | 2
-rw-r--r--  arch/x86/hyperv/hv_init.c | 1
-rw-r--r--  arch/x86/hyperv/irqdomain.c | 69
-rw-r--r--  arch/x86/hyperv/ivm.c | 1
-rw-r--r--  arch/x86/hyperv/nested.c | 1
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 6
-rw-r--r--  arch/x86/include/asm/debugreg.h | 19
-rw-r--r--  arch/x86/include/asm/irqflags.h | 4
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 10
-rw-r--r--  arch/x86/include/asm/module.h | 8
-rw-r--r--  arch/x86/include/asm/mshyperv.h | 22
-rw-r--r--  arch/x86/include/asm/msr-index.h | 1
-rw-r--r--  arch/x86/include/asm/mwait.h | 27
-rw-r--r--  arch/x86/include/asm/nospec-branch.h | 37
-rw-r--r--  arch/x86/include/asm/pgtable.h | 2
-rw-r--r--  arch/x86/include/asm/sev.h | 17
-rw-r--r--  arch/x86/include/asm/shared/tdx.h | 2
-rw-r--r--  arch/x86/include/asm/sighandling.h | 22
-rw-r--r--  arch/x86/include/asm/tdx.h | 2
-rw-r--r--  arch/x86/include/uapi/asm/debugreg.h | 21
-rw-r--r--  arch/x86/include/uapi/asm/kvm.h | 8
-rw-r--r--  arch/x86/kernel/alternative.c | 81
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 58
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 136
-rw-r--r--  arch/x86/kernel/cpu/common.c | 38
-rw-r--r--  arch/x86/kernel/cpu/mce/amd.c | 28
-rw-r--r--  arch/x86/kernel/cpu/mce/core.c | 24
-rw-r--r--  arch/x86/kernel/cpu/mce/intel.c | 1
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd_shas.c | 112
-rw-r--r--  arch/x86/kernel/cpu/resctrl/core.c | 6
-rw-r--r--  arch/x86/kernel/cpu/scattered.c | 2
-rw-r--r--  arch/x86/kernel/kgdb.c | 2
-rw-r--r--  arch/x86/kernel/process.c | 16
-rw-r--r--  arch/x86/kernel/process_32.c | 2
-rw-r--r--  arch/x86/kernel/process_64.c | 2
-rw-r--r--  arch/x86/kernel/signal_32.c | 4
-rw-r--r--  arch/x86/kernel/signal_64.c | 4
-rw-r--r--  arch/x86/kernel/smp.c | 24
-rw-r--r--  arch/x86/kernel/smpboot.c | 54
-rw-r--r--  arch/x86/kernel/traps.c | 34
-rw-r--r--  arch/x86/kvm/cpuid.c | 10
-rw-r--r--  arch/x86/kvm/hyperv.c | 5
-rw-r--r--  arch/x86/kvm/mmu/mmu.c | 9
-rw-r--r--  arch/x86/kvm/reverse_cpuid.h | 7
-rw-r--r--  arch/x86/kvm/svm/sev.c | 56
-rw-r--r--  arch/x86/kvm/svm/vmenter.S | 6
-rw-r--r--  arch/x86/kvm/vmx/tdx.c | 118
-rw-r--r--  arch/x86/kvm/vmx/vmx.c | 2
-rw-r--r--  arch/x86/kvm/x86.c | 12
-rw-r--r--  arch/x86/kvm/xen.c | 17
-rw-r--r--  arch/x86/mm/init_32.c | 3
-rw-r--r--  arch/x86/mm/init_64.c | 3
-rw-r--r--  arch/x86/mm/pat/set_memory.c | 3
-rw-r--r--  arch/x86/mm/pti.c | 5
-rw-r--r--  arch/x86/power/hibernate.c | 19
-rw-r--r--  arch/x86/um/ptrace.c | 2
-rw-r--r--  arch/x86/virt/vmx/tdx/tdx.c | 5
-rw-r--r--  arch/xtensa/include/asm/Kbuild | 1
-rw-r--r--  arch/xtensa/include/asm/pgtable.h | 2
-rw-r--r--  arch/xtensa/include/uapi/asm/param.h | 31
-rw-r--r--  block/blk-merge.c | 26
-rw-r--r--  block/blk-sysfs.c | 1
-rw-r--r--  block/blk-zoned.c | 8
-rw-r--r--  block/elevator.c | 19
-rw-r--r--  block/fops.c | 15
-rw-r--r--  block/genhd.c | 26
-rw-r--r--  crypto/Kconfig | 25
-rw-r--r--  crypto/ahash.c | 4
-rw-r--r--  crypto/hkdf.c | 2
-rw-r--r--  crypto/testmgr.c | 15
-rw-r--r--  crypto/wp512.c | 121
-rw-r--r--  drivers/accel/amdxdna/aie2_psp.c | 4
-rw-r--r--  drivers/acpi/acpi_pad.c | 2
-rw-r--r--  drivers/acpi/acpica/dsmethod.c | 7
-rw-r--r--  drivers/acpi/apei/einj-core.c | 9
-rw-r--r--  drivers/acpi/battery.c | 19
-rw-r--r--  drivers/acpi/cppc_acpi.c | 2
-rw-r--r--  drivers/acpi/ec.c | 17
-rw-r--r--  drivers/acpi/internal.h | 6
-rw-r--r--  drivers/acpi/processor_driver.c | 3
-rw-r--r--  drivers/acpi/processor_idle.c | 8
-rw-r--r--  drivers/acpi/resource.c | 7
-rw-r--r--  drivers/acpi/riscv/cppc.c | 2
-rw-r--r--  drivers/android/binder.c | 22
-rw-r--r--  drivers/android/binder_internal.h | 2
-rw-r--r--  drivers/android/binderfs.c | 15
-rw-r--r--  drivers/ata/ahci.c | 39
-rw-r--r--  drivers/ata/libata-acpi.c | 24
-rw-r--r--  drivers/ata/pata_cs5536.c | 2
-rw-r--r--  drivers/ata/pata_macio.c | 2
-rw-r--r--  drivers/ata/pata_via.c | 9
-rw-r--r--  drivers/atm/atmtcp.c | 4
-rw-r--r--  drivers/atm/idt77252.c | 5
-rw-r--r--  drivers/base/cpu.c | 3
-rw-r--r--  drivers/base/faux.c | 3
-rw-r--r--  drivers/base/firmware_loader/main.c | 31
-rw-r--r--  drivers/base/power/main.c | 24
-rw-r--r--  drivers/base/regmap/regmap.c | 2
-rw-r--r--  drivers/block/aoe/aoe.h | 1
-rw-r--r--  drivers/block/aoe/aoecmd.c | 8
-rw-r--r--  drivers/block/aoe/aoedev.c | 13
-rw-r--r--  drivers/block/brd.c | 6
-rw-r--r--  drivers/block/loop.c | 16
-rw-r--r--  drivers/block/nbd.c | 6
-rw-r--r--  drivers/block/ublk_drv.c | 61
-rw-r--r--  drivers/bluetooth/bfusb.c | 2
-rw-r--r--  drivers/bluetooth/bpa10x.c | 2
-rw-r--r--  drivers/bluetooth/btbcm.c | 8
-rw-r--r--  drivers/bluetooth/btintel.c | 30
-rw-r--r--  drivers/bluetooth/btintel_pcie.c | 72
-rw-r--r--  drivers/bluetooth/btintel_pcie.h | 10
-rw-r--r--  drivers/bluetooth/btmtksdio.c | 4
-rw-r--r--  drivers/bluetooth/btmtkuart.c | 2
-rw-r--r--  drivers/bluetooth/btnxpuart.c | 2
-rw-r--r--  drivers/bluetooth/btqca.c | 2
-rw-r--r--  drivers/bluetooth/btqcomsmd.c | 2
-rw-r--r--  drivers/bluetooth/btrtl.c | 10
-rw-r--r--  drivers/bluetooth/btsdio.c | 2
-rw-r--r--  drivers/bluetooth/btusb.c | 148
-rw-r--r--  drivers/bluetooth/hci_aml.c | 2
-rw-r--r--  drivers/bluetooth/hci_bcm.c | 4
-rw-r--r--  drivers/bluetooth/hci_bcm4377.c | 10
-rw-r--r--  drivers/bluetooth/hci_intel.c | 2
-rw-r--r--  drivers/bluetooth/hci_ldisc.c | 6
-rw-r--r--  drivers/bluetooth/hci_ll.c | 4
-rw-r--r--  drivers/bluetooth/hci_nokia.c | 2
-rw-r--r--  drivers/bluetooth/hci_qca.c | 27
-rw-r--r--  drivers/bluetooth/hci_serdev.c | 8
-rw-r--r--  drivers/bluetooth/hci_vhci.c | 8
-rw-r--r--  drivers/bluetooth/virtio_bt.c | 10
-rw-r--r--  drivers/bus/fsl-mc/fsl-mc-bus.c | 19
-rw-r--r--  drivers/char/agp/amd64-agp.c | 16
-rw-r--r--  drivers/char/tpm/eventlog/common.c | 46
-rw-r--r--  drivers/clk/clk-scmi.c | 18
-rw-r--r--  drivers/clk/imx/clk-imx95-blk-ctl.c | 12
-rw-r--r--  drivers/clk/sunxi-ng/ccu-sun55i-a523.c | 3
-rw-r--r--  drivers/clk/sunxi-ng/ccu-sun8i-v3s.c | 14
-rw-r--r--  drivers/clocksource/hyperv_timer.c | 1
-rw-r--r--  drivers/comedi/comedi_fops.c | 30
-rw-r--r--  drivers/comedi/drivers.c | 17
-rw-r--r--  drivers/comedi/drivers/aio_iiro_16.c | 3
-rw-r--r--  drivers/comedi/drivers/comedi_test.c | 2
-rw-r--r--  drivers/comedi/drivers/das16m1.c | 3
-rw-r--r--  drivers/comedi/drivers/das6402.c | 3
-rw-r--r--  drivers/comedi/drivers/pcl812.c | 3
-rw-r--r--  drivers/cpufreq/rcpufreq_dt.rs | 4
-rw-r--r--  drivers/cpuidle/cpuidle-psci.c | 23
-rw-r--r--  drivers/crypto/chelsio/chcr_algo.c | 10
-rw-r--r--  drivers/crypto/intel/qat/qat_common/qat_algs.c | 14
-rw-r--r--  drivers/cxl/core/edac.c | 18
-rw-r--r--  drivers/cxl/core/features.c | 2
-rw-r--r--  drivers/cxl/core/ras.c | 47
-rw-r--r--  drivers/dma-buf/dma-buf.c | 2
-rw-r--r--  drivers/dma-buf/dma-resv.c | 12
-rw-r--r--  drivers/dma-buf/udmabuf.c | 5
-rw-r--r--  drivers/dma/dw-edma/dw-edma-pcie.c | 60
-rw-r--r--  drivers/dma/mediatek/mtk-cqdma.c | 4
-rw-r--r--  drivers/dma/nbpfaxi.c | 11
-rw-r--r--  drivers/edac/amd64_edac.c | 58
-rwxr-xr-x  drivers/edac/ecs.c | 4
-rw-r--r--  drivers/edac/igen6_edac.c | 24
-rwxr-xr-x  drivers/edac/mem_repair.c | 1
-rwxr-xr-x  drivers/edac/scrub.c | 1
-rw-r--r--  drivers/firmware/arm_ffa/driver.c | 71
-rw-r--r--  drivers/firmware/efi/libstub/zboot.lds | 6
-rw-r--r--  drivers/firmware/samsung/exynos-acpm.c | 25
-rw-r--r--  drivers/gpio/gpio-loongson-64bit.c | 2
-rw-r--r--  drivers/gpio/gpio-mlxbf3.c | 54
-rw-r--r--  drivers/gpio/gpio-pca953x.c | 2
-rw-r--r--  drivers/gpio/gpio-spacemit-k1.c | 1
-rw-r--r--  drivers/gpio/gpiolib-acpi-quirks.c | 13
-rw-r--r--  drivers/gpio/gpiolib-devres.c | 2
-rw-r--r--  drivers/gpio/gpiolib-of.c | 2
-rw-r--r--  drivers/gpio/gpiolib.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 84
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 28
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 30
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 19
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/imu_v11_0.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 19
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 45
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 6
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 11
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 4
-rw-r--r--  drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c | 3
-rw-r--r--  drivers/gpu/drm/amd/display/dc/core/dc.c | 33
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dc.h | 8
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dc_dp_types.h | 4
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 1
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c | 1
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 5
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 1
-rw-r--r--  drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 2
-rw-r--r--  drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 28
-rw-r--r--  drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c | 38
-rw-r--r--  drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c | 3
-rw-r--r--  drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c | 3
-rw-r--r--  drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 3
-rw-r--r--  drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c | 3
-rw-r--r--  drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c | 3
-rw-r--r--  drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 8
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 12
-rw-r--r--  drivers/gpu/drm/arm/malidp_planes.c | 2
-rw-r--r--  drivers/gpu/drm/ast/ast_mode.c | 1
-rw-r--r--  drivers/gpu/drm/bridge/aux-hpd-bridge.c | 3
-rw-r--r--  drivers/gpu/drm/bridge/panel.c | 5
-rw-r--r--  drivers/gpu/drm/bridge/samsung-dsim.c | 4
-rw-r--r--  drivers/gpu/drm/bridge/ti-sn65dsi86.c | 69
-rw-r--r--  drivers/gpu/drm/display/drm_bridge_connector.c | 7
-rw-r--r--  drivers/gpu/drm/display/drm_dp_helper.c | 2
-rw-r--r--  drivers/gpu/drm/drm_buddy.c | 43
-rw-r--r--  drivers/gpu/drm/drm_framebuffer.c | 31
-rw-r--r--  drivers/gpu/drm/drm_gem.c | 74
-rw-r--r--  drivers/gpu/drm/drm_gem_dma_helper.c | 2
-rw-r--r--  drivers/gpu/drm/drm_gem_framebuffer_helper.c | 8
-rw-r--r--  drivers/gpu/drm/drm_gem_shmem_helper.c | 4
-rw-r--r--  drivers/gpu/drm/drm_internal.h | 2
-rw-r--r--  drivers/gpu/drm/drm_mipi_dsi.c | 3
-rw-r--r--  drivers/gpu/drm/drm_panic_qr.rs | 2
-rw-r--r--  drivers/gpu/drm/drm_prime.c | 8
-rw-r--r--  drivers/gpu/drm/drm_writeback.c | 7
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 4
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_sched.c | 5
-rw-r--r--  drivers/gpu/drm/exynos/exynos7_drm_decon.c | 4
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_fimd.c | 12
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_gem.c | 2
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_ipp.c | 32
-rw-r--r--  drivers/gpu/drm/i915/display/intel_bios.c | 8
-rw-r--r--  drivers/gpu/drm/i915/display/intel_display.c | 3
-rw-r--r--  drivers/gpu/drm/i915/display/intel_dp.c | 6
-rw-r--r--  drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c | 4
-rw-r--r--  drivers/gpu/drm/i915/display/vlv_dsi.c | 6
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 115
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gemfs.c | 21
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gsc.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring_submission.c | 3
-rw-r--r--  drivers/gpu/drm/i915/i915_pmu.c | 6
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_request.c | 20
-rw-r--r--  drivers/gpu/drm/i915/selftests/mock_request.c | 2
-rw-r--r--  drivers/gpu/drm/imagination/pvr_power.c | 4
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_crtc.c | 36
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_crtc.h | 1
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_ddp_comp.c | 1
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_ddp_comp.h | 9
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_disp_drv.h | 1
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_disp_ovl.c | 7
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_dpi.c | 4
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_plane.c | 12
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_plane.h | 3
-rw-r--r--  drivers/gpu/drm/meson/meson_encoder_hdmi.c | 2
-rw-r--r--  drivers/gpu/drm/meson/meson_vclk.c | 55
-rw-r--r--  drivers/gpu/drm/mgag200/mgag200_ddc.c | 1
-rw-r--r--  drivers/gpu/drm/msm/adreno/a2xx_gpummu.c | 5
-rw-r--r--  drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 2
-rw-r--r--  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 18
-rw-r--r--  drivers/gpu/drm/msm/adreno/adreno_device.c | 39
-rw-r--r--  drivers/gpu/drm/msm/adreno/adreno_gpu.c | 54
-rw-r--r--  drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2
-rw-r--r--  drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c | 14
-rw-r--r--  drivers/gpu/drm/msm/dp/dp_display.c | 7
-rw-r--r--  drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c | 7
-rw-r--r--  drivers/gpu/drm/msm/msm_debugfs.c | 32
-rw-r--r--  drivers/gpu/drm/msm/msm_drv.c | 10
-rw-r--r--  drivers/gpu/drm/msm/msm_drv.h | 23
-rw-r--r--  drivers/gpu/drm/msm/msm_gem_submit.c | 17
-rw-r--r--  drivers/gpu/drm/msm/msm_gpu.c | 20
-rw-r--r--  drivers/gpu/drm/msm/msm_gpu.h | 9
-rw-r--r--  drivers/gpu/drm/msm/msm_iommu.c | 12
-rw-r--r--  drivers/gpu/drm/msm/msm_mmu.h | 2
-rw-r--r--  drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml | 3
-rw-r--r--  drivers/gpu/drm/msm/registers/gen_header.py | 8
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_backlight.c | 2
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_debugfs.c | 6
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_debugfs.h | 5
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_drm.c | 4
-rw-r--r--  drivers/gpu/drm/nouveau/nvif/chan.c | 3
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c | 27
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c | 17
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c | 2
-rw-r--r--  drivers/gpu/drm/panel/panel-simple.c | 132
-rw-r--r--  drivers/gpu/drm/panfrost/panfrost_job.c | 2
-rw-r--r--  drivers/gpu/drm/radeon/radeon_device.c | 23
-rw-r--r--  drivers/gpu/drm/scheduler/sched_entity.c | 21
-rw-r--r--  drivers/gpu/drm/sitronix/Kconfig | 1
-rw-r--r--  drivers/gpu/drm/solomon/ssd130x.c | 2
-rw-r--r--  drivers/gpu/drm/sysfb/vesadrm.c | 13
-rw-r--r--  drivers/gpu/drm/tegra/nvdec.c | 6
-rw-r--r--  drivers/gpu/drm/ttm/ttm_bo_util.c | 13
-rw-r--r--  drivers/gpu/drm/v3d/v3d_drv.h | 8
-rw-r--r--  drivers/gpu/drm/v3d/v3d_gem.c | 2
-rw-r--r--  drivers/gpu/drm/v3d/v3d_gemfs.c | 21
-rw-r--r--  drivers/gpu/drm/v3d/v3d_irq.c | 37
-rw-r--r--  drivers/gpu/drm/v3d/v3d_sched.c | 8
-rw-r--r--  drivers/gpu/drm/vc4/vc4_hdmi.c | 12
-rw-r--r--  drivers/gpu/drm/virtio/virtgpu_prime.c | 5
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_gem.c | 6
-rw-r--r--  drivers/gpu/drm/xe/Kconfig | 8
-rw-r--r--  drivers/gpu/drm/xe/display/xe_display.c | 2
-rw-r--r--  drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 11
-rw-r--r--  drivers/gpu/drm/xe/display/xe_fb_pin.c | 5
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_mchbar_regs.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_devcoredump.c | 38
-rw-r--r--  drivers/gpu/drm/xe/xe_device.c | 72
-rw-r--r--  drivers/gpu/drm/xe/xe_drv.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_ggtt.c | 11
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.c | 15
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_pagefault.c | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 19
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf.h | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 27
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct.c | 17
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct.h | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_pc.c | 286
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_pc.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_pc_types.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.c | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_hwmon.c | 34
-rw-r--r--  drivers/gpu/drm/xe/xe_lmtt.c | 11
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc.c | 51
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc_types.h | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.c | 26
-rw-r--r--  drivers/gpu/drm/xe/xe_module.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_pci.c | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_pm.c | 11
-rw-r--r--  drivers/gpu/drm/xe/xe_ring_ops.c | 22
-rw-r--r--  drivers/gpu/drm/xe/xe_svm.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_uc_fw.c | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_wa_oob.rules | 11
-rw-r--r--  drivers/hid/hid-appletb-kbd.c | 13
-rw-r--r--  drivers/hid/hid-core.c | 19
-rw-r--r--  drivers/hid/hid-debug.c | 4
-rw-r--r--  drivers/hid/hid-elecom.c | 6
-rw-r--r--  drivers/hid/hid-ids.h | 9
-rw-r--r--  drivers/hid/hid-input.c | 2
-rw-r--r--  drivers/hid/hid-lenovo.c | 19
-rw-r--r--  drivers/hid/hid-multitouch.c | 8
-rw-r--r--  drivers/hid/hid-nintendo.c | 38
-rw-r--r--  drivers/hid/hid-quirks.c | 6
-rw-r--r--  drivers/hid/intel-ish-hid/ipc/hw-ish.h | 1
-rw-r--r--  drivers/hid/intel-ish-hid/ipc/pci-ish.c | 12
-rw-r--r--  drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c | 26
-rw-r--r--  drivers/hid/wacom_sys.c | 7
-rw-r--r--  drivers/hv/Kconfig | 2
-rw-r--r--  drivers/hv/channel.c | 1
-rw-r--r--  drivers/hv/channel_mgmt.c | 1
-rw-r--r--  drivers/hv/connection.c | 5
-rw-r--r--  drivers/hv/hv.c | 6
-rw-r--r--  drivers/hv/hv_proc.c | 1
-rw-r--r--  drivers/hv/mshv_common.c | 1
-rw-r--r--  drivers/hv/mshv_eventfd.c | 14
-rw-r--r--  drivers/hv/mshv_root_hv_call.c | 1
-rw-r--r--  drivers/hv/ring_buffer.c | 1
-rw-r--r--  drivers/hv/vmbus_drv.c | 9
-rw-r--r--  drivers/hwmon/corsair-cpro.c | 5
-rw-r--r--  drivers/hwmon/ftsteutates.c | 9
-rw-r--r--  drivers/hwmon/ina238.c | 8
-rw-r--r--  drivers/hwmon/ltc4282.c | 7
-rw-r--r--  drivers/hwmon/occ/common.c | 240
-rw-r--r--  drivers/hwmon/pmbus/ucd9000.c | 8
-rw-r--r--  drivers/i2c/algos/i2c-algo-bit.c | 4
-rw-r--r--  drivers/i2c/algos/i2c-algo-pca.c | 4
-rw-r--r--  drivers/i2c/algos/i2c-algo-pcf.c | 4
-rw-r--r--  drivers/i2c/busses/Kconfig | 4
-rw-r--r--  drivers/i2c/busses/i2c-amd-mp2-plat.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-aspeed.c | 8
-rw-r--r--  drivers/i2c/busses/i2c-at91-master.c | 4
-rw-r--r--  drivers/i2c/busses/i2c-axxia.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-bcm-iproc.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-cadence.c | 10
-rw-r--r--  drivers/i2c/busses/i2c-cgbc.c | 4
-rw-r--r--  drivers/i2c/busses/i2c-designware-amdisp.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-designware-master.c | 6
-rw-r--r--  drivers/i2c/busses/i2c-eg20t.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-emev2.c | 6
-rw-r--r--  drivers/i2c/busses/i2c-exynos5.c | 6
-rw-r--r--  drivers/i2c/busses/i2c-gxp.c | 6
-rw-r--r--  drivers/i2c/busses/i2c-img-scb.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-imx-lpi2c.c | 8
-rw-r--r--  drivers/i2c/busses/i2c-imx.c | 11
-rw-r--r--  drivers/i2c/busses/i2c-k1.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-keba.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-mchp-pci1xxxx.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-meson.c | 4
-rw-r--r--  drivers/i2c/busses/i2c-microchip-corei2c.c | 8
-rw-r--r--  drivers/i2c/busses/i2c-mt65xx.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-mxs.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-nomadik.c | 4
-rw-r--r--  drivers/i2c/busses/i2c-npcm7xx.c | 6
-rw-r--r--  drivers/i2c/busses/i2c-omap.c | 20
-rw-r--r--  drivers/i2c/busses/i2c-piix4.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-pnx.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-pxa.c | 16
-rw-r--r--  drivers/i2c/busses/i2c-qcom-cci.c | 4
-rw-r--r--  drivers/i2c/busses/i2c-qcom-geni.c | 4
-rw-r--r--  drivers/i2c/busses/i2c-qup.c | 12
-rw-r--r--  drivers/i2c/busses/i2c-rcar.c | 10
-rw-r--r--  drivers/i2c/busses/i2c-robotfuzz-osif.c | 6
-rw-r--r--  drivers/i2c/busses/i2c-s3c2410.c | 6
-rw-r--r--  drivers/i2c/busses/i2c-sh7760.c | 4
-rw-r--r--  drivers/i2c/busses/i2c-sh_mobile.c | 4
-rw-r--r--  drivers/i2c/busses/i2c-stm32.c | 8
-rw-r--r--  drivers/i2c/busses/i2c-stm32f7.c | 28
-rw-r--r--  drivers/i2c/busses/i2c-synquacer.c | 4
-rw-r--r--  drivers/i2c/busses/i2c-tegra.c | 30
-rw-r--r--  drivers/i2c/busses/i2c-tiny-usb.c | 6
-rw-r--r--  drivers/i2c/busses/i2c-virtio.c | 15
-rw-r--r--  drivers/i2c/busses/i2c-xiic.c | 4
-rw-r--r--  drivers/i2c/busses/i2c-xlp9xx.c | 2
-rw-r--r--  drivers/i2c/i2c-atr.c | 2
-rw-r--r--  drivers/i2c/i2c-mux.c | 6
-rw-r--r--  drivers/i2c/muxes/i2c-demux-pinctrl.c | 4
-rw-r--r--  drivers/idle/intel_idle.c | 12
-rw-r--r--  drivers/iio/accel/fxls8962af-core.c | 2
-rw-r--r--  drivers/iio/accel/st_accel_core.c | 10
-rw-r--r--  drivers/iio/adc/ad7380.c | 5
-rw-r--r--  drivers/iio/adc/ad7949.c | 7
-rw-r--r--  drivers/iio/adc/adi-axi-adc.c | 6
-rw-r--r--  drivers/iio/adc/axp20x_adc.c | 1
-rw-r--r--  drivers/iio/adc/max1363.c | 43
-rw-r--r--  drivers/iio/adc/stm32-adc-core.c | 7
-rw-r--r--  drivers/iio/common/st_sensors/st_sensors_core.c | 36
-rw-r--r--  drivers/iio/common/st_sensors/st_sensors_trigger.c | 20
-rw-r--r--  drivers/iio/dac/ad3530r.c | 4
-rw-r--r--  drivers/iio/industrialio-backend.c | 5
-rw-r--r--  drivers/iio/industrialio-core.c | 5
-rw-r--r--  drivers/infiniband/core/cache.c | 4
-rw-r--r--  drivers/infiniband/core/umem_odp.c | 11
-rw-r--r--  drivers/infiniband/hw/mlx5/counters.c | 4
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c | 10
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 33
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 61
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c | 8
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 5
-rw-r--r--  drivers/input/joystick/fsia6b.c | 2
-rw-r--r--  drivers/input/joystick/xpad.c | 11
-rw-r--r--  drivers/input/keyboard/atkbd.c | 4
-rw-r--r--  drivers/input/misc/cs40l50-vibra.c | 2
-rw-r--r--  drivers/input/misc/gpio-beeper.c | 2
-rw-r--r--  drivers/input/misc/iqs626a.c | 2
-rw-r--r--  drivers/input/misc/iqs7222.c | 7
-rw-r--r--  drivers/input/mouse/alps.c | 10
-rw-r--r--  drivers/input/mouse/lifebook.c | 4
-rw-r--r--  drivers/input/mouse/psmouse-base.c | 2
-rw-r--r--  drivers/input/touchscreen/Kconfig | 1
-rw-r--r--  drivers/input/touchscreen/melfas_mip4.c | 2
-rw-r--r--  drivers/interconnect/core.c | 34
-rw-r--r--  drivers/interconnect/icc-clk.c | 2
-rw-r--r--  drivers/interconnect/qcom/icc-rpmh.c | 7
-rw-r--r--  drivers/interconnect/qcom/osm-l3.c | 7
-rw-r--r--  drivers/interconnect/qcom/sc7280.c | 1
-rw-r--r--  drivers/interconnect/samsung/exynos.c | 5
-rw-r--r--  drivers/iommu/hyperv-iommu.c | 33
-rw-r--r--  drivers/iommu/intel/cache.c | 5
-rw-r--r--  drivers/iommu/intel/iommu.c | 11
-rw-r--r--  drivers/iommu/intel/iommu.h | 2
-rw-r--r--  drivers/iommu/rockchip-iommu.c | 3
-rw-r--r--  drivers/iommu/tegra-smmu.c | 4
-rw-r--r--  drivers/irqchip/Kconfig | 1
-rw-r--r--  drivers/irqchip/irq-ath79-misc.c | 20
-rw-r--r--  drivers/md/bcache/Kconfig | 1
-rw-r--r--  drivers/md/bcache/alloc.c | 57
-rw-r--r--  drivers/md/bcache/bcache.h | 2
-rw-r--r--  drivers/md/bcache/bset.c | 116
-rw-r--r--  drivers/md/bcache/bset.h | 40
-rw-r--r--  drivers/md/bcache/btree.c | 69
-rw-r--r--  drivers/md/bcache/extents.c | 45
-rw-r--r--  drivers/md/bcache/movinggc.c | 33
-rw-r--r--  drivers/md/bcache/super.c | 3
-rw-r--r--  drivers/md/bcache/sysfs.c | 4
-rw-r--r--  drivers/md/bcache/util.h | 67
-rw-r--r--  drivers/md/bcache/writeback.c | 13
-rw-r--r--  drivers/md/dm-bufio.c | 6
-rw-r--r--  drivers/md/dm-crypt.c | 11
-rw-r--r--  drivers/md/dm-raid.c | 2
-rw-r--r--  drivers/md/md-bitmap.c | 3
-rw-r--r--  drivers/md/raid1.c | 4
-rw-r--r--  drivers/md/raid10.c | 12
-rw-r--r--  drivers/memstick/core/memstick.c | 2
-rw-r--r--  drivers/mfd/88pm860x-core.c | 3
-rw-r--r--  drivers/mfd/max8925-core.c | 6
-rw-r--r--  drivers/mfd/twl4030-irq.c | 3
-rw-r--r--  drivers/misc/amd-sbi/rmi-core.c | 24
-rw-r--r--  drivers/mmc/core/quirks.h | 12
-rw-r--r--  drivers/mmc/core/sd_uhs2.c | 4
-rw-r--r--  drivers/mmc/host/bcm2835.c | 3
-rw-r--r--  drivers/mmc/host/mtk-sd.c | 21
-rw-r--r--  drivers/mmc/host/sdhci-of-k1.c | 3
-rw-r--r--  drivers/mmc/host/sdhci-pci-core.c | 3
-rw-r--r--  drivers/mmc/host/sdhci-uhs2.c | 20
-rw-r--r--  drivers/mmc/host/sdhci.c | 9
-rw-r--r--  drivers/mmc/host/sdhci.h | 16
-rw-r--r--  drivers/mmc/host/sdhci_am654.c | 9
-rw-r--r--  drivers/mtd/mtdchar.c | 2
-rw-r--r--  drivers/mtd/mtdcore.c | 152
-rw-r--r--  drivers/mtd/mtdcore.h | 2
-rw-r--r--  drivers/mtd/mtdpart.c | 16
-rw-r--r--  drivers/mtd/nand/qpic_common.c | 30
-rw-r--r--  drivers/mtd/nand/spi/core.c | 1
-rw-r--r--  drivers/mtd/nand/spi/winbond.c | 10
-rw-r--r--  drivers/mux/Kconfig | 1
-rw-r--r--  drivers/net/can/dev/dev.c | 12
-rw-r--r--  drivers/net/can/dev/netlink.c | 12
-rw-r--r--  drivers/net/can/m_can/m_can.c | 2
-rw-r--r--  drivers/net/can/m_can/tcan4x5x-core.c | 70
-rw-r--r--  drivers/net/dsa/b53/b53_common.c | 6
-rw-r--r--  drivers/net/ethernet/airoha/airoha_eth.c | 28
-rw-r--r--  drivers/net/ethernet/airoha/airoha_npu.c | 3
-rw-r--r--  drivers/net/ethernet/airoha/airoha_ppe.c | 4
-rw-r--r--  drivers/net/ethernet/airoha/airoha_regs.h | 3
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-common.h | 2
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 13
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 24
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe.h | 4
-rw-r--r--  drivers/net/ethernet/atheros/atlx/atl1.c | 79
-rw-r--r--  drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c | 3
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt.c | 102
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c | 18
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 2
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 24
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 2
-rw-r--r--  drivers/net/ethernet/broadcom/genet/bcmgenet.c | 6
-rw-r--r--  drivers/net/ethernet/cavium/thunder/nicvf_main.c | 12
-rw-r--r--  drivers/net/ethernet/cisco/enic/enic_main.c | 4
-rw-r--r--  drivers/net/ethernet/faraday/Kconfig | 1
-rw-r--r--  drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 41
-rw-r--r--  drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 15
-rw-r--r--  drivers/net/ethernet/freescale/enetc/Kconfig | 6
-rw-r--r--  drivers/net/ethernet/freescale/enetc/enetc_hw.h | 2
-rw-r--r--  drivers/net/ethernet/google/gve/gve_main.c | 67
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 31
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 2
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 36
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c | 9
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 6
-rw-r--r--  drivers/net/ethernet/ibm/ibmvnic.h | 8
-rw-r--r--  drivers/net/ethernet/intel/e1000/e1000_main.c | 8
-rw-r--r--  drivers/net/ethernet/intel/e1000e/defines.h | 3
-rw-r--r--  drivers/net/ethernet/intel/e1000e/ich8lan.c | 2
-rw-r--r--  drivers/net/ethernet/intel/e1000e/netdev.c | 14
-rw-r--r--  drivers/net/ethernet/intel/e1000e/nvm.c | 6
-rw-r--r--  drivers/net/ethernet/intel/e1000e/ptp.c | 8
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k.h | 3
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e.h | 2
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 17
-rw-r--r--  drivers/net/ethernet/intel/iavf/iavf_main.c | 11
-rw-r--r--  drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 17
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_arfs.c | 48
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ddp.c | 2
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_debugfs.c | 2
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_eswitch.c | 6
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_lag.c | 3
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ptp.c | 1
-rw-r--r--  drivers/net/ethernet/intel/idpf/idpf_controlq.c | 23
-rw-r--r--  drivers/net/ethernet/intel/idpf/idpf_controlq_api.h | 2
-rw-r--r--  drivers/net/ethernet/intel/idpf/idpf_ethtool.c | 4
-rw-r--r--  drivers/net/ethernet/intel/idpf/idpf_lib.c | 12
-rw-r--r--  drivers/net/ethernet/intel/igc/igc_main.c | 10
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe.h | 3
-rw-r--r--  drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/fs.h | 9
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_dim.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 5
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 12
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 12
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 21
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 108
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 18
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c | 14
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c | 19
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c | 3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c | 5
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 6
-rw-r--r--  drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 5
-rw-r--r--  drivers/net/ethernet/microchip/lan743x_ptp.h | 4
-rw-r--r--  drivers/net/ethernet/microsoft/mana/gdma_main.c | 4
-rw-r--r--  drivers/net/ethernet/microsoft/mana/mana_en.c | 1
-rw-r--r--  drivers/net/ethernet/pensando/ionic/ionic_main.c | 3
-rw-r--r--  drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 12
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c | 8
-rw-r--r--  drivers/net/ethernet/renesas/rtsn.c | 5
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 8
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 24
-rw-r--r--  drivers/net/ethernet/sun/niu.c | 31
-rw-r--r--  drivers/net/ethernet/sun/niu.h | 4
-rw-r--r--  drivers/net/ethernet/ti/am65-cpsw-nuss.c | 4
-rw-r--r--  drivers/net/ethernet/ti/icssg/icssg_common.c | 19
-rw-r--r--  drivers/net/ethernet/ti/icssg/icssg_config.c | 158
-rw-r--r--  drivers/net/ethernet/ti/icssg/icssg_config.h | 80
-rw-r--r--  drivers/net/ethernet/ti/icssg/icssg_prueth.c | 20
-rw-r--r--  drivers/net/ethernet/ti/icssg/icssg_prueth.h | 2
-rw-r--r--  drivers/net/ethernet/ti/icssg/icssg_switch_map.h | 3
-rw-r--r--  drivers/net/ethernet/wangxun/libwx/wx_hw.c | 9
-rw-r--r--  drivers/net/ethernet/wangxun/libwx/wx_lib.c | 49
-rw-r--r--  drivers/net/ethernet/wangxun/libwx/wx_sriov.c | 4
-rw-r--r--  drivers/net/ethernet/wangxun/libwx/wx_type.h | 5
-rw-r--r--  drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 4
-rw-r--r--  drivers/net/ethernet/wangxun/ngbe/ngbe_type.h | 2
-rw-r--r--  drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c | 1
-rw-r--r--  drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c | 8
-rw-r--r--  drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 22
-rw-r--r--  drivers/net/ethernet/wangxun/txgbe/txgbe_type.h | 4
-rw-r--r--  drivers/net/ethernet/xilinx/ll_temac_main.c | 2
-rw-r--r--  drivers/net/ethernet/xilinx/xilinx_emaclite.c | 2
-rw-r--r--  drivers/net/hyperv/netvsc_drv.c | 5
-rw-r--r--  drivers/net/macsec.c | 40
-rw-r--r--  drivers/net/netconsole.c | 3
-rw-r--r--  drivers/net/netdevsim/netdev.c | 3
-rw-r--r--  drivers/net/ovpn/io.c | 7
-rw-r--r--  drivers/net/ovpn/netlink-gen.c | 61
-rw-r--r--  drivers/net/ovpn/netlink-gen.h | 6
-rw-r--r--  drivers/net/ovpn/netlink.c | 51
-rw-r--r--  drivers/net/ovpn/udp.c | 1
-rw-r--r--  drivers/net/phy/mdio_bus.c | 12
-rw-r--r--  drivers/net/phy/microchip.c | 3
-rw-r--r--  drivers/net/phy/phy_caps.c | 18
-rw-r--r--  drivers/net/phy/phy_device.c | 6
-rw-r--r--  drivers/net/phy/qcom/at803x.c | 27
-rw-r--r--  drivers/net/phy/qcom/qca808x.c | 2
-rw-r--r--  drivers/net/phy/qcom/qcom-phy-lib.c | 25
-rw-r--r--  drivers/net/phy/qcom/qcom.h | 5
-rw-r--r--  drivers/net/phy/smsc.c | 57
-rw-r--r--  drivers/net/usb/lan78xx.c | 2
-rw-r--r--  drivers/net/usb/qmi_wwan.c | 1
-rw-r--r--  drivers/net/usb/r8152.c | 1
-rw-r--r--  drivers/net/usb/sierra_net.c | 4
-rw-r--r--  drivers/net/veth.c | 4
-rw-r--r--  drivers/net/virtio_net.c | 113
-rw-r--r--  drivers/net/wireless/ath/ath10k/mac.c | 33
-rw-r--r--  drivers/net/wireless/ath/ath10k/snoc.c | 4
-rw-r--r--  drivers/net/wireless/ath/ath11k/core.c | 29
-rw-r--r--  drivers/net/wireless/ath/ath11k/core.h | 4
-rw-r--r--  drivers/net/wireless/ath/ath11k/debugfs.c | 148
-rw-r--r--  drivers/net/wireless/ath/ath11k/debugfs.h | 10
-rw-r--r--  drivers/net/wireless/ath/ath11k/mac.c | 127
-rw-r--r--  drivers/net/wireless/ath/ath11k/mac.h | 4
-rw-r--r--  drivers/net/wireless/ath/ath11k/wmi.c | 49
-rw-r--r--  drivers/net/wireless/ath/ath12k/core.c | 14
-rw-r--r--  drivers/net/wireless/ath/ath12k/core.h | 10
-rw-r--r--  drivers/net/wireless/ath/ath12k/debugfs.c | 58
-rw-r--r--  drivers/net/wireless/ath/ath12k/debugfs.h | 7
-rw-r--r--  drivers/net/wireless/ath/ath12k/dp_rx.c | 3
-rw-r--r--  drivers/net/wireless/ath/ath12k/hal.h | 3
-rw-r--r--  drivers/net/wireless/ath/ath12k/hw.c | 6
-rw-r--r--  drivers/net/wireless/ath/ath12k/hw.h | 2
-rw-r--r--  drivers/net/wireless/ath/ath12k/mac.c | 394
-rw-r--r--  drivers/net/wireless/ath/ath12k/mac.h | 2
-rw-r--r--  drivers/net/wireless/ath/ath12k/pci.c | 6
-rw-r--r--  drivers/net/wireless/ath/ath12k/pci.h | 4
-rw-r--r--  drivers/net/wireless/ath/ath12k/wmi.c | 829
-rw-r--r--  drivers/net/wireless/ath/ath12k/wmi.h | 180
-rw-r--r--  drivers/net/wireless/ath/ath6kl/bmi.c | 4
-rw-r--r--  drivers/net/wireless/ath/carl9170/usb.c | 19
-rw-r--r--  drivers/net/wireless/ath/wil6210/interrupt.c | 26
-rw-r--r--  drivers/net/wireless/intel/iwlegacy/4965-rs.c | 3
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/dvm/main.c | 1
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h | 5
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/regulatory.c | 1
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mld/mld.c | 1
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mld/regulatory.c | 4
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c | 4
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c | 11
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 24
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c | 6
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 8
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/11n.c | 6
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/util.c | 4
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt76.h | 10
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7603/dma.c | 2
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7603/mac.c | 10
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 7
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c | 2
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 6
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt76x02.h | 5
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 4
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 12
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 2
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7915/mmio.c | 5
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 6
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7921/main.c | 3
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7925/init.c | 2
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7925/mac.c | 6
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7925/main.c | 8
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 79
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7925/mcu.h | 5
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7925/regs.h | 2
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt792x_core.c | 32
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt792x_mac.c | 5
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 52
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7996/main.c | 5
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 199
-rw-r--r--  drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h | 16
-rw-r--r--  drivers/net/wireless/mediatek/mt76/tx.c | 11
-rw-r--r--  drivers/net/wireless/mediatek/mt76/util.c | 2
-rw-r--r--  drivers/net/wireless/ralink/rt2x00/rt2x00soc.c | 4
-rw-r--r--  drivers/net/wireless/ralink/rt2x00/rt2x00soc.h | 2
-rw-r--r--  drivers/net/wireless/zydas/zd1211rw/zd_mac.c | 6
-rw-r--r--  drivers/nvme/host/core.c | 112
-rw-r--r--  drivers/nvme/host/ioctl.c | 21
-rw-r--r--  drivers/nvme/host/multipath.c | 10
-rw-r--r--  drivers/nvme/host/nvme.h | 3
-rw-r--r--  drivers/nvme/host/pci.c | 6
-rw-r--r--  drivers/nvme/target/nvmet.h | 2
-rw-r--r--  drivers/nvme/target/tcp.c | 4
-rw-r--r--  drivers/nvmem/imx-ocotp-ele.c | 5
-rw-r--r--  drivers/nvmem/imx-ocotp.c | 5
-rw-r--r--  drivers/nvmem/layouts/u-boot-env.c | 6
-rw-r--r--  drivers/pci/controller/pci-host-common.c | 4
-rw-r--r--  drivers/pci/controller/pci-hyperv-intf.c | 1
-rw-r--r--  drivers/pci/controller/pci-hyperv.c | 21
-rw-r--r--  drivers/pci/controller/pcie-apple.c | 53
-rw-r--r--  drivers/pci/ecam.c | 2
-rw-r--r--  drivers/pci/hotplug/pciehp_hpc.c | 2
-rw-r--r--  drivers/pci/msi/msi.c | 4
-rw-r--r--  drivers/pci/pci-acpi.c | 23
-rw-r--r--  drivers/pci/pci.c | 5
-rw-r--r--  drivers/pci/pcie/ptm.c | 2
-rw-r--r--  drivers/pci/probe.c | 7
-rw-r--r--  drivers/phy/phy-core.c | 5
-rw-r--r--  drivers/phy/phy-snps-eusb2.c | 6
-rw-r--r--  drivers/phy/tegra/xusb-tegra186.c | 75
-rw-r--r--  drivers/phy/tegra/xusb.h | 1
-rw-r--r--  drivers/pinctrl/nuvoton/pinctrl-ma35.c | 10
-rw-r--r--  drivers/pinctrl/pinctrl-amd.c | 11
-rw-r--r--  drivers/pinctrl/pinctrl-aw9523.c | 2
-rw-r--r--  drivers/pinctrl/pinctrl-st.c | 5
-rw-r--r--  drivers/pinctrl/pinctrl-tb10x.c | 2
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-apq8064.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-apq8084.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-ipq4019.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-ipq5018.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-ipq5332.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-ipq5424.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-ipq6018.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-ipq8064.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-ipq8074.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-ipq9574.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-mdm9607.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-mdm9615.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-msm.c | 31
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-msm.h | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-msm8226.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-msm8660.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-msm8909.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-msm8916.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-msm8917.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-msm8953.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-msm8960.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-msm8976.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-msm8994.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-msm8996.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-msm8998.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-msm8x74.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-qcm2290.c | 10
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-qcs404.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-qcs615.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-qcs8300.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-qdf2xxx.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-qdu1000.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sa8775p.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sar2130p.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sc7180.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sc7280.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sc8180x.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sc8280xp.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sdm660.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sdm670.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sdm845.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sdx55.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sdx65.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sdx75.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sm4450.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sm6115.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sm6125.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sm6350.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sm6375.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sm7150.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sm8150.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sm8250.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sm8350.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sm8450.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sm8550.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sm8650.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-sm8750.c | 1
-rw-r--r--  drivers/pinctrl/qcom/pinctrl-x1e80100.c | 1
-rw-r--r--  drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c | 8
-rw-r--r--  drivers/platform/arm64/huawei-gaokun-ec.c | 2
-rw-r--r--  drivers/platform/mellanox/mlxbf-pmc.c | 27
-rw-r--r--  drivers/platform/mellanox/mlxbf-tmfifo.c | 5
-rw-r--r--  drivers/platform/mellanox/mlxreg-dpu.c | 2
-rw-r--r--  drivers/platform/mellanox/mlxreg-lc.c | 12
-rw-r--r--  drivers/platform/mellanox/nvsw-sn2201.c | 2
-rw-r--r--  drivers/platform/x86/Makefile | 3
-rw-r--r--  drivers/platform/x86/amd/amd_isp4.c | 184
-rw-r--r--  drivers/platform/x86/amd/hsmp/hsmp.c | 14
-rw-r--r--  drivers/platform/x86/amd/pmc/pmc-quirks.c | 11
-rw-r--r--  drivers/platform/x86/amd/pmc/pmc.c | 2
-rw-r--r--  drivers/platform/x86/amd/pmf/core.c | 3
-rw-r--r--  drivers/platform/x86/amd/pmf/tee-if.c | 108
-rw-r--r--  drivers/platform/x86/asus-nb-wmi.c | 9
-rw-r--r--  drivers/platform/x86/dell/alienware-wmi-wmax.c | 19
-rw-r--r--  drivers/platform/x86/dell/dell-lis3lv02d.c | 2
-rw-r--r--  drivers/platform/x86/dell/dell-wmi-ddv.c | 10
-rw-r--r--  drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h | 5
-rw-r--r--  drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c | 5
-rw-r--r--  drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c | 5
-rw-r--r--  drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c | 5
-rw-r--r--  drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c | 5
-rw-r--r--  drivers/platform/x86/dell/dell-wmi-sysman/sysman.c | 12
-rw-r--r--  drivers/platform/x86/dell/dell_rbu.c | 10
-rw-r--r--  drivers/platform/x86/hp/hp-bioscfg/bioscfg.c | 4
-rw-r--r--  drivers/platform/x86/ideapad-laptop.c | 23
-rw-r--r--  drivers/platform/x86/intel/hid.c | 1
-rw-r--r--  drivers/platform/x86/intel/pmc/core.h | 7
-rw-r--r--  drivers/platform/x86/intel/pmc/ssram_telemetry.c | 3
-rw-r--r--  drivers/platform/x86/intel/tpmi_power_domains.c | 4
-rw-r--r--  drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c | 2
-rw-r--r--  drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c | 9
-rw-r--r--  drivers/platform/x86/lenovo-wmi-hotkey-utilities.c | 30
-rw-r--r--  drivers/platform/x86/portwell-ec.c | 1
-rw-r--r--  drivers/platform/x86/samsung-galaxybook.c | 1
-rw-r--r--  drivers/platform/x86/think-lmi.c | 94
-rw-r--r--  drivers/platform/x86/thinkpad_acpi.c | 1
-rw-r--r--  drivers/platform/x86/wmi.c | 16
-rw-r--r--  drivers/pmdomain/governor.c | 18
-rw-r--r--  drivers/power/supply/power_supply_core.c | 82
-rw-r--r--  drivers/power/supply/test_power.c | 4
-rw-r--r--  drivers/powercap/intel_rapl_common.c | 18
-rw-r--r--  drivers/ptp/ptp_clock.c | 3
-rw-r--r--  drivers/ptp/ptp_private.h | 12
-rw-r--r--  drivers/pwm/core.c | 2
-rw-r--r--  drivers/pwm/pwm-mediatek.c | 13
-rw-r--r--  drivers/rapidio/rio_cm.c | 3
-rw-r--r--  drivers/regulator/core.c | 1
-rw-r--r--  drivers/regulator/fan53555.c | 14
-rw-r--r--  drivers/regulator/gpio-regulator.c | 8
-rw-r--r--  drivers/regulator/max20086-regulator.c | 6
-rw-r--r--  drivers/regulator/mp886x.c | 3
-rw-r--r--  drivers/regulator/sy8824x.c | 5
-rw-r--r--  drivers/regulator/tps65219-regulator.c | 28
-rw-r--r--  drivers/rtc/rtc-cmos.c | 10
-rw-r--r--  drivers/rtc/rtc-pcf2127.c | 7
-rw-r--r--  drivers/rtc/rtc-s5m.c | 197
-rw-r--r--  drivers/s390/crypto/pkey_api.c | 2
-rw-r--r--  drivers/s390/net/ism_drv.c | 3
-rw-r--r--  drivers/s390/scsi/zfcp_sysfs.c | 2
-rw-r--r--  drivers/scsi/elx/efct/efct_hw.c | 5
-rw-r--r--  drivers/scsi/fnic/fdls_disc.c | 187
-rw-r--r--  drivers/scsi/fnic/fnic.h | 2
-rw-r--r--  drivers/scsi/fnic/fnic_fcs.c | 2
-rw-r--r--  drivers/scsi/fnic/fnic_fdls.h | 1
-rw-r--r--  drivers/scsi/fnic/fnic_scsi.c | 2
-rw-r--r--  drivers/scsi/hosts.c | 18
-rw-r--r--  drivers/scsi/megaraid/megaraid_sas_base.c | 6
-rw-r--r--  drivers/scsi/mvsas/mv_defs.h | 4
-rw-r--r--  drivers/scsi/qla2xxx/qla_mbx.c | 2
-rw-r--r--  drivers/scsi/qla4xxx/ql4_os.c | 2
-rw-r--r--  drivers/scsi/scsi_error.c | 3
-rw-r--r--  drivers/scsi/scsi_transport_iscsi.c | 11
-rw-r--r--  drivers/scsi/sd.c | 2
-rw-r--r--  drivers/scsi/storvsc_drv.c | 10
-rw-r--r--  drivers/soc/aspeed/aspeed-lpc-snoop.c | 13
-rw-r--r--  drivers/soundwire/amd_manager.c | 4
-rw-r--r--  drivers/soundwire/qcom.c | 26
-rw-r--r--  drivers/spi/spi-cadence-quadspi.c | 11
-rw-r--r--  drivers/spi/spi-fsl-dspi.c | 11
-rw-r--r--  drivers/spi/spi-loongson-core.c | 1
-rw-r--r--  drivers/spi/spi-offload.c | 2
-rw-r--r--  drivers/spi/spi-omap2-mcspi.c | 30
-rw-r--r--  drivers/spi/spi-pci1xxxx.c | 4
-rw-r--r--  drivers/spi/spi-qpic-snand.c | 18
-rw-r--r--  drivers/spi/spi-stm32-ospi.c | 24
-rw-r--r--  drivers/spi/spi-tegra210-quad.c | 14
-rw-r--r--  drivers/spi/spi.c | 14
-rw-r--r--  drivers/staging/gpib/common/gpib_os.c | 2
-rw-r--r--  drivers/staging/rtl8723bs/core/rtw_security.c | 44
-rw-r--r--  drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c | 98
-rw-r--r--  drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c | 1
-rw-r--r--  drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h | 2
-rw-r--r--  drivers/target/target_core_pr.c | 4
-rw-r--r--  drivers/tee/optee/ffa_abi.c | 41
-rw-r--r--  drivers/tee/optee/optee_private.h | 2
-rw-r--r--  drivers/thunderbolt/switch.c | 10
-rw-r--r--  drivers/thunderbolt/tb.h | 2
-rw-r--r--  drivers/thunderbolt/usb4.c | 12
-rw-r--r--  drivers/tty/serial/imx.c | 17
-rw-r--r--  drivers/tty/serial/pch_uart.c | 2
-rw-r--r--  drivers/tty/serial/serial_base_bus.c | 4
-rw-r--r--  drivers/tty/vt/ucs.c | 2
-rw-r--r--  drivers/tty/vt/vt.c | 1
-rw-r--r--  drivers/ufs/core/ufs-sysfs.c | 4
-rw-r--r--  drivers/ufs/core/ufshcd.c | 10
-rw-r--r--  drivers/usb/cdns3/cdnsp-debug.h | 5
-rw-r--r--  drivers/usb/cdns3/cdnsp-ep0.c | 18
-rw-r--r--  drivers/usb/cdns3/cdnsp-gadget.h | 6
-rw-r--r--  drivers/usb/cdns3/cdnsp-ring.c | 7
-rw-r--r--  drivers/usb/chipidea/udc.c | 7
-rw-r--r--  drivers/usb/core/hub.c | 39
-rw-r--r--  drivers/usb/core/hub.h | 1
-rw-r--r--  drivers/usb/core/quirks.c | 3
-rw-r--r--  drivers/usb/core/usb-acpi.c | 4
-rw-r--r--  drivers/usb/dwc2/gadget.c | 38
-rw-r--r--  drivers/usb/dwc3/core.c | 9
-rw-r--r--  drivers/usb/dwc3/dwc3-qcom.c | 7
-rw-r--r--  drivers/usb/dwc3/gadget.c | 24
-rw-r--r--  drivers/usb/gadget/configfs.c | 4
-rw-r--r--  drivers/usb/gadget/function/f_fs.c | 3
-rw-r--r--  drivers/usb/gadget/function/u_serial.c | 12
-rw-r--r--  drivers/usb/gadget/legacy/inode.c | 7
-rw-r--r--  drivers/usb/host/xhci-dbgcap.c | 4
-rw-r--r--  drivers/usb/host/xhci-dbgtty.c | 1
-rw-r--r--  drivers/usb/host/xhci-mem.c | 4
-rw-r--r--  drivers/usb/host/xhci-pci.c | 25
-rw-r--r--  drivers/usb/host/xhci-plat.c | 3
-rw-r--r--  drivers/usb/host/xhci-ring.c | 5
-rw-r--r--  drivers/usb/host/xhci.c | 31
-rw-r--r--  drivers/usb/host/xhci.h | 3
-rw-r--r--  drivers/usb/musb/musb_gadget.c | 2
-rw-r--r--  drivers/usb/serial/ftdi_sio.c | 2
-rw-r--r--  drivers/usb/serial/ftdi_sio_ids.h | 3
-rw-r--r--  drivers/usb/serial/option.c | 5
-rw-r--r--  drivers/usb/typec/altmodes/displayport.c | 5
-rw-r--r--  drivers/usb/typec/tcpm/tcpm.c | 34
-rw-r--r--  drivers/virt/coco/efi_secret/efi_secret.c | 47
-rw-r--r--  drivers/virtio/virtio_ring.c | 8
-rw-r--r--  fs/9p/vfs_dentry.c | 1
-rw-r--r--  fs/9p/vfs_super.c | 10
-rw-r--r--  fs/adfs/inode.c | 9
-rw-r--r--  fs/adfs/super.c | 2
-rw-r--r--  fs/affs/file.c | 26
-rw-r--r--  fs/affs/super.c | 4
-rw-r--r--  fs/afs/addr_prefs.c | 2
-rw-r--r--  fs/afs/cell.c | 1
-rw-r--r--  fs/afs/super.c | 4
-rw-r--r--  fs/anon_inodes.c | 23
-rw-r--r--  fs/attr.c | 10
-rw-r--r--  fs/autofs/inode.c | 2
-rw-r--r--  fs/backing-file.c | 4
-rw-r--r--  fs/bcachefs/alloc_background.c | 13
-rw-r--r--  fs/bcachefs/alloc_foreground.c | 3
-rw-r--r--  fs/bcachefs/backpointers.c | 2
-rw-r--r--  fs/bcachefs/bcachefs.h | 15
-rw-r--r--  fs/bcachefs/btree_cache.c | 26
-rw-r--r--  fs/bcachefs/btree_cache.h | 1
-rw-r--r--  fs/bcachefs/btree_gc.c | 106
-rw-r--r--  fs/bcachefs/btree_io.c | 151
-rw-r--r--  fs/bcachefs/btree_iter.c | 175
-rw-r--r--  fs/bcachefs/btree_journal_iter.c | 82
-rw-r--r--  fs/bcachefs/btree_journal_iter_types.h | 5
-rw-r--r--  fs/bcachefs/btree_locking.c | 14
-rw-r--r--  fs/bcachefs/btree_locking.h | 6
-rw-r--r--  fs/bcachefs/btree_node_scan.c | 90
-rw-r--r--  fs/bcachefs/btree_node_scan.h | 2
-rw-r--r--  fs/bcachefs/btree_trans_commit.c | 18
-rw-r--r--  fs/bcachefs/btree_types.h | 30
-rw-r--r--  fs/bcachefs/btree_update.c | 16
-rw-r--r--  fs/bcachefs/btree_update.h | 5
-rw-r--r--  fs/bcachefs/btree_update_interior.c | 49
-rw-r--r--  fs/bcachefs/btree_update_interior.h | 10
-rw-r--r--  fs/bcachefs/btree_write_buffer.c | 8
-rw-r--r--  fs/bcachefs/btree_write_buffer.h | 6
-rw-r--r--  fs/bcachefs/chardev.c | 33
-rw-r--r--  fs/bcachefs/data_update.c | 1
-rw-r--r--  fs/bcachefs/debug.c | 11
-rw-r--r--  fs/bcachefs/dirent.c | 19
-rw-r--r--  fs/bcachefs/dirent.h | 11
-rw-r--r--  fs/bcachefs/disk_accounting.c | 4
-rw-r--r--  fs/bcachefs/errcode.h | 6
-rw-r--r--  fs/bcachefs/error.c | 13
-rw-r--r--  fs/bcachefs/extent_update.c | 13
-rw-r--r--  fs/bcachefs/extents.c | 16
-rw-r--r--  fs/bcachefs/fs-io-buffered.c | 4
-rw-r--r--  fs/bcachefs/fs-io-buffered.h | 4
-rw-r--r--  fs/bcachefs/fs.c | 18
-rw-r--r--  fs/bcachefs/fsck.c | 309
-rw-r--r--  fs/bcachefs/inode.c | 13
-rw-r--r--  fs/bcachefs/inode.h | 5
-rw-r--r--  fs/bcachefs/io_misc.c | 27
-rw-r--r--  fs/bcachefs/io_misc.h | 2
-rw-r--r--  fs/bcachefs/io_read.c | 23
-rw-r--r--  fs/bcachefs/io_read.h | 1
-rw-r--r--  fs/bcachefs/journal.c | 21
-rw-r--r--  fs/bcachefs/journal.h | 2
-rw-r--r--fs/bcachefs/journal_io.c27
-rw-r--r--fs/bcachefs/journal_reclaim.c6
-rw-r--r--fs/bcachefs/movinggc.c24
-rw-r--r--fs/bcachefs/namei.c40
-rw-r--r--fs/bcachefs/opts.h10
-rw-r--r--fs/bcachefs/rcu_pending.c22
-rw-r--r--fs/bcachefs/recovery.c58
-rw-r--r--fs/bcachefs/recovery_passes.c33
-rw-r--r--fs/bcachefs/recovery_passes.h9
-rw-r--r--fs/bcachefs/reflink.c12
-rw-r--r--fs/bcachefs/sb-downgrade.c5
-rw-r--r--fs/bcachefs/sb-errors_format.h31
-rw-r--r--fs/bcachefs/sb-members.c34
-rw-r--r--fs/bcachefs/snapshot.c14
-rw-r--r--fs/bcachefs/str_hash.c5
-rw-r--r--fs/bcachefs/str_hash.h2
-rw-r--r--fs/bcachefs/super.c91
-rw-r--r--fs/bcachefs/super.h1
-rw-r--r--fs/bcachefs/trace.h125
-rw-r--r--fs/bcachefs/util.c10
-rw-r--r--fs/bcachefs/util.h2
-rw-r--r--fs/bfs/file.c7
-rw-r--r--fs/binfmt_misc.c44
-rw-r--r--fs/btrfs/Kconfig2
-rw-r--r--fs/btrfs/accessors.c162
-rw-r--r--fs/btrfs/accessors.h37
-rw-r--r--fs/btrfs/backref.c47
-rw-r--r--fs/btrfs/backref.h23
-rw-r--r--fs/btrfs/bio.c24
-rw-r--r--fs/btrfs/block-group.c86
-rw-r--r--fs/btrfs/block-group.h7
-rw-r--r--fs/btrfs/btrfs_inode.h13
-rw-r--r--fs/btrfs/compression.c24
-rw-r--r--fs/btrfs/compression.h9
-rw-r--r--fs/btrfs/ctree.c201
-rw-r--r--fs/btrfs/ctree.h35
-rw-r--r--fs/btrfs/defrag.c80
-rw-r--r--fs/btrfs/delayed-inode.c111
-rw-r--r--fs/btrfs/delayed-inode.h7
-rw-r--r--fs/btrfs/delayed-ref.c10
-rw-r--r--fs/btrfs/delayed-ref.h6
-rw-r--r--fs/btrfs/dev-replace.c18
-rw-r--r--fs/btrfs/dir-item.c4
-rw-r--r--fs/btrfs/dir-item.h2
-rw-r--r--fs/btrfs/disk-io.c48
-rw-r--r--fs/btrfs/extent-io-tree.c20
-rw-r--r--fs/btrfs/extent-io-tree.h9
-rw-r--r--fs/btrfs/extent-tree.c134
-rw-r--r--fs/btrfs/extent-tree.h2
-rw-r--r--fs/btrfs/extent_io.c190
-rw-r--r--fs/btrfs/extent_io.h6
-rw-r--r--fs/btrfs/extent_map.c6
-rw-r--r--fs/btrfs/fiemap.c2
-rw-r--r--fs/btrfs/file-item.c2
-rw-r--r--fs/btrfs/file.c178
-rw-r--r--fs/btrfs/free-space-cache.c8
-rw-r--r--fs/btrfs/free-space-tree.c419
-rw-r--r--fs/btrfs/free-space-tree.h52
-rw-r--r--fs/btrfs/fs.h13
-rw-r--r--fs/btrfs/inode-item.c24
-rw-r--r--fs/btrfs/inode-item.h11
-rw-r--r--fs/btrfs/inode.c508
-rw-r--r--fs/btrfs/ioctl.c139
-rw-r--r--fs/btrfs/messages.h105
-rw-r--r--fs/btrfs/misc.h38
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/print-tree.c4
-rw-r--r--fs/btrfs/qgroup.c362
-rw-r--r--fs/btrfs/raid-stripe-tree.c7
-rw-r--r--fs/btrfs/rcu-string.h58
-rw-r--r--fs/btrfs/ref-verify.c146
-rw-r--r--fs/btrfs/ref-verify.h4
-rw-r--r--fs/btrfs/reflink.c24
-rw-r--r--fs/btrfs/relocation.c140
-rw-r--r--fs/btrfs/relocation.h3
-rw-r--r--fs/btrfs/scrub.c77
-rw-r--r--fs/btrfs/send.c47
-rw-r--r--fs/btrfs/space-info.c14
-rw-r--r--fs/btrfs/space-info.h3
-rw-r--r--fs/btrfs/subpage.c247
-rw-r--r--fs/btrfs/subpage.h59
-rw-r--r--fs/btrfs/super.c291
-rw-r--r--fs/btrfs/sysfs.c78
-rw-r--r--fs/btrfs/tests/extent-io-tests.c28
-rw-r--r--fs/btrfs/tests/free-space-tree-tests.c93
-rw-r--r--fs/btrfs/tests/inode-tests.c24
-rw-r--r--fs/btrfs/transaction.c48
-rw-r--r--fs/btrfs/tree-checker.c12
-rw-r--r--fs/btrfs/tree-log.c617
-rw-r--r--fs/btrfs/tree-mod-log.c81
-rw-r--r--fs/btrfs/ulist.c59
-rw-r--r--fs/btrfs/volumes.c138
-rw-r--r--fs/btrfs/volumes.h38
-rw-r--r--fs/btrfs/xattr.c9
-rw-r--r--fs/btrfs/zoned.c201
-rw-r--r--fs/btrfs/zoned.h3
-rw-r--r--fs/btrfs/zstd.c3
-rw-r--r--fs/buffer.c47
-rw-r--r--fs/cachefiles/io.c2
-rw-r--r--fs/cachefiles/namei.c4
-rw-r--r--fs/cachefiles/ondemand.c4
-rw-r--r--fs/ceph/addr.c10
-rw-r--r--fs/ceph/caps.c18
-rw-r--r--fs/ceph/crypto.c82
-rw-r--r--fs/ceph/crypto.h18
-rw-r--r--fs/ceph/dir.c7
-rw-r--r--fs/ceph/mds_client.c4
-rw-r--r--fs/ceph/super.c2
-rw-r--r--fs/coda/inode.c2
-rw-r--r--fs/configfs/dir.c1
-rw-r--r--fs/configfs/mount.c3
-rw-r--r--fs/coredump.c868
-rw-r--r--fs/dcache.c163
-rw-r--r--fs/debugfs/inode.c25
-rw-r--r--fs/devpts/inode.c2
-rw-r--r--fs/direct-io.c8
-rw-r--r--fs/ecryptfs/inode.c4
-rw-r--r--fs/ecryptfs/main.c5
-rw-r--r--fs/ecryptfs/mmap.c10
-rw-r--r--fs/efivarfs/super.c10
-rw-r--r--fs/erofs/Kconfig2
-rw-r--r--fs/erofs/data.c85
-rw-r--r--fs/erofs/decompressor.c14
-rw-r--r--fs/erofs/dir.c23
-rw-r--r--fs/erofs/erofs_fs.h15
-rw-r--r--fs/erofs/fileio.c19
-rw-r--r--fs/erofs/fscache.c3
-rw-r--r--fs/erofs/inode.c21
-rw-r--r--fs/erofs/internal.h46
-rw-r--r--fs/erofs/super.c23
-rw-r--r--fs/erofs/sysfs.c4
-rw-r--r--fs/erofs/xattr.c56
-rw-r--r--fs/erofs/xattr.h3
-rw-r--r--fs/erofs/zdata.c28
-rw-r--r--fs/erofs/zmap.c159
-rw-r--r--fs/eventpoll.c70
-rw-r--r--fs/exec.c9
-rw-r--r--fs/exfat/file.c11
-rw-r--r--fs/exfat/inode.c16
-rw-r--r--fs/exfat/super.c4
-rw-r--r--fs/ext2/dir.c2
-rw-r--r--fs/ext2/inode.c11
-rw-r--r--fs/ext4/file.c3
-rw-r--r--fs/ext4/inode.c35
-rw-r--r--fs/f2fs/data.c8
-rw-r--r--fs/f2fs/file.c38
-rw-r--r--fs/f2fs/node.c1
-rw-r--r--fs/fat/inode.c18
-rw-r--r--fs/fat/namei_msdos.c2
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/file.c23
-rw-r--r--fs/file_table.c13
-rw-r--r--fs/fuse/control.c30
-rw-r--r--fs/fuse/dir.c7
-rw-r--r--fs/fuse/file.c19
-rw-r--r--fs/fuse/fuse_i.h7
-rw-r--r--fs/fuse/inode.c10
-rw-r--r--fs/gfs2/dir.c6
-rw-r--r--fs/gfs2/glock.c43
-rw-r--r--fs/gfs2/glock.h10
-rw-r--r--fs/gfs2/glops.c6
-rw-r--r--fs/gfs2/incore.h1
-rw-r--r--fs/gfs2/inode.c7
-rw-r--r--fs/gfs2/inode.h6
-rw-r--r--fs/gfs2/lock_dlm.c9
-rw-r--r--fs/gfs2/meta_io.c10
-rw-r--r--fs/gfs2/ops_fstype.c14
-rw-r--r--fs/gfs2/super.c6
-rw-r--r--fs/gfs2/util.c31
-rw-r--r--fs/hfs/hfs_fs.h2
-rw-r--r--fs/hfs/inode.c4
-rw-r--r--fs/hfs/super.c2
-rw-r--r--fs/hfsplus/hfsplus_fs.h6
-rw-r--r--fs/hfsplus/inode.c8
-rw-r--r--fs/hfsplus/super.c2
-rw-r--r--fs/hostfs/hostfs_kern.c10
-rw-r--r--fs/hpfs/file.c18
-rw-r--r--fs/hpfs/super.c2
-rw-r--r--fs/hugetlbfs/inode.c12
-rw-r--r--fs/inode.c13
-rw-r--r--fs/internal.h1
-rw-r--r--fs/iomap/buffered-io.c6
-rw-r--r--fs/isofs/inode.c11
-rw-r--r--fs/jffs2/file.c28
-rw-r--r--fs/jfs/inode.c16
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/kernfs/mount.c2
-rw-r--r--fs/libfs.c118
-rw-r--r--fs/locks.c4
-rw-r--r--fs/minix/dir.c2
-rw-r--r--fs/minix/inode.c7
-rw-r--r--fs/mount.h40
-rw-r--r--fs/namei.c48
-rw-r--r--fs/namespace.c818
-rw-r--r--fs/netfs/buffered_write.c38
-rw-r--r--fs/netfs/direct_write.c16
-rw-r--r--fs/netfs/internal.h26
-rw-r--r--fs/netfs/main.c6
-rw-r--r--fs/netfs/misc.c50
-rw-r--r--fs/netfs/read_collect.c16
-rw-r--r--fs/netfs/read_pgpriv2.c5
-rw-r--r--fs/netfs/write_collect.c14
-rw-r--r--fs/netfs/write_retry.c3
-rw-r--r--fs/nfs/blocklayout/rpc_pipefs.c53
-rw-r--r--fs/nfs/file.c8
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c118
-rw-r--r--fs/nfs/inode.c17
-rw-r--r--fs/nfs/nfs4idmap.c14
-rw-r--r--fs/nfs/pnfs.c4
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfsd/blocklayout.c20
-rw-r--r--fs/nfsd/blocklayoutxdr.c111
-rw-r--r--fs/nfsd/blocklayoutxdr.h8
-rw-r--r--fs/nfsd/export.c8
-rw-r--r--fs/nfsd/export.h2
-rw-r--r--fs/nfsd/filecache.c2
-rw-r--r--fs/nfsd/localio.c2
-rw-r--r--fs/nfsd/nfs3proc.c2
-rw-r--r--fs/nfsd/nfs4callback.c1
-rw-r--r--fs/nfsd/nfs4layouts.c4
-rw-r--r--fs/nfsd/nfs4proc.c21
-rw-r--r--fs/nfsd/nfs4recover.c49
-rw-r--r--fs/nfsd/nfs4state.c119
-rw-r--r--fs/nfsd/nfs4xdr.c4
-rw-r--r--fs/nfsd/nfsctl.c36
-rw-r--r--fs/nfsd/nfsd.h6
-rw-r--r--fs/nfsd/nfsfh.c16
-rw-r--r--fs/nfsd/nfsfh.h26
-rw-r--r--fs/nfsd/nfsproc.c2
-rw-r--r--fs/nfsd/state.h1
-rw-r--r--fs/nfsd/trace.h27
-rw-r--r--fs/nfsd/vfs.c24
-rw-r--r--fs/nfsd/xdr4.h1
-rw-r--r--fs/nilfs2/dir.c2
-rw-r--r--fs/nilfs2/inode.c17
-rw-r--r--fs/nilfs2/recovery.c3
-rw-r--r--fs/notify/dnotify/dnotify.c8
-rw-r--r--fs/ntfs3/dir.c6
-rw-r--r--fs/ntfs3/file.c41
-rw-r--r--fs/ntfs3/frecord.c31
-rw-r--r--fs/ntfs3/fsntfs.c6
-rw-r--r--fs/ntfs3/inode.c98
-rw-r--r--fs/ntfs3/namei.c26
-rw-r--r--fs/ntfs3/ntfs.h3
-rw-r--r--fs/ntfs3/ntfs_fs.h27
-rw-r--r--fs/ntfs3/super.c3
-rw-r--r--fs/ntfs3/xattr.c22
-rw-r--r--fs/ocfs2/aops.c6
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/omfs/file.c7
-rw-r--r--fs/open.c5
-rw-r--r--fs/orangefs/inode.c16
-rw-r--r--fs/orangefs/super.c2
-rw-r--r--fs/overlayfs/copy_up.c52
-rw-r--r--fs/overlayfs/dir.c260
-rw-r--r--fs/overlayfs/file.c2
-rw-r--r--fs/overlayfs/namei.c41
-rw-r--r--fs/overlayfs/overlayfs.h53
-rw-r--r--fs/overlayfs/ovl_entry.h1
-rw-r--r--fs/overlayfs/params.c12
-rw-r--r--fs/overlayfs/readdir.c44
-rw-r--r--fs/overlayfs/super.c52
-rw-r--r--fs/overlayfs/util.c46
-rw-r--r--fs/pidfs.c4
-rw-r--r--fs/pipe.c8
-rw-r--r--fs/pnode.c697
-rw-r--r--fs/pnode.h29
-rw-r--r--fs/proc/base.c9
-rw-r--r--fs/proc/fd.c11
-rw-r--r--fs/proc/generic.c10
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/internal.h6
-rw-r--r--fs/proc/namespaces.c3
-rw-r--r--fs/proc/proc_sysctl.c25
-rw-r--r--fs/proc/task_mmu.c16
-rw-r--r--fs/pstore/inode.c5
-rw-r--r--fs/ramfs/inode.c1
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/resctrl/ctrlmondata.c13
-rw-r--r--fs/resctrl/internal.h4
-rw-r--r--fs/resctrl/monitor.c6
-rw-r--r--fs/resctrl/rdtgroup.c6
-rw-r--r--fs/select.c4
-rw-r--r--fs/smb/client/cached_dir.c14
-rw-r--r--fs/smb/client/cached_dir.h10
-rw-r--r--fs/smb/client/cifs_debug.c2
-rw-r--r--fs/smb/client/cifs_ioctl.h2
-rw-r--r--fs/smb/client/cifsfs.c4
-rw-r--r--fs/smb/client/cifsglob.h3
-rw-r--r--fs/smb/client/cifsproto.h1
-rw-r--r--fs/smb/client/cifssmb.c22
-rw-r--r--fs/smb/client/connect.c84
-rw-r--r--fs/smb/client/dir.c6
-rw-r--r--fs/smb/client/file.c27
-rw-r--r--fs/smb/client/fs_context.c17
-rw-r--r--fs/smb/client/ioctl.c2
-rw-r--r--fs/smb/client/misc.c6
-rw-r--r--fs/smb/client/readdir.c30
-rw-r--r--fs/smb/client/reparse.c43
-rw-r--r--fs/smb/client/sess.c3
-rw-r--r--fs/smb/client/smb2inode.c3
-rw-r--r--fs/smb/client/smb2ops.c10
-rw-r--r--fs/smb/client/smb2pdu.c37
-rw-r--r--fs/smb/client/smbdirect.c166
-rw-r--r--fs/smb/client/trace.h24
-rw-r--r--fs/smb/server/connection.c2
-rw-r--r--fs/smb/server/connection.h1
-rw-r--r--fs/smb/server/smb2pdu.c103
-rw-r--r--fs/smb/server/transport_rdma.c15
-rw-r--r--fs/smb/server/transport_tcp.c3
-rw-r--r--fs/smb/server/vfs.c10
-rw-r--r--fs/smb/server/vfs_cache.h1
-rw-r--r--fs/stack.c4
-rw-r--r--fs/super.c4
-rw-r--r--fs/tracefs/inode.c28
-rw-r--r--fs/ubifs/file.c8
-rw-r--r--fs/udf/inode.c11
-rw-r--r--fs/ufs/dir.c2
-rw-r--r--fs/ufs/inode.c16
-rw-r--r--fs/ufs/super.c2
-rw-r--r--fs/vboxsf/file.c5
-rw-r--r--fs/vboxsf/super.c2
-rw-r--r--fs/xattr.c3
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c41
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c52
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c32
-rw-r--r--fs/xfs/libxfs/xfs_btree.c33
-rw-r--r--fs/xfs/libxfs/xfs_btree.h41
-rw-r--r--fs/xfs/libxfs/xfs_format.h2
-rw-r--r--fs/xfs/libxfs/xfs_group.c17
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c31
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c24
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h4
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c4
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c18
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c67
-rw-r--r--fs/xfs/libxfs/xfs_rtrefcount_btree.c18
-rw-r--r--fs/xfs/libxfs/xfs_rtrmap_btree.c67
-rw-r--r--fs/xfs/scrub/btree.c2
-rw-r--r--fs/xfs/scrub/common.c7
-rw-r--r--fs/xfs/scrub/common.h2
-rw-r--r--fs/xfs/scrub/dir_repair.c8
-rw-r--r--fs/xfs/scrub/fscounters.c3
-rw-r--r--fs/xfs/scrub/metapath.c4
-rw-r--r--fs/xfs/scrub/nlinks.c8
-rw-r--r--fs/xfs/scrub/nlinks_repair.c4
-rw-r--r--fs/xfs/scrub/parent_repair.c12
-rw-r--r--fs/xfs/scrub/quotacheck.c4
-rw-r--r--fs/xfs/scrub/rcbag_btree.c38
-rw-r--r--fs/xfs/scrub/repair.c36
-rw-r--r--fs/xfs/scrub/repair.h4
-rw-r--r--fs/xfs/scrub/rmap_repair.c14
-rw-r--r--fs/xfs/scrub/rtrmap_repair.c14
-rw-r--r--fs/xfs/scrub/scrub.c5
-rw-r--r--fs/xfs/scrub/trace.h2
-rw-r--r--fs/xfs/xfs_attr_item.c148
-rw-r--r--fs/xfs/xfs_attr_item.h8
-rw-r--r--fs/xfs/xfs_bmap_item.c18
-rw-r--r--fs/xfs/xfs_buf.c53
-rw-r--r--fs/xfs/xfs_buf.h9
-rw-r--r--fs/xfs/xfs_buf_item.c303
-rw-r--r--fs/xfs/xfs_buf_item.h5
-rw-r--r--fs/xfs/xfs_buf_item_recover.c38
-rw-r--r--fs/xfs/xfs_discard.c41
-rw-r--r--fs/xfs/xfs_dquot.c4
-rw-r--r--fs/xfs/xfs_dquot_item_recover.c20
-rw-r--r--fs/xfs/xfs_exchmaps_item.c8
-rw-r--r--fs/xfs/xfs_extent_busy.h8
-rw-r--r--fs/xfs/xfs_extfree_item.c59
-rw-r--r--fs/xfs/xfs_file.c33
-rw-r--r--fs/xfs/xfs_fsmap.c4
-rw-r--r--fs/xfs/xfs_icache.c13
-rw-r--r--fs/xfs/xfs_icreate_item.c2
-rw-r--r--fs/xfs/xfs_inode.c9
-rw-r--r--fs/xfs/xfs_inode.h2
-rw-r--r--fs/xfs/xfs_inode_item.c11
-rw-r--r--fs/xfs/xfs_inode_item.h4
-rw-r--r--fs/xfs/xfs_inode_item_recover.c26
-rw-r--r--fs/xfs/xfs_ioctl.c3
-rw-r--r--fs/xfs/xfs_iomap.c2
-rw-r--r--fs/xfs/xfs_iops.c6
-rw-r--r--fs/xfs/xfs_itable.c18
-rw-r--r--fs/xfs/xfs_iwalk.c11
-rw-r--r--fs/xfs/xfs_log.c16
-rw-r--r--fs/xfs/xfs_log.h16
-rw-r--r--fs/xfs/xfs_log_cil.c75
-rw-r--r--fs/xfs/xfs_log_priv.h4
-rw-r--r--fs/xfs/xfs_log_recover.c16
-rw-r--r--fs/xfs/xfs_mount.c97
-rw-r--r--fs/xfs/xfs_mount.h17
-rw-r--r--fs/xfs/xfs_mru_cache.c19
-rw-r--r--fs/xfs/xfs_notify_failure.c9
-rw-r--r--fs/xfs/xfs_qm.c96
-rw-r--r--fs/xfs/xfs_refcount_item.c34
-rw-r--r--fs/xfs/xfs_rmap_item.c34
-rw-r--r--fs/xfs/xfs_rtalloc.c15
-rw-r--r--fs/xfs/xfs_super.c5
-rw-r--r--fs/xfs/xfs_trace.h121
-rw-r--r--fs/xfs/xfs_trans.c211
-rw-r--r--fs/xfs/xfs_trans.h4
-rw-r--r--fs/xfs/xfs_xattr.c2
-rw-r--r--fs/xfs/xfs_zone_alloc.c87
-rw-r--r--fs/xfs/xfs_zone_alloc.h4
-rw-r--r--fs/xfs/xfs_zone_gc.c18
-rw-r--r--fs/xfs/xfs_zone_info.c2
-rw-r--r--fs/xfs/xfs_zone_priv.h16
-rw-r--r--fs/xfs/xfs_zone_space_resv.c17
-rw-r--r--include/asm-generic/param.h2
-rw-r--r--include/crypto/hash.h2
-rw-r--r--include/crypto/internal/sha2.h2
-rw-r--r--include/crypto/internal/simd.h6
-rw-r--r--include/crypto/md5.h4
-rw-r--r--include/drm/drm_buddy.h2
-rw-r--r--include/drm/drm_file.h3
-rw-r--r--include/drm/drm_framebuffer.h7
-rw-r--r--include/drm/drm_mipi_dsi.h3
-rw-r--r--include/drm/spsc_queue.h4
-rw-r--r--include/dt-bindings/clock/sun8i-v3s-ccu.h2
-rw-r--r--include/linux/arm-smccc.h2
-rw-r--r--include/linux/arm_ffa.h1
-rw-r--r--include/linux/atmdev.h6
-rw-r--r--include/linux/bio.h2
-rw-r--r--include/linux/blkdev.h5
-rw-r--r--include/linux/buffer_head.h8
-rw-r--r--include/linux/bvec.h7
-rw-r--r--include/linux/cleanup.h8
-rw-r--r--include/linux/coredump.h4
-rw-r--r--include/linux/cpu.h4
-rw-r--r--include/linux/cred.h2
-rw-r--r--include/linux/dcache.h6
-rw-r--r--include/linux/execmem.h8
-rw-r--r--include/linux/exportfs.h4
-rw-r--r--include/linux/filelock.h7
-rw-r--r--include/linux/fs.h66
-rw-r--r--include/linux/fs_context.h2
-rw-r--r--include/linux/fs_stack.h2
-rw-r--r--include/linux/futex.h1
-rw-r--r--include/linux/ieee80211.h63
-rw-r--r--include/linux/interconnect-provider.h7
-rw-r--r--include/linux/io_uring_types.h2
-rw-r--r--include/linux/irqchip/irq-msi-lib.h1
-rw-r--r--include/linux/ism.h1
-rw-r--r--include/linux/key.h2
-rw-r--r--include/linux/kmemleak.h4
-rw-r--r--include/linux/libata.h7
-rw-r--r--include/linux/mm.h5
-rw-r--r--include/linux/module.h5
-rw-r--r--include/linux/mount.h24
-rw-r--r--include/linux/mtd/nand-qpic-common.h8
-rw-r--r--include/linux/mtd/partitions.h2
-rw-r--r--include/linux/mtd/spinand.h10
-rw-r--r--include/linux/netfs.h22
-rw-r--r--include/linux/pagemap.h27
-rw-r--r--include/linux/perf_event.h42
-rw-r--r--include/linux/phy/phy.h2
-rw-r--r--include/linux/platform_data/x86/amd-fch.h (renamed from arch/x86/include/asm/amd/fch.h)0
-rw-r--r--include/linux/power_supply.h8
-rw-r--r--include/linux/proc_fs.h2
-rw-r--r--include/linux/psp-sev.h2
-rw-r--r--include/linux/quotaops.h2
-rw-r--r--include/linux/resctrl.h4
-rw-r--r--include/linux/scatterlist.h4
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/security.h3
-rw-r--r--include/linux/soc/amd/isp4_misc.h12
-rw-r--r--include/linux/spi/spi.h2
-rw-r--r--include/linux/sprintf.h1
-rw-r--r--include/linux/sunrpc/msg_prot.h18
-rw-r--r--include/linux/sunrpc/rpc_pipe_fs.h6
-rw-r--r--include/linux/sunrpc/svc.h6
-rw-r--r--include/linux/sunrpc/svcauth.h1
-rw-r--r--include/linux/sunrpc/xdr.h5
-rw-r--r--include/linux/suspend.h5
-rw-r--r--include/linux/tpm.h2
-rw-r--r--include/linux/usb.h2
-rw-r--r--include/linux/usb/typec_dp.h1
-rw-r--r--include/linux/vm_event_item.h2
-rw-r--r--include/net/af_vsock.h2
-rw-r--r--include/net/bluetooth/hci.h2
-rw-r--r--include/net/bluetooth/hci_core.h66
-rw-r--r--include/net/bluetooth/hci_sync.h4
-rw-r--r--include/net/cfg80211.h2
-rw-r--r--include/net/netfilter/nf_conntrack.h15
-rw-r--r--include/net/netfilter/nf_flow_table.h2
-rw-r--r--include/net/netfilter/nf_tables.h5
-rw-r--r--include/net/pkt_sched.h25
-rw-r--r--include/net/sch_generic.h8
-rw-r--r--include/net/sock.h7
-rw-r--r--include/net/xfrm.h15
-rw-r--r--include/trace/events/btrfs.h5
-rw-r--r--include/trace/events/erofs.h18
-rw-r--r--include/trace/events/netfs.h59
-rw-r--r--include/trace/events/rxrpc.h6
-rw-r--r--include/trace/events/sunrpc.h25
-rw-r--r--include/uapi/asm-generic/param.h6
-rw-r--r--include/uapi/linux/bits.h4
-rw-r--r--include/uapi/linux/btrfs.h3
-rw-r--r--include/uapi/linux/coredump.h104
-rw-r--r--include/uapi/linux/ethtool_netlink.h4
-rw-r--r--include/uapi/linux/ethtool_netlink_generated.h4
-rw-r--r--include/uapi/linux/kvm.h26
-rw-r--r--include/uapi/linux/mptcp_pm.h6
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h10
-rw-r--r--include/uapi/linux/netfilter/nfnetlink.h2
-rw-r--r--include/uapi/linux/ublk_cmd.h32
-rw-r--r--include/uapi/linux/vm_sockets.h4
-rw-r--r--init/Kconfig4
-rw-r--r--init/initramfs.c1
-rw-r--r--init/main.c1
-rw-r--r--io_uring/fdinfo.c12
-rw-r--r--io_uring/io-wq.c4
-rw-r--r--io_uring/io_uring.c7
-rw-r--r--io_uring/io_uring.h2
-rw-r--r--io_uring/kbuf.c6
-rw-r--r--io_uring/kbuf.h3
-rw-r--r--io_uring/msg_ring.c4
-rw-r--r--io_uring/net.c48
-rw-r--r--io_uring/opdef.c1
-rw-r--r--io_uring/openclose.c2
-rw-r--r--io_uring/poll.c2
-rw-r--r--io_uring/register.c7
-rw-r--r--io_uring/rsrc.c34
-rw-r--r--io_uring/rsrc.h1
-rw-r--r--io_uring/sqpoll.c49
-rw-r--r--io_uring/sqpoll.h8
-rw-r--r--io_uring/zcrx.c11
-rw-r--r--ipc/mqueue.c3
-rw-r--r--kernel/Kconfig.kexec1
-rw-r--r--kernel/audit_tree.c63
-rw-r--r--kernel/bpf/bpf_lru_list.c9
-rw-r--r--kernel/bpf/bpf_lru_list.h1
-rw-r--r--kernel/bpf/cgroup.c2
-rw-r--r--kernel/bpf/helpers.c11
-rw-r--r--kernel/bpf/sysfs_btf.c2
-rw-r--r--kernel/bpf/verifier.c5
-rw-r--r--kernel/cgroup/legacy_freezer.c11
-rw-r--r--kernel/dma/contiguous.c5
-rw-r--r--kernel/events/core.c142
-rw-r--r--kernel/events/ring_buffer.c4
-rw-r--r--kernel/exit.c17
-rw-r--r--kernel/freezer.c15
-rw-r--r--kernel/futex/core.c14
-rw-r--r--kernel/irq/chip.c8
-rw-r--r--kernel/irq/cpuhotplug.c7
-rw-r--r--kernel/irq/irq_sim.c2
-rw-r--r--kernel/kexec_core.c1
-rw-r--r--kernel/kexec_handover.c29
-rw-r--r--kernel/module/main.c17
-rw-r--r--kernel/power/hibernate.c3
-rw-r--r--kernel/power/power.h5
-rw-r--r--kernel/power/suspend.c6
-rw-r--r--kernel/rcu/tree.c4
-rw-r--r--kernel/resource.c5
-rw-r--r--kernel/sched/core.c20
-rw-r--r--kernel/sched/deadline.c10
-rw-r--r--kernel/sched/debug.c4
-rw-r--r--kernel/sched/ext.c29
-rw-r--r--kernel/sched/ext.h2
-rw-r--r--kernel/sched/ext_idle.c2
-rw-r--r--kernel/sched/loadavg.c2
-rw-r--r--kernel/sched/sched.h2
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/stop_machine.c20
-rw-r--r--kernel/time/posix-cpu-timers.c9
-rw-r--r--kernel/time/timekeeping.c2
-rw-r--r--kernel/trace/trace_events.c5
-rw-r--r--kernel/trace/trace_events_filter.c16
-rw-r--r--kernel/trace/trace_functions_graph.c6
-rw-r--r--kernel/trace/trace_osnoise.c2
-rw-r--r--kernel/trace/trace_probe.c2
-rw-r--r--kernel/workqueue.c3
-rw-r--r--lib/Kconfig1
-rw-r--r--lib/alloc_tag.c11
-rw-r--r--lib/crypto/Makefile6
-rw-r--r--lib/crypto/aescfb.c8
-rw-r--r--lib/crypto/aesgcm.c46
-rw-r--r--lib/group_cpus.c9
-rw-r--r--lib/maple_tree.c5
-rw-r--r--lib/raid6/rvv.c48
-rw-r--r--lib/scatterlist.c8
-rw-r--r--lib/test_objagg.c4
-rw-r--r--mm/damon/Kconfig1
-rw-r--r--mm/damon/core.c23
-rw-r--r--mm/damon/sysfs-schemes.c1
-rw-r--r--mm/execmem.c40
-rw-r--r--mm/filemap.c4
-rw-r--r--mm/gup.c14
-rw-r--r--mm/hugetlb.c63
-rw-r--r--mm/kasan/report.c47
-rw-r--r--mm/kmemleak.c14
-rw-r--r--mm/ksm.c6
-rw-r--r--mm/madvise.c2
-rw-r--r--mm/memcontrol.c2
-rw-r--r--mm/memory-failure.c4
-rw-r--r--mm/memory.c20
-rw-r--r--mm/migrate.c14
-rw-r--r--mm/rmap.c46
-rw-r--r--mm/secretmem.c29
-rw-r--r--mm/shmem.c22
-rw-r--r--mm/swap.h23
-rw-r--r--mm/userfaultfd.c33
-rw-r--r--mm/util.c40
-rw-r--r--mm/vma.c23
-rw-r--r--mm/vma.h47
-rw-r--r--mm/vmalloc.c22
-rw-r--r--mm/vmscan.c8
-rw-r--r--mm/vmstat.c2
-rw-r--r--mm/zsmalloc.c3
-rw-r--r--net/8021q/vlan.c42
-rw-r--r--net/8021q/vlan.h1
-rw-r--r--net/appletalk/aarp.c24
-rw-r--r--net/appletalk/ddp.c1
-rw-r--r--net/atm/clip.c75
-rw-r--r--net/atm/common.c1
-rw-r--r--net/atm/lec.c12
-rw-r--r--net/atm/raw.c2
-rw-r--r--net/atm/resources.c3
-rw-r--r--net/bluetooth/eir.c17
-rw-r--r--net/bluetooth/eir.h2
-rw-r--r--net/bluetooth/hci_conn.c31
-rw-r--r--net/bluetooth/hci_core.c70
-rw-r--r--net/bluetooth/hci_debugfs.c8
-rw-r--r--net/bluetooth/hci_event.c58
-rw-r--r--net/bluetooth/hci_sync.c333
-rw-r--r--net/bluetooth/iso.c17
-rw-r--r--net/bluetooth/l2cap_core.c25
-rw-r--r--net/bluetooth/l2cap_sock.c3
-rw-r--r--net/bluetooth/mgmt.c203
-rw-r--r--net/bluetooth/mgmt_util.c32
-rw-r--r--net/bluetooth/mgmt_util.h4
-rw-r--r--net/bluetooth/msft.c2
-rw-r--r--net/bluetooth/smp.c21
-rw-r--r--net/bluetooth/smp.h1
-rw-r--r--net/bridge/br_multicast.c9
-rw-r--r--net/bridge/br_switchdev.c3
-rw-r--r--net/core/filter.c19
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/selftests.c5
-rw-r--r--net/core/skbuff.c3
-rw-r--r--net/ethtool/ioctl.c3
-rw-r--r--net/ipv4/ip_input.c7
-rw-r--r--net/ipv4/ipcomp.c2
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_fastopen.c3
-rw-r--r--net/ipv4/tcp_input.c41
-rw-r--r--net/ipv4/tcp_offload.c1
-rw-r--r--net/ipv4/udp_offload.c1
-rw-r--r--net/ipv4/xfrm4_input.c3
-rw-r--r--net/ipv6/addrconf.c9
-rw-r--r--net/ipv6/calipso.c8
-rw-r--r--net/ipv6/ipcomp6.c2
-rw-r--r--net/ipv6/mcast.c2
-rw-r--r--net/ipv6/route.c110
-rw-r--r--net/ipv6/rpl_iptunnel.c8
-rw-r--r--net/ipv6/xfrm6_input.c3
-rw-r--r--net/ipv6/xfrm6_tunnel.c2
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/mac80211/cfg.c14
-rw-r--r--net/mac80211/debug.h5
-rw-r--r--net/mac80211/iface.c4
-rw-r--r--net/mac80211/link.c6
-rw-r--r--net/mac80211/mlme.c12
-rw-r--r--net/mac80211/parse.c6
-rw-r--r--net/mac80211/rx.c4
-rw-r--r--net/mac80211/tx.c29
-rw-r--r--net/mac80211/util.c11
-rw-r--r--net/mpls/af_mpls.c4
-rw-r--r--net/mptcp/options.c3
-rw-r--r--net/mptcp/pm.c8
-rw-r--r--net/mptcp/protocol.c56
-rw-r--r--net/mptcp/protocol.h29
-rw-r--r--net/mptcp/subflow.c30
-rw-r--r--net/netfilter/nf_conntrack_core.c26
-rw-r--r--net/netfilter/nf_tables_api.c59
-rw-r--r--net/netfilter/nf_tables_trace.c3
-rw-r--r--net/netfilter/nfnetlink.c1
-rw-r--r--net/netfilter/nft_chain_filter.c2
-rw-r--r--net/netlink/af_netlink.c82
-rw-r--r--net/nfc/nci/uart.c8
-rw-r--r--net/openvswitch/actions.c23
-rw-r--r--net/openvswitch/datapath.c42
-rw-r--r--net/openvswitch/datapath.h3
-rw-r--r--net/packet/af_packet.c27
-rw-r--r--net/phonet/pep.c2
-rw-r--r--net/rose/rose_route.c15
-rw-r--r--net/rxrpc/ar-internal.h19
-rw-r--r--net/rxrpc/call_accept.c18
-rw-r--r--net/rxrpc/call_object.c28
-rw-r--r--net/rxrpc/io_thread.c14
-rw-r--r--net/rxrpc/output.c27
-rw-r--r--net/rxrpc/peer_object.c6
-rw-r--r--net/rxrpc/recvmsg.c23
-rw-r--r--net/rxrpc/security.c8
-rw-r--r--net/sched/sch_api.c52
-rw-r--r--net/sched/sch_ets.c2
-rw-r--r--net/sched/sch_hfsc.c16
-rw-r--r--net/sched/sch_htb.c4
-rw-r--r--net/sched/sch_prio.c2
-rw-r--r--net/sched/sch_qfq.c35
-rw-r--r--net/sched/sch_red.c2
-rw-r--r--net/sched/sch_sfq.c15
-rw-r--r--net/sched/sch_taprio.c6
-rw-r--r--net/sched/sch_tbf.c2
-rw-r--r--net/smc/af_smc.c14
-rw-r--r--net/smc/smc.h8
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c15
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c3
-rw-r--r--net/sunrpc/clnt.c36
-rw-r--r--net/sunrpc/rpc_pipe.c532
-rw-r--r--net/sunrpc/socklib.c164
-rw-r--r--net/sunrpc/svc.c37
-rw-r--r--net/sunrpc/svcsock.c5
-rw-r--r--net/sunrpc/xdr.c11
-rw-r--r--net/tipc/topsrv.c2
-rw-r--r--net/tipc/udp_media.c4
-rw-r--r--net/tls/tls_strp.c3
-rw-r--r--net/unix/af_unix.c34
-rw-r--r--net/vmw_vsock/af_vsock.c57
-rw-r--r--net/vmw_vsock/vmci_transport.c4
-rw-r--r--net/wireless/nl80211.c9
-rw-r--r--net/wireless/util.c52
-rw-r--r--net/xfrm/xfrm_device.c1
-rw-r--r--net/xfrm/xfrm_interface_core.c7
-rw-r--r--net/xfrm/xfrm_ipcomp.c3
-rw-r--r--net/xfrm/xfrm_state.c69
-rw-r--r--net/xfrm/xfrm_user.c3
-rw-r--r--rust/Makefile1
-rw-r--r--rust/bindings/bindings_helper.h1
-rw-r--r--rust/helpers/completion.c8
-rw-r--r--rust/helpers/cpu.c8
-rw-r--r--rust/helpers/helpers.c2
-rw-r--r--rust/kernel/cpu.rs125
-rw-r--r--rust/kernel/cpufreq.rs173
-rw-r--r--rust/kernel/cpumask.rs51
-rw-r--r--rust/kernel/devres.rs60
-rw-r--r--rust/kernel/drm/device.rs12
-rw-r--r--rust/kernel/drm/driver.rs1
-rw-r--r--rust/kernel/firmware.rs2
-rw-r--r--rust/kernel/init.rs8
-rw-r--r--rust/kernel/kunit.rs2
-rw-r--r--rust/kernel/lib.rs3
-rw-r--r--rust/kernel/revocable.rs18
-rw-r--r--rust/kernel/sync.rs2
-rw-r--r--rust/kernel/sync/completion.rs112
-rw-r--r--rust/kernel/time/hrtimer.rs2
-rw-r--r--rust/macros/module.rs10
-rw-r--r--samples/damon/mtier.c8
-rw-r--r--samples/damon/prcl.c8
-rw-r--r--samples/damon/wsse.c8
-rw-r--r--scripts/Makefile.build3
-rw-r--r--scripts/gdb/linux/constants.py.in7
-rw-r--r--scripts/gdb/linux/interrupts.py16
-rw-r--r--scripts/gdb/linux/mapletree.py252
-rw-r--r--scripts/gdb/linux/vfs.py2
-rw-r--r--scripts/gdb/linux/xarray.py28
-rw-r--r--scripts/gendwarfksyms/gendwarfksyms.h14
-rw-r--r--scripts/gendwarfksyms/types.c65
-rwxr-xr-xscripts/misc-check15
-rw-r--r--security/apparmor/file.c2
-rw-r--r--security/inode.c62
-rw-r--r--security/integrity/evm/evm_secfs.c15
-rw-r--r--security/integrity/ima/ima_fs.c137
-rw-r--r--security/ipe/fs.c32
-rw-r--r--security/ipe/policy_fs.c4
-rw-r--r--security/keys/gc.c4
-rw-r--r--security/keys/key.c5
-rw-r--r--security/landlock/syscalls.c1
-rw-r--r--security/selinux/ss/services.c16
-rw-r--r--security/selinux/xfrm.c2
-rw-r--r--sound/core/compress_offload.c48
-rw-r--r--sound/isa/ad1816a/ad1816a.c2
-rw-r--r--sound/isa/sb/sb16_main.c7
-rw-r--r--sound/pci/ctxfi/xfi.c4
-rw-r--r--sound/pci/hda/cs35l56_hda.c110
-rw-r--r--sound/pci/hda/hda_intel.c2
-rw-r--r--sound/pci/hda/patch_hdmi.c19
-rw-r--r--sound/pci/hda/patch_realtek.c52
-rw-r--r--sound/pci/hda/tas2781_hda.c8
-rw-r--r--sound/soc/amd/ps/acp63.h4
-rw-r--r--sound/soc/amd/ps/ps-common.c18
-rw-r--r--sound/soc/amd/yc/acp6x-mach.c49
-rw-r--r--sound/soc/apple/Kconfig1
-rw-r--r--sound/soc/codecs/cs35l56-sdw.c18
-rw-r--r--sound/soc/codecs/cs35l56-shared.c2
-rw-r--r--sound/soc/codecs/cs35l56.c72
-rw-r--r--sound/soc/codecs/cs35l56.h3
-rw-r--r--sound/soc/codecs/cs48l32.c4
-rw-r--r--sound/soc/codecs/es8326.c3
-rw-r--r--sound/soc/codecs/rt5645.c1
-rw-r--r--sound/soc/codecs/rt5660.c7
-rw-r--r--sound/soc/codecs/rt721-sdca.c23
-rw-r--r--sound/soc/codecs/wm_adsp.c27
-rw-r--r--sound/soc/codecs/wm_adsp.h2
-rw-r--r--sound/soc/fsl/fsl_asrc.c3
-rw-r--r--sound/soc/fsl/fsl_sai.c14
-rw-r--r--sound/soc/intel/avs/pcm.c4
-rw-r--r--sound/soc/intel/boards/Kconfig3
-rw-r--r--sound/soc/intel/boards/sof_sdw.c3
-rw-r--r--sound/soc/intel/common/soc-acpi-intel-arl-match.c37
-rw-r--r--sound/soc/intel/common/sof-function-topology-lib.c3
-rw-r--r--sound/soc/loongson/loongson_i2s.c1
-rw-r--r--sound/soc/mediatek/common/mtk-soundcard-driver.c4
-rw-r--r--sound/soc/mediatek/mt8365/mt8365-dai-i2s.c3
-rw-r--r--sound/soc/qcom/Kconfig1
-rw-r--r--sound/soc/sdca/sdca_functions.c2
-rw-r--r--sound/soc/sdw_utils/soc_sdw_utils.c2
-rw-r--r--sound/soc/sof/imx/imx8.c15
-rw-r--r--sound/soc/sof/intel/hda.c6
-rw-r--r--sound/soc/sof/intel/ptl.c1
-rw-r--r--sound/usb/format.c22
-rw-r--r--sound/usb/mixer_maps.c12
-rw-r--r--sound/usb/qcom/qc_audio_offload.c22
-rw-r--r--sound/usb/stream.c2
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h9
-rw-r--r--tools/arch/loongarch/include/asm/orc_types.h4
-rw-r--r--tools/arch/x86/include/asm/amd/ibs.h5
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h14
-rw-r--r--tools/arch/x86/include/asm/msr-index.h17
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h71
-rw-r--r--tools/arch/x86/include/uapi/asm/svm.h2
-rw-r--r--tools/arch/x86/include/uapi/asm/vmx.h5
-rw-r--r--tools/arch/x86/lib/memcpy_64.S1
-rw-r--r--tools/arch/x86/lib/memset_64.S1
-rw-r--r--tools/bpf/resolve_btfids/Makefile2
-rw-r--r--tools/hv/hv_fcopy_uio_daemon.c128
-rw-r--r--tools/include/linux/bits.h57
-rw-r--r--tools/include/linux/build_bug.h10
-rw-r--r--tools/include/linux/compiler.h8
-rw-r--r--tools/include/linux/kallsyms.h4
-rw-r--r--tools/include/uapi/drm/drm.h4
-rw-r--r--tools/include/uapi/linux/bits.h4
-rw-r--r--tools/include/uapi/linux/coredump.h104
-rw-r--r--tools/include/uapi/linux/fscrypt.h6
-rw-r--r--tools/include/uapi/linux/kvm.h4
-rw-r--r--tools/include/uapi/linux/stat.h8
-rw-r--r--tools/lib/bpf/btf.c6
-rw-r--r--tools/lib/bpf/btf_dump.c3
-rw-r--r--tools/lib/bpf/libbpf.c30
-rw-r--r--tools/net/ynl/pyynl/lib/ynl.py28
-rw-r--r--tools/objtool/check.c2
-rw-r--r--tools/perf/Documentation/perf-amd-ibs.txt59
-rw-r--r--tools/perf/Documentation/perf-mem.txt50
-rw-r--r--tools/perf/bench/futex-hash.c1
-rw-r--r--tools/perf/bench/futex.c9
-rwxr-xr-xtools/perf/check-headers.sh2
-rwxr-xr-xtools/perf/tests/shell/stat+event_uniquifying.sh12
-rw-r--r--tools/perf/tests/tests-scripts.c1
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h2
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fs.h1
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/prctl.h7
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/stat.h8
-rw-r--r--tools/perf/util/include/linux/linkage.h4
-rw-r--r--tools/perf/util/print-events.c1
-rw-r--r--tools/power/cpupower/Makefile9
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/Makefile5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/recursive_attach.c67
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_sysctl.c (renamed from tools/testing/selftests/bpf/test_sysctl.c)37
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_map_resize.c16
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_vfs_accept.c18
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_vfs_reject.c15
-rw-r--r--tools/testing/selftests/bpf/test_lru_map.c105
-rw-r--r--tools/testing/selftests/coredump/Makefile2
-rw-r--r--tools/testing/selftests/coredump/config3
-rw-r--r--tools/testing/selftests/coredump/stackdump_test.c1692
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_ctx.py59
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_input_xfrm.py2
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/load.py23
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/peer.sh3
-rw-r--r--tools/testing/selftests/futex/functional/.gitignore1
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa_mpol.c10
-rw-r--r--tools/testing/selftests/futex/functional/futex_priv_hash.c2
-rw-r--r--tools/testing/selftests/futex/include/futex2test.h8
-rw-r--r--tools/testing/selftests/hid/tests/test_mouse.py70
-rw-r--r--tools/testing/selftests/iommu/iommufd.c40
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h9
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c55
-rw-r--r--tools/testing/selftests/kvm/x86/monitor_mwait_test.c1
-rw-r--r--tools/testing/selftests/mm/config3
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c7
-rw-r--r--tools/testing/selftests/mm/merge.c5
-rw-r--r--tools/testing/selftests/mm/settings2
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c3
-rw-r--r--tools/testing/selftests/mm/virtual_address_range.c7
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile3
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c142
-rwxr-xr-xtools/testing/selftests/net/gre_ipv6_lladdr.sh27
-rw-r--r--tools/testing/selftests/net/lib.sh2
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile3
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh5
-rwxr-xr-xtools/testing/selftests/net/nat6to4.sh15
-rw-r--r--tools/testing/selftests/net/netfilter/.gitignore1
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile3
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_clash.sh174
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_resize.sh97
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh3
-rw-r--r--tools/testing/selftests/net/netfilter/udpclash.c158
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt53
-rw-r--r--tools/testing/selftests/net/tfo.c171
-rwxr-xr-xtools/testing/selftests/net/tfo_passive.sh112
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh8
-rwxr-xr-xtools/testing/selftests/net/vlan_hw_filter.sh98
-rw-r--r--tools/testing/selftests/sched_ext/exit.c8
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json129
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_03.sh5
-rw-r--r--tools/testing/selftests/x86/Makefile2
-rw-r--r--tools/testing/selftests/x86/sigtrap_loop.c101
-rw-r--r--tools/testing/vma/vma_internal.h16
-rw-r--r--virt/kvm/kvm_main.c3
2205 files changed, 30230 insertions, 17571 deletions
diff --git a/.mailmap b/.mailmap
index c8a21d72241f..4bb3a7f253b9 100644
--- a/.mailmap
+++ b/.mailmap
@@ -197,6 +197,7 @@ Daniel Borkmann <daniel@iogearbox.net> <daniel.borkmann@tik.ee.ethz.ch>
Daniel Borkmann <daniel@iogearbox.net> <dborkmann@redhat.com>
Daniel Borkmann <daniel@iogearbox.net> <dborkman@redhat.com>
Daniel Borkmann <daniel@iogearbox.net> <dxchgb@gmail.com>
+Danilo Krummrich <dakr@kernel.org> <dakr@redhat.com>
David Brownell <david-b@pacbell.net>
David Collins <quic_collinsd@quicinc.com> <collinsd@codeaurora.org>
David Heidelberg <david@ixit.cz> <d.okias@gmail.com>
@@ -222,6 +223,8 @@ Dmitry Safonov <0x7f454c46@gmail.com> <d.safonov@partner.samsung.com>
Dmitry Safonov <0x7f454c46@gmail.com> <dsafonov@virtuozzo.com>
Domen Puncer <domen@coderock.org>
Douglas Gilbert <dougg@torque.net>
+Drew Fustini <fustini@kernel.org> <drew@pdp7.com>
+<duje@dujemihanovic.xyz> <duje.mihanovic@skole.hr>
Ed L. Cashin <ecashin@coraid.com>
Elliot Berman <quic_eberman@quicinc.com> <eberman@codeaurora.org>
Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com>
@@ -282,6 +285,7 @@ Gustavo Padovan <gustavo@las.ic.unicamp.br>
Gustavo Padovan <padovan@profusion.mobi>
Hamza Mahfooz <hamzamahfooz@linux.microsoft.com> <hamza.mahfooz@amd.com>
Hanjun Guo <guohanjun@huawei.com> <hanjun.guo@linaro.org>
+Hans de Goede <hansg@kernel.org> <hdegoede@redhat.com>
Hans Verkuil <hverkuil@xs4all.nl> <hansverk@cisco.com>
Hans Verkuil <hverkuil@xs4all.nl> <hverkuil-cisco@xs4all.nl>
Harry Yoo <harry.yoo@oracle.com> <42.hyeyoo@gmail.com>
@@ -412,6 +416,7 @@ Kenneth W Chen <kenneth.w.chen@intel.com>
Kenneth Westfield <quic_kwestfie@quicinc.com> <kwestfie@codeaurora.org>
Kiran Gunda <quic_kgunda@quicinc.com> <kgunda@codeaurora.org>
Kirill Tkhai <tkhai@ya.ru> <ktkhai@virtuozzo.com>
+Kirill A. Shutemov <kas@kernel.org> <kirill.shutemov@linux.intel.com>
Kishon Vijay Abraham I <kishon@kernel.org> <kishon@ti.com>
Konrad Dybcio <konradybcio@kernel.org> <konrad.dybcio@linaro.org>
Konrad Dybcio <konradybcio@kernel.org> <konrad.dybcio@somainline.org>
@@ -426,6 +431,9 @@ Krzysztof Wilczyński <kwilczynski@kernel.org> <krzysztof.wilczynski@linux.com>
Krzysztof Wilczyński <kwilczynski@kernel.org> <kw@linux.com>
Kshitiz Godara <quic_kgodara@quicinc.com> <kgodara@codeaurora.org>
Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
+Kuniyuki Iwashima <kuniyu@google.com> <kuniyu@amazon.com>
+Kuniyuki Iwashima <kuniyu@google.com> <kuniyu@amazon.co.jp>
+Kuniyuki Iwashima <kuniyu@google.com> <kuni1840@gmail.com>
Kuogee Hsieh <quic_khsieh@quicinc.com> <khsieh@codeaurora.org>
Lee Jones <lee@kernel.org> <joneslee@google.com>
Lee Jones <lee@kernel.org> <lee.jones@canonical.com>
@@ -686,11 +694,16 @@ Sedat Dilek <sedat.dilek@gmail.com> <sedat.dilek@credativ.de>
Senthilkumar N L <quic_snlakshm@quicinc.com> <snlakshm@codeaurora.org>
Serge Hallyn <sergeh@kernel.org> <serge.hallyn@canonical.com>
Serge Hallyn <sergeh@kernel.org> <serue@us.ibm.com>
+Sergey Senozhatsky <senozhatsky@chromium.org> <sergey.senozhatsky.work@gmail.com>
+Sergey Senozhatsky <senozhatsky@chromium.org> <sergey.senozhatsky@gmail.com>
+Sergey Senozhatsky <senozhatsky@chromium.org> <sergey.senozhatsky@mail.by>
+Sergey Senozhatsky <senozhatsky@chromium.org> <senozhatsky@google.com>
Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com>
Shakeel Butt <shakeel.butt@linux.dev> <shakeelb@google.com>
-Shannon Nelson <shannon.nelson@amd.com> <snelson@pensando.io>
-Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@intel.com>
-Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@oracle.com>
+Shannon Nelson <sln@onemain.com> <shannon.nelson@amd.com>
+Shannon Nelson <sln@onemain.com> <snelson@pensando.io>
+Shannon Nelson <sln@onemain.com> <shannon.nelson@intel.com>
+Shannon Nelson <sln@onemain.com> <shannon.nelson@oracle.com>
Sharath Chandra Vurukala <quic_sharathv@quicinc.com> <sharathv@codeaurora.org>
Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>
Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
@@ -719,6 +732,7 @@ Srinivas Ramana <quic_sramana@quicinc.com> <sramana@codeaurora.org>
Sriram R <quic_srirrama@quicinc.com> <srirrama@codeaurora.org>
Sriram Yagnaraman <sriram.yagnaraman@ericsson.com> <sriram.yagnaraman@est.tech>
Stanislav Fomichev <sdf@fomichev.me> <sdf@google.com>
+Stanislav Fomichev <sdf@fomichev.me> <stfomichev@gmail.com>
Stefan Wahren <wahrenst@gmx.net> <stefan.wahren@i2se.com>
Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
Stephen Hemminger <stephen@networkplumber.org> <shemminger@linux-foundation.org>
@@ -823,3 +837,6 @@ Yosry Ahmed <yosry.ahmed@linux.dev> <yosryahmed@google.com>
Yusuke Goda <goda.yusuke@renesas.com>
Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com>
Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com>
+Zijun Hu <zijun.hu@oss.qualcomm.com> <quic_zijuhu@quicinc.com>
+Zijun Hu <zijun.hu@oss.qualcomm.com> <zijuhu@codeaurora.org>
+Zijun Hu <zijun_hu@htc.com>
diff --git a/CREDITS b/CREDITS
index 45446ae322ec..cf2a53efaf4c 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1397,6 +1397,10 @@ N: Thomas Gleixner
E: tglx@linutronix.de
D: NAND flash hardware support, JFFS2 on NAND flash
+N: Jérôme Glisse
+E: jglisse@redhat.com
+D: HMM - Heterogeneous Memory Management
+
N: Richard E. Gooch
E: rgooch@atnf.csiro.au
D: parent process death signal to children
@@ -2981,6 +2985,11 @@ S: 521 Pleasant Valley Road
S: Potsdam, New York 13676
S: USA
+N: Shannon Nelson
+E: sln@onemain.com
+D: Worked on several network drivers including
+D: ixgbe, i40e, ionic, pds_core, pds_vdpa, pds_fwctl
+
N: Dave Neuer
E: dave.neuer@pobox.com
D: Helped implement support for Compaq's H31xx series iPAQs
diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power
index 54195530e97a..d3da88b26a53 100644
--- a/Documentation/ABI/testing/sysfs-devices-power
+++ b/Documentation/ABI/testing/sysfs-devices-power
@@ -56,7 +56,7 @@ Date: January 2009
Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
Description:
The /sys/devices/.../async attribute allows the user space to
- enable or diasble the device's suspend and resume callbacks to
+ enable or disable the device's suspend and resume callbacks to
be executed asynchronously (ie. in separate threads, in parallel
with the main suspend/resume thread) during system-wide power
transitions (eg. suspend to RAM, hibernation).
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index bf85f4de6862..ab8cd337f43a 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -584,6 +584,7 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/spectre_v1
/sys/devices/system/cpu/vulnerabilities/spectre_v2
/sys/devices/system/cpu/vulnerabilities/srbds
+ /sys/devices/system/cpu/vulnerabilities/tsa
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs
index d4140dc6c5ba..615453fcc9ff 100644
--- a/Documentation/ABI/testing/sysfs-driver-ufs
+++ b/Documentation/ABI/testing/sysfs-driver-ufs
@@ -711,7 +711,7 @@ Description: This file shows the thin provisioning type. This is one of
The file is read only.
-What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resourse_count
+What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resource_count
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the total physical memory resources. This is
diff --git a/Documentation/ABI/testing/sysfs-edac-scrub b/Documentation/ABI/testing/sysfs-edac-scrub
index c43be90deab4..ab6014743da5 100644
--- a/Documentation/ABI/testing/sysfs-edac-scrub
+++ b/Documentation/ABI/testing/sysfs-edac-scrub
@@ -49,6 +49,12 @@ Description:
(RO) Supported minimum scrub cycle duration in seconds
by the memory scrubber.
+ Device-based scrub: returns the minimum scrub cycle
+ supported by the memory device.
+
+ Region-based scrub: returns the max of minimum scrub cycles
+ supported by individual memory devices that back the region.
+
What: /sys/bus/edac/devices/<dev-name>/scrubX/max_cycle_duration
Date: March 2025
KernelVersion: 6.15
@@ -57,6 +63,16 @@ Description:
(RO) Supported maximum scrub cycle duration in seconds
by the memory scrubber.
+ Device-based scrub: returns the maximum scrub cycle supported
+ by the memory device.
+
+ Region-based scrub: returns the min of maximum scrub cycles
+ supported by individual memory devices that back the region.
+
+	If the memory device does not provide maximum scrub cycle
+	information, this returns the maximum supported value of the
+	scrub cycle field.
+
What: /sys/bus/edac/devices/<dev-name>/scrubX/current_cycle_duration
Date: March 2025
KernelVersion: 6.15
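
A minimal C sketch of the region-based aggregation described above (the
names and types here are illustrative, not the driver's actual code): the
region minimum is the largest per-device minimum, and the region maximum is
the smallest per-device maximum::

    struct scrub_dev {
            unsigned int min_cycle; /* seconds */
            unsigned int max_cycle; /* seconds */
    };

    /* Derive the region-wide scrub cycle range from its backing devices. */
    static void region_scrub_range(const struct scrub_dev *devs, int n,
                                   unsigned int *region_min,
                                   unsigned int *region_max)
    {
            *region_min = 0;
            *region_max = ~0u;

            for (int i = 0; i < n; i++) {
                    if (devs[i].min_cycle > *region_min)
                            *region_min = devs[i].min_cycle; /* max of minimums */
                    if (devs[i].max_cycle < *region_max)
                            *region_max = devs[i].max_cycle; /* min of maximums */
            }
    }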
diff --git a/Documentation/ABI/testing/sysfs-fs-erofs b/Documentation/ABI/testing/sysfs-fs-erofs
index bf3b6299c15e..76d9808ed581 100644
--- a/Documentation/ABI/testing/sysfs-fs-erofs
+++ b/Documentation/ABI/testing/sysfs-fs-erofs
@@ -5,7 +5,7 @@ Description: Shows all enabled kernel features.
Supported features:
zero_padding, compr_cfgs, big_pcluster, chunked_file,
device_table, compr_head2, sb_chksum, ztailpacking,
- dedupe, fragments.
+ dedupe, fragments, 48bit, metabox.
What: /sys/fs/erofs/<disk>/sync_decompress
Date: November 2021
@@ -35,3 +35,11 @@ Description: Used to set or show hardware accelerators in effect
and multiple accelerators are separated by '\n'.
Supported accelerator(s): qat_deflate.
Disable all accelerators with an empty string (echo > accel).
+
+What: /sys/fs/erofs/<disk>/dir_ra_bytes
+Date: July 2025
+Contact: "Chao Yu" <chao@kernel.org>
+Description:	Used to set or show the number of readahead bytes during
+		readdir(); the default value is 16384.
+
+ - 0: disable readahead.
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 0cc35a14afbe..bd98ea3175ec 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1732,12 +1732,6 @@ The following nested keys are defined.
numa_hint_faults (npn)
Number of NUMA hinting faults.
- numa_task_migrated (npn)
- Number of task migration by NUMA balancing.
-
- numa_task_swapped (npn)
- Number of task swap by NUMA balancing.
-
pgdemote_kswapd
Number of pages demoted by kswapd.
diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst
index c09674a75a9e..d989ae5778ba 100644
--- a/Documentation/admin-guide/cifs/usage.rst
+++ b/Documentation/admin-guide/cifs/usage.rst
@@ -270,6 +270,8 @@ configured for Unix Extensions (and the client has not disabled
illegal Windows/NTFS/SMB characters to a remap range (this mount parameter
is the default for SMB3). This remap (``mapposix``) range is also
compatible with Mac (and "Services for Mac" on some older Windows).
+When POSIX Extensions for SMB 3.1.1 are negotiated, remapping is automatically
+disabled.
CIFS VFS Mount Options
======================
diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
index 1302fd1b55e8..6dba18dbb9ab 100644
--- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
@@ -157,9 +157,7 @@ This is achieved by using the otherwise unused and obsolete VERW instruction in
combination with a microcode update. The microcode clears the affected CPU
buffers when the VERW instruction is executed.
-Kernel reuses the MDS function to invoke the buffer clearing:
-
- mds_clear_cpu_buffers()
+Kernel does the buffer clearing with x86_clear_cpu_buffers().
On MDS affected CPUs, the kernel already invokes CPU buffer clear on
kernel/userspace, hypervisor/guest and C-state (idle) transitions. No
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f1f2c0874da9..07e22ba5bfe3 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -7488,6 +7488,19 @@
having this key zero'ed is acceptable. E.g. in testing
scenarios.
+ tsa= [X86] Control mitigation for Transient Scheduler
+ Attacks on AMD CPUs. Search the following in your
+ favourite search engine for more details:
+
+ "Technical guidance for mitigating transient scheduler
+ attacks".
+
+ off - disable the mitigation
+ on - enable the mitigation (default)
+ user - mitigate only user/kernel transitions
+ vm - mitigate only guest/host transitions
+
+
tsc= Disable clocksource stability checks for TSC.
Format: <string>
[x86] reliable: mark tsc clocksource as reliable, this
diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst
index dee7b6de864f..ee9b790c0d72 100644
--- a/Documentation/arch/arm64/booting.rst
+++ b/Documentation/arch/arm64/booting.rst
@@ -234,7 +234,7 @@ Before jumping into the kernel, the following conditions must be met:
- If the kernel is entered at EL1:
- - ICC.SRE_EL2.Enable (bit 3) must be initialised to 0b1
+ - ICC_SRE_EL2.Enable (bit 3) must be initialised to 0b1
- ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1.
- The DT or ACPI tables must describe a GICv3 interrupt controller.
diff --git a/Documentation/arch/x86/mds.rst b/Documentation/arch/x86/mds.rst
index 5a2e6c0ef04a..3518671e1a85 100644
--- a/Documentation/arch/x86/mds.rst
+++ b/Documentation/arch/x86/mds.rst
@@ -93,7 +93,7 @@ enters a C-state.
The kernel provides a function to invoke the buffer clearing:
- mds_clear_cpu_buffers()
+ x86_clear_cpu_buffers()
Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path.
Other than CFLAGS.ZF, this macro doesn't clobber any registers.
@@ -185,9 +185,9 @@ Mitigation points
idle clearing would be a window dressing exercise and is therefore not
activated.
- The invocation is controlled by the static key mds_idle_clear which is
- switched depending on the chosen mitigation mode and the SMT state of
- the system.
+ The invocation is controlled by the static key cpu_buf_idle_clear which is
+ switched depending on the chosen mitigation mode and the SMT state of the
+ system.
The buffer clear is only invoked before entering the C-State to prevent
that stale data from the idling CPU from spilling to the Hyper-Thread
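
A rough sketch of the idle-entry pattern this describes, gated on the
static key named above (kernel context assumed; illustrative, not the exact
upstream code)::

    static void idle_enter_clear_cpu_buffers(void)
    {
            /* cpu_buf_idle_clear tracks the mitigation mode and SMT state */
            if (static_branch_likely(&cpu_buf_idle_clear))
                    x86_clear_cpu_buffers(); /* VERW-based buffer clearing */
    }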
diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst
index c368e1081b41..8c4030bcabb6 100644
--- a/Documentation/block/ublk.rst
+++ b/Documentation/block/ublk.rst
@@ -352,6 +352,83 @@ For reaching best IO performance, ublk server should align its segment
parameter of `struct ublk_param_segment` with backend for avoiding
unnecessary IO split, which usually hurts io_uring performance.
+Auto Buffer Registration
+------------------------
+
+The ``UBLK_F_AUTO_BUF_REG`` feature automatically handles buffer registration
+and unregistration for I/O requests, which simplifies the buffer management
+process and reduces overhead in the ublk server implementation.
+
+This is another feature flag for using zero copy, and it is compatible with
+``UBLK_F_SUPPORT_ZERO_COPY``.
+
+Feature Overview
+~~~~~~~~~~~~~~~~
+
+This feature automatically registers request buffers to the io_uring context
+before delivering I/O commands to the ublk server and unregisters them when
+completing I/O commands. This eliminates the need for manual buffer
+registration/unregistration via ``UBLK_IO_REGISTER_IO_BUF`` and
+``UBLK_IO_UNREGISTER_IO_BUF`` commands, so IO handling in the ublk server
+can avoid depending on those two uring_cmd operations.
+
+IOs can't be issued concurrently to io_uring if there is any dependency
+among them. Removing the dependency on the buffer registration and
+unregistration commands therefore not only simplifies the ublk server
+implementation but also makes concurrent IO handling possible.
+
+Usage Requirements
+~~~~~~~~~~~~~~~~~~
+
+1. The ublk server must create a sparse buffer table on the same ``io_ring_ctx``
+ used for ``UBLK_IO_FETCH_REQ`` and ``UBLK_IO_COMMIT_AND_FETCH_REQ``. If
+ uring_cmd is issued on a different ``io_ring_ctx``, manual buffer
+ unregistration is required.
+
+2. Buffer registration data must be passed via uring_cmd's ``sqe->addr`` with the
+ following structure::
+
+ struct ublk_auto_buf_reg {
+ __u16 index; /* Buffer index for registration */
+ __u8 flags; /* Registration flags */
+ __u8 reserved0; /* Reserved for future use */
+ __u32 reserved1; /* Reserved for future use */
+ };
+
+   ublk_auto_buf_reg_to_sqe_addr() converts the above structure into
+   ``sqe->addr``.
+
+3. All reserved fields in ``ublk_auto_buf_reg`` must be zeroed.
+
+4. Optional flags can be passed via ``ublk_auto_buf_reg.flags``.
+
+Fallback Behavior
+~~~~~~~~~~~~~~~~~
+
+If auto buffer registration fails:
+
+1. When ``UBLK_AUTO_BUF_REG_FALLBACK`` is enabled:
+
+ - The uring_cmd is completed
+ - ``UBLK_IO_F_NEED_REG_BUF`` is set in ``ublksrv_io_desc.op_flags``
+   - The ublk server must handle the failure itself, for example by
+     registering the buffer manually or by using the user copy feature to
+     retrieve the data for the ublk IO
+
+2. If fallback is not enabled:
+
+ - The ublk I/O request fails silently
+ - The uring_cmd won't be completed
+
+Limitations
+~~~~~~~~~~~
+
+- Requires same ``io_ring_ctx`` for all operations
+- May require manual buffer management in fallback cases
+- The io_ring_ctx buffer table has a max size of 16K, which may not be
+  enough when too many ublk devices are handled by a single io_ring_ctx
+  and each one has a very large queue depth
+
References
==========
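
A minimal C sketch of preparing the per-command registration data described
above. It mirrors the uapi layout quoted earlier; the authoritative
conversion is ublk_auto_buf_reg_to_sqe_addr() from the ublk uapi header,
and the open-coded packing below additionally assumes a little-endian
layout::

    #include <string.h>
    #include <linux/types.h>

    /* Mirrors the uapi layout quoted above (8 bytes total). */
    struct ublk_auto_buf_reg {
            __u16 index;
            __u8  flags;
            __u8  reserved0;
            __u32 reserved1;
    };

    /* Pack the registration data for uring_cmd's sqe->addr. */
    static __u64 auto_buf_reg_to_addr(struct ublk_auto_buf_reg reg)
    {
            __u64 addr = 0;

            memcpy(&addr, &reg, sizeof(reg)); /* assumes little endian */
            return addr;
    }

A caller would fill ``index`` with the sparse-table slot, optionally set
``UBLK_AUTO_BUF_REG_FALLBACK`` in ``flags``, leave the reserved fields
zeroed, and store the result in ``sqe->addr`` before submitting the
fetch/commit uring_cmd.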
diff --git a/Documentation/bpf/map_hash.rst b/Documentation/bpf/map_hash.rst
index d2343952f2cb..8606bf958a8c 100644
--- a/Documentation/bpf/map_hash.rst
+++ b/Documentation/bpf/map_hash.rst
@@ -233,10 +233,16 @@ attempts in order to enforce the LRU property which have increasing impacts on
other CPUs involved in the following operation attempts:
- Attempt to use CPU-local state to batch operations
-- Attempt to fetch free nodes from global lists
+- Attempt to fetch ``target_free`` free nodes from global lists
- Attempt to pull any node from a global list and remove it from the hashmap
- Attempt to pull any node from any CPU's list and remove it from the hashmap
+The number of nodes to borrow from the global list in a batch, ``target_free``,
+depends on the size of the map. A larger batch size reduces lock contention, but
+may also exhaust the global structure. The value is computed at map init to
+avoid exhaustion, by limiting the aggregate reservation by all CPUs to half the
+map size, with a minimum of a single element and a maximum budget of 128 at a time.
+
This algorithm is described visually in the following diagram. See the
description in commit 3a08c2fd7634 ("bpf: LRU List") for a full explanation of
the corresponding operations:
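
A minimal C sketch of the ``target_free`` computation described in the hunk
above (illustrative names; the authoritative logic lives in the kernel's
LRU-map init code)::

    /* Batch size for moving free nodes from the global list to a CPU. */
    static unsigned int compute_target_free(unsigned int map_entries,
                                            unsigned int nr_cpus)
    {
            /* Cap the aggregate reservation by all CPUs at half the map. */
            unsigned int t = map_entries / 2 / nr_cpus;

            if (t < 1)
                    t = 1;   /* at least a single element */
            if (t > 128)
                    t = 128; /* maximum budget of 128 at a time */
            return t;
    }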
diff --git a/Documentation/bpf/map_lru_hash_update.dot b/Documentation/bpf/map_lru_hash_update.dot
index a0fee349d29c..ab10058f5b79 100644
--- a/Documentation/bpf/map_lru_hash_update.dot
+++ b/Documentation/bpf/map_lru_hash_update.dot
@@ -35,18 +35,18 @@ digraph {
fn_bpf_lru_list_pop_free_to_local [shape=rectangle,fillcolor=2,
label="Flush local pending,
Rotate Global list, move
- LOCAL_FREE_TARGET
+ target_free
from global -> local"]
// Also corresponds to:
// fn__local_list_flush()
// fn_bpf_lru_list_rotate()
fn___bpf_lru_node_move_to_free[shape=diamond,fillcolor=2,
- label="Able to free\nLOCAL_FREE_TARGET\nnodes?"]
+ label="Able to free\ntarget_free\nnodes?"]
fn___bpf_lru_list_shrink_inactive [shape=rectangle,fillcolor=3,
label="Shrink inactive list
up to remaining
- LOCAL_FREE_TARGET
+ target_free
(global LRU -> local)"]
fn___bpf_lru_list_shrink [shape=diamond,fillcolor=2,
label="> 0 entries in\nlocal free list?"]
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml
index 2985c8c717d7..5403242545ab 100644
--- a/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml
@@ -52,6 +52,9 @@ properties:
'#clock-cells':
const: 1
+ '#reset-cells':
+ const: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml
index 9b5f3f3eab19..e69b6343a8eb 100644
--- a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml
@@ -118,15 +118,11 @@ $defs:
ti,lvds-vod-swing-clock-microvolt:
description: LVDS differential output voltage <min max> for clock
lanes in microvolts.
- $ref: /schemas/types.yaml#/definitions/uint32-array
- minItems: 2
maxItems: 2
ti,lvds-vod-swing-data-microvolt:
description: LVDS differential output voltage <min max> for data
lanes in microvolts.
- $ref: /schemas/types.yaml#/definitions/uint32-array
- minItems: 2
maxItems: 2
allOf:
diff --git a/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml b/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml
index b57ae6963e62..6b6f6762d122 100644
--- a/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml
@@ -97,7 +97,10 @@ properties:
resets:
items:
- - description: module reset
+ - description:
+ Module reset. This property is optional for controllers in Tegra194,
+ Tegra234, etc., where an internal software reset is available as an
+ alternative.
reset-names:
items:
@@ -116,6 +119,13 @@ properties:
- const: rx
- const: tx
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
allOf:
- $ref: /schemas/i2c/i2c-controller.yaml
- if:
@@ -169,6 +179,18 @@ allOf:
properties:
power-domains: false
+ - if:
+ not:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - nvidia,tegra194-i2c
+ then:
+ required:
+ - resets
+ - reset-names
+
unevaluatedProperties: false
examples:
diff --git a/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml b/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml
index eddfd329c67b..69ac5db8b914 100644
--- a/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml
@@ -26,7 +26,8 @@ properties:
- const: realtek,rtl9301-i2c
reg:
- description: Register offset and size this I2C controller.
+ items:
+ - description: Register offset and size of this I2C controller.
"#address-cells":
const: 1
diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml
index 29f12d650442..1a5209139e13 100644
--- a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml
@@ -223,12 +223,6 @@ allOf:
- required:
- pwms
- - oneOf:
- - required:
- - interrupts
- - required:
- - io-backends
-
- if:
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml b/Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml
index d1a6103fc37a..f3242dc0e7e6 100644
--- a/Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml
+++ b/Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml
@@ -21,7 +21,7 @@ properties:
vlogic-supply: true
interrupts:
- minItems: 1
+ maxItems: 1
description:
Interrupt mapping for the trigger interrupt from the internal oscillator.
diff --git a/Documentation/devicetree/bindings/input/elan,ekth6915.yaml b/Documentation/devicetree/bindings/input/elan,ekth6915.yaml
index cb3e1801b0d3..0840e4ab28b7 100644
--- a/Documentation/devicetree/bindings/input/elan,ekth6915.yaml
+++ b/Documentation/devicetree/bindings/input/elan,ekth6915.yaml
@@ -4,14 +4,14 @@
$id: http://devicetree.org/schemas/input/elan,ekth6915.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Elan eKTH6915 touchscreen controller
+title: Elan I2C-HID touchscreen controllers
maintainers:
- Douglas Anderson <dianders@chromium.org>
description:
- Supports the Elan eKTH6915 touchscreen controller.
- This touchscreen controller uses the i2c-hid protocol with a reset GPIO.
+ Supports the Elan eKTH6915 and other I2C-HID touchscreen controllers.
+ These touchscreen controllers use the i2c-hid protocol with a reset GPIO.
allOf:
- $ref: /schemas/input/touchscreen/touchscreen.yaml#
@@ -23,12 +23,14 @@ properties:
- enum:
- elan,ekth5015m
- const: elan,ekth6915
+ - items:
+ - const: elan,ekth8d18
+ - const: elan,ekth6a12nay
- enum:
- elan,ekth6915
- elan,ekth6a12nay
- reg:
- const: 0x10
+ reg: true
interrupts:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml
index b3d6db922693..1aa5775ba2bc 100644
--- a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml
+++ b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml
@@ -110,7 +110,7 @@ examples:
reg = <0x01cb4000 0x1000>;
interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_BUS_CSI>,
- <&ccu CLK_CSI1_SCLK>,
+ <&ccu CLK_CSI_SCLK>,
<&ccu CLK_DRAM_CSI>;
clock-names = "bus",
"mod",
diff --git a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml
index a61a76bb611c..3ea4a4290f23 100644
--- a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml
+++ b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml
@@ -79,7 +79,7 @@ examples:
reg = <0x01cb8000 0x1000>;
interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_BUS_CSI>,
- <&ccu CLK_CSI1_SCLK>,
+ <&ccu CLK_CSI_SCLK>,
<&ccu CLK_DRAM_CSI>;
clock-names = "bus", "mod", "ram";
resets = <&ccu RST_BUS_CSI>;
diff --git a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-mipi-csi2.yaml b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-mipi-csi2.yaml
index 54e15ab8a7f5..627b28e94354 100644
--- a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-mipi-csi2.yaml
+++ b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-mipi-csi2.yaml
@@ -103,7 +103,7 @@ examples:
reg = <0x01cb1000 0x1000>;
interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_BUS_CSI>,
- <&ccu CLK_CSI1_SCLK>;
+ <&ccu CLK_CSI_SCLK>;
clock-names = "bus", "mod";
resets = <&ccu RST_BUS_CSI>;
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
index 7b6a2fde8175..19934d5c24e5 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
@@ -23,7 +23,7 @@ properties:
- allwinner,sun20i-d1-emac
- allwinner,sun50i-h6-emac
- allwinner,sun50i-h616-emac0
- - allwinner,sun55i-a523-emac0
+ - allwinner,sun55i-a523-gmac0
- const: allwinner,sun50i-a64-emac
reg:
diff --git a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml
index 4dd2dc9c678b..8afbd9ebd73f 100644
--- a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml
@@ -80,6 +80,8 @@ examples:
interrupt-parent = <&intc>;
interrupts = <296 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "macirq";
+ phy-handle = <&phy0>;
+ phy-mode = "rgmii-id";
resets = <&rst 30>;
reset-names = "stmmaceth";
snps,multicast-filter-bins = <0>;
@@ -91,7 +93,6 @@ examples:
snps,mtl-rx-config = <&gmac0_mtl_rx_setup>;
snps,mtl-tx-config = <&gmac0_mtl_tx_setup>;
snps,axi-config = <&gmac0_stmmac_axi_setup>;
- status = "disabled";
gmac0_mtl_rx_setup: rx-queues-config {
snps,rx-queues-to-use = <8>;
diff --git a/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml
index b470901f5f56..4dbef86bd958 100644
--- a/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml
@@ -15,7 +15,7 @@ description: |
Some peripherals such as PWM have their I/O go through the 4 "GPIOs".
maintainers:
- - Jianlong Huang <jianlong.huang@starfivetech.com>
+ - Hal Feng <hal.feng@starfivetech.com>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml
index 222b9e240f8a..e2a25a20f6a6 100644
--- a/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml
@@ -18,7 +18,7 @@ description: |
any GPIO can be set up to be controlled by any of the peripherals.
maintainers:
- - Jianlong Huang <jianlong.huang@starfivetech.com>
+ - Hal Feng <hal.feng@starfivetech.com>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/pmem/pmem-region.txt b/Documentation/devicetree/bindings/pmem/pmem-region.txt
deleted file mode 100644
index cd79975e85ec..000000000000
--- a/Documentation/devicetree/bindings/pmem/pmem-region.txt
+++ /dev/null
@@ -1,65 +0,0 @@
-Device-tree bindings for persistent memory regions
------------------------------------------------------
-
-Persistent memory refers to a class of memory devices that are:
-
- a) Usable as main system memory (i.e. cacheable), and
- b) Retain their contents across power failure.
-
-Given b) it is best to think of persistent memory as a kind of memory mapped
-storage device. To ensure data integrity the operating system needs to manage
-persistent regions separately to the normal memory pool. To aid with that this
-binding provides a standardised interface for discovering where persistent
-memory regions exist inside the physical address space.
-
-Bindings for the region nodes:
------------------------------
-
-Required properties:
- - compatible = "pmem-region"
-
- - reg = <base, size>;
- The reg property should specify an address range that is
- translatable to a system physical address range. This address
- range should be mappable as normal system memory would be
- (i.e cacheable).
-
- If the reg property contains multiple address ranges
- each address range will be treated as though it was specified
- in a separate device node. Having multiple address ranges in a
- node implies no special relationship between the two ranges.
-
-Optional properties:
- - Any relevant NUMA associativity properties for the target platform.
-
- - volatile; This property indicates that this region is actually
- backed by non-persistent memory. This lets the OS know that it
- may skip the cache flushes required to ensure data is made
- persistent after a write.
-
- If this property is absent then the OS must assume that the region
- is backed by non-volatile memory.
-
-Examples:
---------------------
-
- /*
- * This node specifies one 4KB region spanning from
- * 0x5000 to 0x5fff that is backed by non-volatile memory.
- */
- pmem@5000 {
- compatible = "pmem-region";
- reg = <0x00005000 0x00001000>;
- };
-
- /*
- * This node specifies two 4KB regions that are backed by
- * volatile (normal) memory.
- */
- pmem@6000 {
- compatible = "pmem-region";
- reg = < 0x00006000 0x00001000
- 0x00008000 0x00001000 >;
- volatile;
- };
-
diff --git a/Documentation/devicetree/bindings/pmem/pmem-region.yaml b/Documentation/devicetree/bindings/pmem/pmem-region.yaml
new file mode 100644
index 000000000000..bd0f0c793f03
--- /dev/null
+++ b/Documentation/devicetree/bindings/pmem/pmem-region.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pmem-region.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+maintainers:
+ - Oliver O'Halloran <oohall@gmail.com>
+
+title: Persistent Memory Regions
+
+description: |
+ Persistent memory refers to a class of memory devices that are:
+
+ a) Usable as main system memory (i.e. cacheable), and
+ b) Retain their contents across power failure.
+
+ Given b) it is best to think of persistent memory as a kind of memory mapped
+ storage device. To ensure data integrity the operating system needs to manage
+ persistent regions separately to the normal memory pool. To aid with that this
+ binding provides a standardised interface for discovering where persistent
+ memory regions exist inside the physical address space.
+
+properties:
+ compatible:
+ const: pmem-region
+
+ reg:
+ maxItems: 1
+
+ volatile:
+ description:
+ Indicates the region is volatile (non-persistent) and the OS can skip
+ cache flushes for writes
+ type: boolean
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ pmem@5000 {
+ compatible = "pmem-region";
+ reg = <0x00005000 0x00001000>;
+ };
diff --git a/Documentation/devicetree/bindings/serial/8250.yaml b/Documentation/devicetree/bindings/serial/8250.yaml
index 33d2016b6509..c6bc27709bf7 100644
--- a/Documentation/devicetree/bindings/serial/8250.yaml
+++ b/Documentation/devicetree/bindings/serial/8250.yaml
@@ -45,7 +45,7 @@ allOf:
- ns16550
- ns16550a
then:
- anyOf:
+ oneOf:
- required: [ clock-frequency ]
- required: [ clocks ]
diff --git a/Documentation/devicetree/bindings/serial/altera_jtaguart.txt b/Documentation/devicetree/bindings/serial/altera_jtaguart.txt
deleted file mode 100644
index 55a901051e8f..000000000000
--- a/Documentation/devicetree/bindings/serial/altera_jtaguart.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-Altera JTAG UART
-
-Required properties:
-- compatible : should be "ALTR,juart-1.0" <DEPRECATED>
-- compatible : should be "altr,juart-1.0"
diff --git a/Documentation/devicetree/bindings/serial/altera_uart.txt b/Documentation/devicetree/bindings/serial/altera_uart.txt
deleted file mode 100644
index 81bf7ffb1a81..000000000000
--- a/Documentation/devicetree/bindings/serial/altera_uart.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-Altera UART
-
-Required properties:
-- compatible : should be "ALTR,uart-1.0" <DEPRECATED>
-- compatible : should be "altr,uart-1.0"
-
-Optional properties:
-- clock-frequency : frequency of the clock input to the UART
diff --git a/Documentation/devicetree/bindings/serial/altr,juart-1.0.yaml b/Documentation/devicetree/bindings/serial/altr,juart-1.0.yaml
new file mode 100644
index 000000000000..02e20fa591da
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/altr,juart-1.0.yaml
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/serial/altr,juart-1.0.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Altera JTAG UART
+
+maintainers:
+ - Dinh Nguyen <dinguyen@kernel.org>
+
+properties:
+ compatible:
+ const: altr,juart-1.0
+
+required:
+ - compatible
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/serial/altr,uart-1.0.yaml b/Documentation/devicetree/bindings/serial/altr,uart-1.0.yaml
new file mode 100644
index 000000000000..72d4972e1e22
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/altr,uart-1.0.yaml
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/serial/altr,uart-1.0.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Altera UART
+
+maintainers:
+ - Dinh Nguyen <dinguyen@kernel.org>
+
+allOf:
+ - $ref: /schemas/serial/serial.yaml#
+
+properties:
+ compatible:
+ const: altr,uart-1.0
+
+ clock-frequency:
+ description: Frequency of the clock input to the UART.
+
+required:
+ - compatible
+
+unevaluatedProperties: false
diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml
index 234089b5954d..b43df10c5ef4 100644
--- a/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml
+++ b/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
-$id: http://devicetree.org/schemas//soc/fsl/fsl,ls1028a-reset.yaml#
+$id: http://devicetree.org/schemas/soc/fsl/fsl,ls1028a-reset.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Freescale Layerscape Reset Registers Module
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 2e567e341c3b..580581281ed7 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -253,10 +253,10 @@ prototypes::
int (*writepages)(struct address_space *, struct writeback_control *);
bool (*dirty_folio)(struct address_space *, struct folio *folio);
void (*readahead)(struct readahead_control *);
- int (*write_begin)(struct file *, struct address_space *mapping,
+ int (*write_begin)(const struct kiocb *, struct address_space *mapping,
loff_t pos, unsigned len,
struct folio **foliop, void **fsdata);
- int (*write_end)(struct file *, struct address_space *mapping,
+ int (*write_end)(const struct kiocb *, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct folio *folio, void *fsdata);
sector_t (*bmap)(struct address_space *, sector_t);
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 3616d7161dab..bc5a389c0c98 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -1249,3 +1249,30 @@ Using try_lookup_noperm() will require linux/namei.h to be included.
Calling conventions for ->d_automount() have changed; we should *not* grab
an extra reference to new mount - it should be returned with refcount 1.
+
+---
+
+collect_mounts()/drop_collected_mounts()/iterate_mounts() are gone now.
+The replacement is collect_paths()/drop_collected_path(), with no special
+iterator needed. Instead of a cloned mount tree, the new interface returns
+an array of struct path, one for each mount collect_mounts() would've
+created. These struct path instances point to locations in the caller's
+namespace that would be the roots of the cloned mounts.
+
+---
+
+**mandatory**
+
+If your filesystem sets the default dentry_operations, use set_default_d_op()
+rather than manually setting sb->s_d_op.
+
+---
+
+**mandatory**
+
+d_set_d_op() is no longer exported (or public, for that matter); _if_
+your filesystem really needed that, make use of d_splice_alias_ops()
+to have them set. Better yet, think hard whether you need different
+->d_op for different dentries - if not, just use set_default_d_op()
+at mount time and be done with that. Currently procfs is the only
+thing that really needs ->d_op varying between dentries.
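+
+A minimal sketch of the common case (names here are illustrative, not taken
+from any real filesystem)::
+
+	static const struct dentry_operations example_d_ops = {
+		/* the ->d_op methods shared by every dentry on this sb */
+	};
+
+	static int example_fill_super(struct super_block *sb, struct fs_context *fc)
+	{
+		set_default_d_op(sb, &example_d_ops);
+		/* ... the rest of fill_super ... */
+		return 0;
+	}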
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 2a17865dfe39..5236cb52e357 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -584,7 +584,6 @@ encoded manner. The codes are the following:
ms may share
gd stack segment grows down
pf pure PFN range
- dw disabled write to the mapped file
lo pages are locked in memory
io memory mapped I/O area
sr sequential read advise provided
@@ -607,8 +606,11 @@ encoded manner. The codes are the following:
mt arm64 MTE allocation tags are enabled
um userfaultfd missing tracking
uw userfaultfd wr-protect tracking
+ ui userfaultfd minor fault
ss shadow/guarded control stack page
sl sealed
+ lf lock on fault pages
+ dp always lazily freeable mapping
== =======================================
Note that there is no guarantee that every flag and associated mnemonic will
diff --git a/Documentation/filesystems/propagate_umount.txt b/Documentation/filesystems/propagate_umount.txt
new file mode 100644
index 000000000000..c90349e5b889
--- /dev/null
+++ b/Documentation/filesystems/propagate_umount.txt
@@ -0,0 +1,484 @@
+ Notes on propagate_umount()
+
+Umount propagation starts with a set of mounts we are already going to
+take out. Ideally, we would like to add all downstream cognates to
+that set - anything with the same mountpoint as one of the removed
+mounts and with a parent that would receive events from the parent of that
+mount. However, there are some constraints the resulting set must
+satisfy.
+
+It is convenient to define several properties of sets of mounts:
+
+1) A set S of mounts is non-shifting if for any mount X belonging
+to S all subtrees mounted strictly inside of X (i.e. not overmounting
+the root of X) contain only elements of S.
+
+2) A set S is non-revealing if all locked mounts that belong to S have
+parents that also belong to S.
+
+3) A set S is closed if it contains all children of its elements.
+
+The set of mounts taken out by umount(2) must be non-shifting and
+non-revealing; the first constraint is what allows us to reparent
+any remaining mounts and the second is what prevents the exposure
+of any concealed mountpoints.
+
+propagate_umount() takes the original set as an argument and tries to
+extend that set. The original set is a full subtree and its root is
+unlocked; what matters is that it's closed and non-revealing.
+The resulting set may not be closed; there might still be mounts outside
+of that set, but only on top of stacks of root-overmounting elements
+of the set. They can be reparented to the place where the bottom of the
+stack is attached to a mount that will survive. NOTE: doing that
+will violate a constraint on having no more than one mount with
+the same parent/mountpoint pair; however, the caller (umount_tree())
+will immediately remedy that - it may keep unmounted element attached
+to parent, but only if the parent itself is unmounted. Since all
+conflicts created by reparenting have common parent *not* in the
+set and one side of the conflict (bottom of the stack of overmounts)
+is in the set, it will be resolved. However, we rely upon umount_tree()
+doing that pretty much immediately after the call of propagate_umount().
+
+The algorithm is based on two statements:
+ 1) for any set S, there is a maximal non-shifting subset of S
+and it can be calculated in O(#S) time.
+ 2) for any non-shifting set S, there is a maximal non-revealing
+subset of S. That subset is also non-shifting and it can be calculated
+in O(#S) time.
+
+ Finding candidates.
+
+We are given a closed set U and we want to find all mounts that have
+the same mountpoint as some mount m in U *and* whose parent receives
+propagation from the parent of the same mount m. A naive implementation
+would be
+ S = {}
+ for each m in U
+ add m to S
+ p = parent(m)
+ for each q in Propagation(p) - {p}
+ child = look_up(q, mountpoint(m))
+ if child
+ add child to S
+but that can lead to excessive work - there might be propagation among the
+subtrees of U, in which case we'd end up examining the same candidates
+many times. Since propagation is transitive, the same will happen to
+everything downstream of that candidate and it's not hard to construct
+cases where the approach above leads to time quadratic in the actual
+number of candidates.
+
+Note that if we run into a candidate we'd already seen, it must've been
+added on an earlier iteration of the outer loop - all additions made
+during one iteration of the outer loop have different parents. So
+if we find a child already added to the set, we know that everything
+in Propagation(parent(child)) with the same mountpoint has been already
+added.
+ S = {}
+ for each m in U
+ if m in S
+ continue
+ add m to S
+ p = parent(m)
+ q = propagation_next(p, p)
+ while q
+ child = look_up(q, mountpoint(m))
+ if child
+ if child in S
+ q = skip_them(q, p)
+ continue;
+ add child to S
+ q = propagation_next(q, p)
+where
+skip_them(q, p)
+ keep walking Propagation(p) from q until we find something
+ not in Propagation(q)
+
+would get rid of that problem, but we need a sane implementation of
+skip_them(). That's not hard to do - split propagation_next() into
+"down into mnt_slave_list" and "forward-and-up" parts, with the
+skip_them() being "repeat the forward-and-up part until we get NULL
+or something that isn't a peer of the one we are skipping".
+
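+A sketch of skip_them() along those lines (forward_and_up() is an
+illustrative name for the "forward-and-up" part of propagation_next()):
+
+skip_them(q, p)
+	do
+		q = forward_and_up(q, p)
+	while q is not NULL and q is a peer of the mount being skipped
+	return q
+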
+Note that there can be no absolute roots among the extra candidates -
+they all come from mount lookups. Absolute root among the original
+set is _currently_ impossible, but it might be worth protecting
+against.
+
+ Maximal non-shifting subsets.
+
+Let's call a mount m in a set S forbidden in that set if there is a
+subtree mounted strictly inside m and containing mounts that do not
+belong to S.
+
+The set is non-shifting when none of its elements are forbidden in it.
+
+If mount m is forbidden in a set S, it is forbidden in any subset S' it
+belongs to. In other words, it can't belong to any of the non-shifting
+subsets of S. If we had a way to find a forbidden mount or show that
+there's none, we could use it to find the maximal non-shifting subset
+simply by finding and removing them until none remain.
+
+Suppose mount m is forbidden in S; then any mounts forbidden in S - {m}
+must have been forbidden in S itself. Indeed, since m has descendants
+that do not belong to S, any subtree that fits into S will fit into
+S - {m} as well.
+
+So in principle we could go through elements of S, checking if they
+are forbidden in S and removing the ones that are. Removals will
+not invalidate the checks done for earlier mounts - if they were not
+forbidden at the time we checked, they won't become forbidden later.
+It's too costly to be practical, but there is a similar approach that
+is linear in the size of S.
+
+Let's say that mount x in a set S is forbidden by mount y, if
+ * both x and y belong to S.
+ * there is a chain of mounts starting at x and leaving S
+ immediately after passing through y, with the first
+ mountpoint strictly inside x.
+Note 1: x may be equal to y - that's the case when something not
+belonging to S is mounted strictly inside x.
+Note 2: if y does not belong to S, it can't forbid anything in S.
+Note 3: if y has no children outside of S, it can't forbid anything in S.
+
+It's easy to show that mount x is forbidden in S if and only if x is
+forbidden in S by some mount y. And it's easy to find all mounts in S
+forbidden by a given mount.
+
+Consider the following operation:
+ Trim(S, m) = S - {x : x is forbidden by m in S}
+
+Note that if m does not belong to S or has no children outside of S we
+are guaranteed that Trim(S, m) is equal to S.
+
+The following is true: if x is forbidden by y in Trim(S, m), it was
+already forbidden by y in S.
+
+Proof: Suppose x is forbidden by y in Trim(S, m). Then there is a
+chain of mounts (x_0 = x, ..., x_k = y, x_{k+1} = r), such that x_{k+1}
+is the first element that doesn't belong to Trim(S, m) and the
+mountpoint of x_1 is strictly inside x. If mount r belongs to S, it must
+have been removed by Trim(S, m), i.e. it was forbidden in S by m.
+Then there was a mount chain from r to some child of m that stayed in
+S all the way until m, but that's impossible since x belongs to Trim(S, m)
+and prepending (x_0, ..., x_k) to that chain demonstrates that x is also
+forbidden in S by m, and thus can't belong to Trim(S, m).
+Therefore r can not belong to S and our chain demonstrates that
+x is forbidden by y in S. QED.
+
+Corollary: no mount is forbidden by m in Trim(S, m). Indeed, any
+such mount would have been forbidden by m in S and thus would have been
+in the part of S removed in Trim(S, m).
+
+Corollary: no mount is forbidden by m in Trim(Trim(S, m), n). Indeed,
+any such would have to have been forbidden by m in Trim(S, m), which
+is impossible.
+
+Corollary: after
+ S = Trim(S, x_1)
+ S = Trim(S, x_2)
+ ...
+ S = Trim(S, x_k)
+no mount remaining in S will be forbidden by either of x_1,...,x_k.
+
+The following will reduce S to its maximal non-shifting subset:
+ visited = {}
+ while S contains elements not belonging to visited
+ let m be an arbitrary such element of S
+ S = Trim(S, m)
+ add m to visited
+
+S never grows, so the number of elements of S not belonging to visited
+decreases at least by one on each iteration. When the loop terminates,
+all mounts remaining in S belong to visited. It's easy to see that at
+the beginning of each iteration no mount remaining in S will be forbidden
+by any element of visited. In other words, no mount remaining in S will
+be forbidden, i.e. final value of S will be non-shifting. It will be
+the maximal non-shifting subset, since we were removing only forbidden
+elements.
+
+ There are two difficulties in implementing the above in linear
+time, both due to the fact that Trim() might need to remove more than one
+element. A naive implementation of Trim() is vulnerable to running into a
+long chain of mounts, each mounted on top of parent's root. Nothing in
+that chain is forbidden, so nothing gets removed from it. We need to
+recognize such chains and avoid walking them again on subsequent calls of
+Trim(), otherwise we will end up with worst-case time being quadratic by
+the number of elements in S. Another difficulty is in implementing the
+outer loop - we need to iterate through all elements of a shrinking set.
+That would be trivial if we never removed more than one element at a time
+(linked list, with list_for_each_entry_safe for iterator), but we may
+need to remove more than one entry, possibly including the ones we have
+already visited.
+
+ Let's start with naive algorithm for Trim():
+
+Trim_one(m)
+ found = false
+ for each n in children(m)
+ if n not in S
+ found = true
+ if (mountpoint(n) != root(m))
+ remove m from S
+ break
+ if found
+ Trim_ancestors(m)
+
+Trim_ancestors(m)
+ for (; parent(m) in S; m = parent(m)) {
+ if (mountpoint(m) != root(parent(m)))
+ remove parent(m) from S
+ }
+
+If m belongs to S, Trim_one(m) will replace S with Trim(S, m).
+Proof:
+ Consider the chains excluding elements from Trim(S, m). The last
+two elements in such chain are m and some child of m that does not belong
+to S. If m has no such children, Trim(S, m) is equal to S.
+ m itself is removed if and only if the chain has exactly two
+elements, i.e. when the last element does not overmount the root of m.
+In other words, that happens when m has a child not in S that does not
+overmount the root of m.
+ All other elements to remove will be ancestors of m, such that
+the entire descent chain from them to m is contained in S. Let
+(x_0, x_1, ..., x_k = m) be the longest such chain. x_i needs to be
+removed if and only if x_{i+1} does not overmount its root. It's easy
+to see that Trim_ancestors(m) will iterate through that chain from
+x_k to x_1 and that it will remove exactly the elements that need to be
+removed.
+
+ Note that if the loop in Trim_ancestors() walks into an already
+visited element, we are guaranteed that remaining iterations will see
+only elements that had already been visited and remove none of them.
+That's the weakness that makes it vulnerable to long chains of full
+overmounts.
+
+ It's easy to deal with, if we can afford setting marks on
+elements of S; we would mark all elements already visited by
+Trim_ancestors() and have it bail out as soon as it sees an already
+marked element.
+
+ The problems with iterating through the set can be dealt with in
+several ways, depending upon the representation we choose for our set.
+One useful observation is that we are given a closed subset in S - the
+original set passed to propagate_umount(). Its elements can neither
+forbid anything nor be forbidden by anything - all their descendants
+belong to S, so they can not occur anywhere in any excluding chain.
+In other words, the elements of that subset will remain in S until
+the end and Trim_one(m) is a no-op for all m from that subset.
+
+ That suggests keeping S as a disjoint union of a closed set U
+('will be unmounted, no matter what') and the set of all elements of
+S that do not belong to U. That set ('candidates') is all we need
+to iterate through. Let's represent it as a subset in a cyclic list,
+consisting of all list elements that are marked as candidates (initially -
+all of them). Then we could have Trim_ancestors() only remove the mark,
+leaving the elements on the list. Then Trim_one() would never remove
+anything other than its argument from the containing list, allowing us
+to use list_for_each_entry_safe() as the iterator.
+
+ Assuming that representation we get the following:
+
+ list_for_each_entry_safe(m, ..., Candidates, ...)
+ Trim_one(m)
+where
+Trim_one(m)
+ if (m is not marked as a candidate)
+ strip the "seen by Trim_ancestors" mark from m
+ remove m from the Candidates list
+ return
+
+ remove_this = false
+ found = false
+ for each n in children(m)
+ if n not in S
+ found = true
+ if (mountpoint(n) != root(m))
+ remove_this = true
+ break
+ if found
+ Trim_ancestors(m)
+ if remove_this
+ strip the "seen by Trim_ancestors" mark from m
+ strip the "candidate" mark from m
+ remove m from the Candidates list
+
+Trim_ancestors(m)
+ for (p = parent(m); p is marked as candidate ; m = p, p = parent(p)) {
+ if m is marked as seen by Trim_ancestors
+ return
+ mark m as seen by Trim_ancestors
+ if (mountpoint(m) != root(p))
+ strip the "candidate" mark from p
+ }
+
+ The terminating condition in the loop in Trim_ancestors() is correct,
+since that loop will never run into p belonging to U - p is always
+an ancestor of argument of Trim_one() and since U is closed, the argument
+of Trim_one() would also have to belong to U. But Trim_one() is never
+called for elements of U. In other words, p belongs to S if and only
+if it belongs to candidates.
+
+ Time complexity:
+* we get no more than O(#S) calls of Trim_one()
+* the loop over children in Trim_one() never looks at the same child
+twice through all the calls.
+* iterations of that loop for children in S are no more than O(#S)
+in the worst case
+* at most two children that are not elements of S are considered per
+call of Trim_one().
+* the loop in Trim_ancestors() sets its mark once per iteration and
+no element of S has its mark set more than once.
+
+ In the end we may have some elements excluded from S by
+Trim_ancestors() still stuck on the list. We could do a separate
+loop removing them from the list (also no worse than O(#S) time),
+but it's easier to leave that until the next phase - there we will
+iterate through the candidates anyway.
+
+ The caller has already removed all elements of U from their parents'
+lists of children, which means that checking if child belongs to S is
+equivalent to checking if it's marked as a candidate; we'll never see
+the elements of U in the loop over children in Trim_one().
+
+ What's more, if we see that children(m) is empty and m is not
+locked, we can immediately move m into the committed subset (remove
+from the parent's list of children, etc.). That's one fewer mount we'll
+have to look into when we check the list of children of its parent *and*
+when we get to building the non-revealing subset.
+
+ Maximal non-revealing subsets
+
+If S is not a non-revealing subset, there is a locked element x in S
+such that parent of x is not in S.
+
+Obviously, no non-revealing subset of S may contain x. Removing such
+elements one by one will obviously end with the maximal non-revealing
+subset (possibly empty one). Note that removal of an element will
+require removal of all its locked children, etc.
+
+If the set had been non-shifting, it will remain non-shifting after
+such removals.
+Proof: suppose S was non-shifting, x is a locked element of S, parent of x
+is not in S and S - {x} is not non-shifting. Then there is an element m
+in S - {x} and a subtree mounted strictly inside m, such that m contains
+an element not in S - {x}. Since S is non-shifting, everything in
+that subtree must belong to S. But that means that this subtree must
+contain x somewhere *and* that the parent of x either belongs to that subtree
+or is equal to m. Either way it must belong to S. Contradiction.
+
+// same representation as for finding maximal non-shifting subsets:
+// S is a disjoint union of a non-revealing set U (the ones we are committed
+// to unmount) and a set of candidates, represented as a subset of list
+// elements that have "is a candidate" mark on them.
+// Elements of U are removed from their parents' lists of children.
+// In the end the candidates list becomes empty and the maximal non-revealing
+// non-shifting subset of S is now in U
+ while (Candidates list is non-empty)
+ handle_locked(first(Candidates))
+
+handle_locked(m)
+ if m is not marked as a candidate
+ strip the "seen by Trim_ancestors" mark from m
+ remove m from the list
+ return
+ cutoff = m
+ for (p = m; p in candidates; p = parent(p)) {
+ strip the "seen by Trim_ancestors" mark from p
+ strip the "candidate" mark from p
+ remove p from the Candidates list
+ if (!locked(p))
+ cutoff = parent(p)
+ }
+ if p in U
+ cutoff = p
+ while m != cutoff
+ remove m from children(parent(m))
+ add m to U
+ m = parent(m)
+
+Let (x_0, ..., x_n = m) be the maximal chain of descent of m within S.
+* If it contains some elements of U, let x_k be the last one of those.
+Then union of U with {x_{k+1}, ..., x_n} is obviously non-revealing.
+* otherwise if all its elements are locked, then none of {x_0, ..., x_n}
+may be elements of a non-revealing subset of S.
+* otherwise let x_k be the first unlocked element of the chain. Then none
+of {x_0, ..., x_{k-1}} may be an element of a non-revealing subset of
+S and union of U and {x_k, ..., x_n} is non-revealing.
+
+handle_locked(m) finds which of these cases applies and adjusts Candidates
+and U accordingly. U remains non-revealing, union of Candidates and
+U still contains any non-revealing subset of S and after the call of
+handle_locked(m) m is guaranteed not to be on the Candidates list. So having
+it called for each element of S would suffice to empty Candidates,
+leaving U the maximal non-revealing subset of S.
+
+However, handle_locked(m) is a no-op when m belongs to U, so it's enough
+to have it called for elements of Candidates list until none remain.
+
+Time complexity: number of calls of handle_locked() is limited by
+#Candidates, each iteration of the first loop in handle_locked() removes
+an element from the list, so their total number of executions is also
+limited by #Candidates; number of iterations in the second loop is no
+greater than the number of iterations of the first loop.
+
+
+ Reparenting
+
+After we'd calculated the final set, we still need to deal with
+reparenting - if an element of the final set has a child not in it,
+we need to reparent that child.
+
+Such children can only be root-overmounting (otherwise the set wouldn't
+be non-shifting) and their parents can not belong to the original set,
+since the original is guaranteed to be closed.
+
+
+ Putting all of that together
+
+The plan is to
+ * find all candidates
+ * trim down to maximal non-shifting subset
+ * trim down to maximal non-revealing subset
+ * reparent anything that needs to be reparented
+ * return the resulting set to the caller
+
+For the 2nd and 3rd steps we want to separate the set into a growing
+non-revealing subset, initially containing the original set ("U" in
+terms of the pseudocode above) and everything we are still not sure about
+("candidates"). It means that for the output of the 1st step we'd like
+the extra candidates separated from the stuff already in the original set.
+For the 4th step we would like the additions to U separate from the
+original set.
+
+So let's go for
+ * original set ("set"). Linkage via mnt_list
+ * undecided candidates ("candidates"). Subset of a list,
+consisting of all its elements marked with a new flag (T_UMOUNT_CANDIDATE).
+Initially all elements of the list will be marked that way; in the
+end the list will become empty and no mounts will remain marked with
+that flag.
+ * Reuse T_MARKED for "has been already seen by trim_ancestors()".
+ * anything in U that hadn't been in the original set - elements of
+candidates will gradually be either discarded or moved there. In other
+words, it's the candidates we have already decided to unmount. Its role
+is reasonably close to the old "to_umount", so let's use that name.
+Linkage via mnt_list.
+
+For gather_candidates() we'll need to maintain both candidates (S -
+set) and the intersection of S with set. Use T_UMOUNT_CANDIDATE for
+all elements we encounter, putting the ones not already in the original
+set into the list of candidates. When we are done, strip that flag from
+all elements of the original set. That gives a cheap way to check
+whether an element belongs to S (in gather_candidates) and to candidates
+itself (at later stages). Call that predicate is_candidate(); it would
+be m->mnt_t_flags & T_UMOUNT_CANDIDATE.
+
+All elements of the original set are marked with MNT_UMOUNT and we'll
+need the same for elements added when joining the contents of to_umount
+to set in the end. Let's set MNT_UMOUNT at the time we add an element
+to to_umount; that's close to what the old 'umount_one' is doing, so
+let's keep that name. It also gives us another predicate we need -
+"belongs to union of set and to_umount"; will_be_unmounted() for now.
+
+Removals from the candidates list should strip both T_MARKED and
+T_UMOUNT_CANDIDATE; call it remove_from_candidates_list().
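+
+A sketch of those predicates and of the removal helper, matching the text
+above (the actual code may differ in details):
+
+is_candidate(m)
+	return m->mnt_t_flags & T_UMOUNT_CANDIDATE
+
+will_be_unmounted(m)
+	return m is marked MNT_UMOUNT
+
+remove_from_candidates_list(m)
+	strip T_MARKED and T_UMOUNT_CANDIDATE from m
+	remove m from the candidates list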
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index fd32a9a17bfb..57604b07bdc9 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -758,8 +758,9 @@ process is more complicated and uses write_begin/write_end or
dirty_folio to write data into the address_space, and
writepages to writeback data to storage.
-Adding and removing pages to/from an address_space is protected by the
-inode's i_mutex.
+Removing pages from an address_space requires holding the inode's i_rwsem
+exclusively, while adding pages to the address_space requires holding the
+inode's i_mapping->invalidate_lock exclusively.
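+
+A rough sketch of that rule, using the existing helpers
+filemap_invalidate_lock()/filemap_invalidate_unlock() (real code paths may
+take additional locks)::
+
+	/* adding pages to the page cache */
+	filemap_invalidate_lock(inode->i_mapping);
+	/* ... e.g. filemap_add_folio(inode->i_mapping, folio, index, gfp) ... */
+	filemap_invalidate_unlock(inode->i_mapping);
+
+	/* removing pages, e.g. on truncation: i_rwsem held exclusively */
+	inode_lock(inode);
+	truncate_inode_pages(inode->i_mapping, 0);
+	inode_unlock(inode);
+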
When data is written to a page, the PG_Dirty flag should be set. It
typically remains set until writepages asks for it to be written. This
@@ -822,10 +823,10 @@ cache in your filesystem. The following members are defined:
int (*writepages)(struct address_space *, struct writeback_control *);
bool (*dirty_folio)(struct address_space *, struct folio *);
void (*readahead)(struct readahead_control *);
- int (*write_begin)(struct file *, struct address_space *mapping,
+ int (*write_begin)(const struct kiocb *, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata);
- int (*write_end)(struct file *, struct address_space *mapping,
+ struct page **pagep, void **fsdata);
+ int (*write_end)(const struct kiocb *, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct folio *folio, void *fsdata);
sector_t (*bmap)(struct address_space *, sector_t);
diff --git a/Documentation/gpu/nouveau.rst b/Documentation/gpu/nouveau.rst
index b8c801e0068c..cab2e81013bc 100644
--- a/Documentation/gpu/nouveau.rst
+++ b/Documentation/gpu/nouveau.rst
@@ -25,7 +25,7 @@ providing a consistent API to upper layers of the driver stack.
GSP Support
------------------------
-.. kernel-doc:: drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+.. kernel-doc:: drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c
:doc: GSP message queue element
.. kernel-doc:: drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
diff --git a/Documentation/hwmon/ina238.rst b/Documentation/hwmon/ina238.rst
index d1b93cf8627f..9a24da4786a4 100644
--- a/Documentation/hwmon/ina238.rst
+++ b/Documentation/hwmon/ina238.rst
@@ -65,7 +65,7 @@ Additional sysfs entries for sq52206
------------------------------------
======================= =======================================================
-energy1_input Energy measurement (mJ)
+energy1_input Energy measurement (uJ)
power1_input_highest Peak Power (uW)
======================= =======================================================
diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml
index 4cbfe666e6f5..b29d62eefa16 100644
--- a/Documentation/netlink/genetlink-legacy.yaml
+++ b/Documentation/netlink/genetlink-legacy.yaml
@@ -6,6 +6,9 @@ $schema: https://json-schema.org/draft-07/schema
# Common defines
$defs:
+ name:
+ type: string
+ pattern: ^[0-9a-z-]+$
uint:
type: integer
minimum: 0
@@ -76,7 +79,7 @@ properties:
additionalProperties: False
properties:
name:
- type: string
+ $ref: '#/$defs/name'
header:
description: For C-compatible languages, header which already defines this value.
type: string
@@ -103,7 +106,7 @@ properties:
additionalProperties: False
properties:
name:
- type: string
+ $ref: '#/$defs/name'
value:
type: integer
doc:
@@ -132,7 +135,7 @@ properties:
additionalProperties: False
properties:
name:
- type: string
+ $ref: '#/$defs/name'
type:
description: The netlink attribute type
enum: [ u8, u16, u32, u64, s8, s16, s32, s64, string, binary ]
@@ -169,7 +172,7 @@ properties:
name:
description: |
Name used when referring to this space in other definitions, not used outside of the spec.
- type: string
+ $ref: '#/$defs/name'
name-prefix:
description: |
Prefix for the C enum name of the attributes. Default family[name]-set[name]-a-
@@ -206,7 +209,7 @@ properties:
additionalProperties: False
properties:
name:
- type: string
+ $ref: '#/$defs/name'
type: &attr-type
description: The netlink attribute type
enum: [ unused, pad, flag, binary, bitfield32,
@@ -348,7 +351,7 @@ properties:
properties:
name:
description: Name of the operation, also defining its C enum value in uAPI.
- type: string
+ $ref: '#/$defs/name'
doc:
description: Documentation for the command.
type: string
diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml
index 40efbbad76ab..7b1ec153e834 100644
--- a/Documentation/netlink/genetlink.yaml
+++ b/Documentation/netlink/genetlink.yaml
@@ -6,6 +6,9 @@ $schema: https://json-schema.org/draft-07/schema
# Common defines
$defs:
+ name:
+ type: string
+ pattern: ^[0-9a-z-]+$
uint:
type: integer
minimum: 0
@@ -29,7 +32,7 @@ additionalProperties: False
properties:
name:
description: Name of the genetlink family.
- type: string
+ $ref: '#/$defs/name'
doc:
type: string
protocol:
@@ -48,7 +51,7 @@ properties:
additionalProperties: False
properties:
name:
- type: string
+ $ref: '#/$defs/name'
header:
description: For C-compatible languages, header which already defines this value.
type: string
@@ -75,7 +78,7 @@ properties:
additionalProperties: False
properties:
name:
- type: string
+ $ref: '#/$defs/name'
value:
type: integer
doc:
@@ -96,7 +99,7 @@ properties:
name:
description: |
Name used when referring to this space in other definitions, not used outside of the spec.
- type: string
+ $ref: '#/$defs/name'
name-prefix:
description: |
Prefix for the C enum name of the attributes. Default family[name]-set[name]-a-
@@ -121,7 +124,7 @@ properties:
additionalProperties: False
properties:
name:
- type: string
+ $ref: '#/$defs/name'
type: &attr-type
enum: [ unused, pad, flag, binary,
uint, sint, u8, u16, u32, u64, s8, s16, s32, s64,
@@ -243,7 +246,7 @@ properties:
properties:
name:
description: Name of the operation, also defining its C enum value in uAPI.
- type: string
+ $ref: '#/$defs/name'
doc:
description: Documentation for the command.
type: string
@@ -327,7 +330,7 @@ properties:
name:
description: |
The name for the group, used to form the define and the value of the define.
- type: string
+ $ref: '#/$defs/name'
flags: *cmd_flags
kernel-family:
diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml
index e34bf23897fa..246fa07bccf6 100644
--- a/Documentation/netlink/netlink-raw.yaml
+++ b/Documentation/netlink/netlink-raw.yaml
@@ -6,6 +6,12 @@ $schema: https://json-schema.org/draft-07/schema
# Common defines
$defs:
+ name:
+ type: string
+ pattern: ^[0-9a-z-]+$
+ name-cap:
+ type: string
+ pattern: ^[0-9a-zA-Z-]+$
uint:
type: integer
minimum: 0
@@ -71,7 +77,7 @@ properties:
additionalProperties: False
properties:
name:
- type: string
+ $ref: '#/$defs/name'
header:
description: For C-compatible languages, header which already defines this value.
type: string
@@ -98,7 +104,7 @@ properties:
additionalProperties: False
properties:
name:
- type: string
+ $ref: '#/$defs/name'
value:
type: integer
doc:
@@ -124,7 +130,7 @@ properties:
additionalProperties: False
properties:
name:
- type: string
+ $ref: '#/$defs/name-cap'
type:
description: |
The netlink attribute type. Members of type 'binary' or 'pad'
@@ -183,7 +189,7 @@ properties:
name:
description: |
Name used when referring to this space in other definitions, not used outside of the spec.
- type: string
+ $ref: '#/$defs/name'
name-prefix:
description: |
Prefix for the C enum name of the attributes. Default family[name]-set[name]-a-
@@ -220,7 +226,7 @@ properties:
additionalProperties: False
properties:
name:
- type: string
+ $ref: '#/$defs/name'
type: &attr-type
description: The netlink attribute type
enum: [ unused, pad, flag, binary, bitfield32,
@@ -408,7 +414,7 @@ properties:
properties:
name:
description: Name of the operation, also defining its C enum value in uAPI.
- type: string
+ $ref: '#/$defs/name'
doc:
description: Documentation for the command.
type: string
diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml
index 05fee1b7fe19..38ddc04f9e6d 100644
--- a/Documentation/netlink/specs/devlink.yaml
+++ b/Documentation/netlink/specs/devlink.yaml
@@ -38,15 +38,15 @@ definitions:
-
name: dsa
-
- name: pci_pf
+ name: pci-pf
-
- name: pci_vf
+ name: pci-vf
-
name: virtual
-
name: unused
-
- name: pci_sf
+ name: pci-sf
-
type: enum
name: port-fn-state
@@ -220,7 +220,7 @@ definitions:
-
name: flag
-
- name: nul_string
+ name: nul-string
value: 10
-
name: binary
diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml
index 8feefeae5376..f434140b538e 100644
--- a/Documentation/netlink/specs/dpll.yaml
+++ b/Documentation/netlink/specs/dpll.yaml
@@ -188,7 +188,7 @@ definitions:
value: 10000
-
type: const
- name: pin-frequency-77_5-khz
+ name: pin-frequency-77-5-khz
value: 77500
-
type: const
diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 9f98715a6512..348c6ad548f5 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -7,6 +7,9 @@ protocol: genetlink-legacy
doc: Partial family for Ethtool Netlink.
uapi-header: linux/ethtool_netlink_generated.h
+c-family-name: ethtool-genl-name
+c-version-name: ethtool-genl-version
+
definitions:
-
name: udp-tunnel-type
@@ -45,7 +48,7 @@ definitions:
name: started
doc: The firmware flashing process has started.
-
- name: in_progress
+ name: in-progress
doc: The firmware flashing process is in progress.
-
name: completed
@@ -1419,7 +1422,7 @@ attribute-sets:
name: hkey
type: binary
-
- name: input_xfrm
+ name: input-xfrm
type: u32
-
name: start-context
@@ -2235,7 +2238,7 @@ operations:
- hfunc
- indir
- hkey
- - input_xfrm
+ - input-xfrm
dump:
request:
attributes:
diff --git a/Documentation/netlink/specs/fou.yaml b/Documentation/netlink/specs/fou.yaml
index 0af5ab842c04..b02ab19817d3 100644
--- a/Documentation/netlink/specs/fou.yaml
+++ b/Documentation/netlink/specs/fou.yaml
@@ -15,7 +15,7 @@ kernel-policy: global
definitions:
-
type: enum
- name: encap_type
+ name: encap-type
name-prefix: fou-encap-
enum-name:
entries: [ unspec, direct, gue ]
@@ -43,26 +43,26 @@ attribute-sets:
name: type
type: u8
-
- name: remcsum_nopartial
+ name: remcsum-nopartial
type: flag
-
- name: local_v4
+ name: local-v4
type: u32
-
- name: local_v6
+ name: local-v6
type: binary
checks:
min-len: 16
-
- name: peer_v4
+ name: peer-v4
type: u32
-
- name: peer_v6
+ name: peer-v6
type: binary
checks:
min-len: 16
-
- name: peer_port
+ name: peer-port
type: u16
byte-order: big-endian
-
@@ -90,12 +90,12 @@ operations:
- port
- ipproto
- type
- - remcsum_nopartial
- - local_v4
- - peer_v4
- - local_v6
- - peer_v6
- - peer_port
+ - remcsum-nopartial
+ - local-v4
+ - peer-v4
+ - local-v6
+ - peer-v6
+ - peer-port
- ifindex
-
@@ -112,11 +112,11 @@ operations:
- af
- ifindex
- port
- - peer_port
- - local_v4
- - peer_v4
- - local_v6
- - peer_v6
+ - peer-port
+ - local-v4
+ - peer-v4
+ - local-v6
+ - peer-v6
-
name: get
diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml
index dfd017780d2f..fb57860fe778 100644
--- a/Documentation/netlink/specs/mptcp_pm.yaml
+++ b/Documentation/netlink/specs/mptcp_pm.yaml
@@ -57,21 +57,21 @@ definitions:
doc: >-
A new subflow has been established. 'error' should not be set.
Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 |
- daddr6, sport, dport, backup, if_idx [, error].
+ daddr6, sport, dport, backup, if-idx [, error].
-
name: sub-closed
doc: >-
A subflow has been closed. An error (copy of sk_err) could be set if an
error has been detected for this subflow.
Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 |
- daddr6, sport, dport, backup, if_idx [, error].
+ daddr6, sport, dport, backup, if-idx [, error].
-
name: sub-priority
value: 13
doc: >-
The priority of a subflow has changed. 'error' should not be set.
Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 |
- daddr6, sport, dport, backup, if_idx [, error].
+ daddr6, sport, dport, backup, if-idx [, error].
-
name: listener-created
value: 15
@@ -255,7 +255,7 @@ attribute-sets:
name: timeout
type: u32
-
- name: if_idx
+ name: if-idx
type: u32
-
name: reset-reason
diff --git a/Documentation/netlink/specs/nfsd.yaml b/Documentation/netlink/specs/nfsd.yaml
index c87658114852..8d1a3c01708f 100644
--- a/Documentation/netlink/specs/nfsd.yaml
+++ b/Documentation/netlink/specs/nfsd.yaml
@@ -27,7 +27,7 @@ attribute-sets:
name: proc
type: u32
-
- name: service_time
+ name: service-time
type: s64
-
name: pad
@@ -139,7 +139,7 @@ operations:
- prog
- version
- proc
- - service_time
+ - service-time
- saddr4
- daddr4
- saddr6
diff --git a/Documentation/netlink/specs/ovpn.yaml b/Documentation/netlink/specs/ovpn.yaml
index 096c51f0c69a..ba76426a542d 100644
--- a/Documentation/netlink/specs/ovpn.yaml
+++ b/Documentation/netlink/specs/ovpn.yaml
@@ -161,6 +161,66 @@ attribute-sets:
type: uint
doc: Number of packets transmitted at the transport level
-
+ name: peer-new-input
+ subset-of: peer
+ attributes:
+ -
+ name: id
+ -
+ name: remote-ipv4
+ -
+ name: remote-ipv6
+ -
+ name: remote-ipv6-scope-id
+ -
+ name: remote-port
+ -
+ name: socket
+ -
+ name: vpn-ipv4
+ -
+ name: vpn-ipv6
+ -
+ name: local-ipv4
+ -
+ name: local-ipv6
+ -
+ name: keepalive-interval
+ -
+ name: keepalive-timeout
+ -
+ name: peer-set-input
+ subset-of: peer
+ attributes:
+ -
+ name: id
+ -
+ name: remote-ipv4
+ -
+ name: remote-ipv6
+ -
+ name: remote-ipv6-scope-id
+ -
+ name: remote-port
+ -
+ name: vpn-ipv4
+ -
+ name: vpn-ipv6
+ -
+ name: local-ipv4
+ -
+ name: local-ipv6
+ -
+ name: keepalive-interval
+ -
+ name: keepalive-timeout
+ -
+ name: peer-del-input
+ subset-of: peer
+ attributes:
+ -
+ name: id
+ -
name: keyconf
attributes:
-
@@ -216,6 +276,33 @@ attribute-sets:
obtain the actual cipher IV
checks:
exact-len: nonce-tail-size
+
+ -
+ name: keyconf-get
+ subset-of: keyconf
+ attributes:
+ -
+ name: peer-id
+ -
+ name: slot
+ -
+ name: key-id
+ -
+ name: cipher-alg
+ -
+ name: keyconf-swap-input
+ subset-of: keyconf
+ attributes:
+ -
+ name: peer-id
+ -
+ name: keyconf-del-input
+ subset-of: keyconf
+ attributes:
+ -
+ name: peer-id
+ -
+ name: slot
-
name: ovpn
attributes:
@@ -235,12 +322,66 @@ attribute-sets:
type: nest
doc: Peer specific cipher configuration
nested-attributes: keyconf
+ -
+ name: ovpn-peer-new-input
+ subset-of: ovpn
+ attributes:
+ -
+ name: ifindex
+ -
+ name: peer
+ nested-attributes: peer-new-input
+ -
+ name: ovpn-peer-set-input
+ subset-of: ovpn
+ attributes:
+ -
+ name: ifindex
+ -
+ name: peer
+ nested-attributes: peer-set-input
+ -
+ name: ovpn-peer-del-input
+ subset-of: ovpn
+ attributes:
+ -
+ name: ifindex
+ -
+ name: peer
+ nested-attributes: peer-del-input
+ -
+ name: ovpn-keyconf-get
+ subset-of: ovpn
+ attributes:
+ -
+ name: ifindex
+ -
+ name: keyconf
+ nested-attributes: keyconf-get
+ -
+ name: ovpn-keyconf-swap-input
+ subset-of: ovpn
+ attributes:
+ -
+ name: ifindex
+ -
+ name: keyconf
+ nested-attributes: keyconf-swap-input
+ -
+ name: ovpn-keyconf-del-input
+ subset-of: ovpn
+ attributes:
+ -
+ name: ifindex
+ -
+ name: keyconf
+ nested-attributes: keyconf-del-input
operations:
list:
-
name: peer-new
- attribute-set: ovpn
+ attribute-set: ovpn-peer-new-input
flags: [ admin-perm ]
doc: Add a remote peer
do:
@@ -252,7 +393,7 @@ operations:
- peer
-
name: peer-set
- attribute-set: ovpn
+ attribute-set: ovpn-peer-set-input
flags: [ admin-perm ]
doc: modify a remote peer
do:
@@ -286,7 +427,7 @@ operations:
- peer
-
name: peer-del
- attribute-set: ovpn
+ attribute-set: ovpn-peer-del-input
flags: [ admin-perm ]
doc: Delete existing remote peer
do:
@@ -316,7 +457,7 @@ operations:
- keyconf
-
name: key-get
- attribute-set: ovpn
+ attribute-set: ovpn-keyconf-get
flags: [ admin-perm ]
doc: Retrieve non-sensitive data about peer key and cipher
do:
@@ -331,7 +472,7 @@ operations:
- keyconf
-
name: key-swap
- attribute-set: ovpn
+ attribute-set: ovpn-keyconf-swap-input
flags: [ admin-perm ]
doc: Swap primary and secondary session keys for a specific peer
do:
@@ -350,7 +491,7 @@ operations:
mcgrp: peers
-
name: key-del
- attribute-set: ovpn
+ attribute-set: ovpn-keyconf-del-input
flags: [ admin-perm ]
doc: Delete cipher key for a specific peer
do:
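The practical effect of these new ``*-input`` subsets is that each operation now rejects attributes outside its subset, while well-behaved userspace is unaffected because attribute values are unchanged. As a rough illustration, a ``peer-del`` request built with libnl still carries only the interface index and the nested peer id; this is a hedged sketch (error handling omitted; names taken from the ``ovpn`` uapi header as I understand them, not text from this patch):

.. code-block:: c

    #include <netlink/genl/genl.h>
    #include <netlink/genl/ctrl.h>
    #include <linux/ovpn.h>

    /* Delete peer 'peer_id' on device 'ifindex'; only the peer id is
     * valid inside the nest now that peer-del uses peer-del-input. */
    static int ovpn_peer_del(struct nl_sock *sk, int family,
                             __u32 ifindex, __u32 peer_id)
    {
        struct nl_msg *msg = nlmsg_alloc();
        struct nlattr *peer;

        genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
                    OVPN_CMD_PEER_DEL, 1);
        nla_put_u32(msg, OVPN_A_IFINDEX, ifindex);
        peer = nla_nest_start(msg, OVPN_A_PEER);
        nla_put_u32(msg, OVPN_A_PEER_ID, peer_id);
        nla_nest_end(msg, peer);

        return nl_send_sync(sk, msg);  /* sends, waits for ACK, frees msg */
    }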
diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml
index 46f5d1cd8a5f..7974aa7d8905 100644
--- a/Documentation/netlink/specs/ovs_flow.yaml
+++ b/Documentation/netlink/specs/ovs_flow.yaml
@@ -216,7 +216,7 @@ definitions:
type: struct
members:
-
- name: nd_target
+ name: nd-target
type: binary
len: 16
byte-order: big-endian
@@ -258,12 +258,12 @@ definitions:
type: struct
members:
-
- name: vlan_tpid
+ name: vlan-tpid
type: u16
byte-order: big-endian
doc: Tag protocol identifier (TPID) to push.
-
- name: vlan_tci
+ name: vlan-tci
type: u16
byte-order: big-endian
doc: Tag control identifier (TCI) to push.
diff --git a/Documentation/netlink/specs/rt-link.yaml b/Documentation/netlink/specs/rt-link.yaml
index b41b31eebcae..28c4cf66517c 100644
--- a/Documentation/netlink/specs/rt-link.yaml
+++ b/Documentation/netlink/specs/rt-link.yaml
@@ -603,7 +603,7 @@ definitions:
name: optmask
type: u32
-
- name: if_stats_msg
+ name: if-stats-msg
type: struct
members:
-
@@ -2486,7 +2486,7 @@ operations:
name: getstats
doc: Get / dump link stats.
attribute-set: stats-attrs
- fixed-header: if_stats_msg
+ fixed-header: if-stats-msg
do:
request:
value: 94
diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml
index cb7ea7d62e56..42d74c9aeb54 100644
--- a/Documentation/netlink/specs/tc.yaml
+++ b/Documentation/netlink/specs/tc.yaml
@@ -232,7 +232,7 @@ definitions:
type: u8
doc: log(P_max / (qth-max - qth-min))
-
- name: Scell_log
+ name: Scell-log
type: u8
doc: cell size for idle damping
-
@@ -253,7 +253,7 @@ definitions:
name: DPs
type: u32
-
- name: def_DP
+ name: def-DP
type: u32
-
name: grio
diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
index af7db0e91f6b..a52850602cd8 100644
--- a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
+++ b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
@@ -66,7 +66,7 @@ Admin Function driver
As mentioned above RVU PF0 is called the admin function (AF), this driver
supports resource provisioning and configuration of functional blocks.
Doesn't handle any I/O. It sets up few basic stuff but most of the
-funcionality is achieved via configuration requests from PFs and VFs.
+functionality is achieved via configuration requests from PFs and VFs.
PF/VFs communicates with AF via a shared memory region (mailbox). Upon
receiving requests AF does resource provisioning and other HW configuration.
diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst
index c7904a1bc167..36cc7afc2527 100644
--- a/Documentation/networking/tls.rst
+++ b/Documentation/networking/tls.rst
@@ -16,11 +16,13 @@ User interface
Creating a TLS connection
-------------------------
-First create a new TCP socket and set the TLS ULP.
+First create a new TCP socket and, once the connection is established, set
+the TLS ULP.
.. code-block:: c
sock = socket(AF_INET, SOCK_STREAM, 0);
+ connect(sock, addr, addrlen);
setsockopt(sock, SOL_TCP, TCP_ULP, "tls", sizeof("tls"));
Setting the TLS ULP allows us to set/get TLS socket options. Currently
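For completeness, a sketch of the corrected sequence with error checking (the server address setup is assumed to happen elsewhere; this is an illustration, not part of tls.rst):

.. code-block:: c

    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <sys/socket.h>
    #include <unistd.h>

    /* Attach the TLS ULP only once the TCP connection is established,
     * matching the corrected documentation above. */
    static int tls_socket(const struct sockaddr *addr, socklen_t addrlen)
    {
        int sock = socket(AF_INET, SOCK_STREAM, 0);

        if (sock < 0)
            return -1;
        if (connect(sock, addr, addrlen) < 0 ||
            setsockopt(sock, SOL_TCP, TCP_ULP, "tls", sizeof("tls")) < 0) {
            close(sock);
            return -1;
        }
        return sock;
    }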
diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst
index da6bf0f6d01e..34e00848e0da 100644
--- a/Documentation/process/embargoed-hardware-issues.rst
+++ b/Documentation/process/embargoed-hardware-issues.rst
@@ -290,6 +290,7 @@ an involved disclosed party. The current ambassadors list:
AMD Tom Lendacky <thomas.lendacky@amd.com>
Ampere Darren Hart <darren@os.amperecomputing.com>
ARM Catalin Marinas <catalin.marinas@arm.com>
+ IBM Power Madhavan Srinivasan <maddy@linux.ibm.com>
IBM Z Christian Borntraeger <borntraeger@de.ibm.com>
Intel Tony Luck <tony.luck@intel.com>
Qualcomm Trilok Soni <quic_tsoni@quicinc.com>
diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst
index 1ac62dc3a66f..e1755610b4bc 100644
--- a/Documentation/process/maintainer-netdev.rst
+++ b/Documentation/process/maintainer-netdev.rst
@@ -312,7 +312,7 @@ Posting as one thread is discouraged because it confuses patchwork
(as of patchwork 2.2.2).
Co-posting selftests
---------------------
+~~~~~~~~~~~~~~~~~~~~
Selftests should be part of the same series as the code changes.
Specifically for fixes both code change and related test should go into
diff --git a/Documentation/security/credentials.rst b/Documentation/security/credentials.rst
index 2aa0791bcefe..d0191c8b8060 100644
--- a/Documentation/security/credentials.rst
+++ b/Documentation/security/credentials.rst
@@ -555,5 +555,5 @@ the VFS, and that can be done by calling into such as ``vfs_mkdir()`` with a
different set of credentials. This is done in the following places:
* ``sys_faccessat()``.
- * ``do_coredump()``.
+ * ``vfs_coredump()``.
* nfs4recover.c.
diff --git a/Documentation/sound/codecs/cs35l56.rst b/Documentation/sound/codecs/cs35l56.rst
index 98c6f6c74394..57d1964453e1 100644
--- a/Documentation/sound/codecs/cs35l56.rst
+++ b/Documentation/sound/codecs/cs35l56.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0-only
-=====================================================================
-Audio drivers for Cirrus Logic CS35L54/56/57 Boosted Smart Amplifiers
-=====================================================================
+========================================================================
+Audio drivers for Cirrus Logic CS35L54/56/57/63 Boosted Smart Amplifiers
+========================================================================
:Copyright: 2025 Cirrus Logic, Inc. and
Cirrus Logic International Semiconductor Ltd.
@@ -13,11 +13,11 @@ Summary
The high-level summary of this document is:
-**If you have a laptop that uses CS35L54/56/57 amplifiers but audio is not
+**If you have a laptop that uses CS35L54/56/57/63 amplifiers but audio is not
working, DO NOT ATTEMPT TO USE FIRMWARE AND SETTINGS FROM ANOTHER LAPTOP,
EVEN IF THAT LAPTOP SEEMS SIMILAR.**
-The CS35L54/56/57 amplifiers must be correctly configured for the power
+The CS35L54/56/57/63 amplifiers must be correctly configured for the power
supply voltage, speaker impedance, maximum speaker voltage/current, and
other external hardware connections.
@@ -34,6 +34,7 @@ The cs35l56 drivers support:
* CS35L54
* CS35L56
* CS35L57
+* CS35L63
There are two drivers in the kernel
@@ -104,6 +105,13 @@ In this example the SSID is 10280c63.
The format of the firmware file names is:
+SoundWire (except CS35L56 Rev B0):
+ cs35lxx-b0-dsp1-misc-SSID[-spkidX]-l?u?
+
+SoundWire CS35L56 Rev B0:
+ cs35lxx-b0-dsp1-misc-SSID[-spkidX]-ampN
+
+Non-SoundWire (HDA and I2S):
cs35lxx-b0-dsp1-misc-SSID[-spkidX]-ampN
Where:
@@ -111,12 +119,18 @@ Where:
* cs35lxx-b0 is the amplifier model and silicon revision. This information
is logged by the driver during initialization.
* SSID is the 8-digit hexadecimal SSID value.
+ * l?u? is the physical address on the SoundWire bus of the amp this
+ file applies to.
* ampN is the amplifier number (for example amp1). This is the same as
the prefix on the ALSA control names except that it is always lower-case
in the file name.
* spkidX is an optional part, used for laptops that have firmware
configurations for different makes and models of internal speakers.
+The CS35L56 Rev B0 continues to use the old filename scheme because a
+large number of firmware files have already been published with these
+names.
+
Sound Open Firmware and ALSA topology files
-------------------------------------------
diff --git a/Documentation/translations/zh_CN/security/credentials.rst b/Documentation/translations/zh_CN/security/credentials.rst
index 91c353dfb622..88fcd9152ffe 100644
--- a/Documentation/translations/zh_CN/security/credentials.rst
+++ b/Documentation/translations/zh_CN/security/credentials.rst
@@ -475,5 +475,5 @@ const指针上操作,因此不需要进行类型转换,但需要临时放弃
如 ``vfs_mkdir()`` 来实现。以下是一些进行此操作的位置:
* ``sys_faccessat()``.
- * ``do_coredump()``.
+ * ``vfs_coredump()``.
* nfs4recover.c.
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 1bd2d42e6424..544fb11351d9 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -2008,6 +2008,13 @@ If the KVM_CAP_VM_TSC_CONTROL capability is advertised, this can also
be used as a vm ioctl to set the initial tsc frequency of subsequently
created vCPUs.
+For TSC-protected Confidential Computing (CoCo) VMs, where the TSC
+frequency is configured once at VM scope and remains unchanged during
+the VM's lifetime, the vm ioctl should be used to configure the TSC
+frequency; the vcpu ioctl is not supported.
+
+Examples of such CoCo VMs include TDX guests.
+
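A hedged sketch of what this means for a VMM targeting such guests (``vm_fd`` from ``KVM_CREATE_VM`` is assumed; the frequency value is an arbitrary example):

.. code-block:: c

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* For a TSC-protected CoCo VM (e.g. TDX), set the frequency once on
     * the VM fd before creating vCPUs; per-vCPU KVM_SET_TSC_KHZ is not
     * supported there. */
    static int set_vm_tsc_khz(int vm_fd)
    {
        if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_VM_TSC_CONTROL) <= 0)
            return -1;  /* VM-scope TSC control not available */
        return ioctl(vm_fd, KVM_SET_TSC_KHZ, 2500000);  /* 2.5 GHz in kHz */
    }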
4.56 KVM_GET_TSC_KHZ
--------------------
@@ -6645,7 +6652,8 @@ to the byte array.
.. note::
For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN,
- KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
+ KVM_EXIT_EPR, KVM_EXIT_HYPERCALL, KVM_EXIT_TDX,
+ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
operations are complete (and guest state is consistent) only after userspace
has re-entered the kernel with KVM_RUN. The kernel side will first finish
incomplete operations and then check for pending signals.
@@ -7176,6 +7184,69 @@ The valid value for 'flags' is:
::
+ /* KVM_EXIT_TDX */
+ struct {
+ __u64 flags;
+ __u64 nr;
+ union {
+ struct {
+ u64 ret;
+ u64 data[5];
+ } unknown;
+ struct {
+ u64 ret;
+ u64 gpa;
+ u64 size;
+ } get_quote;
+ struct {
+ u64 ret;
+ u64 leaf;
+ u64 r11, r12, r13, r14;
+ } get_tdvmcall_info;
+ struct {
+ u64 ret;
+ u64 vector;
+ } setup_event_notify;
+ };
+ } tdx;
+
+Process a TDVMCALL from the guest. KVM forwards select TDVMCALLs based
+on the Guest-Hypervisor Communication Interface (GHCI) specification;
+KVM bridges these requests to the userspace VMM with minimal changes,
+placing the inputs in the union and copying them back to the guest
+on re-entry.
+
+Flags are currently always zero, whereas ``nr`` contains the TDVMCALL
+number from register R11. The remaining fields of the union provide the
+inputs and outputs of the TDVMCALL. Currently the following values of
+``nr`` are defined:
+
+ * ``TDVMCALL_GET_QUOTE``: the guest has requested to generate a TD-Quote
+ signed by a service hosting a TD-Quoting Enclave operating on the host.
+ Parameters and return value are in the ``get_quote`` field of the union.
+ The ``gpa`` and ``size`` fields specify the guest physical address
+ (without the shared bit set) and the size of a shared-memory buffer, in
+ which the TDX guest passes a TD Report. The ``ret`` field represents
+ the return value of the GetQuote request. When the request has been
+ queued successfully, the TDX guest can poll the status field in the
+ shared-memory area to check whether the Quote generation is completed or
+ not. When completed, the generated Quote is returned via the same buffer.
+
+ * ``TDVMCALL_GET_TD_VM_CALL_INFO``: the guest has requested the support
+ status of TDVMCALLs. The output values for the given leaf should be
+ placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info``
+ field of the union.
+
+ * ``TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT``: the guest has requested to
+ set up a notification interrupt for vector ``vector``.
+
+KVM may add support for more values in the future that may cause a userspace
+exit, even without calls to ``KVM_ENABLE_CAP`` or similar. In this case,
+it will enter with output fields already valid; in the common case, the
+``unknown.ret`` field of the union will be ``TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED``.
+Userspace need not do anything if it does not wish to support a TDVMCALL.
+::
+
/* Fix the size of the union. */
char padding[256];
};
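To make the flow concrete, here is a rough sketch of the userspace side of this contract (``run`` is the mmap'ed ``struct kvm_run``; ``handle_get_quote()`` is a hypothetical VMM helper, not kernel API):

.. code-block:: c

    /* After KVM_RUN returns with exit_reason == KVM_EXIT_TDX, fill in
     * the union's output fields and re-enter; KVM copies them back to
     * the guest. */
    if (run->exit_reason == KVM_EXIT_TDX) {
        switch (run->tdx.nr) {
        case TDVMCALL_GET_QUOTE:
            /* Kick off quote generation for the shared buffer described
             * by gpa/size, then report whether the request was queued. */
            run->tdx.get_quote.ret =
                handle_get_quote(run->tdx.get_quote.gpa,
                                 run->tdx.get_quote.size);
            break;
        default:
            /* Unsupported TDVMCALL: leave the ret value KVM pre-filled
             * (TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED) and do nothing. */
            break;
        }
        ioctl(vcpu_fd, KVM_RUN, 0);
    }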
diff --git a/Documentation/virt/kvm/review-checklist.rst b/Documentation/virt/kvm/review-checklist.rst
index dc01aea4057b..debac54e14e7 100644
--- a/Documentation/virt/kvm/review-checklist.rst
+++ b/Documentation/virt/kvm/review-checklist.rst
@@ -7,7 +7,7 @@ Review checklist for kvm patches
1. The patch must follow Documentation/process/coding-style.rst and
Documentation/process/submitting-patches.rst.
-2. Patches should be against kvm.git master branch.
+2. Patches should be against kvm.git master or next branches.
3. If the patch introduces or modifies a new userspace API:
- the API must be documented in Documentation/virt/kvm/api.rst
@@ -18,10 +18,10 @@ Review checklist for kvm patches
5. New features must default to off (userspace should explicitly request them).
Performance improvements can and should default to on.
-6. New cpu features should be exposed via KVM_GET_SUPPORTED_CPUID2
+6. New cpu features should be exposed via KVM_GET_SUPPORTED_CPUID2,
+ or its equivalent for non-x86 architectures.
-7. Emulator changes should be accompanied by unit tests for qemu-kvm.git
- kvm/test directory.
+7. The feature should be testable (see below).
8. Changes should be vendor neutral when possible. Changes to common code
are better than duplicating changes to vendor code.
@@ -36,6 +36,87 @@ Review checklist for kvm patches
11. New guest visible features must either be documented in a hardware manual
or be accompanied by documentation.
-12. Features must be robust against reset and kexec - for example, shared
- host/guest memory must be unshared to prevent the host from writing to
- guest memory that the guest has not reserved for this purpose.
+Testing of KVM code
+-------------------
+
+All features contributed to KVM, and in many cases bugfixes too, should be
+accompanied by some kind of tests and/or enablement in open source guests
+and VMMs. KVM is covered by multiple test suites:
+
+*Selftests*
+ These are low level tests that allow granular testing of kernel APIs.
+ This includes API failure scenarios, invoking APIs after specific
+ guest instructions, and testing multiple calls to ``KVM_CREATE_VM``
+ within a single test. They are included in the kernel tree at
+ ``tools/testing/selftests/kvm``.
+
+``kvm-unit-tests``
+ A collection of small guests that test CPU and emulated device features
+ from a guest's perspective. They run under QEMU or ``kvmtool``, and
+ are generally not KVM-specific: they can be run with any accelerator
+ that QEMU supports or even on bare metal, making it possible to compare
+ behavior across hypervisors and processor families.
+
+Functional test suites
+ Various sets of functional tests exist, such as QEMU's ``tests/functional``
+ suite and `avocado-vt <https://avocado-vt.readthedocs.io/en/latest/>`__.
+ These typically involve running a full operating system in a virtual
+ machine.
+
+The best testing approach depends on the feature's complexity and
+operation. Here are some examples and guidelines:
+
+New instructions (no new registers or APIs)
+ The corresponding CPU features (if applicable) should be made available
+ in QEMU. If the instructions require emulation support or other code in
+ KVM, it is worth adding coverage to ``kvm-unit-tests`` or selftests;
+ the latter can be a better choice if the instructions relate to an API
+ that already has good selftest coverage.
+
+New hardware features (new registers, no new APIs)
+ These should be tested via ``kvm-unit-tests``; this more or less implies
+ supporting them in QEMU and/or ``kvmtool``. In some cases selftests
+ can be used instead, similar to the previous case, or specifically to
+ test corner cases in guest state save/restore.
+
+Bug fixes and performance improvements
+ These usually do not introduce new APIs, but it's worth sharing
+ any benchmarks and tests that will validate your contribution,
+ ideally in the form of regression tests. Tests and benchmarks
+ can be included in either ``kvm-unit-tests`` or selftests, depending
+ on the specifics of your change. Selftests are especially useful for
+ regression tests because they are included directly in Linux's tree.
+
+Large scale internal changes
+ While it's difficult to provide a single policy, you should ensure that
+ the changed code is covered by either ``kvm-unit-tests`` or selftests.
+ In some cases the affected code is run for any guest, and functional
+ tests suffice. Explain your testing process in the cover letter,
+ as that can help identify gaps in existing test suites.
+
+New APIs
+ It is important to demonstrate your use case. This can be as simple as
+ explaining that the feature is already in use on bare metal, or it can be
+ a proof-of-concept implementation in userspace. The latter need not be
+ open source, though that is of course preferable for easier testing.
+ Selftests should test corner cases of the APIs, and should also cover
+ basic host and guest operation if no open source VMM uses the feature.
+
+Bigger features, usually spanning host and guest
+ These should be supported by Linux guests, with limited exceptions for
+ Hyper-V features that are testable on Windows guests. It is strongly
+ suggested that the feature be usable with an open source host VMM, such
+ as at least one of QEMU or crosvm, and guest firmware. Selftests should
+ test at least API error cases. Guest operation can be covered by
+ either selftests or ``kvm-unit-tests`` (this is especially important for
+ paravirtualized and Windows-only features). Strong selftest coverage
+ can also be a replacement for implementation in an open source VMM,
+ but this is generally not recommended.
+
+Following the above suggestions for testing in selftests and
+``kvm-unit-tests`` will make it easier for the maintainers to review
+and accept your code. In fact, it will make developing for KVM easier
+even before you contribute your changes upstream.
+
+Of course, the KVM maintainers reserve the right to require more tests,
+though they may also waive the requirement from time to time.
diff --git a/Documentation/virt/kvm/x86/intel-tdx.rst b/Documentation/virt/kvm/x86/intel-tdx.rst
index 76bdd95334d6..5efac62c92c7 100644
--- a/Documentation/virt/kvm/x86/intel-tdx.rst
+++ b/Documentation/virt/kvm/x86/intel-tdx.rst
@@ -79,7 +79,20 @@ to be configured to the TDX guest.
struct kvm_tdx_capabilities {
__u64 supported_attrs;
__u64 supported_xfam;
- __u64 reserved[254];
+
+ /* TDG.VP.VMCALL hypercalls executed in kernel and forwarded to
+ * userspace, respectively
+ */
+ __u64 kernel_tdvmcallinfo_1_r11;
+ __u64 user_tdvmcallinfo_1_r11;
+
+ /* TDG.VP.VMCALL instruction execution subfunctions executed in kernel
+ * and forwarded to userspace, respectively
+ */
+ __u64 kernel_tdvmcallinfo_1_r12;
+ __u64 user_tdvmcallinfo_1_r12;
+
+ __u64 reserved[250];
/* Configurable CPUID bits for userspace */
struct kvm_cpuid2 cpuid;
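A sketch of how a VMM might read the new fields (buffer sizing for the trailing ``cpuid`` array and the exact ``kvm_tdx_cmd`` invocation follow the pattern documented earlier in this file; treat the details as illustrative assumptions):

.. code-block:: c

    struct kvm_tdx_cmd cmd = {
        .id = KVM_TDX_CAPABILITIES,
        .data = (__u64)(unsigned long)caps,  /* caps: buffer sized for cpuid entries */
    };

    if (ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd) < 0)
        return -errno;

    /* Bitmaps of TDG.VP.VMCALL subfunctions (GetTdVmCallInfo leaf 1)
     * handled in the kernel vs. forwarded to userspace. */
    printf("leaf1 r11: kernel %#llx user %#llx\n",
           (unsigned long long)caps->kernel_tdvmcallinfo_1_r11,
           (unsigned long long)caps->user_tdvmcallinfo_1_r11);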
diff --git a/Documentation/wmi/acpi-interface.rst b/Documentation/wmi/acpi-interface.rst
index f1b28835d23c..1ef003b033bf 100644
--- a/Documentation/wmi/acpi-interface.rst
+++ b/Documentation/wmi/acpi-interface.rst
@@ -36,7 +36,7 @@ Offset Size (in bytes) Content
The WMI object flags control whether the method or notification ID is used:
-- 0x1: Data block usage is expensive and must be explicitly enabled/disabled.
+- 0x1: Data block is expensive to collect.
- 0x2: Data block contains WMI methods.
- 0x4: Data block contains ASCIZ string.
- 0x8: Data block describes a WMI event, use notification ID instead
@@ -83,14 +83,18 @@ event as hexadecimal value. Their first parameter is an integer with a value
of 0 if the WMI event should be disabled, other values will enable
the WMI event.
+Those ACPI methods are always called, even for WMI events not registered as
+being expensive to collect, to match the behavior of the Windows driver.
+
WCxx ACPI methods
-----------------
-Similar to the ``WExx`` ACPI methods, except that it controls data collection
-instead of events and thus the last two characters of the ACPI method name are
-the method ID of the data block to enable/disable.
+Similar to the ``WExx`` ACPI methods, except that they control data collection
+for data blocks registered as being expensive to collect, rather than WMI
+events. Thus the last two characters of the ACPI method name are the method ID
+of the data block to enable/disable.
Those ACPI methods are also called before setting data blocks to match the
-behaviour of the Windows driver.
+behavior of the Windows driver.
_WED ACPI method
----------------
diff --git a/MAINTAINERS b/MAINTAINERS
index a92290fffa16..c0b444e5fd5a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -207,7 +207,7 @@ X: arch/*/include/uapi/
X: include/uapi/
ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-hwmon@vger.kernel.org
S: Maintained
F: drivers/hwmon/abituguru.c
@@ -371,7 +371,7 @@ S: Maintained
F: drivers/platform/x86/quickstart.c
ACPI SERIAL MULTI INSTANTIATE DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: platform-driver-x86@vger.kernel.org
S: Maintained
F: drivers/platform/x86/serial-multi-instantiate.c
@@ -1157,7 +1157,6 @@ F: arch/x86/include/asm/amd/node.h
F: arch/x86/kernel/amd_node.c
AMD PDS CORE DRIVER
-M: Shannon Nelson <shannon.nelson@amd.com>
M: Brett Creeley <brett.creeley@amd.com>
L: netdev@vger.kernel.org
S: Maintained
@@ -3551,7 +3550,7 @@ F: arch/arm64/boot/Makefile
F: scripts/make_fit.py
ARM64 PLATFORM DRIVERS
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
R: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
L: platform-driver-x86@vger.kernel.org
@@ -3712,7 +3711,7 @@ F: drivers/platform/x86/asus*.c
F: drivers/platform/x86/eeepc*.c
ASUS TF103C DOCK DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: platform-driver-x86@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git
@@ -4182,6 +4181,7 @@ F: include/linux/cpumask_types.h
F: include/linux/find.h
F: include/linux/nodemask.h
F: include/linux/nodemask_types.h
+F: include/uapi/linux/bits.h
F: include/vdso/bits.h
F: lib/bitmap-str.c
F: lib/bitmap.c
@@ -4194,6 +4194,7 @@ F: tools/include/linux/bitfield.h
F: tools/include/linux/bitmap.h
F: tools/include/linux/bits.h
F: tools/include/linux/find.h
+F: tools/include/uapi/linux/bits.h
F: tools/include/vdso/bits.h
F: tools/lib/bitmap.c
F: tools/lib/find_bit.c
@@ -4555,6 +4556,7 @@ BPF [NETWORKING] (tcx & tc BPF, sock_addr)
M: Martin KaFai Lau <martin.lau@linux.dev>
M: Daniel Borkmann <daniel@iogearbox.net>
R: John Fastabend <john.fastabend@gmail.com>
+R: Stanislav Fomichev <sdf@fomichev.me>
L: bpf@vger.kernel.org
L: netdev@vger.kernel.org
S: Maintained
@@ -5568,6 +5570,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
F: drivers/char/
F: drivers/misc/
F: include/linux/miscdevice.h
+F: rust/kernel/miscdevice.rs
F: samples/rust/rust_misc_device.rs
X: drivers/char/agp/
X: drivers/char/hw_random/
@@ -5613,14 +5616,14 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git
F: drivers/usb/chipidea/
CHIPONE ICN8318 I2C TOUCHSCREEN DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-input@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml
F: drivers/input/touchscreen/chipone_icn8318.c
CHIPONE ICN8505 I2C TOUCHSCREEN DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-input@vger.kernel.org
S: Maintained
F: drivers/input/touchscreen/chipone_icn8505.c
@@ -6254,6 +6257,7 @@ F: include/linux/cpuhotplug.h
F: include/linux/smpboot.h
F: kernel/cpu.c
F: kernel/smpboot.*
+F: rust/helpers/cpu.c
F: rust/kernel/cpu.rs
CPU IDLE TIME MANAGEMENT FRAMEWORK
@@ -6917,7 +6921,7 @@ F: include/dt-bindings/pmu/exynos_ppmu.h
F: include/linux/devfreq-event.h
DEVICE RESOURCE MANAGEMENT HELPERS
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
R: Matti Vaittinen <mazziesaccount@gmail.com>
S: Maintained
F: include/linux/devm-helpers.h
@@ -7516,7 +7520,7 @@ F: drivers/gpu/drm/gud/
F: include/drm/gud.h
DRM DRIVER FOR GRAIN MEDIA GM12U320 PROJECTORS
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
S: Maintained
T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
F: drivers/gpu/drm/tiny/gm12u320.c
@@ -7916,7 +7920,7 @@ F: drivers/gpu/drm/ci/xfails/vkms*
F: drivers/gpu/drm/vkms/
DRM DRIVER FOR VIRTUALBOX VIRTUAL GPU
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
@@ -8317,7 +8321,7 @@ F: drivers/gpu/drm/panel/
F: include/drm/drm_panel.h
DRM PRIVACY-SCREEN CLASS
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
@@ -9940,7 +9944,6 @@ F: drivers/fwctl/mlx5/
FWCTL PDS DRIVER
M: Brett Creeley <brett.creeley@amd.com>
-R: Shannon Nelson <shannon.nelson@amd.com>
L: linux-kernel@vger.kernel.org
S: Maintained
F: drivers/fwctl/pds/
@@ -10221,7 +10224,7 @@ S: Maintained
F: Documentation/devicetree/bindings/connector/gocontroll,moduline-module-slot.yaml
GOODIX TOUCHSCREEN
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-input@vger.kernel.org
S: Maintained
F: drivers/input/touchscreen/goodix*
@@ -10260,7 +10263,7 @@ F: include/dt-bindings/clock/google,gs101.h
K: [gG]oogle.?[tT]ensor
GPD POCKET FAN DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: platform-driver-x86@vger.kernel.org
S: Maintained
F: drivers/platform/x86/gpd-pocket-fan.c
@@ -10504,7 +10507,7 @@ S: Maintained
F: block/partitions/efi.*
HABANALABS PCI DRIVER
-M: Ofir Bitton <obitton@habana.ai>
+M: Yaron Avizrat <yaron.avizrat@intel.com>
L: dri-devel@lists.freedesktop.org
S: Supported
C: irc://irc.oftc.net/dri-devel
@@ -10839,7 +10842,7 @@ S: Maintained
F: drivers/dma/hisi_dma.c
HISILICON GPIO DRIVER
-M: Jay Fang <f.fangjian@huawei.com>
+M: Yang Shen <shenyang39@huawei.com>
L: linux-gpio@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml
@@ -11006,7 +11009,8 @@ F: Documentation/ABI/testing/debugfs-hisi-zip
F: drivers/crypto/hisilicon/zip/
HMM - Heterogeneous Memory Management
-M: Jérôme Glisse <jglisse@redhat.com>
+M: Jason Gunthorpe <jgg@nvidia.com>
+M: Leon Romanovsky <leonro@nvidia.com>
L: linux-mm@kvack.org
S: Maintained
F: Documentation/mm/hmm.rst
@@ -11155,7 +11159,8 @@ F: include/linux/platform_data/huawei-gaokun-ec.h
HUGETLB SUBSYSTEM
M: Muchun Song <muchun.song@linux.dev>
-R: Oscar Salvador <osalvador@suse.de>
+M: Oscar Salvador <osalvador@suse.de>
+R: David Hildenbrand <david@redhat.com>
L: linux-mm@kvack.org
S: Maintained
F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages
@@ -11166,6 +11171,7 @@ F: fs/hugetlbfs/
F: include/linux/hugetlb.h
F: include/trace/events/hugetlbfs.h
F: mm/hugetlb.c
+F: mm/hugetlb_cgroup.c
F: mm/hugetlb_cma.c
F: mm/hugetlb_cma.h
F: mm/hugetlb_vmemmap.c
@@ -11421,7 +11427,7 @@ F: drivers/i2c/busses/i2c-via.c
F: drivers/i2c/busses/i2c-viapro.c
I2C/SMBUS INTEL CHT WHISKEY COVE PMIC DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-i2c@vger.kernel.org
S: Maintained
F: drivers/i2c/busses/i2c-cht-wc.c
@@ -12011,13 +12017,13 @@ S: Supported
F: sound/soc/intel/
INTEL ATOMISP2 DUMMY / POWER-MANAGEMENT DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: platform-driver-x86@vger.kernel.org
S: Maintained
F: drivers/platform/x86/intel/atomisp2/pm.c
INTEL ATOMISP2 LED DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: platform-driver-x86@vger.kernel.org
S: Maintained
F: drivers/platform/x86/intel/atomisp2/led.c
@@ -12183,9 +12189,8 @@ F: drivers/dma/idxd/*
F: include/uapi/linux/idxd.h
INTEL IN FIELD SCAN (IFS) DEVICE
-M: Jithu Joseph <jithu.joseph@intel.com>
+M: Tony Luck <tony.luck@intel.com>
R: Ashok Raj <ashok.raj.linux@gmail.com>
-R: Tony Luck <tony.luck@intel.com>
S: Maintained
F: drivers/platform/x86/intel/ifs
F: include/trace/events/intel_ifs.h
@@ -12525,8 +12530,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi.git
F: drivers/net/wireless/intel/iwlwifi/
INTEL WMI SLIM BOOTLOADER (SBL) FIRMWARE UPDATE DRIVER
-M: Jithu Joseph <jithu.joseph@intel.com>
-S: Maintained
+S: Orphan
W: https://slimbootloader.github.io/security/firmware-update.html
F: drivers/platform/x86/intel/wmi/sbl-fw-update.c
@@ -13345,6 +13349,7 @@ M: Alexander Graf <graf@amazon.com>
M: Mike Rapoport <rppt@kernel.org>
M: Changyuan Lyu <changyuanl@google.com>
L: kexec@lists.infradead.org
+L: linux-mm@kvack.org
S: Maintained
F: Documentation/admin-guide/mm/kho.rst
F: Documentation/core-api/kho/*
@@ -13678,7 +13683,7 @@ S: Maintained
F: drivers/platform/x86/lenovo-wmi-hotkey-utilities.c
LETSKETCH HID TABLET DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-input@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git
@@ -13728,7 +13733,7 @@ F: drivers/ata/sata_gemini.c
F: drivers/ata/sata_gemini.h
LIBATA SATA AHCI PLATFORM devices support
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-ide@vger.kernel.org
S: Maintained
F: drivers/ata/ahci_platform.c
@@ -13798,7 +13803,7 @@ M: Oliver O'Halloran <oohall@gmail.com>
L: nvdimm@lists.linux.dev
S: Supported
Q: https://patchwork.kernel.org/project/linux-nvdimm/list/
-F: Documentation/devicetree/bindings/pmem/pmem-region.txt
+F: Documentation/devicetree/bindings/pmem/pmem-region.yaml
F: drivers/nvdimm/of_pmem.c
LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM
@@ -14098,7 +14103,7 @@ F: Documentation/admin-guide/ldm.rst
F: block/partitions/ldm.*
LOGITECH HID GAMING KEYBOARDS
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-input@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git
@@ -14780,7 +14785,7 @@ F: Documentation/devicetree/bindings/power/supply/maxim,max17040.yaml
F: drivers/power/supply/max17040_battery.c
MAXIM MAX17042 FAMILY FUEL GAUGE DRIVERS
-R: Hans de Goede <hdegoede@redhat.com>
+R: Hans de Goede <hansg@kernel.org>
R: Krzysztof Kozlowski <krzk@kernel.org>
R: Marek Szyprowski <m.szyprowski@samsung.com>
R: Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm>
@@ -15547,6 +15552,7 @@ F: drivers/net/ethernet/mellanox/mlx4/en_*
MELLANOX ETHERNET DRIVER (mlx5e)
M: Saeed Mahameed <saeedm@nvidia.com>
M: Tariq Toukan <tariqt@nvidia.com>
+M: Mark Bloch <mbloch@nvidia.com>
L: netdev@vger.kernel.org
S: Maintained
W: https://www.nvidia.com/networking/
@@ -15582,7 +15588,7 @@ Q: https://patchwork.kernel.org/project/netdevbpf/list/
F: drivers/net/ethernet/mellanox/mlxfw/
MELLANOX HARDWARE PLATFORM SUPPORT
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
M: Vadim Pasternak <vadimp@nvidia.com>
L: platform-driver-x86@vger.kernel.org
@@ -15616,6 +15622,7 @@ MELLANOX MLX5 core VPI driver
M: Saeed Mahameed <saeedm@nvidia.com>
M: Leon Romanovsky <leonro@nvidia.com>
M: Tariq Toukan <tariqt@nvidia.com>
+M: Mark Bloch <mbloch@nvidia.com>
L: netdev@vger.kernel.org
L: linux-rdma@vger.kernel.org
S: Maintained
@@ -15673,11 +15680,16 @@ MEMBLOCK AND MEMORY MANAGEMENT INITIALIZATION
M: Mike Rapoport <rppt@kernel.org>
L: linux-mm@kvack.org
S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git for-next
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git fixes
F: Documentation/core-api/boot-time-mm.rst
F: Documentation/core-api/kho/bindings/memblock/*
F: include/linux/memblock.h
+F: mm/bootmem_info.c
F: mm/memblock.c
+F: mm/memtest.c
F: mm/mm_init.c
+F: mm/rodata_test.c
F: tools/testing/memblock/
MEMORY ALLOCATION PROFILING
@@ -15732,7 +15744,6 @@ F: Documentation/admin-guide/mm/
F: Documentation/mm/
F: include/linux/gfp.h
F: include/linux/gfp_types.h
-F: include/linux/memfd.h
F: include/linux/memory_hotplug.h
F: include/linux/memory-tiers.h
F: include/linux/mempolicy.h
@@ -15792,6 +15803,10 @@ S: Maintained
W: http://www.linux-mm.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
F: mm/gup.c
+F: mm/gup_test.c
+F: mm/gup_test.h
+F: tools/testing/selftests/mm/gup_longterm.c
+F: tools/testing/selftests/mm/gup_test.c
MEMORY MANAGEMENT - KSM (Kernel Samepage Merging)
M: Andrew Morton <akpm@linux-foundation.org>
@@ -15839,6 +15854,17 @@ F: mm/numa.c
F: mm/numa_emulation.c
F: mm/numa_memblks.c
+MEMORY MANAGEMENT - OOM KILLER
+M: Michal Hocko <mhocko@suse.com>
+R: David Rientjes <rientjes@google.com>
+R: Shakeel Butt <shakeel.butt@linux.dev>
+L: linux-mm@kvack.org
+S: Maintained
+F: include/linux/oom.h
+F: include/trace/events/oom.h
+F: include/uapi/linux/oom.h
+F: mm/oom_kill.c
+
MEMORY MANAGEMENT - PAGE ALLOCATOR
M: Andrew Morton <akpm@linux-foundation.org>
M: Vlastimil Babka <vbabka@suse.cz>
@@ -15853,8 +15879,17 @@ F: include/linux/compaction.h
F: include/linux/gfp.h
F: include/linux/page-isolation.h
F: mm/compaction.c
+F: mm/debug_page_alloc.c
+F: mm/fail_page_alloc.c
F: mm/page_alloc.c
+F: mm/page_ext.c
+F: mm/page_frag_cache.c
F: mm/page_isolation.c
+F: mm/page_owner.c
+F: mm/page_poison.c
+F: mm/page_reporting.c
+F: mm/show_mem.c
+F: mm/shuffle.c
MEMORY MANAGEMENT - RECLAIM
M: Andrew Morton <akpm@linux-foundation.org>
@@ -15868,6 +15903,7 @@ L: linux-mm@kvack.org
S: Maintained
F: mm/pt_reclaim.c
F: mm/vmscan.c
+F: mm/workingset.c
MEMORY MANAGEMENT - RMAP (REVERSE MAPPING)
M: Andrew Morton <akpm@linux-foundation.org>
@@ -15880,6 +15916,7 @@ R: Harry Yoo <harry.yoo@oracle.com>
L: linux-mm@kvack.org
S: Maintained
F: include/linux/rmap.h
+F: mm/page_vma_mapped.c
F: mm/rmap.c
MEMORY MANAGEMENT - SECRETMEM
@@ -15912,13 +15949,14 @@ F: mm/swapfile.c
MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE)
M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@redhat.com>
+M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Zi Yan <ziy@nvidia.com>
R: Baolin Wang <baolin.wang@linux.alibaba.com>
-R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Nico Pache <npache@redhat.com>
R: Ryan Roberts <ryan.roberts@arm.com>
R: Dev Jain <dev.jain@arm.com>
+R: Barry Song <baohua@kernel.org>
L: linux-mm@kvack.org
S: Maintained
W: http://www.linux-mm.org
@@ -15971,11 +16009,14 @@ S: Maintained
W: http://www.linux-mm.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
F: include/trace/events/mmap.h
+F: mm/mincore.c
F: mm/mlock.c
F: mm/mmap.c
F: mm/mprotect.c
F: mm/mremap.c
F: mm/mseal.c
+F: mm/msync.c
+F: mm/nommu.c
F: mm/vma.c
F: mm/vma.h
F: mm/vma_exec.c
@@ -16538,7 +16579,7 @@ S: Maintained
F: drivers/platform/surface/surface_gpe.c
MICROSOFT SURFACE HARDWARE PLATFORM SUPPORT
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
M: Maximilian Luz <luzmaximilian@gmail.com>
L: platform-driver-x86@vger.kernel.org
@@ -16783,8 +16824,8 @@ F: include/dt-bindings/clock/mobileye,eyeq5-clk.h
MODULE SUPPORT
M: Luis Chamberlain <mcgrof@kernel.org>
M: Petr Pavlu <petr.pavlu@suse.com>
+M: Daniel Gomez <da.gomez@kernel.org>
R: Sami Tolvanen <samitolvanen@google.com>
-R: Daniel Gomez <da.gomez@samsung.com>
L: linux-modules@vger.kernel.org
L: linux-kernel@vger.kernel.org
S: Maintained
@@ -17183,10 +17224,10 @@ F: drivers/rtc/rtc-ntxec.c
F: include/linux/mfd/ntxec.h
NETRONOME ETHERNET DRIVERS
-M: Louis Peens <louis.peens@corigine.com>
R: Jakub Kicinski <kuba@kernel.org>
+R: Simon Horman <horms@kernel.org>
L: oss-drivers@corigine.com
-S: Maintained
+S: Odd Fixes
F: drivers/net/ethernet/netronome/
NETWORK BLOCK DEVICE (NBD)
@@ -17343,6 +17384,7 @@ F: include/linux/ethtool.h
F: include/linux/framer/framer-provider.h
F: include/linux/framer/framer.h
F: include/linux/in.h
+F: include/linux/in6.h
F: include/linux/indirect_call_wrapper.h
F: include/linux/inet.h
F: include/linux/inet_diag.h
@@ -17493,7 +17535,7 @@ F: tools/testing/selftests/net/srv6*
NETWORKING [TCP]
M: Eric Dumazet <edumazet@google.com>
M: Neal Cardwell <ncardwell@google.com>
-R: Kuniyuki Iwashima <kuniyu@amazon.com>
+R: Kuniyuki Iwashima <kuniyu@google.com>
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/networking/net_cachelines/tcp_sock.rst
@@ -17523,7 +17565,7 @@ F: net/tls/*
NETWORKING [SOCKETS]
M: Eric Dumazet <edumazet@google.com>
-M: Kuniyuki Iwashima <kuniyu@amazon.com>
+M: Kuniyuki Iwashima <kuniyu@google.com>
M: Paolo Abeni <pabeni@redhat.com>
M: Willem de Bruijn <willemb@google.com>
S: Maintained
@@ -17538,7 +17580,7 @@ F: net/core/scm.c
F: net/socket.c
NETWORKING [UNIX SOCKETS]
-M: Kuniyuki Iwashima <kuniyu@amazon.com>
+M: Kuniyuki Iwashima <kuniyu@google.com>
S: Maintained
F: include/net/af_unix.h
F: include/net/netns/unix.h
@@ -17706,7 +17748,7 @@ F: tools/include/nolibc/
F: tools/testing/selftests/nolibc/
NOVATEK NVT-TS I2C TOUCHSCREEN DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-input@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/input/touchscreen/novatek,nvt-ts.yaml
@@ -19376,7 +19418,7 @@ F: crypto/pcrypt.c
F: include/crypto/pcrypt.h
PDS DSC VIRTIO DATA PATH ACCELERATOR
-R: Shannon Nelson <shannon.nelson@amd.com>
+R: Brett Creeley <brett.creeley@amd.com>
F: drivers/vdpa/pds/
PECI HARDWARE MONITORING DRIVERS
@@ -19398,7 +19440,6 @@ F: include/linux/peci-cpu.h
F: include/linux/peci.h
PENSANDO ETHERNET DRIVERS
-M: Shannon Nelson <shannon.nelson@amd.com>
M: Brett Creeley <brett.creeley@amd.com>
L: netdev@vger.kernel.org
S: Maintained
@@ -19563,8 +19604,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git
F: drivers/pinctrl/intel/
PIN CONTROLLER - KEEMBAY
-M: Lakshmi Sowjanya D <lakshmi.sowjanya.d@intel.com>
-S: Supported
+S: Orphan
F: drivers/pinctrl/pinctrl-keembay*
PIN CONTROLLER - MEDIATEK
@@ -20117,21 +20157,15 @@ S: Supported
F: Documentation/devicetree/bindings/soc/qcom/qcom,apr*
F: Documentation/devicetree/bindings/sound/qcom,*
F: drivers/soc/qcom/apr.c
-F: include/dt-bindings/sound/qcom,wcd9335.h
-F: include/dt-bindings/sound/qcom,wcd934x.h
-F: sound/soc/codecs/lpass-rx-macro.*
-F: sound/soc/codecs/lpass-tx-macro.*
-F: sound/soc/codecs/lpass-va-macro.c
-F: sound/soc/codecs/lpass-wsa-macro.*
+F: drivers/soundwire/qcom.c
+F: include/dt-bindings/sound/qcom,wcd93*
+F: sound/soc/codecs/lpass-*.*
F: sound/soc/codecs/msm8916-wcd-analog.c
F: sound/soc/codecs/msm8916-wcd-digital.c
F: sound/soc/codecs/wcd-clsh-v2.*
F: sound/soc/codecs/wcd-mbhc-v2.*
-F: sound/soc/codecs/wcd9335.*
-F: sound/soc/codecs/wcd934x.c
-F: sound/soc/codecs/wsa881x.c
-F: sound/soc/codecs/wsa883x.c
-F: sound/soc/codecs/wsa884x.c
+F: sound/soc/codecs/wcd93*.*
+F: sound/soc/codecs/wsa88*.*
F: sound/soc/qcom/
QCOM EMBEDDED USB DEBUGGER (EUD)
@@ -21162,7 +21196,7 @@ M: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
L: netdev@vger.kernel.org
L: linux-renesas-soc@vger.kernel.org
S: Maintained
-F: Documentation/devicetree/bindings/net/renesas,r9a09g057-gbeth.yaml
+F: Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml
F: drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c
RENESAS RZ/V2H(P) USB2PHY PORT RESET DRIVER
@@ -21374,7 +21408,7 @@ N: spacemit
K: spacemit
RISC-V THEAD SoC SUPPORT
-M: Drew Fustini <drew@pdp7.com>
+M: Drew Fustini <fustini@kernel.org>
M: Guo Ren <guoren@kernel.org>
M: Fu Wei <wefu@redhat.com>
L: linux-riscv@lists.infradead.org
@@ -22171,7 +22205,7 @@ R: Tejun Heo <tj@kernel.org>
R: David Vernet <void@manifault.com>
R: Andrea Righi <arighi@nvidia.com>
R: Changwoo Min <changwoo@igalia.com>
-L: linux-kernel@vger.kernel.org
+L: sched-ext@lists.linux.dev
S: Maintained
W: https://github.com/sched-ext/scx
T: git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git
@@ -22550,9 +22584,11 @@ S: Maintained
F: drivers/misc/sgi-xp/
SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
+M: D. Wythe <alibuda@linux.alibaba.com>
+M: Dust Li <dust.li@linux.alibaba.com>
+M: Sidraya Jayagond <sidraya@linux.ibm.com>
M: Wenjia Zhang <wenjia@linux.ibm.com>
-M: Jan Karcher <jaka@linux.ibm.com>
-R: D. Wythe <alibuda@linux.alibaba.com>
+R: Mahanta Jambigi <mjambigi@linux.ibm.com>
R: Tony Lu <tonylu@linux.alibaba.com>
R: Wen Gu <guwen@linux.alibaba.com>
L: linux-rdma@vger.kernel.org
@@ -22708,7 +22744,7 @@ K: fu[57]40
K: [^@]sifive
SILEAD TOUCHSCREEN DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-input@vger.kernel.org
L: platform-driver-x86@vger.kernel.org
S: Maintained
@@ -22741,7 +22777,7 @@ F: Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml
F: drivers/i3c/master/svc-i3c-master.c
SIMPLEFB FB DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-fbdev@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/display/simple-framebuffer.yaml
@@ -22870,7 +22906,7 @@ F: Documentation/hwmon/emc2103.rst
F: drivers/hwmon/emc2103.c
SMSC SCH5627 HARDWARE MONITOR DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-hwmon@vger.kernel.org
S: Supported
F: Documentation/hwmon/sch5627.rst
@@ -23525,7 +23561,7 @@ S: Supported
F: Documentation/process/stable-kernel-rules.rst
STAGING - ATOMISP DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
R: Sakari Ailus <sakari.ailus@linux.intel.com>
L: linux-media@vger.kernel.org
@@ -23661,7 +23697,6 @@ F: include/dt-bindings/clock/starfive?jh71*.h
STARFIVE JH71X0 PINCTRL DRIVERS
M: Emil Renner Berthing <kernel@esmil.dk>
-M: Jianlong Huang <jianlong.huang@starfivetech.com>
M: Hal Feng <hal.feng@starfivetech.com>
L: linux-gpio@vger.kernel.org
S: Maintained
@@ -23822,7 +23857,7 @@ F: arch/m68k/sun3*/
F: drivers/net/ethernet/i825xx/sun3*
SUN4I LOW RES ADC ATTACHED TABLET KEYS DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-input@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml
@@ -24064,6 +24099,7 @@ M: Bin Du <bin.du@amd.com>
L: linux-i2c@vger.kernel.org
S: Maintained
F: drivers/i2c/busses/i2c-designware-amdisp.c
+F: include/linux/soc/amd/isp4_misc.h
SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER
M: Jaehoon Chung <jh80.chung@samsung.com>
@@ -25028,8 +25064,11 @@ M: Hugh Dickins <hughd@google.com>
R: Baolin Wang <baolin.wang@linux.alibaba.com>
L: linux-mm@kvack.org
S: Maintained
+F: include/linux/memfd.h
F: include/linux/shmem_fs.h
+F: mm/memfd.c
F: mm/shmem.c
+F: mm/shmem_quota.c
TOMOYO SECURITY MODULE
M: Kentaro Takeda <takedakn@nttdata.co.jp>
@@ -25590,7 +25629,7 @@ F: Documentation/hid/hiddev.rst
F: drivers/hid/usbhid/
USB INTEL XHCI ROLE MUX DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-usb@vger.kernel.org
S: Maintained
F: drivers/usb/roles/intel-xhci-usb-role-switch.c
@@ -25781,7 +25820,7 @@ F: Documentation/firmware-guide/acpi/intel-pmc-mux.rst
F: drivers/usb/typec/mux/intel_pmc_mux.c
USB TYPEC PI3USB30532 MUX DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-usb@vger.kernel.org
S: Maintained
F: drivers/usb/typec/mux/pi3usb30532.c
@@ -25810,7 +25849,7 @@ F: drivers/usb/host/uhci*
USB VIDEO CLASS
M: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-media@vger.kernel.org
S: Maintained
W: http://www.ideasonboard.org/uvc/
@@ -25864,6 +25903,8 @@ F: fs/hostfs/
USERSPACE COPYIN/COPYOUT (UIOVEC)
M: Alexander Viro <viro@zeniv.linux.org.uk>
+L: linux-block@vger.kernel.org
+L: linux-fsdevel@vger.kernel.org
S: Maintained
F: include/linux/uio.h
F: lib/iov_iter.c
@@ -26341,7 +26382,7 @@ F: include/uapi/linux/virtio_snd.h
F: sound/virtio/*
VIRTUAL BOX GUEST DEVICE DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
M: Arnd Bergmann <arnd@arndb.de>
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
S: Maintained
@@ -26350,7 +26391,7 @@ F: include/linux/vbox_utils.h
F: include/uapi/linux/vbox*.h
VIRTUAL BOX SHARED FOLDER VFS DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-fsdevel@vger.kernel.org
S: Maintained
F: fs/vboxsf/*
@@ -26604,7 +26645,7 @@ F: drivers/mmc/host/wbsd.*
WACOM PROTOCOL 4 SERIAL TABLETS
M: Julian Squires <julian@cipht.net>
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: linux-input@vger.kernel.org
S: Maintained
F: drivers/input/tablet/wacom_serial4.c
@@ -26771,7 +26812,7 @@ F: include/linux/wwan.h
F: include/uapi/linux/wwan.h
X-POWERS AXP288 PMIC DRIVERS
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
S: Maintained
F: drivers/acpi/pmic/intel_pmic_xpower.c
N: axp288
@@ -26863,14 +26904,14 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/mm
F: arch/x86/mm/
X86 PLATFORM ANDROID TABLETS DSDT FIXUP DRIVER
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
L: platform-driver-x86@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git
F: drivers/platform/x86/x86-android-tablets/
X86 PLATFORM DRIVERS
-M: Hans de Goede <hdegoede@redhat.com>
+M: Hans de Goede <hansg@kernel.org>
M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
L: platform-driver-x86@vger.kernel.org
S: Maintained
@@ -26901,7 +26942,7 @@ F: arch/x86/kernel/stacktrace.c
F: arch/x86/kernel/unwind_*.c
X86 TRUST DOMAIN EXTENSIONS (TDX)
-M: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+M: Kirill A. Shutemov <kas@kernel.org>
R: Dave Hansen <dave.hansen@linux.intel.com>
L: x86@kernel.org
L: linux-coco@lists.linux.dev
@@ -26967,6 +27008,7 @@ M: David S. Miller <davem@davemloft.net>
M: Jakub Kicinski <kuba@kernel.org>
M: Jesper Dangaard Brouer <hawk@kernel.org>
M: John Fastabend <john.fastabend@gmail.com>
+R: Stanislav Fomichev <sdf@fomichev.me>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
S: Supported
@@ -26988,6 +27030,7 @@ M: Björn Töpel <bjorn@kernel.org>
M: Magnus Karlsson <magnus.karlsson@intel.com>
M: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
R: Jonathan Lemon <jonathan.lemon@gmail.com>
+R: Stanislav Fomichev <sdf@fomichev.me>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
S: Maintained
@@ -27268,13 +27311,6 @@ S: Supported
W: http://www.marvell.com
F: drivers/i2c/busses/i2c-xlp9xx.c
-XRA1403 GPIO EXPANDER
-M: Nandor Han <nandor.han@ge.com>
-L: linux-gpio@vger.kernel.org
-S: Maintained
-F: Documentation/devicetree/bindings/gpio/gpio-xra1403.txt
-F: drivers/gpio/gpio-xra1403.c
-
XTENSA XTFPGA PLATFORM SUPPORT
M: Max Filippov <jcmvbkbc@gmail.com>
S: Maintained
diff --git a/Makefile b/Makefile
index 35e6e5240c61..478f2004602d 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 16
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION =
NAME = Baby Opossum Posse
# *DOCUMENTATION*
@@ -1832,12 +1832,9 @@ rustfmtcheck: rustfmt
# Misc
# ---------------------------------------------------------------------------
-# Run misc checks when ${KBUILD_EXTRA_WARN} contains 1
PHONY += misc-check
-ifneq ($(findstring 1,$(KBUILD_EXTRA_WARN)),)
misc-check:
$(Q)$(srctree)/scripts/misc-check
-endif
all: misc-check
diff --git a/arch/alpha/include/asm/param.h b/arch/alpha/include/asm/param.h
deleted file mode 100644
index cfe947ce9461..000000000000
--- a/arch/alpha/include/asm/param.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_ALPHA_PARAM_H
-#define _ASM_ALPHA_PARAM_H
-
-#include <uapi/asm/param.h>
-
-# undef HZ
-# define HZ CONFIG_HZ
-# define USER_HZ 1024
-# define CLOCKS_PER_SEC USER_HZ /* frequency at which times() counts */
-
-#endif /* _ASM_ALPHA_PARAM_H */
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 2676017f42f1..44e7aedac6e8 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -327,7 +327,7 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/alpha/include/uapi/asm/param.h b/arch/alpha/include/uapi/asm/param.h
index 49c7119934e2..e4e410f9bf85 100644
--- a/arch/alpha/include/uapi/asm/param.h
+++ b/arch/alpha/include/uapi/asm/param.h
@@ -2,14 +2,9 @@
#ifndef _UAPI_ASM_ALPHA_PARAM_H
#define _UAPI_ASM_ALPHA_PARAM_H
-#define HZ 1024
-
+#define __USER_HZ 1024
#define EXEC_PAGESIZE 8192
-#ifndef NOGROUP
-#define NOGROUP (-1)
-#endif
-
-#define MAXHOSTNAMELEN 64 /* max length of hostname */
+#include <asm-generic/param.h>
#endif /* _UAPI_ASM_ALPHA_PARAM_H */
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 005d9e4d187a..a31bbf5c8bbc 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -144,7 +144,7 @@
#define ARC_AUX_AGU_MOD2 0x5E2
#define ARC_AUX_AGU_MOD3 0x5E3
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <soc/arc/arc_aux.h>
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 592d7fffc223..e615c42b93ba 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -6,7 +6,7 @@
#ifndef _ASM_ARC_ATOMIC_H
#define _ASM_ARC_ATOMIC_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/compiler.h>
@@ -31,6 +31,6 @@
#include <asm/atomic64-arcv2.h>
#endif
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h
index 9b5791b85471..73080a664369 100644
--- a/arch/arc/include/asm/atomic64-arcv2.h
+++ b/arch/arc/include/asm/atomic64-arcv2.h
@@ -137,12 +137,9 @@ ATOMIC64_OPS(xor, xor, xor)
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
-static inline s64
-arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
+static inline u64 __arch_cmpxchg64_relaxed(volatile void *ptr, u64 old, u64 new)
{
- s64 prev;
-
- smp_mb();
+ u64 prev;
__asm__ __volatile__(
"1: llockd %0, [%1] \n"
@@ -152,14 +149,12 @@ arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
" bnz 1b \n"
"2: \n"
: "=&r"(prev)
- : "r"(ptr), "ir"(expected), "r"(new)
- : "cc"); /* memory clobber comes from smp_mb() */
-
- smp_mb();
+ : "r"(ptr), "ir"(old), "r"(new)
+ : "memory", "cc");
return prev;
}
-#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
+#define arch_cmpxchg64_relaxed __arch_cmpxchg64_relaxed
static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
{
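The net effect of this hunk is that ARC now plugs into the generic ``cmpxchg64`` machinery as a relaxed primitive, letting common code add ordering where needed instead of paying for two ``smp_mb()`` calls in the backend. A hedged sketch of a consumer (generic kernel code under assumed use, not part of this patch):

.. code-block:: c

    #include <linux/atomic.h>

    static u64 seq64;

    /* The fully-ordered try_cmpxchg64() is built by generic code on top
     * of arch_cmpxchg64_relaxed(); 'old' is updated on failure, so the
     * loop re-reads automatically. */
    static u64 seq64_advance(u64 step)
    {
        u64 old = READ_ONCE(seq64);

        while (!try_cmpxchg64(&seq64, &old, old + step))
            ;
        return old + step;
    }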
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index f5a936496f06..5340c2871392 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -10,7 +10,7 @@
#error only <linux/bitops.h> can be included directly
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/compiler.h>
@@ -192,6 +192,6 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h
index 4c453ba96c51..171c16021f70 100644
--- a/arch/arc/include/asm/bug.h
+++ b/arch/arc/include/asm/bug.h
@@ -6,7 +6,7 @@
#ifndef _ASM_ARC_BUG_H
#define _ASM_ARC_BUG_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/ptrace.h>
@@ -29,6 +29,6 @@ void die(const char *str, struct pt_regs *regs, unsigned long address);
#include <asm-generic/bug.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index f0f1fc5d62b6..040a97f4dd82 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -23,7 +23,7 @@
*/
#define ARC_UNCACHED_ADDR_SPACE 0xc0000000
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/build_bug.h>
@@ -65,7 +65,7 @@
extern int ioc_enable;
extern unsigned long perip_base, perip_end;
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/* Instruction cache related Auxiliary registers */
#define ARC_REG_IC_BCR 0x77 /* Build Config reg */
diff --git a/arch/arc/include/asm/current.h b/arch/arc/include/asm/current.h
index 06be89f6f2f0..03ffd005f3fa 100644
--- a/arch/arc/include/asm/current.h
+++ b/arch/arc/include/asm/current.h
@@ -9,7 +9,7 @@
#ifndef _ASM_ARC_CURRENT_H
#define _ASM_ARC_CURRENT_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_ARC_CURR_IN_REG
@@ -20,6 +20,6 @@ register struct task_struct *curr_arc asm("gp");
#include <asm-generic/current.h>
#endif /* ! CONFIG_ARC_CURR_IN_REG */
-#endif /* ! __ASSEMBLY__ */
+#endif /* ! __ASSEMBLER__ */
#endif /* _ASM_ARC_CURRENT_H */
diff --git a/arch/arc/include/asm/dsp-impl.h b/arch/arc/include/asm/dsp-impl.h
index cd5636dfeb6f..fd5fdaad90c1 100644
--- a/arch/arc/include/asm/dsp-impl.h
+++ b/arch/arc/include/asm/dsp-impl.h
@@ -11,7 +11,7 @@
#define DSP_CTRL_DISABLED_ALL 0
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
/* clobbers r5 register */
.macro DSP_EARLY_INIT
diff --git a/arch/arc/include/asm/dsp.h b/arch/arc/include/asm/dsp.h
index f496dbc4640b..eeaaf4e4eabd 100644
--- a/arch/arc/include/asm/dsp.h
+++ b/arch/arc/include/asm/dsp.h
@@ -7,7 +7,7 @@
#ifndef __ASM_ARC_DSP_H
#define __ASM_ARC_DSP_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* DSP-related saved registers - need to be saved only when you are
@@ -24,6 +24,6 @@ struct dsp_callee_regs {
#endif
};
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_ARC_DSP_H */
diff --git a/arch/arc/include/asm/dwarf.h b/arch/arc/include/asm/dwarf.h
index a0d5ebe1bc3f..1524c5cf8b59 100644
--- a/arch/arc/include/asm/dwarf.h
+++ b/arch/arc/include/asm/dwarf.h
@@ -6,7 +6,7 @@
#ifndef _ASM_ARC_DWARF_H
#define _ASM_ARC_DWARF_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#ifdef ARC_DW2_UNWIND_AS_CFI
@@ -38,6 +38,6 @@
#endif /* !ARC_DW2_UNWIND_AS_CFI */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_ARC_DWARF_H */
diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index 38c35722cebf..f453af251a1a 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -13,7 +13,7 @@
#include <asm/processor.h> /* For VMALLOC_START */
#include <asm/mmu.h>
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#ifdef CONFIG_ISA_ARCOMPACT
#include <asm/entry-compact.h> /* ISA specific bits */
@@ -146,7 +146,7 @@
#endif /* CONFIG_ARC_CURR_IN_REG */
-#else /* !__ASSEMBLY__ */
+#else /* !__ASSEMBLER__ */
extern void do_signal(struct pt_regs *);
extern void do_notify_resume(struct pt_regs *);
diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h
index fb3c21f1a238..30aea562f8aa 100644
--- a/arch/arc/include/asm/irqflags-arcv2.h
+++ b/arch/arc/include/asm/irqflags-arcv2.h
@@ -50,7 +50,7 @@
#define ISA_INIT_STATUS_BITS (STATUS_IE_MASK | __AD_ENB | \
(ARCV2_IRQ_DEF_PRIO << 1))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* Save IRQ state and disable IRQs
@@ -170,6 +170,6 @@ static inline void arc_softirq_clear(int irq)
seti
.endm
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
index 936a2f21f315..85c2f6bcde0c 100644
--- a/arch/arc/include/asm/irqflags-compact.h
+++ b/arch/arc/include/asm/irqflags-compact.h
@@ -40,7 +40,7 @@
#define ISA_INIT_STATUS_BITS STATUS_IE_MASK
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/******************************************************************
* IRQ Control Macros
@@ -196,6 +196,6 @@ static inline int arch_irqs_disabled(void)
flag \scratch
.endm
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h
index a339223d9e05..66ead75784d9 100644
--- a/arch/arc/include/asm/jump_label.h
+++ b/arch/arc/include/asm/jump_label.h
@@ -2,7 +2,7 @@
#ifndef _ASM_ARC_JUMP_LABEL_H
#define _ASM_ARC_JUMP_LABEL_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/stringify.h>
#include <linux/types.h>
@@ -68,5 +68,5 @@ struct jump_entry {
jump_label_t key;
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h
index 8a3fb71e9cfa..ba3cb65b5eaa 100644
--- a/arch/arc/include/asm/linkage.h
+++ b/arch/arc/include/asm/linkage.h
@@ -12,7 +12,7 @@
#define __ALIGN .align 4
#define __ALIGN_STR __stringify(__ALIGN)
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro ST2 e, o, off
#ifdef CONFIG_ARC_HAS_LL64
@@ -61,7 +61,7 @@
CFI_ENDPROC ASM_NL \
.size name, .-name
-#else /* !__ASSEMBLY__ */
+#else /* !__ASSEMBLER__ */
#ifdef CONFIG_ARC_HAS_ICCM
#define __arcfp_code __section(".text.arcfp")
@@ -75,6 +75,6 @@
#define __arcfp_data __section(".data")
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h
index 41412642f279..5e5482026ac9 100644
--- a/arch/arc/include/asm/mmu-arcv2.h
+++ b/arch/arc/include/asm/mmu-arcv2.h
@@ -69,7 +69,7 @@
#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct mm_struct;
extern int pae40_exist_but_not_enab(void);
@@ -100,6 +100,6 @@ static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd)
sr \reg, [ARC_REG_PID]
.endm
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index 4ae2db59d494..e3b35ceab582 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -6,7 +6,7 @@
#ifndef _ASM_ARC_MMU_H
#define _ASM_ARC_MMU_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/threads.h> /* NR_CPUS */
diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index def0dfb95b43..9720fe6b2c24 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -19,7 +19,7 @@
#endif /* CONFIG_ARC_HAS_PAE40 */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define clear_page(paddr) memset((paddr), 0, PAGE_SIZE)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
@@ -136,6 +136,6 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
#include <asm-generic/memory_model.h> /* page_to_pfn, pfn_to_page */
#include <asm-generic/getorder.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h
index 8ebec1b21d24..4630c5acca05 100644
--- a/arch/arc/include/asm/pgtable-bits-arcv2.h
+++ b/arch/arc/include/asm/pgtable-bits-arcv2.h
@@ -75,7 +75,7 @@
* This is to enable COW mechanism
*/
/* xwr */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE)
#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY)
@@ -130,7 +130,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
@@ -142,6 +142,6 @@ PTE_BIT_FUNC(swp_clear_exclusive, &= ~(_PAGE_SWP_EXCLUSIVE));
#include <asm/hugepage.h>
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h
index d1ce4b0f1071..c8f9273372c0 100644
--- a/arch/arc/include/asm/pgtable-levels.h
+++ b/arch/arc/include/asm/pgtable-levels.h
@@ -85,7 +85,7 @@
#define PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#if CONFIG_PGTABLE_LEVELS > 3
#include <asm-generic/pgtable-nop4d.h>
@@ -181,6 +181,6 @@
#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ)
#endif
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 4cf45a99fd79..bd580e2b62d7 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -19,7 +19,7 @@
*/
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern char empty_zero_page[PAGE_SIZE];
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
@@ -29,6 +29,6 @@ extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
/* to cope with aliasing VIPT cache */
#define HAVE_ARCH_UNMAPPED_AREA
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index d606658e2fe7..7f7901ac6643 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -11,7 +11,7 @@
#ifndef __ASM_ARC_PROCESSOR_H
#define __ASM_ARC_PROCESSOR_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/ptrace.h>
#include <asm/dsp.h>
@@ -66,7 +66,7 @@ extern void start_thread(struct pt_regs * regs, unsigned long pc,
extern unsigned int __get_wchan(struct task_struct *p);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
* Default System Memory Map on ARC
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index cf79df0b2570..f6c052af8f4d 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -10,7 +10,7 @@
#include <uapi/asm/ptrace.h>
#include <linux/compiler.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
typedef union {
struct {
@@ -172,6 +172,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
extern int syscall_trace_enter(struct pt_regs *);
extern void syscall_trace_exit(struct pt_regs *);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_PTRACE_H */
diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h
index 1f85de8288b1..5806106a65f9 100644
--- a/arch/arc/include/asm/switch_to.h
+++ b/arch/arc/include/asm/switch_to.h
@@ -6,7 +6,7 @@
#ifndef _ASM_ARC_SWITCH_TO_H
#define _ASM_ARC_SWITCH_TO_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/sched.h>
#include <asm/dsp-impl.h>
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index 12daaf3a61ea..255d2c774219 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -24,7 +24,7 @@
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define THREAD_SHIFT (PAGE_SHIFT << THREAD_SIZE_ORDER)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/thread_info.h>
@@ -62,7 +62,7 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void)
return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
* thread information flags
diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h
index 2a6eff57f6dd..3ae832db278c 100644
--- a/arch/arc/include/uapi/asm/ptrace.h
+++ b/arch/arc/include/uapi/asm/ptrace.h
@@ -14,7 +14,7 @@
#define PTRACE_GET_THREAD_AREA 25
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* Userspace ABI: Register state needed by
* -ptrace (gdbserver)
@@ -53,6 +53,6 @@ struct user_regs_arcv2 {
unsigned long r30, r58, r59;
};
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _UAPI__ASM_ARC_PTRACE_H */
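The ARC header conversions above work because GCC and Clang both predefine
__ASSEMBLER__ whenever they preprocess assembly input, while the old
__ASSEMBLY__ macro was a kernel convention that had to be passed in on the
assembler command line. Guarding C-only declarations on the compiler-provided
symbol drops that build-system dependency without changing semantics. A
minimal sketch of the guard pattern, with hypothetical names:

/* example_defs.h - shared between C and assembly (illustrative only) */
#ifndef EXAMPLE_DEFS_H
#define EXAMPLE_DEFS_H

#define EXAMPLE_FLAG_ENABLE	0x1	/* visible to both languages */

#ifndef __ASSEMBLER__	/* predefined by gcc/clang for assembly input */
struct example_state {
	unsigned long flags;		/* C-only declarations live here */
};
#endif /* !__ASSEMBLER__ */

#endif /* EXAMPLE_DEFS_H */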
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index d8969dab12d4..789cfb9ea14e 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -241,15 +241,6 @@ static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2)
return (e1->start > e2->start) - (e1->start < e2->start);
}
-static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
-{
- struct eh_frame_hdr_table_entry *e1 = p1;
- struct eh_frame_hdr_table_entry *e2 = p2;
-
- swap(e1->start, e2->start);
- swap(e1->fde, e2->fde);
-}
-
static void init_unwind_hdr(struct unwind_table *table,
void *(*alloc) (unsigned long))
{
@@ -345,7 +336,7 @@ static void init_unwind_hdr(struct unwind_table *table,
sort(header->table,
n,
sizeof(*header->table),
- cmp_eh_frame_hdr_table_entries, swap_eh_frame_hdr_table_entries);
+ cmp_eh_frame_hdr_table_entries, NULL);
table->hdrsz = hdrSize;
smp_wmb();
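Dropping swap_eh_frame_hdr_table_entries() is safe because the kernel's
sort() treats a NULL swap callback as "use the built-in swap", which copies
elements byte-wise (or word-wise when size and alignment allow) - exactly
what the removed helper did by swapping both members. A sketch of the same
pattern with a hypothetical element type:

#include <linux/sort.h>

struct range {				/* illustrative only */
	unsigned long start, end;
};

static int cmp_range(const void *a, const void *b)
{
	const struct range *r1 = a, *r2 = b;

	return (r1->start > r2->start) - (r1->start < r2->start);
}

static void sort_ranges(struct range *table, size_t n)
{
	/* NULL swap: sort() falls back to its generic swap routine */
	sort(table, n, sizeof(*table), cmp_range, NULL);
}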
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 3072731fe09c..962451e54fdd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -121,7 +121,7 @@ config ARM
select HAVE_KERNEL_XZ
select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M
select HAVE_KRETPROBES if HAVE_KPROBES
- select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if (LD_VERSION >= 23600 || LD_CAN_USE_KEEP_IN_OVERLAY)
+ select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if (LD_VERSION >= 23600 || LD_IS_LLD) && LD_CAN_USE_KEEP_IN_OVERLAY
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
select HAVE_OPTPROBES if !THUMB2_KERNEL
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 4808d3ed98e4..e31e95ffd33f 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -149,7 +149,7 @@ endif
# Need -Uarm for gcc < 3.x
KBUILD_CPPFLAGS +=$(cpp-y)
KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_ISA) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm
-KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) -Wa,$(arch-y) $(tune-y) -include asm/unified.h -msoft-float
+KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) -Wa,$(arch-y) $(tune-y) -include $(srctree)/arch/arm/include/asm/unified.h -msoft-float
KBUILD_RUSTFLAGS += --target=arm-unknown-linux-gnueabi
CHECKFLAGS += -D__arm__
diff --git a/arch/arm/boot/dts/allwinner/sun8i-v3s.dtsi b/arch/arm/boot/dts/allwinner/sun8i-v3s.dtsi
index f909b1d4dbca..e82cf312da25 100644
--- a/arch/arm/boot/dts/allwinner/sun8i-v3s.dtsi
+++ b/arch/arm/boot/dts/allwinner/sun8i-v3s.dtsi
@@ -652,7 +652,7 @@
reg = <0x01cb4000 0x3000>;
interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_BUS_CSI>,
- <&ccu CLK_CSI1_SCLK>,
+ <&ccu CLK_CSI_SCLK>,
<&ccu CLK_DRAM_CSI>;
clock-names = "bus", "mod", "ram";
resets = <&ccu RST_BUS_CSI>;
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 7f1c3b4e3e04..86378eec7757 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -301,7 +301,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(swp) __pte((swp).val)
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_isset(pte, L_PTE_SWP_EXCLUSIVE);
}
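The int-to-bool conversion of pte_swp_exclusive() here (and in the matching
ARC and arm64 headers) keeps the body returning the raw masked bit; C's bool
conversion normalizes any non-zero value, so the result is always 0 or 1
regardless of which bit position the flag occupies. A tiny illustration with
hypothetical values:

#include <linux/types.h>

/* With an int return, a set high bit would break "== 1" comparisons */
static bool flag_set(unsigned long word, unsigned long mask)
{
	return word & mask;	/* e.g. 0x40 normalizes to true, not 0x40 */
}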
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 55fc331af337..393d71124f5d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -256,6 +256,7 @@ config ARM64
select HOTPLUG_SMT if HOTPLUG_CPU
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
+ select JUMP_LABEL
select KASAN_VMALLOC if KASAN
select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_RELA
diff --git a/arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi b/arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi
index 8b7cbc2e78f5..51cd148f4227 100644
--- a/arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi
@@ -131,7 +131,7 @@
"PH5", "PH6", "PH7", "PH9", "PH10",
"PH14", "PH15", "PH16", "PH17", "PH18";
allwinner,pinmux = <5>;
- function = "emac0";
+ function = "gmac0";
drive-strength = <40>;
bias-disable;
};
@@ -540,8 +540,8 @@
status = "disabled";
};
- emac0: ethernet@4500000 {
- compatible = "allwinner,sun55i-a523-emac0",
+ gmac0: ethernet@4500000 {
+ compatible = "allwinner,sun55i-a523-gmac0",
"allwinner,sun50i-a64-emac";
reg = <0x04500000 0x10000>;
clocks = <&ccu CLK_BUS_EMAC0>;
diff --git a/arch/arm64/boot/dts/allwinner/sun55i-a527-cubie-a5e.dts b/arch/arm64/boot/dts/allwinner/sun55i-a527-cubie-a5e.dts
index 0f58d92a6adc..8bc0f2c72a24 100644
--- a/arch/arm64/boot/dts/allwinner/sun55i-a527-cubie-a5e.dts
+++ b/arch/arm64/boot/dts/allwinner/sun55i-a527-cubie-a5e.dts
@@ -12,7 +12,7 @@
compatible = "radxa,cubie-a5e", "allwinner,sun55i-a527";
aliases {
- ethernet0 = &emac0;
+ ethernet0 = &gmac0;
serial0 = &uart0;
};
@@ -55,7 +55,7 @@
status = "okay";
};
-&emac0 {
+&gmac0 {
phy-mode = "rgmii-id";
phy-handle = <&ext_rgmii_phy>;
phy-supply = <&reg_cldo3>;
diff --git a/arch/arm64/boot/dts/allwinner/sun55i-t527-avaota-a1.dts b/arch/arm64/boot/dts/allwinner/sun55i-t527-avaota-a1.dts
index 08127f0cdd35..142177c1f737 100644
--- a/arch/arm64/boot/dts/allwinner/sun55i-t527-avaota-a1.dts
+++ b/arch/arm64/boot/dts/allwinner/sun55i-t527-avaota-a1.dts
@@ -12,7 +12,7 @@
compatible = "yuzukihd,avaota-a1", "allwinner,sun55i-t527";
aliases {
- ethernet0 = &emac0;
+ ethernet0 = &gmac0;
serial0 = &uart0;
};
@@ -65,7 +65,7 @@
status = "okay";
};
-&emac0 {
+&gmac0 {
phy-mode = "rgmii-id";
phy-handle = <&ext_rgmii_phy>;
phy-supply = <&reg_dcdc4>;
diff --git a/arch/arm64/boot/dts/apple/spi1-nvram.dtsi b/arch/arm64/boot/dts/apple/spi1-nvram.dtsi
index 3df2fd3993b5..9740fbf200f0 100644
--- a/arch/arm64/boot/dts/apple/spi1-nvram.dtsi
+++ b/arch/arm64/boot/dts/apple/spi1-nvram.dtsi
@@ -20,8 +20,6 @@
compatible = "jedec,spi-nor";
reg = <0x0>;
spi-max-frequency = <25000000>;
- #address-cells = <1>;
- #size-cells = <1>;
partitions {
compatible = "fixed-partitions";
diff --git a/arch/arm64/boot/dts/apple/t8103-j293.dts b/arch/arm64/boot/dts/apple/t8103-j293.dts
index e2d9439397f7..5b3c42e9f0e6 100644
--- a/arch/arm64/boot/dts/apple/t8103-j293.dts
+++ b/arch/arm64/boot/dts/apple/t8103-j293.dts
@@ -100,6 +100,8 @@
&displaydfr_mipi {
status = "okay";
+ #address-cells = <1>;
+ #size-cells = <0>;
dfr_panel: panel@0 {
compatible = "apple,j293-summit", "apple,summit";
diff --git a/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi b/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi
index 8e82231acab5..0c8206156bfe 100644
--- a/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi
+++ b/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi
@@ -71,7 +71,7 @@
*/
&port00 {
bus-range = <1 1>;
- wifi0: network@0,0 {
+ wifi0: wifi@0,0 {
compatible = "pci14e4,4425";
reg = <0x10000 0x0 0x0 0x0 0x0>;
/* To be filled by the loader */
diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi
index 20faf0c0d809..3a204845b85b 100644
--- a/arch/arm64/boot/dts/apple/t8103.dtsi
+++ b/arch/arm64/boot/dts/apple/t8103.dtsi
@@ -405,8 +405,6 @@
compatible = "apple,t8103-display-pipe-mipi", "apple,h7-display-pipe-mipi";
reg = <0x2 0x28600000 0x0 0x100000>;
power-domains = <&ps_mipi_dsi>;
- #address-cells = <1>;
- #size-cells = <0>;
status = "disabled";
ports {
diff --git a/arch/arm64/boot/dts/apple/t8112-j493.dts b/arch/arm64/boot/dts/apple/t8112-j493.dts
index be86d34c6696..fb8ad7d4c65a 100644
--- a/arch/arm64/boot/dts/apple/t8112-j493.dts
+++ b/arch/arm64/boot/dts/apple/t8112-j493.dts
@@ -63,6 +63,8 @@
&displaydfr_mipi {
status = "okay";
+ #address-cells = <1>;
+ #size-cells = <0>;
dfr_panel: panel@0 {
compatible = "apple,j493-summit", "apple,summit";
diff --git a/arch/arm64/boot/dts/apple/t8112.dtsi b/arch/arm64/boot/dts/apple/t8112.dtsi
index e95711d8337f..f68354194355 100644
--- a/arch/arm64/boot/dts/apple/t8112.dtsi
+++ b/arch/arm64/boot/dts/apple/t8112.dtsi
@@ -420,8 +420,6 @@
compatible = "apple,t8112-display-pipe-mipi", "apple,h7-display-pipe-mipi";
reg = <0x2 0x28600000 0x0 0x100000>;
power-domains = <&ps_mipi_dsi>;
- #address-cells = <1>;
- #size-cells = <0>;
status = "disabled";
ports {
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
index 0baf256b4400..983b2f0e8797 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
@@ -687,11 +687,12 @@
};
wdog0: watchdog@2ad0000 {
- compatible = "fsl,imx21-wdt";
+ compatible = "fsl,ls1046a-wdt", "fsl,imx21-wdt";
reg = <0x0 0x2ad0000 0x0 0x10000>;
interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
QORIQ_CLK_PLL_DIV(2)>;
+ big-endian;
};
edma0: dma-controller@2c00000 {
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
index d29710772569..1594ce9182a5 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
@@ -464,6 +464,7 @@
};
reg_nvcc_sd: LDO5 {
+ regulator-always-on;
regulator-max-microvolt = <3300000>;
regulator-min-microvolt = <1800000>;
regulator-name = "On-module +V3.3_1.8_SD (LDO5)";
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi
index 2f740d74707b..4bf818873fe3 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi
@@ -70,7 +70,7 @@
tpm@1 {
compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
- spi-max-frequency = <36000000>;
+ spi-max-frequency = <25000000>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
index 5ab3ffe9931d..cf747ec6fa16 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
@@ -110,7 +110,7 @@
tpm@1 {
compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
- spi-max-frequency = <36000000>;
+ spi-max-frequency = <25000000>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
index e2b5e7ac3e46..5eb114d2360a 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
@@ -122,7 +122,7 @@
tpm@1 {
compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
- spi-max-frequency = <36000000>;
+ spi-max-frequency = <25000000>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
index 6daa2313f879..568d24265ddf 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
@@ -201,7 +201,7 @@
tpm@0 {
compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x0>;
- spi-max-frequency = <36000000>;
+ spi-max-frequency = <25000000>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts b/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts
index 6c47f4b47356..9f4d0899a94d 100644
--- a/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts
@@ -574,17 +574,17 @@
&scmi_iomuxc {
pinctrl_emdio: emdiogrp {
fsl,pins = <
- IMX95_PAD_ENET2_MDC__NETCMIX_TOP_NETC_MDC 0x57e
- IMX95_PAD_ENET2_MDIO__NETCMIX_TOP_NETC_MDIO 0x97e
+ IMX95_PAD_ENET2_MDC__NETCMIX_TOP_NETC_MDC 0x50e
+ IMX95_PAD_ENET2_MDIO__NETCMIX_TOP_NETC_MDIO 0x90e
>;
};
pinctrl_enetc0: enetc0grp {
fsl,pins = <
- IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x57e
- IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x57e
- IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x57e
- IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x57e
+ IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x50e
+ IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x50e
+ IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x50e
+ IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x50e
IMX95_PAD_ENET1_TX_CTL__NETCMIX_TOP_ETH0_RGMII_TX_CTL 0x57e
IMX95_PAD_ENET1_TXC__NETCMIX_TOP_ETH0_RGMII_TX_CLK 0x58e
IMX95_PAD_ENET1_RX_CTL__NETCMIX_TOP_ETH0_RGMII_RX_CTL 0x57e
@@ -598,10 +598,10 @@
pinctrl_enetc1: enetc1grp {
fsl,pins = <
- IMX95_PAD_ENET2_TD3__NETCMIX_TOP_ETH1_RGMII_TD3 0x57e
- IMX95_PAD_ENET2_TD2__NETCMIX_TOP_ETH1_RGMII_TD2 0x57e
- IMX95_PAD_ENET2_TD1__NETCMIX_TOP_ETH1_RGMII_TD1 0x57e
- IMX95_PAD_ENET2_TD0__NETCMIX_TOP_ETH1_RGMII_TD0 0x57e
+ IMX95_PAD_ENET2_TD3__NETCMIX_TOP_ETH1_RGMII_TD3 0x50e
+ IMX95_PAD_ENET2_TD2__NETCMIX_TOP_ETH1_RGMII_TD2 0x50e
+ IMX95_PAD_ENET2_TD1__NETCMIX_TOP_ETH1_RGMII_TD1 0x50e
+ IMX95_PAD_ENET2_TD0__NETCMIX_TOP_ETH1_RGMII_TD0 0x50e
IMX95_PAD_ENET2_TX_CTL__NETCMIX_TOP_ETH1_RGMII_TX_CTL 0x57e
IMX95_PAD_ENET2_TXC__NETCMIX_TOP_ETH1_RGMII_TX_CLK 0x58e
IMX95_PAD_ENET2_RX_CTL__NETCMIX_TOP_ETH1_RGMII_RX_CTL 0x57e
diff --git a/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts b/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts
index 6886ea766655..d7d845231312 100644
--- a/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts
@@ -566,17 +566,17 @@
&scmi_iomuxc {
pinctrl_emdio: emdiogrp{
fsl,pins = <
- IMX95_PAD_ENET1_MDC__NETCMIX_TOP_NETC_MDC 0x57e
- IMX95_PAD_ENET1_MDIO__NETCMIX_TOP_NETC_MDIO 0x97e
+ IMX95_PAD_ENET1_MDC__NETCMIX_TOP_NETC_MDC 0x50e
+ IMX95_PAD_ENET1_MDIO__NETCMIX_TOP_NETC_MDIO 0x90e
>;
};
pinctrl_enetc0: enetc0grp {
fsl,pins = <
- IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x57e
- IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x57e
- IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x57e
- IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x57e
+ IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x50e
+ IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x50e
+ IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x50e
+ IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x50e
IMX95_PAD_ENET1_TX_CTL__NETCMIX_TOP_ETH0_RGMII_TX_CTL 0x57e
IMX95_PAD_ENET1_TXC__NETCMIX_TOP_ETH0_RGMII_TX_CLK 0x58e
IMX95_PAD_ENET1_RX_CTL__NETCMIX_TOP_ETH0_RGMII_RX_CTL 0x57e
diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi
index 632631a29112..5aecdd9b62ff 100644
--- a/arch/arm64/boot/dts/freescale/imx95.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx95.dtsi
@@ -1708,7 +1708,7 @@
<0x9 0 1 0>;
reg-names = "dbi","atu", "dbi2", "app", "dma", "addr_space";
num-lanes = <1>;
- interrupts = <GIC_SPI 317 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 311 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "dma";
clocks = <&scmi_clk IMX95_CLK_HSIO>,
<&scmi_clk IMX95_CLK_HSIOPLL>,
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
index ae7a275fd223..cefecb7a23cf 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
@@ -1090,6 +1090,8 @@
};
&pmk8280_rtc {
+ qcom,uefi-rtc-info;
+
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi b/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi
index c02fd4d15c96..e3888bc143a0 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi
+++ b/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi
@@ -224,6 +224,7 @@
reg-names = "rtc", "alarm";
interrupts = <0x0 0x62 0x1 IRQ_TYPE_EDGE_RISING>;
qcom,no-alarm; /* alarm owned by ADSP */
+ qcom,uefi-rtc-info;
};
pmk8550_sdam_2: nvram@7100 {
diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
index ab232e5c7ad6..4203b335a263 100644
--- a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
+++ b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
@@ -379,6 +379,18 @@
<0 RK_PA7 RK_FUNC_GPIO &pcfg_pull_up>;
};
};
+
+ spi1 {
+ spi1_csn0_gpio_pin: spi1-csn0-gpio-pin {
+ rockchip,pins =
+ <3 RK_PB1 RK_FUNC_GPIO &pcfg_pull_up_4ma>;
+ };
+
+ spi1_csn1_gpio_pin: spi1-csn1-gpio-pin {
+ rockchip,pins =
+ <3 RK_PB2 RK_FUNC_GPIO &pcfg_pull_up_4ma>;
+ };
+ };
};
&pmu_io_domains {
@@ -396,6 +408,17 @@
vqmmc-supply = <&vccio_sd>;
};
+&spi1 {
+ /*
+ * Hardware CS has a very slow rise time of about 6us,
+ * causing transmission errors.
+ * With cs-gpios we have a rise time of about 20ns.
+ */
+ cs-gpios = <&gpio3 RK_PB1 GPIO_ACTIVE_LOW>, <&gpio3 RK_PB2 GPIO_ACTIVE_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi1_clk &spi1_csn0_gpio_pin &spi1_csn1_gpio_pin &spi1_miso &spi1_mosi>;
+};
+
&tsadc {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
index 3c127c5c2607..a9021c524afb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
@@ -30,6 +30,7 @@
fan: gpio_fan {
compatible = "gpio-fan";
+ fan-supply = <&vcc12v_dcin>;
gpios = <&gpio0 RK_PD5 GPIO_ACTIVE_HIGH>;
gpio-fan,speed-map =
< 0 0>,
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts
index 3b31f0dd8f3b..539edc3c535f 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts
@@ -29,7 +29,6 @@
function-enumerator = <1>;
gpios = <&gpio3 RK_PD6 GPIO_ACTIVE_HIGH>;
label = "LAN-1";
- linux,default-trigger = "netdev";
};
led-lan2 {
@@ -39,7 +38,6 @@
function-enumerator = <2>;
gpios = <&gpio3 RK_PD7 GPIO_ACTIVE_HIGH>;
label = "LAN-2";
- linux,default-trigger = "netdev";
};
power_led: led-sys {
@@ -56,7 +54,6 @@
function = LED_FUNCTION_WAN;
gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_HIGH>;
label = "WAN";
- linux,default-trigger = "netdev";
};
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts
index b09e789c75c4..801b40fea4e8 100644
--- a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts
@@ -211,10 +211,38 @@
status = "okay";
};
+&cpu_b0 {
+ cpu-supply = <&vdd_cpu_big_s0>;
+};
+
+&cpu_b1 {
+ cpu-supply = <&vdd_cpu_big_s0>;
+};
+
+&cpu_b2 {
+ cpu-supply = <&vdd_cpu_big_s0>;
+};
+
+&cpu_b3 {
+ cpu-supply = <&vdd_cpu_big_s0>;
+};
+
&cpu_l0 {
cpu-supply = <&vdd_cpu_lit_s0>;
};
+&cpu_l1 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l2 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l3 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
&gmac0 {
phy-mode = "rgmii-id";
clock_in_out = "output";
diff --git a/arch/arm64/boot/dts/rockchip/rk3576.dtsi b/arch/arm64/boot/dts/rockchip/rk3576.dtsi
index 1086482f0479..64812e3bcb61 100644
--- a/arch/arm64/boot/dts/rockchip/rk3576.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3576.dtsi
@@ -615,7 +615,7 @@
<0 0 0 2 &pcie1_intc 1>,
<0 0 0 3 &pcie1_intc 2>,
<0 0 0 4 &pcie1_intc 3>;
- linux,pci-domain = <0>;
+ linux,pci-domain = <1>;
max-link-speed = <2>;
num-ib-windows = <8>;
num-viewport = <8>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi
index 7f874c77410c..6584d73660f6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi
@@ -578,14 +578,14 @@
hdmim0_tx0_scl: hdmim0-tx0-scl {
rockchip,pins =
/* hdmim0_tx0_scl */
- <4 RK_PB7 5 &pcfg_pull_none>;
+ <4 RK_PB7 5 &pcfg_pull_none_drv_level_5_smt>;
};
/omit-if-no-ref/
hdmim0_tx0_sda: hdmim0-tx0-sda {
rockchip,pins =
/* hdmim0_tx0_sda */
- <4 RK_PC0 5 &pcfg_pull_none>;
+ <4 RK_PC0 5 &pcfg_pull_none_drv_level_1_smt>;
};
/omit-if-no-ref/
@@ -640,14 +640,14 @@
hdmim1_tx0_scl: hdmim1-tx0-scl {
rockchip,pins =
/* hdmim1_tx0_scl */
- <0 RK_PD5 11 &pcfg_pull_none>;
+ <0 RK_PD5 11 &pcfg_pull_none_drv_level_5_smt>;
};
/omit-if-no-ref/
hdmim1_tx0_sda: hdmim1-tx0-sda {
rockchip,pins =
/* hdmim1_tx0_sda */
- <0 RK_PD4 11 &pcfg_pull_none>;
+ <0 RK_PD4 11 &pcfg_pull_none_drv_level_1_smt>;
};
/omit-if-no-ref/
@@ -668,14 +668,14 @@
hdmim1_tx1_scl: hdmim1-tx1-scl {
rockchip,pins =
/* hdmim1_tx1_scl */
- <3 RK_PC6 5 &pcfg_pull_none>;
+ <3 RK_PC6 5 &pcfg_pull_none_drv_level_5_smt>;
};
/omit-if-no-ref/
hdmim1_tx1_sda: hdmim1-tx1-sda {
rockchip,pins =
/* hdmim1_tx1_sda */
- <3 RK_PC5 5 &pcfg_pull_none>;
+ <3 RK_PC5 5 &pcfg_pull_none_drv_level_1_smt>;
};
/omit-if-no-ref/
hdmim2_rx_cec: hdmim2-rx-cec {
@@ -709,14 +709,14 @@
hdmim2_tx0_scl: hdmim2-tx0-scl {
rockchip,pins =
/* hdmim2_tx0_scl */
- <3 RK_PC7 5 &pcfg_pull_none>;
+ <3 RK_PC7 5 &pcfg_pull_none_drv_level_5_smt>;
};
/omit-if-no-ref/
hdmim2_tx0_sda: hdmim2-tx0-sda {
rockchip,pins =
/* hdmim2_tx0_sda */
- <3 RK_PD0 5 &pcfg_pull_none>;
+ <3 RK_PD0 5 &pcfg_pull_none_drv_level_1_smt>;
};
/omit-if-no-ref/
@@ -730,14 +730,14 @@
hdmim2_tx1_scl: hdmim2-tx1-scl {
rockchip,pins =
/* hdmim2_tx1_scl */
- <1 RK_PA4 5 &pcfg_pull_none>;
+ <1 RK_PA4 5 &pcfg_pull_none_drv_level_5_smt>;
};
/omit-if-no-ref/
hdmim2_tx1_sda: hdmim2-tx1-sda {
rockchip,pins =
/* hdmim2_tx1_sda */
- <1 RK_PA3 5 &pcfg_pull_none>;
+ <1 RK_PA3 5 &pcfg_pull_none_drv_level_1_smt>;
};
/omit-if-no-ref/
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
index cc37f082adea..b07543315f87 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
@@ -321,6 +321,7 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi
index 244c66faa161..fb48ddc04bcb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi
@@ -160,14 +160,15 @@
hdmim0_tx1_scl: hdmim0-tx1-scl {
rockchip,pins =
/* hdmim0_tx1_scl */
- <2 RK_PB5 4 &pcfg_pull_none>;
+ <2 RK_PB5 4 &pcfg_pull_none_drv_level_3_smt>;
};
/omit-if-no-ref/
hdmim0_tx1_sda: hdmim0-tx1-sda {
rockchip,pins =
/* hdmim0_tx1_sda */
- <2 RK_PB4 4 &pcfg_pull_none>;
+ <2 RK_PB4 4 &pcfg_pull_none_drv_level_1_smt>;
+
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
index 8b717c4017a4..b2947b36fada 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
@@ -474,6 +474,7 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
diff --git a/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi b/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi
index 5c645437b507..b0475b7c655a 100644
--- a/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi
@@ -333,6 +333,41 @@
};
/omit-if-no-ref/
+ pcfg_pull_none_drv_level_1_smt: pcfg-pull-none-drv-level-1-smt {
+ bias-disable;
+ drive-strength = <1>;
+ input-schmitt-enable;
+ };
+
+ /omit-if-no-ref/
+ pcfg_pull_none_drv_level_2_smt: pcfg-pull-none-drv-level-2-smt {
+ bias-disable;
+ drive-strength = <2>;
+ input-schmitt-enable;
+ };
+
+ /omit-if-no-ref/
+ pcfg_pull_none_drv_level_3_smt: pcfg-pull-none-drv-level-3-smt {
+ bias-disable;
+ drive-strength = <3>;
+ input-schmitt-enable;
+ };
+
+ /omit-if-no-ref/
+ pcfg_pull_none_drv_level_4_smt: pcfg-pull-none-drv-level-4-smt {
+ bias-disable;
+ drive-strength = <4>;
+ input-schmitt-enable;
+ };
+
+ /omit-if-no-ref/
+ pcfg_pull_none_drv_level_5_smt: pcfg-pull-none-drv-level-5-smt {
+ bias-disable;
+ drive-strength = <5>;
+ input-schmitt-enable;
+ };
+
+ /omit-if-no-ref/
pcfg_output_high: pcfg-output-high {
output-high;
};
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 897fc686e6a9..eb5c17d4c7ec 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1444,6 +1444,7 @@ CONFIG_PLATFORM_MHU=y
CONFIG_BCM2835_MBOX=y
CONFIG_QCOM_APCS_IPC=y
CONFIG_MTK_ADSP_MBOX=m
+CONFIG_QCOM_CPUCP_MBOX=m
CONFIG_QCOM_IPCC=y
CONFIG_ROCKCHIP_IOMMU=y
CONFIG_TEGRA_IOMMU_SMMU=y
@@ -1573,6 +1574,7 @@ CONFIG_RESET_QCOM_AOSS=y
CONFIG_RESET_QCOM_PDC=m
CONFIG_RESET_RZG2L_USBPHY_CTRL=y
CONFIG_RESET_TI_SCI=y
+CONFIG_PHY_SNPS_EUSB2=m
CONFIG_PHY_XGENE=y
CONFIG_PHY_CAN_TRANSCEIVER=m
CONFIG_PHY_NXP_PTN3222=m
@@ -1597,7 +1599,6 @@ CONFIG_PHY_QCOM_EDP=m
CONFIG_PHY_QCOM_PCIE2=m
CONFIG_PHY_QCOM_QMP=m
CONFIG_PHY_QCOM_QUSB2=m
-CONFIG_PHY_QCOM_SNPS_EUSB2=m
CONFIG_PHY_QCOM_EUSB2_REPEATER=m
CONFIG_PHY_QCOM_M31_USB=m
CONFIG_PHY_QCOM_USB_HS=m
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index ad63457a05c5..c56c21bb1eec 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -41,6 +41,11 @@
/*
* Save/restore interrupts.
*/
+ .macro save_and_disable_daif, flags
+ mrs \flags, daif
+ msr daifset, #0xf
+ .endm
+
.macro save_and_disable_irq, flags
mrs \flags, daif
msr daifset, #3
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index ba5df0df02a4..9f38340d24c2 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -287,17 +287,6 @@
.Lskip_fgt2_\@:
.endm
-.macro __init_el2_gcs
- mrs_s x1, SYS_ID_AA64PFR1_EL1
- ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
- cbz x1, .Lskip_gcs_\@
-
- /* Ensure GCS is not enabled when we start trying to do BLs */
- msr_s SYS_GCSCR_EL1, xzr
- msr_s SYS_GCSCRE0_EL1, xzr
-.Lskip_gcs_\@:
-.endm
-
/**
* Initialize EL2 registers to sane values. This should be called early on all
* cores that were booted in EL2. Note that everything gets initialised as
@@ -319,7 +308,6 @@
__init_el2_cptr
__init_el2_fgt
__init_el2_fgt2
- __init_el2_gcs
.endm
#ifndef __KVM_NVHE_HYPERVISOR__
@@ -371,6 +359,13 @@
msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2
.Lskip_mpam_\@:
+ check_override id_aa64pfr1, ID_AA64PFR1_EL1_GCS_SHIFT, .Linit_gcs_\@, .Lskip_gcs_\@, x1, x2
+
+.Linit_gcs_\@:
+ msr_s SYS_GCSCR_EL1, xzr
+ msr_s SYS_GCSCRE0_EL1, xzr
+
+.Lskip_gcs_\@:
check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2
.Linit_sve_\@: /* SVE register access */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index bd020fc28aa9..0720898f563e 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -561,68 +561,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
vcpu_set_flag((v), e); \
} while (0)
-#define __build_check_all_or_none(r, bits) \
- BUILD_BUG_ON(((r) & (bits)) && ((r) & (bits)) != (bits))
-
-#define __cpacr_to_cptr_clr(clr, set) \
- ({ \
- u64 cptr = 0; \
- \
- if ((set) & CPACR_EL1_FPEN) \
- cptr |= CPTR_EL2_TFP; \
- if ((set) & CPACR_EL1_ZEN) \
- cptr |= CPTR_EL2_TZ; \
- if ((set) & CPACR_EL1_SMEN) \
- cptr |= CPTR_EL2_TSM; \
- if ((clr) & CPACR_EL1_TTA) \
- cptr |= CPTR_EL2_TTA; \
- if ((clr) & CPTR_EL2_TAM) \
- cptr |= CPTR_EL2_TAM; \
- if ((clr) & CPTR_EL2_TCPAC) \
- cptr |= CPTR_EL2_TCPAC; \
- \
- cptr; \
- })
-
-#define __cpacr_to_cptr_set(clr, set) \
- ({ \
- u64 cptr = 0; \
- \
- if ((clr) & CPACR_EL1_FPEN) \
- cptr |= CPTR_EL2_TFP; \
- if ((clr) & CPACR_EL1_ZEN) \
- cptr |= CPTR_EL2_TZ; \
- if ((clr) & CPACR_EL1_SMEN) \
- cptr |= CPTR_EL2_TSM; \
- if ((set) & CPACR_EL1_TTA) \
- cptr |= CPTR_EL2_TTA; \
- if ((set) & CPTR_EL2_TAM) \
- cptr |= CPTR_EL2_TAM; \
- if ((set) & CPTR_EL2_TCPAC) \
- cptr |= CPTR_EL2_TCPAC; \
- \
- cptr; \
- })
-
-#define cpacr_clear_set(clr, set) \
- do { \
- BUILD_BUG_ON((set) & CPTR_VHE_EL2_RES0); \
- BUILD_BUG_ON((clr) & CPACR_EL1_E0POE); \
- __build_check_all_or_none((clr), CPACR_EL1_FPEN); \
- __build_check_all_or_none((set), CPACR_EL1_FPEN); \
- __build_check_all_or_none((clr), CPACR_EL1_ZEN); \
- __build_check_all_or_none((set), CPACR_EL1_ZEN); \
- __build_check_all_or_none((clr), CPACR_EL1_SMEN); \
- __build_check_all_or_none((set), CPACR_EL1_SMEN); \
- \
- if (has_vhe() || has_hvhe()) \
- sysreg_clear_set(cpacr_el1, clr, set); \
- else \
- sysreg_clear_set(cptr_el2, \
- __cpacr_to_cptr_clr(clr, set), \
- __cpacr_to_cptr_set(clr, set));\
- } while (0)
-
/*
* Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE
* format if E2H isn't set.
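One detail worth noting from the deleted block: cpacr_clear_set() guarded its
arguments with an "all or none" compile-time check, because fields such as
CPACR_EL1_FPEN are two bits wide and setting only half of one is a latent
bug. The idiom is generally useful for multi-bit masks; this is the check as
it appeared in the removed code:

#include <linux/build_bug.h>

/* Fails the build if r overlaps bits without covering all of them */
#define __build_check_all_or_none(r, bits) \
	BUILD_BUG_ON(((r) & (bits)) && ((r) & (bits)) != (bits))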
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 6ce2c5173482..3e41a880b062 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1107,14 +1107,36 @@ static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r))
u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64);
+
+#define __vcpu_assign_sys_reg(v, r, val) \
+ do { \
+ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
+ u64 __v = (val); \
+ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \
+ __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \
+ \
+ ctxt_sys_reg(ctxt, (r)) = __v; \
+ } while (0)
+
+#define __vcpu_rmw_sys_reg(v, r, op, val) \
+ do { \
+ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
+ u64 __v = ctxt_sys_reg(ctxt, (r)); \
+ __v op (val); \
+ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \
+ __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \
+ \
+ ctxt_sys_reg(ctxt, (r)) = __v; \
+ } while (0)
+
#define __vcpu_sys_reg(v,r) \
- (*({ \
+ ({ \
const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
- u64 *__r = __ctxt_sys_reg(ctxt, (r)); \
+ u64 __v = ctxt_sys_reg(ctxt, (r)); \
if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \
- *__r = kvm_vcpu_apply_reg_masks((v), (r), *__r);\
- __r; \
- }))
+ __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \
+ __v; \
+ })
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
@@ -1267,9 +1289,8 @@ void kvm_arm_resume_guest(struct kvm *kvm);
})
/*
- * The couple of isb() below are there to guarantee the same behaviour
- * on VHE as on !VHE, where the eret to EL1 acts as a context
- * synchronization event.
+ * The isb() below is there to guarantee the same behaviour on VHE as on !VHE,
+ * where the eret to EL1 acts as a context synchronization event.
*/
#define kvm_call_hyp(f, ...) \
do { \
@@ -1287,7 +1308,6 @@ void kvm_arm_resume_guest(struct kvm *kvm);
\
if (has_vhe()) { \
ret = f(__VA_ARGS__); \
- isb(); \
} else { \
ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \
} \
@@ -1460,7 +1480,6 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm,
struct reg_mask_range *range);
/* Guest/host FPSIMD coordination helpers */
-int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
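The net effect of this hunk is that __vcpu_sys_reg() becomes a pure rvalue:
reads still apply kvm_vcpu_apply_reg_masks() on the fly, but they no longer
write the sanitised value back through a returned pointer, and direct
assignment through the macro is no longer possible. All stores now funnel
through the two new macros, giving a single choke point where sanitisation
runs at write time instead of lazily on the next read. The later hunks in
arch_timer.c and debug.c show the caller-side migration:

/* Before: the macro yielded an lvalue; stores were sanitised only on a
 * later read */
__vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl;

/* After: explicit assignment and read-modify-write forms */
__vcpu_assign_sys_reg(vcpu, CNTV_CTL_EL0, ctl);
__vcpu_rmw_sys_reg(vcpu, OSLSR_EL1, |=, OSLSR_EL1_OSLK);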
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 88db8a0c0b37..192d86e1cc76 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -563,7 +563,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte)
return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & PTE_SWP_EXCLUSIVE;
}
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 2920b0a51403..a2faf0049dab 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -34,7 +34,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
syscall.o proton-pack.o idle.o patching.o pi/ \
- rsi.o
+ rsi.o jump_label.o
obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
sys_compat.o
@@ -47,7 +47,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
-obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o
obj-$(CONFIG_PCI) += pci.o
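Building jump_label.o unconditionally matches the unconditional
"select JUMP_LABEL" added in the arm64 Kconfig hunk earlier: static keys on
arm64 now always use runtime-patched branches rather than the atomic-read
fallback. The consumer API is unchanged; a minimal static-key example with a
hypothetical key name:

#include <linux/jump_label.h>
#include <linux/printk.h>

static DEFINE_STATIC_KEY_FALSE(my_feature_key);	/* illustrative */

void my_hot_path(void)
{
	/* Compiles to a patchable NOP, not a load-and-test */
	if (static_branch_unlikely(&my_feature_key))
		pr_info("feature path\n");
}

/* Typically flipped once, e.g. at init time: */
void my_feature_init(void)
{
	static_branch_enable(&my_feature_key);
}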
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index b34044e20128..e151585c6cca 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -3135,6 +3135,13 @@ static bool has_sve_feature(const struct arm64_cpu_capabilities *cap, int scope)
}
#endif
+#ifdef CONFIG_ARM64_SME
+static bool has_sme_feature(const struct arm64_cpu_capabilities *cap, int scope)
+{
+ return system_supports_sme() && has_user_cpuid_feature(cap, scope);
+}
+#endif
+
static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL),
HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES),
@@ -3223,31 +3230,31 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC),
#ifdef CONFIG_ARM64_SME
HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
- HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
- HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
- HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
- HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM),
- HWCAP_CAP(ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA),
- HWCAP_CAP(ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4),
#endif /* CONFIG_ARM64_SME */
HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA),
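The table conversion above matters because the ID_AA64SMFR0_EL1 fields are
only meaningful when SME itself is implemented; wrapping each entry with
has_sme_feature() keeps a stale or irrelevant SMFR field from advertising
hwcaps on systems without SME. The matcher follows the standard
arm64_cpu_capabilities predicate shape; a sketch for a hypothetical feature
"foo" (system_supports_foo() is invented, has_user_cpuid_feature() is from
the hunk above):

/* Only trust foo's sub-feature ID fields if foo itself is usable */
static bool has_foo_feature(const struct arm64_cpu_capabilities *cap,
			    int scope)
{
	return system_supports_foo() && has_user_cpuid_feature(cap, scope);
}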
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 3857fd7ee8d4..62230d6dd919 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -15,6 +15,7 @@
#include <asm/efi.h>
#include <asm/stacktrace.h>
+#include <asm/vmap_stack.h>
static bool region_is_misaligned(const efi_memory_desc_t *md)
{
@@ -214,9 +215,13 @@ static int __init arm64_efi_rt_init(void)
if (!efi_enabled(EFI_RUNTIME_SERVICES))
return 0;
- p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL,
- NUMA_NO_NODE, &&l);
-l: if (!p) {
+ if (!IS_ENABLED(CONFIG_VMAP_STACK)) {
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+ return -ENOMEM;
+ }
+
+ p = arch_alloc_vmap_stack(THREAD_SIZE, NUMA_NO_NODE);
+ if (!p) {
pr_warn("Failed to allocate EFI runtime stack\n");
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return -ENOMEM;
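Two things change in this EFI hunk: the open-coded __vmalloc_node() with its
label trick becomes arch_alloc_vmap_stack(), which (on arm64, as far as the
helper's definition goes) also resets the KASAN tag on the returned pointer
and build-asserts CONFIG_VMAP_STACK; hence the new early bail-out when vmap
stacks are disabled. IS_ENABLED() keeps that check in ordinary C, so both
branches are parsed and type-checked while the dead one is discarded:

/* Illustrative use: constant-folds to 1 or 0, no #ifdef needed */
if (!IS_ENABLED(CONFIG_VMAP_STACK))
	return -ENOMEM;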
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 5ae2a34b50bd..30dcb719685b 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -825,6 +825,7 @@ SYM_CODE_END(__bp_harden_el1_vectors)
*
*/
SYM_FUNC_START(cpu_switch_to)
+ save_and_disable_daif x11
mov x10, #THREAD_CPU_CONTEXT
add x8, x0, x10
mov x9, sp
@@ -848,6 +849,7 @@ SYM_FUNC_START(cpu_switch_to)
ptrauth_keys_install_kernel x1, x8, x9, x10
scs_save x0
scs_load_current
+ restore_irq x11
ret
SYM_FUNC_END(cpu_switch_to)
NOKPROBE(cpu_switch_to)
@@ -874,6 +876,7 @@ NOKPROBE(ret_from_fork)
* Calls func(regs) using this CPU's irq stack and shadow irq stack.
*/
SYM_FUNC_START(call_on_irq_stack)
+ save_and_disable_daif x9
#ifdef CONFIG_SHADOW_CALL_STACK
get_current_task x16
scs_save x16
@@ -888,8 +891,10 @@ SYM_FUNC_START(call_on_irq_stack)
/* Move to the new stack and call the function there */
add sp, x16, #IRQ_STACK_SIZE
+ restore_irq x9
blr x1
+ save_and_disable_daif x9
/*
* Restore the SP from the FP, and restore the FP and LR from the frame
* record.
@@ -897,6 +902,7 @@ SYM_FUNC_START(call_on_irq_stack)
mov sp, x29
ldp x29, x30, [sp], #16
scs_load_current
+ restore_irq x9
ret
SYM_FUNC_END(call_on_irq_stack)
NOKPROBE(call_on_irq_stack)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index a5ca15daeb8a..08b7042a2e2d 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -288,7 +288,9 @@ static void flush_gcs(void)
if (!system_supports_gcs())
return;
- gcs_free(current);
+ current->thread.gcspr_el0 = 0;
+ current->thread.gcs_base = 0;
+ current->thread.gcs_size = 0;
current->thread.gcs_el0_mode = 0;
write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1);
write_sysreg_s(0, SYS_GCSPR_EL0);
@@ -671,6 +673,11 @@ static void permission_overlay_switch(struct task_struct *next)
current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
if (current->thread.por_el0 != next->thread.por_el0) {
write_sysreg_s(next->thread.por_el0, SYS_POR_EL0);
+ /*
+ * No ISB required as we can tolerate spurious Overlay faults -
+ * the fault handler will check again based on the new value
+ * of POR_EL0.
+ */
}
}
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index a360e52db02f..ee94b72bf8fb 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -141,7 +141,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
addr += n;
if (regs_within_kernel_stack(regs, (unsigned long)addr))
- return *addr;
+ return READ_ONCE_NOCHECK(*addr);
else
return 0;
}
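READ_ONCE_NOCHECK() is the standard idiom for this situation: the address has
already been range-checked against the task's kernel stack, but the slot may
lie outside the object the sanitizer believes the pointer derives from, so an
instrumented read could false-positive under KASAN. The _NOCHECK variant
performs the same single, tear-free access without the instrumentation:

/* Validated-but-uninstrumented stack read (the shape of the fix above) */
if (regs_within_kernel_stack(regs, (unsigned long)addr))
	return READ_ONCE_NOCHECK(*addr);
return 0;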
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 3b3f6b56e733..21a795303568 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -1143,7 +1143,7 @@ static inline unsigned int num_other_online_cpus(void)
void smp_send_stop(void)
{
static unsigned long stop_in_progress;
- cpumask_t mask;
+ static cpumask_t mask;
unsigned long timeout;
/*
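Making the cpumask static trades an NR_CPUS-sized stack allocation on the
stop/panic path for a single BSS copy; that is only safe because
smp_send_stop() serializes on stop_in_progress, so at most one caller ever
populates the mask. A sketch of the shape - the guard logic is assumed, since
only stop_in_progress and the now-static mask appear in the hunk:

static unsigned long stop_in_progress;
static cpumask_t mask;		/* off the stack; callers serialized */

void my_send_stop(void)
{
	/* assumed guard: only the first caller proceeds */
	if (test_and_set_bit(0, &stop_in_progress))
		return;
	cpumask_copy(&mask, cpu_online_mask);
	/* ... IPI every CPU in mask ... */
}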
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index fdbc8beec930..701ea10a63f1 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -108,16 +108,16 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
switch(arch_timer_ctx_index(ctxt)) {
case TIMER_VTIMER:
- __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl;
+ __vcpu_assign_sys_reg(vcpu, CNTV_CTL_EL0, ctl);
break;
case TIMER_PTIMER:
- __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
+ __vcpu_assign_sys_reg(vcpu, CNTP_CTL_EL0, ctl);
break;
case TIMER_HVTIMER:
- __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl;
+ __vcpu_assign_sys_reg(vcpu, CNTHV_CTL_EL2, ctl);
break;
case TIMER_HPTIMER:
- __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl;
+ __vcpu_assign_sys_reg(vcpu, CNTHP_CTL_EL2, ctl);
break;
default:
WARN_ON(1);
@@ -130,16 +130,16 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
switch(arch_timer_ctx_index(ctxt)) {
case TIMER_VTIMER:
- __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval;
+ __vcpu_assign_sys_reg(vcpu, CNTV_CVAL_EL0, cval);
break;
case TIMER_PTIMER:
- __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
+ __vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, cval);
break;
case TIMER_HVTIMER:
- __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval;
+ __vcpu_assign_sys_reg(vcpu, CNTHV_CVAL_EL2, cval);
break;
case TIMER_HPTIMER:
- __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval;
+ __vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, cval);
break;
default:
WARN_ON(1);
@@ -1036,7 +1036,7 @@ void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
if (vcpu_has_nv(vcpu)) {
struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;
- offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
+ offs->vcpu_offset = __ctxt_sys_reg(&vcpu->arch.ctxt, CNTVOFF_EL2);
offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
}
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index de2b4e9c9f9f..23dd3f3fc3eb 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -825,10 +825,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
if (!kvm_arm_vcpu_is_finalized(vcpu))
return -EPERM;
- ret = kvm_arch_vcpu_run_map_fp(vcpu);
- if (ret)
- return ret;
-
if (likely(vcpu_has_run_once(vcpu)))
return 0;
@@ -2129,7 +2125,7 @@ static void cpu_hyp_init(void *discard)
static void cpu_hyp_uninit(void *discard)
{
- if (__this_cpu_read(kvm_hyp_initialized)) {
+ if (!is_protected_kvm_enabled() && __this_cpu_read(kvm_hyp_initialized)) {
cpu_hyp_reset();
__this_cpu_write(kvm_hyp_initialized, 0);
}
@@ -2345,8 +2341,13 @@ static void __init teardown_hyp_mode(void)
free_hyp_pgds();
for_each_possible_cpu(cpu) {
+ if (per_cpu(kvm_hyp_initialized, cpu))
+ continue;
+
free_pages(per_cpu(kvm_arm_hyp_stack_base, cpu), NVHE_STACK_SHIFT - PAGE_SHIFT);
- free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
+
+ if (!kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu])
+ continue;
if (free_sve) {
struct cpu_sve_state *sve_state;
@@ -2354,6 +2355,9 @@ static void __init teardown_hyp_mode(void)
sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
free_pages((unsigned long) sve_state, pkvm_host_sve_state_order());
}
+
+ free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
+
}
}
@@ -2764,7 +2768,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
struct kvm_kernel_irq_routing_entry *new)
{
- if (new->type != KVM_IRQ_ROUTING_MSI)
+ if (old->type != KVM_IRQ_ROUTING_MSI ||
+ new->type != KVM_IRQ_ROUTING_MSI)
return true;
return memcmp(&old->msi, &new->msi, sizeof(new->msi));
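The routing fix reads best with the failure mode in mind: when the old entry
was not MSI, its ->msi union member was never initialised, so the old code
could memcmp() stale bytes and, if they happened to match, wrongly report the
route as unchanged. Checking both types up front confines the byte compare to
the only case where it is meaningful:

/* Route comparison is only defined when both entries are MSI */
if (old->type != KVM_IRQ_ROUTING_MSI ||
    new->type != KVM_IRQ_ROUTING_MSI)
	return true;		/* treat as changed / not comparable */

return memcmp(&old->msi, &new->msi, sizeof(new->msi));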
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 0e4c805e7e89..1a7dab333f55 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -216,9 +216,9 @@ void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu)
void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val)
{
if (val & OSLAR_EL1_OSLK)
- __vcpu_sys_reg(vcpu, OSLSR_EL1) |= OSLSR_EL1_OSLK;
+ __vcpu_rmw_sys_reg(vcpu, OSLSR_EL1, |=, OSLSR_EL1_OSLK);
else
- __vcpu_sys_reg(vcpu, OSLSR_EL1) &= ~OSLSR_EL1_OSLK;
+ __vcpu_rmw_sys_reg(vcpu, OSLSR_EL1, &=, ~OSLSR_EL1_OSLK);
preempt_disable();
kvm_arch_vcpu_put(vcpu);
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 7f6e43d25691..15e17aca1dec 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -15,32 +15,6 @@
#include <asm/sysreg.h>
/*
- * Called on entry to KVM_RUN unless this vcpu previously ran at least
- * once and the most recent prior KVM_RUN for this vcpu was called from
- * the same task as current (highly likely).
- *
- * This is guaranteed to execute before kvm_arch_vcpu_load_fp(vcpu),
- * such that on entering hyp the relevant parts of current are already
- * mapped.
- */
-int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
-{
- struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
- int ret;
-
- /* pKVM has its own tracking of the host fpsimd state. */
- if (is_protected_kvm_enabled())
- return 0;
-
- /* Make sure the host task fpsimd state is visible to hyp: */
- ret = kvm_share_hyp(fpsimd, fpsimd + 1);
- if (ret)
- return ret;
-
- return 0;
-}
-
-/*
* Prepare vcpu for saving the host's FPSIMD state and loading the guest's.
* The actual loading is done by the FPSIMD access trap taken to hyp.
*
@@ -103,8 +77,8 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
fp_state.sve_state = vcpu->arch.sve_state;
fp_state.sve_vl = vcpu->arch.sve_max_vl;
fp_state.sme_state = NULL;
- fp_state.svcr = &__vcpu_sys_reg(vcpu, SVCR);
- fp_state.fpmr = &__vcpu_sys_reg(vcpu, FPMR);
+ fp_state.svcr = __ctxt_sys_reg(&vcpu->arch.ctxt, SVCR);
+ fp_state.fpmr = __ctxt_sys_reg(&vcpu->arch.ctxt, FPMR);
fp_state.fp_type = &vcpu->arch.fp_type;
if (vcpu_has_sve(vcpu))
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 424a5107cddb..6a2a899a344e 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -37,7 +37,7 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
if (unlikely(vcpu_has_nv(vcpu)))
vcpu_write_sys_reg(vcpu, val, reg);
else if (!__vcpu_write_sys_reg_to_cpu(val, reg))
- __vcpu_sys_reg(vcpu, reg) = val;
+ __vcpu_assign_sys_reg(vcpu, reg, val);
}
static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode,
@@ -51,7 +51,7 @@ static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode,
} else if (has_vhe()) {
write_sysreg_el1(val, SYS_SPSR);
} else {
- __vcpu_sys_reg(vcpu, SPSR_EL1) = val;
+ __vcpu_assign_sys_reg(vcpu, SPSR_EL1, val);
}
}
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index bb9f2eecfb67..2ad57b117385 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -45,7 +45,7 @@ static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
if (!vcpu_el1_is_32bit(vcpu))
return;
- __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2);
+ __vcpu_assign_sys_reg(vcpu, FPEXC32_EL2, read_sysreg(fpexc32_el2));
}
static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
@@ -65,6 +65,136 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
}
}
+static inline void __activate_cptr_traps_nvhe(struct kvm_vcpu *vcpu)
+{
+ u64 val = CPTR_NVHE_EL2_RES1 | CPTR_EL2_TAM | CPTR_EL2_TTA;
+
+ /*
+ * Always trap SME since it's not supported in KVM.
+ * TSM is RES1 if SME isn't implemented.
+ */
+ val |= CPTR_EL2_TSM;
+
+ if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
+ val |= CPTR_EL2_TZ;
+
+ if (!guest_owns_fp_regs())
+ val |= CPTR_EL2_TFP;
+
+ write_sysreg(val, cptr_el2);
+}
+
+static inline void __activate_cptr_traps_vhe(struct kvm_vcpu *vcpu)
+{
+ /*
+ * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
+ * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2,
+ * except for some missing controls, such as TAM.
+ * In this case, CPTR_EL2.TAM has the same position with or without
+ * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM
+ * shift value for trapping the AMU accesses.
+ */
+ u64 val = CPTR_EL2_TAM | CPACR_EL1_TTA;
+ u64 cptr;
+
+ if (guest_owns_fp_regs()) {
+ val |= CPACR_EL1_FPEN;
+ if (vcpu_has_sve(vcpu))
+ val |= CPACR_EL1_ZEN;
+ }
+
+ if (!vcpu_has_nv(vcpu))
+ goto write;
+
+ /*
+ * The architecture is a bit crap (what a surprise): an EL2 guest
+ * writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA,
+ * as they are RES0 in the guest's view. To work around it, trap the
+ * sucker using the very same bit it can't set...
+ */
+ if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu))
+ val |= CPTR_EL2_TCPAC;
+
+ /*
+ * Layer the guest hypervisor's trap configuration on top of our own if
+ * we're in a nested context.
+ */
+ if (is_hyp_ctxt(vcpu))
+ goto write;
+
+ cptr = vcpu_sanitised_cptr_el2(vcpu);
+
+ /*
+ * Pay attention, there's some interesting detail here.
+ *
+ * The CPTR_EL2.xEN fields are 2 bits wide, although there are only two
+ * meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest):
+ *
+ * - CPTR_EL2.xEN = x0, traps are enabled
+ * - CPTR_EL2.xEN = x1, traps are disabled
+ *
+ * In other words, bit[0] determines if guest accesses trap or not. In
+ * the interest of simplicity, clear the entire field if the guest
+ * hypervisor has traps enabled to dispel any illusion of something more
+ * complicated taking place.
+ */
+ if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0)))
+ val &= ~CPACR_EL1_FPEN;
+ if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0)))
+ val &= ~CPACR_EL1_ZEN;
+
+ if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP))
+ val |= cptr & CPACR_EL1_E0POE;
+
+ val |= cptr & CPTR_EL2_TCPAC;
+
+write:
+ write_sysreg(val, cpacr_el1);
+}
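+
+To make the xEN handling above concrete: each 2-bit xEN field is only distinguished on bit[0], so the SYS_FIELD_GET(...) & BIT(0) tests reduce to single-bit probes. A standalone illustration, assuming the architectural CPACR_EL1 layout (FPEN at bits [21:20], ZEN at bits [17:16]); the hunk itself uses the kernel's field helpers, not this sketch:
+
+	#include <stdbool.h>
+	#include <stdint.h>
+
+	/* bit[0] of the 2-bit field: x1 = accesses do not trap. */
+	static bool cpacr_fpen_no_trap(uint64_t cpacr)
+	{
+		return (cpacr >> 20) & 1;	/* FPEN bit[0], assumed offset 20 */
+	}
+
+	static bool cpacr_zen_no_trap(uint64_t cpacr)
+	{
+		return (cpacr >> 16) & 1;	/* ZEN bit[0], assumed offset 16 */
+	}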
+
+static inline void __activate_cptr_traps(struct kvm_vcpu *vcpu)
+{
+ if (!guest_owns_fp_regs())
+ __activate_traps_fpsimd32(vcpu);
+
+ if (has_vhe() || has_hvhe())
+ __activate_cptr_traps_vhe(vcpu);
+ else
+ __activate_cptr_traps_nvhe(vcpu);
+}
+
+static inline void __deactivate_cptr_traps_nvhe(struct kvm_vcpu *vcpu)
+{
+ u64 val = CPTR_NVHE_EL2_RES1;
+
+ if (!cpus_have_final_cap(ARM64_SVE))
+ val |= CPTR_EL2_TZ;
+ if (!cpus_have_final_cap(ARM64_SME))
+ val |= CPTR_EL2_TSM;
+
+ write_sysreg(val, cptr_el2);
+}
+
+static inline void __deactivate_cptr_traps_vhe(struct kvm_vcpu *vcpu)
+{
+ u64 val = CPACR_EL1_FPEN;
+
+ if (cpus_have_final_cap(ARM64_SVE))
+ val |= CPACR_EL1_ZEN;
+ if (cpus_have_final_cap(ARM64_SME))
+ val |= CPACR_EL1_SMEN;
+
+ write_sysreg(val, cpacr_el1);
+}
+
+static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
+{
+ if (has_vhe() || has_hvhe())
+ __deactivate_cptr_traps_vhe(vcpu);
+ else
+ __deactivate_cptr_traps_nvhe(vcpu);
+}
+
#define reg_to_fgt_masks(reg) \
({ \
struct fgt_masks *m; \
@@ -456,7 +586,7 @@ static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu)
*/
if (vcpu_has_sve(vcpu)) {
zcr_el1 = read_sysreg_el1(SYS_ZCR);
- __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr_el1;
+ __vcpu_assign_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu), zcr_el1);
/*
* The guest's state is always saved using the guest's max VL.
@@ -486,11 +616,6 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
*/
if (system_supports_sve()) {
__hyp_sve_save_host();
-
- /* Re-enable SVE traps if not supported for the guest vcpu. */
- if (!vcpu_has_sve(vcpu))
- cpacr_clear_set(CPACR_EL1_ZEN, 0);
-
} else {
__fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs));
}
@@ -541,10 +666,7 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
/* Valid trap. Switch the context: */
/* First disable enough traps to allow us to update the registers */
- if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve()))
- cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN);
- else
- cpacr_clear_set(0, CPACR_EL1_FPEN);
+ __deactivate_cptr_traps(vcpu);
isb();
/* Write out the host state if it's in the registers */
@@ -566,6 +688,13 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
*host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED;
+ /*
+ * Re-enable traps necessary for the current state of the guest, e.g.
+ * those enabled by a guest hypervisor. The ERET to the guest will
+ * provide the necessary context synchronization.
+ */
+ __activate_cptr_traps(vcpu);
+
return true;
}
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index b9cff893bbe0..4d0dbea4c56f 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -307,11 +307,11 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq);
vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq);
- __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2);
- __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2);
+ __vcpu_assign_sys_reg(vcpu, DACR32_EL2, read_sysreg(dacr32_el2));
+ __vcpu_assign_sys_reg(vcpu, IFSR32_EL2, read_sysreg(ifsr32_el2));
if (has_vhe() || kvm_debug_regs_in_use(vcpu))
- __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2);
+ __vcpu_assign_sys_reg(vcpu, DBGVCR32_EL2, read_sysreg(dbgvcr32_el2));
}
static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 8e8848de4d47..3206b2c07f82 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -26,7 +26,7 @@ void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
{
- __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
+ __vcpu_assign_sys_reg(vcpu, ZCR_EL1, read_sysreg_el1(SYS_ZCR));
/*
* On saving/restoring guest sve state, always use the maximum VL for
* the guest. The layout of the data when saving the sve state depends
@@ -69,7 +69,10 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
if (!guest_owns_fp_regs())
return;
- cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN);
+ /*
+ * Traps have been disabled by __deactivate_cptr_traps(), but there
+ * hasn't necessarily been a context synchronization event yet.
+ */
isb();
if (vcpu_has_sve(vcpu))
@@ -79,7 +82,7 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
has_fpmr = kvm_has_fpmr(kern_hyp_va(vcpu->kvm));
if (has_fpmr)
- __vcpu_sys_reg(vcpu, FPMR) = read_sysreg_s(SYS_FPMR);
+ __vcpu_assign_sys_reg(vcpu, FPMR, read_sysreg_s(SYS_FPMR));
if (system_supports_sve())
__hyp_sve_restore_host();
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 95d7534c9679..8957734d6183 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -479,6 +479,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
struct kvm_mem_range cur;
kvm_pte_t pte;
+ u64 granule;
s8 level;
int ret;
@@ -496,18 +497,21 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
return -EPERM;
}
- do {
- u64 granule = kvm_granule_size(level);
+ for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) {
+ if (!kvm_level_supports_block_mapping(level))
+ continue;
+ granule = kvm_granule_size(level);
cur.start = ALIGN_DOWN(addr, granule);
cur.end = cur.start + granule;
- level++;
- } while ((level <= KVM_PGTABLE_LAST_LEVEL) &&
- !(kvm_level_supports_block_mapping(level) &&
- range_included(&cur, range)));
+ if (!range_included(&cur, range))
+ continue;
+ *range = cur;
+ return 0;
+ }
- *range = cur;
+ WARN_ON(1);
- return 0;
+ return -EINVAL;
}
int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
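
The rewritten walk above is easier to reason about as a standalone model: try each block-capable granule from largest to smallest and take the first one whose aligned window fits entirely inside the requested range. A simplified sketch assuming 4K pages (1GiB blocks at level 1, 2MiB at level 2, 4KiB pages at level 3); the names and the fixed level set are illustrative, not the kernel's:

	#include <stdint.h>

	#define ALIGN_DOWN(x, a)	((x) & ~((uint64_t)(a) - 1))

	/* Shrink [*start, *end) to the largest granule containing addr
	 * that still fits; -1 if none does (the page-size granule is
	 * expected to always fit in practice, as the WARN_ON implies). */
	static int adjust_range(uint64_t addr, uint64_t *start, uint64_t *end)
	{
		static const uint64_t granule[] = { 1ULL << 30, 2ULL << 20, 1ULL << 12 };

		for (unsigned int i = 0; i < 3; i++) {
			uint64_t s = ALIGN_DOWN(addr, granule[i]);
			uint64_t e = s + granule[i];

			if (s >= *start && e <= *end) {	/* range_included() */
				*start = s;
				*end = e;
				return 0;
			}
		}
		return -1;
	}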
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 73affe1333a4..0e752b515d0f 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -47,65 +47,6 @@ struct fgt_masks hdfgwtr2_masks;
extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
-static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
-{
- u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */
-
- if (!guest_owns_fp_regs())
- __activate_traps_fpsimd32(vcpu);
-
- if (has_hvhe()) {
- val |= CPACR_EL1_TTA;
-
- if (guest_owns_fp_regs()) {
- val |= CPACR_EL1_FPEN;
- if (vcpu_has_sve(vcpu))
- val |= CPACR_EL1_ZEN;
- }
-
- write_sysreg(val, cpacr_el1);
- } else {
- val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1;
-
- /*
- * Always trap SME since it's not supported in KVM.
- * TSM is RES1 if SME isn't implemented.
- */
- val |= CPTR_EL2_TSM;
-
- if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
- val |= CPTR_EL2_TZ;
-
- if (!guest_owns_fp_regs())
- val |= CPTR_EL2_TFP;
-
- write_sysreg(val, cptr_el2);
- }
-}
-
-static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
-{
- if (has_hvhe()) {
- u64 val = CPACR_EL1_FPEN;
-
- if (cpus_have_final_cap(ARM64_SVE))
- val |= CPACR_EL1_ZEN;
- if (cpus_have_final_cap(ARM64_SME))
- val |= CPACR_EL1_SMEN;
-
- write_sysreg(val, cpacr_el1);
- } else {
- u64 val = CPTR_NVHE_EL2_RES1;
-
- if (!cpus_have_final_cap(ARM64_SVE))
- val |= CPTR_EL2_TZ;
- if (!cpus_have_final_cap(ARM64_SME))
- val |= CPTR_EL2_TSM;
-
- write_sysreg(val, cptr_el2);
- }
-}
-
static void __activate_traps(struct kvm_vcpu *vcpu)
{
___activate_traps(vcpu, vcpu->arch.hcr_el2);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index c9b330dc2066..477f1580ffea 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -90,87 +90,6 @@ static u64 __compute_hcr(struct kvm_vcpu *vcpu)
return hcr | (guest_hcr & ~NV_HCR_GUEST_EXCLUDE);
}
-static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
-{
- u64 cptr;
-
- /*
- * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
- * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2,
- * except for some missing controls, such as TAM.
- * In this case, CPTR_EL2.TAM has the same position with or without
- * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM
- * shift value for trapping the AMU accesses.
- */
- u64 val = CPACR_EL1_TTA | CPTR_EL2_TAM;
-
- if (guest_owns_fp_regs()) {
- val |= CPACR_EL1_FPEN;
- if (vcpu_has_sve(vcpu))
- val |= CPACR_EL1_ZEN;
- } else {
- __activate_traps_fpsimd32(vcpu);
- }
-
- if (!vcpu_has_nv(vcpu))
- goto write;
-
- /*
- * The architecture is a bit crap (what a surprise): an EL2 guest
- * writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA,
- * as they are RES0 in the guest's view. To work around it, trap the
- * sucker using the very same bit it can't set...
- */
- if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu))
- val |= CPTR_EL2_TCPAC;
-
- /*
- * Layer the guest hypervisor's trap configuration on top of our own if
- * we're in a nested context.
- */
- if (is_hyp_ctxt(vcpu))
- goto write;
-
- cptr = vcpu_sanitised_cptr_el2(vcpu);
-
- /*
- * Pay attention, there's some interesting detail here.
- *
- * The CPTR_EL2.xEN fields are 2 bits wide, although there are only two
- * meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest):
- *
- * - CPTR_EL2.xEN = x0, traps are enabled
- * - CPTR_EL2.xEN = x1, traps are disabled
- *
- * In other words, bit[0] determines if guest accesses trap or not. In
- * the interest of simplicity, clear the entire field if the guest
- * hypervisor has traps enabled to dispel any illusion of something more
- * complicated taking place.
- */
- if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0)))
- val &= ~CPACR_EL1_FPEN;
- if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0)))
- val &= ~CPACR_EL1_ZEN;
-
- if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP))
- val |= cptr & CPACR_EL1_E0POE;
-
- val |= cptr & CPTR_EL2_TCPAC;
-
-write:
- write_sysreg(val, cpacr_el1);
-}
-
-static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
-{
- u64 val = CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN;
-
- if (cpus_have_final_cap(ARM64_SME))
- val |= CPACR_EL1_SMEN_EL1EN;
-
- write_sysreg(val, cpacr_el1);
-}
-
static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
@@ -223,9 +142,9 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
*/
val = read_sysreg_el0(SYS_CNTP_CVAL);
if (map.direct_ptimer == vcpu_ptimer(vcpu))
- __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
+ __vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, val);
if (map.direct_ptimer == vcpu_hptimer(vcpu))
- __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val;
+ __vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, val);
offset = read_sysreg_s(SYS_CNTPOFF_EL2);
@@ -639,10 +558,10 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
- sysreg_save_host_state_vhe(host_ctxt);
-
fpsimd_lazy_switch_to_guest(vcpu);
+ sysreg_save_host_state_vhe(host_ctxt);
+
/*
* Note that ARM erratum 1165522 requires us to configure both stage 1
* and stage 2 translation for the guest context before we clear
@@ -667,15 +586,23 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
__deactivate_traps(vcpu);
- fpsimd_lazy_switch_to_host(vcpu);
-
sysreg_restore_host_state_vhe(host_ctxt);
+ __debug_switch_to_host(vcpu);
+
+ /*
+ * Ensure that all system register writes above have taken effect
+ * before returning to the host. In VHE mode, CPTR traps for
+ * FPSIMD/SVE/SME also apply to EL2, so FPSIMD/SVE/SME state must be
+ * manipulated after the ISB.
+ */
+ isb();
+
+ fpsimd_lazy_switch_to_host(vcpu);
+
if (guest_owns_fp_regs())
__fpsimd_save_fpexc32(vcpu);
- __debug_switch_to_host(vcpu);
-
return exit_code;
}
NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe);
@@ -705,12 +632,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
*/
local_daif_restore(DAIF_PROCCTX_NOIRQ);
- /*
- * When we exit from the guest we change a number of CPU configuration
- * parameters, such as traps. We rely on the isb() in kvm_call_hyp*()
- * to make sure these changes take effect before running the host or
- * additional guests.
- */
return ret;
}
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index 3814b0b2c937..73e4bc7fde9e 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -18,17 +18,17 @@
static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu)
{
/* These registers are common with EL1 */
- __vcpu_sys_reg(vcpu, PAR_EL1) = read_sysreg(par_el1);
- __vcpu_sys_reg(vcpu, TPIDR_EL1) = read_sysreg(tpidr_el1);
-
- __vcpu_sys_reg(vcpu, ESR_EL2) = read_sysreg_el1(SYS_ESR);
- __vcpu_sys_reg(vcpu, AFSR0_EL2) = read_sysreg_el1(SYS_AFSR0);
- __vcpu_sys_reg(vcpu, AFSR1_EL2) = read_sysreg_el1(SYS_AFSR1);
- __vcpu_sys_reg(vcpu, FAR_EL2) = read_sysreg_el1(SYS_FAR);
- __vcpu_sys_reg(vcpu, MAIR_EL2) = read_sysreg_el1(SYS_MAIR);
- __vcpu_sys_reg(vcpu, VBAR_EL2) = read_sysreg_el1(SYS_VBAR);
- __vcpu_sys_reg(vcpu, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR);
- __vcpu_sys_reg(vcpu, AMAIR_EL2) = read_sysreg_el1(SYS_AMAIR);
+ __vcpu_assign_sys_reg(vcpu, PAR_EL1, read_sysreg(par_el1));
+ __vcpu_assign_sys_reg(vcpu, TPIDR_EL1, read_sysreg(tpidr_el1));
+
+ __vcpu_assign_sys_reg(vcpu, ESR_EL2, read_sysreg_el1(SYS_ESR));
+ __vcpu_assign_sys_reg(vcpu, AFSR0_EL2, read_sysreg_el1(SYS_AFSR0));
+ __vcpu_assign_sys_reg(vcpu, AFSR1_EL2, read_sysreg_el1(SYS_AFSR1));
+ __vcpu_assign_sys_reg(vcpu, FAR_EL2, read_sysreg_el1(SYS_FAR));
+ __vcpu_assign_sys_reg(vcpu, MAIR_EL2, read_sysreg_el1(SYS_MAIR));
+ __vcpu_assign_sys_reg(vcpu, VBAR_EL2, read_sysreg_el1(SYS_VBAR));
+ __vcpu_assign_sys_reg(vcpu, CONTEXTIDR_EL2, read_sysreg_el1(SYS_CONTEXTIDR));
+ __vcpu_assign_sys_reg(vcpu, AMAIR_EL2, read_sysreg_el1(SYS_AMAIR));
/*
* In VHE mode those registers are compatible between EL1 and EL2,
@@ -46,21 +46,21 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu)
* are always trapped, ensuring that the in-memory
* copy is always up-to-date. A small blessing...
*/
- __vcpu_sys_reg(vcpu, SCTLR_EL2) = read_sysreg_el1(SYS_SCTLR);
- __vcpu_sys_reg(vcpu, TTBR0_EL2) = read_sysreg_el1(SYS_TTBR0);
- __vcpu_sys_reg(vcpu, TTBR1_EL2) = read_sysreg_el1(SYS_TTBR1);
- __vcpu_sys_reg(vcpu, TCR_EL2) = read_sysreg_el1(SYS_TCR);
+ __vcpu_assign_sys_reg(vcpu, SCTLR_EL2, read_sysreg_el1(SYS_SCTLR));
+ __vcpu_assign_sys_reg(vcpu, TTBR0_EL2, read_sysreg_el1(SYS_TTBR0));
+ __vcpu_assign_sys_reg(vcpu, TTBR1_EL2, read_sysreg_el1(SYS_TTBR1));
+ __vcpu_assign_sys_reg(vcpu, TCR_EL2, read_sysreg_el1(SYS_TCR));
if (ctxt_has_tcrx(&vcpu->arch.ctxt)) {
- __vcpu_sys_reg(vcpu, TCR2_EL2) = read_sysreg_el1(SYS_TCR2);
+ __vcpu_assign_sys_reg(vcpu, TCR2_EL2, read_sysreg_el1(SYS_TCR2));
if (ctxt_has_s1pie(&vcpu->arch.ctxt)) {
- __vcpu_sys_reg(vcpu, PIRE0_EL2) = read_sysreg_el1(SYS_PIRE0);
- __vcpu_sys_reg(vcpu, PIR_EL2) = read_sysreg_el1(SYS_PIR);
+ __vcpu_assign_sys_reg(vcpu, PIRE0_EL2, read_sysreg_el1(SYS_PIRE0));
+ __vcpu_assign_sys_reg(vcpu, PIR_EL2, read_sysreg_el1(SYS_PIR));
}
if (ctxt_has_s1poe(&vcpu->arch.ctxt))
- __vcpu_sys_reg(vcpu, POR_EL2) = read_sysreg_el1(SYS_POR);
+ __vcpu_assign_sys_reg(vcpu, POR_EL2, read_sysreg_el1(SYS_POR));
}
/*
@@ -70,13 +70,13 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu)
*/
val = read_sysreg_el1(SYS_CNTKCTL);
val &= CNTKCTL_VALID_BITS;
- __vcpu_sys_reg(vcpu, CNTHCTL_EL2) &= ~CNTKCTL_VALID_BITS;
- __vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val;
+ __vcpu_rmw_sys_reg(vcpu, CNTHCTL_EL2, &=, ~CNTKCTL_VALID_BITS);
+ __vcpu_rmw_sys_reg(vcpu, CNTHCTL_EL2, |=, val);
}
- __vcpu_sys_reg(vcpu, SP_EL2) = read_sysreg(sp_el1);
- __vcpu_sys_reg(vcpu, ELR_EL2) = read_sysreg_el1(SYS_ELR);
- __vcpu_sys_reg(vcpu, SPSR_EL2) = read_sysreg_el1(SYS_SPSR);
+ __vcpu_assign_sys_reg(vcpu, SP_EL2, read_sysreg(sp_el1));
+ __vcpu_assign_sys_reg(vcpu, ELR_EL2, read_sysreg_el1(SYS_ELR));
+ __vcpu_assign_sys_reg(vcpu, SPSR_EL2, read_sysreg_el1(SYS_SPSR));
}
static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 4a53e4147fb0..dc1d26559bfa 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -1402,6 +1402,21 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu)
}
}
+#define has_tgran_2(__r, __sz) \
+ ({ \
+ u64 _s1, _s2, _mmfr0 = __r; \
+ \
+ _s2 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \
+ TGRAN##__sz##_2, _mmfr0); \
+ \
+ _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \
+ TGRAN##__sz, _mmfr0); \
+ \
+ ((_s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_NI && \
+ _s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz) || \
+ (_s2 == ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz && \
+ _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI)); \
+ })
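
In prose, has_tgran_2() reports a granule as usable at stage-2 when the TGRANx_2 field advertises support directly, or when it defers to stage-1 ("as TGRANx") and the stage-1 TGRANx field itself is implemented. An equivalent standalone predicate (the encodings are passed in as parameters, since the ID-register constants are not reproduced here):

	/* s2/s1: field values; ni / follow_s1: the NI and "as stage-1"
	 * encodings for this granule size. Equivalent to the macro above. */
	static int tgran2_supported(unsigned int s2, unsigned int s1,
				    unsigned int ni, unsigned int follow_s1)
	{
		if (s2 == ni)
			return 0;
		if (s2 == follow_s1)
			return s1 != ni;
		return 1;		/* support advertised directly */
	}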
/*
* Our emulated CPU doesn't support all the possible features. For the
* sake of simplicity (and probably mental sanity), wipe out a number
@@ -1411,6 +1426,8 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu)
*/
u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
{
+ u64 orig_val = val;
+
switch (reg) {
case SYS_ID_AA64ISAR0_EL1:
/* Support everything but TME */
@@ -1480,13 +1497,16 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
*/
switch (PAGE_SIZE) {
case SZ_4K:
- val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP);
+ if (has_tgran_2(orig_val, 4))
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP);
fallthrough;
case SZ_16K:
- val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP);
+ if (has_tgran_2(orig_val, 16))
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP);
fallthrough;
case SZ_64K:
- val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP);
+ if (has_tgran_2(orig_val, 64))
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP);
break;
}
@@ -1757,7 +1777,7 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
out:
for (enum vcpu_sysreg sr = __SANITISED_REG_START__; sr < NR_SYS_REGS; sr++)
- (void)__vcpu_sys_reg(vcpu, sr);
+ __vcpu_rmw_sys_reg(vcpu, sr, |=, 0);
return 0;
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 25c29107f13f..b03dbda7f1ab 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -178,7 +178,7 @@ static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force)
val |= lower_32_bits(val);
}
- __vcpu_sys_reg(vcpu, reg) = val;
+ __vcpu_assign_sys_reg(vcpu, reg, val);
/* Recreate the perf event to reflect the updated sample_period */
kvm_pmu_create_perf_event(pmc);
@@ -204,7 +204,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
void kvm_pmu_set_counter_value_user(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, select_idx));
- __vcpu_sys_reg(vcpu, counter_index_to_reg(select_idx)) = val;
+ __vcpu_assign_sys_reg(vcpu, counter_index_to_reg(select_idx), val);
kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
}
@@ -239,7 +239,7 @@ static void kvm_pmu_stop_counter(struct kvm_pmc *pmc)
reg = counter_index_to_reg(pmc->idx);
- __vcpu_sys_reg(vcpu, reg) = val;
+ __vcpu_assign_sys_reg(vcpu, reg, val);
kvm_pmu_release_perf_event(pmc);
}
@@ -503,14 +503,14 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1;
if (!kvm_pmc_is_64bit(pmc))
reg = lower_32_bits(reg);
- __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg;
+ __vcpu_assign_sys_reg(vcpu, counter_index_to_reg(i), reg);
/* No overflow? move on */
if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg))
continue;
/* Mark overflow */
- __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
+ __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, BIT(i));
if (kvm_pmu_counter_can_chain(pmc))
kvm_pmu_counter_increment(vcpu, BIT(i + 1),
@@ -556,7 +556,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
perf_event->attr.sample_period = period;
perf_event->hw.sample_period = period;
- __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
+ __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, BIT(idx));
if (kvm_pmu_counter_can_chain(pmc))
kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
@@ -602,7 +602,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
/* The reset bits don't indicate any state, and shouldn't be saved. */
- __vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);
+ __vcpu_assign_sys_reg(vcpu, PMCR_EL0, (val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P)));
if (val & ARMV8_PMU_PMCR_C)
kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
@@ -779,7 +779,7 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
u64 reg;
reg = counter_index_to_evtreg(pmc->idx);
- __vcpu_sys_reg(vcpu, reg) = data & kvm_pmu_evtyper_mask(vcpu->kvm);
+ __vcpu_assign_sys_reg(vcpu, reg, (data & kvm_pmu_evtyper_mask(vcpu->kvm)));
kvm_pmu_create_perf_event(pmc);
}
@@ -914,9 +914,9 @@ void kvm_vcpu_reload_pmu(struct kvm_vcpu *vcpu)
{
u64 mask = kvm_pmu_implemented_counter_mask(vcpu);
- __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= mask;
- __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= mask;
- __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= mask;
+ __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, &=, mask);
+ __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, &=, mask);
+ __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, &=, mask);
kvm_pmu_reprogram_counter_mask(vcpu, mask);
}
@@ -1038,7 +1038,7 @@ static void kvm_arm_set_nr_counters(struct kvm *kvm, unsigned int nr)
u64 val = __vcpu_sys_reg(vcpu, MDCR_EL2);
val &= ~MDCR_EL2_HPMN;
val |= FIELD_PREP(MDCR_EL2_HPMN, kvm->arch.nr_pmu_counters);
- __vcpu_sys_reg(vcpu, MDCR_EL2) = val;
+ __vcpu_assign_sys_reg(vcpu, MDCR_EL2, val);
}
}
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index a6cf2888d150..c20bd6f21e60 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -228,7 +228,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
* to reverse-translate virtual EL2 system registers for a
* non-VHE guest hypervisor.
*/
- __vcpu_sys_reg(vcpu, reg) = val;
+ __vcpu_assign_sys_reg(vcpu, reg, val);
switch (reg) {
case CNTHCTL_EL2:
@@ -263,7 +263,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
return;
memory_write:
- __vcpu_sys_reg(vcpu, reg) = val;
+ __vcpu_assign_sys_reg(vcpu, reg, val);
}
/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
@@ -605,7 +605,7 @@ static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
if ((val ^ rd->val) & ~OSLSR_EL1_OSLK)
return -EINVAL;
- __vcpu_sys_reg(vcpu, rd->reg) = val;
+ __vcpu_assign_sys_reg(vcpu, rd->reg, val);
return 0;
}
@@ -791,7 +791,7 @@ static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
mask |= GENMASK(n - 1, 0);
reset_unknown(vcpu, r);
- __vcpu_sys_reg(vcpu, r->reg) &= mask;
+ __vcpu_rmw_sys_reg(vcpu, r->reg, &=, mask);
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -799,7 +799,7 @@ static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
reset_unknown(vcpu, r);
- __vcpu_sys_reg(vcpu, r->reg) &= GENMASK(31, 0);
+ __vcpu_rmw_sys_reg(vcpu, r->reg, &=, GENMASK(31, 0));
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -811,7 +811,7 @@ static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
return 0;
reset_unknown(vcpu, r);
- __vcpu_sys_reg(vcpu, r->reg) &= kvm_pmu_evtyper_mask(vcpu->kvm);
+ __vcpu_rmw_sys_reg(vcpu, r->reg, &=, kvm_pmu_evtyper_mask(vcpu->kvm));
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -819,7 +819,7 @@ static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
reset_unknown(vcpu, r);
- __vcpu_sys_reg(vcpu, r->reg) &= PMSELR_EL0_SEL_MASK;
+ __vcpu_rmw_sys_reg(vcpu, r->reg, &=, PMSELR_EL0_SEL_MASK);
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -835,7 +835,7 @@ static u64 reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
* The value of PMCR.N field is included when the
* vCPU register is read via kvm_vcpu_read_pmcr().
*/
- __vcpu_sys_reg(vcpu, r->reg) = pmcr;
+ __vcpu_assign_sys_reg(vcpu, r->reg, pmcr);
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -907,7 +907,7 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return false;
if (p->is_write)
- __vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval;
+ __vcpu_assign_sys_reg(vcpu, PMSELR_EL0, p->regval);
else
/* return PMSELR.SEL field */
p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0)
@@ -1076,7 +1076,7 @@ static int set_pmreg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, u64 va
{
u64 mask = kvm_pmu_accessible_counter_mask(vcpu);
- __vcpu_sys_reg(vcpu, r->reg) = val & mask;
+ __vcpu_assign_sys_reg(vcpu, r->reg, val & mask);
kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
return 0;
@@ -1103,10 +1103,10 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
val = p->regval & mask;
if (r->Op2 & 0x1)
/* accessing PMCNTENSET_EL0 */
- __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
+ __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, |=, val);
else
/* accessing PMCNTENCLR_EL0 */
- __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
+ __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, &=, ~val);
kvm_pmu_reprogram_counter_mask(vcpu, val);
} else {
@@ -1129,10 +1129,10 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (r->Op2 & 0x1)
/* accessing PMINTENSET_EL1 */
- __vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val;
+ __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, |=, val);
else
/* accessing PMINTENCLR_EL1 */
- __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
+ __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, &=, ~val);
} else {
p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
}
@@ -1151,10 +1151,10 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write) {
if (r->CRm & 0x2)
/* accessing PMOVSSET_EL0 */
- __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask);
+ __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, (p->regval & mask));
else
/* accessing PMOVSCLR_EL0 */
- __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
+ __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, &=, ~(p->regval & mask));
} else {
p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
}
@@ -1185,8 +1185,8 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (!vcpu_mode_priv(vcpu))
return undef_access(vcpu, p, r);
- __vcpu_sys_reg(vcpu, PMUSERENR_EL0) =
- p->regval & ARMV8_PMU_USERENR_MASK;
+ __vcpu_assign_sys_reg(vcpu, PMUSERENR_EL0,
+ (p->regval & ARMV8_PMU_USERENR_MASK));
} else {
p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0)
& ARMV8_PMU_USERENR_MASK;
@@ -1237,7 +1237,7 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
if (!kvm_supports_32bit_el0())
val |= ARMV8_PMU_PMCR_LC;
- __vcpu_sys_reg(vcpu, r->reg) = val;
+ __vcpu_assign_sys_reg(vcpu, r->reg, val);
kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
return 0;
@@ -2213,7 +2213,7 @@ static u64 reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
if (kvm_has_mte(vcpu->kvm))
clidr |= 2ULL << CLIDR_TTYPE_SHIFT(loc);
- __vcpu_sys_reg(vcpu, r->reg) = clidr;
+ __vcpu_assign_sys_reg(vcpu, r->reg, clidr);
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -2227,7 +2227,7 @@ static int set_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
if ((val & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc))
return -EINVAL;
- __vcpu_sys_reg(vcpu, rd->reg) = val;
+ __vcpu_assign_sys_reg(vcpu, rd->reg, val);
return 0;
}
@@ -2404,7 +2404,7 @@ static bool access_sp_el1(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (p->is_write)
- __vcpu_sys_reg(vcpu, SP_EL1) = p->regval;
+ __vcpu_assign_sys_reg(vcpu, SP_EL1, p->regval);
else
p->regval = __vcpu_sys_reg(vcpu, SP_EL1);
@@ -2428,7 +2428,7 @@ static bool access_spsr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (p->is_write)
- __vcpu_sys_reg(vcpu, SPSR_EL1) = p->regval;
+ __vcpu_assign_sys_reg(vcpu, SPSR_EL1, p->regval);
else
p->regval = __vcpu_sys_reg(vcpu, SPSR_EL1);
@@ -2440,7 +2440,7 @@ static bool access_cntkctl_el12(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (p->is_write)
- __vcpu_sys_reg(vcpu, CNTKCTL_EL1) = p->regval;
+ __vcpu_assign_sys_reg(vcpu, CNTKCTL_EL1, p->regval);
else
p->regval = __vcpu_sys_reg(vcpu, CNTKCTL_EL1);
@@ -2454,7 +2454,9 @@ static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1))
val |= HCR_E2H;
- return __vcpu_sys_reg(vcpu, r->reg) = val;
+ __vcpu_assign_sys_reg(vcpu, r->reg, val);
+
+ return __vcpu_sys_reg(vcpu, r->reg);
}
static unsigned int __el2_visibility(const struct kvm_vcpu *vcpu,
@@ -2622,10 +2624,10 @@ static bool access_mdcr(struct kvm_vcpu *vcpu,
*/
if (hpmn > vcpu->kvm->arch.nr_pmu_counters) {
hpmn = vcpu->kvm->arch.nr_pmu_counters;
- u64_replace_bits(val, hpmn, MDCR_EL2_HPMN);
+ u64p_replace_bits(&val, hpmn, MDCR_EL2_HPMN);
}
- __vcpu_sys_reg(vcpu, MDCR_EL2) = val;
+ __vcpu_assign_sys_reg(vcpu, MDCR_EL2, val);
/*
* Request a reload of the PMU to enable/disable the counters
@@ -2754,7 +2756,7 @@ static int set_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
static u64 reset_mdcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
- __vcpu_sys_reg(vcpu, r->reg) = vcpu->kvm->arch.nr_pmu_counters;
+ __vcpu_assign_sys_reg(vcpu, r->reg, vcpu->kvm->arch.nr_pmu_counters);
return vcpu->kvm->arch.nr_pmu_counters;
}
@@ -4790,7 +4792,7 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
r->reset(vcpu, r);
if (r->reg >= __SANITISED_REG_START__ && r->reg < NR_SYS_REGS)
- (void)__vcpu_sys_reg(vcpu, r->reg);
+ __vcpu_rmw_sys_reg(vcpu, r->reg, |=, 0);
}
set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags);
@@ -5012,7 +5014,7 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
if (r->set_user) {
ret = (r->set_user)(vcpu, r, val);
} else {
- __vcpu_sys_reg(vcpu, r->reg) = val;
+ __vcpu_assign_sys_reg(vcpu, r->reg, val);
ret = 0;
}
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index cc6338d38766..ef97d9fc67cc 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -137,7 +137,7 @@ static inline u64 reset_unknown(struct kvm_vcpu *vcpu,
{
BUG_ON(!r->reg);
BUG_ON(r->reg >= NR_SYS_REGS);
- __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
+ __vcpu_assign_sys_reg(vcpu, r->reg, 0x1de7ec7edbadc0deULL);
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -145,7 +145,7 @@ static inline u64 reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
BUG_ON(!r->reg);
BUG_ON(r->reg >= NR_SYS_REGS);
- __vcpu_sys_reg(vcpu, r->reg) = r->val;
+ __vcpu_assign_sys_reg(vcpu, r->reg, r->val);
return __vcpu_sys_reg(vcpu, r->reg);
}
diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c
index 4f6954c30674..679aafe77de2 100644
--- a/arch/arm64/kvm/vgic/vgic-v3-nested.c
+++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c
@@ -36,6 +36,11 @@ struct shadow_if {
static DEFINE_PER_CPU(struct shadow_if, shadow_if);
+static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx)
+{
+ return hweight16(shadow_if->lr_map & (BIT(idx) - 1));
+}
+
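+
+lr_map_idx_to_shadow_idx() is a rank query: the shadow slot of hardware LR idx is the count of in-use LRs below it in the bitmap, which is what keeps the shadow array densely packed. A standalone equivalent, using a generic popcount in place of the kernel's hweight16():
+
+	#include <stdint.h>
+
+	static int map_idx_to_shadow_idx(uint16_t lr_map, int idx)
+	{
+		/* Number of set bits strictly below idx. */
+		return __builtin_popcount(lr_map & ((1u << idx) - 1));
+	}
+
+For example, with lr_map = 0b1011, indices 0, 1 and 3 map to shadow slots 0, 1 and 2 respectively.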
/*
* Nesting GICv3 support
*
@@ -209,6 +214,29 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
return reg;
}
+static u64 translate_lr_pintid(struct kvm_vcpu *vcpu, u64 lr)
+{
+ struct vgic_irq *irq;
+
+ if (!(lr & ICH_LR_HW))
+ return lr;
+
+ /* We have the HW bit set, check for validity of pINTID */
+ irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
+ /* If there was no real mapping, nuke the HW bit */
+ if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI)
+ lr &= ~ICH_LR_HW;
+
+ /* Translate the virtual mapping to the real one, even if invalid */
+ if (irq) {
+ lr &= ~ICH_LR_PHYS_ID_MASK;
+ lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+
+ return lr;
+}
+
/*
* For LRs which have HW bit set such as timer interrupts, we modify them to
* have the host hardware interrupt number instead of the virtual one programmed
@@ -217,58 +245,37 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu,
struct vgic_v3_cpu_if *s_cpu_if)
{
- unsigned long lr_map = 0;
- int index = 0;
+ struct shadow_if *shadow_if;
+
+ shadow_if = container_of(s_cpu_if, struct shadow_if, cpuif);
+ shadow_if->lr_map = 0;
for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
- struct vgic_irq *irq;
if (!(lr & ICH_LR_STATE))
- lr = 0;
-
- if (!(lr & ICH_LR_HW))
- goto next;
-
- /* We have the HW bit set, check for validity of pINTID */
- irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
- if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI ) {
- /* There was no real mapping, so nuke the HW bit */
- lr &= ~ICH_LR_HW;
- if (irq)
- vgic_put_irq(vcpu->kvm, irq);
- goto next;
- }
-
- /* Translate the virtual mapping to the real one */
- lr &= ~ICH_LR_PHYS_ID_MASK;
- lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
+ continue;
- vgic_put_irq(vcpu->kvm, irq);
+ lr = translate_lr_pintid(vcpu, lr);
-next:
- s_cpu_if->vgic_lr[index] = lr;
- if (lr) {
- lr_map |= BIT(i);
- index++;
- }
+ s_cpu_if->vgic_lr[hweight16(shadow_if->lr_map)] = lr;
+ shadow_if->lr_map |= BIT(i);
}
- container_of(s_cpu_if, struct shadow_if, cpuif)->lr_map = lr_map;
- s_cpu_if->used_lrs = index;
+ s_cpu_if->used_lrs = hweight16(shadow_if->lr_map);
}
void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
{
struct shadow_if *shadow_if = get_shadow_if();
- int i, index = 0;
+ int i;
for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
struct vgic_irq *irq;
if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE))
- goto next;
+ continue;
/*
* If we had a HW lr programmed by the guest hypervisor, we
@@ -277,15 +284,13 @@ void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
*/
irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */
- goto next;
+ continue;
- lr = __gic_v3_get_lr(index);
+ lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i));
if (!(lr & ICH_LR_STATE))
irq->active = false;
vgic_put_irq(vcpu->kvm, irq);
- next:
- index++;
}
}
@@ -356,25 +361,23 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
val &= ~ICH_HCR_EL2_EOIcount_MASK;
val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK);
- __vcpu_sys_reg(vcpu, ICH_HCR_EL2) = val;
- __vcpu_sys_reg(vcpu, ICH_VMCR_EL2) = s_cpu_if->vgic_vmcr;
+ __vcpu_assign_sys_reg(vcpu, ICH_HCR_EL2, val);
+ __vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, s_cpu_if->vgic_vmcr);
for (i = 0; i < 4; i++) {
- __vcpu_sys_reg(vcpu, ICH_AP0RN(i)) = s_cpu_if->vgic_ap0r[i];
- __vcpu_sys_reg(vcpu, ICH_AP1RN(i)) = s_cpu_if->vgic_ap1r[i];
+ __vcpu_assign_sys_reg(vcpu, ICH_AP0RN(i), s_cpu_if->vgic_ap0r[i]);
+ __vcpu_assign_sys_reg(vcpu, ICH_AP1RN(i), s_cpu_if->vgic_ap1r[i]);
}
for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
val = __vcpu_sys_reg(vcpu, ICH_LRN(i));
val &= ~ICH_LR_STATE;
- val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE;
+ val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE;
- __vcpu_sys_reg(vcpu, ICH_LRN(i)) = val;
- s_cpu_if->vgic_lr[i] = 0;
+ __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val);
}
- shadow_if->lr_map = 0;
vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0;
}
@@ -398,9 +401,7 @@ void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu)
{
bool level;
- level = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En;
- if (level)
- level &= vgic_v3_get_misr(vcpu);
+ level = (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En) && vgic_v3_get_misr(vcpu);
kvm_vgic_inject_irq(vcpu->kvm, vcpu,
vcpu->kvm->arch.vgic.mi_intid, level, vcpu);
}
diff --git a/arch/arm64/lib/crypto/poly1305-glue.c b/arch/arm64/lib/crypto/poly1305-glue.c
index 6a661cf04821..c9a74766785b 100644
--- a/arch/arm64/lib/crypto/poly1305-glue.c
+++ b/arch/arm64/lib/crypto/poly1305-glue.c
@@ -38,14 +38,14 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src,
unsigned int todo = min_t(unsigned int, len, SZ_4K);
kernel_neon_begin();
- poly1305_blocks_neon(state, src, todo, 1);
+ poly1305_blocks_neon(state, src, todo, padbit);
kernel_neon_end();
len -= todo;
src += todo;
} while (len);
} else
- poly1305_blocks(state, src, len, 1);
+ poly1305_blocks(state, src, len, padbit);
}
EXPORT_SYMBOL_GPL(poly1305_blocks_arch);
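
The padbit fix matters because each 16-byte Poly1305 block is accumulated as a 130-bit addend, block + padbit * 2^128; callers pass padbit = 0 for a final block they have already padded, and the arch glue previously forced it to 1 on both the NEON and scalar paths. An illustrative (hypothetical, little-endian-host-only) view of how padbit enters the per-block addend:

	#include <stdint.h>
	#include <string.h>

	struct blk130 { uint64_t lo, hi, top; };	/* lo + hi*2^64 + top*2^128 */

	static struct blk130 block_to_addend(const uint8_t b[16], uint32_t padbit)
	{
		struct blk130 n;

		memcpy(&n.lo, b, 8);		/* assumes a little-endian host */
		memcpy(&n.hi, b + 8, 8);
		n.top = padbit;			/* the bit that was being forced to 1 */
		return n;
	}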
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index ec0a337891dd..11eb8d1adc84 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -487,17 +487,29 @@ static void do_bad_area(unsigned long far, unsigned long esr,
}
}
-static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma,
- unsigned int mm_flags)
+static bool fault_from_pkey(struct vm_area_struct *vma, unsigned int mm_flags)
{
- unsigned long iss2 = ESR_ELx_ISS2(esr);
-
if (!system_supports_poe())
return false;
- if (esr_fsc_is_permission_fault(esr) && (iss2 & ESR_ELx_Overlay))
- return true;
-
+ /*
+ * We do not check whether an Overlay fault has occurred because we
+ * cannot make a decision based solely on its value:
+ *
+ * - If Overlay is set, a fault did occur due to POE, but it may be
+ * spurious in those cases where we update POR_EL0 without ISB (e.g.
+ * on context-switch). We would then need to manually check POR_EL0
+ * against vma_pkey(vma), which is exactly what
+ * arch_vma_access_permitted() does.
+ *
+ * - If Overlay is not set, we may still need to report a pkey fault.
+ * This is the case if an access was made within a mapping but with no
+ * page mapped, and POR_EL0 forbids the access (according to
+ * vma_pkey()). Such an access will result in a SIGSEGV regardless
+ * because core code checks arch_vma_access_permitted(), but in order
+ * to report the correct error code - SEGV_PKUERR - we must handle
+ * that case here.
+ */
return !arch_vma_access_permitted(vma,
mm_flags & FAULT_FLAG_WRITE,
mm_flags & FAULT_FLAG_INSTRUCTION,
@@ -635,7 +647,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
goto bad_area;
}
- if (fault_from_pkey(esr, vma, mm_flags)) {
+ if (fault_from_pkey(vma, mm_flags)) {
pkey = vma_pkey(vma);
vma_end_read(vma);
fault = 0;
@@ -679,7 +691,7 @@ retry:
goto bad_area;
}
- if (fault_from_pkey(esr, vma, mm_flags)) {
+ if (fault_from_pkey(vma, mm_flags)) {
pkey = vma_pkey(vma);
mmap_read_unlock(mm);
fault = 0;
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 8fcf59ba39db..00ab1d648db6 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1305,7 +1305,8 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
next = addr;
end = addr + PUD_SIZE;
do {
- pmd_free_pte_page(pmdp, next);
+ if (pmd_present(pmdp_get(pmdp)))
+ pmd_free_pte_page(pmdp, next);
} while (pmdp++, next += PMD_SIZE, next != end);
pud_clear(pudp);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 80d470aa469d..54dccfd6aa11 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -518,7 +518,6 @@ alternative_else_nop_endif
msr REG_PIR_EL1, x0
orr tcr2, tcr2, TCR2_EL1_PIE
- msr REG_TCR2_EL1, x0
.Lskip_indirection:
diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index b8378431aeff..5a394be09c35 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -200,7 +200,7 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
}
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
index 9fbdfdbc539f..fbf24d1d1ca6 100644
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -387,7 +387,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
(((type & 0x1f) << 1) | \
((offset & 0x3ffff8) << 10) | ((offset & 0x7) << 7)) })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index 80ddb5edb845..b04d2cef935f 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -10,5 +10,4 @@ generic-y += user.h
generic-y += ioctl.h
generic-y += mmzone.h
generic-y += statfs.h
-generic-y += param.h
generic-y += text-patching.h
diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h
index fe198b473f84..e739dbc6329d 100644
--- a/arch/loongarch/include/asm/addrspace.h
+++ b/arch/loongarch/include/asm/addrspace.h
@@ -18,12 +18,12 @@
/*
* This gives the physical RAM offset.
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifndef PHYS_OFFSET
#define PHYS_OFFSET _UL(0)
#endif
extern unsigned long vm_map_base;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#ifndef IO_BASE
#define IO_BASE CSR_DMW0_BASE
@@ -66,7 +66,7 @@ extern unsigned long vm_map_base;
#define FIXADDR_TOP ((unsigned long)(long)(int)0xfffe0000)
#endif
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define _ATYPE_
#define _ATYPE32_
#define _ATYPE64_
@@ -85,7 +85,7 @@ extern unsigned long vm_map_base;
/*
* 32/64-bit LoongArch address spaces
*/
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define _ACAST32_
#define _ACAST64_
#else
diff --git a/arch/loongarch/include/asm/alternative-asm.h b/arch/loongarch/include/asm/alternative-asm.h
index ff3d10ac393f..7dc29bd9b2f0 100644
--- a/arch/loongarch/include/asm/alternative-asm.h
+++ b/arch/loongarch/include/asm/alternative-asm.h
@@ -2,7 +2,7 @@
#ifndef _ASM_ALTERNATIVE_ASM_H
#define _ASM_ALTERNATIVE_ASM_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#include <asm/asm.h>
@@ -77,6 +77,6 @@
.previous
.endm
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_ALTERNATIVE_ASM_H */
diff --git a/arch/loongarch/include/asm/alternative.h b/arch/loongarch/include/asm/alternative.h
index cee7b29785ab..b5bae21fb3c8 100644
--- a/arch/loongarch/include/asm/alternative.h
+++ b/arch/loongarch/include/asm/alternative.h
@@ -2,7 +2,7 @@
#ifndef _ASM_ALTERNATIVE_H
#define _ASM_ALTERNATIVE_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/stddef.h>
@@ -106,6 +106,6 @@ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
(asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory"))
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_ALTERNATIVE_H */
diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h
index df05005f2b80..d60bdf2e6377 100644
--- a/arch/loongarch/include/asm/asm-extable.h
+++ b/arch/loongarch/include/asm/asm-extable.h
@@ -7,7 +7,7 @@
#define EX_TYPE_UACCESS_ERR_ZERO 2
#define EX_TYPE_BPF 3
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
.pushsection __ex_table, "a"; \
@@ -22,7 +22,7 @@
__ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0)
.endm
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#include <linux/bits.h>
#include <linux/stringify.h>
@@ -60,6 +60,6 @@
#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
_ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_ASM_EXTABLE_H */
diff --git a/arch/loongarch/include/asm/asm.h b/arch/loongarch/include/asm/asm.h
index f591b3245def..f018d26fc995 100644
--- a/arch/loongarch/include/asm/asm.h
+++ b/arch/loongarch/include/asm/asm.h
@@ -110,7 +110,7 @@
#define LONG_SRA srai.w
#define LONG_SRAV sra.w
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define LONG .word
#endif
#define LONGSIZE 4
@@ -131,7 +131,7 @@
#define LONG_SRA srai.d
#define LONG_SRAV sra.d
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define LONG .dword
#endif
#define LONGSIZE 8
@@ -158,7 +158,7 @@
#define PTR_SCALESHIFT 2
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define PTR .word
#endif
#define PTRSIZE 4
@@ -181,7 +181,7 @@
#define PTR_SCALESHIFT 3
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define PTR .dword
#endif
#define PTRSIZE 8
diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h
index 98cf4d7b4b0a..dfb982fe8701 100644
--- a/arch/loongarch/include/asm/cpu.h
+++ b/arch/loongarch/include/asm/cpu.h
@@ -46,7 +46,7 @@
#define PRID_PRODUCT_MASK 0x0fff
-#if !defined(__ASSEMBLY__)
+#if !defined(__ASSEMBLER__)
enum cpu_type_enum {
CPU_UNKNOWN,
@@ -55,7 +55,7 @@ enum cpu_type_enum {
CPU_LAST
};
-#endif /* !__ASSEMBLY */
+#endif /* !__ASSEMBLER__ */
/*
* ISA Level encodings
diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
index 6e0a99763a9a..f4caaf764f9e 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -14,7 +14,7 @@
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifndef CONFIG_DYNAMIC_FTRACE
@@ -84,7 +84,7 @@ __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/loongarch/include/asm/gpr-num.h b/arch/loongarch/include/asm/gpr-num.h
index 996038da806d..af95b941f48b 100644
--- a/arch/loongarch/include/asm/gpr-num.h
+++ b/arch/loongarch/include/asm/gpr-num.h
@@ -2,7 +2,7 @@
#ifndef __ASM_GPR_NUM_H
#define __ASM_GPR_NUM_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.equ .L__gpr_num_zero, 0
.irp num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
@@ -25,7 +25,7 @@
.equ .L__gpr_num_$s\num, 23 + \num
.endr
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#define __DEFINE_ASM_GPR_NUMS \
" .equ .L__gpr_num_zero, 0\n" \
@@ -47,6 +47,6 @@
" .equ .L__gpr_num_$s\\num, 23 + \\num\n" \
" .endr\n" \
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_GPR_NUM_H */
diff --git a/arch/loongarch/include/asm/irqflags.h b/arch/loongarch/include/asm/irqflags.h
index 003172b8406b..620163628a7f 100644
--- a/arch/loongarch/include/asm/irqflags.h
+++ b/arch/loongarch/include/asm/irqflags.h
@@ -5,7 +5,7 @@
#ifndef _ASM_IRQFLAGS_H
#define _ASM_IRQFLAGS_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/compiler.h>
#include <linux/stringify.h>
@@ -80,6 +80,6 @@ static inline int arch_irqs_disabled(void)
return arch_irqs_disabled_flags(arch_local_save_flags());
}
-#endif /* #ifndef __ASSEMBLY__ */
+#endif /* #ifndef __ASSEMBLER__ */
#endif /* _ASM_IRQFLAGS_H */
diff --git a/arch/loongarch/include/asm/jump_label.h b/arch/loongarch/include/asm/jump_label.h
index 8a924bd69d19..4000c7603d8e 100644
--- a/arch/loongarch/include/asm/jump_label.h
+++ b/arch/loongarch/include/asm/jump_label.h
@@ -7,7 +7,7 @@
#ifndef __ASM_JUMP_LABEL_H
#define __ASM_JUMP_LABEL_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
@@ -50,5 +50,5 @@ l_yes:
return true;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_JUMP_LABEL_H */
diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h
index 7f52bd31b9d4..62f139a9c87d 100644
--- a/arch/loongarch/include/asm/kasan.h
+++ b/arch/loongarch/include/asm/kasan.h
@@ -2,7 +2,7 @@
#ifndef __ASM_KASAN_H
#define __ASM_KASAN_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/linkage.h>
#include <linux/mmzone.h>
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index d84dac88a584..a0994d226eff 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -9,15 +9,15 @@
#include <linux/linkage.h>
#include <linux/types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <larchintrin.h>
/* CPUCFG */
#define read_cpucfg(reg) __cpucfg(reg)
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
/* LoongArch Registers */
#define REG_ZERO 0x0
@@ -53,7 +53,7 @@
#define REG_S7 0x1e
#define REG_S8 0x1f
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/* Bit fields for CPUCFG registers */
#define LOONGARCH_CPUCFG0 0x0
@@ -171,7 +171,7 @@
* SW emulation for KVM hypervisor, see arch/loongarch/include/uapi/asm/kvm_para.h
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/* CSR */
#define csr_read32(reg) __csrrd_w(reg)
@@ -187,7 +187,7 @@
#define iocsr_write32(val, reg) __iocsrwr_w(val, reg)
#define iocsr_write64(val, reg) __iocsrwr_d(val, reg)
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/* CSR register number */
@@ -1195,7 +1195,7 @@
#define LOONGARCH_IOCSR_EXTIOI_ROUTE_BASE 0x1c00
#define IOCSR_EXTIOI_VECTOR_NUM 256
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
static __always_inline u64 drdtime(void)
{
@@ -1357,7 +1357,7 @@ __BUILD_CSR_OP(tlbidx)
#define clear_csr_estat(val) \
csr_xchg32(~(val), val, LOONGARCH_CSR_ESTAT)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/* Generic EntryLo bit definitions */
#define ENTRYLO_V (_ULCAST_(1) << 0)
diff --git a/arch/loongarch/include/asm/orc_types.h b/arch/loongarch/include/asm/orc_types.h
index caf1f71a1057..d5fa98d1d177 100644
--- a/arch/loongarch/include/asm/orc_types.h
+++ b/arch/loongarch/include/asm/orc_types.h
@@ -34,7 +34,7 @@
#define ORC_TYPE_REGS 3
#define ORC_TYPE_REGS_PARTIAL 4
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* This struct is more or less a vastly simplified version of the DWARF Call
* Frame Information standard. It contains only the necessary parts of DWARF
@@ -53,6 +53,6 @@ struct orc_entry {
unsigned int type:3;
unsigned int signal:1;
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ORC_TYPES_H */
diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h
index 7368f12b7cb1..a3aaf34fba16 100644
--- a/arch/loongarch/include/asm/page.h
+++ b/arch/loongarch/include/asm/page.h
@@ -15,7 +15,7 @@
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/kernel.h>
#include <linux/pfn.h>
@@ -110,6 +110,6 @@ extern int __virt_addr_valid(volatile void *kaddr);
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_PAGE_H */
diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h
index 45bfc65a0c9f..7bbfb04a54cc 100644
--- a/arch/loongarch/include/asm/pgtable-bits.h
+++ b/arch/loongarch/include/asm/pgtable-bits.h
@@ -92,7 +92,7 @@
#define PAGE_KERNEL_WUC __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \
_PAGE_GLOBAL | _PAGE_KERN | _CACHE_WUC)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL_SUC)
@@ -127,6 +127,6 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
return __pgprot(prot);
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_PGTABLE_BITS_H */
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index a3f17914dbab..f2aeff544cee 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -55,7 +55,7 @@
#define USER_PTRS_PER_PGD ((TASK_SIZE64 / PGDIR_SIZE)?(TASK_SIZE64 / PGDIR_SIZE):1)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/mm_types.h>
#include <linux/mmzone.h>
@@ -301,7 +301,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) })
#define __swp_entry_to_pmd(x) ((pmd_t) { (x).val | _PAGE_HUGE })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
@@ -618,6 +618,6 @@ static inline long pmd_protnone(pmd_t pmd)
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_PGTABLE_H */
diff --git a/arch/loongarch/include/asm/prefetch.h b/arch/loongarch/include/asm/prefetch.h
index 1672262a5e2e..0b168cdaae9a 100644
--- a/arch/loongarch/include/asm/prefetch.h
+++ b/arch/loongarch/include/asm/prefetch.h
@@ -8,7 +8,7 @@
#define Pref_Load 0
#define Pref_Store 8
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro __pref hint addr
#ifdef CONFIG_CPU_HAS_PREFETCH
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index ad0bd234a0f1..3a47f52959a8 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -39,7 +39,7 @@ int loongson_cpu_disable(void);
void loongson_cpu_die(unsigned int cpu);
#endif
-static inline void plat_smp_setup(void)
+static inline void __init plat_smp_setup(void)
{
loongson_smp_setup();
}
diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h
index 4f5a9441754e..9dfa2ef00816 100644
--- a/arch/loongarch/include/asm/thread_info.h
+++ b/arch/loongarch/include/asm/thread_info.h
@@ -10,7 +10,7 @@
#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/processor.h>
@@ -53,7 +53,7 @@ static inline struct thread_info *current_thread_info(void)
register unsigned long current_stack_pointer __asm__("$sp");
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/* thread information allocation */
#define THREAD_SIZE SZ_16K
diff --git a/arch/loongarch/include/asm/types.h b/arch/loongarch/include/asm/types.h
index baf15a0dcf8b..0edd731f3d6a 100644
--- a/arch/loongarch/include/asm/types.h
+++ b/arch/loongarch/include/asm/types.h
@@ -8,7 +8,7 @@
#include <asm-generic/int-ll64.h>
#include <uapi/asm/types.h>
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define _ULCAST_
#define _U64CAST_
#else
diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h
index 2c68bc72736c..16c7f7e465a0 100644
--- a/arch/loongarch/include/asm/unwind_hints.h
+++ b/arch/loongarch/include/asm/unwind_hints.h
@@ -5,7 +5,7 @@
#include <linux/objtool.h>
#include <asm/orc_types.h>
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro UNWIND_HINT_UNDEFINED
UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED
@@ -23,7 +23,7 @@
UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_CALL
.endm
-#else /* !__ASSEMBLY__ */
+#else /* !__ASSEMBLER__ */
#define UNWIND_HINT_SAVE \
UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0)
@@ -31,6 +31,6 @@
#define UNWIND_HINT_RESTORE \
UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0)
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_LOONGARCH_UNWIND_HINTS_H */
diff --git a/arch/loongarch/include/asm/vdso/arch_data.h b/arch/loongarch/include/asm/vdso/arch_data.h
index 322d0a5f1c84..395ec223bcbe 100644
--- a/arch/loongarch/include/asm/vdso/arch_data.h
+++ b/arch/loongarch/include/asm/vdso/arch_data.h
@@ -7,7 +7,7 @@
#ifndef _VDSO_ARCH_DATA_H
#define _VDSO_ARCH_DATA_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/asm.h>
#include <asm/vdso.h>
@@ -20,6 +20,6 @@ struct vdso_arch_data {
struct vdso_pcpu_data pdata[NR_CPUS];
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/loongarch/include/asm/vdso/getrandom.h b/arch/loongarch/include/asm/vdso/getrandom.h
index a81724b69f29..2ff05003c6e7 100644
--- a/arch/loongarch/include/asm/vdso/getrandom.h
+++ b/arch/loongarch/include/asm/vdso/getrandom.h
@@ -5,7 +5,7 @@
#ifndef __ASM_VDSO_GETRANDOM_H
#define __ASM_VDSO_GETRANDOM_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/unistd.h>
#include <asm/vdso/vdso.h>
@@ -28,6 +28,6 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns
return ret;
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_GETRANDOM_H */
diff --git a/arch/loongarch/include/asm/vdso/gettimeofday.h b/arch/loongarch/include/asm/vdso/gettimeofday.h
index f15503e3336c..dcafabca9bb6 100644
--- a/arch/loongarch/include/asm/vdso/gettimeofday.h
+++ b/arch/loongarch/include/asm/vdso/gettimeofday.h
@@ -7,7 +7,7 @@
#ifndef __ASM_VDSO_GETTIMEOFDAY_H
#define __ASM_VDSO_GETTIMEOFDAY_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/unistd.h>
#include <asm/vdso/vdso.h>
@@ -89,6 +89,6 @@ static inline bool loongarch_vdso_hres_capable(void)
}
#define __arch_vdso_hres_capable loongarch_vdso_hres_capable
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/loongarch/include/asm/vdso/processor.h b/arch/loongarch/include/asm/vdso/processor.h
index ef5770b343a0..1e255373b0b8 100644
--- a/arch/loongarch/include/asm/vdso/processor.h
+++ b/arch/loongarch/include/asm/vdso/processor.h
@@ -5,10 +5,10 @@
#ifndef __ASM_VDSO_PROCESSOR_H
#define __ASM_VDSO_PROCESSOR_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define cpu_relax() barrier()
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h
index 50c65fb29daf..04bd2d452876 100644
--- a/arch/loongarch/include/asm/vdso/vdso.h
+++ b/arch/loongarch/include/asm/vdso/vdso.h
@@ -7,7 +7,7 @@
#ifndef _ASM_VDSO_VDSO_H
#define _ASM_VDSO_VDSO_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/asm.h>
#include <asm/page.h>
@@ -16,6 +16,6 @@
#define VVAR_SIZE (VDSO_NR_PAGES << PAGE_SHIFT)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/loongarch/include/asm/vdso/vsyscall.h b/arch/loongarch/include/asm/vdso/vsyscall.h
index 1140b54b4bc8..558eb9dfda52 100644
--- a/arch/loongarch/include/asm/vdso/vsyscall.h
+++ b/arch/loongarch/include/asm/vdso/vsyscall.h
@@ -2,13 +2,13 @@
#ifndef __ASM_VDSO_VSYSCALL_H
#define __ASM_VDSO_VSYSCALL_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <vdso/datapage.h>
/* The asm-generic header needs to be included after the definitions above */
#include <asm-generic/vdso/vsyscall.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
index a54cd6fd3796..1367ca759468 100644
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -10,6 +10,7 @@
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/efi-bgrt.h>
+#include <linux/export.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/memblock.h>
diff --git a/arch/loongarch/kernel/alternative.c b/arch/loongarch/kernel/alternative.c
index 4ad13847e962..0e0c766df1e3 100644
--- a/arch/loongarch/kernel/alternative.c
+++ b/arch/loongarch/kernel/alternative.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/export.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <asm/alternative.h>
diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c
index de21e72759ee..860a3bc030e0 100644
--- a/arch/loongarch/kernel/efi.c
+++ b/arch/loongarch/kernel/efi.c
@@ -144,6 +144,18 @@ void __init efi_init(void)
if (efi_memmap_init_early(&data) < 0)
panic("Unable to map EFI memory map.\n");
+ /*
+ * Reserve the physical memory region occupied by the EFI
+ * memory map table (header + descriptors). This is crucial
+ * for kdump, as the kdump kernel relies on this original
+ * memmap passed by the bootloader. Without reservation,
+ * this region could be overwritten by the primary kernel.
+ * Also, set the EFI_PRESERVE_BS_REGIONS flag to indicate that
+ * critical boot services code/data regions like this are preserved.
+ */
+ memblock_reserve((phys_addr_t)boot_memmap, sizeof(*tbl) + data.size);
+ set_bit(EFI_PRESERVE_BS_REGIONS, &efi.flags);
+
early_memunmap(tbl, sizeof(*tbl));
}
diff --git a/arch/loongarch/kernel/elf.c b/arch/loongarch/kernel/elf.c
index 0fa81ced28dc..3d98c6aa00db 100644
--- a/arch/loongarch/kernel/elf.c
+++ b/arch/loongarch/kernel/elf.c
@@ -6,7 +6,6 @@
#include <linux/binfmts.h>
#include <linux/elf.h>
-#include <linux/export.h>
#include <linux/sched.h>
#include <asm/cpu-features.h>
diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c
index 4c476904227f..141b49bd989c 100644
--- a/arch/loongarch/kernel/kfpu.c
+++ b/arch/loongarch/kernel/kfpu.c
@@ -4,6 +4,7 @@
*/
#include <linux/cpu.h>
+#include <linux/export.h>
#include <linux/init.h>
#include <asm/fpu.h>
#include <asm/smp.h>
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index e5a39bbad078..b1b51f920b23 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -1,5 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/export.h>
#include <linux/types.h>
#include <linux/interrupt.h>
#include <linux/irq_work.h>
diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index bc75a3a69fc8..367906b10f81 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -102,7 +102,7 @@ static int constant_timer_next_event(unsigned long delta, struct clock_event_dev
return 0;
}
-static unsigned long __init get_loops_per_jiffy(void)
+static unsigned long get_loops_per_jiffy(void)
{
unsigned long lpj = (unsigned long)const_clock_freq;
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 47fc2de6d150..3d9be6ca7ec5 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/module.h>
+#include <linux/export.h>
#include <linux/extable.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
diff --git a/arch/loongarch/kernel/unwind_guess.c b/arch/loongarch/kernel/unwind_guess.c
index 98379b7d4147..08d7951b2f60 100644
--- a/arch/loongarch/kernel/unwind_guess.c
+++ b/arch/loongarch/kernel/unwind_guess.c
@@ -3,6 +3,7 @@
* Copyright (C) 2022 Loongson Technology Corporation Limited
*/
#include <asm/unwind.h>
+#include <linux/export.h>
unsigned long unwind_get_return_address(struct unwind_state *state)
{
diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c
index d623935a7547..0005be49b056 100644
--- a/arch/loongarch/kernel/unwind_orc.c
+++ b/arch/loongarch/kernel/unwind_orc.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/objtool.h>
+#include <linux/export.h>
#include <linux/module.h>
+#include <linux/objtool.h>
#include <linux/sort.h>
#include <asm/exception.h>
#include <asm/orc_header.h>
diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
index 929ae240280a..729e775bd40d 100644
--- a/arch/loongarch/kernel/unwind_prologue.c
+++ b/arch/loongarch/kernel/unwind_prologue.c
@@ -3,6 +3,7 @@
* Copyright (C) 2022 Loongson Technology Corporation Limited
*/
#include <linux/cpumask.h>
+#include <linux/export.h>
#include <linux/ftrace.h>
#include <linux/kallsyms.h>
diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c
index f39929d7bf8a..a75f865d6fb9 100644
--- a/arch/loongarch/kvm/intc/eiointc.c
+++ b/arch/loongarch/kvm/intc/eiointc.c
@@ -9,7 +9,8 @@
static void eiointc_set_sw_coreisr(struct loongarch_eiointc *s)
{
- int ipnum, cpu, irq_index, irq_mask, irq;
+ int ipnum, cpu, cpuid, irq_index, irq_mask, irq;
+ struct kvm_vcpu *vcpu;
for (irq = 0; irq < EIOINTC_IRQS; irq++) {
ipnum = s->ipmap.reg_u8[irq / 32];
@@ -20,7 +21,12 @@ static void eiointc_set_sw_coreisr(struct loongarch_eiointc *s)
irq_index = irq / 32;
irq_mask = BIT(irq & 0x1f);
- cpu = s->coremap.reg_u8[irq];
+ cpuid = s->coremap.reg_u8[irq];
+ vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid);
+ if (!vcpu)
+ continue;
+
+ cpu = vcpu->vcpu_id;
if (!!(s->coreisr.reg_u32[cpu][irq_index] & irq_mask))
set_bit(irq, s->sw_coreisr[cpu][ipnum]);
else
@@ -66,20 +72,25 @@ static void eiointc_update_irq(struct loongarch_eiointc *s, int irq, int level)
}
static inline void eiointc_update_sw_coremap(struct loongarch_eiointc *s,
- int irq, void *pvalue, u32 len, bool notify)
+ int irq, u64 val, u32 len, bool notify)
{
- int i, cpu;
- u64 val = *(u64 *)pvalue;
+ int i, cpu, cpuid;
+ struct kvm_vcpu *vcpu;
for (i = 0; i < len; i++) {
- cpu = val & 0xff;
+ cpuid = val & 0xff;
val = val >> 8;
if (!(s->status & BIT(EIOINTC_ENABLE_CPU_ENCODE))) {
- cpu = ffs(cpu) - 1;
- cpu = (cpu >= 4) ? 0 : cpu;
+ cpuid = ffs(cpuid) - 1;
+ cpuid = (cpuid >= 4) ? 0 : cpuid;
}
+ vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid);
+ if (!vcpu)
+ continue;
+
+ cpu = vcpu->vcpu_id;
if (s->sw_coremap[irq + i] == cpu)
continue;
@@ -305,6 +316,11 @@ static int kvm_eiointc_read(struct kvm_vcpu *vcpu,
return -EINVAL;
}
+ if (addr & (len - 1)) {
+ kvm_err("%s: eiointc not aligned addr %llx len %d\n", __func__, addr, len);
+ return -EINVAL;
+ }
+
vcpu->kvm->stat.eiointc_read_exits++;
spin_lock_irqsave(&eiointc->lock, flags);
switch (len) {
@@ -398,7 +414,7 @@ static int loongarch_eiointc_writeb(struct kvm_vcpu *vcpu,
irq = offset - EIOINTC_COREMAP_START;
index = irq;
s->coremap.reg_u8[index] = data;
- eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true);
+ eiointc_update_sw_coremap(s, irq, data, sizeof(data), true);
break;
default:
ret = -EINVAL;
@@ -436,17 +452,16 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu,
break;
case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
index = (offset - EIOINTC_ENABLE_START) >> 1;
- old_data = s->enable.reg_u32[index];
+ old_data = s->enable.reg_u16[index];
s->enable.reg_u16[index] = data;
/*
* 1: enable irq.
* update irq when isr is set.
*/
data = s->enable.reg_u16[index] & ~old_data & s->isr.reg_u16[index];
- index = index << 1;
for (i = 0; i < sizeof(data); i++) {
u8 mask = (data >> (i * 8)) & 0xff;
- eiointc_enable_irq(vcpu, s, index + i, mask, 1);
+ eiointc_enable_irq(vcpu, s, index * 2 + i, mask, 1);
}
/*
* 0: disable irq.
@@ -455,7 +470,7 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu,
data = ~s->enable.reg_u16[index] & old_data & s->isr.reg_u16[index];
for (i = 0; i < sizeof(data); i++) {
u8 mask = (data >> (i * 8)) & 0xff;
- eiointc_enable_irq(vcpu, s, index, mask, 0);
+ eiointc_enable_irq(vcpu, s, index * 2 + i, mask, 0);
}
break;
case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
@@ -484,7 +499,7 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu,
irq = offset - EIOINTC_COREMAP_START;
index = irq >> 1;
s->coremap.reg_u16[index] = data;
- eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true);
+ eiointc_update_sw_coremap(s, irq, data, sizeof(data), true);
break;
default:
ret = -EINVAL;
@@ -529,10 +544,9 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu,
* update irq when isr is set.
*/
data = s->enable.reg_u32[index] & ~old_data & s->isr.reg_u32[index];
- index = index << 2;
for (i = 0; i < sizeof(data); i++) {
u8 mask = (data >> (i * 8)) & 0xff;
- eiointc_enable_irq(vcpu, s, index + i, mask, 1);
+ eiointc_enable_irq(vcpu, s, index * 4 + i, mask, 1);
}
/*
* 0: disable irq.
@@ -541,7 +555,7 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu,
data = ~s->enable.reg_u32[index] & old_data & s->isr.reg_u32[index];
for (i = 0; i < sizeof(data); i++) {
u8 mask = (data >> (i * 8)) & 0xff;
- eiointc_enable_irq(vcpu, s, index, mask, 0);
+ eiointc_enable_irq(vcpu, s, index * 4 + i, mask, 0);
}
break;
case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
@@ -570,7 +584,7 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu,
irq = offset - EIOINTC_COREMAP_START;
index = irq >> 2;
s->coremap.reg_u32[index] = data;
- eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true);
+ eiointc_update_sw_coremap(s, irq, data, sizeof(data), true);
break;
default:
ret = -EINVAL;
@@ -615,10 +629,9 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu,
* update irq when isr is set.
*/
data = s->enable.reg_u64[index] & ~old_data & s->isr.reg_u64[index];
- index = index << 3;
for (i = 0; i < sizeof(data); i++) {
u8 mask = (data >> (i * 8)) & 0xff;
- eiointc_enable_irq(vcpu, s, index + i, mask, 1);
+ eiointc_enable_irq(vcpu, s, index * 8 + i, mask, 1);
}
/*
* 0: disable irq.
@@ -627,7 +640,7 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu,
data = ~s->enable.reg_u64[index] & old_data & s->isr.reg_u64[index];
for (i = 0; i < sizeof(data); i++) {
u8 mask = (data >> (i * 8)) & 0xff;
- eiointc_enable_irq(vcpu, s, index, mask, 0);
+ eiointc_enable_irq(vcpu, s, index * 8 + i, mask, 0);
}
break;
case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
@@ -656,7 +669,7 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu,
irq = offset - EIOINTC_COREMAP_START;
index = irq >> 3;
s->coremap.reg_u64[index] = data;
- eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true);
+ eiointc_update_sw_coremap(s, irq, data, sizeof(data), true);
break;
default:
ret = -EINVAL;
@@ -679,6 +692,11 @@ static int kvm_eiointc_write(struct kvm_vcpu *vcpu,
return -EINVAL;
}
+ if (addr & (len - 1)) {
+ kvm_err("%s: eiointc not aligned addr %llx len %d\n", __func__, addr, len);
+ return -EINVAL;
+ }
+
vcpu->kvm->stat.eiointc_write_exits++;
spin_lock_irqsave(&eiointc->lock, flags);
switch (len) {
@@ -787,7 +805,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev,
int ret = 0;
unsigned long flags;
unsigned long type = (unsigned long)attr->attr;
- u32 i, start_irq;
+ u32 i, start_irq, val;
void __user *data;
struct loongarch_eiointc *s = dev->kvm->arch.eiointc;
@@ -795,8 +813,14 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev,
spin_lock_irqsave(&s->lock, flags);
switch (type) {
case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU:
- if (copy_from_user(&s->num_cpu, data, 4))
+ if (copy_from_user(&val, data, 4))
ret = -EFAULT;
+ else {
+ if (val >= EIOINTC_ROUTE_MAX_VCPUS)
+ ret = -EINVAL;
+ else
+ s->num_cpu = val;
+ }
break;
case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE:
if (copy_from_user(&s->features, data, 4))
@@ -809,7 +833,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev,
for (i = 0; i < (EIOINTC_IRQS / 4); i++) {
start_irq = i * 4;
eiointc_update_sw_coremap(s, start_irq,
- (void *)&s->coremap.reg_u32[i], sizeof(u32), false);
+ s->coremap.reg_u32[i], sizeof(u32), false);
}
break;
default:
@@ -824,7 +848,7 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev,
struct kvm_device_attr *attr,
bool is_write)
{
- int addr, cpuid, offset, ret = 0;
+ int addr, cpu, offset, ret = 0;
unsigned long flags;
void *p = NULL;
void __user *data;
@@ -832,7 +856,7 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev,
s = dev->kvm->arch.eiointc;
addr = attr->attr;
- cpuid = addr >> 16;
+ cpu = addr >> 16;
addr &= 0xffff;
data = (void __user *)attr->addr;
switch (addr) {
@@ -857,8 +881,11 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev,
p = &s->isr.reg_u32[offset];
break;
case EIOINTC_COREISR_START ... EIOINTC_COREISR_END:
+ if (cpu >= s->num_cpu)
+ return -EINVAL;
+
offset = (addr - EIOINTC_COREISR_START) / 4;
- p = &s->coreisr.reg_u32[cpuid][offset];
+ p = &s->coreisr.reg_u32[cpu][offset];
break;
case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END:
offset = (addr - EIOINTC_COREMAP_START) / 4;
@@ -899,9 +926,15 @@ static int kvm_eiointc_sw_status_access(struct kvm_device *dev,
data = (void __user *)attr->addr;
switch (addr) {
case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU:
+ if (is_write)
+ return ret;
+
p = &s->num_cpu;
break;
case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_FEATURE:
+ if (is_write)
+ return ret;
+
p = &s->features;
break;
case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE:
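
A standalone sketch (hypothetical names, not part of the patch) of the byte-lane arithmetic the writew/writel/writeq fixes above all restore: a w-byte enable register at register index `index` covers irq byte-groups index*w through index*w + w - 1, so both the enable pass and the disable pass must compute index*w + i per lane. The old code scaled `index` in place before the first loop and then reused the already-scaled value, with no lane offset, in the second.

#include <stdint.h>
#include <stdio.h>

/* stand-in for eiointc_enable_irq(): just trace the call */
static void enable_irq_group(int group, uint8_t mask, int level)
{
	printf("irq group %d mask %#04x level %d\n", group, mask, level);
}

/* one pass over the byte lanes of a w-byte register at `index` */
static void update_lanes(int index, uint64_t data, int w, int level)
{
	for (int i = 0; i < w; i++) {
		uint8_t mask = (data >> (i * 8)) & 0xff;

		enable_irq_group(index * w + i, mask, level);
	}
}

int main(void)
{
	update_lanes(3, 0x00ff00ff, 4, 1);	/* writel case: groups 12..15 */
	update_lanes(3, 0x00ff00ff, 4, 0);	/* disable pass, same base */
	return 0;
}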
diff --git a/arch/loongarch/lib/crc32-loongarch.c b/arch/loongarch/lib/crc32-loongarch.c
index b37cd8537b45..db22c2ec55e2 100644
--- a/arch/loongarch/lib/crc32-loongarch.c
+++ b/arch/loongarch/lib/crc32-loongarch.c
@@ -11,6 +11,7 @@
#include <asm/cpu-features.h>
#include <linux/crc32.h>
+#include <linux/export.h>
#include <linux/module.h>
#include <linux/unaligned.h>
diff --git a/arch/loongarch/lib/csum.c b/arch/loongarch/lib/csum.c
index df309ae4045d..bcc9d01d8c41 100644
--- a/arch/loongarch/lib/csum.c
+++ b/arch/loongarch/lib/csum.c
@@ -2,6 +2,7 @@
// Copyright (C) 2019-2020 Arm Ltd.
#include <linux/compiler.h>
+#include <linux/export.h>
#include <linux/kasan-checks.h>
#include <linux/kernel.h>
diff --git a/arch/loongarch/mm/ioremap.c b/arch/loongarch/mm/ioremap.c
index 70ca73019811..df949a3d0f34 100644
--- a/arch/loongarch/mm/ioremap.c
+++ b/arch/loongarch/mm/ioremap.c
@@ -16,12 +16,12 @@ void __init early_iounmap(void __iomem *addr, unsigned long size)
}
-void *early_memremap_ro(resource_size_t phys_addr, unsigned long size)
+void * __init early_memremap_ro(resource_size_t phys_addr, unsigned long size)
{
return early_memremap(phys_addr, size);
}
-void *early_memremap_prot(resource_size_t phys_addr, unsigned long size,
+void * __init early_memremap_prot(resource_size_t phys_addr, unsigned long size,
unsigned long prot_val)
{
return early_memremap(phys_addr, size);
diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c
index 2726639150bc..5bc9627a6cf9 100644
--- a/arch/loongarch/pci/pci.c
+++ b/arch/loongarch/pci/pci.c
@@ -3,7 +3,6 @@
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
#include <linux/kernel.h>
-#include <linux/export.h>
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/types.h>
diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h
index f5c596b211d4..d79fef609194 100644
--- a/arch/m68k/include/asm/mcf_pgtable.h
+++ b/arch/m68k/include/asm/mcf_pgtable.h
@@ -268,7 +268,7 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) (__pte((x).val))
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h
index 040ac3bad713..14fee64d3e60 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -185,7 +185,7 @@ extern pgd_t kernel_pg_dir[128];
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h
index 73745dc0ec0e..858cbe936f5b 100644
--- a/arch/m68k/include/asm/sun3_pgtable.h
+++ b/arch/m68k/include/asm/sun3_pgtable.h
@@ -169,7 +169,7 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index b1bb2c65dd04..bae1abfa6f6b 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -398,7 +398,7 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 2 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 2 })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 4852b005a72d..ae73ecf4c41a 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -534,7 +534,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#endif
#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte.pte_low & _PAGE_SWP_EXCLUSIVE;
}
@@ -551,7 +551,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
return pte;
}
#else
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index e98578e27e26..844dce55569f 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -259,7 +259,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
#define __swp_entry_to_pte(swp) ((pte_t) { (swp).val })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index 71bfb8c8c482..5bd6463bd514 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -411,7 +411,7 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 80f5e2a28413..1a86a4370b29 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -425,7 +425,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts
index c4e4d2a9b460..b7eac4e56019 100644
--- a/arch/powerpc/boot/dts/microwatt.dts
+++ b/arch/powerpc/boot/dts/microwatt.dts
@@ -4,7 +4,7 @@
/ {
#size-cells = <0x02>;
#address-cells = <0x02>;
- model-name = "microwatt";
+ model = "microwatt";
compatible = "microwatt-soc";
aliases {
diff --git a/arch/powerpc/boot/dts/mpc8315erdb.dts b/arch/powerpc/boot/dts/mpc8315erdb.dts
index e09b37d7489d..a89cb3139ca8 100644
--- a/arch/powerpc/boot/dts/mpc8315erdb.dts
+++ b/arch/powerpc/boot/dts/mpc8315erdb.dts
@@ -6,6 +6,7 @@
*/
/dts-v1/;
+#include <dt-bindings/interrupt-controller/irq.h>
/ {
compatible = "fsl,mpc8315erdb";
@@ -358,6 +359,15 @@
interrupt-parent = <&ipic>;
fsl,mpc8313-wakeup-timer = <&gtm1>;
};
+
+ gpio: gpio-controller@c00 {
+ compatible = "fsl,mpc8314-gpio";
+ reg = <0xc00 0x100>;
+ interrupts = <74 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-parent = <&ipic>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
};
pci0: pci@e0008500 {
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 42c3af90d1f0..92d21c6faf1e 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -365,7 +365,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 6ed93e290c2f..a2ddcbb3fcb9 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -693,7 +693,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte)
return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_EXCLUSIVE));
}
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SWP_EXCLUSIVE));
}
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 8d1f0b7062eb..7d6b9e5b286e 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -286,7 +286,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
}
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 02897f4b0dbf..b891910fce8a 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -183,7 +183,7 @@
/*
* Used to name C functions called from asm
*/
-#ifdef CONFIG_PPC_KERNEL_PCREL
+#if defined(__powerpc64__) && defined(CONFIG_PPC_KERNEL_PCREL)
#define CFUNC(name) name@notoc
#else
#define CFUNC(name) name
diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h
index 2c145da3b774..b5211e413829 100644
--- a/arch/powerpc/include/uapi/asm/ioctls.h
+++ b/arch/powerpc/include/uapi/asm/ioctls.h
@@ -23,10 +23,10 @@
#define TCSETSW _IOW('t', 21, struct termios)
#define TCSETSF _IOW('t', 22, struct termios)
-#define TCGETA _IOR('t', 23, struct termio)
-#define TCSETA _IOW('t', 24, struct termio)
-#define TCSETAW _IOW('t', 25, struct termio)
-#define TCSETAF _IOW('t', 28, struct termio)
+#define TCGETA 0x40147417 /* _IOR('t', 23, struct termio) */
+#define TCSETA 0x80147418 /* _IOW('t', 24, struct termio) */
+#define TCSETAW 0x80147419 /* _IOW('t', 25, struct termio) */
+#define TCSETAF 0x8014741c /* _IOW('t', 28, struct termio) */
#define TCSBRK _IO('t', 29)
#define TCXONC _IO('t', 30)
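
A quick standalone cross-check (a sketch, not the uapi headers) that the hardcoded values match the old macros under the powerpc ioctl field layout -- 8-bit nr, 8-bit type, 13-bit size, 3-bit direction with READ=2/WRITE=4 -- assuming the 20-byte powerpc struct termio that the 0x14 size field implies:

#include <assert.h>
#include <stdint.h>

/* sketch of the powerpc _IOC() packing, local to this example */
#define SK_IOC(dir, type, nr, size) \
	(((uint32_t)(dir) << 29) | ((uint32_t)(size) << 16) | \
	 ((uint32_t)(type) << 8) | (uint32_t)(nr))

int main(void)
{
	assert(SK_IOC(2, 't', 23, 20) == 0x40147417u);	/* TCGETA  */
	assert(SK_IOC(4, 't', 24, 20) == 0x80147418u);	/* TCSETA  */
	assert(SK_IOC(4, 't', 25, 20) == 0x80147419u);	/* TCSETAW */
	assert(SK_IOC(4, 't', 28, 20) == 0x8014741cu);	/* TCSETAF */
	return 0;
}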
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 83fe99861eb1..ca7f7bb2b478 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1509,6 +1509,8 @@ int eeh_pe_configure(struct eeh_pe *pe)
/* Invalid PE ? */
if (!pe)
return -ENODEV;
+ else
+ ret = eeh_ops->configure_bridge(pe);
return ret;
}
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
index e8824f933326..8834dfe9d727 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -53,7 +53,7 @@ ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WAR
ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS))
CC32FLAGS := -m32
-CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc
+CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc -mpcrel
ifdef CONFIG_CC_IS_CLANG
# This flag is supported by clang for 64-bit but not 32-bit so it will cause
# an unused command line flag warning for this file.
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
index 0b6365d85d11..dc6f75d3ac6e 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -521,6 +521,15 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
return -EINVAL;
}
+ /*
+ * Map the complete page to the paste address, so user
+ * space should pass 0ULL as the offset parameter.
+ */
+ if (vma->vm_pgoff) {
+ pr_debug("Page offset unsupported to map paste address\n");
+ return -EINVAL;
+ }
+
/* Ensure instance has an open send window */
if (!txwin) {
pr_err("No send window open?\n");
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 9f9e4b871627..7ec60290abe6 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -143,42 +143,13 @@ spufs_evict_inode(struct inode *inode)
put_spu_gang(ei->i_gang);
}
-static void spufs_prune_dir(struct dentry *dir)
-{
- struct dentry *dentry;
- struct hlist_node *n;
-
- inode_lock(d_inode(dir));
- hlist_for_each_entry_safe(dentry, n, &dir->d_children, d_sib) {
- spin_lock(&dentry->d_lock);
- if (simple_positive(dentry)) {
- dget_dlock(dentry);
- __d_drop(dentry);
- spin_unlock(&dentry->d_lock);
- simple_unlink(d_inode(dir), dentry);
- /* XXX: what was dcache_lock protecting here? Other
- * filesystems (IB, configfs) release dcache_lock
- * before unlink */
- dput(dentry);
- } else {
- spin_unlock(&dentry->d_lock);
- }
- }
- shrink_dcache_parent(dir);
- inode_unlock(d_inode(dir));
-}
-
/* Caller must hold parent->i_mutex */
-static int spufs_rmdir(struct inode *parent, struct dentry *dir)
+static void spufs_rmdir(struct inode *parent, struct dentry *dir)
{
- /* remove all entries */
- int res;
- spufs_prune_dir(dir);
- d_drop(dir);
- res = simple_rmdir(parent, dir);
- /* We have to give up the mm_struct */
- spu_forget(SPUFS_I(d_inode(dir))->i_ctx);
- return res;
+ struct spu_context *ctx = SPUFS_I(d_inode(dir))->i_ctx;
+
+ locked_recursive_removal(dir, NULL);
+ spu_forget(ctx);
}
static int spufs_fill_dir(struct dentry *dir,
@@ -222,15 +193,13 @@ static int spufs_dir_close(struct inode *inode, struct file *file)
{
struct inode *parent;
struct dentry *dir;
- int ret;
dir = file->f_path.dentry;
parent = d_inode(dir->d_parent);
inode_lock_nested(parent, I_MUTEX_PARENT);
- ret = spufs_rmdir(parent, dir);
+ spufs_rmdir(parent, dir);
inode_unlock(parent);
- WARN_ON(ret);
unuse_gang(dir->d_parent);
return dcache_dir_close(inode, file);
@@ -288,11 +257,11 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
ret = spufs_fill_dir(dentry, spufs_dir_debug_contents,
mode, ctx);
+ inode_unlock(inode);
+
if (ret)
spufs_rmdir(dir, dentry);
- inode_unlock(inode);
-
return ret;
}
@@ -475,7 +444,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
ret = spufs_context_open(&path);
if (ret < 0)
- WARN_ON(spufs_rmdir(inode, dentry));
+ spufs_rmdir(inode, dentry);
out_aff_unlock:
if (affinity)
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 4ac9808e55a4..2ea30b343354 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -48,11 +48,15 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct memtrace_entry *ent = filp->private_data;
+ unsigned long ent_nrpages = ent->size >> PAGE_SHIFT;
+ unsigned long vma_nrpages = vma_pages(vma);
- if (ent->size < vma->vm_end - vma->vm_start)
+ /* The requested page offset should be within the object's page count */
+ if (vma->vm_pgoff >= ent_nrpages)
return -EINVAL;
- if (vma->vm_pgoff << PAGE_SHIFT >= ent->size)
+ /* The requested mapping range should remain within the bounds */
+ if (vma_nrpages > ent_nrpages - vma->vm_pgoff)
return -EINVAL;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
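
A minimal sketch (local names only) of the overflow-safe shape the two checks above take: validate the page offset first, then compare the request against the pages that remain, so the naive sum pgoff + nrpages is never formed and cannot wrap around.

#include <stdbool.h>

static bool mmap_range_ok(unsigned long pgoff, unsigned long nrpages,
			  unsigned long obj_pages)
{
	if (pgoff >= obj_pages)			/* offset inside the object */
		return false;
	return nrpages <= obj_pages - pgoff;	/* no overflow: pgoff < obj_pages */
}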
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 36061f4732b7..1c5544401530 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -63,7 +63,8 @@ config RISCV
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
- select ARCH_SUPPORTS_CFI_CLANG
+ # clang >= 17: https://github.com/llvm/llvm-project/commit/62fa708ceb027713b386c7e0efda994f8bdc27e2
+ select ARCH_SUPPORTS_CFI_CLANG if CLANG_VERSION >= 170000
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE
select ARCH_SUPPORTS_HUGETLBFS if MMU
@@ -97,6 +98,7 @@ config RISCV
select CLONE_BACKWARDS
select COMMON_CLK
select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND
+ select DYNAMIC_FTRACE if FUNCTION_TRACER
select EDAC_SUPPORT
select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE)
select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if DYNAMIC_FTRACE
@@ -161,7 +163,7 @@ config RISCV
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER if HAVE_DYNAMIC_FTRACE_WITH_ARGS
select HAVE_FUNCTION_GRAPH_FREGS
- select HAVE_FUNCTION_TRACER if !XIP_KERNEL
+ select HAVE_FUNCTION_TRACER if !XIP_KERNEL && HAVE_DYNAMIC_FTRACE
select HAVE_EBPF_JIT if MMU
select HAVE_GUP_FAST if MMU
select HAVE_FUNCTION_ARG_ACCESS_API
diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
index 3b643b9efc07..5acce285e56e 100644
--- a/arch/riscv/include/asm/kvm_aia.h
+++ b/arch/riscv/include/asm/kvm_aia.h
@@ -87,6 +87,9 @@ DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
extern struct kvm_device_ops kvm_riscv_aia_device_ops;
+bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_aia_imsic_load(struct kvm_vcpu *vcpu, int cpu);
+void kvm_riscv_vcpu_aia_imsic_put(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu);
int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu);
@@ -161,7 +164,6 @@ void kvm_riscv_aia_destroy_vm(struct kvm *kvm);
int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
void __iomem **hgei_va, phys_addr_t *hgei_pa);
void kvm_riscv_aia_free_hgei(int cpu, int hgei);
-void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable);
void kvm_riscv_aia_enable(void);
void kvm_riscv_aia_disable(void);
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 85cfebc32e4c..bcbf8b1ec115 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -306,6 +306,9 @@ static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu)
return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu;
}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+
#define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12
void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid,
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index a11816bbf9e7..5bd5aae60d53 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -1028,7 +1028,7 @@ static inline pud_t pud_modify(pud_t pud, pgprot_t newprot)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
@@ -1075,7 +1075,6 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
*/
#ifdef CONFIG_64BIT
#define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2)
-#define TASK_SIZE_MAX LONG_MAX
#ifdef CONFIG_COMPAT
#define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE)
diff --git a/arch/riscv/include/asm/runtime-const.h b/arch/riscv/include/asm/runtime-const.h
index 451fd76b8811..d766e2b9e6df 100644
--- a/arch/riscv/include/asm/runtime-const.h
+++ b/arch/riscv/include/asm/runtime-const.h
@@ -206,7 +206,7 @@ static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, u
addi_insn_mask &= 0x07fff;
}
- if (lower_immediate & 0x00000fff) {
+ if (lower_immediate & 0x00000fff || lui_insn == RISCV_INSN_NOP4) {
/* replace upper 12 bits of addi with lower 12 bits of val */
addi_insn &= addi_insn_mask;
addi_insn |= (lower_immediate & 0x00000fff) << 20;
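
A standalone sketch (not the kernel's code) of the lui/addi split this fixup manipulates: a 32-bit constant is encoded as hi20 plus a sign-extended lo12, so the upper part carries a +0x800 rounding bias to compensate when the low 12 bits read as negative. This is also why an addi can still need patching when the lui slot was turned into a NOP: hi20 being zero does not mean the whole value is.

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t val = 0xdeadbeef;
	uint32_t hi20 = (val + 0x800) >> 12;	/* rounding bias for negative lo12 */
	uint32_t low = val & 0xfff;
	int32_t lo12 = low >= 0x800 ? (int32_t)low - 0x1000 : (int32_t)low;

	assert((hi20 << 12) + (uint32_t)lo12 == val);	/* round-trips exactly */
	return 0;
}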
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index d472da4450e6..b88a6218b7f2 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -127,6 +127,7 @@ do { \
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
#define __get_user_8(x, ptr, label) \
+do { \
u32 __user *__ptr = (u32 __user *)(ptr); \
u32 __lo, __hi; \
asm_goto_output( \
@@ -141,7 +142,7 @@ do { \
: : label); \
(x) = (__typeof__(x))((__typeof__((x) - (x)))( \
(((u64)__hi << 32) | __lo))); \
-
+} while (0)
#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
#define __get_user_8(x, ptr, label) \
do { \
@@ -310,8 +311,8 @@ do { \
do { \
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \
!IS_ALIGNED((uintptr_t)__gu_ptr, sizeof(*__gu_ptr))) { \
- __inttype(x) val = (__inttype(x))x; \
- if (__asm_copy_to_user_sum_enabled(__gu_ptr, &(val), sizeof(*__gu_ptr))) \
+ __inttype(x) ___val = (__inttype(x))x; \
+ if (__asm_copy_to_user_sum_enabled(__gu_ptr, &(___val), sizeof(*__gu_ptr))) \
goto label; \
break; \
} \
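
A minimal illustration (not kernel code) of why the __get_user_8 fix above wraps the macro body in do { ... } while (0): without the wrapper, a multi-statement macro leaves its trailing statements outside an unbraced if, and an if/else around it will not even compile.

#include <stdio.h>

#define BUMP_BAD(x)	(x)++; printf("x is now %d\n", (x))
#define BUMP_OK(x)	do { (x)++; printf("x is now %d\n", (x)); } while (0)

int main(void)
{
	int a = 0, b = 0;

	if (0)
		BUMP_BAD(a);	/* printf escapes the if and runs anyway */
	if (0)
		BUMP_OK(b);	/* whole body is skipped, as intended */
	printf("a=%d b=%d\n", a, b);	/* a=0 b=0, after one stray line */
	return 0;
}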
diff --git a/arch/riscv/include/asm/vdso/getrandom.h b/arch/riscv/include/asm/vdso/getrandom.h
index 8dc92441702a..c6d66895c1f5 100644
--- a/arch/riscv/include/asm/vdso/getrandom.h
+++ b/arch/riscv/include/asm/vdso/getrandom.h
@@ -18,7 +18,7 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns
register unsigned int flags asm("a2") = _flags;
asm volatile ("ecall\n"
- : "+r" (ret)
+ : "=r" (ret)
: "r" (nr), "r" (buffer), "r" (len), "r" (flags)
: "memory");
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index 45c9b426fcc5..b61786d43c20 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -205,11 +205,11 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
THEAD_VSETVLI_T4X0E8M8D1
THEAD_VSB_V_V0T0
"add t0, t0, t4\n\t"
- THEAD_VSB_V_V0T0
+ THEAD_VSB_V_V8T0
"add t0, t0, t4\n\t"
- THEAD_VSB_V_V0T0
+ THEAD_VSB_V_V16T0
"add t0, t0, t4\n\t"
- THEAD_VSB_V_V0T0
+ THEAD_VSB_V_V24T0
: : "r" (datap) : "memory", "t0", "t4");
} else {
asm volatile (
@@ -241,11 +241,11 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
THEAD_VSETVLI_T4X0E8M8D1
THEAD_VLB_V_V0T0
"add t0, t0, t4\n\t"
- THEAD_VLB_V_V0T0
+ THEAD_VLB_V_V8T0
"add t0, t0, t4\n\t"
- THEAD_VLB_V_V0T0
+ THEAD_VLB_V_V16T0
"add t0, t0, t4\n\t"
- THEAD_VLB_V_V0T0
+ THEAD_VLB_V_V24T0
: : "r" (datap) : "memory", "t0", "t4");
} else {
asm volatile (
diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c
index e6fbaaf54956..87d655944803 100644
--- a/arch/riscv/kernel/cpu_ops_sbi.c
+++ b/arch/riscv/kernel/cpu_ops_sbi.c
@@ -18,10 +18,10 @@ const struct cpu_operations cpu_ops_sbi;
/*
* Ordered booting via HSM brings one cpu at a time. However, cpu hotplug can
- * be invoked from multiple threads in parallel. Define a per cpu data
+ * be invoked from multiple threads in parallel. Define an array of boot data
* to handle that.
*/
-static DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data);
+static struct sbi_hart_boot_data boot_data[NR_CPUS];
static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr,
unsigned long priv)
@@ -67,7 +67,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
unsigned long boot_addr = __pa_symbol(secondary_start_sbi);
unsigned long hartid = cpuid_to_hartid_map(cpuid);
unsigned long hsm_data;
- struct sbi_hart_boot_data *bdata = &per_cpu(boot_data, cpuid);
+ struct sbi_hart_boot_data *bdata = &boot_data[cpuid];
/* Make sure tidle is updated */
smp_mb();
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 4c6c24380cfd..8d18d6727f0f 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -14,6 +14,18 @@
#include <asm/text-patching.h>
#ifdef CONFIG_DYNAMIC_FTRACE
+void ftrace_arch_code_modify_prepare(void)
+ __acquires(&text_mutex)
+{
+ mutex_lock(&text_mutex);
+}
+
+void ftrace_arch_code_modify_post_process(void)
+ __releases(&text_mutex)
+{
+ mutex_unlock(&text_mutex);
+}
+
unsigned long ftrace_call_adjust(unsigned long addr)
{
if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
@@ -29,10 +41,8 @@ unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip)
void arch_ftrace_update_code(int command)
{
- mutex_lock(&text_mutex);
command |= FTRACE_MAY_SLEEP;
ftrace_modify_all_code(command);
- mutex_unlock(&text_mutex);
flush_icache_all();
}
@@ -149,6 +159,8 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
unsigned int nops[2], offset;
int ret;
+ guard(mutex)(&text_mutex);
+
ret = ftrace_rec_set_nop_ops(rec);
if (ret)
return ret;
@@ -157,9 +169,7 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
nops[0] = to_auipc_t0(offset);
nops[1] = RISCV_INSN_NOP4;
- mutex_lock(&text_mutex);
ret = patch_insn_write((void *)pc, nops, 2 * MCOUNT_INSN_SIZE);
- mutex_unlock(&text_mutex);
return ret;
}
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index f7c9a1caa83e..14888e5ea19a 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -50,6 +50,7 @@ atomic_t hart_lottery __section(".sdata")
#endif
;
unsigned long boot_cpu_hartid;
+EXPORT_SYMBOL_GPL(boot_cpu_hartid);
/*
* Place kernel memory regions on the resource tree so that
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 9c83848797a7..80230de167de 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -6,6 +6,7 @@
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/irqflags.h>
#include <linux/randomize_kstack.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
@@ -151,7 +152,9 @@ asmlinkage __visible __trap_section void name(struct pt_regs *regs) \
{ \
if (user_mode(regs)) { \
irqentry_enter_from_user_mode(regs); \
+ local_irq_enable(); \
do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \
+ local_irq_disable(); \
irqentry_exit_to_user_mode(regs); \
} else { \
irqentry_state_t state = irqentry_nmi_enter(regs); \
@@ -173,17 +176,14 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re
if (user_mode(regs)) {
irqentry_enter_from_user_mode(regs);
-
local_irq_enable();
handled = riscv_v_first_use_handler(regs);
-
- local_irq_disable();
-
if (!handled)
do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->epc,
"Oops - illegal instruction");
+ local_irq_disable();
irqentry_exit_to_user_mode(regs);
} else {
irqentry_state_t state = irqentry_nmi_enter(regs);
@@ -308,9 +308,11 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
{
if (user_mode(regs)) {
irqentry_enter_from_user_mode(regs);
+ local_irq_enable();
handle_break(regs);
+ local_irq_disable();
irqentry_exit_to_user_mode(regs);
} else {
irqentry_state_t state = irqentry_nmi_enter(regs);
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index dd8e4af6583f..f760e4fcc052 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -454,14 +454,14 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs)
val.data_u64 = 0;
if (user_mode(regs)) {
- if (copy_from_user_nofault(&val, (u8 __user *)addr, len))
+ if (copy_from_user(&val, (u8 __user *)addr, len))
return -1;
} else {
memcpy(&val, (u8 *)addr, len);
}
if (!fp)
- SET_RD(insn, regs, val.data_ulong << shift >> shift);
+ SET_RD(insn, regs, (long)(val.data_ulong << shift) >> shift);
else if (len == 8)
set_f64_rd(insn, regs, val.data_u64);
else
@@ -555,7 +555,7 @@ static int handle_scalar_misaligned_store(struct pt_regs *regs)
return -EOPNOTSUPP;
if (user_mode(regs)) {
- if (copy_to_user_nofault((u8 __user *)addr, &val, len))
+ if (copy_to_user((u8 __user *)addr, &val, len))
return -1;
} else {
memcpy((u8 *)addr, &val, len);
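
A standalone illustration of the SET_RD sign-extension fix above: shifting an unsigned long left and back right only zero-fills, so the intermediate must be cast to a signed type before the right shift (relying, as the kernel does, on arithmetic right shift of signed values).

#include <stdio.h>

int main(void)
{
	unsigned long val = 0xfffeUL;		/* a 2-byte load of -2 */
	int shift = 8 * (sizeof(long) - 2);	/* 48 on 64-bit */

	long wrong = (long)(val << shift >> shift);	/* logical: 65534 */
	long right = (long)(val << shift) >> shift;	/* arithmetic: -2 */

	printf("wrong=%ld right=%ld\n", wrong, right);
	return 0;
}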
diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
index 7c15b0f4ee3b..c29ef12a63bb 100644
--- a/arch/riscv/kernel/vdso/vdso.lds.S
+++ b/arch/riscv/kernel/vdso/vdso.lds.S
@@ -30,7 +30,7 @@ SECTIONS
*(.data .data.* .gnu.linkonce.d.*)
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
- }
+ } :text
.note : { *(.note.*) } :text :note
diff --git a/arch/riscv/kernel/vendor_extensions/sifive.c b/arch/riscv/kernel/vendor_extensions/sifive.c
index 1411337dc1e6..8fcf67e8c07f 100644
--- a/arch/riscv/kernel/vendor_extensions/sifive.c
+++ b/arch/riscv/kernel/vendor_extensions/sifive.c
@@ -8,7 +8,7 @@
#include <linux/types.h>
/* All SiFive vendor extensions supported in Linux */
-const struct riscv_isa_ext_data riscv_isa_vendor_ext_sifive[] = {
+static const struct riscv_isa_ext_data riscv_isa_vendor_ext_sifive[] = {
__RISCV_ISA_EXT_DATA(xsfvfnrclipxfqf, RISCV_ISA_VENDOR_EXT_XSFVFNRCLIPXFQF),
__RISCV_ISA_EXT_DATA(xsfvfwmaccqqq, RISCV_ISA_VENDOR_EXT_XSFVFWMACCQQQ),
__RISCV_ISA_EXT_DATA(xsfvqmaccdod, RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD),
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index 19afd1f23537..dad318185660 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -30,28 +30,6 @@ unsigned int kvm_riscv_aia_nr_hgei;
unsigned int kvm_riscv_aia_max_ids;
DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
-static int aia_find_hgei(struct kvm_vcpu *owner)
-{
- int i, hgei;
- unsigned long flags;
- struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei);
-
- raw_spin_lock_irqsave(&hgctrl->lock, flags);
-
- hgei = -1;
- for (i = 1; i <= kvm_riscv_aia_nr_hgei; i++) {
- if (hgctrl->owners[i] == owner) {
- hgei = i;
- break;
- }
- }
-
- raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
-
- put_cpu_ptr(&aia_hgei);
- return hgei;
-}
-
static inline unsigned long aia_hvictl_value(bool ext_irq_pending)
{
unsigned long hvictl;
@@ -95,7 +73,6 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu)
bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
{
- int hgei;
unsigned long seip;
if (!kvm_riscv_aia_available())
@@ -114,11 +91,7 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
if (!kvm_riscv_aia_initialized(vcpu->kvm) || !seip)
return false;
- hgei = aia_find_hgei(vcpu);
- if (hgei > 0)
- return !!(ncsr_read(CSR_HGEIP) & BIT(hgei));
-
- return false;
+ return kvm_riscv_vcpu_aia_imsic_has_interrupt(vcpu);
}
void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu)
@@ -164,6 +137,9 @@ void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu)
csr_write(CSR_HVIPRIO2H, csr->hviprio2h);
#endif
}
+
+ if (kvm_riscv_aia_initialized(vcpu->kvm))
+ kvm_riscv_vcpu_aia_imsic_load(vcpu, cpu);
}
void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu)
@@ -174,6 +150,9 @@ void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu)
if (!kvm_riscv_aia_available())
return;
+ if (kvm_riscv_aia_initialized(vcpu->kvm))
+ kvm_riscv_vcpu_aia_imsic_put(vcpu);
+
if (kvm_riscv_nacl_available()) {
nsh = nacl_shmem();
csr->vsiselect = nacl_csr_read(nsh, CSR_VSISELECT);
@@ -472,22 +451,6 @@ void kvm_riscv_aia_free_hgei(int cpu, int hgei)
raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
}
-void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable)
-{
- int hgei;
-
- if (!kvm_riscv_aia_available())
- return;
-
- hgei = aia_find_hgei(owner);
- if (hgei > 0) {
- if (enable)
- csr_set(CSR_HGEIE, BIT(hgei));
- else
- csr_clear(CSR_HGEIE, BIT(hgei));
- }
-}
-
static irqreturn_t hgei_interrupt(int irq, void *dev_id)
{
int i;
diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c
index 29ef9c2133a9..2ff865943ebb 100644
--- a/arch/riscv/kvm/aia_imsic.c
+++ b/arch/riscv/kvm/aia_imsic.c
@@ -676,6 +676,48 @@ static void imsic_swfile_update(struct kvm_vcpu *vcpu,
imsic_swfile_extirq_update(vcpu);
}
+bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu)
+{
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+ unsigned long flags;
+ bool ret = false;
+
+ /*
+ * The IMSIC SW-file directly injects interrupts via hvip, so
+ * only check for an interrupt when the IMSIC VS-file is in use.
+ */
+
+ read_lock_irqsave(&imsic->vsfile_lock, flags);
+ if (imsic->vsfile_cpu > -1)
+ ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei));
+ read_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+ return ret;
+}
+
+void kvm_riscv_vcpu_aia_imsic_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ /*
+ * No need to explicitly clear HGEIE CSR bits because the
+ * hgei interrupt handler (aka hgei_interrupt()) will always
+ * clear them for us.
+ */
+}
+
+void kvm_riscv_vcpu_aia_imsic_put(struct kvm_vcpu *vcpu)
+{
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+ unsigned long flags;
+
+ if (!kvm_vcpu_is_blocking(vcpu))
+ return;
+
+ read_lock_irqsave(&imsic->vsfile_lock, flags);
+ if (imsic->vsfile_cpu > -1)
+ csr_set(CSR_HGEIE, BIT(imsic->vsfile_hgei));
+ read_unlock_irqrestore(&imsic->vsfile_lock, flags);
+}
+
void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
{
unsigned long flags;
@@ -781,6 +823,9 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
* producers to the new IMSIC VS-file.
*/
+ /* Ensure HGEIE CSR bit is zero before using the new IMSIC VS-file */
+ csr_clear(CSR_HGEIE, BIT(new_vsfile_hgei));
+
/* Zero-out new IMSIC VS-file */
imsic_vsfile_local_clear(new_vsfile_hgei, imsic->nr_hw_eix);
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index e0a01af426ff..0462863206ca 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -207,16 +207,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return kvm_riscv_vcpu_timer_pending(vcpu);
}
-void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
-{
- kvm_riscv_aia_wakeon_hgei(vcpu, true);
-}
-
-void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
-{
- kvm_riscv_aia_wakeon_hgei(vcpu, false);
-}
-
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
index 5fbf3f94f1e8..b17fad091bab 100644
--- a/arch/riscv/kvm/vcpu_sbi_replace.c
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -103,7 +103,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_SENT);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
- if (cp->a2 == 0 && cp->a3 == 0)
+ if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL)
kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask);
else
kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask,
@@ -111,7 +111,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_SENT);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
- if (cp->a2 == 0 && cp->a3 == 0)
+ if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL)
kvm_riscv_hfence_vvma_asid_all(vcpu->kvm,
hbase, hmask, cp->a4);
else
@@ -127,9 +127,9 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
/*
* Until nested virtualization is implemented, the
- * SBI HFENCE calls should be treated as NOPs
+ * SBI HFENCE calls should return not supported,
+ * hence fall through.
*/
- break;
default:
retdata->err_val = SBI_ERR_NOT_SUPPORTED;
}
diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c
index ff672fa71fcc..85a7262115e1 100644
--- a/arch/riscv/kvm/vcpu_timer.c
+++ b/arch/riscv/kvm/vcpu_timer.c
@@ -345,8 +345,24 @@ void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu)
/*
* The vstimecmp CSRs are saved by kvm_riscv_vcpu_timer_sync()
* upon every VM exit so no need to save here.
+ *
+ * If the VS-timer expires while no VCPU is running on a host CPU,
+ * a WFI executed by that host CPU becomes an effective NOP and
+ * yields no power savings, because as per the RISC-V Privileged
+ * specification: "WFI is also required to resume execution for
+ * locally enabled interrupts pending at any privilege level,
+ * regardless of the global interrupt enable at each privilege
+ * level."
+ *
+ * To address the above issue, the vstimecmp CSR must be set to
+ * -1UL here when the VCPU is scheduled out or exits to user space.
*/
+ csr_write(CSR_VSTIMECMP, -1UL);
+#if defined(CONFIG_32BIT)
+ csr_write(CSR_VSTIMECMPH, -1UL);
+#endif
+
/* timer should be enabled for the remaining operations */
if (unlikely(!t->init_done))
return;
diff --git a/arch/riscv/tools/relocs_check.sh b/arch/riscv/tools/relocs_check.sh
index baeb2e7b2290..742993e6a8cb 100755
--- a/arch/riscv/tools/relocs_check.sh
+++ b/arch/riscv/tools/relocs_check.sh
@@ -14,7 +14,9 @@ bad_relocs=$(
${srctree}/scripts/relocs_check.sh "$@" |
# These relocations are okay
# R_RISCV_RELATIVE
- grep -F -w -v 'R_RISCV_RELATIVE'
+ # R_RISCV_NONE
+ grep -F -w -v 'R_RISCV_RELATIVE
+R_RISCV_NONE'
)
if [ -z "$bad_relocs" ]; then
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index d229cbd2ba22..9b0d55be1239 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -38,6 +38,7 @@ static int s390_sha1_init(struct shash_desc *desc)
sctx->state[4] = SHA1_H4;
sctx->count = 0;
sctx->func = CPACF_KIMD_SHA_1;
+ sctx->first_message_part = 0;
return 0;
}
@@ -60,6 +61,7 @@ static int s390_sha1_import(struct shash_desc *desc, const void *in)
sctx->count = ictx->count;
memcpy(sctx->state, ictx->state, sizeof(ictx->state));
sctx->func = CPACF_KIMD_SHA_1;
+ sctx->first_message_part = 0;
return 0;
}
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 33711a29618c..6cbbf5e8555f 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -32,6 +32,7 @@ static int sha512_init(struct shash_desc *desc)
ctx->count = 0;
ctx->sha512.count_hi = 0;
ctx->func = CPACF_KIMD_SHA_512;
+ ctx->first_message_part = 0;
return 0;
}
@@ -57,6 +58,7 @@ static int sha512_import(struct shash_desc *desc, const void *in)
memcpy(sctx->state, ictx->state, sizeof(ictx->state));
sctx->func = CPACF_KIMD_SHA_512;
+ sctx->first_message_part = 0;
return 0;
}
@@ -97,6 +99,7 @@ static int sha384_init(struct shash_desc *desc)
ctx->count = 0;
ctx->sha512.count_hi = 0;
ctx->func = CPACF_KIMD_SHA_512;
+ ctx->first_message_part = 0;
return 0;
}
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 1c661ac62ce8..6d8bc27a366e 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -915,7 +915,7 @@ static inline int pmd_protnone(pmd_t pmd)
}
#endif
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 62c0ab4a4b9d..0905fa99a31e 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -265,7 +265,7 @@ static __always_inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *r
addr = kernel_stack_pointer(regs) + n * sizeof(long);
if (!regs_within_kernel_stack(regs, addr))
return 0;
- return READ_ONCE_NOCHECK(addr);
+ return READ_ONCE_NOCHECK(*(unsigned long *)addr);
}
/**
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index c7f8313ba449..0c9a35782c83 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -566,7 +566,15 @@ static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
{
memcpy(plt, &bpf_plt, sizeof(*plt));
plt->ret = ret;
- plt->target = target;
+ /*
+ * (target == NULL) implies that the branch to this PLT entry was
+ * patched and became a no-op. However, some CPU could have jumped
+ * to this PLT entry before patching and may still be executing it.
+ *
+ * Since the intention in this case is to make the PLT entry a no-op,
+ * make the target point to the return label instead of NULL.
+ */
+ plt->target = target ?: ret;
}
/*
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 2fbee3887d13..d930416d4c90 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -54,6 +54,7 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
case PCI_ERS_RESULT_CAN_RECOVER:
case PCI_ERS_RESULT_RECOVERED:
case PCI_ERS_RESULT_NEED_RESET:
+ case PCI_ERS_RESULT_NONE:
return false;
default:
return true;
@@ -78,10 +79,6 @@ static bool is_driver_supported(struct pci_driver *driver)
return false;
if (!driver->err_handler->error_detected)
return false;
- if (!driver->err_handler->slot_reset)
- return false;
- if (!driver->err_handler->resume)
- return false;
return true;
}
@@ -106,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
struct zpci_dev *zdev = to_zpci(pdev);
int rc;
+ /* The underlying device may have been disabled by the event */
+ if (!zdev_enabled(zdev))
+ return PCI_ERS_RESULT_NEED_RESET;
+
pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
rc = zpci_reset_load_store_blocked(zdev);
if (rc) {
@@ -114,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
return PCI_ERS_RESULT_NEED_RESET;
}
- if (driver->err_handler->mmio_enabled) {
+ if (driver->err_handler->mmio_enabled)
ers_res = driver->err_handler->mmio_enabled(pdev);
- if (ers_result_indicates_abort(ers_res)) {
- pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
- pci_name(pdev));
- return ers_res;
- } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
- pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
- return ers_res;
- }
+ else
+ ers_res = PCI_ERS_RESULT_NONE;
+
+ if (ers_result_indicates_abort(ers_res)) {
+ pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
+ pci_name(pdev));
+ return ers_res;
+ } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
+ pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+ return ers_res;
}
pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
@@ -150,7 +153,12 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
return ers_res;
}
pdev->error_state = pci_channel_io_normal;
- ers_res = driver->err_handler->slot_reset(pdev);
+
+ if (driver->err_handler->slot_reset)
+ ers_res = driver->err_handler->slot_reset(pdev);
+ else
+ ers_res = PCI_ERS_RESULT_NONE;
+
if (ers_result_indicates_abort(ers_res)) {
pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
return ers_res;
@@ -214,7 +222,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
goto out_unlock;
}
- if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
+ if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
ers_res = zpci_event_do_error_state_clear(pdev, driver);
if (ers_result_indicates_abort(ers_res)) {
status_str = "failed (abort on MMIO enable)";
@@ -225,6 +233,16 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
if (ers_res == PCI_ERS_RESULT_NEED_RESET)
ers_res = zpci_event_do_reset(pdev, driver);
+ /*
+ * ers_res can be PCI_ERS_RESULT_NONE either because the driver
+ * decided to return it, indicating that it abstains from voting
+ * on how to recover, or because it didn't implement the callback.
+ * Both cases assume that, if there is nothing else causing a
+ * disconnect, we recovered successfully.
+ */
+ if (ers_res == PCI_ERS_RESULT_NONE)
+ ers_res = PCI_ERS_RESULT_RECOVERED;
+
if (ers_res != PCI_ERS_RESULT_RECOVERED) {
pr_err("%s: Automatic recovery failed; operator intervention is required\n",
pci_name(pdev));
@@ -273,6 +291,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
struct pci_dev *pdev = NULL;
pci_ers_result_t ers_res;
+ u32 fh = 0;
+ int rc;
zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
ccdf->fid, ccdf->fh, ccdf->pec);
@@ -281,6 +301,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
if (zdev) {
mutex_lock(&zdev->state_lock);
+ rc = clp_refresh_fh(zdev->fid, &fh);
+ if (rc)
+ goto no_pdev;
+ if (!fh || ccdf->fh != fh) {
+ /* Ignore events with stale handles */
+ zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n",
+ ccdf->fid, fh, ccdf->fh);
+ goto no_pdev;
+ }
zpci_update_fh(zdev, ccdf->fh);
if (zdev->zbus->bus)
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h
index 71b18741cc11..5f51af18997b 100644
--- a/arch/sh/include/asm/pgtable_32.h
+++ b/arch/sh/include/asm/pgtable_32.h
@@ -470,7 +470,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
/* In both cases, we borrow bit 6 to store the exclusive marker in swap PTEs. */
#define _PAGE_SWP_EXCLUSIVE _PAGE_USER
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte.pte_low & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 1454ebe91539..7c199c003ffe 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -348,7 +348,7 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & SRMMU_SWP_EXCLUSIVE;
}
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 4af03e3c161b..669cd02469a1 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -1023,7 +1023,7 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
index c5e6545f6fcf..8e8a8bf518b6 100644
--- a/arch/um/drivers/ubd_user.c
+++ b/arch/um/drivers/ubd_user.c
@@ -41,7 +41,7 @@ int start_io_thread(struct os_helper_thread **td_out, int *fd_out)
*fd_out = fds[1];
err = os_set_fd_block(*fd_out, 0);
- err = os_set_fd_block(kernel_fd, 0);
+ err |= os_set_fd_block(kernel_fd, 0);
if (err) {
printk("start_io_thread - failed to set nonblocking I/O.\n");
goto out_close;
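The switch from `=` to `|=` matters because the first call's result was previously overwritten. A sketch of the accumulation idiom (hypothetical wrapper): OR-ing return codes keeps the "nonzero means failure" invariant, although the combined value is no longer one specific error code.

	static int set_both_nonblocking(int fd_a, int fd_b)
	{
		int err;

		err  = os_set_fd_block(fd_a, 0);
		err |= os_set_fd_block(fd_b, 0);	/* second failure is no longer lost */

		return err ? -1 : 0;
	}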
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index f292e0b4ff8b..9bbbddfe866b 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -1625,35 +1625,19 @@ static void vector_eth_configure(
device->dev = dev;
- *vp = ((struct vector_private)
- {
- .list = LIST_HEAD_INIT(vp->list),
- .dev = dev,
- .unit = n,
- .options = get_transport_options(def),
- .rx_irq = 0,
- .tx_irq = 0,
- .parsed = def,
- .max_packet = get_mtu(def) + ETH_HEADER_OTHER,
- /* TODO - we need to calculate headroom so that ip header
- * is 16 byte aligned all the time
- */
- .headroom = get_headroom(def),
- .form_header = NULL,
- .verify_header = NULL,
- .header_rxbuffer = NULL,
- .header_txbuffer = NULL,
- .header_size = 0,
- .rx_header_size = 0,
- .rexmit_scheduled = false,
- .opened = false,
- .transport_data = NULL,
- .in_write_poll = false,
- .coalesce = 2,
- .req_size = get_req_size(def),
- .in_error = false,
- .bpf = NULL
- });
+ INIT_LIST_HEAD(&vp->list);
+ vp->dev = dev;
+ vp->unit = n;
+ vp->options = get_transport_options(def);
+ vp->parsed = def;
+ vp->max_packet = get_mtu(def) + ETH_HEADER_OTHER;
+ /*
+ * TODO - we need to calculate headroom so that ip header
+ * is 16 byte aligned all the time
+ */
+ vp->headroom = get_headroom(def);
+ vp->coalesce = 2;
+ vp->req_size = get_req_size(def);
dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
INIT_WORK(&vp->reset_tx, vector_reset_tx);
diff --git a/arch/um/drivers/vfio_kern.c b/arch/um/drivers/vfio_kern.c
index b51fc9888ae1..13b971a2bd43 100644
--- a/arch/um/drivers/vfio_kern.c
+++ b/arch/um/drivers/vfio_kern.c
@@ -570,6 +570,17 @@ static void uml_vfio_release_device(struct uml_vfio_device *dev)
kfree(dev);
}
+static struct uml_vfio_device *uml_vfio_find_device(const char *device)
+{
+ struct uml_vfio_device *dev;
+
+ list_for_each_entry(dev, &uml_vfio_devices, list) {
+ if (!strcmp(dev->name, device))
+ return dev;
+ }
+ return NULL;
+}
+
static int uml_vfio_cmdline_set(const char *device, const struct kernel_param *kp)
{
struct uml_vfio_device *dev;
@@ -582,6 +593,9 @@ static int uml_vfio_cmdline_set(const char *device, const struct kernel_param *k
uml_vfio_container.fd = fd;
}
+ if (uml_vfio_find_device(device))
+ return -EEXIST;
+
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return -ENOMEM;
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 04ab3b653a48..b6810db24ca4 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -15,7 +15,6 @@ generic-y += mcs_spinlock.h
generic-y += mmiowb.h
generic-y += module.h
generic-y += module.lds.h
-generic-y += param.h
generic-y += parport.h
generic-y += percpu.h
generic-y += preempt.h
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index ca2a519d53ab..24fdea6f88c3 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -314,7 +314,7 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
((swp_entry_t) { pte_val(pte_mkuptodate(pte)) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_get_bits(pte, _PAGE_SWP_EXCLUSIVE);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 340e5468980e..8bed9030ad47 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -89,7 +89,7 @@ config X86
select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN
select ARCH_HAS_EARLY_DEBUG if KGDB
select ARCH_HAS_ELF_RANDOMIZE
- select ARCH_HAS_EXECMEM_ROX if X86_64
+ select ARCH_HAS_EXECMEM_ROX if X86_64 && STRICT_MODULE_RWX
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
@@ -147,7 +147,7 @@ config X86
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANTS_NO_INSTR
select ARCH_WANT_GENERAL_HUGETLB
- select ARCH_WANT_HUGE_PMD_SHARE
+ select ARCH_WANT_HUGE_PMD_SHARE if X86_64
select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if X86_64
select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP if X86_64
@@ -2695,6 +2695,15 @@ config MITIGATION_ITS
disabled, mitigation cannot be enabled via cmdline.
See <file:Documentation/admin-guide/hw-vuln/indirect-target-selection.rst>
+config MITIGATION_TSA
+ bool "Mitigate Transient Scheduler Attacks"
+ depends on CPU_SUP_AMD
+ default y
+ help
+ Enable mitigation for Transient Scheduler Attacks. TSA is a hardware
+ security vulnerability on AMD CPUs which can lead to forwarding of
+ invalid data to subsequent instructions and thus can affect their
+ timing and thereby leak information.
endif
config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile
index db3255b979bd..342d79f0ab6a 100644
--- a/arch/x86/coco/sev/Makefile
+++ b/arch/x86/coco/sev/Makefile
@@ -5,5 +5,6 @@ obj-y += core.o sev-nmi.o vc-handle.o
# Clang 14 and older may fail to respect __no_sanitize_undefined when inlining
UBSAN_SANITIZE_sev-nmi.o := n
-# GCC may fail to respect __no_sanitize_address when inlining
+# GCC may fail to respect __no_sanitize_address or __no_kcsan when inlining
KASAN_SANITIZE_sev-nmi.o := n
+KCSAN_SANITIZE_sev-nmi.o := n
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index b6db4e0b936b..7543a8b52c67 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -88,7 +88,7 @@ static const char * const sev_status_feat_names[] = {
*/
static u64 snp_tsc_scale __ro_after_init;
static u64 snp_tsc_offset __ro_after_init;
-static u64 snp_tsc_freq_khz __ro_after_init;
+static unsigned long snp_tsc_freq_khz __ro_after_init;
DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
@@ -2167,15 +2167,31 @@ static unsigned long securetsc_get_tsc_khz(void)
void __init snp_secure_tsc_init(void)
{
- unsigned long long tsc_freq_mhz;
+ struct snp_secrets_page *secrets;
+ unsigned long tsc_freq_mhz;
+ void *mem;
if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
return;
+ mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE);
+ if (!mem) {
+ pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n");
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
+ }
+
+ secrets = (__force struct snp_secrets_page *)mem;
+
setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
- snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000);
+
+ /* Extract the guest TSC frequency in MHz from bits [17:0]; the rest is reserved */
+ tsc_freq_mhz &= GENMASK_ULL(17, 0);
+
+ snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor);
x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
x86_platform.calibrate_tsc = securetsc_get_tsc_khz;
+
+ early_memunmap(mem, PAGE_SIZE);
}
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index 175958b02f2b..8e9a0cc20a4a 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -36,20 +36,20 @@ EXPORT_SYMBOL_GPL(write_ibpb);
/*
* Define the VERW operand that is disguised as entry code so that
- * it can be referenced with KPTI enabled. This ensure VERW can be
+ * it can be referenced with KPTI enabled. This ensures VERW can be
* used late in exit-to-user path after page tables are switched.
*/
.pushsection .entry.text, "ax"
.align L1_CACHE_BYTES, 0xcc
-SYM_CODE_START_NOALIGN(mds_verw_sel)
+SYM_CODE_START_NOALIGN(x86_verw_sel)
UNWIND_HINT_UNDEFINED
ANNOTATE_NOENDBR
.word __KERNEL_DS
.align L1_CACHE_BYTES, 0xcc
-SYM_CODE_END(mds_verw_sel);
+SYM_CODE_END(x86_verw_sel);
/* For KVM */
-EXPORT_SYMBOL_GPL(mds_verw_sel);
+EXPORT_SYMBOL_GPL(x86_verw_sel);
.popsection
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 741b229f0718..c2fb729c270e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2826,7 +2826,7 @@ static void intel_pmu_read_event(struct perf_event *event)
* If the PEBS counters snapshotting is enabled,
* the topdown event is available in PEBS records.
*/
- if (is_topdown_event(event) && !is_pebs_counter_event_group(event))
+ if (is_topdown_count(event) && !is_pebs_counter_event_group(event))
static_call(intel_pmu_update_topdown_event)(event, NULL);
else
intel_pmu_drain_pebs_buffer();
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 3d1d3547095a..afdbda2dd7b7 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -34,6 +34,7 @@
#include <linux/syscore_ops.h>
#include <clocksource/hyperv_timer.h>
#include <linux/highmem.h>
+#include <linux/export.h>
void *hv_hypercall_pg;
EXPORT_SYMBOL_GPL(hv_hypercall_pg);
diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c
index 31f0d29cbc5e..090f5ac9f492 100644
--- a/arch/x86/hyperv/irqdomain.c
+++ b/arch/x86/hyperv/irqdomain.c
@@ -10,6 +10,7 @@
#include <linux/pci.h>
#include <linux/irq.h>
+#include <linux/export.h>
#include <asm/mshyperv.h>
static int hv_map_interrupt(union hv_device_id device_id, bool level,
@@ -46,7 +47,7 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level,
if (nr_bank < 0) {
local_irq_restore(flags);
pr_err("%s: unable to generate VP set\n", __func__);
- return EINVAL;
+ return -EINVAL;
}
intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;
@@ -66,7 +67,7 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level,
if (!hv_result_success(status))
hv_status_err(status, "\n");
- return hv_result(status);
+ return hv_result_to_errno(status);
}
static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry)
@@ -88,7 +89,10 @@ static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry)
status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL);
local_irq_restore(flags);
- return hv_result(status);
+ if (!hv_result_success(status))
+ hv_status_err(status, "\n");
+
+ return hv_result_to_errno(status);
}
#ifdef CONFIG_PCI_MSI
@@ -169,13 +173,34 @@ static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev)
return dev_id;
}
-static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector,
- struct hv_interrupt_entry *entry)
+/**
+ * hv_map_msi_interrupt() - "Map" the MSI IRQ in the hypervisor.
+ * @data: Describes the IRQ
+ * @out_entry: Hypervisor (MSI) interrupt entry (can be NULL)
+ *
+ * Map the IRQ in the hypervisor by issuing a MAP_DEVICE_INTERRUPT hypercall.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_map_msi_interrupt(struct irq_data *data,
+ struct hv_interrupt_entry *out_entry)
{
- union hv_device_id device_id = hv_build_pci_dev_id(dev);
+ struct irq_cfg *cfg = irqd_cfg(data);
+ struct hv_interrupt_entry dummy;
+ union hv_device_id device_id;
+ struct msi_desc *msidesc;
+ struct pci_dev *dev;
+ int cpu;
- return hv_map_interrupt(device_id, false, cpu, vector, entry);
+ msidesc = irq_data_get_msi_desc(data);
+ dev = msi_desc_to_pci_dev(msidesc);
+ device_id = hv_build_pci_dev_id(dev);
+ cpu = cpumask_first(irq_data_get_effective_affinity_mask(data));
+
+ return hv_map_interrupt(device_id, false, cpu, cfg->vector,
+ out_entry ? out_entry : &dummy);
}
+EXPORT_SYMBOL_GPL(hv_map_msi_interrupt);
static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg)
{
@@ -188,13 +213,11 @@ static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi
static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry);
static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
+ struct hv_interrupt_entry *stored_entry;
+ struct irq_cfg *cfg = irqd_cfg(data);
struct msi_desc *msidesc;
struct pci_dev *dev;
- struct hv_interrupt_entry out_entry, *stored_entry;
- struct irq_cfg *cfg = irqd_cfg(data);
- const cpumask_t *affinity;
- int cpu;
- u64 status;
+ int ret;
msidesc = irq_data_get_msi_desc(data);
dev = msi_desc_to_pci_dev(msidesc);
@@ -204,9 +227,6 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
return;
}
- affinity = irq_data_get_effective_affinity_mask(data);
- cpu = cpumask_first_and(affinity, cpu_online_mask);
-
if (data->chip_data) {
/*
* This interrupt is already mapped. Let's unmap first.
@@ -219,14 +239,12 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
stored_entry = data->chip_data;
data->chip_data = NULL;
- status = hv_unmap_msi_interrupt(dev, stored_entry);
+ ret = hv_unmap_msi_interrupt(dev, stored_entry);
kfree(stored_entry);
- if (status != HV_STATUS_SUCCESS) {
- hv_status_debug(status, "failed to unmap\n");
+ if (ret)
return;
- }
}
stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC);
@@ -235,15 +253,14 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
return;
}
- status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry);
- if (status != HV_STATUS_SUCCESS) {
+ ret = hv_map_msi_interrupt(data, stored_entry);
+ if (ret) {
kfree(stored_entry);
return;
}
- *stored_entry = out_entry;
data->chip_data = stored_entry;
- entry_to_msi_msg(&out_entry, msg);
+ entry_to_msi_msg(data->chip_data, msg);
return;
}
@@ -257,7 +274,6 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
{
struct hv_interrupt_entry old_entry;
struct msi_msg msg;
- u64 status;
if (!irqd->chip_data) {
pr_debug("%s: no chip data\n!", __func__);
@@ -270,10 +286,7 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
kfree(irqd->chip_data);
irqd->chip_data = NULL;
- status = hv_unmap_msi_interrupt(dev, &old_entry);
-
- if (status != HV_STATUS_SUCCESS)
- hv_status_err(status, "\n");
+ (void)hv_unmap_msi_interrupt(dev, &old_entry);
}
static void hv_msi_free_irq(struct irq_domain *domain,
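With hv_map_msi_interrupt() now exported and documented, a hypothetical out-of-file caller could look like this (sketch only; per the kernel-doc above, @out_entry may be NULL when the caller does not need the returned entry):

	static int example_remap_msi(struct irq_data *data)
	{
		/* Returns 0 on success, -errno on failure. */
		return hv_map_msi_interrupt(data, NULL);
	}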
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index e93a2f488ff7..ade6c665c97e 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -10,6 +10,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/cpu.h>
+#include <linux/export.h>
#include <asm/svm.h>
#include <asm/sev.h>
#include <asm/io.h>
diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c
index 1083dc8646f9..8ccbb7c4fc27 100644
--- a/arch/x86/hyperv/nested.c
+++ b/arch/x86/hyperv/nested.c
@@ -11,6 +11,7 @@
#include <linux/types.h>
+#include <linux/export.h>
#include <hyperv/hvhdk.h>
#include <asm/mshyperv.h>
#include <asm/tlbflush.h>
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index ee176236c2be..286d509f9363 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -456,6 +456,7 @@
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */
+#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */
#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */
#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */
#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */
@@ -487,6 +488,9 @@
#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */
#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */
#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */
+#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */
+#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
+#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */
/*
* BUG word(s)
@@ -542,5 +546,5 @@
#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */
#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */
#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
-
+#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 363110e6b2e3..a2c1f2d24b64 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -9,6 +9,14 @@
#include <asm/cpufeature.h>
#include <asm/msr.h>
+/*
+ * Define bits that are always set to 1 in DR7; only bit 10 is
+ * architecturally reserved to '1'.
+ *
+ * This is also the init/reset value for DR7.
+ */
+#define DR7_FIXED_1 0x00000400
+
DECLARE_PER_CPU(unsigned long, cpu_dr7);
#ifndef CONFIG_PARAVIRT_XXL
@@ -100,8 +108,8 @@ static __always_inline void native_set_debugreg(int regno, unsigned long value)
static inline void hw_breakpoint_disable(void)
{
- /* Zero the control register for HW Breakpoint */
- set_debugreg(0UL, 7);
+ /* Reset the control register for HW Breakpoint */
+ set_debugreg(DR7_FIXED_1, 7);
/* Zero-out the individual HW breakpoint address registers */
set_debugreg(0UL, 0);
@@ -125,9 +133,12 @@ static __always_inline unsigned long local_db_save(void)
return 0;
get_debugreg(dr7, 7);
- dr7 &= ~0x400; /* architecturally set bit */
+
+ /* Architecturally set bit */
+ dr7 &= ~DR7_FIXED_1;
if (dr7)
- set_debugreg(0, 7);
+ set_debugreg(DR7_FIXED_1, 7);
+
/*
* Ensure the compiler doesn't lower the above statements into
* the critical section; disabling breakpoints late would not
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 9a9b21b78905..b30e5474c18e 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -44,13 +44,13 @@ static __always_inline void native_irq_enable(void)
static __always_inline void native_safe_halt(void)
{
- mds_idle_clear_cpu_buffers();
+ x86_idle_clear_cpu_buffers();
asm volatile("sti; hlt": : :"memory");
}
static __always_inline void native_halt(void)
{
- mds_idle_clear_cpu_buffers();
+ x86_idle_clear_cpu_buffers();
asm volatile("hlt": : :"memory");
}
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b4a391929cdb..f7af967aa16f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -31,6 +31,7 @@
#include <asm/apic.h>
#include <asm/pvclock-abi.h>
+#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/msr-index.h>
@@ -249,7 +250,6 @@ enum x86_intercept_stage;
#define DR7_BP_EN_MASK 0x000000ff
#define DR7_GE (1 << 9)
#define DR7_GD (1 << 13)
-#define DR7_FIXED_1 0x00000400
#define DR7_VOLATILE 0xffff2bff
#define KVM_GUESTDBG_VALID_MASK \
@@ -700,8 +700,13 @@ struct kvm_vcpu_hv {
struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS];
- /* Preallocated buffer for handling hypercalls passing sparse vCPU set */
+ /*
+ * Preallocated buffers for handling hypercalls that pass sparse vCPU
+ * sets (for high vCPU counts, they're too large to comfortably fit on
+ * the stack).
+ */
u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS];
+ DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
struct hv_vp_assist_page vp_assist_page;
@@ -764,6 +769,7 @@ enum kvm_only_cpuid_leafs {
CPUID_8000_0022_EAX,
CPUID_7_2_EDX,
CPUID_24_0_EBX,
+ CPUID_8000_0021_ECX,
NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index e988bac0a4a1..3c2de4ce3b10 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -5,12 +5,20 @@
#include <asm-generic/module.h>
#include <asm/orc_types.h>
+struct its_array {
+#ifdef CONFIG_MITIGATION_ITS
+ void **pages;
+ int num;
+#endif
+};
+
struct mod_arch_specific {
#ifdef CONFIG_UNWINDER_ORC
unsigned int num_orcs;
int *orc_unwind_ip;
struct orc_entry *orc_unwind;
#endif
+ struct its_array its_pages;
};
#endif /* _ASM_X86_MODULE_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index e1752ba47e67..abc4659f5809 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -112,12 +112,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
return hv_status;
}
-/* Hypercall to the L0 hypervisor */
-static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *output)
-{
- return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output);
-}
-
/* Fast hypercall with 8 bytes of input and no output */
static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
{
@@ -165,13 +159,6 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
return _hv_do_fast_hypercall8(control, input1);
}
-static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1)
-{
- u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
-
- return _hv_do_fast_hypercall8(control, input1);
-}
-
/* Fast hypercall with 16 bytes of input */
static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
{
@@ -223,13 +210,6 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
return _hv_do_fast_hypercall16(control, input1, input2);
}
-static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2)
-{
- u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
-
- return _hv_do_fast_hypercall16(control, input1, input2);
-}
-
extern struct hv_vp_assist_page **hv_vp_assist_page;
static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
@@ -262,6 +242,8 @@ static inline void hv_apic_init(void) {}
struct irq_domain *hv_create_pci_msi_domain(void);
+int hv_map_msi_interrupt(struct irq_data *data,
+ struct hv_interrupt_entry *out_entry);
int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
struct hv_interrupt_entry *entry);
int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b7dded3c8113..5cfb5d74dd5f 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -628,6 +628,7 @@
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD_PPIN_CTL 0xc00102f0
#define MSR_AMD_PPIN 0xc00102f1
+#define MSR_AMD64_CPUID_FN_7 0xc0011002
#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index dd2b129b0418..6ca6516c7492 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -43,8 +43,6 @@ static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx)
static __always_inline void __mwait(u32 eax, u32 ecx)
{
- mds_idle_clear_cpu_buffers();
-
/*
* Use the instruction mnemonic with implicit operands, as the LLVM
* assembler fails to assemble the mnemonic with explicit operands:
@@ -80,7 +78,7 @@ static __always_inline void __mwait(u32 eax, u32 ecx)
*/
static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx)
{
- /* No MDS buffer clear as this is AMD/HYGON only */
+ /* No need for TSA buffer clearing on AMD */
/* "mwaitx %eax, %ebx, %ecx" */
asm volatile(".byte 0x0f, 0x01, 0xfb"
@@ -98,7 +96,6 @@ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx)
*/
static __always_inline void __sti_mwait(u32 eax, u32 ecx)
{
- mds_idle_clear_cpu_buffers();
asm volatile("sti; mwait" :: "a" (eax), "c" (ecx));
}
@@ -115,21 +112,29 @@ static __always_inline void __sti_mwait(u32 eax, u32 ecx)
*/
static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx)
{
+ if (need_resched())
+ return;
+
+ x86_idle_clear_cpu_buffers();
+
if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) {
const void *addr = &current_thread_info()->flags;
alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr));
__monitor(addr, 0, 0);
- if (!need_resched()) {
- if (ecx & 1) {
- __mwait(eax, ecx);
- } else {
- __sti_mwait(eax, ecx);
- raw_local_irq_disable();
- }
+ if (need_resched())
+ goto out;
+
+ if (ecx & 1) {
+ __mwait(eax, ecx);
+ } else {
+ __sti_mwait(eax, ecx);
+ raw_local_irq_disable();
}
}
+
+out:
current_clr_polling();
}
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 20d754b98f3f..10f261678749 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -302,25 +302,31 @@
.endm
/*
- * Macro to execute VERW instruction that mitigate transient data sampling
- * attacks such as MDS. On affected systems a microcode update overloaded VERW
- * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
- *
+ * Macro to execute VERW insns that mitigate transient data sampling
+ * attacks such as MDS or TSA. On affected systems a microcode update
+ * overloaded VERW insns to also clear the CPU buffers. VERW clobbers
+ * CFLAGS.ZF.
* Note: Only the memory operand variant of VERW clears the CPU buffers.
*/
-.macro CLEAR_CPU_BUFFERS
+.macro __CLEAR_CPU_BUFFERS feature
#ifdef CONFIG_X86_64
- ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF
+ ALTERNATIVE "", "verw x86_verw_sel(%rip)", \feature
#else
/*
* In 32bit mode, the memory operand must be a %cs reference. The data
* segments may not be usable (vm86 mode), and the stack segment may not
* be flat (ESPFIX32).
*/
- ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF
+ ALTERNATIVE "", "verw %cs:x86_verw_sel", \feature
#endif
.endm
+#define CLEAR_CPU_BUFFERS \
+ __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF
+
+#define VM_CLEAR_CPU_BUFFERS \
+ __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF_VM
+
#ifdef CONFIG_X86_64
.macro CLEAR_BRANCH_HISTORY
ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
@@ -567,24 +573,24 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb);
-DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
+DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
DECLARE_STATIC_KEY_FALSE(cpu_buf_vm_clear);
-extern u16 mds_verw_sel;
+extern u16 x86_verw_sel;
#include <asm/segment.h>
/**
- * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
+ * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns
*
* This uses the otherwise unused and obsolete VERW instruction in
* combination with microcode which triggers a CPU buffer flush when the
* instruction is executed.
*/
-static __always_inline void mds_clear_cpu_buffers(void)
+static __always_inline void x86_clear_cpu_buffers(void)
{
static const u16 ds = __KERNEL_DS;
@@ -601,14 +607,15 @@ static __always_inline void mds_clear_cpu_buffers(void)
}
/**
- * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS
+ * and TSA vulnerabilities.
*
* Clear CPU buffers if the corresponding static key is enabled
*/
-static __always_inline void mds_idle_clear_cpu_buffers(void)
+static __always_inline void x86_idle_clear_cpu_buffers(void)
{
- if (static_branch_likely(&mds_idle_clear))
- mds_clear_cpu_buffers();
+ if (static_branch_likely(&cpu_buf_idle_clear))
+ x86_clear_cpu_buffers();
}
#endif /* __ASSEMBLER__ */
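The renamed helper is used exactly like its MDS-only predecessor; a sketch mirroring the native_halt() change in the irqflags.h hunk above:

	static __always_inline void example_idle_halt(void)
	{
		/* Clear CPU buffers first if idle clearing is enabled (MDS/TSA). */
		x86_idle_clear_cpu_buffers();
		asm volatile("hlt" ::: "memory");
	}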
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 774430c3abff..97954c936c54 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1561,7 +1561,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte)
return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE);
}
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 58e028d42e41..a631f7d7c0c0 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -223,6 +223,18 @@ struct snp_tsc_info_resp {
u8 rsvd2[100];
} __packed;
+/*
+ * Obtain the mean TSC frequency by scaling the nominal TSC frequency down by
+ * TSC_FACTOR as documented in the SNP Firmware ABI specification:
+ *
+ * GUEST_TSC_FREQ * (1 - (TSC_FACTOR * 0.00001))
+ *
+ * which is equivalent to:
+ *
+ * GUEST_TSC_FREQ -= (GUEST_TSC_FREQ * TSC_FACTOR) / 100000;
+ */
+#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000)
+
struct snp_guest_req {
void *req_buf;
size_t req_sz;
@@ -282,8 +294,11 @@ struct snp_secrets_page {
u8 svsm_guest_vmpl;
u8 rsvd3[3];
+ /* Decrease from nominal to mean TSC frequency, in units of 0.001%. */
+ u32 tsc_factor;
+
/* Remainder of page */
- u8 rsvd4[3744];
+ u8 rsvd4[3740];
} __packed;
struct snp_msg_desc {
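A worked instance of the macro may help (illustrative values, not from the patch):

	/*
	 * SNP_SCALE_TSC_FREQ(2495000, 1000)
	 *   = 2495000 - (2495000 * 1000) / 100000
	 *   = 2495000 - 24950
	 *   = 2470050 kHz, i.e. a 1% decrease.
	 */

Note also that rsvd4 shrinks from 3744 to 3740 bytes, exactly the four bytes taken by the new u32 tsc_factor, so the secrets page layout stays intact.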
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index 2f3820342598..8bc074c8d7c6 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -72,6 +72,7 @@
#define TDVMCALL_MAP_GPA 0x10001
#define TDVMCALL_GET_QUOTE 0x10002
#define TDVMCALL_REPORT_FATAL_ERROR 0x10003
+#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004ULL
/*
* TDG.VP.VMCALL Status Codes (returned in R10)
@@ -80,6 +81,7 @@
#define TDVMCALL_STATUS_RETRY 0x0000000000000001ULL
#define TDVMCALL_STATUS_INVALID_OPERAND 0x8000000000000000ULL
#define TDVMCALL_STATUS_ALIGN_ERROR 0x8000000000000002ULL
+#define TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED 0x8000000000000003ULL
/*
* Bitmasks of exposed registers (with VMM).
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index e770c4fc47f4..8727c7e21dd1 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -24,4 +24,26 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
+/*
+ * To prevent immediate repeat of single step trap on return from SIGTRAP
+ * handler if the trap flag (TF) is set without an external debugger attached,
+ * clear the software event flag in the augmented SS, ensuring no single-step
+ * trap is pending upon ERETU completion.
+ *
+ * Note, this function should be called in sigreturn() before the original
+ * state is restored to make sure the TF is read from the entry frame.
+ */
+static __always_inline void prevent_single_step_upon_eretu(struct pt_regs *regs)
+{
+ /*
+ * If the trap flag (TF) is set, i.e., the sigreturn() SYSCALL instruction
+ * is being single-stepped, do not clear the software event flag in the
+ * augmented SS, thus a debugger won't skip over the following instruction.
+ */
+#ifdef CONFIG_X86_FRED
+ if (!(regs->flags & X86_EFLAGS_TF))
+ regs->fred_ss.swevent = 0;
+#endif
+}
+
#endif /* _ASM_X86_SIGHANDLING_H */
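Per the comment above, the helper belongs at the top of the sigreturn path, before the saved context is restored. A hypothetical call-site sketch (names invented):

	static long example_sigreturn(void)
	{
		struct pt_regs *regs = current_pt_regs();

		/* Run before sigcontext restore so TF is read from the entry frame. */
		prevent_single_step_upon_eretu(regs);

		/* ... restore the saved sigcontext into *regs ... */
		return regs->ax;
	}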
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 8b19294600c4..7ddef3a69866 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -106,7 +106,7 @@ void tdx_init(void);
typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args);
-static inline u64 sc_retry(sc_func_t func, u64 fn,
+static __always_inline u64 sc_retry(sc_func_t func, u64 fn,
struct tdx_module_args *args)
{
int retry = RDRAND_RETRY_LOOPS;
diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
index 0007ba077c0c..41da492dfb01 100644
--- a/arch/x86/include/uapi/asm/debugreg.h
+++ b/arch/x86/include/uapi/asm/debugreg.h
@@ -15,7 +15,26 @@
which debugging register was responsible for the trap. The other bits
are either reserved or not of interest to us. */
-/* Define reserved bits in DR6 which are always set to 1 */
+/*
+ * Define bits in DR6 which are set to 1 by default.
+ *
+ * This is also the DR6 architectural value following Power-up, Reset or INIT.
+ *
+ * Note, with the introduction of Bus Lock Detection (BLD) and Restricted
+ * Transactional Memory (RTM), the DR6 register has been modified:
+ *
+ * 1) BLD flag (bit 11) is no longer reserved to 1 if the CPU supports
+ * Bus Lock Detection. The assertion of a bus lock could clear it.
+ *
+ * 2) RTM flag (bit 16) is no longer reserved to 1 if the CPU supports
+ * restricted transactional memory. #DB occurred inside an RTM region
+ * could clear it.
+ *
+ * Apparently, DR6.BLD and DR6.RTM are active low bits.
+ *
+ * As a result, DR6_RESERVED is an incorrect name now, but it is kept for
+ * compatibility.
+ */
#define DR6_RESERVED (0xFFFF0FF0)
#define DR_TRAP0 (0x1) /* db0 */
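Because BLD and RTM are active-low, a #DB handler can normalize polarity by XOR-ing with DR6_RESERVED, the same trick the kernel's #DB entry path uses; a sketch:

	static unsigned long dr6_positive_polarity(unsigned long dr6)
	{
		/* XOR flips the active-low bits so 1 == condition detected. */
		return dr6 ^ DR6_RESERVED;
	}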
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 6f3499507c5e..0f15d683817d 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -965,7 +965,13 @@ struct kvm_tdx_cmd {
struct kvm_tdx_capabilities {
__u64 supported_attrs;
__u64 supported_xfam;
- __u64 reserved[254];
+
+ __u64 kernel_tdvmcallinfo_1_r11;
+ __u64 user_tdvmcallinfo_1_r11;
+ __u64 kernel_tdvmcallinfo_1_r12;
+ __u64 user_tdvmcallinfo_1_r12;
+
+ __u64 reserved[250];
/* Configurable CPUID bits for userspace */
struct kvm_cpuid2 cpuid;
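The reserved-array shrink preserves the UAPI layout; spelling out the arithmetic:

	/*
	 * Four new __u64 fields + reserved[250] occupy the same
	 * 254 * 8 bytes that reserved[254] did, so the struct size
	 * and the offset of cpuid are unchanged.
	 */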
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ecfe7b497cad..ea1d984166cd 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -116,6 +116,24 @@ static struct module *its_mod;
#endif
static void *its_page;
static unsigned int its_offset;
+struct its_array its_pages;
+
+static void *__its_alloc(struct its_array *pages)
+{
+ void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE);
+ if (!page)
+ return NULL;
+
+ void *tmp = krealloc(pages->pages, (pages->num+1) * sizeof(void *),
+ GFP_KERNEL);
+ if (!tmp)
+ return NULL;
+
+ pages->pages = tmp;
+ pages->pages[pages->num++] = page;
+
+ return no_free_ptr(page);
+}
/* Initialize a thunk with the "jmp *reg; int3" instructions. */
static void *its_init_thunk(void *thunk, int reg)
@@ -151,6 +169,21 @@ static void *its_init_thunk(void *thunk, int reg)
return thunk + offset;
}
+static void its_pages_protect(struct its_array *pages)
+{
+ for (int i = 0; i < pages->num; i++) {
+ void *page = pages->pages[i];
+ execmem_restore_rox(page, PAGE_SIZE);
+ }
+}
+
+static void its_fini_core(void)
+{
+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+ its_pages_protect(&its_pages);
+ kfree(its_pages.pages);
+}
+
#ifdef CONFIG_MODULES
void its_init_mod(struct module *mod)
{
@@ -173,10 +206,8 @@ void its_fini_mod(struct module *mod)
its_page = NULL;
mutex_unlock(&text_mutex);
- for (int i = 0; i < mod->its_num_pages; i++) {
- void *page = mod->its_page_array[i];
- execmem_restore_rox(page, PAGE_SIZE);
- }
+ if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
+ its_pages_protect(&mod->arch.its_pages);
}
void its_free_mod(struct module *mod)
@@ -184,37 +215,33 @@ void its_free_mod(struct module *mod)
if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
return;
- for (int i = 0; i < mod->its_num_pages; i++) {
- void *page = mod->its_page_array[i];
+ for (int i = 0; i < mod->arch.its_pages.num; i++) {
+ void *page = mod->arch.its_pages.pages[i];
execmem_free(page);
}
- kfree(mod->its_page_array);
+ kfree(mod->arch.its_pages.pages);
}
#endif /* CONFIG_MODULES */
static void *its_alloc(void)
{
- void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE);
-
- if (!page)
- return NULL;
+ struct its_array *pages = &its_pages;
+ void *page;
#ifdef CONFIG_MODULES
- if (its_mod) {
- void *tmp = krealloc(its_mod->its_page_array,
- (its_mod->its_num_pages+1) * sizeof(void *),
- GFP_KERNEL);
- if (!tmp)
- return NULL;
+ if (its_mod)
+ pages = &its_mod->arch.its_pages;
+#endif
- its_mod->its_page_array = tmp;
- its_mod->its_page_array[its_mod->its_num_pages++] = page;
+ page = __its_alloc(pages);
+ if (!page)
+ return NULL;
- execmem_make_temp_rw(page, PAGE_SIZE);
- }
-#endif /* CONFIG_MODULES */
+ execmem_make_temp_rw(page, PAGE_SIZE);
+ if (pages == &its_pages)
+ set_memory_x((unsigned long)page, 1);
- return no_free_ptr(page);
+ return page;
}
static void *its_allocate_thunk(int reg)
@@ -268,7 +295,9 @@ u8 *its_static_thunk(int reg)
return thunk;
}
-#endif
+#else
+static inline void its_fini_core(void) {}
+#endif /* CONFIG_MITIGATION_ITS */
/*
* Nomenclature for variable names to simplify and clarify this code and ease
@@ -2338,6 +2367,8 @@ void __init alternative_instructions(void)
apply_retpolines(__retpoline_sites, __retpoline_sites_end);
apply_returns(__return_sites, __return_sites_end);
+ its_fini_core();
+
/*
* Adjust all CALL instructions to point to func()-10, including
* those in .altinstr_replacement.
@@ -3107,6 +3138,6 @@ void __ref smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, c
*/
void __ref smp_text_poke_single(void *addr, const void *opcode, size_t len, const void *emulate)
{
- __smp_text_poke_batch_add(addr, opcode, len, emulate);
+ smp_text_poke_batch_add(addr, opcode, len, emulate);
smp_text_poke_batch_finish();
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 93da466dfe2c..329ee185d8cc 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -9,7 +9,7 @@
#include <linux/sched/clock.h>
#include <linux/random.h>
#include <linux/topology.h>
-#include <asm/amd/fch.h>
+#include <linux/platform_data/x86/amd-fch.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cacheinfo.h>
@@ -31,7 +31,7 @@
#include "cpu.h"
-u16 invlpgb_count_max __ro_after_init;
+u16 invlpgb_count_max __ro_after_init = 1;
static inline int rdmsrq_amd_safe(unsigned msr, u64 *p)
{
@@ -377,6 +377,47 @@ static void bsp_determine_snp(struct cpuinfo_x86 *c)
#endif
}
+#define ZEN_MODEL_STEP_UCODE(fam, model, step, ucode) \
+ X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, fam, model), \
+ step, step, ucode)
+
+static const struct x86_cpu_id amd_tsa_microcode[] = {
+ ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x1, 0x0a0011d7),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x2, 0x0a00123b),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x08, 0x2, 0x0a00820d),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x1, 0x0a10114c),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x2, 0x0a10124c),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x18, 0x1, 0x0a108109),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x0, 0x0a20102e),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x2, 0x0a201211),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x44, 0x1, 0x0a404108),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x50, 0x0, 0x0a500012),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x61, 0x2, 0x0a60120a),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x74, 0x1, 0x0a704108),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x75, 0x2, 0x0a705208),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x78, 0x0, 0x0a708008),
+ ZEN_MODEL_STEP_UCODE(0x19, 0x7c, 0x0, 0x0a70c008),
+ ZEN_MODEL_STEP_UCODE(0x19, 0xa0, 0x2, 0x0aa00216),
+ {},
+};
+
+static void tsa_init(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+ return;
+
+ if (cpu_has(c, X86_FEATURE_ZEN3) ||
+ cpu_has(c, X86_FEATURE_ZEN4)) {
+ if (x86_match_min_microcode_rev(amd_tsa_microcode))
+ setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR);
+ else
+ pr_debug("%s: current revision: 0x%x\n", __func__, c->microcode);
+ } else {
+ setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO);
+ setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO);
+ }
+}
+
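The table above drives a minimum-revision check; a sketch of the decision tsa_init() delegates to x86_match_min_microcode_rev() (assumed semantics: each entry's last field is the lowest fixed microcode revision, stored in driver_data):

	static bool tsa_ucode_is_fixed(struct cpuinfo_x86 *c)
	{
		const struct x86_cpu_id *m = x86_match_cpu(amd_tsa_microcode);

		return m && c->microcode >= (u32)m->driver_data;
	}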
static void bsp_init_amd(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
@@ -489,6 +530,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
}
bsp_determine_snp(c);
+
+ tsa_init(c);
+
return;
warn:
@@ -930,6 +974,16 @@ static void init_amd_zen2(struct cpuinfo_x86 *c)
init_spectral_chicken(c);
fix_erratum_1386(c);
zen2_zenbleed_check(c);
+
+ /* Disable RDSEED on AMD Cyan Skillfish because it is unreliable there. */
+ if (c->x86_model == 0x47 && c->x86_stepping == 0x0) {
+ clear_cpu_cap(c, X86_FEATURE_RDSEED);
+ msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18);
+ pr_emerg("RDSEED is not reliable on this platform; disabling.\n");
+ }
+
+ /* Correct misconfigured CPUID on some clients. */
+ clear_cpu_cap(c, X86_FEATURE_INVLPGB);
}
static void init_amd_zen3(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 7f94e6a5497d..f4d3abb12317 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -94,6 +94,8 @@ static void __init bhi_apply_mitigation(void);
static void __init its_select_mitigation(void);
static void __init its_update_mitigation(void);
static void __init its_apply_mitigation(void);
+static void __init tsa_select_mitigation(void);
+static void __init tsa_apply_mitigation(void);
/* The base value of the SPEC_CTRL MSR without task-specific bits set */
u64 x86_spec_ctrl_base;
@@ -169,9 +171,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
DEFINE_STATIC_KEY_FALSE(switch_vcpu_ibpb);
EXPORT_SYMBOL_GPL(switch_vcpu_ibpb);
-/* Control MDS CPU buffer clear before idling (halt, mwait) */
-DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
-EXPORT_SYMBOL_GPL(mds_idle_clear);
+/* Control CPU buffer clear before idling (halt, mwait) */
+DEFINE_STATIC_KEY_FALSE(cpu_buf_idle_clear);
+EXPORT_SYMBOL_GPL(cpu_buf_idle_clear);
/*
* Controls whether l1d flush based mitigations are enabled,
@@ -225,6 +227,7 @@ void __init cpu_select_mitigations(void)
gds_select_mitigation();
its_select_mitigation();
bhi_select_mitigation();
+ tsa_select_mitigation();
/*
* After mitigations are selected, some may need to update their
@@ -272,6 +275,7 @@ void __init cpu_select_mitigations(void)
gds_apply_mitigation();
its_apply_mitigation();
bhi_apply_mitigation();
+ tsa_apply_mitigation();
}
/*
@@ -637,7 +641,7 @@ static void __init mmio_apply_mitigation(void)
* is required irrespective of SMT state.
*/
if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO))
- static_branch_enable(&mds_idle_clear);
+ static_branch_enable(&cpu_buf_idle_clear);
if (mmio_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
@@ -1488,6 +1492,94 @@ static void __init its_apply_mitigation(void)
}
#undef pr_fmt
+#define pr_fmt(fmt) "Transient Scheduler Attacks: " fmt
+
+enum tsa_mitigations {
+ TSA_MITIGATION_NONE,
+ TSA_MITIGATION_AUTO,
+ TSA_MITIGATION_UCODE_NEEDED,
+ TSA_MITIGATION_USER_KERNEL,
+ TSA_MITIGATION_VM,
+ TSA_MITIGATION_FULL,
+};
+
+static const char * const tsa_strings[] = {
+ [TSA_MITIGATION_NONE] = "Vulnerable",
+ [TSA_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode",
+ [TSA_MITIGATION_USER_KERNEL] = "Mitigation: Clear CPU buffers: user/kernel boundary",
+ [TSA_MITIGATION_VM] = "Mitigation: Clear CPU buffers: VM",
+ [TSA_MITIGATION_FULL] = "Mitigation: Clear CPU buffers",
+};
+
+static enum tsa_mitigations tsa_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_TSA) ? TSA_MITIGATION_AUTO : TSA_MITIGATION_NONE;
+
+static int __init tsa_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off"))
+ tsa_mitigation = TSA_MITIGATION_NONE;
+ else if (!strcmp(str, "on"))
+ tsa_mitigation = TSA_MITIGATION_FULL;
+ else if (!strcmp(str, "user"))
+ tsa_mitigation = TSA_MITIGATION_USER_KERNEL;
+ else if (!strcmp(str, "vm"))
+ tsa_mitigation = TSA_MITIGATION_VM;
+ else
+ pr_err("Ignoring unknown tsa=%s option.\n", str);
+
+ return 0;
+}
+early_param("tsa", tsa_parse_cmdline);
+
+static void __init tsa_select_mitigation(void)
+{
+ if (cpu_mitigations_off() || !boot_cpu_has_bug(X86_BUG_TSA)) {
+ tsa_mitigation = TSA_MITIGATION_NONE;
+ return;
+ }
+
+ if (tsa_mitigation == TSA_MITIGATION_NONE)
+ return;
+
+ if (!boot_cpu_has(X86_FEATURE_VERW_CLEAR)) {
+ tsa_mitigation = TSA_MITIGATION_UCODE_NEEDED;
+ goto out;
+ }
+
+ if (tsa_mitigation == TSA_MITIGATION_AUTO)
+ tsa_mitigation = TSA_MITIGATION_FULL;
+
+ /*
+ * No need to set verw_clear_cpu_buf_mitigation_selected: it
+ * doesn't fit all cases here, and it is not needed because this
+ * is the only VERW-based mitigation on AMD.
+ */
+out:
+ pr_info("%s\n", tsa_strings[tsa_mitigation]);
+}
+
+static void __init tsa_apply_mitigation(void)
+{
+ switch (tsa_mitigation) {
+ case TSA_MITIGATION_USER_KERNEL:
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ break;
+ case TSA_MITIGATION_VM:
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM);
+ break;
+ case TSA_MITIGATION_FULL:
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM);
+ break;
+ default:
+ break;
+ }
+}
+
+#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
@@ -2249,10 +2341,10 @@ static void update_mds_branch_idle(void)
return;
if (sched_smt_active()) {
- static_branch_enable(&mds_idle_clear);
+ static_branch_enable(&cpu_buf_idle_clear);
} else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) {
- static_branch_disable(&mds_idle_clear);
+ static_branch_disable(&cpu_buf_idle_clear);
}
}
@@ -2316,6 +2408,25 @@ void cpu_bugs_smt_update(void)
break;
}
+ switch (tsa_mitigation) {
+ case TSA_MITIGATION_USER_KERNEL:
+ case TSA_MITIGATION_VM:
+ case TSA_MITIGATION_AUTO:
+ case TSA_MITIGATION_FULL:
+ /*
+ * TSA-SQ can potentially lead to info leakage between
+ * SMT threads.
+ */
+ if (sched_smt_active())
+ static_branch_enable(&cpu_buf_idle_clear);
+ else
+ static_branch_disable(&cpu_buf_idle_clear);
+ break;
+ case TSA_MITIGATION_NONE:
+ case TSA_MITIGATION_UCODE_NEEDED:
+ break;
+ }
+
mutex_unlock(&spec_ctrl_mutex);
}
@@ -3265,6 +3376,11 @@ static ssize_t gds_show_state(char *buf)
return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]);
}
+static ssize_t tsa_show_state(char *buf)
+{
+ return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]);
+}
+
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@@ -3328,6 +3444,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_ITS:
return its_show_state(buf);
+ case X86_BUG_TSA:
+ return tsa_show_state(buf);
+
default:
break;
}
@@ -3414,6 +3533,11 @@ ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_att
{
return cpu_show_common(dev, attr, buf, X86_BUG_ITS);
}
+
+ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_TSA);
+}
#endif
void __warn_thunk(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8feb8fd2957a..fb50c1dd53ef 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1233,6 +1233,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define ITS BIT(8)
/* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */
#define ITS_NATIVE_ONLY BIT(9)
+/* CPU is affected by Transient Scheduler Attacks */
+#define TSA BIT(10)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS),
@@ -1280,7 +1282,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_AMD(0x16, RETBLEED),
VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO),
VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO),
- VULNBL_AMD(0x19, SRSO),
+ VULNBL_AMD(0x19, SRSO | TSA),
VULNBL_AMD(0x1a, SRSO),
{}
};
@@ -1530,6 +1532,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY);
}
+ if (c->x86_vendor == X86_VENDOR_AMD) {
+ if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) ||
+ !cpu_has(c, X86_FEATURE_TSA_L1_NO)) {
+ if (cpu_matches(cpu_vuln_blacklist, TSA) ||
+ /* Enable bug on Zen guests to allow for live migration. */
+ (cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN)))
+ setup_force_cpu_bug(X86_BUG_TSA);
+ }
+ }
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
@@ -2243,20 +2255,16 @@ EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
#endif
#endif
-/*
- * Clear all 6 debug registers:
- */
-static void clear_all_debug_regs(void)
+static void initialize_debug_regs(void)
{
- int i;
-
- for (i = 0; i < 8; i++) {
- /* Ignore db4, db5 */
- if ((i == 4) || (i == 5))
- continue;
-
- set_debugreg(0, i);
- }
+ /* Control register first -- to make sure everything is disabled. */
+ set_debugreg(DR7_FIXED_1, 7);
+ set_debugreg(DR6_RESERVED, 6);
+ /* dr5 and dr4 don't exist */
+ set_debugreg(0, 3);
+ set_debugreg(0, 2);
+ set_debugreg(0, 1);
+ set_debugreg(0, 0);
}
#ifdef CONFIG_KGDB
@@ -2417,7 +2425,7 @@ void cpu_init(void)
load_mm_ldt(&init_mm);
- clear_all_debug_regs();
+ initialize_debug_regs();
dbg_restore_debug_regs();
doublefault_init_cpu_tss();
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 9d852c3b2cb5..5c4eb28c3ac9 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -350,7 +350,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
struct thresh_restart {
struct threshold_block *b;
- int reset;
int set_lvt_off;
int lvt_off;
u16 old_limit;
@@ -432,13 +431,13 @@ static void threshold_restart_bank(void *_tr)
rdmsr(tr->b->address, lo, hi);
- if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
- tr->reset = 1; /* limit cannot be lower than err count */
-
- if (tr->reset) { /* reset err count and overflow bit */
- hi =
- (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
- (THRESHOLD_MAX - tr->b->threshold_limit);
+ /*
+ * Reset error count and overflow bit.
+ * This is done during init or after handling an interrupt.
+ */
+ if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) {
+ hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI);
+ hi |= THRESHOLD_MAX - tr->b->threshold_limit;
} else if (tr->old_limit) { /* change limit w/o reset */
int new_count = (hi & THRESHOLD_MAX) +
(tr->old_limit - tr->b->threshold_limit);
@@ -1113,13 +1112,20 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol
}
bank_type = smca_get_bank_type(cpu, bank);
- if (bank_type >= N_SMCA_BANK_TYPES)
- return NULL;
if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) {
if (b->block < ARRAY_SIZE(smca_umc_block_names))
return smca_umc_block_names[b->block];
- return NULL;
+ }
+
+ if (b && b->block) {
+ snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block);
+ return buf_mcatype;
+ }
+
+ if (bank_type >= N_SMCA_BANK_TYPES) {
+ snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank);
+ return buf_mcatype;
}
if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index e9b3c5d4a52e..4da4eab56c81 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1740,6 +1740,11 @@ static void mc_poll_banks_default(void)
void (*mc_poll_banks)(void) = mc_poll_banks_default;
+static bool should_enable_timer(unsigned long iv)
+{
+ return !mca_cfg.ignore_ce && iv;
+}
+
static void mce_timer_fn(struct timer_list *t)
{
struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
@@ -1763,7 +1768,7 @@ static void mce_timer_fn(struct timer_list *t)
if (mce_get_storm_mode()) {
__start_timer(t, HZ);
- } else {
+ } else if (should_enable_timer(iv)) {
__this_cpu_write(mce_next_interval, iv);
__start_timer(t, iv);
}
@@ -2156,11 +2161,10 @@ static void mce_start_timer(struct timer_list *t)
{
unsigned long iv = check_interval * HZ;
- if (mca_cfg.ignore_ce || !iv)
- return;
-
- this_cpu_write(mce_next_interval, iv);
- __start_timer(t, iv);
+ if (should_enable_timer(iv)) {
+ this_cpu_write(mce_next_interval, iv);
+ __start_timer(t, iv);
+ }
}
static void __mcheck_cpu_setup_timer(void)
@@ -2801,15 +2805,9 @@ static int mce_cpu_dead(unsigned int cpu)
static int mce_cpu_online(unsigned int cpu)
{
struct timer_list *t = this_cpu_ptr(&mce_timer);
- int ret;
mce_device_create(cpu);
-
- ret = mce_threshold_create_device(cpu);
- if (ret) {
- mce_device_remove(cpu);
- return ret;
- }
+ mce_threshold_create_device(cpu);
mce_reenable_cpu();
mce_start_timer(t);
return 0;
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index efcf21e9552e..9b149b9c4109 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -478,6 +478,7 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c)
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
intel_clear_lmce();
+ cmci_clear();
}
bool intel_filter_mce(struct mce *m)
diff --git a/arch/x86/kernel/cpu/microcode/amd_shas.c b/arch/x86/kernel/cpu/microcode/amd_shas.c
index 2a1655b1fdd8..1fd349cfc802 100644
--- a/arch/x86/kernel/cpu/microcode/amd_shas.c
+++ b/arch/x86/kernel/cpu/microcode/amd_shas.c
@@ -231,6 +231,13 @@ static const struct patch_digest phashes[] = {
0x0d,0x5b,0x65,0x34,0x69,0xb2,0x62,0x21,
}
},
+ { 0xa0011d7, {
+ 0x35,0x07,0xcd,0x40,0x94,0xbc,0x81,0x6b,
+ 0xfc,0x61,0x56,0x1a,0xe2,0xdb,0x96,0x12,
+ 0x1c,0x1c,0x31,0xb1,0x02,0x6f,0xe5,0xd2,
+ 0xfe,0x1b,0x04,0x03,0x2c,0x8f,0x4c,0x36,
+ }
+ },
{ 0xa001223, {
0xfb,0x32,0x5f,0xc6,0x83,0x4f,0x8c,0xb8,
0xa4,0x05,0xf9,0x71,0x53,0x01,0x16,0xc4,
@@ -294,6 +301,13 @@ static const struct patch_digest phashes[] = {
0xc0,0xcd,0x33,0xf2,0x8d,0xf9,0xef,0x59,
}
},
+ { 0xa00123b, {
+ 0xef,0xa1,0x1e,0x71,0xf1,0xc3,0x2c,0xe2,
+ 0xc3,0xef,0x69,0x41,0x7a,0x54,0xca,0xc3,
+ 0x8f,0x62,0x84,0xee,0xc2,0x39,0xd9,0x28,
+ 0x95,0xa7,0x12,0x49,0x1e,0x30,0x71,0x72,
+ }
+ },
{ 0xa00820c, {
0xa8,0x0c,0x81,0xc0,0xa6,0x00,0xe7,0xf3,
0x5f,0x65,0xd3,0xb9,0x6f,0xea,0x93,0x63,
@@ -301,6 +315,13 @@ static const struct patch_digest phashes[] = {
0xe1,0x3b,0x8d,0xb2,0xf8,0x22,0x03,0xe2,
}
},
+ { 0xa00820d, {
+ 0xf9,0x2a,0xc0,0xf4,0x9e,0xa4,0x87,0xa4,
+ 0x7d,0x87,0x00,0xfd,0xab,0xda,0x19,0xca,
+ 0x26,0x51,0x32,0xc1,0x57,0x91,0xdf,0xc1,
+ 0x05,0xeb,0x01,0x7c,0x5a,0x95,0x21,0xb7,
+ }
+ },
{ 0xa10113e, {
0x05,0x3c,0x66,0xd7,0xa9,0x5a,0x33,0x10,
0x1b,0xf8,0x9c,0x8f,0xed,0xfc,0xa7,0xa0,
@@ -322,6 +343,13 @@ static const struct patch_digest phashes[] = {
0xf1,0x5e,0xb0,0xde,0xb4,0x98,0xae,0xc4,
}
},
+ { 0xa10114c, {
+ 0x9e,0xb6,0xa2,0xd9,0x87,0x38,0xc5,0x64,
+ 0xd8,0x88,0xfa,0x78,0x98,0xf9,0x6f,0x74,
+ 0x39,0x90,0x1b,0xa5,0xcf,0x5e,0xb4,0x2a,
+ 0x02,0xff,0xd4,0x8c,0x71,0x8b,0xe2,0xc0,
+ }
+ },
{ 0xa10123e, {
0x03,0xb9,0x2c,0x76,0x48,0x93,0xc9,0x18,
0xfb,0x56,0xfd,0xf7,0xe2,0x1d,0xca,0x4d,
@@ -343,6 +371,13 @@ static const struct patch_digest phashes[] = {
0x1b,0x7d,0x64,0x9d,0x4b,0x53,0x13,0x75,
}
},
+ { 0xa10124c, {
+ 0x29,0xea,0xf1,0x2c,0xb2,0xe4,0xef,0x90,
+ 0xa4,0xcd,0x1d,0x86,0x97,0x17,0x61,0x46,
+ 0xfc,0x22,0xcb,0x57,0x75,0x19,0xc8,0xcc,
+ 0x0c,0xf5,0xbc,0xac,0x81,0x9d,0x9a,0xd2,
+ }
+ },
{ 0xa108108, {
0xed,0xc2,0xec,0xa1,0x15,0xc6,0x65,0xe9,
0xd0,0xef,0x39,0xaa,0x7f,0x55,0x06,0xc6,
@@ -350,6 +385,13 @@ static const struct patch_digest phashes[] = {
0x28,0x1e,0x9c,0x59,0x69,0x99,0x4d,0x16,
}
},
+ { 0xa108109, {
+ 0x85,0xb4,0xbd,0x7c,0x49,0xa7,0xbd,0xfa,
+ 0x49,0x36,0x80,0x81,0xc5,0xb7,0x39,0x1b,
+ 0x9a,0xaa,0x50,0xde,0x9b,0xe9,0x32,0x35,
+ 0x42,0x7e,0x51,0x4f,0x52,0x2c,0x28,0x59,
+ }
+ },
{ 0xa20102d, {
0xf9,0x6e,0xf2,0x32,0xd3,0x0f,0x5f,0x11,
0x59,0xa1,0xfe,0xcc,0xcd,0x9b,0x42,0x89,
@@ -357,6 +399,13 @@ static const struct patch_digest phashes[] = {
0x8c,0xe9,0x19,0x3e,0xcc,0x3f,0x7b,0xb4,
}
},
+ { 0xa20102e, {
+ 0xbe,0x1f,0x32,0x04,0x0d,0x3c,0x9c,0xdd,
+ 0xe1,0xa4,0xbf,0x76,0x3a,0xec,0xc2,0xf6,
+ 0x11,0x00,0xa7,0xaf,0x0f,0xe5,0x02,0xc5,
+ 0x54,0x3a,0x1f,0x8c,0x16,0xb5,0xff,0xbe,
+ }
+ },
{ 0xa201210, {
0xe8,0x6d,0x51,0x6a,0x8e,0x72,0xf3,0xfe,
0x6e,0x16,0xbc,0x62,0x59,0x40,0x17,0xe9,
@@ -364,6 +413,13 @@ static const struct patch_digest phashes[] = {
0xf7,0x55,0xf0,0x13,0xbb,0x22,0xf6,0x41,
}
},
+ { 0xa201211, {
+ 0x69,0xa1,0x17,0xec,0xd0,0xf6,0x6c,0x95,
+ 0xe2,0x1e,0xc5,0x59,0x1a,0x52,0x0a,0x27,
+ 0xc4,0xed,0xd5,0x59,0x1f,0xbf,0x00,0xff,
+ 0x08,0x88,0xb5,0xe1,0x12,0xb6,0xcc,0x27,
+ }
+ },
{ 0xa404107, {
0xbb,0x04,0x4e,0x47,0xdd,0x5e,0x26,0x45,
0x1a,0xc9,0x56,0x24,0xa4,0x4c,0x82,0xb0,
@@ -371,6 +427,13 @@ static const struct patch_digest phashes[] = {
0x13,0xbc,0xc5,0x25,0xe4,0xc5,0xc3,0x99,
}
},
+ { 0xa404108, {
+ 0x69,0x67,0x43,0x06,0xf8,0x0c,0x62,0xdc,
+ 0xa4,0x21,0x30,0x4f,0x0f,0x21,0x2c,0xcb,
+ 0xcc,0x37,0xf1,0x1c,0xc3,0xf8,0x2f,0x19,
+ 0xdf,0x53,0x53,0x46,0xb1,0x15,0xea,0x00,
+ }
+ },
{ 0xa500011, {
0x23,0x3d,0x70,0x7d,0x03,0xc3,0xc4,0xf4,
0x2b,0x82,0xc6,0x05,0xda,0x80,0x0a,0xf1,
@@ -378,6 +441,13 @@ static const struct patch_digest phashes[] = {
0x11,0x5e,0x96,0x7e,0x71,0xe9,0xfc,0x74,
}
},
+ { 0xa500012, {
+ 0xeb,0x74,0x0d,0x47,0xa1,0x8e,0x09,0xe4,
+ 0x93,0x4c,0xad,0x03,0x32,0x4c,0x38,0x16,
+ 0x10,0x39,0xdd,0x06,0xaa,0xce,0xd6,0x0f,
+ 0x62,0x83,0x9d,0x8e,0x64,0x55,0xbe,0x63,
+ }
+ },
{ 0xa601209, {
0x66,0x48,0xd4,0x09,0x05,0xcb,0x29,0x32,
0x66,0xb7,0x9a,0x76,0xcd,0x11,0xf3,0x30,
@@ -385,6 +455,13 @@ static const struct patch_digest phashes[] = {
0xe8,0x73,0xe2,0xd6,0xdb,0xd2,0x77,0x1d,
}
},
+ { 0xa60120a, {
+ 0x0c,0x8b,0x3d,0xfd,0x52,0x52,0x85,0x7d,
+ 0x20,0x3a,0xe1,0x7e,0xa4,0x21,0x3b,0x7b,
+ 0x17,0x86,0xae,0xac,0x13,0xb8,0x63,0x9d,
+ 0x06,0x01,0xd0,0xa0,0x51,0x9a,0x91,0x2c,
+ }
+ },
{ 0xa704107, {
0xf3,0xc6,0x58,0x26,0xee,0xac,0x3f,0xd6,
0xce,0xa1,0x72,0x47,0x3b,0xba,0x2b,0x93,
@@ -392,6 +469,13 @@ static const struct patch_digest phashes[] = {
0x64,0x39,0x71,0x8c,0xce,0xe7,0x41,0x39,
}
},
+ { 0xa704108, {
+ 0xd7,0x55,0x15,0x2b,0xfe,0xc4,0xbc,0x93,
+ 0xec,0x91,0xa0,0xae,0x45,0xb7,0xc3,0x98,
+ 0x4e,0xff,0x61,0x77,0x88,0xc2,0x70,0x49,
+ 0xe0,0x3a,0x1d,0x84,0x38,0x52,0xbf,0x5a,
+ }
+ },
{ 0xa705206, {
0x8d,0xc0,0x76,0xbd,0x58,0x9f,0x8f,0xa4,
0x12,0x9d,0x21,0xfb,0x48,0x21,0xbc,0xe7,
@@ -399,6 +483,13 @@ static const struct patch_digest phashes[] = {
0x03,0x35,0xe9,0xbe,0xfb,0x06,0xdf,0xfc,
}
},
+ { 0xa705208, {
+ 0x30,0x1d,0x55,0x24,0xbc,0x6b,0x5a,0x19,
+ 0x0c,0x7d,0x1d,0x74,0xaa,0xd1,0xeb,0xd2,
+ 0x16,0x62,0xf7,0x5b,0xe1,0x1f,0x18,0x11,
+ 0x5c,0xf0,0x94,0x90,0x26,0xec,0x69,0xff,
+ }
+ },
{ 0xa708007, {
0x6b,0x76,0xcc,0x78,0xc5,0x8a,0xa3,0xe3,
0x32,0x2d,0x79,0xe4,0xc3,0x80,0xdb,0xb2,
@@ -406,6 +497,13 @@ static const struct patch_digest phashes[] = {
0xdf,0x92,0x73,0x84,0x87,0x3c,0x73,0x93,
}
},
+ { 0xa708008, {
+ 0x08,0x6e,0xf0,0x22,0x4b,0x8e,0xc4,0x46,
+ 0x58,0x34,0xe6,0x47,0xa2,0x28,0xfd,0xab,
+ 0x22,0x3d,0xdd,0xd8,0x52,0x9e,0x1d,0x16,
+ 0xfa,0x01,0x68,0x14,0x79,0x3e,0xe8,0x6b,
+ }
+ },
{ 0xa70c005, {
0x88,0x5d,0xfb,0x79,0x64,0xd8,0x46,0x3b,
0x4a,0x83,0x8e,0x77,0x7e,0xcf,0xb3,0x0f,
@@ -413,6 +511,13 @@ static const struct patch_digest phashes[] = {
0xee,0x49,0xac,0xe1,0x8b,0x13,0xc5,0x13,
}
},
+ { 0xa70c008, {
+ 0x0f,0xdb,0x37,0xa1,0x10,0xaf,0xd4,0x21,
+ 0x94,0x0d,0xa4,0xa2,0xe9,0x86,0x6c,0x0e,
+ 0x85,0x7c,0x36,0x30,0xa3,0x3a,0x78,0x66,
+ 0x18,0x10,0x60,0x0d,0x78,0x3d,0x44,0xd0,
+ }
+ },
{ 0xaa00116, {
0xe8,0x4c,0x2c,0x88,0xa1,0xac,0x24,0x63,
0x65,0xe5,0xaa,0x2d,0x16,0xa9,0xc3,0xf5,
@@ -441,4 +546,11 @@ static const struct patch_digest phashes[] = {
0x68,0x2f,0x46,0xee,0xfe,0xc6,0x6d,0xef,
}
},
+ { 0xaa00216, {
+ 0x79,0xfb,0x5b,0x9f,0xb6,0xe6,0xa8,0xf5,
+ 0x4e,0x7c,0x4f,0x8e,0x1d,0xad,0xd0,0x08,
+ 0xc2,0x43,0x7c,0x8b,0xe6,0xdb,0xd0,0xd2,
+ 0xe8,0x39,0x26,0xc1,0xe5,0x5a,0x48,0xf1,
+ }
+ },
};
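For context on the table being extended above: each entry pairs an AMD microcode patch ID with the SHA-256 digest the loader checks before applying a patch. A toy lookup over the same shape (linear scan for brevity; the real table is searched by patch ID, and the digest bytes here are elided placeholders):

    #include <stddef.h>
    #include <stdio.h>

    struct patch_digest {
        unsigned int patch_id;
        unsigned char sha256[32];
    };

    static const struct patch_digest phashes[] = {
        { 0xa0011d7, { 0 /* 32 digest bytes elided */ } },
        { 0xa00123b, { 0 /* 32 digest bytes elided */ } },
    };

    static const struct patch_digest *find_digest(unsigned int id)
    {
        for (size_t i = 0; i < sizeof(phashes) / sizeof(phashes[0]); i++)
            if (phashes[i].patch_id == id)
                return &phashes[i];
        return NULL;
    }

    int main(void)
    {
        printf("0xa0011d7 known: %d\n", find_digest(0xa0011d7) != NULL);
        return 0;
    }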
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 7109cbfcad4f..187d527ef73b 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -498,6 +498,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
struct rdt_hw_mon_domain *hw_dom;
struct rdt_domain_hdr *hdr;
struct rdt_mon_domain *d;
+ struct cacheinfo *ci;
int err;
lockdep_assert_held(&domain_list_lock);
@@ -525,12 +526,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
d = &hw_dom->d_resctrl;
d->hdr.id = id;
d->hdr.type = RESCTRL_MON_DOMAIN;
- d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
- if (!d->ci) {
+ ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
+ if (!ci) {
pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
mon_domain_free(hw_dom);
return;
}
+ d->ci_id = ci->id;
cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
arch_mon_domain_online(r, d);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index dbf6d71bdf18..b4a1f6732a3a 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -50,6 +50,8 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
{ X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 },
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
+ { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 },
+ { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 },
{ X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 },
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
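The scattered.c rows above register TSA_SQ_NO and TSA_L1_NO as CPUID 0x80000021 ECX bits 1 and 2. A small user-space probe of those same bits, assuming x86 and GCC/clang's <cpuid.h> (everything beyond the leaf/register/bit positions is demo scaffolding):

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid_count(0x80000021, 0, &eax, &ebx, &ecx, &edx))
            return 1;                        /* extended leaf not supported */

        printf("TSA_SQ_NO=%u TSA_L1_NO=%u\n",
               (ecx >> 1) & 1, (ecx >> 2) & 1);
        return 0;
    }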
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 102641fd2172..8b1a9733d13e 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -385,7 +385,7 @@ static void kgdb_disable_hw_debug(struct pt_regs *regs)
struct perf_event *bp;
/* Disable hardware debugging while we are in kgdb: */
- set_debugreg(0UL, 7);
+ set_debugreg(DR7_FIXED_1, 7);
for (i = 0; i < HBP_NUM; i++) {
if (!breakinfo[i].enabled)
continue;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 704883c21f3a..a838be04f874 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -907,16 +907,24 @@ static __init bool prefer_mwait_c1_over_halt(void)
*/
static __cpuidle void mwait_idle(void)
{
+ if (need_resched())
+ return;
+
+ x86_idle_clear_cpu_buffers();
+
if (!current_set_polling_and_test()) {
const void *addr = &current_thread_info()->flags;
alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr));
__monitor(addr, 0, 0);
- if (!need_resched()) {
- __sti_mwait(0, 0);
- raw_local_irq_disable();
- }
+ if (need_resched())
+ goto out;
+
+ __sti_mwait(0, 0);
+ raw_local_irq_disable();
}
+
+out:
__current_clr_polling();
}
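The reworked mwait_idle() above follows the classic lost-wakeup-free idle shape: bail early if work is pending, publish the polling flag, re-check, and only then sleep. A hedged user-space analogue using C11 atomics in place of MONITOR/MWAIT (which cannot be used from a portable demo):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool need_work;    /* analogue of TIF_NEED_RESCHED */
    static atomic_bool polling;      /* analogue of TIF_POLLING_NRFLAG */

    static void idle(void)
    {
        if (atomic_load(&need_work))
            return;                          /* cheap early bail-out */

        atomic_store(&polling, true);        /* wakers may skip the IPI now */
        if (!atomic_load(&need_work)) {      /* re-check after publishing */
            /* kernel: __monitor(&flags); need_resched()?; __sti_mwait() */
            while (!atomic_load(&need_work))
                ;                            /* stand-in for the MWAIT nap */
        }
        atomic_store(&polling, false);
    }

    int main(void)
    {
        atomic_store(&need_work, true);      /* pretend work arrived */
        idle();                              /* returns immediately */
        return 0;
    }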
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a10e180cbf23..3ef15c2f152f 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -93,7 +93,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
/* Only print out debug registers if they are in their non-default state. */
if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
- (d6 == DR6_RESERVED) && (d7 == 0x400))
+ (d6 == DR6_RESERVED) && (d7 == DR7_FIXED_1))
return;
printk("%sDR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 8d6cf25127aa..b972bf72fb8b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -133,7 +133,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
/* Only print out debug registers if they are in their non-default state. */
if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
- (d6 == DR6_RESERVED) && (d7 == 0x400))) {
+ (d6 == DR6_RESERVED) && (d7 == DR7_FIXED_1))) {
printk("%sDR0: %016lx DR1: %016lx DR2: %016lx\n",
log_lvl, d0, d1, d2);
printk("%sDR3: %016lx DR6: %016lx DR7: %016lx\n",
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 98123ff10506..42bbc42bd350 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -152,6 +152,8 @@ SYSCALL32_DEFINE0(sigreturn)
struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
sigset_t set;
+ prevent_single_step_upon_eretu(regs);
+
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
if (__get_user(set.sig[0], &frame->sc.oldmask)
@@ -175,6 +177,8 @@ SYSCALL32_DEFINE0(rt_sigreturn)
struct rt_sigframe_ia32 __user *frame;
sigset_t set;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
if (!access_ok(frame, sizeof(*frame)))
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index ee9453891901..d483b585c6c6 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -250,6 +250,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
sigset_t set;
unsigned long uc_flags;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
@@ -366,6 +368,8 @@ COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn)
sigset_t set;
unsigned long uc_flags;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
if (!access_ok(frame, sizeof(*frame)))
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 18266cc3d98c..b014e6d229f9 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -299,3 +299,27 @@ struct smp_ops smp_ops = {
.send_call_func_single_ipi = native_send_call_func_single_ipi,
};
EXPORT_SYMBOL_GPL(smp_ops);
+
+int arch_cpu_rescan_dead_smt_siblings(void)
+{
+ enum cpuhp_smt_control old = cpu_smt_control;
+ int ret;
+
+ /*
+ * If SMT has been disabled and SMT siblings are in HLT, bring them back
+ * online and offline them again so that they end up in MWAIT proper.
+ *
+ * Called with hotplug enabled.
+ */
+ if (old != CPU_SMT_DISABLED && old != CPU_SMT_FORCE_DISABLED)
+ return 0;
+
+ ret = cpuhp_smt_enable();
+ if (ret)
+ return ret;
+
+ ret = cpuhp_smt_disable(old);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(arch_cpu_rescan_dead_smt_siblings);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fc78c2325fd2..58ede3fa6a75 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1244,6 +1244,10 @@ void play_dead_common(void)
local_irq_disable();
}
+/*
+ * We need to flush the caches before going to sleep, lest we have
+ * dirty data in our caches when we come back up.
+ */
void __noreturn mwait_play_dead(unsigned int eax_hint)
{
struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
@@ -1290,50 +1294,6 @@ void __noreturn mwait_play_dead(unsigned int eax_hint)
}
/*
- * We need to flush the caches before going to sleep, lest we have
- * dirty data in our caches when we come back up.
- */
-static inline void mwait_play_dead_cpuid_hint(void)
-{
- unsigned int eax, ebx, ecx, edx;
- unsigned int highest_cstate = 0;
- unsigned int highest_subcstate = 0;
- int i;
-
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
- return;
- if (!this_cpu_has(X86_FEATURE_MWAIT))
- return;
- if (!this_cpu_has(X86_FEATURE_CLFLUSH))
- return;
-
- eax = CPUID_LEAF_MWAIT;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
-
- /*
- * eax will be 0 if EDX enumeration is not valid.
- * Initialized below to cstate, sub_cstate value when EDX is valid.
- */
- if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
- eax = 0;
- } else {
- edx >>= MWAIT_SUBSTATE_SIZE;
- for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
- if (edx & MWAIT_SUBSTATE_MASK) {
- highest_cstate = i;
- highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
- }
- }
- eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
- (highest_subcstate - 1);
- }
-
- mwait_play_dead(eax);
-}
-
-/*
* Kick all "offline" CPUs out of mwait on kexec(). See comment in
* mwait_play_dead().
*/
@@ -1383,9 +1343,9 @@ void native_play_dead(void)
play_dead_common();
tboot_shutdown(TB_SHUTDOWN_WFS);
- mwait_play_dead_cpuid_hint();
- if (cpuidle_play_dead())
- hlt_play_dead();
+ /* Below returns only on error. */
+ cpuidle_play_dead();
+ hlt_play_dead();
}
#else /* ... !CONFIG_HOTPLUG_CPU */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c5c897a86418..36354b470590 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1022,24 +1022,32 @@ static bool is_sysenter_singlestep(struct pt_regs *regs)
#endif
}
-static __always_inline unsigned long debug_read_clear_dr6(void)
+static __always_inline unsigned long debug_read_reset_dr6(void)
{
unsigned long dr6;
+ get_debugreg(dr6, 6);
+ dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
+
/*
* The Intel SDM says:
*
- * Certain debug exceptions may clear bits 0-3. The remaining
- * contents of the DR6 register are never cleared by the
- * processor. To avoid confusion in identifying debug
- * exceptions, debug handlers should clear the register before
- * returning to the interrupted task.
+ * Certain debug exceptions may clear bits 0-3 of DR6.
+ *
+ * BLD induced #DB clears DR6.BLD and any other debug
+ * exception doesn't modify DR6.BLD.
*
- * Keep it simple: clear DR6 immediately.
+ * RTM induced #DB clears DR6.RTM and any other debug
+ * exception sets DR6.RTM.
+ *
+ * To avoid confusion in identifying debug exceptions,
+ * debug handlers should set DR6.BLD and DR6.RTM, and
+ * clear other DR6 bits before returning.
+ *
+ * Keep it simple: write DR6 with its architectural reset
+ * value 0xFFFF0FF0, defined as DR6_RESERVED, immediately.
*/
- get_debugreg(dr6, 6);
set_debugreg(DR6_RESERVED, 6);
- dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
return dr6;
}
@@ -1239,13 +1247,13 @@ out:
/* IST stack entry */
DEFINE_IDTENTRY_DEBUG(exc_debug)
{
- exc_debug_kernel(regs, debug_read_clear_dr6());
+ exc_debug_kernel(regs, debug_read_reset_dr6());
}
/* User entry, runs on regular task stack */
DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
{
- exc_debug_user(regs, debug_read_clear_dr6());
+ exc_debug_user(regs, debug_read_reset_dr6());
}
#ifdef CONFIG_X86_FRED
@@ -1264,7 +1272,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug)
{
/*
* FRED #DB stores DR6 on the stack in the format which
- * debug_read_clear_dr6() returns for the IDT entry points.
+ * debug_read_reset_dr6() returns for the IDT entry points.
*/
unsigned long dr6 = fred_event_data(regs);
@@ -1279,7 +1287,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug)
/* 32 bit does not have separate entry points. */
DEFINE_IDTENTRY_RAW(exc_debug)
{
- unsigned long dr6 = debug_read_clear_dr6();
+ unsigned long dr6 = debug_read_reset_dr6();
if (user_mode(regs))
exc_debug_user(regs, dr6);
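On the "dr6 ^= DR6_RESERVED" flip in the traps.c hunk above: DR6_RESERVED (0xFFFF0FF0) is also the set of bits that read as 1 when inactive, so XORing the raw register with it turns active-low bits such as RTM into active-high ones, letting the handler test every cause with a plain AND. A standalone illustration:

    #include <stdio.h>

    #define DR6_RESERVED 0xFFFF0FF0UL
    #define DR6_BS       (1UL << 14)   /* single-step, active high */
    #define DR6_RTM      (1UL << 16)   /* active low in raw DR6 */

    int main(void)
    {
        unsigned long raw = DR6_RESERVED | DR6_BS;   /* single-step #DB */
        unsigned long dr6 = raw ^ DR6_RESERVED;      /* positive polarity */

        printf("BS set: %d\n", !!(dr6 & DR6_BS));    /* 1 */
        printf("RTM #DB: %d\n", !!(dr6 & DR6_RTM));  /* 0: raw RTM=1 means none */
        return 0;
    }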
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b2d006756e02..f84bc0569c9c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1165,6 +1165,8 @@ void kvm_set_cpu_caps(void)
*/
SYNTHESIZED_F(LFENCE_RDTSC),
/* SmmPgCfgLock */
+ /* 4: Resv */
+ SYNTHESIZED_F(VERW_CLEAR),
F(NULL_SEL_CLR_BASE),
/* UpperAddressIgnore */
F(AUTOIBRS),
@@ -1179,6 +1181,11 @@ void kvm_set_cpu_caps(void)
F(SRSO_USER_KERNEL_NO),
);
+ kvm_cpu_cap_init(CPUID_8000_0021_ECX,
+ SYNTHESIZED_F(TSA_SQ_NO),
+ SYNTHESIZED_F(TSA_L1_NO),
+ );
+
kvm_cpu_cap_init(CPUID_8000_0022_EAX,
F(PERFMON_V2),
);
@@ -1748,8 +1755,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
break;
case 0x80000021:
- entry->ebx = entry->ecx = entry->edx = 0;
+ entry->ebx = entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0021_EAX);
+ cpuid_entry_override(entry, CPUID_8000_0021_ECX);
break;
/* AMD Extended Performance Monitoring and Debug */
case 0x80000022: {
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 24f0318c50d7..ee27064dd72f 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1979,6 +1979,9 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY)
goto out_flush_all;
+ if (is_noncanonical_invlpg_address(entries[i], vcpu))
+ continue;
+
/*
* Lower 12 bits of 'address' encode the number of additional
* pages to flush.
@@ -2001,11 +2004,11 @@ out_flush_all:
static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
{
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+ unsigned long *vcpu_mask = hv_vcpu->vcpu_mask;
u64 *sparse_banks = hv_vcpu->sparse_banks;
struct kvm *kvm = vcpu->kvm;
struct hv_tlb_flush_ex flush_ex;
struct hv_tlb_flush flush;
- DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
/*
* Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE'
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index cbc84c6abc2e..4e06e2e89a8f 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4896,12 +4896,16 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
{
u64 error_code = PFERR_GUEST_FINAL_MASK;
u8 level = PG_LEVEL_4K;
+ u64 direct_bits;
u64 end;
int r;
if (!vcpu->kvm->arch.pre_fault_allowed)
return -EOPNOTSUPP;
+ if (kvm_is_gfn_alias(vcpu->kvm, gpa_to_gfn(range->gpa)))
+ return -EINVAL;
+
/*
* reload is efficient when called repeatedly, so we can do it on
* every iteration.
@@ -4910,15 +4914,18 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
if (r)
return r;
+ direct_bits = 0;
if (kvm_arch_has_private_mem(vcpu->kvm) &&
kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa)))
error_code |= PFERR_PRIVATE_ACCESS;
+ else
+ direct_bits = gfn_to_gpa(kvm_gfn_direct_bits(vcpu->kvm));
/*
* Shadow paging uses GVA for kvm page fault, so restrict to
* two-dimensional paging.
*/
- r = kvm_tdp_map_page(vcpu, range->gpa, error_code, &level);
+ r = kvm_tdp_map_page(vcpu, range->gpa | direct_bits, error_code, &level);
if (r < 0)
return r;
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index fde0ae986003..c53b92379e6e 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -52,6 +52,10 @@
/* CPUID level 0x80000022 (EAX) */
#define KVM_X86_FEATURE_PERFMON_V2 KVM_X86_FEATURE(CPUID_8000_0022_EAX, 0)
+/* CPUID level 0x80000021 (ECX) */
+#define KVM_X86_FEATURE_TSA_SQ_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 1)
+#define KVM_X86_FEATURE_TSA_L1_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 2)
+
struct cpuid_reg {
u32 function;
u32 index;
@@ -82,6 +86,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX},
[CPUID_7_2_EDX] = { 7, 2, CPUID_EDX},
[CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX},
+ [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX},
};
/*
@@ -121,6 +126,8 @@ static __always_inline u32 __feature_translate(int x86_feature)
KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
+ KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO);
+ KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO);
default:
return x86_feature;
}
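On the table just extended: KVM numbers features as word * 32 + bit, and reverse_cpuid[] recovers which CPUID leaf, subleaf, and register a feature word lives in. A compact sketch of that mapping (names echo the header; this is not the kernel's code):

    #include <stdio.h>

    enum cpuid_regs { CPUID_EAX, CPUID_EBX, CPUID_ECX, CPUID_EDX };

    struct cpuid_reg {
        unsigned int function;
        unsigned int index;
        enum cpuid_regs reg;
    };

    enum { CPUID_8000_0021_ECX };             /* one feature word, for demo */

    static const struct cpuid_reg reverse_cpuid[] = {
        [CPUID_8000_0021_ECX] = { 0x80000021, 0, CPUID_ECX },
    };

    int main(void)
    {
        unsigned int feature = CPUID_8000_0021_ECX * 32 + 1; /* TSA_SQ_NO */
        const struct cpuid_reg *cr = &reverse_cpuid[feature / 32];

        printf("leaf %#x reg %d bit %u\n",
               cr->function, cr->reg, feature % 32);
        return 0;
    }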
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 5a69b657dae9..b201f77fcd49 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1971,6 +1971,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
struct kvm_vcpu *src_vcpu;
unsigned long i;
+ if (src->created_vcpus != atomic_read(&src->online_vcpus) ||
+ dst->created_vcpus != atomic_read(&dst->online_vcpus))
+ return -EBUSY;
+
if (!sev_es_guest(src))
return 0;
@@ -2871,6 +2875,33 @@ void __init sev_set_cpu_caps(void)
}
}
+static bool is_sev_snp_initialized(void)
+{
+ struct sev_user_data_snp_status *status;
+ struct sev_data_snp_addr buf;
+ bool initialized = false;
+ int ret, error = 0;
+
+ status = snp_alloc_firmware_page(GFP_KERNEL | __GFP_ZERO);
+ if (!status)
+ return false;
+
+ buf.address = __psp_pa(status);
+ ret = sev_do_cmd(SEV_CMD_SNP_PLATFORM_STATUS, &buf, &error);
+ if (ret) {
+ pr_err("SEV: SNP_PLATFORM_STATUS failed ret=%d, fw_error=%d (%#x)\n",
+ ret, error, error);
+ goto out;
+ }
+
+ initialized = !!status->state;
+
+out:
+ snp_free_firmware_page(status);
+
+ return initialized;
+}
+
void __init sev_hardware_setup(void)
{
unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count;
@@ -2975,6 +3006,14 @@ void __init sev_hardware_setup(void)
sev_snp_supported = sev_snp_enabled && cc_platform_has(CC_ATTR_HOST_SEV_SNP);
out:
+ if (sev_enabled) {
+ init_args.probe = true;
+ if (sev_platform_init(&init_args))
+ sev_supported = sev_es_supported = sev_snp_supported = false;
+ else if (sev_snp_supported)
+ sev_snp_supported = is_sev_snp_initialized();
+ }
+
if (boot_cpu_has(X86_FEATURE_SEV))
pr_info("SEV %s (ASIDs %u - %u)\n",
sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" :
@@ -3001,15 +3040,6 @@ out:
sev_supported_vmsa_features = 0;
if (sev_es_debug_swap_enabled)
sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP;
-
- if (!sev_enabled)
- return;
-
- /*
- * Do both SNP and SEV initialization at KVM module load.
- */
- init_args.probe = true;
- sev_platform_init(&init_args);
}
void sev_hardware_unsetup(void)
@@ -4419,8 +4449,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
* the VMSA will be NULL if this vCPU is the destination for intrahost
* migration, and will be copied later.
*/
- if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa)
- svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+ if (!svm->sev_es.snp_has_guest_vmsa) {
+ if (svm->sev_es.vmsa)
+ svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+ else
+ svm->vmcb->control.vmsa_pa = INVALID_PAGE;
+ }
if (cpu_feature_enabled(X86_FEATURE_ALLOWED_SEV_FEATURES))
svm->vmcb->control.allowed_sev_features = sev->vmsa_features |
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 0c61153b275f..235c4af6b692 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -169,6 +169,9 @@ SYM_FUNC_START(__svm_vcpu_run)
#endif
mov VCPU_RDI(%_ASM_DI), %_ASM_DI
+ /* Clobbers EFLAGS.ZF */
+ VM_CLEAR_CPU_BUFFERS
+
/* Enter guest mode */
3: vmrun %_ASM_AX
4:
@@ -335,6 +338,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
mov SVM_current_vmcb(%rdi), %rax
mov KVM_VMCB_pa(%rax), %rax
+ /* Clobbers EFLAGS.ZF */
+ VM_CLEAR_CPU_BUFFERS
+
/* Enter guest mode */
1: vmrun %rax
2:
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index b952bc673271..ec79aacc446f 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -173,6 +173,8 @@ static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char i
tdx_clear_unsupported_cpuid(entry);
}
+#define TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT BIT(1)
+
static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf,
struct kvm_tdx_capabilities *caps)
{
@@ -188,6 +190,9 @@ static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf,
caps->cpuid.nent = td_conf->num_cpuid_config;
+ caps->user_tdvmcallinfo_1_r11 =
+ TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT;
+
for (i = 0; i < td_conf->num_cpuid_config; i++)
td_init_cpuid_entry2(&caps->cpuid.entries[i], i);
@@ -1212,11 +1217,13 @@ static int tdx_map_gpa(struct kvm_vcpu *vcpu)
/*
* Converting TDVMCALL_MAP_GPA to KVM_HC_MAP_GPA_RANGE requires
* userspace to enable KVM_CAP_EXIT_HYPERCALL with KVM_HC_MAP_GPA_RANGE
- * bit set. If not, the error code is not defined in GHCI for TDX, use
- * TDVMCALL_STATUS_INVALID_OPERAND for this case.
+ * bit set. This is a base call so it should always be supported, but
+ * KVM has no way to ensure that userspace implements the GHCI correctly.
+ * So if KVM_HC_MAP_GPA_RANGE does not cause a VMEXIT, return an error
+ * to the guest.
*/
if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) {
- ret = TDVMCALL_STATUS_INVALID_OPERAND;
+ ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED;
goto error;
}
@@ -1449,20 +1456,106 @@ error:
return 1;
}
+static int tdx_complete_get_td_vm_call_info(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+ tdvmcall_set_return_code(vcpu, vcpu->run->tdx.get_tdvmcall_info.ret);
+
+ /*
+	 * For now, KVM supports no TDVMCALL beyond the GHCI base API without
+	 * help from userspace, so simply forward the values that userspace
+	 * returned.
+ */
+ tdx->vp_enter_args.r11 = vcpu->run->tdx.get_tdvmcall_info.r11;
+ tdx->vp_enter_args.r12 = vcpu->run->tdx.get_tdvmcall_info.r12;
+ tdx->vp_enter_args.r13 = vcpu->run->tdx.get_tdvmcall_info.r13;
+ tdx->vp_enter_args.r14 = vcpu->run->tdx.get_tdvmcall_info.r14;
+
+ return 1;
+}
+
static int tdx_get_td_vm_call_info(struct kvm_vcpu *vcpu)
{
struct vcpu_tdx *tdx = to_tdx(vcpu);
- if (tdx->vp_enter_args.r12)
- tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND);
- else {
+ switch (tdx->vp_enter_args.r12) {
+ case 0:
tdx->vp_enter_args.r11 = 0;
+ tdx->vp_enter_args.r12 = 0;
tdx->vp_enter_args.r13 = 0;
tdx->vp_enter_args.r14 = 0;
+ tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUCCESS);
+ return 1;
+ case 1:
+ vcpu->run->tdx.get_tdvmcall_info.leaf = tdx->vp_enter_args.r12;
+ vcpu->run->exit_reason = KVM_EXIT_TDX;
+ vcpu->run->tdx.flags = 0;
+ vcpu->run->tdx.nr = TDVMCALL_GET_TD_VM_CALL_INFO;
+ vcpu->run->tdx.get_tdvmcall_info.ret = TDVMCALL_STATUS_SUCCESS;
+ vcpu->run->tdx.get_tdvmcall_info.r11 = 0;
+ vcpu->run->tdx.get_tdvmcall_info.r12 = 0;
+ vcpu->run->tdx.get_tdvmcall_info.r13 = 0;
+ vcpu->run->tdx.get_tdvmcall_info.r14 = 0;
+ vcpu->arch.complete_userspace_io = tdx_complete_get_td_vm_call_info;
+ return 0;
+ default:
+ tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND);
+ return 1;
}
+}
+
+static int tdx_complete_simple(struct kvm_vcpu *vcpu)
+{
+ tdvmcall_set_return_code(vcpu, vcpu->run->tdx.unknown.ret);
return 1;
}
+static int tdx_get_quote(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_tdx *tdx = to_tdx(vcpu);
+ u64 gpa = tdx->vp_enter_args.r12;
+ u64 size = tdx->vp_enter_args.r13;
+
+	/* The GPA of the buffer must have the shared bit set. */
+ if (vt_is_tdx_private_gpa(vcpu->kvm, gpa)) {
+ tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND);
+ return 1;
+ }
+
+ vcpu->run->exit_reason = KVM_EXIT_TDX;
+ vcpu->run->tdx.flags = 0;
+ vcpu->run->tdx.nr = TDVMCALL_GET_QUOTE;
+ vcpu->run->tdx.get_quote.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED;
+ vcpu->run->tdx.get_quote.gpa = gpa & ~gfn_to_gpa(kvm_gfn_direct_bits(tdx->vcpu.kvm));
+ vcpu->run->tdx.get_quote.size = size;
+
+ vcpu->arch.complete_userspace_io = tdx_complete_simple;
+
+ return 0;
+}
+
+static int tdx_setup_event_notify_interrupt(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_tdx *tdx = to_tdx(vcpu);
+ u64 vector = tdx->vp_enter_args.r12;
+
+ if (vector < 32 || vector > 255) {
+ tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND);
+ return 1;
+ }
+
+ vcpu->run->exit_reason = KVM_EXIT_TDX;
+ vcpu->run->tdx.flags = 0;
+ vcpu->run->tdx.nr = TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT;
+ vcpu->run->tdx.setup_event_notify.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED;
+ vcpu->run->tdx.setup_event_notify.vector = vector;
+
+ vcpu->arch.complete_userspace_io = tdx_complete_simple;
+
+ return 0;
+}
+
static int handle_tdvmcall(struct kvm_vcpu *vcpu)
{
switch (tdvmcall_leaf(vcpu)) {
@@ -1472,11 +1565,15 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu)
return tdx_report_fatal_error(vcpu);
case TDVMCALL_GET_TD_VM_CALL_INFO:
return tdx_get_td_vm_call_info(vcpu);
+ case TDVMCALL_GET_QUOTE:
+ return tdx_get_quote(vcpu);
+ case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT:
+ return tdx_setup_event_notify_interrupt(vcpu);
default:
break;
}
- tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND);
+ tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED);
return 1;
}
@@ -2172,25 +2269,26 @@ static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
const struct tdx_sys_info_td_conf *td_conf = &tdx_sysinfo->td_conf;
struct kvm_tdx_capabilities __user *user_caps;
struct kvm_tdx_capabilities *caps = NULL;
+ u32 nr_user_entries;
int ret = 0;
/* flags is reserved for future use */
if (cmd->flags)
return -EINVAL;
- caps = kmalloc(sizeof(*caps) +
+ caps = kzalloc(sizeof(*caps) +
sizeof(struct kvm_cpuid_entry2) * td_conf->num_cpuid_config,
GFP_KERNEL);
if (!caps)
return -ENOMEM;
user_caps = u64_to_user_ptr(cmd->data);
- if (copy_from_user(caps, user_caps, sizeof(*caps))) {
+ if (get_user(nr_user_entries, &user_caps->cpuid.nent)) {
ret = -EFAULT;
goto out;
}
- if (caps->cpuid.nent < td_conf->num_cpuid_config) {
+ if (nr_user_entries < td_conf->num_cpuid_config) {
ret = -E2BIG;
goto out;
}
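The new GET_QUOTE and SETUP_EVENT_NOTIFY_INTERRUPT handlers above both use KVM's complete_userspace_io convention: returning 0 exits to the VMM, and a saved callback folds the VMM's answer back into guest state on the next KVM_RUN. A stripped-down model of that round trip (all names here are demo stand-ins, not KVM's API):

    #include <stdio.h>

    struct run { long ret; };
    struct vcpu {
        struct run run;
        int (*complete_userspace_io)(struct vcpu *v);
    };

    static int complete_simple(struct vcpu *v)
    {
        printf("guest sees ret=%ld\n", v->run.ret);  /* fold result back */
        return 1;                                    /* re-enter the guest */
    }

    static int handle_tdvmcall(struct vcpu *v)
    {
        v->complete_userspace_io = complete_simple;
        return 0;                /* 0 = bounce this call out to the VMM */
    }

    int main(void)
    {
        struct vcpu v = { { 0 }, 0 };

        if (!handle_tdvmcall(&v)) {          /* kernel exits to userspace */
            v.run.ret = 0;                   /* VMM fills in the reply    */
            v.complete_userspace_io(&v);     /* consumed on the next run  */
        }
        return 0;
    }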
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4953846cb30d..191a9ed0da22 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7291,7 +7291,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vmx_l1d_flush(vcpu);
else if (static_branch_unlikely(&cpu_buf_vm_clear) &&
kvm_arch_has_assigned_device(vcpu->kvm))
- mds_clear_cpu_buffers();
+ x86_clear_cpu_buffers();
vmx_disable_fb_clear(vmx);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b58a74c1722d..93636f77c42d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3258,9 +3258,11 @@ int kvm_guest_time_update(struct kvm_vcpu *v)
/* With all the info we got, fill in the values */
- if (kvm_caps.has_tsc_control)
+ if (kvm_caps.has_tsc_control) {
tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz,
v->arch.l1_tsc_scaling_ratio);
+ tgt_tsc_khz = tgt_tsc_khz ? : 1;
+ }
if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
@@ -6186,6 +6188,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
u32 user_tsc_khz;
r = -EINVAL;
+
+ if (vcpu->arch.guest_tsc_protected)
+ goto out;
+
user_tsc_khz = (u32)arg;
if (kvm_caps.has_tsc_control &&
@@ -11035,7 +11041,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (unlikely(vcpu->arch.switch_db_regs &&
!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_AUTO_SWITCH))) {
- set_debugreg(0, 7);
+ set_debugreg(DR7_FIXED_1, 7);
set_debugreg(vcpu->arch.eff_db[0], 0);
set_debugreg(vcpu->arch.eff_db[1], 1);
set_debugreg(vcpu->arch.eff_db[2], 2);
@@ -11044,7 +11050,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
kvm_x86_call(set_dr6)(vcpu, vcpu->arch.dr6);
} else if (unlikely(hw_breakpoint_active())) {
- set_debugreg(0, 7);
+ set_debugreg(DR7_FIXED_1, 7);
}
vcpu->arch.host_debugctl = get_debugctlmsr();
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 9b029bb29a16..d6b2a665b499 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1526,7 +1526,7 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
if (kvm_read_guest_virt(vcpu, (gva_t)sched_poll.ports, ports,
sched_poll.nr_ports * sizeof(*ports), &e)) {
*r = -EFAULT;
- return true;
+ goto out;
}
for (i = 0; i < sched_poll.nr_ports; i++) {
@@ -1971,8 +1971,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm,
{
struct kvm_vcpu *vcpu;
- if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm))
- return -EINVAL;
+ /*
+ * Don't check for the port being within range of max_evtchn_port().
+	 * Userspace can configure whatever targets it likes; events just won't
+ * be delivered if/while the target is invalid, just like userspace can
+ * configure MSIs which target non-existent APICs.
+ *
+	 * This allows the IRQ routing table to be restored on Live Migration
+	 * and Live Update *independently* of other things like creating vCPUs,
+ * without imposing an ordering dependency on userspace. In this
+ * particular case, the problematic ordering would be with setting the
+ * Xen 'long mode' flag, which changes max_evtchn_port() to allow 4096
+ * instead of 1024 event channels.
+ */
/* We only support 2 level event channels for now */
if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 607d6a2e66e2..8a34fff6ab2b 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -30,7 +30,6 @@
#include <linux/initrd.h>
#include <linux/cpumask.h>
#include <linux/gfp.h>
-#include <linux/execmem.h>
#include <asm/asm.h>
#include <asm/bios_ebda.h>
@@ -749,8 +748,6 @@ void mark_rodata_ro(void)
pr_info("Write protecting kernel text and read-only data: %luk\n",
size >> 10);
- execmem_cache_make_ro();
-
kernel_set_to_readonly = 1;
#ifdef CONFIG_CPA_DEBUG
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ee66fae9ebcc..fdb6cab524f0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -34,7 +34,6 @@
#include <linux/gfp.h>
#include <linux/kcore.h>
#include <linux/bootmem_info.h>
-#include <linux/execmem.h>
#include <asm/processor.h>
#include <asm/bios_ebda.h>
@@ -1392,8 +1391,6 @@ void mark_rodata_ro(void)
(end - start) >> 10);
set_memory_ro(start, (end - start) >> PAGE_SHIFT);
- execmem_cache_make_ro();
-
kernel_set_to_readonly = 1;
/*
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 46edc11726b7..8834c76f91c9 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1257,6 +1257,9 @@ static int collapse_pmd_page(pmd_t *pmd, unsigned long addr,
pgprot_t pgprot;
int i = 0;
+ if (!cpu_feature_enabled(X86_FEATURE_PSE))
+ return 0;
+
addr &= PMD_MASK;
pte = pte_offset_kernel(pmd, addr);
first = *pte;
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 190299834011..c0c40b67524e 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -98,6 +98,11 @@ void __init pti_check_boottime_disable(void)
return;
setup_force_cpu_cap(X86_FEATURE_PTI);
+
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ pr_debug("PTI enabled, disabling INVLPGB\n");
+ setup_clear_cpu_cap(X86_FEATURE_INVLPGB);
+ }
}
static int __init pti_parse_cmdline(char *arg)
diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
index a7c23f2a58c9..a2294c1649f6 100644
--- a/arch/x86/power/hibernate.c
+++ b/arch/x86/power/hibernate.c
@@ -192,7 +192,8 @@ out:
int arch_resume_nosmt(void)
{
- int ret = 0;
+ int ret;
+
/*
* We reached this while coming out of hibernation. This means
* that SMT siblings are sleeping in hlt, as mwait is not safe
@@ -206,18 +207,10 @@ int arch_resume_nosmt(void)
* Called with hotplug disabled.
*/
cpu_hotplug_enable();
- if (cpu_smt_control == CPU_SMT_DISABLED ||
- cpu_smt_control == CPU_SMT_FORCE_DISABLED) {
- enum cpuhp_smt_control old = cpu_smt_control;
-
- ret = cpuhp_smt_enable();
- if (ret)
- goto out;
- ret = cpuhp_smt_disable(old);
- if (ret)
- goto out;
- }
-out:
+
+ ret = arch_cpu_rescan_dead_smt_siblings();
+
cpu_hotplug_disable();
+
return ret;
}
diff --git a/arch/x86/um/ptrace.c b/arch/x86/um/ptrace.c
index 3275870330fe..fae8aabad10f 100644
--- a/arch/x86/um/ptrace.c
+++ b/arch/x86/um/ptrace.c
@@ -161,7 +161,7 @@ static int fpregs_legacy_set(struct task_struct *target,
from = kbuf;
}
- return um_fxsr_from_i387(fxsave, &buf);
+ return um_fxsr_from_i387(fxsave, from);
}
#endif
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 2457d13c3f9e..c7a9a087ccaf 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -75,8 +75,9 @@ static inline void seamcall_err_ret(u64 fn, u64 err,
args->r9, args->r10, args->r11);
}
-static inline int sc_retry_prerr(sc_func_t func, sc_err_func_t err_func,
- u64 fn, struct tdx_module_args *args)
+static __always_inline int sc_retry_prerr(sc_func_t func,
+ sc_err_func_t err_func,
+ u64 fn, struct tdx_module_args *args)
{
u64 sret = sc_retry(func, fn, args);
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index cc5dba738389..13fe45dea296 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -3,7 +3,6 @@ generated-y += syscall_table.h
generic-y += extable.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
-generic-y += param.h
generic-y += parport.h
generic-y += qrwlock.h
generic-y += qspinlock.h
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index cb1725c40e36..d62aa1c316fc 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -349,7 +349,7 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/xtensa/include/uapi/asm/param.h b/arch/xtensa/include/uapi/asm/param.h
deleted file mode 100644
index e6feb4ee0590..000000000000
--- a/arch/xtensa/include/uapi/asm/param.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * include/asm-xtensa/param.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-
-#ifndef _UAPI_XTENSA_PARAM_H
-#define _UAPI_XTENSA_PARAM_H
-
-#ifndef __KERNEL__
-# define HZ 100
-#endif
-
-#define EXEC_PAGESIZE 4096
-
-#ifndef NGROUPS
-#define NGROUPS 32
-#endif
-
-#ifndef NOGROUP
-#define NOGROUP (-1)
-#endif
-
-#define MAXHOSTNAMELEN 64 /* max length of hostname */
-
-#endif /* _UAPI_XTENSA_PARAM_H */
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 3af1d284add5..70d704615be5 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -998,20 +998,20 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
if (!plug || rq_list_empty(&plug->mq_list))
return false;
- rq_list_for_each(&plug->mq_list, rq) {
- if (rq->q == q) {
- if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
- BIO_MERGE_OK)
- return true;
- break;
- }
+ rq = plug->mq_list.tail;
+ if (rq->q == q)
+ return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
+ BIO_MERGE_OK;
+ else if (!plug->multiple_queues)
+ return false;
- /*
- * Only keep iterating plug list for merges if we have multiple
- * queues
- */
- if (!plug->multiple_queues)
- break;
+ rq_list_for_each(&plug->mq_list, rq) {
+ if (rq->q != q)
+ continue;
+ if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
+ BIO_MERGE_OK)
+ return true;
+ break;
}
return false;
}
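The restructured blk_attempt_plug_merge() above probes the most recently plugged request first and only walks the whole list when the plug spans several queues. A simplified model of that tail-first probe (plain singly linked list instead of the kernel's rq_list; the merge itself is stubbed):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    struct rq { struct rq *next; int q; };
    struct plug { struct rq *tail; bool multiple_queues; };

    static bool attempt_merge(struct rq *rq) { (void)rq; return true; }

    static bool plug_merge(struct plug *plug, int q)
    {
        struct rq *rq = plug->tail;

        if (rq->q == q)                  /* fast path: last request matches */
            return attempt_merge(rq);
        if (!plug->multiple_queues)      /* one queue plugged: nothing else fits */
            return false;

        for (rq = rq->next; rq; rq = rq->next)   /* rare multi-queue walk */
            if (rq->q == q)
                return attempt_merge(rq);
        return false;
    }

    int main(void)
    {
        struct rq a = { NULL, 1 }, b = { &a, 2 };
        struct plug p = { &b, true };

        printf("merged on q1: %d\n", plug_merge(&p, 1));
        return 0;
    }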
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index b2b9b89d6967..c611444480b3 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -960,4 +960,5 @@ void blk_unregister_queue(struct gendisk *disk)
elevator_set_none(q);
blk_debugfs_remove(disk);
+ kobject_put(&disk->queue_kobj);
}
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 8f15d1aa6eb8..351d659280e1 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -1225,6 +1225,7 @@ void blk_zone_write_plug_bio_endio(struct bio *bio)
if (bio_flagged(bio, BIO_EMULATES_ZONE_APPEND)) {
bio->bi_opf &= ~REQ_OP_MASK;
bio->bi_opf |= REQ_OP_ZONE_APPEND;
+ bio_clear_flag(bio, BIO_EMULATES_ZONE_APPEND);
}
/*
@@ -1306,7 +1307,6 @@ again:
spin_unlock_irqrestore(&zwplug->lock, flags);
bdev = bio->bi_bdev;
- submit_bio_noacct_nocheck(bio);
/*
 	 * blk-mq devices will reuse the extra reference on the request queue,
 	 * while the submission path for BIO-based devices will not. So drop this extra
* path for BIO-based devices will not do that. So drop this extra
* reference here.
*/
- if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO))
+ if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) {
+ bdev->bd_disk->fops->submit_bio(bio);
blk_queue_exit(bdev->bd_disk->queue);
+ } else {
+ blk_mq_submit_bio(bio);
+ }
put_zwplug:
/* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */
diff --git a/block/elevator.c b/block/elevator.c
index ab22542e6cf0..a960bdc869bc 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -719,7 +719,8 @@ void elevator_set_default(struct request_queue *q)
.name = "mq-deadline",
.no_uevent = true,
};
- int err = 0;
+ int err;
+ struct elevator_type *e;
/* now we allow to switch elevator */
blk_queue_flag_clear(QUEUE_FLAG_NO_ELV_SWITCH, q);
@@ -732,12 +733,18 @@ void elevator_set_default(struct request_queue *q)
* have multiple queues or mq-deadline is not available, default
* to "none".
*/
- if (elevator_find_get(ctx.name) && (q->nr_hw_queues == 1 ||
- blk_mq_is_shared_tags(q->tag_set->flags)))
+ e = elevator_find_get(ctx.name);
+ if (!e)
+ return;
+
+ if ((q->nr_hw_queues == 1 ||
+ blk_mq_is_shared_tags(q->tag_set->flags))) {
err = elevator_change(q, &ctx);
- if (err < 0)
- pr_warn("\"%s\" elevator initialization, failed %d, "
- "falling back to \"none\"\n", ctx.name, err);
+ if (err < 0)
+ pr_warn("\"%s\" elevator initialization, failed %d, falling back to \"none\"\n",
+ ctx.name, err);
+ }
+ elevator_put(e);
}
void elevator_set_none(struct request_queue *q)
diff --git a/block/fops.c b/block/fops.c
index 1309861d4c2c..f34e7315c83c 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -496,18 +496,21 @@ static void blkdev_readahead(struct readahead_control *rac)
mpage_readahead(rac, blkdev_get_block);
}
-static int blkdev_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
+static int blkdev_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping, loff_t pos,
+ unsigned len, struct folio **foliop,
+ void **fsdata)
{
return block_write_begin(mapping, pos, len, foliop, blkdev_get_block);
}
-static int blkdev_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied, struct folio *folio,
- void *fsdata)
+static int blkdev_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio, void *fsdata)
{
int ret;
- ret = block_write_end(file, mapping, pos, len, copied, folio, fsdata);
+ ret = block_write_end(pos, len, copied, folio);
folio_unlock(folio);
folio_put(folio);
diff --git a/block/genhd.c b/block/genhd.c
index 8171a6bc3210..c26733f6324b 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -128,23 +128,27 @@ static void part_stat_read_all(struct block_device *part,
static void bdev_count_inflight_rw(struct block_device *part,
unsigned int inflight[2], bool mq_driver)
{
+ int write = 0;
+ int read = 0;
int cpu;
if (mq_driver) {
blk_mq_in_driver_rw(part, inflight);
- } else {
- for_each_possible_cpu(cpu) {
- inflight[READ] += part_stat_local_read_cpu(
- part, in_flight[READ], cpu);
- inflight[WRITE] += part_stat_local_read_cpu(
- part, in_flight[WRITE], cpu);
- }
+ return;
+ }
+
+ for_each_possible_cpu(cpu) {
+ read += part_stat_local_read_cpu(part, in_flight[READ], cpu);
+ write += part_stat_local_read_cpu(part, in_flight[WRITE], cpu);
}
- if (WARN_ON_ONCE((int)inflight[READ] < 0))
- inflight[READ] = 0;
- if (WARN_ON_ONCE((int)inflight[WRITE] < 0))
- inflight[WRITE] = 0;
+ /*
+	 * While iterating over all CPUs, an IO may be issued on a CPU that has
+	 * already been traversed and completed on one that has not, so the
+	 * summed inflight count can be transiently negative.
+ */
+ inflight[READ] = read > 0 ? read : 0;
+ inflight[WRITE] = write > 0 ? write : 0;
}
/**
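The clamp above exists because summing per-CPU in-flight deltas is racy: a completion can be counted on an already-visited CPU's counter's behalf before its issue becomes visible, leaving a transiently negative total. A tiny standalone demonstration of the clamp:

    #include <stdio.h>

    #define NR_CPUS 4

    /* signed per-CPU deltas: issue on one CPU, complete on another */
    static int per_cpu_inflight[NR_CPUS] = { 2, -1, 0, -2 };

    int main(void)
    {
        int sum = 0;

        for (int cpu = 0; cpu < NR_CPUS; cpu++)
            sum += per_cpu_inflight[cpu];

        /* a completion may be observed before its matching issue */
        printf("inflight=%d\n", sum > 0 ? sum : 0);
        return 0;
    }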
diff --git a/crypto/Kconfig b/crypto/Kconfig
index e9fee7818e27..e1cfd0d4cc8f 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -176,16 +176,33 @@ config CRYPTO_USER
config CRYPTO_SELFTESTS
bool "Enable cryptographic self-tests"
- depends on DEBUG_KERNEL
+ depends on EXPERT
help
Enable the cryptographic self-tests.
The cryptographic self-tests run at boot time, or at algorithm
registration time if algorithms are dynamically loaded later.
- This is primarily intended for developer use. It should not be
- enabled in production kernels, unless you are trying to use these
- tests to fulfill a FIPS testing requirement.
+ There are two main use cases for these tests:
+
+ - Development and pre-release testing. In this case, also enable
+ CRYPTO_SELFTESTS_FULL to get the full set of tests. All crypto code
+ in the kernel is expected to pass the full set of tests.
+
+	  - Production kernels, to help prevent buggy drivers from being used
+	    and/or to meet FIPS 140-3 pre-operational testing requirements.  In
+ this case, enable CRYPTO_SELFTESTS but not CRYPTO_SELFTESTS_FULL.
+
+config CRYPTO_SELFTESTS_FULL
+ bool "Enable the full set of cryptographic self-tests"
+ depends on CRYPTO_SELFTESTS
+ help
+ Enable the full set of cryptographic self-tests for each algorithm.
+
+ The full set of tests should be enabled for development and
+ pre-release testing, but not in production kernels.
+
+ All crypto code in the kernel is expected to pass the full tests.
config CRYPTO_NULL
tristate "Null algorithms"
diff --git a/crypto/ahash.c b/crypto/ahash.c
index e10bc2659ae4..bc84a07c924c 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -600,12 +600,14 @@ static void ahash_def_finup_done2(void *data, int err)
static int ahash_def_finup_finish1(struct ahash_request *req, int err)
{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+
if (err)
goto out;
req->base.complete = ahash_def_finup_done2;
- err = crypto_ahash_final(req);
+ err = crypto_ahash_alg(tfm)->final(req);
if (err == -EINPROGRESS || err == -EBUSY)
return err;
diff --git a/crypto/hkdf.c b/crypto/hkdf.c
index f24c2a8d4df9..82d1b32ca6ce 100644
--- a/crypto/hkdf.c
+++ b/crypto/hkdf.c
@@ -566,7 +566,7 @@ static int __init crypto_hkdf_module_init(void)
static void __exit crypto_hkdf_module_exit(void) {}
-module_init(crypto_hkdf_module_init);
+late_initcall(crypto_hkdf_module_init);
module_exit(crypto_hkdf_module_exit);
MODULE_LICENSE("GPL");
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 72005074a5c2..32f753d6c430 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -45,6 +45,7 @@ static bool notests;
module_param(notests, bool, 0644);
MODULE_PARM_DESC(notests, "disable all crypto self-tests");
+#ifdef CONFIG_CRYPTO_SELFTESTS_FULL
static bool noslowtests;
module_param(noslowtests, bool, 0644);
MODULE_PARM_DESC(noslowtests, "disable slow crypto self-tests");
@@ -52,6 +53,10 @@ MODULE_PARM_DESC(noslowtests, "disable slow crypto self-tests");
static unsigned int fuzz_iterations = 100;
module_param(fuzz_iterations, uint, 0644);
MODULE_PARM_DESC(fuzz_iterations, "number of fuzz test iterations");
+#else
+#define noslowtests 1
+#define fuzz_iterations 0
+#endif
#ifndef CONFIG_CRYPTO_SELFTESTS
@@ -319,9 +324,9 @@ struct testvec_config {
/*
* The following are the lists of testvec_configs to test for each algorithm
- * type when the fast crypto self-tests are enabled. They aim to provide good
- * test coverage, while keeping the test time much shorter than the full tests
- * so that the fast tests can be used to fulfill FIPS 140 testing requirements.
+ * type when the "fast" crypto self-tests are enabled. They aim to provide good
+ * test coverage, while keeping the test time much shorter than the "full" tests
+ * so that the "fast" tests can be enabled in a wider range of circumstances.
*/
/* Configs for skciphers and aeads */
@@ -1183,14 +1188,18 @@ static void generate_random_testvec_config(struct rnd_state *rng,
static void crypto_disable_simd_for_test(void)
{
+#ifdef CONFIG_CRYPTO_SELFTESTS_FULL
migrate_disable();
__this_cpu_write(crypto_simd_disabled_for_test, true);
+#endif
}
static void crypto_reenable_simd_for_test(void)
{
+#ifdef CONFIG_CRYPTO_SELFTESTS_FULL
__this_cpu_write(crypto_simd_disabled_for_test, false);
migrate_enable();
+#endif
}
/*
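The #ifdef dance in the testmgr.c hunk above is a common kernel pattern: when the config option is off, the module parameter collapses into a compile-time constant so the compiler drops the gated paths entirely. A minimal sketch (CONFIG_FULL is a stand-in macro for this demo, not a real Kconfig symbol):

    #include <stdbool.h>
    #include <stdio.h>

    #ifdef CONFIG_FULL
    static bool noslowtests;         /* runtime-tunable module parameter */
    #else
    #define noslowtests 1            /* constant: the branch below is elided */
    #endif

    static void run_tests(void)
    {
        printf("fast tests\n");
        if (!noslowtests)
            printf("slow fuzz tests\n");   /* dead code when CONFIG_FULL off */
    }

    int main(void)
    {
        run_tests();
        return 0;
    }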
diff --git a/crypto/wp512.c b/crypto/wp512.c
index 41f13d490333..229b189a7988 100644
--- a/crypto/wp512.c
+++ b/crypto/wp512.c
@@ -21,10 +21,10 @@
*/
#include <crypto/internal/hash.h>
#include <linux/init.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/mm.h>
-#include <asm/byteorder.h>
-#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
#define WP512_DIGEST_SIZE 64
#define WP384_DIGEST_SIZE 48
@@ -37,9 +37,6 @@
struct wp512_ctx {
u8 bitLength[WP512_LENGTHBYTES];
- u8 buffer[WP512_BLOCK_SIZE];
- int bufferBits;
- int bufferPos;
u64 hash[WP512_DIGEST_SIZE/8];
};
@@ -779,16 +776,16 @@ static const u64 rc[WHIRLPOOL_ROUNDS] = {
* The core Whirlpool transform.
*/
-static __no_kmsan_checks void wp512_process_buffer(struct wp512_ctx *wctx) {
+static __no_kmsan_checks void wp512_process_buffer(struct wp512_ctx *wctx,
+ const u8 *buffer) {
int i, r;
u64 K[8]; /* the round key */
u64 block[8]; /* mu(buffer) */
u64 state[8]; /* the cipher state */
u64 L[8];
- const __be64 *buffer = (const __be64 *)wctx->buffer;
for (i = 0; i < 8; i++)
- block[i] = be64_to_cpu(buffer[i]);
+ block[i] = get_unaligned_be64(buffer + i * 8);
state[0] = block[0] ^ (K[0] = wctx->hash[0]);
state[1] = block[1] ^ (K[1] = wctx->hash[1]);
@@ -991,8 +988,6 @@ static int wp512_init(struct shash_desc *desc) {
int i;
memset(wctx->bitLength, 0, 32);
- wctx->bufferBits = wctx->bufferPos = 0;
- wctx->buffer[0] = 0;
for (i = 0; i < 8; i++) {
wctx->hash[i] = 0L;
}
@@ -1000,84 +995,54 @@ static int wp512_init(struct shash_desc *desc) {
return 0;
}
-static int wp512_update(struct shash_desc *desc, const u8 *source,
- unsigned int len)
+static void wp512_add_length(u8 *bitLength, u64 value)
{
- struct wp512_ctx *wctx = shash_desc_ctx(desc);
- int sourcePos = 0;
- unsigned int bits_len = len * 8; // convert to number of bits
- int sourceGap = (8 - ((int)bits_len & 7)) & 7;
- int bufferRem = wctx->bufferBits & 7;
+ u32 carry;
int i;
- u32 b, carry;
- u8 *buffer = wctx->buffer;
- u8 *bitLength = wctx->bitLength;
- int bufferBits = wctx->bufferBits;
- int bufferPos = wctx->bufferPos;
- u64 value = bits_len;
for (i = 31, carry = 0; i >= 0 && (carry != 0 || value != 0ULL); i--) {
carry += bitLength[i] + ((u32)value & 0xff);
bitLength[i] = (u8)carry;
carry >>= 8;
value >>= 8;
}
- while (bits_len > 8) {
- b = ((source[sourcePos] << sourceGap) & 0xff) |
- ((source[sourcePos + 1] & 0xff) >> (8 - sourceGap));
- buffer[bufferPos++] |= (u8)(b >> bufferRem);
- bufferBits += 8 - bufferRem;
- if (bufferBits == WP512_BLOCK_SIZE * 8) {
- wp512_process_buffer(wctx);
- bufferBits = bufferPos = 0;
- }
- buffer[bufferPos] = b << (8 - bufferRem);
- bufferBits += bufferRem;
- bits_len -= 8;
- sourcePos++;
- }
- if (bits_len > 0) {
- b = (source[sourcePos] << sourceGap) & 0xff;
- buffer[bufferPos] |= b >> bufferRem;
- } else {
- b = 0;
- }
- if (bufferRem + bits_len < 8) {
- bufferBits += bits_len;
- } else {
- bufferPos++;
- bufferBits += 8 - bufferRem;
- bits_len -= 8 - bufferRem;
- if (bufferBits == WP512_BLOCK_SIZE * 8) {
- wp512_process_buffer(wctx);
- bufferBits = bufferPos = 0;
- }
- buffer[bufferPos] = b << (8 - bufferRem);
- bufferBits += (int)bits_len;
- }
+}
- wctx->bufferBits = bufferBits;
- wctx->bufferPos = bufferPos;
+static int wp512_update(struct shash_desc *desc, const u8 *source,
+ unsigned int len)
+{
+ struct wp512_ctx *wctx = shash_desc_ctx(desc);
+ unsigned int remain = len % WP512_BLOCK_SIZE;
+ u64 bits_len = (len - remain) * 8ull;
+ u8 *bitLength = wctx->bitLength;
- return 0;
+ wp512_add_length(bitLength, bits_len);
+ do {
+ wp512_process_buffer(wctx, source);
+ source += WP512_BLOCK_SIZE;
+ bits_len -= WP512_BLOCK_SIZE * 8;
+ } while (bits_len);
+
+ return remain;
}
-static int wp512_final(struct shash_desc *desc, u8 *out)
+static int wp512_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int bufferPos, u8 *out)
{
struct wp512_ctx *wctx = shash_desc_ctx(desc);
int i;
- u8 *buffer = wctx->buffer;
u8 *bitLength = wctx->bitLength;
- int bufferBits = wctx->bufferBits;
- int bufferPos = wctx->bufferPos;
__be64 *digest = (__be64 *)out;
+ u8 buffer[WP512_BLOCK_SIZE];
- buffer[bufferPos] |= 0x80U >> (bufferBits & 7);
+ wp512_add_length(bitLength, bufferPos * 8);
+ memcpy(buffer, src, bufferPos);
+ buffer[bufferPos] = 0x80U;
bufferPos++;
if (bufferPos > WP512_BLOCK_SIZE - WP512_LENGTHBYTES) {
if (bufferPos < WP512_BLOCK_SIZE)
memset(&buffer[bufferPos], 0, WP512_BLOCK_SIZE - bufferPos);
- wp512_process_buffer(wctx);
+ wp512_process_buffer(wctx, buffer);
bufferPos = 0;
}
if (bufferPos < WP512_BLOCK_SIZE - WP512_LENGTHBYTES)
@@ -1086,31 +1051,32 @@ static int wp512_final(struct shash_desc *desc, u8 *out)
bufferPos = WP512_BLOCK_SIZE - WP512_LENGTHBYTES;
memcpy(&buffer[WP512_BLOCK_SIZE - WP512_LENGTHBYTES],
bitLength, WP512_LENGTHBYTES);
- wp512_process_buffer(wctx);
+ wp512_process_buffer(wctx, buffer);
+ memzero_explicit(buffer, sizeof(buffer));
for (i = 0; i < WP512_DIGEST_SIZE/8; i++)
digest[i] = cpu_to_be64(wctx->hash[i]);
- wctx->bufferBits = bufferBits;
- wctx->bufferPos = bufferPos;
return 0;
}
-static int wp384_final(struct shash_desc *desc, u8 *out)
+static int wp384_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
{
u8 D[64];
- wp512_final(desc, D);
+ wp512_finup(desc, src, len, D);
memcpy(out, D, WP384_DIGEST_SIZE);
memzero_explicit(D, WP512_DIGEST_SIZE);
return 0;
}
-static int wp256_final(struct shash_desc *desc, u8 *out)
+static int wp256_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
{
u8 D[64];
- wp512_final(desc, D);
+ wp512_finup(desc, src, len, D);
memcpy(out, D, WP256_DIGEST_SIZE);
memzero_explicit(D, WP512_DIGEST_SIZE);
@@ -1121,11 +1087,12 @@ static struct shash_alg wp_algs[3] = { {
.digestsize = WP512_DIGEST_SIZE,
.init = wp512_init,
.update = wp512_update,
- .final = wp512_final,
+ .finup = wp512_finup,
.descsize = sizeof(struct wp512_ctx),
.base = {
.cra_name = "wp512",
.cra_driver_name = "wp512-generic",
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = WP512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -1133,11 +1100,12 @@ static struct shash_alg wp_algs[3] = { {
.digestsize = WP384_DIGEST_SIZE,
.init = wp512_init,
.update = wp512_update,
- .final = wp384_final,
+ .finup = wp384_finup,
.descsize = sizeof(struct wp512_ctx),
.base = {
.cra_name = "wp384",
.cra_driver_name = "wp384-generic",
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = WP512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -1145,11 +1113,12 @@ static struct shash_alg wp_algs[3] = { {
.digestsize = WP256_DIGEST_SIZE,
.init = wp512_init,
.update = wp512_update,
- .final = wp256_final,
+ .finup = wp256_finup,
.descsize = sizeof(struct wp512_ctx),
.base = {
.cra_name = "wp256",
.cra_driver_name = "wp256-generic",
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = WP512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
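On the wp512 rewrite above: with CRYPTO_AHASH_ALG_BLOCK_ONLY, ->update() consumes only whole blocks and returns how many tail bytes it left behind; the crypto core buffers that tail and hands it to ->finup(). A standalone sketch of just that contract (the compression step is stubbed out):

    #include <stdio.h>

    #define BLOCK 64

    static void process_block(const unsigned char *p) { (void)p; /* absorb */ }

    /* Block-only update: hash whole blocks, report the leftover byte count. */
    static unsigned int update(const unsigned char *src, unsigned int len)
    {
        unsigned int remain = len % BLOCK;

        for (; len >= BLOCK; len -= BLOCK, src += BLOCK)
            process_block(src);
        return remain;    /* caller (the crypto core) buffers these bytes */
    }

    int main(void)
    {
        unsigned char data[150] = { 0 };

        printf("leftover=%u\n", update(data, sizeof(data)));  /* 150 % 64 = 22 */
        return 0;
    }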
diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c
index dc3a072ce3b6..f28a060a8810 100644
--- a/drivers/accel/amdxdna/aie2_psp.c
+++ b/drivers/accel/amdxdna/aie2_psp.c
@@ -126,8 +126,8 @@ struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *
psp->ddev = ddev;
memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs));
- psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN;
- psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz, GFP_KERNEL);
+ psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN);
+ psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz + PSP_FW_ALIGN, GFP_KERNEL);
if (!psp->fw_buffer) {
drm_err(ddev, "no memory for fw buffer");
return NULL;
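
The hunk above moves the extra PSP_FW_ALIGN out of the rounded size and into the allocation itself, which is the usual over-allocate-then-align pattern. A sketch under the assumption that the driver aligns the usable pointer with the standard PTR_ALIGN() helper (the helper choice is an assumption; only the size arithmetic appears in the diff):

#include <linux/align.h>
#include <drm/drm_managed.h>

static void *psp_alloc_aligned(struct drm_device *ddev, size_t fw_size)
{
	/* The extra PSP_FW_ALIGN bytes guarantee that an aligned start
	 * with ALIGN(fw_size, PSP_FW_ALIGN) usable bytes exists inside
	 * the buffer wherever the allocator placed it.
	 */
	void *buf = drmm_kmalloc(ddev,
				 ALIGN(fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN,
				 GFP_KERNEL);

	return buf ? PTR_ALIGN(buf, PSP_FW_ALIGN) : NULL;
}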
diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c
index 6f8bbe1247a5..c9a0bcaba2e4 100644
--- a/drivers/acpi/acpi_pad.c
+++ b/drivers/acpi/acpi_pad.c
@@ -33,7 +33,7 @@
static DEFINE_MUTEX(isolated_cpus_lock);
static DEFINE_MUTEX(round_robin_lock);
-static unsigned long power_saving_mwait_eax;
+static unsigned int power_saving_mwait_eax;
static unsigned char tsc_detected_unstable;
static unsigned char tsc_marked_unstable;
diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c
index c8f37f4e6626..fef6fb29ece4 100644
--- a/drivers/acpi/acpica/dsmethod.c
+++ b/drivers/acpi/acpica/dsmethod.c
@@ -483,6 +483,13 @@ acpi_ds_call_control_method(struct acpi_thread_state *thread,
return_ACPI_STATUS(AE_NULL_OBJECT);
}
+ if (this_walk_state->num_operands < obj_desc->method.param_count) {
+ ACPI_ERROR((AE_INFO, "Missing argument for method [%4.4s]",
+ acpi_ut_get_node_name(method_node)));
+
+ return_ACPI_STATUS(AE_AML_UNINITIALIZED_ARG);
+ }
+
/* Init for new method, possibly wait on method mutex */
status =
diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c
index fea11a35eea3..9b041415a9d0 100644
--- a/drivers/acpi/apei/einj-core.c
+++ b/drivers/acpi/apei/einj-core.c
@@ -883,19 +883,16 @@ static int __init einj_init(void)
}
einj_dev = faux_device_create("acpi-einj", NULL, &einj_device_ops);
- if (!einj_dev)
- return -ENODEV;
- einj_initialized = true;
+ if (einj_dev)
+ einj_initialized = true;
return 0;
}
static void __exit einj_exit(void)
{
- if (einj_initialized)
- faux_device_destroy(einj_dev);
-
+ faux_device_destroy(einj_dev);
}
module_init(einj_init);
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 45593612a4db..6905b56bf3e4 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -243,23 +243,10 @@ static int acpi_battery_get_property(struct power_supply *psy,
break;
case POWER_SUPPLY_PROP_CURRENT_NOW:
case POWER_SUPPLY_PROP_POWER_NOW:
- if (battery->rate_now == ACPI_BATTERY_VALUE_UNKNOWN) {
+ if (battery->rate_now == ACPI_BATTERY_VALUE_UNKNOWN)
ret = -ENODEV;
- break;
- }
-
- val->intval = battery->rate_now * 1000;
- /*
- * When discharging, the current should be reported as a
- * negative number as per the power supply class interface
- * definition.
- */
- if (psp == POWER_SUPPLY_PROP_CURRENT_NOW &&
- (battery->state & ACPI_BATTERY_STATE_DISCHARGING) &&
- acpi_battery_handle_discharging(battery)
- == POWER_SUPPLY_STATUS_DISCHARGING)
- val->intval = -val->intval;
-
+ else
+ val->intval = battery->rate_now * 1000;
break;
case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index a9ae2fd62863..6b649031808f 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -476,7 +476,7 @@ bool cppc_allow_fast_switch(void)
struct cpc_desc *cpc_ptr;
int cpu;
- for_each_possible_cpu(cpu) {
+ for_each_present_cpu(cpu) {
cpc_ptr = per_cpu(cpc_desc_ptr, cpu);
desired_reg = &cpc_ptr->cpc_regs[DESIRED_PERF];
if (!CPC_IN_SYSTEM_MEMORY(desired_reg) &&
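
On the mask change above: cpu_possible_mask covers every CPU the kernel could ever bring up, while cpu_present_mask covers CPUs actually present, and with boot options such as "nosmp" the possible-but-absent CPUs never get a cpc_desc_ptr. A defensive sketch of the lookup (the extra NULL guard is hypothetical, not part of the patch):

for_each_present_cpu(cpu) {
	cpc_ptr = per_cpu(cpc_desc_ptr, cpu);
	if (!cpc_ptr)		/* hypothetical belt-and-braces guard */
		continue;
	/* ... inspect cpc_ptr->cpc_regs[DESIRED_PERF] as above ... */
}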
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 6f4203716b53..75c7db8b156a 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -23,8 +23,10 @@
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/list.h>
+#include <linux/printk.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
+#include <linux/string.h>
#include <linux/suspend.h>
#include <linux/acpi.h>
#include <linux/dmi.h>
@@ -2031,6 +2033,21 @@ void __init acpi_ec_ecdt_probe(void)
goto out;
}
+ if (!strstarts(ecdt_ptr->id, "\\")) {
+ /*
+ * The ECDT table on some MSI notebooks contains invalid data, together
+ * with an empty ID string ("").
+ *
+ * Section 5.2.15 of the ACPI specification requires the ID string to be
+ * a "fully qualified reference to the (...) embedded controller device",
+ * so this string always has to start with a backslash.
+ *
+		 * Verifying this lets us reject such faulty ECDT tables safely.
+ */
+ pr_err(FW_BUG "Ignoring ECDT due to invalid ID string \"%s\"\n", ecdt_ptr->id);
+ goto out;
+ }
+
ec = acpi_ec_alloc();
if (!ec)
goto out;
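
strstarts() used above is the plain prefix test from <linux/string.h>, so the whole validation reduces to "a fully qualified ACPI namespace path begins with a backslash". A minimal sketch:

#include <linux/string.h>

static bool ecdt_id_is_qualified(const char *id)
{
	/* "\_SB.PCI0.LPCB.EC0" -> true; "" or "EC0" -> false */
	return strstarts(id, "\\");
}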
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 00910ccd7eda..e2781864fdce 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -175,6 +175,12 @@ bool processor_physically_present(acpi_handle handle);
static inline void acpi_early_processor_control_setup(void) {}
#endif
+#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
+void acpi_idle_rescan_dead_smt_siblings(void);
+#else
+static inline void acpi_idle_rescan_dead_smt_siblings(void) {}
+#endif
+
/* --------------------------------------------------------------------------
Embedded Controller
-------------------------------------------------------------------------- */
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
index 3b281bc1e73c..65e779be64ff 100644
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -279,6 +279,9 @@ static int __init acpi_processor_driver_init(void)
* after acpi_cppc_processor_probe() has been called for all online CPUs
*/
acpi_processor_init_invariance_cppc();
+
+ acpi_idle_rescan_dead_smt_siblings();
+
return 0;
err:
driver_unregister(&acpi_processor_driver);
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index e2febca2ec13..2c2dc559e0f8 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -24,6 +24,8 @@
#include <acpi/processor.h>
#include <linux/context_tracking.h>
+#include "internal.h"
+
/*
* Include the apic definitions for x86 to have the APIC timer related defines
* available also for UP (on SMP it gets magically included via linux/smp.h).
@@ -55,6 +57,12 @@ struct cpuidle_driver acpi_idle_driver = {
};
#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
+void acpi_idle_rescan_dead_smt_siblings(void)
+{
+ if (cpuidle_get_driver() == &acpi_idle_driver)
+ arch_cpu_rescan_dead_smt_siblings();
+}
+
static
DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate);
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 7d59c6c9185f..b1ab192d7a08 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -667,6 +667,13 @@ static const struct dmi_system_id irq1_edge_low_force_override[] = {
},
},
{
+ /* MACHENIKE L16P/L16P */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "MACHENIKE"),
+ DMI_MATCH(DMI_BOARD_NAME, "L16P"),
+ },
+ },
+ {
/*
* TongFang GM5HG0A in case of the SKIKK Vanaheim relabel the
* board-name is changed, so check OEM strings instead. Note
diff --git a/drivers/acpi/riscv/cppc.c b/drivers/acpi/riscv/cppc.c
index 4cdff387deff..440cf9fb91aa 100644
--- a/drivers/acpi/riscv/cppc.c
+++ b/drivers/acpi/riscv/cppc.c
@@ -37,10 +37,8 @@ static int __init sbi_cppc_init(void)
{
if (sbi_spec_version >= sbi_mk_version(2, 0) &&
sbi_probe_extension(SBI_EXT_CPPC) > 0) {
- pr_info("SBI CPPC extension detected\n");
cppc_ext_present = true;
} else {
- pr_info("SBI CPPC extension NOT detected!!\n");
cppc_ext_present = false;
}
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index c463ca4a8fff..1ba2192ae667 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -5384,10 +5384,9 @@ static int binder_ioctl_write_read(struct file *filp, unsigned long arg,
void __user *ubuf = (void __user *)arg;
struct binder_write_read bwr;
- if (copy_from_user(&bwr, ubuf, sizeof(bwr))) {
- ret = -EFAULT;
- goto out;
- }
+ if (copy_from_user(&bwr, ubuf, sizeof(bwr)))
+ return -EFAULT;
+
binder_debug(BINDER_DEBUG_READ_WRITE,
"%d:%d write %lld at %016llx, read %lld at %016llx\n",
proc->pid, thread->pid,
@@ -5402,8 +5401,6 @@ static int binder_ioctl_write_read(struct file *filp, unsigned long arg,
trace_binder_write_done(ret);
if (ret < 0) {
bwr.read_consumed = 0;
- if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
- ret = -EFAULT;
goto out;
}
}
@@ -5417,22 +5414,17 @@ static int binder_ioctl_write_read(struct file *filp, unsigned long arg,
if (!binder_worklist_empty_ilocked(&proc->todo))
binder_wakeup_proc_ilocked(proc);
binder_inner_proc_unlock(proc);
- if (ret < 0) {
- if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
- ret = -EFAULT;
+ if (ret < 0)
goto out;
- }
}
binder_debug(BINDER_DEBUG_READ_WRITE,
"%d:%d wrote %lld of %lld, read return %lld of %lld\n",
proc->pid, thread->pid,
(u64)bwr.write_consumed, (u64)bwr.write_size,
(u64)bwr.read_consumed, (u64)bwr.read_size);
- if (copy_to_user(ubuf, &bwr, sizeof(bwr))) {
- ret = -EFAULT;
- goto out;
- }
out:
+ if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
+ ret = -EFAULT;
return ret;
}
@@ -6128,7 +6120,7 @@ static int binder_release(struct inode *nodp, struct file *filp)
debugfs_remove(proc->debugfs_entry);
if (proc->binderfs_entry) {
- binderfs_remove_file(proc->binderfs_entry);
+ simple_recursive_removal(proc->binderfs_entry, NULL);
proc->binderfs_entry = NULL;
}
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index 1ba5caf1d88d..a5fd23dcafcd 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -81,7 +81,6 @@ extern bool is_binderfs_device(const struct inode *inode);
extern struct dentry *binderfs_create_file(struct dentry *dir, const char *name,
const struct file_operations *fops,
void *data);
-extern void binderfs_remove_file(struct dentry *dentry);
#else
static inline bool is_binderfs_device(const struct inode *inode)
{
@@ -94,7 +93,6 @@ static inline struct dentry *binderfs_create_file(struct dentry *dir,
{
return NULL;
}
-static inline void binderfs_remove_file(struct dentry *dentry) {}
#endif
#ifdef CONFIG_ANDROID_BINDERFS
diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c
index 024275dbfdd8..acc946bb1457 100644
--- a/drivers/android/binderfs.c
+++ b/drivers/android/binderfs.c
@@ -500,21 +500,6 @@ static struct dentry *binderfs_create_dentry(struct dentry *parent,
return dentry;
}
-void binderfs_remove_file(struct dentry *dentry)
-{
- struct inode *parent_inode;
-
- parent_inode = d_inode(dentry->d_parent);
- inode_lock(parent_inode);
- if (simple_positive(dentry)) {
- dget(dentry);
- simple_unlink(parent_inode, dentry);
- d_delete(dentry);
- dput(dentry);
- }
- inode_unlock(parent_inode);
-}
-
struct dentry *binderfs_create_file(struct dentry *parent, const char *name,
const struct file_operations *fops,
void *data)
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 163ac909bd06..aa93b0ecbbc6 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1410,8 +1410,15 @@ static bool ahci_broken_suspend(struct pci_dev *pdev)
static bool ahci_broken_lpm(struct pci_dev *pdev)
{
+ /*
+ * Platforms with LPM problems.
+ * If driver_data is NULL, there is no existing BIOS version with
+ * functioning LPM.
+ * If driver_data is non-NULL, then driver_data contains the DMI BIOS
+ * build date of the first BIOS version with functioning LPM (i.e. older
+ * BIOS versions have broken LPM).
+ */
static const struct dmi_system_id sysids[] = {
- /* Various Lenovo 50 series have LPM issues with older BIOSen */
{
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
@@ -1438,13 +1445,30 @@ static bool ahci_broken_lpm(struct pci_dev *pdev)
DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W541"),
},
+ .driver_data = "20180409", /* 2.35 */
+ },
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ASUSPRO D840MB_M840SA"),
+ },
+ /* 320 is broken, there is no known good version. */
+ },
+ {
/*
- * Note date based on release notes, 2.35 has been
- * reported to be good, but I've been unable to get
- * a hold of the reporter to get the DMI BIOS date.
- * TODO: fix this.
+ * AMD 500 Series Chipset SATA Controller [1022:43eb]
+	 * on this motherboard times out on ports 5 and 6 when
+ * LPM is enabled, at least with WDC WD20EFAX-68FB5N0
+ * hard drives. LPM with the same drive works fine on
+ * all other ports on the same controller.
*/
- .driver_data = "20180310", /* 2.35 */
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR,
+ "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_BOARD_NAME,
+ "ROG STRIX B550-F GAMING (WI-FI)"),
+ },
+ /* 3621 is broken, there is no known good version. */
},
{ } /* terminate list */
};
@@ -1455,6 +1479,9 @@ static bool ahci_broken_lpm(struct pci_dev *pdev)
if (!dmi)
return false;
+ if (!dmi->driver_data)
+ return true;
+
dmi_get_date(DMI_BIOS_DATE, &year, &month, &date);
snprintf(buf, sizeof(buf), "%04d%02d%02d", year, month, date);
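
The snprintf() above encodes the DMI BIOS date as a zero-padded YYYYMMDD string, so lexicographic order matches chronological order and a single strcmp() against driver_data decides whether the BIOS predates the first fixed build. A sketch of that comparison (the return statement stands in for the unshown tail of ahci_broken_lpm()):

#include <linux/dmi.h>
#include <linux/string.h>

static bool bios_older_than(const char *fixed_date)	/* e.g. "20180409" */
{
	int year, month, date;
	char buf[9];

	dmi_get_date(DMI_BIOS_DATE, &year, &month, &date);
	snprintf(buf, sizeof(buf), "%04d%02d%02d", year, month, date);
	return strcmp(buf, fixed_date) < 0;	/* older BIOS => broken LPM */
}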
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index b7f0bf795521..f2140fc06ba0 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -514,15 +514,19 @@ unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev,
EXPORT_SYMBOL_GPL(ata_acpi_gtm_xfermask);
/**
- * ata_acpi_cbl_80wire - Check for 80 wire cable
+ * ata_acpi_cbl_pata_type - Return PATA cable type
* @ap: Port to check
- * @gtm: GTM data to use
*
- * Return 1 if the @gtm indicates the BIOS selected an 80wire mode.
+ * Return ATA_CBL_PATA* according to the transfer mode selected by BIOS
*/
-int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm)
+int ata_acpi_cbl_pata_type(struct ata_port *ap)
{
struct ata_device *dev;
+ int ret = ATA_CBL_PATA_UNK;
+ const struct ata_acpi_gtm *gtm = ata_acpi_init_gtm(ap);
+
+ if (!gtm)
+ return ATA_CBL_PATA40;
ata_for_each_dev(dev, &ap->link, ENABLED) {
unsigned int xfer_mask, udma_mask;
@@ -530,13 +534,17 @@ int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm)
xfer_mask = ata_acpi_gtm_xfermask(dev, gtm);
ata_unpack_xfermask(xfer_mask, NULL, NULL, &udma_mask);
- if (udma_mask & ~ATA_UDMA_MASK_40C)
- return 1;
+ ret = ATA_CBL_PATA40;
+
+ if (udma_mask & ~ATA_UDMA_MASK_40C) {
+ ret = ATA_CBL_PATA80;
+ break;
+ }
}
- return 0;
+ return ret;
}
-EXPORT_SYMBOL_GPL(ata_acpi_cbl_80wire);
+EXPORT_SYMBOL_GPL(ata_acpi_cbl_pata_type);
static void ata_acpi_gtf_to_tf(struct ata_device *dev,
const struct ata_acpi_gtf *gtf,
diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c
index b811efd2cc34..73e81e160c91 100644
--- a/drivers/ata/pata_cs5536.c
+++ b/drivers/ata/pata_cs5536.c
@@ -27,7 +27,7 @@
#include <scsi/scsi_host.h>
#include <linux/dmi.h>
-#ifdef CONFIG_X86_32
+#if defined(CONFIG_X86) && defined(CONFIG_X86_32)
#include <asm/msr.h>
static int use_msr;
module_param_named(msr, use_msr, int, 0644);
diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
index fbf5f07ea357..f7a933eefe05 100644
--- a/drivers/ata/pata_macio.c
+++ b/drivers/ata/pata_macio.c
@@ -1298,7 +1298,7 @@ static int pata_macio_pci_attach(struct pci_dev *pdev,
priv->dev = &pdev->dev;
/* Get MMIO regions */
- if (pci_request_regions(pdev, "pata-macio")) {
+ if (pcim_request_all_regions(pdev, "pata-macio")) {
dev_err(&pdev->dev,
"Cannot obtain PCI resources\n");
return -EBUSY;
diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c
index 696b99720dcb..bb80e7800dcb 100644
--- a/drivers/ata/pata_via.c
+++ b/drivers/ata/pata_via.c
@@ -201,11 +201,9 @@ static int via_cable_detect(struct ata_port *ap) {
two drives */
if (ata66 & (0x10100000 >> (16 * ap->port_no)))
return ATA_CBL_PATA80;
+
/* Check with ACPI so we can spot BIOS reported SATA bridges */
- if (ata_acpi_init_gtm(ap) &&
- ata_acpi_cbl_80wire(ap, ata_acpi_init_gtm(ap)))
- return ATA_CBL_PATA80;
- return ATA_CBL_PATA40;
+ return ata_acpi_cbl_pata_type(ap);
}
static int via_pre_reset(struct ata_link *link, unsigned long deadline)
@@ -368,7 +366,8 @@ static unsigned int via_mode_filter(struct ata_device *dev, unsigned int mask)
}
if (dev->class == ATA_DEV_ATAPI &&
- dmi_check_system(no_atapi_dma_dmi_table)) {
+ (dmi_check_system(no_atapi_dma_dmi_table) ||
+ config->id == PCI_DEVICE_ID_VIA_6415)) {
ata_dev_warn(dev, "controller locks up on ATAPI DMA, forcing PIO\n");
mask &= ATA_MASK_PIO;
}
diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c
index d4aa0f353b6c..eeae160c898d 100644
--- a/drivers/atm/atmtcp.c
+++ b/drivers/atm/atmtcp.c
@@ -288,7 +288,9 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb)
struct sk_buff *new_skb;
int result = 0;
- if (!skb->len) return 0;
+ if (skb->len < sizeof(struct atmtcp_hdr))
+ goto done;
+
dev = vcc->dev_data;
hdr = (struct atmtcp_hdr *) skb->data;
if (hdr->length == ATMTCP_HDR_MAGIC) {
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 1206ab764ba9..f2e91b7d79f0 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -852,6 +852,8 @@ queue_skb(struct idt77252_dev *card, struct vc_map *vc,
IDT77252_PRV_PADDR(skb) = dma_map_single(&card->pcidev->dev, skb->data,
skb->len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&card->pcidev->dev, IDT77252_PRV_PADDR(skb)))
+ return -ENOMEM;
error = -EINVAL;
@@ -1857,6 +1859,8 @@ add_rx_skb(struct idt77252_dev *card, int queue,
paddr = dma_map_single(&card->pcidev->dev, skb->data,
skb_end_pointer(skb) - skb->data,
DMA_FROM_DEVICE);
+ if (dma_mapping_error(&card->pcidev->dev, paddr))
+ goto outpoolrm;
IDT77252_PRV_PADDR(skb) = paddr;
if (push_rx_skb(card, skb, queue)) {
@@ -1871,6 +1875,7 @@ outunmap:
dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb),
skb_end_pointer(skb) - skb->data, DMA_FROM_DEVICE);
+outpoolrm:
handle = IDT77252_PRV_POOL(skb);
card->sbpool[POOL_QUEUE(handle)].skb[POOL_INDEX(handle)] = NULL;
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 7779ab0ca7ce..efc575a00edd 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -602,6 +602,7 @@ CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling);
CPU_SHOW_VULN_FALLBACK(ghostwrite);
CPU_SHOW_VULN_FALLBACK(old_microcode);
CPU_SHOW_VULN_FALLBACK(indirect_target_selection);
+CPU_SHOW_VULN_FALLBACK(tsa);
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
@@ -620,6 +621,7 @@ static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling
static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL);
static DEVICE_ATTR(old_microcode, 0444, cpu_show_old_microcode, NULL);
static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL);
+static DEVICE_ATTR(tsa, 0444, cpu_show_tsa, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
@@ -639,6 +641,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_ghostwrite.attr,
&dev_attr_old_microcode.attr,
&dev_attr_indirect_target_selection.attr,
+ &dev_attr_tsa.attr,
NULL
};
diff --git a/drivers/base/faux.c b/drivers/base/faux.c
index 9054d346bd7f..f5fbda0a9a44 100644
--- a/drivers/base/faux.c
+++ b/drivers/base/faux.c
@@ -86,6 +86,7 @@ static struct device_driver faux_driver = {
.name = "faux_driver",
.bus = &faux_bus_type,
.probe_type = PROBE_FORCE_SYNCHRONOUS,
+ .suppress_bind_attrs = true,
};
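
Setting suppress_bind_attrs above removes the bind/unbind attribute files the driver core would otherwise create in sysfs, so faux devices cannot be manually unbound or rebound from user space. The field in isolation:

static struct device_driver example_driver = {
	.name			= "example",
	.suppress_bind_attrs	= true,	/* no /sys/bus/.../drivers/example/{bind,unbind} */
};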
static void faux_device_release(struct device *dev)
@@ -169,7 +170,7 @@ struct faux_device *faux_device_create_with_groups(const char *name,
* successful is almost impossible to determine by the caller.
*/
if (!dev->driver) {
- dev_err(dev, "probe did not succeed, tearing down the device\n");
+ dev_dbg(dev, "probe did not succeed, tearing down the device\n");
faux_device_destroy(faux_dev);
faux_dev = NULL;
}
diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index 44486b2c7172..6942c62fa59d 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -822,26 +822,6 @@ static void fw_log_firmware_info(const struct firmware *fw, const char *name,
{}
#endif
-/*
- * Reject firmware file names with ".." path components.
- * There are drivers that construct firmware file names from device-supplied
- * strings, and we don't want some device to be able to tell us "I would like to
- * be sent my firmware from ../../../etc/shadow, please".
- *
- * Search for ".." surrounded by either '/' or start/end of string.
- *
- * This intentionally only looks at the firmware name, not at the firmware base
- * directory or at symlink contents.
- */
-static bool name_contains_dotdot(const char *name)
-{
- size_t name_len = strlen(name);
-
- return strcmp(name, "..") == 0 || strncmp(name, "../", 3) == 0 ||
- strstr(name, "/../") != NULL ||
- (name_len >= 3 && strcmp(name+name_len-3, "/..") == 0);
-}
-
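
Illustrative cases for the ".." rejection, assuming name_contains_dotdot() keeps the semantics described in the comment block removed above (".." matches only as a complete path component):

static void __maybe_unused dotdot_examples(void)
{
	WARN_ON(!name_contains_dotdot(".."));			/* rejected */
	WARN_ON(!name_contains_dotdot("../fw.bin"));		/* rejected */
	WARN_ON(!name_contains_dotdot("vendor/../fw.bin"));	/* rejected */
	WARN_ON(name_contains_dotdot("vendor/fw..bin"));	/* allowed */
}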
/* called from request_firmware() and request_firmware_work_func() */
static int
_request_firmware(const struct firmware **firmware_p, const char *name,
@@ -862,6 +842,17 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
goto out;
}
+
+ /*
+ * Reject firmware file names with ".." path components.
+ * There are drivers that construct firmware file names from
+ * device-supplied strings, and we don't want some device to be
+ * able to tell us "I would like to be sent my firmware from
+ * ../../../etc/shadow, please".
+ *
+ * This intentionally only looks at the firmware name, not at
+ * the firmware base directory or at symlink contents.
+ */
if (name_contains_dotdot(name)) {
dev_warn(device,
"Firmware load for '%s' refused, path contains '..' component\n",
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index eebe699fdf4f..7a50af416cac 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1237,6 +1237,7 @@ void dpm_complete(pm_message_t state)
void dpm_resume_end(pm_message_t state)
{
dpm_resume(state);
+ pm_restore_gfp_mask();
dpm_complete(state);
}
EXPORT_SYMBOL_GPL(dpm_resume_end);
@@ -1279,6 +1280,22 @@ static void dpm_async_suspend_parent(struct device *dev, async_func_t func)
dpm_async_with_cleanup(dev->parent, func);
}
+static void dpm_async_suspend_complete_all(struct list_head *device_list)
+{
+ struct device *dev;
+
+ guard(mutex)(&async_wip_mtx);
+
+ list_for_each_entry_reverse(dev, device_list, power.entry) {
+ /*
+ * In case the device is being waited for and async processing
+ * has not started for it yet, let the waiters make progress.
+ */
+ if (!dev->power.work_in_progress)
+ complete_all(&dev->power.completion);
+ }
+}
+
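
dpm_async_suspend_complete_all() above has no explicit unlock because guard(mutex) from <linux/cleanup.h> acquires the mutex and releases it automatically when the enclosing scope ends. The open-coded equivalent of that locking idiom, for comparison:

static void walk_locked(struct list_head *device_list)
{
	mutex_lock(&async_wip_mtx);
	/* ... walk device_list exactly as above ... */
	mutex_unlock(&async_wip_mtx);
}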
/**
* resume_event - Return a "resume" message for given "suspend" sleep state.
* @sleep_state: PM message representing a sleep state.
@@ -1455,6 +1472,7 @@ static int dpm_noirq_suspend_devices(pm_message_t state)
mutex_lock(&dpm_list_mtx);
if (error || async_error) {
+ dpm_async_suspend_complete_all(&dpm_late_early_list);
/*
* Move all devices to the target list to resume them
* properly.
@@ -1657,6 +1675,7 @@ int dpm_suspend_late(pm_message_t state)
mutex_lock(&dpm_list_mtx);
if (error || async_error) {
+ dpm_async_suspend_complete_all(&dpm_suspended_list);
/*
* Move all devices to the target list to resume them
* properly.
@@ -1950,6 +1969,7 @@ int dpm_suspend(pm_message_t state)
mutex_lock(&dpm_list_mtx);
if (error || async_error) {
+ dpm_async_suspend_complete_all(&dpm_prepared_list);
/*
* Move all devices to the target list to resume them
* properly.
@@ -2176,8 +2196,10 @@ int dpm_suspend_start(pm_message_t state)
error = dpm_prepare(state);
if (error)
dpm_save_failed_step(SUSPEND_PREPARE);
- else
+ else {
+ pm_restrict_gfp_mask();
error = dpm_suspend(state);
+ }
dpm_show_time(starttime, state, error, "start");
return error;
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index f2843f814675..1f3f782a04ba 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -1173,6 +1173,8 @@ err_name:
err_map:
kfree(map);
err:
+ if (bus && bus->free_on_exit)
+ kfree(bus);
return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(__regmap_init);
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 749ae1246f4c..d35caa3c69e1 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -80,6 +80,7 @@ enum {
DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */
DEVFL_FREEING = (1<<7), /* set when device is being cleaned up */
DEVFL_FREED = (1<<8), /* device has been cleaned up */
+	DEVFL_DEAD = (1<<9),	/* device has exceeded the aoe_deadsecs timeout */
};
enum {
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 50cc90f6ab35..6298f8e271e3 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -754,7 +754,7 @@ rexmit_timer(struct timer_list *timer)
utgts = count_targets(d, NULL);
- if (d->flags & DEVFL_TKILL) {
+ if (d->flags & (DEVFL_TKILL | DEVFL_DEAD)) {
spin_unlock_irqrestore(&d->lock, flags);
return;
}
@@ -786,7 +786,8 @@ rexmit_timer(struct timer_list *timer)
* to clean up.
*/
list_splice(&flist, &d->factive[0]);
- aoedev_downdev(d);
+ d->flags |= DEVFL_DEAD;
+ queue_work(aoe_wq, &d->work);
goto out;
}
@@ -898,6 +899,9 @@ aoecmd_sleepwork(struct work_struct *work)
{
struct aoedev *d = container_of(work, struct aoedev, work);
+ if (d->flags & DEVFL_DEAD)
+ aoedev_downdev(d);
+
if (d->flags & DEVFL_GDALLOC)
aoeblk_gdalloc(d);
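
The two hunks above form a classic deferral: rexmit_timer() runs in atomic context with d->lock held, so it only marks the device DEVFL_DEAD and queues work, while aoedev_downdev(), which freezes and drains the block queue and may sleep, runs later from aoe_wq in process context. Sketched with an illustrative name:

static void toy_timer_cb(struct aoedev *d)	/* atomic context, lock held */
{
	d->flags |= DEVFL_DEAD;			/* mark only */
	queue_work(aoe_wq, &d->work);		/* defer the heavy teardown */
}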
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index bba05f0c5bbd..3a240755045b 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -198,9 +198,13 @@ aoedev_downdev(struct aoedev *d)
{
struct aoetgt *t, **tt, **te;
struct list_head *head, *pos, *nx;
+ struct request *rq, *rqnext;
int i;
+ unsigned long flags;
- d->flags &= ~DEVFL_UP;
+ spin_lock_irqsave(&d->lock, flags);
+ d->flags &= ~(DEVFL_UP | DEVFL_DEAD);
+ spin_unlock_irqrestore(&d->lock, flags);
/* clean out active and to-be-retransmitted buffers */
for (i = 0; i < NFACTIVE; i++) {
@@ -223,6 +227,13 @@ aoedev_downdev(struct aoedev *d)
/* clean out the in-process request (if any) */
aoe_failip(d);
+ /* clean out any queued block requests */
+ list_for_each_entry_safe(rq, rqnext, &d->rq_list, queuelist) {
+ list_del_init(&rq->queuelist);
+ blk_mq_start_request(rq);
+ blk_mq_end_request(rq, BLK_STS_IOERR);
+ }
+
/* fast fail all pending I/O */
if (d->blkq) {
/* UP is cleared, freeze+quiesce to insure all are errored */
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index b1be6c510372..0c2eabe14af3 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -64,13 +64,15 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector,
rcu_read_unlock();
page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
- rcu_read_lock();
- if (!page)
+ if (!page) {
+ rcu_read_lock();
return ERR_PTR(-ENOMEM);
+ }
xa_lock(&brd->brd_pages);
ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL,
page, gfp);
+ rcu_read_lock();
if (ret) {
xa_unlock(&brd->brd_pages);
__free_page(page);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f8d136684109..8d994cae3b83 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -308,14 +308,13 @@ end_io:
static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
{
struct request *rq = blk_mq_rq_from_pdu(cmd);
- struct loop_device *lo = rq->q->queuedata;
if (!atomic_dec_and_test(&cmd->ref))
return;
kfree(cmd->bvec);
cmd->bvec = NULL;
if (req_op(rq) == REQ_OP_WRITE)
- file_end_write(lo->lo_backing_file);
+ kiocb_end_write(&cmd->iocb);
if (likely(!blk_should_fake_timeout(rq->q)))
blk_mq_complete_request(rq);
}
@@ -391,7 +390,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
}
if (rw == ITER_SOURCE) {
- file_start_write(lo->lo_backing_file);
+ kiocb_start_write(&cmd->iocb);
ret = file->f_op->write_iter(&cmd->iocb, &iter);
} else
ret = file->f_op->read_iter(&cmd->iocb, &iter);
@@ -1248,12 +1247,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
lo->lo_flags &= ~LOOP_SET_STATUS_CLEARABLE_FLAGS;
lo->lo_flags |= (info->lo_flags & LOOP_SET_STATUS_SETTABLE_FLAGS);
- if (size_changed) {
- loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit,
- lo->lo_backing_file);
- loop_set_size(lo, new_size);
- }
-
/* update the direct I/O flag if lo_offset changed */
loop_update_dio(lo);
@@ -1261,6 +1254,11 @@ out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue, memflags);
if (partscan)
clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
+ if (!err && size_changed) {
+ loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit,
+ lo->lo_backing_file);
+ loop_set_size(lo, new_size);
+ }
out_unlock:
mutex_unlock(&lo->lo_mutex);
if (partscan)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 7bdc7eb808ea..2592bd19ebc1 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -2198,9 +2198,7 @@ again:
goto out;
}
}
- ret = nbd_start_device(nbd);
- if (ret)
- goto out;
+
if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) {
nbd->backend = nla_strdup(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER],
GFP_KERNEL);
@@ -2216,6 +2214,8 @@ again:
goto out;
}
set_bit(NBD_RT_HAS_BACKEND_FILE, &config->runtime_flags);
+
+ ret = nbd_start_device(nbd);
out:
mutex_unlock(&nbd->config_lock);
if (!ret) {
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index c637ea010d34..9fd284fa76dc 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1148,8 +1148,8 @@ exit:
blk_mq_end_request(req, res);
}
-static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req,
- int res, unsigned issue_flags)
+static struct io_uring_cmd *__ublk_prep_compl_io_cmd(struct ublk_io *io,
+ struct request *req)
{
/* read cmd first because req will overwrite it */
struct io_uring_cmd *cmd = io->cmd;
@@ -1164,6 +1164,13 @@ static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req,
io->flags &= ~UBLK_IO_FLAG_ACTIVE;
io->req = req;
+ return cmd;
+}
+
+static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req,
+ int res, unsigned issue_flags)
+{
+ struct io_uring_cmd *cmd = __ublk_prep_compl_io_cmd(io, req);
/* tell ublksrv one io request is coming */
io_uring_cmd_done(cmd, res, 0, issue_flags);
@@ -1416,6 +1423,14 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_STS_OK;
}
+static inline bool ublk_belong_to_same_batch(const struct ublk_io *io,
+ const struct ublk_io *io2)
+{
+ return (io_uring_cmd_ctx_handle(io->cmd) ==
+ io_uring_cmd_ctx_handle(io2->cmd)) &&
+ (io->task == io2->task);
+}
+
static void ublk_queue_rqs(struct rq_list *rqlist)
{
struct rq_list requeue_list = { };
@@ -1427,14 +1442,16 @@ static void ublk_queue_rqs(struct rq_list *rqlist)
struct ublk_queue *this_q = req->mq_hctx->driver_data;
struct ublk_io *this_io = &this_q->ios[req->tag];
- if (io && io->task != this_io->task && !rq_list_empty(&submit_list))
+ if (ublk_prep_req(this_q, req, true) != BLK_STS_OK) {
+ rq_list_add_tail(&requeue_list, req);
+ continue;
+ }
+
+ if (io && !ublk_belong_to_same_batch(io, this_io) &&
+ !rq_list_empty(&submit_list))
ublk_queue_cmd_list(io, &submit_list);
io = this_io;
-
- if (ublk_prep_req(this_q, req, true) == BLK_STS_OK)
- rq_list_add_tail(&submit_list, req);
- else
- rq_list_add_tail(&requeue_list, req);
+ rq_list_add_tail(&submit_list, req);
}
if (!rq_list_empty(&submit_list))
@@ -2148,10 +2165,9 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
return 0;
}
-static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io)
+static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io,
+ struct request *req)
{
- struct request *req = io->req;
-
/*
* We have handled UBLK_IO_NEED_GET_DATA command,
* so clear UBLK_IO_FLAG_NEED_GET_DATA now and just
@@ -2178,6 +2194,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
u32 cmd_op = cmd->cmd_op;
unsigned tag = ub_cmd->tag;
int ret = -EINVAL;
+ struct request *req;
pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n",
__func__, cmd->cmd_op, ub_cmd->q_id, tag,
@@ -2236,11 +2253,19 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
goto out;
break;
case UBLK_IO_NEED_GET_DATA:
- io->addr = ub_cmd->addr;
- if (!ublk_get_data(ubq, io))
- return -EIOCBQUEUED;
-
- return UBLK_IO_RES_OK;
+		/*
+		 * ublk_get_data() may fail and fall back to requeueing, so
+		 * keep the uring_cmd active first and prepare for handling
+		 * the requeued request.
+		 */
+ req = io->req;
+ ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
+ io->flags &= ~UBLK_IO_FLAG_OWNED_BY_SRV;
+ if (likely(ublk_get_data(ubq, io, req))) {
+ __ublk_prep_compl_io_cmd(io, req);
+ return UBLK_IO_RES_OK;
+ }
+ break;
default:
goto out;
}
@@ -2825,6 +2850,10 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
if (copy_from_user(&info, argp, sizeof(info)))
return -EFAULT;
+ if (info.queue_depth > UBLK_MAX_QUEUE_DEPTH || !info.queue_depth ||
+ info.nr_hw_queues > UBLK_MAX_NR_QUEUES || !info.nr_hw_queues)
+ return -EINVAL;
+
if (capable(CAP_SYS_ADMIN))
info.flags &= ~UBLK_F_UNPRIVILEGED_DEV;
else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV))
diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c
index 0d6ad50da046..8df310983bf6 100644
--- a/drivers/bluetooth/bfusb.c
+++ b/drivers/bluetooth/bfusb.c
@@ -670,7 +670,7 @@ static int bfusb_probe(struct usb_interface *intf, const struct usb_device_id *i
hdev->flush = bfusb_flush;
hdev->send = bfusb_send_frame;
- set_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LOCAL_COMMANDS);
if (hci_register_dev(hdev) < 0) {
BT_ERR("Can't register HCI device");
diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c
index 1fa58c059cbf..8b43dfc755de 100644
--- a/drivers/bluetooth/bpa10x.c
+++ b/drivers/bluetooth/bpa10x.c
@@ -398,7 +398,7 @@ static int bpa10x_probe(struct usb_interface *intf,
hdev->send = bpa10x_send_frame;
hdev->set_diag = bpa10x_set_diag;
- set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE);
err = hci_register_dev(hdev);
if (err < 0) {
diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index 0a60660fc8ce..3a3a56ddbb06 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -135,7 +135,7 @@ int btbcm_check_bdaddr(struct hci_dev *hdev)
if (btbcm_set_bdaddr_from_efi(hdev) != 0) {
bt_dev_info(hdev, "BCM: Using default device address (%pMR)",
&bda->bdaddr);
- set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR);
}
}
@@ -467,7 +467,7 @@ static int btbcm_print_controller_features(struct hci_dev *hdev)
/* Read DMI and disable broken Read LE Min/Max Tx Power */
if (dmi_first_match(disable_broken_read_transmit_power))
- set_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER);
return 0;
}
@@ -706,7 +706,7 @@ int btbcm_finalize(struct hci_dev *hdev, bool *fw_load_done, bool use_autobaud_m
btbcm_check_bdaddr(hdev);
- set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER);
return 0;
}
@@ -769,7 +769,7 @@ int btbcm_setup_apple(struct hci_dev *hdev)
kfree_skb(skb);
}
- set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER);
return 0;
}
diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index 55cc1652bfe4..06016ac3965c 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -88,7 +88,7 @@ int btintel_check_bdaddr(struct hci_dev *hdev)
if (!bacmp(&bda->bdaddr, BDADDR_INTEL)) {
bt_dev_err(hdev, "Found Intel default device address (%pMR)",
&bda->bdaddr);
- set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR);
}
kfree_skb(skb);
@@ -2027,7 +2027,7 @@ static int btintel_download_fw(struct hci_dev *hdev,
*/
if (!bacmp(&params->otp_bdaddr, BDADDR_ANY)) {
bt_dev_info(hdev, "No device address configured");
- set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR);
}
download:
@@ -2295,7 +2295,7 @@ static int btintel_prepare_fw_download_tlv(struct hci_dev *hdev,
*/
if (!bacmp(&ver->otp_bd_addr, BDADDR_ANY)) {
bt_dev_info(hdev, "No device address configured");
- set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR);
}
}
@@ -2670,7 +2670,7 @@ static u8 btintel_classify_pkt_type(struct hci_dev *hdev, struct sk_buff *skb)
* Distinguish ISO data packets form ACL data packets
* based on their connection handle value range.
*/
- if (hci_skb_pkt_type(skb) == HCI_ACLDATA_PKT) {
+ if (iso_capable(hdev) && hci_skb_pkt_type(skb) == HCI_ACLDATA_PKT) {
__u16 handle = __le16_to_cpu(hci_acl_hdr(skb)->handle);
if (hci_handle(handle) >= BTINTEL_ISODATA_HANDLE_BASE)
@@ -3435,9 +3435,9 @@ static int btintel_setup_combined(struct hci_dev *hdev)
}
/* Apply the common HCI quirks for Intel device */
- set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
- set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
- set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER);
+ hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY);
+ hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_DIAG);
/* Set up the quality report callback for Intel devices */
hdev->set_quality_report = btintel_set_quality_report;
@@ -3475,8 +3475,8 @@ static int btintel_setup_combined(struct hci_dev *hdev)
*/
if (!btintel_test_flag(hdev,
INTEL_ROM_LEGACY_NO_WBS_SUPPORT))
- set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
- &hdev->quirks);
+ hci_set_quirk(hdev,
+ HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED);
err = btintel_legacy_rom_setup(hdev, &ver);
break;
@@ -3491,11 +3491,11 @@ static int btintel_setup_combined(struct hci_dev *hdev)
*
* All Legacy bootloader devices support WBS
*/
- set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
- &hdev->quirks);
+ hci_set_quirk(hdev,
+ HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED);
/* These variants don't seem to support LE Coded PHY */
- set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LE_CODED);
/* Setup MSFT Extension support */
btintel_set_msft_opcode(hdev, ver.hw_variant);
@@ -3571,10 +3571,10 @@ static int btintel_setup_combined(struct hci_dev *hdev)
*
* All Legacy bootloader devices support WBS
*/
- set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED);
/* These variants don't seem to support LE Coded PHY */
- set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LE_CODED);
/* Setup MSFT Extension support */
btintel_set_msft_opcode(hdev, ver.hw_variant);
@@ -3600,7 +3600,7 @@ static int btintel_setup_combined(struct hci_dev *hdev)
*
* All TLV based devices support WBS
*/
- set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED);
/* Setup MSFT Extension support */
btintel_set_msft_opcode(hdev,
diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c
index 50fe17f1e1d1..f4e3fb54fe76 100644
--- a/drivers/bluetooth/btintel_pcie.c
+++ b/drivers/bluetooth/btintel_pcie.c
@@ -396,8 +396,13 @@ static int btintel_pcie_submit_rx(struct btintel_pcie_data *data)
static int btintel_pcie_start_rx(struct btintel_pcie_data *data)
{
int i, ret;
+ struct rxq *rxq = &data->rxq;
+
+	/* Post (BTINTEL_PCIE_RX_DESCS_COUNT - 3) buffers to work around a
+	 * hardware issue that leads to a race condition in the firmware.
+	 */
- for (i = 0; i < BTINTEL_PCIE_RX_MAX_QUEUE; i++) {
+ for (i = 0; i < rxq->count - 3; i++) {
ret = btintel_pcie_submit_rx(data);
if (ret)
return ret;
@@ -1782,8 +1787,8 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data)
* + size of index * Number of queues(2) * type of index array(4)
* + size of context information
*/
- total = (sizeof(struct tfd) + sizeof(struct urbd0) + sizeof(struct frbd)
- + sizeof(struct urbd1)) * BTINTEL_DESCS_COUNT;
+ total = (sizeof(struct tfd) + sizeof(struct urbd0)) * BTINTEL_PCIE_TX_DESCS_COUNT;
+ total += (sizeof(struct frbd) + sizeof(struct urbd1)) * BTINTEL_PCIE_RX_DESCS_COUNT;
/* Add the sum of size of index array and size of ci struct */
total += (sizeof(u16) * BTINTEL_PCIE_NUM_QUEUES * 4) + sizeof(struct ctx_info);
@@ -1808,36 +1813,36 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data)
data->dma_v_addr = v_addr;
/* Setup descriptor count */
- data->txq.count = BTINTEL_DESCS_COUNT;
- data->rxq.count = BTINTEL_DESCS_COUNT;
+ data->txq.count = BTINTEL_PCIE_TX_DESCS_COUNT;
+ data->rxq.count = BTINTEL_PCIE_RX_DESCS_COUNT;
/* Setup tfds */
data->txq.tfds_p_addr = p_addr;
data->txq.tfds = v_addr;
- p_addr += (sizeof(struct tfd) * BTINTEL_DESCS_COUNT);
- v_addr += (sizeof(struct tfd) * BTINTEL_DESCS_COUNT);
+ p_addr += (sizeof(struct tfd) * BTINTEL_PCIE_TX_DESCS_COUNT);
+ v_addr += (sizeof(struct tfd) * BTINTEL_PCIE_TX_DESCS_COUNT);
/* Setup urbd0 */
data->txq.urbd0s_p_addr = p_addr;
data->txq.urbd0s = v_addr;
- p_addr += (sizeof(struct urbd0) * BTINTEL_DESCS_COUNT);
- v_addr += (sizeof(struct urbd0) * BTINTEL_DESCS_COUNT);
+ p_addr += (sizeof(struct urbd0) * BTINTEL_PCIE_TX_DESCS_COUNT);
+ v_addr += (sizeof(struct urbd0) * BTINTEL_PCIE_TX_DESCS_COUNT);
/* Setup FRBD*/
data->rxq.frbds_p_addr = p_addr;
data->rxq.frbds = v_addr;
- p_addr += (sizeof(struct frbd) * BTINTEL_DESCS_COUNT);
- v_addr += (sizeof(struct frbd) * BTINTEL_DESCS_COUNT);
+ p_addr += (sizeof(struct frbd) * BTINTEL_PCIE_RX_DESCS_COUNT);
+ v_addr += (sizeof(struct frbd) * BTINTEL_PCIE_RX_DESCS_COUNT);
/* Setup urbd1 */
data->rxq.urbd1s_p_addr = p_addr;
data->rxq.urbd1s = v_addr;
- p_addr += (sizeof(struct urbd1) * BTINTEL_DESCS_COUNT);
- v_addr += (sizeof(struct urbd1) * BTINTEL_DESCS_COUNT);
+ p_addr += (sizeof(struct urbd1) * BTINTEL_PCIE_RX_DESCS_COUNT);
+ v_addr += (sizeof(struct urbd1) * BTINTEL_PCIE_RX_DESCS_COUNT);
/* Setup data buffers for txq */
err = btintel_pcie_setup_txq_bufs(data, &data->txq);
@@ -2028,6 +2033,28 @@ static void btintel_pcie_release_hdev(struct btintel_pcie_data *data)
data->hdev = NULL;
}
+static void btintel_pcie_disable_interrupts(struct btintel_pcie_data *data)
+{
+ spin_lock(&data->irq_lock);
+ btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_FH_INT_MASK, data->fh_init_mask);
+ btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_HW_INT_MASK, data->hw_init_mask);
+ spin_unlock(&data->irq_lock);
+}
+
+static void btintel_pcie_enable_interrupts(struct btintel_pcie_data *data)
+{
+ spin_lock(&data->irq_lock);
+ btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_FH_INT_MASK, ~data->fh_init_mask);
+ btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_HW_INT_MASK, ~data->hw_init_mask);
+ spin_unlock(&data->irq_lock);
+}
+
+static void btintel_pcie_synchronize_irqs(struct btintel_pcie_data *data)
+{
+ for (int i = 0; i < data->alloc_vecs; i++)
+ synchronize_irq(data->msix_entries[i].vector);
+}
+
static int btintel_pcie_setup_internal(struct hci_dev *hdev)
{
struct btintel_pcie_data *data = hci_get_drvdata(hdev);
@@ -2054,9 +2081,9 @@ static int btintel_pcie_setup_internal(struct hci_dev *hdev)
}
/* Apply the common HCI quirks for Intel device */
- set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
- set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
- set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER);
+ hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY);
+ hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_DIAG);
/* Set up the quality report callback for Intel devices */
hdev->set_quality_report = btintel_set_quality_report;
@@ -2096,7 +2123,7 @@ static int btintel_pcie_setup_internal(struct hci_dev *hdev)
*
* All TLV based devices support WBS
*/
- set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED);
/* Setup MSFT Extension support */
btintel_set_msft_opcode(hdev,
@@ -2147,6 +2174,8 @@ static int btintel_pcie_setup(struct hci_dev *hdev)
bt_dev_err(hdev, "Firmware download retry count: %d",
fw_dl_retry);
btintel_pcie_dump_debug_registers(hdev);
+ btintel_pcie_disable_interrupts(data);
+ btintel_pcie_synchronize_irqs(data);
err = btintel_pcie_reset_bt(data);
if (err) {
bt_dev_err(hdev, "Failed to do shr reset: %d", err);
@@ -2154,6 +2183,7 @@ static int btintel_pcie_setup(struct hci_dev *hdev)
}
usleep_range(10000, 12000);
btintel_pcie_reset_ia(data);
+ btintel_pcie_enable_interrupts(data);
btintel_pcie_config_msix(data);
err = btintel_pcie_enable_bt(data);
if (err) {
@@ -2286,6 +2316,12 @@ static void btintel_pcie_remove(struct pci_dev *pdev)
data = pci_get_drvdata(pdev);
+ btintel_pcie_disable_interrupts(data);
+
+ btintel_pcie_synchronize_irqs(data);
+
+ flush_work(&data->rx_work);
+
btintel_pcie_reset_bt(data);
for (int i = 0; i < data->alloc_vecs; i++) {
struct msix_entry *msix_entry;
@@ -2298,8 +2334,6 @@ static void btintel_pcie_remove(struct pci_dev *pdev)
btintel_pcie_release_hdev(data);
- flush_work(&data->rx_work);
-
destroy_workqueue(data->workqueue);
btintel_pcie_free(data);
diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h
index 21b964b15c1c..7dad4523236c 100644
--- a/drivers/bluetooth/btintel_pcie.h
+++ b/drivers/bluetooth/btintel_pcie.h
@@ -154,8 +154,11 @@ enum msix_mbox_int_causes {
/* Default interrupt timeout in msec */
#define BTINTEL_DEFAULT_INTR_TIMEOUT_MS 3000
-/* The number of descriptors in TX/RX queues */
-#define BTINTEL_DESCS_COUNT 16
+/* The number of descriptors in TX queues */
+#define BTINTEL_PCIE_TX_DESCS_COUNT 32
+
+/* The number of descriptors in RX queues */
+#define BTINTEL_PCIE_RX_DESCS_COUNT 64
/* Number of Queue for TX and RX
* It indicates the index of the IA(Index Array)
@@ -177,9 +180,6 @@ enum {
/* Doorbell vector for TFD */
#define BTINTEL_PCIE_TX_DB_VEC 0
-/* Number of pending RX requests for downlink */
-#define BTINTEL_PCIE_RX_MAX_QUEUE 6
-
/* Doorbell vector for FRBD */
#define BTINTEL_PCIE_RX_DB_VEC 513
diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c
index c16a3518b8ff..4fc673640bfc 100644
--- a/drivers/bluetooth/btmtksdio.c
+++ b/drivers/bluetooth/btmtksdio.c
@@ -1141,7 +1141,7 @@ static int btmtksdio_setup(struct hci_dev *hdev)
}
/* Enable WBS with mSBC codec */
- set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED);
/* Enable GPIO reset mechanism */
if (bdev->reset) {
@@ -1384,7 +1384,7 @@ static int btmtksdio_probe(struct sdio_func *func,
SET_HCIDEV_DEV(hdev, &func->dev);
hdev->manufacturer = 70;
- set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP);
sdio_set_drvdata(func, bdev);
diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c
index c97e260fcb0c..51400a891f6e 100644
--- a/drivers/bluetooth/btmtkuart.c
+++ b/drivers/bluetooth/btmtkuart.c
@@ -872,7 +872,7 @@ static int btmtkuart_probe(struct serdev_device *serdev)
SET_HCIDEV_DEV(hdev, &serdev->dev);
hdev->manufacturer = 70;
- set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP);
if (btmtkuart_is_standalone(bdev)) {
err = clk_prepare_enable(bdev->osc);
diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c
index 1088db6056a4..24f9b52605a1 100644
--- a/drivers/bluetooth/btnxpuart.c
+++ b/drivers/bluetooth/btnxpuart.c
@@ -1807,7 +1807,7 @@ static int nxp_serdev_probe(struct serdev_device *serdev)
"local-bd-address",
(u8 *)&ba, sizeof(ba));
if (bacmp(&ba, BDADDR_ANY))
- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY);
if (hci_register_dev(hdev) < 0) {
dev_err(&serdev->dev, "Can't register HCI device\n");
diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index edefb9dc76aa..7c958d6065be 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -739,7 +739,7 @@ static int qca_check_bdaddr(struct hci_dev *hdev, const struct qca_fw_config *co
bda = (struct hci_rp_read_bd_addr *)skb->data;
if (!bacmp(&bda->bdaddr, &config->bdaddr))
- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY);
kfree_skb(skb);
diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c
index c0eb71d6ffd3..d2e13fcb6bab 100644
--- a/drivers/bluetooth/btqcomsmd.c
+++ b/drivers/bluetooth/btqcomsmd.c
@@ -117,7 +117,7 @@ static int btqcomsmd_setup(struct hci_dev *hdev)
/* Devices do not have persistent storage for BD address. Retrieve
* it from the firmware node property.
*/
- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY);
return 0;
}
diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c
index 7838c89e529e..4d182cf6e037 100644
--- a/drivers/bluetooth/btrtl.c
+++ b/drivers/bluetooth/btrtl.c
@@ -1287,7 +1287,7 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev)
/* Enable controller to do both LE scan and BR/EDR inquiry
* simultaneously.
*/
- set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY);
/* Enable central-peripheral role (able to create new connections with
* an existing connection in slave role).
@@ -1301,7 +1301,7 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev)
case CHIP_ID_8851B:
case CHIP_ID_8922A:
case CHIP_ID_8852BT:
- set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED);
/* RTL8852C needs to transmit mSBC data continuously without
* the zero length of USB packets for the ALT 6 supported chips
@@ -1312,7 +1312,8 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev)
if (btrtl_dev->project_id == CHIP_ID_8852A ||
btrtl_dev->project_id == CHIP_ID_8852B ||
btrtl_dev->project_id == CHIP_ID_8852C)
- set_bit(HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER, &hdev->quirks);
+ hci_set_quirk(hdev,
+ HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER);
hci_set_aosp_capable(hdev);
break;
@@ -1331,8 +1332,7 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev)
* but it doesn't support any features from page 2 -
* it either responds with garbage or with error status
*/
- set_bit(HCI_QUIRK_BROKEN_LOCAL_EXT_FEATURES_PAGE_2,
- &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LOCAL_EXT_FEATURES_PAGE_2);
break;
default:
break;
diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c
index a69feb08486a..8325655ce6aa 100644
--- a/drivers/bluetooth/btsdio.c
+++ b/drivers/bluetooth/btsdio.c
@@ -327,7 +327,7 @@ static int btsdio_probe(struct sdio_func *func,
hdev->send = btsdio_send_frame;
if (func->vendor == 0x0104 && func->device == 0x00c5)
- set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE);
err = hci_register_dev(hdev);
if (err < 0) {
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 9ab661d2d1e6..f9eeec0aed57 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2472,18 +2472,18 @@ static int btusb_setup_csr(struct hci_dev *hdev)
* Probably will need to be expanded in the future;
* without these the controller will lock up.
*/
- set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks);
- set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks);
- set_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks);
- set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks);
- set_bit(HCI_QUIRK_BROKEN_READ_VOICE_SETTING, &hdev->quirks);
- set_bit(HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_ERR_DATA_REPORTING);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL);
+ hci_set_quirk(hdev, HCI_QUIRK_NO_SUSPEND_NOTIFIER);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_READ_VOICE_SETTING);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE);
/* Clear the reset quirk since this is not an actual
* early Bluetooth 1.1 device from CSR.
*/
- clear_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks);
- clear_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+ hci_clear_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE);
+ hci_clear_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY);
/*
* Special workaround for these BT 4.0 chip clones, and potentially more:
@@ -3192,6 +3192,32 @@ static const struct qca_device_info qca_devices_table[] = {
{ 0x00190200, 40, 4, 16 }, /* WCN785x 2.0 */
};
+static u16 qca_extract_board_id(const struct qca_version *ver)
+{
+ u16 flag = le16_to_cpu(ver->flag);
+ u16 board_id = 0;
+
+ if (((flag >> 8) & 0xff) == QCA_FLAG_MULTI_NVM) {
+		/* The board ID is split into two bytes:
+		 * the 1st (high) byte is the chip ID, and the 2nd (low) byte
+		 * is the platform ID. For example, in board ID 0x0A01, 0x0A
+		 * is the chip ID and 0x01 is the platform ID. We have several
+		 * platforms, and platform IDs are continuously added.
+		 * Platform ID:
+		 * 0x00 is for Mobile
+		 * 0x01 is for X86
+		 * 0x02 is for Automotive
+		 * 0x03 is for Consumer electronics
+		 */
+ board_id = (ver->chip_id << 8) + ver->platform_id;
+ }
+
+ /* Take 0xffff as invalid board ID */
+ if (board_id == 0xffff)
+ board_id = 0;
+
+ return board_id;
+}
+
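
A worked example of the packing in qca_extract_board_id() (chip ID in the high byte, platform ID in the low byte): chip 0x0A on an X86 platform (0x01) packs to 0x0a01, which later selects an NVM file such as qca/nvm_usb_<rom>_0a01.bin in btusb_generate_qca_nvm_name() below (the concrete IDs are illustrative):

static const u16 example_board_id = (0x0A << 8) + 0x01;	/* == 0x0a01 */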
static int btusb_qca_send_vendor_req(struct usb_device *udev, u8 request,
void *data, u16 size)
{
@@ -3348,44 +3374,28 @@ static void btusb_generate_qca_nvm_name(char *fwname, size_t max_size,
const struct qca_version *ver)
{
u32 rom_version = le32_to_cpu(ver->rom_version);
- u16 flag = le16_to_cpu(ver->flag);
+ const char *variant;
+ int len;
+ u16 board_id;
- if (((flag >> 8) & 0xff) == QCA_FLAG_MULTI_NVM) {
- /* The board_id should be split into two bytes
- * The 1st byte is chip ID, and the 2nd byte is platform ID
- * For example, board ID 0x010A, 0x01 is platform ID. 0x0A is chip ID
- * we have several platforms, and platform IDs are continuously added
- * Platform ID:
- * 0x00 is for Mobile
- * 0x01 is for X86
- * 0x02 is for Automotive
- * 0x03 is for Consumer electronic
- */
- u16 board_id = (ver->chip_id << 8) + ver->platform_id;
- const char *variant;
+ board_id = qca_extract_board_id(ver);
- switch (le32_to_cpu(ver->ram_version)) {
- case WCN6855_2_0_RAM_VERSION_GF:
- case WCN6855_2_1_RAM_VERSION_GF:
- variant = "_gf";
- break;
- default:
- variant = "";
- break;
- }
-
- if (board_id == 0) {
- snprintf(fwname, max_size, "qca/nvm_usb_%08x%s.bin",
- rom_version, variant);
- } else {
- snprintf(fwname, max_size, "qca/nvm_usb_%08x%s_%04x.bin",
- rom_version, variant, board_id);
- }
- } else {
- snprintf(fwname, max_size, "qca/nvm_usb_%08x.bin",
- rom_version);
+ switch (le32_to_cpu(ver->ram_version)) {
+ case WCN6855_2_0_RAM_VERSION_GF:
+ case WCN6855_2_1_RAM_VERSION_GF:
+ variant = "_gf";
+ break;
+ default:
+ variant = NULL;
+ break;
}
+ len = snprintf(fwname, max_size, "qca/nvm_usb_%08x", rom_version);
+ if (variant)
+ len += snprintf(fwname + len, max_size - len, "%s", variant);
+ if (board_id)
+ len += snprintf(fwname + len, max_size - len, "_%04x", board_id);
+ len += snprintf(fwname + len, max_size - len, ".bin");
}
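
Names produced by the incremental snprintf() chain above, derived from the format strings shown (the concrete ROM and board values are illustrative):

/*
 *   rom 0x00190200, GF variant, board 0x0a01:
 *       qca/nvm_usb_00190200_gf_0a01.bin
 *   rom 0x00130201, no variant, board 0:
 *       qca/nvm_usb_00130201.bin
 */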
static int btusb_setup_qca_load_nvm(struct hci_dev *hdev,
@@ -3494,7 +3504,7 @@ static int btusb_setup_qca(struct hci_dev *hdev)
/* Mark HCI_OP_ENHANCED_SETUP_SYNC_CONN as broken as it doesn't seem to
* work with the likes of HSP/HFP mSBC.
*/
- set_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN);
return 0;
}
@@ -4008,10 +4018,10 @@ static int btusb_probe(struct usb_interface *intf,
}
#endif
if (id->driver_info & BTUSB_CW6622)
- set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY);
if (id->driver_info & BTUSB_BCM2045)
- set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY);
if (id->driver_info & BTUSB_BCM92035)
hdev->setup = btusb_setup_bcm92035;
@@ -4068,8 +4078,8 @@ static int btusb_probe(struct usb_interface *intf,
hdev->reset = btmtk_reset_sync;
hdev->set_bdaddr = btmtk_set_bdaddr;
hdev->send = btusb_send_frame_mtk;
- set_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &hdev->quirks);
- set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN);
+ hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP);
data->recv_acl = btmtk_usb_recv_acl;
data->suspend = btmtk_usb_suspend;
data->resume = btmtk_usb_resume;
@@ -4077,20 +4087,20 @@ static int btusb_probe(struct usb_interface *intf,
}
if (id->driver_info & BTUSB_SWAVE) {
- set_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks);
- set_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_FIXUP_INQUIRY_MODE);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LOCAL_COMMANDS);
}
if (id->driver_info & BTUSB_INTEL_BOOT) {
hdev->manufacturer = 2;
- set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_RAW_DEVICE);
}
if (id->driver_info & BTUSB_ATH3012) {
data->setup_on_usb = btusb_setup_qca;
hdev->set_bdaddr = btusb_set_bdaddr_ath3012;
- set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
- set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY);
+ hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER);
}
if (id->driver_info & BTUSB_QCA_ROME) {
@@ -4098,7 +4108,7 @@ static int btusb_probe(struct usb_interface *intf,
hdev->shutdown = btusb_shutdown_qca;
hdev->set_bdaddr = btusb_set_bdaddr_ath3012;
hdev->reset = btusb_qca_reset;
- set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY);
btusb_check_needs_reset_resume(intf);
}
@@ -4112,7 +4122,7 @@ static int btusb_probe(struct usb_interface *intf,
hdev->shutdown = btusb_shutdown_qca;
hdev->set_bdaddr = btusb_set_bdaddr_wcn6855;
hdev->reset = btusb_qca_reset;
- set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY);
hci_set_msft_opcode(hdev, 0xFD70);
}
@@ -4140,35 +4150,35 @@ static int btusb_probe(struct usb_interface *intf,
if (id->driver_info & BTUSB_ACTIONS_SEMI) {
/* Support is advertised, but not implemented */
- set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks);
- set_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks);
- set_bit(HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT, &hdev->quirks);
- set_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &hdev->quirks);
- set_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks);
- set_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, &hdev->quirks);
- set_bit(HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_ERR_DATA_REPORTING);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_EXT_SCAN);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_EXT_CREATE_CONN);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT);
}
if (!reset)
- set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE);
if (force_scofix || id->driver_info & BTUSB_WRONG_SCO_MTU) {
if (!disable_scofix)
- set_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_FIXUP_BUFFER_SIZE);
}
if (id->driver_info & BTUSB_BROKEN_ISOC)
data->isoc = NULL;
if (id->driver_info & BTUSB_WIDEBAND_SPEECH)
- set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED);
if (id->driver_info & BTUSB_INVALID_LE_STATES)
- set_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LE_STATES);
if (id->driver_info & BTUSB_DIGIANSWER) {
data->cmdreq_type = USB_TYPE_VENDOR;
- set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE);
}
if (id->driver_info & BTUSB_CSR) {
@@ -4177,10 +4187,10 @@ static int btusb_probe(struct usb_interface *intf,
/* Old firmware would otherwise execute USB reset */
if (bcdDevice < 0x117)
- set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE);
/* This must be set first in case we disable it for fakes */
- set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY);
/* Fake CSR devices with broken commands */
if (le16_to_cpu(udev->descriptor.idVendor) == 0x0a12 &&
@@ -4193,7 +4203,7 @@ static int btusb_probe(struct usb_interface *intf,
/* New sniffer firmware has crippled HCI interface */
if (le16_to_cpu(udev->descriptor.bcdDevice) > 0x997)
- set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_RAW_DEVICE);
}
if (id->driver_info & BTUSB_INTEL_BOOT) {
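[Annotation] The mechanical set_bit(..., &hdev->quirks) to hci_set_quirk(hdev, ...) conversion running through all of these drivers suggests quirk bits now sit behind an accessor instead of being poked into the hdev flag word directly. A plausible single-word model of such a helper pair; this is an assumption for illustration, the real definitions live in the Bluetooth core headers:

#include <stdio.h>

/* Illustrative stand-ins; the real struct and quirk numbering are
 * kernel-internal. The point is the accessor indirection: callers
 * never touch the flag word themselves.
 */
enum { HCI_QUIRK_RESET_ON_CLOSE, HCI_QUIRK_RAW_DEVICE };

struct hci_dev {
	unsigned long quirk_flags;
};

static inline void hci_set_quirk(struct hci_dev *hdev, unsigned int nr)
{
	hdev->quirk_flags |= 1UL << nr;
}

static inline int hci_test_quirk(const struct hci_dev *hdev, unsigned int nr)
{
	return !!(hdev->quirk_flags & (1UL << nr));
}

int main(void)
{
	struct hci_dev hdev = { 0 };

	hci_set_quirk(&hdev, HCI_QUIRK_RAW_DEVICE);
	printf("raw=%d reset=%d\n",
	       hci_test_quirk(&hdev, HCI_QUIRK_RAW_DEVICE),
	       hci_test_quirk(&hdev, HCI_QUIRK_RESET_ON_CLOSE));
	return 0;
}

Centralizing the access also gives the core one place to add locking or tracing later without touching every driver.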
diff --git a/drivers/bluetooth/hci_aml.c b/drivers/bluetooth/hci_aml.c
index 1394c575aa6d..707e90f80130 100644
--- a/drivers/bluetooth/hci_aml.c
+++ b/drivers/bluetooth/hci_aml.c
@@ -424,7 +424,7 @@ static int aml_check_bdaddr(struct hci_dev *hdev)
if (!bacmp(&paddr->bdaddr, AML_BDADDR_DEFAULT)) {
bt_dev_info(hdev, "amlbt using default bdaddr (%pM)", &paddr->bdaddr);
- set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR);
}
exit:
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 9684eb16059b..f96617b85d87 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -643,8 +643,8 @@ static int bcm_setup(struct hci_uart *hu)
* Allow the bootloader to set a valid address through the
* device tree.
*/
- if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hu->hdev->quirks))
- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hu->hdev->quirks);
+ if (hci_test_quirk(hu->hdev, HCI_QUIRK_INVALID_BDADDR))
+ hci_set_quirk(hu->hdev, HCI_QUIRK_USE_BDADDR_PROPERTY);
if (!bcm_request_irq(bcm))
err = bcm_setup_sleep(hu);
diff --git a/drivers/bluetooth/hci_bcm4377.c b/drivers/bluetooth/hci_bcm4377.c
index 9bce53e49cfa..8a9aa33776b0 100644
--- a/drivers/bluetooth/hci_bcm4377.c
+++ b/drivers/bluetooth/hci_bcm4377.c
@@ -1435,7 +1435,7 @@ static int bcm4377_check_bdaddr(struct bcm4377_data *bcm4377)
bda = (struct hci_rp_read_bd_addr *)skb->data;
if (!bcm4377_is_valid_bdaddr(bcm4377, &bda->bdaddr))
- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &bcm4377->hdev->quirks);
+ hci_set_quirk(bcm4377->hdev, HCI_QUIRK_USE_BDADDR_PROPERTY);
kfree_skb(skb);
return 0;
@@ -2389,13 +2389,13 @@ static int bcm4377_probe(struct pci_dev *pdev, const struct pci_device_id *id)
hdev->setup = bcm4377_hci_setup;
if (bcm4377->hw->broken_mws_transport_config)
- set_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG);
if (bcm4377->hw->broken_ext_scan)
- set_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_EXT_SCAN);
if (bcm4377->hw->broken_le_coded)
- set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LE_CODED);
if (bcm4377->hw->broken_le_ext_adv_report_phy)
- set_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY);
pci_set_drvdata(pdev, bcm4377);
hci_set_drvdata(hdev, bcm4377);
diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c
index 811f33701f84..d22fbb7f9fc5 100644
--- a/drivers/bluetooth/hci_intel.c
+++ b/drivers/bluetooth/hci_intel.c
@@ -660,7 +660,7 @@ static int intel_setup(struct hci_uart *hu)
*/
if (!bacmp(&params.otp_bdaddr, BDADDR_ANY)) {
bt_dev_info(hdev, "No device address configured");
- set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR);
}
/* With this Intel bootloader only the hardware variant and device
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index acba83156de9..d0adae3267b4 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -667,13 +667,13 @@ static int hci_uart_register_dev(struct hci_uart *hu)
SET_HCIDEV_DEV(hdev, hu->tty->dev);
if (test_bit(HCI_UART_RAW_DEVICE, &hu->hdev_flags))
- set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_RAW_DEVICE);
if (test_bit(HCI_UART_EXT_CONFIG, &hu->hdev_flags))
- set_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG);
if (!test_bit(HCI_UART_RESET_ON_INIT, &hu->hdev_flags))
- set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE);
/* Only call open() for the protocol after hdev is fully initialized as
* open() (or a timer/workqueue it starts) may attempt to reference it.
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index e19e9bd49555..7044c86325ce 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -649,11 +649,11 @@ static int ll_setup(struct hci_uart *hu)
/* This means that there was an error getting the BD address
* during probe, so mark the device as having a bad address.
*/
- set_bit(HCI_QUIRK_INVALID_BDADDR, &hu->hdev->quirks);
+ hci_set_quirk(hu->hdev, HCI_QUIRK_INVALID_BDADDR);
} else if (bacmp(&lldev->bdaddr, BDADDR_ANY)) {
err = ll_set_bdaddr(hu->hdev, &lldev->bdaddr);
if (err)
- set_bit(HCI_QUIRK_INVALID_BDADDR, &hu->hdev->quirks);
+ hci_set_quirk(hu->hdev, HCI_QUIRK_INVALID_BDADDR);
}
/* Operational speed if any */
diff --git a/drivers/bluetooth/hci_nokia.c b/drivers/bluetooth/hci_nokia.c
index 9fc10a16fd96..cd7575c20f65 100644
--- a/drivers/bluetooth/hci_nokia.c
+++ b/drivers/bluetooth/hci_nokia.c
@@ -439,7 +439,7 @@ static int nokia_setup(struct hci_uart *hu)
if (btdev->man_id == NOKIA_ID_BCM2048) {
hu->hdev->set_bdaddr = btbcm_set_bdaddr;
- set_bit(HCI_QUIRK_INVALID_BDADDR, &hu->hdev->quirks);
+ hci_set_quirk(hu->hdev, HCI_QUIRK_INVALID_BDADDR);
dev_dbg(dev, "bcm2048 has invalid bluetooth address!");
}
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 5fe5879881f5..33c43503714b 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -1892,7 +1892,7 @@ static int qca_setup(struct hci_uart *hu)
/* Enable controller to do both LE scan and BR/EDR inquiry
* simultaneously.
*/
- set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY);
switch (soc_type) {
case QCA_QCA2066:
@@ -1944,7 +1944,7 @@ retry:
case QCA_WCN7850:
qcadev = serdev_device_get_drvdata(hu->serdev);
if (qcadev->bdaddr_property_broken)
- set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BDADDR_PROPERTY_BROKEN);
hci_set_aosp_capable(hdev);
@@ -2392,10 +2392,17 @@ static int qca_serdev_probe(struct serdev_device *serdev)
*/
qcadev->bt_power->pwrseq = devm_pwrseq_get(&serdev->dev,
"bluetooth");
- if (IS_ERR(qcadev->bt_power->pwrseq))
- return PTR_ERR(qcadev->bt_power->pwrseq);
- break;
+ /*
+ * Some modules have BT_EN enabled via a hardware pull-up,
+ * meaning it is not defined in the DTS and is not controlled
+ * through the power sequence. In such cases, fall through
+ * to follow the legacy flow.
+ */
+ if (IS_ERR(qcadev->bt_power->pwrseq))
+ qcadev->bt_power->pwrseq = NULL;
+ else
+ break;
}
fallthrough;
case QCA_WCN3950:
@@ -2480,7 +2487,7 @@ static int qca_serdev_probe(struct serdev_device *serdev)
hdev = qcadev->serdev_hu.hdev;
if (power_ctrl_enabled) {
- set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP);
hdev->shutdown = qca_power_off;
}
@@ -2489,11 +2496,11 @@ static int qca_serdev_probe(struct serdev_device *serdev)
* be queried via hci. Same with the valid le states quirk.
*/
if (data->capabilities & QCA_CAP_WIDEBAND_SPEECH)
- set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
- &hdev->quirks);
+ hci_set_quirk(hdev,
+ HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED);
if (!(data->capabilities & QCA_CAP_VALID_LE_STATES))
- set_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LE_STATES);
}
return 0;
@@ -2543,7 +2550,7 @@ static void qca_serdev_shutdown(struct device *dev)
 * invoked and the SOC is already in the initial state, so
 * there is no need to send the VSC either.
*/
- if (test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks) ||
+ if (hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP) ||
hci_dev_test_flag(hdev, HCI_SETUP))
return;
diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c
index 89a22e9b3253..593d9cefbbf9 100644
--- a/drivers/bluetooth/hci_serdev.c
+++ b/drivers/bluetooth/hci_serdev.c
@@ -152,7 +152,7 @@ static int hci_uart_close(struct hci_dev *hdev)
 * BT SOC is completely powered OFF during BT OFF; holding the port
 * open may drain the battery.
*/
- if (test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks)) {
+ if (hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP)) {
clear_bit(HCI_UART_PROTO_READY, &hu->flags);
serdev_device_close(hu->serdev);
}
@@ -358,13 +358,13 @@ int hci_uart_register_device_priv(struct hci_uart *hu,
SET_HCIDEV_DEV(hdev, &hu->serdev->dev);
if (test_bit(HCI_UART_NO_SUSPEND_NOTIFIER, &hu->flags))
- set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_NO_SUSPEND_NOTIFIER);
if (test_bit(HCI_UART_RAW_DEVICE, &hu->hdev_flags))
- set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_RAW_DEVICE);
if (test_bit(HCI_UART_EXT_CONFIG, &hu->hdev_flags))
- set_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG);
if (test_bit(HCI_UART_INIT_PENDING, &hu->hdev_flags))
return 0;
diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
index 59f4d7bdffdc..f7d8c3c00655 100644
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -415,16 +415,16 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode)
hdev->get_codec_config_data = vhci_get_codec_config_data;
hdev->wakeup = vhci_wakeup;
hdev->setup = vhci_setup;
- set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks);
- set_bit(HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP);
+ hci_set_quirk(hdev, HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED);
/* bit 6 is for external configuration */
if (opcode & 0x40)
- set_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG);
/* bit 7 is for raw device */
if (opcode & 0x80)
- set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_RAW_DEVICE);
if (hci_register_dev(hdev) < 0) {
BT_ERR("Can't register HCI device");
diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c
index 756f292df9e8..6f1a37e85c6a 100644
--- a/drivers/bluetooth/virtio_bt.c
+++ b/drivers/bluetooth/virtio_bt.c
@@ -327,17 +327,17 @@ static int virtbt_probe(struct virtio_device *vdev)
hdev->setup = virtbt_setup_intel;
hdev->shutdown = virtbt_shutdown_generic;
hdev->set_bdaddr = virtbt_set_bdaddr_intel;
- set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
- set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
- set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER);
+ hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY);
+ hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED);
break;
case VIRTIO_BT_CONFIG_VENDOR_REALTEK:
hdev->manufacturer = 93;
hdev->setup = virtbt_setup_realtek;
hdev->shutdown = virtbt_shutdown_generic;
- set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
- set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
+ hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY);
+ hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED);
break;
}
}
diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c
index 7671bd158545..c1c0a4759c7e 100644
--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
+++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
@@ -943,6 +943,7 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev,
struct fsl_mc_obj_desc endpoint_desc = {{ 0 }};
struct dprc_endpoint endpoint1 = {{ 0 }};
struct dprc_endpoint endpoint2 = {{ 0 }};
+ struct fsl_mc_bus *mc_bus;
int state, err;
mc_bus_dev = to_fsl_mc_device(mc_dev->dev.parent);
@@ -966,6 +967,8 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev,
strcpy(endpoint_desc.type, endpoint2.type);
endpoint_desc.id = endpoint2.id;
endpoint = fsl_mc_device_lookup(&endpoint_desc, mc_bus_dev);
+ if (endpoint)
+ return endpoint;
/*
* We know that the device has an endpoint because we verified by
@@ -973,17 +976,13 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev,
* yet discovered by the fsl-mc bus, thus the lookup returned NULL.
* Force a rescan of the devices in this container and retry the lookup.
*/
- if (!endpoint) {
- struct fsl_mc_bus *mc_bus = to_fsl_mc_bus(mc_bus_dev);
-
- if (mutex_trylock(&mc_bus->scan_mutex)) {
- err = dprc_scan_objects(mc_bus_dev, true);
- mutex_unlock(&mc_bus->scan_mutex);
- }
-
- if (err < 0)
- return ERR_PTR(err);
+ mc_bus = to_fsl_mc_bus(mc_bus_dev);
+ if (mutex_trylock(&mc_bus->scan_mutex)) {
+ err = dprc_scan_objects(mc_bus_dev, true);
+ mutex_unlock(&mc_bus->scan_mutex);
}
+ if (err < 0)
+ return ERR_PTR(err);
endpoint = fsl_mc_device_lookup(&endpoint_desc, mc_bus_dev);
/*
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index bf490967241a..2505df1f4e69 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -720,11 +720,6 @@ static const struct pci_device_id agp_amd64_pci_table[] = {
MODULE_DEVICE_TABLE(pci, agp_amd64_pci_table);
-static const struct pci_device_id agp_amd64_pci_promisc_table[] = {
- { PCI_DEVICE_CLASS(0, 0) },
- { }
-};
-
static DEFINE_SIMPLE_DEV_PM_OPS(agp_amd64_pm_ops, NULL, agp_amd64_resume);
static struct pci_driver agp_amd64_pci_driver = {
@@ -739,6 +734,7 @@ static struct pci_driver agp_amd64_pci_driver = {
/* Not static due to IOMMU code calling it early. */
int __init agp_amd64_init(void)
{
+ struct pci_dev *pdev = NULL;
int err = 0;
if (agp_off)
@@ -767,9 +763,13 @@ int __init agp_amd64_init(void)
}
/* Look for any AGP bridge */
- agp_amd64_pci_driver.id_table = agp_amd64_pci_promisc_table;
- err = driver_attach(&agp_amd64_pci_driver.driver);
- if (err == 0 && agp_bridges_found == 0) {
+ for_each_pci_dev(pdev)
+ if (pci_find_capability(pdev, PCI_CAP_ID_AGP))
+ pci_add_dynid(&agp_amd64_pci_driver,
+ pdev->vendor, pdev->device,
+ pdev->subsystem_vendor,
+ pdev->subsystem_device, 0, 0, 0);
+ if (agp_bridges_found == 0) {
pci_unregister_driver(&agp_amd64_pci_driver);
err = -ENODEV;
}
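[Annotation] Rather than binding a class-match-everything ID table and letting probe() reject non-bridges, the init path now walks every PCI device once and registers a dynamic ID only for those exposing an AGP capability. A self-contained sketch of the probe-by-capability idea; the device records and capability lists stand in for the real config-space walk:

#include <stdio.h>

#define PCI_CAP_ID_AGP 0x02

struct pci_dev {
	unsigned short vendor, device;
	unsigned char caps[4];
	int ncaps;
};

/* The real pci_find_capability() returns the config-space offset of
 * the capability, or 0 when absent; a boolean suffices for the sketch.
 */
static int pci_has_capability(const struct pci_dev *p, unsigned char cap)
{
	for (int i = 0; i < p->ncaps; i++)
		if (p->caps[i] == cap)
			return 1;
	return 0;
}

int main(void)
{
	struct pci_dev devs[] = {
		{ 0x1022, 0x7454, { 0x01, PCI_CAP_ID_AGP }, 2 },
		{ 0x8086, 0x1237, { 0x01 }, 1 },
	};
	int found = 0;

	for (size_t i = 0; i < sizeof(devs) / sizeof(devs[0]); i++) {
		if (!pci_has_capability(&devs[i], PCI_CAP_ID_AGP))
			continue;
		printf("dynamic id for %04x:%04x\n",
		       devs[i].vendor, devs[i].device);
		found++;
	}
	if (!found)
		fprintf(stderr, "no AGP bridge found\n");
	return 0;
}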
diff --git a/drivers/char/tpm/eventlog/common.c b/drivers/char/tpm/eventlog/common.c
index 4c0bbba64ee5..691813d2a5a2 100644
--- a/drivers/char/tpm/eventlog/common.c
+++ b/drivers/char/tpm/eventlog/common.c
@@ -32,7 +32,7 @@ static int tpm_bios_measurements_open(struct inode *inode,
struct tpm_chip *chip;
inode_lock(inode);
- if (!inode->i_private) {
+ if (!inode->i_nlink) {
inode_unlock(inode);
return -ENODEV;
}
@@ -105,7 +105,7 @@ static int tpm_read_log(struct tpm_chip *chip)
void tpm_bios_log_setup(struct tpm_chip *chip)
{
const char *name = dev_name(&chip->dev);
- unsigned int cnt;
+ struct dentry *dentry;
int log_version;
int rc = 0;
@@ -117,14 +117,12 @@ void tpm_bios_log_setup(struct tpm_chip *chip)
return;
log_version = rc;
- cnt = 0;
- chip->bios_dir[cnt] = securityfs_create_dir(name, NULL);
+ chip->bios_dir = securityfs_create_dir(name, NULL);
/* NOTE: securityfs_create_dir can return ENODEV if securityfs is
* compiled out. The caller should ignore the ENODEV return code.
*/
- if (IS_ERR(chip->bios_dir[cnt]))
- goto err;
- cnt++;
+ if (IS_ERR(chip->bios_dir))
+ return;
chip->bin_log_seqops.chip = chip;
if (log_version == EFI_TCG2_EVENT_LOG_FORMAT_TCG_2)
@@ -135,14 +133,13 @@ void tpm_bios_log_setup(struct tpm_chip *chip)
&tpm1_binary_b_measurements_seqops;
- chip->bios_dir[cnt] =
+ dentry =
securityfs_create_file("binary_bios_measurements",
- 0440, chip->bios_dir[0],
+ 0440, chip->bios_dir,
(void *)&chip->bin_log_seqops,
&tpm_bios_measurements_ops);
- if (IS_ERR(chip->bios_dir[cnt]))
+ if (IS_ERR(dentry))
goto err;
- cnt++;
if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) {
@@ -150,42 +147,23 @@ void tpm_bios_log_setup(struct tpm_chip *chip)
chip->ascii_log_seqops.seqops =
&tpm1_ascii_b_measurements_seqops;
- chip->bios_dir[cnt] =
+ dentry =
securityfs_create_file("ascii_bios_measurements",
- 0440, chip->bios_dir[0],
+ 0440, chip->bios_dir,
(void *)&chip->ascii_log_seqops,
&tpm_bios_measurements_ops);
- if (IS_ERR(chip->bios_dir[cnt]))
+ if (IS_ERR(dentry))
goto err;
- cnt++;
}
return;
err:
- chip->bios_dir[cnt] = NULL;
tpm_bios_log_teardown(chip);
return;
}
void tpm_bios_log_teardown(struct tpm_chip *chip)
{
- int i;
- struct inode *inode;
-
- /* securityfs_remove currently doesn't take care of handling sync
- * between removal and opening of pseudo files. To handle this, a
- * workaround is added by making i_private = NULL here during removal
- * and to check it during open(), both within inode_lock()/unlock().
- * This design ensures that open() either safely gets kref or fails.
- */
- for (i = (TPM_NUM_EVENT_LOG_FILES - 1); i >= 0; i--) {
- if (chip->bios_dir[i]) {
- inode = d_inode(chip->bios_dir[i]);
- inode_lock(inode);
- inode->i_private = NULL;
- inode_unlock(inode);
- securityfs_remove(chip->bios_dir[i]);
- }
- }
+ securityfs_remove(chip->bios_dir);
}
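[Annotation] The open() path now keys off inode->i_nlink, which removal clears under the inode lock, instead of the hand-rolled protocol of NULLing i_private file by file, and teardown collapses into one recursive securityfs_remove() of the directory. The race-resolution shape, modeled in userspace; the mutex and link count below stand in for the inode's:

#include <pthread.h>
#include <stdio.h>

/* Both sides take the same lock; open() bails out once the link
 * count has dropped, so it either sees a live file or -ENODEV,
 * never a half-removed one.
 */
struct node {
	pthread_mutex_t lock;
	int nlink;
};

static int node_open(struct node *n)
{
	int ret = 0;

	pthread_mutex_lock(&n->lock);
	if (!n->nlink)
		ret = -19;	/* -ENODEV */
	pthread_mutex_unlock(&n->lock);
	return ret;
}

static void node_remove(struct node *n)
{
	pthread_mutex_lock(&n->lock);
	n->nlink = 0;		/* what unlinking does to the inode */
	pthread_mutex_unlock(&n->lock);
}

int main(void)
{
	struct node n = { PTHREAD_MUTEX_INITIALIZER, 1 };

	printf("open before remove: %d\n", node_open(&n));
	node_remove(&n);
	printf("open after remove:  %d\n", node_open(&n));
	return 0;
}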
diff --git a/drivers/clk/clk-scmi.c b/drivers/clk/clk-scmi.c
index 15510c2ff21c..1b1561c84127 100644
--- a/drivers/clk/clk-scmi.c
+++ b/drivers/clk/clk-scmi.c
@@ -404,6 +404,7 @@ static int scmi_clocks_probe(struct scmi_device *sdev)
const struct scmi_handle *handle = sdev->handle;
struct scmi_protocol_handle *ph;
const struct clk_ops *scmi_clk_ops_db[SCMI_MAX_CLK_OPS] = {};
+ struct scmi_clk *sclks;
if (!handle)
return -ENODEV;
@@ -430,18 +431,21 @@ static int scmi_clocks_probe(struct scmi_device *sdev)
transport_is_atomic = handle->is_transport_atomic(handle,
&atomic_threshold_us);
+ sclks = devm_kcalloc(dev, count, sizeof(*sclks), GFP_KERNEL);
+ if (!sclks)
+ return -ENOMEM;
+
+ for (idx = 0; idx < count; idx++)
+ hws[idx] = &sclks[idx].hw;
+
for (idx = 0; idx < count; idx++) {
- struct scmi_clk *sclk;
+ struct scmi_clk *sclk = &sclks[idx];
const struct clk_ops *scmi_ops;
- sclk = devm_kzalloc(dev, sizeof(*sclk), GFP_KERNEL);
- if (!sclk)
- return -ENOMEM;
-
sclk->info = scmi_proto_clk_ops->info_get(ph, idx);
if (!sclk->info) {
dev_dbg(dev, "invalid clock info for idx %d\n", idx);
- devm_kfree(dev, sclk);
+ hws[idx] = NULL;
continue;
}
@@ -479,13 +483,11 @@ static int scmi_clocks_probe(struct scmi_device *sdev)
if (err) {
dev_err(dev, "failed to register clock %d\n", idx);
devm_kfree(dev, sclk->parent_data);
- devm_kfree(dev, sclk);
hws[idx] = NULL;
} else {
dev_dbg(dev, "Registered clock:%s%s\n",
sclk->info->name,
scmi_ops->enable ? " (atomic ops)" : "");
- hws[idx] = &sclk->hw;
}
}
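[Annotation] One devm_kcalloc() now backs all clock descriptors instead of a devm_kzalloc() per clock, and every hws[] slot is wired to its descriptor before the registration loop, so failure paths only have to NULL the slot. The allocation shape in a standalone sketch with simplified struct names:

#include <stdio.h>
#include <stdlib.h>

struct clk_hw { int registered; };
struct scmi_clk { struct clk_hw hw; int id; };

int main(void)
{
	enum { COUNT = 4 };
	struct clk_hw *hws[COUNT];
	struct scmi_clk *sclks = calloc(COUNT, sizeof(*sclks));

	if (!sclks)
		return 1;

	/* Wire every slot to the single backing array up front... */
	for (int i = 0; i < COUNT; i++)
		hws[i] = &sclks[i].hw;

	/* ...so a per-clock failure only NULLs its slot rather than
	 * freeing an individual allocation. */
	for (int i = 0; i < COUNT; i++) {
		if (i == 2) {		/* pretend clock 2 has no info */
			hws[i] = NULL;
			continue;
		}
		sclks[i].id = i;
		sclks[i].hw.registered = 1;
	}

	for (int i = 0; i < COUNT; i++)
		printf("clk%d: %s\n", i, hws[i] ? "registered" : "skipped");
	free(sclks);
	return 0;
}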
diff --git a/drivers/clk/imx/clk-imx95-blk-ctl.c b/drivers/clk/imx/clk-imx95-blk-ctl.c
index 25974947ad0c..cc2ee2be1819 100644
--- a/drivers/clk/imx/clk-imx95-blk-ctl.c
+++ b/drivers/clk/imx/clk-imx95-blk-ctl.c
@@ -219,11 +219,15 @@ static const struct imx95_blk_ctl_dev_data lvds_csr_dev_data = {
.clk_reg_offset = 0,
};
+static const char * const disp_engine_parents[] = {
+ "videopll1", "dsi_pll", "ldb_pll_div7"
+};
+
static const struct imx95_blk_ctl_clk_dev_data dispmix_csr_clk_dev_data[] = {
[IMX95_CLK_DISPMIX_ENG0_SEL] = {
.name = "disp_engine0_sel",
- .parent_names = (const char *[]){"videopll1", "dsi_pll", "ldb_pll_div7", },
- .num_parents = 4,
+ .parent_names = disp_engine_parents,
+ .num_parents = ARRAY_SIZE(disp_engine_parents),
.reg = 0,
.bit_idx = 0,
.bit_width = 2,
@@ -232,8 +236,8 @@ static const struct imx95_blk_ctl_clk_dev_data dispmix_csr_clk_dev_data[] = {
},
[IMX95_CLK_DISPMIX_ENG1_SEL] = {
.name = "disp_engine1_sel",
- .parent_names = (const char *[]){"videopll1", "dsi_pll", "ldb_pll_div7", },
- .num_parents = 4,
+ .parent_names = disp_engine_parents,
+ .num_parents = ARRAY_SIZE(disp_engine_parents),
.reg = 0,
.bit_idx = 2,
.bit_width = 2,
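[Annotation] The old initializers spelled the parent list out twice as compound literals and hard-coded num_parents = 4 against a three-entry list; sharing one named array and deriving the count keeps the two from ever disagreeing. The idiom in isolation:

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const char *const disp_engine_parents[] = {
	"videopll1", "dsi_pll", "ldb_pll_div7",
};

struct mux_desc {
	const char *name;
	const char *const *parent_names;
	unsigned int num_parents;
};

static const struct mux_desc eng0 = {
	.name = "disp_engine0_sel",
	.parent_names = disp_engine_parents,
	/* Derived, so it can never drift from the table again. */
	.num_parents = ARRAY_SIZE(disp_engine_parents),
};

int main(void)
{
	printf("%s has %u parents\n", eng0.name, eng0.num_parents);
	return 0;
}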
diff --git a/drivers/clk/sunxi-ng/ccu-sun55i-a523.c b/drivers/clk/sunxi-ng/ccu-sun55i-a523.c
index 9efb9fd24b42..1a9a1cb869e2 100644
--- a/drivers/clk/sunxi-ng/ccu-sun55i-a523.c
+++ b/drivers/clk/sunxi-ng/ccu-sun55i-a523.c
@@ -385,7 +385,8 @@ static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_FEAT(mbus_clk, "mbus", mbus_parents,
0, 0, /* no P */
24, 3, /* mux */
BIT(31), /* gate */
- 0, CCU_FEATURE_UPDATE_BIT);
+ CLK_IS_CRITICAL,
+ CCU_FEATURE_UPDATE_BIT);
static const struct clk_hw *mbus_hws[] = { &mbus_clk.common.hw };
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
index 579a81bb46df..52e4369664c5 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
@@ -350,7 +350,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(de_clk, "de", de_parents,
0x104, 0, 4, 24, 2, BIT(31),
CLK_SET_RATE_PARENT);
-static const char * const tcon_parents[] = { "pll-video" };
+static const char * const tcon_parents[] = { "pll-video", "pll-periph0" };
static SUNXI_CCU_M_WITH_MUX_GATE(tcon_clk, "tcon", tcon_parents,
0x118, 0, 4, 24, 3, BIT(31), 0);
@@ -362,11 +362,11 @@ static const char * const csi_mclk_parents[] = { "osc24M", "pll-video",
static SUNXI_CCU_M_WITH_MUX_GATE(csi0_mclk_clk, "csi0-mclk", csi_mclk_parents,
0x130, 0, 5, 8, 3, BIT(15), 0);
-static const char * const csi1_sclk_parents[] = { "pll-video", "pll-isp" };
-static SUNXI_CCU_M_WITH_MUX_GATE(csi1_sclk_clk, "csi-sclk", csi1_sclk_parents,
+static const char * const csi_sclk_parents[] = { "pll-video", "pll-isp" };
+static SUNXI_CCU_M_WITH_MUX_GATE(csi_sclk_clk, "csi-sclk", csi_sclk_parents,
0x134, 16, 4, 24, 3, BIT(31), 0);
-static SUNXI_CCU_M_WITH_MUX_GATE(csi1_mclk_clk, "csi-mclk", csi_mclk_parents,
+static SUNXI_CCU_M_WITH_MUX_GATE(csi1_mclk_clk, "csi1-mclk", csi_mclk_parents,
0x134, 0, 5, 8, 3, BIT(15), 0);
static SUNXI_CCU_M_WITH_GATE(ve_clk, "ve", "pll-ve",
@@ -452,7 +452,7 @@ static struct ccu_common *sun8i_v3s_ccu_clks[] = {
&tcon_clk.common,
&csi_misc_clk.common,
&csi0_mclk_clk.common,
- &csi1_sclk_clk.common,
+ &csi_sclk_clk.common,
&csi1_mclk_clk.common,
&ve_clk.common,
&ac_dig_clk.common,
@@ -551,7 +551,7 @@ static struct clk_hw_onecell_data sun8i_v3s_hw_clks = {
[CLK_TCON0] = &tcon_clk.common.hw,
[CLK_CSI_MISC] = &csi_misc_clk.common.hw,
[CLK_CSI0_MCLK] = &csi0_mclk_clk.common.hw,
- [CLK_CSI1_SCLK] = &csi1_sclk_clk.common.hw,
+ [CLK_CSI_SCLK] = &csi_sclk_clk.common.hw,
[CLK_CSI1_MCLK] = &csi1_mclk_clk.common.hw,
[CLK_VE] = &ve_clk.common.hw,
[CLK_AC_DIG] = &ac_dig_clk.common.hw,
@@ -633,7 +633,7 @@ static struct clk_hw_onecell_data sun8i_v3_hw_clks = {
[CLK_TCON0] = &tcon_clk.common.hw,
[CLK_CSI_MISC] = &csi_misc_clk.common.hw,
[CLK_CSI0_MCLK] = &csi0_mclk_clk.common.hw,
- [CLK_CSI1_SCLK] = &csi1_sclk_clk.common.hw,
+ [CLK_CSI_SCLK] = &csi_sclk_clk.common.hw,
[CLK_CSI1_MCLK] = &csi1_mclk_clk.common.hw,
[CLK_VE] = &ve_clk.common.hw,
[CLK_AC_DIG] = &ac_dig_clk.common.hw,
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index 09549451dd51..2edc13ca184e 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -22,6 +22,7 @@
#include <linux/irq.h>
#include <linux/acpi.h>
#include <linux/hyperv.h>
+#include <linux/export.h>
#include <clocksource/hyperv_timer.h>
#include <hyperv/hvhdk.h>
#include <asm/mshyperv.h>
diff --git a/drivers/comedi/comedi_fops.c b/drivers/comedi/comedi_fops.c
index 3383a7ce27ff..c83fd14dd7ad 100644
--- a/drivers/comedi/comedi_fops.c
+++ b/drivers/comedi/comedi_fops.c
@@ -1556,21 +1556,27 @@ static int do_insnlist_ioctl(struct comedi_device *dev,
}
for (i = 0; i < n_insns; ++i) {
+ unsigned int n = insns[i].n;
+
if (insns[i].insn & INSN_MASK_WRITE) {
if (copy_from_user(data, insns[i].data,
- insns[i].n * sizeof(unsigned int))) {
+ n * sizeof(unsigned int))) {
dev_dbg(dev->class_dev,
"copy_from_user failed\n");
ret = -EFAULT;
goto error;
}
+ if (n < MIN_SAMPLES) {
+ memset(&data[n], 0, (MIN_SAMPLES - n) *
+ sizeof(unsigned int));
+ }
}
ret = parse_insn(dev, insns + i, data, file);
if (ret < 0)
goto error;
if (insns[i].insn & INSN_MASK_READ) {
if (copy_to_user(insns[i].data, data,
- insns[i].n * sizeof(unsigned int))) {
+ n * sizeof(unsigned int))) {
dev_dbg(dev->class_dev,
"copy_to_user failed\n");
ret = -EFAULT;
@@ -1589,6 +1595,16 @@ error:
return i;
}
+#define MAX_INSNS MAX_SAMPLES
+static int check_insnlist_len(struct comedi_device *dev, unsigned int n_insns)
+{
+ if (n_insns > MAX_INSNS) {
+ dev_dbg(dev->class_dev, "insnlist length too large\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
/*
* COMEDI_INSN ioctl
* synchronous instruction
@@ -1633,6 +1649,10 @@ static int do_insn_ioctl(struct comedi_device *dev,
ret = -EFAULT;
goto error;
}
+ if (insn->n < MIN_SAMPLES) {
+ memset(&data[insn->n], 0,
+ (MIN_SAMPLES - insn->n) * sizeof(unsigned int));
+ }
}
ret = parse_insn(dev, insn, data, file);
if (ret < 0)
@@ -2239,6 +2259,9 @@ static long comedi_unlocked_ioctl(struct file *file, unsigned int cmd,
rc = -EFAULT;
break;
}
+ rc = check_insnlist_len(dev, insnlist.n_insns);
+ if (rc)
+ break;
insns = kcalloc(insnlist.n_insns, sizeof(*insns), GFP_KERNEL);
if (!insns) {
rc = -ENOMEM;
@@ -3142,6 +3165,9 @@ static int compat_insnlist(struct file *file, unsigned long arg)
if (copy_from_user(&insnlist32, compat_ptr(arg), sizeof(insnlist32)))
return -EFAULT;
+ rc = check_insnlist_len(dev, insnlist32.n_insns);
+ if (rc)
+ return rc;
insns = kcalloc(insnlist32.n_insns, sizeof(*insns), GFP_KERNEL);
if (!insns)
return -ENOMEM;
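[Annotation] Two hardening steps run through this file: user-supplied n_insns is bounded before it feeds kcalloc(), and write buffers shorter than MIN_SAMPLES are zero-padded before the instruction is parsed. A compact model of both; the cap and minimum here are illustrative, not comedi's actual values:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_INSNS   65536	/* illustrative cap */
#define MIN_SAMPLES 2		/* illustrative minimum */

static int check_insnlist_len(unsigned int n_insns)
{
	return n_insns > MAX_INSNS ? -22 : 0;	/* -EINVAL */
}

int main(void)
{
	unsigned int n_insns = 8, n = 1;
	unsigned int *data;

	if (check_insnlist_len(n_insns))
		return 1;
	/* calloc(), like kcalloc(), checks the multiplication for
	 * overflow; only a sane n_insns keeps the allocation itself
	 * from being attacker-sized. */
	data = calloc(n > MIN_SAMPLES ? n : MIN_SAMPLES, sizeof(*data));
	if (!data)
		return 1;
	data[0] = 42;			/* "copied from user" */
	if (n < MIN_SAMPLES)		/* zero the tail the driver reads */
		memset(&data[n], 0, (MIN_SAMPLES - n) * sizeof(*data));
	printf("data = { %u, %u }\n", data[0], data[1]);
	free(data);
	return 0;
}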
diff --git a/drivers/comedi/drivers.c b/drivers/comedi/drivers.c
index 376130bfba8a..9e4b7c840a8f 100644
--- a/drivers/comedi/drivers.c
+++ b/drivers/comedi/drivers.c
@@ -339,10 +339,10 @@ int comedi_dio_insn_config(struct comedi_device *dev,
unsigned int *data,
unsigned int mask)
{
- unsigned int chan_mask = 1 << CR_CHAN(insn->chanspec);
+ unsigned int chan = CR_CHAN(insn->chanspec);
- if (!mask)
- mask = chan_mask;
+ if (!mask && chan < 32)
+ mask = 1U << chan;
switch (data[0]) {
case INSN_CONFIG_DIO_INPUT:
@@ -382,7 +382,7 @@ EXPORT_SYMBOL_GPL(comedi_dio_insn_config);
unsigned int comedi_dio_update_state(struct comedi_subdevice *s,
unsigned int *data)
{
- unsigned int chanmask = (s->n_chan < 32) ? ((1 << s->n_chan) - 1)
+ unsigned int chanmask = (s->n_chan < 32) ? ((1U << s->n_chan) - 1)
: 0xffffffff;
unsigned int mask = data[0] & chanmask;
unsigned int bits = data[1];
@@ -615,6 +615,9 @@ static int insn_rw_emulate_bits(struct comedi_device *dev,
unsigned int _data[2];
int ret;
+ if (insn->n == 0)
+ return 0;
+
memset(_data, 0, sizeof(_data));
memset(&_insn, 0, sizeof(_insn));
_insn.insn = INSN_BITS;
@@ -625,8 +628,8 @@ static int insn_rw_emulate_bits(struct comedi_device *dev,
if (insn->insn == INSN_WRITE) {
if (!(s->subdev_flags & SDF_WRITABLE))
return -EINVAL;
- _data[0] = 1 << (chan - base_chan); /* mask */
- _data[1] = data[0] ? (1 << (chan - base_chan)) : 0; /* bits */
+ _data[0] = 1U << (chan - base_chan); /* mask */
+ _data[1] = data[0] ? (1U << (chan - base_chan)) : 0; /* bits */
}
ret = s->insn_bits(dev, s, &_insn, _data);
@@ -709,7 +712,7 @@ static int __comedi_device_postconfig(struct comedi_device *dev)
if (s->type == COMEDI_SUBD_DO) {
if (s->n_chan < 32)
- s->io_bits = (1 << s->n_chan) - 1;
+ s->io_bits = (1U << s->n_chan) - 1;
else
s->io_bits = 0xffffffff;
}
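[Annotation] The 1 << n fixes in this file share one rationale: shifting a 32-bit int by 31 sets the sign bit and shifting by 32 or more is undefined behaviour, so the mask helpers switch to unsigned arithmetic and bound the channel number first. Both guarded forms, runnable:

#include <stdio.h>

/* Single-channel mask, or 0 when the channel cannot be represented:
 * shifting a 32-bit value by >= 32 (or shifting 1 into the sign bit
 * of a signed int) is undefined behaviour in C.
 */
static unsigned int chan_mask(unsigned int chan)
{
	return chan < 32 ? 1U << chan : 0;
}

/* Mask covering n channels, saturating at all 32 bits. */
static unsigned int nchan_mask(unsigned int n_chan)
{
	return n_chan < 32 ? (1U << n_chan) - 1 : 0xffffffffU;
}

int main(void)
{
	printf("%#x %#x %#x\n", chan_mask(5), chan_mask(40), nchan_mask(32));
	return 0;
}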
diff --git a/drivers/comedi/drivers/aio_iiro_16.c b/drivers/comedi/drivers/aio_iiro_16.c
index b00fab0b89d4..739cc4db52ac 100644
--- a/drivers/comedi/drivers/aio_iiro_16.c
+++ b/drivers/comedi/drivers/aio_iiro_16.c
@@ -177,7 +177,8 @@ static int aio_iiro_16_attach(struct comedi_device *dev,
* Digital input change of state interrupts are optionally supported
* using IRQ 2-7, 10-12, 14, or 15.
*/
- if ((1 << it->options[1]) & 0xdcfc) {
+ if (it->options[1] > 0 && it->options[1] < 16 &&
+ (1 << it->options[1]) & 0xdcfc) {
ret = request_irq(it->options[1], aio_iiro_16_cos, 0,
dev->board_name, dev);
if (ret == 0)
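[Annotation] The IRQ bitmask idiom used here, and in das16m1, das6402, and pcl812 below, packs the set of board-supported IRQ lines into one constant; the new range check keeps the shift defined when a module option is out of range. Decoding 0xdcfc:

#include <stdio.h>

/* 0xdcfc = bits 2-7, 10-12, 14, 15: the IRQ lines the board supports. */
#define VALID_IRQS 0xdcfcu

static int irq_is_valid(int irq)
{
	/* Range-check first so the shift below is always defined. */
	return irq > 0 && irq < 16 && (1u << irq) & VALID_IRQS;
}

int main(void)
{
	for (int irq = 0; irq < 20; irq++)
		if (irq_is_valid(irq))
			printf("%d ", irq);
	putchar('\n');	/* 2 3 4 5 6 7 10 11 12 14 15 */
	return 0;
}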
diff --git a/drivers/comedi/drivers/comedi_test.c b/drivers/comedi/drivers/comedi_test.c
index 9747e6d1f6eb..7984950f0f99 100644
--- a/drivers/comedi/drivers/comedi_test.c
+++ b/drivers/comedi/drivers/comedi_test.c
@@ -792,7 +792,7 @@ static void waveform_detach(struct comedi_device *dev)
{
struct waveform_private *devpriv = dev->private;
- if (devpriv) {
+ if (devpriv && dev->n_subdevices) {
timer_delete_sync(&devpriv->ai_timer);
timer_delete_sync(&devpriv->ao_timer);
}
diff --git a/drivers/comedi/drivers/das16m1.c b/drivers/comedi/drivers/das16m1.c
index b8ea737ad3d1..1b638f5b5a4f 100644
--- a/drivers/comedi/drivers/das16m1.c
+++ b/drivers/comedi/drivers/das16m1.c
@@ -522,7 +522,8 @@ static int das16m1_attach(struct comedi_device *dev,
devpriv->extra_iobase = dev->iobase + DAS16M1_8255_IOBASE;
/* only irqs 2, 3, 4, 5, 6, 7, 10, 11, 12, 14, and 15 are valid */
- if ((1 << it->options[1]) & 0xdcfc) {
+ if (it->options[1] >= 2 && it->options[1] <= 15 &&
+ (1 << it->options[1]) & 0xdcfc) {
ret = request_irq(it->options[1], das16m1_interrupt, 0,
dev->board_name, dev);
if (ret == 0)
diff --git a/drivers/comedi/drivers/das6402.c b/drivers/comedi/drivers/das6402.c
index 68f95330de45..7660487e563c 100644
--- a/drivers/comedi/drivers/das6402.c
+++ b/drivers/comedi/drivers/das6402.c
@@ -567,7 +567,8 @@ static int das6402_attach(struct comedi_device *dev,
das6402_reset(dev);
/* IRQs 2,3,5,6,7, 10,11,15 are valid for "enhanced" mode */
- if ((1 << it->options[1]) & 0x8cec) {
+ if (it->options[1] > 0 && it->options[1] < 16 &&
+ (1 << it->options[1]) & 0x8cec) {
ret = request_irq(it->options[1], das6402_interrupt, 0,
dev->board_name, dev);
if (ret == 0) {
diff --git a/drivers/comedi/drivers/pcl812.c b/drivers/comedi/drivers/pcl812.c
index 0df639c6a595..abca61a72cf7 100644
--- a/drivers/comedi/drivers/pcl812.c
+++ b/drivers/comedi/drivers/pcl812.c
@@ -1149,7 +1149,8 @@ static int pcl812_attach(struct comedi_device *dev, struct comedi_devconfig *it)
if (IS_ERR(dev->pacer))
return PTR_ERR(dev->pacer);
- if ((1 << it->options[1]) & board->irq_bits) {
+ if (it->options[1] > 0 && it->options[1] < 16 &&
+ (1 << it->options[1]) & board->irq_bits) {
ret = request_irq(it->options[1], pcl812_interrupt, 0,
dev->board_name, dev);
if (ret == 0)
diff --git a/drivers/cpufreq/rcpufreq_dt.rs b/drivers/cpufreq/rcpufreq_dt.rs
index 94ed81644fe1..43c87d0259b6 100644
--- a/drivers/cpufreq/rcpufreq_dt.rs
+++ b/drivers/cpufreq/rcpufreq_dt.rs
@@ -26,9 +26,9 @@ fn find_supply_name_exact(dev: &Device, name: &str) -> Option<CString> {
}
/// Finds supply name for the CPU from DT.
-fn find_supply_names(dev: &Device, cpu: u32) -> Option<KVec<CString>> {
+fn find_supply_names(dev: &Device, cpu: cpu::CpuId) -> Option<KVec<CString>> {
// Try "cpu0" for older DTs, fallback to "cpu".
- let name = (cpu == 0)
+ let name = (cpu.as_u32() == 0)
.then(|| find_supply_name_exact(dev, "cpu0"))
.flatten()
.or_else(|| find_supply_name_exact(dev, "cpu"))?;
diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
index 4e1ba35deda9..b19bc60cc627 100644
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -45,7 +45,6 @@ struct psci_cpuidle_domain_state {
static DEFINE_PER_CPU_READ_MOSTLY(struct psci_cpuidle_data, psci_cpuidle_data);
static DEFINE_PER_CPU(struct psci_cpuidle_domain_state, psci_domain_state);
static bool psci_cpuidle_use_syscore;
-static bool psci_cpuidle_use_cpuhp;
void psci_set_domain_state(struct generic_pm_domain *pd, unsigned int state_idx,
u32 state)
@@ -124,8 +123,12 @@ static int psci_idle_cpuhp_up(unsigned int cpu)
{
struct device *pd_dev = __this_cpu_read(psci_cpuidle_data.dev);
- if (pd_dev)
- pm_runtime_get_sync(pd_dev);
+ if (pd_dev) {
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ pm_runtime_get_sync(pd_dev);
+ else
+ dev_pm_genpd_resume(pd_dev);
+ }
return 0;
}
@@ -135,7 +138,11 @@ static int psci_idle_cpuhp_down(unsigned int cpu)
struct device *pd_dev = __this_cpu_read(psci_cpuidle_data.dev);
if (pd_dev) {
- pm_runtime_put_sync(pd_dev);
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ pm_runtime_put_sync(pd_dev);
+ else
+ dev_pm_genpd_suspend(pd_dev);
+
/* Clear domain state to start fresh at next online. */
psci_clear_domain_state();
}
@@ -196,9 +203,6 @@ static void psci_idle_init_cpuhp(void)
{
int err;
- if (!psci_cpuidle_use_cpuhp)
- return;
-
err = cpuhp_setup_state_nocalls(CPUHP_AP_CPU_PM_STARTING,
"cpuidle/psci:online",
psci_idle_cpuhp_up,
@@ -259,10 +263,8 @@ static int psci_dt_cpu_init_topology(struct cpuidle_driver *drv,
* s2ram and s2idle.
*/
drv->states[state_count - 1].enter_s2idle = psci_enter_s2idle_domain_idle_state;
- if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
drv->states[state_count - 1].enter = psci_enter_domain_idle_state;
- psci_cpuidle_use_cpuhp = true;
- }
return 0;
}
@@ -339,7 +341,6 @@ static void psci_cpu_deinit_idle(int cpu)
dt_idle_detach_cpu(data->dev);
psci_cpuidle_use_syscore = false;
- psci_cpuidle_use_cpuhp = false;
}
static int psci_idle_init_cpu(struct device *dev, int cpu)
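[Annotation] The hotplug callbacks above now pick between runtime-PM calls and the direct genpd suspend/resume helpers with IS_ENABLED(CONFIG_PREEMPT_RT), which keeps both branches compiled and type-checked while the optimizer drops the dead one. A userspace approximation; the kernel macro is a preprocessor trick, modeled here as a plain constant:

#include <stdio.h>

#ifdef CONFIG_PREEMPT_RT
#define IS_ENABLED_PREEMPT_RT 1
#else
#define IS_ENABLED_PREEMPT_RT 0
#endif

static void pm_runtime_get(void) { puts("pm_runtime_get_sync()"); }
static void genpd_resume(void)   { puts("dev_pm_genpd_resume()"); }

static void cpuhp_up(void)
{
	/* Both callees stay visible to the compiler; only one
	 * survives constant folding. */
	if (!IS_ENABLED_PREEMPT_RT)
		pm_runtime_get();
	else
		genpd_resume();
}

int main(void)
{
	cpuhp_up();
	return 0;
}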
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index af37477ffd8d..be21e4e2016c 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -314,30 +314,30 @@ static int chcr_compute_partial_hash(struct shash_desc *desc,
if (digest_size == SHA1_DIGEST_SIZE) {
error = crypto_shash_init(desc) ?:
crypto_shash_update(desc, iopad, SHA1_BLOCK_SIZE) ?:
- crypto_shash_export(desc, (void *)&sha1_st);
+ crypto_shash_export_core(desc, &sha1_st);
memcpy(result_hash, sha1_st.state, SHA1_DIGEST_SIZE);
} else if (digest_size == SHA224_DIGEST_SIZE) {
error = crypto_shash_init(desc) ?:
crypto_shash_update(desc, iopad, SHA256_BLOCK_SIZE) ?:
- crypto_shash_export(desc, (void *)&sha256_st);
+ crypto_shash_export_core(desc, &sha256_st);
memcpy(result_hash, sha256_st.state, SHA256_DIGEST_SIZE);
} else if (digest_size == SHA256_DIGEST_SIZE) {
error = crypto_shash_init(desc) ?:
crypto_shash_update(desc, iopad, SHA256_BLOCK_SIZE) ?:
- crypto_shash_export(desc, (void *)&sha256_st);
+ crypto_shash_export_core(desc, &sha256_st);
memcpy(result_hash, sha256_st.state, SHA256_DIGEST_SIZE);
} else if (digest_size == SHA384_DIGEST_SIZE) {
error = crypto_shash_init(desc) ?:
crypto_shash_update(desc, iopad, SHA512_BLOCK_SIZE) ?:
- crypto_shash_export(desc, (void *)&sha512_st);
+ crypto_shash_export_core(desc, &sha512_st);
memcpy(result_hash, sha512_st.state, SHA512_DIGEST_SIZE);
} else if (digest_size == SHA512_DIGEST_SIZE) {
error = crypto_shash_init(desc) ?:
crypto_shash_update(desc, iopad, SHA512_BLOCK_SIZE) ?:
- crypto_shash_export(desc, (void *)&sha512_st);
+ crypto_shash_export_core(desc, &sha512_st);
memcpy(result_hash, sha512_st.state, SHA512_DIGEST_SIZE);
} else {
error = -EINVAL;
diff --git a/drivers/crypto/intel/qat/qat_common/qat_algs.c b/drivers/crypto/intel/qat/qat_common/qat_algs.c
index 3c4bba4a8779..c03a69851114 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_algs.c
@@ -5,11 +5,11 @@
#include <linux/crypto.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/cipher.h>
+#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/aes.h>
#include <crypto/sha1.h>
#include <crypto/sha2.h>
-#include <crypto/hash.h>
#include <crypto/hmac.h>
#include <crypto/algapi.h>
#include <crypto/authenc.h>
@@ -154,19 +154,19 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash,
switch (ctx->qat_hash_alg) {
case ICP_QAT_HW_AUTH_ALGO_SHA1:
- if (crypto_shash_export(shash, &ctx->sha1))
+ if (crypto_shash_export_core(shash, &ctx->sha1))
return -EFAULT;
for (i = 0; i < digest_size >> 2; i++, hash_state_out++)
*hash_state_out = cpu_to_be32(ctx->sha1.state[i]);
break;
case ICP_QAT_HW_AUTH_ALGO_SHA256:
- if (crypto_shash_export(shash, &ctx->sha256))
+ if (crypto_shash_export_core(shash, &ctx->sha256))
return -EFAULT;
for (i = 0; i < digest_size >> 2; i++, hash_state_out++)
*hash_state_out = cpu_to_be32(ctx->sha256.state[i]);
break;
case ICP_QAT_HW_AUTH_ALGO_SHA512:
- if (crypto_shash_export(shash, &ctx->sha512))
+ if (crypto_shash_export_core(shash, &ctx->sha512))
return -EFAULT;
for (i = 0; i < digest_size >> 3; i++, hash512_state_out++)
*hash512_state_out = cpu_to_be64(ctx->sha512.state[i]);
@@ -190,19 +190,19 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash,
switch (ctx->qat_hash_alg) {
case ICP_QAT_HW_AUTH_ALGO_SHA1:
- if (crypto_shash_export(shash, &ctx->sha1))
+ if (crypto_shash_export_core(shash, &ctx->sha1))
return -EFAULT;
for (i = 0; i < digest_size >> 2; i++, hash_state_out++)
*hash_state_out = cpu_to_be32(ctx->sha1.state[i]);
break;
case ICP_QAT_HW_AUTH_ALGO_SHA256:
- if (crypto_shash_export(shash, &ctx->sha256))
+ if (crypto_shash_export_core(shash, &ctx->sha256))
return -EFAULT;
for (i = 0; i < digest_size >> 2; i++, hash_state_out++)
*hash_state_out = cpu_to_be32(ctx->sha256.state[i]);
break;
case ICP_QAT_HW_AUTH_ALGO_SHA512:
- if (crypto_shash_export(shash, &ctx->sha512))
+ if (crypto_shash_export_core(shash, &ctx->sha512))
return -EFAULT;
for (i = 0; i < digest_size >> 3; i++, hash512_state_out++)
*hash512_state_out = cpu_to_be64(ctx->sha512.state[i]);
diff --git a/drivers/cxl/core/edac.c b/drivers/cxl/core/edac.c
index 2cbc664e5d62..623aaa4439c4 100644
--- a/drivers/cxl/core/edac.c
+++ b/drivers/cxl/core/edac.c
@@ -103,10 +103,10 @@ static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx,
u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle)
{
struct cxl_mailbox *cxl_mbox;
- u8 min_scrub_cycle = U8_MAX;
struct cxl_region_params *p;
struct cxl_memdev *cxlmd;
struct cxl_region *cxlr;
+ u8 min_scrub_cycle = 0;
int i, ret;
if (!cxl_ps_ctx->cxlr) {
@@ -133,8 +133,12 @@ static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx,
if (ret)
return ret;
+ /*
+ * The min_scrub_cycle of a region is the max of minimum scrub
+ * cycles supported by memdevs that back the region.
+ */
if (min_cycle)
- min_scrub_cycle = min(*min_cycle, min_scrub_cycle);
+ min_scrub_cycle = max(*min_cycle, min_scrub_cycle);
}
if (min_cycle)
@@ -1099,8 +1103,10 @@ int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt)
old_rec = xa_store(&array_rec->rec_gen_media,
le64_to_cpu(rec->media_hdr.phys_addr), rec,
GFP_KERNEL);
- if (xa_is_err(old_rec))
+ if (xa_is_err(old_rec)) {
+ kfree(rec);
return xa_err(old_rec);
+ }
kfree(old_rec);
@@ -1127,8 +1133,10 @@ int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt)
old_rec = xa_store(&array_rec->rec_dram,
le64_to_cpu(rec->media_hdr.phys_addr), rec,
GFP_KERNEL);
- if (xa_is_err(old_rec))
+ if (xa_is_err(old_rec)) {
+ kfree(rec);
return xa_err(old_rec);
+ }
kfree(old_rec);
@@ -1315,7 +1323,7 @@ cxl_mem_get_rec_dram(struct cxl_memdev *cxlmd,
attrbs.bank = ctx->bank;
break;
case EDAC_REPAIR_RANK_SPARING:
- attrbs.repair_type = CXL_BANK_SPARING;
+ attrbs.repair_type = CXL_RANK_SPARING;
break;
default:
return NULL;
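[Annotation] The xa_store() fix above enforces a simple ownership rule: when the store fails, the new record still belongs to the caller and must be freed; on success it is the displaced old record that does. A one-slot model of that rule; the slot stands in for the xarray:

#include <stdio.h>
#include <stdlib.h>

static void *slot;
static int store_fails;

static void *store(void *new, int *err)
{
	if (store_fails) {	/* xa_store() returning xa_err(...) */
		*err = -12;	/* -ENOMEM */
		return NULL;
	}
	*err = 0;
	void *old = slot;
	slot = new;
	return old;
}

static int insert(void *new)
{
	int err;
	void *old = store(new, &err);

	if (err) {
		free(new);	/* the leak the patch plugs */
		return err;
	}
	free(old);		/* replaced entry, as before */
	return 0;
}

int main(void)
{
	printf("first:  %d\n", insert(malloc(16)));
	store_fails = 1;
	printf("second: %d\n", insert(malloc(16)));
	free(slot);
	return 0;
}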
diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c
index 6f2eae1eb126..7c750599ea69 100644
--- a/drivers/cxl/core/features.c
+++ b/drivers/cxl/core/features.c
@@ -544,7 +544,7 @@ static bool cxlctl_validate_set_features(struct cxl_features_state *cxlfs,
u32 flags;
if (rpc_in->op_size < sizeof(uuid_t))
- return ERR_PTR(-EINVAL);
+ return false;
feat = cxl_feature_info(cxlfs, &rpc_in->set_feat_in.uuid);
if (IS_ERR(feat))
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index 485a831695c7..2731ba3a0799 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -31,40 +31,38 @@ static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev,
ras_cap.header_log);
}
-static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev,
- struct cxl_ras_capability_regs ras_cap)
+static void cxl_cper_trace_corr_prot_err(struct cxl_memdev *cxlmd,
+ struct cxl_ras_capability_regs ras_cap)
{
u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
- struct cxl_dev_state *cxlds;
- cxlds = pci_get_drvdata(pdev);
- if (!cxlds)
- return;
-
- trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
+ trace_cxl_aer_correctable_error(cxlmd, status);
}
-static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev,
- struct cxl_ras_capability_regs ras_cap)
+static void
+cxl_cper_trace_uncorr_prot_err(struct cxl_memdev *cxlmd,
+ struct cxl_ras_capability_regs ras_cap)
{
u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask;
- struct cxl_dev_state *cxlds;
u32 fe;
- cxlds = pci_get_drvdata(pdev);
- if (!cxlds)
- return;
-
if (hweight32(status) > 1)
fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
ras_cap.cap_control));
else
fe = status;
- trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe,
+ trace_cxl_aer_uncorrectable_error(cxlmd, status, fe,
ras_cap.header_log);
}
+static int match_memdev_by_parent(struct device *dev, const void *uport)
+{
+ if (is_cxl_memdev(dev) && dev->parent == uport)
+ return 1;
+ return 0;
+}
+
static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
{
unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device,
@@ -73,13 +71,12 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment,
data->prot_err.agent_addr.bus,
devfn);
+ struct cxl_memdev *cxlmd;
int port_type;
if (!pdev)
return;
- guard(device)(&pdev->dev);
-
port_type = pci_pcie_type(pdev);
if (port_type == PCI_EXP_TYPE_ROOT_PORT ||
port_type == PCI_EXP_TYPE_DOWNSTREAM ||
@@ -92,10 +89,20 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
return;
}
+ guard(device)(&pdev->dev);
+ if (!pdev->dev.driver)
+ return;
+
+ struct device *mem_dev __free(put_device) = bus_find_device(
+ &cxl_bus_type, NULL, pdev, match_memdev_by_parent);
+ if (!mem_dev)
+ return;
+
+ cxlmd = to_cxl_memdev(mem_dev);
if (data->severity == AER_CORRECTABLE)
- cxl_cper_trace_corr_prot_err(pdev, data->ras_cap);
+ cxl_cper_trace_corr_prot_err(cxlmd, data->ras_cap);
else
- cxl_cper_trace_uncorr_prot_err(pdev, data->ras_cap);
+ cxl_cper_trace_uncorr_prot_err(cxlmd, data->ras_cap);
}
static void cxl_cper_prot_err_work_fn(struct work_struct *work)
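[Annotation] With the trace points now wanting the memdev rather than the PCI device, the handler resolves one from the other with a bus_find_device() match callback: find the CXL device whose parent is the erring PCI endpoint. The lookup shape, modeled with a flat device list:

#include <stddef.h>
#include <stdio.h>

/* Model of bus_find_device(): walk a bus, return the first device
 * the match callback accepts.
 */
struct device {
	const char *name;
	struct device *parent;
	int is_memdev;
};

static int match_memdev_by_parent(struct device *dev, const void *uport)
{
	return dev->is_memdev && dev->parent == uport;
}

static struct device *find_on_bus(struct device **bus, size_t n,
				  const void *data,
				  int (*match)(struct device *, const void *))
{
	for (size_t i = 0; i < n; i++)
		if (match(bus[i], data))
			return bus[i];
	return NULL;
}

int main(void)
{
	struct device pdev   = { "pcie-ep",  NULL,  0 };
	struct device memdev = { "mem0",     &pdev, 1 };
	struct device other  = { "decoder0", NULL,  0 };
	struct device *bus[] = { &other, &memdev };

	struct device *found = find_on_bus(bus, 2, &pdev,
					   match_memdev_by_parent);
	printf("%s\n", found ? found->name : "none");
	return 0;
}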
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 890ecac04dac..2bcf9ceca997 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -1118,7 +1118,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
* Catch exporters making buffers inaccessible even when
* attachments preventing that exist.
*/
- WARN_ON_ONCE(ret == EBUSY);
+ WARN_ON_ONCE(ret == -EBUSY);
if (ret)
return ERR_PTR(ret);
}
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index b1ef4546346d..bea3e9858aca 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -685,11 +685,13 @@ long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage,
dma_resv_iter_begin(&cursor, obj, usage);
dma_resv_for_each_fence_unlocked(&cursor, fence) {
- ret = dma_fence_wait_timeout(fence, intr, ret);
- if (ret <= 0) {
- dma_resv_iter_end(&cursor);
- return ret;
- }
+ ret = dma_fence_wait_timeout(fence, intr, timeout);
+ if (ret <= 0)
+ break;
+
+ /* Even for zero timeout the return value is 1 */
+ if (timeout)
+ timeout = ret;
}
dma_resv_iter_end(&cursor);
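[Annotation] The old loop fed its own return value back in as the next timeout, so one fence's result contaminated the wait for the next; the fix waits each fence with the remaining budget and leaves a zero (poll-mode) timeout at zero, where dma_fence_wait_timeout() returns 1 for a signaled fence. A toy model of the corrected accounting:

#include <stdio.h>

/* wait_one() mimics dma_fence_wait_timeout(): remaining time (>0)
 * on success, 0 on timeout, and 1 for a signaled fence polled with
 * timeout == 0.
 */
static long wait_one(int signaled, long timeout)
{
	if (signaled)
		return timeout ? timeout - 1 : 1;
	return 0;
}

static long wait_all(const int *fences, int n, long timeout)
{
	long ret = timeout ? timeout : 1;

	for (int i = 0; i < n; i++) {
		ret = wait_one(fences[i], timeout);
		if (ret <= 0)
			break;
		/* Even for zero timeout the return value is 1, so only
		 * carry the remainder forward when we had a budget. */
		if (timeout)
			timeout = ret;
	}
	return ret;
}

int main(void)
{
	int fences[] = { 1, 1, 1 };

	printf("blocking: %ld\n", wait_all(fences, 3, 10)); /* 7 left */
	printf("poll:     %ld\n", wait_all(fences, 3, 0));  /* 1 */
	return 0;
}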
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 7eee3eb47a8e..c9d0c68d2fcb 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -264,8 +264,7 @@ static int begin_cpu_udmabuf(struct dma_buf *buf,
ubuf->sg = NULL;
}
} else {
- dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents,
- direction);
+ dma_sync_sgtable_for_cpu(dev, ubuf->sg, direction);
}
return ret;
@@ -280,7 +279,7 @@ static int end_cpu_udmabuf(struct dma_buf *buf,
if (!ubuf->sg)
return -EINVAL;
- dma_sync_sg_for_device(dev, ubuf->sg->sgl, ubuf->sg->nents, direction);
+ dma_sync_sgtable_for_device(dev, ubuf->sg, direction);
return 0;
}
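[Annotation] dma_sync_sg_for_cpu()/for_device() must be given the entry count originally handed to dma_map_sg(), i.e. sg_table.orig_nents, while sg_table.nents holds the possibly-coalesced count dma_map_sg() returned; passing nents, as the old code did, can leave part of the list unsynced. The dma_sync_sgtable_*() helpers close off the mistake by picking the field themselves, roughly:

#include <stdio.h>

/* Two lengths live in a scatter-gather table: orig_nents is the
 * CPU-side entry count given to dma_map_sg(); nents is what the
 * IOMMU coalesced that into.
 */
struct sg_table {
	int nents;	/* mapped (coalesced) entries */
	int orig_nents;	/* original entries: what syncs must cover */
};

static void dma_sync_sg(int nents)
{
	printf("syncing %d entries\n", nents);
}

static void dma_sync_sgtable(const struct sg_table *sgt)
{
	dma_sync_sg(sgt->orig_nents);	/* the helper picks correctly */
}

int main(void)
{
	struct sg_table sgt = { .nents = 3, .orig_nents = 8 };

	dma_sync_sg(sgt.nents);		/* old call: syncs only 3 of 8 */
	dma_sync_sgtable(&sgt);		/* fixed: all 8 */
	return 0;
}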
diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c b/drivers/dma/dw-edma/dw-edma-pcie.c
index 49f09998e5c0..3371e0a76d3c 100644
--- a/drivers/dma/dw-edma/dw-edma-pcie.c
+++ b/drivers/dma/dw-edma/dw-edma-pcie.c
@@ -161,12 +161,16 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
const struct pci_device_id *pid)
{
struct dw_edma_pcie_data *pdata = (void *)pid->driver_data;
- struct dw_edma_pcie_data vsec_data;
+ struct dw_edma_pcie_data *vsec_data __free(kfree) = NULL;
struct device *dev = &pdev->dev;
struct dw_edma_chip *chip;
int err, nr_irqs;
int i, mask;
+ vsec_data = kmalloc(sizeof(*vsec_data), GFP_KERNEL);
+ if (!vsec_data)
+ return -ENOMEM;
+
/* Enable PCI device */
err = pcim_enable_device(pdev);
if (err) {
@@ -174,23 +178,23 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
return err;
}
- memcpy(&vsec_data, pdata, sizeof(struct dw_edma_pcie_data));
+ memcpy(vsec_data, pdata, sizeof(struct dw_edma_pcie_data));
/*
 * Try to find a PCIe Vendor-Specific Extended Capability for the DMA;
 * if one exists, reconfigure the defaults from it.
*/
- dw_edma_pcie_get_vsec_dma_data(pdev, &vsec_data);
+ dw_edma_pcie_get_vsec_dma_data(pdev, vsec_data);
/* Mapping PCI BAR regions */
- mask = BIT(vsec_data.rg.bar);
- for (i = 0; i < vsec_data.wr_ch_cnt; i++) {
- mask |= BIT(vsec_data.ll_wr[i].bar);
- mask |= BIT(vsec_data.dt_wr[i].bar);
+ mask = BIT(vsec_data->rg.bar);
+ for (i = 0; i < vsec_data->wr_ch_cnt; i++) {
+ mask |= BIT(vsec_data->ll_wr[i].bar);
+ mask |= BIT(vsec_data->dt_wr[i].bar);
}
- for (i = 0; i < vsec_data.rd_ch_cnt; i++) {
- mask |= BIT(vsec_data.ll_rd[i].bar);
- mask |= BIT(vsec_data.dt_rd[i].bar);
+ for (i = 0; i < vsec_data->rd_ch_cnt; i++) {
+ mask |= BIT(vsec_data->ll_rd[i].bar);
+ mask |= BIT(vsec_data->dt_rd[i].bar);
}
err = pcim_iomap_regions(pdev, mask, pci_name(pdev));
if (err) {
@@ -213,7 +217,7 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
return -ENOMEM;
/* IRQs allocation */
- nr_irqs = pci_alloc_irq_vectors(pdev, 1, vsec_data.irqs,
+ nr_irqs = pci_alloc_irq_vectors(pdev, 1, vsec_data->irqs,
PCI_IRQ_MSI | PCI_IRQ_MSIX);
if (nr_irqs < 1) {
pci_err(pdev, "fail to alloc IRQ vector (number of IRQs=%u)\n",
@@ -224,22 +228,22 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
/* Data structure initialization */
chip->dev = dev;
- chip->mf = vsec_data.mf;
+ chip->mf = vsec_data->mf;
chip->nr_irqs = nr_irqs;
chip->ops = &dw_edma_pcie_plat_ops;
- chip->ll_wr_cnt = vsec_data.wr_ch_cnt;
- chip->ll_rd_cnt = vsec_data.rd_ch_cnt;
+ chip->ll_wr_cnt = vsec_data->wr_ch_cnt;
+ chip->ll_rd_cnt = vsec_data->rd_ch_cnt;
- chip->reg_base = pcim_iomap_table(pdev)[vsec_data.rg.bar];
+ chip->reg_base = pcim_iomap_table(pdev)[vsec_data->rg.bar];
if (!chip->reg_base)
return -ENOMEM;
for (i = 0; i < chip->ll_wr_cnt; i++) {
struct dw_edma_region *ll_region = &chip->ll_region_wr[i];
struct dw_edma_region *dt_region = &chip->dt_region_wr[i];
- struct dw_edma_block *ll_block = &vsec_data.ll_wr[i];
- struct dw_edma_block *dt_block = &vsec_data.dt_wr[i];
+ struct dw_edma_block *ll_block = &vsec_data->ll_wr[i];
+ struct dw_edma_block *dt_block = &vsec_data->dt_wr[i];
ll_region->vaddr.io = pcim_iomap_table(pdev)[ll_block->bar];
if (!ll_region->vaddr.io)
@@ -263,8 +267,8 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
for (i = 0; i < chip->ll_rd_cnt; i++) {
struct dw_edma_region *ll_region = &chip->ll_region_rd[i];
struct dw_edma_region *dt_region = &chip->dt_region_rd[i];
- struct dw_edma_block *ll_block = &vsec_data.ll_rd[i];
- struct dw_edma_block *dt_block = &vsec_data.dt_rd[i];
+ struct dw_edma_block *ll_block = &vsec_data->ll_rd[i];
+ struct dw_edma_block *dt_block = &vsec_data->dt_rd[i];
ll_region->vaddr.io = pcim_iomap_table(pdev)[ll_block->bar];
if (!ll_region->vaddr.io)
@@ -298,31 +302,31 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
pci_dbg(pdev, "Version:\tUnknown (0x%x)\n", chip->mf);
pci_dbg(pdev, "Registers:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p)\n",
- vsec_data.rg.bar, vsec_data.rg.off, vsec_data.rg.sz,
+ vsec_data->rg.bar, vsec_data->rg.off, vsec_data->rg.sz,
chip->reg_base);
for (i = 0; i < chip->ll_wr_cnt; i++) {
pci_dbg(pdev, "L. List:\tWRITE CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
- i, vsec_data.ll_wr[i].bar,
- vsec_data.ll_wr[i].off, chip->ll_region_wr[i].sz,
+ i, vsec_data->ll_wr[i].bar,
+ vsec_data->ll_wr[i].off, chip->ll_region_wr[i].sz,
chip->ll_region_wr[i].vaddr.io, &chip->ll_region_wr[i].paddr);
pci_dbg(pdev, "Data:\tWRITE CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
- i, vsec_data.dt_wr[i].bar,
- vsec_data.dt_wr[i].off, chip->dt_region_wr[i].sz,
+ i, vsec_data->dt_wr[i].bar,
+ vsec_data->dt_wr[i].off, chip->dt_region_wr[i].sz,
chip->dt_region_wr[i].vaddr.io, &chip->dt_region_wr[i].paddr);
}
for (i = 0; i < chip->ll_rd_cnt; i++) {
pci_dbg(pdev, "L. List:\tREAD CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
- i, vsec_data.ll_rd[i].bar,
- vsec_data.ll_rd[i].off, chip->ll_region_rd[i].sz,
+ i, vsec_data->ll_rd[i].bar,
+ vsec_data->ll_rd[i].off, chip->ll_region_rd[i].sz,
chip->ll_region_rd[i].vaddr.io, &chip->ll_region_rd[i].paddr);
pci_dbg(pdev, "Data:\tREAD CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
- i, vsec_data.dt_rd[i].bar,
- vsec_data.dt_rd[i].off, chip->dt_region_rd[i].sz,
+ i, vsec_data->dt_rd[i].bar,
+ vsec_data->dt_rd[i].off, chip->dt_region_rd[i].sz,
chip->dt_region_rd[i].vaddr.io, &chip->dt_region_rd[i].paddr);
}
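[Annotation] Moving vsec_data off the stack trades a large stack frame for a heap allocation, and __free(kfree) attaches scope-based cleanup so none of the early returns can leak it. The same pattern with the underlying compiler feature, in userspace (GCC/Clang cleanup attribute):

#include <stdio.h>
#include <stdlib.h>

/* Userspace model of the kernel's __free(kfree): the compiler runs
 * the cleanup handler when the variable goes out of scope, so early
 * returns cannot leak the allocation.
 */
static void free_p(void *p) { free(*(void **)p); }
#define __free_auto __attribute__((cleanup(free_p)))

struct big_cfg { char blob[4096]; };	/* too big for a kernel stack */

static int probe(int fail_early)
{
	struct big_cfg *cfg __free_auto = malloc(sizeof(*cfg));

	if (!cfg)
		return -1;
	if (fail_early)
		return -2;	/* cfg is still freed on this path */
	cfg->blob[0] = 1;
	return 0;
}

int main(void)
{
	printf("%d %d\n", probe(1), probe(0));
	return 0;
}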
diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c
index 47c8adfdc155..9f0c41ca7770 100644
--- a/drivers/dma/mediatek/mtk-cqdma.c
+++ b/drivers/dma/mediatek/mtk-cqdma.c
@@ -449,9 +449,9 @@ static enum dma_status mtk_cqdma_tx_status(struct dma_chan *c,
return ret;
spin_lock_irqsave(&cvc->pc->lock, flags);
- spin_lock_irqsave(&cvc->vc.lock, flags);
+ spin_lock(&cvc->vc.lock);
vd = mtk_cqdma_find_active_desc(c, cookie);
- spin_unlock_irqrestore(&cvc->vc.lock, flags);
+ spin_unlock(&cvc->vc.lock);
spin_unlock_irqrestore(&cvc->pc->lock, flags);
if (vd) {
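[Annotation] The inner lock had reused the same flags variable as the outer spin_lock_irqsave(), so the inner save overwrote the outer snapshot of the IRQ state; since interrupts are already off under the outer lock, plain spin_lock()/spin_unlock() is both sufficient and correct. Why one flags variable cannot be shared, in miniature:

#include <stdio.h>

/* Model: the inner save clobbers the outer snapshot, so the final
 * restore reinstates "disabled" instead of the original state.
 */
static int irqs_enabled = 1;

static void lock_irqsave(unsigned long *flags)
{
	*flags = irqs_enabled;	/* snapshot */
	irqs_enabled = 0;
}

static void unlock_irqrestore(unsigned long flags)
{
	irqs_enabled = (int)flags;
}

int main(void)
{
	unsigned long flags;

	lock_irqsave(&flags);	/* outer: saves "enabled" */
	lock_irqsave(&flags);	/* inner: overwrites with "disabled"! */
	unlock_irqrestore(flags);
	unlock_irqrestore(flags);
	printf("irqs_enabled=%d (should be 1)\n", irqs_enabled);
	return 0;
}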
diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c
index 0d6324c4e2be..7a2488a0d6a3 100644
--- a/drivers/dma/nbpfaxi.c
+++ b/drivers/dma/nbpfaxi.c
@@ -1351,7 +1351,7 @@ static int nbpf_probe(struct platform_device *pdev)
if (irqs == 1) {
eirq = irqbuf[0];
- for (i = 0; i <= num_channels; i++)
+ for (i = 0; i < num_channels; i++)
nbpf->chan[i].irq = irqbuf[0];
} else {
eirq = platform_get_irq_byname(pdev, "error");
@@ -1361,16 +1361,15 @@ static int nbpf_probe(struct platform_device *pdev)
if (irqs == num_channels + 1) {
struct nbpf_channel *chan;
- for (i = 0, chan = nbpf->chan; i <= num_channels;
+ for (i = 0, chan = nbpf->chan; i < num_channels;
i++, chan++) {
/* Skip the error IRQ */
if (irqbuf[i] == eirq)
i++;
+ if (i >= ARRAY_SIZE(irqbuf))
+ return -EINVAL;
chan->irq = irqbuf[i];
}
-
- if (chan != nbpf->chan + num_channels)
- return -EINVAL;
} else {
/* 2 IRQs and more than one channel */
if (irqbuf[0] == eirq)
@@ -1378,7 +1377,7 @@ static int nbpf_probe(struct platform_device *pdev)
else
irq = irqbuf[0];
- for (i = 0; i <= num_channels; i++)
+ for (i = 0; i < num_channels; i++)
nbpf->chan[i].irq = irq;
}
}
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 58b1482a0fbb..07f1e9dc1ca7 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1209,7 +1209,9 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
if (csrow_enabled(2 * dimm + 1, ctrl, pvt))
cs_mode |= CS_ODD_PRIMARY;
- /* Asymmetric dual-rank DIMM support. */
+ if (csrow_sec_enabled(2 * dimm, ctrl, pvt))
+ cs_mode |= CS_EVEN_SECONDARY;
+
if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt))
cs_mode |= CS_ODD_SECONDARY;
@@ -1230,12 +1232,13 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
return cs_mode;
}
-static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
- int csrow_nr, int dimm)
+static int calculate_cs_size(u32 mask, unsigned int cs_mode)
{
- u32 msb, weight, num_zero_bits;
- u32 addr_mask_deinterleaved;
- int size = 0;
+ int msb, weight, num_zero_bits;
+ u32 deinterleaved_mask;
+
+ if (!mask)
+ return 0;
/*
* The number of zero bits in the mask is equal to the number of bits
@@ -1248,19 +1251,30 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
* without swapping with the most significant bit. This can be handled
* by keeping the MSB where it is and ignoring the single zero bit.
*/
- msb = fls(addr_mask_orig) - 1;
- weight = hweight_long(addr_mask_orig);
+ msb = fls(mask) - 1;
+ weight = hweight_long(mask);
num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
/* Take the number of zero bits off from the top of the mask. */
- addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
+ deinterleaved_mask = GENMASK(msb - num_zero_bits, 1);
+ edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", deinterleaved_mask);
+
+ return (deinterleaved_mask >> 2) + 1;
+}
+
+static int __addr_mask_to_cs_size(u32 addr_mask, u32 addr_mask_sec,
+ unsigned int cs_mode, int csrow_nr, int dimm)
+{
+ int size;
edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
- edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
- edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+ edac_dbg(1, " Primary AddrMask: 0x%x\n", addr_mask);
/* Register [31:1] = Address [39:9]. Size is in kBs here. */
- size = (addr_mask_deinterleaved >> 2) + 1;
+ size = calculate_cs_size(addr_mask, cs_mode);
+
+ edac_dbg(1, " Secondary AddrMask: 0x%x\n", addr_mask_sec);
+ size += calculate_cs_size(addr_mask_sec, cs_mode);
/* Return size in MBs. */
return size >> 10;
@@ -1269,8 +1283,8 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
+ u32 addr_mask = 0, addr_mask_sec = 0;
int cs_mask_nr = csrow_nr;
- u32 addr_mask_orig;
int dimm, size = 0;
/* No Chip Selects are enabled. */
@@ -1308,13 +1322,13 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
if (!pvt->flags.zn_regs_v2)
cs_mask_nr >>= 1;
- /* Asymmetric dual-rank DIMM support. */
- if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
- addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr];
- else
- addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];
+ if (cs_mode & (CS_EVEN_PRIMARY | CS_ODD_PRIMARY))
+ addr_mask = pvt->csels[umc].csmasks[cs_mask_nr];
- return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, dimm);
+ if (cs_mode & (CS_EVEN_SECONDARY | CS_ODD_SECONDARY))
+ addr_mask_sec = pvt->csels[umc].csmasks_sec[cs_mask_nr];
+
+ return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, dimm);
}
static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
@@ -3512,9 +3526,10 @@ static void gpu_get_err_info(struct mce *m, struct err_info *err)
static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
- u32 addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr];
+ u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr];
+ u32 addr_mask_sec = pvt->csels[umc].csmasks_sec[csrow_nr];
- return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, csrow_nr >> 1);
+ return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, csrow_nr >> 1);
}
static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
@@ -3879,6 +3894,7 @@ static int per_family_init(struct amd64_pvt *pvt)
break;
case 0x70 ... 0x7f:
pvt->ctl_name = "F19h_M70h";
+ pvt->max_mcs = 4;
pvt->flags.zn_regs_v2 = 1;
break;
case 0x90 ... 0x9f:
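
The amd64_edac refactor above splits the mask-deinterleaving arithmetic into calculate_cs_size() so that primary and secondary address masks can each contribute to a chip-select size, which is what asymmetric dual-rank DIMMs need. A small userspace sketch of the same arithmetic, assuming fls()/hweight_long() map to the GCC builtins and GENMASK() to the usual bit-range macro:

#include <stdio.h>
#include <stdint.h>

#define GENMASK(h, l)	((~0u >> (31 - (h))) & (~0u << (l)))

/* Mirrors calculate_cs_size(): size in kB encoded by one address mask. */
static int cs_size_kb(uint32_t mask, int cs_3r_interleave)
{
	int msb, weight, num_zero_bits;
	uint32_t deinterleaved;

	if (!mask)
		return 0;

	msb = 31 - __builtin_clz(mask);		/* fls(mask) - 1 */
	weight = __builtin_popcount(mask);	/* hweight_long(mask) */
	num_zero_bits = msb - weight - !!cs_3r_interleave;

	/* Take the number of zero bits off from the top of the mask. */
	deinterleaved = GENMASK(msb - num_zero_bits, 1);

	/* Register [31:1] = Address [39:9], so the result is in kB. */
	return (deinterleaved >> 2) + 1;
}

int main(void)
{
	/* Hypothetical mask with register bits [25:1] set: a 16 GB rank. */
	printf("%d kB\n", cs_size_kb(0x03fffffe, 0));	/* 16777216 kB */
	return 0;
}
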
diff --git a/drivers/edac/ecs.c b/drivers/edac/ecs.c
index 1d51838a60c1..51c451c7f0f0 100755
--- a/drivers/edac/ecs.c
+++ b/drivers/edac/ecs.c
@@ -170,8 +170,10 @@ static int ecs_create_desc(struct device *ecs_dev, const struct attribute_group
fru_ctx->dev_attr[ECS_RESET] = EDAC_ECS_ATTR_WO(reset, fru);
fru_ctx->dev_attr[ECS_THRESHOLD] = EDAC_ECS_ATTR_RW(threshold, fru);
- for (i = 0; i < ECS_MAX_ATTRS; i++)
+ for (i = 0; i < ECS_MAX_ATTRS; i++) {
+ sysfs_attr_init(&fru_ctx->dev_attr[i].dev_attr.attr);
fru_ctx->ecs_attrs[i] = &fru_ctx->dev_attr[i].dev_attr.attr;
+ }
sprintf(fru_ctx->name, "%s%d", EDAC_ECS_FRU_NAME, fru);
group->name = fru_ctx->name;
diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c
index 1930dc00c791..1cb5c67e78ae 100644
--- a/drivers/edac/igen6_edac.c
+++ b/drivers/edac/igen6_edac.c
@@ -125,7 +125,7 @@
#define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6)
#define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26)
-static const struct res_config {
+static struct res_config {
bool machine_check;
/* The number of present memory controllers. */
int num_imc;
@@ -479,7 +479,7 @@ static u64 rpl_p_err_addr(u64 ecclog)
return ECC_ERROR_LOG_ADDR45(ecclog);
}
-static const struct res_config ehl_cfg = {
+static struct res_config ehl_cfg = {
.num_imc = 1,
.imc_base = 0x5000,
.ibecc_base = 0xdc00,
@@ -489,7 +489,7 @@ static const struct res_config ehl_cfg = {
.err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
};
-static const struct res_config icl_cfg = {
+static struct res_config icl_cfg = {
.num_imc = 1,
.imc_base = 0x5000,
.ibecc_base = 0xd800,
@@ -499,7 +499,7 @@ static const struct res_config icl_cfg = {
.err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
};
-static const struct res_config tgl_cfg = {
+static struct res_config tgl_cfg = {
.machine_check = true,
.num_imc = 2,
.imc_base = 0x5000,
@@ -513,7 +513,7 @@ static const struct res_config tgl_cfg = {
.err_addr_to_imc_addr = tgl_err_addr_to_imc_addr,
};
-static const struct res_config adl_cfg = {
+static struct res_config adl_cfg = {
.machine_check = true,
.num_imc = 2,
.imc_base = 0xd800,
@@ -524,7 +524,7 @@ static const struct res_config adl_cfg = {
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
-static const struct res_config adl_n_cfg = {
+static struct res_config adl_n_cfg = {
.machine_check = true,
.num_imc = 1,
.imc_base = 0xd800,
@@ -535,7 +535,7 @@ static const struct res_config adl_n_cfg = {
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
-static const struct res_config rpl_p_cfg = {
+static struct res_config rpl_p_cfg = {
.machine_check = true,
.num_imc = 2,
.imc_base = 0xd800,
@@ -547,7 +547,7 @@ static const struct res_config rpl_p_cfg = {
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
-static const struct res_config mtl_ps_cfg = {
+static struct res_config mtl_ps_cfg = {
.machine_check = true,
.num_imc = 2,
.imc_base = 0xd800,
@@ -558,7 +558,7 @@ static const struct res_config mtl_ps_cfg = {
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
-static const struct res_config mtl_p_cfg = {
+static struct res_config mtl_p_cfg = {
.machine_check = true,
.num_imc = 2,
.imc_base = 0xd800,
@@ -569,7 +569,7 @@ static const struct res_config mtl_p_cfg = {
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
-static const struct pci_device_id igen6_pci_tbl[] = {
+static struct pci_device_id igen6_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
{ PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg },
@@ -1350,9 +1350,11 @@ static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar)
return -ENODEV;
}
- if (lmc < res_cfg->num_imc)
+ if (lmc < res_cfg->num_imc) {
igen6_printk(KERN_WARNING, "Expected %d mcs, but only %d detected.",
res_cfg->num_imc, lmc);
+ res_cfg->num_imc = lmc;
+ }
return 0;
diff --git a/drivers/edac/mem_repair.c b/drivers/edac/mem_repair.c
index d1a8caa85369..70a033a76233 100755
--- a/drivers/edac/mem_repair.c
+++ b/drivers/edac/mem_repair.c
@@ -333,6 +333,7 @@ static int mem_repair_create_desc(struct device *dev,
for (i = 0; i < MR_MAX_ATTRS; i++) {
memcpy(&ctx->mem_repair_dev_attr[i],
&dev_attr[i], sizeof(dev_attr[i]));
+ sysfs_attr_init(&ctx->mem_repair_dev_attr[i].dev_attr.attr);
ctx->mem_repair_attrs[i] =
&ctx->mem_repair_dev_attr[i].dev_attr.attr;
}
diff --git a/drivers/edac/scrub.c b/drivers/edac/scrub.c
index e421d3ebd959..f9d02af2fc3a 100755
--- a/drivers/edac/scrub.c
+++ b/drivers/edac/scrub.c
@@ -176,6 +176,7 @@ static int scrub_create_desc(struct device *scrub_dev,
group = &scrub_ctx->group;
for (i = 0; i < SCRUB_MAX_ATTRS; i++) {
memcpy(&scrub_ctx->scrub_dev_attr[i], &dev_attr[i], sizeof(dev_attr[i]));
+ sysfs_attr_init(&scrub_ctx->scrub_dev_attr[i].dev_attr.attr);
scrub_ctx->scrub_attrs[i] = &scrub_ctx->scrub_dev_attr[i].dev_attr.attr;
}
sprintf(scrub_ctx->name, "%s%d", "scrub", instance);
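
The ecs.c, mem_repair.c and scrub.c hunks apply the same fix: attributes created at runtime must pass through sysfs_attr_init() before registration, or lockdep (with CONFIG_DEBUG_LOCK_ALLOC) complains about a missing static key; the macro is a no-op otherwise. A condensed, non-standalone kernel-style sketch of the pattern, with hypothetical names:

#include <linux/device.h>
#include <linux/sysfs.h>

#define N_ATTRS	4

/* Hypothetical context carrying dynamically created attributes. */
struct frob_ctx {
	struct device_attribute dev_attr[N_ATTRS];
	struct attribute *attrs[N_ATTRS + 1];	/* +1 for the NULL terminator */
};

static void frob_init_attrs(struct frob_ctx *ctx)
{
	int i;

	for (i = 0; i < N_ATTRS; i++) {
		/* Runtime-allocated attrs need their lockdep key set up. */
		sysfs_attr_init(&ctx->dev_attr[i].attr);
		ctx->attrs[i] = &ctx->dev_attr[i].attr;
	}
	ctx->attrs[i] = NULL;
}
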
diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
index fe55613a8ea9..37eb2e6c2f9f 100644
--- a/drivers/firmware/arm_ffa/driver.c
+++ b/drivers/firmware/arm_ffa/driver.c
@@ -110,7 +110,7 @@ struct ffa_drv_info {
struct work_struct sched_recv_irq_work;
struct xarray partition_info;
DECLARE_HASHTABLE(notifier_hash, ilog2(FFA_MAX_NOTIFICATIONS));
- struct mutex notify_lock; /* lock to protect notifier hashtable */
+ rwlock_t notify_lock; /* lock to protect notifier hashtable */
};
static struct ffa_drv_info *drv_info;
@@ -1250,13 +1250,12 @@ notifier_hnode_get_by_type(u16 notify_id, enum notify_type type)
return NULL;
}
-static int
-update_notifier_cb(struct ffa_device *dev, int notify_id, void *cb,
- void *cb_data, bool is_registration, bool is_framework)
+static int update_notifier_cb(struct ffa_device *dev, int notify_id,
+ struct notifier_cb_info *cb, bool is_framework)
{
struct notifier_cb_info *cb_info = NULL;
enum notify_type type = ffa_notify_type_get(dev->vm_id);
- bool cb_found;
+ bool cb_found, is_registration = !!cb;
if (is_framework)
cb_info = notifier_hnode_get_by_vmid_uuid(notify_id, dev->vm_id,
@@ -1270,20 +1269,10 @@ update_notifier_cb(struct ffa_device *dev, int notify_id, void *cb,
return -EINVAL;
if (is_registration) {
- cb_info = kzalloc(sizeof(*cb_info), GFP_KERNEL);
- if (!cb_info)
- return -ENOMEM;
-
- cb_info->dev = dev;
- cb_info->cb_data = cb_data;
- if (is_framework)
- cb_info->fwk_cb = cb;
- else
- cb_info->cb = cb;
-
- hash_add(drv_info->notifier_hash, &cb_info->hnode, notify_id);
+ hash_add(drv_info->notifier_hash, &cb->hnode, notify_id);
} else {
hash_del(&cb_info->hnode);
+ kfree(cb_info);
}
return 0;
@@ -1300,20 +1289,19 @@ static int __ffa_notify_relinquish(struct ffa_device *dev, int notify_id,
if (notify_id >= FFA_MAX_NOTIFICATIONS)
return -EINVAL;
- mutex_lock(&drv_info->notify_lock);
+ write_lock(&drv_info->notify_lock);
- rc = update_notifier_cb(dev, notify_id, NULL, NULL, false,
- is_framework);
+ rc = update_notifier_cb(dev, notify_id, NULL, is_framework);
if (rc) {
pr_err("Could not unregister notification callback\n");
- mutex_unlock(&drv_info->notify_lock);
+ write_unlock(&drv_info->notify_lock);
return rc;
}
if (!is_framework)
rc = ffa_notification_unbind(dev->vm_id, BIT(notify_id));
- mutex_unlock(&drv_info->notify_lock);
+ write_unlock(&drv_info->notify_lock);
return rc;
}
@@ -1334,6 +1322,7 @@ static int __ffa_notify_request(struct ffa_device *dev, bool is_per_vcpu,
{
int rc;
u32 flags = 0;
+ struct notifier_cb_info *cb_info = NULL;
if (ffa_notifications_disabled())
return -EOPNOTSUPP;
@@ -1341,28 +1330,40 @@ static int __ffa_notify_request(struct ffa_device *dev, bool is_per_vcpu,
if (notify_id >= FFA_MAX_NOTIFICATIONS)
return -EINVAL;
- mutex_lock(&drv_info->notify_lock);
+ cb_info = kzalloc(sizeof(*cb_info), GFP_KERNEL);
+ if (!cb_info)
+ return -ENOMEM;
+
+ cb_info->dev = dev;
+ cb_info->cb_data = cb_data;
+ if (is_framework)
+ cb_info->fwk_cb = cb;
+ else
+ cb_info->cb = cb;
+
+ write_lock(&drv_info->notify_lock);
if (!is_framework) {
if (is_per_vcpu)
flags = PER_VCPU_NOTIFICATION_FLAG;
rc = ffa_notification_bind(dev->vm_id, BIT(notify_id), flags);
- if (rc) {
- mutex_unlock(&drv_info->notify_lock);
- return rc;
- }
+ if (rc)
+ goto out_unlock_free;
}
- rc = update_notifier_cb(dev, notify_id, cb, cb_data, true,
- is_framework);
+ rc = update_notifier_cb(dev, notify_id, cb_info, is_framework);
if (rc) {
pr_err("Failed to register callback for %d - %d\n",
notify_id, rc);
if (!is_framework)
ffa_notification_unbind(dev->vm_id, BIT(notify_id));
}
- mutex_unlock(&drv_info->notify_lock);
+
+out_unlock_free:
+ write_unlock(&drv_info->notify_lock);
+ if (rc)
+ kfree(cb_info);
return rc;
}
@@ -1406,9 +1407,9 @@ static void handle_notif_callbacks(u64 bitmap, enum notify_type type)
if (!(bitmap & 1))
continue;
- mutex_lock(&drv_info->notify_lock);
+ read_lock(&drv_info->notify_lock);
cb_info = notifier_hnode_get_by_type(notify_id, type);
- mutex_unlock(&drv_info->notify_lock);
+ read_unlock(&drv_info->notify_lock);
if (cb_info && cb_info->cb)
cb_info->cb(notify_id, cb_info->cb_data);
@@ -1446,9 +1447,9 @@ static void handle_fwk_notif_callbacks(u32 bitmap)
ffa_rx_release();
- mutex_lock(&drv_info->notify_lock);
+ read_lock(&drv_info->notify_lock);
cb_info = notifier_hnode_get_by_vmid_uuid(notify_id, target, &uuid);
- mutex_unlock(&drv_info->notify_lock);
+ read_unlock(&drv_info->notify_lock);
if (cb_info && cb_info->fwk_cb)
cb_info->fwk_cb(notify_id, cb_info->cb_data, buf);
@@ -1973,7 +1974,7 @@ static void ffa_notifications_setup(void)
goto cleanup;
hash_init(drv_info->notifier_hash);
- mutex_init(&drv_info->notify_lock);
+ rwlock_init(&drv_info->notify_lock);
drv_info->notif_enabled = true;
return;
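
The FF-A driver change above does two things: the notifier hashtable becomes read-mostly (interrupt-path lookups take a read lock, register/unregister take the write lock), and the GFP_KERNEL allocation is hoisted out of the critical section, which a spinning rwlock requires anyway. A minimal pthread sketch of the allocate-outside, publish-under-write-lock pattern (pthread rwlocks sleep, unlike the kernel's rwlock_t; all names hypothetical):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct entry { int id; } *table[16];

static int register_entry(int id)
{
	/* Allocate before taking the lock; allocation may sleep. */
	struct entry *e = malloc(sizeof(*e));

	if (!e)
		return -1;
	e->id = id;

	pthread_rwlock_wrlock(&table_lock);
	if (table[id]) {
		pthread_rwlock_unlock(&table_lock);
		free(e);		/* free after dropping the lock */
		return -1;		/* already registered */
	}
	table[id] = e;
	pthread_rwlock_unlock(&table_lock);
	return 0;
}

static struct entry *lookup_entry(int id)
{
	struct entry *e;

	pthread_rwlock_rdlock(&table_lock);	/* readers run in parallel */
	e = table[id];
	pthread_rwlock_unlock(&table_lock);
	return e;
}

int main(void)
{
	register_entry(3);
	printf("entry 3: %p\n", (void *)lookup_entry(3));
	return 0;
}
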
diff --git a/drivers/firmware/efi/libstub/zboot.lds b/drivers/firmware/efi/libstub/zboot.lds
index c3a166675450..367907eb7d86 100644
--- a/drivers/firmware/efi/libstub/zboot.lds
+++ b/drivers/firmware/efi/libstub/zboot.lds
@@ -29,14 +29,12 @@ SECTIONS
. = _etext;
}
-#ifdef CONFIG_EFI_SBAT
.sbat : ALIGN(4096) {
_sbat = .;
*(.sbat)
_esbat = ALIGN(4096);
. = _esbat;
}
-#endif
.data : ALIGN(4096) {
_data = .;
@@ -60,6 +58,6 @@ SECTIONS
PROVIDE(__efistub__gzdata_size =
ABSOLUTE(__efistub__gzdata_end - __efistub__gzdata_start));
-PROVIDE(__data_rawsize = ABSOLUTE(_edata - _etext));
-PROVIDE(__data_size = ABSOLUTE(_end - _etext));
+PROVIDE(__data_rawsize = ABSOLUTE(_edata - _data));
+PROVIDE(__data_size = ABSOLUTE(_end - _data));
PROVIDE(__sbat_size = ABSOLUTE(_esbat - _sbat));
diff --git a/drivers/firmware/samsung/exynos-acpm.c b/drivers/firmware/samsung/exynos-acpm.c
index e02f14f4bd7c..3a69fe3234c7 100644
--- a/drivers/firmware/samsung/exynos-acpm.c
+++ b/drivers/firmware/samsung/exynos-acpm.c
@@ -430,6 +430,9 @@ int acpm_do_xfer(const struct acpm_handle *handle, const struct acpm_xfer *xfer)
return -EOPNOTSUPP;
}
+ msg.chan_id = xfer->acpm_chan_id;
+ msg.chan_type = EXYNOS_MBOX_CHAN_TYPE_DOORBELL;
+
scoped_guard(mutex, &achan->tx_lock) {
tx_front = readl(achan->tx.front);
idx = (tx_front + 1) % achan->qlen;
@@ -446,25 +449,15 @@ int acpm_do_xfer(const struct acpm_handle *handle, const struct acpm_xfer *xfer)
/* Advance TX front. */
writel(idx, achan->tx.front);
- }
- msg.chan_id = xfer->acpm_chan_id;
- msg.chan_type = EXYNOS_MBOX_CHAN_TYPE_DOORBELL;
- ret = mbox_send_message(achan->chan, (void *)&msg);
- if (ret < 0)
- return ret;
-
- ret = acpm_wait_for_message_response(achan, xfer);
+ ret = mbox_send_message(achan->chan, (void *)&msg);
+ if (ret < 0)
+ return ret;
- /*
- * NOTE: we might prefer not to need the mailbox ticker to manage the
- * transfer queueing since the protocol layer queues things by itself.
- * Unfortunately, we have to kick the mailbox framework after we have
- * received our message.
- */
- mbox_client_txdone(achan->chan, ret);
+ mbox_client_txdone(achan->chan, 0);
+ }
- return ret;
+ return acpm_wait_for_message_response(achan, xfer);
}
/**
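
Previously in exynos-acpm the TX-ring write and the doorbell send sat in separate critical sections, so a concurrent caller could advance the ring front between them and messages could be signalled out of order; moving mbox_send_message() and the txdone kick inside the tx_lock scope makes enqueue-and-notify atomic. A minimal pthread sketch of the fixed ordering, with hypothetical names:

#include <pthread.h>
#include <stdio.h>

#define QLEN 8

static pthread_mutex_t tx_lock = PTHREAD_MUTEX_INITIALIZER;
static int ring[QLEN];
static int front;

/* Stand-in for mbox_send_message() + mbox_client_txdone(). */
static void ring_doorbell(int idx)
{
	printf("doorbell: slot %d ready\n", idx);
}

static void enqueue(int msg)
{
	int idx;

	pthread_mutex_lock(&tx_lock);
	idx = (front + 1) % QLEN;
	ring[idx] = msg;
	front = idx;
	/*
	 * Notify while still holding the lock: a concurrent enqueue can
	 * no longer advance 'front' between the write and the doorbell.
	 */
	ring_doorbell(idx);
	pthread_mutex_unlock(&tx_lock);
}

int main(void)
{
	enqueue(42);
	return 0;
}
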
diff --git a/drivers/gpio/gpio-loongson-64bit.c b/drivers/gpio/gpio-loongson-64bit.c
index 26227669f026..70a01c5b8ad1 100644
--- a/drivers/gpio/gpio-loongson-64bit.c
+++ b/drivers/gpio/gpio-loongson-64bit.c
@@ -268,7 +268,7 @@ static const struct loongson_gpio_chip_data loongson_gpio_ls7a2000_data0 = {
/* LS7A2000 ACPI GPIO */
static const struct loongson_gpio_chip_data loongson_gpio_ls7a2000_data1 = {
.label = "ls7a2000_gpio",
- .mode = BYTE_CTRL_MODE,
+ .mode = BIT_CTRL_MODE,
.conf_offset = 0x4,
.in_offset = 0x8,
.out_offset = 0x0,
diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c
index 10ea71273c89..9875e34bde72 100644
--- a/drivers/gpio/gpio-mlxbf3.c
+++ b/drivers/gpio/gpio-mlxbf3.c
@@ -190,7 +190,9 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev)
struct mlxbf3_gpio_context *gs;
struct gpio_irq_chip *girq;
struct gpio_chip *gc;
+ char *colon_ptr;
int ret, irq;
+ long num;
gs = devm_kzalloc(dev, sizeof(*gs), GFP_KERNEL);
if (!gs)
@@ -227,25 +229,39 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev)
gc->owner = THIS_MODULE;
gc->add_pin_ranges = mlxbf3_gpio_add_pin_ranges;
- irq = platform_get_irq(pdev, 0);
- if (irq >= 0) {
- girq = &gs->gc.irq;
- gpio_irq_chip_set_chip(girq, &gpio_mlxbf3_irqchip);
- girq->default_type = IRQ_TYPE_NONE;
- /* This will let us handle the parent IRQ in the driver */
- girq->num_parents = 0;
- girq->parents = NULL;
- girq->parent_handler = NULL;
- girq->handler = handle_bad_irq;
-
- /*
- * Directly request the irq here instead of passing
- * a flow-handler because the irq is shared.
- */
- ret = devm_request_irq(dev, irq, mlxbf3_gpio_irq_handler,
- IRQF_SHARED, dev_name(dev), gs);
- if (ret)
- return dev_err_probe(dev, ret, "failed to request IRQ");
+ colon_ptr = strchr(dev_name(dev), ':');
+ if (!colon_ptr) {
+ dev_err(dev, "invalid device name format\n");
+ return -EINVAL;
+ }
+
+ ret = kstrtol(++colon_ptr, 16, &num);
+ if (ret) {
+ dev_err(dev, "invalid device instance\n");
+ return ret;
+ }
+
+ if (!num) {
+ irq = platform_get_irq(pdev, 0);
+ if (irq >= 0) {
+ girq = &gs->gc.irq;
+ gpio_irq_chip_set_chip(girq, &gpio_mlxbf3_irqchip);
+ girq->default_type = IRQ_TYPE_NONE;
+ /* This will let us handle the parent IRQ in the driver */
+ girq->num_parents = 0;
+ girq->parents = NULL;
+ girq->parent_handler = NULL;
+ girq->handler = handle_bad_irq;
+
+ /*
+ * Directly request the irq here instead of passing
+ * a flow-handler because the irq is shared.
+ */
+ ret = devm_request_irq(dev, irq, mlxbf3_gpio_irq_handler,
+ IRQF_SHARED, dev_name(dev), gs);
+ if (ret)
+ return dev_err_probe(dev, ret, "failed to request IRQ");
+ }
}
platform_set_drvdata(pdev, gs);
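
The mlxbf3 probe now derives the GPIO block instance from the ACPI device name (e.g. "MLNXBF33:00" -> instance 0) and requests the shared interrupt only for instance 0, avoiding duplicate handlers on the same line. A small userspace sketch of the name parsing, assuming the name:instance convention and strtol() standing in for kstrtol():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parse the hex instance suffix of a "NAME:NN" device name. */
static int parse_instance(const char *dev_name, long *num)
{
	const char *colon_ptr = strchr(dev_name, ':');
	char *end;

	if (!colon_ptr)
		return -1;		/* invalid device name format */

	*num = strtol(colon_ptr + 1, &end, 16);
	if (end == colon_ptr + 1 || *end != '\0')
		return -1;		/* invalid device instance */
	return 0;
}

int main(void)
{
	long num;

	if (!parse_instance("MLNXBF33:00", &num) && num == 0)
		printf("instance 0: request the shared IRQ here\n");
	return 0;
}
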
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index b852e4997629..e80a96f39788 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -974,7 +974,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, int irq_base)
IRQF_ONESHOT | IRQF_SHARED, dev_name(dev),
chip);
if (ret)
- return dev_err_probe(dev, client->irq, "failed to request irq\n");
+ return dev_err_probe(dev, ret, "failed to request irq\n");
return 0;
}
diff --git a/drivers/gpio/gpio-spacemit-k1.c b/drivers/gpio/gpio-spacemit-k1.c
index f027066365ff..3cc75c701ec4 100644
--- a/drivers/gpio/gpio-spacemit-k1.c
+++ b/drivers/gpio/gpio-spacemit-k1.c
@@ -278,6 +278,7 @@ static const struct of_device_id spacemit_gpio_dt_ids[] = {
{ .compatible = "spacemit,k1-gpio" },
{ /* sentinel */ }
};
+MODULE_DEVICE_TABLE(of, spacemit_gpio_dt_ids);
static struct platform_driver spacemit_gpio_driver = {
.probe = spacemit_gpio_probe,
diff --git a/drivers/gpio/gpiolib-acpi-quirks.c b/drivers/gpio/gpiolib-acpi-quirks.c
index 219667315b2c..c13545dce349 100644
--- a/drivers/gpio/gpiolib-acpi-quirks.c
+++ b/drivers/gpio/gpiolib-acpi-quirks.c
@@ -331,6 +331,19 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = {
.ignore_interrupt = "AMDI0030:00@11",
},
},
+ {
+ /*
+ * Wakeup only works when keyboard backlight is turned off
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/4169
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_FAMILY, "Acer Nitro V 15"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_interrupt = "AMDI0030:00@8",
+ },
+ },
{} /* Terminating entry */
};
diff --git a/drivers/gpio/gpiolib-devres.c b/drivers/gpio/gpiolib-devres.c
index 4d5f83b17624..72422c5db364 100644
--- a/drivers/gpio/gpiolib-devres.c
+++ b/drivers/gpio/gpiolib-devres.c
@@ -319,7 +319,7 @@ EXPORT_SYMBOL_GPL(devm_gpiod_unhinge);
*/
void devm_gpiod_put_array(struct device *dev, struct gpio_descs *descs)
{
- devm_remove_action(dev, devm_gpiod_release_array, descs);
+ devm_release_action(dev, devm_gpiod_release_array, descs);
}
EXPORT_SYMBOL_GPL(devm_gpiod_put_array);
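
devm_remove_action() only unregisters the devres callback, so devm_gpiod_put_array() never actually released the array; devm_release_action() unregisters the callback and runs it immediately, which is the semantics an explicit put needs. A condensed, non-standalone kernel-style sketch of the distinction, with a hypothetical callback:

#include <linux/device.h>

static void release_thing(void *data)
{
	/* undo whatever the paired devm_add_action_or_reset() set up */
}

static int example(struct device *dev, void *thing)
{
	int ret = devm_add_action_or_reset(dev, release_thing, thing);

	if (ret)
		return ret;

	/*
	 * devm_remove_action(dev, release_thing, thing)
	 *	only unregisters the callback; release_thing() never runs.
	 *
	 * devm_release_action(dev, release_thing, thing)
	 *	unregisters the callback and runs it now -- the right
	 *	semantics for an explicit "put".
	 */
	devm_release_action(dev, release_thing, thing);
	return 0;
}
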
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 73ba73b31cb1..37ab78243fab 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -708,7 +708,7 @@ struct gpio_desc *of_find_gpio(struct device_node *np, const char *con_id,
unsigned int idx, unsigned long *flags)
{
char propname[32]; /* 32 is max size of property name */
- enum of_gpio_flags of_flags;
+ enum of_gpio_flags of_flags = 0;
const of_find_gpio_quirk *q;
struct gpio_desc *desc;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index fdafa0df1b43..3a3eca5b4c40 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -3297,14 +3297,15 @@ static int gpiod_get_raw_value_commit(const struct gpio_desc *desc)
static int gpio_chip_get_multiple(struct gpio_chip *gc,
unsigned long *mask, unsigned long *bits)
{
- int ret;
-
lockdep_assert_held(&gc->gpiodev->srcu);
if (gc->get_multiple) {
+ int ret;
+
ret = gc->get_multiple(gc, mask, bits);
if (ret > 0)
return -EBADE;
+ return ret;
}
if (gc->get) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index ca4a6b82817f..df77558e03ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -561,6 +561,13 @@ static uint32_t read_vmid_from_vmfault_reg(struct amdgpu_device *adev)
return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
}
+static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -578,4 +585,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
.set_scratch_backing_va = set_scratch_backing_va,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
+ .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 0f3e2944edd7..e68c0fa8d751 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -582,6 +582,13 @@ static void set_vm_context_page_table_base(struct amdgpu_device *adev,
lower_32_bits(page_table_base));
}
+static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -599,4 +606,5 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
get_atc_vmid_pasid_mapping_info,
.set_scratch_backing_va = set_scratch_backing_va,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 85567d0d9545..f5d5c45ddc0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -944,6 +944,7 @@ static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
drm_sched_entity_fini(entity);
}
}
+ kref_put(&ctx->refcount, amdgpu_ctx_fini);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 8e626f50b362..f81608330a3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1902,7 +1902,7 @@ no_preempt:
continue;
}
job = to_amdgpu_job(s_job);
- if (preempted && (&job->hw_fence) == fence)
+ if (preempted && (&job->hw_fence.base) == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e1bab6a96cb6..aa32df7e2fb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5193,6 +5193,8 @@ exit:
dev->dev->power.disable_depth--;
#endif
}
+
+ amdgpu_vram_mgr_clear_reset_blocks(adev);
adev->in_suspend = false;
if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
@@ -6019,16 +6021,12 @@ static int amdgpu_device_health_check(struct list_head *device_list_handle)
return ret;
}
-static int amdgpu_device_halt_activities(struct amdgpu_device *adev,
- struct amdgpu_job *job,
- struct amdgpu_reset_context *reset_context,
- struct list_head *device_list,
- struct amdgpu_hive_info *hive,
- bool need_emergency_restart)
+static int amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
+ struct list_head *device_list,
+ struct amdgpu_hive_info *hive)
{
- struct list_head *device_list_handle = NULL;
struct amdgpu_device *tmp_adev = NULL;
- int i, r = 0;
+ int r;
/*
* Build list of devices to reset.
@@ -6045,26 +6043,54 @@ static int amdgpu_device_halt_activities(struct amdgpu_device *adev,
}
if (!list_is_first(&adev->reset_list, device_list))
list_rotate_to_front(&adev->reset_list, device_list);
- device_list_handle = device_list;
} else {
list_add_tail(&adev->reset_list, device_list);
- device_list_handle = device_list;
}
if (!amdgpu_sriov_vf(adev) && (!adev->pcie_reset_ctx.occurs_dpc)) {
- r = amdgpu_device_health_check(device_list_handle);
+ r = amdgpu_device_health_check(device_list);
if (r)
return r;
}
- /* We need to lock reset domain only once both for XGMI and single device */
- tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
- reset_list);
+ return 0;
+}
+
+static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ if (list_empty(device_list))
+ return;
+ tmp_adev =
+ list_first_entry(device_list, struct amdgpu_device, reset_list);
amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
+}
- /* block all schedulers and reset given job's ring */
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ if (list_empty(device_list))
+ return;
+ tmp_adev =
+ list_first_entry(device_list, struct amdgpu_device, reset_list);
+ amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
+}
+
+static int amdgpu_device_halt_activities(
+ struct amdgpu_device *adev, struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context,
+ struct list_head *device_list, struct amdgpu_hive_info *hive,
+ bool need_emergency_restart)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ int i, r = 0;
+
+ /* block all schedulers and reset given job's ring */
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
amdgpu_device_set_mp1_state(tmp_adev);
/*
@@ -6252,11 +6278,6 @@ static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
amdgpu_ras_set_error_query_ready(tmp_adev, true);
}
-
- tmp_adev = list_first_entry(device_list, struct amdgpu_device,
- reset_list);
- amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
-
}
@@ -6324,10 +6345,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
reset_context->hive = hive;
INIT_LIST_HEAD(&device_list);
+ if (amdgpu_device_recovery_prepare(adev, &device_list, hive))
+ goto end_reset;
+
+ /* We need to lock reset domain only once both for XGMI and single device */
+ amdgpu_device_recovery_get_reset_lock(adev, &device_list);
+
r = amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
hive, need_emergency_restart);
if (r)
- goto end_reset;
+ goto reset_unlock;
if (need_emergency_restart)
goto skip_sched_resume;
@@ -6337,7 +6364,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
*
* job->base holds a reference to parent fence
*/
- if (job && dma_fence_is_signaled(&job->hw_fence)) {
+ if (job && dma_fence_is_signaled(&job->hw_fence.base)) {
job_signaled = true;
dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
goto skip_hw_reset;
@@ -6345,13 +6372,15 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
if (r)
- goto end_reset;
+ goto reset_unlock;
skip_hw_reset:
r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
if (r)
- goto end_reset;
+ goto reset_unlock;
skip_sched_resume:
amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
+reset_unlock:
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
end_reset:
if (hive) {
mutex_unlock(&hive->hive_lock);
@@ -6763,6 +6792,8 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
memset(&reset_context, 0, sizeof(reset_context));
INIT_LIST_HEAD(&device_list);
+ amdgpu_device_recovery_prepare(adev, &device_list, hive);
+ amdgpu_device_recovery_get_reset_lock(adev, &device_list);
r = amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
hive, false);
if (hive) {
@@ -6880,8 +6911,8 @@ out:
if (hive) {
list_for_each_entry(tmp_adev, &device_list, reset_list)
amdgpu_device_unset_mp1_state(tmp_adev);
- amdgpu_device_unlock_reset_domain(adev->reset_domain);
}
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
}
if (hive) {
@@ -6927,6 +6958,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
amdgpu_device_sched_resume(&device_list, NULL, NULL);
amdgpu_device_gpu_resume(adev, &device_list, false);
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
adev->pcie_reset_ctx.occurs_dpc = false;
if (hive) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index a0e9bf9b2710..81b3443c8d7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -321,10 +321,12 @@ static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev,
const struct firmware *fw;
int r;
- r = request_firmware(&fw, fw_name, adev->dev);
+ r = firmware_request_nowarn(&fw, fw_name, adev->dev);
if (r) {
- dev_err(adev->dev, "can't load firmware \"%s\"\n",
- fw_name);
+ if (amdgpu_discovery == 2)
+ dev_err(adev->dev, "can't load firmware \"%s\"\n", fw_name);
+ else
+ drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fw_name);
return r;
}
@@ -459,16 +461,12 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
/* Read from file if it is the preferred option */
fw_name = amdgpu_discovery_get_fw_name(adev);
if (fw_name != NULL) {
- dev_info(adev->dev, "use ip discovery information from file");
+ drm_dbg(&adev->ddev, "use ip discovery information from file");
r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name);
-
- if (r) {
- dev_err(adev->dev, "failed to read ip discovery binary from file\n");
- r = -EINVAL;
+ if (r)
goto out;
- }
-
} else {
+ drm_dbg(&adev->ddev, "use ip discovery information from memory");
r = amdgpu_discovery_read_binary_from_mem(
adev, adev->mman.discovery_bin);
if (r)
@@ -1338,10 +1336,8 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
int r;
r = amdgpu_discovery_init(adev);
- if (r) {
- DRM_ERROR("amdgpu_discovery_init failed\n");
+ if (r)
return r;
- }
wafl_ver = 0;
adev->gfx.xcc_mask = 0;
@@ -2579,8 +2575,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
break;
default:
r = amdgpu_discovery_reg_base_init(adev);
- if (r)
- return -EINVAL;
+ if (r) {
+ drm_err(&adev->ddev, "discovery failed: %d\n", r);
+ return r;
+ }
amdgpu_discovery_harvest_ip(adev);
amdgpu_discovery_get_gfx_info(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 8cecf25996ed..5fec808d7f54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -41,22 +41,6 @@
#include "amdgpu_trace.h"
#include "amdgpu_reset.h"
-/*
- * Fences mark an event in the GPUs pipeline and are used
- * for GPU/CPU synchronization. When the fence is written,
- * it is expected that all buffers associated with that fence
- * are no longer in use by the associated ring on the GPU and
- * that the relevant GPU caches have been flushed.
- */
-
-struct amdgpu_fence {
- struct dma_fence base;
-
- /* RB, DMA, etc. */
- struct amdgpu_ring *ring;
- ktime_t start_timestamp;
-};
-
static struct kmem_cache *amdgpu_fence_slab;
int amdgpu_fence_slab_init(void)
@@ -151,12 +135,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC);
if (am_fence == NULL)
return -ENOMEM;
- fence = &am_fence->base;
- am_fence->ring = ring;
} else {
/* make use of the job-embedded fence */
- fence = &job->hw_fence;
+ am_fence = &job->hw_fence;
}
+ fence = &am_fence->base;
+ am_fence->ring = ring;
seq = ++ring->fence_drv.sync_seq;
if (job && job->job_run_counter) {
@@ -718,7 +702,7 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
* it right here or we won't be able to track them in fence_drv
* and they will remain unsignaled during sa_bo free.
*/
- job = container_of(old, struct amdgpu_job, hw_fence);
+ job = container_of(old, struct amdgpu_job, hw_fence.base);
if (!job->base.s_fence && !dma_fence_is_signaled(old))
dma_fence_signal(old);
RCU_INIT_POINTER(*ptr, NULL);
@@ -780,7 +764,7 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
{
- struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
+ struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
return (const char *)to_amdgpu_ring(job->base.sched)->name;
}
@@ -810,7 +794,7 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
*/
static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
{
- struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
+ struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));
@@ -845,7 +829,7 @@ static void amdgpu_job_fence_free(struct rcu_head *rcu)
struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
/* free job if fence has a parent job */
- kfree(container_of(f, struct amdgpu_job, hw_fence));
+ kfree(container_of(f, struct amdgpu_job, hw_fence.base));
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index acb21fc8b3ce..ddb9d3269357 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -272,8 +272,8 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
/* Check if any fences were initialized */
if (job->base.s_fence && job->base.s_fence->finished.ops)
f = &job->base.s_fence->finished;
- else if (job->hw_fence.ops)
- f = &job->hw_fence;
+ else if (job->hw_fence.base.ops)
+ f = &job->hw_fence.base;
else
f = NULL;
@@ -290,10 +290,10 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
amdgpu_sync_free(&job->explicit_sync);
/* only put the hw fence if has embedded fence */
- if (!job->hw_fence.ops)
+ if (!job->hw_fence.base.ops)
kfree(job);
else
- dma_fence_put(&job->hw_fence);
+ dma_fence_put(&job->hw_fence.base);
}
void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
@@ -322,10 +322,10 @@ void amdgpu_job_free(struct amdgpu_job *job)
if (job->gang_submit != &job->base.s_fence->scheduled)
dma_fence_put(job->gang_submit);
- if (!job->hw_fence.ops)
+ if (!job->hw_fence.base.ops)
kfree(job);
else
- dma_fence_put(&job->hw_fence);
+ dma_fence_put(&job->hw_fence.base);
}
struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index f2c049129661..931fed8892cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -48,7 +48,7 @@ struct amdgpu_job {
struct drm_sched_job base;
struct amdgpu_vm *vm;
struct amdgpu_sync explicit_sync;
- struct dma_fence hw_fence;
+ struct amdgpu_fence hw_fence;
struct dma_fence *gang_submit;
uint32_t preamble_status;
uint32_t preemption_status;
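
With the job-embedded fence changed from a bare struct dma_fence to struct amdgpu_fence (its definition now shared via amdgpu_ring.h), every container_of() that recovers the job must name the nested member path hw_fence.base, as the hunks above do. A minimal userspace sketch of why the member path matters, with hypothetical types:

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct fence    { int seqno; };
struct hw_fence { struct fence base; void *ring; };	/* like amdgpu_fence */
struct job      { int id; struct hw_fence hw_fence; };	/* like amdgpu_job */

int main(void)
{
	struct job j = { .id = 7 };
	struct fence *f = &j.hw_fence.base;	/* what fence callbacks see */

	/* The full member path, hw_fence.base, recovers the outer job: */
	struct job *back = container_of(f, struct job, hw_fence.base);

	printf("job id = %d\n", back->id);	/* prints 7 */
	return 0;
}
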
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index e6f0b035e20b..c14f63cefe67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -3522,8 +3522,12 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name)
uint8_t *ucode_array_start_addr;
int err = 0;
- err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED,
- "amdgpu/%s_sos.bin", chip_name);
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sos_kicker.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sos.bin", chip_name);
if (err)
goto out;
@@ -3799,8 +3803,12 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name)
struct amdgpu_device *adev = psp->adev;
int err;
- err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED,
- "amdgpu/%s_ta.bin", chip_name);
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ta_kicker.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ta.bin", chip_name);
if (err)
return err;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 426834806fbf..6ac0ce361a2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -427,6 +427,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
{
unsigned long flags;
ktime_t deadline;
+ bool ret;
if (unlikely(ring->adev->debug_disable_soft_recovery))
return false;
@@ -441,12 +442,16 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
dma_fence_set_error(fence, -ENODATA);
spin_unlock_irqrestore(fence->lock, flags);
- atomic_inc(&ring->adev->gpu_reset_counter);
while (!dma_fence_is_signaled(fence) &&
ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
ring->funcs->soft_recovery(ring, vmid);
- return dma_fence_is_signaled(fence);
+ ret = dma_fence_is_signaled(fence);
+ /* increment the counter only if soft reset worked */
+ if (ret)
+ atomic_inc(&ring->adev->gpu_reset_counter);
+
+ return ret;
}
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index b95b47110769..e1f25218943a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -127,6 +127,22 @@ struct amdgpu_fence_driver {
struct dma_fence **fences;
};
+/*
+ * Fences mark an event in the GPU's pipeline and are used
+ * for GPU/CPU synchronization. When the fence is written,
+ * it is expected that all buffers associated with that fence
+ * are no longer in use by the associated ring on the GPU and
+ * that the relevant GPU caches have been flushed.
+ */
+
+struct amdgpu_fence {
+ struct dma_fence base;
+
+ /* RB, DMA, etc. */
+ struct amdgpu_ring *ring;
+ ktime_t start_timestamp;
+};
+
extern const struct drm_sched_backend_ops amdgpu_sched_ops;
void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 6716ac281c49..9b54a1ece447 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -540,8 +540,10 @@ static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id)
case IP_VERSION(4, 4, 2):
case IP_VERSION(4, 4, 4):
case IP_VERSION(4, 4, 5):
- /* For SDMA 4.x, use the existing DPM interface for backward compatibility */
- r = amdgpu_dpm_reset_sdma(adev, 1 << instance_id);
+ /* For SDMA 4.x, use the existing DPM interface for backward compatibility;
+ * the logical instance ID must be converted to the physical instance ID before reset.
+ */
+ r = amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, instance_id));
break;
case IP_VERSION(5, 0, 0):
case IP_VERSION(5, 0, 1):
@@ -568,7 +570,7 @@ static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id)
/**
* amdgpu_sdma_reset_engine - Reset a specific SDMA engine
* @adev: Pointer to the AMDGPU device
- * @instance_id: ID of the SDMA engine instance to reset
+ * @instance_id: Logical ID of the SDMA engine instance to reset
*
* Returns: 0 on success, or a negative error code on failure.
*/
@@ -601,7 +603,7 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
/* Perform the SDMA reset for the specified instance */
ret = amdgpu_sdma_soft_reset(adev, instance_id);
if (ret) {
- dev_err(adev->dev, "Failed to reset SDMA instance %u\n", instance_id);
+ dev_err(adev->dev, "Failed to reset SDMA logical instance %u\n", instance_id);
goto exit;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 208b7d1d8a27..450e4bf093b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -154,6 +154,7 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
uint64_t start, uint64_t size);
int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
uint64_t start);
+void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev);
bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
struct ttm_resource *res);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 2505c46a9c3d..eaddc441c51a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -30,6 +30,10 @@
#define AMDGPU_UCODE_NAME_MAX (128)
+static const struct kicker_device kicker_device_list[] = {
+ {0x744B, 0x00},
+};
+
static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr)
{
DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes));
@@ -1387,6 +1391,19 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl
return NULL;
}
+bool amdgpu_is_kicker_fw(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(kicker_device_list); i++) {
+ if (adev->pdev->device == kicker_device_list[i].device &&
+ adev->pdev->revision == kicker_device_list[i].revision)
+ return true;
+ }
+
+ return false;
+}
+
void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len)
{
int maj, min, rev;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 9e89c3487be5..6349aad6da35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -605,6 +605,11 @@ struct amdgpu_firmware {
uint32_t pldm_version;
};
+struct kicker_device {
+ unsigned short device;
+ u8 revision;
+};
+
void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_imu_hdr(const struct common_firmware_header *hdr);
@@ -632,5 +637,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id);
void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len);
+bool amdgpu_is_kicker_fw(struct amdgpu_device *adev);
#endif
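
amdgpu_is_kicker_fw() is a plain (device id, revision) table lookup; the psp, imu and gfx hunks in this series use it to pick a _kicker firmware filename over the stock one. A small userspace sketch of the lookup, using the one table entry from the diff:

#include <stdbool.h>
#include <stdio.h>

struct kicker_device { unsigned short device; unsigned char revision; };

static const struct kicker_device kicker_list[] = {
	{ 0x744B, 0x00 },
};

static bool is_kicker(unsigned short device, unsigned char revision)
{
	unsigned int i;

	for (i = 0; i < sizeof(kicker_list) / sizeof(kicker_list[0]); i++)
		if (kicker_list[i].device == device &&
		    kicker_list[i].revision == revision)
			return true;
	return false;
}

int main(void)
{
	printf("%s firmware\n", is_kicker(0x744B, 0x00) ? "kicker" : "stock");
	return 0;
}
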
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index abdc52b0895a..07c936e90d8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -783,6 +783,23 @@ uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr)
}
/**
+ * amdgpu_vram_mgr_clear_reset_blocks - reset clear blocks
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Reset the cleared drm buddy blocks.
+ */
+void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev)
+{
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct drm_buddy *mm = &mgr->mm;
+
+ mutex_lock(&mgr->lock);
+ drm_buddy_reset_clear(mm, false);
+ mutex_unlock(&mgr->lock);
+}
+
+/**
* amdgpu_vram_mgr_intersects - test each drm buddy block for intersection
*
* @man: TTM memory type manager
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index afd6d59164bf..ec9b84f92d46 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -85,6 +85,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
@@ -759,6 +760,10 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
AMDGPU_UCODE_REQUIRED,
"amdgpu/gc_11_0_0_rlc_1.bin");
+ else if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
else
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
AMDGPU_UCODE_REQUIRED,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 5ee2237d8ee8..bc983ecf3d99 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4640,6 +4640,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
/* reset ring buffer */
ring->wptr = 0;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
amdgpu_ring_clear_ring(ring);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index d377a7c57d5e..ad9be3656653 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2235,6 +2235,25 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
}
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 3, 0):
+ adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 167 &&
+ adev->gfx.pfp_fw_version >= 196 &&
+ adev->gfx.mec_fw_version >= 474) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
case IP_VERSION(9, 4, 2):
adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
index cfa91d709d49..cc626036ed9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
@@ -32,6 +32,7 @@
#include "gc/gc_11_0_0_sh_mask.h"
MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu_kicker.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin");
@@ -51,8 +52,12 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev)
DRM_DEBUG("\n");
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
- "amdgpu/%s_imu.bin", ucode_prefix);
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_imu_kicker.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_imu.bin", ucode_prefix);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index c9eba537de09..28eb846280dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -1630,10 +1630,12 @@ static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
if (r)
goto failure;
- r = mes_v11_0_set_hw_resources_1(&adev->mes);
- if (r) {
- DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r);
- goto failure;
+ if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x50) {
+ r = mes_v11_0_set_hw_resources_1(&adev->mes);
+ if (r) {
+ DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r);
+ goto failure;
+ }
}
r = mes_v11_0_query_sched_status(&adev->mes);
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index b4f17332d466..6b222630f3fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -1742,7 +1742,8 @@ static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
if (r)
goto failure;
- mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE);
+ if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x4b)
+ mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE);
mes_v12_0_init_aggregated_doorbell(&adev->mes);
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index df612fd9cc50..ead616c11705 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -42,7 +42,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos_kicker.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta_kicker.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin");
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 9c169112a5e7..bb82c652e4c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -45,6 +45,7 @@
#include "amdgpu_ras.h"
MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin");
+MODULE_FIRMWARE("amdgpu/sdma_4_4_4.bin");
MODULE_FIRMWARE("amdgpu/sdma_4_4_5.bin");
static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = {
@@ -490,7 +491,7 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev,
{
struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 doorbell_offset, doorbell;
- u32 rb_cntl, ib_cntl;
+ u32 rb_cntl, ib_cntl, sdma_cntl;
int i;
for_each_inst(i, inst_mask) {
@@ -502,6 +503,9 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev,
ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0);
WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
+ sdma_cntl = RREG32_SDMA(i, regSDMA_CNTL);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, UTC_L1_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_CNTL, sdma_cntl);
if (sdma[i]->use_doorbell) {
doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
@@ -995,6 +999,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
/* set utc l1 enable flag always to 1 */
temp = RREG32_SDMA(i, regSDMA_CNTL);
temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1);
+ WREG32_SDMA(i, regSDMA_CNTL, temp);
if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) {
/* enable context empty interrupt during initialization */
@@ -1670,7 +1675,7 @@ static bool sdma_v4_4_2_page_ring_is_guilty(struct amdgpu_ring *ring)
static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
{
struct amdgpu_device *adev = ring->adev;
- u32 id = GET_INST(SDMA0, ring->me);
+ u32 id = ring->me;
int r;
if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
@@ -1686,7 +1691,7 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- u32 instance_id = GET_INST(SDMA0, ring->me);
+ u32 instance_id = ring->me;
u32 inst_mask;
uint64_t rptr;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 9505ae96fbec..37f4b5b4a098 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1399,6 +1399,7 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
return r;
for (i = 0; i < adev->sdma.num_instances; i++) {
+ mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
adev->sdma.instance[i].funcs = &sdma_v5_0_sdma_funcs;
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
@@ -1542,8 +1543,13 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
{
struct amdgpu_device *adev = ring->adev;
u32 inst_id = ring->me;
+ int r;
+
+ amdgpu_amdkfd_suspend(adev, true);
+ r = amdgpu_sdma_reset_engine(adev, inst_id);
+ amdgpu_amdkfd_resume(adev, true);
- return amdgpu_sdma_reset_engine(adev, inst_id);
+ return r;
}
static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index a6e612b4a892..0b40411b92a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1318,6 +1318,7 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
}
for (i = 0; i < adev->sdma.num_instances; i++) {
+ mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
adev->sdma.instance[i].funcs = &sdma_v5_2_sdma_funcs;
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
@@ -1455,8 +1456,13 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
{
struct amdgpu_device *adev = ring->adev;
u32 inst_id = ring->me;
+ int r;
+
+ amdgpu_amdkfd_suspend(adev, true);
+ r = amdgpu_sdma_reset_engine(adev, inst_id);
+ amdgpu_amdkfd_resume(adev, true);
- return amdgpu_sdma_reset_engine(adev, inst_id);
+ return r;
}
static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 5a70ae17be04..a9bdf8d61d6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1374,9 +1374,22 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
else
DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
- /* add firmware version checks here */
- if (0 && !adev->sdma.disable_uq)
- adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(6, 0, 0):
+ if ((adev->sdma.instance[0].fw_version >= 24) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 0, 2):
+ if ((adev->sdma.instance[0].fw_version >= 21) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 0, 3):
+ if ((adev->sdma.instance[0].fw_version >= 25) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ default:
+ break;
+ }
r = amdgpu_sdma_sysfs_reset_mask_init(adev);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index ad47d0bdf777..86903eccbd4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -1349,9 +1349,15 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
else
DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
- /* add firmware version checks here */
- if (0 && !adev->sdma.disable_uq)
- adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ if ((adev->sdma.instance[0].fw_version >= 7836028) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ default:
+ break;
+ }
return r;
}
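
The sdma_v6_0 and sdma_v7_0 hunks replace the `if (0 && ...)` placeholder with real per-IP firmware floors for user-mode queue support. A small userspace sketch of the gating, using the version thresholds from the diff (the IP_VERSION encoding here is hypothetical):

#include <stdbool.h>
#include <stdio.h>

#define IP_VERSION(maj, min, rev)	(((maj) << 16) | ((min) << 8) | (rev))

/* Minimum SDMA firmware that supports user-mode queues, per IP version. */
static bool sdma_user_queues_supported(unsigned int ip, unsigned int fw)
{
	switch (ip) {
	case IP_VERSION(6, 0, 0): return fw >= 24;
	case IP_VERSION(6, 0, 2): return fw >= 21;
	case IP_VERSION(6, 0, 3): return fw >= 25;
	case IP_VERSION(7, 0, 0):
	case IP_VERSION(7, 0, 1): return fw >= 7836028;
	default:                  return false;
	}
}

int main(void)
{
	printf("%d\n", sdma_user_queues_supported(IP_VERSION(6, 0, 0), 24));
	return 0;
}
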
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
index 338cf43c45fe..cdefd7fcb0da 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
@@ -669,6 +669,9 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
if (indirect)
amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM);
+ /* resetting ring, fw should not check RB ring */
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+
/* Pause dpg */
vcn_v5_0_1_pause_dpg_mode(vinst, &state);
@@ -681,7 +684,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
- fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+
WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
@@ -692,6 +695,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ /* resetting done, fw can check RB ring */
fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 8fa6489b6f5d..505036968a77 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -240,7 +240,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
packet->bitfields2.engine_sel =
engine_sel__mes_map_queues__compute_vi;
- packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
+ packet->bitfields2.gws_control_queue = q->properties.is_gws ? 1 : 0;
packet->bitfields2.extended_engine_sel =
extended_engine_sel__mes_map_queues__legacy_engine_sel;
packet->bitfields2.queue_type =
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 865dca2547de..a0f22ea6d15a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1171,13 +1171,12 @@ svm_range_split_head(struct svm_range *prange, uint64_t new_start,
}
static void
-svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
- struct svm_range *pchild, enum svm_work_list_ops op)
+svm_range_add_child(struct svm_range *prange, struct svm_range *pchild, enum svm_work_list_ops op)
{
pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n",
pchild, pchild->start, pchild->last, prange, op);
- pchild->work_item.mm = mm;
+ pchild->work_item.mm = NULL;
pchild->work_item.op = op;
list_add_tail(&pchild->child_list, &prange->child_list);
}
@@ -1278,7 +1277,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
/* system memory accessed by the dGPU */
} else {
- if (gc_ip_version < IP_VERSION(9, 5, 0))
+ if (gc_ip_version < IP_VERSION(9, 5, 0) || ext_coherent)
mapping_flags |= AMDGPU_VM_MTYPE_UC;
else
mapping_flags |= AMDGPU_VM_MTYPE_NC;
@@ -2394,15 +2393,17 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
prange->work_item.op != SVM_OP_UNMAP_RANGE)
prange->work_item.op = op;
} else {
- prange->work_item.op = op;
-
- /* Pairs with mmput in deferred_list_work */
- mmget(mm);
- prange->work_item.mm = mm;
- list_add_tail(&prange->deferred_list,
- &prange->svms->deferred_range_list);
- pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
- prange, prange->start, prange->last, op);
+ /* Pairs with mmput in deferred_list_work.
+ * If process is exiting and mm is gone, don't update mmu notifier.
+ */
+ if (mmget_not_zero(mm)) {
+ prange->work_item.mm = mm;
+ prange->work_item.op = op;
+ list_add_tail(&prange->deferred_list,
+ &prange->svms->deferred_range_list);
+ pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
+ prange, prange->start, prange->last, op);
+ }
}
spin_unlock(&svms->deferred_list_lock);
}
@@ -2416,8 +2417,7 @@ void schedule_deferred_list_work(struct svm_range_list *svms)
}
static void
-svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
- struct svm_range *prange, unsigned long start,
+svm_range_unmap_split(struct svm_range *parent, struct svm_range *prange, unsigned long start,
unsigned long last)
{
struct svm_range *head;
@@ -2438,12 +2438,12 @@ svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
svm_range_split(tail, last + 1, tail->last, &head);
if (head != prange && tail != prange) {
- svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
- svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
+ svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, tail, SVM_OP_ADD_RANGE);
} else if (tail != prange) {
- svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, tail, SVM_OP_UNMAP_RANGE);
} else if (head != prange) {
- svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE);
} else if (parent != prange) {
prange->work_item.op = SVM_OP_UNMAP_RANGE;
}
@@ -2520,14 +2520,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
l = min(last, pchild->last);
if (l >= s)
svm_range_unmap_from_gpus(pchild, s, l, trigger);
- svm_range_unmap_split(mm, prange, pchild, start, last);
+ svm_range_unmap_split(prange, pchild, start, last);
mutex_unlock(&pchild->lock);
}
s = max(start, prange->start);
l = min(last, prange->last);
if (l >= s)
svm_range_unmap_from_gpus(prange, s, l, trigger);
- svm_range_unmap_split(mm, prange, prange, start, last);
+ svm_range_unmap_split(prange, prange, start, last);
if (unmap_parent)
svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE);
@@ -2570,8 +2570,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
if (range->event == MMU_NOTIFY_RELEASE)
return true;
- if (!mmget_not_zero(mni->mm))
- return true;
start = mni->interval_tree.start;
last = mni->interval_tree.last;
@@ -2598,7 +2596,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
}
svm_range_unlock(prange);
- mmput(mni->mm);
return true;
}
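
The kfd_svm changes above stop pinning the mm from the MMU-notifier path and instead take the reference with mmget_not_zero() when queuing deferred work, so nothing is scheduled once the process's mm_users count has already hit zero. The primitive underneath is an increment-unless-zero refcount; a self-contained C11 sketch of that semantic (not the kernel's implementation) looks like:

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Take a reference only if the count has not already dropped to zero --
     * the guarantee mmget_not_zero() provides for mm_struct::mm_users. */
    static bool get_if_not_zero(atomic_int *refs)
    {
        int old = atomic_load_explicit(refs, memory_order_relaxed);

        while (old != 0) {
            if (atomic_compare_exchange_weak_explicit(refs, &old, old + 1,
                                                      memory_order_acquire,
                                                      memory_order_relaxed))
                return true;
            /* the failed CAS reloaded old; retry unless it reached zero */
        }
        return false;
    }
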
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index baa2374acdeb..4ec73f33535e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -510,6 +510,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.capability |=
HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
+ if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0) &&
+ (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
+ dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED;
+
sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute",
dev->node_props.max_engine_clk_fcompute);
@@ -2008,8 +2012,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
if (!amdgpu_sriov_vf(dev->gpu->adev))
dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED;
- if (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
- dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED;
} else {
dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index d3100f641ac6..f58fa5da7fe5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3610,13 +3610,15 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
luminance_range = &conn_base->display_info.luminance_range;
- if (luminance_range->max_luminance) {
- caps->aux_min_input_signal = luminance_range->min_luminance;
+ if (luminance_range->max_luminance)
caps->aux_max_input_signal = luminance_range->max_luminance;
- } else {
- caps->aux_min_input_signal = 0;
+ else
caps->aux_max_input_signal = 512;
- }
+
+ if (luminance_range->min_luminance)
+ caps->aux_min_input_signal = luminance_range->min_luminance;
+ else
+ caps->aux_min_input_signal = 1;
min_input_signal_override = drm_get_panel_min_brightness_quirk(aconnector->drm_edid);
if (min_input_signal_override >= 0)
@@ -4718,9 +4720,23 @@ static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps,
return 1;
}
+/* Rescale from [min..max] to [0..MAX_BACKLIGHT_LEVEL] */
+static inline u32 scale_input_to_fw(int min, int max, u64 input)
+{
+ return DIV_ROUND_CLOSEST_ULL(input * MAX_BACKLIGHT_LEVEL, max - min);
+}
+
+/* Rescale from [0..MAX_BACKLIGHT_LEVEL] to [min..max] */
+static inline u32 scale_fw_to_input(int min, int max, u64 input)
+{
+ return min + DIV_ROUND_CLOSEST_ULL(input * (max - min), MAX_BACKLIGHT_LEVEL);
+}
+
static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *caps,
- uint32_t *brightness)
+ unsigned int min, unsigned int max,
+ uint32_t *user_brightness)
{
+ u32 brightness = scale_input_to_fw(min, max, *user_brightness);
u8 prev_signal = 0, prev_lum = 0;
int i = 0;
@@ -4731,7 +4747,7 @@ static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *cap
return;
/* choose start to run fewer interpolation steps */
- if (caps->luminance_data[caps->data_points/2].input_signal > *brightness)
+ if (caps->luminance_data[caps->data_points/2].input_signal > brightness)
i = caps->data_points/2;
do {
u8 signal = caps->luminance_data[i].input_signal;
@@ -4742,17 +4758,18 @@ static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *cap
* brightness < signal: interpolate between previous and current luminance numerator
* brightness > signal: find next data point
*/
- if (*brightness > signal) {
+ if (brightness > signal) {
prev_signal = signal;
prev_lum = lum;
i++;
continue;
}
- if (*brightness < signal)
+ if (brightness < signal)
lum = prev_lum + DIV_ROUND_CLOSEST((lum - prev_lum) *
- (*brightness - prev_signal),
+ (brightness - prev_signal),
signal - prev_signal);
- *brightness = DIV_ROUND_CLOSEST(lum * *brightness, 101);
+ *user_brightness = scale_fw_to_input(min, max,
+ DIV_ROUND_CLOSEST(lum * brightness, 101));
return;
} while (i < caps->data_points);
}
@@ -4765,11 +4782,10 @@ static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *c
if (!get_brightness_range(caps, &min, &max))
return brightness;
- convert_custom_brightness(caps, &brightness);
+ convert_custom_brightness(caps, min, max, &brightness);
- // Rescale 0..255 to min..max
- return min + DIV_ROUND_CLOSEST((max - min) * brightness,
- AMDGPU_MAX_BL_LEVEL);
+ // Rescale 0..max to min..max
+ return min + DIV_ROUND_CLOSEST_ULL((u64)(max - min) * brightness, max);
}
static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *caps,
@@ -4782,8 +4798,8 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap
if (brightness < min)
return 0;
- // Rescale min..max to 0..255
- return DIV_ROUND_CLOSEST(AMDGPU_MAX_BL_LEVEL * (brightness - min),
+ // Rescale min..max to 0..max
+ return DIV_ROUND_CLOSEST_ULL((u64)max * (brightness - min),
max - min);
}
@@ -4908,7 +4924,7 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector)
struct drm_device *drm = aconnector->base.dev;
struct amdgpu_display_manager *dm = &drm_to_adev(drm)->dm;
struct backlight_properties props = { 0 };
- struct amdgpu_dm_backlight_caps caps = { 0 };
+ struct amdgpu_dm_backlight_caps *caps;
char bl_name[16];
int min, max;
@@ -4922,22 +4938,21 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector)
return;
}
- amdgpu_acpi_get_backlight_caps(&caps);
- if (caps.caps_valid && get_brightness_range(&caps, &min, &max)) {
+ caps = &dm->backlight_caps[aconnector->bl_idx];
+ if (get_brightness_range(caps, &min, &max)) {
if (power_supply_is_system_supplied() > 0)
- props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps.ac_level, 100);
+ props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps->ac_level, 100);
else
- props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps.dc_level, 100);
+ props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps->dc_level, 100);
/* min is zero, so max needs to be adjusted */
props.max_brightness = max - min;
drm_dbg(drm, "Backlight caps: min: %d, max: %d, ac %d, dc %d\n", min, max,
- caps.ac_level, caps.dc_level);
+ caps->ac_level, caps->dc_level);
} else
- props.brightness = AMDGPU_MAX_BL_LEVEL;
+ props.brightness = props.max_brightness = MAX_BACKLIGHT_LEVEL;
- if (caps.data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE))
+ if (caps->data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE))
drm_info(drm, "Using custom brightness curve\n");
- props.max_brightness = AMDGPU_MAX_BL_LEVEL;
props.type = BACKLIGHT_RAW;
snprintf(bl_name, sizeof(bl_name), "amdgpu_bl%d",
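
The backlight rework above routes user brightness through a firmware-scale intermediate, replacing the fixed 0..255 assumption with the panel's own [min..max] range. The two helpers reduce to rounded linear maps; a standalone sketch follows, assuming MAX_BACKLIGHT_LEVEL is 255 (an assumption here; the exact constant lives in the driver headers):

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_BACKLIGHT_LEVEL 255u /* assumed firmware scale */

    /* Rounded integer division, like the kernel's DIV_ROUND_CLOSEST_ULL() */
    static uint64_t div_round_closest(uint64_t n, uint64_t d)
    {
        return (n + d / 2) / d;
    }

    /* Rescale a user value against [min..max] onto [0..MAX_BACKLIGHT_LEVEL] */
    static uint32_t scale_input_to_fw(uint32_t min, uint32_t max, uint64_t input)
    {
        return (uint32_t)div_round_closest(input * MAX_BACKLIGHT_LEVEL, max - min);
    }

    /* Rescale a firmware value from [0..MAX_BACKLIGHT_LEVEL] back to [min..max] */
    static uint32_t scale_fw_to_input(uint32_t min, uint32_t max, uint64_t input)
    {
        return min + (uint32_t)div_round_closest(input * (max - min),
                                                 MAX_BACKLIGHT_LEVEL);
    }

    int main(void)
    {
        uint32_t fw = scale_input_to_fw(16, 512, 300);

        printf("fw=%u user=%u\n", fw, scale_fw_to_input(16, 512, fw));
        return 0;
    }
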
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 87058271b00c..2551823382f8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -728,7 +728,16 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
* support programmable degamma anywhere.
*/
is_dcn = dm->adev->dm.dc->caps.color.dpp.dcn_arch;
- drm_crtc_enable_color_mgmt(&acrtc->base, is_dcn ? MAX_COLOR_LUT_ENTRIES : 0,
+ /* Don't enable DRM CRTC degamma property for DCN401 since the
+ * pre-blending degamma LUT doesn't apply to cursor, and therefore
+ * can't work similarly to a post-blending degamma LUT as in other hw
+ * versions.
+ * TODO: revisit it once KMS plane color API is merged.
+ */
+ drm_crtc_enable_color_mgmt(&acrtc->base,
+ (is_dcn &&
+ dm->adev->dm.dc->ctx->dce_version != DCN_VERSION_4_01) ?
+ MAX_COLOR_LUT_ENTRIES : 0,
true, MAX_COLOR_LUT_ENTRIES);
drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index d4395b92fb85..9e3e51a2dc49 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -1029,6 +1029,10 @@ enum dc_edid_status dm_helpers_read_local_edid(
return EDID_NO_RESPONSE;
edid = drm_edid_raw(drm_edid); // FIXME: Get rid of drm_edid_raw()
+ if (!edid ||
+ edid->extensions >= sizeof(sink->dc_edid.raw_edid) / EDID_LENGTH)
+ return EDID_BAD_INPUT;
+
sink->dc_edid.length = EDID_LENGTH * (edid->extensions + 1);
memmove(sink->dc_edid.raw_edid, (uint8_t *)edid, sink->dc_edid.length);
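
The helper fix above rejects EDIDs whose extension count would overflow the fixed raw_edid buffer before the memmove runs. The same bounds check in isolation, with the standard 128-byte EDID block size and a caller-supplied buffer size:

    #include <stdbool.h>
    #include <stddef.h>
    #include <string.h>

    #define EDID_LENGTH 128 /* one EDID block, per the EDID spec */

    /* Copy a base block plus extensions only if the total fits the sink
     * buffer -- the bound the hunk above enforces before its memmove. */
    static bool copy_edid_checked(unsigned char *dst, size_t dst_size,
                                  const unsigned char *edid,
                                  unsigned int extensions)
    {
        size_t len = (size_t)EDID_LENGTH * (extensions + 1);

        if (!edid || len > dst_size)
            return false; /* would overflow: treat as bad input */

        memmove(dst, edid, len);
        return true;
    }
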
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
index a3b8e3d4a429..4b17d2fcd565 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
@@ -1565,7 +1565,7 @@ struct clk_mgr_internal *dcn401_clk_mgr_construct(
clk_mgr->base.bw_params = kzalloc(sizeof(*clk_mgr->base.bw_params), GFP_KERNEL);
if (!clk_mgr->base.bw_params) {
BREAK_TO_DEBUGGER();
- kfree(clk_mgr);
+ kfree(clk_mgr401);
return NULL;
}
@@ -1576,6 +1576,7 @@ struct clk_mgr_internal *dcn401_clk_mgr_construct(
if (!clk_mgr->wm_range_table) {
BREAK_TO_DEBUGGER();
kfree(clk_mgr->base.bw_params);
+ kfree(clk_mgr401);
return NULL;
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 56d011a1323c..b34b5b52236d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -241,6 +241,7 @@ static bool create_links(
DC_LOG_DC("BIOS object table - end");
/* Create a link for each usb4 dpia port */
+ dc->lowest_dpia_link_index = MAX_LINKS;
for (i = 0; i < dc->res_pool->usb4_dpia_count; i++) {
struct link_init_data link_init_params = {0};
struct dc_link *link;
@@ -253,6 +254,9 @@ static bool create_links(
link = dc->link_srv->create_link(&link_init_params);
if (link) {
+ if (dc->lowest_dpia_link_index > dc->link_count)
+ dc->lowest_dpia_link_index = dc->link_count;
+
dc->links[dc->link_count] = link;
link->dc = dc;
++dc->link_count;
@@ -6376,6 +6380,35 @@ unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context)
else
return 0;
}
+/**
+ ***********************************************************************************************
+ * dc_get_host_router_index: Get index of host router from a dpia link
+ *
+ * This function returns the host router index of the target link, if the target link is a dpia link.
+ *
+ * @param [in] link: target link
+ * @param [out] host_router_index: host router index of the target link
+ *
+ * @return: true if the host router index is found and valid.
+ *
+ ***********************************************************************************************
+ */
+bool dc_get_host_router_index(const struct dc_link *link, unsigned int *host_router_index)
+{
+ struct dc *dc = link->ctx->dc;
+
+ if (link->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
+ return false;
+
+ if (link->link_index < dc->lowest_dpia_link_index)
+ return false;
+
+ *host_router_index = (link->link_index - dc->lowest_dpia_link_index) / dc->caps.num_of_dpias_per_host_router;
+ if (*host_router_index < dc->caps.num_of_host_routers)
+ return true;
+ else
+ return false;
+}
bool dc_is_cursor_limit_pending(struct dc *dc)
{
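
With lowest_dpia_link_index recorded during link creation, the host-router lookup above is an offset divided by the per-router DPIA count, bounds-checked against the router count. The arithmetic in isolation (all parameter values are illustrative):

    #include <stdbool.h>

    /* Map a link index to its host router, as dc_get_host_router_index()
     * does above. */
    static bool host_router_index(unsigned int link_index,
                                  unsigned int lowest_dpia_link_index,
                                  unsigned int dpias_per_router,
                                  unsigned int num_routers,
                                  unsigned int *out)
    {
        if (dpias_per_router == 0 || link_index < lowest_dpia_link_index)
            return false; /* not a dpia link (or caps not populated) */

        *out = (link_index - lowest_dpia_link_index) / dpias_per_router;
        return *out < num_routers;
    }
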
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 1d917be36fc4..f41073c0147e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -66,7 +66,8 @@ struct dmub_notification;
#define MAX_STREAMS 6
#define MIN_VIEWPORT_SIZE 12
#define MAX_NUM_EDP 2
-#define MAX_HOST_ROUTERS_NUM 2
+#define MAX_HOST_ROUTERS_NUM 3
+#define MAX_DPIA_PER_HOST_ROUTER 2
/* Display Core Interfaces */
struct dc_versions {
@@ -305,6 +306,8 @@ struct dc_caps {
/* Conservative limit for DCC cases which require ODM4:1 to support*/
uint32_t dcc_plane_width_limit;
struct dc_scl_caps scl_caps;
+ uint8_t num_of_host_routers;
+ uint8_t num_of_dpias_per_host_router;
};
struct dc_bug_wa {
@@ -1603,6 +1606,7 @@ struct dc {
uint8_t link_count;
struct dc_link *links[MAX_LINKS];
+ uint8_t lowest_dpia_link_index;
struct link_service *link_srv;
struct dc_state *current_state;
@@ -2595,6 +2599,8 @@ struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state
unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context);
+bool dc_get_host_router_index(const struct dc_link *link, unsigned int *host_router_index);
+
/* DSC Interfaces */
#include "dc_dsc.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
index 0bad8304ccf6..d346f8ae1634 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
@@ -1172,8 +1172,8 @@ struct dc_lttpr_caps {
union dp_128b_132b_supported_lttpr_link_rates supported_128b_132b_rates;
union dp_alpm_lttpr_cap alpm;
uint8_t aux_rd_interval[MAX_REPEATER_CNT - 1];
- uint8_t lttpr_ieee_oui[3];
- uint8_t lttpr_device_id[6];
+ uint8_t lttpr_ieee_oui[3]; // Always read from closest LTTPR to host
+ uint8_t lttpr_device_id[6]; // Always read from closest LTTPR to host
};
struct dc_dongle_dfp_cap_ext {
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index d562ddeca512..c9f6c6275ca1 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -974,6 +974,7 @@ struct dc_crtc_timing {
uint32_t pix_clk_100hz;
uint32_t min_refresh_in_uhz;
+ uint32_t max_refresh_in_uhz;
uint32_t vic;
uint32_t hdmi_vic;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
index d47cacfdb695..2aa6d44bb359 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
@@ -788,6 +788,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm
plane->pixel_format = dml2_420_10;
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
plane->pixel_format = dml2_444_64;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
index c4dad7164d31..5b62cd19d979 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
@@ -4685,7 +4685,10 @@ static void calculate_tdlut_setting(
//the tdlut is fetched during the 2 row times of prefetch.
if (p->setup_for_tdlut) {
*p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1);
- *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate;
+ if (*p->tdlut_bytes_per_frame > p->cursor_buffer_size * 1024)
+ *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate;
+ else
+ *p->tdlut_opt_time = 0;
*p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate;
*p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index 5de775fd8fce..208630754c8a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -953,6 +953,7 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p
out->SourcePixelFormat[location] = dml_420_10;
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
out->SourcePixelFormat[location] = dml_444_64;
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index e8730cc40edb..38e17b1796e1 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -1225,7 +1225,7 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx)
return;
if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) {
- if (!link->skip_implict_edp_power_control)
+ if (!link->skip_implict_edp_power_control && hws)
hws->funcs.edp_backlight_control(link, false);
link->dc->hwss.set_abm_immediate_disable(pipe_ctx);
}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index c814d957305a..a267f574b619 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -1047,6 +1047,15 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
if (dc->caps.sequential_ono) {
update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->stream_res.dsc->inst] = false;
update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->stream_res.dsc->inst] = false;
+
+ /* All HUBP/DPP instances must be powered if the DSC inst != HUBP inst */
+ if (!pipe_ctx->top_pipe && pipe_ctx->plane_res.hubp &&
+ pipe_ctx->plane_res.hubp->inst != pipe_ctx->stream_res.dsc->inst) {
+ for (j = 0; j < dc->res_pool->pipe_count; ++j) {
+ update_state->pg_pipe_res_update[PG_HUBP][j] = false;
+ update_state->pg_pipe_res_update[PG_DPP][j] = false;
+ }
+ }
}
}
@@ -1193,6 +1202,25 @@ void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context,
update_state->pg_pipe_res_update[PG_HDMISTREAM][0] = true;
if (dc->caps.sequential_ono) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (new_pipe->stream_res.dsc && !new_pipe->top_pipe &&
+ update_state->pg_pipe_res_update[PG_DSC][new_pipe->stream_res.dsc->inst]) {
+ update_state->pg_pipe_res_update[PG_HUBP][new_pipe->stream_res.dsc->inst] = true;
+ update_state->pg_pipe_res_update[PG_DPP][new_pipe->stream_res.dsc->inst] = true;
+
+ /* All HUBP/DPP instances must be powered if the DSC inst != HUBP inst */
+ if (new_pipe->plane_res.hubp &&
+ new_pipe->plane_res.hubp->inst != new_pipe->stream_res.dsc->inst) {
+ for (j = 0; j < dc->res_pool->pipe_count; ++j) {
+ update_state->pg_pipe_res_update[PG_HUBP][j] = true;
+ update_state->pg_pipe_res_update[PG_DPP][j] = true;
+ }
+ }
+ }
+ }
+
for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) {
if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
update_state->pg_pipe_res_update[PG_DPP][i]) {
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
index a5127c2d47ef..0f965380a9b4 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
@@ -385,9 +385,15 @@ bool dp_is_128b_132b_signal(struct pipe_ctx *pipe_ctx)
bool dp_is_lttpr_present(struct dc_link *link)
{
/* Some sink devices report invalid LTTPR revision, so don't validate against that cap */
- return (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) != 0 &&
+ uint32_t lttpr_count = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
+ bool is_lttpr_present = (lttpr_count > 0 &&
link->dpcd_caps.lttpr_caps.max_lane_count > 0 &&
link->dpcd_caps.lttpr_caps.max_lane_count <= 4);
+
+ if (lttpr_count > 0 && !is_lttpr_present)
+ DC_LOG_ERROR("LTTPR count is nonzero but invalid lane count reported. Assuming no LTTPR present.\n");
+
+ return is_lttpr_present;
}
/* in DP compliance test, DPR-120 may have
@@ -1551,6 +1557,8 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link)
uint8_t lttpr_dpcd_data[10] = {0};
enum dc_status status;
bool is_lttpr_present;
+ uint32_t lttpr_count;
+ uint32_t closest_lttpr_offset;
/* Logic to determine LTTPR support */
bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware;
@@ -1602,20 +1610,22 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link)
lttpr_dpcd_data[DP_LTTPR_ALPM_CAPABILITIES -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+ lttpr_count = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
+
/* If this chip cap is set, at least one retimer must exist in the chain
* Override count to 1 if we receive a known bad count (0 or an invalid value) */
if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
- (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) == 0)) {
+ lttpr_count == 0) {
/* If you see this message consistently, either the host platform has FIXED_VS flag
* incorrectly configured or the sink device is returning an invalid count.
*/
DC_LOG_ERROR("lttpr_caps phy_repeater_cnt is 0x%x, forcing it to 0x80.",
link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
link->dpcd_caps.lttpr_caps.phy_repeater_cnt = 0x80;
+ lttpr_count = 1;
DC_LOG_DC("lttpr_caps forced phy_repeater_cnt = %d\n", link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
}
- /* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */
is_lttpr_present = dp_is_lttpr_present(link);
DC_LOG_DC("is_lttpr_present = %d\n", is_lttpr_present);
@@ -1623,11 +1633,25 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link)
if (is_lttpr_present) {
CONN_DATA_DETECT(link, lttpr_dpcd_data, sizeof(lttpr_dpcd_data), "LTTPR Caps: ");
- core_link_read_dpcd(link, DP_LTTPR_IEEE_OUI, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui));
- CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui), "LTTPR IEEE OUI: ");
+ // Identify closest LTTPR to determine if workarounds required for known embedded LTTPR
+ closest_lttpr_offset = dp_get_closest_lttpr_offset(lttpr_count);
- core_link_read_dpcd(link, DP_LTTPR_DEVICE_ID, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id));
- CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id), "LTTPR Device ID: ");
+ core_link_read_dpcd(link, (DP_LTTPR_IEEE_OUI + closest_lttpr_offset),
+ link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui));
+ core_link_read_dpcd(link, (DP_LTTPR_DEVICE_ID + closest_lttpr_offset),
+ link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id));
+
+ if (lttpr_count > 1) {
+ CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui),
+ "Closest LTTPR To Host's IEEE OUI: ");
+ CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id),
+ "Closest LTTPR To Host's LTTPR Device ID: ");
+ } else {
+ CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui),
+ "LTTPR IEEE OUI: ");
+ CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id),
+ "LTTPR Device ID: ");
+ }
}
return status;
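
The LTTPR change above reads the IEEE OUI and device ID from the repeater closest to the host rather than from the base field addresses. DPCD spaces each repeater's register block 0x50 bytes apart (e.g. the PHY_REPEATER1/PHY_REPEATER2 training fields); a sketch of deriving a per-repeater field address follows, where the count-based indexing mirrors the dp_get_closest_lttpr_offset() helper used above and is an assumption here:

    #include <stdint.h>

    #define LTTPR_REG_SPACING 0x50u /* per-repeater DPCD block stride */

    static uint32_t lttpr_field_address(uint32_t base_field, uint32_t lttpr_count)
    {
        if (lttpr_count == 0)
            return base_field;
        return base_field + LTTPR_REG_SPACING * (lttpr_count - 1);
    }
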
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
index 7e0af5297dc4..51ca0b2959fc 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
@@ -1954,6 +1954,9 @@ static bool dcn31_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
+ dc->caps.num_of_host_routers = 2;
+ dc->caps.num_of_dpias_per_host_router = 2;
+
/* Use pipe context based otg sync logic */
dc->config.use_pipe_ctx_sync_logic = true;
dc->config.disable_hbr_audio_dp2 = true;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
index d96bc6cb73ad..8383e2e59be5 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
@@ -1885,6 +1885,9 @@ static bool dcn314_resource_construct(
dc->caps.max_disp_clock_khz_at_vmin = 650000;
+ dc->caps.num_of_host_routers = 2;
+ dc->caps.num_of_dpias_per_host_router = 2;
+
/* Use pipe context based otg sync logic */
dc->config.use_pipe_ctx_sync_logic = true;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index 72c6cf047db0..e01aa2f2e13e 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -1894,6 +1894,9 @@ static bool dcn35_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
+ dc->caps.num_of_host_routers = 2;
+ dc->caps.num_of_dpias_per_host_router = 2;
+
/* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order
* to provide some margin.
* It's expected for future ASICs to have an equal or higher value, in order to
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
index 989a270f7dea..4ebe4e00a4f8 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
@@ -1866,6 +1866,9 @@ static bool dcn351_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
+ dc->caps.num_of_host_routers = 2;
+ dc->caps.num_of_dpias_per_host_router = 2;
+
/* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order
* to provide some margin.
* It's expected for future ASICs to have an equal or higher value, in order to
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c
index 48e1f234185f..db36b8f9ce65 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c
@@ -1867,6 +1867,9 @@ static bool dcn36_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
+ dc->caps.num_of_host_routers = 2;
+ dc->caps.num_of_dpias_per_host_router = 2;
+
/* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order
* to provide some margin.
* It's expected for future ASICs to have an equal or higher value, in order to
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index 3ba9b62ba70b..250f09922d2f 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -155,6 +155,14 @@ unsigned int mod_freesync_calc_v_total_from_refresh(
v_total = div64_u64(div64_u64(((unsigned long long)(
frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)),
stream->timing.h_total), 1000000);
+ } else if (refresh_in_uhz >= stream->timing.max_refresh_in_uhz) {
+ /* When the target refresh rate is the maximum panel refresh rate
+ * round up the vtotal value to prevent off-by-one error causing
+ * v_total_min to be below the panel's lower bound
+ */
+ v_total = div64_u64(div64_u64(((unsigned long long)(
+ frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)),
+ stream->timing.h_total) + (1000000 - 1), 1000000);
} else {
v_total = div64_u64(div64_u64(((unsigned long long)(
frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)),
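
At the panel's maximum refresh rate the freesync fix above computes v_total with ceiling rather than truncating division, so an off-by-one can no longer push v_total_min below the panel's lower bound. The difference in isolation, with an illustrative quotient just under an integer boundary:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t div_floor(uint64_t n, uint64_t d) { return n / d; }
    static uint64_t div_ceil(uint64_t n, uint64_t d)  { return (n + d - 1) / d; }

    int main(void)
    {
        /* a quotient just below an integer boundary: ~16.666667 */
        uint64_t n = 16666667, d = 1000000;

        printf("floor=%llu ceil=%llu\n",
               (unsigned long long)div_floor(n, d),
               (unsigned long long)div_ceil(n, d)); /* floor=16 ceil=17 */
        return 0;
    }
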
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index a7167668d189..1c7235935d14 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -58,6 +58,7 @@
MODULE_FIRMWARE("amdgpu/aldebaran_smc.bin");
MODULE_FIRMWARE("amdgpu/smu_13_0_0.bin");
+MODULE_FIRMWARE("amdgpu/smu_13_0_0_kicker.bin");
MODULE_FIRMWARE("amdgpu/smu_13_0_7.bin");
MODULE_FIRMWARE("amdgpu/smu_13_0_10.bin");
@@ -92,7 +93,7 @@ const int pmfw_decoded_link_width[7] = {0, 1, 2, 4, 8, 12, 16};
int smu_v13_0_init_microcode(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
- char ucode_prefix[15];
+ char ucode_prefix[30];
int err = 0;
const struct smc_firmware_header_v1_0 *hdr;
const struct common_firmware_header *header;
@@ -103,8 +104,13 @@ int smu_v13_0_init_microcode(struct smu_context *smu)
return 0;
amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED,
- "amdgpu/%s.bin", ucode_prefix);
+
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_kicker.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", ucode_prefix);
if (err)
goto out;
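
The SMU change above requests a "_kicker" firmware image when amdgpu_is_kicker_fw() says so, and widens ucode_prefix because the decoded prefix plus the suffix no longer fits in 15 bytes ("amdgpu/smu_13_0_0_kicker.bin" needs 29 bytes including the terminator). A sketch of the name construction, with stand-in names for the decoded prefix and the kicker flag:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Build the firmware path, appending the kicker suffix when flagged.
     * The prefix and flag stand in for the decoded IP string and the
     * amdgpu_is_kicker_fw() result. */
    static void smu_fw_name(char *buf, size_t len, const char *prefix, bool kicker)
    {
        snprintf(buf, len, "amdgpu/%s%s.bin", prefix, kicker ? "_kicker" : "");
    }

    int main(void)
    {
        char name[30]; /* 29 bytes used by the longest name below */

        smu_fw_name(name, sizeof(name), "smu_13_0_0", true);
        puts(name); /* amdgpu/smu_13_0_0_kicker.bin */
        return 0;
    }
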
diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c
index 34547edf1ee3..87f2e5ee8790 100644
--- a/drivers/gpu/drm/arm/malidp_planes.c
+++ b/drivers/gpu/drm/arm/malidp_planes.c
@@ -159,7 +159,7 @@ bool malidp_format_mod_supported(struct drm_device *drm,
}
if (!fourcc_mod_is_vendor(modifier, ARM)) {
- DRM_ERROR("Unknown modifier (not Arm)\n");
+ DRM_DEBUG_KMS("Unknown modifier (not Arm)\n");
return false;
}
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index 1de832964e92..031980d8f3ab 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -29,7 +29,6 @@
*/
#include <linux/delay.h>
-#include <linux/export.h>
#include <linux/pci.h>
#include <drm/drm_atomic.h>
diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c
index b3f588b71a7d..af6f79793407 100644
--- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c
+++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c
@@ -64,10 +64,11 @@ struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, str
adev->id = ret;
adev->name = "dp_hpd_bridge";
adev->dev.parent = parent;
- adev->dev.of_node = of_node_get(parent->of_node);
adev->dev.release = drm_aux_hpd_bridge_release;
adev->dev.platform_data = of_node_get(np);
+ device_set_of_node_from_dev(&adev->dev, parent);
+
ret = auxiliary_device_init(adev);
if (ret) {
of_node_put(adev->dev.platform_data);
diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c
index 79b009ab9396..29b0358a7b6d 100644
--- a/drivers/gpu/drm/bridge/panel.c
+++ b/drivers/gpu/drm/bridge/panel.c
@@ -299,6 +299,7 @@ struct drm_bridge *drm_panel_bridge_add_typed(struct drm_panel *panel,
panel_bridge->bridge.of_node = panel->dev->of_node;
panel_bridge->bridge.ops = DRM_BRIDGE_OP_MODES;
panel_bridge->bridge.type = connector_type;
+ panel_bridge->bridge.pre_enable_prev_first = panel->prepare_prev_first;
drm_bridge_add(&panel_bridge->bridge);
@@ -413,8 +414,6 @@ struct drm_bridge *devm_drm_panel_bridge_add_typed(struct device *dev,
return bridge;
}
- bridge->pre_enable_prev_first = panel->prepare_prev_first;
-
*ptr = bridge;
devres_add(dev, ptr);
@@ -456,8 +455,6 @@ struct drm_bridge *drmm_panel_bridge_add(struct drm_device *drm,
if (ret)
return ERR_PTR(ret);
- bridge->pre_enable_prev_first = panel->prepare_prev_first;
-
return bridge;
}
EXPORT_SYMBOL(drmm_panel_bridge_add);
diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c
index 0014c497e3fe..bccc88d25948 100644
--- a/drivers/gpu/drm/bridge/samsung-dsim.c
+++ b/drivers/gpu/drm/bridge/samsung-dsim.c
@@ -1095,7 +1095,7 @@ static void samsung_dsim_send_to_fifo(struct samsung_dsim *dsi,
bool first = !xfer->tx_done;
u32 reg;
- dev_dbg(dev, "< xfer %pK: tx len %u, done %u, rx len %u, done %u\n",
+ dev_dbg(dev, "< xfer %p: tx len %u, done %u, rx len %u, done %u\n",
xfer, length, xfer->tx_done, xfer->rx_len, xfer->rx_done);
if (length > DSI_TX_FIFO_SIZE)
@@ -1293,7 +1293,7 @@ static bool samsung_dsim_transfer_finish(struct samsung_dsim *dsi)
spin_unlock_irqrestore(&dsi->transfer_lock, flags);
dev_dbg(dsi->dev,
- "> xfer %pK, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n",
+ "> xfer %p, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n",
xfer, xfer->packet.payload_length, xfer->tx_done, xfer->rx_len,
xfer->rx_done);
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index 60224f476e1d..834b42a4d31f 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -348,12 +348,18 @@ static void ti_sn65dsi86_enable_comms(struct ti_sn65dsi86 *pdata,
* 200 ms. We'll assume that the panel driver will have the hardcoded
* delay in its prepare and always disable HPD.
*
- * If HPD somehow makes sense on some future panel we'll have to
- * change this to be conditional on someone specifying that HPD should
- * be used.
+ * For DisplayPort bridge type, we need HPD. So we use the bridge type
+ * to conditionally disable HPD.
+ * NOTE: The bridge type is set in ti_sn_bridge_probe() but enable_comms()
+ * can be called before that. So for DisplayPort, HPD will be enabled
+ * once the bridge type is set. We use the bridge type instead of the
+ * "no-hpd" property because that property is not used consistently in
+ * devicetree descriptions and hence is unreliable.
*/
- regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE,
- HPD_DISABLE);
+
+ if (pdata->bridge.type != DRM_MODE_CONNECTOR_DisplayPort)
+ regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE,
+ HPD_DISABLE);
pdata->comms_enabled = true;
@@ -1195,9 +1201,14 @@ static enum drm_connector_status ti_sn_bridge_detect(struct drm_bridge *bridge)
struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge);
int val = 0;
- pm_runtime_get_sync(pdata->dev);
+ /*
+ * Runtime reference is grabbed in ti_sn_bridge_hpd_enable()
+ * as the chip won't report HPD just after being powered on.
+ * HPD_DEBOUNCED_STATE reflects correct state only after the
+ * debounce time (~100-400 ms).
+ */
+
regmap_read(pdata->regmap, SN_HPD_DISABLE_REG, &val);
- pm_runtime_put_autosuspend(pdata->dev);
return val & HPD_DEBOUNCED_STATE ? connector_status_connected
: connector_status_disconnected;
@@ -1220,6 +1231,26 @@ static void ti_sn65dsi86_debugfs_init(struct drm_bridge *bridge, struct dentry *
debugfs_create_file("status", 0600, debugfs, pdata, &status_fops);
}
+static void ti_sn_bridge_hpd_enable(struct drm_bridge *bridge)
+{
+ struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge);
+
+ /*
+ * Device needs to be powered on before reading the HPD state
+ * for reliable hpd detection in ti_sn_bridge_detect() due to
+ * the high debounce time.
+ */
+
+ pm_runtime_get_sync(pdata->dev);
+}
+
+static void ti_sn_bridge_hpd_disable(struct drm_bridge *bridge)
+{
+ struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge);
+
+ pm_runtime_put_autosuspend(pdata->dev);
+}
+
static const struct drm_bridge_funcs ti_sn_bridge_funcs = {
.attach = ti_sn_bridge_attach,
.detach = ti_sn_bridge_detach,
@@ -1234,6 +1265,8 @@ static const struct drm_bridge_funcs ti_sn_bridge_funcs = {
.atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
.atomic_destroy_state = drm_atomic_helper_bridge_destroy_state,
.debugfs_init = ti_sn65dsi86_debugfs_init,
+ .hpd_enable = ti_sn_bridge_hpd_enable,
+ .hpd_disable = ti_sn_bridge_hpd_disable,
};
static void ti_sn_bridge_parse_lanes(struct ti_sn65dsi86 *pdata,
@@ -1321,8 +1354,26 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev,
pdata->bridge.type = pdata->next_bridge->type == DRM_MODE_CONNECTOR_DisplayPort
? DRM_MODE_CONNECTOR_DisplayPort : DRM_MODE_CONNECTOR_eDP;
- if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort)
- pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT;
+ if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort) {
+ pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT |
+ DRM_BRIDGE_OP_HPD;
+ /*
+ * If comms were already enabled they would have been enabled
+ * with the wrong value of HPD_DISABLE. Update it now. Comms
+ * could be enabled if anyone is holding a pm_runtime reference
+ * (like if a GPIO is in use). Note that in most cases nobody
+ * is doing AUX channel xfers before the bridge is added so
+ * HPD doesn't _really_ matter then. The only exception is in
+ * the eDP case where the panel wants to read the EDID before
+ * the bridge is added. We always consistently have HPD disabled
+ * for eDP.
+ */
+ mutex_lock(&pdata->comms_mutex);
+ if (pdata->comms_enabled)
+ regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG,
+ HPD_DISABLE, 0);
+ mutex_unlock(&pdata->comms_mutex);
+ }
drm_bridge_add(&pdata->bridge);
diff --git a/drivers/gpu/drm/display/drm_bridge_connector.c b/drivers/gpu/drm/display/drm_bridge_connector.c
index 7d2e499ea5de..262e93e07a28 100644
--- a/drivers/gpu/drm/display/drm_bridge_connector.c
+++ b/drivers/gpu/drm/display/drm_bridge_connector.c
@@ -708,11 +708,14 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm,
if (bridge_connector->bridge_hdmi_audio ||
bridge_connector->bridge_dp_audio) {
struct device *dev;
+ struct drm_bridge *bridge;
if (bridge_connector->bridge_hdmi_audio)
- dev = bridge_connector->bridge_hdmi_audio->hdmi_audio_dev;
+ bridge = bridge_connector->bridge_hdmi_audio;
else
- dev = bridge_connector->bridge_dp_audio->hdmi_audio_dev;
+ bridge = bridge_connector->bridge_dp_audio;
+
+ dev = bridge->hdmi_audio_dev;
ret = drm_connector_hdmi_audio_init(connector, dev,
&drm_bridge_connector_hdmi_audio_funcs,
diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c
index f2a6559a2710..ea78c6c8ca7a 100644
--- a/drivers/gpu/drm/display/drm_dp_helper.c
+++ b/drivers/gpu/drm/display/drm_dp_helper.c
@@ -725,7 +725,7 @@ ssize_t drm_dp_dpcd_read(struct drm_dp_aux *aux, unsigned int offset,
* monitor doesn't power down exactly after the throw away read.
*/
if (!aux->is_remote) {
- ret = drm_dp_dpcd_probe(aux, DP_DPCD_REV);
+ ret = drm_dp_dpcd_probe(aux, DP_TRAINING_PATTERN_SET);
if (ret < 0)
return ret;
}
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 241c855f891f..66aff35f8647 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -405,6 +405,49 @@ drm_get_buddy(struct drm_buddy_block *block)
EXPORT_SYMBOL(drm_get_buddy);
/**
+ * drm_buddy_reset_clear - reset blocks clear state
+ *
+ * @mm: DRM buddy manager
+ * @is_clear: blocks clear state
+ *
+ * Reset the clear state based on @is_clear value for each block
+ * in the freelist.
+ */
+void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
+{
+ u64 root_size, size, start;
+ unsigned int order;
+ int i;
+
+ size = mm->size;
+ for (i = 0; i < mm->n_roots; ++i) {
+ order = ilog2(size) - ilog2(mm->chunk_size);
+ start = drm_buddy_block_offset(mm->roots[i]);
+ __force_merge(mm, start, start + size, order);
+
+ root_size = mm->chunk_size << order;
+ size -= root_size;
+ }
+
+ for (i = 0; i <= mm->max_order; ++i) {
+ struct drm_buddy_block *block;
+
+ list_for_each_entry_reverse(block, &mm->free_list[i], link) {
+ if (is_clear != drm_buddy_block_is_clear(block)) {
+ if (is_clear) {
+ mark_cleared(block);
+ mm->clear_avail += drm_buddy_block_size(mm, block);
+ } else {
+ clear_reset(block);
+ mm->clear_avail -= drm_buddy_block_size(mm, block);
+ }
+ }
+ }
+ }
+}
+EXPORT_SYMBOL(drm_buddy_reset_clear);
+
+/**
* drm_buddy_free_block - free a block
*
* @mm: DRM buddy manager
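
drm_buddy_reset_clear() above first force-merges each root back to its maximum order, then walks every free list and flips each block's clear flag to the requested state while keeping clear_avail consistent. A reduced sketch of the accounting walk over a singly linked free list (the real structure is per-order list_heads):

    #include <stdbool.h>
    #include <stdint.h>

    struct block {
        bool cleared;
        uint64_t size;
        struct block *next; /* simplified free-list link */
    };

    /* Force every free block's clear flag to is_clear and keep the running
     * total of clear bytes consistent, as the free-list loop above does. */
    static uint64_t reset_clear(struct block *free_list, bool is_clear,
                                uint64_t clear_avail)
    {
        for (struct block *b = free_list; b; b = b->next) {
            if (b->cleared == is_clear)
                continue;
            b->cleared = is_clear;
            if (is_clear)
                clear_avail += b->size;
            else
                clear_avail -= b->size;
        }
        return clear_avail;
    }
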
diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index b781601946db..63a70f285cce 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -862,11 +862,23 @@ EXPORT_SYMBOL_FOR_TESTS_ONLY(drm_framebuffer_free);
int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
const struct drm_framebuffer_funcs *funcs)
{
+ unsigned int i;
int ret;
+ bool exists;
if (WARN_ON_ONCE(fb->dev != dev || !fb->format))
return -EINVAL;
+ for (i = 0; i < fb->format->num_planes; i++) {
+ if (drm_WARN_ON_ONCE(dev, fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)))
+ fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i);
+ if (fb->obj[i]) {
+ exists = drm_gem_object_handle_get_if_exists_unlocked(fb->obj[i]);
+ if (exists)
+ fb->internal_flags |= DRM_FRAMEBUFFER_HAS_HANDLE_REF(i);
+ }
+ }
+
INIT_LIST_HEAD(&fb->filp_head);
fb->funcs = funcs;
@@ -875,7 +887,7 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
ret = __drm_mode_object_add(dev, &fb->base, DRM_MODE_OBJECT_FB,
false, drm_framebuffer_free);
if (ret)
- goto out;
+ goto err;
mutex_lock(&dev->mode_config.fb_lock);
dev->mode_config.num_fb++;
@@ -883,7 +895,16 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
mutex_unlock(&dev->mode_config.fb_lock);
drm_mode_object_register(dev, &fb->base);
-out:
+
+ return 0;
+
+err:
+ for (i = 0; i < fb->format->num_planes; i++) {
+ if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)) {
+ drm_gem_object_handle_put_unlocked(fb->obj[i]);
+ fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i);
+ }
+ }
return ret;
}
EXPORT_SYMBOL(drm_framebuffer_init);
@@ -960,6 +981,12 @@ EXPORT_SYMBOL(drm_framebuffer_unregister_private);
void drm_framebuffer_cleanup(struct drm_framebuffer *fb)
{
struct drm_device *dev = fb->dev;
+ unsigned int i;
+
+ for (i = 0; i < fb->format->num_planes; i++) {
+ if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i))
+ drm_gem_object_handle_put_unlocked(fb->obj[i]);
+ }
mutex_lock(&dev->mode_config.fb_lock);
list_del(&fb->head);
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 1e659d2660f7..ac0524595bd6 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -212,6 +212,46 @@ void drm_gem_private_object_fini(struct drm_gem_object *obj)
}
EXPORT_SYMBOL(drm_gem_private_object_fini);
+static void drm_gem_object_handle_get(struct drm_gem_object *obj)
+{
+ struct drm_device *dev = obj->dev;
+
+ drm_WARN_ON(dev, !mutex_is_locked(&dev->object_name_lock));
+
+ if (obj->handle_count++ == 0)
+ drm_gem_object_get(obj);
+}
+
+/**
+ * drm_gem_object_handle_get_if_exists_unlocked - acquire reference on user-space handle, if any
+ * @obj: GEM object
+ *
+ * Acquires a reference on the GEM buffer object's handle. Required to keep
+ * the GEM object alive. Call drm_gem_object_handle_put_if_exists_unlocked()
+ * to release the reference. Does nothing if the buffer object has no handle.
+ *
+ * Returns:
+ * True if a handle exists, or false otherwise
+ */
+bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj)
+{
+ struct drm_device *dev = obj->dev;
+
+ guard(mutex)(&dev->object_name_lock);
+
+ /*
+ * First ref taken during GEM object creation, if any. Some
+ * drivers set up internal framebuffers with GEM objects that
+ * do not have a GEM handle. Hence, this counter can be zero.
+ */
+ if (!obj->handle_count)
+ return false;
+
+ drm_gem_object_handle_get(obj);
+
+ return true;
+}
+
/**
* drm_gem_object_handle_free - release resources bound to userspace handles
* @obj: GEM object to clean up.
@@ -242,20 +282,26 @@ static void drm_gem_object_exported_dma_buf_free(struct drm_gem_object *obj)
}
}
-static void
-drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj)
+/**
+ * drm_gem_object_handle_put_unlocked - releases reference on user-space handle
+ * @obj: GEM object
+ *
+ * Releases a reference on the GEM buffer object's handle. Possibly releases
+ * the GEM buffer object and associated dma-buf objects.
+ */
+void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj)
{
struct drm_device *dev = obj->dev;
bool final = false;
- if (WARN_ON(READ_ONCE(obj->handle_count) == 0))
+ if (drm_WARN_ON(dev, READ_ONCE(obj->handle_count) == 0))
return;
/*
- * Must bump handle count first as this may be the last
- * ref, in which case the object would disappear before we
- * checked for a name
- */
+ * Must bump handle count first as this may be the last
+ * ref, in which case the object would disappear before
+ * we checked for a name.
+ */
mutex_lock(&dev->object_name_lock);
if (--obj->handle_count == 0) {
@@ -279,6 +325,9 @@ drm_gem_object_release_handle(int id, void *ptr, void *data)
struct drm_file *file_priv = data;
struct drm_gem_object *obj = ptr;
+ if (drm_WARN_ON(obj->dev, !data))
+ return 0;
+
if (obj->funcs->close)
obj->funcs->close(obj, file_priv);
@@ -389,8 +438,8 @@ drm_gem_handle_create_tail(struct drm_file *file_priv,
int ret;
WARN_ON(!mutex_is_locked(&dev->object_name_lock));
- if (obj->handle_count++ == 0)
- drm_gem_object_get(obj);
+
+ drm_gem_object_handle_get(obj);
/*
* Get the user-visible handle using idr. Preload and perform
@@ -399,7 +448,7 @@ drm_gem_handle_create_tail(struct drm_file *file_priv,
idr_preload(GFP_KERNEL);
spin_lock(&file_priv->table_lock);
- ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT);
+ ret = idr_alloc(&file_priv->object_idr, NULL, 1, 0, GFP_NOWAIT);
spin_unlock(&file_priv->table_lock);
idr_preload_end();
@@ -420,6 +469,11 @@ drm_gem_handle_create_tail(struct drm_file *file_priv,
goto err_revoke;
}
+ /* mirrors drm_gem_handle_delete to avoid races */
+ spin_lock(&file_priv->table_lock);
+ obj = idr_replace(&file_priv->object_idr, obj, handle);
+ WARN_ON(obj != NULL);
+ spin_unlock(&file_priv->table_lock);
*handlep = handle;
return 0;
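
The GEM handle change above reserves the idr slot with a NULL pointer and publishes the object via idr_replace() only after setup succeeds, so a concurrent lookup can never observe a half-initialized handle. The same reserve-then-publish shape over a plain table and mutex, with a reserved-marker convention standing in for idr semantics:

    #include <pthread.h>
    #include <stddef.h>

    #define MAX_HANDLES 64
    #define RESERVED ((void *)-1) /* slot taken but object not yet visible */

    static void *table[MAX_HANDLES];
    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Reserve a slot without exposing an object (idr_alloc() with NULL).
     * Lookups must treat both NULL and RESERVED as "no such handle". */
    static int handle_reserve(void)
    {
        int handle = -1;

        pthread_mutex_lock(&table_lock);
        for (int i = 1; i < MAX_HANDLES; i++) {
            if (!table[i]) {
                table[i] = RESERVED;
                handle = i;
                break;
            }
        }
        pthread_mutex_unlock(&table_lock);
        return handle;
    }

    /* Publish the fully constructed object (idr_replace() above). */
    static void handle_publish(int handle, void *obj)
    {
        pthread_mutex_lock(&table_lock);
        table[handle] = obj;
        pthread_mutex_unlock(&table_lock);
    }
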
diff --git a/drivers/gpu/drm/drm_gem_dma_helper.c b/drivers/gpu/drm/drm_gem_dma_helper.c
index b7f033d4352a..4f0320df858f 100644
--- a/drivers/gpu/drm/drm_gem_dma_helper.c
+++ b/drivers/gpu/drm/drm_gem_dma_helper.c
@@ -230,7 +230,7 @@ void drm_gem_dma_free(struct drm_gem_dma_object *dma_obj)
if (drm_gem_is_imported(gem_obj)) {
if (dma_obj->vaddr)
- dma_buf_vunmap_unlocked(gem_obj->dma_buf, &map);
+ dma_buf_vunmap_unlocked(gem_obj->import_attach->dmabuf, &map);
drm_prime_gem_destroy(gem_obj, dma_obj->sgt);
} else if (dma_obj->vaddr) {
if (dma_obj->map_noncoherent)
diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
index 6f72e7a0f427..6ff22e04029e 100644
--- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c
+++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
@@ -419,6 +419,7 @@ EXPORT_SYMBOL(drm_gem_fb_vunmap);
static void __drm_gem_fb_end_cpu_access(struct drm_framebuffer *fb, enum dma_data_direction dir,
unsigned int num_planes)
{
+ struct dma_buf_attachment *import_attach;
struct drm_gem_object *obj;
int ret;
@@ -427,9 +428,10 @@ static void __drm_gem_fb_end_cpu_access(struct drm_framebuffer *fb, enum dma_dat
obj = drm_gem_fb_get_obj(fb, num_planes);
if (!obj)
continue;
+ import_attach = obj->import_attach;
if (!drm_gem_is_imported(obj))
continue;
- ret = dma_buf_end_cpu_access(obj->dma_buf, dir);
+ ret = dma_buf_end_cpu_access(import_attach->dmabuf, dir);
if (ret)
drm_err(fb->dev, "dma_buf_end_cpu_access(%u, %d) failed: %d\n",
ret, num_planes, dir);
@@ -452,6 +454,7 @@ static void __drm_gem_fb_end_cpu_access(struct drm_framebuffer *fb, enum dma_dat
*/
int drm_gem_fb_begin_cpu_access(struct drm_framebuffer *fb, enum dma_data_direction dir)
{
+ struct dma_buf_attachment *import_attach;
struct drm_gem_object *obj;
unsigned int i;
int ret;
@@ -462,9 +465,10 @@ int drm_gem_fb_begin_cpu_access(struct drm_framebuffer *fb, enum dma_data_direct
ret = -EINVAL;
goto err___drm_gem_fb_end_cpu_access;
}
+ import_attach = obj->import_attach;
if (!drm_gem_is_imported(obj))
continue;
- ret = dma_buf_begin_cpu_access(obj->dma_buf, dir);
+ ret = dma_buf_begin_cpu_access(import_attach->dmabuf, dir);
if (ret)
goto err___drm_gem_fb_end_cpu_access;
}
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index aa43265f4f4f..a5dbee6974ab 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -349,7 +349,7 @@ int drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem,
int ret = 0;
if (drm_gem_is_imported(obj)) {
- ret = dma_buf_vmap(obj->dma_buf, map);
+ ret = dma_buf_vmap(obj->import_attach->dmabuf, map);
} else {
pgprot_t prot = PAGE_KERNEL;
@@ -409,7 +409,7 @@ void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem,
struct drm_gem_object *obj = &shmem->base;
if (drm_gem_is_imported(obj)) {
- dma_buf_vunmap(obj->dma_buf, map);
+ dma_buf_vunmap(obj->import_attach->dmabuf, map);
} else {
dma_resv_assert_held(shmem->base.resv);
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index e44f28fd81d3..60c282881958 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -161,6 +161,8 @@ void drm_sysfs_lease_event(struct drm_device *dev);
/* drm_gem.c */
int drm_gem_init(struct drm_device *dev);
+bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj);
+void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj);
int drm_gem_handle_create_tail(struct drm_file *file_priv,
struct drm_gem_object *obj,
u32 *handlep);
diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
index e5184a0c2465..21fd647f8ce1 100644
--- a/drivers/gpu/drm/drm_mipi_dsi.c
+++ b/drivers/gpu/drm/drm_mipi_dsi.c
@@ -91,12 +91,13 @@ static const struct dev_pm_ops mipi_dsi_device_pm_ops = {
.restore = pm_generic_restore,
};
-static const struct bus_type mipi_dsi_bus_type = {
+const struct bus_type mipi_dsi_bus_type = {
.name = "mipi-dsi",
.match = mipi_dsi_device_match,
.uevent = mipi_dsi_uevent,
.pm = &mipi_dsi_device_pm_ops,
};
+EXPORT_SYMBOL_GPL(mipi_dsi_bus_type);
/**
* of_find_mipi_dsi_device_by_node() - find the MIPI DSI device matching a
diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs
index dd55b1cb764d..18492daae4b3 100644
--- a/drivers/gpu/drm/drm_panic_qr.rs
+++ b/drivers/gpu/drm/drm_panic_qr.rs
@@ -27,7 +27,7 @@
//! * <https://github.com/erwanvivien/fast_qr>
//! * <https://github.com/bjguillot/qr>
-use kernel::{prelude::*, str::CStr};
+use kernel::prelude::*;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd)]
struct Version(usize);
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index d828502268b8..a0a5d725eab0 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -453,7 +453,13 @@ struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev,
}
mutex_lock(&dev->object_name_lock);
- /* re-export the original imported/exported object */
+ /* re-export the original imported object */
+ if (obj->import_attach) {
+ dmabuf = obj->import_attach->dmabuf;
+ get_dma_buf(dmabuf);
+ goto out_have_obj;
+ }
+
if (obj->dma_buf) {
get_dma_buf(obj->dma_buf);
dmabuf = obj->dma_buf;
diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
index edbeab88ff2b..d983ee85cf13 100644
--- a/drivers/gpu/drm/drm_writeback.c
+++ b/drivers/gpu/drm/drm_writeback.c
@@ -343,17 +343,18 @@ EXPORT_SYMBOL(drm_writeback_connector_init_with_encoder);
/**
* drm_writeback_connector_cleanup - Cleanup the writeback connector
* @dev: DRM device
- * @wb_connector: Pointer to the writeback connector to clean up
+ * @data: Pointer to the writeback connector to clean up
*
* This will decrement the reference counter of blobs and destroy properties. It
* will also clean the remaining jobs in this writeback connector. Caution: This helper will not
* clean up the attached encoder and the drm_connector.
*/
static void drm_writeback_connector_cleanup(struct drm_device *dev,
- struct drm_writeback_connector *wb_connector)
+ void *data)
{
unsigned long flags;
struct drm_writeback_job *pos, *n;
+ struct drm_writeback_connector *wb_connector = data;
delete_writeback_properties(dev);
drm_property_blob_put(wb_connector->pixel_formats_blob_ptr);
@@ -405,7 +406,7 @@ int drmm_writeback_connector_init(struct drm_device *dev,
if (ret)
return ret;
- ret = drmm_add_action_or_reset(dev, (void *)drm_writeback_connector_cleanup,
+ ret = drmm_add_action_or_reset(dev, drm_writeback_connector_cleanup,
wb_connector);
if (ret)
return ret;
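
Note on the change above: calling a function through a pointer of a different type is undefined behaviour in C, so the cleanup callback now carries the exact signature drmm_add_action_or_reset() expects and recovers its typed argument itself. A minimal sketch of that managed-action pattern (my_state/my_cleanup/my_init are illustrative names, not driver code):

    #include <drm/drm_device.h>
    #include <drm/drm_managed.h>
    #include <drm/drm_print.h>

    struct my_state { int id; };

    /* Must match the drmm action signature exactly:
     * void (*)(struct drm_device *, void *). */
    static void my_cleanup(struct drm_device *dev, void *data)
    {
        struct my_state *state = data;

        drm_dbg(dev, "releasing state %d\n", state->id);
    }

    static int my_init(struct drm_device *dev, struct my_state *state)
    {
        /* runs my_cleanup automatically on final device release */
        return drmm_add_action_or_reset(dev, my_cleanup, state);
    }
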
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
index 917ad527c961..40a50c60dfff 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
@@ -65,7 +65,7 @@ static void etnaviv_gem_prime_release(struct etnaviv_gem_object *etnaviv_obj)
struct iosys_map map = IOSYS_MAP_INIT_VADDR(etnaviv_obj->vaddr);
if (etnaviv_obj->vaddr)
- dma_buf_vunmap_unlocked(etnaviv_obj->base.dma_buf, &map);
+ dma_buf_vunmap_unlocked(etnaviv_obj->base.import_attach->dmabuf, &map);
/* Don't drop the pages for imported dmabuf, as they are not
* ours, just free the array we allocated:
@@ -82,7 +82,7 @@ static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj)
lockdep_assert_held(&etnaviv_obj->lock);
- ret = dma_buf_vmap(etnaviv_obj->base.dma_buf, &map);
+ ret = dma_buf_vmap(etnaviv_obj->base.import_attach->dmabuf, &map);
if (ret)
return NULL;
return map.vaddr;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index 76a3a3e517d8..71e2e6b9d713 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -35,6 +35,7 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job
*sched_job)
{
struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job);
+ struct drm_gpu_scheduler *sched = sched_job->sched;
struct etnaviv_gpu *gpu = submit->gpu;
u32 dma_addr, primid = 0;
int change;
@@ -89,7 +90,9 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job
return DRM_GPU_SCHED_STAT_NOMINAL;
out_no_timeout:
- list_add(&sched_job->list, &sched_job->sched->pending_list);
+ spin_lock(&sched->job_list_lock);
+ list_add(&sched_job->list, &sched->pending_list);
+ spin_unlock(&sched->job_list_lock);
return DRM_GPU_SCHED_STAT_NOMINAL;
}
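
For context, sched->pending_list is also walked and modified by the scheduler core, so even a lone list_add() needs job_list_lock. A hedged sketch of the pattern the fix applies (requeue_job is an illustrative name):

    #include <drm/gpu_scheduler.h>
    #include <linux/list.h>
    #include <linux/spinlock.h>

    /* Re-queue a job on the scheduler's pending list. A concurrent
     * list_del() on another CPU would otherwise corrupt the
     * prev/next pointers. */
    static void requeue_job(struct drm_gpu_scheduler *sched,
                            struct drm_sched_job *job)
    {
        spin_lock(&sched->job_list_lock);
        list_add(&job->list, &sched->pending_list);
        spin_unlock(&sched->job_list_lock);
    }
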
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index f91daefa9d2b..805aa28c1723 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -636,6 +636,10 @@ static irqreturn_t decon_irq_handler(int irq, void *dev_id)
if (!ctx->drm_dev)
goto out;
+ /* check if crtc and vblank have been initialized properly */
+ if (!drm_dev_has_vblank(ctx->drm_dev))
+ goto out;
+
if (!ctx->i80_if) {
drm_crtc_handle_vblank(&ctx->crtc->base);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index c394cc702d7d..205c238cc73a 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -187,6 +187,7 @@ struct fimd_context {
u32 i80ifcon;
bool i80_if;
bool suspended;
+ bool dp_clk_enabled;
wait_queue_head_t wait_vsync_queue;
atomic_t wait_vsync_event;
atomic_t win_updated;
@@ -1047,7 +1048,18 @@ static void fimd_dp_clock_enable(struct exynos_drm_clk *clk, bool enable)
struct fimd_context *ctx = container_of(clk, struct fimd_context,
dp_clk);
u32 val = enable ? DP_MIE_CLK_DP_ENABLE : DP_MIE_CLK_DISABLE;
+
+ if (enable == ctx->dp_clk_enabled)
+ return;
+
+ if (enable)
+ pm_runtime_resume_and_get(ctx->dev);
+
+ ctx->dp_clk_enabled = enable;
writel(val, ctx->regs + DP_MIE_CLKCON);
+
+ if (!enable)
+ pm_runtime_put(ctx->dev);
}
static const struct exynos_drm_crtc_ops fimd_crtc_ops = {
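
The dp_clk_enabled flag above keeps the runtime-PM reference count balanced when the clock callback fires twice with the same state. A generic sketch of that idempotent enable/disable guard (my_ctx/my_clk_set are illustrative names):

    #include <linux/device.h>
    #include <linux/pm_runtime.h>

    struct my_ctx {
        struct device *dev;
        bool clk_enabled;
    };

    static void my_clk_set(struct my_ctx *ctx, bool enable)
    {
        /* Ignore redundant transitions so each resume is paired
         * with exactly one put, whatever order callers use. */
        if (enable == ctx->clk_enabled)
            return;

        if (enable && pm_runtime_resume_and_get(ctx->dev) < 0)
            return;

        ctx->clk_enabled = enable;
        /* ... program the clock register here ... */

        if (!enable)
            pm_runtime_put(ctx->dev);
    }
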
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 4787fee4696f..d44401a695e2 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -174,7 +174,7 @@ static struct exynos_drm_gem *exynos_drm_gem_init(struct drm_device *dev,
return ERR_PTR(ret);
}
- DRM_DEV_DEBUG_KMS(dev->dev, "created file object = %pK\n", obj->filp);
+ DRM_DEV_DEBUG_KMS(dev->dev, "created file object = %p\n", obj->filp);
return exynos_gem;
}
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
index ea9f66037600..03c8490af4f4 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
@@ -271,7 +271,7 @@ static inline struct exynos_drm_ipp_task *
task->src.rect.h = task->dst.rect.h = UINT_MAX;
task->transform.rotation = DRM_MODE_ROTATE_0;
- DRM_DEV_DEBUG_DRIVER(task->dev, "Allocated task %pK\n", task);
+ DRM_DEV_DEBUG_DRIVER(task->dev, "Allocated task %p\n", task);
return task;
}
@@ -339,7 +339,7 @@ static int exynos_drm_ipp_task_set(struct exynos_drm_ipp_task *task,
}
DRM_DEV_DEBUG_DRIVER(task->dev,
- "Got task %pK configuration from userspace\n",
+ "Got task %p configuration from userspace\n",
task);
return 0;
}
@@ -394,7 +394,7 @@ static void exynos_drm_ipp_task_release_buf(struct exynos_drm_ipp_buffer *buf)
static void exynos_drm_ipp_task_free(struct exynos_drm_ipp *ipp,
struct exynos_drm_ipp_task *task)
{
- DRM_DEV_DEBUG_DRIVER(task->dev, "Freeing task %pK\n", task);
+ DRM_DEV_DEBUG_DRIVER(task->dev, "Freeing task %p\n", task);
exynos_drm_ipp_task_release_buf(&task->src);
exynos_drm_ipp_task_release_buf(&task->dst);
@@ -559,7 +559,7 @@ static int exynos_drm_ipp_check_format(struct exynos_drm_ipp_task *task,
DRM_EXYNOS_IPP_FORMAT_DESTINATION);
if (!fmt) {
DRM_DEV_DEBUG_DRIVER(task->dev,
- "Task %pK: %s format not supported\n",
+ "Task %p: %s format not supported\n",
task, buf == src ? "src" : "dst");
return -EINVAL;
}
@@ -609,7 +609,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task)
bool rotate = (rotation != DRM_MODE_ROTATE_0);
bool scale = false;
- DRM_DEV_DEBUG_DRIVER(task->dev, "Checking task %pK\n", task);
+ DRM_DEV_DEBUG_DRIVER(task->dev, "Checking task %p\n", task);
if (src->rect.w == UINT_MAX)
src->rect.w = src->buf.width;
@@ -625,7 +625,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task)
dst->rect.x + dst->rect.w > (dst->buf.width) ||
dst->rect.y + dst->rect.h > (dst->buf.height)) {
DRM_DEV_DEBUG_DRIVER(task->dev,
- "Task %pK: defined area is outside provided buffers\n",
+ "Task %p: defined area is outside provided buffers\n",
task);
return -EINVAL;
}
@@ -642,7 +642,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task)
(!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_SCALE) && scale) ||
(!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CONVERT) &&
src->buf.fourcc != dst->buf.fourcc)) {
- DRM_DEV_DEBUG_DRIVER(task->dev, "Task %pK: hw capabilities exceeded\n",
+ DRM_DEV_DEBUG_DRIVER(task->dev, "Task %p: hw capabilities exceeded\n",
task);
return -EINVAL;
}
@@ -655,7 +655,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task)
if (ret)
return ret;
- DRM_DEV_DEBUG_DRIVER(ipp->dev, "Task %pK: all checks done.\n",
+ DRM_DEV_DEBUG_DRIVER(ipp->dev, "Task %p: all checks done.\n",
task);
return ret;
@@ -667,25 +667,25 @@ static int exynos_drm_ipp_task_setup_buffers(struct exynos_drm_ipp_task *task,
struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst;
int ret = 0;
- DRM_DEV_DEBUG_DRIVER(task->dev, "Setting buffer for task %pK\n",
+ DRM_DEV_DEBUG_DRIVER(task->dev, "Setting buffer for task %p\n",
task);
ret = exynos_drm_ipp_task_setup_buffer(src, filp);
if (ret) {
DRM_DEV_DEBUG_DRIVER(task->dev,
- "Task %pK: src buffer setup failed\n",
+ "Task %p: src buffer setup failed\n",
task);
return ret;
}
ret = exynos_drm_ipp_task_setup_buffer(dst, filp);
if (ret) {
DRM_DEV_DEBUG_DRIVER(task->dev,
- "Task %pK: dst buffer setup failed\n",
+ "Task %p: dst buffer setup failed\n",
task);
return ret;
}
- DRM_DEV_DEBUG_DRIVER(task->dev, "Task %pK: buffers prepared.\n",
+ DRM_DEV_DEBUG_DRIVER(task->dev, "Task %p: buffers prepared.\n",
task);
return ret;
@@ -764,7 +764,7 @@ void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret)
struct exynos_drm_ipp *ipp = task->ipp;
unsigned long flags;
- DRM_DEV_DEBUG_DRIVER(task->dev, "ipp: %d, task %pK done: %d\n",
+ DRM_DEV_DEBUG_DRIVER(task->dev, "ipp: %d, task %p done: %d\n",
ipp->id, task, ret);
spin_lock_irqsave(&ipp->lock, flags);
@@ -807,7 +807,7 @@ static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp)
spin_unlock_irqrestore(&ipp->lock, flags);
DRM_DEV_DEBUG_DRIVER(ipp->dev,
- "ipp: %d, selected task %pK to run\n", ipp->id,
+ "ipp: %d, selected task %p to run\n", ipp->id,
task);
ret = ipp->funcs->commit(ipp, task);
@@ -917,14 +917,14 @@ int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, void *data,
*/
if (arg->flags & DRM_EXYNOS_IPP_FLAG_NONBLOCK) {
DRM_DEV_DEBUG_DRIVER(ipp->dev,
- "ipp: %d, nonblocking processing task %pK\n",
+ "ipp: %d, nonblocking processing task %p\n",
ipp->id, task);
task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC;
exynos_drm_ipp_schedule_task(task->ipp, task);
ret = 0;
} else {
- DRM_DEV_DEBUG_DRIVER(ipp->dev, "ipp: %d, processing task %pK\n",
+ DRM_DEV_DEBUG_DRIVER(ipp->dev, "ipp: %d, processing task %p\n",
ipp->id, task);
exynos_drm_ipp_schedule_task(ipp, task);
ret = wait_event_interruptible(ipp->done_wq,
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index ba7b8938b17c..166ee11831ab 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -1938,7 +1938,7 @@ static int get_init_otp_deassert_fragment_len(struct intel_display *display,
int index, len;
if (drm_WARN_ON(display->drm,
- !data || panel->vbt.dsi.seq_version != 1))
+ !data || panel->vbt.dsi.seq_version >= 3))
return 0;
/* index = 1 to skip sequence byte */
@@ -1961,7 +1961,7 @@ static int get_init_otp_deassert_fragment_len(struct intel_display *display,
}
/*
- * Some v1 VBT MIPI sequences do the deassert in the init OTP sequence.
+ * Some v1/v2 VBT MIPI sequences do the deassert in the init OTP sequence.
* The deassert must be done before calling intel_dsi_device_ready, so for
* these devices we split the init OTP sequence into a deassert sequence and
* the actual init OTP part.
@@ -1972,9 +1972,9 @@ static void vlv_fixup_mipi_sequences(struct intel_display *display,
u8 *init_otp;
int len;
- /* Limit this to v1 vid-mode sequences */
+ /* Limit this to v1/v2 vid-mode sequences */
if (panel->vbt.dsi.config->is_cmd_mode ||
- panel->vbt.dsi.seq_version != 1)
+ panel->vbt.dsi.seq_version >= 3)
return;
/* Only do this if there are otp and assert seqs and no deassert seq */
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 6f0a0bc71b06..43aa1f97378b 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -7061,7 +7061,8 @@ static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_stat
struct drm_i915_private *i915 = to_i915(intel_state->base.dev);
struct drm_plane *plane;
struct drm_plane_state *new_plane_state;
- int ret, i;
+ long ret;
+ int i;
for_each_new_plane_in_state(&intel_state->base, plane, new_plane_state, i) {
if (new_plane_state->fence) {
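
The type change matters because dma_fence_wait_timeout() returns a signed long (remaining jiffies, 0 on timeout, or a negative errno); narrowing the result to int can truncate on 64-bit builds. A small sketch of the intended usage (wait_on_fence is an illustrative name):

    #include <linux/dma-fence.h>
    #include <linux/errno.h>
    #include <linux/jiffies.h>

    /* dma_fence_wait_timeout() returns remaining jiffies (> 0),
     * 0 on timeout, or a negative errno - keep it in a long. */
    static long wait_on_fence(struct dma_fence *fence)
    {
        long ret = dma_fence_wait_timeout(fence, false,
                                          msecs_to_jiffies(500));

        return ret == 0 ? -ETIME : ret;
    }
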
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 640c43bf62d4..724de7ed3c04 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1604,6 +1604,12 @@ int intel_dp_rate_select(struct intel_dp *intel_dp, int rate)
void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock,
u8 *link_bw, u8 *rate_select)
{
+ struct intel_display *display = to_intel_display(intel_dp);
+
+ /* FIXME g4x can't generate an exact 2.7GHz with the 96MHz non-SSC refclk */
+ if (display->platform.g4x && port_clock == 268800)
+ port_clock = 270000;
+
/* eDP 1.4 rate select method. */
if (intel_dp->use_rate_select) {
*link_bw = 0;
diff --git a/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c b/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c
index 74bb3bedf30f..5111bdc3075b 100644
--- a/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c
+++ b/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c
@@ -103,8 +103,8 @@ static void get_ana_cp_int_prop(u64 vco_clk,
DIV_ROUND_DOWN_ULL(curve_1_interpolated, CURVE0_MULTIPLIER)));
ana_cp_int_temp =
- DIV_ROUND_CLOSEST_ULL(DIV_ROUND_DOWN_ULL(adjusted_vco_clk1, curve_2_scaled1),
- CURVE2_MULTIPLIER);
+ DIV64_U64_ROUND_CLOSEST(DIV_ROUND_DOWN_ULL(adjusted_vco_clk1, curve_2_scaled1),
+ CURVE2_MULTIPLIER);
*ana_cp_int = max(1, min(ana_cp_int_temp, 127));
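
DIV_ROUND_CLOSEST_ULL() divides through do_div(), which only takes a 32-bit divisor, so a u64 divisor such as CURVE2_MULTIPLIER would be silently truncated; DIV64_U64_ROUND_CLOSEST() performs a full 64-by-64 division. An illustrative sketch (example values, not the driver's constants):

    #include <linux/math64.h>
    #include <linux/types.h>

    static u64 round_closest_example(void)
    {
        u64 num = 10000000000ULL;  /* 1e10 */
        u64 div = 6000000000ULL;   /* 6e9: does not fit in 32 bits */

        /* DIV_ROUND_CLOSEST_ULL(num, div) would feed a truncated
         * divisor to do_div(); the 64-by-64 variant rounds
         * 1e10 / 6e9 = 1.67 to 2 as expected. */
        return DIV64_U64_ROUND_CLOSEST(num, div);
    }
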
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
index 346737f15fa9..2007bb9d974d 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
@@ -1056,7 +1056,7 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder,
BXT_MIPI_TRANS_VACTIVE(port));
adjusted_mode->crtc_vtotal =
intel_de_read(display,
- BXT_MIPI_TRANS_VTOTAL(port));
+ BXT_MIPI_TRANS_VTOTAL(port)) + 1;
hactive = adjusted_mode->crtc_hdisplay;
hfp = intel_de_read(display, MIPI_HFP_COUNT(display, port));
@@ -1260,7 +1260,7 @@ static void set_dsi_timings(struct intel_encoder *encoder,
intel_de_write(display, BXT_MIPI_TRANS_VACTIVE(port),
adjusted_mode->crtc_vdisplay);
intel_de_write(display, BXT_MIPI_TRANS_VTOTAL(port),
- adjusted_mode->crtc_vtotal);
+ adjusted_mode->crtc_vtotal - 1);
}
intel_de_write(display, MIPI_HACTIVE_AREA_COUNT(display, port),
@@ -1589,8 +1589,8 @@ static void vlv_dsi_add_properties(struct intel_connector *connector)
static void vlv_dphy_param_init(struct intel_dsi *intel_dsi)
{
+ struct intel_display *display = to_intel_display(&intel_dsi->base);
struct intel_connector *connector = intel_dsi->attached_connector;
- struct intel_display *display = to_intel_display(connector);
struct mipi_config *mipi_config = connector->panel.vbt.dsi.config;
u32 tlpx_ns, extra_byte_count, tlpx_ui;
u32 ui_num, ui_den;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 19a3eb82dc6a..9cbb0f68a5bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -6,6 +6,7 @@
#include <linux/pagevec.h>
#include <linux/shmem_fs.h>
#include <linux/swap.h>
+#include <linux/uio.h>
#include <drm/drm_cache.h>
@@ -400,12 +401,12 @@ static int
shmem_pwrite(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_pwrite *arg)
{
- struct address_space *mapping = obj->base.filp->f_mapping;
- const struct address_space_operations *aops = mapping->a_ops;
char __user *user_data = u64_to_user_ptr(arg->data_ptr);
- u64 remain;
- loff_t pos;
- unsigned int pg;
+ struct file *file = obj->base.filp;
+ struct kiocb kiocb;
+ struct iov_iter iter;
+ ssize_t written;
+ u64 size = arg->size;
/* Caller already validated user args */
GEM_BUG_ON(!access_ok(user_data, arg->size));
@@ -428,63 +429,24 @@ shmem_pwrite(struct drm_i915_gem_object *obj,
if (obj->mm.madv != I915_MADV_WILLNEED)
return -EFAULT;
- /*
- * Before the pages are instantiated the object is treated as being
- * in the CPU domain. The pages will be clflushed as required before
- * use, and we can freely write into the pages directly. If userspace
- * races pwrite with any other operation; corruption will ensue -
- * that is userspace's prerogative!
- */
+ if (size > MAX_RW_COUNT)
+ return -EFBIG;
- remain = arg->size;
- pos = arg->offset;
- pg = offset_in_page(pos);
+ if (!file->f_op->write_iter)
+ return -EINVAL;
- do {
- unsigned int len, unwritten;
- struct folio *folio;
- void *data, *vaddr;
- int err;
- char __maybe_unused c;
-
- len = PAGE_SIZE - pg;
- if (len > remain)
- len = remain;
-
- /* Prefault the user page to reduce potential recursion */
- err = __get_user(c, user_data);
- if (err)
- return err;
-
- err = __get_user(c, user_data + len - 1);
- if (err)
- return err;
-
- err = aops->write_begin(obj->base.filp, mapping, pos, len,
- &folio, &data);
- if (err < 0)
- return err;
-
- vaddr = kmap_local_folio(folio, offset_in_folio(folio, pos));
- pagefault_disable();
- unwritten = __copy_from_user_inatomic(vaddr, user_data, len);
- pagefault_enable();
- kunmap_local(vaddr);
-
- err = aops->write_end(obj->base.filp, mapping, pos, len,
- len - unwritten, folio, data);
- if (err < 0)
- return err;
-
- /* We don't handle -EFAULT, leave it to the caller to check */
- if (unwritten)
- return -ENODEV;
-
- remain -= len;
- user_data += len;
- pos += len;
- pg = 0;
- } while (remain);
+ init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = arg->offset;
+ iov_iter_ubuf(&iter, ITER_SOURCE, (void __user *)user_data, size);
+
+ written = file->f_op->write_iter(&kiocb, &iter);
+ BUG_ON(written == -EIOCBQUEUED);
+
+ if (written < 0)
+ return written;
+
+ if (written != size)
+ return -EIO;
return 0;
}
@@ -637,9 +599,8 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
{
struct drm_i915_gem_object *obj;
struct file *file;
- const struct address_space_operations *aops;
- loff_t pos;
- int err;
+ loff_t pos = 0;
+ ssize_t err;
GEM_WARN_ON(IS_DGFX(i915));
obj = i915_gem_object_create_shmem(i915, round_up(size, PAGE_SIZE));
@@ -649,29 +610,15 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
file = obj->base.filp;
- aops = file->f_mapping->a_ops;
- pos = 0;
- do {
- unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
- struct folio *folio;
- void *fsdata;
-
- err = aops->write_begin(file, file->f_mapping, pos, len,
- &folio, &fsdata);
- if (err < 0)
- goto fail;
+ err = kernel_write(file, data, size, &pos);
- memcpy_to_folio(folio, offset_in_folio(folio, pos), data, len);
+ if (err < 0)
+ goto fail;
- err = aops->write_end(file, file->f_mapping, pos, len, len,
- folio, fsdata);
- if (err < 0)
- goto fail;
-
- size -= len;
- data += len;
- pos += len;
- } while (size);
+ if (err != size) {
+ err = -EIO;
+ goto fail;
+ }
return obj;
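
Both hunks above replace open-coded write_begin()/write_end() loops with the regular VFS write paths. A hedged sketch of the kernel_write() idiom for the kernel-buffer case (write_blob is an illustrative name; the negative-errno check must precede the short-write check so the real error is not masked):

    #include <linux/errno.h>
    #include <linux/fs.h>

    /* Write a kernel buffer into a shmem-backed file in one call. */
    static int write_blob(struct file *file, const void *data, size_t size)
    {
        loff_t pos = 0;
        ssize_t written = kernel_write(file, data, size, &pos);

        if (written < 0)
            return written;   /* real errno first */
        if (written != size)
            return -EIO;      /* then the short-write case */
        return 0;
    }
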
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c
index 65d84a93c525..a09e2eb47175 100644
--- a/drivers/gpu/drm/i915/gem/i915_gemfs.c
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c
@@ -5,16 +5,23 @@
#include <linux/fs.h>
#include <linux/mount.h>
+#include <linux/fs_context.h>
#include "i915_drv.h"
#include "i915_gemfs.h"
#include "i915_utils.h"
+static int add_param(struct fs_context *fc, const char *key, const char *val)
+{
+ return vfs_parse_fs_string(fc, key, val, strlen(val));
+}
+
void i915_gemfs_init(struct drm_i915_private *i915)
{
- char huge_opt[] = "huge=within_size"; /* r/w */
struct file_system_type *type;
+ struct fs_context *fc;
struct vfsmount *gemfs;
+ int ret;
/*
* By creating our own shmemfs mountpoint, we can pass in
@@ -38,8 +45,16 @@ void i915_gemfs_init(struct drm_i915_private *i915)
if (!type)
goto err;
- gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, huge_opt);
- if (IS_ERR(gemfs))
+ fc = fs_context_for_mount(type, SB_KERNMOUNT);
+ if (IS_ERR(fc))
+ goto err;
+ ret = add_param(fc, "source", "tmpfs");
+ if (!ret)
+ ret = add_param(fc, "huge", "within_size");
+ if (!ret)
+ gemfs = fc_mount_longterm(fc);
+ put_fs_context(fc);
+ if (ret)
goto err;
i915->mm.gemfs = gemfs;
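
The mount now goes through the fs_context API, feeding options in as individual key/value strings instead of a packed option string. A sketch of that sequence (mount_private_tmpfs is an illustrative name; fc_mount() is the generic variant of the fc_mount_longterm() helper used above):

    #include <linux/err.h>
    #include <linux/fs.h>
    #include <linux/fs_context.h>
    #include <linux/mount.h>
    #include <linux/string.h>

    static struct vfsmount *mount_private_tmpfs(struct file_system_type *type)
    {
        struct fs_context *fc;
        struct vfsmount *mnt;
        int err;

        fc = fs_context_for_mount(type, SB_KERNMOUNT);
        if (IS_ERR(fc))
            return ERR_CAST(fc);

        /* options are parsed one key/value pair at a time */
        err = vfs_parse_fs_string(fc, "source", "tmpfs", strlen("tmpfs"));
        if (!err)
            err = vfs_parse_fs_string(fc, "huge", "within_size",
                                      strlen("within_size"));

        mnt = err ? ERR_PTR(err) : fc_mount(fc);
        put_fs_context(fc);
        return mnt;
    }
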
diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c
index 1e925c75fb08..c43febc862dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gsc.c
+++ b/drivers/gpu/drm/i915/gt/intel_gsc.c
@@ -284,7 +284,7 @@ static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id)
if (gt->gsc.intf[intf_id].irq < 0)
return;
- ret = generic_handle_irq(gt->gsc.intf[intf_id].irq);
+ ret = generic_handle_irq_safe(gt->gsc.intf[intf_id].irq);
if (ret)
gt_err_ratelimited(gt, "error handling GSC irq: %d\n", ret);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index a876a34455f1..2a6d79abf25b 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -610,7 +610,6 @@ static int ring_context_alloc(struct intel_context *ce)
/* One ringbuffer to rule them all */
GEM_BUG_ON(!engine->legacy.ring);
ce->ring = engine->legacy.ring;
- ce->timeline = intel_timeline_get(engine->legacy.timeline);
GEM_BUG_ON(ce->state);
if (engine->context_size) {
@@ -623,6 +622,8 @@ static int ring_context_alloc(struct intel_context *ce)
ce->state = vma;
}
+ ce->timeline = intel_timeline_get(engine->legacy.timeline);
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index e5a188ce3185..5bc696bfbb0f 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -108,11 +108,11 @@ static unsigned int config_bit(const u64 config)
return other_bit(config);
}
-static u32 config_mask(const u64 config)
+static __always_inline u32 config_mask(const u64 config)
{
unsigned int bit = config_bit(config);
- if (__builtin_constant_p(config))
+ if (__builtin_constant_p(bit))
BUILD_BUG_ON(bit >
BITS_PER_TYPE(typeof_member(struct i915_pmu,
enable)) - 1);
@@ -121,7 +121,7 @@ static u32 config_mask(const u64 config)
BITS_PER_TYPE(typeof_member(struct i915_pmu,
enable)) - 1);
- return BIT(config_bit(config));
+ return BIT(bit);
}
static bool is_engine_event(struct perf_event *event)
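
The __always_inline annotation is what makes the compile-time check useful: __builtin_constant_p() on the derived bit only folds to true once the body is inlined into a call site with a constant argument. A generic sketch of the pattern (make_mask and the limit 31 are illustrative):

    #include <linux/bits.h>
    #include <linux/build_bug.h>
    #include <linux/bug.h>
    #include <linux/compiler.h>
    #include <linux/types.h>

    /* Validate at compile time where the argument is constant,
     * at run time otherwise. */
    static __always_inline u32 make_mask(unsigned int bit)
    {
        if (__builtin_constant_p(bit))
            BUILD_BUG_ON(bit > 31);
        else
            WARN_ON(bit > 31);

        return BIT(bit);
    }
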
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 88870844b5bd..2fb7a9e7efec 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -73,8 +73,8 @@ static int igt_add_request(void *arg)
/* Basic preliminary test to create a request and let it loose! */
request = mock_request(rcs0(i915)->kernel_context, HZ / 10);
- if (!request)
- return -ENOMEM;
+ if (IS_ERR(request))
+ return PTR_ERR(request);
i915_request_add(request);
@@ -91,8 +91,8 @@ static int igt_wait_request(void *arg)
/* Submit a request, then wait upon it */
request = mock_request(rcs0(i915)->kernel_context, T);
- if (!request)
- return -ENOMEM;
+ if (IS_ERR(request))
+ return PTR_ERR(request);
i915_request_get(request);
@@ -160,8 +160,8 @@ static int igt_fence_wait(void *arg)
/* Submit a request, treat it as a fence and wait upon it */
request = mock_request(rcs0(i915)->kernel_context, T);
- if (!request)
- return -ENOMEM;
+ if (IS_ERR(request))
+ return PTR_ERR(request);
if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
pr_err("fence wait success before submit (expected timeout)!\n");
@@ -219,8 +219,8 @@ static int igt_request_rewind(void *arg)
GEM_BUG_ON(IS_ERR(ce));
request = mock_request(ce, 2 * HZ);
intel_context_put(ce);
- if (!request) {
- err = -ENOMEM;
+ if (IS_ERR(request)) {
+ err = PTR_ERR(request);
goto err_context_0;
}
@@ -237,8 +237,8 @@ static int igt_request_rewind(void *arg)
GEM_BUG_ON(IS_ERR(ce));
vip = mock_request(ce, 0);
intel_context_put(ce);
- if (!vip) {
- err = -ENOMEM;
+ if (IS_ERR(vip)) {
+ err = PTR_ERR(vip);
goto err_context_1;
}
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
index 09f747228dff..1b0cf073e964 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.c
+++ b/drivers/gpu/drm/i915/selftests/mock_request.c
@@ -35,7 +35,7 @@ mock_request(struct intel_context *ce, unsigned long delay)
/* NB the i915->requests slab cache is enlarged to fit mock_request */
request = intel_context_create_request(ce);
if (IS_ERR(request))
- return NULL;
+ return request;
request->mock.delay = delay;
return request;
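
Returning NULL here used to flatten every failure from intel_context_create_request() into -ENOMEM; passing the ERR_PTR through preserves the original errno. The convention in miniature (struct thing and its helpers are illustrative):

    #include <linux/err.h>

    struct thing { int value; };

    struct thing *create_thing(void);  /* returns ERR_PTR(-E...) on failure */

    static int use_thing(void)
    {
        struct thing *t = create_thing();

        /* Propagate the encoded errno rather than flattening
         * every failure into -ENOMEM via a NULL check. */
        if (IS_ERR(t))
            return PTR_ERR(t);

        return t->value;
    }
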
diff --git a/drivers/gpu/drm/imagination/pvr_power.c b/drivers/gpu/drm/imagination/pvr_power.c
index 41f5d89e78b8..3e349d039fc0 100644
--- a/drivers/gpu/drm/imagination/pvr_power.c
+++ b/drivers/gpu/drm/imagination/pvr_power.c
@@ -386,13 +386,13 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
if (!err) {
if (hard_reset) {
pvr_dev->fw_dev.booted = false;
- WARN_ON(pm_runtime_force_suspend(from_pvr_device(pvr_dev)->dev));
+ WARN_ON(pvr_power_device_suspend(from_pvr_device(pvr_dev)->dev));
err = pvr_fw_hard_reset(pvr_dev);
if (err)
goto err_device_lost;
- err = pm_runtime_force_resume(from_pvr_device(pvr_dev)->dev);
+ err = pvr_power_device_resume(from_pvr_device(pvr_dev)->dev);
pvr_dev->fw_dev.booted = true;
if (err)
goto err_device_lost;
diff --git a/drivers/gpu/drm/mediatek/mtk_crtc.c b/drivers/gpu/drm/mediatek/mtk_crtc.c
index 8f6fba4217ec..bc7527542fdc 100644
--- a/drivers/gpu/drm/mediatek/mtk_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_crtc.c
@@ -719,6 +719,39 @@ int mtk_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane,
return 0;
}
+void mtk_crtc_plane_disable(struct drm_crtc *crtc, struct drm_plane *plane)
+{
+#if IS_REACHABLE(CONFIG_MTK_CMDQ)
+ struct mtk_crtc *mtk_crtc = to_mtk_crtc(crtc);
+ struct mtk_plane_state *plane_state = to_mtk_plane_state(plane->state);
+ int i;
+
+ /* no need to wait when the CPU disables the plane */
+ if (!mtk_crtc->cmdq_client.chan)
+ return;
+
+ if (!mtk_crtc->enabled)
+ return;
+
+ /* set pending plane state to disabled */
+ for (i = 0; i < mtk_crtc->layer_nr; i++) {
+ struct drm_plane *mtk_plane = &mtk_crtc->planes[i];
+ struct mtk_plane_state *mtk_plane_state = to_mtk_plane_state(mtk_plane->state);
+
+ if (mtk_plane->index == plane->index) {
+ memcpy(mtk_plane_state, plane_state, sizeof(*plane_state));
+ break;
+ }
+ }
+ mtk_crtc_update_config(mtk_crtc, false);
+
+ /* wait for planes to be disabled by CMDQ */
+ wait_event_timeout(mtk_crtc->cb_blocking_queue,
+ mtk_crtc->cmdq_vblank_cnt == 0,
+ msecs_to_jiffies(500));
+#endif
+}
+
void mtk_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane,
struct drm_atomic_state *state)
{
@@ -930,7 +963,8 @@ static int mtk_crtc_init_comp_planes(struct drm_device *drm_dev,
mtk_ddp_comp_supported_rotations(comp),
mtk_ddp_comp_get_blend_modes(comp),
mtk_ddp_comp_get_formats(comp),
- mtk_ddp_comp_get_num_formats(comp), i);
+ mtk_ddp_comp_get_num_formats(comp),
+ mtk_ddp_comp_is_afbc_supported(comp), i);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/mediatek/mtk_crtc.h b/drivers/gpu/drm/mediatek/mtk_crtc.h
index 388e900b6f4d..828f109b83e7 100644
--- a/drivers/gpu/drm/mediatek/mtk_crtc.h
+++ b/drivers/gpu/drm/mediatek/mtk_crtc.h
@@ -21,6 +21,7 @@ int mtk_crtc_create(struct drm_device *drm_dev, const unsigned int *path,
unsigned int num_conn_routes);
int mtk_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane,
struct mtk_plane_state *state);
+void mtk_crtc_plane_disable(struct drm_crtc *crtc, struct drm_plane *plane);
void mtk_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane,
struct drm_atomic_state *plane_state);
struct device *mtk_crtc_dma_dev_get(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/mediatek/mtk_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_ddp_comp.c
index edc6417639e6..ac6620e10262 100644
--- a/drivers/gpu/drm/mediatek/mtk_ddp_comp.c
+++ b/drivers/gpu/drm/mediatek/mtk_ddp_comp.c
@@ -366,6 +366,7 @@ static const struct mtk_ddp_comp_funcs ddp_ovl = {
.get_blend_modes = mtk_ovl_get_blend_modes,
.get_formats = mtk_ovl_get_formats,
.get_num_formats = mtk_ovl_get_num_formats,
+ .is_afbc_supported = mtk_ovl_is_afbc_supported,
};
static const struct mtk_ddp_comp_funcs ddp_postmask = {
diff --git a/drivers/gpu/drm/mediatek/mtk_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_ddp_comp.h
index 39720b27f4e9..7289b3dcf22f 100644
--- a/drivers/gpu/drm/mediatek/mtk_ddp_comp.h
+++ b/drivers/gpu/drm/mediatek/mtk_ddp_comp.h
@@ -83,6 +83,7 @@ struct mtk_ddp_comp_funcs {
u32 (*get_blend_modes)(struct device *dev);
const u32 *(*get_formats)(struct device *dev);
size_t (*get_num_formats)(struct device *dev);
+ bool (*is_afbc_supported)(struct device *dev);
void (*connect)(struct device *dev, struct device *mmsys_dev, unsigned int next);
void (*disconnect)(struct device *dev, struct device *mmsys_dev, unsigned int next);
void (*add)(struct device *dev, struct mtk_mutex *mutex);
@@ -294,6 +295,14 @@ size_t mtk_ddp_comp_get_num_formats(struct mtk_ddp_comp *comp)
return 0;
}
+static inline bool mtk_ddp_comp_is_afbc_supported(struct mtk_ddp_comp *comp)
+{
+ if (comp->funcs && comp->funcs->is_afbc_supported)
+ return comp->funcs->is_afbc_supported(comp->dev);
+
+ return false;
+}
+
static inline bool mtk_ddp_comp_add(struct mtk_ddp_comp *comp, struct mtk_mutex *mutex)
{
if (comp->funcs && comp->funcs->add) {
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
index 04217a36939c..679d413bf10b 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h
+++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
@@ -106,6 +106,7 @@ void mtk_ovl_disable_vblank(struct device *dev);
u32 mtk_ovl_get_blend_modes(struct device *dev);
const u32 *mtk_ovl_get_formats(struct device *dev);
size_t mtk_ovl_get_num_formats(struct device *dev);
+bool mtk_ovl_is_afbc_supported(struct device *dev);
void mtk_ovl_adaptor_add_comp(struct device *dev, struct mtk_mutex *mutex);
void mtk_ovl_adaptor_remove_comp(struct device *dev, struct mtk_mutex *mutex);
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
index d0581c4e3c99..e0236353d499 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
@@ -236,6 +236,13 @@ size_t mtk_ovl_get_num_formats(struct device *dev)
return ovl->data->num_formats;
}
+bool mtk_ovl_is_afbc_supported(struct device *dev)
+{
+ struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
+
+ return ovl->data->supports_afbc;
+}
+
int mtk_ovl_clk_enable(struct device *dev)
{
struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c
index 6fb85bc6487a..a2fdceadf209 100644
--- a/drivers/gpu/drm/mediatek/mtk_dpi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dpi.c
@@ -1095,7 +1095,6 @@ static const u32 mt8183_output_fmts[] = {
};
static const u32 mt8195_dpi_output_fmts[] = {
- MEDIA_BUS_FMT_BGR888_1X24,
MEDIA_BUS_FMT_RGB888_1X24,
MEDIA_BUS_FMT_RGB888_2X12_LE,
MEDIA_BUS_FMT_RGB888_2X12_BE,
@@ -1103,18 +1102,19 @@ static const u32 mt8195_dpi_output_fmts[] = {
MEDIA_BUS_FMT_YUYV8_1X16,
MEDIA_BUS_FMT_YUYV10_1X20,
MEDIA_BUS_FMT_YUYV12_1X24,
+ MEDIA_BUS_FMT_BGR888_1X24,
MEDIA_BUS_FMT_YUV8_1X24,
MEDIA_BUS_FMT_YUV10_1X30,
};
static const u32 mt8195_dp_intf_output_fmts[] = {
- MEDIA_BUS_FMT_BGR888_1X24,
MEDIA_BUS_FMT_RGB888_1X24,
MEDIA_BUS_FMT_RGB888_2X12_LE,
MEDIA_BUS_FMT_RGB888_2X12_BE,
MEDIA_BUS_FMT_RGB101010_1X30,
MEDIA_BUS_FMT_YUYV8_1X16,
MEDIA_BUS_FMT_YUYV10_1X20,
+ MEDIA_BUS_FMT_BGR888_1X24,
MEDIA_BUS_FMT_YUV8_1X24,
MEDIA_BUS_FMT_YUV10_1X30,
};
diff --git a/drivers/gpu/drm/mediatek/mtk_plane.c b/drivers/gpu/drm/mediatek/mtk_plane.c
index 655106bbb76d..cbc4f37da8ba 100644
--- a/drivers/gpu/drm/mediatek/mtk_plane.c
+++ b/drivers/gpu/drm/mediatek/mtk_plane.c
@@ -285,9 +285,14 @@ static void mtk_plane_atomic_disable(struct drm_plane *plane,
struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state,
plane);
struct mtk_plane_state *mtk_plane_state = to_mtk_plane_state(new_state);
+ struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state,
+ plane);
+
mtk_plane_state->pending.enable = false;
wmb(); /* Make sure the above parameter is set before update */
mtk_plane_state->pending.dirty = true;
+
+ mtk_crtc_plane_disable(old_state->crtc, plane);
}
static void mtk_plane_atomic_update(struct drm_plane *plane,
@@ -321,7 +326,8 @@ static const struct drm_plane_helper_funcs mtk_plane_helper_funcs = {
int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane,
unsigned long possible_crtcs, enum drm_plane_type type,
unsigned int supported_rotations, const u32 blend_modes,
- const u32 *formats, size_t num_formats, unsigned int plane_idx)
+ const u32 *formats, size_t num_formats,
+ bool supports_afbc, unsigned int plane_idx)
{
int err;
@@ -332,7 +338,9 @@ int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane,
err = drm_universal_plane_init(dev, plane, possible_crtcs,
&mtk_plane_funcs, formats,
- num_formats, modifiers, type, NULL);
+ num_formats,
+ supports_afbc ? modifiers : NULL,
+ type, NULL);
if (err) {
DRM_ERROR("failed to initialize plane\n");
return err;
diff --git a/drivers/gpu/drm/mediatek/mtk_plane.h b/drivers/gpu/drm/mediatek/mtk_plane.h
index 3b13b89989c7..95c5fa5295d8 100644
--- a/drivers/gpu/drm/mediatek/mtk_plane.h
+++ b/drivers/gpu/drm/mediatek/mtk_plane.h
@@ -49,5 +49,6 @@ to_mtk_plane_state(struct drm_plane_state *state)
int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane,
unsigned long possible_crtcs, enum drm_plane_type type,
unsigned int supported_rotations, const u32 blend_modes,
- const u32 *formats, size_t num_formats, unsigned int plane_idx);
+ const u32 *formats, size_t num_formats,
+ bool supports_afbc, unsigned int plane_idx);
#endif
diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
index 47136bbbe8c6..ab08d690d882 100644
--- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c
+++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
@@ -109,7 +109,7 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi,
venc_freq /= 2;
dev_dbg(priv->dev,
- "vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n",
+ "phy:%lluHz vclk=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n",
phy_freq, vclk_freq, venc_freq, hdmi_freq,
priv->venc.hdmi_use_enci);
diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c
index 3325580d885d..dfe0c28a0f05 100644
--- a/drivers/gpu/drm/meson/meson_vclk.c
+++ b/drivers/gpu/drm/meson/meson_vclk.c
@@ -110,10 +110,7 @@
#define HDMI_PLL_LOCK BIT(31)
#define HDMI_PLL_LOCK_G12A (3 << 30)
-#define PIXEL_FREQ_1000_1001(_freq) \
- DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL)
-#define PHY_FREQ_1000_1001(_freq) \
- (PIXEL_FREQ_1000_1001(DIV_ROUND_DOWN_ULL(_freq, 10ULL)) * 10)
+#define FREQ_1000_1001(_freq) DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL)
/* VID PLL Dividers */
enum {
@@ -772,6 +769,36 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv,
pll_freq);
}
+static bool meson_vclk_freqs_are_matching_param(unsigned int idx,
+ unsigned long long phy_freq,
+ unsigned long long vclk_freq)
+{
+ DRM_DEBUG_DRIVER("i = %d vclk_freq = %lluHz alt = %lluHz\n",
+ idx, params[idx].vclk_freq,
+ FREQ_1000_1001(params[idx].vclk_freq));
+ DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n",
+ idx, params[idx].phy_freq,
+ FREQ_1000_1001(params[idx].phy_freq));
+
+ /* Match strict frequency */
+ if (phy_freq == params[idx].phy_freq &&
+ vclk_freq == params[idx].vclk_freq)
+ return true;
+
+ /* Match 1000/1001 variant: vclk deviation has to be less than 1kHz
+ * (drm EDID is defined in 1kHz steps, so everything smaller must be
+ * rounding error) and the PHY freq deviation has to be less than
+ * 10kHz (as the TMDS clock is 10 times the pixel clock, so anything
+ * smaller must be rounding error as well).
+ */
+ if (abs(vclk_freq - FREQ_1000_1001(params[idx].vclk_freq)) < 1000 &&
+ abs(phy_freq - FREQ_1000_1001(params[idx].phy_freq)) < 10000)
+ return true;
+
+ /* no match */
+ return false;
+}
+
enum drm_mode_status
meson_vclk_vic_supported_freq(struct meson_drm *priv,
unsigned long long phy_freq,
@@ -790,19 +817,7 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv,
}
for (i = 0 ; params[i].pixel_freq ; ++i) {
- DRM_DEBUG_DRIVER("i = %d pixel_freq = %lluHz alt = %lluHz\n",
- i, params[i].pixel_freq,
- PIXEL_FREQ_1000_1001(params[i].pixel_freq));
- DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n",
- i, params[i].phy_freq,
- PHY_FREQ_1000_1001(params[i].phy_freq));
- /* Match strict frequency */
- if (phy_freq == params[i].phy_freq &&
- vclk_freq == params[i].vclk_freq)
- return MODE_OK;
- /* Match 1000/1001 variant */
- if (phy_freq == PHY_FREQ_1000_1001(params[i].phy_freq) &&
- vclk_freq == PIXEL_FREQ_1000_1001(params[i].vclk_freq))
+ if (meson_vclk_freqs_are_matching_param(i, phy_freq, vclk_freq))
return MODE_OK;
}
@@ -1075,10 +1090,8 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target,
}
for (freq = 0 ; params[freq].pixel_freq ; ++freq) {
- if ((phy_freq == params[freq].phy_freq ||
- phy_freq == PHY_FREQ_1000_1001(params[freq].phy_freq)) &&
- (vclk_freq == params[freq].vclk_freq ||
- vclk_freq == PIXEL_FREQ_1000_1001(params[freq].vclk_freq))) {
+ if (meson_vclk_freqs_are_matching_param(freq, phy_freq,
+ vclk_freq)) {
if (vclk_freq != params[freq].vclk_freq)
vic_alternate_clock = true;
else
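
The 1000/1001 factor covers NTSC-rate modes such as 59.94 Hz, whose clocks are the nominal value scaled by 1000/1001; for 1080p that is 148500000 * 1000 / 1001 = 148351648 Hz. Since EDID works in 1 kHz steps, matching needs a tolerance rather than strict equality. A sketch (freq_matches is an illustrative name):

    #include <linux/math.h>
    #include <linux/types.h>

    #define FREQ_1000_1001(f) DIV_ROUND_CLOSEST_ULL((f) * 1000ULL, 1001ULL)

    /* 1080p nominal 148500000 Hz vs its 59.94 Hz variant
     * 148351648 Hz: anything within the 1 kHz EDID granularity
     * of the scaled value is rounding error. */
    static bool freq_matches(u64 req, u64 nominal)
    {
        s64 delta = req - FREQ_1000_1001(nominal);

        return req == nominal || abs(delta) < 1000;
    }
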
diff --git a/drivers/gpu/drm/mgag200/mgag200_ddc.c b/drivers/gpu/drm/mgag200/mgag200_ddc.c
index 6d81ea8931e8..c31673eaa554 100644
--- a/drivers/gpu/drm/mgag200/mgag200_ddc.c
+++ b/drivers/gpu/drm/mgag200/mgag200_ddc.c
@@ -26,7 +26,6 @@
* Authors: Dave Airlie <airlied@redhat.com>
*/
-#include <linux/export.h>
#include <linux/i2c-algo-bit.h>
#include <linux/i2c.h>
#include <linux/pci.h>
diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c
index 39641551eeb6..4280f71e472a 100644
--- a/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c
+++ b/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c
@@ -71,10 +71,6 @@ static int a2xx_gpummu_unmap(struct msm_mmu *mmu, uint64_t iova, size_t len)
return 0;
}
-static void a2xx_gpummu_resume_translation(struct msm_mmu *mmu)
-{
-}
-
static void a2xx_gpummu_destroy(struct msm_mmu *mmu)
{
struct a2xx_gpummu *gpummu = to_a2xx_gpummu(mmu);
@@ -90,7 +86,6 @@ static const struct msm_mmu_funcs funcs = {
.map = a2xx_gpummu_map,
.unmap = a2xx_gpummu_unmap,
.destroy = a2xx_gpummu_destroy,
- .resume_translation = a2xx_gpummu_resume_translation,
};
struct msm_mmu *a2xx_gpummu_new(struct device *dev, struct msm_gpu *gpu)
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 650e5bac225f..60aef0796236 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -131,6 +131,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
struct msm_ringbuffer *ring = submit->ring;
unsigned int i, ibs = 0;
+ adreno_check_and_reenable_stall(adreno_gpu);
+
if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
ring->cur_ctx_seqno = 0;
a5xx_submit_in_rb(gpu, submit);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index bf3758f010f4..491fde0083a2 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -130,6 +130,20 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
OUT_RING(ring, submit->seqno - 1);
+
+ OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
+ OUT_RING(ring, CP_SET_THREAD_BOTH);
+
+ /* Reset state used to synchronize BR and BV */
+ OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1);
+ OUT_RING(ring,
+ CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS |
+ CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE |
+ CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER |
+ CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS);
+
+ OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
+ OUT_RING(ring, CP_SET_THREAD_BR);
}
if (!sysprof) {
@@ -212,6 +226,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
struct msm_ringbuffer *ring = submit->ring;
unsigned int i, ibs = 0;
+ adreno_check_and_reenable_stall(adreno_gpu);
+
a6xx_set_pagetable(a6xx_gpu, ring, submit);
get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
@@ -335,6 +351,8 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
struct msm_ringbuffer *ring = submit->ring;
unsigned int i, ibs = 0;
+ adreno_check_and_reenable_stall(adreno_gpu);
+
/*
* Toggle concurrent binning for pagetable switch and set the thread to
* BR since only it can execute the pagetable switch packets.
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index f5e1490d07c1..16e7ac444efd 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -137,9 +137,8 @@ err_disable_rpm:
return NULL;
}
-static int find_chipid(struct device *dev, uint32_t *chipid)
+static int find_chipid(struct device_node *node, uint32_t *chipid)
{
- struct device_node *node = dev->of_node;
const char *compat;
int ret;
@@ -173,15 +172,36 @@ static int find_chipid(struct device *dev, uint32_t *chipid)
/* and if that fails, fall back to legacy "qcom,chipid" property: */
ret = of_property_read_u32(node, "qcom,chipid", chipid);
if (ret) {
- DRM_DEV_ERROR(dev, "could not parse qcom,chipid: %d\n", ret);
+ DRM_ERROR("%pOF: could not parse qcom,chipid: %d\n",
+ node, ret);
return ret;
}
- dev_warn(dev, "Using legacy qcom,chipid binding!\n");
+ pr_warn("%pOF: Using legacy qcom,chipid binding!\n", node);
return 0;
}
+bool adreno_has_gpu(struct device_node *node)
+{
+ const struct adreno_info *info;
+ uint32_t chip_id;
+ int ret;
+
+ ret = find_chipid(node, &chip_id);
+ if (ret)
+ return false;
+
+ info = adreno_info(chip_id);
+ if (!info) {
+ pr_warn("%pOF: Unknown GPU revision: %"ADRENO_CHIPID_FMT"\n",
+ node, ADRENO_CHIPID_ARGS(chip_id));
+ return false;
+ }
+
+ return true;
+}
+
static int adreno_bind(struct device *dev, struct device *master, void *data)
{
static struct adreno_platform_config config = {};
@@ -191,19 +211,18 @@ static int adreno_bind(struct device *dev, struct device *master, void *data)
struct msm_gpu *gpu;
int ret;
- ret = find_chipid(dev, &config.chip_id);
- if (ret)
+ ret = find_chipid(dev->of_node, &config.chip_id);
+ /* We shouldn't have gotten this far if we can't parse the chip_id */
+ if (WARN_ON(ret))
return ret;
dev->platform_data = &config;
priv->gpu_pdev = to_platform_device(dev);
info = adreno_info(config.chip_id);
- if (!info) {
- dev_warn(drm->dev, "Unknown GPU revision: %"ADRENO_CHIPID_FMT"\n",
- ADRENO_CHIPID_ARGS(config.chip_id));
+ /* We shouldn't have gotten this far if we don't recognize the GPU: */
+ if (WARN_ON(!info))
return -ENXIO;
- }
config.info = info;
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 2348ffb35f7e..86bff915c3e7 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -259,24 +259,54 @@ u64 adreno_private_address_space_size(struct msm_gpu *gpu)
return BIT(ttbr1_cfg->ias) - ADRENO_VM_START;
}
+void adreno_check_and_reenable_stall(struct adreno_gpu *adreno_gpu)
+{
+ struct msm_gpu *gpu = &adreno_gpu->base;
+ struct msm_drm_private *priv = gpu->dev->dev_private;
+ unsigned long flags;
+
+ /*
+ * Wait until the cooldown period has passed and we would actually
+ * collect a crashdump to re-enable stall-on-fault.
+ */
+ spin_lock_irqsave(&priv->fault_stall_lock, flags);
+ if (!priv->stall_enabled &&
+ ktime_after(ktime_get(), priv->stall_reenable_time) &&
+ !READ_ONCE(gpu->crashstate)) {
+ priv->stall_enabled = true;
+
+ gpu->aspace->mmu->funcs->set_stall(gpu->aspace->mmu, true);
+ }
+ spin_unlock_irqrestore(&priv->fault_stall_lock, flags);
+}
+
#define ARM_SMMU_FSR_TF BIT(1)
#define ARM_SMMU_FSR_PF BIT(3)
#define ARM_SMMU_FSR_EF BIT(4)
+#define ARM_SMMU_FSR_SS BIT(30)
int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags,
struct adreno_smmu_fault_info *info, const char *block,
u32 scratch[4])
{
+ struct msm_drm_private *priv = gpu->dev->dev_private;
const char *type = "UNKNOWN";
- bool do_devcoredump = info && !READ_ONCE(gpu->crashstate);
+ bool do_devcoredump = info && (info->fsr & ARM_SMMU_FSR_SS) &&
+ !READ_ONCE(gpu->crashstate);
+ unsigned long irq_flags;
/*
- * If we aren't going to be resuming later from fault_worker, then do
- * it now.
+ * In case there is a subsequent storm of pagefaults, disable
+ * stall-on-fault for at least half a second.
*/
- if (!do_devcoredump) {
- gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu);
+ spin_lock_irqsave(&priv->fault_stall_lock, irq_flags);
+ if (priv->stall_enabled) {
+ priv->stall_enabled = false;
+
+ gpu->aspace->mmu->funcs->set_stall(gpu->aspace->mmu, false);
}
+ priv->stall_reenable_time = ktime_add_ms(ktime_get(), 500);
+ spin_unlock_irqrestore(&priv->fault_stall_lock, irq_flags);
/*
* Print a default message if we couldn't get the data from the
@@ -304,16 +334,18 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags,
scratch[0], scratch[1], scratch[2], scratch[3]);
if (do_devcoredump) {
+ struct msm_gpu_fault_info fault_info = {};
+
/* Turn off the hangcheck timer to keep it from bothering us */
timer_delete(&gpu->hangcheck_timer);
- gpu->fault_info.ttbr0 = info->ttbr0;
- gpu->fault_info.iova = iova;
- gpu->fault_info.flags = flags;
- gpu->fault_info.type = type;
- gpu->fault_info.block = block;
+ fault_info.ttbr0 = info->ttbr0;
+ fault_info.iova = iova;
+ fault_info.flags = flags;
+ fault_info.type = type;
+ fault_info.block = block;
- kthread_queue_work(gpu->worker, &gpu->fault_work);
+ msm_gpu_fault_crashstate_capture(gpu, &fault_info);
}
return 0;
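
The mechanism above: a fault disables stall-on-fault and stamps a re-enable deadline 500 ms out; the next submit re-arms it once the deadline has passed and no crashdump is pending. The cooldown core, reduced to a sketch (stall_state and the helpers are illustrative names):

    #include <linux/ktime.h>
    #include <linux/spinlock.h>

    struct stall_state {
        spinlock_t lock;
        ktime_t reenable_time;
        bool enabled;
    };

    /* Called from the fault handler: back off for 500 ms. */
    static void stall_cooldown(struct stall_state *st)
    {
        unsigned long flags;

        spin_lock_irqsave(&st->lock, flags);
        st->enabled = false;
        st->reenable_time = ktime_add_ms(ktime_get(), 500);
        spin_unlock_irqrestore(&st->lock, flags);
    }

    /* Called on submit: re-arm once the cooldown has elapsed. */
    static bool stall_maybe_reenable(struct stall_state *st)
    {
        unsigned long flags;
        bool reenabled = false;

        spin_lock_irqsave(&st->lock, flags);
        if (!st->enabled && ktime_after(ktime_get(), st->reenable_time)) {
            st->enabled = true;
            reenabled = true;
        }
        spin_unlock_irqrestore(&st->lock, flags);

        return reenabled;
    }
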
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index a8f4bf416e64..bc063594a359 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -636,6 +636,8 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags,
struct adreno_smmu_fault_info *info, const char *block,
u32 scratch[4]);
+void adreno_check_and_reenable_stall(struct adreno_gpu *gpu);
+
int adreno_read_speedbin(struct device *dev, u32 *speedbin);
/*
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
index 8a618841e3ea..1c468ca5d692 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
@@ -94,17 +94,21 @@ static void drm_mode_to_intf_timing_params(
timing->vsync_polarity = 0;
}
- /* for DP/EDP, Shift timings to align it to bottom right */
- if (phys_enc->hw_intf->cap->type == INTF_DP) {
+ timing->wide_bus_en = dpu_encoder_is_widebus_enabled(phys_enc->parent);
+ timing->compression_en = dpu_encoder_is_dsc_enabled(phys_enc->parent);
+
+ /*
+ * For DP/EDP, Shift timings to align it to bottom right.
+ * wide_bus_en is set for everything excluding SDM845 &
+ * porch changes cause DisplayPort failure and HDMI tearing.
+ */
+ if (phys_enc->hw_intf->cap->type == INTF_DP && timing->wide_bus_en) {
timing->h_back_porch += timing->h_front_porch;
timing->h_front_porch = 0;
timing->v_back_porch += timing->v_front_porch;
timing->v_front_porch = 0;
}
- timing->wide_bus_en = dpu_encoder_is_widebus_enabled(phys_enc->parent);
- timing->compression_en = dpu_encoder_is_dsc_enabled(phys_enc->parent);
-
/*
* for DP, divide the horizontal parameters by 2 when
* widebus is enabled
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 386c4669c831..a48e6db4f156 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -128,6 +128,11 @@ static const struct msm_dp_desc msm_dp_desc_sa8775p[] = {
{}
};
+static const struct msm_dp_desc msm_dp_desc_sdm845[] = {
+ { .io_start = 0x0ae90000, .id = MSM_DP_CONTROLLER_0 },
+ {}
+};
+
static const struct msm_dp_desc msm_dp_desc_sc7180[] = {
{ .io_start = 0x0ae90000, .id = MSM_DP_CONTROLLER_0, .wide_bus_supported = true },
{}
@@ -180,7 +185,7 @@ static const struct of_device_id msm_dp_dt_match[] = {
{ .compatible = "qcom,sc8180x-edp", .data = &msm_dp_desc_sc8180x },
{ .compatible = "qcom,sc8280xp-dp", .data = &msm_dp_desc_sc8280xp },
{ .compatible = "qcom,sc8280xp-edp", .data = &msm_dp_desc_sc8280xp },
- { .compatible = "qcom,sdm845-dp", .data = &msm_dp_desc_sc7180 },
+ { .compatible = "qcom,sdm845-dp", .data = &msm_dp_desc_sdm845 },
{ .compatible = "qcom,sm8350-dp", .data = &msm_dp_desc_sc7180 },
{ .compatible = "qcom,sm8650-dp", .data = &msm_dp_desc_sm8650 },
{ .compatible = "qcom,x1e80100-dp", .data = &msm_dp_desc_x1e80100 },
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
index 9812b4d69197..af2e30f3f842 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
@@ -704,6 +704,13 @@ static int dsi_pll_10nm_init(struct msm_dsi_phy *phy)
/* TODO: Remove this when we have proper display handover support */
msm_dsi_phy_pll_save_state(phy);
+ /*
+ * Store also proper vco_current_rate, because its value will be used in
+ * dsi_10nm_pll_restore_state().
+ */
+ if (!dsi_pll_10nm_vco_recalc_rate(&pll_10nm->clk_hw, VCO_REF_CLK_RATE))
+ pll_10nm->vco_current_rate = pll_10nm->phy->cfg->min_pll_rate;
+
return 0;
}
diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c
index 7ab607252d18..6af72162cda4 100644
--- a/drivers/gpu/drm/msm/msm_debugfs.c
+++ b/drivers/gpu/drm/msm/msm_debugfs.c
@@ -208,6 +208,35 @@ DEFINE_DEBUGFS_ATTRIBUTE(shrink_fops,
shrink_get, shrink_set,
"0x%08llx\n");
+/*
+ * Return the number of microseconds to wait until stall-on-fault is
+ * re-enabled. A value of 0 means it is already enabled, or will be re-enabled
+ * on the next submit (unless there is a leftover devcoredump). This lets
+ * kernel tests that intentionally trigger a fault and inspect the devcoredump
+ * wait until the cooldown period is over.
+ */
+
+static int
+stall_reenable_time_get(void *data, u64 *val)
+{
+ struct msm_drm_private *priv = data;
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&priv->fault_stall_lock, irq_flags);
+
+ if (priv->stall_enabled)
+ *val = 0;
+ else
+ *val = max(ktime_us_delta(priv->stall_reenable_time, ktime_get()), 0);
+
+ spin_unlock_irqrestore(&priv->fault_stall_lock, irq_flags);
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(stall_reenable_time_fops,
+ stall_reenable_time_get, NULL,
+ "%lld\n");
static int msm_gem_show(struct seq_file *m, void *arg)
{
@@ -319,6 +348,9 @@ static void msm_debugfs_gpu_init(struct drm_minor *minor)
debugfs_create_bool("disable_err_irq", 0600, minor->debugfs_root,
&priv->disable_err_irq);
+ debugfs_create_file("stall_reenable_time_us", 0400, minor->debugfs_root,
+ priv, &stall_reenable_time_fops);
+
gpu_devfreq = debugfs_create_dir("devfreq", minor->debugfs_root);
debugfs_create_bool("idle_clamp",0600, gpu_devfreq,
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index f316e6776f67..d007687c2446 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -245,6 +245,10 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
drm_gem_lru_init(&priv->lru.willneed, &priv->lru.lock);
drm_gem_lru_init(&priv->lru.dontneed, &priv->lru.lock);
+ /* Initialize stall-on-fault */
+ spin_lock_init(&priv->fault_stall_lock);
+ priv->stall_enabled = true;
+
/* Teach lockdep about lock ordering wrt. shrinker: */
fs_reclaim_acquire(GFP_KERNEL);
might_lock(&priv->lru.lock);
@@ -926,7 +930,7 @@ static const struct drm_driver msm_driver = {
* is no external component that we need to add since LVDS is within MDP4
* itself.
*/
-static int add_components_mdp(struct device *master_dev,
+static int add_mdp_components(struct device *master_dev,
struct component_match **matchptr)
{
struct device_node *np = master_dev->of_node;
@@ -1030,7 +1034,7 @@ static int add_gpu_components(struct device *dev,
if (!np)
return 0;
- if (of_device_is_available(np))
+ if (of_device_is_available(np) && adreno_has_gpu(np))
drm_of_component_match_add(dev, matchptr, component_compare_of, np);
of_node_put(np);
@@ -1071,7 +1075,7 @@ int msm_drv_probe(struct device *master_dev,
/* Add mdp components if we have KMS. */
if (kms_init) {
- ret = add_components_mdp(master_dev, &match);
+ ret = add_mdp_components(master_dev, &match);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index a65077855201..c8afb1ea6040 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -222,6 +222,29 @@ struct msm_drm_private {
* the sw hangcheck mechanism.
*/
bool disable_err_irq;
+
+ /**
+ * @fault_stall_lock:
+ *
+ * Serialize changes to stall-on-fault state.
+ */
+ spinlock_t fault_stall_lock;
+
+ /**
+ * @stall_reenable_time:
+ *
+ * If stall_enabled is false, when to reenable stall-on-fault.
+ * Protected by @fault_stall_lock.
+ */
+ ktime_t stall_reenable_time;
+
+ /**
+ * @stall_enabled:
+ *
+ * Whether stall-on-fault is currently enabled. Protected by
+ * @fault_stall_lock.
+ */
+ bool stall_enabled;
};
const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format, uint64_t modifier);
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 3e9aa2cc38ef..d4f71bb54e84 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -85,6 +85,15 @@ void __msm_gem_submit_destroy(struct kref *kref)
container_of(kref, struct msm_gem_submit, ref);
unsigned i;
+ /*
+ * In error paths, we could unref the submit without calling
+ * drm_sched_entity_push_job(), so msm_job_free() will never
+ * get called. Since drm_sched_job_cleanup() will NULL out
+ * s_fence, we can use that to detect this case.
+ */
+ if (submit->base.s_fence)
+ drm_sched_job_cleanup(&submit->base);
+
if (submit->fence_id) {
spin_lock(&submit->queue->idr_lock);
idr_remove(&submit->queue->fence_idr, submit->fence_id);
@@ -649,6 +658,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
struct msm_ringbuffer *ring;
struct msm_submit_post_dep *post_deps = NULL;
struct drm_syncobj **syncobjs_to_reset = NULL;
+ struct sync_file *sync_file = NULL;
int out_fence_fd = -1;
unsigned i;
int ret;
@@ -858,7 +868,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
}
if (ret == 0 && args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
- struct sync_file *sync_file = sync_file_create(submit->user_fence);
+ sync_file = sync_file_create(submit->user_fence);
if (!sync_file) {
ret = -ENOMEM;
} else {
@@ -892,8 +902,11 @@ out:
out_unlock:
mutex_unlock(&queue->lock);
out_post_unlock:
- if (ret && (out_fence_fd >= 0))
+ if (ret && (out_fence_fd >= 0)) {
put_unused_fd(out_fence_fd);
+ if (sync_file)
+ fput(sync_file->file);
+ }
if (!IS_ERR_OR_NULL(submit)) {
msm_gem_submit_put(submit);
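
Hoisting sync_file out of the inner block lets the error path drop both halves: put_unused_fd() only releases the reserved descriptor, while the file (and its fence reference) needs a separate fput() until fd_install() transfers ownership. A sketch of the pairing (export_fence_fd is an illustrative name):

    #include <linux/dma-fence.h>
    #include <linux/errno.h>
    #include <linux/fcntl.h>
    #include <linux/file.h>
    #include <linux/sync_file.h>

    static int export_fence_fd(struct dma_fence *fence)
    {
        struct sync_file *sync_file;
        int fd = get_unused_fd_flags(O_CLOEXEC);

        if (fd < 0)
            return fd;

        sync_file = sync_file_create(fence);
        if (!sync_file) {
            put_unused_fd(fd);  /* nothing else to drop yet */
            return -ENOMEM;
        }

        /* Any failure from here on must also fput(sync_file->file);
         * put_unused_fd() alone would leak the file and its fence
         * reference. fd_install() hands the file to the fd table. */
        fd_install(fd, sync_file->file);
        return fd;
    }
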
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 197871fdf508..3947f7ba1421 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -257,7 +257,8 @@ out:
}
static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
- struct msm_gem_submit *submit, char *comm, char *cmd)
+ struct msm_gem_submit *submit, struct msm_gpu_fault_info *fault_info,
+ char *comm, char *cmd)
{
struct msm_gpu_state *state;
@@ -276,7 +277,8 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
/* Fill in the additional crash state information */
state->comm = kstrdup(comm, GFP_KERNEL);
state->cmd = kstrdup(cmd, GFP_KERNEL);
- state->fault_info = gpu->fault_info;
+ if (fault_info)
+ state->fault_info = *fault_info;
if (submit) {
int i;
@@ -308,7 +310,8 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
}
#else
static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
- struct msm_gem_submit *submit, char *comm, char *cmd)
+ struct msm_gem_submit *submit, struct msm_gpu_fault_info *fault_info,
+ char *comm, char *cmd)
{
}
#endif
@@ -405,7 +408,7 @@ static void recover_worker(struct kthread_work *work)
/* Record the crash state */
pm_runtime_get_sync(&gpu->pdev->dev);
- msm_gpu_crashstate_capture(gpu, submit, comm, cmd);
+ msm_gpu_crashstate_capture(gpu, submit, NULL, comm, cmd);
kfree(cmd);
kfree(comm);
@@ -459,9 +462,8 @@ out_unlock:
msm_gpu_retire(gpu);
}
-static void fault_worker(struct kthread_work *work)
+void msm_gpu_fault_crashstate_capture(struct msm_gpu *gpu, struct msm_gpu_fault_info *fault_info)
{
- struct msm_gpu *gpu = container_of(work, struct msm_gpu, fault_work);
struct msm_gem_submit *submit;
struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
char *comm = NULL, *cmd = NULL;
@@ -484,16 +486,13 @@ static void fault_worker(struct kthread_work *work)
/* Record the crash state */
pm_runtime_get_sync(&gpu->pdev->dev);
- msm_gpu_crashstate_capture(gpu, submit, comm, cmd);
+ msm_gpu_crashstate_capture(gpu, submit, fault_info, comm, cmd);
pm_runtime_put_sync(&gpu->pdev->dev);
kfree(cmd);
kfree(comm);
resume_smmu:
- memset(&gpu->fault_info, 0, sizeof(gpu->fault_info));
- gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu);
-
mutex_unlock(&gpu->lock);
}
@@ -882,7 +881,6 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
init_waitqueue_head(&gpu->retire_event);
kthread_init_work(&gpu->retire_work, retire_worker);
kthread_init_work(&gpu->recover_work, recover_worker);
- kthread_init_work(&gpu->fault_work, fault_worker);
priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD;
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index e25009150579..5bf7cd985b9c 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -253,12 +253,6 @@ struct msm_gpu {
#define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3
struct timer_list hangcheck_timer;
- /* Fault info for most recent iova fault: */
- struct msm_gpu_fault_info fault_info;
-
- /* work for handling GPU ioval faults: */
- struct kthread_work fault_work;
-
/* work for handling GPU recovery: */
struct kthread_work recover_work;
@@ -668,6 +662,7 @@ msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *ta
void msm_gpu_cleanup(struct msm_gpu *gpu);
struct msm_gpu *adreno_load_gpu(struct drm_device *dev);
+bool adreno_has_gpu(struct device_node *node);
void __init adreno_register(void);
void __exit adreno_unregister(void);
@@ -705,6 +700,8 @@ static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu)
mutex_unlock(&gpu->lock);
}
+void msm_gpu_fault_crashstate_capture(struct msm_gpu *gpu, struct msm_gpu_fault_info *fault_info);
+
/*
* Simple macro to semi-cleanly add the MAP_PRIV flag for targets that can
* support expanded privileges
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index fd73dcd3f30e..739ce2c283a4 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -345,7 +345,6 @@ static int msm_gpu_fault_handler(struct iommu_domain *domain, struct device *dev
unsigned long iova, int flags, void *arg)
{
struct msm_iommu *iommu = arg;
- struct msm_mmu *mmu = &iommu->base;
struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(iommu->base.dev);
struct adreno_smmu_fault_info info, *ptr = NULL;
@@ -359,9 +358,6 @@ static int msm_gpu_fault_handler(struct iommu_domain *domain, struct device *dev
pr_warn_ratelimited("*** fault: iova=%16lx, flags=%d\n", iova, flags);
- if (mmu->funcs->resume_translation)
- mmu->funcs->resume_translation(mmu);
-
return 0;
}
@@ -376,12 +372,12 @@ static int msm_disp_fault_handler(struct iommu_domain *domain, struct device *de
return -ENOSYS;
}
-static void msm_iommu_resume_translation(struct msm_mmu *mmu)
+static void msm_iommu_set_stall(struct msm_mmu *mmu, bool enable)
{
struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(mmu->dev);
- if (adreno_smmu->resume_translation)
- adreno_smmu->resume_translation(adreno_smmu->cookie, true);
+ if (adreno_smmu->set_stall)
+ adreno_smmu->set_stall(adreno_smmu->cookie, enable);
}
static void msm_iommu_detach(struct msm_mmu *mmu)
@@ -431,7 +427,7 @@ static const struct msm_mmu_funcs funcs = {
.map = msm_iommu_map,
.unmap = msm_iommu_unmap,
.destroy = msm_iommu_destroy,
- .resume_translation = msm_iommu_resume_translation,
+ .set_stall = msm_iommu_set_stall,
};
struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks)
diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h
index daf91529e02b..0c694907140d 100644
--- a/drivers/gpu/drm/msm/msm_mmu.h
+++ b/drivers/gpu/drm/msm/msm_mmu.h
@@ -15,7 +15,7 @@ struct msm_mmu_funcs {
size_t len, int prot);
int (*unmap)(struct msm_mmu *mmu, uint64_t iova, size_t len);
void (*destroy)(struct msm_mmu *mmu);
- void (*resume_translation)(struct msm_mmu *mmu);
+ void (*set_stall)(struct msm_mmu *mmu, bool enable);
};
enum msm_mmu_type {
diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml
index 5a6ae9fc3194..462713401622 100644
--- a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml
+++ b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml
@@ -2255,7 +2255,8 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<reg32 offset="0" name="0">
<bitfield name="CLEAR_ON_CHIP_TS" pos="0" type="boolean"/>
<bitfield name="CLEAR_RESOURCE_TABLE" pos="1" type="boolean"/>
- <bitfield name="CLEAR_GLOBAL_LOCAL_TS" pos="2" type="boolean"/>
+ <bitfield name="CLEAR_BV_BR_COUNTER" pos="2" type="boolean"/>
+ <bitfield name="RESET_GLOBAL_LOCAL_TS" pos="3" type="boolean"/>
</reg32>
</domain>
diff --git a/drivers/gpu/drm/msm/registers/gen_header.py b/drivers/gpu/drm/msm/registers/gen_header.py
index 3926485bb197..a409404627c7 100644
--- a/drivers/gpu/drm/msm/registers/gen_header.py
+++ b/drivers/gpu/drm/msm/registers/gen_header.py
@@ -11,6 +11,7 @@ import collections
import argparse
import time
import datetime
+import re
class Error(Exception):
def __init__(self, message):
@@ -877,13 +878,14 @@ The rules-ng-ng source files this header was generated from are:
""")
maxlen = 0
for filepath in p.xml_files:
- maxlen = max(maxlen, len(filepath))
+ new_filepath = re.sub("^.+drivers","drivers",filepath)
+ maxlen = max(maxlen, len(new_filepath))
for filepath in p.xml_files:
- pad = " " * (maxlen - len(filepath))
+ pad = " " * (maxlen - len(new_filepath))
filesize = str(os.path.getsize(filepath))
filesize = " " * (7 - len(filesize)) + filesize
filetime = time.ctime(os.path.getmtime(filepath))
- print("- " + filepath + pad + " (" + filesize + " bytes, from " + filetime + ")")
+ print("- " + new_filepath + pad + " (" + filesize + " bytes, from <stripped>)")
if p.copyright_year:
current_year = str(datetime.date.today().year)
print()
diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c
index d47442125fa1..9aae26eb7d8f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c
+++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c
@@ -42,7 +42,7 @@
#include "nouveau_acpi.h"
static struct ida bl_ida;
-#define BL_NAME_SIZE 15 // 12 for name + 2 for digits + 1 for '\0'
+#define BL_NAME_SIZE 24 // 12 for name + 11 for digits + 1 for '\0'
static bool
nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE],
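
The new bound is sized for the worst case of a signed 32-bit ID: 12 bytes of name, up to 11 bytes for a %d ("-2147483648"), plus the terminating NUL. A quick illustrative check, assuming the 12-byte prefix really is "nv_backlight":

	char name[BL_NAME_SIZE];	/* 24 */
	int n = snprintf(name, sizeof(name), "nv_backlight%d", INT_MIN);
	/* n == 23, so even the pathological value fits without truncation */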
diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index 200e65a7cefc..c7869a639bef 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -314,14 +314,10 @@ nouveau_debugfs_fini(struct nouveau_drm *drm)
drm->debugfs = NULL;
}
-int
+void
nouveau_module_debugfs_init(void)
{
nouveau_debugfs_root = debugfs_create_dir("nouveau", NULL);
- if (IS_ERR(nouveau_debugfs_root))
- return PTR_ERR(nouveau_debugfs_root);
-
- return 0;
}
void
diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.h b/drivers/gpu/drm/nouveau/nouveau_debugfs.h
index b7617b344ee2..d05ed0e641c4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.h
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.h
@@ -24,7 +24,7 @@ extern void nouveau_debugfs_fini(struct nouveau_drm *);
extern struct dentry *nouveau_debugfs_root;
-int nouveau_module_debugfs_init(void);
+void nouveau_module_debugfs_init(void);
void nouveau_module_debugfs_fini(void);
#else
static inline void
@@ -42,10 +42,9 @@ nouveau_debugfs_fini(struct nouveau_drm *drm)
{
}
-static inline int
+static inline void
nouveau_module_debugfs_init(void)
{
- return 0;
}
static inline void
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 0c82a63cd49d..1527b801f013 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -1461,9 +1461,7 @@ nouveau_drm_init(void)
if (!nouveau_modeset)
return 0;
- ret = nouveau_module_debugfs_init();
- if (ret)
- return ret;
+ nouveau_module_debugfs_init();
#ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER
platform_driver_register(&nouveau_platform_driver);
diff --git a/drivers/gpu/drm/nouveau/nvif/chan.c b/drivers/gpu/drm/nouveau/nvif/chan.c
index baa10227d51a..80c01017d642 100644
--- a/drivers/gpu/drm/nouveau/nvif/chan.c
+++ b/drivers/gpu/drm/nouveau/nvif/chan.c
@@ -39,6 +39,9 @@ nvif_chan_gpfifo_post(struct nvif_chan *chan)
const u32 pbptr = (chan->push.cur - map) + chan->func->gpfifo.post_size;
const u32 gpptr = (chan->gpfifo.cur + 1) & chan->gpfifo.max;
+ if (!chan->func->gpfifo.post)
+ return 0;
+
return chan->func->gpfifo.post(chan, gpptr, pbptr);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c
index baf42339f93e..588cb4ab85cb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c
@@ -719,7 +719,6 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps)
union acpi_object argv4 = {
.buffer.type = ACPI_TYPE_BUFFER,
.buffer.length = 4,
- .buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL),
}, *obj;
caps->status = 0xffff;
@@ -727,17 +726,22 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps)
if (!acpi_check_dsm(handle, &NVOP_DSM_GUID, NVOP_DSM_REV, BIT_ULL(0x1a)))
return;
+ argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL);
+ if (!argv4.buffer.pointer)
+ return;
+
obj = acpi_evaluate_dsm(handle, &NVOP_DSM_GUID, NVOP_DSM_REV, 0x1a, &argv4);
if (!obj)
- return;
+ goto done;
if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) ||
WARN_ON(obj->buffer.length != 4))
- return;
+ goto done;
caps->status = 0;
caps->optimusCaps = *(u32 *)obj->buffer.pointer;
+done:
ACPI_FREE(obj);
kfree(argv4.buffer.pointer);
@@ -754,24 +758,28 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt)
union acpi_object argv4 = {
.buffer.type = ACPI_TYPE_BUFFER,
.buffer.length = sizeof(caps),
- .buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL),
}, *obj;
jt->status = 0xffff;
+ argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL);
+ if (!argv4.buffer.pointer)
+ return;
+
obj = acpi_evaluate_dsm(handle, &JT_DSM_GUID, JT_DSM_REV, 0x1, &argv4);
if (!obj)
- return;
+ goto done;
if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) ||
WARN_ON(obj->buffer.length != 4))
- return;
+ goto done;
jt->status = 0;
jt->jtCaps = *(u32 *)obj->buffer.pointer;
jt->jtRevId = (jt->jtCaps & 0xfff00000) >> 20;
jt->bSBIOSCaps = 0;
+done:
ACPI_FREE(obj);
kfree(argv4.buffer.pointer);
@@ -1744,6 +1752,13 @@ r535_gsp_fini(struct nvkm_gsp *gsp, bool suspend)
nvkm_gsp_sg_free(gsp->subdev.device, &gsp->sr.sgt);
return ret;
}
+
+ /*
+ * TODO: Debug the GSP firmware / RPC handling to find out why,
+ * without this delay, Turing (but none of the other architectures)
+ * ends up resetting all channels after resume.
+ */
+ msleep(50);
}
ret = r535_gsp_rpc_unloading_guest_driver(gsp, suspend);
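
Both ACPI hunks above fix the same subtle bug: a designated initializer that reads a field of the object being initialized. Initializer expressions may be evaluated in any order, so argv4.buffer.length is not guaranteed to hold its value yet when kmalloc() runs, and the old code also never checked the allocation. A condensed before/after sketch:

	/* Broken: the allocation size read here is indeterminate. */
	union acpi_object bad = {
		.buffer.type    = ACPI_TYPE_BUFFER,
		.buffer.length  = 4,
		.buffer.pointer = kmalloc(bad.buffer.length, GFP_KERNEL),
	};

	/* Fixed: set the length first, allocate afterwards, check for failure. */
	union acpi_object good = {
		.buffer.type   = ACPI_TYPE_BUFFER,
		.buffer.length = 4,
	};

	good.buffer.pointer = kmalloc(good.buffer.length, GFP_KERNEL);
	if (!good.buffer.pointer)
		return;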
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c
index 5acb98d137bd..9d06ff722fea 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c
@@ -637,12 +637,18 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload,
if (payload_size > max_payload_size) {
const u32 fn = rpc->function;
u32 remain_payload_size = payload_size;
+ void *next;
- /* Adjust length, and send initial RPC. */
- rpc->length = sizeof(*rpc) + max_payload_size;
- msg->checksum = rpc->length;
+ /* Send initial RPC. */
+ next = r535_gsp_rpc_get(gsp, fn, max_payload_size);
+ if (IS_ERR(next)) {
+ repv = next;
+ goto done;
+ }
- repv = r535_gsp_rpc_send(gsp, payload, NVKM_GSP_RPC_REPLY_NOWAIT, 0);
+ memcpy(next, payload, max_payload_size);
+
+ repv = r535_gsp_rpc_send(gsp, next, NVKM_GSP_RPC_REPLY_NOWAIT, 0);
if (IS_ERR(repv))
goto done;
@@ -653,7 +659,6 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload,
while (remain_payload_size) {
u32 size = min(remain_payload_size,
max_payload_size);
- void *next;
next = r535_gsp_rpc_get(gsp, NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD, size);
if (IS_ERR(next)) {
@@ -674,6 +679,8 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload,
/* Wait for reply. */
repv = r535_gsp_rpc_handle_reply(gsp, fn, policy, payload_size +
sizeof(*rpc));
+ if (!IS_ERR(repv))
+ kvfree(msg);
} else {
repv = r535_gsp_rpc_send(gsp, payload, policy, gsp_rpc_len);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c
index 52f2e5f14517..f25ea610cd99 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c
@@ -121,7 +121,7 @@ r535_mmu_vaspace_new(struct nvkm_vmm *vmm, u32 handle, bool external)
page_shift -= desc->bits;
ctrl->levels[i].physAddress = pd->pt[0]->addr;
- ctrl->levels[i].size = (1 << desc->bits) * desc->size;
+ ctrl->levels[i].size = BIT_ULL(desc->bits) * desc->size;
ctrl->levels[i].aperture = 1;
ctrl->levels[i].pageShift = page_shift;
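
The one-line change above avoids a 32-bit overflow: 1 << bits is an int shift, which misbehaves once bits reaches 31, and the product would be truncated before the assignment widens it. Illustrative only; the value of bits is assumed:

	unsigned int bits = 38;		/* a plausible wide page-table level */
	u64 bad  = (1 << bits) * 8;	/* int shift: undefined for bits >= 31 */
	u64 good = BIT_ULL(bits) * 8;	/* 1ULL << 38; the multiply stays 64-bit */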
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 0a3b26bb4d73..9f81fa960b46 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -26,6 +26,7 @@
#include <linux/i2c.h>
#include <linux/media-bus-format.h>
#include <linux/module.h>
+#include <linux/of_device.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
@@ -136,6 +137,14 @@ struct panel_desc {
int connector_type;
};
+struct panel_desc_dsi {
+ struct panel_desc desc;
+
+ unsigned long flags;
+ enum mipi_dsi_pixel_format format;
+ unsigned int lanes;
+};
+
struct panel_simple {
struct drm_panel base;
@@ -430,10 +439,7 @@ static const struct drm_panel_funcs panel_simple_funcs = {
.get_timings = panel_simple_get_timings,
};
-static struct panel_desc panel_dpi;
-
-static int panel_dpi_probe(struct device *dev,
- struct panel_simple *panel)
+static struct panel_desc *panel_dpi_probe(struct device *dev)
{
struct display_timing *timing;
const struct device_node *np;
@@ -445,17 +451,17 @@ static int panel_dpi_probe(struct device *dev,
np = dev->of_node;
desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
if (!desc)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
timing = devm_kzalloc(dev, sizeof(*timing), GFP_KERNEL);
if (!timing)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
ret = of_get_display_timing(np, "panel-timing", timing);
if (ret < 0) {
dev_err(dev, "%pOF: no panel-timing node found for \"panel-dpi\" binding\n",
np);
- return ret;
+ return ERR_PTR(ret);
}
desc->timings = timing;
@@ -473,9 +479,7 @@ static int panel_dpi_probe(struct device *dev,
/* We do not know the connector for the DT node, so guess it */
desc->connector_type = DRM_MODE_CONNECTOR_DPI;
- panel->desc = desc;
-
- return 0;
+ return desc;
}
#define PANEL_SIMPLE_BOUNDS_CHECK(to_check, bounds, field) \
@@ -570,8 +574,44 @@ static int panel_simple_override_nondefault_lvds_datamapping(struct device *dev,
return 0;
}
-static int panel_simple_probe(struct device *dev, const struct panel_desc *desc)
+static const struct panel_desc *panel_simple_get_desc(struct device *dev)
{
+ if (IS_ENABLED(CONFIG_DRM_MIPI_DSI) &&
+ dev_is_mipi_dsi(dev)) {
+ const struct panel_desc_dsi *dsi_desc;
+
+ dsi_desc = of_device_get_match_data(dev);
+ if (!dsi_desc)
+ return ERR_PTR(-ENODEV);
+
+ return &dsi_desc->desc;
+ }
+
+ if (dev_is_platform(dev)) {
+ const struct panel_desc *desc;
+
+ desc = of_device_get_match_data(dev);
+ if (!desc) {
+ /*
+ * panel-dpi probes without a descriptor and
+ * panel_dpi_probe() will initialize one for us
+ * based on the device tree.
+ */
+ if (of_device_is_compatible(dev->of_node, "panel-dpi"))
+ return panel_dpi_probe(dev);
+ else
+ return ERR_PTR(-ENODEV);
+ }
+
+ return desc;
+ }
+
+ return ERR_PTR(-ENODEV);
+}
+
+static struct panel_simple *panel_simple_probe(struct device *dev)
+{
+ const struct panel_desc *desc;
struct panel_simple *panel;
struct display_timing dt;
struct device_node *ddc;
@@ -579,27 +619,31 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc)
u32 bus_flags;
int err;
+ desc = panel_simple_get_desc(dev);
+ if (IS_ERR(desc))
+ return ERR_CAST(desc);
+
panel = devm_drm_panel_alloc(dev, struct panel_simple, base,
&panel_simple_funcs, desc->connector_type);
if (IS_ERR(panel))
- return PTR_ERR(panel);
+ return ERR_CAST(panel);
panel->desc = desc;
panel->supply = devm_regulator_get(dev, "power");
if (IS_ERR(panel->supply))
- return PTR_ERR(panel->supply);
+ return ERR_CAST(panel->supply);
panel->enable_gpio = devm_gpiod_get_optional(dev, "enable",
GPIOD_OUT_LOW);
if (IS_ERR(panel->enable_gpio))
- return dev_err_probe(dev, PTR_ERR(panel->enable_gpio),
- "failed to request GPIO\n");
+ return dev_err_cast_probe(dev, panel->enable_gpio,
+ "failed to request GPIO\n");
err = of_drm_get_panel_orientation(dev->of_node, &panel->orientation);
if (err) {
dev_err(dev, "%pOF: failed to get orientation %d\n", dev->of_node, err);
- return err;
+ return ERR_PTR(err);
}
ddc = of_parse_phandle(dev->of_node, "ddc-i2c-bus", 0);
@@ -608,19 +652,12 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc)
of_node_put(ddc);
if (!panel->ddc)
- return -EPROBE_DEFER;
+ return ERR_PTR(-EPROBE_DEFER);
}
- if (desc == &panel_dpi) {
- /* Handle the generic panel-dpi binding */
- err = panel_dpi_probe(dev, panel);
- if (err)
- goto free_ddc;
- desc = panel->desc;
- } else {
- if (!of_get_display_timing(dev->of_node, "panel-timing", &dt))
- panel_simple_parse_panel_timing_node(dev, panel, &dt);
- }
+ if (!of_device_is_compatible(dev->of_node, "panel-dpi") &&
+ !of_get_display_timing(dev->of_node, "panel-timing", &dt))
+ panel_simple_parse_panel_timing_node(dev, panel, &dt);
if (desc->connector_type == DRM_MODE_CONNECTOR_LVDS) {
/* Optional data-mapping property for overriding bus format */
@@ -703,7 +740,7 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc)
drm_panel_add(&panel->base);
- return 0;
+ return panel;
disable_pm_runtime:
pm_runtime_dont_use_autosuspend(dev);
@@ -712,7 +749,7 @@ free_ddc:
if (panel->ddc)
put_device(&panel->ddc->dev);
- return err;
+ return ERR_PTR(err);
}
static void panel_simple_shutdown(struct device *dev)
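
The refactor above switches panel_simple_probe() from returning an int to returning a pointer, so error codes now travel inside the pointer. A minimal sketch of the ERR_PTR idiom the new code relies on (probe_sketch is a hypothetical stand-in):

	static struct panel_simple *probe_sketch(struct device *dev)
	{
		const struct panel_desc *desc = panel_simple_get_desc(dev);
		struct panel_simple *panel;

		if (IS_ERR(desc))
			return ERR_CAST(desc);	/* keep the errno, change the type */

		panel = devm_kzalloc(dev, sizeof(*panel), GFP_KERNEL);
		if (!panel)
			return ERR_PTR(-ENOMEM);

		return panel;
	}

	/* Callers unwrap the encoded errno: */
	panel = probe_sketch(dev);
	if (IS_ERR(panel))
		return PTR_ERR(panel);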
@@ -5367,7 +5404,12 @@ static const struct of_device_id platform_of_match[] = {
}, {
/* Must be the last entry */
.compatible = "panel-dpi",
- .data = &panel_dpi,
+
+ /*
+ * Explicitly NULL; the panel_desc structure will be
+ * allocated by panel_dpi_probe().
+ */
+ .data = NULL,
}, {
/* sentinel */
}
@@ -5376,13 +5418,13 @@ MODULE_DEVICE_TABLE(of, platform_of_match);
static int panel_simple_platform_probe(struct platform_device *pdev)
{
- const struct panel_desc *desc;
+ struct panel_simple *panel;
- desc = of_device_get_match_data(&pdev->dev);
- if (!desc)
- return -ENODEV;
+ panel = panel_simple_probe(&pdev->dev);
+ if (IS_ERR(panel))
+ return PTR_ERR(panel);
- return panel_simple_probe(&pdev->dev, desc);
+ return 0;
}
static void panel_simple_platform_remove(struct platform_device *pdev)
@@ -5412,14 +5454,6 @@ static struct platform_driver panel_simple_platform_driver = {
.shutdown = panel_simple_platform_shutdown,
};
-struct panel_desc_dsi {
- struct panel_desc desc;
-
- unsigned long flags;
- enum mipi_dsi_pixel_format format;
- unsigned int lanes;
-};
-
static const struct drm_display_mode auo_b080uan01_mode = {
.clock = 154500,
.hdisplay = 1200,
@@ -5653,16 +5687,14 @@ MODULE_DEVICE_TABLE(of, dsi_of_match);
static int panel_simple_dsi_probe(struct mipi_dsi_device *dsi)
{
const struct panel_desc_dsi *desc;
+ struct panel_simple *panel;
int err;
- desc = of_device_get_match_data(&dsi->dev);
- if (!desc)
- return -ENODEV;
-
- err = panel_simple_probe(&dsi->dev, &desc->desc);
- if (err < 0)
- return err;
+ panel = panel_simple_probe(&dsi->dev);
+ if (IS_ERR(panel))
+ return PTR_ERR(panel);
+ desc = container_of(panel->desc, struct panel_desc_dsi, desc);
dsi->mode_flags = desc->flags;
dsi->format = desc->format;
dsi->lanes = desc->lanes;
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 5657106c2f7d..15e2d505550f 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -841,7 +841,6 @@ int panfrost_job_init(struct panfrost_device *pfdev)
.num_rqs = DRM_SCHED_PRIORITY_COUNT,
.credit_limit = 2,
.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
- .timeout_wq = pfdev->reset.wq,
.name = "pan_js",
.dev = pfdev->dev,
};
@@ -879,6 +878,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0);
if (!pfdev->reset.wq)
return -ENOMEM;
+ args.timeout_wq = pfdev->reset.wq;
for (j = 0; j < NUM_JOB_SLOTS; j++) {
js->queue[j].fence_context = dma_fence_context_alloc(1);
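
The fix above is an initialization-order bug: a designated initializer snapshots the current value of pfdev->reset.wq (still NULL at that point), it is not a live reference, so the scheduler was handed no timeout workqueue. Sketch:

	struct drm_sched_init_args args = {
		.timeout_wq = pfdev->reset.wq,	/* copies NULL here */
	};

	pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0);
	if (!pfdev->reset.wq)
		return -ENOMEM;

	args.timeout_wq = pfdev->reset.wq;	/* fixed: assign after allocation */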
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index bbd39348a7ab..7a3e510327b7 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -26,7 +26,6 @@
* Jerome Glisse
*/
-#include <linux/console.h>
#include <linux/efi.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
@@ -1635,11 +1634,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend,
pci_set_power_state(pdev, PCI_D3hot);
}
- if (notify_clients) {
- console_lock();
- drm_client_dev_suspend(dev, true);
- console_unlock();
- }
+ if (notify_clients)
+ drm_client_dev_suspend(dev, false);
+
return 0;
}
@@ -1661,17 +1658,11 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool notify_clients)
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
- if (notify_clients) {
- console_lock();
- }
if (resume) {
pci_set_power_state(pdev, PCI_D0);
pci_restore_state(pdev);
- if (pci_enable_device(pdev)) {
- if (notify_clients)
- console_unlock();
+ if (pci_enable_device(pdev))
return -1;
- }
}
/* resume AGP if in use */
radeon_agp_resume(rdev);
@@ -1747,10 +1738,8 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool notify_clients)
if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled)
radeon_pm_compute_clocks(rdev);
- if (notify_clients) {
- drm_client_dev_resume(dev, true);
- console_unlock();
- }
+ if (notify_clients)
+ drm_client_dev_resume(dev, false);
return 0;
}
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index e671aa241720..ac678de7fe5e 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -355,17 +355,6 @@ void drm_sched_entity_destroy(struct drm_sched_entity *entity)
}
EXPORT_SYMBOL(drm_sched_entity_destroy);
-/* drm_sched_entity_clear_dep - callback to clear the entities dependency */
-static void drm_sched_entity_clear_dep(struct dma_fence *f,
- struct dma_fence_cb *cb)
-{
- struct drm_sched_entity *entity =
- container_of(cb, struct drm_sched_entity, cb);
-
- entity->dependency = NULL;
- dma_fence_put(f);
-}
-
/*
* drm_sched_entity_wakeup - callback to clear the entity's dependency and
* wake up the scheduler
@@ -376,7 +365,8 @@ static void drm_sched_entity_wakeup(struct dma_fence *f,
struct drm_sched_entity *entity =
container_of(cb, struct drm_sched_entity, cb);
- drm_sched_entity_clear_dep(f, cb);
+ entity->dependency = NULL;
+ dma_fence_put(f);
drm_sched_wakeup(entity->rq->sched);
}
@@ -429,13 +419,6 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
fence = dma_fence_get(&s_fence->scheduled);
dma_fence_put(entity->dependency);
entity->dependency = fence;
- if (!dma_fence_add_callback(fence, &entity->cb,
- drm_sched_entity_clear_dep))
- return true;
-
- /* Ignore it when it is already scheduled */
- dma_fence_put(fence);
- return false;
}
if (!dma_fence_add_callback(entity->dependency, &entity->cb,
diff --git a/drivers/gpu/drm/sitronix/Kconfig b/drivers/gpu/drm/sitronix/Kconfig
index c069d0d41775..741d1bb4b83f 100644
--- a/drivers/gpu/drm/sitronix/Kconfig
+++ b/drivers/gpu/drm/sitronix/Kconfig
@@ -5,6 +5,7 @@ config DRM_ST7571_I2C
select DRM_GEM_SHMEM_HELPER
select DRM_KMS_HELPER
select REGMAP_I2C
+ select VIDEOMODE_HELPERS
help
DRM driver for Sitronix ST7571 panels controlled over I2C.
diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c
index dd2006d51c7a..eec43d1a5595 100644
--- a/drivers/gpu/drm/solomon/ssd130x.c
+++ b/drivers/gpu/drm/solomon/ssd130x.c
@@ -974,7 +974,7 @@ static void ssd130x_clear_screen(struct ssd130x_device *ssd130x, u8 *data_array)
static void ssd132x_clear_screen(struct ssd130x_device *ssd130x, u8 *data_array)
{
- unsigned int columns = DIV_ROUND_UP(ssd130x->height, SSD132X_SEGMENT_WIDTH);
+ unsigned int columns = DIV_ROUND_UP(ssd130x->width, SSD132X_SEGMENT_WIDTH);
unsigned int height = ssd130x->height;
memset(data_array, 0, columns * height);
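
The buffer being cleared is columns * height bytes, where each column covers SSD132X_SEGMENT_WIDTH horizontal pixels, so the divisor must be the width; dividing the height only gave the right answer on square panels. Worked example (SSD132X_SEGMENT_WIDTH == 2 is an assumption here):

	unsigned int width = 128, height = 64;
	unsigned int columns = DIV_ROUND_UP(width, 2);	/* 64, not 32 */

	memset(data_array, 0, columns * height);	/* 4096 bytes cleared */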
diff --git a/drivers/gpu/drm/sysfb/vesadrm.c b/drivers/gpu/drm/sysfb/vesadrm.c
index 4d62c78e7d1e..f7532db3831f 100644
--- a/drivers/gpu/drm/sysfb/vesadrm.c
+++ b/drivers/gpu/drm/sysfb/vesadrm.c
@@ -362,14 +362,19 @@ static struct vesadrm_device *vesadrm_device_create(struct drm_driver *drv,
if (!__screen_info_vbe_mode_nonvga(si)) {
vesa->cmap_write = vesadrm_vga_cmap_write;
-#if defined(CONFIG_X86_32)
} else {
+#if defined(CONFIG_X86_32)
phys_addr_t pmi_base = __screen_info_vesapm_info_base(si);
- const u16 *pmi_addr = phys_to_virt(pmi_base);
- vesa->pmi.PrimaryPalette = (u8 *)pmi_addr + pmi_addr[2];
- vesa->cmap_write = vesadrm_pmi_cmap_write;
+ if (pmi_base) {
+ const u16 *pmi_addr = phys_to_virt(pmi_base);
+
+ vesa->pmi.PrimaryPalette = (u8 *)pmi_addr + pmi_addr[2];
+ vesa->cmap_write = vesadrm_pmi_cmap_write;
+ } else
#endif
+ if (format->is_color_indexed)
+ drm_warn(dev, "hardware palette is unchangeable, colors may be incorrect\n");
}
#ifdef CONFIG_X86
diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c
index 2d9a0a3f6c38..7a38664e890e 100644
--- a/drivers/gpu/drm/tegra/nvdec.c
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -261,10 +261,8 @@ static int nvdec_load_falcon_firmware(struct nvdec *nvdec)
if (!client->group) {
virt = dma_alloc_coherent(nvdec->dev, size, &iova, GFP_KERNEL);
-
- err = dma_mapping_error(nvdec->dev, iova);
- if (err < 0)
- return err;
+ if (!virt)
+ return -ENOMEM;
} else {
virt = tegra_drm_alloc(tegra, size, &iova);
if (IS_ERR(virt))
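
dma_mapping_error() belongs to the streaming DMA API and tells you whether a dma_map_single()/dma_map_page() handle is valid; dma_alloc_coherent() signals failure by returning a NULL virtual address, which the old code never checked. Side-by-side sketch:

	/* Coherent API: failure is a NULL pointer. */
	virt = dma_alloc_coherent(dev, size, &iova, GFP_KERNEL);
	if (!virt)
		return -ENOMEM;

	/* Streaming API: this is where dma_mapping_error() applies. */
	dma_addr = dma_map_single(dev, buf, size, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, dma_addr))
		return -ENOMEM;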
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 15cab9bda17f..bd90404ea609 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -254,6 +254,13 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
ret = dma_resv_trylock(&fbo->base.base._resv);
WARN_ON(!ret);
+ ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
+ if (ret) {
+ dma_resv_unlock(&fbo->base.base._resv);
+ kfree(fbo);
+ return ret;
+ }
+
if (fbo->base.resource) {
ttm_resource_set_bo(fbo->base.resource, &fbo->base);
bo->resource = NULL;
@@ -262,12 +269,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
fbo->base.bulk_move = NULL;
}
- ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
- if (ret) {
- kfree(fbo);
- return ret;
- }
-
ttm_bo_get(bo);
fbo->bo = bo;
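
Moving dma_resv_reserve_fences() before the resource transfer means a failure can still be unwound: at that point the only state to undo is the trylock'd reservation, which the new code drops before freeing. Fail-early sketch:

	if (!dma_resv_trylock(resv))
		return -EBUSY;

	ret = dma_resv_reserve_fences(resv, 1);	/* may fail: nothing moved yet */
	if (ret) {
		dma_resv_unlock(resv);		/* previously missing on this path */
		kfree(fbo);
		return ret;
	}
	/* only now start handing bo->resource over to fbo */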
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index b51f0b648a08..411e47702f8a 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -101,6 +101,12 @@ enum v3d_gen {
V3D_GEN_71 = 71,
};
+enum v3d_irq {
+ V3D_CORE_IRQ,
+ V3D_HUB_IRQ,
+ V3D_MAX_IRQS,
+};
+
struct v3d_dev {
struct drm_device drm;
@@ -112,6 +118,8 @@ struct v3d_dev {
bool single_irq_line;
+ int irq[V3D_MAX_IRQS];
+
struct v3d_perfmon_info perfmon_info;
void __iomem *hub_regs;
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index d7d16da78db3..37bf5eecdd2c 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -134,6 +134,8 @@ v3d_reset(struct v3d_dev *v3d)
if (false)
v3d_idle_axi(v3d, 0);
+ v3d_irq_disable(v3d);
+
v3d_idle_gca(v3d);
v3d_reset_sms(v3d);
v3d_reset_v3d(v3d);
diff --git a/drivers/gpu/drm/v3d/v3d_gemfs.c b/drivers/gpu/drm/v3d/v3d_gemfs.c
index 4c5e18590a5c..8ec6ed82b3d9 100644
--- a/drivers/gpu/drm/v3d/v3d_gemfs.c
+++ b/drivers/gpu/drm/v3d/v3d_gemfs.c
@@ -3,14 +3,21 @@
#include <linux/fs.h>
#include <linux/mount.h>
+#include <linux/fs_context.h>
#include "v3d_drv.h"
+static int add_param(struct fs_context *fc, const char *key, const char *val)
+{
+ return vfs_parse_fs_string(fc, key, val, strlen(val));
+}
+
void v3d_gemfs_init(struct v3d_dev *v3d)
{
- char huge_opt[] = "huge=within_size";
struct file_system_type *type;
+ struct fs_context *fc;
struct vfsmount *gemfs;
+ int ret;
/*
* By creating our own shmemfs mountpoint, we can pass in
@@ -28,8 +35,16 @@ void v3d_gemfs_init(struct v3d_dev *v3d)
if (!type)
goto err;
- gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, huge_opt);
- if (IS_ERR(gemfs))
+ fc = fs_context_for_mount(type, SB_KERNMOUNT);
+ if (IS_ERR(fc))
+ goto err;
+ ret = add_param(fc, "source", "tmpfs");
+ if (!ret)
+ ret = add_param(fc, "huge", "within_size");
+ if (!ret)
+ gemfs = fc_mount_longterm(fc);
+ put_fs_context(fc);
+ if (ret)
goto err;
v3d->gemfs = gemfs;
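
The mount-option string is replaced with the fs_context API: build a context, feed parameters one at a time, then mount. A condensed sketch of the sequence the hunk uses:

	struct fs_context *fc;
	struct vfsmount *mnt;
	int ret;

	fc = fs_context_for_mount(type, SB_KERNMOUNT);
	if (IS_ERR(fc))
		return PTR_ERR(fc);

	ret = vfs_parse_fs_string(fc, "source", "tmpfs", strlen("tmpfs"));
	if (!ret)
		ret = vfs_parse_fs_string(fc, "huge", "within_size",
					  strlen("within_size"));
	if (!ret) {
		mnt = fc_mount_longterm(fc);	/* long-lived kernel-internal mount */
		if (IS_ERR(mnt))
			ret = PTR_ERR(mnt);
	}

	put_fs_context(fc);
	return ret;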
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index 2cca5d3a26a2..a515a301e480 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -260,7 +260,7 @@ v3d_hub_irq(int irq, void *arg)
int
v3d_irq_init(struct v3d_dev *v3d)
{
- int irq1, ret, core;
+ int irq, ret, core;
INIT_WORK(&v3d->overflow_mem_work, v3d_overflow_mem_work);
@@ -271,17 +271,24 @@ v3d_irq_init(struct v3d_dev *v3d)
V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver));
V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS(v3d->ver));
- irq1 = platform_get_irq_optional(v3d_to_pdev(v3d), 1);
- if (irq1 == -EPROBE_DEFER)
- return irq1;
- if (irq1 > 0) {
- ret = devm_request_irq(v3d->drm.dev, irq1,
+ irq = platform_get_irq_optional(v3d_to_pdev(v3d), 1);
+ if (irq == -EPROBE_DEFER)
+ return irq;
+ if (irq > 0) {
+ v3d->irq[V3D_CORE_IRQ] = irq;
+
+ ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_CORE_IRQ],
v3d_irq, IRQF_SHARED,
"v3d_core0", v3d);
if (ret)
goto fail;
- ret = devm_request_irq(v3d->drm.dev,
- platform_get_irq(v3d_to_pdev(v3d), 0),
+
+ irq = platform_get_irq(v3d_to_pdev(v3d), 0);
+ if (irq < 0)
+ return irq;
+ v3d->irq[V3D_HUB_IRQ] = irq;
+
+ ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_HUB_IRQ],
v3d_hub_irq, IRQF_SHARED,
"v3d_hub", v3d);
if (ret)
@@ -289,8 +296,12 @@ v3d_irq_init(struct v3d_dev *v3d)
} else {
v3d->single_irq_line = true;
- ret = devm_request_irq(v3d->drm.dev,
- platform_get_irq(v3d_to_pdev(v3d), 0),
+ irq = platform_get_irq(v3d_to_pdev(v3d), 0);
+ if (irq < 0)
+ return irq;
+ v3d->irq[V3D_CORE_IRQ] = irq;
+
+ ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_CORE_IRQ],
v3d_irq, IRQF_SHARED,
"v3d", v3d);
if (ret)
@@ -331,6 +342,12 @@ v3d_irq_disable(struct v3d_dev *v3d)
V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~0);
V3D_WRITE(V3D_HUB_INT_MSK_SET, ~0);
+ /* Finish any interrupt handler still in flight. */
+ for (int i = 0; i < V3D_MAX_IRQS; i++) {
+ if (v3d->irq[i])
+ synchronize_irq(v3d->irq[i]);
+ }
+
/* Clear any pending interrupts we might have left. */
for (core = 0; core < v3d->cores; core++)
V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver));
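
Recording the requested IRQ numbers is what makes the synchronize_irq() loop above possible: after masking, a handler may still be running on another CPU, and the reset path must wait it out before clearing status. The canonical quiesce order, sketched with hypothetical helpers:

	mask_all_interrupts(v3d);		/* illustrative: the MSK_SET writes */

	for (int i = 0; i < V3D_MAX_IRQS; i++)
		if (v3d->irq[i])
			synchronize_irq(v3d->irq[i]);	/* wait for in-flight handlers */

	clear_pending_interrupts(v3d);		/* illustrative: the INT_CLR writes */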
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 35f131a46d07..42df9d3567e7 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -199,7 +199,6 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue)
struct v3d_dev *v3d = job->v3d;
struct v3d_file_priv *file = job->file->driver_priv;
struct v3d_stats *global_stats = &v3d->queue[queue].stats;
- struct v3d_stats *local_stats = &file->stats[queue];
u64 now = local_clock();
unsigned long flags;
@@ -209,7 +208,12 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue)
else
preempt_disable();
- v3d_stats_update(local_stats, now);
+ /* Don't update the local stats if the file context has already been closed */
+ if (file)
+ v3d_stats_update(&file->stats[queue], now);
+ else
+ drm_dbg(&v3d->drm, "The file descriptor was closed before job completion\n");
+
v3d_stats_update(global_stats, now);
if (IS_ENABLED(CONFIG_LOCKDEP))
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index a29a6ef266f9..163d092bd973 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -560,12 +560,6 @@ static int vc4_hdmi_connector_init(struct drm_device *dev,
if (ret)
return ret;
- ret = drm_connector_hdmi_audio_init(connector, dev->dev,
- &vc4_hdmi_audio_funcs,
- 8, false, -1);
- if (ret)
- return ret;
-
drm_connector_helper_add(connector, &vc4_hdmi_connector_helper_funcs);
/*
@@ -2291,6 +2285,12 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi)
return ret;
}
+ ret = drm_connector_hdmi_audio_init(&vc4_hdmi->connector, dev,
+ &vc4_hdmi_audio_funcs, 8, false,
+ -1);
+ if (ret)
+ return ret;
+
dai_link->cpus = &vc4_hdmi->audio.cpu;
dai_link->codecs = &vc4_hdmi->audio.codec;
dai_link->platforms = &vc4_hdmi->audio.platform;
diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c
index 1118a0250279..ce49282198cb 100644
--- a/drivers/gpu/drm/virtio/virtgpu_prime.c
+++ b/drivers/gpu/drm/virtio/virtgpu_prime.c
@@ -204,15 +204,16 @@ static void virtgpu_dma_buf_free_obj(struct drm_gem_object *obj)
{
struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(obj);
struct virtio_gpu_device *vgdev = obj->dev->dev_private;
+ struct dma_buf_attachment *attach = obj->import_attach;
if (drm_gem_is_imported(obj)) {
- struct dma_buf *dmabuf = obj->dma_buf;
+ struct dma_buf *dmabuf = attach->dmabuf;
dma_resv_lock(dmabuf->resv, NULL);
virtgpu_dma_buf_unmap(bo);
dma_resv_unlock(dmabuf->resv);
- dma_buf_detach(dmabuf, obj->import_attach);
+ dma_buf_detach(dmabuf, attach);
dma_buf_put(dmabuf);
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 0695a342b1ef..5205552b1970 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -749,7 +749,7 @@ static int vmw_setup_pci_resources(struct vmw_private *dev,
dev->fifo_mem = devm_memremap(dev->drm.dev,
fifo_start,
fifo_size,
- MEMREMAP_WB);
+ MEMREMAP_WB | MEMREMAP_DEC);
if (IS_ERR(dev->fifo_mem)) {
drm_err(&dev->drm,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
index c55382167c1b..e417921af584 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
@@ -85,10 +85,10 @@ static int vmw_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map)
int ret;
if (drm_gem_is_imported(obj)) {
- ret = dma_buf_vmap(obj->dma_buf, map);
+ ret = dma_buf_vmap(obj->import_attach->dmabuf, map);
if (!ret) {
if (drm_WARN_ON(obj->dev, map->is_iomem)) {
- dma_buf_vunmap(obj->dma_buf, map);
+ dma_buf_vunmap(obj->import_attach->dmabuf, map);
return -EIO;
}
}
@@ -102,7 +102,7 @@ static int vmw_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map)
static void vmw_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
{
if (drm_gem_is_imported(obj))
- dma_buf_vunmap(obj->dma_buf, map);
+ dma_buf_vunmap(obj->import_attach->dmabuf, map);
else
drm_gem_ttm_vunmap(obj, map);
}
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index fcc2677a4229..99a91355842e 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
config DRM_XE
- tristate "Intel Xe Graphics"
- depends on DRM && PCI && (m || (y && KUNIT=y))
+ tristate "Intel Xe2 Graphics"
+ depends on DRM && PCI
+ depends on KUNIT || !KUNIT
depends on INTEL_VSEC || !INTEL_VSEC
depends on X86_PLATFORM_DEVICES || !(X86 && ACPI)
select INTERVAL_TREE
@@ -46,7 +47,8 @@ config DRM_XE
select AUXILIARY_BUS
select HMM_MIRROR
help
- Experimental driver for Intel Xe series GPUs
+ Driver for Intel Xe2 series GPUs and later. Experimental support
+ for Xe series is also available.
If "M" is selected, the module will be called xe.
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index 68f064f33d4b..9f4ade25787a 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -104,6 +104,8 @@ int xe_display_create(struct xe_device *xe)
spin_lock_init(&xe->display.fb_tracking.lock);
xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
+ if (!xe->display.hotplug.dp_wq)
+ return -ENOMEM;
return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
}
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
index f95375451e2f..9f941fc2e36b 100644
--- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -17,10 +17,7 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
{
- struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
-
iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
- xe_device_l2_flush(xe);
}
u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
@@ -30,12 +27,9 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
{
- struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
-
WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));
iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size);
- xe_device_l2_flush(xe);
}
bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
@@ -74,9 +68,12 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
{
+ struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
+
/*
* The memory barrier here is to ensure coherency of DSB vs MMIO,
* both for weak ordering archs and discrete cards.
*/
- xe_device_wmb(dsb_buf->vma->bo->tile->xe);
+ xe_device_wmb(xe);
+ xe_device_l2_flush(xe);
}
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index d918ae1c8061..55259969480b 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -164,6 +164,9 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
vma->dpt = dpt;
vma->node = dpt->ggtt_node[tile0->id];
+
+ /* Ensure DPT writes are flushed */
+ xe_device_l2_flush(xe);
return 0;
}
@@ -333,8 +336,6 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
if (ret)
goto err_unpin;
- /* Ensure DPT writes are flushed */
- xe_device_l2_flush(xe);
return vma;
err_unpin:
diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
index 5394a1373a6b..ef2bf984723f 100644
--- a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
@@ -40,6 +40,7 @@
#define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0)
#define PWR_LIM_VAL REG_GENMASK(14, 0)
#define PWR_LIM_EN REG_BIT(15)
+#define PWR_LIM REG_GENMASK(15, 0)
#define PWR_LIM_TIME REG_GENMASK(23, 17)
#define PWR_LIM_TIME_X REG_GENMASK(23, 22)
#define PWR_LIM_TIME_Y REG_GENMASK(21, 17)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 7a8af2311318..11e60d687572 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -171,14 +171,32 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
#define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G)
+/**
+ * xe_devcoredump_read() - Read data from the Xe device coredump snapshot
+ * @buffer: Destination buffer to copy the coredump data into
+ * @offset: Offset in the coredump data to start reading from
+ * @count: Number of bytes to read
+ * @data: Pointer to the xe_devcoredump structure
+ * @datalen: Length of the data (unused)
+ *
+ * Reads a chunk of the coredump snapshot data into the provided buffer.
+ * If the devcoredump is smaller than 1.5 GB (XE_DEVCOREDUMP_CHUNK_MAX),
+ * it is read directly from a pre-written buffer. For larger devcoredumps,
+ * the pre-written buffer must be periodically repopulated from the snapshot
+ * state due to kmalloc size limitations.
+ *
+ * Return: Number of bytes copied on success, or a negative error code on failure.
+ */
static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
size_t count, void *data, size_t datalen)
{
struct xe_devcoredump *coredump = data;
struct xe_devcoredump_snapshot *ss;
- ssize_t byte_copied;
+ ssize_t byte_copied = 0;
u32 chunk_offset;
ssize_t new_chunk_position;
+ bool pm_needed = false;
+ int ret = 0;
if (!coredump)
return -ENODEV;
@@ -188,20 +206,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
/* Ensure delayed work is captured before continuing */
flush_work(&ss->work);
- if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
+ pm_needed = ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX;
+ if (pm_needed)
xe_pm_runtime_get(gt_to_xe(ss->gt));
mutex_lock(&coredump->lock);
if (!ss->read.buffer) {
- mutex_unlock(&coredump->lock);
- return -ENODEV;
+ ret = -ENODEV;
+ goto unlock;
}
- if (offset >= ss->read.size) {
- mutex_unlock(&coredump->lock);
- return 0;
- }
+ if (offset >= ss->read.size)
+ goto unlock;
new_chunk_position = div_u64_rem(offset,
XE_DEVCOREDUMP_CHUNK_MAX,
@@ -221,12 +238,13 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
ss->read.size - offset;
memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied);
+unlock:
mutex_unlock(&coredump->lock);
- if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
+ if (pm_needed)
xe_pm_runtime_put(gt_to_xe(ss->gt));
- return byte_copied;
+ return byte_copied ? byte_copied : ret;
}
static void xe_devcoredump_free(void *data)
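
For reference, the chunked read documented above maps a flat offset into (chunk, offset-within-chunk) with div_u64_rem(); a small sketch of the arithmetic:

	u32 chunk_offset;
	u64 chunk_index = div_u64_rem(offset, XE_DEVCOREDUMP_CHUNK_MAX,
				      &chunk_offset);

	/* invariant: offset == chunk_index * XE_DEVCOREDUMP_CHUNK_MAX + chunk_offset */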
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index c02c4c4e9412..e9f3c1a53db2 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -40,6 +40,7 @@
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
+#include "xe_guc_pc.h"
#include "xe_hw_engine_group.h"
#include "xe_hwmon.h"
#include "xe_irq.h"
@@ -986,38 +987,15 @@ void xe_device_wmb(struct xe_device *xe)
xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
}
-/**
- * xe_device_td_flush() - Flush transient L3 cache entries
- * @xe: The device
- *
- * Display engine has direct access to memory and is never coherent with L3/L4
- * caches (or CPU caches), however KMD is responsible for specifically flushing
- * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
- * can happen from such a surface without seeing corruption.
- *
- * Display surfaces can be tagged as transient by mapping it using one of the
- * various L3:XD PAT index modes on Xe2.
- *
- * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
- * at the end of each submission via PIPE_CONTROL for compute/render, since SA
- * Media is not coherent with L3 and we want to support render-vs-media
- * usescases. For other engines like copy/blt the HW internally forces uncached
- * behaviour, hence why we can skip the TDF on such platforms.
+/*
+ * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt.
*/
-void xe_device_td_flush(struct xe_device *xe)
+static void tdf_request_sync(struct xe_device *xe)
{
- struct xe_gt *gt;
unsigned int fw_ref;
+ struct xe_gt *gt;
u8 id;
- if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
- return;
-
- if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
- xe_device_l2_flush(xe);
- return;
- }
-
for_each_gt(gt, xe, id) {
if (xe_gt_is_media_type(gt))
continue;
@@ -1027,6 +1005,7 @@ void xe_device_td_flush(struct xe_device *xe)
return;
xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+
/*
* FIXME: We can likely do better here with our choice of
* timeout. Currently we just assume the worst case, i.e. 150us,
@@ -1057,15 +1036,52 @@ void xe_device_l2_flush(struct xe_device *xe)
return;
spin_lock(&gt->global_invl_lock);
- xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
+ xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
xe_gt_err_once(gt, "Global invalidation timeout\n");
+
spin_unlock(&gt->global_invl_lock);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
+/**
+ * xe_device_td_flush() - Flush transient L3 cache entries
+ * @xe: The device
+ *
+ * Display engine has direct access to memory and is never coherent with L3/L4
+ * caches (or CPU caches), however KMD is responsible for specifically flushing
+ * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
+ * can happen from such a surface without seeing corruption.
+ *
+ * Display surfaces can be tagged as transient by mapping them using one of the
+ * various L3:XD PAT index modes on Xe2.
+ *
+ * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
+ * at the end of each submission via PIPE_CONTROL for compute/render, since SA
+ * Media is not coherent with L3 and we want to support render-vs-media
+ * use cases. For other engines like copy/blt the HW internally forces uncached
+ * behaviour, hence why we can skip the TDF on such platforms.
+ */
+void xe_device_td_flush(struct xe_device *xe)
+{
+ struct xe_gt *root_gt;
+
+ if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
+ return;
+
+ root_gt = xe_root_mmio_gt(xe);
+ if (XE_WA(root_gt, 16023588340)) {
+ /* A transient flush is not sufficient: flush the L2 */
+ xe_device_l2_flush(xe);
+ } else {
+ xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc);
+ tdf_request_sync(xe);
+ xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc);
+ }
+}
+
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
return xe_device_has_flat_ccs(xe) ?
diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h
index d61650d4aa0b..95242a375e54 100644
--- a/drivers/gpu/drm/xe/xe_drv.h
+++ b/drivers/gpu/drm/xe/xe_drv.h
@@ -9,7 +9,7 @@
#include <drm/drm_drv.h>
#define DRIVER_NAME "xe"
-#define DRIVER_DESC "Intel Xe Graphics"
+#define DRIVER_DESC "Intel Xe2 Graphics"
/* Interface history:
*
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 7062115909f2..2c799958c1e4 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -201,6 +201,13 @@ static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = {
.ggtt_set_pte = xe_ggtt_set_pte_and_flush,
};
+static void dev_fini_ggtt(void *arg)
+{
+ struct xe_ggtt *ggtt = arg;
+
+ drain_workqueue(ggtt->wq);
+}
+
/**
* xe_ggtt_init_early - Early GGTT initialization
* @ggtt: the &xe_ggtt to be initialized
@@ -257,6 +264,10 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
if (err)
return err;
+ err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt);
+ if (err)
+ return err;
+
if (IS_SRIOV_VF(xe)) {
err = xe_gt_sriov_vf_prepare_ggtt(xe_tile_get_gt(ggtt->tile, 0));
if (err)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 0e5d243c9451..e3517ce2e18c 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -118,7 +118,7 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
}
- xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
+ xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
@@ -417,6 +417,8 @@ int xe_gt_init_early(struct xe_gt *gt)
if (err)
return err;
+ xe_mocs_init_early(gt);
+
return 0;
}
@@ -630,17 +632,15 @@ int xe_gt_init(struct xe_gt *gt)
if (err)
return err;
- err = xe_gt_pagefault_init(gt);
+ err = xe_gt_sysfs_init(gt);
if (err)
return err;
- xe_mocs_init_early(gt);
-
- err = xe_gt_sysfs_init(gt);
+ err = gt_fw_domain_init(gt);
if (err)
return err;
- err = gt_fw_domain_init(gt);
+ err = xe_gt_pagefault_init(gt);
if (err)
return err;
@@ -839,6 +839,9 @@ static int gt_reset(struct xe_gt *gt)
goto err_out;
}
+ if (IS_SRIOV_PF(gt_to_xe(gt)))
+ xe_gt_sriov_pf_stop_prepare(gt);
+
xe_uc_gucrc_disable(&gt->uc);
xe_uc_stop_prepare(&gt->uc);
xe_gt_pagefault_reset(gt);
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 187fa6490eaf..6357325f3939 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -24,7 +24,7 @@
extern struct fault_attr gt_reset_failure;
static inline bool xe_fault_inject_gt_reset(void)
{
- return should_fail(&gt_reset_failure, 1);
+ return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(&gt_reset_failure, 1);
}
struct xe_gt *xe_gt_alloc(struct xe_tile *tile);
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 10622ca471a2..6717a636b1d9 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -444,6 +444,7 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue)
#define PF_MULTIPLIER 8
pf_queue->num_dw =
(num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER;
+ pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw);
#undef PF_MULTIPLIER
pf_queue->gt = gt;
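
Rounding num_dw up to a power of two suggests the page-fault queue wraps its indices with a mask rather than a divide, which only works for power-of-two sizes. A hedged sketch of the presumed access pattern (tail and raw_num_dw are illustrative):

	u32 num_dw = roundup_pow_of_two(raw_num_dw);
	u32 tail = 0;

	/* advance and wrap without a modulo; valid only for power-of-two num_dw */
	tail = (tail + PF_MSG_LEN_DW) & (num_dw - 1);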
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
index c08efca6420e..35489fa81825 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -172,6 +172,25 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid)
pf_clear_vf_scratch_regs(gt, vfid);
}
+static void pf_cancel_restart(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ if (cancel_work_sync(&gt->sriov.pf.workers.restart))
+ xe_gt_sriov_dbg_verbose(gt, "pending restart canceled!\n");
+}
+
+/**
+ * xe_gt_sriov_pf_stop_prepare() - Prepare to stop SR-IOV support.
+ * @gt: the &xe_gt
+ *
+ * This function can only be called on the PF.
+ */
+void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt)
+{
+ pf_cancel_restart(gt);
+}
+
static void pf_restart(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
index f474509411c0..e2b2ff8132dc 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
@@ -13,6 +13,7 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt);
int xe_gt_sriov_pf_init(struct xe_gt *gt);
void xe_gt_sriov_pf_init_hw(struct xe_gt *gt);
void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid);
+void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt);
void xe_gt_sriov_pf_restart(struct xe_gt *gt);
#else
static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
@@ -29,6 +30,10 @@ static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt)
{
}
+static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt)
+{
+}
+
static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt)
{
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 2420a548cacc..53a44702c04a 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -2364,6 +2364,21 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid,
return err;
}
+static int pf_push_self_config(struct xe_gt *gt)
+{
+ int err;
+
+ err = pf_push_full_vf_config(gt, PFID);
+ if (err) {
+ xe_gt_sriov_err(gt, "Failed to push self configuration (%pe)\n",
+ ERR_PTR(err));
+ return err;
+ }
+
+ xe_gt_sriov_dbg_verbose(gt, "self configuration completed\n");
+ return 0;
+}
+
static void fini_config(void *arg)
{
struct xe_gt *gt = arg;
@@ -2387,9 +2402,17 @@ static void fini_config(void *arg)
int xe_gt_sriov_pf_config_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
+ int err;
xe_gt_assert(gt, IS_SRIOV_PF(xe));
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ err = pf_push_self_config(gt);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ if (err)
+ return err;
+
return devm_add_action_or_reset(xe->drm.dev, fini_config, gt);
}
@@ -2407,6 +2430,10 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt)
unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
unsigned int fail = 0, skip = 0;
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ pf_push_self_config(gt);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
for (n = 1; n <= total_vfs; n++) {
if (xe_gt_sriov_pf_config_is_empty(gt, n))
skip++;
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 084cbdeba8ea..e1362e608146 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -138,6 +138,14 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
int pending_seqno;
/*
+ * We can get here before the CTs are even initialized if we're wedging
+ * very early, in which case there are not going to be any pending
+ * fences so we can bail immediately.
+ */
+ if (!xe_guc_ct_initialized(&gt->uc.guc.ct))
+ return;
+
+ /*
* CT channel is already disabled at this point. No new TLB requests can
* appear.
*/
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 2447de0ebedf..bbcbb348256f 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -34,6 +34,11 @@
#include "xe_pm.h"
#include "xe_trace_guc.h"
+static void receive_g2h(struct xe_guc_ct *ct);
+static void g2h_worker_func(struct work_struct *w);
+static void safe_mode_worker_func(struct work_struct *w);
+static void ct_exit_safe_mode(struct xe_guc_ct *ct);
+
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
enum {
/* Internal states, not error conditions */
@@ -186,14 +191,11 @@ static void guc_ct_fini(struct drm_device *drm, void *arg)
{
struct xe_guc_ct *ct = arg;
+ ct_exit_safe_mode(ct);
destroy_workqueue(ct->g2h_wq);
xa_destroy(&ct->fence_lookup);
}
-static void receive_g2h(struct xe_guc_ct *ct);
-static void g2h_worker_func(struct work_struct *w);
-static void safe_mode_worker_func(struct work_struct *w);
-
static void primelockdep(struct xe_guc_ct *ct)
{
if (!IS_ENABLED(CONFIG_LOCKDEP))
@@ -514,6 +516,9 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct)
*/
void xe_guc_ct_stop(struct xe_guc_ct *ct)
{
+ if (!xe_guc_ct_initialized(ct))
+ return;
+
xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED);
stop_g2h_handler(ct);
}
@@ -760,7 +765,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
u16 seqno;
int ret;
- xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+ xe_gt_assert(gt, xe_guc_ct_initialized(ct));
xe_gt_assert(gt, !g2h_len || !g2h_fence);
xe_gt_assert(gt, !num_g2h || !g2h_fence);
xe_gt_assert(gt, !g2h_len || num_g2h);
@@ -1344,7 +1349,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
u32 action;
u32 *hxg;
- xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+ xe_gt_assert(gt, xe_guc_ct_initialized(ct));
lockdep_assert_held(&ct->fast_lock);
if (ct->state == XE_GUC_CT_STATE_DISABLED)
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index 82c4ae458dda..582aac106469 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -22,6 +22,11 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_pr
void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb);
+static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct)
+{
+ return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED;
+}
+
static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct)
{
return ct->state == XE_GUC_CT_STATE_ENABLED;
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 18c623992035..c0ca61695d76 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -5,8 +5,11 @@
#include "xe_guc_pc.h"
+#include <linux/cleanup.h>
#include <linux/delay.h>
+#include <linux/jiffies.h>
#include <linux/ktime.h>
+#include <linux/wait_bit.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
@@ -51,9 +54,12 @@
#define LNL_MERT_FREQ_CAP 800
#define BMG_MERT_FREQ_CAP 2133
+#define BMG_MIN_FREQ 1200
+#define BMG_MERT_FLUSH_FREQ_CAP 2600
#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */
#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */
+#define SLPC_ACT_FREQ_TIMEOUT_MS 100
/**
* DOC: GuC Power Conservation (PC)
@@ -141,6 +147,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc,
return -ETIMEDOUT;
}
+static int wait_for_flush_complete(struct xe_guc_pc *pc)
+{
+ const unsigned long timeout = msecs_to_jiffies(30);
+
+ if (!wait_var_event_timeout(&pc->flush_freq_limit,
+ !atomic_read(&pc->flush_freq_limit),
+ timeout))
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq)
+{
+ int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC;
+ int slept, wait = 10;
+
+ for (slept = 0; slept < timeout_us;) {
+ if (xe_guc_pc_get_act_freq(pc) <= freq)
+ return 0;
+
+ usleep_range(wait, wait << 1);
+ slept += wait;
+ wait <<= 1;
+ if (slept + wait > timeout_us)
+ wait = timeout_us - slept;
+ }
+
+ return -ETIMEDOUT;
+}
static int pc_action_reset(struct xe_guc_pc *pc)
{
struct xe_guc_ct *ct = pc_to_ct(pc);
@@ -553,6 +589,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
return pc->rpn_freq;
}
+static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq)
+{
+ int ret;
+
+ lockdep_assert_held(&pc->freq_lock);
+
+ /* Might be in the middle of a gt reset */
+ if (!pc->freq_ready)
+ return -EAGAIN;
+
+ ret = pc_action_query_task_state(pc);
+ if (ret)
+ return ret;
+
+ *freq = pc_get_min_freq(pc);
+
+ return 0;
+}
+
/**
* xe_guc_pc_get_min_freq - Get the min operational frequency
* @pc: The GuC PC
@@ -563,26 +618,28 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
*/
int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
{
+ guard(mutex)(&pc->freq_lock);
+
+ return xe_guc_pc_get_min_freq_locked(pc, freq);
+}
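
The accessor above is now a thin wrapper built on scope-based locking from <linux/cleanup.h>: guard(mutex)(&lock) takes the mutex at the declaration and releases it automatically on every return path, which is what makes the unlock/goto boilerplate deleted below unnecessary. A minimal sketch with an illustrative lock and value:

#include <linux/cleanup.h>
#include <linux/errno.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(demo_lock);		/* illustrative, not the driver's lock */
static int demo_value;

static int demo_get(int *out)
{
	guard(mutex)(&demo_lock);	/* unlocked automatically at scope exit */

	if (demo_value < 0)		/* early return is safe: the guard unwinds */
		return -EINVAL;

	*out = demo_value;
	return 0;
}
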
+
+static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq)
+{
int ret;
- xe_device_assert_mem_access(pc_to_xe(pc));
+ lockdep_assert_held(&pc->freq_lock);
- mutex_lock(&pc->freq_lock);
- if (!pc->freq_ready) {
- /* Might be in the middle of a gt reset */
- ret = -EAGAIN;
- goto out;
- }
+ /* Might be in the middle of a gt reset */
+ if (!pc->freq_ready)
+ return -EAGAIN;
- ret = pc_action_query_task_state(pc);
+ ret = pc_set_min_freq(pc, freq);
if (ret)
- goto out;
+ return ret;
- *freq = pc_get_min_freq(pc);
+ pc->user_requested_min = freq;
-out:
- mutex_unlock(&pc->freq_lock);
- return ret;
+ return 0;
}
/**
@@ -596,24 +653,28 @@ out:
*/
int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
{
+ guard(mutex)(&pc->freq_lock);
+
+ return xe_guc_pc_set_min_freq_locked(pc, freq);
+}
+
+static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq)
+{
int ret;
- mutex_lock(&pc->freq_lock);
- if (!pc->freq_ready) {
- /* Might be in the middle of a gt reset */
- ret = -EAGAIN;
- goto out;
- }
+ lockdep_assert_held(&pc->freq_lock);
- ret = pc_set_min_freq(pc, freq);
+ /* Might be in the middle of a gt reset */
+ if (!pc->freq_ready)
+ return -EAGAIN;
+
+ ret = pc_action_query_task_state(pc);
if (ret)
- goto out;
+ return ret;
- pc->user_requested_min = freq;
+ *freq = pc_get_max_freq(pc);
-out:
- mutex_unlock(&pc->freq_lock);
- return ret;
+ return 0;
}
/**
@@ -626,24 +687,28 @@ out:
*/
int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq)
{
+ guard(mutex)(&pc->freq_lock);
+
+ return xe_guc_pc_get_max_freq_locked(pc, freq);
+}
+
+static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq)
+{
int ret;
- mutex_lock(&pc->freq_lock);
- if (!pc->freq_ready) {
- /* Might be in the middle of a gt reset */
- ret = -EAGAIN;
- goto out;
- }
+ lockdep_assert_held(&pc->freq_lock);
- ret = pc_action_query_task_state(pc);
+ /* Might be in the middle of a gt reset */
+ if (!pc->freq_ready)
+ return -EAGAIN;
+
+ ret = pc_set_max_freq(pc, freq);
if (ret)
- goto out;
+ return ret;
- *freq = pc_get_max_freq(pc);
+ pc->user_requested_max = freq;
-out:
- mutex_unlock(&pc->freq_lock);
- return ret;
+ return 0;
}
/**
@@ -657,24 +722,14 @@ out:
*/
int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
{
- int ret;
-
- mutex_lock(&pc->freq_lock);
- if (!pc->freq_ready) {
- /* Might be in the middle of a gt reset */
- ret = -EAGAIN;
- goto out;
+ if (XE_WA(pc_to_gt(pc), 22019338487)) {
+ if (wait_for_flush_complete(pc) != 0)
+ return -EAGAIN;
}
- ret = pc_set_max_freq(pc, freq);
- if (ret)
- goto out;
-
- pc->user_requested_max = freq;
+ guard(mutex)(&pc->freq_lock);
-out:
- mutex_unlock(&pc->freq_lock);
- return ret;
+ return xe_guc_pc_set_max_freq_locked(pc, freq);
}
/**
@@ -817,6 +872,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc)
static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
{
+ struct xe_tile *tile = gt_to_tile(pc_to_gt(pc));
int ret;
lockdep_assert_held(&pc->freq_lock);
@@ -843,6 +899,9 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
if (pc_get_min_freq(pc) > pc->rp0_freq)
ret = pc_set_min_freq(pc, pc->rp0_freq);
+ if (XE_WA(tile->primary_gt, 14022085890))
+ ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc)));
+
out:
return ret;
}
@@ -868,30 +927,117 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
return ret;
}
-static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
+static bool needs_flush_freq_limit(struct xe_guc_pc *pc)
{
- int ret = 0;
+ struct xe_gt *gt = pc_to_gt(pc);
- if (XE_WA(pc_to_gt(pc), 22019338487)) {
- /*
- * Get updated min/max and stash them.
- */
- ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq);
- if (!ret)
- ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq);
- if (ret)
- return ret;
+ return XE_WA(gt, 22019338487) &&
+ pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP;
+}
+
+/**
+ * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush
+ * @pc: the xe_guc_pc object
+ *
+ * As per the WA, reduce the max GT frequency while an L2 cache flush is in progress.
+ */
+void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ u32 max_freq;
+ int ret;
+
+ if (!needs_flush_freq_limit(pc))
+ return;
+
+ guard(mutex)(&pc->freq_lock);
+
+ ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq);
+ if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) {
+ ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP);
+ if (ret) {
+ xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n",
+ BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
+ return;
+ }
+
+ atomic_set(&pc->flush_freq_limit, 1);
/*
- * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
+ * If the user has previously changed the max freq, stash that value
+ * to restore later; otherwise stash the current max. New user
+ * requests wait until the flush completes.
*/
- mutex_lock(&pc->freq_lock);
- ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
- if (!ret)
- ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
- mutex_unlock(&pc->freq_lock);
+ if (pc->user_requested_max != 0)
+ pc->stashed_max_freq = pc->user_requested_max;
+ else
+ pc->stashed_max_freq = max_freq;
}
+ /*
+ * Wait for the actual freq to drop below the flush cap: even if the
+ * previous max was below the cap, the current one might still be above it.
+ */
+ ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP);
+ if (ret)
+ xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n",
+ BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
+}
+
+/**
+ * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes.
+ * @pc: the xe_guc_pc object
+ *
+ * Restore the GT max frequency that was stashed when the flush limit was applied.
+ */
+void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ int ret = 0;
+
+ if (!needs_flush_freq_limit(pc))
+ return;
+
+ if (!atomic_read(&pc->flush_freq_limit))
+ return;
+
+ mutex_lock(&pc->freq_lock);
+
+ ret = pc_set_max_freq(&gt->uc.guc.pc, pc->stashed_max_freq);
+ if (ret)
+ xe_gt_err_once(gt, "Failed to restore max freq %u:%d",
+ pc->stashed_max_freq, ret);
+
+ atomic_set(&pc->flush_freq_limit, 0);
+ mutex_unlock(&pc->freq_lock);
+ wake_up_var(&pc->flush_freq_limit);
+}
+
+static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
+{
+ int ret;
+
+ if (!XE_WA(pc_to_gt(pc), 22019338487))
+ return 0;
+
+ guard(mutex)(&pc->freq_lock);
+
+ /*
+ * Get updated min/max and stash them.
+ */
+ ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq);
+ if (!ret)
+ ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq);
+ if (ret)
+ return ret;
+
+ /*
+ * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
+ */
+ ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
+ if (!ret)
+ ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
+
return ret;
}
@@ -1068,7 +1214,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
goto out;
}
- memset(pc->bo->vmap.vaddr, 0, size);
+ xe_map_memset(xe, &pc->bo->vmap, 0, 0, size);
slpc_shared_data_write(pc, header.size, size);
earlier = ktime_get();
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index 0a2664d5c811..52ecdd5ddbff 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -38,5 +38,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
void xe_guc_pc_init_early(struct xe_guc_pc *pc);
int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc);
+void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc);
+void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc);
#endif /* _XE_GUC_PC_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
index 2978ac9a249b..c02053948a57 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -15,6 +15,8 @@
struct xe_guc_pc {
/** @bo: GGTT buffer object that is shared with GuC PC */
struct xe_bo *bo;
+ /** @flush_freq_limit: 1 when max freq changes are limited by driver */
+ atomic_t flush_freq_limit;
/** @rp0_freq: HW RP0 frequency - The Maximum one */
u32 rp0_freq;
/** @rpa_freq: HW RPa frequency - The Achievable one */
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 6d84a52b660a..2ac87ff4a057 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -891,12 +891,13 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
struct xe_exec_queue *q = ge->q;
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_gpu_scheduler *sched = &ge->sched;
- bool wedged;
+ bool wedged = false;
xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q));
trace_xe_exec_queue_lr_cleanup(q);
- wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
+ if (!exec_queue_killed(q))
+ wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
/* Kill the run_job / process_msg entry points */
xe_sched_submission_stop(sched);
@@ -1070,7 +1071,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
int err = -ETIME;
pid_t pid = -1;
int i = 0;
- bool wedged, skip_timeout_check;
+ bool wedged = false, skip_timeout_check;
/*
* TDR has fired before free job worker. Common if exec queue
@@ -1116,7 +1117,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
* doesn't work for SRIOV. For now assuming timeouts in wedged mode are
* genuine timeouts.
*/
- wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
+ if (!exec_queue_killed(q))
+ wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
/* Engine state now stable, disable scheduling to check timestamp */
if (!wedged && exec_queue_registered(q)) {
@@ -1762,6 +1764,9 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
int ret;
+ if (!guc->submission_state.initialized)
+ return 0;
+
/*
* Using an atomic here rather than submission_state.lock as this
* function can be called while holding the CT lock (engine reset
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 74f31639b37f..f008e8049700 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -159,8 +159,8 @@ static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 att
return ret;
}
-static int xe_hwmon_pcode_write_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel,
- u32 uval)
+static int xe_hwmon_pcode_rmw_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel,
+ u32 clr, u32 set)
{
struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe);
u32 val0, val1;
@@ -179,7 +179,7 @@ static int xe_hwmon_pcode_write_power_limit(const struct xe_hwmon *hwmon, u32 at
channel, val0, val1, ret);
if (attr == PL1_HWMON_ATTR)
- val0 = uval;
+ val0 = (val0 & ~clr) | set;
else
return -EIO;
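
The write helper becomes a read-modify-write so unrelated bit fields in the pcode register survive an update; the core is the classic clear-then-set idiom. A tiny standalone version with a worked value:

#include <linux/types.h>

/* clear the bits in @clr, then set the bits in @set */
static inline u32 demo_rmw32(u32 old, u32 clr, u32 set)
{
	return (old & ~clr) | set;
}

/*
 * Example: demo_rmw32(0xabcd, 0x00ff, 0x0012) == 0xab12 -- the low byte
 * is cleared and replaced while the high byte is left untouched.
 */
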
@@ -339,7 +339,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe
if (hwmon->xe->info.has_mbx_power_limits) {
drm_dbg(&hwmon->xe->drm, "disabling %s on channel %d\n",
PWR_ATTR_TO_STR(attr), channel);
- xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, 0);
+ xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM_EN, 0);
xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &reg_val);
} else {
reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN, 0);
@@ -370,10 +370,9 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe
}
if (hwmon->xe->info.has_mbx_power_limits)
- ret = xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, reg_val);
+ ret = xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM, reg_val);
else
- reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN | PWR_LIM_VAL,
- reg_val);
+ reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM, reg_val);
unlock:
mutex_unlock(&hwmon->hwmon_lock);
return ret;
@@ -563,14 +562,11 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
mutex_lock(&hwmon->hwmon_lock);
- if (hwmon->xe->info.has_mbx_power_limits) {
- ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r);
- r = (r & ~PWR_LIM_TIME) | rxy;
- xe_hwmon_pcode_write_power_limit(hwmon, power_attr, channel, r);
- } else {
+ if (hwmon->xe->info.has_mbx_power_limits)
+ xe_hwmon_pcode_rmw_power_limit(hwmon, power_attr, channel, PWR_LIM_TIME, rxy);
+ else
r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel),
PWR_LIM_TIME, rxy);
- }
mutex_unlock(&hwmon->hwmon_lock);
@@ -1138,12 +1134,12 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon)
} else {
drm_info(&hwmon->xe->drm, "Using mailbox commands for power limits\n");
/* Write default limits to read from pcode from now on. */
- xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR,
- CHANNEL_CARD,
- hwmon->pl1_on_boot[CHANNEL_CARD]);
- xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR,
- CHANNEL_PKG,
- hwmon->pl1_on_boot[CHANNEL_PKG]);
+ xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR,
+ CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME,
+ hwmon->pl1_on_boot[CHANNEL_CARD]);
+ xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR,
+ CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME,
+ hwmon->pl1_on_boot[CHANNEL_PKG]);
hwmon->scl_shift_power = PWR_UNIT;
hwmon->scl_shift_energy = ENERGY_UNIT;
hwmon->scl_shift_time = TIME_UNIT;
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index 63db66df064b..023ed6a6b49d 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -78,6 +78,9 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level
}
lmtt_assert(lmtt, xe_bo_is_vram(bo));
+ lmtt_debug(lmtt, "level=%u addr=%#llx\n", level, (u64)xe_bo_main_addr(bo, XE_PAGE_SIZE));
+
+ xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, bo->size);
pt->level = level;
pt->bo = bo;
@@ -91,6 +94,9 @@ out:
static void lmtt_pt_free(struct xe_lmtt_pt *pt)
{
+ lmtt_debug(&pt->bo->tile->sriov.pf.lmtt, "level=%u addr=%llx\n",
+ pt->level, (u64)xe_bo_main_addr(pt->bo, XE_PAGE_SIZE));
+
xe_bo_unpin_map_no_vm(pt->bo);
kfree(pt);
}
@@ -226,9 +232,14 @@ static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt,
switch (lmtt->ops->lmtt_pte_size(level)) {
case sizeof(u32):
+ lmtt_assert(lmtt, !overflows_type(pte, u32));
+ lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u32), u32));
+
xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte);
break;
case sizeof(u64):
+ lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u64), u64));
+
xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte);
break;
default:
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 63d74e27f54c..6e7b70532d11 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -40,6 +40,7 @@
#define LRC_PPHWSP_SIZE SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
+#define LRC_WA_BB_SIZE SZ_4K
static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
@@ -910,7 +911,11 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
{
xe_hw_fence_ctx_finish(&lrc->fence_ctx);
xe_bo_unpin_map_no_vm(lrc->bo);
- xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo);
+}
+
+static size_t wa_bb_offset(struct xe_lrc *lrc)
+{
+ return lrc->bo->size - LRC_WA_BB_SIZE;
}
/*
@@ -941,11 +946,19 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
* store it in the PPHSWP.
*/
#define CONTEXT_ACTIVE 1ULL
-static void xe_lrc_setup_utilization(struct xe_lrc *lrc)
+static int xe_lrc_setup_utilization(struct xe_lrc *lrc)
{
- u32 *cmd;
+ const size_t max_size = LRC_WA_BB_SIZE;
+ u32 *cmd, *buf = NULL;
- cmd = lrc->bb_per_ctx_bo->vmap.vaddr;
+ if (lrc->bo->vmap.is_iomem) {
+ buf = kmalloc(max_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ cmd = buf;
+ } else {
+ cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc);
+ }
*cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
*cmd++ = ENGINE_ID(0).addr;
@@ -966,9 +979,17 @@ static void xe_lrc_setup_utilization(struct xe_lrc *lrc)
*cmd++ = MI_BATCH_BUFFER_END;
- xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
- xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1);
+ if (buf) {
+ xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap,
+ wa_bb_offset(lrc), buf,
+ (cmd - buf) * sizeof(*cmd));
+ kfree(buf);
+ }
+
+ xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) +
+ wa_bb_offset(lrc) + 1);
+ return 0;
}
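
Dereferencing vmap.vaddr directly is only valid for system-memory objects; when the buffer lands in VRAM (is_iomem set), the commands are assembled in a kmalloc'd bounce buffer and copied out in one bulk write, as above. A generic sketch of that pattern, with hypothetical names:

#include <linux/iosys-map.h>
#include <linux/slab.h>
#include <linux/types.h>

static int demo_emit_cmds(struct iosys_map *map, size_t off, size_t max_size)
{
	u32 *cmd, *buf = NULL;

	if (map->is_iomem) {
		/* can't write io memory through a plain pointer: stage in RAM */
		buf = kmalloc(max_size, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;
		cmd = buf;
	} else {
		cmd = map->vaddr + off;	/* system memory: write in place */
	}

	*cmd++ = 0x05000000;		/* ... emit command dwords ... */

	if (buf) {
		/* one bulk copy into the io mapping, then drop the stage */
		iosys_map_memcpy_to(map, off, buf, (cmd - buf) * sizeof(*cmd));
		kfree(buf);
	}

	return 0;
}
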
#define PVC_CTX_ASID (0x2e + 1)
@@ -1004,20 +1025,13 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
* FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
* via VM bind calls.
*/
- lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, lrc_size,
+ lrc->bo = xe_bo_create_pin_map(xe, tile, NULL,
+ lrc_size + LRC_WA_BB_SIZE,
ttm_bo_type_kernel,
bo_flags);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
- lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
- ttm_bo_type_kernel,
- bo_flags);
- if (IS_ERR(lrc->bb_per_ctx_bo)) {
- err = PTR_ERR(lrc->bb_per_ctx_bo);
- goto err_lrc_finish;
- }
-
lrc->size = lrc_size;
lrc->ring.size = ring_size;
lrc->ring.tail = 0;
@@ -1125,7 +1139,9 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
map = __xe_lrc_start_seqno_map(lrc);
xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
- xe_lrc_setup_utilization(lrc);
+ err = xe_lrc_setup_utilization(lrc);
+ if (err)
+ goto err_lrc_finish;
return 0;
@@ -1803,7 +1819,8 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
snapshot->seqno = xe_lrc_seqno(lrc);
snapshot->lrc_bo = xe_bo_get(lrc->bo);
snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
- snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
+ snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset -
+ LRC_WA_BB_SIZE;
snapshot->lrc_snapshot = NULL;
snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
index ae24cf6f8dd9..883e550a9423 100644
--- a/drivers/gpu/drm/xe/xe_lrc_types.h
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -53,9 +53,6 @@ struct xe_lrc {
/** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */
u64 ctx_timestamp;
-
- /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */
- struct xe_bo *bb_per_ctx_bo;
};
struct xe_lrc_snapshot;
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 8f8e9fdfb2a8..07a5161c7d5b 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -82,7 +82,7 @@ struct xe_migrate {
* of the instruction. Subtracting the instruction header (1 dword) and
* address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values.
*/
-#define MAX_PTE_PER_SDI 0x1FE
+#define MAX_PTE_PER_SDI 0x1FEU
/**
* xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue.
@@ -863,7 +863,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it))
xe_res_next(&src_it, src_L0);
else
- emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs,
+ emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || use_comp_pat,
&src_it, src_L0, src);
if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it))
@@ -1553,15 +1553,17 @@ static u32 pte_update_cmd_size(u64 size)
u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE);
XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER);
+
/*
* MI_STORE_DATA_IMM command is used to update page table. Each
- * instruction can update maximumly 0x1ff pte entries. To update
- * n (n <= 0x1ff) pte entries, we need:
- * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc)
- * 2 dword for the page table's physical location
- * 2*n dword for value of pte to fill (each pte entry is 2 dwords)
+ * instruction can update at most MAX_PTE_PER_SDI pte entries. To
+ * update n (n <= MAX_PTE_PER_SDI) pte entries, we need:
+ *
+ * - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc.)
+ * - 2 dwords for the page table's physical location
+ * - 2*n dwords for the pte values to fill (each pte entry is 2 dwords)
*/
- num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff);
+ num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, MAX_PTE_PER_SDI);
num_dword += entries * 2;
return num_dword;
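
A quick worked example of this budget, using illustrative numbers:

/*
 * Illustrative: updating 1 MiB of mappings with XE_PAGE_SIZE = 4 KiB gives
 * entries = 1 MiB / 4 KiB = 256. Since 256 <= MAX_PTE_PER_SDI (0x1FE = 510),
 * a single MI_STORE_DATA_IMM suffices:
 *
 *   num_dword = (1 + 2) * DIV_U64_ROUND_UP(256, 510) + 256 * 2
 *             = 3 * 1 + 512
 *             = 515 dwords
 */
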
@@ -1577,7 +1579,7 @@ static void build_pt_update_batch_sram(struct xe_migrate *m,
ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
while (ptes) {
- u32 chunk = min(0x1ffU, ptes);
+ u32 chunk = min(MAX_PTE_PER_SDI, ptes);
bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
bb->cs[bb->len++] = pt_offset;
@@ -1815,8 +1817,8 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
xe_bo_assert_held(bo);
/* Use bounce buffer for small access and unaligned access */
- if (len & XE_CACHELINE_MASK ||
- ((uintptr_t)buf | offset) & XE_CACHELINE_MASK) {
+ if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) ||
+ !IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) {
int buf_offset = 0;
/*
@@ -1846,7 +1848,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
err = xe_migrate_access_memory(m, bo,
offset & ~XE_CACHELINE_MASK,
(void *)ptr,
- sizeof(bounce), 0);
+ sizeof(bounce), write);
if (err)
return err;
} else {
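
IS_ALIGNED(x, a) is the readable spelling of the open-coded mask test it replaces; for a power-of-two a, !(x & (a - 1)) and IS_ALIGNED(x, a) are equivalent. A small check, with an assumed 64-byte cacheline:

#include <linux/align.h>
#include <linux/types.h>

#define DEMO_CACHELINE_BYTES 64	/* assumed, mirrors XE_CACHELINE_BYTES */

static bool demo_needs_bounce(unsigned long buf, unsigned long offset,
			      unsigned long len)
{
	/* unaligned length or unaligned start => use the bounce path */
	return !IS_ALIGNED(len, DEMO_CACHELINE_BYTES) ||
	       !IS_ALIGNED(buf + offset, DEMO_CACHELINE_BYTES);
}
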
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index e4742e27e2cd..da6793c2f991 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -20,7 +20,7 @@
struct xe_modparam xe_modparam = {
.probe_display = true,
- .guc_log_level = 3,
+ .guc_log_level = IS_ENABLED(CONFIG_DRM_XE_DEBUG) ? 3 : 1,
.force_probe = CONFIG_DRM_XE_FORCE_PROBE,
.wedged_mode = 1,
.svm_notifier_size = 512,
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index ac4beaed58ff..278af53c74dc 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -140,7 +140,6 @@ static const struct xe_graphics_desc graphics_xelpg = {
.has_asid = 1, \
.has_atomic_enable_pte_bit = 1, \
.has_flat_ccs = 1, \
- .has_indirect_ring_state = 1, \
.has_range_tlb_invalidation = 1, \
.has_usm = 1, \
.has_64bit_timestamp = 1, \
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index ff749edc005b..ad263de44111 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -134,7 +134,7 @@ int xe_pm_suspend(struct xe_device *xe)
/* FIXME: Super racy... */
err = xe_bo_evict_all(xe);
if (err)
- goto err_pxp;
+ goto err_display;
for_each_gt(gt, xe, id) {
err = xe_gt_suspend(gt);
@@ -151,7 +151,6 @@ int xe_pm_suspend(struct xe_device *xe)
err_display:
xe_display_pm_resume(xe);
-err_pxp:
xe_pxp_pm_resume(xe->pxp);
err:
drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
@@ -753,11 +752,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
}
/**
- * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold
+ * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold
* @xe: xe device instance
- * @threshold: VRAM size in bites for the D3cold threshold
+ * @threshold: VRAM size in MiB for the D3cold threshold
*
- * Returns 0 for success, negative error code otherwise.
+ * Return:
+ * * 0 - success
+ * * -EINVAL - invalid argument
*/
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index bc1689db4cd7..7b50c7c1ee21 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -110,13 +110,14 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
return i;
}
-static int emit_flush_invalidate(u32 *dw, int i)
+static int emit_flush_invalidate(u32 addr, u32 val, u32 *dw, int i)
{
dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
- MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX;
- dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR;
- dw[i++] = 0;
+ MI_FLUSH_IMM_DW;
+
+ dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
dw[i++] = 0;
+ dw[i++] = val;
return i;
}
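
The rewritten helper stores an explicit value at a caller-supplied GGTT address instead of the per-context scratch slot. For reference, a call emit_flush_invalidate(addr, val, dw, i) appends these four dwords:

/*
 *   dw[i + 0] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW
 *               | MI_FLUSH_IMM_DW            command header
 *   dw[i + 1] = addr | MI_FLUSH_DW_USE_GTT   store target (GGTT address)
 *   dw[i + 2] = 0                            upper address dword
 *   dw[i + 3] = val                          immediate value to store
 */
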
@@ -397,23 +398,20 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
static void emit_migration_job_gen12(struct xe_sched_job *job,
struct xe_lrc *lrc, u32 seqno)
{
+ u32 saddr = xe_lrc_start_seqno_ggtt_addr(lrc);
u32 dw[MAX_JOB_SIZE_DW], i = 0;
i = emit_copy_timestamp(lrc, dw, i);
- i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
- seqno, dw, i);
+ i = emit_store_imm_ggtt(saddr, seqno, dw, i);
dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */
i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i);
- if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) {
- /* XXX: Do we need this? Leaving for now. */
- dw[i++] = preparser_disable(true);
- i = emit_flush_invalidate(dw, i);
- dw[i++] = preparser_disable(false);
- }
+ dw[i++] = preparser_disable(true);
+ i = emit_flush_invalidate(saddr, seqno, dw, i);
+ dw[i++] = preparser_disable(false);
i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i);
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 6345896585de..f0b167b3fb6a 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -764,7 +764,7 @@ static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range,
return false;
}
- if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) {
+ if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) {
drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
return false;
}
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 2741849bbf4d..a6612105201a 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -114,10 +114,10 @@ struct fw_blobs_by_type {
#define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED
#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \
- fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \
- fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \
+ fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 45, 2)) \
+ fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \
fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \
- fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 44, 1)) \
+ fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \
fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \
fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \
fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 9efc5accd43d..6d70109fcc43 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -21,7 +21,8 @@
GRAPHICS_VERSION_RANGE(1270, 1274)
MEDIA_VERSION(1300)
PLATFORM(DG2)
-14018094691 GRAPHICS_VERSION(2004)
+14018094691 GRAPHICS_VERSION_RANGE(2001, 2002)
+ GRAPHICS_VERSION(2004)
14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
18024947630 GRAPHICS_VERSION(2001)
GRAPHICS_VERSION(2004)
@@ -37,10 +38,10 @@
GRAPHICS_VERSION(2004)
GRAPHICS_VERSION_RANGE(3000, 3001)
22019338487 MEDIA_VERSION(2000)
- GRAPHICS_VERSION(2001)
+ GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf)
MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf)
22019338487_display PLATFORM(LUNARLAKE)
-16023588340 GRAPHICS_VERSION(2001)
+16023588340 GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf)
14019789679 GRAPHICS_VERSION(1255)
GRAPHICS_VERSION_RANGE(1270, 2004)
no_media_l3 MEDIA_VERSION(3000)
@@ -59,3 +60,7 @@ no_media_l3 MEDIA_VERSION(3000)
MEDIA_VERSION_RANGE(1301, 3000)
16026508708 GRAPHICS_VERSION_RANGE(1200, 3001)
MEDIA_VERSION_RANGE(1300, 3000)
+
+# SoC workaround - currently applies to all platforms with the following
+# primary GT GMDID
+14022085890 GRAPHICS_VERSION(2001)
diff --git a/drivers/hid/hid-appletb-kbd.c b/drivers/hid/hid-appletb-kbd.c
index 6f251b284018..b00687e67ce8 100644
--- a/drivers/hid/hid-appletb-kbd.c
+++ b/drivers/hid/hid-appletb-kbd.c
@@ -430,14 +430,20 @@ static int appletb_kbd_probe(struct hid_device *hdev, const struct hid_device_id
ret = appletb_kbd_set_mode(kbd, appletb_tb_def_mode);
if (ret) {
dev_err_probe(dev, ret, "Failed to set touchbar mode\n");
- goto close_hw;
+ goto unregister_handler;
}
hid_set_drvdata(hdev, kbd);
return 0;
+unregister_handler:
+ input_unregister_handler(&kbd->inp_handler);
close_hw:
+ if (kbd->backlight_dev) {
+ put_device(&kbd->backlight_dev->dev);
+ timer_delete_sync(&kbd->inactivity_timer);
+ }
hid_hw_close(hdev);
stop_hw:
hid_hw_stop(hdev);
@@ -451,7 +457,10 @@ static void appletb_kbd_remove(struct hid_device *hdev)
appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF);
input_unregister_handler(&kbd->inp_handler);
- timer_delete_sync(&kbd->inactivity_timer);
+ if (kbd->backlight_dev) {
+ put_device(&kbd->backlight_dev->dev);
+ timer_delete_sync(&kbd->inactivity_timer);
+ }
hid_hw_close(hdev);
hid_hw_stop(hdev);
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index b348d0464314..b31b8a2fd540 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1883,9 +1883,12 @@ u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags)
/*
* 7 extra bytes are necessary to achieve proper functionality
* of implement() working on 8 byte chunks
+ * 1 extra byte for the report ID when it is 0 (unnumbered report), so
+ * that the first position of the buffer can be reserved for the ID
+ * when the buffer is handed to .raw_request()
*/
- u32 len = hid_report_len(report) + 7;
+ u32 len = hid_report_len(report) + 7 + (report->id == 0);
return kzalloc(len, flags);
}
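
A condensed stand-in for the allocation rule above, with hypothetical parameter names; the extra byte exists only for unnumbered reports (ID 0), where .raw_request() still expects a report-ID byte at offset 0:

#include <linux/slab.h>
#include <linux/types.h>

static u8 *demo_alloc_report_buf(u32 report_len, u8 report_id, gfp_t flags)
{
	/*
	 * +7 so implement() may work in 8-byte chunks, +1 to reserve
	 * buf[0] for the report ID when the report is unnumbered.
	 */
	return kzalloc(report_len + 7 + (report_id == 0), flags);
}
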
@@ -1973,7 +1976,7 @@ static struct hid_report *hid_get_report(struct hid_report_enum *report_enum,
int __hid_request(struct hid_device *hid, struct hid_report *report,
enum hid_class_request reqtype)
{
- char *buf;
+ char *buf, *data_buf;
int ret;
u32 len;
@@ -1981,13 +1984,19 @@ int __hid_request(struct hid_device *hid, struct hid_report *report,
if (!buf)
return -ENOMEM;
+ data_buf = buf;
len = hid_report_len(report);
+ if (report->id == 0) {
+ /* reserve the first byte for the report ID */
+ data_buf++;
+ len++;
+ }
+
if (reqtype == HID_REQ_SET_REPORT)
- hid_output_report(report, buf);
+ hid_output_report(report, data_buf);
- ret = hid->ll_driver->raw_request(hid, report->id, buf, len,
- report->type, reqtype);
+ ret = hid_hw_raw_request(hid, report->id, buf, len, report->type, reqtype);
if (ret < 0) {
dbg_hid("unable to complete request: %d\n", ret);
goto out;
diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 8433306148d5..4424c0512bae 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -3298,8 +3298,8 @@ static const char *keys[KEY_MAX + 1] = {
[BTN_TOUCH] = "Touch", [BTN_STYLUS] = "Stylus",
[BTN_STYLUS2] = "Stylus2", [BTN_TOOL_DOUBLETAP] = "ToolDoubleTap",
[BTN_TOOL_TRIPLETAP] = "ToolTripleTap", [BTN_TOOL_QUADTAP] = "ToolQuadrupleTap",
- [BTN_GEAR_DOWN] = "WheelBtn",
- [BTN_GEAR_UP] = "Gear up", [KEY_OK] = "Ok",
+ [BTN_GEAR_DOWN] = "BtnGearDown", [BTN_GEAR_UP] = "BtnGearUp",
+ [KEY_OK] = "Ok",
[KEY_SELECT] = "Select", [KEY_GOTO] = "Goto",
[KEY_CLEAR] = "Clear", [KEY_POWER2] = "Power2",
[KEY_OPTION] = "Option", [KEY_INFO] = "Info",
diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c
index defcf91fdd14..0ad7d25d9864 100644
--- a/drivers/hid/hid-elecom.c
+++ b/drivers/hid/hid-elecom.c
@@ -89,7 +89,8 @@ static const __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc,
break;
case USB_DEVICE_ID_ELECOM_M_DT1URBK:
case USB_DEVICE_ID_ELECOM_M_DT1DRBK:
- case USB_DEVICE_ID_ELECOM_M_HT1URBK:
+ case USB_DEVICE_ID_ELECOM_M_HT1URBK_010C:
+ case USB_DEVICE_ID_ELECOM_M_HT1URBK_019B:
case USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D:
/*
* Report descriptor format:
@@ -122,7 +123,8 @@ static const struct hid_device_id elecom_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT4DRBK) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) },
- { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_010C) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_019B) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) },
{ }
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index e3fb4e2fe911..33cc5820f2be 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -312,6 +312,8 @@
#define USB_DEVICE_ID_ASUS_AK1D 0x1125
#define USB_DEVICE_ID_CHICONY_TOSHIBA_WT10A 0x1408
#define USB_DEVICE_ID_CHICONY_ACER_SWITCH12 0x1421
+#define USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA 0xb824
+#define USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA2 0xb82c
#define USB_VENDOR_ID_CHUNGHWAT 0x2247
#define USB_DEVICE_ID_CHUNGHWAT_MULTITOUCH 0x0001
@@ -446,7 +448,8 @@
#define USB_DEVICE_ID_ELECOM_M_XT4DRBK 0x00fd
#define USB_DEVICE_ID_ELECOM_M_DT1URBK 0x00fe
#define USB_DEVICE_ID_ELECOM_M_DT1DRBK 0x00ff
-#define USB_DEVICE_ID_ELECOM_M_HT1URBK 0x010c
+#define USB_DEVICE_ID_ELECOM_M_HT1URBK_010C 0x010c
+#define USB_DEVICE_ID_ELECOM_M_HT1URBK_019B 0x019b
#define USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D 0x010d
#define USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C 0x011c
@@ -819,6 +822,7 @@
#define USB_DEVICE_ID_LENOVO_TPPRODOCK 0x6067
#define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085
#define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3
+#define USB_DEVICE_ID_LENOVO_X1_TAB2 0x60a4
#define USB_DEVICE_ID_LENOVO_X1_TAB3 0x60b5
#define USB_DEVICE_ID_LENOVO_X12_TAB 0x60fe
#define USB_DEVICE_ID_LENOVO_X12_TAB2 0x61ae
@@ -1525,4 +1529,7 @@
#define USB_VENDOR_ID_SIGNOTEC 0x2133
#define USB_DEVICE_ID_SIGNOTEC_VIEWSONIC_PD1011 0x0018
+#define USB_VENDOR_ID_SMARTLINKTECHNOLOGY 0x4c4a
+#define USB_DEVICE_ID_SMARTLINKTECHNOLOGY_4155 0x4155
+
#endif
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 9d80635a91eb..ff1784b5c2a4 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -2343,7 +2343,7 @@ int hidinput_connect(struct hid_device *hid, unsigned int force)
}
if (list_empty(&hid->inputs)) {
- hid_err(hid, "No inputs registered, leaving\n");
+ hid_dbg(hid, "No inputs registered, leaving\n");
goto out_unwind;
}
diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c
index af29ba840522..b3121fa7a72d 100644
--- a/drivers/hid/hid-lenovo.c
+++ b/drivers/hid/hid-lenovo.c
@@ -492,6 +492,7 @@ static int lenovo_input_mapping(struct hid_device *hdev,
case USB_DEVICE_ID_LENOVO_X12_TAB:
case USB_DEVICE_ID_LENOVO_X12_TAB2:
case USB_DEVICE_ID_LENOVO_X1_TAB:
+ case USB_DEVICE_ID_LENOVO_X1_TAB2:
case USB_DEVICE_ID_LENOVO_X1_TAB3:
return lenovo_input_mapping_x1_tab_kbd(hdev, hi, field, usage, bit, max);
default:
@@ -548,11 +549,14 @@ static void lenovo_features_set_cptkbd(struct hid_device *hdev)
/*
* Tell the keyboard a driver understands it, and turn F7, F9, F11 into
- * regular keys
+ * regular keys (Compact only)
*/
- ret = lenovo_send_cmd_cptkbd(hdev, 0x01, 0x03);
- if (ret)
- hid_warn(hdev, "Failed to switch F7/9/11 mode: %d\n", ret);
+ if (hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD ||
+ hdev->product == USB_DEVICE_ID_LENOVO_CBTKBD) {
+ ret = lenovo_send_cmd_cptkbd(hdev, 0x01, 0x03);
+ if (ret)
+ hid_warn(hdev, "Failed to switch F7/9/11 mode: %d\n", ret);
+ }
/* Switch middle button to native mode */
ret = lenovo_send_cmd_cptkbd(hdev, 0x09, 0x01);
@@ -605,6 +609,7 @@ static ssize_t attr_fn_lock_store(struct device *dev,
case USB_DEVICE_ID_LENOVO_X12_TAB2:
case USB_DEVICE_ID_LENOVO_TP10UBKBD:
case USB_DEVICE_ID_LENOVO_X1_TAB:
+ case USB_DEVICE_ID_LENOVO_X1_TAB2:
case USB_DEVICE_ID_LENOVO_X1_TAB3:
ret = lenovo_led_set_tp10ubkbd(hdev, TP10UBKBD_FN_LOCK_LED, value);
if (ret)
@@ -861,6 +866,7 @@ static int lenovo_event(struct hid_device *hdev, struct hid_field *field,
case USB_DEVICE_ID_LENOVO_X12_TAB2:
case USB_DEVICE_ID_LENOVO_TP10UBKBD:
case USB_DEVICE_ID_LENOVO_X1_TAB:
+ case USB_DEVICE_ID_LENOVO_X1_TAB2:
case USB_DEVICE_ID_LENOVO_X1_TAB3:
return lenovo_event_tp10ubkbd(hdev, field, usage, value);
default:
@@ -1144,6 +1150,7 @@ static int lenovo_led_brightness_set(struct led_classdev *led_cdev,
case USB_DEVICE_ID_LENOVO_X12_TAB2:
case USB_DEVICE_ID_LENOVO_TP10UBKBD:
case USB_DEVICE_ID_LENOVO_X1_TAB:
+ case USB_DEVICE_ID_LENOVO_X1_TAB2:
case USB_DEVICE_ID_LENOVO_X1_TAB3:
ret = lenovo_led_set_tp10ubkbd(hdev, tp10ubkbd_led[led_nr], value);
break;
@@ -1384,6 +1391,7 @@ static int lenovo_probe(struct hid_device *hdev,
case USB_DEVICE_ID_LENOVO_X12_TAB2:
case USB_DEVICE_ID_LENOVO_TP10UBKBD:
case USB_DEVICE_ID_LENOVO_X1_TAB:
+ case USB_DEVICE_ID_LENOVO_X1_TAB2:
case USB_DEVICE_ID_LENOVO_X1_TAB3:
ret = lenovo_probe_tp10ubkbd(hdev);
break;
@@ -1473,6 +1481,7 @@ static void lenovo_remove(struct hid_device *hdev)
case USB_DEVICE_ID_LENOVO_X12_TAB2:
case USB_DEVICE_ID_LENOVO_TP10UBKBD:
case USB_DEVICE_ID_LENOVO_X1_TAB:
+ case USB_DEVICE_ID_LENOVO_X1_TAB2:
case USB_DEVICE_ID_LENOVO_X1_TAB3:
lenovo_remove_tp10ubkbd(hdev);
break;
@@ -1524,6 +1533,8 @@ static const struct hid_device_id lenovo_devices[] = {
{ HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB) },
{ HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+ USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB2) },
+ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB3) },
{ HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X12_TAB) },
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index b41001e02da7..a1c54ffe02b4 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -2132,12 +2132,18 @@ static const struct hid_device_id mt_devices[] = {
HID_DEVICE(BUS_I2C, HID_GROUP_GENERIC,
USB_VENDOR_ID_LG, I2C_DEVICE_ID_LG_7010) },
- /* Lenovo X1 TAB Gen 2 */
+ /* Lenovo X1 TAB Gen 1 */
{ .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT,
HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8,
USB_VENDOR_ID_LENOVO,
USB_DEVICE_ID_LENOVO_X1_TAB) },
+ /* Lenovo X1 TAB Gen 2 */
+ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT,
+ HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8,
+ USB_VENDOR_ID_LENOVO,
+ USB_DEVICE_ID_LENOVO_X1_TAB2) },
+
/* Lenovo X1 TAB Gen 3 */
{ .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT,
HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8,
diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c
index 839d5bcd72b1..fb4985988615 100644
--- a/drivers/hid/hid-nintendo.c
+++ b/drivers/hid/hid-nintendo.c
@@ -308,6 +308,7 @@ enum joycon_ctlr_state {
JOYCON_CTLR_STATE_INIT,
JOYCON_CTLR_STATE_READ,
JOYCON_CTLR_STATE_REMOVED,
+ JOYCON_CTLR_STATE_SUSPENDED,
};
/* Controller type received as part of device info */
@@ -2750,14 +2751,46 @@ static void nintendo_hid_remove(struct hid_device *hdev)
static int nintendo_hid_resume(struct hid_device *hdev)
{
- int ret = joycon_init(hdev);
+ struct joycon_ctlr *ctlr = hid_get_drvdata(hdev);
+ int ret;
+
+ hid_dbg(hdev, "resume\n");
+ if (!joycon_using_usb(ctlr)) {
+ hid_dbg(hdev, "no-op resume for bt ctlr\n");
+ ctlr->ctlr_state = JOYCON_CTLR_STATE_READ;
+ return 0;
+ }
+ ret = joycon_init(hdev);
if (ret)
- hid_err(hdev, "Failed to restore controller after resume");
+ hid_err(hdev,
+ "Failed to restore controller after resume: %d\n",
+ ret);
+ else
+ ctlr->ctlr_state = JOYCON_CTLR_STATE_READ;
return ret;
}
+static int nintendo_hid_suspend(struct hid_device *hdev, pm_message_t message)
+{
+ struct joycon_ctlr *ctlr = hid_get_drvdata(hdev);
+
+ hid_dbg(hdev, "suspend: %d\n", message.event);
+ /*
+ * Avoid any blocking loops in suspend/resume transitions.
+ *
+ * joycon_enforce_subcmd_rate() can result in repeated retries if for
+ * whatever reason the controller stops providing input reports.
+ *
+ * This has been observed with Bluetooth controllers that lose
+ * connectivity prior to suspend (but not long enough to result in
+ * complete disconnection).
+ */
+ ctlr->ctlr_state = JOYCON_CTLR_STATE_SUSPENDED;
+ return 0;
+}
+
#endif
static const struct hid_device_id nintendo_hid_devices[] = {
@@ -2796,6 +2829,7 @@ static struct hid_driver nintendo_hid_driver = {
#ifdef CONFIG_PM
.resume = nintendo_hid_resume,
+ .suspend = nintendo_hid_suspend,
#endif
};
static int __init nintendo_init(void)
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index 7fefeb413ec3..9bf9ce8dc803 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -410,7 +410,8 @@ static const struct hid_device_id hid_have_special_driver[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT4DRBK) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) },
- { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_010C) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_019B) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) },
#endif
@@ -757,6 +758,8 @@ static const struct hid_device_id hid_ignore_list[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_AVERMEDIA, USB_DEVICE_ID_AVER_FM_MR800) },
{ HID_USB_DEVICE(USB_VENDOR_ID_AXENTIA, USB_DEVICE_ID_AXENTIA_FM_RADIO) },
{ HID_USB_DEVICE(USB_VENDOR_ID_BERKSHIRE, USB_DEVICE_ID_BERKSHIRE_PCWD) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CIDC, 0x0103) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_RADIO_SI470X) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_RADIO_SI4713) },
@@ -904,6 +907,7 @@ static const struct hid_device_id hid_ignore_list[] = {
#endif
{ HID_USB_DEVICE(USB_VENDOR_ID_YEALINK, USB_DEVICE_ID_YEALINK_P1K_P4K_B2K) },
{ HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_HP_5MP_CAMERA_5473) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SMARTLINKTECHNOLOGY, USB_DEVICE_ID_SMARTLINKTECHNOLOGY_4155) },
{ }
};
diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h
index 07e90d51f073..fa5d68c36313 100644
--- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h
+++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h
@@ -38,6 +38,7 @@
#define PCI_DEVICE_ID_INTEL_ISH_LNL_M 0xA845
#define PCI_DEVICE_ID_INTEL_ISH_PTL_H 0xE345
#define PCI_DEVICE_ID_INTEL_ISH_PTL_P 0xE445
+#define PCI_DEVICE_ID_INTEL_ISH_WCL 0x4D45
#define REVISION_ID_CHT_A0 0x6
#define REVISION_ID_CHT_Ax_SI 0x0
diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
index ff0fc8010072..c57483224db6 100644
--- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c
+++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
@@ -27,10 +27,12 @@ enum ishtp_driver_data_index {
ISHTP_DRIVER_DATA_NONE,
ISHTP_DRIVER_DATA_LNL_M,
ISHTP_DRIVER_DATA_PTL,
+ ISHTP_DRIVER_DATA_WCL,
};
#define ISH_FW_GEN_LNL_M "lnlm"
#define ISH_FW_GEN_PTL "ptl"
+#define ISH_FW_GEN_WCL "wcl"
#define ISH_FIRMWARE_PATH(gen) "intel/ish/ish_" gen ".bin"
#define ISH_FIRMWARE_PATH_ALL "intel/ish/ish_*.bin"
@@ -42,6 +44,9 @@ static struct ishtp_driver_data ishtp_driver_data[] = {
[ISHTP_DRIVER_DATA_PTL] = {
.fw_generation = ISH_FW_GEN_PTL,
},
+ [ISHTP_DRIVER_DATA_WCL] = {
+ .fw_generation = ISH_FW_GEN_WCL,
+ },
};
static const struct pci_device_id ish_pci_tbl[] = {
@@ -67,9 +72,10 @@ static const struct pci_device_id ish_pci_tbl[] = {
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_MTL_P)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_ARL_H)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_ARL_S)},
- {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_LNL_M), .driver_data = ISHTP_DRIVER_DATA_LNL_M},
- {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_PTL_H), .driver_data = ISHTP_DRIVER_DATA_PTL},
- {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_PTL_P), .driver_data = ISHTP_DRIVER_DATA_PTL},
+ {PCI_DEVICE_DATA(INTEL, ISH_LNL_M, ISHTP_DRIVER_DATA_LNL_M)},
+ {PCI_DEVICE_DATA(INTEL, ISH_PTL_H, ISHTP_DRIVER_DATA_PTL)},
+ {PCI_DEVICE_DATA(INTEL, ISH_PTL_P, ISHTP_DRIVER_DATA_PTL)},
+ {PCI_DEVICE_DATA(INTEL, ISH_WCL, ISHTP_DRIVER_DATA_WCL)},
{}
};
MODULE_DEVICE_TABLE(pci, ish_pci_tbl);
diff --git a/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c b/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c
index f493df0d5dc4..a63f8c833252 100644
--- a/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c
+++ b/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c
@@ -4,6 +4,7 @@
#include <linux/bitfield.h>
#include <linux/hid.h>
#include <linux/hid-over-i2c.h>
+#include <linux/unaligned.h>
#include "intel-thc-dev.h"
#include "intel-thc-dma.h"
@@ -200,6 +201,9 @@ int quicki2c_set_report(struct quicki2c_device *qcdev, u8 report_type,
int quicki2c_reset(struct quicki2c_device *qcdev)
{
+ u16 input_reg = le16_to_cpu(qcdev->dev_desc.input_reg);
+ size_t read_len = HIDI2C_LENGTH_LEN;
+ u32 prd_len = read_len;
int ret;
qcdev->reset_ack = false;
@@ -213,12 +217,32 @@ int quicki2c_reset(struct quicki2c_device *qcdev)
ret = wait_event_interruptible_timeout(qcdev->reset_ack_wq, qcdev->reset_ack,
HIDI2C_RESET_TIMEOUT * HZ);
- if (ret <= 0 || !qcdev->reset_ack) {
+ if (qcdev->reset_ack)
+ return 0;
+
+ /*
+ * Manually read the reset response if it wasn't received, in case the
+ * reset interrupt was missed by the touch device or the THC hardware.
+ */
+ ret = thc_tic_pio_read(qcdev->thc_hw, input_reg, read_len, &prd_len,
+ (u32 *)qcdev->input_buf);
+ if (ret) {
+ dev_err_once(qcdev->dev, "Read Reset Response failed, ret %d\n", ret);
+ return ret;
+ }
+
+ /*
+ * Check the response packet length, carried in the first 16 bits of the
+ * packet. A zero length identifies a reset response; anything else does not.
+ */
+ if (get_unaligned_le16(qcdev->input_buf)) {
dev_err_once(qcdev->dev,
"Wait reset response timed out ret:%d timeout:%ds\n",
ret, HIDI2C_RESET_TIMEOUT);
return -ETIMEDOUT;
}
+ qcdev->reset_ack = true;
+
return 0;
}
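
The control flow of the fixed reset path, reduced to a sketch with hypothetical helpers (demo_pio_read() stands in for thc_tic_pio_read()): wait for the interrupt-driven ack first, and only if it never arrives read the response register directly and classify it by its 16-bit length field:

#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/unaligned.h>

static int demo_pio_read(u8 *buf, size_t len)
{
	memset(buf, 0, len);	/* stub: a real implementation reads the device */
	return 0;
}

static int demo_check_reset_ack(bool *acked_flag)
{
	u8 buf[2];
	int ret;

	if (*acked_flag)			/* IRQ path delivered the ack */
		return 0;

	/* IRQ missed: fetch the response manually and re-check */
	ret = demo_pio_read(buf, sizeof(buf));
	if (ret)
		return ret;

	if (get_unaligned_le16(buf))		/* non-zero length: not a reset ack */
		return -ETIMEDOUT;

	*acked_flag = true;
	return 0;
}
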
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index eaf099b2efdb..9a57504e51a1 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2048,14 +2048,18 @@ static int wacom_initialize_remotes(struct wacom *wacom)
remote->remote_dir = kobject_create_and_add("wacom_remote",
&wacom->hdev->dev.kobj);
- if (!remote->remote_dir)
+ if (!remote->remote_dir) {
+ kfifo_free(&remote->remote_fifo);
return -ENOMEM;
+ }
error = sysfs_create_files(remote->remote_dir, remote_unpair_attrs);
if (error) {
hid_err(wacom->hdev,
"cannot create sysfs group err: %d\n", error);
+ kfifo_free(&remote->remote_fifo);
+ kobject_put(remote->remote_dir);
return error;
}
@@ -2901,6 +2905,7 @@ static void wacom_remove(struct hid_device *hdev)
hid_hw_stop(hdev);
cancel_delayed_work_sync(&wacom->init_work);
+ cancel_delayed_work_sync(&wacom->aes_battery_work);
cancel_work_sync(&wacom->wireless_work);
cancel_work_sync(&wacom->battery_work);
cancel_work_sync(&wacom->remote_work);
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 1cd188b73b74..57623ca7f350 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -9,7 +9,7 @@ config HYPERV
select PARAVIRT
select X86_HV_CALLBACK_VECTOR if X86
select OF_EARLY_FLATTREE if OF
- select SYSFB if !HYPERV_VTL_MODE
+ select SYSFB if EFI && !HYPERV_VTL_MODE
help
Select this option to run Linux as a Hyper-V client operating
system.
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 35f26fa1ffe7..7c7c66e0dc3f 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -18,6 +18,7 @@
#include <linux/uio.h>
#include <linux/interrupt.h>
#include <linux/set_memory.h>
+#include <linux/export.h>
#include <asm/page.h>
#include <asm/mshyperv.h>
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 6e084c207414..65dd299e2944 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -20,6 +20,7 @@
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/hyperv.h>
+#include <linux/export.h>
#include <asm/mshyperv.h>
#include <linux/sched/isolation.h>
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index be490c598785..1fe3573ae52a 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -519,7 +519,10 @@ void vmbus_set_event(struct vmbus_channel *channel)
else
WARN_ON_ONCE(1);
} else {
- hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);
+ u64 control = HVCALL_SIGNAL_EVENT;
+
+ control |= hv_nested ? HV_HYPERCALL_NESTED : 0;
+ hv_do_fast_hypercall8(control, channel->sig_event);
}
}
EXPORT_SYMBOL_GPL(vmbus_set_event);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 308c8f279df8..b14c5f9e0ef2 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -85,8 +85,10 @@ int hv_post_message(union hv_connection_id connection_id,
else
status = HV_STATUS_INVALID_PARAMETER;
} else {
- status = hv_do_hypercall(HVCALL_POST_MESSAGE,
- aligned_msg, NULL);
+ u64 control = HVCALL_POST_MESSAGE;
+
+ control |= hv_nested ? HV_HYPERCALL_NESTED : 0;
+ status = hv_do_hypercall(control, aligned_msg, NULL);
}
local_irq_restore(flags);
diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
index 7d7ecb6f6137..fbb4eb3901bb 100644
--- a/drivers/hv/hv_proc.c
+++ b/drivers/hv/hv_proc.c
@@ -6,6 +6,7 @@
#include <linux/slab.h>
#include <linux/cpuhotplug.h>
#include <linux/minmax.h>
+#include <linux/export.h>
#include <asm/mshyperv.h>
/*
diff --git a/drivers/hv/mshv_common.c b/drivers/hv/mshv_common.c
index 2575e6d7a71f..6f227a8a5af7 100644
--- a/drivers/hv/mshv_common.c
+++ b/drivers/hv/mshv_common.c
@@ -13,6 +13,7 @@
#include <linux/mm.h>
#include <asm/mshyperv.h>
#include <linux/resume_user_mode.h>
+#include <linux/export.h>
#include "mshv.h"
diff --git a/drivers/hv/mshv_eventfd.c b/drivers/hv/mshv_eventfd.c
index 8dd22be2ca0b..48c514da34eb 100644
--- a/drivers/hv/mshv_eventfd.c
+++ b/drivers/hv/mshv_eventfd.c
@@ -377,10 +377,11 @@ static int mshv_irqfd_assign(struct mshv_partition *pt,
struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
struct mshv_irqfd *irqfd, *tmp;
unsigned int events;
- struct fd f;
int ret;
int idx;
+ CLASS(fd, f)(args->fd);
+
irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
if (!irqfd)
return -ENOMEM;
@@ -390,8 +391,7 @@ static int mshv_irqfd_assign(struct mshv_partition *pt,
INIT_WORK(&irqfd->irqfd_shutdown, mshv_irqfd_shutdown);
seqcount_spinlock_init(&irqfd->irqfd_irqe_sc, &pt->pt_irqfds_lock);
- f = fdget(args->fd);
- if (!fd_file(f)) {
+ if (fd_empty(f)) {
ret = -EBADF;
goto out;
}
@@ -496,12 +496,6 @@ static int mshv_irqfd_assign(struct mshv_partition *pt,
mshv_assert_irq_slow(irqfd);
srcu_read_unlock(&pt->pt_irq_srcu, idx);
- /*
- * do not drop the file until the irqfd is fully initialized, otherwise
- * we might race against the POLLHUP
- */
- fdput(f);
-
return 0;
fail:
@@ -514,8 +508,6 @@ fail:
if (eventfd && !IS_ERR(eventfd))
eventfd_ctx_put(eventfd);
- fdput(f);
-
out:
kfree(irqfd);
return ret;
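
CLASS(fd, f)(args->fd) declares a scope-bound file descriptor reference: the reference is taken at the declaration and dropped automatically when f leaves scope, which is why every fdput() in the error paths above disappears. A minimal sketch with a hypothetical descriptor argument:

#include <linux/cleanup.h>
#include <linux/errno.h>
#include <linux/file.h>

static int demo_use_fd(int user_fd)
{
	CLASS(fd, f)(user_fd);		/* fdput() runs automatically on return */

	if (fd_empty(f))		/* no file backing this descriptor */
		return -EBADF;

	/* ... use fd_file(f) safely; no explicit put needed ... */
	return 0;
}
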
diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c
index a222a16107f6..c9c274f29c3c 100644
--- a/drivers/hv/mshv_root_hv_call.c
+++ b/drivers/hv/mshv_root_hv_call.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/export.h>
#include <asm/mshyperv.h>
#include "mshv_root.h"
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 3c9b02471760..23ce1fb70de1 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/prefetch.h>
#include <linux/io.h>
+#include <linux/export.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 33b524b4eb5e..2ed5a1e89d69 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2509,7 +2509,7 @@ static int vmbus_acpi_add(struct platform_device *pdev)
return 0;
}
#endif
-
+#ifndef HYPERVISOR_CALLBACK_VECTOR
static int vmbus_set_irq(struct platform_device *pdev)
{
struct irq_data *data;
@@ -2534,6 +2534,7 @@ static int vmbus_set_irq(struct platform_device *pdev)
return 0;
}
+#endif
static int vmbus_device_add(struct platform_device *pdev)
{
@@ -2549,11 +2550,11 @@ static int vmbus_device_add(struct platform_device *pdev)
if (ret)
return ret;
- if (!__is_defined(HYPERVISOR_CALLBACK_VECTOR))
- ret = vmbus_set_irq(pdev);
+#ifndef HYPERVISOR_CALLBACK_VECTOR
+ ret = vmbus_set_irq(pdev);
if (ret)
return ret;
-
+#endif
for_each_of_range(&parser, &range) {
struct resource *res;
diff --git a/drivers/hwmon/corsair-cpro.c b/drivers/hwmon/corsair-cpro.c
index e1a7f7aa7f80..b7b911f8359c 100644
--- a/drivers/hwmon/corsair-cpro.c
+++ b/drivers/hwmon/corsair-cpro.c
@@ -89,6 +89,7 @@ struct ccp_device {
struct mutex mutex; /* whenever buffer is used, lock before send_usb_cmd */
u8 *cmd_buffer;
u8 *buffer;
+ int buffer_recv_size; /* number of received bytes in buffer */
int target[6];
DECLARE_BITMAP(temp_cnct, NUM_TEMP_SENSORS);
DECLARE_BITMAP(fan_cnct, NUM_FANS);
@@ -146,6 +147,9 @@ static int send_usb_cmd(struct ccp_device *ccp, u8 command, u8 byte1, u8 byte2,
if (!t)
return -ETIMEDOUT;
+ if (ccp->buffer_recv_size != IN_BUFFER_SIZE)
+ return -EPROTO;
+
return ccp_get_errno(ccp);
}
@@ -157,6 +161,7 @@ static int ccp_raw_event(struct hid_device *hdev, struct hid_report *report, u8
spin_lock(&ccp->wait_input_report_lock);
if (!completion_done(&ccp->wait_input_report)) {
memcpy(ccp->buffer, data, min(IN_BUFFER_SIZE, size));
+ ccp->buffer_recv_size = size;
complete_all(&ccp->wait_input_report);
}
spin_unlock(&ccp->wait_input_report_lock);
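
The producer/consumer shape of this fix, reduced to a sketch (illustrative names, assumed 64-byte report): the receive side publishes the byte count before completing, and the waiter treats any size other than the full report as a protocol error instead of trusting a partially filled buffer:

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/minmax.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/types.h>

#define DEMO_BUF_SIZE 64		/* assumed report size */

static DEFINE_SPINLOCK(demo_lock);
static DECLARE_COMPLETION(demo_done);
static u8 demo_buf[DEMO_BUF_SIZE];
static int demo_recv_size;

/* interrupt context: record the real length before waking the waiter */
static void demo_on_report(const u8 *data, int size)
{
	spin_lock(&demo_lock);
	if (!completion_done(&demo_done)) {
		memcpy(demo_buf, data, min(DEMO_BUF_SIZE, size));
		demo_recv_size = size;
		complete_all(&demo_done);
	}
	spin_unlock(&demo_lock);
}

/* process context: reject short (or oversized) transfers */
static int demo_wait_report(void)
{
	if (!wait_for_completion_timeout(&demo_done, msecs_to_jiffies(250)))
		return -ETIMEDOUT;
	if (demo_recv_size != DEMO_BUF_SIZE)
		return -EPROTO;
	return 0;
}
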
diff --git a/drivers/hwmon/ftsteutates.c b/drivers/hwmon/ftsteutates.c
index a3a07662e491..8aeec16a7a90 100644
--- a/drivers/hwmon/ftsteutates.c
+++ b/drivers/hwmon/ftsteutates.c
@@ -423,13 +423,16 @@ static int fts_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
break;
case hwmon_pwm:
switch (attr) {
- case hwmon_pwm_auto_channels_temp:
- if (data->fan_source[channel] == FTS_FAN_SOURCE_INVALID)
+ case hwmon_pwm_auto_channels_temp: {
+ u8 fan_source = data->fan_source[channel];
+
+ if (fan_source == FTS_FAN_SOURCE_INVALID || fan_source >= BITS_PER_LONG)
*val = 0;
else
- *val = BIT(data->fan_source[channel]);
+ *val = BIT(fan_source);
return 0;
+ }
default:
break;
}
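
BIT(n) is undefined for n >= BITS_PER_LONG, so a sensor index read from the device must be range-checked before it becomes a mask, as the fix above does. A minimal sketch of the guard, with an invalid-marker value passed in rather than the driver's FTS_FAN_SOURCE_INVALID:

#include <linux/bits.h>
#include <linux/types.h>

static unsigned long demo_fan_source_to_mask(u8 fan_source, u8 invalid_marker)
{
	/*
	 * Reject both the device's explicit "invalid" marker and
	 * out-of-range values that would make BIT() undefined.
	 */
	if (fan_source == invalid_marker || fan_source >= BITS_PER_LONG)
		return 0;

	return BIT(fan_source);
}
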
diff --git a/drivers/hwmon/ina238.c b/drivers/hwmon/ina238.c
index a4a41742786b..9a5fd16a4ec2 100644
--- a/drivers/hwmon/ina238.c
+++ b/drivers/hwmon/ina238.c
@@ -97,7 +97,7 @@
* Power (mW) = 0.2 * register value * 20000 / rshunt / 4 * gain
* (Specific for SQ52206)
* Power (mW) = 0.24 * register value * 20000 / rshunt / 4 * gain
- * Energy (mJ) = 16 * 0.24 * register value * 20000 / rshunt / 4 * gain
+ * Energy (uJ) = 16 * 0.24 * register value * 20000 / rshunt / 4 * gain * 1000
*/
#define INA238_CALIBRATION_VALUE 16384
#define INA238_FIXED_SHUNT 20000
@@ -500,9 +500,9 @@ static ssize_t energy1_input_show(struct device *dev,
if (ret)
return ret;
- /* result in mJ */
- energy = div_u64(regval * INA238_FIXED_SHUNT * data->gain * 16 *
- data->config->power_calculate_factor, 4 * 100 * data->rshunt);
+ /* result in uJ */
+ energy = div_u64(regval * INA238_FIXED_SHUNT * data->gain * 16 * 10 *
+ data->config->power_calculate_factor, 4 * data->rshunt);
return sysfs_emit(buf, "%llu\n", energy);
}
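
A scaling check for this change, derived from the two expressions in the diff:

/*
 * With N = regval * INA238_FIXED_SHUNT * gain * power_calculate_factor:
 *
 *   old: N * 16      / (4 * 100 * rshunt)   -> mJ
 *   new: N * 16 * 10 / (4 *       rshunt)   -> uJ
 *
 * new/old = 10 * 100 = 1000, i.e. exactly the mJ -> uJ conversion.
 */
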
diff --git a/drivers/hwmon/ltc4282.c b/drivers/hwmon/ltc4282.c
index 7f38d2696239..f607fe8f7937 100644
--- a/drivers/hwmon/ltc4282.c
+++ b/drivers/hwmon/ltc4282.c
@@ -1512,13 +1512,6 @@ static int ltc4282_setup(struct ltc4282_state *st, struct device *dev)
}
if (device_property_read_bool(dev, "adi,fault-log-enable")) {
- ret = regmap_set_bits(st->map, LTC4282_ADC_CTRL,
- LTC4282_FAULT_LOG_EN_MASK);
- if (ret)
- return ret;
- }
-
- if (device_property_read_bool(dev, "adi,fault-log-enable")) {
ret = regmap_set_bits(st->map, LTC4282_ADC_CTRL, LTC4282_FAULT_LOG_EN_MASK);
if (ret)
return ret;
diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c
index 9486db249c64..b3694a4209b9 100644
--- a/drivers/hwmon/occ/common.c
+++ b/drivers/hwmon/occ/common.c
@@ -459,12 +459,10 @@ static ssize_t occ_show_power_1(struct device *dev,
return sysfs_emit(buf, "%llu\n", val);
}
-static u64 occ_get_powr_avg(u64 *accum, u32 *samples)
+static u64 occ_get_powr_avg(u64 accum, u32 samples)
{
- u64 divisor = get_unaligned_be32(samples);
-
- return (divisor == 0) ? 0 :
- div64_u64(get_unaligned_be64(accum) * 1000000ULL, divisor);
+ return (samples == 0) ? 0 :
+ mul_u64_u32_div(accum, 1000000UL, samples);
}
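
mul_u64_u32_div() keeps the accum * 1000000 product in a wider intermediate before dividing, so large accumulators no longer overflow the way a plain 64-bit multiply could. Reduced to its essence:

#include <linux/math64.h>
#include <linux/types.h>

/* average scaled by 1e6, without a u64 overflow in the multiply */
static u64 demo_scaled_avg(u64 accum, u32 samples)
{
	return samples ? mul_u64_u32_div(accum, 1000000UL, samples) : 0;
}
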
static ssize_t occ_show_power_2(struct device *dev,
@@ -489,8 +487,8 @@ static ssize_t occ_show_power_2(struct device *dev,
get_unaligned_be32(&power->sensor_id),
power->function_id, power->apss_channel);
case 1:
- val = occ_get_powr_avg(&power->accumulator,
- &power->update_tag);
+ val = occ_get_powr_avg(get_unaligned_be64(&power->accumulator),
+ get_unaligned_be32(&power->update_tag));
break;
case 2:
val = (u64)get_unaligned_be32(&power->update_tag) *
@@ -527,8 +525,8 @@ static ssize_t occ_show_power_a0(struct device *dev,
return sysfs_emit(buf, "%u_system\n",
get_unaligned_be32(&power->sensor_id));
case 1:
- val = occ_get_powr_avg(&power->system.accumulator,
- &power->system.update_tag);
+ val = occ_get_powr_avg(get_unaligned_be64(&power->system.accumulator),
+ get_unaligned_be32(&power->system.update_tag));
break;
case 2:
val = (u64)get_unaligned_be32(&power->system.update_tag) *
@@ -541,8 +539,8 @@ static ssize_t occ_show_power_a0(struct device *dev,
return sysfs_emit(buf, "%u_proc\n",
get_unaligned_be32(&power->sensor_id));
case 5:
- val = occ_get_powr_avg(&power->proc.accumulator,
- &power->proc.update_tag);
+ val = occ_get_powr_avg(get_unaligned_be64(&power->proc.accumulator),
+ get_unaligned_be32(&power->proc.update_tag));
break;
case 6:
val = (u64)get_unaligned_be32(&power->proc.update_tag) *
@@ -555,8 +553,8 @@ static ssize_t occ_show_power_a0(struct device *dev,
return sysfs_emit(buf, "%u_vdd\n",
get_unaligned_be32(&power->sensor_id));
case 9:
- val = occ_get_powr_avg(&power->vdd.accumulator,
- &power->vdd.update_tag);
+ val = occ_get_powr_avg(get_unaligned_be64(&power->vdd.accumulator),
+ get_unaligned_be32(&power->vdd.update_tag));
break;
case 10:
val = (u64)get_unaligned_be32(&power->vdd.update_tag) *
@@ -569,8 +567,8 @@ static ssize_t occ_show_power_a0(struct device *dev,
return sysfs_emit(buf, "%u_vdn\n",
get_unaligned_be32(&power->sensor_id));
case 13:
- val = occ_get_powr_avg(&power->vdn.accumulator,
- &power->vdn.update_tag);
+ val = occ_get_powr_avg(get_unaligned_be64(&power->vdn.accumulator),
+ get_unaligned_be32(&power->vdn.update_tag));
break;
case 14:
val = (u64)get_unaligned_be32(&power->vdn.update_tag) *
@@ -747,29 +745,30 @@ static ssize_t occ_show_extended(struct device *dev,
}
/*
- * Some helper macros to make it easier to define an occ_attribute. Since these
- * are dynamically allocated, we shouldn't use the existing kernel macros which
+ * A helper to make it easier to define an occ_attribute. Since these
+ * are dynamically allocated, we cannot use the existing kernel macros which
* stringify the name argument.
*/
-#define ATTR_OCC(_name, _mode, _show, _store) { \
- .attr = { \
- .name = _name, \
- .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \
- }, \
- .show = _show, \
- .store = _store, \
-}
-
-#define SENSOR_ATTR_OCC(_name, _mode, _show, _store, _nr, _index) { \
- .dev_attr = ATTR_OCC(_name, _mode, _show, _store), \
- .index = _index, \
- .nr = _nr, \
+static void occ_init_attribute(struct occ_attribute *attr, int mode,
+ ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf),
+ ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count),
+ int nr, int index, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vsnprintf(attr->name, sizeof(attr->name), fmt, args);
+ va_end(args);
+
+ attr->sensor.dev_attr.attr.name = attr->name;
+ attr->sensor.dev_attr.attr.mode = mode;
+ attr->sensor.dev_attr.show = show;
+ attr->sensor.dev_attr.store = store;
+ attr->sensor.index = index;
+ attr->sensor.nr = nr;
}
-#define OCC_INIT_ATTR(_name, _mode, _show, _store, _nr, _index) \
- ((struct sensor_device_attribute_2) \
- SENSOR_ATTR_OCC(_name, _mode, _show, _store, _nr, _index))
-
/*
* Allocate and instantiate sensor_device_attribute_2s. It's most efficient to
* use our own instead of the built-in hwmon attribute types.
@@ -855,14 +854,15 @@ static int occ_setup_sensor_attrs(struct occ *occ)
sensors->extended.num_sensors = 0;
}
- occ->attrs = devm_kzalloc(dev, sizeof(*occ->attrs) * num_attrs,
+ occ->attrs = devm_kcalloc(dev, num_attrs, sizeof(*occ->attrs),
GFP_KERNEL);
if (!occ->attrs)
return -ENOMEM;
/* null-terminated list */
- occ->group.attrs = devm_kzalloc(dev, sizeof(*occ->group.attrs) *
- num_attrs + 1, GFP_KERNEL);
+ occ->group.attrs = devm_kcalloc(dev, num_attrs + 1,
+ sizeof(*occ->group.attrs),
+ GFP_KERNEL);
if (!occ->group.attrs)
return -ENOMEM;
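devm_kcalloc() brings overflow-checked multiplication, and for the group.attrs array it also fixes a sizing bug visible in the removed line: "sizeof(*occ->group.attrs) * num_attrs + 1" binds as (size * num) + 1, one extra byte rather than one extra pointer slot for the NULL terminator. A tiny demonstration of the precedence:

#include <stdio.h>

int main(void)
{
        size_t num_attrs = 8, elem = sizeof(void *);

        /* old sizing: "* num_attrs + 1" is (elem * num_attrs) + 1, one extra BYTE */
        size_t old_size = elem * num_attrs + 1;
        /* kcalloc-style sizing: one extra ELEMENT for the NULL terminator */
        size_t new_size = (num_attrs + 1) * elem;

        printf("old: %zu bytes, new: %zu bytes\n", old_size, new_size); /* 65 vs 72 on LP64 */
        return 0;
}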
@@ -872,43 +872,33 @@ static int occ_setup_sensor_attrs(struct occ *occ)
s = i + 1;
temp = ((struct temp_sensor_2 *)sensors->temp.data) + i;
- snprintf(attr->name, sizeof(attr->name), "temp%d_label", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_temp, NULL,
- 0, i);
+ occ_init_attribute(attr, 0444, show_temp, NULL,
+ 0, i, "temp%d_label", s);
attr++;
if (sensors->temp.version == 2 &&
temp->fru_type == OCC_FRU_TYPE_VRM) {
- snprintf(attr->name, sizeof(attr->name),
- "temp%d_alarm", s);
+ occ_init_attribute(attr, 0444, show_temp, NULL,
+ 1, i, "temp%d_alarm", s);
} else {
- snprintf(attr->name, sizeof(attr->name),
- "temp%d_input", s);
+ occ_init_attribute(attr, 0444, show_temp, NULL,
+ 1, i, "temp%d_input", s);
}
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_temp, NULL,
- 1, i);
attr++;
if (sensors->temp.version > 1) {
- snprintf(attr->name, sizeof(attr->name),
- "temp%d_fru_type", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
- show_temp, NULL, 2, i);
+ occ_init_attribute(attr, 0444, show_temp, NULL,
+ 2, i, "temp%d_fru_type", s);
attr++;
- snprintf(attr->name, sizeof(attr->name),
- "temp%d_fault", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
- show_temp, NULL, 3, i);
+ occ_init_attribute(attr, 0444, show_temp, NULL,
+ 3, i, "temp%d_fault", s);
attr++;
if (sensors->temp.version == 0x10) {
- snprintf(attr->name, sizeof(attr->name),
- "temp%d_max", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
- show_temp, NULL,
- 4, i);
+ occ_init_attribute(attr, 0444, show_temp, NULL,
+ 4, i, "temp%d_max", s);
attr++;
}
}
@@ -917,14 +907,12 @@ static int occ_setup_sensor_attrs(struct occ *occ)
for (i = 0; i < sensors->freq.num_sensors; ++i) {
s = i + 1;
- snprintf(attr->name, sizeof(attr->name), "freq%d_label", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_freq, NULL,
- 0, i);
+ occ_init_attribute(attr, 0444, show_freq, NULL,
+ 0, i, "freq%d_label", s);
attr++;
- snprintf(attr->name, sizeof(attr->name), "freq%d_input", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_freq, NULL,
- 1, i);
+ occ_init_attribute(attr, 0444, show_freq, NULL,
+ 1, i, "freq%d_input", s);
attr++;
}
@@ -940,32 +928,24 @@ static int occ_setup_sensor_attrs(struct occ *occ)
s = (i * 4) + 1;
for (j = 0; j < 4; ++j) {
- snprintf(attr->name, sizeof(attr->name),
- "power%d_label", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
- show_power, NULL,
- nr++, i);
+ occ_init_attribute(attr, 0444, show_power,
+ NULL, nr++, i,
+ "power%d_label", s);
attr++;
- snprintf(attr->name, sizeof(attr->name),
- "power%d_average", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
- show_power, NULL,
- nr++, i);
+ occ_init_attribute(attr, 0444, show_power,
+ NULL, nr++, i,
+ "power%d_average", s);
attr++;
- snprintf(attr->name, sizeof(attr->name),
- "power%d_average_interval", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
- show_power, NULL,
- nr++, i);
+ occ_init_attribute(attr, 0444, show_power,
+ NULL, nr++, i,
+ "power%d_average_interval", s);
attr++;
- snprintf(attr->name, sizeof(attr->name),
- "power%d_input", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
- show_power, NULL,
- nr++, i);
+ occ_init_attribute(attr, 0444, show_power,
+ NULL, nr++, i,
+ "power%d_input", s);
attr++;
s++;
@@ -977,28 +957,20 @@ static int occ_setup_sensor_attrs(struct occ *occ)
for (i = 0; i < sensors->power.num_sensors; ++i) {
s = i + 1;
- snprintf(attr->name, sizeof(attr->name),
- "power%d_label", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
- show_power, NULL, 0, i);
+ occ_init_attribute(attr, 0444, show_power, NULL,
+ 0, i, "power%d_label", s);
attr++;
- snprintf(attr->name, sizeof(attr->name),
- "power%d_average", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
- show_power, NULL, 1, i);
+ occ_init_attribute(attr, 0444, show_power, NULL,
+ 1, i, "power%d_average", s);
attr++;
- snprintf(attr->name, sizeof(attr->name),
- "power%d_average_interval", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
- show_power, NULL, 2, i);
+ occ_init_attribute(attr, 0444, show_power, NULL,
+ 2, i, "power%d_average_interval", s);
attr++;
- snprintf(attr->name, sizeof(attr->name),
- "power%d_input", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
- show_power, NULL, 3, i);
+ occ_init_attribute(attr, 0444, show_power, NULL,
+ 3, i, "power%d_input", s);
attr++;
}
@@ -1006,56 +978,43 @@ static int occ_setup_sensor_attrs(struct occ *occ)
}
if (sensors->caps.num_sensors >= 1) {
- snprintf(attr->name, sizeof(attr->name), "power%d_label", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL,
- 0, 0);
+ occ_init_attribute(attr, 0444, show_caps, NULL,
+ 0, 0, "power%d_label", s);
attr++;
- snprintf(attr->name, sizeof(attr->name), "power%d_cap", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL,
- 1, 0);
+ occ_init_attribute(attr, 0444, show_caps, NULL,
+ 1, 0, "power%d_cap", s);
attr++;
- snprintf(attr->name, sizeof(attr->name), "power%d_input", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL,
- 2, 0);
+ occ_init_attribute(attr, 0444, show_caps, NULL,
+ 2, 0, "power%d_input", s);
attr++;
- snprintf(attr->name, sizeof(attr->name),
- "power%d_cap_not_redundant", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL,
- 3, 0);
+ occ_init_attribute(attr, 0444, show_caps, NULL,
+ 3, 0, "power%d_cap_not_redundant", s);
attr++;
- snprintf(attr->name, sizeof(attr->name), "power%d_cap_max", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL,
- 4, 0);
+ occ_init_attribute(attr, 0444, show_caps, NULL,
+ 4, 0, "power%d_cap_max", s);
attr++;
- snprintf(attr->name, sizeof(attr->name), "power%d_cap_min", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL,
- 5, 0);
+ occ_init_attribute(attr, 0444, show_caps, NULL,
+ 5, 0, "power%d_cap_min", s);
attr++;
- snprintf(attr->name, sizeof(attr->name), "power%d_cap_user",
- s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0644, show_caps,
- occ_store_caps_user, 6, 0);
+ occ_init_attribute(attr, 0644, show_caps, occ_store_caps_user,
+ 6, 0, "power%d_cap_user", s);
attr++;
if (sensors->caps.version > 1) {
- snprintf(attr->name, sizeof(attr->name),
- "power%d_cap_user_source", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
- show_caps, NULL, 7, 0);
+ occ_init_attribute(attr, 0444, show_caps, NULL,
+ 7, 0, "power%d_cap_user_source", s);
attr++;
if (sensors->caps.version > 2) {
- snprintf(attr->name, sizeof(attr->name),
- "power%d_cap_min_soft", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
- show_caps, NULL,
- 8, 0);
+ occ_init_attribute(attr, 0444, show_caps, NULL,
+ 8, 0,
+ "power%d_cap_min_soft", s);
attr++;
}
}
@@ -1064,19 +1023,16 @@ static int occ_setup_sensor_attrs(struct occ *occ)
for (i = 0; i < sensors->extended.num_sensors; ++i) {
s = i + 1;
- snprintf(attr->name, sizeof(attr->name), "extn%d_label", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
- occ_show_extended, NULL, 0, i);
+ occ_init_attribute(attr, 0444, occ_show_extended, NULL,
+ 0, i, "extn%d_label", s);
attr++;
- snprintf(attr->name, sizeof(attr->name), "extn%d_flags", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
- occ_show_extended, NULL, 1, i);
+ occ_init_attribute(attr, 0444, occ_show_extended, NULL,
+ 1, i, "extn%d_flags", s);
attr++;
- snprintf(attr->name, sizeof(attr->name), "extn%d_input", s);
- attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
- occ_show_extended, NULL, 2, i);
+ occ_init_attribute(attr, 0444, occ_show_extended, NULL,
+ 2, i, "extn%d_input", s);
attr++;
}
diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c
index 2bc8cccb01fd..52d4000902d5 100644
--- a/drivers/hwmon/pmbus/ucd9000.c
+++ b/drivers/hwmon/pmbus/ucd9000.c
@@ -226,15 +226,15 @@ static int ucd9000_gpio_set(struct gpio_chip *gc, unsigned int offset,
}
if (value) {
- if (ret & UCD9000_GPIO_CONFIG_STATUS)
+ if (ret & UCD9000_GPIO_CONFIG_OUT_VALUE)
return 0;
- ret |= UCD9000_GPIO_CONFIG_STATUS;
+ ret |= UCD9000_GPIO_CONFIG_OUT_VALUE;
} else {
- if (!(ret & UCD9000_GPIO_CONFIG_STATUS))
+ if (!(ret & UCD9000_GPIO_CONFIG_OUT_VALUE))
return 0;
- ret &= ~UCD9000_GPIO_CONFIG_STATUS;
+ ret &= ~UCD9000_GPIO_CONFIG_OUT_VALUE;
}
ret |= UCD9000_GPIO_CONFIG_ENABLE;
diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c
index eddf25b90ca8..6544d27e4419 100644
--- a/drivers/i2c/algos/i2c-algo-bit.c
+++ b/drivers/i2c/algos/i2c-algo-bit.c
@@ -619,8 +619,8 @@ static u32 bit_func(struct i2c_adapter *adap)
/* -----exported algorithm data: ------------------------------------- */
const struct i2c_algorithm i2c_bit_algo = {
- .master_xfer = bit_xfer,
- .master_xfer_atomic = bit_xfer_atomic,
+ .xfer = bit_xfer,
+ .xfer_atomic = bit_xfer_atomic,
.functionality = bit_func,
};
EXPORT_SYMBOL(i2c_bit_algo);
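The bulk of the i2c changes from here on are one mechanical rename: struct i2c_algorithm callbacks are now spelled .xfer/.xfer_atomic instead of the legacy .master_xfer/.master_xfer_atomic, with unchanged signatures. That the i2c-mux hunk further down can still read priv->algo.master_xfer while writing .xfer_atomic suggests the two spellings alias each other during the transition. A self-contained miniature of the initializer change, with the struct trimmed to a stand-in:

#include <stdio.h>

/* miniature stand-in for struct i2c_algorithm, new spelling only */
struct i2c_algorithm {
        int (*xfer)(void);        /* was .master_xfer */
        int (*xfer_atomic)(void); /* was .master_xfer_atomic, optional */
        unsigned int (*functionality)(void);
};

static int foo_xfer(void) { return 0; }
static unsigned int foo_func(void) { return 0x0e; }

static const struct i2c_algorithm foo_algo = {
        .xfer          = foo_xfer, /* previously .master_xfer = foo_xfer */
        .functionality = foo_func,
};

int main(void)
{
        printf("funcs: 0x%x\n", foo_algo.functionality());
        return 0;
}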
diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c
index 384af88e58ad..74b66aec33d4 100644
--- a/drivers/i2c/algos/i2c-algo-pca.c
+++ b/drivers/i2c/algos/i2c-algo-pca.c
@@ -361,8 +361,8 @@ static u32 pca_func(struct i2c_adapter *adap)
}
static const struct i2c_algorithm pca_algo = {
- .master_xfer = pca_xfer,
- .functionality = pca_func,
+ .xfer = pca_xfer,
+ .functionality = pca_func,
};
static unsigned int pca_probe_chip(struct i2c_adapter *adap)
diff --git a/drivers/i2c/algos/i2c-algo-pcf.c b/drivers/i2c/algos/i2c-algo-pcf.c
index 740066ceaea3..fd563e845d4b 100644
--- a/drivers/i2c/algos/i2c-algo-pcf.c
+++ b/drivers/i2c/algos/i2c-algo-pcf.c
@@ -389,8 +389,8 @@ static u32 pcf_func(struct i2c_adapter *adap)
/* exported algorithm data: */
static const struct i2c_algorithm pcf_algo = {
- .master_xfer = pcf_xfer,
- .functionality = pcf_func,
+ .xfer = pcf_xfer,
+ .functionality = pcf_func,
};
/*
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 48c5ab832009..c8d115b58e44 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -200,7 +200,7 @@ config I2C_ISMT
config I2C_PIIX4
tristate "Intel PIIX4 and compatible (ATI/AMD/Serverworks/Broadcom/SMSC)"
- depends on PCI && HAS_IOPORT && X86
+ depends on PCI && HAS_IOPORT
select I2C_SMBUS
help
If you say yes to this option, support will be included for the Intel
@@ -1530,7 +1530,7 @@ config I2C_XGENE_SLIMPRO
config SCx200_ACB
tristate "Geode ACCESS.bus support"
- depends on X86_32 && PCI
+ depends on X86_32 && PCI && HAS_IOPORT
help
Enable the use of the ACCESS.bus controllers on the Geode SCx200 and
SC1100 processors and the CS5535 and CS5536 Geode companion devices.
diff --git a/drivers/i2c/busses/i2c-amd-mp2-plat.c b/drivers/i2c/busses/i2c-amd-mp2-plat.c
index d9dd0e475d1a..188e24cc4d35 100644
--- a/drivers/i2c/busses/i2c-amd-mp2-plat.c
+++ b/drivers/i2c/busses/i2c-amd-mp2-plat.c
@@ -179,7 +179,7 @@ static u32 i2c_amd_func(struct i2c_adapter *a)
}
static const struct i2c_algorithm i2c_amd_algorithm = {
- .master_xfer = i2c_amd_xfer,
+ .xfer = i2c_amd_xfer,
.functionality = i2c_amd_func,
};
diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
index 1550d3d552ae..a26b74c71206 100644
--- a/drivers/i2c/busses/i2c-aspeed.c
+++ b/drivers/i2c/busses/i2c-aspeed.c
@@ -814,11 +814,11 @@ static int aspeed_i2c_unreg_slave(struct i2c_client *client)
#endif /* CONFIG_I2C_SLAVE */
static const struct i2c_algorithm aspeed_i2c_algo = {
- .master_xfer = aspeed_i2c_master_xfer,
- .functionality = aspeed_i2c_functionality,
+ .xfer = aspeed_i2c_master_xfer,
+ .functionality = aspeed_i2c_functionality,
#if IS_ENABLED(CONFIG_I2C_SLAVE)
- .reg_slave = aspeed_i2c_reg_slave,
- .unreg_slave = aspeed_i2c_unreg_slave,
+ .reg_slave = aspeed_i2c_reg_slave,
+ .unreg_slave = aspeed_i2c_unreg_slave,
#endif /* CONFIG_I2C_SLAVE */
};
diff --git a/drivers/i2c/busses/i2c-at91-master.c b/drivers/i2c/busses/i2c-at91-master.c
index 374fc50bb205..59795c1c24ff 100644
--- a/drivers/i2c/busses/i2c-at91-master.c
+++ b/drivers/i2c/busses/i2c-at91-master.c
@@ -739,8 +739,8 @@ static u32 at91_twi_func(struct i2c_adapter *adapter)
}
static const struct i2c_algorithm at91_twi_algorithm = {
- .master_xfer = at91_twi_xfer,
- .functionality = at91_twi_func,
+ .xfer = at91_twi_xfer,
+ .functionality = at91_twi_func,
};
static int at91_twi_configure_dma(struct at91_twi_dev *dev, u32 phy_addr)
diff --git a/drivers/i2c/busses/i2c-axxia.c b/drivers/i2c/busses/i2c-axxia.c
index 50030256cd85..0555eeb6903a 100644
--- a/drivers/i2c/busses/i2c-axxia.c
+++ b/drivers/i2c/busses/i2c-axxia.c
@@ -706,7 +706,7 @@ static int axxia_i2c_unreg_slave(struct i2c_client *slave)
}
static const struct i2c_algorithm axxia_i2c_algo = {
- .master_xfer = axxia_i2c_xfer,
+ .xfer = axxia_i2c_xfer,
.functionality = axxia_i2c_func,
.reg_slave = axxia_i2c_reg_slave,
.unreg_slave = axxia_i2c_unreg_slave,
diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c
index 63bc3c8f49d3..e418a4f23f15 100644
--- a/drivers/i2c/busses/i2c-bcm-iproc.c
+++ b/drivers/i2c/busses/i2c-bcm-iproc.c
@@ -1041,7 +1041,7 @@ static int bcm_iproc_i2c_unreg_slave(struct i2c_client *slave)
}
static struct i2c_algorithm bcm_iproc_algo = {
- .master_xfer = bcm_iproc_i2c_xfer,
+ .xfer = bcm_iproc_i2c_xfer,
.functionality = bcm_iproc_i2c_functionality,
.reg_slave = bcm_iproc_i2c_reg_slave,
.unreg_slave = bcm_iproc_i2c_unreg_slave,
diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c
index 8df63aaf2a80..697d095afbe4 100644
--- a/drivers/i2c/busses/i2c-cadence.c
+++ b/drivers/i2c/busses/i2c-cadence.c
@@ -1231,12 +1231,12 @@ static int cdns_unreg_slave(struct i2c_client *slave)
#endif
static const struct i2c_algorithm cdns_i2c_algo = {
- .master_xfer = cdns_i2c_master_xfer,
- .master_xfer_atomic = cdns_i2c_master_xfer_atomic,
- .functionality = cdns_i2c_func,
+ .xfer = cdns_i2c_master_xfer,
+ .xfer_atomic = cdns_i2c_master_xfer_atomic,
+ .functionality = cdns_i2c_func,
#if IS_ENABLED(CONFIG_I2C_SLAVE)
- .reg_slave = cdns_reg_slave,
- .unreg_slave = cdns_unreg_slave,
+ .reg_slave = cdns_reg_slave,
+ .unreg_slave = cdns_unreg_slave,
#endif
};
diff --git a/drivers/i2c/busses/i2c-cgbc.c b/drivers/i2c/busses/i2c-cgbc.c
index f054d167ac47..25a74fa51aa0 100644
--- a/drivers/i2c/busses/i2c-cgbc.c
+++ b/drivers/i2c/busses/i2c-cgbc.c
@@ -331,8 +331,8 @@ static u32 cgbc_i2c_func(struct i2c_adapter *adap)
}
static const struct i2c_algorithm cgbc_i2c_algorithm = {
- .master_xfer = cgbc_i2c_xfer,
- .functionality = cgbc_i2c_func,
+ .xfer = cgbc_i2c_xfer,
+ .functionality = cgbc_i2c_func,
};
static struct i2c_algo_cgbc_data cgbc_i2c_algo_data[] = {
diff --git a/drivers/i2c/busses/i2c-designware-amdisp.c b/drivers/i2c/busses/i2c-designware-amdisp.c
index ad6f08338124..450793d5f839 100644
--- a/drivers/i2c/busses/i2c-designware-amdisp.c
+++ b/drivers/i2c/busses/i2c-designware-amdisp.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
+#include <linux/soc/amd/isp4_misc.h>
#include "i2c-designware-core.h"
@@ -62,6 +63,7 @@ static int amd_isp_dw_i2c_plat_probe(struct platform_device *pdev)
adap = &isp_i2c_dev->adapter;
adap->owner = THIS_MODULE;
+ scnprintf(adap->name, sizeof(adap->name), AMDISP_I2C_ADAP_NAME);
ACPI_COMPANION_SET(&adap->dev, ACPI_COMPANION(&pdev->dev));
adap->dev.of_node = pdev->dev.of_node;
/* use dynamically allocated adapter id */
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index c5394229b77f..cbd88ffa5610 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -363,6 +363,7 @@ static int amd_i2c_dw_xfer_quirk(struct i2c_adapter *adap, struct i2c_msg *msgs,
dev->msgs = msgs;
dev->msgs_num = num_msgs;
+ dev->msg_write_idx = 0;
i2c_dw_xfer_init(dev);
/* Initiate messages read/write transaction */
@@ -1042,8 +1043,9 @@ int i2c_dw_probe_master(struct dw_i2c_dev *dev)
if (ret)
return ret;
- snprintf(adap->name, sizeof(adap->name),
- "Synopsys DesignWare I2C adapter");
+ if (!adap->name[0])
+ scnprintf(adap->name, sizeof(adap->name),
+ "Synopsys DesignWare I2C adapter");
adap->retries = 3;
adap->algo = &i2c_dw_algo;
adap->quirks = &i2c_dw_quirks;
diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c
index efdaddf99f9e..27ea3c130a16 100644
--- a/drivers/i2c/busses/i2c-eg20t.c
+++ b/drivers/i2c/busses/i2c-eg20t.c
@@ -690,7 +690,7 @@ static u32 pch_i2c_func(struct i2c_adapter *adap)
}
static const struct i2c_algorithm pch_algorithm = {
- .master_xfer = pch_i2c_xfer,
+ .xfer = pch_i2c_xfer,
.functionality = pch_i2c_func
};
diff --git a/drivers/i2c/busses/i2c-emev2.c b/drivers/i2c/busses/i2c-emev2.c
index 2512cef8e2a2..ece019b3d066 100644
--- a/drivers/i2c/busses/i2c-emev2.c
+++ b/drivers/i2c/busses/i2c-emev2.c
@@ -351,10 +351,10 @@ static int em_i2c_unreg_slave(struct i2c_client *slave)
}
static const struct i2c_algorithm em_i2c_algo = {
- .master_xfer = em_i2c_xfer,
+ .xfer = em_i2c_xfer,
.functionality = em_i2c_func,
- .reg_slave = em_i2c_reg_slave,
- .unreg_slave = em_i2c_unreg_slave,
+ .reg_slave = em_i2c_reg_slave,
+ .unreg_slave = em_i2c_unreg_slave,
};
static int em_i2c_probe(struct platform_device *pdev)
diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c
index 02f24479aa07..9c1c5f3c09f6 100644
--- a/drivers/i2c/busses/i2c-exynos5.c
+++ b/drivers/i2c/busses/i2c-exynos5.c
@@ -879,9 +879,9 @@ static u32 exynos5_i2c_func(struct i2c_adapter *adap)
}
static const struct i2c_algorithm exynos5_i2c_algorithm = {
- .master_xfer = exynos5_i2c_xfer,
- .master_xfer_atomic = exynos5_i2c_xfer_atomic,
- .functionality = exynos5_i2c_func,
+ .xfer = exynos5_i2c_xfer,
+ .xfer_atomic = exynos5_i2c_xfer_atomic,
+ .functionality = exynos5_i2c_func,
};
static int exynos5_i2c_probe(struct platform_device *pdev)
diff --git a/drivers/i2c/busses/i2c-gxp.c b/drivers/i2c/busses/i2c-gxp.c
index 0fc39caa6c87..2d117e7e3cb6 100644
--- a/drivers/i2c/busses/i2c-gxp.c
+++ b/drivers/i2c/busses/i2c-gxp.c
@@ -184,11 +184,11 @@ static int gxp_i2c_unreg_slave(struct i2c_client *slave)
#endif
static const struct i2c_algorithm gxp_i2c_algo = {
- .master_xfer = gxp_i2c_master_xfer,
+ .xfer = gxp_i2c_master_xfer,
.functionality = gxp_i2c_func,
#if IS_ENABLED(CONFIG_I2C_SLAVE)
- .reg_slave = gxp_i2c_reg_slave,
- .unreg_slave = gxp_i2c_unreg_slave,
+ .reg_slave = gxp_i2c_reg_slave,
+ .unreg_slave = gxp_i2c_unreg_slave,
#endif
};
diff --git a/drivers/i2c/busses/i2c-img-scb.c b/drivers/i2c/busses/i2c-img-scb.c
index 3278707bb885..a454f9f25146 100644
--- a/drivers/i2c/busses/i2c-img-scb.c
+++ b/drivers/i2c/busses/i2c-img-scb.c
@@ -1143,7 +1143,7 @@ static u32 img_i2c_func(struct i2c_adapter *adap)
}
static const struct i2c_algorithm img_i2c_algo = {
- .master_xfer = img_i2c_xfer,
+ .xfer = img_i2c_xfer,
.functionality = img_i2c_func,
};
diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c
index 342d47e67586..064bc83840a6 100644
--- a/drivers/i2c/busses/i2c-imx-lpi2c.c
+++ b/drivers/i2c/busses/i2c-imx-lpi2c.c
@@ -1268,10 +1268,10 @@ static u32 lpi2c_imx_func(struct i2c_adapter *adapter)
}
static const struct i2c_algorithm lpi2c_imx_algo = {
- .master_xfer = lpi2c_imx_xfer,
- .functionality = lpi2c_imx_func,
- .reg_target = lpi2c_imx_register_target,
- .unreg_target = lpi2c_imx_unregister_target,
+ .xfer = lpi2c_imx_xfer,
+ .functionality = lpi2c_imx_func,
+ .reg_target = lpi2c_imx_register_target,
+ .unreg_target = lpi2c_imx_unregister_target,
};
static const struct of_device_id lpi2c_imx_of_match[] = {
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index de01dfecb16e..205cc132fdec 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -1008,7 +1008,7 @@ static inline int i2c_imx_isr_read(struct imx_i2c_struct *i2c_imx)
/* setup bus to read data */
temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
temp &= ~I2CR_MTX;
- if (i2c_imx->msg->len - 1)
+ if ((i2c_imx->msg->len - 1) || (i2c_imx->msg->flags & I2C_M_RECV_LEN))
temp &= ~I2CR_TXAK;
imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
@@ -1063,6 +1063,7 @@ static inline void i2c_imx_isr_read_block_data_len(struct imx_i2c_struct *i2c_im
wake_up(&i2c_imx->queue);
}
i2c_imx->msg->len += len;
+ i2c_imx->msg->buf[i2c_imx->msg_buf_idx++] = len;
}
static irqreturn_t i2c_imx_master_isr(struct imx_i2c_struct *i2c_imx, unsigned int status)
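For SMBus block reads (I2C_M_RECV_LEN) the first byte on the wire is the byte count, so the controller must keep ACKing even when that count byte would otherwise look like the last byte, and the count itself belongs at the start of the buffer with the payload after it; that is what the added msg->buf[...] = len line provides. A userspace model of the buffer layout the fixed driver produces:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* model of an SMBus block-read buffer as the fixed driver fills it:
 * buf[0] holds the count reported by the device, data follows */
int main(void)
{
        uint8_t buf[32];
        size_t idx = 0;

        uint8_t wire_len = 3;                  /* length byte from the device */
        buf[idx++] = wire_len;                 /* the fix: store it and advance */
        memcpy(&buf[idx], "\x10\x20\x30", wire_len);

        printf("count=%u first=0x%02x\n", buf[0], buf[1]);
        return 0;
}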
@@ -1692,11 +1693,11 @@ static u32 i2c_imx_func(struct i2c_adapter *adapter)
}
static const struct i2c_algorithm i2c_imx_algo = {
- .master_xfer = i2c_imx_xfer,
- .master_xfer_atomic = i2c_imx_xfer_atomic,
+ .xfer = i2c_imx_xfer,
+ .xfer_atomic = i2c_imx_xfer_atomic,
.functionality = i2c_imx_func,
- .reg_slave = i2c_imx_reg_slave,
- .unreg_slave = i2c_imx_unreg_slave,
+ .reg_slave = i2c_imx_reg_slave,
+ .unreg_slave = i2c_imx_unreg_slave,
};
static int i2c_imx_probe(struct platform_device *pdev)
diff --git a/drivers/i2c/busses/i2c-k1.c b/drivers/i2c/busses/i2c-k1.c
index 5965b4cf6220..b68a21fff0b5 100644
--- a/drivers/i2c/busses/i2c-k1.c
+++ b/drivers/i2c/busses/i2c-k1.c
@@ -477,7 +477,7 @@ static int spacemit_i2c_xfer(struct i2c_adapter *adapt, struct i2c_msg *msgs, in
ret = spacemit_i2c_wait_bus_idle(i2c);
if (!ret)
- spacemit_i2c_xfer_msg(i2c);
+ ret = spacemit_i2c_xfer_msg(i2c);
else if (ret < 0)
dev_dbg(i2c->dev, "i2c transfer error: %d\n", ret);
else
diff --git a/drivers/i2c/busses/i2c-keba.c b/drivers/i2c/busses/i2c-keba.c
index 7b9ed2592f5b..9420c8b342b5 100644
--- a/drivers/i2c/busses/i2c-keba.c
+++ b/drivers/i2c/busses/i2c-keba.c
@@ -500,7 +500,7 @@ static u32 ki2c_func(struct i2c_adapter *adap)
}
static const struct i2c_algorithm ki2c_algo = {
- .master_xfer = ki2c_xfer,
+ .xfer = ki2c_xfer,
.functionality = ki2c_func,
};
diff --git a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c
index 5ef136c3ecb1..bc0f1a0c8ee1 100644
--- a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c
+++ b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c
@@ -1048,7 +1048,7 @@ static u32 pci1xxxx_i2c_get_funcs(struct i2c_adapter *adap)
}
static const struct i2c_algorithm pci1xxxx_i2c_algo = {
- .master_xfer = pci1xxxx_i2c_xfer,
+ .xfer = pci1xxxx_i2c_xfer,
.functionality = pci1xxxx_i2c_get_funcs,
};
diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c
index e1d69537353b..0d9032953e48 100644
--- a/drivers/i2c/busses/i2c-meson.c
+++ b/drivers/i2c/busses/i2c-meson.c
@@ -448,8 +448,8 @@ static u32 meson_i2c_func(struct i2c_adapter *adap)
}
static const struct i2c_algorithm meson_i2c_algorithm = {
- .master_xfer = meson_i2c_xfer,
- .master_xfer_atomic = meson_i2c_xfer_atomic,
+ .xfer = meson_i2c_xfer,
+ .xfer_atomic = meson_i2c_xfer_atomic,
.functionality = meson_i2c_func,
};
diff --git a/drivers/i2c/busses/i2c-microchip-corei2c.c b/drivers/i2c/busses/i2c-microchip-corei2c.c
index 492bf4c34722..c8599733633e 100644
--- a/drivers/i2c/busses/i2c-microchip-corei2c.c
+++ b/drivers/i2c/busses/i2c-microchip-corei2c.c
@@ -435,6 +435,7 @@ static int mchp_corei2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned
u8 tx_buf[I2C_SMBUS_BLOCK_MAX + 2];
u8 rx_buf[I2C_SMBUS_BLOCK_MAX + 1];
int num_msgs = 1;
+ int ret;
msgs[CORE_I2C_SMBUS_MSG_WR].addr = addr;
msgs[CORE_I2C_SMBUS_MSG_WR].flags = 0;
@@ -505,7 +506,10 @@ static int mchp_corei2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned
return -EOPNOTSUPP;
}
- mchp_corei2c_xfer(&idev->adapter, msgs, num_msgs);
+ ret = mchp_corei2c_xfer(&idev->adapter, msgs, num_msgs);
+ if (ret < 0)
+ return ret;
+
if (read_write == I2C_SMBUS_WRITE || size <= I2C_SMBUS_BYTE_DATA)
return 0;
@@ -526,7 +530,7 @@ static int mchp_corei2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned
}
static const struct i2c_algorithm mchp_corei2c_algo = {
- .master_xfer = mchp_corei2c_xfer,
+ .xfer = mchp_corei2c_xfer,
.functionality = mchp_corei2c_func,
.smbus_xfer = mchp_corei2c_smbus_xfer,
};
diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c
index 5bd342047d59..ab456c3717db 100644
--- a/drivers/i2c/busses/i2c-mt65xx.c
+++ b/drivers/i2c/busses/i2c-mt65xx.c
@@ -1342,7 +1342,7 @@ static u32 mtk_i2c_functionality(struct i2c_adapter *adap)
}
static const struct i2c_algorithm mtk_i2c_algorithm = {
- .master_xfer = mtk_i2c_transfer,
+ .xfer = mtk_i2c_transfer,
.functionality = mtk_i2c_functionality,
};
diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c
index ad62d56b2186..08c9091a1e35 100644
--- a/drivers/i2c/busses/i2c-mxs.c
+++ b/drivers/i2c/busses/i2c-mxs.c
@@ -687,7 +687,7 @@ static irqreturn_t mxs_i2c_isr(int this_irq, void *dev_id)
}
static const struct i2c_algorithm mxs_i2c_algo = {
- .master_xfer = mxs_i2c_xfer,
+ .xfer = mxs_i2c_xfer,
.functionality = mxs_i2c_func,
};
diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c
index d2877e4cc28d..19b648fc094d 100644
--- a/drivers/i2c/busses/i2c-nomadik.c
+++ b/drivers/i2c/busses/i2c-nomadik.c
@@ -996,8 +996,8 @@ static unsigned int nmk_i2c_functionality(struct i2c_adapter *adap)
}
static const struct i2c_algorithm nmk_i2c_algo = {
- .master_xfer = nmk_i2c_xfer,
- .functionality = nmk_i2c_functionality
+ .xfer = nmk_i2c_xfer,
+ .functionality = nmk_i2c_functionality
};
static void nmk_i2c_of_probe(struct device_node *np,
diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c
index 892e2d2988a7..8b7e15240fb0 100644
--- a/drivers/i2c/busses/i2c-npcm7xx.c
+++ b/drivers/i2c/busses/i2c-npcm7xx.c
@@ -2470,11 +2470,11 @@ static const struct i2c_adapter_quirks npcm_i2c_quirks = {
};
static const struct i2c_algorithm npcm_i2c_algo = {
- .master_xfer = npcm_i2c_master_xfer,
+ .xfer = npcm_i2c_master_xfer,
.functionality = npcm_i2c_functionality,
#if IS_ENABLED(CONFIG_I2C_SLAVE)
- .reg_slave = npcm_i2c_reg_slave,
- .unreg_slave = npcm_i2c_unreg_slave,
+ .reg_slave = npcm_i2c_reg_slave,
+ .unreg_slave = npcm_i2c_unreg_slave,
#endif
};
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 876791d20ed5..5fcc9f6c33e5 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -1201,9 +1201,9 @@ omap_i2c_isr_thread(int this_irq, void *dev_id)
}
static const struct i2c_algorithm omap_i2c_algo = {
- .master_xfer = omap_i2c_xfer_irq,
- .master_xfer_atomic = omap_i2c_xfer_polling,
- .functionality = omap_i2c_func,
+ .xfer = omap_i2c_xfer_irq,
+ .xfer_atomic = omap_i2c_xfer_polling,
+ .functionality = omap_i2c_func,
};
static const struct i2c_adapter_quirks omap_i2c_quirks = {
@@ -1461,18 +1461,20 @@ omap_i2c_probe(struct platform_device *pdev)
if (IS_ERR(mux_state)) {
r = PTR_ERR(mux_state);
dev_dbg(&pdev->dev, "failed to get I2C mux: %d\n", r);
- goto err_disable_pm;
+ goto err_put_pm;
}
omap->mux_state = mux_state;
r = mux_state_select(omap->mux_state);
if (r) {
dev_err(&pdev->dev, "failed to select I2C mux: %d\n", r);
- goto err_disable_pm;
+ goto err_put_pm;
}
}
/* reset ASAP, clearing any IRQs */
- omap_i2c_init(omap);
+ r = omap_i2c_init(omap);
+ if (r)
+ goto err_mux_state_deselect;
if (omap->rev < OMAP_I2C_OMAP1_REV_2)
r = devm_request_irq(&pdev->dev, omap->irq, omap_i2c_omap1_isr,
@@ -1515,9 +1517,13 @@ omap_i2c_probe(struct platform_device *pdev)
err_unuse_clocks:
omap_i2c_write_reg(omap, OMAP_I2C_CON_REG, 0);
- pm_runtime_dont_use_autosuspend(omap->dev);
+err_mux_state_deselect:
+ if (omap->mux_state)
+ mux_state_deselect(omap->mux_state);
+err_put_pm:
pm_runtime_put_sync(omap->dev);
err_disable_pm:
+ pm_runtime_dont_use_autosuspend(omap->dev);
pm_runtime_disable(&pdev->dev);
return r;
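The relabelled error ladder unwinds in strict reverse order of setup: a failed omap_i2c_init() is no longer ignored, the mux is deselected only if it was selected, and the runtime-PM reference is dropped before runtime PM is disabled. A compact, runnable sketch of the goto-ladder idiom, with all step/undo names hypothetical:

#include <stdio.h>

static int step_a(void) { return 0; }
static void undo_a(void) { puts("undo a"); }
static int step_b(void) { return 0; }
static void undo_b(void) { puts("undo b"); }
static int step_c(void) { return -1; } /* fails */

int probe(void)
{
        int r;

        r = step_a();
        if (r)
                return r;
        r = step_b();
        if (r)
                goto err_undo_a;
        r = step_c();
        if (r)
                goto err_undo_b;
        return 0;

err_undo_b:             /* labels tear down exactly what succeeded, in reverse */
        undo_b();
err_undo_a:
        undo_a();
        return r;
}

int main(void) { return probe() ? 1 : 0; }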
diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index 9d3a4dc2bd60..ac3bb550303f 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -34,7 +34,7 @@
#include <linux/dmi.h>
#include <linux/acpi.h>
#include <linux/io.h>
-#include <asm/amd/fch.h>
+#include <linux/platform_data/x86/amd-fch.h>
#include "i2c-piix4.h"
diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index 9a1af5bbd604..8daa0008bd05 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -580,7 +580,7 @@ static u32 i2c_pnx_func(struct i2c_adapter *adapter)
}
static const struct i2c_algorithm pnx_algorithm = {
- .master_xfer = i2c_pnx_xfer,
+ .xfer = i2c_pnx_xfer,
.functionality = i2c_pnx_func,
};
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index 4415a29f749b..968a8b8794da 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -1154,11 +1154,11 @@ static u32 i2c_pxa_functionality(struct i2c_adapter *adap)
}
static const struct i2c_algorithm i2c_pxa_algorithm = {
- .master_xfer = i2c_pxa_xfer,
- .functionality = i2c_pxa_functionality,
+ .xfer = i2c_pxa_xfer,
+ .functionality = i2c_pxa_functionality,
#ifdef CONFIG_I2C_PXA_SLAVE
- .reg_slave = i2c_pxa_slave_reg,
- .unreg_slave = i2c_pxa_slave_unreg,
+ .reg_slave = i2c_pxa_slave_reg,
+ .unreg_slave = i2c_pxa_slave_unreg,
#endif
};
@@ -1244,11 +1244,11 @@ static int i2c_pxa_pio_xfer(struct i2c_adapter *adap,
}
static const struct i2c_algorithm i2c_pxa_pio_algorithm = {
- .master_xfer = i2c_pxa_pio_xfer,
- .functionality = i2c_pxa_functionality,
+ .xfer = i2c_pxa_pio_xfer,
+ .functionality = i2c_pxa_functionality,
#ifdef CONFIG_I2C_PXA_SLAVE
- .reg_slave = i2c_pxa_slave_reg,
- .unreg_slave = i2c_pxa_slave_unreg,
+ .reg_slave = i2c_pxa_slave_reg,
+ .unreg_slave = i2c_pxa_slave_unreg,
#endif
};
diff --git a/drivers/i2c/busses/i2c-qcom-cci.c b/drivers/i2c/busses/i2c-qcom-cci.c
index 05b73326afd4..a3afa11a71a1 100644
--- a/drivers/i2c/busses/i2c-qcom-cci.c
+++ b/drivers/i2c/busses/i2c-qcom-cci.c
@@ -462,8 +462,8 @@ static u32 cci_func(struct i2c_adapter *adap)
}
static const struct i2c_algorithm cci_algo = {
- .master_xfer = cci_xfer,
- .functionality = cci_func,
+ .xfer = cci_xfer,
+ .functionality = cci_func,
};
static int cci_enable_clocks(struct cci *cci)
diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
index ccea575fb783..13889f52b6f7 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -727,8 +727,8 @@ static u32 geni_i2c_func(struct i2c_adapter *adap)
}
static const struct i2c_algorithm geni_i2c_algo = {
- .master_xfer = geni_i2c_xfer,
- .functionality = geni_i2c_func,
+ .xfer = geni_i2c_xfer,
+ .functionality = geni_i2c_func,
};
#ifdef CONFIG_ACPI
diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c
index 3a36d682ed57..fc348924d522 100644
--- a/drivers/i2c/busses/i2c-qup.c
+++ b/drivers/i2c/busses/i2c-qup.c
@@ -452,8 +452,10 @@ static int qup_i2c_bus_active(struct qup_i2c_dev *qup, int len)
if (!(status & I2C_STATUS_BUS_ACTIVE))
break;
- if (time_after(jiffies, timeout))
+ if (time_after(jiffies, timeout)) {
ret = -ETIMEDOUT;
+ break;
+ }
usleep_range(len, len * 2);
}
@@ -1634,13 +1636,13 @@ static u32 qup_i2c_func(struct i2c_adapter *adap)
}
static const struct i2c_algorithm qup_i2c_algo = {
- .master_xfer = qup_i2c_xfer,
- .functionality = qup_i2c_func,
+ .xfer = qup_i2c_xfer,
+ .functionality = qup_i2c_func,
};
static const struct i2c_algorithm qup_i2c_algo_v2 = {
- .master_xfer = qup_i2c_xfer_v2,
- .functionality = qup_i2c_func,
+ .xfer = qup_i2c_xfer_v2,
+ .functionality = qup_i2c_func,
};
/*
diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index 5693a38da7b5..d51884ab99f4 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -1084,11 +1084,11 @@ static u32 rcar_i2c_func(struct i2c_adapter *adap)
}
static const struct i2c_algorithm rcar_i2c_algo = {
- .master_xfer = rcar_i2c_master_xfer,
- .master_xfer_atomic = rcar_i2c_master_xfer_atomic,
- .functionality = rcar_i2c_func,
- .reg_slave = rcar_reg_slave,
- .unreg_slave = rcar_unreg_slave,
+ .xfer = rcar_i2c_master_xfer,
+ .xfer_atomic = rcar_i2c_master_xfer_atomic,
+ .functionality = rcar_i2c_func,
+ .reg_slave = rcar_reg_slave,
+ .unreg_slave = rcar_unreg_slave,
};
static const struct i2c_adapter_quirks rcar_i2c_quirks = {
diff --git a/drivers/i2c/busses/i2c-robotfuzz-osif.c b/drivers/i2c/busses/i2c-robotfuzz-osif.c
index 80d45079b763..e0a76fb5bc31 100644
--- a/drivers/i2c/busses/i2c-robotfuzz-osif.c
+++ b/drivers/i2c/busses/i2c-robotfuzz-osif.c
@@ -111,6 +111,11 @@ static u32 osif_func(struct i2c_adapter *adapter)
return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
}
+/* prevent invalid 0-length usb_control_msg */
+static const struct i2c_adapter_quirks osif_quirks = {
+ .flags = I2C_AQ_NO_ZERO_LEN_READ,
+};
+
static const struct i2c_algorithm osif_algorithm = {
.xfer = osif_xfer,
.functionality = osif_func,
@@ -143,6 +148,7 @@ static int osif_probe(struct usb_interface *interface,
priv->adapter.owner = THIS_MODULE;
priv->adapter.class = I2C_CLASS_HWMON;
+ priv->adapter.quirks = &osif_quirks;
priv->adapter.algo = &osif_algorithm;
priv->adapter.algo_data = priv;
snprintf(priv->adapter.name, sizeof(priv->adapter.name),
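This adapter and i2c-tiny-usb below gain the same quirk: with I2C_AQ_NO_ZERO_LEN_READ set, the i2c core rejects zero-length reads before they reach the driver, where they would become an invalid 0-length usb_control_msg. A userspace model of the check the flag enables; the flag's real value lives in include/linux/i2c.h, the one here is illustrative:

#include <stdio.h>

#define I2C_AQ_NO_ZERO_LEN_READ (1 << 0) /* illustrative value only */

struct i2c_adapter_quirks { unsigned int flags; };

static const struct i2c_adapter_quirks osif_quirks = {
        .flags = I2C_AQ_NO_ZERO_LEN_READ,
};

/* model of the core-side validation a quirk like this enables */
static int check_msg(unsigned int quirk_flags, int is_read, int len)
{
        if ((quirk_flags & I2C_AQ_NO_ZERO_LEN_READ) && is_read && len == 0)
                return -1; /* rejected before the driver ever sees it */
        return 0;
}

int main(void)
{
        printf("%d\n", check_msg(osif_quirks.flags, 1, 0)); /* -1 */
        printf("%d\n", check_msg(osif_quirks.flags, 1, 4)); /*  0 */
        return 0;
}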
diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index 0f3cf500df68..f4fa4703acbd 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -800,9 +800,9 @@ static u32 s3c24xx_i2c_func(struct i2c_adapter *adap)
/* i2c bus registration info */
static const struct i2c_algorithm s3c24xx_i2c_algorithm = {
- .master_xfer = s3c24xx_i2c_xfer,
- .master_xfer_atomic = s3c24xx_i2c_xfer_atomic,
- .functionality = s3c24xx_i2c_func,
+ .xfer = s3c24xx_i2c_xfer,
+ .xfer_atomic = s3c24xx_i2c_xfer_atomic,
+ .functionality = s3c24xx_i2c_func,
};
/*
diff --git a/drivers/i2c/busses/i2c-sh7760.c b/drivers/i2c/busses/i2c-sh7760.c
index 620f12596763..43f33988b98f 100644
--- a/drivers/i2c/busses/i2c-sh7760.c
+++ b/drivers/i2c/busses/i2c-sh7760.c
@@ -379,8 +379,8 @@ static u32 sh7760_i2c_func(struct i2c_adapter *adap)
}
static const struct i2c_algorithm sh7760_i2c_algo = {
- .master_xfer = sh7760_i2c_master_xfer,
- .functionality = sh7760_i2c_func,
+ .xfer = sh7760_i2c_master_xfer,
+ .functionality = sh7760_i2c_func,
};
/* calculate CCR register setting for a desired scl clock. SCL clock is
diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index adfcee6c9fdc..dae8967f8749 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c
@@ -740,8 +740,8 @@ static u32 sh_mobile_i2c_func(struct i2c_adapter *adapter)
static const struct i2c_algorithm sh_mobile_i2c_algorithm = {
.functionality = sh_mobile_i2c_func,
- .master_xfer = sh_mobile_i2c_xfer,
- .master_xfer_atomic = sh_mobile_i2c_xfer_atomic,
+ .xfer = sh_mobile_i2c_xfer,
+ .xfer_atomic = sh_mobile_i2c_xfer_atomic,
};
static const struct i2c_adapter_quirks sh_mobile_i2c_quirks = {
diff --git a/drivers/i2c/busses/i2c-stm32.c b/drivers/i2c/busses/i2c-stm32.c
index 157c64e27d0b..f84ec056e36d 100644
--- a/drivers/i2c/busses/i2c-stm32.c
+++ b/drivers/i2c/busses/i2c-stm32.c
@@ -102,7 +102,6 @@ int stm32_i2c_prep_dma_xfer(struct device *dev, struct stm32_i2c_dma *dma,
void *dma_async_param)
{
struct dma_async_tx_descriptor *txdesc;
- struct device *chan_dev;
int ret;
if (rd_wr) {
@@ -116,11 +115,10 @@ int stm32_i2c_prep_dma_xfer(struct device *dev, struct stm32_i2c_dma *dma,
}
dma->dma_len = len;
- chan_dev = dma->chan_using->device->dev;
- dma->dma_buf = dma_map_single(chan_dev, buf, dma->dma_len,
+ dma->dma_buf = dma_map_single(dev, buf, dma->dma_len,
dma->dma_data_dir);
- if (dma_mapping_error(chan_dev, dma->dma_buf)) {
+ if (dma_mapping_error(dev, dma->dma_buf)) {
dev_err(dev, "DMA mapping failed\n");
return -EINVAL;
}
@@ -150,7 +148,7 @@ int stm32_i2c_prep_dma_xfer(struct device *dev, struct stm32_i2c_dma *dma,
return 0;
err:
- dma_unmap_single(chan_dev, dma->dma_buf, dma->dma_len,
+ dma_unmap_single(dev, dma->dma_buf, dma->dma_len,
dma->dma_data_dir);
return ret;
}
diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c
index 973a3a8c6d4a..73a7b8894c0d 100644
--- a/drivers/i2c/busses/i2c-stm32f7.c
+++ b/drivers/i2c/busses/i2c-stm32f7.c
@@ -739,12 +739,13 @@ static void stm32f7_i2c_disable_dma_req(struct stm32f7_i2c_dev *i2c_dev)
static void stm32f7_i2c_dma_callback(void *arg)
{
- struct stm32f7_i2c_dev *i2c_dev = (struct stm32f7_i2c_dev *)arg;
+ struct stm32f7_i2c_dev *i2c_dev = arg;
struct stm32_i2c_dma *dma = i2c_dev->dma;
- struct device *dev = dma->chan_using->device->dev;
stm32f7_i2c_disable_dma_req(i2c_dev);
- dma_unmap_single(dev, dma->dma_buf, dma->dma_len, dma->dma_data_dir);
+ dmaengine_terminate_async(dma->chan_using);
+ dma_unmap_single(i2c_dev->dev, dma->dma_buf, dma->dma_len,
+ dma->dma_data_dir);
complete(&dma->dma_complete);
}
@@ -1510,7 +1511,6 @@ static irqreturn_t stm32f7_i2c_handle_isr_errs(struct stm32f7_i2c_dev *i2c_dev,
u16 addr = f7_msg->addr;
void __iomem *base = i2c_dev->base;
struct device *dev = i2c_dev->dev;
- struct stm32_i2c_dma *dma = i2c_dev->dma;
/* Bus error */
if (status & STM32F7_I2C_ISR_BERR) {
@@ -1551,10 +1551,8 @@ static irqreturn_t stm32f7_i2c_handle_isr_errs(struct stm32f7_i2c_dev *i2c_dev,
}
/* Disable dma */
- if (i2c_dev->use_dma) {
- stm32f7_i2c_disable_dma_req(i2c_dev);
- dmaengine_terminate_async(dma->chan_using);
- }
+ if (i2c_dev->use_dma)
+ stm32f7_i2c_dma_callback(i2c_dev);
i2c_dev->master_mode = false;
complete(&i2c_dev->complete);
@@ -1600,7 +1598,6 @@ static irqreturn_t stm32f7_i2c_isr_event_thread(int irq, void *data)
{
struct stm32f7_i2c_dev *i2c_dev = data;
struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg;
- struct stm32_i2c_dma *dma = i2c_dev->dma;
void __iomem *base = i2c_dev->base;
u32 status, mask;
int ret;
@@ -1619,10 +1616,8 @@ static irqreturn_t stm32f7_i2c_isr_event_thread(int irq, void *data)
dev_dbg(i2c_dev->dev, "<%s>: Receive NACK (addr %x)\n",
__func__, f7_msg->addr);
writel_relaxed(STM32F7_I2C_ICR_NACKCF, base + STM32F7_I2C_ICR);
- if (i2c_dev->use_dma) {
- stm32f7_i2c_disable_dma_req(i2c_dev);
- dmaengine_terminate_async(dma->chan_using);
- }
+ if (i2c_dev->use_dma)
+ stm32f7_i2c_dma_callback(i2c_dev);
f7_msg->result = -ENXIO;
}
@@ -1640,8 +1635,7 @@ static irqreturn_t stm32f7_i2c_isr_event_thread(int irq, void *data)
ret = wait_for_completion_timeout(&i2c_dev->dma->dma_complete, HZ);
if (!ret) {
dev_dbg(i2c_dev->dev, "<%s>: Timed out\n", __func__);
- stm32f7_i2c_disable_dma_req(i2c_dev);
- dmaengine_terminate_async(dma->chan_using);
+ stm32f7_i2c_dma_callback(i2c_dev);
f7_msg->result = -ETIMEDOUT;
}
}
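The three DMA failure paths here (bus error, NACK, completion timeout) previously disabled the DMA request and terminated the channel but left the buffer mapped; funnelling them through stm32f7_i2c_dma_callback() gives a single teardown that also unmaps. A minimal model of the shared-teardown shape:

#include <stdio.h>

/* model: one teardown helper shared by the completion and all error paths */
struct xfer { int mapped; };

static void dma_teardown(struct xfer *x)
{
        /* disable request, terminate channel, then unmap, always together */
        if (x->mapped) {
                puts("unmap");
                x->mapped = 0;
        }
}

static void on_complete(struct xfer *x) { dma_teardown(x); puts("done"); }
static void on_error(struct xfer *x)    { dma_teardown(x); puts("failed"); }

int main(void)
{
        struct xfer a = { .mapped = 1 }, b = { .mapped = 1 };

        on_complete(&a);
        on_error(&b); /* error path no longer skips the unmap */
        return 0;
}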
@@ -2151,8 +2145,8 @@ static u32 stm32f7_i2c_func(struct i2c_adapter *adap)
}
static const struct i2c_algorithm stm32f7_i2c_algo = {
- .master_xfer = stm32f7_i2c_xfer,
- .master_xfer_atomic = stm32f7_i2c_xfer_atomic,
+ .xfer = stm32f7_i2c_xfer,
+ .xfer_atomic = stm32f7_i2c_xfer_atomic,
.smbus_xfer = stm32f7_i2c_smbus_xfer,
.functionality = stm32f7_i2c_func,
.reg_slave = stm32f7_i2c_reg_slave,
diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c
index 31f8d08e32a4..1230f51e1624 100644
--- a/drivers/i2c/busses/i2c-synquacer.c
+++ b/drivers/i2c/busses/i2c-synquacer.c
@@ -520,8 +520,8 @@ static u32 synquacer_i2c_functionality(struct i2c_adapter *adap)
}
static const struct i2c_algorithm synquacer_i2c_algo = {
- .master_xfer = synquacer_i2c_xfer,
- .functionality = synquacer_i2c_functionality,
+ .xfer = synquacer_i2c_xfer,
+ .functionality = synquacer_i2c_functionality,
};
static const struct i2c_adapter synquacer_i2c_ops = {
diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index 049b4d154c23..6316360475e7 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -607,7 +607,6 @@ static int tegra_i2c_wait_for_config_load(struct tegra_i2c_dev *i2c_dev)
static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev)
{
u32 val, clk_divisor, clk_multiplier, tsu_thd, tlow, thigh, non_hs_mode;
- acpi_handle handle = ACPI_HANDLE(i2c_dev->dev);
struct i2c_timings *t = &i2c_dev->timings;
int err;
@@ -619,11 +618,7 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev)
* emit a noisy warning on error, which won't stay unnoticed and
* won't hose the machine entirely.
*/
- if (handle)
- err = acpi_evaluate_object(handle, "_RST", NULL, NULL);
- else
- err = reset_control_reset(i2c_dev->rst);
-
+ err = device_reset(i2c_dev->dev);
WARN_ON_ONCE(err);
if (IS_DVC(i2c_dev))
@@ -1440,9 +1435,9 @@ static u32 tegra_i2c_func(struct i2c_adapter *adap)
}
static const struct i2c_algorithm tegra_i2c_algo = {
- .master_xfer = tegra_i2c_xfer,
- .master_xfer_atomic = tegra_i2c_xfer_atomic,
- .functionality = tegra_i2c_func,
+ .xfer = tegra_i2c_xfer,
+ .xfer_atomic = tegra_i2c_xfer_atomic,
+ .functionality = tegra_i2c_func,
};
/* payload size is only 12 bits */
@@ -1666,19 +1661,6 @@ static void tegra_i2c_parse_dt(struct tegra_i2c_dev *i2c_dev)
i2c_dev->is_vi = true;
}
-static int tegra_i2c_init_reset(struct tegra_i2c_dev *i2c_dev)
-{
- if (ACPI_HANDLE(i2c_dev->dev))
- return 0;
-
- i2c_dev->rst = devm_reset_control_get_exclusive(i2c_dev->dev, "i2c");
- if (IS_ERR(i2c_dev->rst))
- return dev_err_probe(i2c_dev->dev, PTR_ERR(i2c_dev->rst),
- "failed to get reset control\n");
-
- return 0;
-}
-
static int tegra_i2c_init_clocks(struct tegra_i2c_dev *i2c_dev)
{
int err;
@@ -1788,10 +1770,6 @@ static int tegra_i2c_probe(struct platform_device *pdev)
tegra_i2c_parse_dt(i2c_dev);
- err = tegra_i2c_init_reset(i2c_dev);
- if (err)
- return err;
-
err = tegra_i2c_init_clocks(i2c_dev);
if (err)
return err;
diff --git a/drivers/i2c/busses/i2c-tiny-usb.c b/drivers/i2c/busses/i2c-tiny-usb.c
index a18eab0992a1..57dfe5f1a7d9 100644
--- a/drivers/i2c/busses/i2c-tiny-usb.c
+++ b/drivers/i2c/busses/i2c-tiny-usb.c
@@ -139,6 +139,11 @@ out:
return ret;
}
+/* prevent invalid 0-length usb_control_msg */
+static const struct i2c_adapter_quirks usb_quirks = {
+ .flags = I2C_AQ_NO_ZERO_LEN_READ,
+};
+
/* This is the actual algorithm we define */
static const struct i2c_algorithm usb_algorithm = {
.xfer = usb_xfer,
@@ -247,6 +252,7 @@ static int i2c_tiny_usb_probe(struct usb_interface *interface,
/* setup i2c adapter description */
dev->adapter.owner = THIS_MODULE;
dev->adapter.class = I2C_CLASS_HWMON;
+ dev->adapter.quirks = &usb_quirks;
dev->adapter.algo = &usb_algorithm;
dev->adapter.algo_data = dev;
snprintf(dev->adapter.name, sizeof(dev->adapter.name),
diff --git a/drivers/i2c/busses/i2c-virtio.c b/drivers/i2c/busses/i2c-virtio.c
index 9b05ff53d3d7..af1381949f50 100644
--- a/drivers/i2c/busses/i2c-virtio.c
+++ b/drivers/i2c/busses/i2c-virtio.c
@@ -116,15 +116,16 @@ static int virtio_i2c_complete_reqs(struct virtqueue *vq,
for (i = 0; i < num; i++) {
struct virtio_i2c_req *req = &reqs[i];
- wait_for_completion(&req->completion);
-
- if (!failed && req->in_hdr.status != VIRTIO_I2C_MSG_OK)
- failed = true;
+ if (!failed) {
+ if (wait_for_completion_interruptible(&req->completion))
+ failed = true;
+ else if (req->in_hdr.status != VIRTIO_I2C_MSG_OK)
+ failed = true;
+ else
+ j++;
+ }
i2c_put_dma_safe_msg_buf(reqs[i].buf, &msgs[i], !failed);
-
- if (!failed)
- j++;
}
return j;
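The rewritten loop switches to an interruptible wait and stops waiting as soon as one request fails, yet still visits every request so each DMA-safe buffer is released; j counts only the messages that completed cleanly. A runnable model of the control flow:

#include <stdio.h>

/* hypothetical completion model: 0 = ok, nonzero = failed or interrupted */
static int wait_req(int i) { return i == 2 ? -1 : 0; }
static void put_buf(int i, int ok) { printf("put buf %d (ok=%d)\n", i, ok); }

int main(void)
{
        int failed = 0, j = 0, num = 4;

        for (int i = 0; i < num; i++) {
                if (!failed) {
                        if (wait_req(i))
                                failed = 1; /* stop waiting on the rest */
                        else
                                j++;
                }
                put_buf(i, !failed); /* always release every buffer */
        }
        printf("completed %d of %d\n", j, num);
        return 0;
}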
diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index 6bc1575cea6c..607026c921d6 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -1398,8 +1398,8 @@ static u32 xiic_func(struct i2c_adapter *adap)
}
static const struct i2c_algorithm xiic_algorithm = {
- .master_xfer = xiic_xfer,
- .master_xfer_atomic = xiic_xfer_atomic,
+ .xfer = xiic_xfer,
+ .xfer_atomic = xiic_xfer_atomic,
.functionality = xiic_func,
};
diff --git a/drivers/i2c/busses/i2c-xlp9xx.c b/drivers/i2c/busses/i2c-xlp9xx.c
index 4d5e49b6321b..ddb1c3e8bc9d 100644
--- a/drivers/i2c/busses/i2c-xlp9xx.c
+++ b/drivers/i2c/busses/i2c-xlp9xx.c
@@ -452,7 +452,7 @@ static u32 xlp9xx_i2c_functionality(struct i2c_adapter *adapter)
}
static const struct i2c_algorithm xlp9xx_i2c_algo = {
- .master_xfer = xlp9xx_i2c_xfer,
+ .xfer = xlp9xx_i2c_xfer,
.functionality = xlp9xx_i2c_functionality,
};
diff --git a/drivers/i2c/i2c-atr.c b/drivers/i2c/i2c-atr.c
index be7d6d41e0b2..dd194476b118 100644
--- a/drivers/i2c/i2c-atr.c
+++ b/drivers/i2c/i2c-atr.c
@@ -738,7 +738,7 @@ struct i2c_atr *i2c_atr_new(struct i2c_adapter *parent, struct device *dev,
atr->flags = flags;
if (parent->algo->master_xfer)
- atr->algo.master_xfer = i2c_atr_master_xfer;
+ atr->algo.xfer = i2c_atr_master_xfer;
if (parent->algo->smbus_xfer)
atr->algo.smbus_xfer = i2c_atr_smbus_xfer;
atr->algo.functionality = i2c_atr_functionality;
diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c
index fda72e8be885..4d8690981a55 100644
--- a/drivers/i2c/i2c-mux.c
+++ b/drivers/i2c/i2c-mux.c
@@ -293,12 +293,12 @@ int i2c_mux_add_adapter(struct i2c_mux_core *muxc,
*/
if (parent->algo->master_xfer) {
if (muxc->mux_locked)
- priv->algo.master_xfer = i2c_mux_master_xfer;
+ priv->algo.xfer = i2c_mux_master_xfer;
else
- priv->algo.master_xfer = __i2c_mux_master_xfer;
+ priv->algo.xfer = __i2c_mux_master_xfer;
}
if (parent->algo->master_xfer_atomic)
- priv->algo.master_xfer_atomic = priv->algo.master_xfer;
+ priv->algo.xfer_atomic = priv->algo.master_xfer;
if (parent->algo->smbus_xfer) {
if (muxc->mux_locked)
diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c
index 77a740561fd7..f2a1f4744978 100644
--- a/drivers/i2c/muxes/i2c-demux-pinctrl.c
+++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c
@@ -95,9 +95,9 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne
priv->cur_chan = new_chan;
/* Now fill out current adapter structure. cur_chan must be up to date */
- priv->algo.master_xfer = i2c_demux_master_xfer;
+ priv->algo.xfer = i2c_demux_master_xfer;
if (adap->algo->master_xfer_atomic)
- priv->algo.master_xfer_atomic = i2c_demux_master_xfer;
+ priv->algo.xfer_atomic = i2c_demux_master_xfer;
priv->algo.functionality = i2c_demux_functionality;
snprintf(priv->cur_adap.name, sizeof(priv->cur_adap.name),
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 8ccb483204fa..73747d20df85 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -152,8 +152,8 @@ static __always_inline int __intel_idle(struct cpuidle_device *dev,
int index, bool irqoff)
{
struct cpuidle_state *state = &drv->states[index];
- unsigned long eax = flg2MWAIT(state->flags);
- unsigned long ecx = 1*irqoff; /* break on interrupt flag */
+ unsigned int eax = flg2MWAIT(state->flags);
+ unsigned int ecx = 1*irqoff; /* break on interrupt flag */
mwait_idle_with_hints(eax, ecx);
@@ -226,9 +226,9 @@ static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
- unsigned long ecx = 1; /* break on interrupt flag */
struct cpuidle_state *state = &drv->states[index];
- unsigned long eax = flg2MWAIT(state->flags);
+ unsigned int eax = flg2MWAIT(state->flags);
+ unsigned int ecx = 1; /* break on interrupt flag */
if (state->flags & CPUIDLE_FLAG_INIT_XSTATE)
fpu_idle_fpregs();
@@ -2507,6 +2507,8 @@ static int __init intel_idle_init(void)
pr_debug("Local APIC timer is reliable in %s\n",
boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
+ arch_cpu_rescan_dead_smt_siblings();
+
return 0;
hp_setup_fail:
@@ -2518,7 +2520,7 @@ init_driver_fail:
return retval;
}
-device_initcall(intel_idle_init);
+subsys_initcall_sync(intel_idle_init);
/*
* We are not really modular, but we used to support that. Meaning we also
diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c
index 12598feaa693..b10a30960e1e 100644
--- a/drivers/iio/accel/fxls8962af-core.c
+++ b/drivers/iio/accel/fxls8962af-core.c
@@ -877,6 +877,8 @@ static int fxls8962af_buffer_predisable(struct iio_dev *indio_dev)
if (ret)
return ret;
+ synchronize_irq(data->irq);
+
ret = __fxls8962af_fifo_set_mode(data, false);
if (data->enable_event)
diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index 99cb661fabb2..a7961c610ed2 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -1353,6 +1353,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
union acpi_object *ont;
union acpi_object *elements;
acpi_status status;
+ struct device *parent = indio_dev->dev.parent;
int ret = -EINVAL;
unsigned int val;
int i, j;
@@ -1371,7 +1372,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
};
- adev = ACPI_COMPANION(indio_dev->dev.parent);
+ adev = ACPI_COMPANION(parent);
if (!adev)
return -ENXIO;
@@ -1380,8 +1381,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
if (status == AE_NOT_FOUND) {
return -ENXIO;
} else if (ACPI_FAILURE(status)) {
- dev_warn(&indio_dev->dev, "failed to execute _ONT: %d\n",
- status);
+ dev_warn(parent, "failed to execute _ONT: %d\n", status);
return status;
}
@@ -1457,12 +1457,12 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
}
ret = 0;
- dev_info(&indio_dev->dev, "computed mount matrix from ACPI\n");
+ dev_info(parent, "computed mount matrix from ACPI\n");
out:
kfree(buffer.pointer);
if (ret)
- dev_dbg(&indio_dev->dev,
+ dev_dbg(parent,
"failed to apply ACPI orientation data: %d\n", ret);
return ret;
diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c
index d96bd12dfea6..cabf5511d116 100644
--- a/drivers/iio/adc/ad7380.c
+++ b/drivers/iio/adc/ad7380.c
@@ -1953,8 +1953,9 @@ static int ad7380_probe(struct spi_device *spi)
if (st->chip_info->has_hardware_gain) {
device_for_each_child_node_scoped(dev, node) {
- unsigned int channel, gain;
+ unsigned int channel;
int gain_idx;
+ u16 gain;
ret = fwnode_property_read_u32(node, "reg", &channel);
if (ret)
@@ -1966,7 +1967,7 @@ static int ad7380_probe(struct spi_device *spi)
"Invalid channel number %i\n",
channel);
- ret = fwnode_property_read_u32(node, "adi,gain-milli",
+ ret = fwnode_property_read_u16(node, "adi,gain-milli",
&gain);
if (ret && ret != -EINVAL)
return dev_err_probe(dev, ret,
diff --git a/drivers/iio/adc/ad7949.c b/drivers/iio/adc/ad7949.c
index edd0c3a35ab7..202561cad401 100644
--- a/drivers/iio/adc/ad7949.c
+++ b/drivers/iio/adc/ad7949.c
@@ -308,7 +308,6 @@ static void ad7949_disable_reg(void *reg)
static int ad7949_spi_probe(struct spi_device *spi)
{
- u32 spi_ctrl_mask = spi->controller->bits_per_word_mask;
struct device *dev = &spi->dev;
const struct ad7949_adc_spec *spec;
struct ad7949_adc_chip *ad7949_adc;
@@ -337,11 +336,11 @@ static int ad7949_spi_probe(struct spi_device *spi)
ad7949_adc->resolution = spec->resolution;
/* Set SPI bits per word */
- if (spi_ctrl_mask & SPI_BPW_MASK(ad7949_adc->resolution)) {
+ if (spi_is_bpw_supported(spi, ad7949_adc->resolution)) {
spi->bits_per_word = ad7949_adc->resolution;
- } else if (spi_ctrl_mask == SPI_BPW_MASK(16)) {
+ } else if (spi_is_bpw_supported(spi, 16)) {
spi->bits_per_word = 16;
- } else if (spi_ctrl_mask == SPI_BPW_MASK(8)) {
+ } else if (spi_is_bpw_supported(spi, 8)) {
spi->bits_per_word = 8;
} else {
dev_err(dev, "unable to find common BPW with spi controller\n");
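spi_is_bpw_supported() tests membership in the controller's bits_per_word_mask; the removed code compared the whole mask with ==, so a controller advertising several word sizes could fail the 16- and 8-bit fallbacks it actually supports. A quick demonstration, with SPI_BPW_MASK re-declared as in the kernel:

#include <stdio.h>

#define SPI_BPW_MASK(bits) (1U << ((bits) - 1))

/* userspace model of the helper: test membership, not equality */
static int spi_is_bpw_supported_model(unsigned int ctrl_mask, unsigned int bits)
{
        return !!(ctrl_mask & SPI_BPW_MASK(bits));
}

int main(void)
{
        /* controller advertising 8-, 16- and 32-bit words */
        unsigned int mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16) | SPI_BPW_MASK(32);

        printf("16-bit, & test : %d\n", spi_is_bpw_supported_model(mask, 16)); /* 1 */
        printf("16-bit, == test: %d\n", mask == SPI_BPW_MASK(16));             /* 0: old bug */
        return 0;
}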
diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c
index 4116c44197b8..2dbaa0b5b3d6 100644
--- a/drivers/iio/adc/adi-axi-adc.c
+++ b/drivers/iio/adc/adi-axi-adc.c
@@ -445,7 +445,7 @@ static int axi_adc_raw_read(struct iio_backend *back, u32 *val)
static int ad7606_bus_reg_read(struct iio_backend *back, u32 reg, u32 *val)
{
struct adi_axi_adc_state *st = iio_backend_get_priv(back);
- int addr;
+ u32 addr, reg_val;
guard(mutex)(&st->lock);
@@ -455,7 +455,9 @@ static int ad7606_bus_reg_read(struct iio_backend *back, u32 reg, u32 *val)
*/
addr = FIELD_PREP(ADI_AXI_REG_ADDRESS_MASK, reg) | ADI_AXI_REG_READ_BIT;
axi_adc_raw_write(back, addr);
- axi_adc_raw_read(back, val);
+ axi_adc_raw_read(back, &reg_val);
+
+ *val = FIELD_GET(ADI_AXI_REG_VALUE_MASK, reg_val);
/* Write 0x0 on the bus to get back to ADC mode */
axi_adc_raw_write(back, 0);
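The read path previously handed back the raw bus word, address and control bits included; masking with ADI_AXI_REG_VALUE_MASK returns only the register value. A userspace model of the prep/get pair, with a hypothetical field layout:

#include <stdio.h>
#include <stdint.h>

/* hypothetical layout: value in bits [15:0], address in [30:16], read bit 31 */
#define REG_VALUE_MASK   0x0000ffffu
#define REG_ADDRESS_MASK 0x7fff0000u
#define REG_READ_BIT     0x80000000u

/* stand-ins for FIELD_PREP/FIELD_GET specialized to these fixed masks */
static uint32_t field_prep_addr(uint32_t reg) { return (reg << 16) & REG_ADDRESS_MASK; }
static uint32_t field_get_value(uint32_t raw) { return raw & REG_VALUE_MASK; }

int main(void)
{
        uint32_t addr = field_prep_addr(0x2a) | REG_READ_BIT;
        uint32_t raw  = addr | 0x1234; /* bus echoes control bits along with the value */

        printf("raw=0x%08x value=0x%04x\n", raw, field_get_value(raw));
        return 0;
}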
diff --git a/drivers/iio/adc/axp20x_adc.c b/drivers/iio/adc/axp20x_adc.c
index 71584ffd3632..1b49325ec1ce 100644
--- a/drivers/iio/adc/axp20x_adc.c
+++ b/drivers/iio/adc/axp20x_adc.c
@@ -187,6 +187,7 @@ static struct iio_map axp717_maps[] = {
.consumer_channel = "batt_chrg_i",
.adc_channel_label = "batt_chrg_i",
},
+ { }
};
/*
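iio_map tables are walked until an all-zero entry, so the missing { } terminator let iteration run off the end of axp717_maps; the one-line hunk adds the sentinel. A minimal model of sentinel-terminated iteration, with the struct trimmed to one field:

#include <stdio.h>

/* trimmed stand-in for struct iio_map: the sentinel is an all-zero entry */
struct map { const char *consumer_channel; };

static const struct map maps[] = {
        { .consumer_channel = "batt_v" },
        { .consumer_channel = "batt_chrg_i" },
        { } /* terminator the hunk adds; the walk below relies on it */
};

int main(void)
{
        for (const struct map *m = maps; m->consumer_channel; m++)
                printf("%s\n", m->consumer_channel);
        return 0;
}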
diff --git a/drivers/iio/adc/max1363.c b/drivers/iio/adc/max1363.c
index a7e9912fb44a..9dd547e62b6c 100644
--- a/drivers/iio/adc/max1363.c
+++ b/drivers/iio/adc/max1363.c
@@ -511,10 +511,10 @@ static const struct iio_event_spec max1363_events[] = {
MAX1363_CHAN_U(1, _s1, 1, bits, ev_spec, num_ev_spec), \
MAX1363_CHAN_U(2, _s2, 2, bits, ev_spec, num_ev_spec), \
MAX1363_CHAN_U(3, _s3, 3, bits, ev_spec, num_ev_spec), \
- MAX1363_CHAN_B(0, 1, d0m1, 4, bits, ev_spec, num_ev_spec), \
- MAX1363_CHAN_B(2, 3, d2m3, 5, bits, ev_spec, num_ev_spec), \
- MAX1363_CHAN_B(1, 0, d1m0, 6, bits, ev_spec, num_ev_spec), \
- MAX1363_CHAN_B(3, 2, d3m2, 7, bits, ev_spec, num_ev_spec), \
+ MAX1363_CHAN_B(0, 1, d0m1, 12, bits, ev_spec, num_ev_spec), \
+ MAX1363_CHAN_B(2, 3, d2m3, 13, bits, ev_spec, num_ev_spec), \
+ MAX1363_CHAN_B(1, 0, d1m0, 18, bits, ev_spec, num_ev_spec), \
+ MAX1363_CHAN_B(3, 2, d3m2, 19, bits, ev_spec, num_ev_spec), \
IIO_CHAN_SOFT_TIMESTAMP(8) \
}
@@ -532,23 +532,23 @@ static const struct iio_chan_spec max1363_channels[] =
/* Applies to max1236, max1237 */
static const enum max1363_modes max1236_mode_list[] = {
_s0, _s1, _s2, _s3,
- s0to1, s0to2, s0to3,
+ s0to1, s0to2, s2to3, s0to3,
d0m1, d2m3, d1m0, d3m2,
d0m1to2m3, d1m0to3m2,
- s2to3,
};
/* Applies to max1238, max1239 */
static const enum max1363_modes max1238_mode_list[] = {
_s0, _s1, _s2, _s3, _s4, _s5, _s6, _s7, _s8, _s9, _s10, _s11,
s0to1, s0to2, s0to3, s0to4, s0to5, s0to6,
+ s6to7, s6to8, s6to9, s6to10, s6to11,
s0to7, s0to8, s0to9, s0to10, s0to11,
d0m1, d2m3, d4m5, d6m7, d8m9, d10m11,
d1m0, d3m2, d5m4, d7m6, d9m8, d11m10,
- d0m1to2m3, d0m1to4m5, d0m1to6m7, d0m1to8m9, d0m1to10m11,
- d1m0to3m2, d1m0to5m4, d1m0to7m6, d1m0to9m8, d1m0to11m10,
- s6to7, s6to8, s6to9, s6to10, s6to11,
- d6m7to8m9, d6m7to10m11, d7m6to9m8, d7m6to11m10,
+ d0m1to2m3, d0m1to4m5, d0m1to6m7, d6m7to8m9,
+ d0m1to8m9, d6m7to10m11, d0m1to10m11, d1m0to3m2,
+ d1m0to5m4, d1m0to7m6, d7m6to9m8, d1m0to9m8,
+ d7m6to11m10, d1m0to11m10,
};
#define MAX1363_12X_CHANS(bits) { \
@@ -584,16 +584,15 @@ static const struct iio_chan_spec max1238_channels[] = MAX1363_12X_CHANS(12);
static const enum max1363_modes max11607_mode_list[] = {
_s0, _s1, _s2, _s3,
- s0to1, s0to2, s0to3,
- s2to3,
+ s0to1, s0to2, s2to3,
+ s0to3,
d0m1, d2m3, d1m0, d3m2,
d0m1to2m3, d1m0to3m2,
};
static const enum max1363_modes max11608_mode_list[] = {
_s0, _s1, _s2, _s3, _s4, _s5, _s6, _s7,
- s0to1, s0to2, s0to3, s0to4, s0to5, s0to6, s0to7,
- s6to7,
+ s0to1, s0to2, s0to3, s0to4, s0to5, s0to6, s6to7, s0to7,
d0m1, d2m3, d4m5, d6m7,
d1m0, d3m2, d5m4, d7m6,
d0m1to2m3, d0m1to4m5, d0m1to6m7,
@@ -609,14 +608,14 @@ static const enum max1363_modes max11608_mode_list[] = {
MAX1363_CHAN_U(5, _s5, 5, bits, NULL, 0), \
MAX1363_CHAN_U(6, _s6, 6, bits, NULL, 0), \
MAX1363_CHAN_U(7, _s7, 7, bits, NULL, 0), \
- MAX1363_CHAN_B(0, 1, d0m1, 8, bits, NULL, 0), \
- MAX1363_CHAN_B(2, 3, d2m3, 9, bits, NULL, 0), \
- MAX1363_CHAN_B(4, 5, d4m5, 10, bits, NULL, 0), \
- MAX1363_CHAN_B(6, 7, d6m7, 11, bits, NULL, 0), \
- MAX1363_CHAN_B(1, 0, d1m0, 12, bits, NULL, 0), \
- MAX1363_CHAN_B(3, 2, d3m2, 13, bits, NULL, 0), \
- MAX1363_CHAN_B(5, 4, d5m4, 14, bits, NULL, 0), \
- MAX1363_CHAN_B(7, 6, d7m6, 15, bits, NULL, 0), \
+ MAX1363_CHAN_B(0, 1, d0m1, 12, bits, NULL, 0), \
+ MAX1363_CHAN_B(2, 3, d2m3, 13, bits, NULL, 0), \
+ MAX1363_CHAN_B(4, 5, d4m5, 14, bits, NULL, 0), \
+ MAX1363_CHAN_B(6, 7, d6m7, 15, bits, NULL, 0), \
+ MAX1363_CHAN_B(1, 0, d1m0, 18, bits, NULL, 0), \
+ MAX1363_CHAN_B(3, 2, d3m2, 19, bits, NULL, 0), \
+ MAX1363_CHAN_B(5, 4, d5m4, 20, bits, NULL, 0), \
+ MAX1363_CHAN_B(7, 6, d7m6, 21, bits, NULL, 0), \
IIO_CHAN_SOFT_TIMESTAMP(16) \
}
static const struct iio_chan_spec max11602_channels[] = MAX1363_8X_CHANS(8);
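The renumbering above gives the differential pairs new scan indexes; since .scan_index identifies the buffer slot a channel occupies (and the same macro argument typically doubles as the channel address), the values have to track the device's own numbering rather than a simple enumeration. A hedged sketch of one differential iio_chan_spec entry, field values illustrative only:

        static const struct iio_chan_spec example_chan = {
                .type = IIO_VOLTAGE,
                .differential = 1,
                .indexed = 1,
                .channel = 0,           /* positive input */
                .channel2 = 1,          /* negative input */
                .scan_index = 12,       /* slot the hardware fills in scan mode */
                .scan_type = {
                        .sign = 'u',
                        .realbits = 12,
                        .storagebits = 16,
                        .endianness = IIO_BE,
                },
        };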
diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c
index bd3458965bff..21c04a98b3b6 100644
--- a/drivers/iio/adc/stm32-adc-core.c
+++ b/drivers/iio/adc/stm32-adc-core.c
@@ -430,10 +430,9 @@ static int stm32_adc_irq_probe(struct platform_device *pdev,
return -ENOMEM;
}
- for (i = 0; i < priv->cfg->num_irqs; i++) {
- irq_set_chained_handler(priv->irq[i], stm32_adc_irq_handler);
- irq_set_handler_data(priv->irq[i], priv);
- }
+ for (i = 0; i < priv->cfg->num_irqs; i++)
+ irq_set_chained_handler_and_data(priv->irq[i],
+ stm32_adc_irq_handler, priv);
return 0;
}
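The stm32 change above folds two calls into irq_set_chained_handler_and_data(), which installs handler and data under one acquisition of the descriptor lock; with the two-call sequence there is a window in which the chained handler can run with stale (or NULL) handler data. Side by side:

        /* before: two steps, with a window between them */
        irq_set_chained_handler(irq, stm32_adc_irq_handler);
        irq_set_handler_data(irq, priv);

        /* after: handler and data installed together */
        irq_set_chained_handler_and_data(irq, stm32_adc_irq_handler, priv);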
diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index 8ce1dccfea4f..dac593be5695 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -154,7 +154,7 @@ static int st_sensors_set_fullscale(struct iio_dev *indio_dev, unsigned int fs)
return err;
st_accel_set_fullscale_error:
- dev_err(&indio_dev->dev, "failed to set new fullscale.\n");
+ dev_err(indio_dev->dev.parent, "failed to set new fullscale.\n");
return err;
}
@@ -231,8 +231,7 @@ int st_sensors_power_enable(struct iio_dev *indio_dev)
ARRAY_SIZE(regulator_names),
regulator_names);
if (err)
- return dev_err_probe(&indio_dev->dev, err,
- "unable to enable supplies\n");
+ return dev_err_probe(parent, err, "unable to enable supplies\n");
return 0;
}
@@ -241,13 +240,14 @@ EXPORT_SYMBOL_NS(st_sensors_power_enable, "IIO_ST_SENSORS");
static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev,
struct st_sensors_platform_data *pdata)
{
+ struct device *parent = indio_dev->dev.parent;
struct st_sensor_data *sdata = iio_priv(indio_dev);
/* Sensor does not support interrupts */
if (!sdata->sensor_settings->drdy_irq.int1.addr &&
!sdata->sensor_settings->drdy_irq.int2.addr) {
if (pdata->drdy_int_pin)
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"DRDY on pin INT%d specified, but sensor does not support interrupts\n",
pdata->drdy_int_pin);
return 0;
@@ -256,29 +256,27 @@ static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev,
switch (pdata->drdy_int_pin) {
case 1:
if (!sdata->sensor_settings->drdy_irq.int1.mask) {
- dev_err(&indio_dev->dev,
- "DRDY on INT1 not available.\n");
+ dev_err(parent, "DRDY on INT1 not available.\n");
return -EINVAL;
}
sdata->drdy_int_pin = 1;
break;
case 2:
if (!sdata->sensor_settings->drdy_irq.int2.mask) {
- dev_err(&indio_dev->dev,
- "DRDY on INT2 not available.\n");
+ dev_err(parent, "DRDY on INT2 not available.\n");
return -EINVAL;
}
sdata->drdy_int_pin = 2;
break;
default:
- dev_err(&indio_dev->dev, "DRDY on pdata not valid.\n");
+ dev_err(parent, "DRDY on pdata not valid.\n");
return -EINVAL;
}
if (pdata->open_drain) {
if (!sdata->sensor_settings->drdy_irq.int1.addr_od &&
!sdata->sensor_settings->drdy_irq.int2.addr_od)
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"open drain requested but unsupported.\n");
else
sdata->int_pin_open_drain = true;
@@ -336,6 +334,7 @@ EXPORT_SYMBOL_NS(st_sensors_dev_name_probe, "IIO_ST_SENSORS");
int st_sensors_init_sensor(struct iio_dev *indio_dev,
struct st_sensors_platform_data *pdata)
{
+ struct device *parent = indio_dev->dev.parent;
struct st_sensor_data *sdata = iio_priv(indio_dev);
struct st_sensors_platform_data *of_pdata;
int err = 0;
@@ -343,7 +342,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
mutex_init(&sdata->odr_lock);
/* If OF/DT pdata exists, it will take precedence over anything else */
- of_pdata = st_sensors_dev_probe(indio_dev->dev.parent, pdata);
+ of_pdata = st_sensors_dev_probe(parent, pdata);
if (IS_ERR(of_pdata))
return PTR_ERR(of_pdata);
if (of_pdata)
@@ -370,7 +369,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
if (err < 0)
return err;
} else
- dev_info(&indio_dev->dev, "Full-scale not possible\n");
+ dev_info(parent, "Full-scale not possible\n");
err = st_sensors_set_odr(indio_dev, sdata->odr);
if (err < 0)
@@ -405,7 +404,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
mask = sdata->sensor_settings->drdy_irq.int2.mask_od;
}
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"set interrupt line to open drain mode on pin %d\n",
sdata->drdy_int_pin);
err = st_sensors_write_data_with_mask(indio_dev, addr,
@@ -593,21 +592,20 @@ EXPORT_SYMBOL_NS(st_sensors_get_settings_index, "IIO_ST_SENSORS");
int st_sensors_verify_id(struct iio_dev *indio_dev)
{
struct st_sensor_data *sdata = iio_priv(indio_dev);
+ struct device *parent = indio_dev->dev.parent;
int wai, err;
if (sdata->sensor_settings->wai_addr) {
err = regmap_read(sdata->regmap,
sdata->sensor_settings->wai_addr, &wai);
if (err < 0) {
- dev_err(&indio_dev->dev,
- "failed to read Who-Am-I register.\n");
- return err;
+ return dev_err_probe(parent, err,
+ "failed to read Who-Am-I register.\n");
}
if (sdata->sensor_settings->wai != wai) {
- dev_warn(&indio_dev->dev,
- "%s: WhoAmI mismatch (0x%x).\n",
- indio_dev->name, wai);
+ dev_warn(parent, "%s: WhoAmI mismatch (0x%x).\n",
+ indio_dev->name, wai);
}
}
diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c
index 9d4bf822a15d..8a8ab688d798 100644
--- a/drivers/iio/common/st_sensors/st_sensors_trigger.c
+++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c
@@ -127,7 +127,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
sdata->trig = devm_iio_trigger_alloc(parent, "%s-trigger",
indio_dev->name);
if (sdata->trig == NULL) {
- dev_err(&indio_dev->dev, "failed to allocate iio trigger.\n");
+ dev_err(parent, "failed to allocate iio trigger.\n");
return -ENOMEM;
}
@@ -143,7 +143,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
case IRQF_TRIGGER_FALLING:
case IRQF_TRIGGER_LOW:
if (!sdata->sensor_settings->drdy_irq.addr_ihl) {
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"falling/low specified for IRQ but hardware supports only rising/high: will request rising/high\n");
if (irq_trig == IRQF_TRIGGER_FALLING)
irq_trig = IRQF_TRIGGER_RISING;
@@ -156,21 +156,19 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
sdata->sensor_settings->drdy_irq.mask_ihl, 1);
if (err < 0)
return err;
- dev_info(&indio_dev->dev,
+ dev_info(parent,
"interrupts on the falling edge or active low level\n");
}
break;
case IRQF_TRIGGER_RISING:
- dev_info(&indio_dev->dev,
- "interrupts on the rising edge\n");
+ dev_info(parent, "interrupts on the rising edge\n");
break;
case IRQF_TRIGGER_HIGH:
- dev_info(&indio_dev->dev,
- "interrupts active high level\n");
+ dev_info(parent, "interrupts active high level\n");
break;
default:
/* This is the most preferred mode, if possible */
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"unsupported IRQ trigger specified (%lx), enforce rising edge\n", irq_trig);
irq_trig = IRQF_TRIGGER_RISING;
}
@@ -179,7 +177,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
if (irq_trig == IRQF_TRIGGER_FALLING ||
irq_trig == IRQF_TRIGGER_RISING) {
if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr) {
- dev_err(&indio_dev->dev,
+ dev_err(parent,
"edge IRQ not supported w/o stat register.\n");
return -EOPNOTSUPP;
}
@@ -214,13 +212,13 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
sdata->trig->name,
sdata->trig);
if (err) {
- dev_err(&indio_dev->dev, "failed to request trigger IRQ.\n");
+ dev_err(parent, "failed to request trigger IRQ.\n");
return err;
}
err = devm_iio_trigger_register(parent, sdata->trig);
if (err < 0) {
- dev_err(&indio_dev->dev, "failed to register iio trigger.\n");
+ dev_err(parent, "failed to register iio trigger.\n");
return err;
}
indio_dev->trig = iio_trigger_get(sdata->trig);
diff --git a/drivers/iio/dac/ad3530r.c b/drivers/iio/dac/ad3530r.c
index f9752a571aa5..6134613777b8 100644
--- a/drivers/iio/dac/ad3530r.c
+++ b/drivers/iio/dac/ad3530r.c
@@ -166,7 +166,9 @@ static ssize_t ad3530r_set_dac_powerdown(struct iio_dev *indio_dev,
AD3530R_OUTPUT_OPERATING_MODE_0 :
AD3530R_OUTPUT_OPERATING_MODE_1;
pdmode = powerdown ? st->chan[chan->channel].powerdown_mode : 0;
- mask = AD3530R_OP_MODE_CHAN_MSK(chan->channel);
+ mask = chan->channel < AD3531R_MAX_CHANNELS ?
+ AD3530R_OP_MODE_CHAN_MSK(chan->channel) :
+ AD3530R_OP_MODE_CHAN_MSK(chan->channel - 4);
val = field_prep(mask, pdmode);
ret = regmap_update_bits(st->regmap, reg, mask, val);
diff --git a/drivers/iio/industrialio-backend.c b/drivers/iio/industrialio-backend.c
index c1eb9ef9db08..266e1b29bf91 100644
--- a/drivers/iio/industrialio-backend.c
+++ b/drivers/iio/industrialio-backend.c
@@ -155,11 +155,14 @@ static ssize_t iio_backend_debugfs_write_reg(struct file *file,
ssize_t rc;
int ret;
+ if (count >= sizeof(buf))
+ return -ENOSPC;
+
rc = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, userbuf, count);
if (rc < 0)
return rc;
- buf[count] = '\0';
+ buf[rc] = '\0';
ret = sscanf(buf, "%i %i", &back->cached_reg_addr, &val);
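Both debugfs write fixes (here and in industrialio-core below) share one pattern: simple_write_to_buffer() honours *ppos and may copy fewer than count bytes, so terminating at buf[count] can index uninitialised bytes, or one past the array when count == sizeof(buf). Rejecting oversized writes up front and terminating at the returned length closes both holes:

        if (count >= sizeof(buf))
                return -EINVAL;                 /* or -ENOSPC, as above */

        rc = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, userbuf, count);
        if (rc < 0)
                return rc;
        buf[rc] = '\0';                         /* rc = bytes actually copied */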
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 178e99b111de..5ffda104d4b2 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -411,12 +411,15 @@ static ssize_t iio_debugfs_write_reg(struct file *file,
char buf[80];
int ret;
+ if (count >= sizeof(buf))
+ return -EINVAL;
+
ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, userbuf,
count);
if (ret < 0)
return ret;
- buf[count] = '\0';
+ buf[ret] = '\0';
ret = sscanf(buf, "%i %i", &reg, &val);
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 9979a351577f..81cf3c902e81 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -582,8 +582,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
out_unlock:
mutex_unlock(&table->lock);
if (ret)
- pr_warn("%s: unable to add gid %pI6 error=%d\n",
- __func__, gid->raw, ret);
+ pr_warn_ratelimited("%s: unable to add gid %pI6 error=%d\n",
+ __func__, gid->raw, ret);
return ret;
}
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index c752ae9fad6c..b1c44ec1a3f3 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -76,6 +76,17 @@ static int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
end = ALIGN(end, page_size);
if (unlikely(end < page_size))
return -EOVERFLOW;
+ /*
+ * The mmu notifier can be called within reclaim contexts and takes the
* umem_mutex. This is rare to trigger in testing, so teach lockdep about
+ * it.
+ */
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ fs_reclaim_acquire(GFP_KERNEL);
+ mutex_lock(&umem_odp->umem_mutex);
+ mutex_unlock(&umem_odp->umem_mutex);
+ fs_reclaim_release(GFP_KERNEL);
+ }
nr_entries = (end - start) >> PAGE_SHIFT;
if (!(nr_entries * PAGE_SIZE / page_size))
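The IS_ENABLED(CONFIG_LOCKDEP) block above is a standard trick for lock orderings that are real but rarely exercised: take the lock once inside a simulated reclaim section at setup time, so lockdep records the reclaim -> umem_mutex dependency and can flag a conflicting allocation immediately, instead of leaving a rare deadlock to surface in the field. Generically (my_ctx and my_init are hypothetical):

        static int my_init(struct my_ctx *ctx)
        {
                mutex_init(&ctx->lock);

                /* record "reclaim -> ctx->lock" once, at a convenient time */
                if (IS_ENABLED(CONFIG_LOCKDEP)) {
                        fs_reclaim_acquire(GFP_KERNEL);
                        mutex_lock(&ctx->lock);
                        mutex_unlock(&ctx->lock);
                        fs_reclaim_release(GFP_KERNEL);
                }
                return 0;
        }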
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index b847084dcd99..a506fafd2b15 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -398,7 +398,7 @@ static int do_get_hw_stats(struct ib_device *ibdev,
return ret;
/* We don't expose device counters over Vports */
- if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0)
+ if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
goto done;
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
@@ -418,7 +418,7 @@ static int do_get_hw_stats(struct ib_device *ibdev,
*/
goto done;
}
- ret = mlx5_lag_query_cong_counters(dev->mdev,
+ ret = mlx5_lag_query_cong_counters(mdev,
stats->value +
cnts->num_q_counters,
cnts->num_cong_counters,
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 2479da8620ca..843dcd312242 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1958,6 +1958,7 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
/* Level1 is valid for future use, no need to free */
return -ENOMEM;
+ INIT_LIST_HEAD(&obj_event->obj_sub_list);
err = xa_insert(&event->object_ids,
key_level2,
obj_event,
@@ -1966,7 +1967,6 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
kfree(obj_event);
return err;
}
- INIT_LIST_HEAD(&obj_event->obj_sub_list);
}
return 0;
@@ -2669,7 +2669,7 @@ static void devx_wait_async_destroy(struct mlx5_async_cmd *cmd)
void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
{
- struct mlx5_async_cmd async_cmd[MAX_ASYNC_CMDS];
+ struct mlx5_async_cmd *async_cmd;
struct ib_ucontext *ucontext = ufile->ucontext;
struct ib_device *device = ucontext->device;
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -2678,6 +2678,10 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
int head = 0;
int tail = 0;
+ async_cmd = kcalloc(MAX_ASYNC_CMDS, sizeof(*async_cmd), GFP_KERNEL);
+ if (!async_cmd)
+ return;
+
list_for_each_entry(uobject, &ufile->uobjects, list) {
WARN_ON(uverbs_try_lock_object(uobject, UVERBS_LOOKUP_WRITE));
@@ -2713,6 +2717,8 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]);
head++;
}
+
+ kfree(async_cmd);
}
static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
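Two independent fixes land in devx.c above: obj_sub_list is now initialised before xa_insert() publishes the object, closing the window in which a concurrent lookup could traverse an uninitialised list, and the MAX_ASYNC_CMDS scratch array moves from the stack to kcalloc() to shrink the frame. The publish-last pattern in miniature (names hypothetical):

        obj = kzalloc(sizeof(*obj), GFP_KERNEL);
        if (!obj)
                return -ENOMEM;
        INIT_LIST_HEAD(&obj->sub_list);         /* finish construction first */

        err = xa_insert(&table->objects, key, obj, GFP_KERNEL); /* then publish */
        if (err) {
                kfree(obj);
                return err;
        }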
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index ce7610740412..df6557ddbdfc 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1791,6 +1791,33 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
context->devx_uid);
}
+static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ int err;
+
+ err = mlx5_nic_vport_update_local_lb(master, true);
+ if (err)
+ return err;
+
+ err = mlx5_nic_vport_update_local_lb(slave, true);
+ if (err)
+ goto out;
+
+ return 0;
+
+out:
+ mlx5_nic_vport_update_local_lb(master, false);
+ return err;
+}
+
+static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ mlx5_nic_vport_update_local_lb(slave, false);
+ mlx5_nic_vport_update_local_lb(master, false);
+}
+
int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
{
int err = 0;
@@ -3495,6 +3522,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
lockdep_assert_held(&mlx5_ib_multiport_mutex);
+ mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev);
+
mlx5_core_mp_event_replay(ibdev->mdev,
MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
NULL);
@@ -3590,6 +3619,10 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
MLX5_DRIVER_EVENT_AFFILIATION_DONE,
&key);
+ err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev);
+ if (err)
+ goto unbind;
+
return true;
unbind:
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 57f9bc2a4a3a..bd35e75d9ce5 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -2027,23 +2027,50 @@ void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev)
}
}
-static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
+static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr)
{
- struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
- struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
- bool is_odp = is_odp_mr(mr);
bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
- !to_ib_umem_dmabuf(mr->umem)->pinned;
- bool from_cache = !!ent;
- int ret = 0;
+ !to_ib_umem_dmabuf(mr->umem)->pinned;
+ bool is_odp = is_odp_mr(mr);
+ int ret;
if (is_odp)
mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
if (is_odp_dma_buf)
- dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL);
+ dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
+ NULL);
+
+ ret = mlx5r_umr_revoke_mr(mr);
+
+ if (is_odp) {
+ if (!ret)
+ to_ib_umem_odp(mr->umem)->private = NULL;
+ mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+ }
+
+ if (is_odp_dma_buf) {
+ if (!ret)
+ to_ib_umem_dmabuf(mr->umem)->private = NULL;
+ dma_resv_unlock(
+ to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+ }
- if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) {
+ return ret;
+}
+
+static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr)
+{
+ bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
+ !to_ib_umem_dmabuf(mr->umem)->pinned;
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
+ bool is_odp = is_odp_mr(mr);
+ bool from_cache = !!ent;
+ int ret;
+
+ if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) &&
+ !cache_ent_find_and_store(dev, mr)) {
ent = mr->mmkey.cache_ent;
/* upon storing to a clean temp entry - schedule its cleanup */
spin_lock_irq(&ent->mkeys_queue.lock);
@@ -2055,7 +2082,7 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
ent->tmp_cleanup_scheduled = true;
}
spin_unlock_irq(&ent->mkeys_queue.lock);
- goto out;
+ return 0;
}
if (ent) {
@@ -2064,8 +2091,14 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
mr->mmkey.cache_ent = NULL;
spin_unlock_irq(&ent->mkeys_queue.lock);
}
+
+ if (is_odp)
+ mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+
+ if (is_odp_dma_buf)
+ dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
+ NULL);
ret = destroy_mkey(dev, mr);
-out:
if (is_odp) {
if (!ret)
to_ib_umem_odp(mr->umem)->private = NULL;
@@ -2075,9 +2108,9 @@ out:
if (is_odp_dma_buf) {
if (!ret)
to_ib_umem_dmabuf(mr->umem)->private = NULL;
- dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+ dma_resv_unlock(
+ to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
}
-
return ret;
}
@@ -2126,7 +2159,7 @@ static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr)
}
/* Stop DMA */
- rc = mlx5_revoke_mr(mr);
+ rc = mlx5r_handle_mkey_cleanup(mr);
if (rc)
return rc;
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index eaa2f9f5f3a9..f6abd64f07f7 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -259,8 +259,8 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
}
if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault))
- __xa_erase(&mr_to_mdev(mr)->odp_mkeys,
- mlx5_base_mkey(mr->mmkey.key));
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys,
+ mlx5_base_mkey(mr->mmkey.key));
xa_unlock(&imr->implicit_children);
/* Freeing an MR is a sleeping operation, so bounce to a work queue */
@@ -532,8 +532,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
}
if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) {
- ret = __xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
- &mr->mmkey, GFP_KERNEL);
+ ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
+ &mr->mmkey, GFP_KERNEL);
if (xa_is_err(ret)) {
ret = ERR_PTR(xa_err(ret));
__xa_erase(&imr->implicit_children, idx);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 1378651735f6..23ed2fc688f0 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -3705,9 +3705,10 @@ static ssize_t add_target_store(struct device *dev,
target_host->max_id = 1;
target_host->max_lun = -1LL;
target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
- target_host->max_segment_size = ib_dma_max_seg_size(ibdev);
- if (!(ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG))
+ if (ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
+ target_host->max_segment_size = ib_dma_max_seg_size(ibdev);
+ else
target_host->virt_boundary_mask = ~srp_dev->mr_page_mask;
target = host_to_target(target_host);
diff --git a/drivers/input/joystick/fsia6b.c b/drivers/input/joystick/fsia6b.c
index 76ffdec5c183..7e3bc99d766f 100644
--- a/drivers/input/joystick/fsia6b.c
+++ b/drivers/input/joystick/fsia6b.c
@@ -149,7 +149,7 @@ static int fsia6b_serio_connect(struct serio *serio, struct serio_driver *drv)
}
fsia6b->dev = input_dev;
- snprintf(fsia6b->phys, sizeof(fsia6b->phys), "%s/input0", serio->phys);
+ scnprintf(fsia6b->phys, sizeof(fsia6b->phys), "%s/input0", serio->phys);
input_dev->name = DRIVER_DESC;
input_dev->phys = fsia6b->phys;
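This hunk, and several input-driver hunks below, switch snprintf() to scnprintf(). The two differ only in return value, but the difference matters wherever the result is reused; here the returns are discarded, so the conversion appears to be idiom hardening rather than an active overflow fix:

        char phys[32];
        int n;

        n = snprintf(phys, sizeof(phys), "%s/input0", serio_phys);
        /* n = length the full string would have needed; may exceed sizeof(phys) */

        n = scnprintf(phys, sizeof(phys), "%s/input0", serio_phys);
        /* n = bytes actually stored, excluding '\0'; always < sizeof(phys) */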
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index c066a4da7c14..1d8c579b5433 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -172,6 +172,7 @@ static const struct xpad_device {
{ 0x046d, 0xca88, "Logitech Compact Controller for Xbox", 0, XTYPE_XBOX },
{ 0x046d, 0xca8a, "Logitech Precision Vibration Feedback Wheel", 0, XTYPE_XBOX },
{ 0x046d, 0xcaa3, "Logitech DriveFx Racing Wheel", 0, XTYPE_XBOX360 },
+ { 0x0502, 0x1305, "Acer NGR200", 0, XTYPE_XBOX360 },
{ 0x056e, 0x2004, "Elecom JC-U3613M", 0, XTYPE_XBOX360 },
{ 0x05fd, 0x1007, "Mad Catz Controller (unverified)", 0, XTYPE_XBOX },
{ 0x05fd, 0x107a, "InterAct 'PowerPad Pro' X-Box pad (Germany)", 0, XTYPE_XBOX },
@@ -524,6 +525,7 @@ static const struct usb_device_id xpad_table[] = {
XPAD_XBOX360_VENDOR(0x045e), /* Microsoft Xbox 360 controllers */
XPAD_XBOXONE_VENDOR(0x045e), /* Microsoft Xbox One controllers */
XPAD_XBOX360_VENDOR(0x046d), /* Logitech Xbox 360-style controllers */
+ XPAD_XBOX360_VENDOR(0x0502), /* Acer Inc. Xbox 360 style controllers */
XPAD_XBOX360_VENDOR(0x056e), /* Elecom JC-U3613M */
XPAD_XBOX360_VENDOR(0x06a3), /* Saitek P3600 */
XPAD_XBOX360_VENDOR(0x0738), /* Mad Catz Xbox 360 controllers */
@@ -1344,11 +1346,12 @@ static int xpad_try_sending_next_out_packet(struct usb_xpad *xpad)
usb_anchor_urb(xpad->irq_out, &xpad->irq_out_anchor);
error = usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
if (error) {
- dev_err(&xpad->intf->dev,
- "%s - usb_submit_urb failed with result %d\n",
- __func__, error);
+ if (error != -ENODEV)
+ dev_err(&xpad->intf->dev,
+ "%s - usb_submit_urb failed with result %d\n",
+ __func__, error);
usb_unanchor_urb(xpad->irq_out);
- return -EIO;
+ return error;
}
xpad->irq_out_active = true;
diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 3ff2fcf05ad5..c9e1127578b9 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -1191,8 +1191,8 @@ static void atkbd_set_device_attrs(struct atkbd *atkbd)
"AT %s Set %d keyboard",
atkbd->translated ? "Translated" : "Raw", atkbd->set);
- snprintf(atkbd->phys, sizeof(atkbd->phys),
- "%s/input0", atkbd->ps2dev.serio->phys);
+ scnprintf(atkbd->phys, sizeof(atkbd->phys),
+ "%s/input0", atkbd->ps2dev.serio->phys);
input_dev->name = atkbd->name;
input_dev->phys = atkbd->phys;
diff --git a/drivers/input/misc/cs40l50-vibra.c b/drivers/input/misc/cs40l50-vibra.c
index dce3b0ec8cf3..330f09123631 100644
--- a/drivers/input/misc/cs40l50-vibra.c
+++ b/drivers/input/misc/cs40l50-vibra.c
@@ -238,6 +238,8 @@ static int cs40l50_upload_owt(struct cs40l50_work *work_data)
header.data_words = len / sizeof(u32);
new_owt_effect_data = kmalloc(sizeof(header) + len, GFP_KERNEL);
+ if (!new_owt_effect_data)
+ return -ENOMEM;
memcpy(new_owt_effect_data, &header, sizeof(header));
memcpy(new_owt_effect_data + sizeof(header), work_data->custom_data, len);
diff --git a/drivers/input/misc/gpio-beeper.c b/drivers/input/misc/gpio-beeper.c
index d2d2954e2f79..3d65cb4f4ef3 100644
--- a/drivers/input/misc/gpio-beeper.c
+++ b/drivers/input/misc/gpio-beeper.c
@@ -94,7 +94,7 @@ static int gpio_beeper_probe(struct platform_device *pdev)
#ifdef CONFIG_OF
static const struct of_device_id gpio_beeper_of_match[] = {
- { .compatible = BEEPER_MODNAME, },
+ { .compatible = "gpio-beeper", },
{ }
};
MODULE_DEVICE_TABLE(of, gpio_beeper_of_match);
diff --git a/drivers/input/misc/iqs626a.c b/drivers/input/misc/iqs626a.c
index 7a6e6927f331..7fba4a8edceb 100644
--- a/drivers/input/misc/iqs626a.c
+++ b/drivers/input/misc/iqs626a.c
@@ -771,7 +771,7 @@ static int iqs626_parse_trackpad(struct iqs626_private *iqs626,
u8 *thresh = &sys_reg->tp_grp_reg.ch_reg_tp[i].thresh;
char tc_name[10];
- snprintf(tc_name, sizeof(tc_name), "channel-%d", i);
+ scnprintf(tc_name, sizeof(tc_name), "channel-%d", i);
struct fwnode_handle *tc_node __free(fwnode_handle) =
fwnode_get_named_child_node(ch_node, tc_name);
diff --git a/drivers/input/misc/iqs7222.c b/drivers/input/misc/iqs7222.c
index 80b917944b51..6fac31c0d99f 100644
--- a/drivers/input/misc/iqs7222.c
+++ b/drivers/input/misc/iqs7222.c
@@ -301,6 +301,7 @@ struct iqs7222_dev_desc {
int allow_offset;
int event_offset;
int comms_offset;
+ int ext_chan;
bool legacy_gesture;
struct iqs7222_reg_grp_desc reg_grps[IQS7222_NUM_REG_GRPS];
};
@@ -315,6 +316,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = {
.allow_offset = 9,
.event_offset = 10,
.comms_offset = 12,
+ .ext_chan = 10,
.reg_grps = {
[IQS7222_REG_GRP_STAT] = {
.base = IQS7222_SYS_STATUS,
@@ -373,6 +375,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = {
.allow_offset = 9,
.event_offset = 10,
.comms_offset = 12,
+ .ext_chan = 10,
.legacy_gesture = true,
.reg_grps = {
[IQS7222_REG_GRP_STAT] = {
@@ -2244,7 +2247,7 @@ static int iqs7222_parse_chan(struct iqs7222_private *iqs7222,
const struct iqs7222_dev_desc *dev_desc = iqs7222->dev_desc;
struct i2c_client *client = iqs7222->client;
int num_chan = dev_desc->reg_grps[IQS7222_REG_GRP_CHAN].num_row;
- int ext_chan = rounddown(num_chan, 10);
+ int ext_chan = dev_desc->ext_chan ? : num_chan;
int error, i;
u16 *chan_setup = iqs7222->chan_setup[chan_index];
u16 *sys_setup = iqs7222->sys_setup;
@@ -2445,7 +2448,7 @@ static int iqs7222_parse_sldr(struct iqs7222_private *iqs7222,
const struct iqs7222_dev_desc *dev_desc = iqs7222->dev_desc;
struct i2c_client *client = iqs7222->client;
int num_chan = dev_desc->reg_grps[IQS7222_REG_GRP_CHAN].num_row;
- int ext_chan = rounddown(num_chan, 10);
+ int ext_chan = dev_desc->ext_chan ? : num_chan;
int count, error, reg_offset, i;
u16 *event_mask = &iqs7222->sys_setup[dev_desc->event_offset];
u16 *sldr_setup = iqs7222->sldr_setup[sldr_index];
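dev_desc->ext_chan ? : num_chan uses the GNU ?: extension: the left operand is evaluated once and yields the result when nonzero, the right operand otherwise. That reads as "explicit per-device value, or the natural default", and avoids deriving the split from rounddown(num_chan, 10), which only happens to be right for some channel counts:

        int ext_chan = dev_desc->ext_chan ?: num_chan;
        /*
         * Equivalent to the following, but without evaluating the
         * left operand twice:
         *
         *      tmp = dev_desc->ext_chan;
         *      ext_chan = tmp ? tmp : num_chan;
         */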
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index be734d65ea72..d0cb9fb94821 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -1408,9 +1408,9 @@ static int alps_do_register_bare_ps2_mouse(struct alps_data *priv)
return -ENOMEM;
}
- snprintf(priv->phys3, sizeof(priv->phys3), "%s/%s",
- psmouse->ps2dev.serio->phys,
- (priv->dev2 ? "input2" : "input1"));
+ scnprintf(priv->phys3, sizeof(priv->phys3), "%s/%s",
+ psmouse->ps2dev.serio->phys,
+ (priv->dev2 ? "input2" : "input1"));
dev3->phys = priv->phys3;
/*
@@ -3103,8 +3103,8 @@ int alps_init(struct psmouse *psmouse)
goto init_fail;
}
- snprintf(priv->phys2, sizeof(priv->phys2), "%s/input1",
- psmouse->ps2dev.serio->phys);
+ scnprintf(priv->phys2, sizeof(priv->phys2), "%s/input1",
+ psmouse->ps2dev.serio->phys);
dev2->phys = priv->phys2;
/*
diff --git a/drivers/input/mouse/lifebook.c b/drivers/input/mouse/lifebook.c
index 7147dacc404f..283ef46f039f 100644
--- a/drivers/input/mouse/lifebook.c
+++ b/drivers/input/mouse/lifebook.c
@@ -279,8 +279,8 @@ static int lifebook_create_relative_device(struct psmouse *psmouse)
goto err_out;
priv->dev2 = dev2;
- snprintf(priv->phys, sizeof(priv->phys),
- "%s/input1", psmouse->ps2dev.serio->phys);
+ scnprintf(priv->phys, sizeof(priv->phys),
+ "%s/input1", psmouse->ps2dev.serio->phys);
dev2->phys = priv->phys;
dev2->name = "LBPS/2 Fujitsu Lifebook Touchpad";
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index a2c9f7144864..77ea7da3b1c5 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -1600,7 +1600,7 @@ static int psmouse_connect(struct serio *serio, struct serio_driver *drv)
psmouse_pre_receive_byte, psmouse_receive_byte);
INIT_DELAYED_WORK(&psmouse->resync_work, psmouse_resync);
psmouse->dev = input_dev;
- snprintf(psmouse->phys, sizeof(psmouse->phys), "%s/input0", serio->phys);
+ scnprintf(psmouse->phys, sizeof(psmouse->phys), "%s/input0", serio->phys);
psmouse_set_state(psmouse, PSMOUSE_INITIALIZING);
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 91a2b584dab1..196905162945 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -105,7 +105,6 @@ config TOUCHSCREEN_ADC
config TOUCHSCREEN_APPLE_Z2
tristate "Apple Z2 touchscreens"
- default ARCH_APPLE
depends on SPI && (ARCH_APPLE || COMPILE_TEST)
help
Say Y here if you have an ARM Apple device with
diff --git a/drivers/input/touchscreen/melfas_mip4.c b/drivers/input/touchscreen/melfas_mip4.c
index a6946e3d8376..869884219908 100644
--- a/drivers/input/touchscreen/melfas_mip4.c
+++ b/drivers/input/touchscreen/melfas_mip4.c
@@ -1554,7 +1554,7 @@ static DEFINE_SIMPLE_DEV_PM_OPS(mip4_pm_ops, mip4_suspend, mip4_resume);
#ifdef CONFIG_OF
static const struct of_device_id mip4_of_match[] = {
- { .compatible = "melfas,"MIP4_DEVICE_NAME, },
+ { .compatible = "melfas,mip4_ts", },
{ },
};
MODULE_DEVICE_TABLE(of, mip4_of_match);
diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 1a41e59c77f8..3ebf37ddfc18 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -20,7 +20,7 @@
#include "internal.h"
-#define ICC_DYN_ID_START 10000
+#define ICC_DYN_ID_START 100000
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -819,6 +819,9 @@ static struct icc_node *icc_node_create_nolock(int id)
{
struct icc_node *node;
+ if (id >= ICC_DYN_ID_START)
+ return ERR_PTR(-EINVAL);
+
/* check if node already exists */
node = node_find(id);
if (node)
@@ -906,11 +909,36 @@ void icc_node_destroy(int id)
return;
kfree(node->links);
+ if (node->id >= ICC_DYN_ID_START)
+ kfree(node->name);
kfree(node);
}
EXPORT_SYMBOL_GPL(icc_node_destroy);
/**
+ * icc_node_set_name() - set node name
+ * @node: node
+ * @provider: node provider
+ * @name: node name
+ *
+ * Return: 0 on success, or -ENOMEM on allocation failure
+ */
+int icc_node_set_name(struct icc_node *node, const struct icc_provider *provider, const char *name)
+{
+ if (node->id >= ICC_DYN_ID_START) {
+ node->name = kasprintf(GFP_KERNEL, "%s@%s", name,
+ dev_name(provider->dev));
+ if (!node->name)
+ return -ENOMEM;
+ } else {
+ node->name = name;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(icc_node_set_name);
+
+/**
* icc_link_nodes() - create link between two nodes
* @src_node: source node
* @dst_node: destination node
@@ -1038,10 +1066,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider)
node->avg_bw = node->init_avg;
node->peak_bw = node->init_peak;
- if (node->id >= ICC_DYN_ID_START)
- node->name = devm_kasprintf(provider->dev, GFP_KERNEL, "%s@%s",
- node->name, dev_name(provider->dev));
-
if (node->avg_bw || node->peak_bw) {
if (provider->pre_aggregate)
provider->pre_aggregate(node);
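icc_node_set_name() centralises what icc_node_add() used to do with devm_kasprintf(): for dynamically allocated node IDs the name is now a plain kmalloc'd string freed in icc_node_destroy(), so its lifetime matches the node rather than the provider's devres group. A provider probe path now looks roughly like this, as the qcom hunks below show (qn is illustrative):

        node = icc_node_create(qn->id);
        if (IS_ERR(node))
                return PTR_ERR(node);

        ret = icc_node_set_name(node, provider, qn->name);
        if (ret) {
                icc_node_destroy(node->id);
                return ret;
        }

        node->data = qn;
        icc_node_add(node, provider);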
diff --git a/drivers/interconnect/icc-clk.c b/drivers/interconnect/icc-clk.c
index 88f311c11020..93c030608d3e 100644
--- a/drivers/interconnect/icc-clk.c
+++ b/drivers/interconnect/icc-clk.c
@@ -117,6 +117,7 @@ struct icc_provider *icc_clk_register(struct device *dev,
node->name = devm_kasprintf(dev, GFP_KERNEL, "%s_master", data[i].name);
if (!node->name) {
+ icc_node_destroy(node->id);
ret = -ENOMEM;
goto err;
}
@@ -135,6 +136,7 @@ struct icc_provider *icc_clk_register(struct device *dev,
node->name = devm_kasprintf(dev, GFP_KERNEL, "%s_slave", data[i].name);
if (!node->name) {
+ icc_node_destroy(node->id);
ret = -ENOMEM;
goto err;
}
diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c
index 41bfc6e7ee1d..001404e91041 100644
--- a/drivers/interconnect/qcom/icc-rpmh.c
+++ b/drivers/interconnect/qcom/icc-rpmh.c
@@ -293,7 +293,12 @@ int qcom_icc_rpmh_probe(struct platform_device *pdev)
goto err_remove_nodes;
}
- node->name = qn->name;
+ ret = icc_node_set_name(node, provider, qn->name);
+ if (ret) {
+ icc_node_destroy(node->id);
+ goto err_remove_nodes;
+ }
+
node->data = qn;
icc_node_add(node, provider);
diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c
index baecbf2533f7..b33f00da1880 100644
--- a/drivers/interconnect/qcom/osm-l3.c
+++ b/drivers/interconnect/qcom/osm-l3.c
@@ -236,7 +236,12 @@ static int qcom_osm_l3_probe(struct platform_device *pdev)
goto err;
}
- node->name = qnodes[i]->name;
+ ret = icc_node_set_name(node, provider, qnodes[i]->name);
+ if (ret) {
+ icc_node_destroy(node->id);
+ goto err;
+ }
+
/* Cast away const and add it back in qcom_osm_l3_set() */
node->data = (void *)qnodes[i];
icc_node_add(node, provider);
diff --git a/drivers/interconnect/qcom/sc7280.c b/drivers/interconnect/qcom/sc7280.c
index 346f18d70e9e..905403a3a930 100644
--- a/drivers/interconnect/qcom/sc7280.c
+++ b/drivers/interconnect/qcom/sc7280.c
@@ -238,6 +238,7 @@ static struct qcom_icc_node xm_pcie3_1 = {
.id = SC7280_MASTER_PCIE_1,
.channels = 1,
.buswidth = 8,
+ .num_links = 1,
.links = { SC7280_SLAVE_ANOC_PCIE_GEM_NOC },
};
diff --git a/drivers/interconnect/samsung/exynos.c b/drivers/interconnect/samsung/exynos.c
index 9e041365d909..8e8f56186a36 100644
--- a/drivers/interconnect/samsung/exynos.c
+++ b/drivers/interconnect/samsung/exynos.c
@@ -134,6 +134,11 @@ static int exynos_generic_icc_probe(struct platform_device *pdev)
priv->node = icc_node;
icc_node->name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%pOFn",
bus_dev->of_node);
+ if (!icc_node->name) {
+ icc_node_destroy(pdev->id);
+ return -ENOMEM;
+ }
+
if (of_property_read_u32(bus_dev->of_node, "samsung,data-clock-ratio",
&priv->bus_clk_ratio))
priv->bus_clk_ratio = EXYNOS_ICC_DEFAULT_BUS_CLK_RATIO;
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index 761ab647f372..0961ac805944 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -193,15 +193,13 @@ struct hyperv_root_ir_data {
static void
hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
{
- u64 status;
- u32 vector;
- struct irq_cfg *cfg;
- int ioapic_id;
- const struct cpumask *affinity;
- int cpu;
- struct hv_interrupt_entry entry;
struct hyperv_root_ir_data *data = irq_data->chip_data;
+ struct hv_interrupt_entry entry;
+ const struct cpumask *affinity;
struct IO_APIC_route_entry e;
+ struct irq_cfg *cfg;
+ int cpu, ioapic_id;
+ u32 vector;
cfg = irqd_cfg(irq_data);
affinity = irq_data_get_effective_affinity_mask(irq_data);
@@ -214,23 +212,16 @@ hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
&& data->entry.ioapic_rte.as_uint64) {
entry = data->entry;
- status = hv_unmap_ioapic_interrupt(ioapic_id, &entry);
-
- if (status != HV_STATUS_SUCCESS)
- hv_status_debug(status, "failed to unmap\n");
+ (void)hv_unmap_ioapic_interrupt(ioapic_id, &entry);
data->entry.ioapic_rte.as_uint64 = 0;
data->entry.source = 0; /* Invalid source */
}
- status = hv_map_ioapic_interrupt(ioapic_id, data->is_level, cpu,
- vector, &entry);
-
- if (status != HV_STATUS_SUCCESS) {
- hv_status_err(status, "map failed\n");
+ if (hv_map_ioapic_interrupt(ioapic_id, data->is_level, cpu,
+ vector, &entry))
return;
- }
data->entry = entry;
@@ -322,10 +313,10 @@ static void hyperv_root_irq_remapping_free(struct irq_domain *domain,
data = irq_data->chip_data;
e = &data->entry;
- if (e->source == HV_DEVICE_TYPE_IOAPIC
- && e->ioapic_rte.as_uint64)
- hv_unmap_ioapic_interrupt(data->ioapic_id,
- &data->entry);
+ if (e->source == HV_DEVICE_TYPE_IOAPIC &&
+ e->ioapic_rte.as_uint64)
+ (void)hv_unmap_ioapic_interrupt(data->ioapic_id,
+ &data->entry);
kfree(data);
}
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index fc35cba59145..47692cbfaabd 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -40,9 +40,8 @@ static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
}
/* Assign a cache tag with specified type to domain. */
-static int cache_tag_assign(struct dmar_domain *domain, u16 did,
- struct device *dev, ioasid_t pasid,
- enum cache_tag_type type)
+int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev,
+ ioasid_t pasid, enum cache_tag_type type)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 7aa3932251b2..148b944143b8 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3780,8 +3780,17 @@ static void intel_iommu_probe_finalize(struct device *dev)
!pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1))
info->pasid_enabled = 1;
- if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev))
+ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
iommu_enable_pci_ats(info);
+ /* Assign a DEVTLB cache tag to the default domain. */
+ if (info->ats_enabled && info->domain) {
+ u16 did = domain_id_iommu(info->domain, iommu);
+
+ if (cache_tag_assign(info->domain, did, dev,
+ IOMMU_NO_PASID, CACHE_TAG_DEVTLB))
+ iommu_disable_pci_ats(info);
+ }
+ }
iommu_enable_pci_pri(info);
}
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 3ddbcc603de2..2d1afab5eedc 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -1289,6 +1289,8 @@ struct cache_tag {
unsigned int users;
};
+int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev,
+ ioasid_t pasid, enum cache_tag_type type);
int cache_tag_assign_domain(struct dmar_domain *domain,
struct device *dev, ioasid_t pasid);
void cache_tag_unassign_domain(struct dmar_domain *domain,
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 22f74ba33a0e..e6bb3c784017 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1157,7 +1157,6 @@ static int rk_iommu_of_xlate(struct device *dev,
return -ENOMEM;
data->iommu = platform_get_drvdata(iommu_dev);
- data->iommu->domain = &rk_identity_domain;
dev_iommu_priv_set(dev, data);
platform_device_put(iommu_dev);
@@ -1195,6 +1194,8 @@ static int rk_iommu_probe(struct platform_device *pdev)
if (!iommu)
return -ENOMEM;
+ iommu->domain = &rk_identity_domain;
+
platform_set_drvdata(pdev, iommu);
iommu->dev = dev;
iommu->num_mmu = 0;
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 61897d50162d..e58fe9d8b9e7 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -559,11 +559,11 @@ static void tegra_smmu_set_pde(struct tegra_smmu_as *as, unsigned long iova,
{
unsigned int pd_index = iova_pd_index(iova);
struct tegra_smmu *smmu = as->smmu;
- struct tegra_pd *pd = as->pd;
+ u32 *pd = &as->pd->val[pd_index];
unsigned long offset = pd_index * sizeof(*pd);
/* Set the page directory entry first */
- pd->val[pd_index] = value;
+ *pd = value;
/* Then flush the page directory entry from caches */
dma_sync_single_range_for_device(smmu->dev, as->pd_dma, offset,
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 0d196e447142..c3928ef79344 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -74,6 +74,7 @@ config ARM_VIC_NR
config IRQ_MSI_LIB
bool
+ select GENERIC_MSI_IRQ
config ARMADA_370_XP_IRQ
bool
diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c
index 268cc18b781f..258b8e9a2d57 100644
--- a/drivers/irqchip/irq-ath79-misc.c
+++ b/drivers/irqchip/irq-ath79-misc.c
@@ -15,6 +15,8 @@
#include <linux/of_address.h>
#include <linux/of_irq.h>
+#include <asm/time.h>
+
#define AR71XX_RESET_REG_MISC_INT_STATUS 0
#define AR71XX_RESET_REG_MISC_INT_ENABLE 4
@@ -177,21 +179,3 @@ static int __init ar7240_misc_intc_of_init(
IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc",
ar7240_misc_intc_of_init);
-
-void __init ath79_misc_irq_init(void __iomem *regs, int irq,
- int irq_base, bool is_ar71xx)
-{
- struct irq_domain *domain;
-
- if (is_ar71xx)
- ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
- else
- ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
-
- domain = irq_domain_create_legacy(NULL, ATH79_MISC_IRQ_COUNT,
- irq_base, 0, &misc_irq_domain_ops, regs);
- if (!domain)
- panic("Failed to create MISC irqdomain");
-
- ath79_misc_intc_domain_init(domain, irq);
-}
diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
index d4697e79d5a3..b2d10063d35f 100644
--- a/drivers/md/bcache/Kconfig
+++ b/drivers/md/bcache/Kconfig
@@ -5,7 +5,6 @@ config BCACHE
select BLOCK_HOLDER_DEPRECATED if SYSFS
select CRC64
select CLOSURES
- select MIN_HEAP
help
Allows a block device to be used as cache for other devices; uses
a btree for indexing and the layout is optimized for SSDs.
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 8998e61efa40..48ce750bf70a 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -164,61 +164,40 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
* prio is worth 1/8th of what INITIAL_PRIO is worth.
*/
-static inline unsigned int new_bucket_prio(struct cache *ca, struct bucket *b)
-{
- unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8;
-
- return (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b);
-}
-
-static inline bool new_bucket_max_cmp(const void *l, const void *r, void *args)
-{
- struct bucket **lhs = (struct bucket **)l;
- struct bucket **rhs = (struct bucket **)r;
- struct cache *ca = args;
-
- return new_bucket_prio(ca, *lhs) > new_bucket_prio(ca, *rhs);
-}
-
-static inline bool new_bucket_min_cmp(const void *l, const void *r, void *args)
-{
- struct bucket **lhs = (struct bucket **)l;
- struct bucket **rhs = (struct bucket **)r;
- struct cache *ca = args;
+#define bucket_prio(b) \
+({ \
+ unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \
+ \
+ (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \
+})
- return new_bucket_prio(ca, *lhs) < new_bucket_prio(ca, *rhs);
-}
+#define bucket_max_cmp(l, r) (bucket_prio(l) < bucket_prio(r))
+#define bucket_min_cmp(l, r) (bucket_prio(l) > bucket_prio(r))
static void invalidate_buckets_lru(struct cache *ca)
{
struct bucket *b;
- const struct min_heap_callbacks bucket_max_cmp_callback = {
- .less = new_bucket_max_cmp,
- .swp = NULL,
- };
- const struct min_heap_callbacks bucket_min_cmp_callback = {
- .less = new_bucket_min_cmp,
- .swp = NULL,
- };
+ ssize_t i;
- ca->heap.nr = 0;
+ ca->heap.used = 0;
for_each_bucket(b, ca) {
if (!bch_can_invalidate_bucket(ca, b))
continue;
- if (!min_heap_full(&ca->heap))
- min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca);
- else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) {
+ if (!heap_full(&ca->heap))
+ heap_add(&ca->heap, b, bucket_max_cmp);
+ else if (bucket_max_cmp(b, heap_peek(&ca->heap))) {
ca->heap.data[0] = b;
- min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca);
+ heap_sift(&ca->heap, 0, bucket_max_cmp);
}
}
- min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca);
+ for (i = ca->heap.used / 2 - 1; i >= 0; --i)
+ heap_sift(&ca->heap, i, bucket_min_cmp);
while (!fifo_full(&ca->free_inc)) {
- if (!ca->heap.nr) {
+ if (!heap_pop(&ca->heap, b, bucket_min_cmp)) {
/*
* We don't want to be calling invalidate_buckets()
* multiple times when it can't do anything
@@ -227,8 +206,6 @@ static void invalidate_buckets_lru(struct cache *ca)
wake_up_gc(ca->set);
return;
}
- b = min_heap_peek(&ca->heap)[0];
- min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca);
bch_invalidate_one_bucket(ca, b);
}
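The alloc.c hunk above is part of a revert from the generic min_heap API back to bcache's private heap macros. For orientation, the util.h helpers have roughly this shape (simplified, from memory); heap_add(), heap_sift() and heap_pop() keep data[] ordered by whatever cmp macro each call site passes, which is why bucket_max_cmp/bucket_min_cmp are plain macros rather than callbacks:

        /* simplified shape of bcache's heap helpers in util.h, from memory */
        #define DECLARE_HEAP(type, name)                                \
                struct {                                                \
                        size_t size, used;                              \
                        type *data;                                     \
                } name

        #define heap_full(h)    ((h)->used == (h)->size)
        #define heap_peek(h)    ((h)->used ? (h)->data[0] : NULL)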
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 785b0d9008fa..1d33e40d26ea 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -458,7 +458,7 @@ struct cache {
/* Allocation stuff: */
struct bucket *buckets;
- DEFINE_MIN_HEAP(struct bucket *, cache_heap) heap;
+ DECLARE_HEAP(struct bucket *, heap);
/*
* If nonzero, we know we aren't going to find any buckets to invalidate
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 68258a16e125..463eb13bd0b2 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -54,11 +54,9 @@ void bch_dump_bucket(struct btree_keys *b)
int __bch_count_data(struct btree_keys *b)
{
unsigned int ret = 0;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bkey *k;
- min_heap_init(&iter.heap, NULL, MAX_BSETS);
-
if (b->ops->is_extents)
for_each_key(b, k, &iter)
ret += KEY_SIZE(k);
@@ -69,11 +67,9 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
{
va_list args;
struct bkey *k, *p = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
const char *err;
- min_heap_init(&iter.heap, NULL, MAX_BSETS);
-
for_each_key(b, k, &iter) {
if (b->ops->is_extents) {
err = "Keys out of order";
@@ -114,9 +110,9 @@ bug:
static void bch_btree_iter_next_check(struct btree_iter *iter)
{
- struct bkey *k = iter->heap.data->k, *next = bkey_next(k);
+ struct bkey *k = iter->data->k, *next = bkey_next(k);
- if (next < iter->heap.data->end &&
+ if (next < iter->data->end &&
bkey_cmp(k, iter->b->ops->is_extents ?
&START_KEY(next) : next) > 0) {
bch_dump_bucket(iter->b);
@@ -883,14 +879,12 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
unsigned int status = BTREE_INSERT_STATUS_NO_INSERT;
struct bset *i = bset_tree_last(b)->data;
struct bkey *m, *prev = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bkey preceding_key_on_stack = ZERO_KEY;
struct bkey *preceding_key_p = &preceding_key_on_stack;
BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
- min_heap_init(&iter.heap, NULL, MAX_BSETS);
-
/*
* If k has preceding key, preceding_key_p will be set to address
* of k's preceding key; otherwise preceding_key_p will be set
@@ -901,9 +895,9 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
else
preceding_key(k, &preceding_key_p);
- m = bch_btree_iter_init(b, &iter, preceding_key_p);
+ m = bch_btree_iter_stack_init(b, &iter, preceding_key_p);
- if (b->ops->insert_fixup(b, k, &iter, replace_key))
+ if (b->ops->insert_fixup(b, k, &iter.iter, replace_key))
return status;
status = BTREE_INSERT_STATUS_INSERT;
@@ -1083,94 +1077,79 @@ struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
/* Btree iterator */
-typedef bool (new_btree_iter_cmp_fn)(const void *, const void *, void *);
+typedef bool (btree_iter_cmp_fn)(struct btree_iter_set,
+ struct btree_iter_set);
-static inline bool new_btree_iter_cmp(const void *l, const void *r, void __always_unused *args)
+static inline bool btree_iter_cmp(struct btree_iter_set l,
+ struct btree_iter_set r)
{
- const struct btree_iter_set *_l = l;
- const struct btree_iter_set *_r = r;
-
- return bkey_cmp(_l->k, _r->k) <= 0;
+ return bkey_cmp(l.k, r.k) > 0;
}
static inline bool btree_iter_end(struct btree_iter *iter)
{
- return !iter->heap.nr;
+ return !iter->used;
}
void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
struct bkey *end)
{
- const struct min_heap_callbacks callbacks = {
- .less = new_btree_iter_cmp,
- .swp = NULL,
- };
-
if (k != end)
- BUG_ON(!min_heap_push(&iter->heap,
- &((struct btree_iter_set) { k, end }),
- &callbacks,
- NULL));
+ BUG_ON(!heap_add(iter,
+ ((struct btree_iter_set) { k, end }),
+ btree_iter_cmp));
}
-static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
- struct btree_iter *iter,
- struct bkey *search,
- struct bset_tree *start)
+static struct bkey *__bch_btree_iter_stack_init(struct btree_keys *b,
+ struct btree_iter_stack *iter,
+ struct bkey *search,
+ struct bset_tree *start)
{
struct bkey *ret = NULL;
- iter->heap.size = ARRAY_SIZE(iter->heap.preallocated);
- iter->heap.nr = 0;
+ iter->iter.size = ARRAY_SIZE(iter->stack_data);
+ iter->iter.used = 0;
#ifdef CONFIG_BCACHE_DEBUG
- iter->b = b;
+ iter->iter.b = b;
#endif
for (; start <= bset_tree_last(b); start++) {
ret = bch_bset_search(b, start, search);
- bch_btree_iter_push(iter, ret, bset_bkey_last(start->data));
+ bch_btree_iter_push(&iter->iter, ret, bset_bkey_last(start->data));
}
return ret;
}
-struct bkey *bch_btree_iter_init(struct btree_keys *b,
- struct btree_iter *iter,
+struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
+ struct btree_iter_stack *iter,
struct bkey *search)
{
- return __bch_btree_iter_init(b, iter, search, b->set);
+ return __bch_btree_iter_stack_init(b, iter, search, b->set);
}
static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
- new_btree_iter_cmp_fn *cmp)
+ btree_iter_cmp_fn *cmp)
{
struct btree_iter_set b __maybe_unused;
struct bkey *ret = NULL;
- const struct min_heap_callbacks callbacks = {
- .less = cmp,
- .swp = NULL,
- };
if (!btree_iter_end(iter)) {
bch_btree_iter_next_check(iter);
- ret = iter->heap.data->k;
- iter->heap.data->k = bkey_next(iter->heap.data->k);
+ ret = iter->data->k;
+ iter->data->k = bkey_next(iter->data->k);
- if (iter->heap.data->k > iter->heap.data->end) {
+ if (iter->data->k > iter->data->end) {
WARN_ONCE(1, "bset was corrupt!\n");
- iter->heap.data->k = iter->heap.data->end;
+ iter->data->k = iter->data->end;
}
- if (iter->heap.data->k == iter->heap.data->end) {
- if (iter->heap.nr) {
- b = min_heap_peek(&iter->heap)[0];
- min_heap_pop(&iter->heap, &callbacks, NULL);
- }
- }
+ if (iter->data->k == iter->data->end)
+ heap_pop(iter, b, cmp);
else
- min_heap_sift_down(&iter->heap, 0, &callbacks, NULL);
+ heap_sift(iter, 0, cmp);
}
return ret;
@@ -1178,7 +1157,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
struct bkey *bch_btree_iter_next(struct btree_iter *iter)
{
- return __bch_btree_iter_next(iter, new_btree_iter_cmp);
+ return __bch_btree_iter_next(iter, btree_iter_cmp);
}
@@ -1216,18 +1195,16 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out,
struct btree_iter *iter,
bool fixup, bool remove_stale)
{
+ int i;
struct bkey *k, *last = NULL;
BKEY_PADDED(k) tmp;
bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale
? bch_ptr_bad
: bch_ptr_invalid;
- const struct min_heap_callbacks callbacks = {
- .less = b->ops->sort_cmp,
- .swp = NULL,
- };
/* Heapify the iterator, using our comparison function */
- min_heapify_all(&iter->heap, &callbacks, NULL);
+ for (i = iter->used / 2 - 1; i >= 0; --i)
+ heap_sift(iter, i, b->ops->sort_cmp);
while (!btree_iter_end(iter)) {
if (b->ops->sort_fixup && fixup)
@@ -1316,11 +1293,10 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
struct bset_sort_state *state)
{
size_t order = b->page_order, keys = 0;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
int oldsize = bch_count_data(b);
- min_heap_init(&iter.heap, NULL, MAX_BSETS);
- __bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
+ __bch_btree_iter_stack_init(b, &iter, NULL, &b->set[start]);
if (start) {
unsigned int i;
@@ -1331,7 +1307,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
order = get_order(__set_bytes(b->set->data, keys));
}
- __btree_sort(b, &iter, start, order, false, state);
+ __btree_sort(b, &iter.iter, start, order, false, state);
EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
}
@@ -1347,13 +1323,11 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
struct bset_sort_state *state)
{
uint64_t start_time = local_clock();
- struct btree_iter iter;
-
- min_heap_init(&iter.heap, NULL, MAX_BSETS);
+ struct btree_iter_stack iter;
- bch_btree_iter_init(b, &iter, NULL);
+ bch_btree_iter_stack_init(b, &iter, NULL);
- btree_mergesort(b, new->set->data, &iter, false, true);
+ btree_mergesort(b, new->set->data, &iter.iter, false, true);
bch_time_stats_update(&state->time, start_time);
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index f79441acd4c1..011f6062c4c0 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -187,9 +187,8 @@ struct bset_tree {
};
struct btree_keys_ops {
- bool (*sort_cmp)(const void *l,
- const void *r,
- void *args);
+ bool (*sort_cmp)(struct btree_iter_set l,
+ struct btree_iter_set r);
struct bkey *(*sort_fixup)(struct btree_iter *iter,
struct bkey *tmp);
bool (*insert_fixup)(struct btree_keys *b,
@@ -313,17 +312,23 @@ enum {
BTREE_INSERT_STATUS_FRONT_MERGE,
};
-struct btree_iter_set {
- struct bkey *k, *end;
-};
-
/* Btree key iteration */
struct btree_iter {
+ size_t size, used;
#ifdef CONFIG_BCACHE_DEBUG
struct btree_keys *b;
#endif
- MIN_HEAP_PREALLOCATED(struct btree_iter_set, btree_iter_heap, MAX_BSETS) heap;
+ struct btree_iter_set {
+ struct bkey *k, *end;
+ } data[];
+};
+
+/* Fixed-size btree_iter that can be allocated on the stack */
+
+struct btree_iter_stack {
+ struct btree_iter iter;
+ struct btree_iter_set stack_data[MAX_BSETS];
};
typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k);
@@ -335,9 +340,9 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
struct bkey *end);
-struct bkey *bch_btree_iter_init(struct btree_keys *b,
- struct btree_iter *iter,
- struct bkey *search);
+struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
+ struct btree_iter_stack *iter,
+ struct bkey *search);
struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
const struct bkey *search);
@@ -352,13 +357,14 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b,
return search ? __bch_bset_search(b, t, search) : t->data->start;
}
-#define for_each_key_filter(b, k, iter, filter) \
- for (bch_btree_iter_init((b), (iter), NULL); \
- ((k) = bch_btree_iter_next_filter((iter), (b), filter));)
+#define for_each_key_filter(b, k, stack_iter, filter) \
+ for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \
+ ((k) = bch_btree_iter_next_filter(&((stack_iter)->iter), (b), \
+ filter));)
-#define for_each_key(b, k, iter) \
- for (bch_btree_iter_init((b), (iter), NULL); \
- ((k) = bch_btree_iter_next(iter));)
+#define for_each_key(b, k, stack_iter) \
+ for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \
+ ((k) = bch_btree_iter_next(&((stack_iter)->iter)));)
/* Sorting */
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 1d0100677357..210b59007d98 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -148,19 +148,19 @@ void bch_btree_node_read_done(struct btree *b)
{
const char *err = "bad btree header";
struct bset *i = btree_bset_first(b);
- struct btree_iter iter;
+ struct btree_iter *iter;
/*
* c->fill_iter can allocate an iterator with more memory space
* than static MAX_BSETS.
* See the comment around cache_set->fill_iter.
*/
- iter.heap.data = mempool_alloc(&b->c->fill_iter, GFP_NOIO);
- iter.heap.size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size;
- iter.heap.nr = 0;
+ iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO);
+ iter->size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size;
+ iter->used = 0;
#ifdef CONFIG_BCACHE_DEBUG
- iter.b = &b->keys;
+ iter->b = &b->keys;
#endif
if (!i->seq)
@@ -198,7 +198,7 @@ void bch_btree_node_read_done(struct btree *b)
if (i != b->keys.set[0].data && !i->keys)
goto err;
- bch_btree_iter_push(&iter, i->start, bset_bkey_last(i));
+ bch_btree_iter_push(iter, i->start, bset_bkey_last(i));
b->written += set_blocks(i, block_bytes(b->c->cache));
}
@@ -210,7 +210,7 @@ void bch_btree_node_read_done(struct btree *b)
if (i->seq == b->keys.set[0].data->seq)
goto err;
- bch_btree_sort_and_fix_extents(&b->keys, &iter, &b->c->sort);
+ bch_btree_sort_and_fix_extents(&b->keys, iter, &b->c->sort);
i = b->keys.set[0].data;
err = "short btree key";
@@ -222,7 +222,7 @@ void bch_btree_node_read_done(struct btree *b)
bch_bset_init_next(&b->keys, write_block(b),
bset_magic(&b->c->cache->sb));
out:
- mempool_free(iter.heap.data, &b->c->fill_iter);
+ mempool_free(iter, &b->c->fill_iter);
return;
err:
set_btree_node_io_error(b);
@@ -1306,11 +1306,9 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
uint8_t stale = 0;
unsigned int keys = 0, good_keys = 0;
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bset_tree *t;
- min_heap_init(&iter.heap, NULL, MAX_BSETS);
-
gc->nodes++;
for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) {
@@ -1569,11 +1567,9 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
static unsigned int btree_gc_count_keys(struct btree *b)
{
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
unsigned int ret = 0;
- min_heap_init(&iter.heap, NULL, MAX_BSETS);
-
for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
ret += bkey_u64s(k);
@@ -1612,18 +1608,18 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
int ret = 0;
bool should_rewrite;
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct gc_merge_info r[GC_MERGE_NODES];
struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1;
- min_heap_init(&iter.heap, NULL, MAX_BSETS);
- bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done);
+ bch_btree_iter_stack_init(&b->keys, &iter, &b->c->gc_done);
for (i = r; i < r + ARRAY_SIZE(r); i++)
i->b = ERR_PTR(-EINTR);
while (1) {
- k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
+ k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
+ bch_ptr_bad);
if (k) {
r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
true, b);
@@ -1918,9 +1914,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
{
int ret = 0;
struct bkey *k, *p = NULL;
- struct btree_iter iter;
-
- min_heap_init(&iter.heap, NULL, MAX_BSETS);
+ struct btree_iter_stack iter;
for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid)
bch_initial_mark_key(b->c, b->level, k);
@@ -1928,10 +1922,10 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
bch_initial_mark_key(b->c, b->level + 1, &b->key);
if (b->level) {
- bch_btree_iter_init(&b->keys, &iter, NULL);
+ bch_btree_iter_stack_init(&b->keys, &iter, NULL);
do {
- k = bch_btree_iter_next_filter(&iter, &b->keys,
+ k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
bch_ptr_bad);
if (k) {
btree_node_prefetch(b, k);
@@ -1959,7 +1953,7 @@ static int bch_btree_check_thread(void *arg)
struct btree_check_info *info = arg;
struct btree_check_state *check_state = info->state;
struct cache_set *c = check_state->c;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bkey *k, *p;
int cur_idx, prev_idx, skip_nr;
@@ -1967,11 +1961,9 @@ static int bch_btree_check_thread(void *arg)
cur_idx = prev_idx = 0;
ret = 0;
- min_heap_init(&iter.heap, NULL, MAX_BSETS);
-
/* root node keys are checked before thread created */
- bch_btree_iter_init(&c->root->keys, &iter, NULL);
- k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+ bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
BUG_ON(!k);
p = k;
@@ -1989,7 +1981,7 @@ static int bch_btree_check_thread(void *arg)
skip_nr = cur_idx - prev_idx;
while (skip_nr) {
- k = bch_btree_iter_next_filter(&iter,
+ k = bch_btree_iter_next_filter(&iter.iter,
&c->root->keys,
bch_ptr_bad);
if (k)
@@ -2062,11 +2054,9 @@ int bch_btree_check(struct cache_set *c)
int ret = 0;
int i;
struct bkey *k = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct btree_check_state check_state;
- min_heap_init(&iter.heap, NULL, MAX_BSETS);
-
/* check and mark root node keys */
for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
bch_initial_mark_key(c, c->root->level, k);
@@ -2560,12 +2550,11 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
if (b->level) {
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
- min_heap_init(&iter.heap, NULL, MAX_BSETS);
- bch_btree_iter_init(&b->keys, &iter, from);
+ bch_btree_iter_stack_init(&b->keys, &iter, from);
- while ((k = bch_btree_iter_next_filter(&iter, &b->keys,
+ while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
bch_ptr_bad))) {
ret = bcache_btree(map_nodes_recurse, k, b,
op, from, fn, flags);
@@ -2594,12 +2583,12 @@ int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
{
int ret = MAP_CONTINUE;
struct bkey *k;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
- min_heap_init(&iter.heap, NULL, MAX_BSETS);
- bch_btree_iter_init(&b->keys, &iter, from);
+ bch_btree_iter_stack_init(&b->keys, &iter, from);
- while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) {
+ while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
+ bch_ptr_bad))) {
ret = !b->level
? fn(op, b, k)
: bcache_btree(map_keys_recurse, k,
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 4b84fda1530a..d626ffcbecb9 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -33,16 +33,15 @@ static void sort_key_next(struct btree_iter *iter,
i->k = bkey_next(i->k);
if (i->k == i->end)
- *i = iter->heap.data[--iter->heap.nr];
+ *i = iter->data[--iter->used];
}
-static bool new_bch_key_sort_cmp(const void *l, const void *r, void *args)
+static bool bch_key_sort_cmp(struct btree_iter_set l,
+ struct btree_iter_set r)
{
- struct btree_iter_set *_l = (struct btree_iter_set *)l;
- struct btree_iter_set *_r = (struct btree_iter_set *)r;
- int64_t c = bkey_cmp(_l->k, _r->k);
+ int64_t c = bkey_cmp(l.k, r.k);
- return !(c ? c > 0 : _l->k < _r->k);
+ return c ? c > 0 : l.k < r.k;
}
static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
@@ -239,7 +238,7 @@ static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk,
}
const struct btree_keys_ops bch_btree_keys_ops = {
- .sort_cmp = new_bch_key_sort_cmp,
+ .sort_cmp = bch_key_sort_cmp,
.insert_fixup = bch_btree_ptr_insert_fixup,
.key_invalid = bch_btree_ptr_invalid,
.key_bad = bch_btree_ptr_bad,
@@ -256,28 +255,22 @@ const struct btree_keys_ops bch_btree_keys_ops = {
* Necessary for btree_sort_fixup() - if there are multiple keys that compare
* equal in different sets, we have to process them newest to oldest.
*/
-
-static bool new_bch_extent_sort_cmp(const void *l, const void *r, void __always_unused *args)
+static bool bch_extent_sort_cmp(struct btree_iter_set l,
+ struct btree_iter_set r)
{
- struct btree_iter_set *_l = (struct btree_iter_set *)l;
- struct btree_iter_set *_r = (struct btree_iter_set *)r;
- int64_t c = bkey_cmp(&START_KEY(_l->k), &START_KEY(_r->k));
+ int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));
- return !(c ? c > 0 : _l->k < _r->k);
+ return c ? c > 0 : l.k < r.k;
}
static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
struct bkey *tmp)
{
- const struct min_heap_callbacks callbacks = {
- .less = new_bch_extent_sort_cmp,
- .swp = NULL,
- };
- while (iter->heap.nr > 1) {
- struct btree_iter_set *top = iter->heap.data, *i = top + 1;
-
- if (iter->heap.nr > 2 &&
- !new_bch_extent_sort_cmp(&i[0], &i[1], NULL))
+ while (iter->used > 1) {
+ struct btree_iter_set *top = iter->data, *i = top + 1;
+
+ if (iter->used > 2 &&
+ bch_extent_sort_cmp(i[0], i[1]))
i++;
if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
@@ -285,7 +278,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
if (!KEY_SIZE(i->k)) {
sort_key_next(iter, i);
- min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL);
+ heap_sift(iter, i - top, bch_extent_sort_cmp);
continue;
}
@@ -295,7 +288,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
else
bch_cut_front(top->k, i->k);
- min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL);
+ heap_sift(iter, i - top, bch_extent_sort_cmp);
} else {
/* can't happen because of comparison func */
BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
@@ -305,7 +298,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
bch_cut_back(&START_KEY(i->k), tmp);
bch_cut_front(i->k, top->k);
- min_heap_sift_down(&iter->heap, 0, &callbacks, NULL);
+ heap_sift(iter, 0, bch_extent_sort_cmp);
return tmp;
} else {
@@ -625,7 +618,7 @@ static bool bch_extent_merge(struct btree_keys *bk,
}
const struct btree_keys_ops bch_extent_keys_ops = {
- .sort_cmp = new_bch_extent_sort_cmp,
+ .sort_cmp = bch_extent_sort_cmp,
.sort_fixup = bch_extent_sort_fixup,
.insert_fixup = bch_extent_insert_fixup,
.key_invalid = bch_extent_invalid,
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 45ca134cbf02..26a6a535ec32 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -182,19 +182,16 @@ err: if (!IS_ERR_OR_NULL(w->private))
closure_sync(&cl);
}
-static bool new_bucket_cmp(const void *l, const void *r, void __always_unused *args)
+static bool bucket_cmp(struct bucket *l, struct bucket *r)
{
- struct bucket **_l = (struct bucket **)l;
- struct bucket **_r = (struct bucket **)r;
-
- return GC_SECTORS_USED(*_l) >= GC_SECTORS_USED(*_r);
+ return GC_SECTORS_USED(l) < GC_SECTORS_USED(r);
}
static unsigned int bucket_heap_top(struct cache *ca)
{
struct bucket *b;
- return (b = min_heap_peek(&ca->heap)[0]) ? GC_SECTORS_USED(b) : 0;
+ return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0;
}
void bch_moving_gc(struct cache_set *c)
@@ -202,10 +199,6 @@ void bch_moving_gc(struct cache_set *c)
struct cache *ca = c->cache;
struct bucket *b;
unsigned long sectors_to_move, reserve_sectors;
- const struct min_heap_callbacks callbacks = {
- .less = new_bucket_cmp,
- .swp = NULL,
- };
if (!c->copy_gc_enabled)
return;
@@ -216,7 +209,7 @@ void bch_moving_gc(struct cache_set *c)
reserve_sectors = ca->sb.bucket_size *
fifo_used(&ca->free[RESERVE_MOVINGGC]);
- ca->heap.nr = 0;
+ ca->heap.used = 0;
for_each_bucket(b, ca) {
if (GC_MARK(b) == GC_MARK_METADATA ||
@@ -225,31 +218,25 @@ void bch_moving_gc(struct cache_set *c)
atomic_read(&b->pin))
continue;
- if (!min_heap_full(&ca->heap)) {
+ if (!heap_full(&ca->heap)) {
sectors_to_move += GC_SECTORS_USED(b);
- min_heap_push(&ca->heap, &b, &callbacks, NULL);
- } else if (!new_bucket_cmp(&b, min_heap_peek(&ca->heap), ca)) {
+ heap_add(&ca->heap, b, bucket_cmp);
+ } else if (bucket_cmp(b, heap_peek(&ca->heap))) {
sectors_to_move -= bucket_heap_top(ca);
sectors_to_move += GC_SECTORS_USED(b);
ca->heap.data[0] = b;
- min_heap_sift_down(&ca->heap, 0, &callbacks, NULL);
+ heap_sift(&ca->heap, 0, bucket_cmp);
}
}
while (sectors_to_move > reserve_sectors) {
- if (ca->heap.nr) {
- b = min_heap_peek(&ca->heap)[0];
- min_heap_pop(&ca->heap, &callbacks, NULL);
- }
+ heap_pop(&ca->heap, b, bucket_cmp);
sectors_to_move -= GC_SECTORS_USED(b);
}
- while (ca->heap.nr) {
- b = min_heap_peek(&ca->heap)[0];
- min_heap_pop(&ca->heap, &callbacks, NULL);
+ while (heap_pop(&ca->heap, b, bucket_cmp))
SET_GC_MOVE(b, 1);
- }
mutex_unlock(&c->bucket_lock);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 1efb768b2890..2ea490b9d370 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1912,7 +1912,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
INIT_LIST_HEAD(&c->btree_cache_freed);
INIT_LIST_HEAD(&c->data_buckets);
- iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) *
+ iter_size = sizeof(struct btree_iter) +
+ ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) *
sizeof(struct btree_iter_set);
c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL);
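The iter_size sum above (iterator header plus one btree_iter_set per block in a meta bucket) could equivalently use the overflow-checked helper from <linux/overflow.h>; a sketch only, not what the patch does:

    #include <linux/overflow.h>

    static size_t fill_iter_bytes(struct cache_sb *sb)
    {
            size_t nsets = (meta_bucket_pages(sb) * PAGE_SECTORS) /
                           sb->block_size;
            struct btree_iter *iter = NULL;

            /* sizeof(*iter) + nsets * sizeof(iter->data[0]), with checking */
            return struct_size(iter, data, nsets);
    }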
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index e8f696cb58c0..826b14cae4e5 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -660,9 +660,7 @@ static unsigned int bch_root_usage(struct cache_set *c)
unsigned int bytes = 0;
struct bkey *k;
struct btree *b;
- struct btree_iter iter;
-
- min_heap_init(&iter.heap, NULL, MAX_BSETS);
+ struct btree_iter_stack iter;
goto lock_root;
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 539454d8e2d0..f61ab1bada6c 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -9,7 +9,6 @@
#include <linux/kernel.h>
#include <linux/sched/clock.h>
#include <linux/llist.h>
-#include <linux/min_heap.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
@@ -31,10 +30,16 @@ struct closure;
#endif
+#define DECLARE_HEAP(type, name) \
+ struct { \
+ size_t size, used; \
+ type *data; \
+ } name
+
#define init_heap(heap, _size, gfp) \
({ \
size_t _bytes; \
- (heap)->nr = 0; \
+ (heap)->used = 0; \
(heap)->size = (_size); \
_bytes = (heap)->size * sizeof(*(heap)->data); \
(heap)->data = kvmalloc(_bytes, (gfp) & GFP_KERNEL); \
@@ -47,6 +52,64 @@ do { \
(heap)->data = NULL; \
} while (0)
+#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j])
+
+#define heap_sift(h, i, cmp) \
+do { \
+ size_t _r, _j = i; \
+ \
+ for (; _j * 2 + 1 < (h)->used; _j = _r) { \
+ _r = _j * 2 + 1; \
+ if (_r + 1 < (h)->used && \
+ cmp((h)->data[_r], (h)->data[_r + 1])) \
+ _r++; \
+ \
+ if (cmp((h)->data[_r], (h)->data[_j])) \
+ break; \
+ heap_swap(h, _r, _j); \
+ } \
+} while (0)
+
+#define heap_sift_down(h, i, cmp) \
+do { \
+ while (i) { \
+ size_t p = (i - 1) / 2; \
+ if (cmp((h)->data[i], (h)->data[p])) \
+ break; \
+ heap_swap(h, i, p); \
+ i = p; \
+ } \
+} while (0)
+
+#define heap_add(h, d, cmp) \
+({ \
+ bool _r = !heap_full(h); \
+ if (_r) { \
+ size_t _i = (h)->used++; \
+ (h)->data[_i] = d; \
+ \
+ heap_sift_down(h, _i, cmp); \
+ heap_sift(h, _i, cmp); \
+ } \
+ _r; \
+})
+
+#define heap_pop(h, d, cmp) \
+({ \
+ bool _r = (h)->used; \
+ if (_r) { \
+ (d) = (h)->data[0]; \
+ (h)->used--; \
+ heap_swap(h, 0, (h)->used); \
+ heap_sift(h, 0, cmp); \
+ } \
+ _r; \
+})
+
+#define heap_peek(h) ((h)->used ? (h)->data[0] : NULL)
+
+#define heap_full(h) ((h)->used == (h)->size)
+
#define DECLARE_FIFO(type, name) \
struct { \
size_t front, back, size, mask; \
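A usage sketch for the reverted heap macros (assumes the util.h definitions above, plus free_heap() from the same header): with cmp defined as "l < r" the root holds the largest element, which is how movinggc.c keeps only the cheapest buckets resident.

    static bool less(int l, int r)
    {
            return l < r;
    }

    static void heap_demo(void)
    {
            DECLARE_HEAP(int, h);
            int v;

            if (!init_heap(&h, 4, GFP_KERNEL))
                    return;
            heap_add(&h, 3, less);
            heap_add(&h, 1, less);
            heap_add(&h, 2, less);
            while (heap_pop(&h, v, less))
                    pr_info("popped %d\n", v);  /* 3, 2, 1: largest first */
            free_heap(&h);
    }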
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 453efbbdc8ee..302e75f1fc4b 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -908,16 +908,15 @@ static int bch_dirty_init_thread(void *arg)
struct dirty_init_thrd_info *info = arg;
struct bch_dirty_init_state *state = info->state;
struct cache_set *c = state->c;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct bkey *k, *p;
int cur_idx, prev_idx, skip_nr;
k = p = NULL;
prev_idx = 0;
- min_heap_init(&iter.heap, NULL, MAX_BSETS);
- bch_btree_iter_init(&c->root->keys, &iter, NULL);
- k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+ bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
BUG_ON(!k);
p = k;
@@ -931,7 +930,7 @@ static int bch_dirty_init_thread(void *arg)
skip_nr = cur_idx - prev_idx;
while (skip_nr) {
- k = bch_btree_iter_next_filter(&iter,
+ k = bch_btree_iter_next_filter(&iter.iter,
&c->root->keys,
bch_ptr_bad);
if (k)
@@ -980,13 +979,11 @@ void bch_sectors_dirty_init(struct bcache_device *d)
int i;
struct btree *b = NULL;
struct bkey *k = NULL;
- struct btree_iter iter;
+ struct btree_iter_stack iter;
struct sectors_dirty_init op;
struct cache_set *c = d->c;
struct bch_dirty_init_state state;
- min_heap_init(&iter.heap, NULL, MAX_BSETS);
-
retry_lock:
b = c->root;
rw_lock(0, b, b->level);
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index ec84ba5e93e5..ff7595caf440 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -2742,7 +2742,11 @@ static unsigned long __evict_a_few(unsigned long nr_buffers)
__make_buffer_clean(b);
__free_buffer_wake(b);
- cond_resched();
+ if (need_resched()) {
+ dm_bufio_unlock(c);
+ cond_resched();
+ dm_bufio_lock(c);
+ }
}
dm_bufio_unlock(c);
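The hunk above stops rescheduling while still holding the bufio lock; the general shape (hypothetical names) is to yield the lock and the CPU together so waiters can make progress:

    #include <linux/mutex.h>
    #include <linux/sched.h>

    struct demo_cache {
            struct mutex lock;
            int pending;
    };

    static void demo_evict_all(struct demo_cache *c)
    {
            mutex_lock(&c->lock);
            while (c->pending > 0) {
                    c->pending--;            /* stand-in for one eviction */
                    if (need_resched()) {    /* yield lock and CPU together */
                            mutex_unlock(&c->lock);
                            cond_resched();
                            mutex_lock(&c->lock);
                    }
            }
            mutex_unlock(&c->lock);
    }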
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 9dfdb63220d7..17157c4216a5 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -517,7 +517,10 @@ static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv,
{
struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk;
SHASH_DESC_ON_STACK(desc, lmk->hash_tfm);
- struct md5_state md5state;
+ union {
+ struct md5_state md5state;
+ u8 state[CRYPTO_MD5_STATESIZE];
+ } u;
__le32 buf[4];
int i, r;
@@ -548,13 +551,13 @@ static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv,
return r;
/* No MD5 padding here */
- r = crypto_shash_export(desc, &md5state);
+ r = crypto_shash_export(desc, &u.md5state);
if (r)
return r;
for (i = 0; i < MD5_HASH_WORDS; i++)
- __cpu_to_le32s(&md5state.hash[i]);
- memcpy(iv, &md5state.hash, cc->iv_size);
+ __cpu_to_le32s(&u.md5state.hash[i]);
+ memcpy(iv, &u.md5state.hash, cc->iv_size);
return 0;
}
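The union above is the usual fix when crypto_shash_export() may write the algorithm's full state size: CRYPTO_MD5_STATESIZE can exceed sizeof(struct md5_state), so the buffer is sized for the worst case while keeping typed access to ->hash. A build-time sketch of the invariant it buys (not from the patch):

    #include <crypto/md5.h>
    #include <linux/build_bug.h>
    #include <linux/types.h>

    union md5_export_buf {
            struct md5_state md5state;       /* typed view of ->hash[] */
            u8 state[CRYPTO_MD5_STATESIZE];  /* worst-case export size */
    };

    static_assert(sizeof(union md5_export_buf) >= CRYPTO_MD5_STATESIZE);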
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index d296770478b2..e8c0a8c6fb51 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2407,7 +2407,7 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
*/
sb_retrieve_failed_devices(sb, failed_devices);
rdev_for_each(r, mddev) {
- if (test_bit(Journal, &rdev->flags) ||
+ if (test_bit(Journal, &r->flags) ||
!r->sb_page)
continue;
sb2 = page_address(r->sb_page);
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index bd694910b01b..7f524a26cebc 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -2366,8 +2366,7 @@ static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats)
if (!bitmap)
return -ENOENT;
- if (!bitmap->mddev->bitmap_info.external &&
- !bitmap->storage.sb_page)
+ if (!bitmap->storage.sb_page)
return -EINVAL;
sb = kmap_local_page(bitmap->storage.sb_page);
stats->sync_size = le64_to_cpu(sb->sync_size);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 19c5a0ce5a40..64b8176907a9 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1399,7 +1399,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
}
read_bio = bio_alloc_clone(mirror->rdev->bdev, bio, gfp,
&mddev->bio_set);
-
+ read_bio->bi_opf &= ~REQ_NOWAIT;
r1_bio->bios[rdisk] = read_bio;
read_bio->bi_iter.bi_sector = r1_bio->sector +
@@ -1649,6 +1649,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
wait_for_serialization(rdev, r1_bio);
}
+ mbio->bi_opf &= ~REQ_NOWAIT;
r1_bio->bios[i] = mbio;
mbio->bi_iter.bi_sector = (r1_bio->sector + rdev->data_offset);
@@ -3428,6 +3429,7 @@ static int raid1_reshape(struct mddev *mddev)
/* ok, everything is stopped */
oldpool = conf->r1bio_pool;
conf->r1bio_pool = newpool;
+ init_waitqueue_head(&conf->r1bio_pool.wait);
for (d = d2 = 0; d < conf->raid_disks; d++) {
struct md_rdev *rdev = conf->mirrors[d].rdev;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index b74780af4c22..c9bd2005bfd0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1182,8 +1182,11 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
}
}
- if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors))
+ if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) {
+ raid_end_bio_io(r10_bio);
return;
+ }
+
rdev = read_balance(conf, r10_bio, &max_sectors);
if (!rdev) {
if (err_rdev) {
@@ -1221,6 +1224,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
r10_bio->master_bio = bio;
}
read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set);
+ read_bio->bi_opf &= ~REQ_NOWAIT;
r10_bio->devs[slot].bio = read_bio;
r10_bio->devs[slot].rdev = rdev;
@@ -1256,6 +1260,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
conf->mirrors[devnum].rdev;
mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, &mddev->bio_set);
+ mbio->bi_opf &= ~REQ_NOWAIT;
if (replacement)
r10_bio->devs[n_copy].repl_bio = mbio;
else
@@ -1370,8 +1375,11 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
}
sectors = r10_bio->sectors;
- if (!regular_request_wait(mddev, conf, bio, sectors))
+ if (!regular_request_wait(mddev, conf, bio, sectors)) {
+ raid_end_bio_io(r10_bio);
return;
+ }
+
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
(mddev->reshape_backwards
? (bio->bi_iter.bi_sector < conf->reshape_safe &&
diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c
index 043b9ec756ff..7f3f47db4c98 100644
--- a/drivers/memstick/core/memstick.c
+++ b/drivers/memstick/core/memstick.c
@@ -324,7 +324,7 @@ EXPORT_SYMBOL(memstick_init_req);
static int h_memstick_read_dev_id(struct memstick_dev *card,
struct memstick_request **mrq)
{
- struct ms_id_register id_reg;
+ struct ms_id_register id_reg = {};
if (!(*mrq)) {
memstick_init_req(&card->current_mrq, MS_TPC_READ_REG, &id_reg,
diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c
index 488e346047c1..77230fbe07be 100644
--- a/drivers/mfd/88pm860x-core.c
+++ b/drivers/mfd/88pm860x-core.c
@@ -573,7 +573,6 @@ static int device_irq_init(struct pm860x_chip *chip,
unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT;
int data, mask, ret = -EINVAL;
int nr_irqs, irq_base = -1;
- struct device_node *node = i2c->dev.of_node;
mask = PM8607_B0_MISC1_INV_INT | PM8607_B0_MISC1_INT_CLEAR
| PM8607_B0_MISC1_INT_MASK;
@@ -624,7 +623,7 @@ static int device_irq_init(struct pm860x_chip *chip,
ret = -EBUSY;
goto out;
}
- irq_domain_create_legacy(of_fwnode_handle(node), nr_irqs, chip->irq_base, 0,
+ irq_domain_create_legacy(dev_fwnode(&i2c->dev), nr_irqs, chip->irq_base, 0,
&pm860x_irq_domain_ops, chip);
chip->core_irq = i2c->irq;
if (!chip->core_irq)
diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c
index 78b16c67a5fc..25377dcce60e 100644
--- a/drivers/mfd/max8925-core.c
+++ b/drivers/mfd/max8925-core.c
@@ -656,7 +656,6 @@ static int max8925_irq_init(struct max8925_chip *chip, int irq,
{
unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT;
int ret;
- struct device_node *node = chip->dev->of_node;
/* clear all interrupts */
max8925_reg_read(chip->i2c, MAX8925_CHG_IRQ1);
@@ -682,8 +681,9 @@ static int max8925_irq_init(struct max8925_chip *chip, int irq,
return -EBUSY;
}
- irq_domain_create_legacy(of_fwnode_handle(node), MAX8925_NR_IRQS, chip->irq_base, 0,
- &max8925_irq_domain_ops, chip);
+ irq_domain_create_legacy(dev_fwnode(chip->dev), MAX8925_NR_IRQS,
+ chip->irq_base, 0, &max8925_irq_domain_ops,
+ chip);
/* request irq handler for pmic main irq*/
chip->core_irq = irq;
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
index 232c2bfe8c18..d3ab40651307 100644
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c
@@ -676,7 +676,6 @@ int twl4030_init_irq(struct device *dev, int irq_num)
static struct irq_chip twl4030_irq_chip;
int status, i;
int irq_base, irq_end, nr_irqs;
- struct device_node *node = dev->of_node;
/*
* TWL core and pwr interrupts must be contiguous because
@@ -691,7 +690,7 @@ int twl4030_init_irq(struct device *dev, int irq_num)
return irq_base;
}
- irq_domain_create_legacy(of_fwnode_handle(node), nr_irqs, irq_base, 0,
+ irq_domain_create_legacy(dev_fwnode(dev), nr_irqs, irq_base, 0,
&irq_domain_simple_ops, NULL);
irq_end = irq_base + TWL4030_CORE_NR_IRQS;
diff --git a/drivers/misc/amd-sbi/rmi-core.c b/drivers/misc/amd-sbi/rmi-core.c
index b653a21a909e..3dec2fc00124 100644
--- a/drivers/misc/amd-sbi/rmi-core.c
+++ b/drivers/misc/amd-sbi/rmi-core.c
@@ -42,7 +42,6 @@
#define RD_MCA_CMD 0x86
/* CPUID MCAMSR mask & index */
-#define CPUID_MCA_THRD_MASK GENMASK(15, 0)
#define CPUID_MCA_THRD_INDEX 32
#define CPUID_MCA_FUNC_MASK GENMASK(31, 0)
#define CPUID_EXT_FUNC_INDEX 56
@@ -129,7 +128,7 @@ static int rmi_cpuid_read(struct sbrmi_data *data,
goto exit_unlock;
}
- thread = msg->cpu_in_out << CPUID_MCA_THRD_INDEX & CPUID_MCA_THRD_MASK;
+ thread = msg->cpu_in_out >> CPUID_MCA_THRD_INDEX;
/* Thread > 127, Thread128 CS register, 1'b1 needs to be set to 1 */
if (thread > 127) {
@@ -210,7 +209,7 @@ static int rmi_mca_msr_read(struct sbrmi_data *data,
goto exit_unlock;
}
- thread = msg->mcamsr_in_out << CPUID_MCA_THRD_INDEX & CPUID_MCA_THRD_MASK;
+ thread = msg->mcamsr_in_out >> CPUID_MCA_THRD_INDEX;
/* Thread > 127, Thread128 CS register, 1'b1 needs to be set to 1 */
if (thread > 127) {
@@ -321,6 +320,10 @@ int rmi_mailbox_xfer(struct sbrmi_data *data,
ret = regmap_read(data->regmap, SBRMI_OUTBNDMSG7, &ec);
if (ret || ec)
goto exit_clear_alert;
+
+ /* Clear the input value before updating the output data */
+ msg->mb_in_out = 0;
+
/*
* For a read operation, the initiator (BMC) reads the firmware
* response Command Data Out[31:0] from SBRMI::OutBndMsg_inst[4:1]
@@ -373,7 +376,8 @@ static int apml_rmi_reg_xfer(struct sbrmi_data *data,
mutex_unlock(&data->lock);
if (msg.rflag && !ret)
- return copy_to_user(arg, &msg, sizeof(struct apml_reg_xfer_msg));
+ if (copy_to_user(arg, &msg, sizeof(struct apml_reg_xfer_msg)))
+ return -EFAULT;
return ret;
}
@@ -391,7 +395,9 @@ static int apml_mailbox_xfer(struct sbrmi_data *data, struct apml_mbox_msg __use
if (ret && ret != -EPROTOTYPE)
return ret;
- return copy_to_user(arg, &msg, sizeof(struct apml_mbox_msg));
+ if (copy_to_user(arg, &msg, sizeof(struct apml_mbox_msg)))
+ return -EFAULT;
+ return ret;
}
static int apml_cpuid_xfer(struct sbrmi_data *data, struct apml_cpuid_msg __user *arg)
@@ -408,7 +414,9 @@ static int apml_cpuid_xfer(struct sbrmi_data *data, struct apml_cpuid_msg __user
if (ret && ret != -EPROTOTYPE)
return ret;
- return copy_to_user(arg, &msg, sizeof(struct apml_cpuid_msg));
+ if (copy_to_user(arg, &msg, sizeof(struct apml_cpuid_msg)))
+ return -EFAULT;
+ return ret;
}
static int apml_mcamsr_xfer(struct sbrmi_data *data, struct apml_mcamsr_msg __user *arg)
@@ -425,7 +433,9 @@ static int apml_mcamsr_xfer(struct sbrmi_data *data, struct apml_mcamsr_msg __us
if (ret && ret != -EPROTOTYPE)
return ret;
- return copy_to_user(arg, &msg, sizeof(struct apml_mcamsr_msg));
+ if (copy_to_user(arg, &msg, sizeof(struct apml_mcamsr_msg)))
+ return -EFAULT;
+ return ret;
}
static long sbrmi_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
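All four rmi-core fixes above correct the same bug: copy_to_user() returns the number of bytes it failed to copy, not an errno, so returning it directly leaks a positive count to user space. A minimal sketch of the convention (hypothetical helper name):

    #include <linux/errno.h>
    #include <linux/uaccess.h>

    static int demo_reply(void __user *arg, const struct apml_mbox_msg *msg,
                          int xfer_ret)
    {
            if (copy_to_user(arg, msg, sizeof(*msg)))
                    return -EFAULT;   /* bytes-not-copied is not an errno */
            return xfer_ret;          /* preserve the transfer status */
    }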
diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h
index 7f893bafaa60..c417ed34c057 100644
--- a/drivers/mmc/core/quirks.h
+++ b/drivers/mmc/core/quirks.h
@@ -44,6 +44,12 @@ static const struct mmc_fixup __maybe_unused mmc_sd_fixups[] = {
0, -1ull, SDIO_ANY_ID, SDIO_ANY_ID, add_quirk_sd,
MMC_QUIRK_NO_UHS_DDR50_TUNING, EXT_CSD_REV_ANY),
+ /*
+	 * Some SD cards report discard support while they don't
+ */
+ MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd,
+ MMC_QUIRK_BROKEN_SD_DISCARD),
+
END_FIXUP
};
@@ -147,12 +153,6 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = {
MMC_FIXUP("M62704", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc,
MMC_QUIRK_TRIM_BROKEN),
- /*
- * Some SD cards reports discard support while they don't
- */
- MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd,
- MMC_QUIRK_BROKEN_SD_DISCARD),
-
END_FIXUP
};
diff --git a/drivers/mmc/core/sd_uhs2.c b/drivers/mmc/core/sd_uhs2.c
index 1c31d0dfa961..de17d1611290 100644
--- a/drivers/mmc/core/sd_uhs2.c
+++ b/drivers/mmc/core/sd_uhs2.c
@@ -91,8 +91,8 @@ static int sd_uhs2_phy_init(struct mmc_host *host)
err = host->ops->uhs2_control(host, UHS2_PHY_INIT);
if (err) {
- pr_err("%s: failed to initial phy for UHS-II!\n",
- mmc_hostname(host));
+		pr_debug("%s: failed to initialize phy for UHS-II!\n",
+ mmc_hostname(host));
}
return err;
diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c
index def054ddd256..4fced9b36c80 100644
--- a/drivers/mmc/host/bcm2835.c
+++ b/drivers/mmc/host/bcm2835.c
@@ -503,7 +503,8 @@ void bcm2835_prepare_dma(struct bcm2835_host *host, struct mmc_data *data)
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!desc) {
- dma_unmap_sg(dma_chan->device->dev, data->sg, sg_len, dir_data);
+ dma_unmap_sg(dma_chan->device->dev, data->sg, data->sg_len,
+ dir_data);
return;
}
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 31eb90536bce..d7020e06dd55 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -846,12 +846,18 @@ static inline void msdc_dma_setup(struct msdc_host *host, struct msdc_dma *dma,
static void msdc_prepare_data(struct msdc_host *host, struct mmc_data *data)
{
if (!(data->host_cookie & MSDC_PREPARE_FLAG)) {
- data->host_cookie |= MSDC_PREPARE_FLAG;
data->sg_count = dma_map_sg(host->dev, data->sg, data->sg_len,
mmc_get_dma_dir(data));
+ if (data->sg_count)
+ data->host_cookie |= MSDC_PREPARE_FLAG;
}
}
+static bool msdc_data_prepared(struct mmc_data *data)
+{
+ return data->host_cookie & MSDC_PREPARE_FLAG;
+}
+
static void msdc_unprepare_data(struct msdc_host *host, struct mmc_data *data)
{
if (data->host_cookie & MSDC_ASYNC_FLAG)
@@ -1483,8 +1489,19 @@ static void msdc_ops_request(struct mmc_host *mmc, struct mmc_request *mrq)
WARN_ON(!host->hsq_en && host->mrq);
host->mrq = mrq;
- if (mrq->data)
+ if (mrq->data) {
msdc_prepare_data(host, mrq->data);
+ if (!msdc_data_prepared(mrq->data)) {
+ host->mrq = NULL;
+ /*
+ * Failed to prepare DMA area, fail fast before
+ * starting any commands.
+ */
+ mrq->cmd->error = -ENOSPC;
+ mmc_request_done(mmc_from_priv(host), mrq);
+ return;
+ }
+ }
/* if SBC is required, we have HW option and SW option.
* if HW option is enabled, and SBC does not have "special" flags,
diff --git a/drivers/mmc/host/sdhci-of-k1.c b/drivers/mmc/host/sdhci-of-k1.c
index 6880d3e9ab62..2e5da7c5834c 100644
--- a/drivers/mmc/host/sdhci-of-k1.c
+++ b/drivers/mmc/host/sdhci-of-k1.c
@@ -276,7 +276,8 @@ static int spacemit_sdhci_probe(struct platform_device *pdev)
host->mmc->caps |= MMC_CAP_NEED_RSP_BUSY;
- if (spacemit_sdhci_get_clocks(dev, pltfm_host))
+ ret = spacemit_sdhci_get_clocks(dev, pltfm_host);
+ if (ret)
goto err_pltfm;
ret = sdhci_add_host(host);
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 13a84b9309e0..e3877a1c72a9 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -913,7 +913,8 @@ static bool glk_broken_cqhci(struct sdhci_pci_slot *slot)
{
return slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_GLK_EMMC &&
(dmi_match(DMI_BIOS_VENDOR, "LENOVO") ||
- dmi_match(DMI_SYS_VENDOR, "IRBIS"));
+ dmi_match(DMI_SYS_VENDOR, "IRBIS") ||
+ dmi_match(DMI_SYS_VENDOR, "Positivo Tecnologia SA"));
}
static bool jsl_broken_hs400es(struct sdhci_pci_slot *slot)
diff --git a/drivers/mmc/host/sdhci-uhs2.c b/drivers/mmc/host/sdhci-uhs2.c
index c53b64d50c0d..0efeb9d0c376 100644
--- a/drivers/mmc/host/sdhci-uhs2.c
+++ b/drivers/mmc/host/sdhci-uhs2.c
@@ -99,8 +99,8 @@ void sdhci_uhs2_reset(struct sdhci_host *host, u16 mask)
/* hw clears the bit when it's done */
if (read_poll_timeout_atomic(sdhci_readw, val, !(val & mask), 10,
UHS2_RESET_TIMEOUT_100MS, true, host, SDHCI_UHS2_SW_RESET)) {
- pr_warn("%s: %s: Reset 0x%x never completed. %s: clean reset bit.\n", __func__,
- mmc_hostname(host->mmc), (int)mask, mmc_hostname(host->mmc));
+ pr_debug("%s: %s: Reset 0x%x never completed. %s: clean reset bit.\n", __func__,
+ mmc_hostname(host->mmc), (int)mask, mmc_hostname(host->mmc));
sdhci_writeb(host, 0, SDHCI_UHS2_SW_RESET);
return;
}
@@ -335,8 +335,8 @@ static int sdhci_uhs2_interface_detect(struct sdhci_host *host)
if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_IF_DETECT),
100, UHS2_INTERFACE_DETECT_TIMEOUT_100MS, true,
host, SDHCI_PRESENT_STATE)) {
- pr_warn("%s: not detect UHS2 interface in 100ms.\n", mmc_hostname(host->mmc));
- sdhci_dumpregs(host);
+		pr_debug("%s: could not detect UHS2 interface in 100ms.\n", mmc_hostname(host->mmc));
+ sdhci_dbg_dumpregs(host, "UHS2 interface detect timeout in 100ms");
return -EIO;
}
@@ -345,8 +345,8 @@ static int sdhci_uhs2_interface_detect(struct sdhci_host *host)
if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_LANE_SYNC),
100, UHS2_LANE_SYNC_TIMEOUT_150MS, true, host, SDHCI_PRESENT_STATE)) {
- pr_warn("%s: UHS2 Lane sync fail in 150ms.\n", mmc_hostname(host->mmc));
- sdhci_dumpregs(host);
+ pr_debug("%s: UHS2 Lane sync fail in 150ms.\n", mmc_hostname(host->mmc));
+ sdhci_dbg_dumpregs(host, "UHS2 Lane sync fail in 150ms");
return -EIO;
}
@@ -417,12 +417,12 @@ static int sdhci_uhs2_do_detect_init(struct mmc_host *mmc)
host->ops->uhs2_pre_detect_init(host);
if (sdhci_uhs2_interface_detect(host)) {
- pr_warn("%s: cannot detect UHS2 interface.\n", mmc_hostname(host->mmc));
+ pr_debug("%s: cannot detect UHS2 interface.\n", mmc_hostname(host->mmc));
return -EIO;
}
if (sdhci_uhs2_init(host)) {
- pr_warn("%s: UHS2 init fail.\n", mmc_hostname(host->mmc));
+ pr_debug("%s: UHS2 init fail.\n", mmc_hostname(host->mmc));
return -EIO;
}
@@ -504,8 +504,8 @@ static int sdhci_uhs2_check_dormant(struct sdhci_host *host)
if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_IN_DORMANT_STATE),
100, UHS2_CHECK_DORMANT_TIMEOUT_100MS, true, host,
SDHCI_PRESENT_STATE)) {
- pr_warn("%s: UHS2 IN_DORMANT fail in 100ms.\n", mmc_hostname(host->mmc));
- sdhci_dumpregs(host);
+ pr_debug("%s: UHS2 IN_DORMANT fail in 100ms.\n", mmc_hostname(host->mmc));
+ sdhci_dbg_dumpregs(host, "UHS2 IN_DORMANT fail in 100ms");
return -EIO;
}
return 0;
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index f008167d1863..e116f2db34d5 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2065,15 +2065,10 @@ void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
host->mmc->actual_clock = 0;
- clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
- if (clk & SDHCI_CLOCK_CARD_EN)
- sdhci_writew(host, clk & ~SDHCI_CLOCK_CARD_EN,
- SDHCI_CLOCK_CONTROL);
+ sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
- if (clock == 0) {
- sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
+ if (clock == 0)
return;
- }
clk = sdhci_calc_clk(host, clock, &host->mmc->actual_clock);
sdhci_enable_clk(host, clk);
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index f9d65dd0f2b2..70ada1857a4c 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -900,4 +900,20 @@ void sdhci_switch_external_dma(struct sdhci_host *host, bool en);
void sdhci_set_data_timeout_irq(struct sdhci_host *host, bool enable);
void __sdhci_set_timeout(struct sdhci_host *host, struct mmc_command *cmd);
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
+#define SDHCI_DBG_ANYWAY 0
+#elif defined(DEBUG)
+#define SDHCI_DBG_ANYWAY 1
+#else
+#define SDHCI_DBG_ANYWAY 0
+#endif
+
+#define sdhci_dbg_dumpregs(host, fmt) \
+do { \
+ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \
+ if (DYNAMIC_DEBUG_BRANCH(descriptor) || SDHCI_DBG_ANYWAY) \
+ sdhci_dumpregs(host); \
+} while (0)
+
#endif /* __SDHCI_HW_H */
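sdhci_dbg_dumpregs() above rides on dynamic debug: the register dump runs only when the descriptor built from fmt is enabled at run time, or unconditionally when DEBUG makes SDHCI_DBG_ANYWAY 1. A usage sketch with a hypothetical message:

    static void demo_dump(struct sdhci_host *host)
    {
            /* Enable at run time with, e.g. (assumes CONFIG_DYNAMIC_DEBUG):
             *   echo 'format "demo dump" +p' \
             *     > /sys/kernel/debug/dynamic_debug/control
             */
            sdhci_dbg_dumpregs(host, "demo dump");
    }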
diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c
index 73385ff4c0f3..9e94998e8df7 100644
--- a/drivers/mmc/host/sdhci_am654.c
+++ b/drivers/mmc/host/sdhci_am654.c
@@ -613,7 +613,8 @@ static const struct sdhci_ops sdhci_am654_ops = {
static const struct sdhci_pltfm_data sdhci_am654_pdata = {
.ops = &sdhci_am654_ops,
.quirks = SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12,
- .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+ SDHCI_QUIRK2_DISABLE_HW_TIMEOUT,
};
static const struct sdhci_am654_driver_data sdhci_am654_sr1_drvdata = {
@@ -643,7 +644,8 @@ static const struct sdhci_ops sdhci_j721e_8bit_ops = {
static const struct sdhci_pltfm_data sdhci_j721e_8bit_pdata = {
.ops = &sdhci_j721e_8bit_ops,
.quirks = SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12,
- .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+ SDHCI_QUIRK2_DISABLE_HW_TIMEOUT,
};
static const struct sdhci_am654_driver_data sdhci_j721e_8bit_drvdata = {
@@ -667,7 +669,8 @@ static const struct sdhci_ops sdhci_j721e_4bit_ops = {
static const struct sdhci_pltfm_data sdhci_j721e_4bit_pdata = {
.ops = &sdhci_j721e_4bit_ops,
.quirks = SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12,
- .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+ SDHCI_QUIRK2_DISABLE_HW_TIMEOUT,
};
static const struct sdhci_am654_driver_data sdhci_j721e_4bit_drvdata = {
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 391d81ad960c..8dc4f5c493fc 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -559,7 +559,7 @@ static int mtdchar_blkpg_ioctl(struct mtd_info *mtd,
/* Sanitize user input */
p.devname[BLKPG_DEVNAMELTH - 1] = '\0';
- return mtd_add_partition(mtd, p.devname, p.start, p.length, NULL);
+ return mtd_add_partition(mtd, p.devname, p.start, p.length);
case BLKPG_DEL_PARTITION:
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 429d8c16baf0..5ba9a741f5ac 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -68,13 +68,7 @@ static struct class mtd_class = {
.pm = MTD_CLS_PM_OPS,
};
-static struct class mtd_master_class = {
- .name = "mtd_master",
- .pm = MTD_CLS_PM_OPS,
-};
-
static DEFINE_IDR(mtd_idr);
-static DEFINE_IDR(mtd_master_idr);
/* These are exported solely for the purpose of mtd_blkdevs.c. You
should not use them for _anything_ else */
@@ -89,9 +83,8 @@ EXPORT_SYMBOL_GPL(__mtd_next_device);
static LIST_HEAD(mtd_notifiers);
-#define MTD_MASTER_DEVS 255
+
#define MTD_DEVT(index) MKDEV(MTD_CHAR_MAJOR, (index)*2)
-static dev_t mtd_master_devt;
/* REVISIT once MTD uses the driver model better, whoever allocates
* the mtd_info will probably want to use the release() hook...
@@ -111,17 +104,6 @@ static void mtd_release(struct device *dev)
device_destroy(&mtd_class, index + 1);
}
-static void mtd_master_release(struct device *dev)
-{
- struct mtd_info *mtd = dev_get_drvdata(dev);
-
- idr_remove(&mtd_master_idr, mtd->index);
- of_node_put(mtd_get_of_node(mtd));
-
- if (mtd_is_partition(mtd))
- release_mtd_partition(mtd);
-}
-
static void mtd_device_release(struct kref *kref)
{
struct mtd_info *mtd = container_of(kref, struct mtd_info, refcnt);
@@ -385,11 +367,6 @@ static const struct device_type mtd_devtype = {
.release = mtd_release,
};
-static const struct device_type mtd_master_devtype = {
- .name = "mtd_master",
- .release = mtd_master_release,
-};
-
static bool mtd_expert_analysis_mode;
#ifdef CONFIG_DEBUG_FS
@@ -657,13 +634,13 @@ exit_parent:
/**
* add_mtd_device - register an MTD device
* @mtd: pointer to new MTD device info structure
- * @partitioned: create partitioned device
*
* Add a device to the list of MTD devices present in the system, and
* notify each currently active MTD 'user' of its arrival. Returns
* zero on success or non-zero on failure.
*/
-int add_mtd_device(struct mtd_info *mtd, bool partitioned)
+
+int add_mtd_device(struct mtd_info *mtd)
{
struct device_node *np = mtd_get_of_node(mtd);
struct mtd_info *master = mtd_get_master(mtd);
@@ -710,17 +687,10 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned)
ofidx = -1;
if (np)
ofidx = of_alias_get_id(np, "mtd");
- if (partitioned) {
- if (ofidx >= 0)
- i = idr_alloc(&mtd_idr, mtd, ofidx, ofidx + 1, GFP_KERNEL);
- else
- i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL);
- } else {
- if (ofidx >= 0)
- i = idr_alloc(&mtd_master_idr, mtd, ofidx, ofidx + 1, GFP_KERNEL);
- else
- i = idr_alloc(&mtd_master_idr, mtd, 0, 0, GFP_KERNEL);
- }
+ if (ofidx >= 0)
+ i = idr_alloc(&mtd_idr, mtd, ofidx, ofidx + 1, GFP_KERNEL);
+ else
+ i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL);
if (i < 0) {
error = i;
goto fail_locked;
@@ -768,18 +738,10 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned)
/* Caller should have set dev.parent to match the
* physical device, if appropriate.
*/
- if (partitioned) {
- mtd->dev.type = &mtd_devtype;
- mtd->dev.class = &mtd_class;
- mtd->dev.devt = MTD_DEVT(i);
- dev_set_name(&mtd->dev, "mtd%d", i);
- error = dev_set_name(&mtd->dev, "mtd%d", i);
- } else {
- mtd->dev.type = &mtd_master_devtype;
- mtd->dev.class = &mtd_master_class;
- mtd->dev.devt = MKDEV(MAJOR(mtd_master_devt), i);
- error = dev_set_name(&mtd->dev, "mtd_master%d", i);
- }
+ mtd->dev.type = &mtd_devtype;
+ mtd->dev.class = &mtd_class;
+ mtd->dev.devt = MTD_DEVT(i);
+ error = dev_set_name(&mtd->dev, "mtd%d", i);
if (error)
goto fail_devname;
dev_set_drvdata(&mtd->dev, mtd);
@@ -787,7 +749,6 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned)
of_node_get(mtd_get_of_node(mtd));
error = device_register(&mtd->dev);
if (error) {
- pr_err("mtd: %s device_register fail %d\n", mtd->name, error);
put_device(&mtd->dev);
goto fail_added;
}
@@ -799,13 +760,10 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned)
mtd_debugfs_populate(mtd);
- if (partitioned) {
- device_create(&mtd_class, mtd->dev.parent, MTD_DEVT(i) + 1, NULL,
- "mtd%dro", i);
- }
+ device_create(&mtd_class, mtd->dev.parent, MTD_DEVT(i) + 1, NULL,
+ "mtd%dro", i);
- pr_debug("mtd: Giving out %spartitioned device %d to %s\n",
- partitioned ? "" : "un-", i, mtd->name);
+ pr_debug("mtd: Giving out device %d to %s\n", i, mtd->name);
/* No need to get a refcount on the module containing
the notifier, since we hold the mtd_table_mutex */
list_for_each_entry(not, &mtd_notifiers, list)
@@ -813,16 +771,13 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned)
mutex_unlock(&mtd_table_mutex);
- if (partitioned) {
- if (of_property_read_bool(mtd_get_of_node(mtd), "linux,rootfs")) {
- if (IS_BUILTIN(CONFIG_MTD)) {
- pr_info("mtd: setting mtd%d (%s) as root device\n",
- mtd->index, mtd->name);
- ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, mtd->index);
- } else {
- pr_warn("mtd: can't set mtd%d (%s) as root device - mtd must be builtin\n",
- mtd->index, mtd->name);
- }
+ if (of_property_read_bool(mtd_get_of_node(mtd), "linux,rootfs")) {
+ if (IS_BUILTIN(CONFIG_MTD)) {
+ pr_info("mtd: setting mtd%d (%s) as root device\n", mtd->index, mtd->name);
+ ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, mtd->index);
+ } else {
+ pr_warn("mtd: can't set mtd%d (%s) as root device - mtd must be builtin\n",
+ mtd->index, mtd->name);
}
}
@@ -838,10 +793,7 @@ fail_nvmem_add:
fail_added:
of_node_put(mtd_get_of_node(mtd));
fail_devname:
- if (partitioned)
- idr_remove(&mtd_idr, i);
- else
- idr_remove(&mtd_master_idr, i);
+ idr_remove(&mtd_idr, i);
fail_locked:
mutex_unlock(&mtd_table_mutex);
return error;
@@ -859,14 +811,12 @@ fail_locked:
int del_mtd_device(struct mtd_info *mtd)
{
- struct mtd_notifier *not;
- struct idr *idr;
int ret;
+ struct mtd_notifier *not;
mutex_lock(&mtd_table_mutex);
- idr = mtd->dev.class == &mtd_class ? &mtd_idr : &mtd_master_idr;
- if (idr_find(idr, mtd->index) != mtd) {
+ if (idr_find(&mtd_idr, mtd->index) != mtd) {
ret = -ENODEV;
goto out_error;
}
@@ -1106,7 +1056,6 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types,
const struct mtd_partition *parts,
int nr_parts)
{
- struct mtd_info *parent;
int ret, err;
mtd_set_dev_defaults(mtd);
@@ -1115,30 +1064,25 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types,
if (ret)
goto out;
- ret = add_mtd_device(mtd, false);
- if (ret)
- goto out;
-
if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
- ret = mtd_add_partition(mtd, mtd->name, 0, MTDPART_SIZ_FULL, &parent);
+ ret = add_mtd_device(mtd);
if (ret)
goto out;
-
- } else {
- parent = mtd;
}
/* Prefer parsed partitions over driver-provided fallback */
- ret = parse_mtd_partitions(parent, types, parser_data);
+ ret = parse_mtd_partitions(mtd, types, parser_data);
if (ret == -EPROBE_DEFER)
goto out;
if (ret > 0)
ret = 0;
else if (nr_parts)
- ret = add_mtd_partitions(parent, parts, nr_parts);
- else if (!IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
- ret = mtd_add_partition(parent, mtd->name, 0, MTDPART_SIZ_FULL, NULL);
+ ret = add_mtd_partitions(mtd, parts, nr_parts);
+ else if (!device_is_registered(&mtd->dev))
+ ret = add_mtd_device(mtd);
+ else
+ ret = 0;
if (ret)
goto out;
@@ -1158,14 +1102,13 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types,
register_reboot_notifier(&mtd->reboot_notifier);
}
- return 0;
out:
- nvmem_unregister(mtd->otp_user_nvmem);
- nvmem_unregister(mtd->otp_factory_nvmem);
-
- del_mtd_partitions(mtd);
+ if (ret) {
+ nvmem_unregister(mtd->otp_user_nvmem);
+ nvmem_unregister(mtd->otp_factory_nvmem);
+ }
- if (device_is_registered(&mtd->dev)) {
+ if (ret && device_is_registered(&mtd->dev)) {
err = del_mtd_device(mtd);
if (err)
pr_err("Error when deleting MTD device (%d)\n", err);
@@ -1324,7 +1267,8 @@ int __get_mtd_device(struct mtd_info *mtd)
mtd = mtd->parent;
}
- kref_get(&master->refcnt);
+ if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
+ kref_get(&master->refcnt);
return 0;
}
@@ -1418,7 +1362,8 @@ void __put_mtd_device(struct mtd_info *mtd)
mtd = parent;
}
- kref_put(&master->refcnt, mtd_device_release);
+ if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
+ kref_put(&master->refcnt, mtd_device_release);
module_put(master->owner);
@@ -2585,16 +2530,6 @@ static int __init init_mtd(void)
if (ret)
goto err_reg;
- ret = class_register(&mtd_master_class);
- if (ret)
- goto err_reg2;
-
- ret = alloc_chrdev_region(&mtd_master_devt, 0, MTD_MASTER_DEVS, "mtd_master");
- if (ret < 0) {
- pr_err("unable to allocate char dev region\n");
- goto err_chrdev;
- }
-
mtd_bdi = mtd_bdi_init("mtd");
if (IS_ERR(mtd_bdi)) {
ret = PTR_ERR(mtd_bdi);
@@ -2619,10 +2554,6 @@ out_procfs:
bdi_unregister(mtd_bdi);
bdi_put(mtd_bdi);
err_bdi:
- unregister_chrdev_region(mtd_master_devt, MTD_MASTER_DEVS);
-err_chrdev:
- class_unregister(&mtd_master_class);
-err_reg2:
class_unregister(&mtd_class);
err_reg:
pr_err("Error registering mtd class or bdi: %d\n", ret);
@@ -2636,12 +2567,9 @@ static void __exit cleanup_mtd(void)
if (proc_mtd)
remove_proc_entry("mtd", NULL);
class_unregister(&mtd_class);
- class_unregister(&mtd_master_class);
- unregister_chrdev_region(mtd_master_devt, MTD_MASTER_DEVS);
bdi_unregister(mtd_bdi);
bdi_put(mtd_bdi);
idr_destroy(&mtd_idr);
- idr_destroy(&mtd_master_idr);
}
module_init(init_mtd);
diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h
index 2258d31c5aa6..b014861a06a6 100644
--- a/drivers/mtd/mtdcore.h
+++ b/drivers/mtd/mtdcore.h
@@ -8,7 +8,7 @@ extern struct mutex mtd_table_mutex;
extern struct backing_dev_info *mtd_bdi;
struct mtd_info *__mtd_next_device(int i);
-int __must_check add_mtd_device(struct mtd_info *mtd, bool partitioned);
+int __must_check add_mtd_device(struct mtd_info *mtd);
int del_mtd_device(struct mtd_info *mtd);
int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int);
int del_mtd_partitions(struct mtd_info *);
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 5a3db36d734e..994e8c51e674 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -86,7 +86,8 @@ static struct mtd_info *allocate_partition(struct mtd_info *parent,
* parent conditional on that option. Note, this is a way to
* distinguish between the parent and its partitions in sysfs.
*/
- child->dev.parent = &parent->dev;
+ child->dev.parent = IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd_is_partition(parent) ?
+ &parent->dev : parent->dev.parent;
child->dev.of_node = part->of_node;
child->parent = parent;
child->part.offset = part->offset;
@@ -242,7 +243,7 @@ static int mtd_add_partition_attrs(struct mtd_info *new)
}
int mtd_add_partition(struct mtd_info *parent, const char *name,
- long long offset, long long length, struct mtd_info **out)
+ long long offset, long long length)
{
struct mtd_info *master = mtd_get_master(parent);
u64 parent_size = mtd_is_partition(parent) ?
@@ -275,15 +276,12 @@ int mtd_add_partition(struct mtd_info *parent, const char *name,
list_add_tail(&child->part.node, &parent->partitions);
mutex_unlock(&master->master.partitions_lock);
- ret = add_mtd_device(child, true);
+ ret = add_mtd_device(child);
if (ret)
goto err_remove_part;
mtd_add_partition_attrs(child);
- if (out)
- *out = child;
-
return 0;
err_remove_part:
@@ -415,7 +413,7 @@ int add_mtd_partitions(struct mtd_info *parent,
list_add_tail(&child->part.node, &parent->partitions);
mutex_unlock(&master->master.partitions_lock);
- ret = add_mtd_device(child, true);
+ ret = add_mtd_device(child);
if (ret) {
mutex_lock(&master->master.partitions_lock);
list_del(&child->part.node);
@@ -592,6 +590,9 @@ static int mtd_part_of_parse(struct mtd_info *master,
int ret, err = 0;
dev = &master->dev;
+ /* Use parent device (controller) if the top level MTD is not registered */
+ if (!IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) && !mtd_is_partition(master))
+ dev = master->dev.parent;
np = mtd_get_of_node(master);
if (mtd_is_partition(master))
@@ -710,7 +711,6 @@ int parse_mtd_partitions(struct mtd_info *master, const char *const *types,
if (ret < 0 && !err)
err = ret;
}
-
return err;
}
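With the out parameter gone, mtd_add_partition() callers (like mtdchar's BLKPG_ADD_PARTITION handler above) pass just the parent, name, offset, and length. A caller sketch with hypothetical name and geometry:

    #include <linux/mtd/mtd.h>
    #include <linux/mtd/partitions.h>
    #include <linux/sizes.h>

    /* carve a hypothetical 1 MiB "boot" partition at offset 0 */
    static int demo_add_boot_partition(struct mtd_info *mtd)
    {
            return mtd_add_partition(mtd, "boot", 0, SZ_1M);
    }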
diff --git a/drivers/mtd/nand/qpic_common.c b/drivers/mtd/nand/qpic_common.c
index 4dc4d65e7d32..8e604cc22ca3 100644
--- a/drivers/mtd/nand/qpic_common.c
+++ b/drivers/mtd/nand/qpic_common.c
@@ -57,14 +57,15 @@ qcom_alloc_bam_transaction(struct qcom_nand_controller *nandc)
bam_txn_buf += sizeof(*bam_txn);
bam_txn->bam_ce = bam_txn_buf;
- bam_txn_buf +=
- sizeof(*bam_txn->bam_ce) * QPIC_PER_CW_CMD_ELEMENTS * num_cw;
+ bam_txn->bam_ce_nitems = QPIC_PER_CW_CMD_ELEMENTS * num_cw;
+ bam_txn_buf += sizeof(*bam_txn->bam_ce) * bam_txn->bam_ce_nitems;
bam_txn->cmd_sgl = bam_txn_buf;
- bam_txn_buf +=
- sizeof(*bam_txn->cmd_sgl) * QPIC_PER_CW_CMD_SGL * num_cw;
+ bam_txn->cmd_sgl_nitems = QPIC_PER_CW_CMD_SGL * num_cw;
+ bam_txn_buf += sizeof(*bam_txn->cmd_sgl) * bam_txn->cmd_sgl_nitems;
bam_txn->data_sgl = bam_txn_buf;
+ bam_txn->data_sgl_nitems = QPIC_PER_CW_DATA_SGL * num_cw;
init_completion(&bam_txn->txn_done);
@@ -238,6 +239,11 @@ int qcom_prep_bam_dma_desc_cmd(struct qcom_nand_controller *nandc, bool read,
struct bam_transaction *bam_txn = nandc->bam_txn;
u32 offset;
+ if (bam_txn->bam_ce_pos + size > bam_txn->bam_ce_nitems) {
+ dev_err(nandc->dev, "BAM %s array is full\n", "CE");
+ return -EINVAL;
+ }
+
bam_ce_buffer = &bam_txn->bam_ce[bam_txn->bam_ce_pos];
/* fill the command desc */
@@ -258,6 +264,12 @@ int qcom_prep_bam_dma_desc_cmd(struct qcom_nand_controller *nandc, bool read,
/* use the separate sgl after this command */
if (flags & NAND_BAM_NEXT_SGL) {
+ if (bam_txn->cmd_sgl_pos >= bam_txn->cmd_sgl_nitems) {
+ dev_err(nandc->dev, "BAM %s array is full\n",
+ "CMD sgl");
+ return -EINVAL;
+ }
+
bam_ce_buffer = &bam_txn->bam_ce[bam_txn->bam_ce_start];
bam_ce_size = (bam_txn->bam_ce_pos -
bam_txn->bam_ce_start) *
@@ -297,10 +309,20 @@ int qcom_prep_bam_dma_desc_data(struct qcom_nand_controller *nandc, bool read,
struct bam_transaction *bam_txn = nandc->bam_txn;
if (read) {
+ if (bam_txn->rx_sgl_pos >= bam_txn->data_sgl_nitems) {
+ dev_err(nandc->dev, "BAM %s array is full\n", "RX sgl");
+ return -EINVAL;
+ }
+
sg_set_buf(&bam_txn->data_sgl[bam_txn->rx_sgl_pos],
vaddr, size);
bam_txn->rx_sgl_pos++;
} else {
+ if (bam_txn->tx_sgl_pos >= bam_txn->data_sgl_nitems) {
+ dev_err(nandc->dev, "BAM %s array is full\n", "TX sgl");
+ return -EINVAL;
+ }
+
sg_set_buf(&bam_txn->data_sgl[bam_txn->tx_sgl_pos],
vaddr, size);
bam_txn->tx_sgl_pos++;
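Each check added above has the same shape: record the array capacity when the transaction is laid out, then refuse to advance a position past it instead of writing into the neighbouring array. A generic sketch (hypothetical names):

    #include <linux/errno.h>

    struct demo_txn {
            unsigned int pos, nitems;
            int *slots;
    };

    static int demo_push(struct demo_txn *t, int val)
    {
            if (t->pos >= t->nitems)
                    return -EINVAL;   /* full: fail instead of overflowing */
            t->slots[t->pos++] = val;
            return 0;
    }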
diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
index 7099db7a62be..c411fe9be3ef 100644
--- a/drivers/mtd/nand/spi/core.c
+++ b/drivers/mtd/nand/spi/core.c
@@ -1585,6 +1585,7 @@ static void spinand_cleanup(struct spinand_device *spinand)
{
struct nand_device *nand = spinand_to_nand(spinand);
+ nanddev_ecc_engine_cleanup(nand);
nanddev_cleanup(nand);
spinand_manufacturer_cleanup(spinand);
kfree(spinand->databuf);
diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c
index 19f8dd4a6370..b7a28f001a38 100644
--- a/drivers/mtd/nand/spi/winbond.c
+++ b/drivers/mtd/nand/spi/winbond.c
@@ -25,7 +25,7 @@
static SPINAND_OP_VARIANTS(read_cache_octal_variants,
SPINAND_PAGE_READ_FROM_CACHE_1S_1D_8D_OP(0, 2, NULL, 0, 105 * HZ_PER_MHZ),
- SPINAND_PAGE_READ_FROM_CACHE_1S_8S_8S_OP(0, 16, NULL, 0, 86 * HZ_PER_MHZ),
+ SPINAND_PAGE_READ_FROM_CACHE_1S_8S_8S_OP(0, 16, NULL, 0, 162 * HZ_PER_MHZ),
SPINAND_PAGE_READ_FROM_CACHE_1S_1S_8S_OP(0, 1, NULL, 0, 133 * HZ_PER_MHZ),
SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(0, 1, NULL, 0),
SPINAND_PAGE_READ_FROM_CACHE_1S_1S_1S_OP(0, 1, NULL, 0));
@@ -42,11 +42,11 @@ static SPINAND_OP_VARIANTS(update_cache_octal_variants,
static SPINAND_OP_VARIANTS(read_cache_dual_quad_dtr_variants,
SPINAND_PAGE_READ_FROM_CACHE_1S_4D_4D_OP(0, 8, NULL, 0, 80 * HZ_PER_MHZ),
SPINAND_PAGE_READ_FROM_CACHE_1S_1D_4D_OP(0, 2, NULL, 0, 80 * HZ_PER_MHZ),
- SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(0, 2, NULL, 0),
+ SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(0, 2, NULL, 0, 104 * HZ_PER_MHZ),
SPINAND_PAGE_READ_FROM_CACHE_1S_1S_4S_OP(0, 1, NULL, 0),
SPINAND_PAGE_READ_FROM_CACHE_1S_2D_2D_OP(0, 4, NULL, 0, 80 * HZ_PER_MHZ),
SPINAND_PAGE_READ_FROM_CACHE_1S_1D_2D_OP(0, 2, NULL, 0, 80 * HZ_PER_MHZ),
- SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(0, 1, NULL, 0),
+ SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(0, 1, NULL, 0, 104 * HZ_PER_MHZ),
SPINAND_PAGE_READ_FROM_CACHE_1S_1S_2S_OP(0, 1, NULL, 0),
SPINAND_PAGE_READ_FROM_CACHE_1S_1D_1D_OP(0, 2, NULL, 0, 80 * HZ_PER_MHZ),
SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(0, 1, NULL, 0),
@@ -289,7 +289,7 @@ static const struct spinand_info winbond_spinand_table[] = {
SPINAND_ECCINFO(&w35n01jw_ooblayout, NULL)),
SPINAND_INFO("W35N02JW", /* 1.8V */
SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0xdf, 0x22),
- NAND_MEMORG(1, 4096, 128, 64, 512, 10, 2, 1, 1),
+ NAND_MEMORG(1, 4096, 128, 64, 512, 10, 1, 2, 1),
NAND_ECCREQ(1, 512),
SPINAND_INFO_OP_VARIANTS(&read_cache_octal_variants,
&write_cache_octal_variants,
@@ -298,7 +298,7 @@ static const struct spinand_info winbond_spinand_table[] = {
SPINAND_ECCINFO(&w35n01jw_ooblayout, NULL)),
SPINAND_INFO("W35N04JW", /* 1.8V */
SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0xdf, 0x23),
- NAND_MEMORG(1, 4096, 128, 64, 512, 10, 4, 1, 1),
+ NAND_MEMORG(1, 4096, 128, 64, 512, 10, 1, 4, 1),
NAND_ECCREQ(1, 512),
SPINAND_INFO_OP_VARIANTS(&read_cache_octal_variants,
&write_cache_octal_variants,
diff --git a/drivers/mux/Kconfig b/drivers/mux/Kconfig
index 80f015cf6e54..c68132e38138 100644
--- a/drivers/mux/Kconfig
+++ b/drivers/mux/Kconfig
@@ -48,6 +48,7 @@ config MUX_GPIO
config MUX_MMIO
tristate "MMIO/Regmap register bitfield-controlled Multiplexer"
depends on OF
+ select REGMAP_MMIO
help
MMIO/Regmap register bitfield-controlled Multiplexer controller.
diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c
index ea8c807af4d8..3913971125de 100644
--- a/drivers/net/can/dev/dev.c
+++ b/drivers/net/can/dev/dev.c
@@ -145,13 +145,16 @@ void can_change_state(struct net_device *dev, struct can_frame *cf,
EXPORT_SYMBOL_GPL(can_change_state);
/* CAN device restart for bus-off recovery */
-static void can_restart(struct net_device *dev)
+static int can_restart(struct net_device *dev)
{
struct can_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
struct can_frame *cf;
int err;
+ if (!priv->do_set_mode)
+ return -EOPNOTSUPP;
+
if (netif_carrier_ok(dev))
netdev_err(dev, "Attempt to restart for bus-off recovery, but carrier is OK?\n");
@@ -173,10 +176,14 @@ static void can_restart(struct net_device *dev)
if (err) {
netdev_err(dev, "Restart failed, error %pe\n", ERR_PTR(err));
netif_carrier_off(dev);
+
+ return err;
} else {
netdev_dbg(dev, "Restarted\n");
priv->can_stats.restarts++;
}
+
+ return 0;
}
static void can_restart_work(struct work_struct *work)
@@ -201,9 +208,8 @@ int can_restart_now(struct net_device *dev)
return -EBUSY;
cancel_delayed_work_sync(&priv->restart_work);
- can_restart(dev);
- return 0;
+ return can_restart(dev);
}
/* CAN bus-off
diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c
index a36842ace084..f0e3f0d538fb 100644
--- a/drivers/net/can/dev/netlink.c
+++ b/drivers/net/can/dev/netlink.c
@@ -285,6 +285,12 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[],
}
if (data[IFLA_CAN_RESTART_MS]) {
+ if (!priv->do_set_mode) {
+ NL_SET_ERR_MSG(extack,
+ "Device doesn't support restart from Bus Off");
+ return -EOPNOTSUPP;
+ }
+
/* Do not allow changing restart delay while running */
if (dev->flags & IFF_UP)
return -EBUSY;
@@ -292,6 +298,12 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[],
}
if (data[IFLA_CAN_RESTART]) {
+ if (!priv->do_set_mode) {
+ NL_SET_ERR_MSG(extack,
+ "Device doesn't support restart from Bus Off");
+ return -EOPNOTSUPP;
+ }
+
/* Do not allow a restart while not running */
if (!(dev->flags & IFF_UP))
return -EINVAL;
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 6c656bfdb323..fe74dbd2c966 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -665,7 +665,7 @@ static int m_can_handle_lost_msg(struct net_device *dev)
struct can_frame *frame;
u32 timestamp = 0;
- netdev_err(dev, "msg lost in rxf0\n");
+ netdev_dbg(dev, "msg lost in rxf0\n");
stats->rx_errors++;
stats->rx_over_errors++;
diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c
index e5c162f8c589..39b0b5277b11 100644
--- a/drivers/net/can/m_can/tcan4x5x-core.c
+++ b/drivers/net/can/m_can/tcan4x5x-core.c
@@ -343,21 +343,19 @@ static void tcan4x5x_get_dt_data(struct m_can_classdev *cdev)
of_property_read_bool(cdev->dev->of_node, "ti,nwkrq-voltage-vio");
}
-static int tcan4x5x_get_gpios(struct m_can_classdev *cdev,
- const struct tcan4x5x_version_info *version_info)
+static int tcan4x5x_get_gpios(struct m_can_classdev *cdev)
{
struct tcan4x5x_priv *tcan4x5x = cdev_to_priv(cdev);
int ret;
- if (version_info->has_wake_pin) {
- tcan4x5x->device_wake_gpio = devm_gpiod_get(cdev->dev, "device-wake",
- GPIOD_OUT_HIGH);
- if (IS_ERR(tcan4x5x->device_wake_gpio)) {
- if (PTR_ERR(tcan4x5x->device_wake_gpio) == -EPROBE_DEFER)
- return -EPROBE_DEFER;
+ tcan4x5x->device_wake_gpio = devm_gpiod_get_optional(cdev->dev,
+ "device-wake",
+ GPIOD_OUT_HIGH);
+ if (IS_ERR(tcan4x5x->device_wake_gpio)) {
+ if (PTR_ERR(tcan4x5x->device_wake_gpio) == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
- tcan4x5x_disable_wake(cdev);
- }
+ tcan4x5x->device_wake_gpio = NULL;
}
tcan4x5x->reset_gpio = devm_gpiod_get_optional(cdev->dev, "reset",
@@ -369,14 +367,31 @@ static int tcan4x5x_get_gpios(struct m_can_classdev *cdev,
if (ret)
return ret;
- if (version_info->has_state_pin) {
- tcan4x5x->device_state_gpio = devm_gpiod_get_optional(cdev->dev,
- "device-state",
- GPIOD_IN);
- if (IS_ERR(tcan4x5x->device_state_gpio)) {
- tcan4x5x->device_state_gpio = NULL;
- tcan4x5x_disable_state(cdev);
- }
+ tcan4x5x->device_state_gpio = devm_gpiod_get_optional(cdev->dev,
+ "device-state",
+ GPIOD_IN);
+ if (IS_ERR(tcan4x5x->device_state_gpio))
+ tcan4x5x->device_state_gpio = NULL;
+
+ return 0;
+}
+
+static int tcan4x5x_check_gpios(struct m_can_classdev *cdev,
+ const struct tcan4x5x_version_info *version_info)
+{
+ struct tcan4x5x_priv *tcan4x5x = cdev_to_priv(cdev);
+ int ret;
+
+ if (version_info->has_wake_pin && !tcan4x5x->device_wake_gpio) {
+ ret = tcan4x5x_disable_wake(cdev);
+ if (ret)
+ return ret;
+ }
+
+ if (version_info->has_state_pin && !tcan4x5x->device_state_gpio) {
+ ret = tcan4x5x_disable_state(cdev);
+ if (ret)
+ return ret;
}
return 0;
@@ -411,10 +426,11 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
priv = cdev_to_priv(mcan_class);
priv->power = devm_regulator_get_optional(&spi->dev, "vsup");
- if (PTR_ERR(priv->power) == -EPROBE_DEFER) {
- ret = -EPROBE_DEFER;
- goto out_m_can_class_free_dev;
- } else {
+ if (IS_ERR(priv->power)) {
+ if (PTR_ERR(priv->power) == -EPROBE_DEFER) {
+ ret = -EPROBE_DEFER;
+ goto out_m_can_class_free_dev;
+ }
priv->power = NULL;
}
@@ -467,15 +483,21 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
goto out_m_can_class_free_dev;
}
+ ret = tcan4x5x_get_gpios(mcan_class);
+ if (ret) {
+ dev_err(&spi->dev, "Getting gpios failed %pe\n", ERR_PTR(ret));
+ goto out_power;
+ }
+
version_info = tcan4x5x_find_version(priv);
if (IS_ERR(version_info)) {
ret = PTR_ERR(version_info);
goto out_power;
}
- ret = tcan4x5x_get_gpios(mcan_class, version_info);
+ ret = tcan4x5x_check_gpios(mcan_class, version_info);
if (ret) {
- dev_err(&spi->dev, "Getting gpios failed %pe\n", ERR_PTR(ret));
+ dev_err(&spi->dev, "Checking gpios failed %pe\n", ERR_PTR(ret));
goto out_power;
}
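[Note: the reworked probe fetches GPIOs with devm_gpiod_get_optional() before the chip version is known, then validates them against the version info. A hedged sketch of the optional-GPIO idiom, assuming the standard gpiod consumer API:

	#include <linux/gpio/consumer.h>
	#include <linux/err.h>
	#include <linux/device.h>

	/* Absence of the GPIO is not an error (NULL is returned), but
	 * probe deferral must still be propagated so the lookup retries.
	 */
	static int get_wake_gpio(struct device *dev, struct gpio_desc **out)
	{
		struct gpio_desc *gpio;

		gpio = devm_gpiod_get_optional(dev, "device-wake",
					       GPIOD_OUT_HIGH);
		if (IS_ERR(gpio)) {
			if (PTR_ERR(gpio) == -EPROBE_DEFER)
				return -EPROBE_DEFER;
			gpio = NULL;	/* tolerate other lookup failures */
		}
		*out = gpio;
		return 0;
	}
]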
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 862bdccb7439..dc2f4adac9bc 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -2034,9 +2034,6 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge,
b53_get_vlan_entry(dev, pvid, vl);
vl->members &= ~BIT(port);
- if (vl->members == BIT(cpu_port))
- vl->members &= ~BIT(cpu_port);
- vl->untag = vl->members;
b53_set_vlan_entry(dev, pvid, vl);
}
@@ -2115,8 +2112,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge)
}
b53_get_vlan_entry(dev, pvid, vl);
- vl->members |= BIT(port) | BIT(cpu_port);
- vl->untag |= BIT(port) | BIT(cpu_port);
+ vl->members |= BIT(port);
b53_set_vlan_entry(dev, pvid, vl);
}
}
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index a7ec609d64de..9057180051df 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -1065,23 +1065,18 @@ static void airoha_qdma_cleanup_tx_queue(struct airoha_queue *q)
static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma)
{
+ int size, index, num_desc = HW_DSCP_NUM;
struct airoha_eth *eth = qdma->eth;
int id = qdma - &eth->qdma[0];
+ u32 status, buf_size;
dma_addr_t dma_addr;
const char *name;
- int size, index;
- u32 status;
-
- size = HW_DSCP_NUM * sizeof(struct airoha_qdma_fwd_desc);
- if (!dmam_alloc_coherent(eth->dev, size, &dma_addr, GFP_KERNEL))
- return -ENOMEM;
-
- airoha_qdma_wr(qdma, REG_FWD_DSCP_BASE, dma_addr);
name = devm_kasprintf(eth->dev, GFP_KERNEL, "qdma%d-buf", id);
if (!name)
return -ENOMEM;
+ buf_size = id ? AIROHA_MAX_PACKET_SIZE / 2 : AIROHA_MAX_PACKET_SIZE;
index = of_property_match_string(eth->dev->of_node,
"memory-region-names", name);
if (index >= 0) {
@@ -1099,8 +1094,12 @@ static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma)
rmem = of_reserved_mem_lookup(np);
of_node_put(np);
dma_addr = rmem->base;
+ /* Compute the number of hw descriptors according to the
+ * reserved memory size and the payload buffer size
+ */
+ num_desc = div_u64(rmem->size, buf_size);
} else {
- size = AIROHA_MAX_PACKET_SIZE * HW_DSCP_NUM;
+ size = buf_size * num_desc;
if (!dmam_alloc_coherent(eth->dev, size, &dma_addr,
GFP_KERNEL))
return -ENOMEM;
@@ -1108,15 +1107,21 @@ static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma)
airoha_qdma_wr(qdma, REG_FWD_BUF_BASE, dma_addr);
+ size = num_desc * sizeof(struct airoha_qdma_fwd_desc);
+ if (!dmam_alloc_coherent(eth->dev, size, &dma_addr, GFP_KERNEL))
+ return -ENOMEM;
+
+ airoha_qdma_wr(qdma, REG_FWD_DSCP_BASE, dma_addr);
+ /* QDMA0: 2KB. QDMA1: 1KB */
airoha_qdma_rmw(qdma, REG_HW_FWD_DSCP_CFG,
HW_FWD_DSCP_PAYLOAD_SIZE_MASK,
- FIELD_PREP(HW_FWD_DSCP_PAYLOAD_SIZE_MASK, 0));
+ FIELD_PREP(HW_FWD_DSCP_PAYLOAD_SIZE_MASK, !!id));
airoha_qdma_rmw(qdma, REG_FWD_DSCP_LOW_THR, FWD_DSCP_LOW_THR_MASK,
FIELD_PREP(FWD_DSCP_LOW_THR_MASK, 128));
airoha_qdma_rmw(qdma, REG_LMGR_INIT_CFG,
LMGR_INIT_START | LMGR_SRAM_MODE_MASK |
HW_FWD_DESC_NUM_MASK,
- FIELD_PREP(HW_FWD_DESC_NUM_MASK, HW_DSCP_NUM) |
+ FIELD_PREP(HW_FWD_DESC_NUM_MASK, num_desc) |
LMGR_INIT_START | LMGR_SRAM_MODE_MASK);
return read_poll_timeout(airoha_qdma_rr, status,
@@ -2979,6 +2984,7 @@ static int airoha_probe(struct platform_device *pdev)
error_napi_stop:
for (i = 0; i < ARRAY_SIZE(eth->qdma); i++)
airoha_qdma_stop_napi(&eth->qdma[i]);
+ airoha_ppe_deinit(eth);
error_hw_cleanup:
for (i = 0; i < ARRAY_SIZE(eth->qdma); i++)
airoha_hw_cleanup(&eth->qdma[i]);
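[Note: the descriptor count above is now derived from the reserved-memory region size instead of the fixed HW_DSCP_NUM. A small sketch of the sizing rule; the buffer-size constant is a hypothetical stand-in for AIROHA_MAX_PACKET_SIZE:

	#include <linux/math64.h>
	#include <linux/types.h>

	#define PKT_BUF_SIZE	2048	/* hypothetical payload buffer size */

	/* QDMA0 keeps full-size payload buffers, QDMA1 half-size ones, so
	 * the same reserved region holds twice as many descriptors on QDMA1.
	 */
	static u32 hfwd_num_desc(u64 rmem_size, int qdma_id)
	{
		u32 buf_size = qdma_id ? PKT_BUF_SIZE / 2 : PKT_BUF_SIZE;

		return div_u64(rmem_size, buf_size);
	}
]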
diff --git a/drivers/net/ethernet/airoha/airoha_npu.c b/drivers/net/ethernet/airoha/airoha_npu.c
index 0e5b8c21b9aa..1e58a4aeb9a0 100644
--- a/drivers/net/ethernet/airoha/airoha_npu.c
+++ b/drivers/net/ethernet/airoha/airoha_npu.c
@@ -401,12 +401,13 @@ struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr)
return ERR_PTR(-ENODEV);
pdev = of_find_device_by_node(np);
- of_node_put(np);
if (!pdev) {
dev_err(dev, "cannot find device node %s\n", np->name);
+ of_node_put(np);
return ERR_PTR(-ENODEV);
}
+ of_node_put(np);
if (!try_module_get(THIS_MODULE)) {
dev_err(dev, "failed to get the device driver module\n");
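[Note: the reordering above is a use-after-put fix: np->name was read after of_node_put() had dropped the node reference. A sketch of the corrected ordering, keeping the reference alive across the last dereference:

	#include <linux/of.h>
	#include <linux/of_platform.h>

	static struct platform_device *find_pdev(struct device *dev,
						 struct device_node *np)
	{
		struct platform_device *pdev = of_find_device_by_node(np);

		if (!pdev) {
			/* last use of np is the print below */
			dev_err(dev, "cannot find device node %s\n", np->name);
			of_node_put(np);
			return NULL;
		}
		of_node_put(np);
		return pdev;
	}
]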
diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c
index 9067d2fc7706..0e217acfc5ef 100644
--- a/drivers/net/ethernet/airoha/airoha_ppe.c
+++ b/drivers/net/ethernet/airoha/airoha_ppe.c
@@ -809,8 +809,10 @@ airoha_ppe_foe_flow_l2_entry_update(struct airoha_ppe *ppe,
int idle;
hwe = airoha_ppe_foe_get_entry(ppe, iter->hash);
- ib1 = READ_ONCE(hwe->ib1);
+ if (!hwe)
+ continue;
+ ib1 = READ_ONCE(hwe->ib1);
state = FIELD_GET(AIROHA_FOE_IB1_BIND_STATE, ib1);
if (state != AIROHA_FOE_STATE_BIND) {
iter->hash = 0xffff;
diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h
index 04187eb40ec6..150c85995cc1 100644
--- a/drivers/net/ethernet/airoha/airoha_regs.h
+++ b/drivers/net/ethernet/airoha/airoha_regs.h
@@ -614,8 +614,9 @@
RX19_DONE_INT_MASK | RX18_DONE_INT_MASK | \
RX17_DONE_INT_MASK | RX16_DONE_INT_MASK)
-#define RX_DONE_INT_MASK (RX_DONE_HIGH_INT_MASK | RX_DONE_LOW_INT_MASK)
#define RX_DONE_HIGH_OFFSET fls(RX_DONE_HIGH_INT_MASK)
+#define RX_DONE_INT_MASK \
+ ((RX_DONE_HIGH_INT_MASK << RX_DONE_HIGH_OFFSET) | RX_DONE_LOW_INT_MASK)
#define INT_RX2_MASK(_n) \
((RX_NO_CPU_DSCP_HIGH_INT_MASK & (_n)) | \
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index e1296cbf4ff3..9316de4126cf 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -1269,6 +1269,8 @@
#define MDIO_VEND2_CTRL1_SS13 BIT(13)
#endif
+#define XGBE_VEND2_MAC_AUTO_SW BIT(9)
+
/* MDIO mask values */
#define XGBE_AN_CL73_INT_CMPLT BIT(0)
#define XGBE_AN_CL73_INC_LINK BIT(1)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 71449edbb76d..1a37ec45e650 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -266,6 +266,10 @@ static void xgbe_an37_set(struct xgbe_prv_data *pdata, bool enable,
reg |= MDIO_VEND2_CTRL1_AN_RESTART;
XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_CTRL1, reg);
+
+ reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_PCS_DIG_CTRL);
+ reg |= XGBE_VEND2_MAC_AUTO_SW;
+ XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_PCS_DIG_CTRL, reg);
}
static void xgbe_an37_restart(struct xgbe_prv_data *pdata)
@@ -894,6 +898,11 @@ static void xgbe_an37_init(struct xgbe_prv_data *pdata)
netif_dbg(pdata, link, pdata->netdev, "CL37 AN (%s) initialized\n",
(pdata->an_mode == XGBE_AN_MODE_CL37) ? "BaseX" : "SGMII");
+
+ reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1);
+ reg &= ~MDIO_AN_CTRL1_ENABLE;
+ XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_CTRL1, reg);
+
}
static void xgbe_an73_init(struct xgbe_prv_data *pdata)
@@ -1295,6 +1304,10 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata)
pdata->phy.link = pdata->phy_if.phy_impl.link_status(pdata,
&an_restart);
+ /* bail out if the link status register read fails */
+ if (pdata->phy.link < 0)
+ return;
+
if (an_restart) {
xgbe_phy_config_aneg(pdata);
goto adjust_link;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 7a4dfa4e19c7..23c39e92e783 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -2746,8 +2746,7 @@ static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed)
static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
{
struct xgbe_phy_data *phy_data = pdata->phy_data;
- unsigned int reg;
- int ret;
+ int reg, ret;
*an_restart = 0;
@@ -2781,11 +2780,20 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
return 0;
}
- /* Link status is latched low, so read once to clear
- * and then read again to get current state
- */
- reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
+ if (reg < 0)
+ return reg;
+
+ /* Link status is latched low so that momentary link drops

+ * can be detected. If the link was already down, read again
+ * to get the latest state.
+ */
+
+ if (!pdata->phy.link && !(reg & MDIO_STAT1_LSTATUS)) {
+ reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
+ if (reg < 0)
+ return reg;
+ }
if (pdata->en_rx_adap) {
/* if the link is available and adaptation is done,
@@ -2804,9 +2812,7 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
xgbe_phy_set_mode(pdata, phy_data->cur_mode);
}
- /* check again for the link and adaptation status */
- reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
- if ((reg & MDIO_STAT1_LSTATUS) && pdata->rx_adapt_done)
+ if (pdata->rx_adapt_done)
return 1;
} else if (reg & MDIO_STAT1_LSTATUS)
return 1;
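[Note: MDIO_STAT1 link status is latched low, so the register reports any drop since the last read and clears on read. The hunk above stops discarding the first read and re-reads only when the link was already down. A condensed sketch of the same logic; read_stat() is a hypothetical accessor and LSTATUS is the conventional bit 2 of the status word:

	#include <stdbool.h>

	#define LSTATUS	0x0004	/* latched link-status bit */

	/* Returns 1 for link up, 0 for down, negative on read error. */
	static int check_link(bool prev_link_up, int (*read_stat)(void))
	{
		int reg = read_stat();

		if (reg < 0)
			return reg;

		/* Latched 0 while already down: re-read for the live
		 * state, without masking a drop seen while link was up.
		 */
		if (!prev_link_up && !(reg & LSTATUS)) {
			reg = read_stat();
			if (reg < 0)
				return reg;
		}
		return !!(reg & LSTATUS);
	}
]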
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 6359bb87dc13..057379cd43ba 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -183,12 +183,12 @@
#define XGBE_LINK_TIMEOUT 5
#define XGBE_KR_TRAINING_WAIT_ITER 50
-#define XGBE_SGMII_AN_LINK_STATUS BIT(1)
+#define XGBE_SGMII_AN_LINK_DUPLEX BIT(1)
#define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3))
#define XGBE_SGMII_AN_LINK_SPEED_10 0x00
#define XGBE_SGMII_AN_LINK_SPEED_100 0x04
#define XGBE_SGMII_AN_LINK_SPEED_1000 0x08
-#define XGBE_SGMII_AN_LINK_DUPLEX BIT(4)
+#define XGBE_SGMII_AN_LINK_STATUS BIT(4)
/* ECC correctable error notification window (seconds) */
#define XGBE_ECC_LIMIT 60
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c
index cfdb546a09e7..98a4d089270e 100644
--- a/drivers/net/ethernet/atheros/atlx/atl1.c
+++ b/drivers/net/ethernet/atheros/atlx/atl1.c
@@ -1861,14 +1861,21 @@ static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter)
break;
}
- buffer_info->alloced = 1;
- buffer_info->skb = skb;
- buffer_info->length = (u16) adapter->rx_buffer_len;
page = virt_to_page(skb->data);
offset = offset_in_page(skb->data);
buffer_info->dma = dma_map_page(&pdev->dev, page, offset,
adapter->rx_buffer_len,
DMA_FROM_DEVICE);
+ if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
+ kfree_skb(skb);
+ adapter->soft_stats.rx_dropped++;
+ break;
+ }
+
+ buffer_info->alloced = 1;
+ buffer_info->skb = skb;
+ buffer_info->length = (u16)adapter->rx_buffer_len;
+
rfd_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
rfd_desc->buf_len = cpu_to_le16(adapter->rx_buffer_len);
rfd_desc->coalese = 0;
@@ -2183,8 +2190,8 @@ static int atl1_tx_csum(struct atl1_adapter *adapter, struct sk_buff *skb,
return 0;
}
-static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
- struct tx_packet_desc *ptpd)
+static bool atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
+ struct tx_packet_desc *ptpd)
{
struct atl1_tpd_ring *tpd_ring = &adapter->tpd_ring;
struct atl1_buffer *buffer_info;
@@ -2194,6 +2201,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
unsigned int nr_frags;
unsigned int f;
int retval;
+ u16 first_mapped;
u16 next_to_use;
u16 data_len;
u8 hdr_len;
@@ -2201,6 +2209,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
buf_len -= skb->data_len;
nr_frags = skb_shinfo(skb)->nr_frags;
next_to_use = atomic_read(&tpd_ring->next_to_use);
+ first_mapped = next_to_use;
buffer_info = &tpd_ring->buffer_info[next_to_use];
BUG_ON(buffer_info->skb);
/* put skb in last TPD */
@@ -2216,6 +2225,8 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
buffer_info->dma = dma_map_page(&adapter->pdev->dev, page,
offset, hdr_len,
DMA_TO_DEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma))
+ goto dma_err;
if (++next_to_use == tpd_ring->count)
next_to_use = 0;
@@ -2242,6 +2253,9 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
page, offset,
buffer_info->length,
DMA_TO_DEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev,
+ buffer_info->dma))
+ goto dma_err;
if (++next_to_use == tpd_ring->count)
next_to_use = 0;
}
@@ -2254,6 +2268,8 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
buffer_info->dma = dma_map_page(&adapter->pdev->dev, page,
offset, buf_len,
DMA_TO_DEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma))
+ goto dma_err;
if (++next_to_use == tpd_ring->count)
next_to_use = 0;
}
@@ -2277,6 +2293,9 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
buffer_info->dma = skb_frag_dma_map(&adapter->pdev->dev,
frag, i * ATL1_MAX_TX_BUF_LEN,
buffer_info->length, DMA_TO_DEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev,
+ buffer_info->dma))
+ goto dma_err;
if (++next_to_use == tpd_ring->count)
next_to_use = 0;
@@ -2285,6 +2304,22 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb,
/* last tpd's buffer-info */
buffer_info->skb = skb;
+
+ return true;
+
+ dma_err:
+ while (first_mapped != next_to_use) {
+ buffer_info = &tpd_ring->buffer_info[first_mapped];
+ dma_unmap_page(&adapter->pdev->dev,
+ buffer_info->dma,
+ buffer_info->length,
+ DMA_TO_DEVICE);
+ buffer_info->dma = 0;
+
+ if (++first_mapped == tpd_ring->count)
+ first_mapped = 0;
+ }
+ return false;
}
static void atl1_tx_queue(struct atl1_adapter *adapter, u16 count,
@@ -2355,10 +2390,8 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb,
len = skb_headlen(skb);
- if (unlikely(skb->len <= 0)) {
- dev_kfree_skb_any(skb);
- return NETDEV_TX_OK;
- }
+ if (unlikely(skb->len <= 0))
+ goto drop_packet;
nr_frags = skb_shinfo(skb)->nr_frags;
for (f = 0; f < nr_frags; f++) {
@@ -2371,10 +2404,9 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb,
if (mss) {
if (skb->protocol == htons(ETH_P_IP)) {
proto_hdr_len = skb_tcp_all_headers(skb);
- if (unlikely(proto_hdr_len > len)) {
- dev_kfree_skb_any(skb);
- return NETDEV_TX_OK;
- }
+ if (unlikely(proto_hdr_len > len))
+ goto drop_packet;
+
/* need additional TPD ? */
if (proto_hdr_len != len)
count += (len - proto_hdr_len +
@@ -2406,23 +2438,26 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb,
}
tso = atl1_tso(adapter, skb, ptpd);
- if (tso < 0) {
- dev_kfree_skb_any(skb);
- return NETDEV_TX_OK;
- }
+ if (tso < 0)
+ goto drop_packet;
if (!tso) {
ret_val = atl1_tx_csum(adapter, skb, ptpd);
- if (ret_val < 0) {
- dev_kfree_skb_any(skb);
- return NETDEV_TX_OK;
- }
+ if (ret_val < 0)
+ goto drop_packet;
}
- atl1_tx_map(adapter, skb, ptpd);
+ if (!atl1_tx_map(adapter, skb, ptpd))
+ goto drop_packet;
+
atl1_tx_queue(adapter, count, ptpd);
atl1_update_mailbox(adapter);
return NETDEV_TX_OK;
+
+drop_packet:
+ adapter->soft_stats.tx_errors++;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
}
static int atl1_rings_clean(struct napi_struct *napi, int budget)
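[Note: atl1_tx_map() now unwinds every mapping made so far when a dma_map call fails mid-frame, walking the ring from the first mapped slot with wraparound. A generic sketch of that unwind; the ring types are hypothetical:

	#include <linux/dma-mapping.h>

	struct tx_slot {
		dma_addr_t dma;
		unsigned int len;
	};

	struct tx_ring {
		struct device *dev;
		struct tx_slot *slot;
		u16 count;
	};

	/* Undo mappings [first, cur) in ring order, wrapping at the end. */
	static void unwind_tx_map(struct tx_ring *r, u16 first, u16 cur)
	{
		while (first != cur) {
			dma_unmap_page(r->dev, r->slot[first].dma,
				       r->slot[first].len, DMA_TO_DEVICE);
			r->slot[first].dma = 0;
			if (++first == r->count)
				first = 0;
		}
	}
]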
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
index 0d61b8580d72..f832562bd7a3 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
@@ -818,6 +818,9 @@ static void bcmasp_init_tx(struct bcmasp_intf *intf)
/* Tx SPB */
tx_spb_ctrl_wl(intf, ((intf->channel + 8) << TX_SPB_CTRL_XF_BID_SHIFT),
TX_SPB_CTRL_XF_CTRL2);
+
+ if (intf->parent->tx_chan_offset)
+ tx_pause_ctrl_wl(intf, (1 << (intf->channel + 8)), TX_PAUSE_MAP_VECTOR);
tx_spb_top_wl(intf, 0x1e, TX_SPB_TOP_BLKOUT);
tx_spb_dma_wq(intf, intf->tx_spb_dma_addr, TX_SPB_DMA_READ);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 869580b6f70d..243cb13cb01c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2989,6 +2989,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
{
struct bnxt_napi *bnapi = cpr->bnapi;
u32 raw_cons = cpr->cp_raw_cons;
+ bool flush_xdp = false;
u32 cons;
int rx_pkts = 0;
u8 event = 0;
@@ -3042,6 +3043,8 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
else
rc = bnxt_force_rx_discard(bp, cpr, &raw_cons,
&event);
+ if (event & BNXT_REDIRECT_EVENT)
+ flush_xdp = true;
if (likely(rc >= 0))
rx_pkts += rc;
/* Increment rx_pkts when rc is -ENOMEM to count towards
@@ -3066,7 +3069,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
}
}
- if (event & BNXT_REDIRECT_EVENT) {
+ if (flush_xdp) {
xdp_do_flush();
event &= ~BNXT_REDIRECT_EVENT;
}
@@ -10780,6 +10783,72 @@ void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx,
bp->num_rss_ctx--;
}
+static bool bnxt_vnic_has_rx_ring(struct bnxt *bp, struct bnxt_vnic_info *vnic,
+ int rxr_id)
+{
+ u16 tbl_size = bnxt_get_rxfh_indir_size(bp->dev);
+ int i, vnic_rx;
+
+ /* Ntuple VNIC always has all the rx rings. Any change of ring id
+ * must be taken into account because a future filter may use it.
+ */
+ if (vnic->flags & BNXT_VNIC_NTUPLE_FLAG)
+ return true;
+
+ for (i = 0; i < tbl_size; i++) {
+ if (vnic->flags & BNXT_VNIC_RSSCTX_FLAG)
+ vnic_rx = ethtool_rxfh_context_indir(vnic->rss_ctx)[i];
+ else
+ vnic_rx = bp->rss_indir_tbl[i];
+
+ if (rxr_id == vnic_rx)
+ return true;
+ }
+
+ return false;
+}
+
+static int bnxt_set_vnic_mru_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic,
+ u16 mru, int rxr_id)
+{
+ int rc;
+
+ if (!bnxt_vnic_has_rx_ring(bp, vnic, rxr_id))
+ return 0;
+
+ if (mru) {
+ rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true);
+ if (rc) {
+ netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n",
+ vnic->vnic_id, rc);
+ return rc;
+ }
+ }
+ vnic->mru = mru;
+ bnxt_hwrm_vnic_update(bp, vnic,
+ VNIC_UPDATE_REQ_ENABLES_MRU_VALID);
+
+ return 0;
+}
+
+static int bnxt_set_rss_ctx_vnic_mru(struct bnxt *bp, u16 mru, int rxr_id)
+{
+ struct ethtool_rxfh_context *ctx;
+ unsigned long context;
+ int rc;
+
+ xa_for_each(&bp->dev->ethtool->rss_ctx, context, ctx) {
+ struct bnxt_rss_ctx *rss_ctx = ethtool_rxfh_context_priv(ctx);
+ struct bnxt_vnic_info *vnic = &rss_ctx->vnic;
+
+ rc = bnxt_set_vnic_mru_p5(bp, vnic, mru, rxr_id);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
static void bnxt_hwrm_realloc_rss_ctx_vnic(struct bnxt *bp)
{
bool set_tpa = !!(bp->flags & BNXT_FLAG_TPA);
@@ -11538,11 +11607,9 @@ static void bnxt_free_irq(struct bnxt *bp)
static int bnxt_request_irq(struct bnxt *bp)
{
+ struct cpu_rmap *rmap = NULL;
int i, j, rc = 0;
unsigned long flags = 0;
-#ifdef CONFIG_RFS_ACCEL
- struct cpu_rmap *rmap;
-#endif
rc = bnxt_setup_int_mode(bp);
if (rc) {
@@ -11563,15 +11630,15 @@ static int bnxt_request_irq(struct bnxt *bp)
int map_idx = bnxt_cp_num_to_irq_num(bp, i);
struct bnxt_irq *irq = &bp->irq_tbl[map_idx];
-#ifdef CONFIG_RFS_ACCEL
- if (rmap && bp->bnapi[i]->rx_ring) {
+ if (IS_ENABLED(CONFIG_RFS_ACCEL) &&
+ rmap && bp->bnapi[i]->rx_ring) {
rc = irq_cpu_rmap_add(rmap, irq->vector);
if (rc)
netdev_warn(bp->dev, "failed adding irq rmap for ring %d\n",
j);
j++;
}
-#endif
+
rc = request_irq(irq->vector, irq->handler, flags, irq->name,
bp->bnapi[i]);
if (rc)
@@ -15927,6 +15994,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx)
struct bnxt_vnic_info *vnic;
struct bnxt_napi *bnapi;
int i, rc;
+ u16 mru;
rxr = &bp->rx_ring[idx];
clone = qmem;
@@ -15977,21 +16045,15 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx)
napi_enable_locked(&bnapi->napi);
bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons);
+ mru = bp->dev->mtu + ETH_HLEN + VLAN_HLEN;
for (i = 0; i < bp->nr_vnics; i++) {
vnic = &bp->vnic_info[i];
- rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true);
- if (rc) {
- netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n",
- vnic->vnic_id, rc);
+ rc = bnxt_set_vnic_mru_p5(bp, vnic, mru, idx);
+ if (rc)
return rc;
- }
- vnic->mru = bp->dev->mtu + ETH_HLEN + VLAN_HLEN;
- bnxt_hwrm_vnic_update(bp, vnic,
- VNIC_UPDATE_REQ_ENABLES_MRU_VALID);
}
-
- return 0;
+ return bnxt_set_rss_ctx_vnic_mru(bp, mru, idx);
err_reset:
netdev_err(bp->dev, "Unexpected HWRM error during queue start rc: %d\n",
@@ -16013,10 +16075,10 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx)
for (i = 0; i < bp->nr_vnics; i++) {
vnic = &bp->vnic_info[i];
- vnic->mru = 0;
- bnxt_hwrm_vnic_update(bp, vnic,
- VNIC_UPDATE_REQ_ENABLES_MRU_VALID);
+
+ bnxt_set_vnic_mru_p5(bp, vnic, 0, idx);
}
+ bnxt_set_rss_ctx_vnic_mru(bp, 0, idx);
/* Make sure NAPI sees that the VNIC is disabled */
synchronize_net();
rxr = &bp->rx_ring[idx];
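[Note: in __bnxt_poll_work() above, the REDIRECT event bit can be cleared while the loop handles other events, so the flush decision is latched into a local flush_xdp flag and acted on once after the loop. A stripped-down sketch of the pattern; the helpers and the event bit are hypothetical stand-ins:

	#include <stdbool.h>

	unsigned int handle_one(void);	/* per-packet rx handler */
	void flush_redirects(void);	/* stand-in for xdp_do_flush() */

	void poll_loop(int budget)
	{
		bool flush = false;

		while (budget--) {
			unsigned int event = handle_one();

			if (event & 0x2)	/* hypothetical REDIRECT bit */
				flush = true;	/* latch: event may be cleared later */
		}

		if (flush)
			flush_redirects();	/* deferred one-shot flush */
	}
]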
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c
index ce97befd3cb3..67e70d3d0980 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c
@@ -368,23 +368,27 @@ static u32 bnxt_get_ctx_coredump(struct bnxt *bp, void *buf, u32 offset,
if (!ctxm->mem_valid || !seg_id)
continue;
- if (trace)
+ if (trace) {
extra_hlen = BNXT_SEG_RCD_LEN;
+ if (buf) {
+ u16 trace_type = bnxt_bstore_to_trace[type];
+
+ bnxt_fill_drv_seg_record(bp, &record, ctxm,
+ trace_type);
+ }
+ }
+
if (buf)
data = buf + BNXT_SEG_HDR_LEN + extra_hlen;
+
seg_len = bnxt_copy_ctx_mem(bp, ctxm, data, 0) + extra_hlen;
if (buf) {
bnxt_fill_coredump_seg_hdr(bp, &seg_hdr, NULL, seg_len,
0, 0, 0, comp_id, seg_id);
memcpy(buf, &seg_hdr, BNXT_SEG_HDR_LEN);
buf += BNXT_SEG_HDR_LEN;
- if (trace) {
- u16 trace_type = bnxt_bstore_to_trace[type];
-
- bnxt_fill_drv_seg_record(bp, &record, ctxm,
- trace_type);
+ if (trace)
memcpy(buf, &record, BNXT_SEG_RCD_LEN);
- }
buf += seg_len;
}
len += BNXT_SEG_HDR_LEN + seg_len;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index 0dbb880a7aa0..71e14be2507e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -487,7 +487,9 @@ static int bnxt_ets_validate(struct bnxt *bp, struct ieee_ets *ets, u8 *tc)
if ((ets->tc_tx_bw[i] || ets->tc_tsa[i]) && i > bp->max_tc)
return -EINVAL;
+ }
+ for (i = 0; i < max_tc; i++) {
switch (ets->tc_tsa[i]) {
case IEEE_8021QAZ_TSA_STRICT:
break;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 84c4812414fd..2450a369b792 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -231,10 +231,9 @@ void bnxt_ulp_stop(struct bnxt *bp)
return;
mutex_lock(&edev->en_dev_lock);
- if (!bnxt_ulp_registered(edev)) {
- mutex_unlock(&edev->en_dev_lock);
- return;
- }
+ if (!bnxt_ulp_registered(edev) ||
+ (edev->flags & BNXT_EN_FLAG_ULP_STOPPED))
+ goto ulp_stop_exit;
edev->flags |= BNXT_EN_FLAG_ULP_STOPPED;
if (aux_priv) {
@@ -250,6 +249,7 @@ void bnxt_ulp_stop(struct bnxt *bp)
adrv->suspend(adev, pm);
}
}
+ulp_stop_exit:
mutex_unlock(&edev->en_dev_lock);
}
@@ -258,19 +258,13 @@ void bnxt_ulp_start(struct bnxt *bp, int err)
struct bnxt_aux_priv *aux_priv = bp->aux_priv;
struct bnxt_en_dev *edev = bp->edev;
- if (!edev)
- return;
-
- edev->flags &= ~BNXT_EN_FLAG_ULP_STOPPED;
-
- if (err)
+ if (!edev || err)
return;
mutex_lock(&edev->en_dev_lock);
- if (!bnxt_ulp_registered(edev)) {
- mutex_unlock(&edev->en_dev_lock);
- return;
- }
+ if (!bnxt_ulp_registered(edev) ||
+ !(edev->flags & BNXT_EN_FLAG_ULP_STOPPED))
+ goto ulp_start_exit;
if (edev->ulp_tbl->msix_requested)
bnxt_fill_msix_vecs(bp, edev->msix_entries);
@@ -287,6 +281,8 @@ void bnxt_ulp_start(struct bnxt *bp, int err)
adrv->resume(adev);
}
}
+ulp_start_exit:
+ edev->flags &= ~BNXT_EN_FLAG_ULP_STOPPED;
mutex_unlock(&edev->en_dev_lock);
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 4a6d8cb9f970..09e7e8efa6fa 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -115,7 +115,7 @@ static void __bnxt_xmit_xdp_redirect(struct bnxt *bp,
tx_buf->action = XDP_REDIRECT;
tx_buf->xdpf = xdpf;
dma_unmap_addr_set(tx_buf, mapping, mapping);
- dma_unmap_len_set(tx_buf, len, 0);
+ dma_unmap_len_set(tx_buf, len, len);
}
void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
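[Note: the one-line fix above stores the real mapped length instead of 0, so the completion path unmaps the full buffer. A sketch of the map-state pair, assuming the standard dma_unmap_* state helpers:

	#include <linux/dma-mapping.h>

	struct tx_buf {
		DEFINE_DMA_UNMAP_ADDR(mapping);
		DEFINE_DMA_UNMAP_LEN(len);
	};

	/* Record both halves of the unmap state at map time; the
	 * completion side later unmaps with exactly these values.
	 */
	static void stash_mapping(struct tx_buf *tb, dma_addr_t addr,
				  unsigned int map_len)
	{
		dma_unmap_addr_set(tb, mapping, addr);
		dma_unmap_len_set(tb, len, map_len);
	}
]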
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index fa0077bc67b7..97585c160de3 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -4092,6 +4092,12 @@ static int bcmgenet_probe(struct platform_device *pdev)
for (i = 0; i <= priv->hw_params->rx_queues; i++)
priv->rx_rings[i].rx_max_coalesced_frames = 1;
+ /* Initialize u64 stats seq counter for 32bit machines */
+ for (i = 0; i <= priv->hw_params->rx_queues; i++)
+ u64_stats_init(&priv->rx_rings[i].stats64.syncp);
+ for (i = 0; i <= priv->hw_params->tx_queues; i++)
+ u64_stats_init(&priv->tx_rings[i].stats64.syncp);
+
/* libphy will determine the link state */
netif_carrier_off(dev);
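[Note: the added loops initialize the u64_stats seqcounts that gate torn reads of 64-bit counters on 32-bit machines (they compile away on 64-bit). A sketch of the writer side, assuming the standard u64_stats API; the stats struct is hypothetical:

	#include <linux/u64_stats_sync.h>

	struct ring_stats {
		u64 packets;
		struct u64_stats_sync syncp;	/* needs u64_stats_init() first */
	};

	static void ring_count_packet(struct ring_stats *s)
	{
		u64_stats_update_begin(&s->syncp);
		s->packets++;
		u64_stats_update_end(&s->syncp);
	}
]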
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index aebb9fef3f6e..1be2dc40a1a6 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1578,7 +1578,6 @@ napi_del:
static int nicvf_change_mtu(struct net_device *netdev, int new_mtu)
{
struct nicvf *nic = netdev_priv(netdev);
- int orig_mtu = netdev->mtu;
/* For now just support only the usual MTU sized frames,
* plus some headroom for VLAN, QinQ.
@@ -1589,15 +1588,10 @@ static int nicvf_change_mtu(struct net_device *netdev, int new_mtu)
return -EINVAL;
}
- WRITE_ONCE(netdev->mtu, new_mtu);
-
- if (!netif_running(netdev))
- return 0;
-
- if (nicvf_update_hw_max_frs(nic, new_mtu)) {
- netdev->mtu = orig_mtu;
+ if (netif_running(netdev) && nicvf_update_hw_max_frs(nic, new_mtu))
return -EINVAL;
- }
+
+ WRITE_ONCE(netdev->mtu, new_mtu);
return 0;
}
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 773f5ad972a2..6bc8dfdb3d4b 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -1864,10 +1864,10 @@ static int enic_change_mtu(struct net_device *netdev, int new_mtu)
if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic))
return -EOPNOTSUPP;
- if (netdev->mtu > enic->port_mtu)
+ if (new_mtu > enic->port_mtu)
netdev_warn(netdev,
"interface MTU (%d) set higher than port MTU (%d)\n",
- netdev->mtu, enic->port_mtu);
+ new_mtu, enic->port_mtu);
return _enic_change_mtu(netdev, new_mtu);
}
diff --git a/drivers/net/ethernet/faraday/Kconfig b/drivers/net/ethernet/faraday/Kconfig
index c699bd6bcbb9..474073c7f94d 100644
--- a/drivers/net/ethernet/faraday/Kconfig
+++ b/drivers/net/ethernet/faraday/Kconfig
@@ -31,6 +31,7 @@ config FTGMAC100
depends on ARM || COMPILE_TEST
depends on !64BIT || BROKEN
select PHYLIB
+ select FIXED_PHY
select MDIO_ASPEED if MACH_ASPEED_G6
select CRC32
help
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 2ec2c3dab250..0f4efd505332 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -3939,6 +3939,7 @@ static int dpaa2_eth_setup_rx_flow(struct dpaa2_eth_priv *priv,
MEM_TYPE_PAGE_ORDER0, NULL);
if (err) {
dev_err(dev, "xdp_rxq_info_reg_mem_model failed\n");
+ xdp_rxq_info_unreg(&fq->channel->xdp_rxq);
return err;
}
@@ -4432,17 +4433,25 @@ static int dpaa2_eth_bind_dpni(struct dpaa2_eth_priv *priv)
return -EINVAL;
}
if (err)
- return err;
+ goto out;
}
err = dpni_get_qdid(priv->mc_io, 0, priv->mc_token,
DPNI_QUEUE_TX, &priv->tx_qdid);
if (err) {
dev_err(dev, "dpni_get_qdid() failed\n");
- return err;
+ goto out;
}
return 0;
+
+out:
+ while (i--) {
+ if (priv->fq[i].type == DPAA2_RX_FQ &&
+ xdp_rxq_info_is_reg(&priv->fq[i].channel->xdp_rxq))
+ xdp_rxq_info_unreg(&priv->fq[i].channel->xdp_rxq);
+ }
+ return err;
}
/* Allocate rings for storing incoming frame descriptors */
@@ -4657,12 +4666,19 @@ static int dpaa2_eth_connect_mac(struct dpaa2_eth_priv *priv)
return PTR_ERR(dpmac_dev);
}
- if (IS_ERR(dpmac_dev) || dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type)
+ if (IS_ERR(dpmac_dev))
return 0;
+ if (dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type) {
+ err = 0;
+ goto out_put_device;
+ }
+
mac = kzalloc(sizeof(struct dpaa2_mac), GFP_KERNEL);
- if (!mac)
- return -ENOMEM;
+ if (!mac) {
+ err = -ENOMEM;
+ goto out_put_device;
+ }
mac->mc_dev = dpmac_dev;
mac->mc_io = priv->mc_io;
@@ -4696,6 +4712,8 @@ err_close_mac:
dpaa2_mac_close(mac);
err_free_mac:
kfree(mac);
+out_put_device:
+ put_device(&dpmac_dev->dev);
return err;
}
@@ -4825,6 +4843,17 @@ static void dpaa2_eth_del_ch_napi(struct dpaa2_eth_priv *priv)
}
}
+static void dpaa2_eth_free_rx_xdp_rxq(struct dpaa2_eth_priv *priv)
+{
+ int i;
+
+ for (i = 0; i < priv->num_fqs; i++) {
+ if (priv->fq[i].type == DPAA2_RX_FQ &&
+ xdp_rxq_info_is_reg(&priv->fq[i].channel->xdp_rxq))
+ xdp_rxq_info_unreg(&priv->fq[i].channel->xdp_rxq);
+ }
+}
+
static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
{
struct device *dev;
@@ -5028,6 +5057,7 @@ err_alloc_percpu_extras:
free_percpu(priv->percpu_stats);
err_alloc_percpu_stats:
dpaa2_eth_del_ch_napi(priv);
+ dpaa2_eth_free_rx_xdp_rxq(priv);
err_bind:
dpaa2_eth_free_dpbps(priv);
err_dpbp_setup:
@@ -5080,6 +5110,7 @@ static void dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
free_percpu(priv->percpu_extras);
dpaa2_eth_del_ch_napi(priv);
+ dpaa2_eth_free_rx_xdp_rxq(priv);
dpaa2_eth_free_dpbps(priv);
dpaa2_eth_free_dpio(priv);
dpaa2_eth_free_dpni(priv);
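[Note: the bind path above gains a while (i--) unwind that unregisters the xdp_rxq_info of every queue set up before the failure. A generic, standalone sketch of reverse unwinding on partial initialization; the item type and helpers are hypothetical:

	struct item { int placeholder; };
	int item_init(struct item *it);		/* per-entry setup */
	void item_exit(struct item *it);	/* and its inverse */

	static int setup_all(struct item *items, int n)
	{
		int i, err;

		for (i = 0; i < n; i++) {
			err = item_init(&items[i]);
			if (err)
				goto unwind;
		}
		return 0;

	unwind:
		while (i--)	/* tear down [0, i) in reverse order */
			item_exit(&items[i]);
		return err;
	}
]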
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 147a93bf9fa9..4643a3380618 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -1448,12 +1448,19 @@ static int dpaa2_switch_port_connect_mac(struct ethsw_port_priv *port_priv)
if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER)
return PTR_ERR(dpmac_dev);
- if (IS_ERR(dpmac_dev) || dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type)
+ if (IS_ERR(dpmac_dev))
return 0;
+ if (dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type) {
+ err = 0;
+ goto out_put_device;
+ }
+
mac = kzalloc(sizeof(*mac), GFP_KERNEL);
- if (!mac)
- return -ENOMEM;
+ if (!mac) {
+ err = -ENOMEM;
+ goto out_put_device;
+ }
mac->mc_dev = dpmac_dev;
mac->mc_io = port_priv->ethsw_data->mc_io;
@@ -1483,6 +1490,8 @@ err_close_mac:
dpaa2_mac_close(mac);
err_free_mac:
kfree(mac);
+out_put_device:
+ put_device(&dpmac_dev->dev);
return err;
}
diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig
index e917132d3714..54b0f0a5a6bb 100644
--- a/drivers/net/ethernet/freescale/enetc/Kconfig
+++ b/drivers/net/ethernet/freescale/enetc/Kconfig
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
config FSL_ENETC_CORE
tristate
+ select NXP_NETC_LIB if NXP_NTMP
help
This module supports common functionality between the PF and VF
drivers for the NXP ENETC controller.
@@ -22,6 +23,9 @@ config NXP_NETC_LIB
Switch, such as NETC Table Management Protocol (NTMP) 2.0, common tc
flower and debugfs interfaces and so on.
+config NXP_NTMP
+ bool
+
config FSL_ENETC
tristate "ENETC PF driver"
depends on PCI_MSI
@@ -45,7 +49,7 @@ config NXP_ENETC4
select FSL_ENETC_CORE
select FSL_ENETC_MDIO
select NXP_ENETC_PF_COMMON
- select NXP_NETC_LIB
+ select NXP_NTMP
select PHYLINK
select DIMLIB
help
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index 4098f01479bc..53e8d18c7a34 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -507,7 +507,7 @@ static inline u64 _enetc_rd_reg64(void __iomem *reg)
tmp = ioread32(reg + 4);
} while (high != tmp);
- return le64_to_cpu((__le64)high << 32 | low);
+ return (u64)high << 32 | low;
}
#endif
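[Note: the _enetc_rd_reg64() fix drops a le64_to_cpu() applied to a value that ioread32() had already converted to CPU order. For context, the helper's tear-free split read looks like this, condensed from the code above:

	#include <linux/io.h>
	#include <linux/types.h>

	/* Read a 64-bit counter as two 32-bit MMIO words, retrying until
	 * the high word is stable so the low word cannot have wrapped in
	 * between. ioread32() returns CPU-endian data, hence the plain
	 * shift-or with no further byte swapping.
	 */
	static u64 rd_reg64(void __iomem *reg)
	{
		u32 low, high, tmp;

		do {
			high = ioread32(reg + 4);
			low = ioread32(reg);
			tmp = ioread32(reg + 4);
		} while (high != tmp);

		return (u64)high << 32 | low;
	}
]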
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index dc35a23ec47f..d1aeb722d48f 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -1917,49 +1917,56 @@ static void gve_turnup_and_check_status(struct gve_priv *priv)
gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}
-static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
+static struct gve_notify_block *gve_get_tx_notify_block(struct gve_priv *priv,
+ unsigned int txqueue)
{
- struct gve_notify_block *block;
- struct gve_tx_ring *tx = NULL;
- struct gve_priv *priv;
- u32 last_nic_done;
- u32 current_time;
u32 ntfy_idx;
- netdev_info(dev, "Timeout on tx queue, %d", txqueue);
- priv = netdev_priv(dev);
if (txqueue > priv->tx_cfg.num_queues)
- goto reset;
+ return NULL;
ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
if (ntfy_idx >= priv->num_ntfy_blks)
- goto reset;
+ return NULL;
+
+ return &priv->ntfy_blocks[ntfy_idx];
+}
+
+static bool gve_tx_timeout_try_q_kick(struct gve_priv *priv,
+ unsigned int txqueue)
+{
+ struct gve_notify_block *block;
+ u32 current_time;
- block = &priv->ntfy_blocks[ntfy_idx];
- tx = block->tx;
+ block = gve_get_tx_notify_block(priv, txqueue);
+
+ if (!block)
+ return false;
current_time = jiffies_to_msecs(jiffies);
- if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
- goto reset;
+ if (block->tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
+ return false;
- /* Check to see if there are missed completions, which will allow us to
- * kick the queue.
- */
- last_nic_done = gve_tx_load_event_counter(priv, tx);
- if (last_nic_done - tx->done) {
- netdev_info(dev, "Kicking queue %d", txqueue);
- iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
- napi_schedule(&block->napi);
- tx->last_kick_msec = current_time;
- goto out;
- } // Else reset.
+ netdev_info(priv->dev, "Kicking queue %d", txqueue);
+ napi_schedule(&block->napi);
+ block->tx->last_kick_msec = current_time;
+ return true;
+}
-reset:
- gve_schedule_reset(priv);
+static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+ struct gve_notify_block *block;
+ struct gve_priv *priv;
-out:
- if (tx)
- tx->queue_timeout++;
+ netdev_info(dev, "Timeout on tx queue, %d", txqueue);
+ priv = netdev_priv(dev);
+
+ if (!gve_tx_timeout_try_q_kick(priv, txqueue))
+ gve_schedule_reset(priv);
+
+ block = gve_get_tx_notify_block(priv, txqueue);
+ if (block)
+ block->tx->queue_timeout++;
priv->tx_timeo_cnt++;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index b03b8758c777..aaa803563bd2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -11,6 +11,7 @@
#include <linux/irq.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/skbuff.h>
@@ -1039,6 +1040,8 @@ static bool hns3_can_use_tx_sgl(struct hns3_enet_ring *ring,
static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring)
{
u32 alloc_size = ring->tqp->handle->kinfo.tx_spare_buf_size;
+ struct net_device *netdev = ring_to_netdev(ring);
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
struct hns3_tx_spare *tx_spare;
struct page *page;
dma_addr_t dma;
@@ -1080,6 +1083,7 @@ static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring)
tx_spare->buf = page_address(page);
tx_spare->len = PAGE_SIZE << order;
ring->tx_spare = tx_spare;
+ ring->tx_copybreak = priv->tx_copybreak;
return;
dma_mapping_error:
@@ -4874,6 +4878,30 @@ static void hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv)
devm_kfree(&pdev->dev, priv->tqp_vector);
}
+static void hns3_update_tx_spare_buf_config(struct hns3_nic_priv *priv)
+{
+#define HNS3_MIN_SPARE_BUF_SIZE (2 * 1024 * 1024)
+#define HNS3_MAX_PACKET_SIZE (64 * 1024)
+
+ struct iommu_domain *domain = iommu_get_domain_for_dev(priv->dev);
+ struct hnae3_ae_dev *ae_dev = hns3_get_ae_dev(priv->ae_handle);
+ struct hnae3_handle *handle = priv->ae_handle;
+
+ if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3)
+ return;
+
+ if (!(domain && iommu_is_dma_domain(domain)))
+ return;
+
+ priv->min_tx_copybreak = HNS3_MAX_PACKET_SIZE;
+ priv->min_tx_spare_buf_size = HNS3_MIN_SPARE_BUF_SIZE;
+
+ if (priv->tx_copybreak < priv->min_tx_copybreak)
+ priv->tx_copybreak = priv->min_tx_copybreak;
+ if (handle->kinfo.tx_spare_buf_size < priv->min_tx_spare_buf_size)
+ handle->kinfo.tx_spare_buf_size = priv->min_tx_spare_buf_size;
+}
+
static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
unsigned int ring_type)
{
@@ -5107,6 +5135,7 @@ int hns3_init_all_ring(struct hns3_nic_priv *priv)
int i, j;
int ret;
+ hns3_update_tx_spare_buf_config(priv);
for (i = 0; i < ring_num; i++) {
ret = hns3_alloc_ring_memory(&priv->ring[i]);
if (ret) {
@@ -5311,6 +5340,8 @@ static int hns3_client_init(struct hnae3_handle *handle)
priv->ae_handle = handle;
priv->tx_timeout_count = 0;
priv->max_non_tso_bd_num = ae_dev->dev_specs.max_non_tso_bd_num;
+ priv->min_tx_copybreak = 0;
+ priv->min_tx_spare_buf_size = 0;
set_bit(HNS3_NIC_STATE_DOWN, &priv->state);
handle->msg_enable = netif_msg_init(debug, DEFAULT_MSG_LEVEL);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index d36c4ed16d8d..caf7a4df8585 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -596,6 +596,8 @@ struct hns3_nic_priv {
struct hns3_enet_coalesce rx_coal;
u32 tx_copybreak;
u32 rx_copybreak;
+ u32 min_tx_copybreak;
+ u32 min_tx_spare_buf_size;
};
union l3_hdr_info {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index a7de67699a01..30bdec1acb57 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -9576,33 +9576,36 @@ static bool hclge_need_enable_vport_vlan_filter(struct hclge_vport *vport)
return false;
}
-int hclge_enable_vport_vlan_filter(struct hclge_vport *vport, bool request_en)
+static int __hclge_enable_vport_vlan_filter(struct hclge_vport *vport,
+ bool request_en)
{
- struct hclge_dev *hdev = vport->back;
bool need_en;
int ret;
- mutex_lock(&hdev->vport_lock);
-
- vport->req_vlan_fltr_en = request_en;
-
need_en = hclge_need_enable_vport_vlan_filter(vport);
- if (need_en == vport->cur_vlan_fltr_en) {
- mutex_unlock(&hdev->vport_lock);
+ if (need_en == vport->cur_vlan_fltr_en)
return 0;
- }
ret = hclge_set_vport_vlan_filter(vport, need_en);
- if (ret) {
- mutex_unlock(&hdev->vport_lock);
+ if (ret)
return ret;
- }
vport->cur_vlan_fltr_en = need_en;
+ return 0;
+}
+
+int hclge_enable_vport_vlan_filter(struct hclge_vport *vport, bool request_en)
+{
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ mutex_lock(&hdev->vport_lock);
+ vport->req_vlan_fltr_en = request_en;
+ ret = __hclge_enable_vport_vlan_filter(vport, request_en);
mutex_unlock(&hdev->vport_lock);
- return 0;
+ return ret;
}
static int hclge_enable_vlan_filter(struct hnae3_handle *handle, bool enable)
@@ -10623,16 +10626,19 @@ static void hclge_sync_vlan_fltr_state(struct hclge_dev *hdev)
&vport->state))
continue;
- ret = hclge_enable_vport_vlan_filter(vport,
- vport->req_vlan_fltr_en);
+ mutex_lock(&hdev->vport_lock);
+ ret = __hclge_enable_vport_vlan_filter(vport,
+ vport->req_vlan_fltr_en);
if (ret) {
dev_err(&hdev->pdev->dev,
"failed to sync vlan filter state for vport%u, ret = %d\n",
vport->vport_id, ret);
set_bit(HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE,
&vport->state);
+ mutex_unlock(&hdev->vport_lock);
return;
}
+ mutex_unlock(&hdev->vport_lock);
}
}
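[Note: the hclge change splits the function into a locked wrapper and a double-underscore helper that assumes vport_lock is held, so hclge_sync_vlan_fltr_state() can take the lock around the loop body itself. A minimal sketch of the lock-split idiom; names are hypothetical:

	#include <linux/mutex.h>
	#include <linux/types.h>

	struct filter_state {
		struct mutex lock;
		bool enabled;
	};

	/* Caller must hold s->lock. */
	static int __set_filter(struct filter_state *s, bool en)
	{
		lockdep_assert_held(&s->lock);
		s->enabled = en;
		return 0;
	}

	static int set_filter(struct filter_state *s, bool en)
	{
		int ret;

		mutex_lock(&s->lock);
		ret = __set_filter(s, en);
		mutex_unlock(&s->lock);
		return ret;
	}
]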
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
index ec581d4b696f..4bd52eab3914 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
@@ -497,14 +497,14 @@ int hclge_ptp_init(struct hclge_dev *hdev)
if (ret) {
dev_err(&hdev->pdev->dev,
"failed to init freq, ret = %d\n", ret);
- goto out;
+ goto out_clear_int;
}
ret = hclge_ptp_set_ts_mode(hdev, &hdev->ptp->ts_cfg);
if (ret) {
dev_err(&hdev->pdev->dev,
"failed to init ts mode, ret = %d\n", ret);
- goto out;
+ goto out_clear_int;
}
ktime_get_real_ts64(&ts);
@@ -512,7 +512,7 @@ int hclge_ptp_init(struct hclge_dev *hdev)
if (ret) {
dev_err(&hdev->pdev->dev,
"failed to init ts time, ret = %d\n", ret);
- goto out;
+ goto out_clear_int;
}
set_bit(HCLGE_STATE_PTP_EN, &hdev->state);
@@ -520,6 +520,9 @@ int hclge_ptp_init(struct hclge_dev *hdev)
return 0;
+out_clear_int:
+ clear_bit(HCLGE_PTP_FLAG_EN, &hdev->ptp->flags);
+ hclge_ptp_int_en(hdev, false);
out:
hclge_ptp_destroy_clock(hdev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index c4f35e8e2177..4cf6ac04662a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -3094,11 +3094,7 @@ static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
static u32 hclgevf_get_max_channels(struct hclgevf_dev *hdev)
{
- struct hnae3_handle *nic = &hdev->nic;
- struct hnae3_knic_private_info *kinfo = &nic->kinfo;
-
- return min_t(u32, hdev->rss_size_max,
- hdev->num_tqps / kinfo->tc_info.num_tc);
+ return min(hdev->rss_size_max, hdev->num_tqps);
}
/**
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index a189038d88df..246ddce753f9 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -211,7 +211,6 @@ struct ibmvnic_statistics {
u8 reserved[72];
} __packed __aligned(8);
-#define NUM_TX_STATS 3
struct ibmvnic_tx_queue_stats {
u64 batched_packets;
u64 direct_packets;
@@ -219,13 +218,18 @@ struct ibmvnic_tx_queue_stats {
u64 dropped_packets;
};
-#define NUM_RX_STATS 3
+#define NUM_TX_STATS \
+ (sizeof(struct ibmvnic_tx_queue_stats) / sizeof(u64))
+
struct ibmvnic_rx_queue_stats {
u64 packets;
u64 bytes;
u64 interrupts;
};
+#define NUM_RX_STATS \
+ (sizeof(struct ibmvnic_rx_queue_stats) / sizeof(u64))
+
struct ibmvnic_acl_buffer {
__be32 len;
__be32 version;
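[Note: deriving NUM_TX_STATS/NUM_RX_STATS from the struct size keeps the counts in sync when a counter is added, which the old literal 3 was not once the tx struct grew a fourth field. A standalone illustration:

	#include <stdint.h>

	struct tx_queue_stats {
		uint64_t batched_packets;
		uint64_t direct_packets;
		uint64_t bytes;
		uint64_t dropped_packets;
	};

	/* The count follows the layout automatically. */
	#define NUM_TX_STATS \
		(sizeof(struct tx_queue_stats) / sizeof(uint64_t))

	_Static_assert(NUM_TX_STATS == 4, "one entry per u64 counter");
]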
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 3f089c3d47b2..d8595e84326d 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -477,10 +477,6 @@ static void e1000_down_and_stop(struct e1000_adapter *adapter)
cancel_delayed_work_sync(&adapter->phy_info_task);
cancel_delayed_work_sync(&adapter->fifo_stall_task);
-
- /* Only kill reset task if adapter is not resetting */
- if (!test_bit(__E1000_RESETTING, &adapter->flags))
- cancel_work_sync(&adapter->reset_task);
}
void e1000_down(struct e1000_adapter *adapter)
@@ -1266,6 +1262,10 @@ static void e1000_remove(struct pci_dev *pdev)
unregister_netdev(netdev);
+ /* Only kill reset task if adapter is not resetting */
+ if (!test_bit(__E1000_RESETTING, &adapter->flags))
+ cancel_work_sync(&adapter->reset_task);
+
e1000_phy_hw_reset(hw);
kfree(adapter->tx_ring);
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index 8294a7c4f122..ba331899d186 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -638,6 +638,9 @@
/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */
#define NVM_SUM 0xBABA
+/* Uninitialized ("empty") checksum word value */
+#define NVM_CHECKSUM_UNINITIALIZED 0xFFFF
+
/* PBA (printed board assembly) number words */
#define NVM_PBA_OFFSET_0 8
#define NVM_PBA_OFFSET_1 9
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 364378133526..df4e7d781cb1 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -4274,6 +4274,8 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw)
ret_val = e1000e_update_nvm_checksum(hw);
if (ret_val)
return ret_val;
+ } else if (hw->mac.type == e1000_pch_tgp) {
+ return 0;
}
}
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index a96f4cfa6e17..7719e15813ee 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -3534,9 +3534,6 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca)
case e1000_pch_cnp:
case e1000_pch_tgp:
case e1000_pch_adp:
- case e1000_pch_mtp:
- case e1000_pch_lnp:
- case e1000_pch_ptp:
case e1000_pch_nvp:
if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) {
/* Stable 24MHz frequency */
@@ -3552,6 +3549,17 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca)
adapter->cc.shift = shift;
}
break;
+ case e1000_pch_mtp:
+ case e1000_pch_lnp:
+ case e1000_pch_ptp:
+ /* System firmware can misreport this value, so set it to a
+ * stable 38400KHz frequency.
+ */
+ incperiod = INCPERIOD_38400KHZ;
+ incvalue = INCVALUE_38400KHZ;
+ shift = INCVALUE_SHIFT_38400KHZ;
+ adapter->cc.shift = shift;
+ break;
case e1000_82574:
case e1000_82583:
/* Stable 25MHz frequency */
diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c
index e609f4df86f4..16369e6d245a 100644
--- a/drivers/net/ethernet/intel/e1000e/nvm.c
+++ b/drivers/net/ethernet/intel/e1000e/nvm.c
@@ -558,6 +558,12 @@ s32 e1000e_validate_nvm_checksum_generic(struct e1000_hw *hw)
checksum += nvm_data;
}
+ if (hw->mac.type == e1000_pch_tgp &&
+ nvm_data == NVM_CHECKSUM_UNINITIALIZED) {
+ e_dbg("Uninitialized NVM Checksum on TGP platform - ignoring\n");
+ return 0;
+ }
+
if (checksum != (u16)NVM_SUM) {
e_dbg("NVM Checksum Invalid\n");
return -E1000_ERR_NVM;
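[Note: the validation above sums every NVM word and expects the stored checksum word to bring the 16-bit total to NVM_SUM (0xBABA); the new TGP special case tolerates an all-ones, never-programmed checksum word. A standalone sketch of the rule:

	#include <stdint.h>
	#include <stddef.h>

	#define NVM_SUM			0xBABA
	#define NVM_UNINITIALIZED	0xFFFF

	/* words[n - 1] is the checksum word, chosen at programming time
	 * so the wraparound 16-bit sum of all n words equals NVM_SUM.
	 */
	static int nvm_checksum_ok(const uint16_t *words, size_t n,
				   int nvm_is_read_only)
	{
		uint16_t sum = 0;
		size_t i;

		for (i = 0; i < n; i++)
			sum += words[i];

		if (nvm_is_read_only && words[n - 1] == NVM_UNINITIALIZED)
			return 1;	/* never checksummed; accept */
		return sum == NVM_SUM;
	}
]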
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c
index 89d57dd911dc..ea3c3eb2ef20 100644
--- a/drivers/net/ethernet/intel/e1000e/ptp.c
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c
@@ -295,15 +295,17 @@ void e1000e_ptp_init(struct e1000_adapter *adapter)
case e1000_pch_cnp:
case e1000_pch_tgp:
case e1000_pch_adp:
- case e1000_pch_mtp:
- case e1000_pch_lnp:
- case e1000_pch_ptp:
case e1000_pch_nvp:
if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)
adapter->ptp_clock_info.max_adj = MAX_PPB_24MHZ;
else
adapter->ptp_clock_info.max_adj = MAX_PPB_38400KHZ;
break;
+ case e1000_pch_mtp:
+ case e1000_pch_lnp:
+ case e1000_pch_ptp:
+ adapter->ptp_clock_info.max_adj = MAX_PPB_38400KHZ;
+ break;
case e1000_82574:
case e1000_82583:
adapter->ptp_clock_info.max_adj = MAX_PPB_25MHZ;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
index 6119a4108838..65a2816142d9 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
@@ -189,13 +189,14 @@ struct fm10k_q_vector {
struct fm10k_ring_container rx, tx;
struct napi_struct napi;
+ struct rcu_head rcu; /* to avoid race with update stats on free */
+
cpumask_t affinity_mask;
char name[IFNAMSIZ + 9];
#ifdef CONFIG_DEBUG_FS
struct dentry *dbg_q_vector;
#endif /* CONFIG_DEBUG_FS */
- struct rcu_head rcu; /* to avoid race with update stats on free */
/* for dynamic allocation of rings associated with this q_vector */
struct fm10k_ring ring[] ____cacheline_internodealigned_in_smp;
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index c67963bfe14e..7c600d6e66ba 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -945,6 +945,7 @@ struct i40e_q_vector {
u16 reg_idx; /* register index of the interrupt */
struct napi_struct napi;
+ struct rcu_head rcu; /* to avoid race with update stats on free */
struct i40e_ring_container rx;
struct i40e_ring_container tx;
@@ -955,7 +956,6 @@ struct i40e_q_vector {
cpumask_t affinity_mask;
struct irq_affinity_notify affinity_notify;
- struct rcu_head rcu; /* to avoid race with update stats on free */
char name[I40E_INT_NAME_STR_LEN];
bool arm_wb_state;
bool in_busy_poll;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 1120f8e4bb67..7ccfc1191ae5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1546,8 +1546,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf)
* @vf: pointer to the VF structure
* @flr: VFLR was issued or not
*
- * Returns true if the VF is in reset, resets successfully, or resets
- * are disabled and false otherwise.
+ * Return: True if reset was performed successfully or if resets are disabled.
+ * False if reset is already in progress.
**/
bool i40e_reset_vf(struct i40e_vf *vf, bool flr)
{
@@ -1566,7 +1566,7 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr)
/* If VF is being reset already we don't need to continue. */
if (test_and_set_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
- return true;
+ return false;
i40e_trigger_vf_reset(vf, flr);
@@ -3137,10 +3137,10 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
const u8 *addr = al->list[i].addr;
/* Allow to delete VF primary MAC only if it was not set
- * administratively by PF or if VF is trusted.
+ * administratively by PF.
*/
if (ether_addr_equal(addr, vf->default_lan_addr.addr)) {
- if (i40e_can_vf_change_mac(vf))
+ if (!vf->pf_set_mac)
was_unimac_deleted = true;
else
continue;
@@ -4328,7 +4328,10 @@ int i40e_vc_process_vflr_event(struct i40e_pf *pf)
reg = rd32(hw, I40E_GLGEN_VFLRSTAT(reg_idx));
if (reg & BIT(bit_idx))
/* i40e_reset_vf will clear the bit in GLGEN_VFLRSTAT */
- i40e_reset_vf(vf, true);
+ if (!i40e_reset_vf(vf, true)) {
+ /* At least one VF did not finish resetting, retry next time */
+ set_bit(__I40E_VFLR_EVENT_PENDING, pf->state);
+ }
}
return 0;
@@ -5003,7 +5006,7 @@ int i40e_get_vf_stats(struct net_device *netdev, int vf_id,
vf_stats->broadcast = stats->rx_broadcast;
vf_stats->multicast = stats->rx_multicast;
vf_stats->rx_dropped = stats->rx_discards + stats->rx_discards_other;
- vf_stats->tx_dropped = stats->tx_discards;
+ vf_stats->tx_dropped = stats->tx_errors;
return 0;
}
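[Note: i40e_reset_vf() now reports false when another reset already owns the VF, letting the VFLR handler re-arm itself instead of silently dropping the event. A sketch of the test_and_set_bit ownership gate:

	#include <linux/bitops.h>
	#include <linux/types.h>

	#define STATE_RESETTING	0

	/* Returns true only for the caller that performed the reset; a
	 * concurrent caller sees false and can retry later.
	 */
	static bool try_reset(unsigned long *state, void (*do_reset)(void))
	{
		if (test_and_set_bit(STATE_RESETTING, state))
			return false;	/* reset already in progress */

		do_reset();
		clear_bit(STATE_RESETTING, state);
		return true;
	}
]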
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 2c0bb41809a4..81d7249d1149 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -3209,6 +3209,17 @@ static void iavf_reset_task(struct work_struct *work)
}
continue_reset:
+ /* If we are still early in the state machine, just restart. */
+ if (adapter->state <= __IAVF_INIT_FAILED) {
+ iavf_shutdown_adminq(hw);
+ iavf_change_state(adapter, __IAVF_STARTUP);
+ iavf_startup(adapter);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+ msecs_to_jiffies(30));
+ netdev_unlock(netdev);
+ return;
+ }
+
/* We don't use netif_running() because it may be true prior to
* ndo_open() returning, so we can't assume it means all our open
* tasks have finished, since we're not holding the rtnl_lock here.
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index a6f0e5990be2..07f0d0a0f1e2 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -79,6 +79,23 @@ iavf_poll_virtchnl_msg(struct iavf_hw *hw, struct iavf_arq_event_info *event,
return iavf_status_to_errno(status);
received_op =
(enum virtchnl_ops)le32_to_cpu(event->desc.cookie_high);
+
+ if (received_op == VIRTCHNL_OP_EVENT) {
+ struct iavf_adapter *adapter = hw->back;
+ struct virtchnl_pf_event *vpe =
+ (struct virtchnl_pf_event *)event->msg_buf;
+
+ if (vpe->event != VIRTCHNL_EVENT_RESET_IMPENDING)
+ continue;
+
+ dev_info(&adapter->pdev->dev, "Reset indication received from the PF\n");
+ if (!(adapter->flags & IAVF_FLAG_RESET_PENDING))
+ iavf_schedule_reset(adapter,
+ IAVF_FLAG_RESET_PENDING);
+
+ return -EIO;
+ }
+
if (op_to_poll == received_op)
break;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c
index 2bc5c7f59844..1f7834c03550 100644
--- a/drivers/net/ethernet/intel/ice/ice_arfs.c
+++ b/drivers/net/ethernet/intel/ice/ice_arfs.c
@@ -378,6 +378,50 @@ ice_arfs_is_perfect_flow_set(struct ice_hw *hw, __be16 l3_proto, u8 l4_proto)
}
/**
+ * ice_arfs_cmp - Check if aRFS filter matches this flow.
+ * @fltr_info: filter info of the saved ARFS entry.
+ * @fk: flow dissector keys.
+ * @n_proto: One of htons(ETH_P_IP) or htons(ETH_P_IPV6).
+ * @ip_proto: One of IPPROTO_TCP or IPPROTO_UDP.
+ *
+ * Since this function assumes limited values for n_proto and ip_proto, it
+ * is meant to be called only from ice_rx_flow_steer().
+ *
+ * Return:
+ * * true - fltr_info refers to the same flow as fk.
+ * * false - fltr_info and fk refer to different flows.
+ */
+static bool
+ice_arfs_cmp(const struct ice_fdir_fltr *fltr_info, const struct flow_keys *fk,
+ __be16 n_proto, u8 ip_proto)
+{
+ /* Determine if the filter is for IPv4 or IPv6 based on flow_type,
+ * which is one of ICE_FLTR_PTYPE_NONF_IPV{4,6}_{TCP,UDP}.
+ */
+ bool is_v4 = fltr_info->flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
+ fltr_info->flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+
+ /* The following checks are ordered with the quickest and most
+ * discriminative fields first, to fail early.
+ */
+ if (is_v4)
+ return n_proto == htons(ETH_P_IP) &&
+ fltr_info->ip.v4.src_port == fk->ports.src &&
+ fltr_info->ip.v4.dst_port == fk->ports.dst &&
+ fltr_info->ip.v4.src_ip == fk->addrs.v4addrs.src &&
+ fltr_info->ip.v4.dst_ip == fk->addrs.v4addrs.dst &&
+ fltr_info->ip.v4.proto == ip_proto;
+
+ return fltr_info->ip.v6.src_port == fk->ports.src &&
+ fltr_info->ip.v6.dst_port == fk->ports.dst &&
+ fltr_info->ip.v6.proto == ip_proto &&
+ !memcmp(&fltr_info->ip.v6.src_ip, &fk->addrs.v6addrs.src,
+ sizeof(struct in6_addr)) &&
+ !memcmp(&fltr_info->ip.v6.dst_ip, &fk->addrs.v6addrs.dst,
+ sizeof(struct in6_addr));
+}
+
+/**
* ice_rx_flow_steer - steer the Rx flow to where application is being run
* @netdev: ptr to the netdev being adjusted
* @skb: buffer with required header information
@@ -448,6 +492,10 @@ ice_rx_flow_steer(struct net_device *netdev, const struct sk_buff *skb,
continue;
fltr_info = &arfs_entry->fltr_info;
+
+ if (!ice_arfs_cmp(fltr_info, &fk, n_proto, ip_proto))
+ continue;
+
ret = fltr_info->fltr_id;
if (fltr_info->q_index == rxq_idx ||
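Note: ice_arfs_cmp() deliberately places the cheap 16-bit port compares ahead of the 128-bit address memcmp()s; because && short-circuits, most non-matching entries are rejected before any expensive work runs. A standalone illustration of that ordering:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct flow6 { uint16_t sport, dport; uint8_t saddr[16], daddr[16]; };

static bool flow6_match(const struct flow6 *a, const struct flow6 *b)
{
	/* 16-bit compares first: cheapest and most discriminative */
	return a->sport == b->sport &&
	       a->dport == b->dport &&
	       /* 128-bit compares last; skipped when the ports differ */
	       !memcmp(a->saddr, b->saddr, 16) &&
	       !memcmp(a->daddr, b->daddr, 16);
}

int main(void)
{
	struct flow6 a = { .sport = 80, .dport = 4000 }, b = a;

	b.dport = 4001;
	printf("%d\n", flow6_match(&a, &b)); /* 0: rejected on ports alone */
	return 0;
}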
diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c
index 59323c019544..351824dc3c62 100644
--- a/drivers/net/ethernet/intel/ice/ice_ddp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ddp.c
@@ -2301,6 +2301,8 @@ enum ice_ddp_state ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf,
return ICE_DDP_PKG_ERR;
buf_copy = devm_kmemdup(ice_hw_to_dev(hw), buf, len, GFP_KERNEL);
+ if (!buf_copy)
+ return ICE_DDP_PKG_ERR;
state = ice_init_pkg(hw, buf_copy, len);
if (!ice_is_init_pkg_successful(state)) {
diff --git a/drivers/net/ethernet/intel/ice/ice_debugfs.c b/drivers/net/ethernet/intel/ice/ice_debugfs.c
index 9fc0fd95a13d..cb71eca6a85b 100644
--- a/drivers/net/ethernet/intel/ice/ice_debugfs.c
+++ b/drivers/net/ethernet/intel/ice/ice_debugfs.c
@@ -606,7 +606,7 @@ void ice_debugfs_fwlog_init(struct ice_pf *pf)
pf->ice_debugfs_pf_fwlog = debugfs_create_dir("fwlog",
pf->ice_debugfs_pf);
- if (IS_ERR(pf->ice_debugfs_pf))
+ if (IS_ERR(pf->ice_debugfs_pf_fwlog))
goto err_create_module_files;
fw_modules_dir = debugfs_create_dir("modules",
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c
index 6aae03771746..2e4f0969035f 100644
--- a/drivers/net/ethernet/intel/ice/ice_eswitch.c
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c
@@ -508,10 +508,14 @@ err_create_repr:
*/
int ice_eswitch_attach_vf(struct ice_pf *pf, struct ice_vf *vf)
{
- struct ice_repr *repr = ice_repr_create_vf(vf);
struct devlink *devlink = priv_to_devlink(pf);
+ struct ice_repr *repr;
int err;
+ if (!ice_is_eswitch_mode_switchdev(pf))
+ return 0;
+
+ repr = ice_repr_create_vf(vf);
if (IS_ERR(repr))
return PTR_ERR(repr);
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
index 2410aee59fb2..d132eb477551 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.c
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -2226,7 +2226,8 @@ bool ice_lag_is_switchdev_running(struct ice_pf *pf)
struct ice_lag *lag = pf->lag;
struct net_device *tmp_nd;
- if (!ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) || !lag)
+ if (!ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) ||
+ !lag || !lag->upper_netdev)
return false;
rcu_read_lock();
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index b79a148ed0f2..55cad824c5b9 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -2299,6 +2299,7 @@ static int ice_capture_crosststamp(ktime_t *device,
ts = ((u64)ts_hi << 32) | ts_lo;
system->cycles = ts;
system->cs_id = CSID_X86_ART;
+ system->use_nsecs = true;
/* Read Device source clock time */
ts_lo = rd32(hw, cfg->dev_time_l[tmr_idx]);
diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c
index b28991dd1870..48b8e184f3db 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_controlq.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c
@@ -96,7 +96,7 @@ static void idpf_ctlq_init_rxq_bufs(struct idpf_ctlq_info *cq)
*/
static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq)
{
- mutex_lock(&cq->cq_lock);
+ spin_lock(&cq->cq_lock);
/* free ring buffers and the ring itself */
idpf_ctlq_dealloc_ring_res(hw, cq);
@@ -104,8 +104,7 @@ static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq)
/* Set ring_size to 0 to indicate uninitialized queue */
cq->ring_size = 0;
- mutex_unlock(&cq->cq_lock);
- mutex_destroy(&cq->cq_lock);
+ spin_unlock(&cq->cq_lock);
}
/**
@@ -173,7 +172,7 @@ int idpf_ctlq_add(struct idpf_hw *hw,
idpf_ctlq_init_regs(hw, cq, is_rxq);
- mutex_init(&cq->cq_lock);
+ spin_lock_init(&cq->cq_lock);
list_add(&cq->cq_list, &hw->cq_list_head);
@@ -272,7 +271,7 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq,
int err = 0;
int i;
- mutex_lock(&cq->cq_lock);
+ spin_lock(&cq->cq_lock);
/* Ensure there are enough descriptors to send all messages */
num_desc_avail = IDPF_CTLQ_DESC_UNUSED(cq);
@@ -332,7 +331,7 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq,
wr32(hw, cq->reg.tail, cq->next_to_use);
err_unlock:
- mutex_unlock(&cq->cq_lock);
+ spin_unlock(&cq->cq_lock);
return err;
}
@@ -364,7 +363,7 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count,
if (*clean_count > cq->ring_size)
return -EBADR;
- mutex_lock(&cq->cq_lock);
+ spin_lock(&cq->cq_lock);
ntc = cq->next_to_clean;
@@ -397,7 +396,7 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count,
cq->next_to_clean = ntc;
- mutex_unlock(&cq->cq_lock);
+ spin_unlock(&cq->cq_lock);
/* Return number of descriptors actually cleaned */
*clean_count = i;
@@ -435,7 +434,7 @@ int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, struct idpf_ctlq_info *cq,
if (*buff_count > 0)
buffs_avail = true;
- mutex_lock(&cq->cq_lock);
+ spin_lock(&cq->cq_lock);
if (tbp >= cq->ring_size)
tbp = 0;
@@ -524,7 +523,7 @@ post_buffs_out:
wr32(hw, cq->reg.tail, cq->next_to_post);
}
- mutex_unlock(&cq->cq_lock);
+ spin_unlock(&cq->cq_lock);
/* return the number of buffers that were not posted */
*buff_count = *buff_count - i;
@@ -552,7 +551,7 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg,
u16 i;
/* take the lock before we start messing with the ring */
- mutex_lock(&cq->cq_lock);
+ spin_lock(&cq->cq_lock);
ntc = cq->next_to_clean;
@@ -614,7 +613,7 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg,
cq->next_to_clean = ntc;
- mutex_unlock(&cq->cq_lock);
+ spin_unlock(&cq->cq_lock);
*num_q_msg = i;
if (*num_q_msg == 0)
diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h
index 9642494a67d8..3414c5f9a831 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h
@@ -99,7 +99,7 @@ struct idpf_ctlq_info {
enum idpf_ctlq_type cq_type;
int q_id;
- struct mutex cq_lock; /* control queue lock */
+ spinlock_t cq_lock; /* control queue lock */
/* used for interrupt processing */
u16 next_to_use;
u16 next_to_clean;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
index 9bdb309b668e..eaf7a2606faa 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
@@ -47,7 +47,7 @@ static u32 idpf_get_rxfh_key_size(struct net_device *netdev)
struct idpf_vport_user_config_data *user_config;
if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS))
- return -EOPNOTSUPP;
+ return 0;
user_config = &np->adapter->vport_config[np->vport_idx]->user_config;
@@ -66,7 +66,7 @@ static u32 idpf_get_rxfh_indir_size(struct net_device *netdev)
struct idpf_vport_user_config_data *user_config;
if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS))
- return -EOPNOTSUPP;
+ return 0;
user_config = &np->adapter->vport_config[np->vport_idx]->user_config;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index 4eb20ec2accb..80382ff4a5fa 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -2314,8 +2314,12 @@ void *idpf_alloc_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem, u64 size)
struct idpf_adapter *adapter = hw->back;
size_t sz = ALIGN(size, 4096);
- mem->va = dma_alloc_coherent(&adapter->pdev->dev, sz,
- &mem->pa, GFP_KERNEL);
+ /* The control queue resources are freed under a spinlock; contiguous
+ * pages avoid IOMMU remapping and the use of vmap (and vunmap in the
+ * dma_free_*() path).
+ */
+ mem->va = dma_alloc_attrs(&adapter->pdev->dev, sz, &mem->pa,
+ GFP_KERNEL, DMA_ATTR_FORCE_CONTIGUOUS);
mem->size = sz;
return mem->va;
@@ -2330,8 +2334,8 @@ void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem)
{
struct idpf_adapter *adapter = hw->back;
- dma_free_coherent(&adapter->pdev->dev, mem->size,
- mem->va, mem->pa);
+ dma_free_attrs(&adapter->pdev->dev, mem->size,
+ mem->va, mem->pa, DMA_ATTR_FORCE_CONTIGUOUS);
mem->size = 0;
mem->va = NULL;
mem->pa = 0;
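Note: with cq_lock converted to a spinlock, control-queue teardown can no longer sleep, yet freeing vmap'ed IOMMU-backed coherent memory may end up in vunmap(), which is not atomic-context safe. DMA_ATTR_FORCE_CONTIGUOUS sidesteps that. A hedged kernel-style sketch of the pattern (helper names hypothetical):

#include <linux/dma-mapping.h>

/* Sketch: allocate/free control-queue memory that is safe to release
 * in atomic context; DMA_ATTR_FORCE_CONTIGUOUS avoids vmap()/vunmap().
 */
static void *cq_mem_alloc(struct device *dev, size_t sz, dma_addr_t *pa)
{
	return dma_alloc_attrs(dev, ALIGN(sz, 4096), pa, GFP_KERNEL,
			       DMA_ATTR_FORCE_CONTIGUOUS);
}

static void cq_mem_free(struct device *dev, size_t sz, void *va, dma_addr_t pa)
{
	dma_free_attrs(dev, sz, va, pa, DMA_ATTR_FORCE_CONTIGUOUS);
}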
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 686793c539f2..031c332f66c4 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -7115,6 +7115,10 @@ static int igc_probe(struct pci_dev *pdev,
adapter->port_num = hw->bus.func;
adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+ /* Disable ASPM L1.2 on I226 devices to avoid packet loss */
+ if (igc_is_device_id_i226(hw))
+ pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2);
+
err = pci_save_state(pdev);
if (err)
goto err_ioremap;
@@ -7500,6 +7504,9 @@ static int __igc_resume(struct device *dev, bool rpm)
pci_enable_wake(pdev, PCI_D3hot, 0);
pci_enable_wake(pdev, PCI_D3cold, 0);
+ if (igc_is_device_id_i226(hw))
+ pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2);
+
if (igc_init_interrupt_scheme(adapter, true)) {
netdev_err(netdev, "Unable to allocate memory for queues\n");
return -ENOMEM;
@@ -7625,6 +7632,9 @@ static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
pci_enable_wake(pdev, PCI_D3hot, 0);
pci_enable_wake(pdev, PCI_D3cold, 0);
+ if (igc_is_device_id_i226(hw))
+ pci_disable_link_state_locked(pdev, PCIE_LINK_STATE_L1_2);
+
/* In case of PCI error, adapter loses its HW address
* so we should re-assign it here.
*/
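Note: the ASPM quirk has to be reapplied on every path that re-initializes PCIe link state (probe, resume, slot reset), which is why the same call appears three times above. A helper expressing that, as a sketch of what the driver open-codes:

/* Sketch: keep I226 out of ASPM L1.2 wherever link state may be reset */
static void igc_apply_i226_aspm_quirk(struct igc_hw *hw, struct pci_dev *pdev)
{
	if (igc_is_device_id_i226(hw))
		pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2);
}

The slot-reset hunk uses pci_disable_link_state_locked(), presumably because that error-handler path already holds the lock the unlocked variant would take.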
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 47311b134a7a..e6acd791bf64 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -507,9 +507,10 @@ struct ixgbe_q_vector {
struct ixgbe_ring_container rx, tx;
struct napi_struct napi;
+ struct rcu_head rcu; /* to avoid race with update stats on free */
+
cpumask_t affinity_mask;
int numa_node;
- struct rcu_head rcu; /* to avoid race with update stats on free */
char name[IFNAMSIZ + 9];
/* for dynamic allocation of rings associated with this q_vector */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 6f572589f1e5..6b5c9536d26d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -1822,7 +1822,7 @@ int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable)
req->chan_cnt = IEEE_8021QAZ_MAX_TCS;
req->bpid_per_chan = 1;
} else {
- req->chan_cnt = 1;
+ req->chan_cnt = pfvf->hw.rx_chan_cnt;
req->bpid_per_chan = 0;
}
@@ -1847,7 +1847,7 @@ int otx2_nix_cpt_config_bp(struct otx2_nic *pfvf, bool enable)
req->chan_cnt = IEEE_8021QAZ_MAX_TCS;
req->bpid_per_chan = 1;
} else {
- req->chan_cnt = 1;
+ req->chan_cnt = pfvf->hw.rx_chan_cnt;
req->bpid_per_chan = 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index b1aeea7c4a91..e395ef5f356e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1947,8 +1947,8 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
pages_queue, token, force_polling);
- if (callback)
- return err;
+ if (callback && !err)
+ return 0;
if (err > 0) /* Failed in FW, command didn't execute */
err = deliv_status_to_err(err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index b5c3a2a9d2a5..9560fcba643f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -18,7 +18,8 @@ enum {
enum {
MLX5E_TC_PRIO = 0,
- MLX5E_NIC_PRIO
+ MLX5E_PROMISC_PRIO,
+ MLX5E_NIC_PRIO,
};
struct mlx5e_flow_table {
@@ -68,9 +69,13 @@ struct mlx5e_l2_table {
MLX5_HASH_FIELD_SEL_DST_IP |\
MLX5_HASH_FIELD_SEL_IPSEC_SPI)
-/* NIC prio FTS */
+/* NIC promisc FT level */
enum {
MLX5E_PROMISC_FT_LEVEL,
+};
+
+/* NIC prio FTS */
+enum {
MLX5E_VLAN_FT_LEVEL,
MLX5E_L2_FT_LEVEL,
MLX5E_TTC_FT_LEVEL,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
index 298bb74ec5e9..d1d629697e28 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
@@ -113,7 +113,7 @@ int mlx5e_dim_rx_change(struct mlx5e_rq *rq, bool enable)
__set_bit(MLX5E_RQ_STATE_DIM, &rq->state);
} else {
__clear_bit(MLX5E_RQ_STATE_DIM, &rq->state);
-
+ synchronize_net();
mlx5e_dim_disable(rq->dim);
rq->dim = NULL;
}
@@ -140,7 +140,7 @@ int mlx5e_dim_tx_change(struct mlx5e_txqsq *sq, bool enable)
__set_bit(MLX5E_SQ_STATE_DIM, &sq->state);
} else {
__clear_bit(MLX5E_SQ_STATE_DIM, &sq->state);
-
+ synchronize_net();
mlx5e_dim_disable(sq->dim);
sq->dim = NULL;
}
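Note: clearing the state bit only stops new NAPI polls from touching rq->dim / sq->dim; synchronize_net() then waits out polls already in flight before the object is freed, closing a use-after-free window. The generic unpublish-then-drain-then-free shape, as a fragment rather than a complete function (OBJ_ACTIVE_BIT and q are placeholders):

/* Sketch: the datapath checks the bit under BH/RCU before touching obj */
clear_bit(OBJ_ACTIVE_BIT, &q->state);   /* 1. unpublish */
synchronize_net();                      /* 2. wait out in-flight readers */
kfree(q->obj);                          /* 3. now safe to free */
q->obj = NULL;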
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index ea078c9f5d15..3cb8d3bf9044 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -43,7 +43,6 @@
#include "en/fs_ethtool.h"
#define LANES_UNKNOWN 0
-#define MAX_LANES 8
void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
struct ethtool_drvinfo *drvinfo)
@@ -1098,10 +1097,8 @@ static void get_link_properties(struct net_device *netdev,
speed = info->speed;
lanes = info->lanes;
duplex = DUPLEX_FULL;
- } else if (data_rate_oper) {
+ } else if (data_rate_oper)
speed = 100 * data_rate_oper;
- lanes = MAX_LANES;
- }
out:
link_ksettings->base.duplex = duplex;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 04a969128161..265c4ca85f7d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -780,7 +780,7 @@ static int mlx5e_create_promisc_table(struct mlx5e_flow_steering *fs)
ft_attr.max_fte = MLX5E_PROMISC_TABLE_SIZE;
ft_attr.autogroup.max_num_groups = 1;
ft_attr.level = MLX5E_PROMISC_FT_LEVEL;
- ft_attr.prio = MLX5E_NIC_PRIO;
+ ft_attr.prio = MLX5E_PROMISC_PRIO;
ft->t = mlx5_create_auto_grouped_flow_table(fs->ns, &ft_attr);
if (IS_ERR(ft->t)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 84b1ab8233b8..7462514c7f3d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1154,8 +1154,9 @@ static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)
}
}
-static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
- u32 cqe_bcnt)
+static unsigned int mlx5e_lro_update_hdr(struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt)
{
struct ethhdr *eth = (struct ethhdr *)(skb->data);
struct tcphdr *tcp;
@@ -1205,6 +1206,8 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
tcp->check = tcp_v6_check(payload_len, &ipv6->saddr,
&ipv6->daddr, check);
}
+
+ return (unsigned int)((unsigned char *)tcp + tcp->doff * 4 - skb->data);
}
static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index)
@@ -1561,8 +1564,9 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
mlx5e_macsec_offload_handle_rx_skb(netdev, skb, cqe);
if (lro_num_seg > 1) {
- mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
- skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
+ unsigned int hdrlen = mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
+
+ skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt - hdrlen, lro_num_seg);
/* Subtract one since we already counted this as one
* "regular" packet in mlx5e_complete_rx_cqe()
*/
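Note: the recomputation matters numerically. For an aggregated cqe_bcnt of 4434 bytes carrying 3 segments behind a 54-byte Ethernet + IPv4 + TCP header, the old code reported gso_size = DIV_ROUND_UP(4434, 3) = 1478, overstating the MSS; the fixed code reports DIV_ROUND_UP(4434 - 54, 3) = 1460, the actual per-segment payload.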
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index f1d908f61134..fef418e1ed1a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2028,9 +2028,8 @@ err_out:
return err;
}
-static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
+static bool mlx5_flow_has_geneve_opt(struct mlx5_flow_spec *spec)
{
- struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
void *headers_v = MLX5_ADDR_OF(fte_match_param,
spec->match_value,
misc_parameters_3);
@@ -2069,7 +2068,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
}
complete_all(&flow->del_hw_done);
- if (mlx5_flow_has_geneve_opt(flow))
+ if (mlx5_flow_has_geneve_opt(&attr->parse_attr->spec))
mlx5_geneve_tlv_option_del(priv->mdev->geneve);
if (flow->decap_route)
@@ -2574,12 +2573,13 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
if (err) {
- kvfree(tmp_spec);
NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
- return err;
+ } else {
+ err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
}
- err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
+ if (mlx5_flow_has_geneve_opt(tmp_spec))
+ mlx5_geneve_tlv_option_del(priv->mdev->geneve);
kvfree(tmp_spec);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index b6ae384396b3..ad9f6fca9b6a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -1076,6 +1076,7 @@ static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node,
return err;
}
esw_qos_node_set_parent(node, parent);
+ node->bw_share = 0;
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 7fb8a3381f84..4917d185d0c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1295,12 +1295,15 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_ECPF, enabled_events);
if (ret)
goto ecpf_err;
- if (mlx5_core_ec_sriov_enabled(esw->dev)) {
- ret = mlx5_eswitch_load_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs,
- enabled_events);
- if (ret)
- goto ec_vf_err;
- }
+ }
+
+ /* Enable ECVF vports */
+ if (mlx5_core_ec_sriov_enabled(esw->dev)) {
+ ret = mlx5_eswitch_load_ec_vf_vports(esw,
+ esw->esw_funcs.num_ec_vfs,
+ enabled_events);
+ if (ret)
+ goto ec_vf_err;
}
/* Enable VF vports */
@@ -1331,9 +1334,11 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw)
{
mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs);
+ if (mlx5_core_ec_sriov_enabled(esw->dev))
+ mlx5_eswitch_unload_ec_vf_vports(esw,
+ esw->esw_funcs.num_ec_vfs);
+
if (mlx5_ecpf_vport_exists(esw->dev)) {
- if (mlx5_core_ec_sriov_enabled(esw->dev))
- mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs);
mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 0e3a977d5332..bee906661282 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1182,19 +1182,19 @@ static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw,
static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
struct mlx5_core_dev *peer_dev)
{
+ struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch;
struct mlx5_flow_destination dest = {};
struct mlx5_flow_act flow_act = {0};
struct mlx5_flow_handle **flows;
- /* total vports is the same for both e-switches */
- int nvports = esw->total_vports;
struct mlx5_flow_handle *flow;
+ struct mlx5_vport *peer_vport;
struct mlx5_flow_spec *spec;
- struct mlx5_vport *vport;
int err, pfindex;
unsigned long i;
void *misc;
- if (!MLX5_VPORT_MANAGER(esw->dev) && !mlx5_core_is_ecpf_esw_manager(esw->dev))
+ if (!MLX5_VPORT_MANAGER(peer_dev) &&
+ !mlx5_core_is_ecpf_esw_manager(peer_dev))
return 0;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
@@ -1203,7 +1203,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
peer_miss_rules_setup(esw, peer_dev, spec, &dest);
- flows = kvcalloc(nvports, sizeof(*flows), GFP_KERNEL);
+ flows = kvcalloc(peer_esw->total_vports, sizeof(*flows), GFP_KERNEL);
if (!flows) {
err = -ENOMEM;
goto alloc_flows_err;
@@ -1213,10 +1213,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
misc_parameters);
- if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
- esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
- spec, MLX5_VPORT_PF);
+ if (mlx5_core_is_ecpf_esw_manager(peer_dev)) {
+ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF);
+ esw_set_peer_miss_rule_source_port(esw, peer_esw, spec,
+ MLX5_VPORT_PF);
flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw),
spec, &flow_act, &dest, 1);
@@ -1224,11 +1224,11 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
err = PTR_ERR(flow);
goto add_pf_flow_err;
}
- flows[vport->index] = flow;
+ flows[peer_vport->index] = flow;
}
- if (mlx5_ecpf_vport_exists(esw->dev)) {
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
+ if (mlx5_ecpf_vport_exists(peer_dev)) {
+ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF);
MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF);
flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw),
spec, &flow_act, &dest, 1);
@@ -1236,13 +1236,14 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
err = PTR_ERR(flow);
goto add_ecpf_flow_err;
}
- flows[vport->index] = flow;
+ flows[peer_vport->index] = flow;
}
- mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) {
+ mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport,
+ mlx5_core_max_vfs(peer_dev)) {
esw_set_peer_miss_rule_source_port(esw,
- peer_dev->priv.eswitch,
- spec, vport->vport);
+ peer_esw,
+ spec, peer_vport->vport);
flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw),
spec, &flow_act, &dest, 1);
@@ -1250,22 +1251,22 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
err = PTR_ERR(flow);
goto add_vf_flow_err;
}
- flows[vport->index] = flow;
+ flows[peer_vport->index] = flow;
}
- if (mlx5_core_ec_sriov_enabled(esw->dev)) {
- mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) {
- if (i >= mlx5_core_max_ec_vfs(peer_dev))
- break;
- esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
- spec, vport->vport);
+ if (mlx5_core_ec_sriov_enabled(peer_dev)) {
+ mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport,
+ mlx5_core_max_ec_vfs(peer_dev)) {
+ esw_set_peer_miss_rule_source_port(esw, peer_esw,
+ spec,
+ peer_vport->vport);
flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow)) {
err = PTR_ERR(flow);
goto add_ec_vf_flow_err;
}
- flows[vport->index] = flow;
+ flows[peer_vport->index] = flow;
}
}
@@ -1282,25 +1283,27 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
return 0;
add_ec_vf_flow_err:
- mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) {
- if (!flows[vport->index])
+ mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport,
+ mlx5_core_max_ec_vfs(peer_dev)) {
+ if (!flows[peer_vport->index])
continue;
- mlx5_del_flow_rules(flows[vport->index]);
+ mlx5_del_flow_rules(flows[peer_vport->index]);
}
add_vf_flow_err:
- mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) {
- if (!flows[vport->index])
+ mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport,
+ mlx5_core_max_vfs(peer_dev)) {
+ if (!flows[peer_vport->index])
continue;
- mlx5_del_flow_rules(flows[vport->index]);
+ mlx5_del_flow_rules(flows[peer_vport->index]);
}
- if (mlx5_ecpf_vport_exists(esw->dev)) {
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
- mlx5_del_flow_rules(flows[vport->index]);
+ if (mlx5_ecpf_vport_exists(peer_dev)) {
+ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF);
+ mlx5_del_flow_rules(flows[peer_vport->index]);
}
add_ecpf_flow_err:
- if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
- mlx5_del_flow_rules(flows[vport->index]);
+ if (mlx5_core_is_ecpf_esw_manager(peer_dev)) {
+ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF);
+ mlx5_del_flow_rules(flows[peer_vport->index]);
}
add_pf_flow_err:
esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
@@ -1313,37 +1316,34 @@ alloc_flows_err:
static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
struct mlx5_core_dev *peer_dev)
{
+ struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch;
u16 peer_index = mlx5_get_dev_index(peer_dev);
struct mlx5_flow_handle **flows;
- struct mlx5_vport *vport;
+ struct mlx5_vport *peer_vport;
unsigned long i;
flows = esw->fdb_table.offloads.peer_miss_rules[peer_index];
if (!flows)
return;
- if (mlx5_core_ec_sriov_enabled(esw->dev)) {
- mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) {
- /* The flow for a particular vport could be NULL if the other ECPF
- * has fewer or no VFs enabled
- */
- if (!flows[vport->index])
- continue;
- mlx5_del_flow_rules(flows[vport->index]);
- }
+ if (mlx5_core_ec_sriov_enabled(peer_dev)) {
+ mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport,
+ mlx5_core_max_ec_vfs(peer_dev))
+ mlx5_del_flow_rules(flows[peer_vport->index]);
}
- mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev))
- mlx5_del_flow_rules(flows[vport->index]);
+ mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport,
+ mlx5_core_max_vfs(peer_dev))
+ mlx5_del_flow_rules(flows[peer_vport->index]);
- if (mlx5_ecpf_vport_exists(esw->dev)) {
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
- mlx5_del_flow_rules(flows[vport->index]);
+ if (mlx5_ecpf_vport_exists(peer_dev)) {
+ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF);
+ mlx5_del_flow_rules(flows[peer_vport->index]);
}
- if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
- mlx5_del_flow_rules(flows[vport->index]);
+ if (mlx5_core_is_ecpf_esw_manager(peer_dev)) {
+ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF);
+ mlx5_del_flow_rules(flows[peer_vport->index]);
}
kvfree(flows);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 23a7e8e7adfa..3dd9a6f40709 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -113,13 +113,16 @@
#define ETHTOOL_PRIO_NUM_LEVELS 1
#define ETHTOOL_NUM_PRIOS 11
#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
-/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy,
+/* Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy,
* {IPsec RoCE MPV,Alias table},IPsec RoCE policy
*/
-#define KERNEL_NIC_PRIO_NUM_LEVELS 11
+#define KERNEL_NIC_PRIO_NUM_LEVELS 10
#define KERNEL_NIC_NUM_PRIOS 1
-/* One more level for tc */
-#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)
+/* One more level for tc, and one more for promisc */
+#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 2)
+
+#define KERNEL_NIC_PROMISC_NUM_PRIOS 1
+#define KERNEL_NIC_PROMISC_NUM_LEVELS 1
#define KERNEL_NIC_TC_NUM_PRIOS 1
#define KERNEL_NIC_TC_NUM_LEVELS 3
@@ -187,6 +190,8 @@ static struct init_tree_node {
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS,
KERNEL_NIC_TC_NUM_LEVELS),
+ ADD_MULTIPLE_PRIO(KERNEL_NIC_PROMISC_NUM_PRIOS,
+ KERNEL_NIC_PROMISC_NUM_LEVELS),
ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS,
KERNEL_NIC_PRIO_NUM_LEVELS))),
ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
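Note: the level bookkeeping above stays balanced: KERNEL_NIC_PRIO_NUM_LEVELS drops from 11 to 10 because the promiscuous table moves out of the kernel NIC prio, while KERNEL_MIN_LEVEL is unchanged at 12 (previously 11 + 1 for tc, now 10 + 2 for tc plus the new single-level promisc prio).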
@@ -2228,6 +2233,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
struct mlx5_flow_handle *rule;
struct match_list *iter;
bool take_write = false;
+ bool try_again = false;
struct fs_fte *fte;
u64 version = 0;
int err;
@@ -2292,6 +2298,7 @@ skip_search:
nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
if (!g->node.active) {
+ try_again = true;
up_write_ref_node(&g->node, false);
continue;
}
@@ -2313,7 +2320,8 @@ skip_search:
tree_put_node(&fte->node, false);
return rule;
}
- rule = ERR_PTR(-ENOENT);
+ err = try_again ? -EAGAIN : -ENOENT;
+ rule = ERR_PTR(err);
out:
kmem_cache_free(steering->ftes_cache, fte);
return rule;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 41e8660c819c..9c1504d29d34 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -2257,6 +2257,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
{ PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */
{ PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */
{ PCI_VDEVICE(MELLANOX, 0x1025) }, /* ConnectX-9 */
+ { PCI_VDEVICE(MELLANOX, 0x1027) }, /* ConnectX-10 */
{ PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
{ PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */
{ PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 972e8e9df585..9bc9bd83c232 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -291,7 +291,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 function)
static int alloc_system_page(struct mlx5_core_dev *dev, u32 function)
{
struct device *device = mlx5_core_dma_dev(dev);
- int nid = dev_to_node(device);
+ int nid = dev->priv.numa_node;
struct page *page;
u64 zero_addr = 1;
u64 addr;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
index fb62f3bc4bd4..447ea3f8722c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
@@ -1370,8 +1370,8 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx,
struct mlx5hws_cmd_set_fte_attr fte_attr = {0};
struct mlx5hws_cmd_forward_tbl *fw_island;
struct mlx5hws_action *action;
- u32 i /*, packet_reformat_id*/;
- int ret;
+ int ret, last_dest_idx = -1;
+ u32 i;
if (num_dest <= 1) {
mlx5hws_err(ctx, "Action must have multiple dests\n");
@@ -1401,11 +1401,8 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx,
dest_list[i].destination_id = dests[i].dest->dest_obj.obj_id;
fte_attr.action_flags |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
fte_attr.ignore_flow_level = ignore_flow_level;
- /* ToDo: In SW steering we have a handling of 'go to WIRE'
- * destination here by upper layer setting 'is_wire_ft' flag
- * if the destination is wire.
- * This is because uplink should be last dest in the list.
- */
+ if (dests[i].is_wire_ft)
+ last_dest_idx = i;
break;
case MLX5HWS_ACTION_TYP_VPORT:
dest_list[i].destination_type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
@@ -1429,6 +1426,9 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx,
}
}
+ if (last_dest_idx != -1)
+ swap(dest_list[last_dest_idx], dest_list[num_dest - 1]);
+
fte_attr.dests_num = num_dest;
fte_attr.dests = dest_list;
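Note: the removed ToDo is resolved by remembering the index of a wire destination and swapping it to the tail, since the uplink must be the last entry in the destination list. The core move, as a standalone sketch:

#include <stdio.h>

struct dest { int id; int is_wire; };

int main(void)
{
	struct dest d[] = { { 7, 1 }, { 3, 0 }, { 5, 0 } };
	int n = 3, last_wire = -1;

	for (int i = 0; i < n; i++)
		if (d[i].is_wire)       /* is_wire_ft in the real code */
			last_wire = i;

	if (last_wire != -1) {          /* uplink must go last */
		struct dest tmp = d[last_wire];

		d[last_wire] = d[n - 1];
		d[n - 1] = tmp;
	}

	printf("%d %d %d\n", d[0].id, d[1].id, d[2].id); /* 5 3 7 */
	return 0;
}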
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c
index 70768953a4f6..ca7501c57468 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c
@@ -1070,7 +1070,7 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule,
struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
struct mlx5hws_bwc_complex_rule_hash_node *node, *old_node;
struct rhashtable *refcount_hash;
- int i;
+ int ret, i;
bwc_rule->complex_hash_node = NULL;
@@ -1078,7 +1078,11 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule,
if (unlikely(!node))
return -ENOMEM;
- node->tag = ida_alloc(&bwc_matcher->complex->metadata_ida, GFP_KERNEL);
+ ret = ida_alloc(&bwc_matcher->complex->metadata_ida, GFP_KERNEL);
+ if (ret < 0)
+ goto err_free_node;
+ node->tag = ret;
+
refcount_set(&node->refcount, 1);
/* Clear match buffer - turn off all the unrelated fields
@@ -1094,6 +1098,11 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule,
old_node = rhashtable_lookup_get_insert_fast(refcount_hash,
&node->hash_node,
hws_refcount_hash);
+ if (IS_ERR(old_node)) {
+ ret = PTR_ERR(old_node);
+ goto err_free_ida;
+ }
+
if (old_node) {
/* Rule with the same tag already exists - update refcount */
refcount_inc(&old_node->refcount);
@@ -1112,6 +1121,12 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule,
bwc_rule->complex_hash_node = node;
return 0;
+
+err_free_ida:
+ ida_free(&bwc_matcher->complex->metadata_ida, node->tag);
+err_free_node:
+ kfree(node);
+ return ret;
}
static void
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
index 5cc0dc002ac1..d45e1145d197 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
@@ -785,6 +785,9 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd,
HWS_SET_HDR(fc, match_param, IP_PROTOCOL_O,
outer_headers.ip_protocol,
eth_l3_outer.protocol_next_header);
+ HWS_SET_HDR(fc, match_param, IP_VERSION_O,
+ outer_headers.ip_version,
+ eth_l3_outer.ip_version);
HWS_SET_HDR(fc, match_param, IP_TTL_O,
outer_headers.ttl_hoplimit,
eth_l3_outer.time_to_live_hop_limit);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c
index 9d1c0e4b224a..bf4643d0ce17 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c
@@ -966,6 +966,9 @@ static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns,
switch (attr->type) {
case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
dest_action = mlx5_fs_get_dest_action_ft(fs_ctx, dst);
+ if (dst->dest_attr.ft->flags &
+ MLX5_FLOW_TABLE_UPLINK_VPORT)
+ dest_actions[num_dest_actions].is_wire_ft = true;
break;
case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
dest_action = mlx5_fs_get_dest_action_table_num(fs_ctx,
@@ -1357,6 +1360,7 @@ mlx5_cmd_hws_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
pkt_reformat->fs_hws_action.pr_data = pr_data;
}
+ mutex_init(&pkt_reformat->fs_hws_action.lock);
pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_HWS;
pkt_reformat->fs_hws_action.hws_action = hws_action;
return 0;
@@ -1503,7 +1507,6 @@ static int mlx5_cmd_hws_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
err = -ENOMEM;
goto release_mh;
}
- mutex_init(&modify_hdr->fs_hws_action.lock);
modify_hdr->fs_hws_action.mh_data = mh_data;
modify_hdr->fs_hws_action.fs_pool = pool;
modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_SW;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
index 9bbadc4d8a0b..d8ac6c196211 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
@@ -213,6 +213,7 @@ struct mlx5hws_action_dest_attr {
struct mlx5hws_action *dest;
/* Optional reformat action */
struct mlx5hws_action *reformat;
+ bool is_wire_ft;
};
/**
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
index fb2e5b844c15..d76d7a945899 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
@@ -447,8 +447,10 @@ static int mlxbf_gige_probe(struct platform_device *pdev)
priv->llu_plu_irq = platform_get_irq(pdev, MLXBF_GIGE_LLU_PLU_INTR_IDX);
phy_irq = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(&pdev->dev), "phy", 0);
- if (phy_irq < 0) {
- dev_err(&pdev->dev, "Error getting PHY irq. Use polling instead");
+ if (phy_irq == -EPROBE_DEFER) {
+ err = -EPROBE_DEFER;
+ goto out;
+ } else if (phy_irq < 0) {
phy_irq = PHY_POLL;
}
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c
index e2368075ab8c..4521d0483d18 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c
@@ -127,11 +127,8 @@ static int fbnic_mbx_map_msg(struct fbnic_dev *fbd, int mbx_idx,
return -EBUSY;
addr = dma_map_single(fbd->dev, msg, PAGE_SIZE, direction);
- if (dma_mapping_error(fbd->dev, addr)) {
- free_page((unsigned long)msg);
-
+ if (dma_mapping_error(fbd->dev, addr))
return -ENOSPC;
- }
mbx->buf_info[tail].msg = msg;
mbx->buf_info[tail].addr = addr;
diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.h b/drivers/net/ethernet/microchip/lan743x_ptp.h
index e8d073bfa2ca..f33dc83c5700 100644
--- a/drivers/net/ethernet/microchip/lan743x_ptp.h
+++ b/drivers/net/ethernet/microchip/lan743x_ptp.h
@@ -18,9 +18,9 @@
*/
#define LAN743X_PTP_N_EVENT_CHAN 2
#define LAN743X_PTP_N_PEROUT LAN743X_PTP_N_EVENT_CHAN
-#define LAN743X_PTP_N_EXTTS 4
-#define LAN743X_PTP_N_PPS 0
#define PCI11X1X_PTP_IO_MAX_CHANNELS 8
+#define LAN743X_PTP_N_EXTTS PCI11X1X_PTP_IO_MAX_CHANNELS
+#define LAN743X_PTP_N_PPS 0
#define PTP_CMD_CTL_TIMEOUT_CNT 50
struct lan743x_adapter;
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 3504507477c6..58f8ee710912 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -6,6 +6,7 @@
#include <linux/pci.h>
#include <linux/utsname.h>
#include <linux/version.h>
+#include <linux/export.h>
#include <net/mana/mana.h>
@@ -31,6 +32,9 @@ static void mana_gd_init_pf_regs(struct pci_dev *pdev)
gc->db_page_base = gc->bar0_va +
mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF);
+ gc->phys_db_page_base = gc->bar0_pa +
+ mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF);
+
sriov_base_off = mana_gd_r64(gc, GDMA_SRIOV_REG_CFG_BASE_OFF);
sriov_base_va = gc->bar0_va + sriov_base_off;
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index ccd2885c939e..faad1cb880f8 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -10,6 +10,7 @@
#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/pci.h>
+#include <linux/export.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index daf1e82cb76b..0e60a6bef99a 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -516,9 +516,9 @@ static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds,
unsigned long start_time;
unsigned long max_wait;
unsigned long duration;
- int done = 0;
bool fw_up;
int opcode;
+ bool done;
int err;
/* Wait for dev cmd to complete, retrying if we get EAGAIN,
@@ -526,6 +526,7 @@ static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds,
*/
max_wait = jiffies + (max_seconds * HZ);
try_again:
+ done = false;
opcode = idev->opcode;
start_time = jiffies;
for (fw_up = ionic_is_fw_running(idev);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 2ac59564ded1..d10b58ebf603 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -321,7 +321,7 @@ static int ionic_xdp_post_frame(struct ionic_queue *q, struct xdp_frame *frame,
len, DMA_TO_DEVICE);
} else /* XDP_REDIRECT */ {
dma_addr = ionic_tx_map_single(q, frame->data, len);
- if (!dma_addr)
+ if (dma_addr == DMA_MAPPING_ERROR)
return -EIO;
}
@@ -357,7 +357,7 @@ static int ionic_xdp_post_frame(struct ionic_queue *q, struct xdp_frame *frame,
} else {
dma_addr = ionic_tx_map_frag(q, frag, 0,
skb_frag_size(frag));
- if (dma_mapping_error(q->dev, dma_addr)) {
+ if (dma_addr == DMA_MAPPING_ERROR) {
ionic_tx_desc_unmap_bufs(q, desc_info);
return -EIO;
}
@@ -1083,7 +1083,7 @@ static dma_addr_t ionic_tx_map_single(struct ionic_queue *q,
net_warn_ratelimited("%s: DMA single map failed on %s!\n",
dev_name(dev), q->name);
q_to_tx_stats(q)->dma_map_err++;
- return 0;
+ return DMA_MAPPING_ERROR;
}
return dma_addr;
}
@@ -1100,7 +1100,7 @@ static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q,
net_warn_ratelimited("%s: DMA frag map failed on %s!\n",
dev_name(dev), q->name);
q_to_tx_stats(q)->dma_map_err++;
- return 0;
+ return DMA_MAPPING_ERROR;
}
return dma_addr;
}
@@ -1116,7 +1116,7 @@ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb,
int frag_idx;
dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb));
- if (!dma_addr)
+ if (dma_addr == DMA_MAPPING_ERROR)
return -EIO;
buf_info->dma_addr = dma_addr;
buf_info->len = skb_headlen(skb);
@@ -1126,7 +1126,7 @@ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb,
nfrags = skb_shinfo(skb)->nr_frags;
for (frag_idx = 0; frag_idx < nfrags; frag_idx++, frag++) {
dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag));
- if (!dma_addr)
+ if (dma_addr == DMA_MAPPING_ERROR)
goto dma_fail;
buf_info->dma_addr = dma_addr;
buf_info->len = skb_frag_size(frag);
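Note: returning 0 on mapping failure was ambiguous, since 0 can be a perfectly valid bus address; DMA_MAPPING_ERROR (all-ones) is the sentinel the DMA API reserves for failure. A hedged kernel-style sketch of the corrected contract (helper name hypothetical):

#include <linux/dma-mapping.h>

/* Sketch: map a buffer, using DMA_MAPPING_ERROR - not 0 - as failure */
static dma_addr_t map_buf(struct device *dev, void *buf, size_t len)
{
	dma_addr_t addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

	if (dma_mapping_error(dev, addr))
		return DMA_MAPPING_ERROR;
	return addr;
}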
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c
index f55eed092f25..7d78f072b0a1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c
@@ -242,7 +242,7 @@ static int qed_mfw_get_tlv_group(u8 tlv_type, u8 *tlv_group)
}
/* Returns size of the data buffer or, -1 in case TLV data is not available. */
-static int
+static noinline_for_stack int
qed_mfw_get_gen_tlv_value(struct qed_drv_tlv_hdr *p_tlv,
struct qed_mfw_tlv_generic *p_drv_buf,
struct qed_tlv_parsed_buf *p_buf)
@@ -304,7 +304,7 @@ qed_mfw_get_gen_tlv_value(struct qed_drv_tlv_hdr *p_tlv,
return -1;
}
-static int
+static noinline_for_stack int
qed_mfw_get_eth_tlv_value(struct qed_drv_tlv_hdr *p_tlv,
struct qed_mfw_tlv_eth *p_drv_buf,
struct qed_tlv_parsed_buf *p_buf)
@@ -438,7 +438,7 @@ qed_mfw_get_tlv_time_value(struct qed_mfw_tlv_time *p_time,
return QED_MFW_TLV_TIME_SIZE;
}
-static int
+static noinline_for_stack int
qed_mfw_get_fcoe_tlv_value(struct qed_drv_tlv_hdr *p_tlv,
struct qed_mfw_tlv_fcoe *p_drv_buf,
struct qed_tlv_parsed_buf *p_buf)
@@ -1073,7 +1073,7 @@ qed_mfw_get_fcoe_tlv_value(struct qed_drv_tlv_hdr *p_tlv,
return -1;
}
-static int
+static noinline_for_stack int
qed_mfw_get_iscsi_tlv_value(struct qed_drv_tlv_hdr *p_tlv,
struct qed_mfw_tlv_iscsi *p_drv_buf,
struct qed_tlv_parsed_buf *p_buf)
diff --git a/drivers/net/ethernet/renesas/rtsn.c b/drivers/net/ethernet/renesas/rtsn.c
index 6b3f7fca8d15..05c4b6c8c9c3 100644
--- a/drivers/net/ethernet/renesas/rtsn.c
+++ b/drivers/net/ethernet/renesas/rtsn.c
@@ -1259,7 +1259,12 @@ static int rtsn_probe(struct platform_device *pdev)
priv = netdev_priv(ndev);
priv->pdev = pdev;
priv->ndev = ndev;
+
priv->ptp_priv = rcar_gen4_ptp_alloc(pdev);
+ if (!priv->ptp_priv) {
+ ret = -ENOMEM;
+ goto error_free;
+ }
spin_lock_init(&priv->lock);
platform_set_drvdata(pdev, priv);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 9a47015254bb..ea33ae39be6b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -433,6 +433,12 @@ static int intel_crosststamp(ktime_t *device,
return -ETIMEDOUT;
}
+ *system = (struct system_counterval_t) {
+ .cycles = 0,
+ .cs_id = CSID_X86_ART,
+ .use_nsecs = false,
+ };
+
num_snapshot = (readl(ioaddr + GMAC_TIMESTAMP_STATUS) &
GMAC_TIMESTAMP_ATSNS_MASK) >>
GMAC_TIMESTAMP_ATSNS_SHIFT;
@@ -448,7 +454,7 @@ static int intel_crosststamp(ktime_t *device,
}
system->cycles *= intel_priv->crossts_adj;
- system->cs_id = CSID_X86_ART;
+
priv->plat->flags &= ~STMMAC_FLAG_INT_SNAPSHOT_EN;
return 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 7840bc403788..5dcc95bc0ad2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -364,19 +364,17 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
}
/* TX/RX NORMAL interrupts */
- if (likely(intr_status & XGMAC_NIS)) {
- if (likely(intr_status & XGMAC_RI)) {
- u64_stats_update_begin(&stats->syncp);
- u64_stats_inc(&stats->rx_normal_irq_n[chan]);
- u64_stats_update_end(&stats->syncp);
- ret |= handle_rx;
- }
- if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) {
- u64_stats_update_begin(&stats->syncp);
- u64_stats_inc(&stats->tx_normal_irq_n[chan]);
- u64_stats_update_end(&stats->syncp);
- ret |= handle_tx;
- }
+ if (likely(intr_status & XGMAC_RI)) {
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
+ ret |= handle_rx;
+ }
+ if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) {
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+ u64_stats_update_end(&stats->syncp);
+ ret |= handle_tx;
}
/* Clear interrupts */
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index ddca8fc7883e..26119d02a94d 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -3336,7 +3336,7 @@ static int niu_rbr_add_page(struct niu *np, struct rx_ring_info *rp,
addr = np->ops->map_page(np->device, page, 0,
PAGE_SIZE, DMA_FROM_DEVICE);
- if (!addr) {
+ if (np->ops->mapping_error(np->device, addr)) {
__free_page(page);
return -ENOMEM;
}
@@ -6676,6 +6676,8 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb,
len = skb_headlen(skb);
mapping = np->ops->map_single(np->device, skb->data,
len, DMA_TO_DEVICE);
+ if (np->ops->mapping_error(np->device, mapping))
+ goto out_drop;
prod = rp->prod;
@@ -6717,6 +6719,8 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb,
mapping = np->ops->map_page(np->device, skb_frag_page(frag),
skb_frag_off(frag), len,
DMA_TO_DEVICE);
+ if (np->ops->mapping_error(np->device, mapping))
+ goto out_unmap;
rp->tx_buffs[prod].skb = NULL;
rp->tx_buffs[prod].mapping = mapping;
@@ -6741,6 +6745,19 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb,
out:
return NETDEV_TX_OK;
+out_unmap:
+ while (i--) {
+ const skb_frag_t *frag;
+
+ prod = PREVIOUS_TX(rp, prod);
+ frag = &skb_shinfo(skb)->frags[i];
+ np->ops->unmap_page(np->device, rp->tx_buffs[prod].mapping,
+ skb_frag_size(frag), DMA_TO_DEVICE);
+ }
+
+ np->ops->unmap_single(np->device, rp->tx_buffs[rp->prod].mapping,
+ skb_headlen(skb), DMA_TO_DEVICE);
+
out_drop:
rp->tx_errors++;
kfree_skb(skb);
@@ -9644,6 +9661,11 @@ static void niu_pci_unmap_single(struct device *dev, u64 dma_address,
dma_unmap_single(dev, dma_address, size, direction);
}
+static int niu_pci_mapping_error(struct device *dev, u64 addr)
+{
+ return dma_mapping_error(dev, addr);
+}
+
static const struct niu_ops niu_pci_ops = {
.alloc_coherent = niu_pci_alloc_coherent,
.free_coherent = niu_pci_free_coherent,
@@ -9651,6 +9673,7 @@ static const struct niu_ops niu_pci_ops = {
.unmap_page = niu_pci_unmap_page,
.map_single = niu_pci_map_single,
.unmap_single = niu_pci_unmap_single,
+ .mapping_error = niu_pci_mapping_error,
};
static void niu_driver_version(void)
@@ -10019,6 +10042,11 @@ static void niu_phys_unmap_single(struct device *dev, u64 dma_address,
/* Nothing to do. */
}
+static int niu_phys_mapping_error(struct device *dev, u64 dma_address)
+{
+ return false;
+}
+
static const struct niu_ops niu_phys_ops = {
.alloc_coherent = niu_phys_alloc_coherent,
.free_coherent = niu_phys_free_coherent,
@@ -10026,6 +10054,7 @@ static const struct niu_ops niu_phys_ops = {
.unmap_page = niu_phys_unmap_page,
.map_single = niu_phys_map_single,
.unmap_single = niu_phys_unmap_single,
+ .mapping_error = niu_phys_mapping_error,
};
static int niu_of_probe(struct platform_device *op)
diff --git a/drivers/net/ethernet/sun/niu.h b/drivers/net/ethernet/sun/niu.h
index 04c215f91fc0..0b169c08b0f2 100644
--- a/drivers/net/ethernet/sun/niu.h
+++ b/drivers/net/ethernet/sun/niu.h
@@ -2879,6 +2879,9 @@ struct tx_ring_info {
#define NEXT_TX(tp, index) \
(((index) + 1) < (tp)->pending ? ((index) + 1) : 0)
+#define PREVIOUS_TX(tp, index) \
+ (((index) - 1) >= 0 ? ((index) - 1) : (((tp)->pending) - 1))
+
static inline u32 niu_tx_avail(struct tx_ring_info *tp)
{
return (tp->pending -
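Note: PREVIOUS_TX is the inverse of NEXT_TX for the new unwind path; stepping back from slot 0 wraps to pending - 1. A tiny standalone check of the wraparound (macros adapted to take pending directly):

#include <stdio.h>

#define NEXT_TX(pending, i)     (((i) + 1) < (pending) ? ((i) + 1) : 0)
#define PREVIOUS_TX(pending, i) (((i) - 1) >= 0 ? ((i) - 1) : ((pending) - 1))

int main(void)
{
	int pending = 256;

	printf("%d %d\n", NEXT_TX(pending, 255), PREVIOUS_TX(pending, 0));
	/* prints: 0 255 */
	return 0;
}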
@@ -3140,6 +3143,7 @@ struct niu_ops {
enum dma_data_direction direction);
void (*unmap_single)(struct device *dev, u64 dma_address,
size_t size, enum dma_data_direction direction);
+ int (*mapping_error)(struct device *dev, u64 dma_address);
};
struct niu_link_config {
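Note: funneling the failure check through a new niu_ops callback keeps the fast path backend-agnostic: the PCI backend defers to dma_mapping_error(), while the phys backend's identity mapping can never fail. A standalone sketch of that split (values illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct ops { int (*mapping_error)(uint64_t addr); };

static int pci_mapping_error(uint64_t addr)
{
	return addr == UINT64_MAX; /* stand-in for dma_mapping_error() */
}

static int phys_mapping_error(uint64_t addr)
{
	(void)addr;
	return false;              /* identity mapping cannot fail */
}

int main(void)
{
	struct ops pci = { pci_mapping_error }, phys = { phys_mapping_error };

	printf("%d %d\n", pci.mapping_error(UINT64_MAX), phys.mapping_error(0));
	return 0;
}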
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index f20d1ff192ef..231ca141331f 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -856,8 +856,6 @@ static struct sk_buff *am65_cpsw_build_skb(void *page_addr,
{
struct sk_buff *skb;
- len += AM65_CPSW_HEADROOM;
-
skb = build_skb(page_addr, len);
if (unlikely(!skb))
return NULL;
@@ -1344,7 +1342,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
}
skb = am65_cpsw_build_skb(page_addr, ndev,
- AM65_CPSW_MAX_PACKET_SIZE, headroom);
+ PAGE_SIZE, headroom);
if (unlikely(!skb)) {
new_page = page;
goto requeue;
diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c
index 5b8fdb882172..12f25cec6255 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_common.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_common.c
@@ -98,20 +98,11 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
{
struct cppi5_host_desc_t *first_desc, *next_desc;
dma_addr_t buf_dma, next_desc_dma;
- struct prueth_swdata *swdata;
- struct page *page;
u32 buf_dma_len;
first_desc = desc;
next_desc = first_desc;
- swdata = cppi5_hdesc_get_swdata(desc);
- if (swdata->type == PRUETH_SWDATA_PAGE) {
- page = swdata->data.page;
- page_pool_recycle_direct(page->pp, swdata->data.page);
- goto free_desc;
- }
-
cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len);
k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma);
@@ -135,7 +126,6 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc);
}
-free_desc:
k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc);
}
EXPORT_SYMBOL_GPL(prueth_xmit_free);
@@ -612,13 +602,8 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac,
k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma);
cppi5_hdesc_attach_buf(first_desc, buf_dma, xdpf->len, buf_dma, xdpf->len);
swdata = cppi5_hdesc_get_swdata(first_desc);
- if (page) {
- swdata->type = PRUETH_SWDATA_PAGE;
- swdata->data.page = page;
- } else {
- swdata->type = PRUETH_SWDATA_XDPF;
- swdata->data.xdpf = xdpf;
- }
+ swdata->type = PRUETH_SWDATA_XDPF;
+ swdata->data.xdpf = xdpf;
/* Report BQL before sending the packet */
netif_txq = netdev_get_tx_queue(ndev, tx_chn->id);
diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.c b/drivers/net/ethernet/ti/icssg/icssg_config.c
index ddfd1c02a885..da53eb04b0a4 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_config.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_config.c
@@ -288,8 +288,12 @@ static int prueth_fw_offload_buffer_setup(struct prueth_emac *emac)
int i;
addr = lower_32_bits(prueth->msmcram.pa);
- if (slice)
- addr += PRUETH_NUM_BUF_POOLS * PRUETH_EMAC_BUF_POOL_SIZE;
+ if (slice) {
+ if (prueth->pdata.banked_ms_ram)
+ addr += MSMC_RAM_BANK_SIZE;
+ else
+ addr += PRUETH_SW_TOTAL_BUF_SIZE_PER_SLICE;
+ }
if (addr % SZ_64K) {
dev_warn(prueth->dev, "buffer pool needs to be 64KB aligned\n");
@@ -297,43 +301,66 @@ static int prueth_fw_offload_buffer_setup(struct prueth_emac *emac)
}
bpool_cfg = emac->dram.va + BUFFER_POOL_0_ADDR_OFFSET;
- /* workaround for f/w bug. bpool 0 needs to be initialized */
- for (i = 0; i < PRUETH_NUM_BUF_POOLS; i++) {
+
+ /* Configure buffer pools for forwarding buffers
+ * - used by firmware to store packets to be forwarded to other port
+ * - 8 total pools per slice
+ */
+ for (i = 0; i < PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE; i++) {
writel(addr, &bpool_cfg[i].addr);
- writel(PRUETH_EMAC_BUF_POOL_SIZE, &bpool_cfg[i].len);
- addr += PRUETH_EMAC_BUF_POOL_SIZE;
+ writel(PRUETH_SW_FWD_BUF_POOL_SIZE, &bpool_cfg[i].len);
+ addr += PRUETH_SW_FWD_BUF_POOL_SIZE;
}
- if (!slice)
- addr += PRUETH_NUM_BUF_POOLS * PRUETH_EMAC_BUF_POOL_SIZE;
- else
- addr += PRUETH_SW_NUM_BUF_POOLS_HOST * PRUETH_SW_BUF_POOL_SIZE_HOST;
-
- for (i = PRUETH_NUM_BUF_POOLS;
- i < 2 * PRUETH_SW_NUM_BUF_POOLS_HOST + PRUETH_NUM_BUF_POOLS;
- i++) {
- /* The driver only uses first 4 queues per PRU so only initialize them */
- if (i % PRUETH_SW_NUM_BUF_POOLS_HOST < PRUETH_SW_NUM_BUF_POOLS_PER_PRU) {
- writel(addr, &bpool_cfg[i].addr);
- writel(PRUETH_SW_BUF_POOL_SIZE_HOST, &bpool_cfg[i].len);
- addr += PRUETH_SW_BUF_POOL_SIZE_HOST;
+ /* Configure buffer pools for Local Injection buffers
+ * - used by firmware to store packets received from host core
+ * - 16 total pools per slice
+ */
+ for (i = 0; i < PRUETH_NUM_LI_BUF_POOLS_PER_SLICE; i++) {
+ int cfg_idx = i + PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE;
+
+ /* The driver only uses the first 4 queues per PRU,
+ * so only initialize buffers for them
+ */
+ if ((i % PRUETH_NUM_LI_BUF_POOLS_PER_PORT_PER_SLICE)
+ < PRUETH_SW_USED_LI_BUF_POOLS_PER_PORT_PER_SLICE) {
+ writel(addr, &bpool_cfg[cfg_idx].addr);
+ writel(PRUETH_SW_LI_BUF_POOL_SIZE,
+ &bpool_cfg[cfg_idx].len);
+ addr += PRUETH_SW_LI_BUF_POOL_SIZE;
} else {
- writel(0, &bpool_cfg[i].addr);
- writel(0, &bpool_cfg[i].len);
+ writel(0, &bpool_cfg[cfg_idx].addr);
+ writel(0, &bpool_cfg[cfg_idx].len);
}
}
- if (!slice)
- addr += PRUETH_SW_NUM_BUF_POOLS_HOST * PRUETH_SW_BUF_POOL_SIZE_HOST;
- else
- addr += PRUETH_EMAC_RX_CTX_BUF_SIZE;
+ /* Express RX buffer queue
+ * - used by firmware to store express packets to be transmitted
+ * to the host core
+ */
+ rxq_ctx = emac->dram.va + HOST_RX_Q_EXP_CONTEXT_OFFSET;
+ for (i = 0; i < 3; i++)
+ writel(addr, &rxq_ctx->start[i]);
+
+ addr += PRUETH_SW_HOST_EXP_BUF_POOL_SIZE;
+ writel(addr, &rxq_ctx->end);
+ /* Pre-emptible RX buffer queue
+ * - used by firmware to store preemptible packets to be transmitted
+ * to the host core
+ */
rxq_ctx = emac->dram.va + HOST_RX_Q_PRE_CONTEXT_OFFSET;
for (i = 0; i < 3; i++)
writel(addr, &rxq_ctx->start[i]);
- addr += PRUETH_EMAC_RX_CTX_BUF_SIZE;
- writel(addr - SZ_2K, &rxq_ctx->end);
+ addr += PRUETH_SW_HOST_PRE_BUF_POOL_SIZE;
+ writel(addr, &rxq_ctx->end);
+
+ /* Set pointer for default dropped packet write
+ * - used by firmware to temporarily store a packet to be dropped
+ */
+ rxq_ctx = emac->dram.va + DEFAULT_MSMC_Q_OFFSET;
+ writel(addr, &rxq_ctx->start[0]);
return 0;
}
@@ -347,13 +374,13 @@ static int prueth_emac_buffer_setup(struct prueth_emac *emac)
u32 addr;
int i;
- /* Layout to have 64KB aligned buffer pool
- * |BPOOL0|BPOOL1|RX_CTX0|RX_CTX1|
- */
-
addr = lower_32_bits(prueth->msmcram.pa);
- if (slice)
- addr += PRUETH_NUM_BUF_POOLS * PRUETH_EMAC_BUF_POOL_SIZE;
+ if (slice) {
+ if (prueth->pdata.banked_ms_ram)
+ addr += MSMC_RAM_BANK_SIZE;
+ else
+ addr += PRUETH_EMAC_TOTAL_BUF_SIZE_PER_SLICE;
+ }
if (addr % SZ_64K) {
dev_warn(prueth->dev, "buffer pool needs to be 64KB aligned\n");
@@ -361,39 +388,66 @@ static int prueth_emac_buffer_setup(struct prueth_emac *emac)
}
bpool_cfg = emac->dram.va + BUFFER_POOL_0_ADDR_OFFSET;
- /* workaround for f/w bug. bpool 0 needs to be initilalized */
- writel(addr, &bpool_cfg[0].addr);
- writel(0, &bpool_cfg[0].len);
- for (i = PRUETH_EMAC_BUF_POOL_START;
- i < PRUETH_EMAC_BUF_POOL_START + PRUETH_NUM_BUF_POOLS;
- i++) {
- writel(addr, &bpool_cfg[i].addr);
- writel(PRUETH_EMAC_BUF_POOL_SIZE, &bpool_cfg[i].len);
- addr += PRUETH_EMAC_BUF_POOL_SIZE;
+ /* Configure buffer pools for forwarding buffers
+ * - in mac mode there is no forwarding, so initialize all pools to 0
+ * - 8 total pools per slice
+ */
+ for (i = 0; i < PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE; i++) {
+ writel(0, &bpool_cfg[i].addr);
+ writel(0, &bpool_cfg[i].len);
}
- if (!slice)
- addr += PRUETH_NUM_BUF_POOLS * PRUETH_EMAC_BUF_POOL_SIZE;
- else
- addr += PRUETH_EMAC_RX_CTX_BUF_SIZE * 2;
+ /* Configure buffer pools for Local Injection buffers
+ * - used by firmware to store packets received from host core
+ * - 16 total pools per slice
+ */
+ bpool_cfg = emac->dram.va + BUFFER_POOL_0_ADDR_OFFSET;
+ for (i = 0; i < PRUETH_NUM_LI_BUF_POOLS_PER_SLICE; i++) {
+ int cfg_idx = i + PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE;
+
+ /* In EMAC mode, only the first 4 buffer pools are used,
+ * as each slice handles only one port
+ */
+ if (i < PRUETH_EMAC_USED_LI_BUF_POOLS_PER_PORT_PER_SLICE) {
+ writel(addr, &bpool_cfg[cfg_idx].addr);
+ writel(PRUETH_EMAC_LI_BUF_POOL_SIZE,
+ &bpool_cfg[cfg_idx].len);
+ addr += PRUETH_EMAC_LI_BUF_POOL_SIZE;
+ } else {
+ writel(0, &bpool_cfg[cfg_idx].addr);
+ writel(0, &bpool_cfg[cfg_idx].len);
+ }
+ }
- /* Pre-emptible RX buffer queue */
- rxq_ctx = emac->dram.va + HOST_RX_Q_PRE_CONTEXT_OFFSET;
+ /* Express RX buffer queue
+ * - used by firmware to store express packets to be transmitted
+ * to the host core
+ */
+ rxq_ctx = emac->dram.va + HOST_RX_Q_EXP_CONTEXT_OFFSET;
for (i = 0; i < 3; i++)
writel(addr, &rxq_ctx->start[i]);
- addr += PRUETH_EMAC_RX_CTX_BUF_SIZE;
+ addr += PRUETH_EMAC_HOST_EXP_BUF_POOL_SIZE;
writel(addr, &rxq_ctx->end);
- /* Express RX buffer queue */
- rxq_ctx = emac->dram.va + HOST_RX_Q_EXP_CONTEXT_OFFSET;
+ /* Pre-emptible RX buffer queue
+ * - used by firmware to store preemptible packets to be transmitted
+ * to the host core
+ */
+ rxq_ctx = emac->dram.va + HOST_RX_Q_PRE_CONTEXT_OFFSET;
for (i = 0; i < 3; i++)
writel(addr, &rxq_ctx->start[i]);
- addr += PRUETH_EMAC_RX_CTX_BUF_SIZE;
+ addr += PRUETH_EMAC_HOST_PRE_BUF_POOL_SIZE;
writel(addr, &rxq_ctx->end);
+ /* Set pointer for default dropped packet write
+ * - used by firmware to temporarily store a packet to be dropped
+ */
+ rxq_ctx = emac->dram.va + DEFAULT_MSMC_Q_OFFSET;
+ writel(addr, &rxq_ctx->start[0]);
+
return 0;
}
diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.h b/drivers/net/ethernet/ti/icssg/icssg_config.h
index c884e9fa099e..60d69744ffae 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_config.h
+++ b/drivers/net/ethernet/ti/icssg/icssg_config.h
@@ -26,21 +26,71 @@ struct icssg_flow_cfg {
#define PRUETH_MAX_RX_FLOWS 1 /* excluding default flow */
#define PRUETH_RX_FLOW_DATA 0
-#define PRUETH_EMAC_BUF_POOL_SIZE SZ_8K
-#define PRUETH_EMAC_POOLS_PER_SLICE 24
-#define PRUETH_EMAC_BUF_POOL_START 8
-#define PRUETH_NUM_BUF_POOLS 8
-#define PRUETH_EMAC_RX_CTX_BUF_SIZE SZ_16K /* per slice */
-#define MSMC_RAM_SIZE \
- (2 * (PRUETH_EMAC_BUF_POOL_SIZE * PRUETH_NUM_BUF_POOLS + \
- PRUETH_EMAC_RX_CTX_BUF_SIZE * 2))
-
-#define PRUETH_SW_BUF_POOL_SIZE_HOST SZ_4K
-#define PRUETH_SW_NUM_BUF_POOLS_HOST 8
-#define PRUETH_SW_NUM_BUF_POOLS_PER_PRU 4
-#define MSMC_RAM_SIZE_SWITCH_MODE \
- (MSMC_RAM_SIZE + \
- (2 * PRUETH_SW_BUF_POOL_SIZE_HOST * PRUETH_SW_NUM_BUF_POOLS_HOST))
+/* Defines for forwarding path buffer pools:
+ * - used by firmware to store packets to be forwarded to the other port
+ * - 8 total pools per slice
+ * - only used in switch mode (as no forwarding in mac mode)
+ */
+#define PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE 8
+#define PRUETH_SW_FWD_BUF_POOL_SIZE (SZ_8K)
+
+/* Defines for local injection path buffer pools:
+ * - used by firmware to store packets received from host core
+ * - 16 total pools per slice
+ * - 8 pools per port per slice and each slice handles both ports
+ * - only 4 out of 8 pools used per port (as only 4 real QoS levels in ICSSG)
+ * - switch mode: 8 total pools used
+ * - mac mode: 4 total pools used
+ */
+#define PRUETH_NUM_LI_BUF_POOLS_PER_SLICE 16
+#define PRUETH_NUM_LI_BUF_POOLS_PER_PORT_PER_SLICE 8
+#define PRUETH_SW_LI_BUF_POOL_SIZE SZ_4K
+#define PRUETH_SW_USED_LI_BUF_POOLS_PER_SLICE 8
+#define PRUETH_SW_USED_LI_BUF_POOLS_PER_PORT_PER_SLICE 4
+#define PRUETH_EMAC_LI_BUF_POOL_SIZE SZ_8K
+#define PRUETH_EMAC_USED_LI_BUF_POOLS_PER_SLICE 4
+#define PRUETH_EMAC_USED_LI_BUF_POOLS_PER_PORT_PER_SLICE 4
+
+/* Defines for host egress path - express and preemptible buffers
+ * - used by firmware to store express and preemptible packets
+ * to be transmitted to host core
+ * - used by both mac/switch modes
+ */
+#define PRUETH_SW_HOST_EXP_BUF_POOL_SIZE SZ_16K
+#define PRUETH_SW_HOST_PRE_BUF_POOL_SIZE (SZ_16K - SZ_2K)
+#define PRUETH_EMAC_HOST_EXP_BUF_POOL_SIZE PRUETH_SW_HOST_EXP_BUF_POOL_SIZE
+#define PRUETH_EMAC_HOST_PRE_BUF_POOL_SIZE PRUETH_SW_HOST_PRE_BUF_POOL_SIZE
+
+/* Buffer used by firmware to temporarily store a packet to be dropped */
+#define PRUETH_SW_DROP_PKT_BUF_SIZE SZ_2K
+#define PRUETH_EMAC_DROP_PKT_BUF_SIZE PRUETH_SW_DROP_PKT_BUF_SIZE
+
+/* Total switch mode memory usage for buffers per slice */
+#define PRUETH_SW_TOTAL_BUF_SIZE_PER_SLICE \
+ (PRUETH_SW_FWD_BUF_POOL_SIZE * PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE + \
+ PRUETH_SW_LI_BUF_POOL_SIZE * PRUETH_SW_USED_LI_BUF_POOLS_PER_SLICE + \
+ PRUETH_SW_HOST_EXP_BUF_POOL_SIZE + \
+ PRUETH_SW_HOST_PRE_BUF_POOL_SIZE + \
+ PRUETH_SW_DROP_PKT_BUF_SIZE)
+
+/* Total switch mode memory usage for all buffers */
+#define PRUETH_SW_TOTAL_BUF_SIZE \
+ (2 * PRUETH_SW_TOTAL_BUF_SIZE_PER_SLICE)
+
+/* Total mac mode memory usage for buffers per slice */
+#define PRUETH_EMAC_TOTAL_BUF_SIZE_PER_SLICE \
+ (PRUETH_EMAC_LI_BUF_POOL_SIZE * \
+ PRUETH_EMAC_USED_LI_BUF_POOLS_PER_SLICE + \
+ PRUETH_EMAC_HOST_EXP_BUF_POOL_SIZE + \
+ PRUETH_EMAC_HOST_PRE_BUF_POOL_SIZE + \
+ PRUETH_EMAC_DROP_PKT_BUF_SIZE)
+
+/* Total mac mode memory usage for all buffers */
+#define PRUETH_EMAC_TOTAL_BUF_SIZE \
+ (2 * PRUETH_EMAC_TOTAL_BUF_SIZE_PER_SLICE)
+
+/* Size of 1 bank of MSMC/OC_SRAM memory */
+#define MSMC_RAM_BANK_SIZE SZ_256K
#define PRUETH_SWITCH_FDB_MASK ((SIZE_OF_FDB / NUMBER_OF_FDB_BUCKET_ENTRIES) - 1)
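
As a sanity check on the new totals, the arithmetic behind these defines works out to 128 KB per slice in switch mode and 64 KB per slice in mac mode; a minimal standalone sketch (the SZ_* values mirror include/linux/sizes.h, and main() is only a hypothetical harness):

#include <assert.h>

#define SZ_2K   0x00000800
#define SZ_4K   0x00001000
#define SZ_8K   0x00002000
#define SZ_16K  0x00004000

int main(void)
{
        /* switch mode: 8 FWD pools + 8 used LI pools + EXP + PRE + drop */
        unsigned int sw_slice = 8 * SZ_8K + 8 * SZ_4K +
                                SZ_16K + (SZ_16K - SZ_2K) + SZ_2K;
        /* mac mode: 4 used LI pools + EXP + PRE + drop (no FWD pools) */
        unsigned int emac_slice = 4 * SZ_8K +
                                  SZ_16K + (SZ_16K - SZ_2K) + SZ_2K;

        assert(sw_slice == 128 * 1024);        /* 128 KB per slice */
        assert(2 * sw_slice == 256 * 1024);    /* non-banked total, both slices */
        assert(emac_slice == 64 * 1024);       /* 64 KB per slice */
        return 0;
}
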
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
index 86fc1278127c..2f5c4335dec3 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
@@ -1764,10 +1764,15 @@ static int prueth_probe(struct platform_device *pdev)
goto put_mem;
}
- msmc_ram_size = MSMC_RAM_SIZE;
prueth->is_switchmode_supported = prueth->pdata.switch_mode;
- if (prueth->is_switchmode_supported)
- msmc_ram_size = MSMC_RAM_SIZE_SWITCH_MODE;
+ if (prueth->pdata.banked_ms_ram) {
+ /* Reserve 2 MSMC RAM banks for buffers to avoid arbitration */
+ msmc_ram_size = (2 * MSMC_RAM_BANK_SIZE);
+ } else {
+ msmc_ram_size = PRUETH_EMAC_TOTAL_BUF_SIZE;
+ if (prueth->is_switchmode_supported)
+ msmc_ram_size = PRUETH_SW_TOTAL_BUF_SIZE;
+ }
/* NOTE: FW bug needs buffer base to be 64KB aligned */
prueth->msmcram.va =
@@ -1924,7 +1929,8 @@ put_iep0:
free_pool:
gen_pool_free(prueth->sram_pool,
- (unsigned long)prueth->msmcram.va, msmc_ram_size);
+ (unsigned long)prueth->msmcram.va,
+ prueth->msmcram.size);
put_mem:
pruss_release_mem_region(prueth->pruss, &prueth->shram);
@@ -1976,8 +1982,8 @@ static void prueth_remove(struct platform_device *pdev)
icss_iep_put(prueth->iep0);
gen_pool_free(prueth->sram_pool,
- (unsigned long)prueth->msmcram.va,
- MSMC_RAM_SIZE);
+ (unsigned long)prueth->msmcram.va,
+ prueth->msmcram.size);
pruss_release_mem_region(prueth->pruss, &prueth->shram);
@@ -1994,12 +2000,14 @@ static const struct prueth_pdata am654_icssg_pdata = {
.fdqring_mode = K3_RINGACC_RING_MODE_MESSAGE,
.quirk_10m_link_issue = 1,
.switch_mode = 1,
+ .banked_ms_ram = 0,
};
static const struct prueth_pdata am64x_icssg_pdata = {
.fdqring_mode = K3_RINGACC_RING_MODE_RING,
.quirk_10m_link_issue = 1,
.switch_mode = 1,
+ .banked_ms_ram = 1,
};
static const struct of_device_id prueth_dt_match[] = {
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
index 23c465f1ce7f..3bb9fd8c3804 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
@@ -251,11 +251,13 @@ struct prueth_emac {
* @fdqring_mode: Free desc queue mode
* @quirk_10m_link_issue: 10M link detect errata
* @switch_mode: switch firmware support
+ * @banked_ms_ram: banked memory support
*/
struct prueth_pdata {
enum k3_ring_mode fdqring_mode;
u32 quirk_10m_link_issue:1;
u32 switch_mode:1;
+ u32 banked_ms_ram:1;
};
struct icssg_firmwares {
diff --git a/drivers/net/ethernet/ti/icssg/icssg_switch_map.h b/drivers/net/ethernet/ti/icssg/icssg_switch_map.h
index 490a9cc06fb0..7e053b8af3ec 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_switch_map.h
+++ b/drivers/net/ethernet/ti/icssg/icssg_switch_map.h
@@ -180,6 +180,9 @@
/* Used to notify the FW of the current link speed */
#define PORT_LINK_SPEED_OFFSET 0x00A8
+/* 2k memory pointer reserved for default writes by PRU0 */
+#define DEFAULT_MSMC_Q_OFFSET 0x00AC
+
/* TAS gate mask for windows list0 */
#define TAS_GATE_MASK_LIST0 0x0100
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index 0f4be72116b8..f0823aa1ede6 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -1912,7 +1912,6 @@ static void wx_configure_rx_ring(struct wx *wx,
struct wx_ring *ring)
{
u16 reg_idx = ring->reg_idx;
- union wx_rx_desc *rx_desc;
u64 rdba = ring->dma;
u32 rxdctl;
@@ -1942,9 +1941,9 @@ static void wx_configure_rx_ring(struct wx *wx,
memset(ring->rx_buffer_info, 0,
sizeof(struct wx_rx_buffer) * ring->count);
- /* initialize Rx descriptor 0 */
- rx_desc = WX_RX_DESC(ring, 0);
- rx_desc->wb.upper.length = 0;
+ /* reset ntu and ntc to put SW in sync with hardware */
+ ring->next_to_clean = 0;
+ ring->next_to_use = 0;
/* enable receive descriptor ring */
wr32m(wx, WX_PX_RR_CFG(reg_idx),
@@ -2778,6 +2777,8 @@ void wx_update_stats(struct wx *wx)
hwstats->fdirmiss += rd32(wx, WX_RDB_FDIR_MISS);
}
+ /* qmprc is not cleared on read, so reset it manually */
+ hwstats->qmprc = 0;
for (i = wx->num_vfs * wx->num_rx_queues_per_pool;
i < wx->mac.max_rx_queues; i++)
hwstats->qmprc += rd32(wx, WX_PX_MPRC(i));
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index 7f2e6cddfeb1..0213ad5a6ceb 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -174,10 +174,6 @@ static void wx_dma_sync_frag(struct wx_ring *rx_ring,
skb_frag_off(frag),
skb_frag_size(frag),
DMA_FROM_DEVICE);
-
- /* If the page was released, just unmap it. */
- if (unlikely(WX_CB(skb)->page_released))
- page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false);
}
static struct wx_rx_buffer *wx_get_rx_buffer(struct wx_ring *rx_ring,
@@ -227,10 +223,6 @@ static void wx_put_rx_buffer(struct wx_ring *rx_ring,
struct sk_buff *skb,
int rx_buffer_pgcnt)
{
- if (!IS_ERR(skb) && WX_CB(skb)->dma == rx_buffer->dma)
- /* the page has been released from the ring */
- WX_CB(skb)->page_released = true;
-
/* clear contents of rx_buffer */
rx_buffer->page = NULL;
rx_buffer->skb = NULL;
@@ -315,7 +307,7 @@ static bool wx_alloc_mapped_page(struct wx_ring *rx_ring,
return false;
dma = page_pool_get_dma_addr(page);
- bi->page_dma = dma;
+ bi->dma = dma;
bi->page = page;
bi->page_offset = 0;
@@ -352,7 +344,7 @@ void wx_alloc_rx_buffers(struct wx_ring *rx_ring, u16 cleaned_count)
DMA_FROM_DEVICE);
rx_desc->read.pkt_addr =
- cpu_to_le64(bi->page_dma + bi->page_offset);
+ cpu_to_le64(bi->dma + bi->page_offset);
rx_desc++;
bi++;
@@ -365,6 +357,8 @@ void wx_alloc_rx_buffers(struct wx_ring *rx_ring, u16 cleaned_count)
/* clear the status bits for the next_to_use descriptor */
rx_desc->wb.upper.status_error = 0;
+ /* clear the length for the next_to_use descriptor */
+ rx_desc->wb.upper.length = 0;
cleaned_count--;
} while (cleaned_count);
@@ -1705,6 +1699,7 @@ static void wx_set_rss_queues(struct wx *wx)
clear_bit(WX_FLAG_FDIR_HASH, wx->flags);
+ wx->ring_feature[RING_F_FDIR].indices = 1;
/* Use Flow Director in addition to RSS to ensure the best
* distribution of flows across cores, even when an FDIR flow
* isn't matched.
@@ -1746,7 +1741,7 @@ static void wx_set_num_queues(struct wx *wx)
*/
static int wx_acquire_msix_vectors(struct wx *wx)
{
- struct irq_affinity affd = { .pre_vectors = 1 };
+ struct irq_affinity affd = { .post_vectors = 1 };
int nvecs, i;
/* We start by asking for one vector per queue pair */
@@ -1783,16 +1778,24 @@ static int wx_acquire_msix_vectors(struct wx *wx)
return nvecs;
}
- wx->msix_entry->entry = 0;
- wx->msix_entry->vector = pci_irq_vector(wx->pdev, 0);
nvecs -= 1;
for (i = 0; i < nvecs; i++) {
wx->msix_q_entries[i].entry = i;
- wx->msix_q_entries[i].vector = pci_irq_vector(wx->pdev, i + 1);
+ wx->msix_q_entries[i].vector = pci_irq_vector(wx->pdev, i);
}
wx->num_q_vectors = nvecs;
+ wx->msix_entry->entry = nvecs;
+ wx->msix_entry->vector = pci_irq_vector(wx->pdev, nvecs);
+
+ if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags)) {
+ wx->msix_entry->entry = 0;
+ wx->msix_entry->vector = pci_irq_vector(wx->pdev, 0);
+ wx->msix_q_entries[0].entry = 0;
+ wx->msix_q_entries[0].vector = pci_irq_vector(wx->pdev, 1);
+ }
+
return 0;
}
@@ -2291,6 +2294,8 @@ static void wx_set_ivar(struct wx *wx, s8 direction,
if (direction == -1) {
/* other causes */
+ if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags))
+ msix_vector = 0;
msix_vector |= WX_PX_IVAR_ALLOC_VAL;
index = 0;
ivar = rd32(wx, WX_PX_MISC_IVAR);
@@ -2299,8 +2304,6 @@ static void wx_set_ivar(struct wx *wx, s8 direction,
wr32(wx, WX_PX_MISC_IVAR, ivar);
} else {
/* tx or rx causes */
- if (!(wx->mac.type == wx_mac_em && wx->num_vfs == 7))
- msix_vector += 1; /* offset for queue vectors */
msix_vector |= WX_PX_IVAR_ALLOC_VAL;
index = ((16 * (queue & 1)) + (8 * direction));
ivar = rd32(wx, WX_PX_IVAR(queue >> 1));
@@ -2339,7 +2342,7 @@ void wx_write_eitr(struct wx_q_vector *q_vector)
itr_reg |= WX_PX_ITR_CNT_WDIS;
- wr32(wx, WX_PX_ITR(v_idx + 1), itr_reg);
+ wr32(wx, WX_PX_ITR(v_idx), itr_reg);
}
/**
@@ -2392,9 +2395,9 @@ void wx_configure_vectors(struct wx *wx)
wx_write_eitr(q_vector);
}
- wx_set_ivar(wx, -1, 0, 0);
+ wx_set_ivar(wx, -1, 0, v_idx);
if (pdev->msix_enabled)
- wr32(wx, WX_PX_ITR(0), 1950);
+ wr32(wx, WX_PX_ITR(v_idx), 1950);
}
EXPORT_SYMBOL(wx_configure_vectors);
@@ -2414,9 +2417,6 @@ static void wx_clean_rx_ring(struct wx_ring *rx_ring)
if (rx_buffer->skb) {
struct sk_buff *skb = rx_buffer->skb;
- if (WX_CB(skb)->page_released)
- page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false);
-
dev_kfree_skb(skb);
}
@@ -2440,6 +2440,9 @@ static void wx_clean_rx_ring(struct wx_ring *rx_ring)
}
}
+ /* Zero out the descriptor ring */
+ memset(rx_ring->desc, 0, rx_ring->size);
+
rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
@@ -2623,7 +2626,7 @@ static int wx_alloc_page_pool(struct wx_ring *rx_ring)
struct page_pool_params pp_params = {
.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
.order = 0,
- .pool_size = rx_ring->size,
+ .pool_size = rx_ring->count,
.nid = dev_to_node(rx_ring->dev),
.dev = rx_ring->dev,
.dma_dir = DMA_FROM_DEVICE,
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c
index e8656d9d733b..c82ae137756c 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c
@@ -64,6 +64,7 @@ static void wx_sriov_clear_data(struct wx *wx)
wr32m(wx, WX_PSR_VM_CTL, WX_PSR_VM_CTL_POOL_MASK, 0);
wx->ring_feature[RING_F_VMDQ].offset = 0;
+ clear_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags);
clear_bit(WX_FLAG_SRIOV_ENABLED, wx->flags);
/* Disable VMDq flag so device will be set in NM mode */
if (wx->ring_feature[RING_F_VMDQ].limit == 1)
@@ -78,6 +79,9 @@ static int __wx_enable_sriov(struct wx *wx, u8 num_vfs)
set_bit(WX_FLAG_SRIOV_ENABLED, wx->flags);
dev_info(&wx->pdev->dev, "SR-IOV enabled with %d VFs\n", num_vfs);
+ if (num_vfs == 7 && wx->mac.type == wx_mac_em)
+ set_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags);
+
/* Enable VMDq flag so device will be set in VM mode */
set_bit(WX_FLAG_VMDQ_ENABLED, wx->flags);
if (!wx->ring_feature[RING_F_VMDQ].limit)
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 7730c9fc3e02..1a90fcede86b 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -909,7 +909,6 @@ enum wx_reset_type {
struct wx_cb {
dma_addr_t dma;
u16 append_cnt; /* number of skb's appended */
- bool page_released;
bool dma_released;
};
@@ -998,7 +997,6 @@ struct wx_tx_buffer {
struct wx_rx_buffer {
struct sk_buff *skb;
dma_addr_t dma;
- dma_addr_t page_dma;
struct page *page;
unsigned int page_offset;
};
@@ -1191,6 +1189,7 @@ enum wx_pf_flags {
WX_FLAG_VMDQ_ENABLED,
WX_FLAG_VLAN_PROMISC,
WX_FLAG_SRIOV_ENABLED,
+ WX_FLAG_IRQ_VECTOR_SHARED,
WX_FLAG_FDIR_CAPABLE,
WX_FLAG_FDIR_HASH,
WX_FLAG_FDIR_PERFECT,
@@ -1343,7 +1342,7 @@ struct wx {
};
#define WX_INTR_ALL (~0ULL)
-#define WX_INTR_Q(i) BIT((i) + 1)
+#define WX_INTR_Q(i) BIT((i))
/* register operations */
#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg)))
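
Taken together with the wx_lib.c hunks, the new layout gives queue vector i MSI-X entry i and mask bit i, with the misc vector parked on the last entry; a rough standalone sketch of that assignment (nvecs and the printout are illustrative only):

#include <stdio.h>

/* Print the entry/mask assignment for nvecs MSI-X vectors:
 * entries 0..nvecs-2 serve the queue pairs, the last entry is misc.
 */
static void wx_vector_layout(unsigned int nvecs)
{
        unsigned int nqueues = nvecs - 1, i;

        for (i = 0; i < nqueues; i++)
                printf("queue %u -> entry %u, WX_INTR_Q = 0x%llx\n",
                       i, i, 1ULL << i);
        printf("misc    -> entry %u, mask      = 0x%llx\n",
               nqueues, 1ULL << nqueues);
}

int main(void)
{
        wx_vector_layout(4); /* 3 queue vectors + 1 misc */
        return 0;
}
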
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index b5022c49dc5e..e0fc897b0a58 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -161,7 +161,7 @@ static void ngbe_irq_enable(struct wx *wx, bool queues)
if (queues)
wx_intr_enable(wx, NGBE_INTR_ALL);
else
- wx_intr_enable(wx, NGBE_INTR_MISC);
+ wx_intr_enable(wx, NGBE_INTR_MISC(wx));
}
/**
@@ -286,7 +286,7 @@ static int ngbe_request_msix_irqs(struct wx *wx)
* for queue. But when num_vfs == 7, vector[1] is assigned to vf6.
* Misc and queue should reuse interrupt vector[0].
*/
- if (wx->num_vfs == 7)
+ if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags))
err = request_irq(wx->msix_entry->vector,
ngbe_misc_and_queue, 0, netdev->name, wx);
else
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
index bb74263f0498..3b2ca7f47e33 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
@@ -87,7 +87,7 @@
#define NGBE_PX_MISC_IC_TIMESYNC BIT(11) /* time sync */
#define NGBE_INTR_ALL 0x1FF
-#define NGBE_INTR_MISC BIT(0)
+#define NGBE_INTR_MISC(A) BIT((A)->msix_entry->entry)
#define NGBE_PHY_CONFIG(reg_offset) (0x14000 + ((reg_offset) * 4))
#define NGBE_CFG_LAN_SPEED 0x14440
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c
index 7dbcf41750c1..dc87ccad9652 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c
@@ -294,6 +294,7 @@ static void txgbe_mac_link_up_aml(struct phylink_config *config,
wx_fc_enable(wx, tx_pause, rx_pause);
txgbe_reconfig_mac(wx);
+ txgbe_enable_sec_tx_path(wx);
txcfg = rd32(wx, TXGBE_AML_MAC_TX_CFG);
txcfg &= ~TXGBE_AML_MAC_TX_CFG_SPEED_MASK;
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
index 20b9a28bcb55..3885283681ec 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
@@ -31,7 +31,7 @@ void txgbe_irq_enable(struct wx *wx, bool queues)
wr32(wx, WX_PX_MISC_IEN, misc_ien);
/* unmask interrupt */
- wx_intr_enable(wx, TXGBE_INTR_MISC);
+ wx_intr_enable(wx, TXGBE_INTR_MISC(wx));
if (queues)
wx_intr_enable(wx, TXGBE_INTR_QALL(wx));
}
@@ -78,7 +78,6 @@ free_queue_irqs:
free_irq(wx->msix_q_entries[vector].vector,
wx->q_vector[vector]);
}
- wx_reset_interrupt_capability(wx);
return err;
}
@@ -132,7 +131,7 @@ static irqreturn_t txgbe_misc_irq_handle(int irq, void *data)
txgbe->eicr = eicr;
if (eicr & TXGBE_PX_MISC_IC_VF_MBOX) {
wx_msg_task(txgbe->wx);
- wx_intr_enable(wx, TXGBE_INTR_MISC);
+ wx_intr_enable(wx, TXGBE_INTR_MISC(wx));
}
return IRQ_WAKE_THREAD;
}
@@ -184,7 +183,7 @@ static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data)
nhandled++;
}
- wx_intr_enable(wx, TXGBE_INTR_MISC);
+ wx_intr_enable(wx, TXGBE_INTR_MISC(wx));
return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE);
}
@@ -211,6 +210,7 @@ void txgbe_free_misc_irq(struct txgbe *txgbe)
free_irq(txgbe->link_irq, txgbe);
free_irq(txgbe->misc.irq, txgbe);
txgbe_del_irq_domain(txgbe);
+ txgbe->wx->misc_irq_domain = false;
}
int txgbe_setup_misc_irq(struct txgbe *txgbe)
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
index f3d2778b8e35..a5867f3c93fc 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -458,10 +458,14 @@ static int txgbe_open(struct net_device *netdev)
wx_configure(wx);
- err = txgbe_request_queue_irqs(wx);
+ err = txgbe_setup_misc_irq(wx->priv);
if (err)
goto err_free_resources;
+ err = txgbe_request_queue_irqs(wx);
+ if (err)
+ goto err_free_misc_irq;
+
/* Notify the stack of the actual queue counts. */
err = netif_set_real_num_tx_queues(netdev, wx->num_tx_queues);
if (err)
@@ -479,6 +483,9 @@ static int txgbe_open(struct net_device *netdev)
err_free_irq:
wx_free_irq(wx);
+err_free_misc_irq:
+ txgbe_free_misc_irq(wx->priv);
+ wx_reset_interrupt_capability(wx);
err_free_resources:
wx_free_resources(wx);
err_reset:
@@ -519,6 +526,7 @@ static int txgbe_close(struct net_device *netdev)
wx_ptp_stop(wx);
txgbe_down(wx);
wx_free_irq(wx);
+ txgbe_free_misc_irq(wx->priv);
wx_free_resources(wx);
txgbe_fdir_filter_exit(wx);
wx_control_hw(wx, false);
@@ -564,7 +572,6 @@ static void txgbe_shutdown(struct pci_dev *pdev)
int txgbe_setup_tc(struct net_device *dev, u8 tc)
{
struct wx *wx = netdev_priv(dev);
- struct txgbe *txgbe = wx->priv;
/* Hardware has to reinitialize queues and interrupts to
* match packet buffer alignment. Unfortunately, the
@@ -575,7 +582,6 @@ int txgbe_setup_tc(struct net_device *dev, u8 tc)
else
txgbe_reset(wx);
- txgbe_free_misc_irq(txgbe);
wx_clear_interrupt_scheme(wx);
if (tc)
@@ -584,7 +590,6 @@ int txgbe_setup_tc(struct net_device *dev, u8 tc)
netdev_reset_tc(dev);
wx_init_interrupt_scheme(wx);
- txgbe_setup_misc_irq(txgbe);
if (netif_running(dev))
txgbe_open(dev);
@@ -882,13 +887,9 @@ static int txgbe_probe(struct pci_dev *pdev,
txgbe_init_fdir(txgbe);
- err = txgbe_setup_misc_irq(txgbe);
- if (err)
- goto err_release_hw;
-
err = txgbe_init_phy(txgbe);
if (err)
- goto err_free_misc_irq;
+ goto err_release_hw;
err = register_netdev(netdev);
if (err)
@@ -916,8 +917,6 @@ static int txgbe_probe(struct pci_dev *pdev,
err_remove_phy:
txgbe_remove_phy(txgbe);
-err_free_misc_irq:
- txgbe_free_misc_irq(txgbe);
err_release_hw:
wx_clear_interrupt_scheme(wx);
wx_control_hw(wx, false);
@@ -957,7 +956,6 @@ static void txgbe_remove(struct pci_dev *pdev)
unregister_netdev(netdev);
txgbe_remove_phy(txgbe);
- txgbe_free_misc_irq(txgbe);
wx_free_isb_resources(wx);
pci_release_selected_regions(pdev,
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
index 42ec815159e8..41915d7dd372 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
@@ -302,8 +302,8 @@ struct txgbe_fdir_filter {
#define TXGBE_DEFAULT_RX_WORK 128
#endif
-#define TXGBE_INTR_MISC BIT(0)
-#define TXGBE_INTR_QALL(A) GENMASK((A)->num_q_vectors, 1)
+#define TXGBE_INTR_MISC(A) BIT((A)->num_q_vectors)
+#define TXGBE_INTR_QALL(A) (TXGBE_INTR_MISC(A) - 1)
#define TXGBE_MAX_EITR GENMASK(11, 3)
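
With the misc vector now placed after the queue vectors, both masks derive directly from num_q_vectors; a worked example, assuming num_q_vectors == 4:

/* TXGBE_INTR_MISC(A) = BIT(4)   = 0x10
 * TXGBE_INTR_QALL(A) = 0x10 - 1 = 0x0f  (bits 0-3, one per queue vector)
 * old layout for comparison: misc on BIT(0), queues on GENMASK(4, 1) = 0x1e
 */
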
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index edb36ff07a0c..6f82203a414c 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1309,7 +1309,7 @@ ll_temac_ethtools_set_ringparam(struct net_device *ndev,
if (ering->rx_pending > RX_BD_NUM_MAX ||
ering->rx_mini_pending ||
ering->rx_jumbo_pending ||
- ering->rx_pending > TX_BD_NUM_MAX)
+ ering->tx_pending > TX_BD_NUM_MAX)
return -EINVAL;
if (netif_running(ndev))
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index ecf47107146d..4719d40a63ba 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -286,7 +286,7 @@ static void xemaclite_aligned_read(u32 *src_ptr, u8 *dest_ptr,
/* Read the remaining data */
for (; length > 0; length--)
- *to_u8_ptr = *from_u8_ptr;
+ *to_u8_ptr++ = *from_u8_ptr++;
}
}
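
The hunk above fixes a classic stuck-pointer loop: without the post-increments, the tail copy kept rewriting the same destination byte from the same source byte. A minimal standalone illustration (buffers are hypothetical):

#include <stdint.h>

/* Copy the trailing 'length' bytes; both pointers must advance. */
static void tail_copy(uint8_t *to_u8_ptr, const uint8_t *from_u8_ptr,
                      int length)
{
        for (; length > 0; length--)
                *to_u8_ptr++ = *from_u8_ptr++;
}
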
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index c41a025c66f0..8be9bce66a4e 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2317,8 +2317,11 @@ static int netvsc_prepare_bonding(struct net_device *vf_netdev)
if (!ndev)
return NOTIFY_DONE;
- /* set slave flag before open to prevent IPv6 addrconf */
+ /* Set slave flag and no addrconf flag before open
+ * to prevent IPv6 addrconf.
+ */
vf_netdev->flags |= IFF_SLAVE;
+ vf_netdev->priv_flags |= IFF_NO_ADDRCONF;
return NOTIFY_DONE;
}
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 3d315e30ee47..7edbe76b5455 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -247,15 +247,39 @@ static sci_t make_sci(const u8 *addr, __be16 port)
return sci;
}
-static sci_t macsec_frame_sci(struct macsec_eth_header *hdr, bool sci_present)
+static sci_t macsec_active_sci(struct macsec_secy *secy)
{
- sci_t sci;
+ struct macsec_rx_sc *rx_sc = rcu_dereference_bh(secy->rx_sc);
+
+ /* Case single RX SC */
+ if (rx_sc && !rcu_dereference_bh(rx_sc->next))
+ return (rx_sc->active) ? rx_sc->sci : 0;
+ /* Case no RX SC or multiple */
+ else
+ return 0;
+}
+
+static sci_t macsec_frame_sci(struct macsec_eth_header *hdr, bool sci_present,
+ struct macsec_rxh_data *rxd)
+{
+ struct macsec_dev *macsec;
+ sci_t sci = 0;
- if (sci_present)
+ /* SC = 1 */
+ if (sci_present) {
memcpy(&sci, hdr->secure_channel_id,
sizeof(hdr->secure_channel_id));
- else
+ /* SC = 0; ES = 0 */
+ } else if ((!(hdr->tci_an & (MACSEC_TCI_ES | MACSEC_TCI_SC))) &&
+ (list_is_singular(&rxd->secys))) {
+ /* Only one SECY should exist in this scenario */
+ macsec = list_first_or_null_rcu(&rxd->secys, struct macsec_dev,
+ secys);
+ if (macsec)
+ return macsec_active_sci(&macsec->secy);
+ } else {
sci = make_sci(hdr->eth.h_source, MACSEC_PORT_ES);
+ }
return sci;
}
@@ -1109,7 +1133,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
struct macsec_rxh_data *rxd;
struct macsec_dev *macsec;
unsigned int len;
- sci_t sci;
+ sci_t sci = 0;
u32 hdr_pn;
bool cbit;
struct pcpu_rx_sc_stats *rxsc_stats;
@@ -1156,11 +1180,14 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
macsec_skb_cb(skb)->has_sci = !!(hdr->tci_an & MACSEC_TCI_SC);
macsec_skb_cb(skb)->assoc_num = hdr->tci_an & MACSEC_AN_MASK;
- sci = macsec_frame_sci(hdr, macsec_skb_cb(skb)->has_sci);
rcu_read_lock();
rxd = macsec_data_rcu(skb->dev);
+ sci = macsec_frame_sci(hdr, macsec_skb_cb(skb)->has_sci, rxd);
+ if (!sci)
+ goto drop_nosc;
+
list_for_each_entry_rcu(macsec, &rxd->secys, secys) {
struct macsec_rx_sc *sc = find_rx_sc(&macsec->secy, sci);
@@ -1283,6 +1310,7 @@ drop:
macsec_rxsa_put(rx_sa);
drop_nosa:
macsec_rxsc_put(rx_sc);
+drop_nosc:
rcu_read_unlock();
drop_direct:
kfree_skb(skb);
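
The new lookup logic condenses to a three-way decision; a non-normative summary of the code above:

/* SC bit set in the SecTAG -> SCI is taken from the header
 * SC = 0, ES = 0, exactly one SecY on the device
 *                           -> SCI of that SecY's single active RX SC
 *                              (0 when none or multiple, and the frame
 *                              is dropped at the new drop_nosc label)
 * any other combination     -> make_sci(source MAC, MACSEC_PORT_ES)
 */
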
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 4289ccd3e41b..176935a8645f 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -1252,7 +1252,6 @@ static int sysdata_append_release(struct netconsole_target *nt, int offset)
*/
static int prepare_extradata(struct netconsole_target *nt)
{
- u32 fields = SYSDATA_CPU_NR | SYSDATA_TASKNAME;
int extradata_len;
/* userdata was appended when configfs write helper was called
@@ -1260,7 +1259,7 @@ static int prepare_extradata(struct netconsole_target *nt)
*/
extradata_len = nt->userdata_length;
- if (!(nt->sysdata_fields & fields))
+ if (!nt->sysdata_fields)
goto out;
if (nt->sysdata_fields & SYSDATA_CPU_NR)
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index af545d42961c..fa5fbd97ad69 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -371,7 +371,8 @@ static int nsim_poll(struct napi_struct *napi, int budget)
int done;
done = nsim_rcv(rq, budget);
- napi_complete(napi);
+ if (done < budget)
+ napi_complete_done(napi, done);
return done;
}
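
The one-liner above restores the standard NAPI contract: completion is signalled only when the poll did not exhaust its budget; otherwise the core must repoll. A minimal sketch of the pattern (example_rx is a hypothetical receive routine):

static int example_poll(struct napi_struct *napi, int budget)
{
        int done = example_rx(napi, budget);

        /* Only leave polling mode when there may be nothing left;
         * done == budget means the core should call us again.
         */
        if (done < budget)
                napi_complete_done(napi, done);

        return done;
}
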
diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c
index ebf1e849506b..3e9e7f8444b3 100644
--- a/drivers/net/ovpn/io.c
+++ b/drivers/net/ovpn/io.c
@@ -62,6 +62,13 @@ static void ovpn_netdev_write(struct ovpn_peer *peer, struct sk_buff *skb)
unsigned int pkt_len;
int ret;
+ /*
+ * GSO state from the transport layer is not valid for the tunnel/data
+ * path. Reset all GSO fields to prevent any further GSO processing
+ * from entering an inconsistent state.
+ */
+ skb_gso_reset(skb);
+
/* we can't guarantee the packet wasn't corrupted before entering the
* VPN, therefore we give other layers a chance to check that
*/
diff --git a/drivers/net/ovpn/netlink-gen.c b/drivers/net/ovpn/netlink-gen.c
index 58e1a4342378..14298188c5f1 100644
--- a/drivers/net/ovpn/netlink-gen.c
+++ b/drivers/net/ovpn/netlink-gen.c
@@ -29,6 +29,22 @@ const struct nla_policy ovpn_keyconf_nl_policy[OVPN_A_KEYCONF_DECRYPT_DIR + 1] =
[OVPN_A_KEYCONF_DECRYPT_DIR] = NLA_POLICY_NESTED(ovpn_keydir_nl_policy),
};
+const struct nla_policy ovpn_keyconf_del_input_nl_policy[OVPN_A_KEYCONF_SLOT + 1] = {
+ [OVPN_A_KEYCONF_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_keyconf_peer_id_range),
+ [OVPN_A_KEYCONF_SLOT] = NLA_POLICY_MAX(NLA_U32, 1),
+};
+
+const struct nla_policy ovpn_keyconf_get_nl_policy[OVPN_A_KEYCONF_CIPHER_ALG + 1] = {
+ [OVPN_A_KEYCONF_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_keyconf_peer_id_range),
+ [OVPN_A_KEYCONF_SLOT] = NLA_POLICY_MAX(NLA_U32, 1),
+ [OVPN_A_KEYCONF_KEY_ID] = NLA_POLICY_MAX(NLA_U32, 7),
+ [OVPN_A_KEYCONF_CIPHER_ALG] = NLA_POLICY_MAX(NLA_U32, 2),
+};
+
+const struct nla_policy ovpn_keyconf_swap_input_nl_policy[OVPN_A_KEYCONF_PEER_ID + 1] = {
+ [OVPN_A_KEYCONF_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_keyconf_peer_id_range),
+};
+
const struct nla_policy ovpn_keydir_nl_policy[OVPN_A_KEYDIR_NONCE_TAIL + 1] = {
[OVPN_A_KEYDIR_CIPHER_KEY] = NLA_POLICY_MAX_LEN(256),
[OVPN_A_KEYDIR_NONCE_TAIL] = NLA_POLICY_EXACT_LEN(OVPN_NONCE_TAIL_SIZE),
@@ -60,16 +76,49 @@ const struct nla_policy ovpn_peer_nl_policy[OVPN_A_PEER_LINK_TX_PACKETS + 1] = {
[OVPN_A_PEER_LINK_TX_PACKETS] = { .type = NLA_UINT, },
};
+const struct nla_policy ovpn_peer_del_input_nl_policy[OVPN_A_PEER_ID + 1] = {
+ [OVPN_A_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_peer_id_range),
+};
+
+const struct nla_policy ovpn_peer_new_input_nl_policy[OVPN_A_PEER_KEEPALIVE_TIMEOUT + 1] = {
+ [OVPN_A_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_peer_id_range),
+ [OVPN_A_PEER_REMOTE_IPV4] = { .type = NLA_BE32, },
+ [OVPN_A_PEER_REMOTE_IPV6] = NLA_POLICY_EXACT_LEN(16),
+ [OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID] = { .type = NLA_U32, },
+ [OVPN_A_PEER_REMOTE_PORT] = NLA_POLICY_MIN(NLA_BE16, 1),
+ [OVPN_A_PEER_SOCKET] = { .type = NLA_U32, },
+ [OVPN_A_PEER_VPN_IPV4] = { .type = NLA_BE32, },
+ [OVPN_A_PEER_VPN_IPV6] = NLA_POLICY_EXACT_LEN(16),
+ [OVPN_A_PEER_LOCAL_IPV4] = { .type = NLA_BE32, },
+ [OVPN_A_PEER_LOCAL_IPV6] = NLA_POLICY_EXACT_LEN(16),
+ [OVPN_A_PEER_KEEPALIVE_INTERVAL] = { .type = NLA_U32, },
+ [OVPN_A_PEER_KEEPALIVE_TIMEOUT] = { .type = NLA_U32, },
+};
+
+const struct nla_policy ovpn_peer_set_input_nl_policy[OVPN_A_PEER_KEEPALIVE_TIMEOUT + 1] = {
+ [OVPN_A_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_peer_id_range),
+ [OVPN_A_PEER_REMOTE_IPV4] = { .type = NLA_BE32, },
+ [OVPN_A_PEER_REMOTE_IPV6] = NLA_POLICY_EXACT_LEN(16),
+ [OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID] = { .type = NLA_U32, },
+ [OVPN_A_PEER_REMOTE_PORT] = NLA_POLICY_MIN(NLA_BE16, 1),
+ [OVPN_A_PEER_VPN_IPV4] = { .type = NLA_BE32, },
+ [OVPN_A_PEER_VPN_IPV6] = NLA_POLICY_EXACT_LEN(16),
+ [OVPN_A_PEER_LOCAL_IPV4] = { .type = NLA_BE32, },
+ [OVPN_A_PEER_LOCAL_IPV6] = NLA_POLICY_EXACT_LEN(16),
+ [OVPN_A_PEER_KEEPALIVE_INTERVAL] = { .type = NLA_U32, },
+ [OVPN_A_PEER_KEEPALIVE_TIMEOUT] = { .type = NLA_U32, },
+};
+
/* OVPN_CMD_PEER_NEW - do */
static const struct nla_policy ovpn_peer_new_nl_policy[OVPN_A_PEER + 1] = {
[OVPN_A_IFINDEX] = { .type = NLA_U32, },
- [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_nl_policy),
+ [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_new_input_nl_policy),
};
/* OVPN_CMD_PEER_SET - do */
static const struct nla_policy ovpn_peer_set_nl_policy[OVPN_A_PEER + 1] = {
[OVPN_A_IFINDEX] = { .type = NLA_U32, },
- [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_nl_policy),
+ [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_set_input_nl_policy),
};
/* OVPN_CMD_PEER_GET - do */
@@ -86,7 +135,7 @@ static const struct nla_policy ovpn_peer_get_dump_nl_policy[OVPN_A_IFINDEX + 1]
/* OVPN_CMD_PEER_DEL - do */
static const struct nla_policy ovpn_peer_del_nl_policy[OVPN_A_PEER + 1] = {
[OVPN_A_IFINDEX] = { .type = NLA_U32, },
- [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_nl_policy),
+ [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_del_input_nl_policy),
};
/* OVPN_CMD_KEY_NEW - do */
@@ -98,19 +147,19 @@ static const struct nla_policy ovpn_key_new_nl_policy[OVPN_A_KEYCONF + 1] = {
/* OVPN_CMD_KEY_GET - do */
static const struct nla_policy ovpn_key_get_nl_policy[OVPN_A_KEYCONF + 1] = {
[OVPN_A_IFINDEX] = { .type = NLA_U32, },
- [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_nl_policy),
+ [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_get_nl_policy),
};
/* OVPN_CMD_KEY_SWAP - do */
static const struct nla_policy ovpn_key_swap_nl_policy[OVPN_A_KEYCONF + 1] = {
[OVPN_A_IFINDEX] = { .type = NLA_U32, },
- [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_nl_policy),
+ [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_swap_input_nl_policy),
};
/* OVPN_CMD_KEY_DEL - do */
static const struct nla_policy ovpn_key_del_nl_policy[OVPN_A_KEYCONF + 1] = {
[OVPN_A_IFINDEX] = { .type = NLA_U32, },
- [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_nl_policy),
+ [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_del_input_nl_policy),
};
/* Ops table for ovpn */
diff --git a/drivers/net/ovpn/netlink-gen.h b/drivers/net/ovpn/netlink-gen.h
index 66a4e4a0a055..220b5b2fdd4f 100644
--- a/drivers/net/ovpn/netlink-gen.h
+++ b/drivers/net/ovpn/netlink-gen.h
@@ -13,8 +13,14 @@
/* Common nested types */
extern const struct nla_policy ovpn_keyconf_nl_policy[OVPN_A_KEYCONF_DECRYPT_DIR + 1];
+extern const struct nla_policy ovpn_keyconf_del_input_nl_policy[OVPN_A_KEYCONF_SLOT + 1];
+extern const struct nla_policy ovpn_keyconf_get_nl_policy[OVPN_A_KEYCONF_CIPHER_ALG + 1];
+extern const struct nla_policy ovpn_keyconf_swap_input_nl_policy[OVPN_A_KEYCONF_PEER_ID + 1];
extern const struct nla_policy ovpn_keydir_nl_policy[OVPN_A_KEYDIR_NONCE_TAIL + 1];
extern const struct nla_policy ovpn_peer_nl_policy[OVPN_A_PEER_LINK_TX_PACKETS + 1];
+extern const struct nla_policy ovpn_peer_del_input_nl_policy[OVPN_A_PEER_ID + 1];
+extern const struct nla_policy ovpn_peer_new_input_nl_policy[OVPN_A_PEER_KEEPALIVE_TIMEOUT + 1];
+extern const struct nla_policy ovpn_peer_set_input_nl_policy[OVPN_A_PEER_KEEPALIVE_TIMEOUT + 1];
int ovpn_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
struct genl_info *info);
diff --git a/drivers/net/ovpn/netlink.c b/drivers/net/ovpn/netlink.c
index a4ec53def46e..c7f382437630 100644
--- a/drivers/net/ovpn/netlink.c
+++ b/drivers/net/ovpn/netlink.c
@@ -352,7 +352,7 @@ int ovpn_nl_peer_new_doit(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER],
- ovpn_peer_nl_policy, info->extack);
+ ovpn_peer_new_input_nl_policy, info->extack);
if (ret)
return ret;
@@ -476,7 +476,7 @@ int ovpn_nl_peer_set_doit(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER],
- ovpn_peer_nl_policy, info->extack);
+ ovpn_peer_set_input_nl_policy, info->extack);
if (ret)
return ret;
@@ -654,7 +654,7 @@ int ovpn_nl_peer_get_doit(struct sk_buff *skb, struct genl_info *info)
struct ovpn_peer *peer;
struct sk_buff *msg;
u32 peer_id;
- int ret;
+ int ret, i;
if (GENL_REQ_ATTR_CHECK(info, OVPN_A_PEER))
return -EINVAL;
@@ -668,6 +668,23 @@ int ovpn_nl_peer_get_doit(struct sk_buff *skb, struct genl_info *info)
OVPN_A_PEER_ID))
return -EINVAL;
+ /* OVPN_CMD_PEER_GET expects only the PEER_ID, therefore
+ * ensure that the user hasn't specified any other attribute.
+ *
+ * Unfortunately this check cannot be performed via netlink
+ * spec/policy and must be open-coded.
+ */
+ for (i = 0; i < OVPN_A_PEER_MAX + 1; i++) {
+ if (i == OVPN_A_PEER_ID)
+ continue;
+
+ if (attrs[i]) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "unexpected attribute %u", i);
+ return -EINVAL;
+ }
+ }
+
peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]);
peer = ovpn_peer_get_by_id(ovpn, peer_id);
if (!peer) {
@@ -768,7 +785,7 @@ int ovpn_nl_peer_del_doit(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER],
- ovpn_peer_nl_policy, info->extack);
+ ovpn_peer_del_input_nl_policy, info->extack);
if (ret)
return ret;
@@ -969,14 +986,14 @@ int ovpn_nl_key_get_doit(struct sk_buff *skb, struct genl_info *info)
struct ovpn_peer *peer;
struct sk_buff *msg;
u32 peer_id;
- int ret;
+ int ret, i;
if (GENL_REQ_ATTR_CHECK(info, OVPN_A_KEYCONF))
return -EINVAL;
ret = nla_parse_nested(attrs, OVPN_A_KEYCONF_MAX,
info->attrs[OVPN_A_KEYCONF],
- ovpn_keyconf_nl_policy, info->extack);
+ ovpn_keyconf_get_nl_policy, info->extack);
if (ret)
return ret;
@@ -988,6 +1005,24 @@ int ovpn_nl_key_get_doit(struct sk_buff *skb, struct genl_info *info)
OVPN_A_KEYCONF_SLOT))
return -EINVAL;
+ /* OVPN_CMD_KEY_GET expects only the PEER_ID and the SLOT, therefore
+ * ensure that the user hasn't specified any other attribute.
+ *
+ * Unfortunately this check cannot be performed via netlink
+ * spec/policy and must be open-coded.
+ */
+ for (i = 0; i < OVPN_A_KEYCONF_MAX + 1; i++) {
+ if (i == OVPN_A_KEYCONF_PEER_ID ||
+ i == OVPN_A_KEYCONF_SLOT)
+ continue;
+
+ if (attrs[i]) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "unexpected attribute %u", i);
+ return -EINVAL;
+ }
+ }
+
peer_id = nla_get_u32(attrs[OVPN_A_KEYCONF_PEER_ID]);
peer = ovpn_peer_get_by_id(ovpn, peer_id);
if (!peer) {
@@ -1037,7 +1072,7 @@ int ovpn_nl_key_swap_doit(struct sk_buff *skb, struct genl_info *info)
ret = nla_parse_nested(attrs, OVPN_A_KEYCONF_MAX,
info->attrs[OVPN_A_KEYCONF],
- ovpn_keyconf_nl_policy, info->extack);
+ ovpn_keyconf_swap_input_nl_policy, info->extack);
if (ret)
return ret;
@@ -1074,7 +1109,7 @@ int ovpn_nl_key_del_doit(struct sk_buff *skb, struct genl_info *info)
ret = nla_parse_nested(attrs, OVPN_A_KEYCONF_MAX,
info->attrs[OVPN_A_KEYCONF],
- ovpn_keyconf_nl_policy, info->extack);
+ ovpn_keyconf_del_input_nl_policy, info->extack);
if (ret)
return ret;
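
Both open-coded checks above follow the same shape; a generic sketch of the pattern (reject_unexpected and the allowed callback are hypothetical helpers, not part of this series):

static int reject_unexpected(struct nlattr **attrs, int max,
                             bool (*allowed)(int type),
                             struct netlink_ext_ack *extack)
{
        int i;

        /* Walk every possible attribute type and refuse any that was
         * parsed but is not on the command's allow-list.
         */
        for (i = 0; i <= max; i++) {
                if (!attrs[i] || allowed(i))
                        continue;
                NL_SET_ERR_MSG_FMT_MOD(extack, "unexpected attribute %u", i);
                return -EINVAL;
        }
        return 0;
}
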
diff --git a/drivers/net/ovpn/udp.c b/drivers/net/ovpn/udp.c
index bff00946eae2..60435a21f29c 100644
--- a/drivers/net/ovpn/udp.c
+++ b/drivers/net/ovpn/udp.c
@@ -344,6 +344,7 @@ void ovpn_udp_send_skb(struct ovpn_peer *peer, struct sock *sk,
int ret;
skb->dev = peer->ovpn->dev;
+ skb->mark = READ_ONCE(sk->sk_mark);
/* no checksum performed at this layer */
skb->ip_summed = CHECKSUM_NONE;
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index a6bcb0fee863..fda2e27c1810 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -445,6 +445,9 @@ int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum)
lockdep_assert_held_once(&bus->mdio_lock);
+ if (addr >= PHY_MAX_ADDR)
+ return -ENXIO;
+
if (bus->read)
retval = bus->read(bus, addr, regnum);
else
@@ -474,6 +477,9 @@ int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val)
lockdep_assert_held_once(&bus->mdio_lock);
+ if (addr >= PHY_MAX_ADDR)
+ return -ENXIO;
+
if (bus->write)
err = bus->write(bus, addr, regnum, val);
else
@@ -535,6 +541,9 @@ int __mdiobus_c45_read(struct mii_bus *bus, int addr, int devad, u32 regnum)
lockdep_assert_held_once(&bus->mdio_lock);
+ if (addr >= PHY_MAX_ADDR)
+ return -ENXIO;
+
if (bus->read_c45)
retval = bus->read_c45(bus, addr, devad, regnum);
else
@@ -566,6 +575,9 @@ int __mdiobus_c45_write(struct mii_bus *bus, int addr, int devad, u32 regnum,
lockdep_assert_held_once(&bus->mdio_lock);
+ if (addr >= PHY_MAX_ADDR)
+ return -ENXIO;
+
if (bus->write_c45)
err = bus->write_c45(bus, addr, devad, regnum, val);
else
diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c
index 13570f628aa5..dc8634e7bcbe 100644
--- a/drivers/net/phy/microchip.c
+++ b/drivers/net/phy/microchip.c
@@ -332,7 +332,7 @@ static void lan88xx_link_change_notify(struct phy_device *phydev)
* As workaround, set to 10 before setting to 100
* at forced 100 F/H mode.
*/
- if (!phydev->autoneg && phydev->speed == 100) {
+ if (phydev->state == PHY_NOLINK && !phydev->autoneg && phydev->speed == 100) {
/* disable phy interrupt */
temp = phy_read(phydev, LAN88XX_INT_MASK);
temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_;
@@ -488,6 +488,7 @@ static struct phy_driver microchip_phy_driver[] = {
.config_init = lan88xx_config_init,
.config_aneg = lan88xx_config_aneg,
.link_change_notify = lan88xx_link_change_notify,
+ .soft_reset = genphy_soft_reset,
/* Interrupt handling is broken, do not define related
* functions to force polling.
diff --git a/drivers/net/phy/phy_caps.c b/drivers/net/phy/phy_caps.c
index 703321689726..38417e288611 100644
--- a/drivers/net/phy/phy_caps.c
+++ b/drivers/net/phy/phy_caps.c
@@ -188,6 +188,9 @@ phy_caps_lookup_by_linkmode_rev(const unsigned long *linkmodes, bool fdx_only)
* When @exact is not set, we return either an exact match, or matching capabilities
* at lower speed, or the lowest matching speed, or NULL.
*
+ * Non-exact matches will try to return an exact speed and duplex match, but may
+ * return matching capabilities with same speed but a different duplex.
+ *
* Returns: a matched link_capabilities according to the above process, NULL
* otherwise.
*/
@@ -195,7 +198,7 @@ const struct link_capabilities *
phy_caps_lookup(int speed, unsigned int duplex, const unsigned long *supported,
bool exact)
{
- const struct link_capabilities *lcap, *last = NULL;
+ const struct link_capabilities *lcap, *match = NULL, *last = NULL;
for_each_link_caps_desc_speed(lcap) {
if (linkmode_intersects(lcap->linkmodes, supported)) {
@@ -204,16 +207,19 @@ phy_caps_lookup(int speed, unsigned int duplex, const unsigned long *supported,
if (lcap->speed == speed && lcap->duplex == duplex) {
return lcap;
} else if (!exact) {
- if (lcap->speed <= speed)
- return lcap;
+ if (!match && lcap->speed <= speed)
+ match = lcap;
+
+ if (lcap->speed < speed)
+ break;
}
}
}
- if (!exact)
- return last;
+ if (!match && !exact)
+ match = last;
- return NULL;
+ return match;
}
EXPORT_SYMBOL_GPL(phy_caps_lookup);
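
The behavioral difference is easiest to see with a worked example, assuming the capability table is walked in descending speed order with both duplexes listed at each speed:

/* Request: 1000 Mbps FULL, exact = false, PHY supports 1000 HALF+FULL.
 *   old: the first entry with speed <= 1000 (say 1000/HALF) was
 *        returned immediately, even though an exact 1000/FULL entry
 *        existed at the same speed.
 *   new: 1000/HALF is only remembered in 'match'; scanning continues
 *        at the same speed, the exact 1000/FULL entry wins, and the
 *        loop breaks once table speeds drop below the requested one.
 */
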
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 73f9cb2e2844..f76ee8489504 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -3416,7 +3416,8 @@ static int phy_probe(struct device *dev)
/* Get the LEDs from the device tree, and instantiate standard
* LEDs for them.
*/
- if (IS_ENABLED(CONFIG_PHYLIB_LEDS))
+ if (IS_ENABLED(CONFIG_PHYLIB_LEDS) && !phy_driver_is_genphy(phydev) &&
+ !phy_driver_is_genphy_10g(phydev))
err = of_phy_leds(phydev);
out:
@@ -3433,7 +3434,8 @@ static int phy_remove(struct device *dev)
cancel_delayed_work_sync(&phydev->state_queue);
- if (IS_ENABLED(CONFIG_PHYLIB_LEDS))
+ if (IS_ENABLED(CONFIG_PHYLIB_LEDS) && !phy_driver_is_genphy(phydev) &&
+ !phy_driver_is_genphy_10g(phydev))
phy_leds_unregister(phydev);
phydev->state = PHY_DOWN;
diff --git a/drivers/net/phy/qcom/at803x.c b/drivers/net/phy/qcom/at803x.c
index 26350b962890..8f26e395e39f 100644
--- a/drivers/net/phy/qcom/at803x.c
+++ b/drivers/net/phy/qcom/at803x.c
@@ -26,9 +26,6 @@
#define AT803X_LED_CONTROL 0x18
-#define AT803X_PHY_MMD3_WOL_CTRL 0x8012
-#define AT803X_WOL_EN BIT(5)
-
#define AT803X_REG_CHIP_CONFIG 0x1f
#define AT803X_BT_BX_REG_SEL 0x8000
@@ -866,30 +863,6 @@ static int at8031_config_init(struct phy_device *phydev)
return at803x_config_init(phydev);
}
-static int at8031_set_wol(struct phy_device *phydev,
- struct ethtool_wolinfo *wol)
-{
- int ret;
-
- /* First setup MAC address and enable WOL interrupt */
- ret = at803x_set_wol(phydev, wol);
- if (ret)
- return ret;
-
- if (wol->wolopts & WAKE_MAGIC)
- /* Enable WOL function for 1588 */
- ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
- AT803X_PHY_MMD3_WOL_CTRL,
- 0, AT803X_WOL_EN);
- else
- /* Disable WoL function for 1588 */
- ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
- AT803X_PHY_MMD3_WOL_CTRL,
- AT803X_WOL_EN, 0);
-
- return ret;
-}
-
static int at8031_config_intr(struct phy_device *phydev)
{
struct at803x_priv *priv = phydev->priv;
diff --git a/drivers/net/phy/qcom/qca808x.c b/drivers/net/phy/qcom/qca808x.c
index 71498c518f0f..6de16c0eaa08 100644
--- a/drivers/net/phy/qcom/qca808x.c
+++ b/drivers/net/phy/qcom/qca808x.c
@@ -633,7 +633,7 @@ static struct phy_driver qca808x_driver[] = {
.handle_interrupt = at803x_handle_interrupt,
.get_tunable = at803x_get_tunable,
.set_tunable = at803x_set_tunable,
- .set_wol = at803x_set_wol,
+ .set_wol = at8031_set_wol,
.get_wol = at803x_get_wol,
.get_features = qca808x_get_features,
.config_aneg = qca808x_config_aneg,
diff --git a/drivers/net/phy/qcom/qcom-phy-lib.c b/drivers/net/phy/qcom/qcom-phy-lib.c
index d28815ef56bb..af7d0d8e81be 100644
--- a/drivers/net/phy/qcom/qcom-phy-lib.c
+++ b/drivers/net/phy/qcom/qcom-phy-lib.c
@@ -115,6 +115,31 @@ int at803x_set_wol(struct phy_device *phydev,
}
EXPORT_SYMBOL_GPL(at803x_set_wol);
+int at8031_set_wol(struct phy_device *phydev,
+ struct ethtool_wolinfo *wol)
+{
+ int ret;
+
+ /* First setup MAC address and enable WOL interrupt */
+ ret = at803x_set_wol(phydev, wol);
+ if (ret)
+ return ret;
+
+ if (wol->wolopts & WAKE_MAGIC)
+ /* Enable WOL function for 1588 */
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ AT803X_PHY_MMD3_WOL_CTRL,
+ 0, AT803X_WOL_EN);
+ else
+ /* Disable WoL function for 1588 */
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ AT803X_PHY_MMD3_WOL_CTRL,
+ AT803X_WOL_EN, 0);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(at8031_set_wol);
+
void at803x_get_wol(struct phy_device *phydev,
struct ethtool_wolinfo *wol)
{
diff --git a/drivers/net/phy/qcom/qcom.h b/drivers/net/phy/qcom/qcom.h
index 4bb541728846..7f7151c8baca 100644
--- a/drivers/net/phy/qcom/qcom.h
+++ b/drivers/net/phy/qcom/qcom.h
@@ -172,6 +172,9 @@
#define AT803X_LOC_MAC_ADDR_16_31_OFFSET 0x804B
#define AT803X_LOC_MAC_ADDR_32_47_OFFSET 0x804A
+#define AT803X_PHY_MMD3_WOL_CTRL 0x8012
+#define AT803X_WOL_EN BIT(5)
+
#define AT803X_DEBUG_ADDR 0x1D
#define AT803X_DEBUG_DATA 0x1E
@@ -215,6 +218,8 @@ int at803x_debug_reg_mask(struct phy_device *phydev, u16 reg,
int at803x_debug_reg_write(struct phy_device *phydev, u16 reg, u16 data);
int at803x_set_wol(struct phy_device *phydev,
struct ethtool_wolinfo *wol);
+int at8031_set_wol(struct phy_device *phydev,
+ struct ethtool_wolinfo *wol);
void at803x_get_wol(struct phy_device *phydev,
struct ethtool_wolinfo *wol);
int at803x_ack_interrupt(struct phy_device *phydev);
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 31463b9e5697..b6489da5cfcd 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -155,10 +155,29 @@ static int smsc_phy_reset(struct phy_device *phydev)
static int lan87xx_config_aneg(struct phy_device *phydev)
{
- int rc;
+ u8 mdix_ctrl;
int val;
+ int rc;
+
+ /* When auto-negotiation is disabled (forced mode), the PHY's
+ * Auto-MDIX will continue toggling the TX/RX pairs.
+ *
+ * To establish a stable link, we must select a fixed MDI mode.
+ * If the user has not specified a fixed MDI mode (i.e., mdix_ctrl is
+ * 'auto'), we default to ETH_TP_MDI. This choice of ETH_TP_MDI
+ * mirrors the behavior the hardware would exhibit if the AUTOMDIX_EN
+ * strap were configured for a fixed MDI connection.
+ */
+ if (phydev->autoneg == AUTONEG_DISABLE) {
+ if (phydev->mdix_ctrl == ETH_TP_MDI_AUTO)
+ mdix_ctrl = ETH_TP_MDI;
+ else
+ mdix_ctrl = phydev->mdix_ctrl;
+ } else {
+ mdix_ctrl = phydev->mdix_ctrl;
+ }
- switch (phydev->mdix_ctrl) {
+ switch (mdix_ctrl) {
case ETH_TP_MDI:
val = SPECIAL_CTRL_STS_OVRRD_AMDIX_;
break;
@@ -167,7 +186,8 @@ static int lan87xx_config_aneg(struct phy_device *phydev)
SPECIAL_CTRL_STS_AMDIX_STATE_;
break;
case ETH_TP_MDI_AUTO:
- val = SPECIAL_CTRL_STS_AMDIX_ENABLE_;
+ val = SPECIAL_CTRL_STS_OVRRD_AMDIX_ |
+ SPECIAL_CTRL_STS_AMDIX_ENABLE_;
break;
default:
return genphy_config_aneg(phydev);
@@ -183,7 +203,7 @@ static int lan87xx_config_aneg(struct phy_device *phydev)
rc |= val;
phy_write(phydev, SPECIAL_CTRL_STS, rc);
- phydev->mdix = phydev->mdix_ctrl;
+ phydev->mdix = mdix_ctrl;
return genphy_config_aneg(phydev);
}
@@ -261,6 +281,33 @@ int lan87xx_read_status(struct phy_device *phydev)
}
EXPORT_SYMBOL_GPL(lan87xx_read_status);
+static int lan87xx_phy_config_init(struct phy_device *phydev)
+{
+ int rc;
+
+ /* The LAN87xx PHY's initial MDI-X mode is determined by the AUTOMDIX_EN
+ * hardware strap, but the driver cannot read the strap's status. This
+ * creates an unpredictable initial state.
+ *
+ * To ensure consistent and reliable behavior across all boards,
+ * override the strap configuration on initialization and force the PHY
+ * into a known state with Auto-MDIX enabled, which is the expected
+ * default for modern hardware.
+ */
+ rc = phy_modify(phydev, SPECIAL_CTRL_STS,
+ SPECIAL_CTRL_STS_OVRRD_AMDIX_ |
+ SPECIAL_CTRL_STS_AMDIX_ENABLE_ |
+ SPECIAL_CTRL_STS_AMDIX_STATE_,
+ SPECIAL_CTRL_STS_OVRRD_AMDIX_ |
+ SPECIAL_CTRL_STS_AMDIX_ENABLE_);
+ if (rc < 0)
+ return rc;
+
+ phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
+
+ return smsc_phy_config_init(phydev);
+}
+
static int lan874x_phy_config_init(struct phy_device *phydev)
{
u16 val;
@@ -695,7 +742,7 @@ static struct phy_driver smsc_phy_driver[] = {
/* basic functions */
.read_status = lan87xx_read_status,
- .config_init = smsc_phy_config_init,
+ .config_init = lan87xx_phy_config_init,
.soft_reset = smsc_phy_reset,
.config_aneg = lan87xx_config_aneg,
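
The forced-mode selection above boils down to a small pure function; a sketch under the same assumptions (resolve_mdix is an illustrative name):

/* In forced-speed mode an 'auto' MDI-X request is pinned to straight
 * MDI, mirroring the fixed-MDI behavior of the AUTOMDIX_EN strap;
 * otherwise the user's explicit choice is kept.
 */
static u8 resolve_mdix(int autoneg, u8 mdix_ctrl)
{
        if (autoneg == AUTONEG_DISABLE && mdix_ctrl == ETH_TP_MDI_AUTO)
                return ETH_TP_MDI;
        return mdix_ctrl;
}
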
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index f53e255116ea..e3ca6e91efe1 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -4567,8 +4567,6 @@ static void lan78xx_disconnect(struct usb_interface *intf)
if (!dev)
return;
- netif_napi_del(&dev->napi);
-
udev = interface_to_usbdev(intf);
net = dev->net;
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index b586b1c13a47..f5647ee0adde 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1426,6 +1426,7 @@ static const struct usb_device_id products[] = {
{QMI_QUIRK_SET_DTR(0x22de, 0x9051, 2)}, /* Hucom Wireless HM-211S/K */
{QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */
{QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)}, /* SIMCom 7100E, 7230E, 7600E ++ */
+ {QMI_QUIRK_SET_DTR(0x1e0e, 0x9071, 3)}, /* SIMCom 8230C ++ */
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0195, 4)}, /* Quectel EG95 */
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index d6589b24c68d..44cba7acfe7d 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -10054,6 +10054,7 @@ static const struct usb_device_id rtl8152_table[] = {
{ USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041) },
{ USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff) },
{ USB_DEVICE(VENDOR_ID_TPLINK, 0x0601) },
+ { USB_DEVICE(VENDOR_ID_TPLINK, 0x0602) },
{ USB_DEVICE(VENDOR_ID_DLINK, 0xb301) },
{ USB_DEVICE(VENDOR_ID_DELL, 0xb097) },
{ USB_DEVICE(VENDOR_ID_ASUS, 0x1976) },
diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c
index c30ca415d1d3..36c73db44f77 100644
--- a/drivers/net/usb/sierra_net.c
+++ b/drivers/net/usb/sierra_net.c
@@ -689,6 +689,10 @@ static int sierra_net_bind(struct usbnet *dev, struct usb_interface *intf)
status);
return -ENODEV;
}
+ if (!dev->status) {
+ dev_err(&dev->udev->dev, "No status endpoint found");
+ return -ENODEV;
+ }
/* Initialize sierra private data */
priv = kzalloc(sizeof *priv, GFP_KERNEL);
if (!priv)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index e58a0f1b5c5b..a3046142cb8e 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -909,7 +909,7 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget,
/* NAPI functions as RCU section */
peer_dev = rcu_dereference_check(priv->peer, rcu_read_lock_bh_held());
- peer_txq = netdev_get_tx_queue(peer_dev, queue_idx);
+ peer_txq = peer_dev ? netdev_get_tx_queue(peer_dev, queue_idx) : NULL;
for (i = 0; i < budget; i++) {
void *ptr = __ptr_ring_consume(&rq->xdp_ring);
@@ -959,7 +959,7 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget,
rq->stats.vs.xdp_packets += done;
u64_stats_update_end(&rq->stats.syncp);
- if (unlikely(netif_tx_queue_stopped(peer_txq)))
+ if (peer_txq && unlikely(netif_tx_queue_stopped(peer_txq)))
netif_tx_wake_queue(peer_txq);
return done;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e53ba600605a..82b4a2a2b8c4 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -778,6 +778,26 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
}
+static int check_mergeable_len(struct net_device *dev, void *mrg_ctx,
+ unsigned int len)
+{
+ unsigned int headroom, tailroom, room, truesize;
+
+ truesize = mergeable_ctx_to_truesize(mrg_ctx);
+ headroom = mergeable_ctx_to_headroom(mrg_ctx);
+ tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+ room = SKB_DATA_ALIGN(headroom + tailroom);
+
+ if (len > truesize - room) {
+ pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
+ dev->name, len, (unsigned long)(truesize - room));
+ DEV_STATS_INC(dev, rx_length_errors);
+ return -1;
+ }
+
+ return 0;
+}
+
static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
unsigned int headroom,
unsigned int len)
@@ -1084,7 +1104,7 @@ static bool tx_may_stop(struct virtnet_info *vi,
* Since most packets only take 1 or 2 ring slots, stopping the queue
* early means 16 slots are typically wasted.
*/
- if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
+ if (sq->vq->num_free < MAX_SKB_FRAGS + 2) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
netif_tx_stop_queue(txq);
@@ -1116,7 +1136,7 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
/* More just got used, free them then recheck. */
free_old_xmit(sq, txq, false);
- if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
+ if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) {
netif_start_subqueue(dev, qnum);
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_inc(&sq->stats.wake);
@@ -1127,15 +1147,29 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
}
}
+/* Note that @len is the length of received data without the virtio header */
static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
- struct receive_queue *rq, void *buf, u32 len)
+ struct receive_queue *rq, void *buf,
+ u32 len, bool first_buf)
{
struct xdp_buff *xdp;
u32 bufsize;
xdp = (struct xdp_buff *)buf;
- bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len;
+	/* In virtnet_add_recvbuf_xsk, we use part of XDP_PACKET_HEADROOM for
+	 * the virtio header and ask the vhost to fill data from
+	 * hard_start + XDP_PACKET_HEADROOM - vi->hdr_len
+	 * The first buffer carries the virtio header, so the region left for
+	 * frame data is
+	 * xsk_pool_get_rx_frame_size()
+	 * Buffers after the first carry no virtio header, so their maximum
+	 * frame data length is
+	 * xsk_pool_get_rx_frame_size() + vi->hdr_len
+	 */
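+	/* Illustrative first-buffer layout (example only, values are not
+	 * from this patch):
+	 *
+	 *   hard_start + XDP_PACKET_HEADROOM - hdr_len
+	 *   |<- virtio hdr (hdr_len) ->|<- frame data, up to rx frame size ->|
+	 */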
+ bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool);
+ if (!first_buf)
+ bufsize += vi->hdr_len;
if (unlikely(len > bufsize)) {
pr_debug("%s: rx error: len %u exceeds truesize %u\n",
@@ -1260,7 +1294,7 @@ static int xsk_append_merge_buffer(struct virtnet_info *vi,
u64_stats_add(&stats->bytes, len);
- xdp = buf_to_xdp(vi, rq, buf, len);
+ xdp = buf_to_xdp(vi, rq, buf, len, false);
if (!xdp)
goto err;
@@ -1358,7 +1392,7 @@ static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queu
u64_stats_add(&stats->bytes, len);
- xdp = buf_to_xdp(vi, rq, buf, len);
+ xdp = buf_to_xdp(vi, rq, buf, len, true);
if (!xdp)
return;
@@ -1797,7 +1831,8 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
* across multiple buffers (num_buf > 1), and we make sure buffers
* have enough headroom.
*/
-static struct page *xdp_linearize_page(struct receive_queue *rq,
+static struct page *xdp_linearize_page(struct net_device *dev,
+ struct receive_queue *rq,
int *num_buf,
struct page *p,
int offset,
@@ -1817,18 +1852,27 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
page_off += *len;
+	/* Only mergeable mode can enter this loop; in small mode
+	 * *num_buf is 1, so the loop body never runs.
+	 */
while (--*num_buf) {
unsigned int buflen;
void *buf;
+ void *ctx;
int off;
- buf = virtnet_rq_get_buf(rq, &buflen, NULL);
+ buf = virtnet_rq_get_buf(rq, &buflen, &ctx);
if (unlikely(!buf))
goto err_buf;
p = virt_to_head_page(buf);
off = buf - page_address(p);
+ if (check_mergeable_len(dev, ctx, buflen)) {
+ put_page(p);
+ goto err_buf;
+ }
+
/* guard against a misconfigured or uncooperative backend that
	 * is sending packets larger than the MTU.
*/
@@ -1917,7 +1961,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
headroom = vi->hdr_len + header_offset;
buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- xdp_page = xdp_linearize_page(rq, &num_buf, page,
+ xdp_page = xdp_linearize_page(dev, rq, &num_buf, page,
offset, header_offset,
&tlen);
if (!xdp_page)
@@ -2126,10 +2170,9 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
struct virtnet_rq_stats *stats)
{
struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
- unsigned int headroom, tailroom, room;
- unsigned int truesize, cur_frag_size;
struct skb_shared_info *shinfo;
unsigned int xdp_frags_truesz = 0;
+ unsigned int truesize;
struct page *page;
skb_frag_t *frag;
int offset;
@@ -2172,21 +2215,14 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
page = virt_to_head_page(buf);
offset = buf - page_address(page);
- truesize = mergeable_ctx_to_truesize(ctx);
- headroom = mergeable_ctx_to_headroom(ctx);
- tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
- room = SKB_DATA_ALIGN(headroom + tailroom);
-
- cur_frag_size = truesize;
- xdp_frags_truesz += cur_frag_size;
- if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) {
+ if (check_mergeable_len(dev, ctx, len)) {
put_page(page);
- pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
- dev->name, len, (unsigned long)(truesize - room));
- DEV_STATS_INC(dev, rx_length_errors);
goto err;
}
+ truesize = mergeable_ctx_to_truesize(ctx);
+ xdp_frags_truesz += truesize;
+
frag = &shinfo->frags[shinfo->nr_frags++];
skb_frag_fill_page_desc(frag, page, offset, len);
if (page_is_pfmemalloc(page))
@@ -2252,7 +2288,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
*/
if (!xdp_prog->aux->xdp_has_frags) {
/* linearize data for XDP */
- xdp_page = xdp_linearize_page(rq, num_buf,
+ xdp_page = xdp_linearize_page(vi->dev, rq, num_buf,
*page, offset,
XDP_PACKET_HEADROOM,
len);
@@ -2400,18 +2436,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
struct sk_buff *head_skb, *curr_skb;
unsigned int truesize = mergeable_ctx_to_truesize(ctx);
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
- unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
- unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
head_skb = NULL;
u64_stats_add(&stats->bytes, len - vi->hdr_len);
- if (unlikely(len > truesize - room)) {
- pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
- dev->name, len, (unsigned long)(truesize - room));
- DEV_STATS_INC(dev, rx_length_errors);
+ if (check_mergeable_len(dev, ctx, len))
goto err_skb;
- }
if (unlikely(vi->xdp_enabled)) {
struct bpf_prog *xdp_prog;
@@ -2446,17 +2476,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
u64_stats_add(&stats->bytes, len);
page = virt_to_head_page(buf);
- truesize = mergeable_ctx_to_truesize(ctx);
- headroom = mergeable_ctx_to_headroom(ctx);
- tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
- room = SKB_DATA_ALIGN(headroom + tailroom);
- if (unlikely(len > truesize - room)) {
- pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
- dev->name, len, (unsigned long)(truesize - room));
- DEV_STATS_INC(dev, rx_length_errors);
+ if (check_mergeable_len(dev, ctx, len))
goto err_skb;
- }
+ truesize = mergeable_ctx_to_truesize(ctx);
curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page,
buf, len, truesize);
if (!curr_skb)
@@ -2998,7 +3021,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq, int budget)
free_old_xmit(sq, txq, !!budget);
} while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
- if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
+ if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) {
if (netif_tx_queue_stopped(txq)) {
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_inc(&sq->stats.wake);
@@ -3195,7 +3218,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
else
free_old_xmit(sq, txq, !!budget);
- if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
+ if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) {
if (netif_tx_queue_stopped(txq)) {
u64_stats_update_begin(&sq->stats.syncp);
u64_stats_inc(&sq->stats.wake);
@@ -3481,6 +3504,12 @@ static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq,
{
int qindex, err;
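+	/* The stopped queue is only woken once num_free reaches
+	 * MAX_SKB_FRAGS + 2 (see the wake checks above), so a ring no larger
+	 * than that could stall forever after stopping; reject such sizes.
+	 */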
+ if (ring_num <= MAX_SKB_FRAGS + 2) {
+		netdev_err(vi->dev, "tx size (%d) must be larger than %d\n",
+ ring_num, MAX_SKB_FRAGS + 2);
+ return -EINVAL;
+ }
+
qindex = sq - vi->sq;
virtnet_tx_pause(vi, sq);
@@ -7030,7 +7059,7 @@ static int virtnet_probe(struct virtio_device *vdev)
otherwise get link status from config. */
netif_carrier_off(dev);
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
- virtnet_config_changed_work(&vi->config_work);
+ virtio_config_changed(vi->vdev);
} else {
vi->status = VIRTIO_NET_S_LINK_UP;
virtnet_update_settings(vi);
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 8c7ffea0fa44..07fe05384cdf 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -4,6 +4,7 @@
* Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
* Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
* Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
*/
#include "mac.h"
@@ -1022,6 +1023,26 @@ static inline int ath10k_vdev_setup_sync(struct ath10k *ar)
return ar->last_wmi_vdev_start_status;
}
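+/* Wait for the firmware's vdev delete response when the firmware
+ * supports synchronous delete commands; bail out early during a
+ * crash-induced restart, since no response can be expected then.
+ */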
+static inline int ath10k_vdev_delete_sync(struct ath10k *ar)
+{
+ unsigned long time_left;
+
+ lockdep_assert_held(&ar->conf_mutex);
+
+ if (!test_bit(WMI_SERVICE_SYNC_DELETE_CMDS, ar->wmi.svc_map))
+ return 0;
+
+ if (test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags))
+ return -ESHUTDOWN;
+
+ time_left = wait_for_completion_timeout(&ar->vdev_delete_done,
+ ATH10K_VDEV_DELETE_TIMEOUT_HZ);
+ if (time_left == 0)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
static int ath10k_monitor_vdev_start(struct ath10k *ar, int vdev_id)
{
struct cfg80211_chan_def *chandef = NULL;
@@ -5900,7 +5921,6 @@ static void ath10k_remove_interface(struct ieee80211_hw *hw,
struct ath10k *ar = hw->priv;
struct ath10k_vif *arvif = (void *)vif->drv_priv;
struct ath10k_peer *peer;
- unsigned long time_left;
int ret;
int i;
@@ -5940,13 +5960,10 @@ static void ath10k_remove_interface(struct ieee80211_hw *hw,
ath10k_warn(ar, "failed to delete WMI vdev %i: %d\n",
arvif->vdev_id, ret);
- if (test_bit(WMI_SERVICE_SYNC_DELETE_CMDS, ar->wmi.svc_map)) {
- time_left = wait_for_completion_timeout(&ar->vdev_delete_done,
- ATH10K_VDEV_DELETE_TIMEOUT_HZ);
- if (time_left == 0) {
- ath10k_warn(ar, "Timeout in receiving vdev delete response\n");
- goto out;
- }
+ ret = ath10k_vdev_delete_sync(ar);
+ if (ret) {
+ ath10k_warn(ar, "Error in receiving vdev delete response: %d\n", ret);
+ goto out;
}
/* Some firmware revisions don't notify host about self-peer removal
diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c
index b2bf9d72b92f..d51f2e5a79a4 100644
--- a/drivers/net/wireless/ath/ath10k/snoc.c
+++ b/drivers/net/wireless/ath/ath10k/snoc.c
@@ -938,7 +938,9 @@ static int ath10k_snoc_hif_start(struct ath10k *ar)
dev_set_threaded(ar->napi_dev, true);
ath10k_core_napi_enable(ar);
- ath10k_snoc_irq_enable(ar);
+ /* IRQs are left enabled when we restart due to a firmware crash */
+ if (!test_bit(ATH10K_SNOC_FLAG_RECOVERY, &ar_snoc->flags))
+ ath10k_snoc_irq_enable(ar);
ath10k_snoc_rx_post(ar);
clear_bit(ATH10K_SNOC_FLAG_RECOVERY, &ar_snoc->flags);
diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index 2e9f8a5e61e4..22a101136135 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -990,6 +990,7 @@ void ath11k_fw_stats_init(struct ath11k *ar)
INIT_LIST_HEAD(&ar->fw_stats.bcn);
init_completion(&ar->fw_stats_complete);
+ init_completion(&ar->fw_stats_done);
}
void ath11k_fw_stats_free(struct ath11k_fw_stats *stats)
@@ -2134,6 +2135,20 @@ int ath11k_core_qmi_firmware_ready(struct ath11k_base *ab)
{
int ret;
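+	/* Validate the crypto mode module parameter up front, so an
+	 * invalid value fails before any firmware has been started.
+	 */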
+ switch (ath11k_crypto_mode) {
+ case ATH11K_CRYPT_MODE_SW:
+ set_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags);
+ set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags);
+ break;
+ case ATH11K_CRYPT_MODE_HW:
+ clear_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags);
+ clear_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags);
+ break;
+ default:
+ ath11k_info(ab, "invalid crypto_mode: %d\n", ath11k_crypto_mode);
+ return -EINVAL;
+ }
+
ret = ath11k_core_start_firmware(ab, ab->fw_mode);
if (ret) {
ath11k_err(ab, "failed to start firmware: %d\n", ret);
@@ -2152,20 +2167,6 @@ int ath11k_core_qmi_firmware_ready(struct ath11k_base *ab)
goto err_firmware_stop;
}
- switch (ath11k_crypto_mode) {
- case ATH11K_CRYPT_MODE_SW:
- set_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags);
- set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags);
- break;
- case ATH11K_CRYPT_MODE_HW:
- clear_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags);
- clear_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags);
- break;
- default:
- ath11k_info(ab, "invalid crypto_mode: %d\n", ath11k_crypto_mode);
- return -EINVAL;
- }
-
if (ath11k_frame_mode == ATH11K_HW_TXRX_RAW)
set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags);
diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index 339d4fca1ed5..6b2f207975e3 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -600,6 +600,8 @@ struct ath11k_fw_stats {
struct list_head pdevs;
struct list_head vdevs;
struct list_head bcn;
+ u32 num_vdev_recvd;
+ u32 num_bcn_recvd;
};
struct ath11k_dbg_htt_stats {
@@ -784,7 +786,7 @@ struct ath11k {
u8 alpha2[REG_ALPHA2_LEN + 1];
struct ath11k_fw_stats fw_stats;
struct completion fw_stats_complete;
- bool fw_stats_done;
+ struct completion fw_stats_done;
/* protected by conf_mutex */
bool ps_state_enable;
diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c
index bf192529e3fe..5d46f8e4c231 100644
--- a/drivers/net/wireless/ath/ath11k/debugfs.c
+++ b/drivers/net/wireless/ath/ath11k/debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2020 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/vmalloc.h>
@@ -93,57 +93,14 @@ void ath11k_debugfs_add_dbring_entry(struct ath11k *ar,
spin_unlock_bh(&dbr_data->lock);
}
-static void ath11k_debugfs_fw_stats_reset(struct ath11k *ar)
-{
- spin_lock_bh(&ar->data_lock);
- ar->fw_stats_done = false;
- ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs);
- ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs);
- spin_unlock_bh(&ar->data_lock);
-}
-
void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats *stats)
{
struct ath11k_base *ab = ar->ab;
- struct ath11k_pdev *pdev;
- bool is_end;
- static unsigned int num_vdev, num_bcn;
- size_t total_vdevs_started = 0;
- int i;
-
- /* WMI_REQUEST_PDEV_STAT request has been already processed */
-
- if (stats->stats_id == WMI_REQUEST_RSSI_PER_CHAIN_STAT) {
- ar->fw_stats_done = true;
- return;
- }
-
- if (stats->stats_id == WMI_REQUEST_VDEV_STAT) {
- if (list_empty(&stats->vdevs)) {
- ath11k_warn(ab, "empty vdev stats");
- return;
- }
- /* FW sends all the active VDEV stats irrespective of PDEV,
- * hence limit until the count of all VDEVs started
- */
- for (i = 0; i < ab->num_radios; i++) {
- pdev = rcu_dereference(ab->pdevs_active[i]);
- if (pdev && pdev->ar)
- total_vdevs_started += ar->num_started_vdevs;
- }
-
- is_end = ((++num_vdev) == total_vdevs_started);
-
- list_splice_tail_init(&stats->vdevs,
- &ar->fw_stats.vdevs);
-
- if (is_end) {
- ar->fw_stats_done = true;
- num_vdev = 0;
- }
- return;
- }
+ bool is_end = true;
+ /* WMI_REQUEST_PDEV_STAT, WMI_REQUEST_RSSI_PER_CHAIN_STAT and
+	 * WMI_REQUEST_VDEV_STAT requests have already been processed.
+ */
if (stats->stats_id == WMI_REQUEST_BCN_STAT) {
if (list_empty(&stats->bcn)) {
ath11k_warn(ab, "empty bcn stats");
@@ -152,97 +109,18 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats *
/* Mark end until we reached the count of all started VDEVs
* within the PDEV
*/
- is_end = ((++num_bcn) == ar->num_started_vdevs);
+ if (ar->num_started_vdevs)
+ is_end = ((++ar->fw_stats.num_bcn_recvd) ==
+ ar->num_started_vdevs);
list_splice_tail_init(&stats->bcn,
&ar->fw_stats.bcn);
- if (is_end) {
- ar->fw_stats_done = true;
- num_bcn = 0;
- }
+ if (is_end)
+ complete(&ar->fw_stats_done);
}
}
-static int ath11k_debugfs_fw_stats_request(struct ath11k *ar,
- struct stats_request_params *req_param)
-{
- struct ath11k_base *ab = ar->ab;
- unsigned long timeout, time_left;
- int ret;
-
- lockdep_assert_held(&ar->conf_mutex);
-
- /* FW stats can get split when exceeding the stats data buffer limit.
- * In that case, since there is no end marking for the back-to-back
- * received 'update stats' event, we keep a 3 seconds timeout in case,
- * fw_stats_done is not marked yet
- */
- timeout = jiffies + secs_to_jiffies(3);
-
- ath11k_debugfs_fw_stats_reset(ar);
-
- reinit_completion(&ar->fw_stats_complete);
-
- ret = ath11k_wmi_send_stats_request_cmd(ar, req_param);
-
- if (ret) {
- ath11k_warn(ab, "could not request fw stats (%d)\n",
- ret);
- return ret;
- }
-
- time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ);
-
- if (!time_left)
- return -ETIMEDOUT;
-
- for (;;) {
- if (time_after(jiffies, timeout))
- break;
-
- spin_lock_bh(&ar->data_lock);
- if (ar->fw_stats_done) {
- spin_unlock_bh(&ar->data_lock);
- break;
- }
- spin_unlock_bh(&ar->data_lock);
- }
- return 0;
-}
-
-int ath11k_debugfs_get_fw_stats(struct ath11k *ar, u32 pdev_id,
- u32 vdev_id, u32 stats_id)
-{
- struct ath11k_base *ab = ar->ab;
- struct stats_request_params req_param;
- int ret;
-
- mutex_lock(&ar->conf_mutex);
-
- if (ar->state != ATH11K_STATE_ON) {
- ret = -ENETDOWN;
- goto err_unlock;
- }
-
- req_param.pdev_id = pdev_id;
- req_param.vdev_id = vdev_id;
- req_param.stats_id = stats_id;
-
- ret = ath11k_debugfs_fw_stats_request(ar, &req_param);
- if (ret)
- ath11k_warn(ab, "failed to request fw stats: %d\n", ret);
-
- ath11k_dbg(ab, ATH11K_DBG_WMI,
- "debug get fw stat pdev id %d vdev id %d stats id 0x%x\n",
- pdev_id, vdev_id, stats_id);
-
-err_unlock:
- mutex_unlock(&ar->conf_mutex);
-
- return ret;
-}
-
static int ath11k_open_pdev_stats(struct inode *inode, struct file *file)
{
struct ath11k *ar = inode->i_private;
@@ -268,7 +146,7 @@ static int ath11k_open_pdev_stats(struct inode *inode, struct file *file)
req_param.vdev_id = 0;
req_param.stats_id = WMI_REQUEST_PDEV_STAT;
- ret = ath11k_debugfs_fw_stats_request(ar, &req_param);
+ ret = ath11k_mac_fw_stats_request(ar, &req_param);
if (ret) {
ath11k_warn(ab, "failed to request fw pdev stats: %d\n", ret);
goto err_free;
@@ -339,7 +217,7 @@ static int ath11k_open_vdev_stats(struct inode *inode, struct file *file)
req_param.vdev_id = 0;
req_param.stats_id = WMI_REQUEST_VDEV_STAT;
- ret = ath11k_debugfs_fw_stats_request(ar, &req_param);
+ ret = ath11k_mac_fw_stats_request(ar, &req_param);
if (ret) {
ath11k_warn(ar->ab, "failed to request fw vdev stats: %d\n", ret);
goto err_free;
@@ -415,7 +293,7 @@ static int ath11k_open_bcn_stats(struct inode *inode, struct file *file)
continue;
req_param.vdev_id = arvif->vdev_id;
- ret = ath11k_debugfs_fw_stats_request(ar, &req_param);
+ ret = ath11k_mac_fw_stats_request(ar, &req_param);
if (ret) {
ath11k_warn(ar->ab, "failed to request fw bcn stats: %d\n", ret);
goto err_free;
diff --git a/drivers/net/wireless/ath/ath11k/debugfs.h b/drivers/net/wireless/ath/ath11k/debugfs.h
index a39e458637b0..ed7fec177588 100644
--- a/drivers/net/wireless/ath/ath11k/debugfs.h
+++ b/drivers/net/wireless/ath/ath11k/debugfs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2022, 2025 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _ATH11K_DEBUGFS_H_
@@ -273,8 +273,6 @@ void ath11k_debugfs_unregister(struct ath11k *ar);
void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats *stats);
void ath11k_debugfs_fw_stats_init(struct ath11k *ar);
-int ath11k_debugfs_get_fw_stats(struct ath11k *ar, u32 pdev_id,
- u32 vdev_id, u32 stats_id);
static inline bool ath11k_debugfs_is_pktlog_lite_mode_enabled(struct ath11k *ar)
{
@@ -381,12 +379,6 @@ static inline int ath11k_debugfs_rx_filter(struct ath11k *ar)
return 0;
}
-static inline int ath11k_debugfs_get_fw_stats(struct ath11k *ar,
- u32 pdev_id, u32 vdev_id, u32 stats_id)
-{
- return 0;
-}
-
static inline void
ath11k_debugfs_add_dbring_entry(struct ath11k *ar,
enum wmi_direct_buffer_module id,
diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 08d7b136851f..13301ca317a5 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -8997,6 +8997,81 @@ static void ath11k_mac_put_chain_rssi(struct station_info *sinfo,
}
}
+static void ath11k_mac_fw_stats_reset(struct ath11k *ar)
+{
+ spin_lock_bh(&ar->data_lock);
+ ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs);
+ ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs);
+ ar->fw_stats.num_vdev_recvd = 0;
+ ar->fw_stats.num_bcn_recvd = 0;
+ spin_unlock_bh(&ar->data_lock);
+}
+
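+/* Request fw stats and wait in two stages: fw_stats_complete fires on
+ * the first 'update stats' event, while fw_stats_done fires only once
+ * the last event of a possibly split series has been processed.
+ */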
+int ath11k_mac_fw_stats_request(struct ath11k *ar,
+ struct stats_request_params *req_param)
+{
+ struct ath11k_base *ab = ar->ab;
+ unsigned long time_left;
+ int ret;
+
+ lockdep_assert_held(&ar->conf_mutex);
+
+ ath11k_mac_fw_stats_reset(ar);
+
+ reinit_completion(&ar->fw_stats_complete);
+ reinit_completion(&ar->fw_stats_done);
+
+ ret = ath11k_wmi_send_stats_request_cmd(ar, req_param);
+
+ if (ret) {
+ ath11k_warn(ab, "could not request fw stats (%d)\n",
+ ret);
+ return ret;
+ }
+
+ time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ);
+ if (!time_left)
+ return -ETIMEDOUT;
+
+ /* FW stats can get split when exceeding the stats data buffer limit.
+ * In that case, since there is no end marking for the back-to-back
+	 * received 'update stats' events, we keep a 3 second timeout in case
+	 * fw_stats_done is not marked yet.
+ */
+ time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ);
+ if (!time_left)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static int ath11k_mac_get_fw_stats(struct ath11k *ar, u32 pdev_id,
+ u32 vdev_id, u32 stats_id)
+{
+ struct ath11k_base *ab = ar->ab;
+ struct stats_request_params req_param;
+ int ret;
+
+ lockdep_assert_held(&ar->conf_mutex);
+
+ if (ar->state != ATH11K_STATE_ON)
+ return -ENETDOWN;
+
+ req_param.pdev_id = pdev_id;
+ req_param.vdev_id = vdev_id;
+ req_param.stats_id = stats_id;
+
+ ret = ath11k_mac_fw_stats_request(ar, &req_param);
+ if (ret)
+ ath11k_warn(ab, "failed to request fw stats: %d\n", ret);
+
+ ath11k_dbg(ab, ATH11K_DBG_WMI,
+ "debug get fw stat pdev id %d vdev id %d stats id 0x%x\n",
+ pdev_id, vdev_id, stats_id);
+
+ return ret;
+}
+
static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
@@ -9031,11 +9106,12 @@ static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw,
ath11k_mac_put_chain_rssi(sinfo, arsta, "ppdu", false);
+ mutex_lock(&ar->conf_mutex);
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) &&
arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA &&
ar->ab->hw_params.supports_rssi_stats &&
- !ath11k_debugfs_get_fw_stats(ar, ar->pdev->pdev_id, 0,
- WMI_REQUEST_RSSI_PER_CHAIN_STAT)) {
+ !ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0,
+ WMI_REQUEST_RSSI_PER_CHAIN_STAT)) {
ath11k_mac_put_chain_rssi(sinfo, arsta, "fw stats", true);
}
@@ -9043,9 +9119,10 @@ static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw,
if (!signal &&
arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA &&
ar->ab->hw_params.supports_rssi_stats &&
- !(ath11k_debugfs_get_fw_stats(ar, ar->pdev->pdev_id, 0,
- WMI_REQUEST_VDEV_STAT)))
+ !(ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0,
+ WMI_REQUEST_VDEV_STAT)))
signal = arsta->rssi_beacon;
+ mutex_unlock(&ar->conf_mutex);
ath11k_dbg(ar->ab, ATH11K_DBG_MAC,
"sta statistics db2dbm %u rssi comb %d rssi beacon %d\n",
@@ -9380,38 +9457,6 @@ exit:
return ret;
}
-static int ath11k_fw_stats_request(struct ath11k *ar,
- struct stats_request_params *req_param)
-{
- struct ath11k_base *ab = ar->ab;
- unsigned long time_left;
- int ret;
-
- lockdep_assert_held(&ar->conf_mutex);
-
- spin_lock_bh(&ar->data_lock);
- ar->fw_stats_done = false;
- ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs);
- spin_unlock_bh(&ar->data_lock);
-
- reinit_completion(&ar->fw_stats_complete);
-
- ret = ath11k_wmi_send_stats_request_cmd(ar, req_param);
- if (ret) {
- ath11k_warn(ab, "could not request fw stats (%d)\n",
- ret);
- return ret;
- }
-
- time_left = wait_for_completion_timeout(&ar->fw_stats_complete,
- 1 * HZ);
-
- if (!time_left)
- return -ETIMEDOUT;
-
- return 0;
-}
-
static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
unsigned int link_id,
@@ -9419,7 +9464,6 @@ static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw,
{
struct ath11k *ar = hw->priv;
struct ath11k_base *ab = ar->ab;
- struct stats_request_params req_param = {0};
struct ath11k_fw_stats_pdev *pdev;
int ret;
@@ -9431,9 +9475,6 @@ static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw,
*/
mutex_lock(&ar->conf_mutex);
- if (ar->state != ATH11K_STATE_ON)
- goto err_fallback;
-
/* Firmware doesn't provide Tx power during CAC hence no need to fetch
* the stats.
*/
@@ -9442,10 +9483,8 @@ static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw,
return -EAGAIN;
}
- req_param.pdev_id = ar->pdev->pdev_id;
- req_param.stats_id = WMI_REQUEST_PDEV_STAT;
-
- ret = ath11k_fw_stats_request(ar, &req_param);
+ ret = ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0,
+ WMI_REQUEST_PDEV_STAT);
if (ret) {
ath11k_warn(ab, "failed to request fw pdev stats: %d\n", ret);
goto err_fallback;
diff --git a/drivers/net/wireless/ath/ath11k/mac.h b/drivers/net/wireless/ath/ath11k/mac.h
index f5800fbecff8..5e61eea1bb03 100644
--- a/drivers/net/wireless/ath/ath11k/mac.h
+++ b/drivers/net/wireless/ath/ath11k/mac.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: BSD-3-Clause-Clear */
/*
* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2023, 2025 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef ATH11K_MAC_H
@@ -179,4 +179,6 @@ int ath11k_mac_vif_set_keepalive(struct ath11k_vif *arvif,
void ath11k_mac_fill_reg_tpc_info(struct ath11k *ar,
struct ieee80211_vif *vif,
struct ieee80211_chanctx_conf *ctx);
+int ath11k_mac_fw_stats_request(struct ath11k *ar,
+ struct stats_request_params *req_param);
#endif
diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index d7f852bebf4a..56af2e9634f4 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -8158,6 +8158,11 @@ static void ath11k_peer_assoc_conf_event(struct ath11k_base *ab, struct sk_buff
static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *skb)
{
struct ath11k_fw_stats stats = {};
+ size_t total_vdevs_started = 0;
+ struct ath11k_pdev *pdev;
+ bool is_end = true;
+ int i;
+
struct ath11k *ar;
int ret;
@@ -8184,25 +8189,57 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk
spin_lock_bh(&ar->data_lock);
- /* WMI_REQUEST_PDEV_STAT can be requested via .get_txpower mac ops or via
+ /* WMI_REQUEST_PDEV_STAT, WMI_REQUEST_VDEV_STAT and
+ * WMI_REQUEST_RSSI_PER_CHAIN_STAT can be requested via mac ops or via
	 * debugfs fw stats. Therefore, process them separately.
*/
if (stats.stats_id == WMI_REQUEST_PDEV_STAT) {
list_splice_tail_init(&stats.pdevs, &ar->fw_stats.pdevs);
- ar->fw_stats_done = true;
+ complete(&ar->fw_stats_done);
+ goto complete;
+ }
+
+ if (stats.stats_id == WMI_REQUEST_RSSI_PER_CHAIN_STAT) {
+ complete(&ar->fw_stats_done);
goto complete;
}
- /* WMI_REQUEST_VDEV_STAT, WMI_REQUEST_BCN_STAT and WMI_REQUEST_RSSI_PER_CHAIN_STAT
- * are currently requested only via debugfs fw stats. Hence, processing these
- * in debugfs context
+ if (stats.stats_id == WMI_REQUEST_VDEV_STAT) {
+ if (list_empty(&stats.vdevs)) {
+ ath11k_warn(ab, "empty vdev stats");
+ goto complete;
+ }
+ /* FW sends all the active VDEV stats irrespective of PDEV,
+ * hence limit until the count of all VDEVs started
+ */
+ for (i = 0; i < ab->num_radios; i++) {
+ pdev = rcu_dereference(ab->pdevs_active[i]);
+ if (pdev && pdev->ar)
+ total_vdevs_started += ar->num_started_vdevs;
+ }
+
+ if (total_vdevs_started)
+ is_end = ((++ar->fw_stats.num_vdev_recvd) ==
+ total_vdevs_started);
+
+ list_splice_tail_init(&stats.vdevs,
+ &ar->fw_stats.vdevs);
+
+ if (is_end)
+ complete(&ar->fw_stats_done);
+
+ goto complete;
+ }
+
+ /* WMI_REQUEST_BCN_STAT is currently requested only via debugfs fw stats.
+	 * Hence, process it in the debugfs context.
*/
ath11k_debugfs_fw_stats_process(ar, &stats);
complete:
complete(&ar->fw_stats_complete);
- rcu_read_unlock();
spin_unlock_bh(&ar->data_lock);
+ rcu_read_unlock();
/* Since the stats's pdev, vdev and beacon list are spliced and reinitialised
* at this point, no need to free the individual list.
diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c
index 31d851d8e688..89ae80934b30 100644
--- a/drivers/net/wireless/ath/ath12k/core.c
+++ b/drivers/net/wireless/ath/ath12k/core.c
@@ -1216,6 +1216,7 @@ void ath12k_fw_stats_init(struct ath12k *ar)
INIT_LIST_HEAD(&ar->fw_stats.pdevs);
INIT_LIST_HEAD(&ar->fw_stats.bcn);
init_completion(&ar->fw_stats_complete);
+ init_completion(&ar->fw_stats_done);
}
void ath12k_fw_stats_free(struct ath12k_fw_stats *stats)
@@ -1228,8 +1229,9 @@ void ath12k_fw_stats_free(struct ath12k_fw_stats *stats)
void ath12k_fw_stats_reset(struct ath12k *ar)
{
spin_lock_bh(&ar->data_lock);
- ar->fw_stats.fw_stats_done = false;
ath12k_fw_stats_free(&ar->fw_stats);
+ ar->fw_stats.num_vdev_recvd = 0;
+ ar->fw_stats.num_bcn_recvd = 0;
spin_unlock_bh(&ar->data_lock);
}
@@ -2129,7 +2131,8 @@ int ath12k_core_init(struct ath12k_base *ab)
if (!ag) {
mutex_unlock(&ath12k_hw_group_mutex);
ath12k_warn(ab, "unable to get hw group\n");
- return -ENODEV;
+ ret = -ENODEV;
+ goto err_unregister_notifier;
}
mutex_unlock(&ath12k_hw_group_mutex);
@@ -2144,7 +2147,7 @@ int ath12k_core_init(struct ath12k_base *ab)
if (ret) {
mutex_unlock(&ag->mutex);
ath12k_warn(ab, "unable to create hw group\n");
- goto err;
+ goto err_destroy_hw_group;
}
}
@@ -2152,9 +2155,12 @@ int ath12k_core_init(struct ath12k_base *ab)
return 0;
-err:
+err_destroy_hw_group:
ath12k_core_hw_group_destroy(ab->ag);
ath12k_core_hw_group_unassign(ab);
+err_unregister_notifier:
+ ath12k_core_panic_notifier_unregister(ab);
+
return ret;
}
diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h
index 941db6e49d6e..7bcd9c70309f 100644
--- a/drivers/net/wireless/ath/ath12k/core.h
+++ b/drivers/net/wireless/ath/ath12k/core.h
@@ -601,6 +601,12 @@ struct ath12k_sta {
#define ATH12K_NUM_CHANS 101
#define ATH12K_MAX_5GHZ_CHAN 173
+static inline bool ath12k_is_2ghz_channel_freq(u32 freq)
+{
+ return freq >= ATH12K_MIN_2GHZ_FREQ &&
+ freq <= ATH12K_MAX_2GHZ_FREQ;
+}
+
enum ath12k_hw_state {
ATH12K_HW_STATE_OFF,
ATH12K_HW_STATE_ON,
@@ -626,7 +632,8 @@ struct ath12k_fw_stats {
struct list_head pdevs;
struct list_head vdevs;
struct list_head bcn;
- bool fw_stats_done;
+ u32 num_vdev_recvd;
+ u32 num_bcn_recvd;
};
struct ath12k_dbg_htt_stats {
@@ -806,6 +813,7 @@ struct ath12k {
bool regdom_set_by_user;
struct completion fw_stats_complete;
+ struct completion fw_stats_done;
struct completion mlo_setup_done;
u32 mlo_setup_status;
diff --git a/drivers/net/wireless/ath/ath12k/debugfs.c b/drivers/net/wireless/ath/ath12k/debugfs.c
index dd624d73b8b2..23da93afaa5c 100644
--- a/drivers/net/wireless/ath/ath12k/debugfs.c
+++ b/drivers/net/wireless/ath/ath12k/debugfs.c
@@ -1251,64 +1251,6 @@ void ath12k_debugfs_soc_destroy(struct ath12k_base *ab)
*/
}
-void
-ath12k_debugfs_fw_stats_process(struct ath12k *ar,
- struct ath12k_fw_stats *stats)
-{
- struct ath12k_base *ab = ar->ab;
- struct ath12k_pdev *pdev;
- bool is_end;
- static unsigned int num_vdev, num_bcn;
- size_t total_vdevs_started = 0;
- int i;
-
- if (stats->stats_id == WMI_REQUEST_VDEV_STAT) {
- if (list_empty(&stats->vdevs)) {
- ath12k_warn(ab, "empty vdev stats");
- return;
- }
- /* FW sends all the active VDEV stats irrespective of PDEV,
- * hence limit until the count of all VDEVs started
- */
- rcu_read_lock();
- for (i = 0; i < ab->num_radios; i++) {
- pdev = rcu_dereference(ab->pdevs_active[i]);
- if (pdev && pdev->ar)
- total_vdevs_started += pdev->ar->num_started_vdevs;
- }
- rcu_read_unlock();
-
- is_end = ((++num_vdev) == total_vdevs_started);
-
- list_splice_tail_init(&stats->vdevs,
- &ar->fw_stats.vdevs);
-
- if (is_end) {
- ar->fw_stats.fw_stats_done = true;
- num_vdev = 0;
- }
- return;
- }
- if (stats->stats_id == WMI_REQUEST_BCN_STAT) {
- if (list_empty(&stats->bcn)) {
- ath12k_warn(ab, "empty beacon stats");
- return;
- }
- /* Mark end until we reached the count of all started VDEVs
- * within the PDEV
- */
- is_end = ((++num_bcn) == ar->num_started_vdevs);
-
- list_splice_tail_init(&stats->bcn,
- &ar->fw_stats.bcn);
-
- if (is_end) {
- ar->fw_stats.fw_stats_done = true;
- num_bcn = 0;
- }
- }
-}
-
static int ath12k_open_vdev_stats(struct inode *inode, struct file *file)
{
struct ath12k *ar = inode->i_private;
diff --git a/drivers/net/wireless/ath/ath12k/debugfs.h b/drivers/net/wireless/ath/ath12k/debugfs.h
index ebef7dace344..21641a8a0346 100644
--- a/drivers/net/wireless/ath/ath12k/debugfs.h
+++ b/drivers/net/wireless/ath/ath12k/debugfs.h
@@ -12,8 +12,6 @@ void ath12k_debugfs_soc_create(struct ath12k_base *ab);
void ath12k_debugfs_soc_destroy(struct ath12k_base *ab);
void ath12k_debugfs_register(struct ath12k *ar);
void ath12k_debugfs_unregister(struct ath12k *ar);
-void ath12k_debugfs_fw_stats_process(struct ath12k *ar,
- struct ath12k_fw_stats *stats);
void ath12k_debugfs_op_vif_add(struct ieee80211_hw *hw,
struct ieee80211_vif *vif);
void ath12k_debugfs_pdev_create(struct ath12k_base *ab);
@@ -126,11 +124,6 @@ static inline void ath12k_debugfs_unregister(struct ath12k *ar)
{
}
-static inline void ath12k_debugfs_fw_stats_process(struct ath12k *ar,
- struct ath12k_fw_stats *stats)
-{
-}
-
static inline bool ath12k_debugfs_is_extd_rx_stats_enabled(struct ath12k *ar)
{
return false;
diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c
index 57648febc4a4..bd95dc88f9b2 100644
--- a/drivers/net/wireless/ath/ath12k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath12k/dp_rx.c
@@ -1060,7 +1060,6 @@ int ath12k_dp_rx_peer_tid_setup(struct ath12k *ar, const u8 *peer_mac, int vdev_
}
rx_tid = &peer->rx_tid[tid];
- paddr_aligned = rx_tid->qbuf.paddr_aligned;
/* Update the tid queue if it is already setup */
if (rx_tid->active) {
ret = ath12k_peer_rx_tid_reo_update(ar, peer, rx_tid,
@@ -1072,6 +1071,7 @@ int ath12k_dp_rx_peer_tid_setup(struct ath12k *ar, const u8 *peer_mac, int vdev_
}
if (!ab->hw_params->reoq_lut_support) {
+ paddr_aligned = rx_tid->qbuf.paddr_aligned;
ret = ath12k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id,
peer_mac,
paddr_aligned, tid,
@@ -1098,6 +1098,7 @@ int ath12k_dp_rx_peer_tid_setup(struct ath12k *ar, const u8 *peer_mac, int vdev_
return ret;
}
+ paddr_aligned = rx_tid->qbuf.paddr_aligned;
if (ab->hw_params->reoq_lut_support) {
/* Update the REO queue LUT at the corresponding peer id
* and tid with qaddr.
diff --git a/drivers/net/wireless/ath/ath12k/hal.h b/drivers/net/wireless/ath/ath12k/hal.h
index 0ee9c6b26dab..c1750b5dc03c 100644
--- a/drivers/net/wireless/ath/ath12k/hal.h
+++ b/drivers/net/wireless/ath/ath12k/hal.h
@@ -585,7 +585,8 @@ enum hal_reo_cmd_type {
* or cache was blocked
* @HAL_REO_CMD_FAILED: Command execution failed, could be due to
* invalid queue desc
- * @HAL_REO_CMD_RESOURCE_BLOCKED:
+ * @HAL_REO_CMD_RESOURCE_BLOCKED: Command could not be executed because
+ * one or more descriptors were blocked
* @HAL_REO_CMD_DRAIN:
*/
enum hal_reo_cmd_status {
diff --git a/drivers/net/wireless/ath/ath12k/hw.c b/drivers/net/wireless/ath/ath12k/hw.c
index 7e2cf0fb2085..8254dc10b53b 100644
--- a/drivers/net/wireless/ath/ath12k/hw.c
+++ b/drivers/net/wireless/ath/ath12k/hw.c
@@ -951,6 +951,8 @@ static const struct ath12k_hw_regs qcn9274_v1_regs = {
.hal_umac_ce0_dest_reg_base = 0x01b81000,
.hal_umac_ce1_src_reg_base = 0x01b82000,
.hal_umac_ce1_dest_reg_base = 0x01b83000,
+
+ .gcc_gcc_pcie_hot_rst = 0x1e38338,
};
static const struct ath12k_hw_regs qcn9274_v2_regs = {
@@ -1042,6 +1044,8 @@ static const struct ath12k_hw_regs qcn9274_v2_regs = {
.hal_umac_ce0_dest_reg_base = 0x01b81000,
.hal_umac_ce1_src_reg_base = 0x01b82000,
.hal_umac_ce1_dest_reg_base = 0x01b83000,
+
+ .gcc_gcc_pcie_hot_rst = 0x1e38338,
};
static const struct ath12k_hw_regs ipq5332_regs = {
@@ -1215,6 +1219,8 @@ static const struct ath12k_hw_regs wcn7850_regs = {
.hal_umac_ce0_dest_reg_base = 0x01b81000,
.hal_umac_ce1_src_reg_base = 0x01b82000,
.hal_umac_ce1_dest_reg_base = 0x01b83000,
+
+ .gcc_gcc_pcie_hot_rst = 0x1e40304,
};
static const struct ath12k_hw_hal_params ath12k_hw_hal_params_qcn9274 = {
diff --git a/drivers/net/wireless/ath/ath12k/hw.h b/drivers/net/wireless/ath/ath12k/hw.h
index 0fbc17649df4..0a75bc5abfa2 100644
--- a/drivers/net/wireless/ath/ath12k/hw.h
+++ b/drivers/net/wireless/ath/ath12k/hw.h
@@ -375,6 +375,8 @@ struct ath12k_hw_regs {
u32 hal_reo_cmd_ring_base;
u32 hal_reo_status_ring_base;
+
+ u32 gcc_gcc_pcie_hot_rst;
};
static inline const char *ath12k_bd_ie_type_str(enum ath12k_bd_ie_type type)
diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c
index 88b59f3ff87a..59ec422992d3 100644
--- a/drivers/net/wireless/ath/ath12k/mac.c
+++ b/drivers/net/wireless/ath/ath12k/mac.c
@@ -4360,7 +4360,7 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar,
{
struct ath12k_base *ab = ar->ab;
struct ath12k_hw *ah = ath12k_ar_to_ah(ar);
- unsigned long timeout, time_left;
+ unsigned long time_left;
int ret;
guard(mutex)(&ah->hw_mutex);
@@ -4368,19 +4368,13 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar,
if (ah->state != ATH12K_HW_STATE_ON)
return -ENETDOWN;
- /* FW stats can get split when exceeding the stats data buffer limit.
- * In that case, since there is no end marking for the back-to-back
- * received 'update stats' event, we keep a 3 seconds timeout in case,
- * fw_stats_done is not marked yet
- */
- timeout = jiffies + msecs_to_jiffies(3 * 1000);
ath12k_fw_stats_reset(ar);
reinit_completion(&ar->fw_stats_complete);
+ reinit_completion(&ar->fw_stats_done);
ret = ath12k_wmi_send_stats_request_cmd(ar, param->stats_id,
param->vdev_id, param->pdev_id);
-
if (ret) {
ath12k_warn(ab, "failed to request fw stats: %d\n", ret);
return ret;
@@ -4391,7 +4385,6 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar,
param->pdev_id, param->vdev_id, param->stats_id);
time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ);
-
if (!time_left) {
ath12k_warn(ab, "time out while waiting for get fw stats\n");
return -ETIMEDOUT;
@@ -4400,20 +4393,15 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar,
/* Firmware sends WMI_UPDATE_STATS_EVENTID back-to-back
* when stats data buffer limit is reached. fw_stats_complete
* is completed once host receives first event from firmware, but
- * still end might not be marked in the TLV.
- * Below loop is to confirm that firmware completed sending all the event
- * and fw_stats_done is marked true when end is marked in the TLV.
+	 * there could still be more events following. The wait below is to
+	 * ensure the firmware has finished sending all the events.
*/
- for (;;) {
- if (time_after(jiffies, timeout))
- break;
- spin_lock_bh(&ar->data_lock);
- if (ar->fw_stats.fw_stats_done) {
- spin_unlock_bh(&ar->data_lock);
- break;
- }
- spin_unlock_bh(&ar->data_lock);
+ time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ);
+ if (!time_left) {
+ ath12k_warn(ab, "time out while waiting for fw stats done\n");
+ return -ETIMEDOUT;
}
+
return 0;
}
@@ -5890,6 +5878,327 @@ exit:
return ret;
}
+static bool ath12k_mac_is_freq_on_mac(struct ath12k_hw_mode_freq_range_arg *freq_range,
+ u32 freq, u8 mac_id)
+{
+ return (freq >= freq_range[mac_id].low_2ghz_freq &&
+ freq <= freq_range[mac_id].high_2ghz_freq) ||
+ (freq >= freq_range[mac_id].low_5ghz_freq &&
+ freq <= freq_range[mac_id].high_5ghz_freq);
+}
+
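+/* For example, with a range table where MAC 0 spans 2400-2483 MHz and
+ * MAC 1 spans 5150-5850 MHz (illustrative numbers), 2412 MHz and
+ * 5180 MHz can never land on the same MAC.
+ */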
+static bool
+ath12k_mac_2_freq_same_mac_in_freq_range(struct ath12k_base *ab,
+ struct ath12k_hw_mode_freq_range_arg *freq_range,
+ u32 freq_link1, u32 freq_link2)
+{
+ u8 i;
+
+ for (i = 0; i < MAX_RADIOS; i++) {
+ if (ath12k_mac_is_freq_on_mac(freq_range, freq_link1, i) &&
+ ath12k_mac_is_freq_on_mac(freq_range, freq_link2, i))
+ return true;
+ }
+
+ return false;
+}
+
+static bool ath12k_mac_is_hw_dbs_capable(struct ath12k_base *ab)
+{
+ return test_bit(WMI_TLV_SERVICE_DUAL_BAND_SIMULTANEOUS_SUPPORT,
+ ab->wmi_ab.svc_map) &&
+ ab->wmi_ab.hw_mode_info.support_dbs;
+}
+
+static bool ath12k_mac_2_freq_same_mac_in_dbs(struct ath12k_base *ab,
+ u32 freq_link1, u32 freq_link2)
+{
+ struct ath12k_hw_mode_freq_range_arg *freq_range;
+
+ if (!ath12k_mac_is_hw_dbs_capable(ab))
+ return true;
+
+ freq_range = ab->wmi_ab.hw_mode_info.freq_range_caps[ATH12K_HW_MODE_DBS];
+ return ath12k_mac_2_freq_same_mac_in_freq_range(ab, freq_range,
+ freq_link1, freq_link2);
+}
+
+static bool ath12k_mac_is_hw_sbs_capable(struct ath12k_base *ab)
+{
+ return test_bit(WMI_TLV_SERVICE_DUAL_BAND_SIMULTANEOUS_SUPPORT,
+ ab->wmi_ab.svc_map) &&
+ ab->wmi_ab.hw_mode_info.support_sbs;
+}
+
+static bool ath12k_mac_2_freq_same_mac_in_sbs(struct ath12k_base *ab,
+ u32 freq_link1, u32 freq_link2)
+{
+ struct ath12k_hw_mode_info *info = &ab->wmi_ab.hw_mode_info;
+ struct ath12k_hw_mode_freq_range_arg *sbs_uppr_share;
+ struct ath12k_hw_mode_freq_range_arg *sbs_low_share;
+ struct ath12k_hw_mode_freq_range_arg *sbs_range;
+
+ if (!ath12k_mac_is_hw_sbs_capable(ab))
+ return true;
+
+ if (ab->wmi_ab.sbs_lower_band_end_freq) {
+ sbs_uppr_share = info->freq_range_caps[ATH12K_HW_MODE_SBS_UPPER_SHARE];
+ sbs_low_share = info->freq_range_caps[ATH12K_HW_MODE_SBS_LOWER_SHARE];
+
+ return ath12k_mac_2_freq_same_mac_in_freq_range(ab, sbs_low_share,
+ freq_link1, freq_link2) ||
+ ath12k_mac_2_freq_same_mac_in_freq_range(ab, sbs_uppr_share,
+ freq_link1, freq_link2);
+ }
+
+ sbs_range = info->freq_range_caps[ATH12K_HW_MODE_SBS];
+ return ath12k_mac_2_freq_same_mac_in_freq_range(ab, sbs_range,
+ freq_link1, freq_link2);
+}
+
+static bool ath12k_mac_freqs_on_same_mac(struct ath12k_base *ab,
+ u32 freq_link1, u32 freq_link2)
+{
+ return ath12k_mac_2_freq_same_mac_in_dbs(ab, freq_link1, freq_link2) &&
+ ath12k_mac_2_freq_same_mac_in_sbs(ab, freq_link1, freq_link2);
+}
+
+static int ath12k_mac_mlo_sta_set_link_active(struct ath12k_base *ab,
+ enum wmi_mlo_link_force_reason reason,
+ enum wmi_mlo_link_force_mode mode,
+ u8 *mlo_vdev_id_lst,
+ u8 num_mlo_vdev,
+ u8 *mlo_inactive_vdev_lst,
+ u8 num_mlo_inactive_vdev)
+{
+ struct wmi_mlo_link_set_active_arg param = {0};
+ u32 entry_idx, entry_offset, vdev_idx;
+ u8 vdev_id;
+
+ param.reason = reason;
+ param.force_mode = mode;
+
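+	/* Pack each vdev id into the u32 bitmap array; e.g. vdev id 35
+	 * sets bit 3 of vdev_bitmap[1], since 35 / 32 == 1 and 35 % 32 == 3.
+	 */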
+ for (vdev_idx = 0; vdev_idx < num_mlo_vdev; vdev_idx++) {
+ vdev_id = mlo_vdev_id_lst[vdev_idx];
+ entry_idx = vdev_id / 32;
+ entry_offset = vdev_id % 32;
+ if (entry_idx >= WMI_MLO_LINK_NUM_SZ) {
+ ath12k_warn(ab, "Invalid entry_idx %d num_mlo_vdev %d vdev %d",
+ entry_idx, num_mlo_vdev, vdev_id);
+ return -EINVAL;
+ }
+ param.vdev_bitmap[entry_idx] |= 1 << entry_offset;
+ /* update entry number if entry index changed */
+ if (param.num_vdev_bitmap < entry_idx + 1)
+ param.num_vdev_bitmap = entry_idx + 1;
+ }
+
+ ath12k_dbg(ab, ATH12K_DBG_MAC,
+ "num_vdev_bitmap %d vdev_bitmap[0] = 0x%x, vdev_bitmap[1] = 0x%x",
+ param.num_vdev_bitmap, param.vdev_bitmap[0], param.vdev_bitmap[1]);
+
+ if (mode == WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE) {
+ for (vdev_idx = 0; vdev_idx < num_mlo_inactive_vdev; vdev_idx++) {
+ vdev_id = mlo_inactive_vdev_lst[vdev_idx];
+ entry_idx = vdev_id / 32;
+ entry_offset = vdev_id % 32;
+ if (entry_idx >= WMI_MLO_LINK_NUM_SZ) {
+ ath12k_warn(ab, "Invalid entry_idx %d num_mlo_vdev %d vdev %d",
+ entry_idx, num_mlo_inactive_vdev, vdev_id);
+ return -EINVAL;
+ }
+ param.inactive_vdev_bitmap[entry_idx] |= 1 << entry_offset;
+ /* update entry number if entry index changed */
+ if (param.num_inactive_vdev_bitmap < entry_idx + 1)
+ param.num_inactive_vdev_bitmap = entry_idx + 1;
+ }
+
+ ath12k_dbg(ab, ATH12K_DBG_MAC,
+ "num_vdev_bitmap %d inactive_vdev_bitmap[0] = 0x%x, inactive_vdev_bitmap[1] = 0x%x",
+ param.num_inactive_vdev_bitmap,
+ param.inactive_vdev_bitmap[0],
+ param.inactive_vdev_bitmap[1]);
+ }
+
+ if (mode == WMI_MLO_LINK_FORCE_MODE_ACTIVE_LINK_NUM ||
+ mode == WMI_MLO_LINK_FORCE_MODE_INACTIVE_LINK_NUM) {
+ param.num_link_entry = 1;
+ param.link_num[0].num_of_link = num_mlo_vdev - 1;
+ }
+
+ return ath12k_wmi_send_mlo_link_set_active_cmd(ab, &param);
+}
+
+static int ath12k_mac_mlo_sta_update_link_active(struct ath12k_base *ab,
+ struct ieee80211_hw *hw,
+ struct ath12k_vif *ahvif)
+{
+ u8 mlo_vdev_id_lst[IEEE80211_MLD_MAX_NUM_LINKS] = {0};
+ u32 mlo_freq_list[IEEE80211_MLD_MAX_NUM_LINKS] = {0};
+ unsigned long links = ahvif->links_map;
+ enum wmi_mlo_link_force_reason reason;
+ struct ieee80211_chanctx_conf *conf;
+ enum wmi_mlo_link_force_mode mode;
+ struct ieee80211_bss_conf *info;
+ struct ath12k_link_vif *arvif;
+ u8 num_mlo_vdev = 0;
+ u8 link_id;
+
+ for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) {
+ arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]);
+ /* make sure vdev is created on this device */
+ if (!arvif || !arvif->is_created || arvif->ar->ab != ab)
+ continue;
+
+ info = ath12k_mac_get_link_bss_conf(arvif);
+ conf = wiphy_dereference(hw->wiphy, info->chanctx_conf);
+ mlo_freq_list[num_mlo_vdev] = conf->def.chan->center_freq;
+
+ mlo_vdev_id_lst[num_mlo_vdev] = arvif->vdev_id;
+ num_mlo_vdev++;
+ }
+
+	/* It is not allowed to activate more links than a single device
+	 * supports; reaching this point indicates a bug.
+	 */
+ if (num_mlo_vdev > ATH12K_NUM_MAX_ACTIVE_LINKS_PER_DEVICE) {
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+ /* if 2 links are established and both link channels fall on the
+ * same hardware MAC, send command to firmware to deactivate one
+ * of them.
+ */
+ if (num_mlo_vdev == 2 &&
+ ath12k_mac_freqs_on_same_mac(ab, mlo_freq_list[0],
+ mlo_freq_list[1])) {
+ mode = WMI_MLO_LINK_FORCE_MODE_INACTIVE_LINK_NUM;
+ reason = WMI_MLO_LINK_FORCE_REASON_NEW_CONNECT;
+ return ath12k_mac_mlo_sta_set_link_active(ab, reason, mode,
+ mlo_vdev_id_lst, num_mlo_vdev,
+ NULL, 0);
+ }
+
+ return 0;
+}
+
+static bool ath12k_mac_are_sbs_chan(struct ath12k_base *ab, u32 freq_1, u32 freq_2)
+{
+ if (!ath12k_mac_is_hw_sbs_capable(ab))
+ return false;
+
+ if (ath12k_is_2ghz_channel_freq(freq_1) ||
+ ath12k_is_2ghz_channel_freq(freq_2))
+ return false;
+
+ return !ath12k_mac_2_freq_same_mac_in_sbs(ab, freq_1, freq_2);
+}
+
+static bool ath12k_mac_are_dbs_chan(struct ath12k_base *ab, u32 freq_1, u32 freq_2)
+{
+ if (!ath12k_mac_is_hw_dbs_capable(ab))
+ return false;
+
+ return !ath12k_mac_2_freq_same_mac_in_dbs(ab, freq_1, freq_2);
+}
+
+static int ath12k_mac_select_links(struct ath12k_base *ab,
+ struct ieee80211_vif *vif,
+ struct ieee80211_hw *hw,
+ u16 *selected_links)
+{
+ unsigned long useful_links = ieee80211_vif_usable_links(vif);
+ struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif);
+ u8 num_useful_links = hweight_long(useful_links);
+ struct ieee80211_chanctx_conf *chanctx;
+ struct ath12k_link_vif *assoc_arvif;
+ u32 assoc_link_freq, partner_freq;
+ u16 sbs_links = 0, dbs_links = 0;
+ struct ieee80211_bss_conf *info;
+ struct ieee80211_channel *chan;
+ struct ieee80211_sta *sta;
+ struct ath12k_sta *ahsta;
+ u8 link_id;
+
+	/* activate all useful links if their count is within the supported max */
+ if (num_useful_links <= ATH12K_NUM_MAX_ACTIVE_LINKS_PER_DEVICE) {
+ *selected_links = useful_links;
+ return 0;
+ }
+
+	/* we can only get here in station mode, so it's safe
+	 * to use ap_addr
+ */
+ rcu_read_lock();
+ sta = ieee80211_find_sta(vif, vif->cfg.ap_addr);
+ if (!sta) {
+ rcu_read_unlock();
+ ath12k_warn(ab, "failed to find sta with addr %pM\n", vif->cfg.ap_addr);
+ return -EINVAL;
+ }
+
+ ahsta = ath12k_sta_to_ahsta(sta);
+ assoc_arvif = wiphy_dereference(hw->wiphy, ahvif->link[ahsta->assoc_link_id]);
+ info = ath12k_mac_get_link_bss_conf(assoc_arvif);
+ chanctx = rcu_dereference(info->chanctx_conf);
+ assoc_link_freq = chanctx->def.chan->center_freq;
+ rcu_read_unlock();
+ ath12k_dbg(ab, ATH12K_DBG_MAC, "assoc link %u freq %u\n",
+ assoc_arvif->link_id, assoc_link_freq);
+
+	/* the assoc link is already activated and has to be kept active;
+	 * we only need to select a partner link from the others.
+ */
+ useful_links &= ~BIT(assoc_arvif->link_id);
+ for_each_set_bit(link_id, &useful_links, IEEE80211_MLD_MAX_NUM_LINKS) {
+ info = wiphy_dereference(hw->wiphy, vif->link_conf[link_id]);
+ if (!info) {
+ ath12k_warn(ab, "failed to get link info for link: %u\n",
+ link_id);
+ return -ENOLINK;
+ }
+
+ chan = info->chanreq.oper.chan;
+ if (!chan) {
+ ath12k_warn(ab, "failed to get chan for link: %u\n", link_id);
+ return -EINVAL;
+ }
+
+ partner_freq = chan->center_freq;
+ if (ath12k_mac_are_sbs_chan(ab, assoc_link_freq, partner_freq)) {
+ sbs_links |= BIT(link_id);
+ ath12k_dbg(ab, ATH12K_DBG_MAC, "new SBS link %u freq %u\n",
+ link_id, partner_freq);
+ continue;
+ }
+
+ if (ath12k_mac_are_dbs_chan(ab, assoc_link_freq, partner_freq)) {
+ dbs_links |= BIT(link_id);
+ ath12k_dbg(ab, ATH12K_DBG_MAC, "new DBS link %u freq %u\n",
+ link_id, partner_freq);
+ continue;
+ }
+
+ ath12k_dbg(ab, ATH12K_DBG_MAC, "non DBS/SBS link %u freq %u\n",
+ link_id, partner_freq);
+ }
+
+	/* choose the first candidate no matter how many are in the list */
+ if (sbs_links)
+ link_id = __ffs(sbs_links);
+ else if (dbs_links)
+ link_id = __ffs(dbs_links);
+ else
+ link_id = ffs(useful_links) - 1;
+
+ ath12k_dbg(ab, ATH12K_DBG_MAC, "select partner link %u\n", link_id);
+
+ *selected_links = BIT(assoc_arvif->link_id) | BIT(link_id);
+
+ return 0;
+}
+
static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
@@ -5899,10 +6208,13 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw,
struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif);
struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(sta);
struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+ struct ath12k_base *prev_ab = NULL, *ab;
struct ath12k_link_vif *arvif;
struct ath12k_link_sta *arsta;
unsigned long valid_links;
- u8 link_id = 0;
+ u16 selected_links = 0;
+ u8 link_id = 0, i;
+ struct ath12k *ar;
int ret;
lockdep_assert_wiphy(hw->wiphy);
@@ -5972,8 +6284,24 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw,
* about to move to the associated state.
*/
if (ieee80211_vif_is_mld(vif) && vif->type == NL80211_IFTYPE_STATION &&
- old_state == IEEE80211_STA_AUTH && new_state == IEEE80211_STA_ASSOC)
- ieee80211_set_active_links(vif, ieee80211_vif_usable_links(vif));
+ old_state == IEEE80211_STA_AUTH && new_state == IEEE80211_STA_ASSOC) {
+ /* TODO: for now only do link selection for single device
+ * MLO case. Other cases would be handled in the future.
+ */
+ ab = ah->radio[0].ab;
+ if (ab->ag->num_devices == 1) {
+ ret = ath12k_mac_select_links(ab, vif, hw, &selected_links);
+ if (ret) {
+ ath12k_warn(ab,
+ "failed to get selected links: %d\n", ret);
+ goto exit;
+ }
+ } else {
+ selected_links = ieee80211_vif_usable_links(vif);
+ }
+
+ ieee80211_set_active_links(vif, selected_links);
+ }
/* Handle all the other state transitions in generic way */
valid_links = ahsta->links_map;
@@ -5997,6 +6325,24 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw,
}
}
+ if (ieee80211_vif_is_mld(vif) && vif->type == NL80211_IFTYPE_STATION &&
+ old_state == IEEE80211_STA_ASSOC && new_state == IEEE80211_STA_AUTHORIZED) {
+ for_each_ar(ah, ar, i) {
+ ab = ar->ab;
+ if (prev_ab == ab)
+ continue;
+
+ ret = ath12k_mac_mlo_sta_update_link_active(ab, hw, ahvif);
+ if (ret) {
+ ath12k_warn(ab,
+ "failed to update link active state on connect %d\n",
+ ret);
+ goto exit;
+ }
+
+ prev_ab = ab;
+ }
+ }
/* IEEE80211_STA_NONE -> IEEE80211_STA_NOTEXIST:
* Remove the station from driver (handle ML sta here since that
* needs special handling. Normal sta will be handled in generic
diff --git a/drivers/net/wireless/ath/ath12k/mac.h b/drivers/net/wireless/ath/ath12k/mac.h
index e6e74b45bfa4..cc81b1f5680f 100644
--- a/drivers/net/wireless/ath/ath12k/mac.h
+++ b/drivers/net/wireless/ath/ath12k/mac.h
@@ -54,6 +54,8 @@ struct ath12k_generic_iter {
#define ATH12K_DEFAULT_SCAN_LINK IEEE80211_MLD_MAX_NUM_LINKS
#define ATH12K_NUM_MAX_LINKS (IEEE80211_MLD_MAX_NUM_LINKS + 1)
+#define ATH12K_NUM_MAX_ACTIVE_LINKS_PER_DEVICE 2
+
enum ath12k_supported_bw {
ATH12K_BW_20 = 0,
ATH12K_BW_40 = 1,
diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c
index 489d546390fc..1f3cfd9b89fd 100644
--- a/drivers/net/wireless/ath/ath12k/pci.c
+++ b/drivers/net/wireless/ath/ath12k/pci.c
@@ -292,10 +292,10 @@ static void ath12k_pci_enable_ltssm(struct ath12k_base *ab)
ath12k_dbg(ab, ATH12K_DBG_PCI, "pci ltssm 0x%x\n", val);
- val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST);
+ val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST(ab));
val |= GCC_GCC_PCIE_HOT_RST_VAL;
- ath12k_pci_write32(ab, GCC_GCC_PCIE_HOT_RST, val);
- val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST);
+ ath12k_pci_write32(ab, GCC_GCC_PCIE_HOT_RST(ab), val);
+ val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST(ab));
ath12k_dbg(ab, ATH12K_DBG_PCI, "pci pcie_hot_rst 0x%x\n", val);
diff --git a/drivers/net/wireless/ath/ath12k/pci.h b/drivers/net/wireless/ath/ath12k/pci.h
index 0b4c459d6d8e..d1ec8aad7f6c 100644
--- a/drivers/net/wireless/ath/ath12k/pci.h
+++ b/drivers/net/wireless/ath/ath12k/pci.h
@@ -28,7 +28,9 @@
#define PCIE_PCIE_PARF_LTSSM 0x1e081b0
#define PARM_LTSSM_VALUE 0x111
-#define GCC_GCC_PCIE_HOT_RST 0x1e38338
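+/* The hot-reset register offset is chip-specific (0x1e38338 on QCN9274,
+ * 0x1e40304 on WCN7850, per the hw.c register tables), so resolve it
+ * through hw_params.
+ */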
+#define GCC_GCC_PCIE_HOT_RST(ab) \
+ ((ab)->hw_params->regs->gcc_gcc_pcie_hot_rst)
+
#define GCC_GCC_PCIE_HOT_RST_VAL 0x10
#define PCIE_PCIE_INT_ALL_CLEAR 0x1e08228
diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c
index 60e2444fe08c..465f877fc0fb 100644
--- a/drivers/net/wireless/ath/ath12k/wmi.c
+++ b/drivers/net/wireless/ath/ath12k/wmi.c
@@ -91,6 +91,11 @@ struct ath12k_wmi_svc_rdy_ext2_parse {
bool dma_ring_cap_done;
bool spectral_bin_scaling_done;
bool mac_phy_caps_ext_done;
+ bool hal_reg_caps_ext2_done;
+ bool scan_radio_caps_ext2_done;
+ bool twt_caps_done;
+ bool htt_msdu_idx_to_qtype_map_done;
+ bool dbs_or_sbs_cap_ext_done;
};
struct ath12k_wmi_rdy_parse {
@@ -4395,6 +4400,7 @@ static int ath12k_wmi_hw_mode_caps_parse(struct ath12k_base *soc,
static int ath12k_wmi_hw_mode_caps(struct ath12k_base *soc,
u16 len, const void *ptr, void *data)
{
+ struct ath12k_svc_ext_info *svc_ext_info = &soc->wmi_ab.svc_ext_info;
struct ath12k_wmi_svc_rdy_ext_parse *svc_rdy_ext = data;
const struct ath12k_wmi_hw_mode_cap_params *hw_mode_caps;
enum wmi_host_hw_mode_config_type mode, pref;
@@ -4427,8 +4433,11 @@ static int ath12k_wmi_hw_mode_caps(struct ath12k_base *soc,
}
}
- ath12k_dbg(soc, ATH12K_DBG_WMI, "preferred_hw_mode:%d\n",
- soc->wmi_ab.preferred_hw_mode);
+ svc_ext_info->num_hw_modes = svc_rdy_ext->n_hw_mode_caps;
+
+ ath12k_dbg(soc, ATH12K_DBG_WMI, "num hw modes %u preferred_hw_mode %d\n",
+ svc_ext_info->num_hw_modes, soc->wmi_ab.preferred_hw_mode);
+
if (soc->wmi_ab.preferred_hw_mode == WMI_HOST_HW_MODE_MAX)
return -EINVAL;
@@ -4658,6 +4667,65 @@ free_dir_buff:
return ret;
}
+static void
+ath12k_wmi_save_mac_phy_info(struct ath12k_base *ab,
+ const struct ath12k_wmi_mac_phy_caps_params *mac_phy_cap,
+ struct ath12k_svc_ext_mac_phy_info *mac_phy_info)
+{
+ mac_phy_info->phy_id = __le32_to_cpu(mac_phy_cap->phy_id);
+ mac_phy_info->supported_bands = __le32_to_cpu(mac_phy_cap->supported_bands);
+ mac_phy_info->hw_freq_range.low_2ghz_freq =
+ __le32_to_cpu(mac_phy_cap->low_2ghz_chan_freq);
+ mac_phy_info->hw_freq_range.high_2ghz_freq =
+ __le32_to_cpu(mac_phy_cap->high_2ghz_chan_freq);
+ mac_phy_info->hw_freq_range.low_5ghz_freq =
+ __le32_to_cpu(mac_phy_cap->low_5ghz_chan_freq);
+ mac_phy_info->hw_freq_range.high_5ghz_freq =
+ __le32_to_cpu(mac_phy_cap->high_5ghz_chan_freq);
+}
+
+static void
+ath12k_wmi_save_all_mac_phy_info(struct ath12k_base *ab,
+ struct ath12k_wmi_svc_rdy_ext_parse *svc_rdy_ext)
+{
+ struct ath12k_svc_ext_info *svc_ext_info = &ab->wmi_ab.svc_ext_info;
+ const struct ath12k_wmi_mac_phy_caps_params *mac_phy_cap;
+ const struct ath12k_wmi_hw_mode_cap_params *hw_mode_cap;
+ struct ath12k_svc_ext_mac_phy_info *mac_phy_info;
+ u32 hw_mode_id, phy_bit_map;
+ u8 hw_idx;
+
+ mac_phy_info = &svc_ext_info->mac_phy_info[0];
+ mac_phy_cap = svc_rdy_ext->mac_phy_caps;
+
+ for (hw_idx = 0; hw_idx < svc_ext_info->num_hw_modes; hw_idx++) {
+ hw_mode_cap = &svc_rdy_ext->hw_mode_caps[hw_idx];
+ hw_mode_id = __le32_to_cpu(hw_mode_cap->hw_mode_id);
+ phy_bit_map = __le32_to_cpu(hw_mode_cap->phy_id_map);
+
+ while (phy_bit_map) {
+ ath12k_wmi_save_mac_phy_info(ab, mac_phy_cap, mac_phy_info);
+ mac_phy_info->hw_mode_config_type =
+ le32_get_bits(hw_mode_cap->hw_mode_config_type,
+ WMI_HW_MODE_CAP_CFG_TYPE);
+ ath12k_dbg(ab, ATH12K_DBG_WMI,
+ "hw_idx %u hw_mode_id %u hw_mode_config_type %u supported_bands %u phy_id %u 2 GHz [%u - %u] 5 GHz [%u - %u]\n",
+ hw_idx, hw_mode_id,
+ mac_phy_info->hw_mode_config_type,
+ mac_phy_info->supported_bands, mac_phy_info->phy_id,
+ mac_phy_info->hw_freq_range.low_2ghz_freq,
+ mac_phy_info->hw_freq_range.high_2ghz_freq,
+ mac_phy_info->hw_freq_range.low_5ghz_freq,
+ mac_phy_info->hw_freq_range.high_5ghz_freq);
+
+ mac_phy_cap++;
+ mac_phy_info++;
+
+ phy_bit_map >>= 1;
+ }
+ }
+}
+
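
The pairing of the two TLV arrays above is positional: each hw mode advertises a phy_id_map bitmap, and every set bit consumes the next entry of the flat mac_phy caps array. A minimal standalone sketch of that walk, using hypothetical stand-in structs rather than the driver's real types:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the two WMI TLV arrays. */
struct hw_mode { uint32_t hw_mode_id; uint32_t phy_id_map; };
struct mac_phy_cap { uint32_t phy_id; };

/* Each set bit in a mode's phy_id_map consumes exactly one entry of the
 * flat caps array, in order - the same pairing rule the driver applies.
 */
static void walk_caps(const struct hw_mode *modes, int n_modes,
		      const struct mac_phy_cap *caps)
{
	const struct mac_phy_cap *cap = caps;

	for (int i = 0; i < n_modes; i++) {
		uint32_t map = modes[i].phy_id_map;

		while (map) {
			printf("hw_mode %u -> phy %u\n",
			       (unsigned)modes[i].hw_mode_id,
			       (unsigned)cap->phy_id);
			cap++;		/* one cap entry per set bit */
			map >>= 1;
		}
	}
}

int main(void)
{
	struct hw_mode modes[] = { { 0, 0x1 }, { 1, 0x3 } };
	struct mac_phy_cap caps[] = { { 0 }, { 0 }, { 1 } };

	walk_caps(modes, 2, caps);
	return 0;
}
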
static int ath12k_wmi_svc_rdy_ext_parse(struct ath12k_base *ab,
u16 tag, u16 len,
const void *ptr, void *data)
@@ -4706,6 +4774,8 @@ static int ath12k_wmi_svc_rdy_ext_parse(struct ath12k_base *ab,
return ret;
}
+ ath12k_wmi_save_all_mac_phy_info(ab, svc_rdy_ext);
+
svc_rdy_ext->mac_phy_done = true;
} else if (!svc_rdy_ext->ext_hal_reg_done) {
ret = ath12k_wmi_ext_hal_reg_caps(ab, len, ptr, svc_rdy_ext);
@@ -4922,10 +4992,449 @@ static int ath12k_wmi_tlv_mac_phy_caps_ext(struct ath12k_base *ab, u16 tag,
return 0;
}
+static void
+ath12k_wmi_update_freq_info(struct ath12k_base *ab,
+ struct ath12k_svc_ext_mac_phy_info *mac_cap,
+ enum ath12k_hw_mode mode,
+ u32 phy_id)
+{
+ struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info;
+ struct ath12k_hw_mode_freq_range_arg *mac_range;
+
+ mac_range = &hw_mode_info->freq_range_caps[mode][phy_id];
+
+ if (mac_cap->supported_bands & WMI_HOST_WLAN_2GHZ_CAP) {
+ mac_range->low_2ghz_freq = max_t(u32,
+ mac_cap->hw_freq_range.low_2ghz_freq,
+ ATH12K_MIN_2GHZ_FREQ);
+ mac_range->high_2ghz_freq = mac_cap->hw_freq_range.high_2ghz_freq ?
+ min_t(u32,
+ mac_cap->hw_freq_range.high_2ghz_freq,
+ ATH12K_MAX_2GHZ_FREQ) :
+ ATH12K_MAX_2GHZ_FREQ;
+ }
+
+ if (mac_cap->supported_bands & WMI_HOST_WLAN_5GHZ_CAP) {
+ mac_range->low_5ghz_freq = max_t(u32,
+ mac_cap->hw_freq_range.low_5ghz_freq,
+ ATH12K_MIN_5GHZ_FREQ);
+ mac_range->high_5ghz_freq = mac_cap->hw_freq_range.high_5ghz_freq ?
+ min_t(u32,
+ mac_cap->hw_freq_range.high_5ghz_freq,
+ ATH12K_MAX_6GHZ_FREQ) :
+ ATH12K_MAX_6GHZ_FREQ;
+ }
+}
+
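
The helper above follows a clamp-with-fallback pattern: a firmware-reported band edge is clamped into the host's supported window, and a reported high edge of 0 means "no limit". A minimal sketch with illustrative constants (not the driver's actual ATH12K_* values):

#include <stdint.h>

/* Illustrative window - not the driver's ATH12K_* constants. */
#define HOST_MIN_2GHZ 2312U
#define HOST_MAX_2GHZ 2732U

static uint32_t max_u32(uint32_t a, uint32_t b) { return a > b ? a : b; }
static uint32_t min_u32(uint32_t a, uint32_t b) { return a < b ? a : b; }

/* Clamp a firmware-reported band edge into the host window; a reported
 * high edge of 0 means "no limit", so fall back to the host maximum. */
static void clamp_band(uint32_t fw_low, uint32_t fw_high,
		       uint32_t *low, uint32_t *high)
{
	*low = max_u32(fw_low, HOST_MIN_2GHZ);
	*high = fw_high ? min_u32(fw_high, HOST_MAX_2GHZ) : HOST_MAX_2GHZ;
}
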
+static bool
+ath12k_wmi_all_phy_range_updated(struct ath12k_base *ab,
+ enum ath12k_hw_mode hwmode)
+{
+ struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info;
+ struct ath12k_hw_mode_freq_range_arg *mac_range;
+ u8 phy_id;
+
+ for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) {
+ mac_range = &hw_mode_info->freq_range_caps[hwmode][phy_id];
+		/* modify the SBS/DBS range only when both phys for DBS are filled */
+ if (!mac_range->low_2ghz_freq && !mac_range->low_5ghz_freq)
+ return false;
+ }
+
+ return true;
+}
+
+static void ath12k_wmi_update_dbs_freq_info(struct ath12k_base *ab)
+{
+ struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info;
+ struct ath12k_hw_mode_freq_range_arg *mac_range;
+ u8 phy_id;
+
+ mac_range = hw_mode_info->freq_range_caps[ATH12K_HW_MODE_DBS];
+ /* Reset 5 GHz range for shared mac for DBS */
+ for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) {
+ if (mac_range[phy_id].low_2ghz_freq &&
+ mac_range[phy_id].low_5ghz_freq) {
+ mac_range[phy_id].low_5ghz_freq = 0;
+ mac_range[phy_id].high_5ghz_freq = 0;
+ }
+ }
+}
+
+static u32
+ath12k_wmi_get_highest_5ghz_freq_from_range(struct ath12k_hw_mode_freq_range_arg *range)
+{
+ u32 highest_freq = 0;
+ u8 phy_id;
+
+ for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) {
+ if (range[phy_id].high_5ghz_freq > highest_freq)
+ highest_freq = range[phy_id].high_5ghz_freq;
+ }
+
+ return highest_freq ? highest_freq : ATH12K_MAX_6GHZ_FREQ;
+}
+
+static u32
+ath12k_wmi_get_lowest_5ghz_freq_from_range(struct ath12k_hw_mode_freq_range_arg *range)
+{
+ u32 lowest_freq = 0;
+ u8 phy_id;
+
+ for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) {
+ if ((!lowest_freq && range[phy_id].low_5ghz_freq) ||
+ range[phy_id].low_5ghz_freq < lowest_freq)
+ lowest_freq = range[phy_id].low_5ghz_freq;
+ }
+
+ return lowest_freq ? lowest_freq : ATH12K_MIN_5GHZ_FREQ;
+}
+
+static void
+ath12k_wmi_fill_upper_share_sbs_freq(struct ath12k_base *ab,
+ u16 sbs_range_sep,
+ struct ath12k_hw_mode_freq_range_arg *ref_freq)
+{
+ struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info;
+ struct ath12k_hw_mode_freq_range_arg *upper_sbs_freq_range;
+ u8 phy_id;
+
+ upper_sbs_freq_range =
+ hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS_UPPER_SHARE];
+
+ for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) {
+ upper_sbs_freq_range[phy_id].low_2ghz_freq =
+ ref_freq[phy_id].low_2ghz_freq;
+ upper_sbs_freq_range[phy_id].high_2ghz_freq =
+ ref_freq[phy_id].high_2ghz_freq;
+
+ /* update for shared mac */
+ if (upper_sbs_freq_range[phy_id].low_2ghz_freq) {
+ upper_sbs_freq_range[phy_id].low_5ghz_freq = sbs_range_sep + 10;
+ upper_sbs_freq_range[phy_id].high_5ghz_freq =
+ ath12k_wmi_get_highest_5ghz_freq_from_range(ref_freq);
+ } else {
+ upper_sbs_freq_range[phy_id].low_5ghz_freq =
+ ath12k_wmi_get_lowest_5ghz_freq_from_range(ref_freq);
+ upper_sbs_freq_range[phy_id].high_5ghz_freq = sbs_range_sep;
+ }
+ }
+}
+
+static void
+ath12k_wmi_fill_lower_share_sbs_freq(struct ath12k_base *ab,
+ u16 sbs_range_sep,
+ struct ath12k_hw_mode_freq_range_arg *ref_freq)
+{
+ struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info;
+ struct ath12k_hw_mode_freq_range_arg *lower_sbs_freq_range;
+ u8 phy_id;
+
+ lower_sbs_freq_range =
+ hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS_LOWER_SHARE];
+
+ for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) {
+ lower_sbs_freq_range[phy_id].low_2ghz_freq =
+ ref_freq[phy_id].low_2ghz_freq;
+ lower_sbs_freq_range[phy_id].high_2ghz_freq =
+ ref_freq[phy_id].high_2ghz_freq;
+
+ /* update for shared mac */
+ if (lower_sbs_freq_range[phy_id].low_2ghz_freq) {
+ lower_sbs_freq_range[phy_id].low_5ghz_freq =
+ ath12k_wmi_get_lowest_5ghz_freq_from_range(ref_freq);
+ lower_sbs_freq_range[phy_id].high_5ghz_freq = sbs_range_sep;
+ } else {
+ lower_sbs_freq_range[phy_id].low_5ghz_freq = sbs_range_sep + 10;
+ lower_sbs_freq_range[phy_id].high_5ghz_freq =
+ ath12k_wmi_get_highest_5ghz_freq_from_range(ref_freq);
+ }
+ }
+}
+
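
Both fill helpers derive the upper- and lower-share ranges from one cutoff frequency: conceptually the combined 5 GHz span is split at sbs_range_sep, with the +10 mirroring the driver's 10 MHz step between the halves. A simplified standalone sketch:

#include <stdint.h>
#include <stdio.h>

struct range { uint32_t low, high; };

/* Split one 5 GHz span at the SBS cutoff; the +10 mirrors the driver's
 * 10 MHz step between the lower and upper halves. */
static void sbs_split(struct range full, uint32_t cutoff,
		      struct range *lower, struct range *upper)
{
	lower->low = full.low;
	lower->high = cutoff;
	upper->low = cutoff + 10;
	upper->high = full.high;
}

int main(void)
{
	struct range full = { 5180, 7115 }, lo, hi;

	sbs_split(full, 5650, &lo, &hi);
	printf("lower [%u - %u] upper [%u - %u]\n",
	       (unsigned)lo.low, (unsigned)lo.high,
	       (unsigned)hi.low, (unsigned)hi.high);
	return 0;
}
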
+static const char *ath12k_wmi_hw_mode_to_str(enum ath12k_hw_mode hw_mode)
+{
+ static const char * const mode_str[] = {
+ [ATH12K_HW_MODE_SMM] = "SMM",
+ [ATH12K_HW_MODE_DBS] = "DBS",
+ [ATH12K_HW_MODE_SBS] = "SBS",
+ [ATH12K_HW_MODE_SBS_UPPER_SHARE] = "SBS_UPPER_SHARE",
+ [ATH12K_HW_MODE_SBS_LOWER_SHARE] = "SBS_LOWER_SHARE",
+ };
+
+ if (hw_mode >= ARRAY_SIZE(mode_str))
+ return "Unknown";
+
+ return mode_str[hw_mode];
+}
+
+static void
+ath12k_wmi_dump_freq_range_per_mac(struct ath12k_base *ab,
+ struct ath12k_hw_mode_freq_range_arg *freq_range,
+ enum ath12k_hw_mode hw_mode)
+{
+ u8 i;
+
+ for (i = 0; i < MAX_RADIOS; i++)
+ if (freq_range[i].low_2ghz_freq || freq_range[i].low_5ghz_freq)
+ ath12k_dbg(ab, ATH12K_DBG_WMI,
+ "frequency range: %s(%d) mac %d 2 GHz [%d - %d] 5 GHz [%d - %d]",
+ ath12k_wmi_hw_mode_to_str(hw_mode),
+ hw_mode, i,
+ freq_range[i].low_2ghz_freq,
+ freq_range[i].high_2ghz_freq,
+ freq_range[i].low_5ghz_freq,
+ freq_range[i].high_5ghz_freq);
+}
+
+static void ath12k_wmi_dump_freq_range(struct ath12k_base *ab)
+{
+ struct ath12k_hw_mode_freq_range_arg *freq_range;
+ u8 i;
+
+ for (i = ATH12K_HW_MODE_SMM; i < ATH12K_HW_MODE_MAX; i++) {
+ freq_range = ab->wmi_ab.hw_mode_info.freq_range_caps[i];
+ ath12k_wmi_dump_freq_range_per_mac(ab, freq_range, i);
+ }
+}
+
+static int ath12k_wmi_modify_sbs_freq(struct ath12k_base *ab, u8 phy_id)
+{
+ struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info;
+ struct ath12k_hw_mode_freq_range_arg *sbs_mac_range, *shared_mac_range;
+ struct ath12k_hw_mode_freq_range_arg *non_shared_range;
+ u8 shared_phy_id;
+
+ sbs_mac_range = &hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS][phy_id];
+
+	/* if the SBS mac range has both 2.4 and 5 GHz ranges, i.e. a shared
+	 * phy_id, keep the range as it is in SBS
+ */
+ if (sbs_mac_range->low_2ghz_freq && sbs_mac_range->low_5ghz_freq)
+ return 0;
+
+ if (sbs_mac_range->low_2ghz_freq && !sbs_mac_range->low_5ghz_freq) {
+ ath12k_err(ab, "Invalid DBS/SBS mode with only 2.4Ghz");
+ ath12k_wmi_dump_freq_range_per_mac(ab, sbs_mac_range, ATH12K_HW_MODE_SBS);
+ return -EINVAL;
+ }
+
+ non_shared_range = sbs_mac_range;
+ /* if SBS mac range has only 5 GHz then it's the non-shared phy, so
+ * modify the range as per the shared mac.
+ */
+ shared_phy_id = phy_id ? 0 : 1;
+ shared_mac_range =
+ &hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS][shared_phy_id];
+
+ if (shared_mac_range->low_5ghz_freq > non_shared_range->low_5ghz_freq) {
+ ath12k_dbg(ab, ATH12K_DBG_WMI, "high 5 GHz shared");
+ /* If the shared mac lower 5 GHz frequency is greater than
+ * non-shared mac lower 5 GHz frequency then the shared mac has
+ * high 5 GHz shared with 2.4 GHz. So non-shared mac's 5 GHz high
+ * freq should be less than the shared mac's low 5 GHz freq.
+ */
+ if (non_shared_range->high_5ghz_freq >=
+ shared_mac_range->low_5ghz_freq)
+ non_shared_range->high_5ghz_freq =
+ max_t(u32, shared_mac_range->low_5ghz_freq - 10,
+ non_shared_range->low_5ghz_freq);
+ } else if (shared_mac_range->high_5ghz_freq <
+ non_shared_range->high_5ghz_freq) {
+ ath12k_dbg(ab, ATH12K_DBG_WMI, "low 5 GHz shared");
+ /* If the shared mac high 5 GHz frequency is less than
+ * non-shared mac high 5 GHz frequency then the shared mac has
+ * low 5 GHz shared with 2.4 GHz. So non-shared mac's 5 GHz low
+ * freq should be greater than the shared mac's high 5 GHz freq.
+ */
+ if (shared_mac_range->high_5ghz_freq >=
+ non_shared_range->low_5ghz_freq)
+ non_shared_range->low_5ghz_freq =
+ min_t(u32, shared_mac_range->high_5ghz_freq + 10,
+ non_shared_range->high_5ghz_freq);
+ } else {
+ ath12k_warn(ab, "invalid SBS range with all 5 GHz shared");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void ath12k_wmi_update_sbs_freq_info(struct ath12k_base *ab)
+{
+ struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info;
+ struct ath12k_hw_mode_freq_range_arg *mac_range;
+ u16 sbs_range_sep;
+ u8 phy_id;
+ int ret;
+
+ mac_range = hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS];
+
+ /* If sbs_lower_band_end_freq has a value, then the frequency range
+ * will be split using that value.
+ */
+ sbs_range_sep = ab->wmi_ab.sbs_lower_band_end_freq;
+ if (sbs_range_sep) {
+ ath12k_wmi_fill_upper_share_sbs_freq(ab, sbs_range_sep,
+ mac_range);
+ ath12k_wmi_fill_lower_share_sbs_freq(ab, sbs_range_sep,
+ mac_range);
+		/* The hardware specifies the range boundary with sbs_range_sep
+		 * (i.e. the boundary between 5 GHz high and 5 GHz low), so
+		 * reset the original range to make sure it will not get used.
+ */
+ memset(mac_range, 0, sizeof(*mac_range) * MAX_RADIOS);
+ return;
+ }
+
+	/* If sbs_lower_band_end_freq is not set, the firmware will send one
+	 * shared mac range and one non-shared mac range, so update those freqs.
+ */
+ for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) {
+ ret = ath12k_wmi_modify_sbs_freq(ab, phy_id);
+ if (ret) {
+ memset(mac_range, 0, sizeof(*mac_range) * MAX_RADIOS);
+ break;
+ }
+ }
+}
+
+static void
+ath12k_wmi_update_mac_freq_info(struct ath12k_base *ab,
+ enum wmi_host_hw_mode_config_type hw_config_type,
+ u32 phy_id,
+ struct ath12k_svc_ext_mac_phy_info *mac_cap)
+{
+ if (phy_id >= MAX_RADIOS) {
+ ath12k_err(ab, "mac more than two not supported: %d", phy_id);
+ return;
+ }
+
+ ath12k_dbg(ab, ATH12K_DBG_WMI,
+ "hw_mode_cfg %d mac %d band 0x%x SBS cutoff freq %d 2 GHz [%d - %d] 5 GHz [%d - %d]",
+ hw_config_type, phy_id, mac_cap->supported_bands,
+ ab->wmi_ab.sbs_lower_band_end_freq,
+ mac_cap->hw_freq_range.low_2ghz_freq,
+ mac_cap->hw_freq_range.high_2ghz_freq,
+ mac_cap->hw_freq_range.low_5ghz_freq,
+ mac_cap->hw_freq_range.high_5ghz_freq);
+
+ switch (hw_config_type) {
+ case WMI_HOST_HW_MODE_SINGLE:
+ if (phy_id) {
+ ath12k_dbg(ab, ATH12K_DBG_WMI, "mac phy 1 is not supported");
+ break;
+ }
+ ath12k_wmi_update_freq_info(ab, mac_cap, ATH12K_HW_MODE_SMM, phy_id);
+ break;
+
+ case WMI_HOST_HW_MODE_DBS:
+ if (!ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_DBS))
+ ath12k_wmi_update_freq_info(ab, mac_cap,
+ ATH12K_HW_MODE_DBS, phy_id);
+ break;
+ case WMI_HOST_HW_MODE_DBS_SBS:
+ case WMI_HOST_HW_MODE_DBS_OR_SBS:
+ ath12k_wmi_update_freq_info(ab, mac_cap, ATH12K_HW_MODE_DBS, phy_id);
+ if (ab->wmi_ab.sbs_lower_band_end_freq ||
+ mac_cap->hw_freq_range.low_5ghz_freq ||
+ mac_cap->hw_freq_range.low_2ghz_freq)
+ ath12k_wmi_update_freq_info(ab, mac_cap, ATH12K_HW_MODE_SBS,
+ phy_id);
+
+ if (ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_DBS))
+ ath12k_wmi_update_dbs_freq_info(ab);
+ if (ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS))
+ ath12k_wmi_update_sbs_freq_info(ab);
+ break;
+ case WMI_HOST_HW_MODE_SBS:
+ case WMI_HOST_HW_MODE_SBS_PASSIVE:
+ ath12k_wmi_update_freq_info(ab, mac_cap, ATH12K_HW_MODE_SBS, phy_id);
+ if (ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS))
+ ath12k_wmi_update_sbs_freq_info(ab);
+
+ break;
+ default:
+ break;
+ }
+}
+
+static bool ath12k_wmi_sbs_range_present(struct ath12k_base *ab)
+{
+ if (ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS) ||
+ (ab->wmi_ab.sbs_lower_band_end_freq &&
+ ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS_LOWER_SHARE) &&
+ ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS_UPPER_SHARE)))
+ return true;
+
+ return false;
+}
+
+static int ath12k_wmi_update_hw_mode_list(struct ath12k_base *ab)
+{
+ struct ath12k_svc_ext_info *svc_ext_info = &ab->wmi_ab.svc_ext_info;
+ struct ath12k_hw_mode_info *info = &ab->wmi_ab.hw_mode_info;
+ enum wmi_host_hw_mode_config_type hw_config_type;
+ struct ath12k_svc_ext_mac_phy_info *tmp;
+ bool dbs_mode = false, sbs_mode = false;
+ u32 i, j = 0;
+
+ if (!svc_ext_info->num_hw_modes) {
+ ath12k_err(ab, "invalid number of hw modes");
+ return -EINVAL;
+ }
+
+ ath12k_dbg(ab, ATH12K_DBG_WMI, "updated HW mode list: num modes %d",
+ svc_ext_info->num_hw_modes);
+
+ memset(info->freq_range_caps, 0, sizeof(info->freq_range_caps));
+
+ for (i = 0; i < svc_ext_info->num_hw_modes; i++) {
+ if (j >= ATH12K_MAX_MAC_PHY_CAP)
+ return -EINVAL;
+
+ /* Update for MAC0 */
+ tmp = &svc_ext_info->mac_phy_info[j++];
+ hw_config_type = tmp->hw_mode_config_type;
+ ath12k_wmi_update_mac_freq_info(ab, hw_config_type, tmp->phy_id, tmp);
+
+ /* SBS and DBS have dual MAC. Up to 2 MACs are considered. */
+ if (hw_config_type == WMI_HOST_HW_MODE_DBS ||
+ hw_config_type == WMI_HOST_HW_MODE_SBS_PASSIVE ||
+ hw_config_type == WMI_HOST_HW_MODE_SBS ||
+ hw_config_type == WMI_HOST_HW_MODE_DBS_OR_SBS) {
+ if (j >= ATH12K_MAX_MAC_PHY_CAP)
+ return -EINVAL;
+ /* Update for MAC1 */
+ tmp = &svc_ext_info->mac_phy_info[j++];
+ ath12k_wmi_update_mac_freq_info(ab, hw_config_type,
+ tmp->phy_id, tmp);
+
+ if (hw_config_type == WMI_HOST_HW_MODE_DBS ||
+ hw_config_type == WMI_HOST_HW_MODE_DBS_OR_SBS)
+ dbs_mode = true;
+
+ if (ath12k_wmi_sbs_range_present(ab) &&
+ (hw_config_type == WMI_HOST_HW_MODE_SBS_PASSIVE ||
+ hw_config_type == WMI_HOST_HW_MODE_SBS ||
+ hw_config_type == WMI_HOST_HW_MODE_DBS_OR_SBS))
+ sbs_mode = true;
+ }
+ }
+
+ info->support_dbs = dbs_mode;
+ info->support_sbs = sbs_mode;
+
+ ath12k_wmi_dump_freq_range(ab);
+
+ return 0;
+}
+
static int ath12k_wmi_svc_rdy_ext2_parse(struct ath12k_base *ab,
u16 tag, u16 len,
const void *ptr, void *data)
{
+ const struct ath12k_wmi_dbs_or_sbs_cap_params *dbs_or_sbs_caps;
struct ath12k_wmi_pdev *wmi_handle = &ab->wmi_ab.wmi[0];
struct ath12k_wmi_svc_rdy_ext2_parse *parse = data;
int ret;
@@ -4967,7 +5476,32 @@ static int ath12k_wmi_svc_rdy_ext2_parse(struct ath12k_base *ab,
}
parse->mac_phy_caps_ext_done = true;
+ } else if (!parse->hal_reg_caps_ext2_done) {
+ parse->hal_reg_caps_ext2_done = true;
+ } else if (!parse->scan_radio_caps_ext2_done) {
+ parse->scan_radio_caps_ext2_done = true;
+ } else if (!parse->twt_caps_done) {
+ parse->twt_caps_done = true;
+ } else if (!parse->htt_msdu_idx_to_qtype_map_done) {
+ parse->htt_msdu_idx_to_qtype_map_done = true;
+ } else if (!parse->dbs_or_sbs_cap_ext_done) {
+ dbs_or_sbs_caps = ptr;
+ ab->wmi_ab.sbs_lower_band_end_freq =
+ __le32_to_cpu(dbs_or_sbs_caps->sbs_lower_band_end_freq);
+
+ ath12k_dbg(ab, ATH12K_DBG_WMI, "sbs_lower_band_end_freq %u\n",
+ ab->wmi_ab.sbs_lower_band_end_freq);
+
+ ret = ath12k_wmi_update_hw_mode_list(ab);
+ if (ret) {
+ ath12k_warn(ab, "failed to update hw mode list: %d\n",
+ ret);
+ return ret;
+ }
+
+ parse->dbs_or_sbs_cap_ext_done = true;
}
+
break;
default:
break;
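
The ext2 parser above distinguishes same-tag TLVs purely by arrival order, using one "done" flag per expected TLV: the first flag still unset identifies which logical TLV the current buffer is. A minimal model of that positional state machine, with a hypothetical subset of the flags:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical subset of the parser's "done" flags. */
struct parse_state {
	bool hal_reg_done;
	bool scan_radio_done;
	bool twt_done;
};

/* The first flag still unset tells us which logical TLV this is. */
static const char *classify(struct parse_state *p)
{
	if (!p->hal_reg_done) {
		p->hal_reg_done = true;
		return "hal_reg_caps_ext2";
	} else if (!p->scan_radio_done) {
		p->scan_radio_done = true;
		return "scan_radio_caps_ext2";
	} else if (!p->twt_done) {
		p->twt_done = true;
		return "twt_caps";
	}
	return "unexpected";
}

int main(void)
{
	struct parse_state p = { 0 };

	for (int i = 0; i < 3; i++)
		printf("TLV %d -> %s\n", i, classify(&p));
	return 0;
}
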
@@ -7626,6 +8160,64 @@ static int ath12k_wmi_pull_fw_stats(struct ath12k_base *ab, struct sk_buff *skb,
&parse);
}
+static void ath12k_wmi_fw_stats_process(struct ath12k *ar,
+ struct ath12k_fw_stats *stats)
+{
+ struct ath12k_base *ab = ar->ab;
+ struct ath12k_pdev *pdev;
+ bool is_end = true;
+ size_t total_vdevs_started = 0;
+ int i;
+
+ if (stats->stats_id == WMI_REQUEST_VDEV_STAT) {
+ if (list_empty(&stats->vdevs)) {
+ ath12k_warn(ab, "empty vdev stats");
+ return;
+ }
+		/* FW sends stats for all active VDEVs irrespective of PDEV,
+		 * hence mark the end only once the count of all started
+		 * VDEVs is reached
+ */
+ rcu_read_lock();
+ for (i = 0; i < ab->num_radios; i++) {
+ pdev = rcu_dereference(ab->pdevs_active[i]);
+ if (pdev && pdev->ar)
+ total_vdevs_started += pdev->ar->num_started_vdevs;
+ }
+ rcu_read_unlock();
+
+ if (total_vdevs_started)
+ is_end = ((++ar->fw_stats.num_vdev_recvd) ==
+ total_vdevs_started);
+
+ list_splice_tail_init(&stats->vdevs,
+ &ar->fw_stats.vdevs);
+
+ if (is_end)
+ complete(&ar->fw_stats_done);
+
+ return;
+ }
+
+ if (stats->stats_id == WMI_REQUEST_BCN_STAT) {
+ if (list_empty(&stats->bcn)) {
+ ath12k_warn(ab, "empty beacon stats");
+ return;
+ }
+		/* Mark the end once we have received stats for all started
+		 * VDEVs within the PDEV
+ */
+ if (ar->num_started_vdevs)
+ is_end = ((++ar->fw_stats.num_bcn_recvd) ==
+ ar->num_started_vdevs);
+
+ list_splice_tail_init(&stats->bcn,
+ &ar->fw_stats.bcn);
+
+ if (is_end)
+ complete(&ar->fw_stats_done);
+ }
+}
+
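
ath12k_wmi_fw_stats_process() treats stats delivery as chunked: each event splices in a partial list, and completion is signalled once the number of received chunks reaches the number of started vdevs. A reduced sketch of that bookkeeping, minus the locking and list splicing:

#include <stdbool.h>
#include <stddef.h>

struct stats_ctx {
	size_t num_recvd;
	size_t total_started;
};

/* Returns true when the last outstanding chunk has arrived; with
 * nothing outstanding a single event is already the end. */
static bool stats_chunk_received(struct stats_ctx *ctx)
{
	if (!ctx->total_started)
		return true;
	return ++ctx->num_recvd == ctx->total_started;
}
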
static void ath12k_update_stats_event(struct ath12k_base *ab, struct sk_buff *skb)
{
struct ath12k_fw_stats stats = {};
@@ -7655,19 +8247,15 @@ static void ath12k_update_stats_event(struct ath12k_base *ab, struct sk_buff *sk
spin_lock_bh(&ar->data_lock);
- /* WMI_REQUEST_PDEV_STAT can be requested via .get_txpower mac ops or via
- * debugfs fw stats. Therefore, processing it separately.
- */
+ /* Handle WMI_REQUEST_PDEV_STAT status update */
if (stats.stats_id == WMI_REQUEST_PDEV_STAT) {
list_splice_tail_init(&stats.pdevs, &ar->fw_stats.pdevs);
- ar->fw_stats.fw_stats_done = true;
+ complete(&ar->fw_stats_done);
goto complete;
}
- /* WMI_REQUEST_VDEV_STAT and WMI_REQUEST_BCN_STAT are currently requested only
- * via debugfs fw stats. Hence, processing these in debugfs context.
- */
- ath12k_debugfs_fw_stats_process(ar, &stats);
+ /* Handle WMI_REQUEST_VDEV_STAT and WMI_REQUEST_BCN_STAT updates. */
+ ath12k_wmi_fw_stats_process(ar, &stats);
complete:
complete(&ar->fw_stats_complete);
@@ -9911,3 +10499,224 @@ int ath12k_wmi_send_vdev_set_tpc_power(struct ath12k *ar,
return 0;
}
+
+static int
+ath12k_wmi_fill_disallowed_bmap(struct ath12k_base *ab,
+ struct wmi_disallowed_mlo_mode_bitmap_params *dislw_bmap,
+ struct wmi_mlo_link_set_active_arg *arg)
+{
+ struct wmi_ml_disallow_mode_bmap_arg *dislw_bmap_arg;
+ u8 i;
+
+ if (arg->num_disallow_mode_comb >
+ ARRAY_SIZE(arg->disallow_bmap)) {
+ ath12k_warn(ab, "invalid num_disallow_mode_comb: %d",
+ arg->num_disallow_mode_comb);
+ return -EINVAL;
+ }
+
+ dislw_bmap_arg = &arg->disallow_bmap[0];
+ for (i = 0; i < arg->num_disallow_mode_comb; i++) {
+ dislw_bmap->tlv_header =
+ ath12k_wmi_tlv_cmd_hdr(0, sizeof(*dislw_bmap));
+ dislw_bmap->disallowed_mode_bitmap =
+ cpu_to_le32(dislw_bmap_arg->disallowed_mode);
+ dislw_bmap->ieee_link_id_comb =
+ le32_encode_bits(dislw_bmap_arg->ieee_link_id[0],
+ WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_1) |
+ le32_encode_bits(dislw_bmap_arg->ieee_link_id[1],
+ WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_2) |
+ le32_encode_bits(dislw_bmap_arg->ieee_link_id[2],
+ WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_3) |
+ le32_encode_bits(dislw_bmap_arg->ieee_link_id[3],
+ WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_4);
+
+ ath12k_dbg(ab, ATH12K_DBG_WMI,
+ "entry %d disallowed_mode %d ieee_link_id_comb 0x%x",
+ i, dislw_bmap_arg->disallowed_mode,
+ dislw_bmap_arg->ieee_link_id_comb);
+ dislw_bmap++;
+ dislw_bmap_arg++;
+ }
+
+ return 0;
+}
+
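
The ieee_link_id_comb encoding above packs four 8-bit link IDs into one 32-bit word, byte N landing in bits [8N+7:8N]. An equivalent plain-C sketch (the kernel additionally converts the result to little endian via le32_encode_bits()):

#include <stdint.h>
#include <stdio.h>

/* Byte N lands in bits [8N+7:8N], matching the four
 * WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_* masks. */
static uint32_t pack_link_ids(const uint8_t id[4])
{
	return (uint32_t)id[0] |
	       ((uint32_t)id[1] << 8) |
	       ((uint32_t)id[2] << 16) |
	       ((uint32_t)id[3] << 24);
}

int main(void)
{
	uint8_t ids[4] = { 1, 2, 3, 4 };

	printf("0x%08x\n", (unsigned)pack_link_ids(ids)); /* 0x04030201 */
	return 0;
}
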
+int ath12k_wmi_send_mlo_link_set_active_cmd(struct ath12k_base *ab,
+ struct wmi_mlo_link_set_active_arg *arg)
+{
+ struct wmi_disallowed_mlo_mode_bitmap_params *disallowed_mode_bmap;
+ struct wmi_mlo_set_active_link_number_params *link_num_param;
+ u32 num_link_num_param = 0, num_vdev_bitmap = 0;
+ struct ath12k_wmi_base *wmi_ab = &ab->wmi_ab;
+ struct wmi_mlo_link_set_active_cmd *cmd;
+ u32 num_inactive_vdev_bitmap = 0;
+ u32 num_disallow_mode_comb = 0;
+ struct wmi_tlv *tlv;
+ struct sk_buff *skb;
+ __le32 *vdev_bitmap;
+ void *buf_ptr;
+ int i, ret;
+ u32 len;
+
+ if (!arg->num_vdev_bitmap && !arg->num_link_entry) {
+ ath12k_warn(ab, "Invalid num_vdev_bitmap and num_link_entry");
+ return -EINVAL;
+ }
+
+ switch (arg->force_mode) {
+ case WMI_MLO_LINK_FORCE_MODE_ACTIVE_LINK_NUM:
+ case WMI_MLO_LINK_FORCE_MODE_INACTIVE_LINK_NUM:
+ num_link_num_param = arg->num_link_entry;
+ fallthrough;
+ case WMI_MLO_LINK_FORCE_MODE_ACTIVE:
+ case WMI_MLO_LINK_FORCE_MODE_INACTIVE:
+ case WMI_MLO_LINK_FORCE_MODE_NO_FORCE:
+ num_vdev_bitmap = arg->num_vdev_bitmap;
+ break;
+ case WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE:
+ num_vdev_bitmap = arg->num_vdev_bitmap;
+ num_inactive_vdev_bitmap = arg->num_inactive_vdev_bitmap;
+ break;
+ default:
+ ath12k_warn(ab, "Invalid force mode: %u", arg->force_mode);
+ return -EINVAL;
+ }
+
+ num_disallow_mode_comb = arg->num_disallow_mode_comb;
+ len = sizeof(*cmd) +
+ TLV_HDR_SIZE + sizeof(*link_num_param) * num_link_num_param +
+ TLV_HDR_SIZE + sizeof(*vdev_bitmap) * num_vdev_bitmap +
+ TLV_HDR_SIZE + TLV_HDR_SIZE + TLV_HDR_SIZE +
+ TLV_HDR_SIZE + sizeof(*disallowed_mode_bmap) * num_disallow_mode_comb;
+ if (arg->force_mode == WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE)
+ len += sizeof(*vdev_bitmap) * num_inactive_vdev_bitmap;
+
+ skb = ath12k_wmi_alloc_skb(wmi_ab, len);
+ if (!skb)
+ return -ENOMEM;
+
+ cmd = (struct wmi_mlo_link_set_active_cmd *)skb->data;
+ cmd->tlv_header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_MLO_LINK_SET_ACTIVE_CMD,
+ sizeof(*cmd));
+ cmd->force_mode = cpu_to_le32(arg->force_mode);
+ cmd->reason = cpu_to_le32(arg->reason);
+ ath12k_dbg(ab, ATH12K_DBG_WMI,
+ "mode %d reason %d num_link_num_param %d num_vdev_bitmap %d inactive %d num_disallow_mode_comb %d",
+ arg->force_mode, arg->reason, num_link_num_param,
+ num_vdev_bitmap, num_inactive_vdev_bitmap,
+ num_disallow_mode_comb);
+
+ buf_ptr = skb->data + sizeof(*cmd);
+ tlv = buf_ptr;
+ tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_STRUCT,
+ sizeof(*link_num_param) * num_link_num_param);
+ buf_ptr += TLV_HDR_SIZE;
+
+ if (num_link_num_param) {
+ cmd->ctrl_flags =
+ le32_encode_bits(arg->ctrl_flags.dync_force_link_num ? 1 : 0,
+ CRTL_F_DYNC_FORCE_LINK_NUM);
+
+ link_num_param = buf_ptr;
+ for (i = 0; i < num_link_num_param; i++) {
+ link_num_param->tlv_header =
+ ath12k_wmi_tlv_cmd_hdr(0, sizeof(*link_num_param));
+ link_num_param->num_of_link =
+ cpu_to_le32(arg->link_num[i].num_of_link);
+ link_num_param->vdev_type =
+ cpu_to_le32(arg->link_num[i].vdev_type);
+ link_num_param->vdev_subtype =
+ cpu_to_le32(arg->link_num[i].vdev_subtype);
+ link_num_param->home_freq =
+ cpu_to_le32(arg->link_num[i].home_freq);
+ ath12k_dbg(ab, ATH12K_DBG_WMI,
+ "entry %d num_of_link %d vdev type %d subtype %d freq %d control_flags %d",
+ i, arg->link_num[i].num_of_link,
+ arg->link_num[i].vdev_type,
+ arg->link_num[i].vdev_subtype,
+ arg->link_num[i].home_freq,
+ __le32_to_cpu(cmd->ctrl_flags));
+ link_num_param++;
+ }
+
+ buf_ptr += sizeof(*link_num_param) * num_link_num_param;
+ }
+
+ tlv = buf_ptr;
+ tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32,
+ sizeof(*vdev_bitmap) * num_vdev_bitmap);
+ buf_ptr += TLV_HDR_SIZE;
+
+ if (num_vdev_bitmap) {
+ vdev_bitmap = buf_ptr;
+ for (i = 0; i < num_vdev_bitmap; i++) {
+ vdev_bitmap[i] = cpu_to_le32(arg->vdev_bitmap[i]);
+ ath12k_dbg(ab, ATH12K_DBG_WMI, "entry %d vdev_id_bitmap 0x%x",
+ i, arg->vdev_bitmap[i]);
+ }
+
+ buf_ptr += sizeof(*vdev_bitmap) * num_vdev_bitmap;
+ }
+
+ if (arg->force_mode == WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE) {
+ tlv = buf_ptr;
+ tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32,
+ sizeof(*vdev_bitmap) *
+ num_inactive_vdev_bitmap);
+ buf_ptr += TLV_HDR_SIZE;
+
+ if (num_inactive_vdev_bitmap) {
+ vdev_bitmap = buf_ptr;
+ for (i = 0; i < num_inactive_vdev_bitmap; i++) {
+ vdev_bitmap[i] =
+ cpu_to_le32(arg->inactive_vdev_bitmap[i]);
+ ath12k_dbg(ab, ATH12K_DBG_WMI,
+ "entry %d inactive_vdev_id_bitmap 0x%x",
+ i, arg->inactive_vdev_bitmap[i]);
+ }
+
+ buf_ptr += sizeof(*vdev_bitmap) * num_inactive_vdev_bitmap;
+ }
+ } else {
+ /* add empty vdev bitmap2 tlv */
+ tlv = buf_ptr;
+ tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, 0);
+ buf_ptr += TLV_HDR_SIZE;
+ }
+
+ /* add empty ieee_link_id_bitmap tlv */
+ tlv = buf_ptr;
+ tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, 0);
+ buf_ptr += TLV_HDR_SIZE;
+
+ /* add empty ieee_link_id_bitmap2 tlv */
+ tlv = buf_ptr;
+ tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, 0);
+ buf_ptr += TLV_HDR_SIZE;
+
+ tlv = buf_ptr;
+ tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_STRUCT,
+ sizeof(*disallowed_mode_bmap) *
+ arg->num_disallow_mode_comb);
+ buf_ptr += TLV_HDR_SIZE;
+
+ ret = ath12k_wmi_fill_disallowed_bmap(ab, buf_ptr, arg);
+ if (ret)
+ goto free_skb;
+
+ ret = ath12k_wmi_cmd_send(&wmi_ab->wmi[0], skb, WMI_MLO_LINK_SET_ACTIVE_CMDID);
+ if (ret) {
+ ath12k_warn(ab,
+ "failed to send WMI_MLO_LINK_SET_ACTIVE_CMDID: %d\n", ret);
+ goto free_skb;
+ }
+
+ ath12k_dbg(ab, ATH12K_DBG_WMI, "WMI mlo link set active cmd");
+
+ return ret;
+
+free_skb:
+ dev_kfree_skb(skb);
+ return ret;
+}
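
The command builder above sizes its buffer up front - sizeof(*cmd) plus one TLV_HDR_SIZE per array, even for arrays that end up empty - then appends TLVs sequentially with a moving cursor. A simplified sketch of that pattern, assuming an illustrative 4-byte header rather than the real wmi_tlv layout:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define TLV_HDR_SZ 4U	/* illustrative: 2-byte tag + 2-byte length */

struct tlv_hdr { uint16_t tag; uint16_t len; };

/* Append header + payload, return the advanced cursor. An empty array
 * still gets a header, which is why the length formula counts one
 * TLV_HDR_SZ per array regardless of its element count. */
static uint8_t *put_tlv(uint8_t *buf, uint16_t tag,
			const void *payload, uint16_t len)
{
	struct tlv_hdr hdr = { tag, len };

	memcpy(buf, &hdr, sizeof(hdr));
	if (len)
		memcpy(buf + TLV_HDR_SZ, payload, len);
	return buf + TLV_HDR_SZ + len;
}

int main(void)
{
	uint32_t bitmap[2] = { 0x3, 0x0 };
	size_t len = TLV_HDR_SZ + sizeof(bitmap) + TLV_HDR_SZ;
	uint8_t *buf = calloc(1, len);
	uint8_t *p = buf;

	if (!buf)
		return 1;
	p = put_tlv(p, 1, bitmap, sizeof(bitmap));
	p = put_tlv(p, 2, NULL, 0);	/* empty array, header only */
	(void)p;
	free(buf);
	return 0;
}
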
diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h
index ac18f75e0449..c640ffa180c8 100644
--- a/drivers/net/wireless/ath/ath12k/wmi.h
+++ b/drivers/net/wireless/ath/ath12k/wmi.h
@@ -1974,6 +1974,7 @@ enum wmi_tlv_tag {
WMI_TAG_TPC_STATS_CTL_PWR_TABLE_EVENT,
WMI_TAG_VDEV_SET_TPC_POWER_CMD = 0x3B5,
WMI_TAG_VDEV_CH_POWER_INFO,
+ WMI_TAG_MLO_LINK_SET_ACTIVE_CMD = 0x3BE,
WMI_TAG_EHT_RATE_SET = 0x3C4,
WMI_TAG_DCS_AWGN_INT_TYPE = 0x3C5,
WMI_TAG_MLO_TX_SEND_PARAMS,
@@ -2617,6 +2618,8 @@ struct ath12k_wmi_soc_mac_phy_hw_mode_caps_params {
__le32 num_chainmask_tables;
} __packed;
+#define WMI_HW_MODE_CAP_CFG_TYPE GENMASK(27, 0)
+
struct ath12k_wmi_hw_mode_cap_params {
__le32 tlv_header;
__le32 hw_mode_id;
@@ -2666,6 +2669,12 @@ struct ath12k_wmi_mac_phy_caps_params {
__le32 he_cap_info_2g_ext;
__le32 he_cap_info_5g_ext;
__le32 he_cap_info_internal;
+ __le32 wireless_modes;
+ __le32 low_2ghz_chan_freq;
+ __le32 high_2ghz_chan_freq;
+ __le32 low_5ghz_chan_freq;
+ __le32 high_5ghz_chan_freq;
+ __le32 nss_ratio;
} __packed;
struct ath12k_wmi_hal_reg_caps_ext_params {
@@ -2739,6 +2748,11 @@ struct wmi_service_ready_ext2_event {
__le32 default_num_msduq_supported_per_tid;
} __packed;
+struct ath12k_wmi_dbs_or_sbs_cap_params {
+ __le32 hw_mode_id;
+ __le32 sbs_lower_band_end_freq;
+} __packed;
+
struct ath12k_wmi_caps_ext_params {
__le32 hw_mode_id;
__le32 pdev_and_hw_link_ids;
@@ -5049,6 +5063,53 @@ struct ath12k_wmi_pdev {
u32 rx_decap_mode;
};
+struct ath12k_hw_mode_freq_range_arg {
+ u32 low_2ghz_freq;
+ u32 high_2ghz_freq;
+ u32 low_5ghz_freq;
+ u32 high_5ghz_freq;
+};
+
+struct ath12k_svc_ext_mac_phy_info {
+ enum wmi_host_hw_mode_config_type hw_mode_config_type;
+ u32 phy_id;
+ u32 supported_bands;
+ struct ath12k_hw_mode_freq_range_arg hw_freq_range;
+};
+
+#define ATH12K_MAX_MAC_PHY_CAP 8
+
+struct ath12k_svc_ext_info {
+ u32 num_hw_modes;
+ struct ath12k_svc_ext_mac_phy_info mac_phy_info[ATH12K_MAX_MAC_PHY_CAP];
+};
+
+/**
+ * enum ath12k_hw_mode - enum for host mode
+ * @ATH12K_HW_MODE_SMM: Single mac mode
+ * @ATH12K_HW_MODE_DBS: DBS mode
+ * @ATH12K_HW_MODE_SBS: SBS mode with either high share or low share
+ * @ATH12K_HW_MODE_SBS_UPPER_SHARE: Higher 5 GHz shared with 2.4 GHz
+ * @ATH12K_HW_MODE_SBS_LOWER_SHARE: Lower 5 GHz shared with 2.4 GHz
+ * @ATH12K_HW_MODE_MAX: Max, used to indicate invalid mode
+ */
+enum ath12k_hw_mode {
+ ATH12K_HW_MODE_SMM,
+ ATH12K_HW_MODE_DBS,
+ ATH12K_HW_MODE_SBS,
+ ATH12K_HW_MODE_SBS_UPPER_SHARE,
+ ATH12K_HW_MODE_SBS_LOWER_SHARE,
+ ATH12K_HW_MODE_MAX,
+};
+
+struct ath12k_hw_mode_info {
+ bool support_dbs:1;
+ bool support_sbs:1;
+
+ struct ath12k_hw_mode_freq_range_arg freq_range_caps[ATH12K_HW_MODE_MAX]
+ [MAX_RADIOS];
+};
+
struct ath12k_wmi_base {
struct ath12k_base *ab;
struct ath12k_wmi_pdev wmi[MAX_RADIOS];
@@ -5066,6 +5127,10 @@ struct ath12k_wmi_base {
enum wmi_host_hw_mode_config_type preferred_hw_mode;
struct ath12k_wmi_target_cap_arg *targ_cap;
+
+ struct ath12k_svc_ext_info svc_ext_info;
+ u32 sbs_lower_band_end_freq;
+ struct ath12k_hw_mode_info hw_mode_info;
};
struct wmi_pdev_set_bios_interface_cmd {
@@ -5997,6 +6062,118 @@ struct wmi_vdev_set_tpc_power_cmd {
*/
} __packed;
+#define CRTL_F_DYNC_FORCE_LINK_NUM GENMASK(3, 2)
+
+struct wmi_mlo_link_set_active_cmd {
+ __le32 tlv_header;
+ __le32 force_mode;
+ __le32 reason;
+ __le32 use_ieee_link_id_bitmap;
+ struct ath12k_wmi_mac_addr_params ap_mld_mac_addr;
+ __le32 ctrl_flags;
+} __packed;
+
+struct wmi_mlo_set_active_link_number_params {
+ __le32 tlv_header;
+ __le32 num_of_link;
+ __le32 vdev_type;
+ __le32 vdev_subtype;
+ __le32 home_freq;
+} __packed;
+
+#define WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_1 GENMASK(7, 0)
+#define WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_2 GENMASK(15, 8)
+#define WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_3 GENMASK(23, 16)
+#define WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_4 GENMASK(31, 24)
+
+struct wmi_disallowed_mlo_mode_bitmap_params {
+ __le32 tlv_header;
+ __le32 disallowed_mode_bitmap;
+ __le32 ieee_link_id_comb;
+} __packed;
+
+enum wmi_mlo_link_force_mode {
+ WMI_MLO_LINK_FORCE_MODE_ACTIVE = 1,
+ WMI_MLO_LINK_FORCE_MODE_INACTIVE = 2,
+ WMI_MLO_LINK_FORCE_MODE_ACTIVE_LINK_NUM = 3,
+ WMI_MLO_LINK_FORCE_MODE_INACTIVE_LINK_NUM = 4,
+ WMI_MLO_LINK_FORCE_MODE_NO_FORCE = 5,
+ WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE = 6,
+ WMI_MLO_LINK_FORCE_MODE_NON_FORCE_UPDATE = 7,
+};
+
+enum wmi_mlo_link_force_reason {
+ WMI_MLO_LINK_FORCE_REASON_NEW_CONNECT = 1,
+ WMI_MLO_LINK_FORCE_REASON_NEW_DISCONNECT = 2,
+ WMI_MLO_LINK_FORCE_REASON_LINK_REMOVAL = 3,
+ WMI_MLO_LINK_FORCE_REASON_TDLS = 4,
+ WMI_MLO_LINK_FORCE_REASON_REVERT_FAILURE = 5,
+ WMI_MLO_LINK_FORCE_REASON_LINK_DELETE = 6,
+ WMI_MLO_LINK_FORCE_REASON_SINGLE_LINK_EMLSR_OP = 7,
+};
+
+struct wmi_mlo_link_num_arg {
+ u32 num_of_link;
+ u32 vdev_type;
+ u32 vdev_subtype;
+ u32 home_freq;
+};
+
+struct wmi_mlo_control_flags_arg {
+ bool overwrite_force_active_bitmap;
+ bool overwrite_force_inactive_bitmap;
+ bool dync_force_link_num;
+ bool post_re_evaluate;
+ u8 post_re_evaluate_loops;
+ bool dont_reschedule_workqueue;
+};
+
+struct wmi_ml_link_force_cmd_arg {
+ u8 ap_mld_mac_addr[ETH_ALEN];
+ u16 ieee_link_id_bitmap;
+ u16 ieee_link_id_bitmap2;
+ u8 link_num;
+};
+
+struct wmi_ml_disallow_mode_bmap_arg {
+ u32 disallowed_mode;
+ union {
+ u32 ieee_link_id_comb;
+ u8 ieee_link_id[4];
+ };
+};
+
+/* maximum size of link number param array
+ * for MLO link set active command
+ */
+#define WMI_MLO_LINK_NUM_SZ 2
+
+/* maximum size of vdev bitmap array for
+ * MLO link set active command
+ */
+#define WMI_MLO_VDEV_BITMAP_SZ 2
+
+/* Max number of disallowed bitmap combination
+ * sent to firmware
+ */
+#define WMI_ML_MAX_DISALLOW_BMAP_COMB 4
+
+struct wmi_mlo_link_set_active_arg {
+ enum wmi_mlo_link_force_mode force_mode;
+ enum wmi_mlo_link_force_reason reason;
+ u32 num_link_entry;
+ u32 num_vdev_bitmap;
+ u32 num_inactive_vdev_bitmap;
+ struct wmi_mlo_link_num_arg link_num[WMI_MLO_LINK_NUM_SZ];
+ u32 vdev_bitmap[WMI_MLO_VDEV_BITMAP_SZ];
+ u32 inactive_vdev_bitmap[WMI_MLO_VDEV_BITMAP_SZ];
+ struct wmi_mlo_control_flags_arg ctrl_flags;
+ bool use_ieee_link_id;
+ struct wmi_ml_link_force_cmd_arg force_cmd;
+ u32 num_disallow_mode_comb;
+ struct wmi_ml_disallow_mode_bmap_arg disallow_bmap[WMI_ML_MAX_DISALLOW_BMAP_COMB];
+};
+
void ath12k_wmi_init_qcn9274(struct ath12k_base *ab,
struct ath12k_wmi_resource_config_arg *config);
void ath12k_wmi_init_wcn7850(struct ath12k_base *ab,
@@ -6195,5 +6372,6 @@ bool ath12k_wmi_supports_6ghz_cc_ext(struct ath12k *ar);
int ath12k_wmi_send_vdev_set_tpc_power(struct ath12k *ar,
u32 vdev_id,
struct ath12k_reg_tpc_power_info *param);
-
+int ath12k_wmi_send_mlo_link_set_active_cmd(struct ath12k_base *ab,
+ struct wmi_mlo_link_set_active_arg *param);
#endif
diff --git a/drivers/net/wireless/ath/ath6kl/bmi.c b/drivers/net/wireless/ath/ath6kl/bmi.c
index af98e871199d..5a9e93fd1ef4 100644
--- a/drivers/net/wireless/ath/ath6kl/bmi.c
+++ b/drivers/net/wireless/ath/ath6kl/bmi.c
@@ -87,7 +87,9 @@ int ath6kl_bmi_get_target_info(struct ath6kl *ar,
* We need to do some backwards compatibility to make this work.
*/
if (le32_to_cpu(targ_info->byte_count) != sizeof(*targ_info)) {
- WARN_ON(1);
+ ath6kl_err("mismatched byte count %d vs. expected %zd\n",
+ le32_to_cpu(targ_info->byte_count),
+ sizeof(*targ_info));
return -EINVAL;
}
diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c
index a3e03580cd9f..564ca6a61985 100644
--- a/drivers/net/wireless/ath/carl9170/usb.c
+++ b/drivers/net/wireless/ath/carl9170/usb.c
@@ -438,14 +438,21 @@ static void carl9170_usb_rx_complete(struct urb *urb)
if (atomic_read(&ar->rx_anch_urbs) == 0) {
/*
- * The system is too slow to cope with
- * the enormous workload. We have simply
- * run out of active rx urbs and this
- * unfortunately leads to an unpredictable
- * device.
+ * At this point, either the system is too slow to
+ * cope with the enormous workload (so we have simply
+ * run out of active rx urbs and this unfortunately
+ * leads to an unpredictable device), or the device
+ * is not fully functional after an unsuccessful
+		 * firmware loading attempt (so it doesn't pass
+ * ieee80211_register_hw() and there is no internal
+ * workqueue at all).
*/
- ieee80211_queue_work(ar->hw, &ar->ping_work);
+ if (ar->registered)
+ ieee80211_queue_work(ar->hw, &ar->ping_work);
+ else
+ pr_warn_once("device %s is not registered\n",
+ dev_name(&ar->udev->dev));
}
} else {
/*
diff --git a/drivers/net/wireless/ath/wil6210/interrupt.c b/drivers/net/wireless/ath/wil6210/interrupt.c
index 67172385a5d6..89d4394cedcf 100644
--- a/drivers/net/wireless/ath/wil6210/interrupt.c
+++ b/drivers/net/wireless/ath/wil6210/interrupt.c
@@ -179,9 +179,11 @@ void wil_mask_irq(struct wil6210_priv *wil)
wil_dbg_irq(wil, "mask_irq\n");
wil6210_mask_irq_tx(wil);
- wil6210_mask_irq_tx_edma(wil);
+ if (wil->use_enhanced_dma_hw)
+ wil6210_mask_irq_tx_edma(wil);
wil6210_mask_irq_rx(wil);
- wil6210_mask_irq_rx_edma(wil);
+ if (wil->use_enhanced_dma_hw)
+ wil6210_mask_irq_rx_edma(wil);
wil6210_mask_irq_misc(wil, true);
wil6210_mask_irq_pseudo(wil);
}
@@ -190,10 +192,12 @@ void wil_unmask_irq(struct wil6210_priv *wil)
{
wil_dbg_irq(wil, "unmask_irq\n");
- wil_w(wil, RGF_DMA_EP_RX_ICR + offsetof(struct RGF_ICR, ICC),
- WIL_ICR_ICC_VALUE);
- wil_w(wil, RGF_DMA_EP_TX_ICR + offsetof(struct RGF_ICR, ICC),
- WIL_ICR_ICC_VALUE);
+ if (wil->use_enhanced_dma_hw) {
+ wil_w(wil, RGF_DMA_EP_RX_ICR + offsetof(struct RGF_ICR, ICC),
+ WIL_ICR_ICC_VALUE);
+ wil_w(wil, RGF_DMA_EP_TX_ICR + offsetof(struct RGF_ICR, ICC),
+ WIL_ICR_ICC_VALUE);
+ }
wil_w(wil, RGF_DMA_EP_MISC_ICR + offsetof(struct RGF_ICR, ICC),
WIL_ICR_ICC_MISC_VALUE);
wil_w(wil, RGF_INT_GEN_TX_ICR + offsetof(struct RGF_ICR, ICC),
@@ -845,10 +849,12 @@ void wil6210_clear_irq(struct wil6210_priv *wil)
offsetof(struct RGF_ICR, ICR));
wil_clear32(wil->csr + HOSTADDR(RGF_DMA_EP_TX_ICR) +
offsetof(struct RGF_ICR, ICR));
- wil_clear32(wil->csr + HOSTADDR(RGF_INT_GEN_RX_ICR) +
- offsetof(struct RGF_ICR, ICR));
- wil_clear32(wil->csr + HOSTADDR(RGF_INT_GEN_TX_ICR) +
- offsetof(struct RGF_ICR, ICR));
+ if (wil->use_enhanced_dma_hw) {
+ wil_clear32(wil->csr + HOSTADDR(RGF_INT_GEN_RX_ICR) +
+ offsetof(struct RGF_ICR, ICR));
+ wil_clear32(wil->csr + HOSTADDR(RGF_INT_GEN_TX_ICR) +
+ offsetof(struct RGF_ICR, ICR));
+ }
wil_clear32(wil->csr + HOSTADDR(RGF_DMA_EP_MISC_ICR) +
offsetof(struct RGF_ICR, ICR));
wmb(); /* make sure write completed */
diff --git a/drivers/net/wireless/intel/iwlegacy/4965-rs.c b/drivers/net/wireless/intel/iwlegacy/4965-rs.c
index 0e5130d1fccd..031d88bf6393 100644
--- a/drivers/net/wireless/intel/iwlegacy/4965-rs.c
+++ b/drivers/net/wireless/intel/iwlegacy/4965-rs.c
@@ -203,7 +203,8 @@ il4965_rs_extract_rate(u32 rate_n_flags)
return (u8) (rate_n_flags & 0xFF);
}
-static void
+/* noinline works around https://github.com/llvm/llvm-project/issues/143908 */
+static noinline_for_stack void
il4965_rs_rate_scale_clear_win(struct il_rate_scale_data *win)
{
win->data = 0;
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/main.c b/drivers/net/wireless/intel/iwlwifi/dvm/main.c
index dbfd45948e8b..66211426aa3a 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/main.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/main.c
@@ -1316,6 +1316,7 @@ static struct iwl_op_mode *iwl_op_mode_dvm_start(struct iwl_trans *trans,
sizeof(trans->conf.no_reclaim_cmds));
memcpy(trans->conf.no_reclaim_cmds, no_reclaim_cmds,
sizeof(no_reclaim_cmds));
+ trans->conf.n_no_reclaim_cmds = ARRAY_SIZE(no_reclaim_cmds);
switch (iwlwifi_mod_params.amsdu_size) {
case IWL_AMSDU_DEF:
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
index 5cdc09d465d4..e90f3187e55c 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2025 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -754,7 +754,7 @@ struct iwl_lari_config_change_cmd_v10 {
* according to the BIOS definitions.
* For LARI cmd version 11 - bits 0:4 are supported.
* For LARI cmd version 12 - bits 0:6 are supported and bits 7:31 are
- * reserved. No need to mask out the reserved bits.
+ * reserved.
* @force_disable_channels_bitmap: Bitmap of disabled bands/channels.
* Each bit represents a set of channels in a specific band that should be
* disabled
@@ -787,6 +787,7 @@ struct iwl_lari_config_change_cmd {
/* Activate UNII-1 (5.2GHz) for World Wide */
#define ACTIVATE_5G2_IN_WW_MASK BIT(4)
#define CHAN_STATE_ACTIVE_BITMAP_CMD_V11 0x1F
+#define CHAN_STATE_ACTIVE_BITMAP_CMD_V12 0x7F
/**
* struct iwl_pnvm_init_complete_ntfy - PNVM initialization complete
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c
index 74b90bd92c48..ebfba981cf89 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c
@@ -614,6 +614,7 @@ int iwl_fill_lari_config(struct iwl_fw_runtime *fwrt,
ret = iwl_bios_get_dsm(fwrt, DSM_FUNC_ACTIVATE_CHANNEL, &value);
if (!ret) {
+ value &= CHAN_STATE_ACTIVE_BITMAP_CMD_V12;
if (cmd_ver < 8)
value &= ~ACTIVATE_5G2_IN_WW_MASK;
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c
index e8820e7cf8fa..1774bb84dd3f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c
@@ -77,6 +77,7 @@ void iwl_construct_mld(struct iwl_mld *mld, struct iwl_trans *trans,
/* Setup async RX handling */
spin_lock_init(&mld->async_handlers_lock);
+ INIT_LIST_HEAD(&mld->async_handlers_list);
wiphy_work_init(&mld->async_handlers_wk,
iwl_mld_async_handlers_wk);
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c
index 326c300470ea..436219d1ec5e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c
+++ b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c
@@ -251,8 +251,10 @@ void iwl_mld_configure_lari(struct iwl_mld *mld)
cpu_to_le32(value &= DSM_UNII4_ALLOW_BITMAP);
ret = iwl_bios_get_dsm(fwrt, DSM_FUNC_ACTIVATE_CHANNEL, &value);
- if (!ret)
+ if (!ret) {
+ value &= CHAN_STATE_ACTIVE_BITMAP_CMD_V12;
cmd.chan_state_active_bitmap = cpu_to_le32(value);
+ }
ret = iwl_bios_get_dsm(fwrt, DSM_FUNC_ENABLE_6E, &value);
if (!ret)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c
index 81ca9ff67be9..3f8b840871d3 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c
@@ -32,9 +32,9 @@ static void iwl_mvm_mld_mac_ctxt_cmd_common(struct iwl_mvm *mvm,
unsigned int link_id;
int cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw,
WIDE_ID(MAC_CONF_GROUP,
- MAC_CONFIG_CMD), 0);
+ MAC_CONFIG_CMD), 1);
- if (WARN_ON(cmd_ver < 1 && cmd_ver > 3))
+ if (WARN_ON(cmd_ver > 3))
return;
cmd->id_and_color = cpu_to_le32(mvmvif->id);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
index cb36baac14da..4f2be0c1bd97 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
@@ -166,7 +166,7 @@ int iwl_pcie_ctxt_info_init(struct iwl_trans *trans,
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
struct iwl_context_info *ctxt_info;
struct iwl_context_info_rbd_cfg *rx_cfg;
- u32 control_flags = 0, rb_size;
+ u32 control_flags = 0, rb_size, cb_size;
dma_addr_t phys;
int ret;
@@ -202,11 +202,12 @@ int iwl_pcie_ctxt_info_init(struct iwl_trans *trans,
rb_size = IWL_CTXT_INFO_RB_SIZE_4K;
}
- WARN_ON(RX_QUEUE_CB_SIZE(iwl_trans_get_num_rbds(trans)) > 12);
+ cb_size = RX_QUEUE_CB_SIZE(iwl_trans_get_num_rbds(trans));
+ if (WARN_ON(cb_size > 12))
+ cb_size = 12;
+
control_flags = IWL_CTXT_INFO_TFD_FORMAT_LONG;
- control_flags |=
- u32_encode_bits(RX_QUEUE_CB_SIZE(iwl_trans_get_num_rbds(trans)),
- IWL_CTXT_INFO_RB_CB_SIZE);
+ control_flags |= u32_encode_bits(cb_size, IWL_CTXT_INFO_RB_CB_SIZE);
control_flags |= u32_encode_bits(rb_size, IWL_CTXT_INFO_RB_SIZE);
ctxt_info->control.control_flags = cpu_to_le32(control_flags);
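
The change replaces a bare WARN_ON with clamp-then-encode: a value wider than its bitfield would otherwise spill into the neighbouring control bits. A tiny sketch of the guard, with an illustrative field limit:

#include <stdint.h>

#define CB_SIZE_MAX 12U		/* illustrative field limit */

/* Clamp before u32_encode_bits()-style packing: a too-wide value would
 * otherwise corrupt the neighbouring control-flag bits. */
static uint32_t encode_cb_size(uint32_t cb_size)
{
	if (cb_size > CB_SIZE_MAX)
		cb_size = CB_SIZE_MAX;
	return cb_size;	/* caller shifts this into its bitfield */
}
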
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 656f8b06c27b..0a9e0dbb58fb 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -1501,11 +1501,27 @@ static int _iwl_pci_resume(struct device *device, bool restore)
* Scratch value was altered, this means the device was powered off, we
* need to reset it completely.
* Note: MAC (bits 0:7) will be cleared upon suspend even with wowlan,
- * so assume that any bits there mean that the device is usable.
+	 * but not bits [15:8]. So if we have bits set in the lower word, assume
+ * the device is alive.
+ * For older devices, just try silently to grab the NIC.
*/
- if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ &&
- !iwl_read32(trans, CSR_FUNC_SCRATCH))
- device_was_powered_off = true;
+ if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) {
+ if (!(iwl_read32(trans, CSR_FUNC_SCRATCH) &
+ CSR_FUNC_SCRATCH_POWER_OFF_MASK))
+ device_was_powered_off = true;
+ } else {
+ /*
+		 * BHs are re-enabled by iwl_trans_pcie_release_nic_access,
+ * so re-enable them if _iwl_trans_pcie_grab_nic_access fails.
+ */
+ local_bh_disable();
+ if (_iwl_trans_pcie_grab_nic_access(trans, true)) {
+ iwl_trans_pcie_release_nic_access(trans);
+ } else {
+ device_was_powered_off = true;
+ local_bh_enable();
+ }
+ }
if (restore || device_was_powered_off) {
trans->state = IWL_TRANS_NO_FW;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
index c8f4f3a1d2eb..5a9c3b7976a1 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
@@ -546,8 +546,10 @@ again:
}
if (WARN_ON(trans->do_top_reset &&
- trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_SC))
- return -EINVAL;
+ trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_SC)) {
+ ret = -EINVAL;
+ goto out;
+ }
/* we need to wait later - set state */
if (trans->do_top_reset)
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index bb467e2b1779..eee55428749c 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -2101,10 +2101,10 @@ static void iwl_txq_gen1_update_byte_cnt_tbl(struct iwl_trans *trans,
bc_ent = cpu_to_le16(len | (sta_id << 12));
- scd_bc_tbl[txq_id * BC_TABLE_SIZE + write_ptr].tfd_offset = bc_ent;
+ scd_bc_tbl[txq_id * TFD_QUEUE_BC_SIZE + write_ptr].tfd_offset = bc_ent;
if (write_ptr < TFD_QUEUE_SIZE_BC_DUP)
- scd_bc_tbl[txq_id * BC_TABLE_SIZE + TFD_QUEUE_SIZE_MAX + write_ptr].tfd_offset =
+ scd_bc_tbl[txq_id * TFD_QUEUE_BC_SIZE + TFD_QUEUE_SIZE_MAX + write_ptr].tfd_offset =
bc_ent;
}
@@ -2328,10 +2328,10 @@ static void iwl_txq_gen1_inval_byte_cnt_tbl(struct iwl_trans *trans,
bc_ent = cpu_to_le16(1 | (sta_id << 12));
- scd_bc_tbl[txq_id * BC_TABLE_SIZE + read_ptr].tfd_offset = bc_ent;
+ scd_bc_tbl[txq_id * TFD_QUEUE_BC_SIZE + read_ptr].tfd_offset = bc_ent;
if (read_ptr < TFD_QUEUE_SIZE_BC_DUP)
- scd_bc_tbl[txq_id * BC_TABLE_SIZE + TFD_QUEUE_SIZE_MAX + read_ptr].tfd_offset =
+ scd_bc_tbl[txq_id * TFD_QUEUE_BC_SIZE + TFD_QUEUE_SIZE_MAX + read_ptr].tfd_offset =
bc_ent;
}
diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c
index 738bafc3749b..66f0f5377ac1 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n.c
@@ -403,14 +403,12 @@ mwifiex_cmd_append_11n_tlv(struct mwifiex_private *priv,
if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 &&
bss_desc->bcn_ht_oper->ht_param &
- IEEE80211_HT_PARAM_CHAN_WIDTH_ANY) {
- chan_list->chan_scan_param[0].radio_type |=
- CHAN_BW_40MHZ << 2;
+ IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)
SET_SECONDARYCHAN(chan_list->chan_scan_param[0].
radio_type,
(bss_desc->bcn_ht_oper->ht_param &
IEEE80211_HT_PARAM_CHA_SEC_OFFSET));
- }
+
*buffer += struct_size(chan_list, chan_scan_param, 1);
ret_len += struct_size(chan_list, chan_scan_param, 1);
}
diff --git a/drivers/net/wireless/marvell/mwifiex/util.c b/drivers/net/wireless/marvell/mwifiex/util.c
index 4c5b1de0e936..6882e90e90b2 100644
--- a/drivers/net/wireless/marvell/mwifiex/util.c
+++ b/drivers/net/wireless/marvell/mwifiex/util.c
@@ -459,7 +459,9 @@ mwifiex_process_mgmt_packet(struct mwifiex_private *priv,
"auth: receive authentication from %pM\n",
ieee_hdr->addr3);
} else {
- if (!priv->wdev.connected)
+ if (!priv->wdev.connected ||
+ !ether_addr_equal(ieee_hdr->addr3,
+ priv->curr_bss_params.bss_descriptor.mac_address))
return 0;
if (ieee80211_is_deauth(ieee_hdr->frame_control)) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 5f8d81cda6cd..74b75035d361 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -1224,6 +1224,16 @@ static inline int mt76_wed_dma_setup(struct mt76_dev *dev, struct mt76_queue *q,
#define mt76_dereference(p, dev) \
rcu_dereference_protected(p, lockdep_is_held(&(dev)->mutex))
+static inline struct mt76_wcid *
+__mt76_wcid_ptr(struct mt76_dev *dev, u16 idx)
+{
+ if (idx >= ARRAY_SIZE(dev->wcid))
+ return NULL;
+ return rcu_dereference(dev->wcid[idx]);
+}
+
+#define mt76_wcid_ptr(dev, idx) __mt76_wcid_ptr(&(dev)->mt76, idx)
+
struct mt76_dev *mt76_alloc_device(struct device *pdev, unsigned int size,
const struct ieee80211_ops *ops,
const struct mt76_driver_ops *drv_ops);
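
The new helper centralizes the bounds check that several call sites below used to open-code before dereferencing the RCU-protected wcid table. A non-RCU sketch of the same shape, with an illustrative table size:

#include <stddef.h>

#define N_WCID 288	/* illustrative table size */

struct wcid { int idx; };

/* One bounds-checked lookup instead of an open-coded range check at
 * every call site; the kernel helper also wraps rcu_dereference(). */
static struct wcid *wcid_ptr(struct wcid *const table[N_WCID], size_t idx)
{
	if (idx >= N_WCID)
		return NULL;
	return table[idx];
}
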
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
index 863e5770df51..e26cc78fff94 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
@@ -44,7 +44,7 @@ mt7603_rx_loopback_skb(struct mt7603_dev *dev, struct sk_buff *skb)
if (idx >= MT7603_WTBL_STA - 1)
goto free;
- wcid = rcu_dereference(dev->mt76.wcid[idx]);
+ wcid = mt76_wcid_ptr(dev, idx);
if (!wcid)
goto free;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index 413973d05b43..6387f9e61060 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -487,10 +487,7 @@ mt7603_rx_get_wcid(struct mt7603_dev *dev, u8 idx, bool unicast)
struct mt7603_sta *sta;
struct mt76_wcid *wcid;
- if (idx >= MT7603_WTBL_SIZE)
- return NULL;
-
- wcid = rcu_dereference(dev->mt76.wcid[idx]);
+ wcid = mt76_wcid_ptr(dev, idx);
if (unicast || !wcid)
return wcid;
@@ -1266,12 +1263,9 @@ void mt7603_mac_add_txs(struct mt7603_dev *dev, void *data)
if (pid == MT_PACKET_ID_NO_ACK)
return;
- if (wcidx >= MT7603_WTBL_SIZE)
- return;
-
rcu_read_lock();
- wcid = rcu_dereference(dev->mt76.wcid[wcidx]);
+ wcid = mt76_wcid_ptr(dev, wcidx);
if (!wcid)
goto out;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 3ca4fae7c4b0..f8d2cc94b742 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -90,10 +90,7 @@ static struct mt76_wcid *mt7615_rx_get_wcid(struct mt7615_dev *dev,
struct mt7615_sta *sta;
struct mt76_wcid *wcid;
- if (idx >= MT7615_WTBL_SIZE)
- return NULL;
-
- wcid = rcu_dereference(dev->mt76.wcid[idx]);
+ wcid = mt76_wcid_ptr(dev, idx);
if (unicast || !wcid)
return wcid;
@@ -1504,7 +1501,7 @@ static void mt7615_mac_add_txs(struct mt7615_dev *dev, void *data)
rcu_read_lock();
- wcid = rcu_dereference(dev->mt76.wcid[wcidx]);
+ wcid = mt76_wcid_ptr(dev, wcidx);
if (!wcid)
goto out;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
index e9ac8a7317a1..0db00efe88b0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
@@ -1172,7 +1172,7 @@ void mt76_connac2_txwi_free(struct mt76_dev *dev, struct mt76_txwi_cache *t,
wcid_idx = wcid->idx;
} else {
wcid_idx = le32_get_bits(txwi[1], MT_TXD1_WLAN_IDX);
- wcid = rcu_dereference(dev->wcid[wcid_idx]);
+ wcid = __mt76_wcid_ptr(dev, wcid_idx);
if (wcid && wcid->sta) {
sta = container_of((void *)wcid, struct ieee80211_sta,
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index cb13d0a76878..16db0f2082d1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -287,7 +287,7 @@ __mt76_connac_mcu_alloc_sta_req(struct mt76_dev *dev, struct mt76_vif_link *mvif
mt76_connac_mcu_get_wlan_idx(dev, wcid, &hdr.wlan_idx_lo,
&hdr.wlan_idx_hi);
- skb = mt76_mcu_msg_alloc(dev, NULL, len);
+ skb = __mt76_mcu_msg_alloc(dev, NULL, len, len, GFP_ATOMIC);
if (!skb)
return ERR_PTR(-ENOMEM);
@@ -1740,8 +1740,8 @@ int mt76_connac_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif,
if (!sreq->ssids[i].ssid_len)
continue;
- req->ssids[i].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len);
- memcpy(req->ssids[i].ssid, sreq->ssids[i].ssid,
+ req->ssids[n_ssids].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len);
+ memcpy(req->ssids[n_ssids].ssid, sreq->ssids[i].ssid,
sreq->ssids[i].ssid_len);
n_ssids++;
}
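
The scan-request fix above indexes the destination with a separate n_ssids counter so that non-empty SSIDs are packed contiguously; reusing the source index i (as the previous code did) left gaps whenever some SSIDs were empty. A minimal sketch of the compaction:

#include <stddef.h>

struct ssid { char data[32]; size_t len; };

/* Write through a separate output index so non-empty entries end up
 * packed at the front; indexing dst with i would leave holes. */
static size_t pack_ssids(const struct ssid *src, size_t n, struct ssid *dst)
{
	size_t n_out = 0;

	for (size_t i = 0; i < n; i++)
		if (src[i].len)
			dst[n_out++] = src[i];

	return n_out;
}
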
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h
index 4cd63bacd742..9d7ee09b6cc9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h
@@ -262,10 +262,7 @@ mt76x02_rx_get_sta(struct mt76_dev *dev, u8 idx)
{
struct mt76_wcid *wcid;
- if (idx >= MT76x02_N_WCIDS)
- return NULL;
-
- wcid = rcu_dereference(dev->wcid[idx]);
+ wcid = __mt76_wcid_ptr(dev, idx);
if (!wcid)
return NULL;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
index d5db6ffd6d36..83488b2d6efb 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
@@ -564,9 +564,7 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev,
rcu_read_lock();
- if (stat->wcid < MT76x02_N_WCIDS)
- wcid = rcu_dereference(dev->mt76.wcid[stat->wcid]);
-
+ wcid = mt76_wcid_ptr(dev, stat->wcid);
if (wcid && wcid->sta) {
void *priv;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 9400e4af2a04..6639976afcee 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -56,10 +56,7 @@ static struct mt76_wcid *mt7915_rx_get_wcid(struct mt7915_dev *dev,
struct mt7915_sta *sta;
struct mt76_wcid *wcid;
- if (idx >= ARRAY_SIZE(dev->mt76.wcid))
- return NULL;
-
- wcid = rcu_dereference(dev->mt76.wcid[idx]);
+ wcid = mt76_wcid_ptr(dev, idx);
if (unicast || !wcid)
return wcid;
@@ -917,7 +914,7 @@ mt7915_mac_tx_free(struct mt7915_dev *dev, void *data, int len)
u16 idx;
idx = FIELD_GET(MT_TX_FREE_WLAN_ID, info);
- wcid = rcu_dereference(dev->mt76.wcid[idx]);
+ wcid = mt76_wcid_ptr(dev, idx);
sta = wcid_to_sta(wcid);
if (!sta)
continue;
@@ -1013,12 +1010,9 @@ static void mt7915_mac_add_txs(struct mt7915_dev *dev, void *data)
if (pid < MT_PACKET_ID_WED)
return;
- if (wcidx >= mt7915_wtbl_size(dev))
- return;
-
rcu_read_lock();
- wcid = rcu_dereference(dev->mt76.wcid[wcidx]);
+ wcid = mt76_wcid_ptr(dev, wcidx);
if (!wcid)
goto out;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 427542777abc..c6584d2b7509 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -3986,7 +3986,7 @@ int mt7915_mcu_wed_wa_tx_stats(struct mt7915_dev *dev, u16 wlan_idx)
rcu_read_lock();
- wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]);
+ wcid = mt76_wcid_ptr(dev, wlan_idx);
if (wcid)
wcid->stats.tx_packets += le32_to_cpu(res->tx_packets);
else
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
index 9c4d5cea0c42..4a82f8e4c118 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
@@ -587,12 +587,9 @@ static void mt7915_mmio_wed_update_rx_stats(struct mtk_wed_device *wed,
dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
- if (idx >= mt7915_wtbl_size(dev))
- return;
-
rcu_read_lock();
- wcid = rcu_dereference(dev->mt76.wcid[idx]);
+ wcid = mt76_wcid_ptr(dev, idx);
if (wcid) {
wcid->stats.rx_bytes += le32_to_cpu(stats->rx_byte_cnt);
wcid->stats.rx_packets += le32_to_cpu(stats->rx_pkt_cnt);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 5dd57de59f27..f1f76506b0a5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -465,7 +465,7 @@ void mt7921_mac_add_txs(struct mt792x_dev *dev, void *data)
rcu_read_lock();
- wcid = rcu_dereference(dev->mt76.wcid[wcidx]);
+ wcid = mt76_wcid_ptr(dev, wcidx);
if (!wcid)
goto out;
@@ -516,7 +516,7 @@ static void mt7921_mac_tx_free(struct mt792x_dev *dev, void *data, int len)
count++;
idx = FIELD_GET(MT_TX_FREE_WLAN_ID, info);
- wcid = rcu_dereference(dev->mt76.wcid[idx]);
+ wcid = mt76_wcid_ptr(dev, idx);
sta = wcid_to_sta(wcid);
if (!sta)
continue;
@@ -816,7 +816,7 @@ void mt7921_usb_sdio_tx_complete_skb(struct mt76_dev *mdev,
u16 idx;
idx = le32_get_bits(txwi[1], MT_TXD1_WLAN_IDX);
- wcid = rcu_dereference(mdev->wcid[idx]);
+ wcid = __mt76_wcid_ptr(mdev, idx);
sta = wcid_to_sta(wcid);
if (sta && likely(e->skb->protocol != cpu_to_be16(ETH_P_PAE)))
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 1fffa43379b2..77f73ae1d7ec 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -1180,6 +1180,9 @@ static void mt7921_sta_set_decap_offload(struct ieee80211_hw *hw,
struct mt792x_sta *msta = (struct mt792x_sta *)sta->drv_priv;
struct mt792x_dev *dev = mt792x_hw_dev(hw);
+ if (!msta->deflink.wcid.sta)
+ return;
+
mt792x_mutex_acquire(dev);
if (enabled)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/init.c b/drivers/net/wireless/mediatek/mt76/mt7925/init.c
index 2a83ff59a968..4249bad83c93 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/init.c
@@ -52,6 +52,8 @@ static int mt7925_thermal_init(struct mt792x_phy *phy)
name = devm_kasprintf(&wiphy->dev, GFP_KERNEL, "mt7925_%s",
wiphy_name(wiphy));
+ if (!name)
+ return -ENOMEM;
hwmon = devm_hwmon_device_register_with_groups(&wiphy->dev, name, phy,
mt7925_hwmon_groups);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c
index c871d2f9688b..75823c9fd3a1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c
@@ -1040,7 +1040,7 @@ void mt7925_mac_add_txs(struct mt792x_dev *dev, void *data)
rcu_read_lock();
- wcid = rcu_dereference(dev->mt76.wcid[wcidx]);
+ wcid = mt76_wcid_ptr(dev, wcidx);
if (!wcid)
goto out;
@@ -1122,7 +1122,7 @@ mt7925_mac_tx_free(struct mt792x_dev *dev, void *data, int len)
u16 idx;
idx = FIELD_GET(MT_TXFREE_INFO_WLAN_ID, info);
- wcid = rcu_dereference(dev->mt76.wcid[idx]);
+ wcid = mt76_wcid_ptr(dev, idx);
sta = wcid_to_sta(wcid);
if (!sta)
continue;
@@ -1445,7 +1445,7 @@ void mt7925_usb_sdio_tx_complete_skb(struct mt76_dev *mdev,
u16 idx;
idx = le32_get_bits(txwi[1], MT_TXD1_WLAN_IDX);
- wcid = rcu_dereference(mdev->wcid[idx]);
+ wcid = __mt76_wcid_ptr(mdev, idx);
sta = wcid_to_sta(wcid);
if (sta && likely(e->skb->protocol != cpu_to_be16(ETH_P_PAE)))
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c
index 94b0099dcd41..5b001548dffc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c
@@ -1481,7 +1481,7 @@ mt7925_start_sched_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
mt792x_mutex_acquire(dev);
- err = mt7925_mcu_sched_scan_req(mphy, vif, req);
+ err = mt7925_mcu_sched_scan_req(mphy, vif, req, ies);
if (err < 0)
goto out;
@@ -1603,6 +1603,9 @@ static void mt7925_sta_set_decap_offload(struct ieee80211_hw *hw,
unsigned long valid = mvif->valid_links;
u8 i;
+ if (!msta->vif)
+ return;
+
mt792x_mutex_acquire(dev);
valid = ieee80211_vif_is_mld(vif) ? mvif->valid_links : BIT(0);
@@ -1617,6 +1620,9 @@ static void mt7925_sta_set_decap_offload(struct ieee80211_hw *hw,
else
clear_bit(MT_WCID_FLAG_HDR_TRANS, &mlink->wcid.flags);
+ if (!mlink->wcid.sta)
+ continue;
+
mt7925_mcu_wtbl_update_hdr_trans(dev, vif, sta, i);
}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c
index b8542be0d945..8ac6fbb736ab 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c
@@ -164,6 +164,7 @@ mt7925_connac_mcu_set_wow_ctrl(struct mt76_phy *phy, struct ieee80211_vif *vif,
bool suspend, struct cfg80211_wowlan *wowlan)
{
struct mt76_vif_link *mvif = (struct mt76_vif_link *)vif->drv_priv;
+ struct ieee80211_scan_ies ies = {};
struct mt76_dev *dev = phy->dev;
struct {
struct {
@@ -194,7 +195,7 @@ mt7925_connac_mcu_set_wow_ctrl(struct mt76_phy *phy, struct ieee80211_vif *vif,
req.wow_ctrl_tlv.trigger |= (UNI_WOW_DETECT_TYPE_DISCONNECT |
UNI_WOW_DETECT_TYPE_BCN_LOST);
if (wowlan->nd_config) {
- mt7925_mcu_sched_scan_req(phy, vif, wowlan->nd_config);
+ mt7925_mcu_sched_scan_req(phy, vif, wowlan->nd_config, &ies);
req.wow_ctrl_tlv.trigger |= UNI_WOW_DETECT_TYPE_SCH_SCAN_HIT;
mt7925_mcu_sched_scan_enable(phy, vif, suspend);
}
@@ -2818,6 +2819,54 @@ int mt7925_mcu_set_dbdc(struct mt76_phy *phy, bool enable)
return err;
}
+static void
+mt7925_mcu_build_scan_ie_tlv(struct mt76_dev *mdev,
+ struct sk_buff *skb,
+ struct ieee80211_scan_ies *scan_ies)
+{
+ u32 max_len = sizeof(struct scan_ie_tlv) + MT76_CONNAC_SCAN_IE_LEN;
+ struct scan_ie_tlv *ie;
+ enum nl80211_band i;
+ struct tlv *tlv;
+ const u8 *ies;
+ u16 ies_len;
+
+ for (i = 0; i <= NL80211_BAND_6GHZ; i++) {
+ if (i == NL80211_BAND_60GHZ)
+ continue;
+
+ ies = scan_ies->ies[i];
+ ies_len = scan_ies->len[i];
+
+ if (!ies || !ies_len)
+ continue;
+
+ if (ies_len > max_len)
+ return;
+
+ tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_IE,
+ sizeof(*ie) + ies_len);
+ ie = (struct scan_ie_tlv *)tlv;
+
+ memcpy(ie->ies, ies, ies_len);
+ ie->ies_len = cpu_to_le16(ies_len);
+
+ switch (i) {
+ case NL80211_BAND_2GHZ:
+ ie->band = 1;
+ break;
+ case NL80211_BAND_6GHZ:
+ ie->band = 3;
+ break;
+ default:
+ ie->band = 2;
+ break;
+ }
+
+ max_len -= (sizeof(*ie) + ies_len);
+ }
+}
+
int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif,
struct ieee80211_scan_request *scan_req)
{
@@ -2843,7 +2892,8 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif,
max_len = sizeof(*hdr) + sizeof(*req) + sizeof(*ssid) +
sizeof(*bssid) * MT7925_RNR_SCAN_MAX_BSSIDS +
- sizeof(*chan_info) + sizeof(*misc) + sizeof(*ie);
+ sizeof(*chan_info) + sizeof(*misc) + sizeof(*ie) +
+ MT76_CONNAC_SCAN_IE_LEN;
skb = mt76_mcu_msg_alloc(mdev, NULL, max_len);
if (!skb)
@@ -2869,8 +2919,8 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif,
if (i > MT7925_RNR_SCAN_MAX_BSSIDS)
break;
- ssid->ssids[i].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len);
- memcpy(ssid->ssids[i].ssid, sreq->ssids[i].ssid,
+ ssid->ssids[n_ssids].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len);
+ memcpy(ssid->ssids[n_ssids].ssid, sreq->ssids[i].ssid,
sreq->ssids[i].ssid_len);
n_ssids++;
}
@@ -2925,13 +2975,6 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif,
}
chan_info->channel_type = sreq->n_channels ? 4 : 0;
- tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_IE, sizeof(*ie));
- ie = (struct scan_ie_tlv *)tlv;
- if (sreq->ie_len > 0) {
- memcpy(ie->ies, sreq->ie, sreq->ie_len);
- ie->ies_len = cpu_to_le16(sreq->ie_len);
- }
-
req->scan_func |= SCAN_FUNC_SPLIT_SCAN;
tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_MISC, sizeof(*misc));
@@ -2942,6 +2985,9 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif,
req->scan_func |= SCAN_FUNC_RANDOM_MAC;
}
+ /* Append scan probe IEs as the last tlv */
+ mt7925_mcu_build_scan_ie_tlv(mdev, skb, &scan_req->ies);
+
err = mt76_mcu_skb_send_msg(mdev, skb, MCU_UNI_CMD(SCAN_REQ),
true);
if (err < 0)
@@ -2953,7 +2999,8 @@ EXPORT_SYMBOL_GPL(mt7925_mcu_hw_scan);
int mt7925_mcu_sched_scan_req(struct mt76_phy *phy,
struct ieee80211_vif *vif,
- struct cfg80211_sched_scan_request *sreq)
+ struct cfg80211_sched_scan_request *sreq,
+ struct ieee80211_scan_ies *ies)
{
struct mt76_vif_link *mvif = (struct mt76_vif_link *)vif->drv_priv;
struct ieee80211_channel **scan_list = sreq->channels;
@@ -3041,12 +3088,8 @@ int mt7925_mcu_sched_scan_req(struct mt76_phy *phy,
}
chan_info->channel_type = sreq->n_channels ? 4 : 0;
- tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_IE, sizeof(*ie));
- ie = (struct scan_ie_tlv *)tlv;
- if (sreq->ie_len > 0) {
- memcpy(ie->ies, sreq->ie, sreq->ie_len);
- ie->ies_len = cpu_to_le16(sreq->ie_len);
- }
+ /* Append scan probe IEs as the last tlv */
+ mt7925_mcu_build_scan_ie_tlv(mdev, skb, ies);
return mt76_mcu_skb_send_msg(mdev, skb, MCU_UNI_CMD(SCAN_REQ),
true);
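Taken together, these mcu.c hunks move the scan IEs out of a single fixed-size TLV and into mt7925_mcu_build_scan_ie_tlv(), which appends one UNI_SCAN_IE TLV per band that actually carries IEs; both callers now pass a struct ieee80211_scan_ies. The band field encoding is taken directly from the switch above; restated as a helper for readability (sketch, not a function in the driver):

    /* Firmware band encoding used by the scan IE TLV:
     *   NL80211_BAND_2GHZ -> 1, 5 GHz (and others) -> 2, NL80211_BAND_6GHZ -> 3
     */
    static u8 scan_ie_band_code(enum nl80211_band band)
    {
            switch (band) {
            case NL80211_BAND_2GHZ:
                    return 1;
            case NL80211_BAND_6GHZ:
                    return 3;
            default:
                    return 2;
            }
    }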
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h
index ee6fb16e83c5..a40764d89a1f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h
@@ -269,7 +269,7 @@ struct scan_ie_tlv {
__le16 ies_len;
u8 band;
u8 pad;
- u8 ies[MT76_CONNAC_SCAN_IE_LEN];
+ u8 ies[];
};
struct scan_misc_tlv {
@@ -673,7 +673,8 @@ int mt7925_mcu_cancel_hw_scan(struct mt76_phy *phy,
struct ieee80211_vif *vif);
int mt7925_mcu_sched_scan_req(struct mt76_phy *phy,
struct ieee80211_vif *vif,
- struct cfg80211_sched_scan_request *sreq);
+ struct cfg80211_sched_scan_request *sreq,
+ struct ieee80211_scan_ies *ies);
int mt7925_mcu_sched_scan_enable(struct mt76_phy *phy,
struct ieee80211_vif *vif,
bool enable);
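With ies[] shrunk from a fixed MT76_CONNAC_SCAN_IE_LEN buffer to a flexible array member, every allocation must size the trailing payload explicitly, which is what the `sizeof(*ie) + ies_len` passed to mt76_connac_mcu_add_tlv() above does. Outside a TLV builder the usual spelling is struct_size(); sketch:

    #include <linux/overflow.h>

    struct scan_ie_tlv *ie;

    /* Header plus ies_len payload bytes in one allocation;
     * struct_size() checks the size arithmetic for overflow.
     */
    ie = kzalloc(struct_size(ie, ies, ies_len), GFP_KERNEL);
    if (!ie)
            return -ENOMEM;
    ie->ies_len = cpu_to_le16(ies_len);
    memcpy(ie->ies, src, ies_len);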
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/regs.h b/drivers/net/wireless/mediatek/mt76/mt7925/regs.h
index 547489092c29..341987e47f67 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7925/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/regs.h
@@ -58,7 +58,7 @@
#define MT_INT_TX_DONE_MCU (MT_INT_TX_DONE_MCU_WM | \
MT_INT_TX_DONE_FWDL)
-#define MT_INT_TX_DONE_ALL (MT_INT_TX_DONE_MCU_WM | \
+#define MT_INT_TX_DONE_ALL (MT_INT_TX_DONE_MCU | \
MT_INT_TX_DONE_BAND0 | \
GENMASK(18, 4))
diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c
index a50c1723ca29..05130ec1e5f7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c
+++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c
@@ -28,7 +28,7 @@ static const struct ieee80211_iface_combination if_comb[] = {
},
};
-static const struct ieee80211_iface_limit if_limits_chanctx[] = {
+static const struct ieee80211_iface_limit if_limits_chanctx_mcc[] = {
{
.max = 2,
.types = BIT(NL80211_IFTYPE_STATION) |
@@ -36,8 +36,23 @@ static const struct ieee80211_iface_limit if_limits_chanctx[] = {
},
{
.max = 1,
- .types = BIT(NL80211_IFTYPE_AP) |
- BIT(NL80211_IFTYPE_P2P_GO)
+ .types = BIT(NL80211_IFTYPE_P2P_GO)
+ },
+ {
+ .max = 1,
+ .types = BIT(NL80211_IFTYPE_P2P_DEVICE)
+ }
+};
+
+static const struct ieee80211_iface_limit if_limits_chanctx_scc[] = {
+ {
+ .max = 2,
+ .types = BIT(NL80211_IFTYPE_STATION) |
+ BIT(NL80211_IFTYPE_P2P_CLIENT)
+ },
+ {
+ .max = 1,
+ .types = BIT(NL80211_IFTYPE_AP)
},
{
.max = 1,
@@ -47,11 +62,18 @@ static const struct ieee80211_iface_limit if_limits_chanctx[] = {
static const struct ieee80211_iface_combination if_comb_chanctx[] = {
{
- .limits = if_limits_chanctx,
- .n_limits = ARRAY_SIZE(if_limits_chanctx),
+ .limits = if_limits_chanctx_mcc,
+ .n_limits = ARRAY_SIZE(if_limits_chanctx_mcc),
.max_interfaces = 3,
.num_different_channels = 2,
.beacon_int_infra_match = false,
+ },
+ {
+ .limits = if_limits_chanctx_scc,
+ .n_limits = ARRAY_SIZE(if_limits_chanctx_scc),
+ .max_interfaces = 3,
+ .num_different_channels = 1,
+ .beacon_int_infra_match = false,
}
};
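Splitting the single limits array into if_limits_chanctx_mcc and if_limits_chanctx_scc lets the driver advertise two distinct combinations: two concurrent channels when the extra interface is a P2P GO (MCC), and a single channel when it is an AP (SCC). mac80211 validates interface bring-up against whichever entry matches. The consumer side is the usual wiphy plumbing (sketch; the mt792x registration code is not in this hunk):

    wiphy->iface_combinations = if_comb_chanctx;
    wiphy->n_iface_combinations = ARRAY_SIZE(if_comb_chanctx);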
diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_mac.c b/drivers/net/wireless/mediatek/mt76/mt792x_mac.c
index 05978d9c7b91..3f1d9ba49076 100644
--- a/drivers/net/wireless/mediatek/mt76/mt792x_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt792x_mac.c
@@ -142,10 +142,7 @@ struct mt76_wcid *mt792x_rx_get_wcid(struct mt792x_dev *dev, u16 idx,
struct mt792x_sta *sta;
struct mt76_wcid *wcid;
- if (idx >= ARRAY_SIZE(dev->mt76.wcid))
- return NULL;
-
- wcid = rcu_dereference(dev->mt76.wcid[idx]);
+ wcid = mt76_wcid_ptr(dev, idx);
if (unicast || !wcid)
return wcid;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
index 0dbd4662bc84..92148518f6a5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
@@ -61,10 +61,7 @@ static struct mt76_wcid *mt7996_rx_get_wcid(struct mt7996_dev *dev,
struct mt76_wcid *wcid;
int i;
- if (idx >= ARRAY_SIZE(dev->mt76.wcid))
- return NULL;
-
- wcid = rcu_dereference(dev->mt76.wcid[idx]);
+ wcid = mt76_wcid_ptr(dev, idx);
if (!wcid)
return NULL;
@@ -1249,7 +1246,7 @@ mt7996_mac_tx_free(struct mt7996_dev *dev, void *data, int len)
u16 idx;
idx = FIELD_GET(MT_TXFREE_INFO_WLAN_ID, info);
- wcid = rcu_dereference(dev->mt76.wcid[idx]);
+ wcid = mt76_wcid_ptr(dev, idx);
sta = wcid_to_sta(wcid);
if (!sta)
goto next;
@@ -1471,12 +1468,9 @@ static void mt7996_mac_add_txs(struct mt7996_dev *dev, void *data)
if (pid < MT_PACKET_ID_NO_SKB)
return;
- if (wcidx >= mt7996_wtbl_size(dev))
- return;
-
rcu_read_lock();
- wcid = rcu_dereference(dev->mt76.wcid[wcidx]);
+ wcid = mt76_wcid_ptr(dev, wcidx);
if (!wcid)
goto out;
@@ -2353,20 +2347,12 @@ void mt7996_mac_update_stats(struct mt7996_phy *phy)
void mt7996_mac_sta_rc_work(struct work_struct *work)
{
struct mt7996_dev *dev = container_of(work, struct mt7996_dev, rc_work);
- struct ieee80211_bss_conf *link_conf;
- struct ieee80211_link_sta *link_sta;
struct mt7996_sta_link *msta_link;
- struct mt7996_vif_link *link;
- struct mt76_vif_link *mlink;
- struct ieee80211_sta *sta;
struct ieee80211_vif *vif;
- struct mt7996_sta *msta;
struct mt7996_vif *mvif;
LIST_HEAD(list);
u32 changed;
- u8 link_id;
- rcu_read_lock();
spin_lock_bh(&dev->mt76.sta_poll_lock);
list_splice_init(&dev->sta_rc_list, &list);
@@ -2377,46 +2363,28 @@ void mt7996_mac_sta_rc_work(struct work_struct *work)
changed = msta_link->changed;
msta_link->changed = 0;
-
- sta = wcid_to_sta(&msta_link->wcid);
- link_id = msta_link->wcid.link_id;
- msta = msta_link->sta;
- mvif = msta->vif;
- vif = container_of((void *)mvif, struct ieee80211_vif, drv_priv);
-
- mlink = rcu_dereference(mvif->mt76.link[link_id]);
- if (!mlink)
- continue;
-
- link_sta = rcu_dereference(sta->link[link_id]);
- if (!link_sta)
- continue;
-
- link_conf = rcu_dereference(vif->link_conf[link_id]);
- if (!link_conf)
- continue;
+ mvif = msta_link->sta->vif;
+ vif = container_of((void *)mvif, struct ieee80211_vif,
+ drv_priv);
spin_unlock_bh(&dev->mt76.sta_poll_lock);
- link = (struct mt7996_vif_link *)mlink;
-
if (changed & (IEEE80211_RC_SUPP_RATES_CHANGED |
IEEE80211_RC_NSS_CHANGED |
IEEE80211_RC_BW_CHANGED))
- mt7996_mcu_add_rate_ctrl(dev, vif, link_conf,
- link_sta, link, msta_link,
+ mt7996_mcu_add_rate_ctrl(dev, msta_link->sta, vif,
+ msta_link->wcid.link_id,
true);
if (changed & IEEE80211_RC_SMPS_CHANGED)
- mt7996_mcu_set_fixed_field(dev, link_sta, link,
- msta_link, NULL,
+ mt7996_mcu_set_fixed_field(dev, msta_link->sta, NULL,
+ msta_link->wcid.link_id,
RATE_PARAM_MMPS_UPDATE);
spin_lock_bh(&dev->mt76.sta_poll_lock);
}
spin_unlock_bh(&dev->mt76.sta_poll_lock);
- rcu_read_unlock();
}
void mt7996_mac_work(struct work_struct *work)
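The rework of mt7996_mac_sta_rc_work() drops the long-lived rcu_read_lock() around the whole loop; as the mcu.c hunks further down show, the link and station lookups now happen inside mt7996_mcu_add_rate_ctrl() and mt7996_mcu_set_fixed_field(), which bracket only the dereferences and drop RCU before the MCU message goes out. The shape of that pattern, condensed (sketch):

    rcu_read_lock();
    mlink = rcu_dereference(mvif->mt76.link[link_id]);
    msta_link = rcu_dereference(msta->link[link_id]);
    if (!mlink || !msta_link) {
            rcu_read_unlock();
            return -ENODEV;
    }
    /* ... build the request while the pointers are guaranteed live ... */
    rcu_read_unlock();  /* drop RCU before the potentially sleeping send */
    return mt76_mcu_skb_send_msg(&dev->mt76, skb, cmd, true);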
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c
index 78ae9f5cb176..07dd75ce94a5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c
@@ -1112,9 +1112,8 @@ mt7996_mac_sta_event(struct mt7996_dev *dev, struct ieee80211_vif *vif,
if (err)
return err;
- err = mt7996_mcu_add_rate_ctrl(dev, vif, link_conf,
- link_sta, link,
- msta_link, false);
+ err = mt7996_mcu_add_rate_ctrl(dev, msta_link->sta, vif,
+ link_id, false);
if (err)
return err;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c
index f0adc0b4b8b6..994526c65bfc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c
@@ -555,7 +555,7 @@ mt7996_mcu_rx_all_sta_info_event(struct mt7996_dev *dev, struct sk_buff *skb)
switch (le16_to_cpu(res->tag)) {
case UNI_ALL_STA_TXRX_RATE:
wlan_idx = le16_to_cpu(res->rate[i].wlan_idx);
- wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]);
+ wcid = mt76_wcid_ptr(dev, wlan_idx);
if (!wcid)
break;
@@ -565,7 +565,7 @@ mt7996_mcu_rx_all_sta_info_event(struct mt7996_dev *dev, struct sk_buff *skb)
break;
case UNI_ALL_STA_TXRX_ADM_STAT:
wlan_idx = le16_to_cpu(res->adm_stat[i].wlan_idx);
- wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]);
+ wcid = mt76_wcid_ptr(dev, wlan_idx);
if (!wcid)
break;
@@ -579,7 +579,7 @@ mt7996_mcu_rx_all_sta_info_event(struct mt7996_dev *dev, struct sk_buff *skb)
break;
case UNI_ALL_STA_TXRX_MSDU_COUNT:
wlan_idx = le16_to_cpu(res->msdu_cnt[i].wlan_idx);
- wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]);
+ wcid = mt76_wcid_ptr(dev, wlan_idx);
if (!wcid)
break;
@@ -676,10 +676,7 @@ mt7996_mcu_wed_rro_event(struct mt7996_dev *dev, struct sk_buff *skb)
e = (void *)skb->data;
idx = le16_to_cpu(e->wlan_id);
- if (idx >= ARRAY_SIZE(dev->mt76.wcid))
- break;
-
- wcid = rcu_dereference(dev->mt76.wcid[idx]);
+ wcid = mt76_wcid_ptr(dev, idx);
if (!wcid || !wcid->sta)
break;
@@ -1905,22 +1902,35 @@ int mt7996_mcu_set_fixed_rate_ctrl(struct mt7996_dev *dev,
MCU_WM_UNI_CMD(RA), true);
}
-int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev,
- struct ieee80211_link_sta *link_sta,
- struct mt7996_vif_link *link,
- struct mt7996_sta_link *msta_link,
- void *data, u32 field)
+int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, struct mt7996_sta *msta,
+ void *data, u8 link_id, u32 field)
{
- struct sta_phy_uni *phy = data;
+ struct mt7996_vif *mvif = msta->vif;
+ struct mt7996_sta_link *msta_link;
struct sta_rec_ra_fixed_uni *ra;
+ struct sta_phy_uni *phy = data;
+ struct mt76_vif_link *mlink;
struct sk_buff *skb;
+ int err = -ENODEV;
struct tlv *tlv;
- skb = __mt76_connac_mcu_alloc_sta_req(&dev->mt76, &link->mt76,
+ rcu_read_lock();
+
+ mlink = rcu_dereference(mvif->mt76.link[link_id]);
+ if (!mlink)
+ goto error_unlock;
+
+ msta_link = rcu_dereference(msta->link[link_id]);
+ if (!msta_link)
+ goto error_unlock;
+
+ skb = __mt76_connac_mcu_alloc_sta_req(&dev->mt76, mlink,
&msta_link->wcid,
MT7996_STA_UPDATE_MAX_SIZE);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ goto error_unlock;
+ }
tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_RA_UPDATE, sizeof(*ra));
ra = (struct sta_rec_ra_fixed_uni *)tlv;
@@ -1935,106 +1945,149 @@ int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev,
if (phy)
ra->phy = *phy;
break;
- case RATE_PARAM_MMPS_UPDATE:
+ case RATE_PARAM_MMPS_UPDATE: {
+ struct ieee80211_sta *sta = wcid_to_sta(&msta_link->wcid);
+ struct ieee80211_link_sta *link_sta;
+
+ link_sta = rcu_dereference(sta->link[link_id]);
+ if (!link_sta) {
+ dev_kfree_skb(skb);
+ goto error_unlock;
+ }
+
ra->mmps_mode = mt7996_mcu_get_mmps_mode(link_sta->smps_mode);
break;
+ }
default:
break;
}
ra->field = cpu_to_le32(field);
+ rcu_read_unlock();
+
return mt76_mcu_skb_send_msg(&dev->mt76, skb,
MCU_WMWA_UNI_CMD(STA_REC_UPDATE), true);
+error_unlock:
+ rcu_read_unlock();
+
+ return err;
}
static int
-mt7996_mcu_add_rate_ctrl_fixed(struct mt7996_dev *dev,
- struct ieee80211_link_sta *link_sta,
- struct mt7996_vif_link *link,
- struct mt7996_sta_link *msta_link)
+mt7996_mcu_add_rate_ctrl_fixed(struct mt7996_dev *dev, struct mt7996_sta *msta,
+ struct ieee80211_vif *vif, u8 link_id)
{
- struct cfg80211_chan_def *chandef = &link->phy->mt76->chandef;
- struct cfg80211_bitrate_mask *mask = &link->bitrate_mask;
- enum nl80211_band band = chandef->chan->band;
+ struct ieee80211_link_sta *link_sta;
+ struct cfg80211_bitrate_mask mask;
+ struct mt7996_sta_link *msta_link;
+ struct mt7996_vif_link *link;
struct sta_phy_uni phy = {};
- int ret, nrates = 0;
+ struct ieee80211_sta *sta;
+ int ret, nrates = 0, idx;
+ enum nl80211_band band;
+ bool has_he;
#define __sta_phy_bitrate_mask_check(_mcs, _gi, _ht, _he) \
do { \
- u8 i, gi = mask->control[band]._gi; \
+ u8 i, gi = mask.control[band]._gi; \
gi = (_he) ? gi : gi == NL80211_TXRATE_FORCE_SGI; \
phy.sgi = gi; \
- phy.he_ltf = mask->control[band].he_ltf; \
- for (i = 0; i < ARRAY_SIZE(mask->control[band]._mcs); i++) { \
- if (!mask->control[band]._mcs[i]) \
+ phy.he_ltf = mask.control[band].he_ltf; \
+ for (i = 0; i < ARRAY_SIZE(mask.control[band]._mcs); i++) { \
+ if (!mask.control[band]._mcs[i]) \
continue; \
- nrates += hweight16(mask->control[band]._mcs[i]); \
- phy.mcs = ffs(mask->control[band]._mcs[i]) - 1; \
+ nrates += hweight16(mask.control[band]._mcs[i]); \
+ phy.mcs = ffs(mask.control[band]._mcs[i]) - 1; \
if (_ht) \
phy.mcs += 8 * i; \
} \
} while (0)
- if (link_sta->he_cap.has_he) {
+ rcu_read_lock();
+
+ link = mt7996_vif_link(dev, vif, link_id);
+ if (!link)
+ goto error_unlock;
+
+ msta_link = rcu_dereference(msta->link[link_id]);
+ if (!msta_link)
+ goto error_unlock;
+
+ sta = wcid_to_sta(&msta_link->wcid);
+ link_sta = rcu_dereference(sta->link[link_id]);
+ if (!link_sta)
+ goto error_unlock;
+
+ band = link->phy->mt76->chandef.chan->band;
+ has_he = link_sta->he_cap.has_he;
+ mask = link->bitrate_mask;
+ idx = msta_link->wcid.idx;
+
+ if (has_he) {
__sta_phy_bitrate_mask_check(he_mcs, he_gi, 0, 1);
} else if (link_sta->vht_cap.vht_supported) {
__sta_phy_bitrate_mask_check(vht_mcs, gi, 0, 0);
} else if (link_sta->ht_cap.ht_supported) {
__sta_phy_bitrate_mask_check(ht_mcs, gi, 1, 0);
} else {
- nrates = hweight32(mask->control[band].legacy);
- phy.mcs = ffs(mask->control[band].legacy) - 1;
+ nrates = hweight32(mask.control[band].legacy);
+ phy.mcs = ffs(mask.control[band].legacy) - 1;
}
+
+ rcu_read_unlock();
+
#undef __sta_phy_bitrate_mask_check
/* fall back to auto rate control */
- if (mask->control[band].gi == NL80211_TXRATE_DEFAULT_GI &&
- mask->control[band].he_gi == GENMASK(7, 0) &&
- mask->control[band].he_ltf == GENMASK(7, 0) &&
+ if (mask.control[band].gi == NL80211_TXRATE_DEFAULT_GI &&
+ mask.control[band].he_gi == GENMASK(7, 0) &&
+ mask.control[band].he_ltf == GENMASK(7, 0) &&
nrates != 1)
return 0;
/* fixed single rate */
if (nrates == 1) {
- ret = mt7996_mcu_set_fixed_field(dev, link_sta, link,
- msta_link, &phy,
+ ret = mt7996_mcu_set_fixed_field(dev, msta, &phy, link_id,
RATE_PARAM_FIXED_MCS);
if (ret)
return ret;
}
/* fixed GI */
- if (mask->control[band].gi != NL80211_TXRATE_DEFAULT_GI ||
- mask->control[band].he_gi != GENMASK(7, 0)) {
+ if (mask.control[band].gi != NL80211_TXRATE_DEFAULT_GI ||
+ mask.control[band].he_gi != GENMASK(7, 0)) {
u32 addr;
/* firmware updates only TXCMD but doesn't take WTBL into
* account, so driver should update here to reflect the
* actual txrate hardware sends out.
*/
- addr = mt7996_mac_wtbl_lmac_addr(dev, msta_link->wcid.idx, 7);
- if (link_sta->he_cap.has_he)
+ addr = mt7996_mac_wtbl_lmac_addr(dev, idx, 7);
+ if (has_he)
mt76_rmw_field(dev, addr, GENMASK(31, 24), phy.sgi);
else
mt76_rmw_field(dev, addr, GENMASK(15, 12), phy.sgi);
- ret = mt7996_mcu_set_fixed_field(dev, link_sta, link,
- msta_link, &phy,
+ ret = mt7996_mcu_set_fixed_field(dev, msta, &phy, link_id,
RATE_PARAM_FIXED_GI);
if (ret)
return ret;
}
/* fixed HE_LTF */
- if (mask->control[band].he_ltf != GENMASK(7, 0)) {
- ret = mt7996_mcu_set_fixed_field(dev, link_sta, link,
- msta_link, &phy,
+ if (mask.control[band].he_ltf != GENMASK(7, 0)) {
+ ret = mt7996_mcu_set_fixed_field(dev, msta, &phy, link_id,
RATE_PARAM_FIXED_HE_LTF);
if (ret)
return ret;
}
return 0;
+
+error_unlock:
+ rcu_read_unlock();
+
+ return -ENODEV;
}
static void
@@ -2145,21 +2198,44 @@ mt7996_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7996_dev *dev,
memset(ra->rx_rcpi, INIT_RCPI, sizeof(ra->rx_rcpi));
}
-int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev,
- struct ieee80211_vif *vif,
- struct ieee80211_bss_conf *link_conf,
- struct ieee80211_link_sta *link_sta,
- struct mt7996_vif_link *link,
- struct mt7996_sta_link *msta_link, bool changed)
+int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, struct mt7996_sta *msta,
+ struct ieee80211_vif *vif, u8 link_id,
+ bool changed)
{
+ struct ieee80211_bss_conf *link_conf;
+ struct ieee80211_link_sta *link_sta;
+ struct mt7996_sta_link *msta_link;
+ struct mt7996_vif_link *link;
+ struct ieee80211_sta *sta;
struct sk_buff *skb;
- int ret;
+ int ret = -ENODEV;
+
+ rcu_read_lock();
+
+ link = mt7996_vif_link(dev, vif, link_id);
+ if (!link)
+ goto error_unlock;
+
+ msta_link = rcu_dereference(msta->link[link_id]);
+ if (!msta_link)
+ goto error_unlock;
+
+ sta = wcid_to_sta(&msta_link->wcid);
+ link_sta = rcu_dereference(sta->link[link_id]);
+ if (!link_sta)
+ goto error_unlock;
+
+ link_conf = rcu_dereference(vif->link_conf[link_id]);
+ if (!link_conf)
+ goto error_unlock;
skb = __mt76_connac_mcu_alloc_sta_req(&dev->mt76, &link->mt76,
&msta_link->wcid,
MT7996_STA_UPDATE_MAX_SIZE);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
+ if (IS_ERR(skb)) {
+ ret = PTR_ERR(skb);
+ goto error_unlock;
+ }
/* firmware rc algorithm refers to sta_rec_he for HE control.
* once dev->rc_work changes the settings driver should also
@@ -2173,12 +2249,19 @@ int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev,
*/
mt7996_mcu_sta_rate_ctrl_tlv(skb, dev, vif, link_conf, link_sta, link);
+ rcu_read_unlock();
+
ret = mt76_mcu_skb_send_msg(&dev->mt76, skb,
MCU_WMWA_UNI_CMD(STA_REC_UPDATE), true);
if (ret)
return ret;
- return mt7996_mcu_add_rate_ctrl_fixed(dev, link_sta, link, msta_link);
+ return mt7996_mcu_add_rate_ctrl_fixed(dev, msta, vif, link_id);
+
+error_unlock:
+ rcu_read_unlock();
+
+ return ret;
}
static int
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h
index 1ad6bc046f7c..33ac16b64ef1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h
@@ -620,23 +620,17 @@ int mt7996_mcu_beacon_inband_discov(struct mt7996_dev *dev,
int mt7996_mcu_add_obss_spr(struct mt7996_phy *phy,
struct mt7996_vif_link *link,
struct ieee80211_he_obss_pd *he_obss_pd);
-int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev,
- struct ieee80211_vif *vif,
- struct ieee80211_bss_conf *link_conf,
- struct ieee80211_link_sta *link_sta,
- struct mt7996_vif_link *link,
- struct mt7996_sta_link *msta_link, bool changed);
+int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, struct mt7996_sta *msta,
+ struct ieee80211_vif *vif, u8 link_id,
+ bool changed);
int mt7996_set_channel(struct mt76_phy *mphy);
int mt7996_mcu_set_chan_info(struct mt7996_phy *phy, u16 tag);
int mt7996_mcu_set_tx(struct mt7996_dev *dev, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf);
int mt7996_mcu_set_fixed_rate_ctrl(struct mt7996_dev *dev,
void *data, u16 version);
-int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev,
- struct ieee80211_link_sta *link_sta,
- struct mt7996_vif_link *link,
- struct mt7996_sta_link *msta_link,
- void *data, u32 field);
+int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, struct mt7996_sta *msta,
+ void *data, u8 link_id, u32 field);
int mt7996_mcu_set_eeprom(struct mt7996_dev *dev);
int mt7996_mcu_get_eeprom(struct mt7996_dev *dev, u32 offset, u8 *buf, u32 buf_len);
int mt7996_mcu_get_eeprom_free_block(struct mt7996_dev *dev, u8 *block_num);
diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index 513916469ca2..e6cf16706667 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -64,7 +64,7 @@ mt76_tx_status_unlock(struct mt76_dev *dev, struct sk_buff_head *list)
struct mt76_tx_cb *cb = mt76_tx_skb_cb(skb);
struct mt76_wcid *wcid;
- wcid = rcu_dereference(dev->wcid[cb->wcid]);
+ wcid = __mt76_wcid_ptr(dev, cb->wcid);
if (wcid) {
status.sta = wcid_to_sta(wcid);
if (status.sta && (wcid->rate.flags || wcid->rate.legacy)) {
@@ -251,9 +251,7 @@ void __mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid_idx, struct sk_buff *
rcu_read_lock();
- if (wcid_idx < ARRAY_SIZE(dev->wcid))
- wcid = rcu_dereference(dev->wcid[wcid_idx]);
-
+ wcid = __mt76_wcid_ptr(dev, wcid_idx);
mt76_tx_check_non_aql(dev, wcid, skb);
#ifdef CONFIG_NL80211_TESTMODE
@@ -538,7 +536,7 @@ mt76_txq_schedule_list(struct mt76_phy *phy, enum mt76_txq_id qid)
break;
mtxq = (struct mt76_txq *)txq->drv_priv;
- wcid = rcu_dereference(dev->wcid[mtxq->wcid]);
+ wcid = __mt76_wcid_ptr(dev, mtxq->wcid);
if (!wcid || test_bit(MT_WCID_FLAG_PS, &wcid->flags))
continue;
@@ -617,7 +615,8 @@ mt76_txq_schedule_pending_wcid(struct mt76_phy *phy, struct mt76_wcid *wcid,
if ((dev->drv->drv_flags & MT_DRV_HW_MGMT_TXQ) &&
!(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) &&
!ieee80211_is_data(hdr->frame_control) &&
- !ieee80211_is_bufferable_mmpdu(skb))
+ (!ieee80211_is_bufferable_mmpdu(skb) ||
+ ieee80211_is_deauth(hdr->frame_control)))
qid = MT_TXQ_PSD;
q = phy->q_tx[qid];
diff --git a/drivers/net/wireless/mediatek/mt76/util.c b/drivers/net/wireless/mediatek/mt76/util.c
index 95b3dc96e4c4..97249ebb4bc8 100644
--- a/drivers/net/wireless/mediatek/mt76/util.c
+++ b/drivers/net/wireless/mediatek/mt76/util.c
@@ -83,7 +83,7 @@ int mt76_get_min_avg_rssi(struct mt76_dev *dev, u8 phy_idx)
if (!(mask & 1))
continue;
- wcid = rcu_dereference(dev->wcid[j]);
+ wcid = __mt76_wcid_ptr(dev, j);
if (!wcid || wcid->phy_idx != phy_idx)
continue;
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00soc.c b/drivers/net/wireless/ralink/rt2x00/rt2x00soc.c
index eface610178d..f7f3a2340c39 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00soc.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00soc.c
@@ -108,7 +108,7 @@ exit_free_device:
}
EXPORT_SYMBOL_GPL(rt2x00soc_probe);
-int rt2x00soc_remove(struct platform_device *pdev)
+void rt2x00soc_remove(struct platform_device *pdev)
{
struct ieee80211_hw *hw = platform_get_drvdata(pdev);
struct rt2x00_dev *rt2x00dev = hw->priv;
@@ -119,8 +119,6 @@ int rt2x00soc_remove(struct platform_device *pdev)
rt2x00lib_remove_dev(rt2x00dev);
rt2x00soc_free_reg(rt2x00dev);
ieee80211_free_hw(hw);
-
- return 0;
}
EXPORT_SYMBOL_GPL(rt2x00soc_remove);
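The int-to-void change tracks the tree-wide conversion of struct platform_driver::remove, whose return value the driver core always ignored. A user of this helper wires it up unchanged apart from the type; sketch modeled on the rt2800soc glue (details assumed):

    static struct platform_driver rt2800soc_driver = {
            .driver = {
                    .name = "rt2800_wmac",
            },
            .probe = rt2800soc_probe,   /* calls rt2x00soc_probe() internally */
            .remove = rt2x00soc_remove, /* now returns void */
    };
    module_platform_driver(rt2800soc_driver);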
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00soc.h b/drivers/net/wireless/ralink/rt2x00/rt2x00soc.h
index 021fd06b3627..d6226b8a10e0 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00soc.h
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00soc.h
@@ -17,7 +17,7 @@
* SoC driver handlers.
*/
int rt2x00soc_probe(struct platform_device *pdev, const struct rt2x00_ops *ops);
-int rt2x00soc_remove(struct platform_device *pdev);
+void rt2x00soc_remove(struct platform_device *pdev);
#ifdef CONFIG_PM
int rt2x00soc_suspend(struct platform_device *pdev, pm_message_t state);
int rt2x00soc_resume(struct platform_device *pdev);
diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
index 9653dbaac3c0..781510a3ec6d 100644
--- a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
@@ -583,7 +583,11 @@ void zd_mac_tx_to_dev(struct sk_buff *skb, int error)
skb_queue_tail(q, skb);
while (skb_queue_len(q) > ZD_MAC_MAX_ACK_WAITERS) {
- zd_mac_tx_status(hw, skb_dequeue(q),
+ skb = skb_dequeue(q);
+ if (!skb)
+ break;
+
+ zd_mac_tx_status(hw, skb,
mac->ack_pending ? mac->ack_signal : 0,
NULL);
mac->ack_pending = 0;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 92697f98c601..895fb163d48e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -381,12 +381,12 @@ static void nvme_log_err_passthru(struct request *req)
nr->status & NVME_SC_MASK, /* Status Code */
nr->status & NVME_STATUS_MORE ? "MORE " : "",
nr->status & NVME_STATUS_DNR ? "DNR " : "",
- nr->cmd->common.cdw10,
- nr->cmd->common.cdw11,
- nr->cmd->common.cdw12,
- nr->cmd->common.cdw13,
- nr->cmd->common.cdw14,
- nr->cmd->common.cdw14);
+ le32_to_cpu(nr->cmd->common.cdw10),
+ le32_to_cpu(nr->cmd->common.cdw11),
+ le32_to_cpu(nr->cmd->common.cdw12),
+ le32_to_cpu(nr->cmd->common.cdw13),
+ le32_to_cpu(nr->cmd->common.cdw14),
+ le32_to_cpu(nr->cmd->common.cdw15));
}
enum nvme_disposition {
@@ -764,6 +764,10 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
!test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE;
+
+ if (!(rq->rq_flags & RQF_DONTPREP))
+ nvme_clear_nvme_request(rq);
+
return nvme_host_path_error(rq);
}
EXPORT_SYMBOL_GPL(nvme_fail_nonready_command);
@@ -2015,21 +2019,41 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl,
}
-static void nvme_update_atomic_write_disk_info(struct nvme_ns *ns,
- struct nvme_id_ns *id, struct queue_limits *lim,
- u32 bs, u32 atomic_bs)
+static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
+ struct nvme_id_ns *id, struct queue_limits *lim, u32 bs)
{
- unsigned int boundary = 0;
+ u32 atomic_bs, boundary = 0;
+
+ /*
+ * We do not support an offset for the atomic boundaries.
+ */
+ if (id->nabo)
+ return bs;
- if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) {
- if (le16_to_cpu(id->nabspf))
+ if ((id->nsfeat & NVME_NS_FEAT_ATOMICS) && id->nawupf) {
+ /*
+ * Use the per-namespace atomic write unit when available.
+ */
+ atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
+ if (id->nabspf)
boundary = (le16_to_cpu(id->nabspf) + 1) * bs;
+ } else {
+ /*
+ * Use the controller wide atomic write unit. This sucks
+ * because the limit is defined in terms of logical blocks while
+ * namespaces can have different formats, and because there is
+ * no clear language in the specification prohibiting different
+ * values for different controllers in the subsystem.
+ */
+ atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
}
+
lim->atomic_write_hw_max = atomic_bs;
lim->atomic_write_hw_boundary = boundary;
lim->atomic_write_hw_unit_min = bs;
lim->atomic_write_hw_unit_max = rounddown_pow_of_two(atomic_bs);
lim->features |= BLK_FEAT_ATOMIC_WRITES;
+ return atomic_bs;
}
static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl)
@@ -2067,34 +2091,8 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
valid = false;
}
- atomic_bs = phys_bs = bs;
- if (id->nabo == 0) {
- /*
- * Bit 1 indicates whether NAWUPF is defined for this namespace
- * and whether it should be used instead of AWUPF. If NAWUPF ==
- * 0 then AWUPF must be used instead.
- */
- if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
- atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
- else
- atomic_bs = (1 + ns->ctrl->awupf) * bs;
-
- /*
- * Set subsystem atomic bs.
- */
- if (ns->ctrl->subsys->atomic_bs) {
- if (atomic_bs != ns->ctrl->subsys->atomic_bs) {
- dev_err_ratelimited(ns->ctrl->device,
- "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
- ns->disk ? ns->disk->disk_name : "?",
- ns->ctrl->subsys->atomic_bs,
- atomic_bs);
- }
- } else
- ns->ctrl->subsys->atomic_bs = atomic_bs;
-
- nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs);
- }
+ phys_bs = bs;
+ atomic_bs = nvme_configure_atomic_write(ns, id, lim, bs);
if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
/* NPWG = Namespace Preferred Write Granularity */
@@ -2382,16 +2380,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if (!nvme_update_disk_info(ns, id, &lim))
capacity = 0;
- /*
- * Validate the max atomic write size fits within the subsystem's
- * atomic write capabilities.
- */
- if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) {
- blk_mq_unfreeze_queue(ns->disk->queue, memflags);
- ret = -ENXIO;
- goto out;
- }
-
nvme_config_discard(ns, &lim);
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
ns->head->ids.csi == NVME_CSI_ZNS)
@@ -3215,6 +3203,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
memcpy(subsys->model, id->mn, sizeof(subsys->model));
subsys->vendor_id = le16_to_cpu(id->vid);
subsys->cmic = id->cmic;
+ subsys->awupf = le16_to_cpu(id->awupf);
/* Versions prior to 1.4 don't necessarily report a valid type */
if (id->cntrltype == NVME_CTRL_DISC ||
@@ -3647,7 +3636,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
else if (!ctrl->apst_enabled && prev_apst_enabled)
dev_pm_qos_hide_latency_tolerance(ctrl->device);
- ctrl->awupf = le16_to_cpu(id->awupf);
out_free:
kfree(id);
return ret;
@@ -4036,6 +4024,10 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
list_add_tail_rcu(&ns->siblings, &head->list);
ns->head = head;
mutex_unlock(&ctrl->subsys->lock);
+
+#ifdef CONFIG_NVME_MULTIPATH
+ cancel_delayed_work(&head->remove_work);
+#endif
return 0;
out_put_ns_head:
@@ -4080,7 +4072,7 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns)
return;
}
}
- list_add(&ns->list, &ns->ctrl->namespaces);
+ list_add_rcu(&ns->list, &ns->ctrl->namespaces);
}
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
@@ -4089,6 +4081,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
struct nvme_ns *ns;
struct gendisk *disk;
int node = ctrl->numa_node;
+ bool last_path = false;
ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
if (!ns)
@@ -4181,9 +4174,22 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
- if (list_empty(&ns->head->list))
+ if (list_empty(&ns->head->list)) {
list_del_init(&ns->head->entry);
+ /*
+ * If multipath is not configured, we still create a namespace
+ * head (nshead), but head->disk is not initialized in that
+ * case. As a result, only a single reference to nshead is held
+ * (via kref_init()) when it is created. Therefore, ensure that
+ * we do not release the reference to nshead twice if head->disk
+ * is not present.
+ */
+ if (ns->head->disk)
+ last_path = true;
+ }
mutex_unlock(&ctrl->subsys->lock);
+ if (last_path)
+ nvme_put_ns_head(ns->head);
nvme_put_ns_head(ns->head);
out_cleanup_disk:
put_disk(disk);
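Both NAWUPF and AWUPF are 0's based, hence the `1 +` in nvme_configure_atomic_write(). A worked example of the resulting queue limits (values hypothetical):

    /* bs = 4096 bytes, id->nawupf = 7, id->nabspf = 15:
     *   atomic_bs = (1 + 7)  * 4096 = 32768 bytes
     *   boundary  = (15 + 1) * 4096 = 65536 bytes
     *   atomic_write_hw_unit_max = rounddown_pow_of_two(32768) = 32768
     * Without NVME_NS_FEAT_ATOMICS the subsystem-wide awupf is used the same
     * way, which is also why awupf moves from nvme_ctrl to nvme_subsystem in
     * the nvme.h hunk below.
     */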
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 0b50da2f1175..6b3ac8ae3f34 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -429,21 +429,14 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
pdu->result = le64_to_cpu(nvme_req(req)->result.u64);
/*
- * For iopoll, complete it directly. Note that using the uring_cmd
- * helper for this is safe only because we check blk_rq_is_poll().
- * As that returns false if we're NOT on a polled queue, then it's
- * safe to use the polled completion helper.
- *
- * Otherwise, move the completion to task work.
+ * IOPOLL could potentially complete this request directly, but
+ * if multiple rings are polling on the same queue, then it's possible
+ * for one ring to find completions for another ring. Punting the
+ * completion via task_work will always direct it to the right
+ * location, rather than potentially complete requests for ringA
+ * under iopoll invocations from ringB.
*/
- if (blk_rq_is_poll(req)) {
- if (pdu->bio)
- blk_rq_unmap_user(pdu->bio);
- io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status);
- } else {
- io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
- }
-
+ io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
return RQ_END_IO_FREE;
}
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index e040e467f9fa..3da980dc60d9 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -690,8 +690,8 @@ static void nvme_remove_head(struct nvme_ns_head *head)
nvme_cdev_del(&head->cdev, &head->cdev_device);
synchronize_srcu(&head->srcu);
del_gendisk(head->disk);
- nvme_put_ns_head(head);
}
+ nvme_put_ns_head(head);
}
static void nvme_remove_head_work(struct work_struct *work)
@@ -1200,7 +1200,8 @@ void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head)
*/
srcu_idx = srcu_read_lock(&head->srcu);
- list_for_each_entry_rcu(ns, &head->list, siblings) {
+ list_for_each_entry_srcu(ns, &head->list, siblings,
+ srcu_read_lock_held(&head->srcu)) {
/*
* Ensure that ns path disk node is already added otherwise we
* may get invalid kobj name for target
@@ -1291,6 +1292,9 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
bool remove = false;
+ if (!head->disk)
+ return;
+
mutex_lock(&head->subsys->lock);
/*
* We are called when all paths have been removed, and at that point
@@ -1311,7 +1315,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
*/
if (!try_module_get(THIS_MODULE))
goto out;
- queue_delayed_work(nvme_wq, &head->remove_work,
+ mod_delayed_work(nvme_wq, &head->remove_work,
head->delayed_removal_secs * HZ);
} else {
list_del_init(&head->entry);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a468cdc5b5cb..7df2ea21851f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -410,7 +410,6 @@ struct nvme_ctrl {
enum nvme_ctrl_type cntrltype;
enum nvme_dctype dctype;
- u16 awupf; /* 0's based value. */
};
static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
@@ -443,11 +442,11 @@ struct nvme_subsystem {
u8 cmic;
enum nvme_subsys_type subtype;
u16 vendor_id;
+ u16 awupf; /* 0's based value. */
struct ida ns_ida;
#ifdef CONFIG_NVME_MULTIPATH
enum nvme_iopolicy iopolicy;
#endif
- u32 atomic_bs;
};
/*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 8ff12e415cb5..320aaa41ec39 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2101,8 +2101,6 @@ static void nvme_map_cmb(struct nvme_dev *dev)
if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
(NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
pci_p2pmem_publish(pdev, true);
-
- nvme_update_attrs(dev);
}
static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
@@ -3010,6 +3008,8 @@ static void nvme_reset_work(struct work_struct *work)
if (result < 0)
goto out;
+ nvme_update_attrs(dev);
+
result = nvme_setup_io_queues(dev);
if (result)
goto out;
@@ -3343,6 +3343,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (result < 0)
goto out_disable;
+ nvme_update_attrs(dev);
+
result = nvme_setup_io_queues(dev);
if (result)
goto out_disable;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index df69a9dee71c..51df72f5e89b 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -867,6 +867,8 @@ static inline void nvmet_req_bio_put(struct nvmet_req *req, struct bio *bio)
{
if (bio != &req->b.inline_bio)
bio_put(bio);
+ else
+ bio_uninit(bio);
}
#ifdef CONFIG_NVME_TARGET_TCP_TLS
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 688033b88d38..470bf37e5a63 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1928,10 +1928,10 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
struct sock *sk = queue->sock->sk;
/* Restore the default callbacks before starting upcall */
- read_lock_bh(&sk->sk_callback_lock);
+ write_lock_bh(&sk->sk_callback_lock);
sk->sk_user_data = NULL;
sk->sk_data_ready = port->data_ready;
- read_unlock_bh(&sk->sk_callback_lock);
+ write_unlock_bh(&sk->sk_callback_lock);
if (!nvmet_tcp_try_peek_pdu(queue)) {
if (!nvmet_tcp_tls_handshake(queue))
return;
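The nvmet-tcp fix takes the writer side of sk_callback_lock because sk_user_data and sk_data_ready are being modified, not merely read; read_lock_bh() only suffices for consumers that dereference them. The matching reader side, for contrast (generic sketch):

    read_lock_bh(&sk->sk_callback_lock);
    queue = sk->sk_user_data;  /* dereference only under the read lock */
    read_unlock_bh(&sk->sk_callback_lock);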
diff --git a/drivers/nvmem/imx-ocotp-ele.c b/drivers/nvmem/imx-ocotp-ele.c
index ca6dd71d8a2e..7807ec0e2d18 100644
--- a/drivers/nvmem/imx-ocotp-ele.c
+++ b/drivers/nvmem/imx-ocotp-ele.c
@@ -12,6 +12,7 @@
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
+#include <linux/if_ether.h> /* ETH_ALEN */
enum fuse_type {
FUSE_FSB = BIT(0),
@@ -118,9 +119,11 @@ static int imx_ocotp_cell_pp(void *context, const char *id, int index,
int i;
/* Deal with some post processing of nvmem cell data */
- if (id && !strcmp(id, "mac-address"))
+ if (id && !strcmp(id, "mac-address")) {
+ bytes = min(bytes, ETH_ALEN);
for (i = 0; i < bytes / 2; i++)
swap(buf[i], buf[bytes - i - 1]);
+ }
return 0;
}
diff --git a/drivers/nvmem/imx-ocotp.c b/drivers/nvmem/imx-ocotp.c
index 79dd4fda0329..7bf7656d4f96 100644
--- a/drivers/nvmem/imx-ocotp.c
+++ b/drivers/nvmem/imx-ocotp.c
@@ -23,6 +23,7 @@
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/delay.h>
+#include <linux/if_ether.h> /* ETH_ALEN */
#define IMX_OCOTP_OFFSET_B0W0 0x400 /* Offset from base address of the
* OTP Bank0 Word0
@@ -227,9 +228,11 @@ static int imx_ocotp_cell_pp(void *context, const char *id, int index,
int i;
/* Deal with some post processing of nvmem cell data */
- if (id && !strcmp(id, "mac-address"))
+ if (id && !strcmp(id, "mac-address")) {
+ bytes = min(bytes, ETH_ALEN);
for (i = 0; i < bytes / 2; i++)
swap(buf[i], buf[bytes - i - 1]);
+ }
return 0;
}
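Both OCOTP variants receive the same two fixes: the swap loop now runs only for the "mac-address" cell, and bytes is clamped to ETH_ALEN so a mis-sized cell cannot reverse past the buffer. A worked example of the (unchanged) reversal itself:

    /* bytes = 6: iterations i = 0..2 swap (0,5), (1,4), (2,3),
     * turning fuse order 00 11 22 33 44 55 into 55 44 33 22 11 00.
     */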
diff --git a/drivers/nvmem/layouts/u-boot-env.c b/drivers/nvmem/layouts/u-boot-env.c
index 436426d4e8f9..8571aac56295 100644
--- a/drivers/nvmem/layouts/u-boot-env.c
+++ b/drivers/nvmem/layouts/u-boot-env.c
@@ -92,7 +92,7 @@ int u_boot_env_parse(struct device *dev, struct nvmem_device *nvmem,
size_t crc32_data_offset;
size_t crc32_data_len;
size_t crc32_offset;
- __le32 *crc32_addr;
+ uint32_t *crc32_addr;
size_t data_offset;
size_t data_len;
size_t dev_size;
@@ -143,8 +143,8 @@ int u_boot_env_parse(struct device *dev, struct nvmem_device *nvmem,
goto err_kfree;
}
- crc32_addr = (__le32 *)(buf + crc32_offset);
- crc32 = le32_to_cpu(*crc32_addr);
+ crc32_addr = (uint32_t *)(buf + crc32_offset);
+ crc32 = *crc32_addr;
crc32_data_len = dev_size - crc32_data_offset;
data_len = dev_size - data_offset;
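Dropping the __le32 cast means the stored CRC is read in the CPU's native byte order rather than being force-interpreted as little-endian, the apparent rationale being that U-Boot writes the header in native order, so the old conversion broke big-endian systems. The downstream comparison then looks roughly like this sketch (assumed from the surrounding parser):

    calc = crc32(~0, buf + crc32_data_offset, crc32_data_len) ^ ~0L;
    if (calc != crc32)
            return -EINVAL;  /* stale or corrupt environment */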
diff --git a/drivers/pci/controller/pci-host-common.c b/drivers/pci/controller/pci-host-common.c
index b0992325dd65..b37052863847 100644
--- a/drivers/pci/controller/pci-host-common.c
+++ b/drivers/pci/controller/pci-host-common.c
@@ -64,13 +64,13 @@ int pci_host_common_init(struct platform_device *pdev,
of_pci_check_probe_only();
+ platform_set_drvdata(pdev, bridge);
+
/* Parse and map our Configuration Space windows */
cfg = gen_pci_init(dev, bridge, ops);
if (IS_ERR(cfg))
return PTR_ERR(cfg);
- platform_set_drvdata(pdev, bridge);
-
bridge->sysdata = cfg;
bridge->ops = (struct pci_ops *)&ops->pci_ops;
bridge->enable_device = ops->enable_device;
diff --git a/drivers/pci/controller/pci-hyperv-intf.c b/drivers/pci/controller/pci-hyperv-intf.c
index cc96be450360..28b3e93d31c0 100644
--- a/drivers/pci/controller/pci-hyperv-intf.c
+++ b/drivers/pci/controller/pci-hyperv-intf.c
@@ -14,6 +14,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/hyperv.h>
+#include <linux/export.h>
struct hyperv_pci_block_ops hvpci_block_ops;
EXPORT_SYMBOL_GPL(hvpci_block_ops);
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index ef5d655a0052..13680363ff19 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -600,7 +600,7 @@ static unsigned int hv_msi_get_int_vector(struct irq_data *data)
#define hv_msi_prepare pci_msi_prepare
/**
- * hv_arch_irq_unmask() - "Unmask" the IRQ by setting its current
+ * hv_irq_retarget_interrupt() - "Unmask" the IRQ by setting its current
* affinity.
* @data: Describes the IRQ
*
@@ -609,7 +609,7 @@ static unsigned int hv_msi_get_int_vector(struct irq_data *data)
* is built out of this PCI bus's instance GUID and the function
* number of the device.
*/
-static void hv_arch_irq_unmask(struct irq_data *data)
+static void hv_irq_retarget_interrupt(struct irq_data *data)
{
struct msi_desc *msi_desc = irq_data_get_msi_desc(data);
struct hv_retarget_device_interrupt *params;
@@ -714,6 +714,20 @@ out:
dev_err(&hbus->hdev->device,
"%s() failed: %#llx", __func__, res);
}
+
+static void hv_arch_irq_unmask(struct irq_data *data)
+{
+ if (hv_root_partition())
+ /*
+ * In case of the nested root partition, the nested hypervisor
+ * is taking care of interrupt remapping and thus the
+ * MAP_DEVICE_INTERRUPT hypercall is required instead of
+ * RETARGET_INTERRUPT.
+ */
+ (void)hv_map_msi_interrupt(data, NULL);
+ else
+ hv_irq_retarget_interrupt(data);
+}
#elif defined(CONFIG_ARM64)
/*
* SPI vectors to use for vPCI; arch SPIs range is [32, 1019], but leaving a bit
@@ -4144,6 +4158,9 @@ static int __init init_hv_pci_drv(void)
if (!hv_is_hyperv_initialized())
return -ENODEV;
+ if (hv_root_partition() && !hv_nested)
+ return -ENODEV;
+
ret = hv_pci_irqchip_init();
if (ret)
return ret;
diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c
index 77fe73976654..0380d300adca 100644
--- a/drivers/pci/controller/pcie-apple.c
+++ b/drivers/pci/controller/pcie-apple.c
@@ -187,6 +187,7 @@ struct apple_pcie {
const struct hw_info *hw;
unsigned long *bitmap;
struct list_head ports;
+ struct list_head entry;
struct completion event;
struct irq_fwspec fwspec;
u32 nvecs;
@@ -205,6 +206,9 @@ struct apple_pcie_port {
int idx;
};
+static LIST_HEAD(pcie_list);
+static DEFINE_MUTEX(pcie_list_lock);
+
static void rmw_set(u32 set, void __iomem *addr)
{
writel_relaxed(readl_relaxed(addr) | set, addr);
@@ -720,13 +724,45 @@ static int apple_msi_init(struct apple_pcie *pcie)
return 0;
}
+static void apple_pcie_register(struct apple_pcie *pcie)
+{
+ guard(mutex)(&pcie_list_lock);
+
+ list_add_tail(&pcie->entry, &pcie_list);
+}
+
+static void apple_pcie_unregister(struct apple_pcie *pcie)
+{
+ guard(mutex)(&pcie_list_lock);
+
+ list_del(&pcie->entry);
+}
+
+static struct apple_pcie *apple_pcie_lookup(struct device *dev)
+{
+ struct apple_pcie *pcie;
+
+ guard(mutex)(&pcie_list_lock);
+
+ list_for_each_entry(pcie, &pcie_list, entry) {
+ if (pcie->dev == dev)
+ return pcie;
+ }
+
+ return NULL;
+}
+
static struct apple_pcie_port *apple_pcie_get_port(struct pci_dev *pdev)
{
struct pci_config_window *cfg = pdev->sysdata;
- struct apple_pcie *pcie = cfg->priv;
+ struct apple_pcie *pcie;
struct pci_dev *port_pdev;
struct apple_pcie_port *port;
+ pcie = apple_pcie_lookup(cfg->parent);
+ if (WARN_ON(!pcie))
+ return NULL;
+
/* Find the root port this device is on */
port_pdev = pcie_find_root_port(pdev);
@@ -806,10 +842,14 @@ static void apple_pcie_disable_device(struct pci_host_bridge *bridge, struct pci
static int apple_pcie_init(struct pci_config_window *cfg)
{
- struct apple_pcie *pcie = cfg->priv;
struct device *dev = cfg->parent;
+ struct apple_pcie *pcie;
int ret;
+ pcie = apple_pcie_lookup(dev);
+ if (WARN_ON(!pcie))
+ return -ENOENT;
+
for_each_available_child_of_node_scoped(dev->of_node, of_port) {
ret = apple_pcie_setup_port(pcie, of_port);
if (ret) {
@@ -852,13 +892,18 @@ static int apple_pcie_probe(struct platform_device *pdev)
mutex_init(&pcie->lock);
INIT_LIST_HEAD(&pcie->ports);
- dev_set_drvdata(dev, pcie);
ret = apple_msi_init(pcie);
if (ret)
return ret;
- return pci_host_common_init(pdev, &apple_pcie_cfg_ecam_ops);
+ apple_pcie_register(pcie);
+
+ ret = pci_host_common_init(pdev, &apple_pcie_cfg_ecam_ops);
+ if (ret)
+ apple_pcie_unregister(pcie);
+
+ return ret;
}
static const struct of_device_id apple_pcie_of_match[] = {
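apple_pcie_lookup() leans on the scope-based guard() helper from <linux/cleanup.h>: pcie_list_lock is released automatically on every return path, including the return inside the loop. The open-coded equivalent, for comparison (sketch):

    static struct apple_pcie *apple_pcie_lookup_open_coded(struct device *dev)
    {
            struct apple_pcie *pcie, *found = NULL;

            mutex_lock(&pcie_list_lock);
            list_for_each_entry(pcie, &pcie_list, entry) {
                    if (pcie->dev == dev) {
                            found = pcie;
                            break;
                    }
            }
            mutex_unlock(&pcie_list_lock);
            return found;
    }

The lookup replaces the old cfg->priv back-pointer (removed from pci/ecam.c below), so the ECAM init path no longer depends on drvdata ordering.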
diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c
index 2c5e6446e00e..260b7de2dbd5 100644
--- a/drivers/pci/ecam.c
+++ b/drivers/pci/ecam.c
@@ -84,8 +84,6 @@ struct pci_config_window *pci_ecam_create(struct device *dev,
goto err_exit_iomap;
}
- cfg->priv = dev_get_drvdata(dev);
-
if (ops->init) {
err = ops->init(cfg);
if (err)
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index ebd342bda235..91d2d92717d9 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -771,7 +771,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
u16 ignored_events = PCI_EXP_SLTSTA_DLLSC;
if (!ctrl->inband_presence_disabled)
- ignored_events |= events & PCI_EXP_SLTSTA_PDC;
+ ignored_events |= PCI_EXP_SLTSTA_PDC;
events &= ~ignored_events;
pciehp_ignore_link_change(ctrl, pdev, irq, ignored_events);
diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c
index 6ede55a7c5e6..d686488f4111 100644
--- a/drivers/pci/msi/msi.c
+++ b/drivers/pci/msi/msi.c
@@ -934,10 +934,12 @@ int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag)
if (!pdev->msix_enabled)
return -ENXIO;
- guard(msi_descs_lock)(&pdev->dev);
virq = msi_get_virq(&pdev->dev, index);
if (!virq)
return -ENXIO;
+
+ guard(msi_descs_lock)(&pdev->dev);
+
/*
* This is a horrible hack, but short of implementing a PCI
* specific interrupt chip callback and a huge pile of
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index b78e0e417324..af370628e583 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -1676,19 +1676,24 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
return NULL;
root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL);
- if (!root_ops)
- goto free_ri;
+ if (!root_ops) {
+ kfree(ri);
+ return NULL;
+ }
ri->cfg = pci_acpi_setup_ecam_mapping(root);
- if (!ri->cfg)
- goto free_root_ops;
+ if (!ri->cfg) {
+ kfree(ri);
+ kfree(root_ops);
+ return NULL;
+ }
root_ops->release_info = pci_acpi_generic_release_info;
root_ops->prepare_resources = pci_acpi_root_prepare_resources;
root_ops->pci_ops = (struct pci_ops *)&ri->cfg->ops->pci_ops;
bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg);
if (!bus)
- goto free_cfg;
+ return NULL;
/* If we must preserve the resource configuration, claim now */
host = pci_find_host_bridge(bus);
@@ -1705,14 +1710,6 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
pcie_bus_configure_settings(child);
return bus;
-
-free_cfg:
- pci_ecam_free(ri->cfg);
-free_root_ops:
- kfree(root_ops);
-free_ri:
- kfree(ri);
- return NULL;
}
void pcibios_add_bus(struct pci_bus *bus)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index e9448d55113b..9e42090fb108 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3217,14 +3217,14 @@ void pci_pm_init(struct pci_dev *dev)
/* find PCI PM capability in list */
pm = pci_find_capability(dev, PCI_CAP_ID_PM);
if (!pm)
- return;
+ goto poweron;
/* Check device's ability to generate PME# */
pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc);
if ((pmc & PCI_PM_CAP_VER_MASK) > 3) {
pci_err(dev, "unsupported PM cap regs version (%u)\n",
pmc & PCI_PM_CAP_VER_MASK);
- return;
+ goto poweron;
}
dev->pm_cap = pm;
@@ -3269,6 +3269,7 @@ void pci_pm_init(struct pci_dev *dev)
pci_read_config_word(dev, PCI_STATUS, &status);
if (status & PCI_STATUS_IMM_READY)
dev->imm_ready = 1;
+poweron:
pci_pm_power_up_and_verify_state(dev);
pm_runtime_forbid(&dev->dev);
pm_runtime_set_active(&dev->dev);
diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c
index ee5f615a9023..4bd73f038ffb 100644
--- a/drivers/pci/pcie/ptm.c
+++ b/drivers/pci/pcie/ptm.c
@@ -254,6 +254,7 @@ bool pcie_ptm_enabled(struct pci_dev *dev)
}
EXPORT_SYMBOL(pcie_ptm_enabled);
+#if IS_ENABLED(CONFIG_DEBUG_FS)
static ssize_t context_update_write(struct file *file, const char __user *ubuf,
size_t count, loff_t *ppos)
{
@@ -552,3 +553,4 @@ void pcie_ptm_destroy_debugfs(struct pci_ptm_debugfs *ptm_debugfs)
debugfs_remove_recursive(ptm_debugfs->debugfs);
}
EXPORT_SYMBOL_GPL(pcie_ptm_destroy_debugfs);
+#endif
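This hunk and the probe.c change below apply the same stub pattern: compile the real code only under IS_ENABLED(CONFIG_...) and give callers a trivial fallback so they stay ifdef-free. Generic shape (CONFIG_FOO and the function are placeholders):

    #if IS_ENABLED(CONFIG_FOO)
    int foo_setup(struct device *dev);
    #else
    static inline int foo_setup(struct device *dev)
    {
            return 0;  /* feature compiled out: succeed as a no-op */
    }
    #endif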
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 4b8693ec9e4c..e6a34db77826 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2508,6 +2508,7 @@ bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
}
EXPORT_SYMBOL(pci_bus_read_dev_vendor_id);
+#if IS_ENABLED(CONFIG_PCI_PWRCTRL)
static struct platform_device *pci_pwrctrl_create_device(struct pci_bus *bus, int devfn)
{
struct pci_host_bridge *host = pci_find_host_bridge(bus);
@@ -2537,6 +2538,12 @@ static struct platform_device *pci_pwrctrl_create_device(struct pci_bus *bus, in
return pdev;
}
+#else
+static struct platform_device *pci_pwrctrl_create_device(struct pci_bus *bus, int devfn)
+{
+ return NULL;
+}
+#endif
/*
* Read the config data for a PCI device, sanity-check it,
diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c
index 8e2daea81666..04a5a34e7a95 100644
--- a/drivers/phy/phy-core.c
+++ b/drivers/phy/phy-core.c
@@ -994,7 +994,8 @@ struct phy *phy_create(struct device *dev, struct device_node *node,
}
device_initialize(&phy->dev);
- mutex_init(&phy->mutex);
+ lockdep_register_key(&phy->lockdep_key);
+ mutex_init_with_key(&phy->mutex, &phy->lockdep_key);
phy->dev.class = &phy_class;
phy->dev.parent = dev;
@@ -1259,6 +1260,8 @@ static void phy_release(struct device *dev)
dev_vdbg(dev, "releasing '%s'\n", dev_name(dev));
debugfs_remove_recursive(phy->debugfs);
regulator_put(phy->pwr);
+ mutex_destroy(&phy->mutex);
+ lockdep_unregister_key(&phy->lockdep_key);
ida_free(&phy_ida, phy->id);
kfree(phy);
}
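phy_create() now gives every phy its own lockdep class, using lockdep_register_key() plus mutex_init_with_key() instead of the shared static key mutex_init() provides, with the key unregistered in phy_release() after mutex_destroy(). This is the standard cure for false-positive lockdep reports when instances of the same structure nest (one phy taken while holding another). The pattern in isolation (sketch):

    struct foo {
            struct mutex lock;
            struct lock_class_key key;  /* one lock class per instance */
    };

    static void foo_init(struct foo *f)
    {
            lockdep_register_key(&f->key);
            mutex_init_with_key(&f->lock, &f->key);
    }

    static void foo_destroy(struct foo *f)
    {
            mutex_destroy(&f->lock);
            lockdep_unregister_key(&f->key);  /* only after the mutex is gone */
    }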
diff --git a/drivers/phy/phy-snps-eusb2.c b/drivers/phy/phy-snps-eusb2.c
index b73a1d7e57b3..751b6d8ba2be 100644
--- a/drivers/phy/phy-snps-eusb2.c
+++ b/drivers/phy/phy-snps-eusb2.c
@@ -567,9 +567,11 @@ static int snps_eusb2_hsphy_probe(struct platform_device *pdev)
}
}
- if (IS_ERR_OR_NULL(phy->ref_clk))
- return dev_err_probe(dev, PTR_ERR(phy->ref_clk),
+ if (IS_ERR_OR_NULL(phy->ref_clk)) {
+ ret = phy->ref_clk ? PTR_ERR(phy->ref_clk) : -ENOENT;
+ return dev_err_probe(dev, ret,
"failed to get ref clk\n");
+ }
num = ARRAY_SIZE(phy->vregs);
for (i = 0; i < num; i++)
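The snps-eusb2 change closes a classic dev_err_probe() trap: PTR_ERR(NULL) is 0, so a NULL clock would have made probe "succeed" while logging a failure. The general pattern (sketch):

    if (IS_ERR_OR_NULL(res)) {
            ret = res ? PTR_ERR(res) : -ENOENT;  /* never pass 0 as the error */
            return dev_err_probe(dev, ret, "failed to get resource\n");
    }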
diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c
index 23a23f2d64e5..e818f6c3980e 100644
--- a/drivers/phy/tegra/xusb-tegra186.c
+++ b/drivers/phy/tegra/xusb-tegra186.c
@@ -648,14 +648,15 @@ static void tegra186_utmi_bias_pad_power_on(struct tegra_xusb_padctl *padctl)
udelay(100);
}
- if (padctl->soc->trk_hw_mode) {
- value = padctl_readl(padctl, XUSB_PADCTL_USB2_BIAS_PAD_CTL2);
- value |= USB2_TRK_HW_MODE;
+ value = padctl_readl(padctl, XUSB_PADCTL_USB2_BIAS_PAD_CTL2);
+ if (padctl->soc->trk_update_on_idle)
value &= ~CYA_TRK_CODE_UPDATE_ON_IDLE;
- padctl_writel(padctl, value, XUSB_PADCTL_USB2_BIAS_PAD_CTL2);
- } else {
+ if (padctl->soc->trk_hw_mode)
+ value |= USB2_TRK_HW_MODE;
+ padctl_writel(padctl, value, XUSB_PADCTL_USB2_BIAS_PAD_CTL2);
+
+ if (!padctl->soc->trk_hw_mode)
clk_disable_unprepare(priv->usb2_trk_clk);
- }
}
static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl)
@@ -782,13 +783,15 @@ static int tegra186_xusb_padctl_vbus_override(struct tegra_xusb_padctl *padctl,
}
static int tegra186_xusb_padctl_id_override(struct tegra_xusb_padctl *padctl,
- bool status)
+ struct tegra_xusb_usb2_port *port, bool status)
{
- u32 value;
+ u32 value, id_override;
+ int err = 0;
dev_dbg(padctl->dev, "%s id override\n", status ? "set" : "clear");
value = padctl_readl(padctl, USB2_VBUS_ID);
+ id_override = value & ID_OVERRIDE(~0);
if (status) {
if (value & VBUS_OVERRIDE) {
@@ -799,15 +802,35 @@ static int tegra186_xusb_padctl_id_override(struct tegra_xusb_padctl *padctl,
value = padctl_readl(padctl, USB2_VBUS_ID);
}
- value &= ~ID_OVERRIDE(~0);
- value |= ID_OVERRIDE_GROUNDED;
+ if (id_override != ID_OVERRIDE_GROUNDED) {
+ value &= ~ID_OVERRIDE(~0);
+ value |= ID_OVERRIDE_GROUNDED;
+ padctl_writel(padctl, value, USB2_VBUS_ID);
+
+ err = regulator_enable(port->supply);
+ if (err) {
+ dev_err(padctl->dev, "Failed to enable regulator: %d\n", err);
+ return err;
+ }
+ }
} else {
- value &= ~ID_OVERRIDE(~0);
- value |= ID_OVERRIDE_FLOATING;
+ if (id_override == ID_OVERRIDE_GROUNDED) {
+ /*
+ * The regulator is disabled only when the role transitions
+ * from USB_ROLE_HOST to USB_ROLE_NONE.
+ */
+ err = regulator_disable(port->supply);
+ if (err) {
+ dev_err(padctl->dev, "Failed to disable regulator: %d\n", err);
+ return err;
+ }
+
+ value &= ~ID_OVERRIDE(~0);
+ value |= ID_OVERRIDE_FLOATING;
+ padctl_writel(padctl, value, USB2_VBUS_ID);
+ }
}
- padctl_writel(padctl, value, USB2_VBUS_ID);
-
return 0;
}
@@ -826,27 +849,20 @@ static int tegra186_utmi_phy_set_mode(struct phy *phy, enum phy_mode mode,
if (mode == PHY_MODE_USB_OTG) {
if (submode == USB_ROLE_HOST) {
- tegra186_xusb_padctl_id_override(padctl, true);
-
- err = regulator_enable(port->supply);
+ err = tegra186_xusb_padctl_id_override(padctl, port, true);
+ if (err)
+ goto out;
} else if (submode == USB_ROLE_DEVICE) {
tegra186_xusb_padctl_vbus_override(padctl, true);
} else if (submode == USB_ROLE_NONE) {
- /*
- * When port is peripheral only or role transitions to
- * USB_ROLE_NONE from USB_ROLE_DEVICE, regulator is not
- * enabled.
- */
- if (regulator_is_enabled(port->supply))
- regulator_disable(port->supply);
-
- tegra186_xusb_padctl_id_override(padctl, false);
+ err = tegra186_xusb_padctl_id_override(padctl, port, false);
+ if (err)
+ goto out;
tegra186_xusb_padctl_vbus_override(padctl, false);
}
}
-
+out:
mutex_unlock(&padctl->lock);
-
return err;
}
@@ -1710,7 +1726,8 @@ const struct tegra_xusb_padctl_soc tegra234_xusb_padctl_soc = {
.num_supplies = ARRAY_SIZE(tegra194_xusb_padctl_supply_names),
.supports_gen2 = true,
.poll_trk_completed = true,
- .trk_hw_mode = true,
+ .trk_hw_mode = false,
+ .trk_update_on_idle = true,
.supports_lp_cfg_en = true,
};
EXPORT_SYMBOL_GPL(tegra234_xusb_padctl_soc);
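
The id_override rework above makes the regulator calls conditional on an actual register-state transition, so setting the same role twice can no longer double-enable (or double-disable) the VBUS supply. A sketch of the idea with hypothetical helpers (my_id_override_is_grounded(), my_set_id_override()); the real driver reads the ID_OVERRIDE bits out of USB2_VBUS_ID instead:

static int set_id_grounded(struct my_port *port, bool grounded)
{
	bool was_grounded = my_id_override_is_grounded(port);
	int err;

	/* Same state again: do nothing, keeping the enable count balanced. */
	if (grounded == was_grounded)
		return 0;

	if (grounded) {
		my_set_id_override(port, true);
		return regulator_enable(port->supply);
	}

	/* Drop the supply before floating the ID pin. */
	err = regulator_disable(port->supply);
	if (err)
		return err;

	my_set_id_override(port, false);
	return 0;
}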
diff --git a/drivers/phy/tegra/xusb.h b/drivers/phy/tegra/xusb.h
index 6e45d194c689..d2b5f9565132 100644
--- a/drivers/phy/tegra/xusb.h
+++ b/drivers/phy/tegra/xusb.h
@@ -434,6 +434,7 @@ struct tegra_xusb_padctl_soc {
bool need_fake_usb3_port;
bool poll_trk_completed;
bool trk_hw_mode;
+ bool trk_update_on_idle;
bool supports_lp_cfg_en;
};
diff --git a/drivers/pinctrl/nuvoton/pinctrl-ma35.c b/drivers/pinctrl/nuvoton/pinctrl-ma35.c
index 06ae1fe8b8c5..b51704bafd81 100644
--- a/drivers/pinctrl/nuvoton/pinctrl-ma35.c
+++ b/drivers/pinctrl/nuvoton/pinctrl-ma35.c
@@ -1074,7 +1074,10 @@ static int ma35_pinctrl_probe_dt(struct platform_device *pdev, struct ma35_pinct
u32 idx = 0;
int ret;
- for_each_gpiochip_node(dev, child) {
+ device_for_each_child_node(dev, child) {
+ if (fwnode_property_present(child, "gpio-controller"))
+ continue;
+
npctl->nfunctions++;
npctl->ngroups += of_get_child_count(to_of_node(child));
}
@@ -1092,7 +1095,10 @@ static int ma35_pinctrl_probe_dt(struct platform_device *pdev, struct ma35_pinct
if (!npctl->groups)
return -ENOMEM;
- for_each_gpiochip_node(dev, child) {
+ device_for_each_child_node(dev, child) {
+ if (fwnode_property_present(child, "gpio-controller"))
+ continue;
+
ret = ma35_pinctrl_parse_functions(child, npctl, idx++);
if (ret) {
fwnode_handle_put(child);
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 5cf3db6d78b7..b3f0d02aeeb3 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -979,6 +979,17 @@ static int amd_gpio_suspend_hibernate_common(struct device *dev, bool is_suspend
pin, is_suspend ? "suspend" : "hibernate");
}
+ /*
+ * Debounce left enabled over suspend has caused GPIOs to fail to
+ * wake the system; since only the actual wakeup event matters
+ * here, clear the debounce settings.
+ */
+ if (gpio_dev->saved_regs[i] & (DB_CNTRl_MASK << DB_CNTRL_OFF)) {
+ amd_gpio_set_debounce(gpio_dev, pin, 0);
+ pm_pr_dbg("Clearing debounce for GPIO #%d during %s.\n",
+ pin, is_suspend ? "suspend" : "hibernate");
+ }
+
raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
}
diff --git a/drivers/pinctrl/pinctrl-aw9523.c b/drivers/pinctrl/pinctrl-aw9523.c
index 9bf53de20be8..04afb344e9e5 100644
--- a/drivers/pinctrl/pinctrl-aw9523.c
+++ b/drivers/pinctrl/pinctrl-aw9523.c
@@ -784,7 +784,7 @@ static int aw9523_init_gpiochip(struct aw9523 *awi, unsigned int npins)
gc->set_config = gpiochip_generic_config;
gc->parent = dev;
gc->owner = THIS_MODULE;
- gc->can_sleep = false;
+ gc->can_sleep = true;
return 0;
}
diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c
index fe2d52e434db..8a2ef74862d3 100644
--- a/drivers/pinctrl/pinctrl-st.c
+++ b/drivers/pinctrl/pinctrl-st.c
@@ -374,11 +374,6 @@ static struct st_pio_control *st_get_pio_control(
}
/* Low level functions.. */
-static inline int st_gpio_bank(int gpio)
-{
- return gpio/ST_GPIO_PINS_PER_BANK;
-}
-
static inline int st_gpio_pin(int gpio)
{
return gpio%ST_GPIO_PINS_PER_BANK;
diff --git a/drivers/pinctrl/pinctrl-tb10x.c b/drivers/pinctrl/pinctrl-tb10x.c
index d6bb8f58978d..4edb20e61951 100644
--- a/drivers/pinctrl/pinctrl-tb10x.c
+++ b/drivers/pinctrl/pinctrl-tb10x.c
@@ -823,7 +823,7 @@ static struct platform_driver tb10x_pinctrl_pdrv = {
.remove = tb10x_pinctrl_remove,
.driver = {
.name = "tb10x_pinctrl",
- .of_match_table = of_match_ptr(tb10x_pinctrl_dt_ids),
+ .of_match_table = tb10x_pinctrl_dt_ids,
}
};
diff --git a/drivers/pinctrl/qcom/pinctrl-apq8064.c b/drivers/pinctrl/qcom/pinctrl-apq8064.c
index 20c3b9025044..3654913f1ae5 100644
--- a/drivers/pinctrl/qcom/pinctrl-apq8064.c
+++ b/drivers/pinctrl/qcom/pinctrl-apq8064.c
@@ -629,7 +629,6 @@ static struct platform_driver apq8064_pinctrl_driver = {
.of_match_table = apq8064_pinctrl_of_match,
},
.probe = apq8064_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init apq8064_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-apq8084.c b/drivers/pinctrl/qcom/pinctrl-apq8084.c
index 3fc0a40762b6..27693cd64881 100644
--- a/drivers/pinctrl/qcom/pinctrl-apq8084.c
+++ b/drivers/pinctrl/qcom/pinctrl-apq8084.c
@@ -1207,7 +1207,6 @@ static struct platform_driver apq8084_pinctrl_driver = {
.of_match_table = apq8084_pinctrl_of_match,
},
.probe = apq8084_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init apq8084_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-ipq4019.c b/drivers/pinctrl/qcom/pinctrl-ipq4019.c
index 1f7944dd829d..6ede3149b6e1 100644
--- a/drivers/pinctrl/qcom/pinctrl-ipq4019.c
+++ b/drivers/pinctrl/qcom/pinctrl-ipq4019.c
@@ -710,7 +710,6 @@ static struct platform_driver ipq4019_pinctrl_driver = {
.of_match_table = ipq4019_pinctrl_of_match,
},
.probe = ipq4019_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init ipq4019_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-ipq5018.c b/drivers/pinctrl/qcom/pinctrl-ipq5018.c
index e2951f81c3ee..10b99d5d8a11 100644
--- a/drivers/pinctrl/qcom/pinctrl-ipq5018.c
+++ b/drivers/pinctrl/qcom/pinctrl-ipq5018.c
@@ -754,7 +754,6 @@ static struct platform_driver ipq5018_pinctrl_driver = {
.of_match_table = ipq5018_pinctrl_of_match,
},
.probe = ipq5018_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init ipq5018_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-ipq5332.c b/drivers/pinctrl/qcom/pinctrl-ipq5332.c
index 625f8014051f..1ac2fc09c119 100644
--- a/drivers/pinctrl/qcom/pinctrl-ipq5332.c
+++ b/drivers/pinctrl/qcom/pinctrl-ipq5332.c
@@ -834,7 +834,6 @@ static struct platform_driver ipq5332_pinctrl_driver = {
.of_match_table = ipq5332_pinctrl_of_match,
},
.probe = ipq5332_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init ipq5332_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-ipq5424.c b/drivers/pinctrl/qcom/pinctrl-ipq5424.c
index 0d610b076da3..7ff1f8acc1a3 100644
--- a/drivers/pinctrl/qcom/pinctrl-ipq5424.c
+++ b/drivers/pinctrl/qcom/pinctrl-ipq5424.c
@@ -791,7 +791,6 @@ static struct platform_driver ipq5424_pinctrl_driver = {
.of_match_table = ipq5424_pinctrl_of_match,
},
.probe = ipq5424_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init ipq5424_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-ipq6018.c b/drivers/pinctrl/qcom/pinctrl-ipq6018.c
index 0ad08647dbcd..a4ba980252e1 100644
--- a/drivers/pinctrl/qcom/pinctrl-ipq6018.c
+++ b/drivers/pinctrl/qcom/pinctrl-ipq6018.c
@@ -1080,7 +1080,6 @@ static struct platform_driver ipq6018_pinctrl_driver = {
.of_match_table = ipq6018_pinctrl_of_match,
},
.probe = ipq6018_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init ipq6018_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-ipq8064.c b/drivers/pinctrl/qcom/pinctrl-ipq8064.c
index e2bb94e86aef..0a9e357e64c6 100644
--- a/drivers/pinctrl/qcom/pinctrl-ipq8064.c
+++ b/drivers/pinctrl/qcom/pinctrl-ipq8064.c
@@ -631,7 +631,6 @@ static struct platform_driver ipq8064_pinctrl_driver = {
.of_match_table = ipq8064_pinctrl_of_match,
},
.probe = ipq8064_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init ipq8064_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-ipq8074.c b/drivers/pinctrl/qcom/pinctrl-ipq8074.c
index 337f3a1c92c1..482f13282fc2 100644
--- a/drivers/pinctrl/qcom/pinctrl-ipq8074.c
+++ b/drivers/pinctrl/qcom/pinctrl-ipq8074.c
@@ -1041,7 +1041,6 @@ static struct platform_driver ipq8074_pinctrl_driver = {
.of_match_table = ipq8074_pinctrl_of_match,
},
.probe = ipq8074_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init ipq8074_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-ipq9574.c b/drivers/pinctrl/qcom/pinctrl-ipq9574.c
index e2491617b236..89c05d8eb550 100644
--- a/drivers/pinctrl/qcom/pinctrl-ipq9574.c
+++ b/drivers/pinctrl/qcom/pinctrl-ipq9574.c
@@ -799,7 +799,6 @@ static struct platform_driver ipq9574_pinctrl_driver = {
.of_match_table = ipq9574_pinctrl_of_match,
},
.probe = ipq9574_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init ipq9574_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-mdm9607.c b/drivers/pinctrl/qcom/pinctrl-mdm9607.c
index e7cd3ef1cf3e..3e18ba124fed 100644
--- a/drivers/pinctrl/qcom/pinctrl-mdm9607.c
+++ b/drivers/pinctrl/qcom/pinctrl-mdm9607.c
@@ -1059,7 +1059,6 @@ static struct platform_driver mdm9607_pinctrl_driver = {
.of_match_table = mdm9607_pinctrl_of_match,
},
.probe = mdm9607_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init mdm9607_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-mdm9615.c b/drivers/pinctrl/qcom/pinctrl-mdm9615.c
index 0a2ae383d3d5..bea1ca3d1b7f 100644
--- a/drivers/pinctrl/qcom/pinctrl-mdm9615.c
+++ b/drivers/pinctrl/qcom/pinctrl-mdm9615.c
@@ -446,7 +446,6 @@ static struct platform_driver mdm9615_pinctrl_driver = {
.of_match_table = mdm9615_pinctrl_of_match,
},
.probe = mdm9615_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init mdm9615_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index f012ea88aa22..f713c80d7f3e 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -1038,6 +1038,25 @@ static bool msm_gpio_needs_dual_edge_parent_workaround(struct irq_data *d,
test_bit(d->hwirq, pctrl->skip_wake_irqs);
}
+static void msm_gpio_irq_init_valid_mask(struct gpio_chip *gc,
+ unsigned long *valid_mask,
+ unsigned int ngpios)
+{
+ struct msm_pinctrl *pctrl = gpiochip_get_data(gc);
+ const struct msm_pingroup *g;
+ int i;
+
+ bitmap_fill(valid_mask, ngpios);
+
+ for (i = 0; i < ngpios; i++) {
+ g = &pctrl->soc->groups[i];
+
+ if (g->intr_detection_width != 1 &&
+ g->intr_detection_width != 2)
+ clear_bit(i, valid_mask);
+ }
+}
+
static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
@@ -1441,8 +1460,9 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
girq->default_type = IRQ_TYPE_NONE;
girq->handler = handle_bad_irq;
girq->parents[0] = pctrl->irq;
+ girq->init_valid_mask = msm_gpio_irq_init_valid_mask;
- ret = gpiochip_add_data(&pctrl->chip, pctrl);
+ ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl);
if (ret) {
dev_err(pctrl->dev, "Failed register gpiochip\n");
return ret;
@@ -1463,7 +1483,6 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
dev_name(pctrl->dev), 0, 0, chip->ngpio);
if (ret) {
dev_err(pctrl->dev, "Failed to add pin range\n");
- gpiochip_remove(&pctrl->chip);
return ret;
}
}
@@ -1599,13 +1618,5 @@ int msm_pinctrl_probe(struct platform_device *pdev,
}
EXPORT_SYMBOL(msm_pinctrl_probe);
-void msm_pinctrl_remove(struct platform_device *pdev)
-{
- struct msm_pinctrl *pctrl = platform_get_drvdata(pdev);
-
- gpiochip_remove(&pctrl->chip);
-}
-EXPORT_SYMBOL(msm_pinctrl_remove);
-
MODULE_DESCRIPTION("Qualcomm Technologies, Inc. TLMM driver");
MODULE_LICENSE("GPL v2");
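
Two reusable patterns sit in this pinctrl-msm hunk. First, devm_gpiochip_add_data() ties the gpiochip's removal to the device's lifetime, which is exactly why the error path loses its gpiochip_remove() call and why every SoC driver below can drop its .remove = msm_pinctrl_remove line. Second, the init_valid_mask callback tells the core up front which lines can never be interrupts. A condensed sketch with hypothetical my_ names:

static void my_irq_init_valid_mask(struct gpio_chip *gc,
				   unsigned long *valid_mask,
				   unsigned int ngpios)
{
	struct my_pinctrl *pctrl = gpiochip_get_data(gc);
	unsigned int i;

	bitmap_fill(valid_mask, ngpios);
	for (i = 0; i < ngpios; i++) {
		/* Pins without interrupt detection hardware can't be IRQs. */
		if (!my_pin_has_irq(pctrl, i))
			clear_bit(i, valid_mask);
	}
}

static int my_gpio_init(struct my_pinctrl *pctrl)
{
	pctrl->chip.irq.init_valid_mask = my_irq_init_valid_mask;

	/*
	 * devm_ registration: the gpiochip is removed automatically on
	 * driver unbind, so no .remove callback is needed and no manual
	 * gpiochip_remove() on later error paths.
	 */
	return devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl);
}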
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.h b/drivers/pinctrl/qcom/pinctrl-msm.h
index 63852ed70295..d7dc0947bb16 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.h
+++ b/drivers/pinctrl/qcom/pinctrl-msm.h
@@ -171,6 +171,5 @@ extern const struct dev_pm_ops msm_pinctrl_dev_pm_ops;
int msm_pinctrl_probe(struct platform_device *pdev,
const struct msm_pinctrl_soc_data *soc_data);
-void msm_pinctrl_remove(struct platform_device *pdev);
#endif
diff --git a/drivers/pinctrl/qcom/pinctrl-msm8226.c b/drivers/pinctrl/qcom/pinctrl-msm8226.c
index 64fee70f1772..f9a957347340 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm8226.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm8226.c
@@ -654,7 +654,6 @@ static struct platform_driver msm8226_pinctrl_driver = {
.of_match_table = msm8226_pinctrl_of_match,
},
.probe = msm8226_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init msm8226_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-msm8660.c b/drivers/pinctrl/qcom/pinctrl-msm8660.c
index 999a5f867eb5..4dbc19ffd80e 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm8660.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm8660.c
@@ -981,7 +981,6 @@ static struct platform_driver msm8660_pinctrl_driver = {
.of_match_table = msm8660_pinctrl_of_match,
},
.probe = msm8660_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init msm8660_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-msm8909.c b/drivers/pinctrl/qcom/pinctrl-msm8909.c
index 756856d20d6b..0aa4f77b774f 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm8909.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm8909.c
@@ -929,7 +929,6 @@ static struct platform_driver msm8909_pinctrl_driver = {
.of_match_table = msm8909_pinctrl_of_match,
},
.probe = msm8909_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init msm8909_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-msm8916.c b/drivers/pinctrl/qcom/pinctrl-msm8916.c
index cea5c54f92fe..0dfc6dd33d58 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm8916.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm8916.c
@@ -969,7 +969,6 @@ static struct platform_driver msm8916_pinctrl_driver = {
.of_match_table = msm8916_pinctrl_of_match,
},
.probe = msm8916_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init msm8916_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-msm8917.c b/drivers/pinctrl/qcom/pinctrl-msm8917.c
index 350636807b07..2e1a94ab18b2 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm8917.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm8917.c
@@ -1607,7 +1607,6 @@ static struct platform_driver msm8917_pinctrl_driver = {
.of_match_table = msm8917_pinctrl_of_match,
},
.probe = msm8917_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init msm8917_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-msm8953.c b/drivers/pinctrl/qcom/pinctrl-msm8953.c
index 998351bdfee1..956383341a7a 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm8953.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm8953.c
@@ -1816,7 +1816,6 @@ static struct platform_driver msm8953_pinctrl_driver = {
.of_match_table = msm8953_pinctrl_of_match,
},
.probe = msm8953_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init msm8953_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-msm8960.c b/drivers/pinctrl/qcom/pinctrl-msm8960.c
index ebe230b3b437..a937ea867de7 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm8960.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm8960.c
@@ -1246,7 +1246,6 @@ static struct platform_driver msm8960_pinctrl_driver = {
.of_match_table = msm8960_pinctrl_of_match,
},
.probe = msm8960_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init msm8960_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-msm8976.c b/drivers/pinctrl/qcom/pinctrl-msm8976.c
index c30d80e4e98c..3bcb03387781 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm8976.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm8976.c
@@ -1096,7 +1096,6 @@ static struct platform_driver msm8976_pinctrl_driver = {
.of_match_table = msm8976_pinctrl_of_match,
},
.probe = msm8976_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init msm8976_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-msm8994.c b/drivers/pinctrl/qcom/pinctrl-msm8994.c
index b1a6759ab4a5..7a3b6cbccb68 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm8994.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm8994.c
@@ -1343,7 +1343,6 @@ static struct platform_driver msm8994_pinctrl_driver = {
.of_match_table = msm8994_pinctrl_of_match,
},
.probe = msm8994_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init msm8994_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-msm8996.c b/drivers/pinctrl/qcom/pinctrl-msm8996.c
index 1b5d80eaab83..d86d83106d3b 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm8996.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm8996.c
@@ -1920,7 +1920,6 @@ static struct platform_driver msm8996_pinctrl_driver = {
.of_match_table = msm8996_pinctrl_of_match,
},
.probe = msm8996_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init msm8996_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-msm8998.c b/drivers/pinctrl/qcom/pinctrl-msm8998.c
index b7cbf32b3125..1daee815888f 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm8998.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm8998.c
@@ -1535,7 +1535,6 @@ static struct platform_driver msm8998_pinctrl_driver = {
.of_match_table = msm8998_pinctrl_of_match,
},
.probe = msm8998_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init msm8998_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-msm8x74.c b/drivers/pinctrl/qcom/pinctrl-msm8x74.c
index 238c83f6ec4f..8253aa25775b 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm8x74.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm8x74.c
@@ -1083,7 +1083,6 @@ static struct platform_driver msm8x74_pinctrl_driver = {
.of_match_table = msm8x74_pinctrl_of_match,
},
.probe = msm8x74_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init msm8x74_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-qcm2290.c b/drivers/pinctrl/qcom/pinctrl-qcm2290.c
index f885af571ec9..eeeec6434f6a 100644
--- a/drivers/pinctrl/qcom/pinctrl-qcm2290.c
+++ b/drivers/pinctrl/qcom/pinctrl-qcm2290.c
@@ -167,6 +167,10 @@ static const struct pinctrl_pin_desc qcm2290_pins[] = {
PINCTRL_PIN(62, "GPIO_62"),
PINCTRL_PIN(63, "GPIO_63"),
PINCTRL_PIN(64, "GPIO_64"),
+ PINCTRL_PIN(65, "GPIO_65"),
+ PINCTRL_PIN(66, "GPIO_66"),
+ PINCTRL_PIN(67, "GPIO_67"),
+ PINCTRL_PIN(68, "GPIO_68"),
PINCTRL_PIN(69, "GPIO_69"),
PINCTRL_PIN(70, "GPIO_70"),
PINCTRL_PIN(71, "GPIO_71"),
@@ -181,12 +185,17 @@ static const struct pinctrl_pin_desc qcm2290_pins[] = {
PINCTRL_PIN(80, "GPIO_80"),
PINCTRL_PIN(81, "GPIO_81"),
PINCTRL_PIN(82, "GPIO_82"),
+ PINCTRL_PIN(83, "GPIO_83"),
+ PINCTRL_PIN(84, "GPIO_84"),
+ PINCTRL_PIN(85, "GPIO_85"),
PINCTRL_PIN(86, "GPIO_86"),
PINCTRL_PIN(87, "GPIO_87"),
PINCTRL_PIN(88, "GPIO_88"),
PINCTRL_PIN(89, "GPIO_89"),
PINCTRL_PIN(90, "GPIO_90"),
PINCTRL_PIN(91, "GPIO_91"),
+ PINCTRL_PIN(92, "GPIO_92"),
+ PINCTRL_PIN(93, "GPIO_93"),
PINCTRL_PIN(94, "GPIO_94"),
PINCTRL_PIN(95, "GPIO_95"),
PINCTRL_PIN(96, "GPIO_96"),
@@ -1125,7 +1134,6 @@ static struct platform_driver qcm2290_pinctrl_driver = {
.of_match_table = qcm2290_pinctrl_of_match,
},
.probe = qcm2290_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init qcm2290_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-qcs404.c b/drivers/pinctrl/qcom/pinctrl-qcs404.c
index ae7224012f8a..54e3b4435349 100644
--- a/drivers/pinctrl/qcom/pinctrl-qcs404.c
+++ b/drivers/pinctrl/qcom/pinctrl-qcs404.c
@@ -1644,7 +1644,6 @@ static struct platform_driver qcs404_pinctrl_driver = {
.of_match_table = qcs404_pinctrl_of_match,
},
.probe = qcs404_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init qcs404_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-qcs615.c b/drivers/pinctrl/qcom/pinctrl-qcs615.c
index 17ca743c2210..2a943bc46a62 100644
--- a/drivers/pinctrl/qcom/pinctrl-qcs615.c
+++ b/drivers/pinctrl/qcom/pinctrl-qcs615.c
@@ -1087,7 +1087,6 @@ static struct platform_driver qcs615_tlmm_driver = {
.of_match_table = qcs615_tlmm_of_match,
},
.probe = qcs615_tlmm_probe,
- .remove = msm_pinctrl_remove,
};
static int __init qcs615_tlmm_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-qcs8300.c b/drivers/pinctrl/qcom/pinctrl-qcs8300.c
index 5f5f7c4ac644..d6437e26392b 100644
--- a/drivers/pinctrl/qcom/pinctrl-qcs8300.c
+++ b/drivers/pinctrl/qcom/pinctrl-qcs8300.c
@@ -1227,7 +1227,6 @@ static struct platform_driver qcs8300_pinctrl_driver = {
.of_match_table = qcs8300_pinctrl_of_match,
},
.probe = qcs8300_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init qcs8300_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c b/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c
index b5808fcfb13c..9ecc4d40e4dc 100644
--- a/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c
+++ b/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c
@@ -145,7 +145,6 @@ static struct platform_driver qdf2xxx_pinctrl_driver = {
.acpi_match_table = qdf2xxx_acpi_ids,
},
.probe = qdf2xxx_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init qdf2xxx_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-qdu1000.c b/drivers/pinctrl/qcom/pinctrl-qdu1000.c
index 47bc529ef550..eacb89fa3888 100644
--- a/drivers/pinctrl/qcom/pinctrl-qdu1000.c
+++ b/drivers/pinctrl/qcom/pinctrl-qdu1000.c
@@ -1248,7 +1248,6 @@ static struct platform_driver qdu1000_tlmm_driver = {
.of_match_table = qdu1000_tlmm_of_match,
},
.probe = qdu1000_tlmm_probe,
- .remove = msm_pinctrl_remove,
};
static int __init qdu1000_tlmm_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sa8775p.c b/drivers/pinctrl/qcom/pinctrl-sa8775p.c
index a5b38221aea8..1b62eb3e6620 100644
--- a/drivers/pinctrl/qcom/pinctrl-sa8775p.c
+++ b/drivers/pinctrl/qcom/pinctrl-sa8775p.c
@@ -1540,7 +1540,6 @@ static struct platform_driver sa8775p_pinctrl_driver = {
.of_match_table = sa8775p_pinctrl_of_match,
},
.probe = sa8775p_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sa8775p_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sar2130p.c b/drivers/pinctrl/qcom/pinctrl-sar2130p.c
index 19a2e37826c7..3dd1b5e5cfee 100644
--- a/drivers/pinctrl/qcom/pinctrl-sar2130p.c
+++ b/drivers/pinctrl/qcom/pinctrl-sar2130p.c
@@ -1486,7 +1486,6 @@ static struct platform_driver sar2130p_tlmm_driver = {
.of_match_table = sar2130p_tlmm_of_match,
},
.probe = sar2130p_tlmm_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sar2130p_tlmm_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sc7180.c b/drivers/pinctrl/qcom/pinctrl-sc7180.c
index 6eb0c73791c0..c43fe10b71ad 100644
--- a/drivers/pinctrl/qcom/pinctrl-sc7180.c
+++ b/drivers/pinctrl/qcom/pinctrl-sc7180.c
@@ -1159,7 +1159,6 @@ static struct platform_driver sc7180_pinctrl_driver = {
.of_match_table = sc7180_pinctrl_of_match,
},
.probe = sc7180_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sc7180_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sc7280.c b/drivers/pinctrl/qcom/pinctrl-sc7280.c
index 0c10eeb60b55..1b070e9d41f5 100644
--- a/drivers/pinctrl/qcom/pinctrl-sc7280.c
+++ b/drivers/pinctrl/qcom/pinctrl-sc7280.c
@@ -1505,7 +1505,6 @@ static struct platform_driver sc7280_pinctrl_driver = {
.of_match_table = sc7280_pinctrl_of_match,
},
.probe = sc7280_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sc7280_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sc8180x.c b/drivers/pinctrl/qcom/pinctrl-sc8180x.c
index d6a79ad41a40..26dd165d1543 100644
--- a/drivers/pinctrl/qcom/pinctrl-sc8180x.c
+++ b/drivers/pinctrl/qcom/pinctrl-sc8180x.c
@@ -1720,7 +1720,6 @@ static struct platform_driver sc8180x_pinctrl_driver = {
.acpi_match_table = sc8180x_pinctrl_acpi_match,
},
.probe = sc8180x_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sc8180x_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sc8280xp.c b/drivers/pinctrl/qcom/pinctrl-sc8280xp.c
index 96f4fb5a5d29..6ccd7e5648d4 100644
--- a/drivers/pinctrl/qcom/pinctrl-sc8280xp.c
+++ b/drivers/pinctrl/qcom/pinctrl-sc8280xp.c
@@ -1926,7 +1926,6 @@ static struct platform_driver sc8280xp_pinctrl_driver = {
.of_match_table = sc8280xp_pinctrl_of_match,
},
.probe = sc8280xp_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sc8280xp_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sdm660.c b/drivers/pinctrl/qcom/pinctrl-sdm660.c
index 907e4ffca5e7..1a78288f1bc8 100644
--- a/drivers/pinctrl/qcom/pinctrl-sdm660.c
+++ b/drivers/pinctrl/qcom/pinctrl-sdm660.c
@@ -1442,7 +1442,6 @@ static struct platform_driver sdm660_pinctrl_driver = {
.of_match_table = sdm660_pinctrl_of_match,
},
.probe = sdm660_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sdm660_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sdm670.c b/drivers/pinctrl/qcom/pinctrl-sdm670.c
index c76183ba95e1..0fe1fa94cd6d 100644
--- a/drivers/pinctrl/qcom/pinctrl-sdm670.c
+++ b/drivers/pinctrl/qcom/pinctrl-sdm670.c
@@ -1337,7 +1337,6 @@ static struct platform_driver sdm670_pinctrl_driver = {
.of_match_table = sdm670_pinctrl_of_match,
},
.probe = sdm670_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sdm670_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sdm845.c b/drivers/pinctrl/qcom/pinctrl-sdm845.c
index cc05c415ed15..0446e291aa48 100644
--- a/drivers/pinctrl/qcom/pinctrl-sdm845.c
+++ b/drivers/pinctrl/qcom/pinctrl-sdm845.c
@@ -1351,7 +1351,6 @@ static struct platform_driver sdm845_pinctrl_driver = {
.acpi_match_table = ACPI_PTR(sdm845_pinctrl_acpi_match),
},
.probe = sdm845_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sdm845_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sdx55.c b/drivers/pinctrl/qcom/pinctrl-sdx55.c
index 8826db9d21d0..2c17bf889146 100644
--- a/drivers/pinctrl/qcom/pinctrl-sdx55.c
+++ b/drivers/pinctrl/qcom/pinctrl-sdx55.c
@@ -990,7 +990,6 @@ static struct platform_driver sdx55_pinctrl_driver = {
.of_match_table = sdx55_pinctrl_of_match,
},
.probe = sdx55_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sdx55_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sdx65.c b/drivers/pinctrl/qcom/pinctrl-sdx65.c
index f6f319c997fc..85b5c0206dbd 100644
--- a/drivers/pinctrl/qcom/pinctrl-sdx65.c
+++ b/drivers/pinctrl/qcom/pinctrl-sdx65.c
@@ -939,7 +939,6 @@ static struct platform_driver sdx65_pinctrl_driver = {
.of_match_table = sdx65_pinctrl_of_match,
},
.probe = sdx65_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sdx65_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sdx75.c b/drivers/pinctrl/qcom/pinctrl-sdx75.c
index 3cfe8c7f04df..ab13a3a57a83 100644
--- a/drivers/pinctrl/qcom/pinctrl-sdx75.c
+++ b/drivers/pinctrl/qcom/pinctrl-sdx75.c
@@ -1124,7 +1124,6 @@ static struct platform_driver sdx75_pinctrl_driver = {
.of_match_table = sdx75_pinctrl_of_match,
},
.probe = sdx75_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sdx75_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sm4450.c b/drivers/pinctrl/qcom/pinctrl-sm4450.c
index 622f20e6f6f8..1ecdf1ab4f27 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm4450.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm4450.c
@@ -994,7 +994,6 @@ static struct platform_driver sm4450_tlmm_driver = {
.of_match_table = sm4450_tlmm_of_match,
},
.probe = sm4450_tlmm_probe,
- .remove = msm_pinctrl_remove,
};
MODULE_DEVICE_TABLE(of, sm4450_tlmm_of_match);
diff --git a/drivers/pinctrl/qcom/pinctrl-sm6115.c b/drivers/pinctrl/qcom/pinctrl-sm6115.c
index 4e91c75ad952..c273efa43996 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm6115.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm6115.c
@@ -907,7 +907,6 @@ static struct platform_driver sm6115_tlmm_driver = {
.of_match_table = sm6115_tlmm_of_match,
},
.probe = sm6115_tlmm_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sm6115_tlmm_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sm6125.c b/drivers/pinctrl/qcom/pinctrl-sm6125.c
index c188842047aa..5092f20e0c1b 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm6125.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm6125.c
@@ -1266,7 +1266,6 @@ static struct platform_driver sm6125_tlmm_driver = {
.of_match_table = sm6125_tlmm_of_match,
},
.probe = sm6125_tlmm_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sm6125_tlmm_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sm6350.c b/drivers/pinctrl/qcom/pinctrl-sm6350.c
index f3828c07b134..ba4686c86c54 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm6350.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm6350.c
@@ -1373,7 +1373,6 @@ static struct platform_driver sm6350_tlmm_driver = {
.of_match_table = sm6350_tlmm_of_match,
},
.probe = sm6350_tlmm_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sm6350_tlmm_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sm6375.c b/drivers/pinctrl/qcom/pinctrl-sm6375.c
index c82c8516932e..49031571e65e 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm6375.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm6375.c
@@ -1516,7 +1516,6 @@ static struct platform_driver sm6375_tlmm_driver = {
.of_match_table = sm6375_tlmm_of_match,
},
.probe = sm6375_tlmm_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sm6375_tlmm_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sm7150.c b/drivers/pinctrl/qcom/pinctrl-sm7150.c
index 3c7fd8af6635..6e89966cd70e 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm7150.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm7150.c
@@ -1255,7 +1255,6 @@ static struct platform_driver sm7150_tlmm_driver = {
.of_match_table = sm7150_tlmm_of_match,
},
.probe = sm7150_tlmm_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sm7150_tlmm_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sm8150.c b/drivers/pinctrl/qcom/pinctrl-sm8150.c
index 01aea9c70b7a..794ed99463f7 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm8150.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm8150.c
@@ -1542,7 +1542,6 @@ static struct platform_driver sm8150_pinctrl_driver = {
.of_match_table = sm8150_pinctrl_of_match,
},
.probe = sm8150_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sm8150_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sm8250.c b/drivers/pinctrl/qcom/pinctrl-sm8250.c
index e9961a49ff98..fb6f005d64f5 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm8250.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm8250.c
@@ -1351,7 +1351,6 @@ static struct platform_driver sm8250_pinctrl_driver = {
.of_match_table = sm8250_pinctrl_of_match,
},
.probe = sm8250_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sm8250_pinctrl_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sm8350.c b/drivers/pinctrl/qcom/pinctrl-sm8350.c
index 9c69458bd910..c8a3f39ce6f1 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm8350.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm8350.c
@@ -1642,7 +1642,6 @@ static struct platform_driver sm8350_tlmm_driver = {
.of_match_table = sm8350_tlmm_of_match,
},
.probe = sm8350_tlmm_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sm8350_tlmm_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sm8450.c b/drivers/pinctrl/qcom/pinctrl-sm8450.c
index d11bb1ee9e3d..f2e52d5a0f93 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm8450.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm8450.c
@@ -1677,7 +1677,6 @@ static struct platform_driver sm8450_tlmm_driver = {
.of_match_table = sm8450_tlmm_of_match,
},
.probe = sm8450_tlmm_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sm8450_tlmm_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sm8550.c b/drivers/pinctrl/qcom/pinctrl-sm8550.c
index 3c847d9cb5d9..1b4496cb39eb 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm8550.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm8550.c
@@ -1762,7 +1762,6 @@ static struct platform_driver sm8550_tlmm_driver = {
.of_match_table = sm8550_tlmm_of_match,
},
.probe = sm8550_tlmm_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sm8550_tlmm_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sm8650.c b/drivers/pinctrl/qcom/pinctrl-sm8650.c
index 104708252d12..449a0077f4b1 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm8650.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm8650.c
@@ -1742,7 +1742,6 @@ static struct platform_driver sm8650_tlmm_driver = {
.of_match_table = sm8650_tlmm_of_match,
},
.probe = sm8650_tlmm_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sm8650_tlmm_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-sm8750.c b/drivers/pinctrl/qcom/pinctrl-sm8750.c
index b94fb4ee0ec3..8516693d1db5 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm8750.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm8750.c
@@ -1711,7 +1711,6 @@ static struct platform_driver sm8750_tlmm_driver = {
.of_match_table = sm8750_tlmm_of_match,
},
.probe = sm8750_tlmm_probe,
- .remove = msm_pinctrl_remove,
};
static int __init sm8750_tlmm_init(void)
diff --git a/drivers/pinctrl/qcom/pinctrl-x1e80100.c b/drivers/pinctrl/qcom/pinctrl-x1e80100.c
index 419cb8facb2f..d4b215f34c39 100644
--- a/drivers/pinctrl/qcom/pinctrl-x1e80100.c
+++ b/drivers/pinctrl/qcom/pinctrl-x1e80100.c
@@ -1861,7 +1861,6 @@ static struct platform_driver x1e80100_pinctrl_driver = {
.of_match_table = x1e80100_pinctrl_of_match,
},
.probe = x1e80100_pinctrl_probe,
- .remove = msm_pinctrl_remove,
};
static int __init x1e80100_pinctrl_init(void)
diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c b/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c
index 1833078f6877..4e34b0cd3b73 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c
@@ -143,7 +143,7 @@ static struct sunxi_desc_pin *init_pins_table(struct device *dev,
*/
static int prepare_function_table(struct device *dev, struct device_node *pnode,
struct sunxi_desc_pin *pins, int npins,
- const u8 *irq_bank_muxes)
+ unsigned pin_base, const u8 *irq_bank_muxes)
{
struct device_node *node;
struct property *prop;
@@ -166,7 +166,7 @@ static int prepare_function_table(struct device *dev, struct device_node *pnode,
*/
for (i = 0; i < npins; i++) {
struct sunxi_desc_pin *pin = &pins[i];
- int bank = pin->pin.number / PINS_PER_BANK;
+ int bank = (pin->pin.number - pin_base) / PINS_PER_BANK;
if (irq_bank_muxes[bank]) {
pin->variant++;
@@ -211,7 +211,7 @@ static int prepare_function_table(struct device *dev, struct device_node *pnode,
last_bank = 0;
for (i = 0; i < npins; i++) {
struct sunxi_desc_pin *pin = &pins[i];
- int bank = pin->pin.number / PINS_PER_BANK;
+ int bank = (pin->pin.number - pin_base) / PINS_PER_BANK;
int lastfunc = pin->variant + 1;
int irq_mux = irq_bank_muxes[bank];
@@ -353,7 +353,7 @@ int sunxi_pinctrl_dt_table_init(struct platform_device *pdev,
return PTR_ERR(pins);
ret = prepare_function_table(&pdev->dev, pnode, pins, desc->npins,
- irq_bank_muxes);
+ desc->pin_base, irq_bank_muxes);
if (ret)
return ret;
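
The sunxi change matters on controllers whose pin numbering does not start at zero: the bank used to index irq_bank_muxes[] must be relative to this controller's first pin. For instance, with PINS_PER_BANK = 32 and a controller whose first pin is number 352 (a typical r-pinctrl base), pin 353 belongs to local bank (353 - 352) / 32 = 0, whereas the old computation 353 / 32 = 11 indexed far past the per-controller array. The corrected arithmetic as a helper:

/* Bank index within *this* controller, not within the global pin space. */
static unsigned int pin_bank(unsigned int pin_number, unsigned int pin_base)
{
	return (pin_number - pin_base) / PINS_PER_BANK;
}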
diff --git a/drivers/platform/arm64/huawei-gaokun-ec.c b/drivers/platform/arm64/huawei-gaokun-ec.c
index 7e5aa7ca2403..7170f8eb76f7 100644
--- a/drivers/platform/arm64/huawei-gaokun-ec.c
+++ b/drivers/platform/arm64/huawei-gaokun-ec.c
@@ -662,6 +662,7 @@ static void gaokun_aux_release(struct device *dev)
{
struct auxiliary_device *adev = to_auxiliary_dev(dev);
+ of_node_put(dev->of_node);
kfree(adev);
}
@@ -693,6 +694,7 @@ static int gaokun_aux_init(struct device *parent, const char *name,
ret = auxiliary_device_init(adev);
if (ret) {
+ of_node_put(adev->dev.of_node);
kfree(adev);
return ret;
}
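
The gaokun-ec fix is about of_node reference ownership for auxiliary devices: once a node reference is handed to the device, the release() callback owns dropping it, but if auxiliary_device_init() fails, release() never runs and the caller must drop the reference itself. A sketch of the two balanced paths (how the reference was originally taken is assumed here):

adev->dev.of_node = of_node_get(child);	/* device now holds a reference */

ret = auxiliary_device_init(adev);
if (ret) {
	/* release() will never run: drop the reference by hand. */
	of_node_put(adev->dev.of_node);
	kfree(adev);
	return ret;
}
/* From here on, the release() callback calls of_node_put(). */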
diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c
index 900069eb186e..4776013e0764 100644
--- a/drivers/platform/mellanox/mlxbf-pmc.c
+++ b/drivers/platform/mellanox/mlxbf-pmc.c
@@ -15,6 +15,7 @@
#include <linux/hwmon.h>
#include <linux/platform_device.h>
#include <linux/string.h>
+#include <linux/string_helpers.h>
#include <uapi/linux/psci.h>
#define MLXBF_PMC_WRITE_REG_32 0x82000009
@@ -715,7 +716,7 @@ static const struct mlxbf_pmc_events mlxbf_pmc_llt_events[] = {
{101, "GDC_BANK0_HIT_DCL_PARTIAL"},
{102, "GDC_BANK0_EVICT_DCL"},
{103, "GDC_BANK0_G_RSE_PIPE_CACHE_DATA0"},
- {103, "GDC_BANK0_G_RSE_PIPE_CACHE_DATA1"},
+ {104, "GDC_BANK0_G_RSE_PIPE_CACHE_DATA1"},
{105, "GDC_BANK0_ARB_STRB"},
{106, "GDC_BANK0_ARB_WAIT"},
{107, "GDC_BANK0_GGA_STRB"},
@@ -1222,7 +1223,7 @@ static int mlxbf_pmc_get_event_num(const char *blk, const char *evt)
return -ENODEV;
}
-/* Get the event number given the name */
+/* Get the event name given the number */
static char *mlxbf_pmc_get_event_name(const char *blk, u32 evt)
{
const struct mlxbf_pmc_events *events;
@@ -1784,6 +1785,7 @@ static ssize_t mlxbf_pmc_event_store(struct device *dev,
attr, struct mlxbf_pmc_attribute, dev_attr);
unsigned int blk_num, cnt_num;
bool is_l3 = false;
+ char *evt_name;
int evt_num;
int err;
@@ -1791,14 +1793,23 @@ static ssize_t mlxbf_pmc_event_store(struct device *dev,
cnt_num = attr_event->index;
if (isalpha(buf[0])) {
+ /* Remove the trailing newline character if present */
+ evt_name = kstrdup_and_replace(buf, '\n', '\0', GFP_KERNEL);
+ if (!evt_name)
+ return -ENOMEM;
+
evt_num = mlxbf_pmc_get_event_num(pmc->block_name[blk_num],
- buf);
+ evt_name);
+ kfree(evt_name);
if (evt_num < 0)
return -EINVAL;
} else {
err = kstrtouint(buf, 0, &evt_num);
if (err < 0)
return err;
+
+ if (!mlxbf_pmc_get_event_name(pmc->block_name[blk_num], evt_num))
+ return -EINVAL;
}
if (strstr(pmc->block_name[blk_num], "l3cache"))
@@ -1879,13 +1890,14 @@ static ssize_t mlxbf_pmc_enable_store(struct device *dev,
{
struct mlxbf_pmc_attribute *attr_enable = container_of(
attr, struct mlxbf_pmc_attribute, dev_attr);
- unsigned int en, blk_num;
+ unsigned int blk_num;
u32 word;
int err;
+ bool en;
blk_num = attr_enable->nr;
- err = kstrtouint(buf, 0, &en);
+ err = kstrtobool(buf, &en);
if (err < 0)
return err;
@@ -1905,14 +1917,11 @@ static ssize_t mlxbf_pmc_enable_store(struct device *dev,
MLXBF_PMC_CRSPACE_PERFMON_CTL(pmc->block[blk_num].counters),
MLXBF_PMC_WRITE_REG_32, word);
} else {
- if (en && en != 1)
- return -EINVAL;
-
err = mlxbf_pmc_config_l3_counters(blk_num, false, !!en);
if (err)
return err;
- if (en == 1) {
+ if (en) {
err = mlxbf_pmc_config_l3_counters(blk_num, true, false);
if (err)
return err;
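
Two small sysfs-input idioms recur in these mlxbf-pmc hunks: a write arrives with the user's trailing newline still attached, so string lookups need it stripped, and boolean attributes parse more robustly with kstrtobool() (accepting "0/1", "y/n", "on/off") than with kstrtouint() plus a manual range check. Both patterns in a hedged sketch, with my_ helpers standing in for the driver's lookup and programming logic:

static ssize_t my_event_store(struct device *dev, struct device_attribute *attr,
			      const char *buf, size_t count)
{
	char *evt_name;
	int evt_num;

	/* Copy the input with the trailing '\n' replaced by a terminator. */
	evt_name = kstrdup_and_replace(buf, '\n', '\0', GFP_KERNEL);
	if (!evt_name)
		return -ENOMEM;

	evt_num = my_lookup_event(evt_name);
	kfree(evt_name);
	if (evt_num < 0)
		return -EINVAL;

	return my_program_event(dev, evt_num) ?: count;
}

static ssize_t my_enable_store(struct device *dev, struct device_attribute *attr,
			       const char *buf, size_t count)
{
	bool en;
	int err;

	err = kstrtobool(buf, &en);	/* rejects anything non-boolean */
	if (err)
		return err;

	return my_set_enable(dev, en) ?: count;
}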
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
index aae99adb29eb..14aa87b39be5 100644
--- a/drivers/platform/mellanox/mlxbf-tmfifo.c
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -281,7 +281,8 @@ static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
vring->align = SMP_CACHE_BYTES;
vring->index = i;
vring->vdev_id = tm_vdev->vdev.id.device;
- vring->drop_desc.len = VRING_DROP_DESC_MAX_LEN;
+ vring->drop_desc.len = cpu_to_virtio32(&tm_vdev->vdev,
+ VRING_DROP_DESC_MAX_LEN);
dev = &tm_vdev->vdev.dev;
size = vring_size(vring->num, vring->align);
@@ -1287,7 +1288,7 @@ static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
ether_addr_copy(mac, mlxbf_tmfifo_net_default_mac);
}
-/* Set TmFifo thresolds which is used to trigger interrupts. */
+/* Set TmFifo thresholds which are used to trigger interrupts. */
static void mlxbf_tmfifo_set_threshold(struct mlxbf_tmfifo *fifo)
{
u64 ctl;
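
The drop_desc fix above is an endianness correction: vring descriptor fields are in virtio byte order (little-endian for modern virtio), so a raw u32 store only happens to work on little-endian CPUs. cpu_to_virtio32() converts according to the device's negotiated features. In sketch form, with vdev standing for the associated struct virtio_device:

/* Descriptor fields live in virtio byte order, not CPU byte order. */
desc->len = cpu_to_virtio32(vdev, MAX_LEN);

/* Reads follow the same rule. */
u32 len = virtio32_to_cpu(vdev, desc->len);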
diff --git a/drivers/platform/mellanox/mlxreg-dpu.c b/drivers/platform/mellanox/mlxreg-dpu.c
index 52260106a9f1..39f89c47144a 100644
--- a/drivers/platform/mellanox/mlxreg-dpu.c
+++ b/drivers/platform/mellanox/mlxreg-dpu.c
@@ -483,7 +483,7 @@ static int mlxreg_dpu_config_init(struct mlxreg_dpu *mlxreg_dpu, void *regmap,
mlxreg_dpu->io_data,
sizeof(*mlxreg_dpu->io_data));
if (IS_ERR(mlxreg_dpu->io_regs)) {
- dev_err(dev, "Failed to create regio for client %s at bus %d at addr 0x%02x\n",
+ dev_err(dev, "Failed to create region for client %s at bus %d at addr 0x%02x\n",
data->hpdev.brdinfo->type, data->hpdev.nr,
data->hpdev.brdinfo->addr);
return PTR_ERR(mlxreg_dpu->io_regs);
diff --git a/drivers/platform/mellanox/mlxreg-lc.c b/drivers/platform/mellanox/mlxreg-lc.c
index aee395bb48ae..d1518598dfed 100644
--- a/drivers/platform/mellanox/mlxreg-lc.c
+++ b/drivers/platform/mellanox/mlxreg-lc.c
@@ -57,9 +57,9 @@ enum mlxreg_lc_state {
* @dev: platform device;
* @lock: line card lock;
* @par_regmap: parent device regmap handle;
- * @data: pltaform core data;
+ * @data: platform core data;
* @io_data: register access platform data;
- * @led_data: LED platform data ;
+ * @led_data: LED platform data;
* @mux_data: MUX platform data;
* @led: LED device;
* @io_regs: register access device;
@@ -171,7 +171,7 @@ static int mlxreg_lc_chan[] = {
0x4e, 0x4f
};
-/* Defaul mux configuration. */
+/* Default mux configuration. */
static struct mlxcpld_mux_plat_data mlxreg_lc_mux_data[] = {
{
.chan_ids = mlxreg_lc_chan,
@@ -181,7 +181,7 @@ static struct mlxcpld_mux_plat_data mlxreg_lc_mux_data[] = {
},
};
-/* Defaul mux board info. */
+/* Default mux board info. */
static struct i2c_board_info mlxreg_lc_mux_brdinfo = {
I2C_BOARD_INFO("i2c-mux-mlxcpld", 0x32),
};
@@ -688,7 +688,7 @@ static int mlxreg_lc_completion_notify(void *handle, struct i2c_adapter *parent,
if (regval & mlxreg_lc->data->mask) {
mlxreg_lc->state |= MLXREG_LC_SYNCED;
mlxreg_lc_state_update_locked(mlxreg_lc, MLXREG_LC_SYNCED, 1);
- if (mlxreg_lc->state & ~MLXREG_LC_POWERED) {
+ if (!(mlxreg_lc->state & MLXREG_LC_POWERED)) {
err = mlxreg_lc_power_on_off(mlxreg_lc, 1);
if (err)
goto mlxreg_lc_regmap_power_on_off_fail;
@@ -758,7 +758,7 @@ mlxreg_lc_config_init(struct mlxreg_lc *mlxreg_lc, void *regmap,
platform_device_register_resndata(dev, "mlxreg-io", data->hpdev.nr, NULL, 0,
mlxreg_lc->io_data, sizeof(*mlxreg_lc->io_data));
if (IS_ERR(mlxreg_lc->io_regs)) {
- dev_err(dev, "Failed to create regio for client %s at bus %d at addr 0x%02x\n",
+ dev_err(dev, "Failed to create region for client %s at bus %d at addr 0x%02x\n",
data->hpdev.brdinfo->type, data->hpdev.nr,
data->hpdev.brdinfo->addr);
err = PTR_ERR(mlxreg_lc->io_regs);
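
The one-character mlxreg-lc fix above is a classic bitmask pitfall: state & ~FLAG asks whether any bit other than FLAG is set, while the intended test !(state & FLAG) asks whether FLAG itself is clear. A self-contained userspace demonstration:

#include <stdio.h>

#define POWERED 0x02u

int main(void)
{
	unsigned int state = 0x03;	/* POWERED plus one other flag set */

	/* Buggy form: true whenever any *other* bit is set. */
	printf("buggy:   %d\n", !!(state & ~POWERED));	/* prints 1 */

	/* Intended form: is POWERED actually clear? */
	printf("correct: %d\n", !(state & POWERED));	/* prints 0 */

	return 0;
}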
diff --git a/drivers/platform/mellanox/nvsw-sn2201.c b/drivers/platform/mellanox/nvsw-sn2201.c
index db31c8bf2255..51504113c17e 100644
--- a/drivers/platform/mellanox/nvsw-sn2201.c
+++ b/drivers/platform/mellanox/nvsw-sn2201.c
@@ -1181,7 +1181,7 @@ static int nvsw_sn2201_i2c_completion_notify(void *handle, int id)
if (!nvsw_sn2201->main_mux_devs->adapter) {
err = -ENODEV;
dev_err(nvsw_sn2201->dev, "Failed to get adapter for bus %d\n",
- nvsw_sn2201->cpld_devs->nr);
+ nvsw_sn2201->main_mux_devs->nr);
goto i2c_get_adapter_main_fail;
}
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index abbc2644ff6d..bea87a85ae75 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -58,6 +58,8 @@ obj-$(CONFIG_X86_PLATFORM_DRIVERS_HP) += hp/
# Hewlett Packard Enterprise
obj-$(CONFIG_UV_SYSFS) += uv_sysfs.o
+obj-$(CONFIG_FW_ATTR_CLASS) += firmware_attributes_class.o
+
# IBM Thinkpad and Lenovo
obj-$(CONFIG_IBM_RTL) += ibm_rtl.o
obj-$(CONFIG_IDEAPAD_LAPTOP) += ideapad-laptop.o
@@ -128,7 +130,6 @@ obj-$(CONFIG_SYSTEM76_ACPI) += system76_acpi.o
obj-$(CONFIG_TOPSTAR_LAPTOP) += topstar-laptop.o
# Platform drivers
-obj-$(CONFIG_FW_ATTR_CLASS) += firmware_attributes_class.o
obj-$(CONFIG_SERIAL_MULTI_INSTANTIATE) += serial-multi-instantiate.o
obj-$(CONFIG_TOUCHSCREEN_DMI) += touchscreen_dmi.o
obj-$(CONFIG_WIRELESS_HOTKEY) += wireless-hotkey.o
diff --git a/drivers/platform/x86/amd/amd_isp4.c b/drivers/platform/x86/amd/amd_isp4.c
index 0cc01441bcbb..0d494899502c 100644
--- a/drivers/platform/x86/amd/amd_isp4.c
+++ b/drivers/platform/x86/amd/amd_isp4.c
@@ -11,6 +11,7 @@
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/property.h>
+#include <linux/soc/amd/isp4_misc.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/units.h>
@@ -20,6 +21,9 @@
#define AMDISP_OV05C10_REMOTE_EP_NAME "ov05c10_isp_4_1_1"
#define AMD_ISP_PLAT_DRV_NAME "amd-isp4"
+static const struct software_node isp4_mipi1_endpoint_node;
+static const struct software_node ov05c10_endpoint_node;
+
/*
* AMD ISP platform info definition to initialize sensor
* specific platform configuration to prepare the amdisp
@@ -42,55 +46,116 @@ struct amdisp_platform {
struct mutex lock; /* protects i2c client creation */
};
-/* Top-level OV05C10 camera node property table */
+/* Root AMD CAMERA SWNODE */
+
+/* Root amd camera node definition */
+static const struct software_node amd_camera_node = {
+ .name = "amd_camera",
+};
+
+/* ISP4 SWNODE */
+
+/* ISP4 OV05C10 camera node definition */
+static const struct software_node isp4_node = {
+ .name = "isp4",
+ .parent = &amd_camera_node,
+};
+
+/*
+ * ISP4 Ports node definition. No properties defined for
+ * ports node.
+ */
+static const struct software_node isp4_ports = {
+ .name = "ports",
+ .parent = &isp4_node,
+};
+
+/*
+ * ISP4 Port node definition. No properties defined for
+ * port node.
+ */
+static const struct software_node isp4_port_node = {
+ .name = "port@0",
+ .parent = &isp4_ports,
+};
+
+/*
+ * ISP4 MIPI1 remote endpoint points to OV05C10 endpoint
+ * node.
+ */
+static const struct software_node_ref_args isp4_refs[] = {
+ SOFTWARE_NODE_REFERENCE(&ov05c10_endpoint_node),
+};
+
+/* ISP4 MIPI1 endpoint node properties table */
+static const struct property_entry isp4_mipi1_endpoint_props[] = {
+ PROPERTY_ENTRY_REF_ARRAY("remote-endpoint", isp4_refs),
+ { }
+};
+
+/* ISP4 MIPI1 endpoint node definition */
+static const struct software_node isp4_mipi1_endpoint_node = {
+ .name = "endpoint",
+ .parent = &isp4_port_node,
+ .properties = isp4_mipi1_endpoint_props,
+};
+
+/* I2C1 SWNODE */
+
+/* I2C1 camera node property table */
+static const struct property_entry i2c1_camera_props[] = {
+ PROPERTY_ENTRY_U32("clock-frequency", 1 * HZ_PER_MHZ),
+ { }
+};
+
+/* I2C1 camera node definition */
+static const struct software_node i2c1_node = {
+ .name = "i2c1",
+ .parent = &amd_camera_node,
+ .properties = i2c1_camera_props,
+};
+
+/* I2C1 camera node property table */
static const struct property_entry ov05c10_camera_props[] = {
PROPERTY_ENTRY_U32("clock-frequency", 24 * HZ_PER_MHZ),
{ }
};
-/* Root AMD ISP OV05C10 camera node definition */
-static const struct software_node camera_node = {
+/* OV05C10 camera node definition */
+static const struct software_node ov05c10_camera_node = {
.name = AMDISP_OV05C10_HID,
+ .parent = &i2c1_node,
.properties = ov05c10_camera_props,
};
/*
- * AMD ISP OV05C10 Ports node definition. No properties defined for
+ * OV05C10 Ports node definition. No properties defined for
* ports node for OV05C10.
*/
-static const struct software_node ports = {
+static const struct software_node ov05c10_ports = {
.name = "ports",
- .parent = &camera_node,
-};
-
-/*
- * AMD ISP OV05C10 Port node definition. No properties defined for
- * port node for OV05C10.
- */
-static const struct software_node port_node = {
- .name = "port@",
- .parent = &ports,
+ .parent = &ov05c10_camera_node,
};
/*
- * Remote endpoint AMD ISP node definition. No properties defined for
- * remote endpoint node for OV05C10.
+ * OV05C10 Port node definition.
*/
-static const struct software_node remote_ep_isp_node = {
- .name = AMDISP_OV05C10_REMOTE_EP_NAME,
+static const struct software_node ov05c10_port_node = {
+ .name = "port@0",
+ .parent = &ov05c10_ports,
};
/*
- * Remote endpoint reference for isp node included in the
- * OV05C10 endpoint.
+ * OV05C10 remote endpoint points to ISP4 MIPI1 endpoint
+ * node.
*/
static const struct software_node_ref_args ov05c10_refs[] = {
- SOFTWARE_NODE_REFERENCE(&remote_ep_isp_node),
+ SOFTWARE_NODE_REFERENCE(&isp4_mipi1_endpoint_node),
};
/* OV05C10 supports one single link frequency */
static const u64 ov05c10_link_freqs[] = {
- 925 * HZ_PER_MHZ,
+ 900 * HZ_PER_MHZ,
};
/* OV05C10 supports only 2-lane configuration */
@@ -110,27 +175,64 @@ static const struct property_entry ov05c10_endpoint_props[] = {
{ }
};
-/* AMD ISP endpoint node definition */
-static const struct software_node endpoint_node = {
+/* OV05C10 endpoint node definition */
+static const struct software_node ov05c10_endpoint_node = {
.name = "endpoint",
- .parent = &port_node,
+ .parent = &ov05c10_port_node,
.properties = ov05c10_endpoint_props,
};
/*
- * AMD ISP swnode graph uses 5 nodes and also its relationship is
- * fixed to align with the structure that v4l2 expects for successful
- * endpoint fwnode parsing.
+ * The AMD Camera swnode graph uses 10 nodes, and its relationships are
+ * fixed to align with the structure that the v4l2 and i2c frameworks
+ * expect for successful parsing of fwnodes and their properties with
+ * standard names.
*
* It is only the node property_entries that will vary for each platform
* supporting different sensor modules.
+ *
+ * AMD ISP4 SWNODE GRAPH Structure
+ *
+ * amd_camera {
+ * isp4 {
+ * ports {
+ * port@0 {
+ * isp4_mipi1_ep: endpoint {
+ * remote-endpoint = &OMNI5C10_ep;
+ * };
+ * };
+ * };
+ * };
+ *
+ * i2c1 {
+ * clock-frequency = 1 MHz;
+ * OMNI5C10 {
+ * clock-frequency = 24MHz;
+ * ports {
+ * port@0 {
+ * OMNI5C10_ep: endpoint {
+ * bus-type = 4;
+ * data-lanes = <1 2>;
+ * link-frequencies = 900MHz;
+ * remote-endpoint = &isp4_mipi1;
+ * };
+ * };
+ * };
+ * };
+ * };
+ * };
+ *
*/
-static const struct software_node *ov05c10_nodes[] = {
- &camera_node,
- &ports,
- &port_node,
- &endpoint_node,
- &remote_ep_isp_node,
+static const struct software_node *amd_isp4_nodes[] = {
+ &amd_camera_node,
+ &isp4_node,
+ &isp4_ports,
+ &isp4_port_node,
+ &isp4_mipi1_endpoint_node,
+ &i2c1_node,
+ &ov05c10_camera_node,
+ &ov05c10_ports,
+ &ov05c10_port_node,
+ &ov05c10_endpoint_node,
NULL
};
@@ -140,7 +242,7 @@ static const struct amdisp_platform_info ov05c10_platform_config = {
.dev_name = "ov05c10",
I2C_BOARD_INFO("ov05c10", AMDISP_OV05C10_I2C_ADDR),
},
- .swnodes = ov05c10_nodes,
+ .swnodes = amd_isp4_nodes,
};
static const struct acpi_device_id amdisp_sensor_ids[] = {
@@ -151,7 +253,7 @@ MODULE_DEVICE_TABLE(acpi, amdisp_sensor_ids);
static inline bool is_isp_i2c_adapter(struct i2c_adapter *adap)
{
- return !strcmp(adap->owner->name, "i2c_designware_amdisp");
+ return !strcmp(adap->name, AMDISP_I2C_ADAP_NAME);
}
static void instantiate_isp_i2c_client(struct amdisp_platform *isp4_platform,
@@ -232,7 +334,8 @@ static struct amdisp_platform *prepare_amdisp_platform(struct device *dev,
if (ret)
return ERR_PTR(ret);
- isp4_platform->board_info.swnode = src->swnodes[0];
+ /* initialize ov05c10_camera_node */
+ isp4_platform->board_info.swnode = src->swnodes[6];
return isp4_platform;
}
@@ -257,6 +360,7 @@ static int amd_isp_probe(struct platform_device *pdev)
{
const struct amdisp_platform_info *pinfo;
struct amdisp_platform *isp4_platform;
+ struct acpi_device *adev;
int ret;
pinfo = device_get_match_data(&pdev->dev);
@@ -274,6 +378,10 @@ static int amd_isp_probe(struct platform_device *pdev)
if (ret)
goto error_unregister_sw_node;
+ adev = ACPI_COMPANION(&pdev->dev);
+ /* initialize root amd_camera_node */
+ adev->driver_data = (void *)pinfo->swnodes[0];
+
/* check if adapter is already registered and create i2c client instance */
i2c_for_each_dev(isp4_platform, try_to_instantiate_i2c_client);
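
Most of the amd_isp4 rework is static data: a software-node graph mirroring the devicetree shape that the v4l2 fwnode parser walks (device node, ports, port@N, endpoint, with reciprocal remote-endpoint references joining the two sides). Reduced to a skeleton with hypothetical node names; the forward declaration is what lets the two endpoints reference each other, just as the hunk forward-declares isp4_mipi1_endpoint_node and ov05c10_endpoint_node:

static const struct software_node root  = { .name = "root" };
static const struct software_node ports = { .name = "ports",  .parent = &root };
static const struct software_node port0 = { .name = "port@0", .parent = &ports };

static const struct software_node peer_ep;	/* defined on the peer side */

static const struct software_node_ref_args ep_refs[] = {
	SOFTWARE_NODE_REFERENCE(&peer_ep),
};

static const struct property_entry ep_props[] = {
	PROPERTY_ENTRY_REF_ARRAY("remote-endpoint", ep_refs),
	{ }
};

static const struct software_node ep = {
	.name       = "endpoint",
	.parent     = &port0,
	.properties = ep_props,
};

/* Parents must precede children in the registration list. */
static const struct software_node *nodes[] = {
	&root, &ports, &port0, &ep, /* ... peer side nodes ..., */ NULL
};

/* err = software_node_register_node_group(nodes); */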
diff --git a/drivers/platform/x86/amd/hsmp/hsmp.c b/drivers/platform/x86/amd/hsmp/hsmp.c
index 538b36b97095..885e2f8136fd 100644
--- a/drivers/platform/x86/amd/hsmp/hsmp.c
+++ b/drivers/platform/x86/amd/hsmp/hsmp.c
@@ -97,7 +97,7 @@ static int __hsmp_send_message(struct hsmp_socket *sock, struct hsmp_message *ms
short_sleep = jiffies + msecs_to_jiffies(HSMP_SHORT_SLEEP);
timeout = jiffies + msecs_to_jiffies(HSMP_MSG_TIMEOUT);
- while (time_before(jiffies, timeout)) {
+ while (true) {
ret = sock->amd_hsmp_rdwr(sock, mbinfo->msg_resp_off, &mbox_status, HSMP_RD);
if (ret) {
dev_err(sock->dev, "Error %d reading mailbox status\n", ret);
@@ -106,6 +106,10 @@ static int __hsmp_send_message(struct hsmp_socket *sock, struct hsmp_message *ms
if (mbox_status != HSMP_STATUS_NOT_READY)
break;
+
+ if (!time_before(jiffies, timeout))
+ break;
+
if (time_before(jiffies, short_sleep))
usleep_range(50, 100);
else
@@ -210,13 +214,7 @@ int hsmp_send_message(struct hsmp_message *msg)
return -ENODEV;
sock = &hsmp_pdev.sock[msg->sock_ind];
- /*
- * The time taken by smu operation to complete is between
- * 10us to 1ms. Sometime it may take more time.
- * In SMP system timeout of 100 millisecs should
- * be enough for the previous thread to finish the operation
- */
- ret = down_timeout(&sock->hsmp_sem, msecs_to_jiffies(HSMP_MSG_TIMEOUT));
+ ret = down_interruptible(&sock->hsmp_sem);
if (ret < 0)
return ret;
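
Beyond switching the semaphore wait to down_interruptible(), the hsmp rework fixes a polling race: with while (time_before(jiffies, timeout)), a thread scheduled out just before the deadline exits without ever sampling the mailbox again and reports a timeout even though the SMU may have answered. Restructuring so the status is always read once more after the deadline removes that window. The general shape, with read_status() as a stand-in:

while (true) {
	status = read_status();
	if (status != NOT_READY)
		break;			/* got a response */

	if (!time_before(jiffies, timeout))
		break;			/* deadline passed, and we just re-checked */

	usleep_range(50, 100);		/* back off between polls */
}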
diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c
index 5c7c01f66cde..131f10b68308 100644
--- a/drivers/platform/x86/amd/pmc/pmc-quirks.c
+++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c
@@ -11,7 +11,7 @@
#include <linux/dmi.h>
#include <linux/io.h>
#include <linux/ioport.h>
-#include <asm/amd/fch.h>
+#include <linux/platform_data/x86/amd-fch.h>
#include "pmc.h"
@@ -225,6 +225,15 @@ static const struct dmi_system_id fwbug_list[] = {
DMI_MATCH(DMI_BOARD_NAME, "WUJIE14-GX4HRXL"),
}
},
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=220116 */
+ {
+ .ident = "PCSpecialist Lafite Pro V 14M",
+ .driver_data = &quirk_spurious_8042,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "PCSpecialist"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Lafite Pro V 14M"),
+ }
+ },
{}
};
diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c
index 37c7a57afee5..0b9b23eb7c2c 100644
--- a/drivers/platform/x86/amd/pmc/pmc.c
+++ b/drivers/platform/x86/amd/pmc/pmc.c
@@ -157,6 +157,8 @@ static int amd_pmc_setup_smu_logging(struct amd_pmc_dev *dev)
return -ENOMEM;
}
+ memset_io(dev->smu_virt_addr, 0, sizeof(struct smu_metrics));
+
/* Start the logging */
amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_RESET, false);
amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_START, false);
diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
index 76910601cac8..ef988605c4da 100644
--- a/drivers/platform/x86/amd/pmf/core.c
+++ b/drivers/platform/x86/amd/pmf/core.c
@@ -280,7 +280,7 @@ int amd_pmf_set_dram_addr(struct amd_pmf_dev *dev, bool alloc_buffer)
dev_err(dev->dev, "Invalid CPU id: 0x%x", dev->cpu_id);
}
- dev->buf = kzalloc(dev->mtable_size, GFP_KERNEL);
+ dev->buf = devm_kzalloc(dev->dev, dev->mtable_size, GFP_KERNEL);
if (!dev->buf)
return -ENOMEM;
}
@@ -493,7 +493,6 @@ static void amd_pmf_remove(struct platform_device *pdev)
mutex_destroy(&dev->lock);
mutex_destroy(&dev->update_mutex);
mutex_destroy(&dev->cb_mutex);
- kfree(dev->buf);
}
static const struct attribute_group *amd_pmf_driver_groups[] = {
diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c
index d3bd12ad036a..4f626ebcb619 100644
--- a/drivers/platform/x86/amd/pmf/tee-if.c
+++ b/drivers/platform/x86/amd/pmf/tee-if.c
@@ -358,30 +358,28 @@ static ssize_t amd_pmf_get_pb_data(struct file *filp, const char __user *buf,
return -EINVAL;
/* re-alloc to the new buffer length of the policy binary */
- new_policy_buf = memdup_user(buf, length);
- if (IS_ERR(new_policy_buf))
- return PTR_ERR(new_policy_buf);
+ new_policy_buf = devm_kzalloc(dev->dev, length, GFP_KERNEL);
+ if (!new_policy_buf)
+ return -ENOMEM;
+
+ if (copy_from_user(new_policy_buf, buf, length)) {
+ devm_kfree(dev->dev, new_policy_buf);
+ return -EFAULT;
+ }
- kfree(dev->policy_buf);
+ devm_kfree(dev->dev, dev->policy_buf);
dev->policy_buf = new_policy_buf;
dev->policy_sz = length;
- if (!amd_pmf_pb_valid(dev)) {
- ret = -EINVAL;
- goto cleanup;
- }
+ if (!amd_pmf_pb_valid(dev))
+ return -EINVAL;
amd_pmf_hex_dump_pb(dev);
ret = amd_pmf_start_policy_engine(dev);
if (ret < 0)
- goto cleanup;
+ return ret;
return length;
-
-cleanup:
- kfree(dev->policy_buf);
- dev->policy_buf = NULL;
- return ret;
}
static const struct file_operations pb_fops = {
@@ -422,12 +420,12 @@ static int amd_pmf_ta_open_session(struct tee_context *ctx, u32 *id, const uuid_
rc = tee_client_open_session(ctx, &sess_arg, NULL);
if (rc < 0 || sess_arg.ret != 0) {
pr_err("Failed to open TEE session err:%#x, rc:%d\n", sess_arg.ret, rc);
- return rc;
+ return rc ?: -EINVAL;
}
*id = sess_arg.session;
- return rc;
+ return 0;
}
static int amd_pmf_register_input_device(struct amd_pmf_dev *dev)
@@ -462,7 +460,9 @@ static int amd_pmf_tee_init(struct amd_pmf_dev *dev, const uuid_t *uuid)
dev->tee_ctx = tee_client_open_context(NULL, amd_pmf_amdtee_ta_match, NULL, NULL);
if (IS_ERR(dev->tee_ctx)) {
dev_err(dev->dev, "Failed to open TEE context\n");
- return PTR_ERR(dev->tee_ctx);
+ ret = PTR_ERR(dev->tee_ctx);
+ dev->tee_ctx = NULL;
+ return ret;
}
ret = amd_pmf_ta_open_session(dev->tee_ctx, &dev->session_id, uuid);
@@ -502,9 +502,12 @@ out_ctx:
static void amd_pmf_tee_deinit(struct amd_pmf_dev *dev)
{
+ if (!dev->tee_ctx)
+ return;
tee_shm_free(dev->fw_shm_pool);
tee_client_close_session(dev->tee_ctx, dev->session_id);
tee_client_close_context(dev->tee_ctx);
+ dev->tee_ctx = NULL;
}
int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
@@ -527,64 +530,45 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
ret = amd_pmf_set_dram_addr(dev, true);
if (ret)
- goto err_cancel_work;
+ return ret;
dev->policy_base = devm_ioremap_resource(dev->dev, dev->res);
- if (IS_ERR(dev->policy_base)) {
- ret = PTR_ERR(dev->policy_base);
- goto err_free_dram_buf;
- }
+ if (IS_ERR(dev->policy_base))
+ return PTR_ERR(dev->policy_base);
- dev->policy_buf = kzalloc(dev->policy_sz, GFP_KERNEL);
- if (!dev->policy_buf) {
- ret = -ENOMEM;
- goto err_free_dram_buf;
- }
+ dev->policy_buf = devm_kzalloc(dev->dev, dev->policy_sz, GFP_KERNEL);
+ if (!dev->policy_buf)
+ return -ENOMEM;
memcpy_fromio(dev->policy_buf, dev->policy_base, dev->policy_sz);
if (!amd_pmf_pb_valid(dev)) {
dev_info(dev->dev, "No Smart PC policy present\n");
- ret = -EINVAL;
- goto err_free_policy;
+ return -EINVAL;
}
amd_pmf_hex_dump_pb(dev);
- dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL);
- if (!dev->prev_data) {
- ret = -ENOMEM;
- goto err_free_policy;
- }
+ dev->prev_data = devm_kzalloc(dev->dev, sizeof(*dev->prev_data), GFP_KERNEL);
+ if (!dev->prev_data)
+ return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(amd_pmf_ta_uuid); i++) {
ret = amd_pmf_tee_init(dev, &amd_pmf_ta_uuid[i]);
if (ret)
- goto err_free_prev_data;
+ return ret;
ret = amd_pmf_start_policy_engine(dev);
- switch (ret) {
- case TA_PMF_TYPE_SUCCESS:
- status = true;
- break;
- case TA_ERROR_CRYPTO_INVALID_PARAM:
- case TA_ERROR_CRYPTO_BIN_TOO_LARGE:
- amd_pmf_tee_deinit(dev);
- status = false;
- break;
- default:
- ret = -EINVAL;
- amd_pmf_tee_deinit(dev);
- goto err_free_prev_data;
- }
-
+ dev_dbg(dev->dev, "start policy engine ret: %d\n", ret);
+ status = ret == TA_PMF_TYPE_SUCCESS;
if (status)
break;
+ amd_pmf_tee_deinit(dev);
}
if (!status && !pb_side_load) {
ret = -EINVAL;
- goto err_free_prev_data;
+ goto err;
}
if (pb_side_load)
@@ -592,22 +576,12 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
ret = amd_pmf_register_input_device(dev);
if (ret)
- goto err_pmf_remove_pb;
+ goto err;
return 0;
-err_pmf_remove_pb:
- if (pb_side_load && dev->esbin)
- amd_pmf_remove_pb(dev);
- amd_pmf_tee_deinit(dev);
-err_free_prev_data:
- kfree(dev->prev_data);
-err_free_policy:
- kfree(dev->policy_buf);
-err_free_dram_buf:
- kfree(dev->buf);
-err_cancel_work:
- cancel_delayed_work_sync(&dev->pb_work);
+err:
+ amd_pmf_deinit_smart_pc(dev);
return ret;
}
@@ -621,11 +595,5 @@ void amd_pmf_deinit_smart_pc(struct amd_pmf_dev *dev)
amd_pmf_remove_pb(dev);
cancel_delayed_work_sync(&dev->pb_work);
- kfree(dev->prev_data);
- dev->prev_data = NULL;
- kfree(dev->policy_buf);
- dev->policy_buf = NULL;
- kfree(dev->buf);
- dev->buf = NULL;
amd_pmf_tee_deinit(dev);
}
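
Two details above are easy to miss: tee_client_open_session() can return 0 while sess_arg.ret carries a TA-level error, hence `rc ?: -EINVAL`; and amd_pmf_tee_deinit() becomes idempotent by checking and clearing tee_ctx, so the consolidated err: path may call it unconditionally. A sketch of the idempotent-teardown shape (struct and helper hypothetical):

struct example_dev {
	struct tee_context *tee_ctx;
};

static void example_tee_deinit(struct example_dev *d)
{
	if (!d->tee_ctx)	/* already torn down: safe to call again */
		return;
	tee_client_close_context(d->tee_ctx);
	d->tee_ctx = NULL;	/* make repeated calls a no-op */
}
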
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index 3f8b2a324efd..f84c3d03c1de 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -530,6 +530,15 @@ static const struct dmi_system_id asus_quirks[] = {
},
.driver_data = &quirk_asus_zenbook_duo_kbd,
},
+ {
+ .callback = dmi_matched,
+ .ident = "ASUS Zenbook Duo UX8406CA",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "UX8406CA"),
+ },
+ .driver_data = &quirk_asus_zenbook_duo_kbd,
+ },
{},
};
diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c
index c42f9228b0b2..b58cf74197f0 100644
--- a/drivers/platform/x86/dell/alienware-wmi-wmax.c
+++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c
@@ -90,6 +90,14 @@ static struct awcc_quirks empty_quirks;
static const struct dmi_system_id awcc_dmi_table[] __initconst = {
{
+ .ident = "Alienware Area-51m",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Alienware Area-51m"),
+ },
+ .driver_data = &generic_quirks,
+ },
+ {
.ident = "Alienware Area-51m R2",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
@@ -98,6 +106,14 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = {
.driver_data = &generic_quirks,
},
{
+ .ident = "Alienware m15 R5",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m15 R5"),
+ },
+ .driver_data = &generic_quirks,
+ },
+ {
.ident = "Alienware m15 R7",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
@@ -119,7 +135,7 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = {
DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m16 R1 AMD"),
},
- .driver_data = &g_series_quirks,
+ .driver_data = &generic_quirks,
},
{
.ident = "Alienware m16 R2",
@@ -233,6 +249,7 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = {
},
.driver_data = &g_series_quirks,
},
+ {}
};
enum AWCC_GET_FAN_SENSORS_OPERATIONS {
diff --git a/drivers/platform/x86/dell/dell-lis3lv02d.c b/drivers/platform/x86/dell/dell-lis3lv02d.c
index efe26d667973..732de5f556f8 100644
--- a/drivers/platform/x86/dell/dell-lis3lv02d.c
+++ b/drivers/platform/x86/dell/dell-lis3lv02d.c
@@ -45,9 +45,11 @@ static const struct dmi_system_id lis3lv02d_devices[] __initconst = {
* Additional individual entries were added after verification.
*/
DELL_LIS3LV02D_DMI_ENTRY("Latitude 5480", 0x29),
+ DELL_LIS3LV02D_DMI_ENTRY("Latitude 5500", 0x29),
DELL_LIS3LV02D_DMI_ENTRY("Latitude E6330", 0x29),
DELL_LIS3LV02D_DMI_ENTRY("Latitude E6430", 0x29),
DELL_LIS3LV02D_DMI_ENTRY("Precision 3540", 0x29),
+ DELL_LIS3LV02D_DMI_ENTRY("Precision 3551", 0x29),
DELL_LIS3LV02D_DMI_ENTRY("Precision M6800", 0x29),
DELL_LIS3LV02D_DMI_ENTRY("Vostro V131", 0x1d),
DELL_LIS3LV02D_DMI_ENTRY("Vostro 5568", 0x29),
diff --git a/drivers/platform/x86/dell/dell-wmi-ddv.c b/drivers/platform/x86/dell/dell-wmi-ddv.c
index 67f3d7158403..62e3d060f038 100644
--- a/drivers/platform/x86/dell/dell-wmi-ddv.c
+++ b/drivers/platform/x86/dell/dell-wmi-ddv.c
@@ -689,9 +689,13 @@ static int dell_wmi_ddv_battery_translate(struct dell_wmi_ddv_data *data,
dev_dbg(&data->wdev->dev, "Translation cache miss\n");
- /* Perform a translation between a ACPI battery and a battery index */
-
- ret = power_supply_get_property(battery, POWER_SUPPLY_PROP_SERIAL_NUMBER, &val);
+ /*
+ * Perform a translation between an ACPI battery and a battery index.
+ * We have to use power_supply_get_property_direct() here because this
+ * function will also get called from the callbacks of the power supply
+ * extension.
+ */
+ ret = power_supply_get_property_direct(battery, POWER_SUPPLY_PROP_SERIAL_NUMBER, &val);
if (ret < 0)
return ret;
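
power_supply_get_property_direct() (introduced in drivers/power/supply/power_supply_core.c later in this diff) skips the extension lookup, so an extension's own callback can read base properties of its supply without re-entering extensions_sem. A hedged sketch of such a callback:

/*
 * Sketch: an extension's get_property callback reads a base property of
 * its own supply through the _direct accessor, avoiding a recursive trip
 * through the extension list (and its rwsem).
 */
static int example_ext_get_property(struct power_supply *psy,
				    const struct power_supply_ext *ext,
				    void *data,
				    enum power_supply_property psp,
				    union power_supply_propval *val)
{
	if (psp == POWER_SUPPLY_PROP_SERIAL_NUMBER)
		return power_supply_get_property_direct(psy, psp, val);
	return -EINVAL;
}
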
diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h b/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h
index 3ad33a094588..817ee7ba07ca 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h
@@ -89,6 +89,11 @@ extern struct wmi_sysman_priv wmi_priv;
enum { ENUM, INT, STR, PO };
+#define ENUM_MIN_ELEMENTS 8
+#define INT_MIN_ELEMENTS 9
+#define STR_MIN_ELEMENTS 8
+#define PO_MIN_ELEMENTS 4
+
enum {
ATTR_NAME,
DISPL_NAME_LANG_CODE,
diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c
index 8cc212c85266..fc2f58b4cbc6 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c
@@ -23,9 +23,10 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a
obj = get_wmiobj_pointer(instance_id, DELL_WMI_BIOS_ENUMERATION_ATTRIBUTE_GUID);
if (!obj)
return -EIO;
- if (obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_STRING) {
+ if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < ENUM_MIN_ELEMENTS ||
+ obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_STRING) {
kfree(obj);
- return -EINVAL;
+ return -EIO;
}
ret = snprintf(buf, PAGE_SIZE, "%s\n", obj->package.elements[CURRENT_VAL].string.pointer);
kfree(obj);
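
The added checks follow the usual rule for firmware-supplied ACPI objects: verify the object is a package and is large enough before indexing its elements. A condensed sketch of the validation (helper hypothetical):

/*
 * Sketch: validate a WMI-returned ACPI object before dereferencing
 * package elements; firmware may return a malformed or truncated package.
 */
static bool example_wmi_pkg_ok(const union acpi_object *obj,
			       u32 min_elems, u32 idx,
			       acpi_object_type want)
{
	return obj->type == ACPI_TYPE_PACKAGE &&
	       obj->package.count >= min_elems &&
	       obj->package.elements[idx].type == want;
}
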
diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c
index 951e75b538fa..735248064239 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c
@@ -25,9 +25,10 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a
obj = get_wmiobj_pointer(instance_id, DELL_WMI_BIOS_INTEGER_ATTRIBUTE_GUID);
if (!obj)
return -EIO;
- if (obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_INTEGER) {
+ if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < INT_MIN_ELEMENTS ||
+ obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_INTEGER) {
kfree(obj);
- return -EINVAL;
+ return -EIO;
}
ret = snprintf(buf, PAGE_SIZE, "%lld\n", obj->package.elements[CURRENT_VAL].integer.value);
kfree(obj);
diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c
index d8f1bf5e58a0..3167e06d416e 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c
@@ -26,9 +26,10 @@ static ssize_t is_enabled_show(struct kobject *kobj, struct kobj_attribute *attr
obj = get_wmiobj_pointer(instance_id, DELL_WMI_BIOS_PASSOBJ_ATTRIBUTE_GUID);
if (!obj)
return -EIO;
- if (obj->package.elements[IS_PASS_SET].type != ACPI_TYPE_INTEGER) {
+ if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < PO_MIN_ELEMENTS ||
+ obj->package.elements[IS_PASS_SET].type != ACPI_TYPE_INTEGER) {
kfree(obj);
- return -EINVAL;
+ return -EIO;
}
ret = snprintf(buf, PAGE_SIZE, "%lld\n", obj->package.elements[IS_PASS_SET].integer.value);
kfree(obj);
diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c
index c392f0ecf8b5..0d2c74f8d1aa 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c
@@ -25,9 +25,10 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a
obj = get_wmiobj_pointer(instance_id, DELL_WMI_BIOS_STRING_ATTRIBUTE_GUID);
if (!obj)
return -EIO;
- if (obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_STRING) {
+ if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < STR_MIN_ELEMENTS ||
+ obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_STRING) {
kfree(obj);
- return -EINVAL;
+ return -EIO;
}
ret = snprintf(buf, PAGE_SIZE, "%s\n", obj->package.elements[CURRENT_VAL].string.pointer);
kfree(obj);
diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
index d00389b860e4..f5402b714657 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
@@ -407,10 +407,10 @@ static int init_bios_attributes(int attr_type, const char *guid)
return retval;
switch (attr_type) {
- case ENUM: min_elements = 8; break;
- case INT: min_elements = 9; break;
- case STR: min_elements = 8; break;
- case PO: min_elements = 4; break;
+ case ENUM: min_elements = ENUM_MIN_ELEMENTS; break;
+ case INT: min_elements = INT_MIN_ELEMENTS; break;
+ case STR: min_elements = STR_MIN_ELEMENTS; break;
+ case PO: min_elements = PO_MIN_ELEMENTS; break;
default:
pr_err("Error: Unknown attr_type: %d\n", attr_type);
return -EINVAL;
@@ -597,7 +597,7 @@ err_release_attributes_data:
release_attributes_data();
err_destroy_classdev:
- device_destroy(&firmware_attributes_class, MKDEV(0, 0));
+ device_unregister(wmi_priv.class_dev);
err_exit_bios_attr_pass_interface:
exit_bios_attr_pass_interface();
@@ -611,7 +611,7 @@ err_exit_bios_attr_set_interface:
static void __exit sysman_exit(void)
{
release_attributes_data();
- device_destroy(&firmware_attributes_class, MKDEV(0, 0));
+ device_unregister(wmi_priv.class_dev);
exit_bios_attr_set_interface();
exit_bios_attr_pass_interface();
}
diff --git a/drivers/platform/x86/dell/dell_rbu.c b/drivers/platform/x86/dell/dell_rbu.c
index e30ca325938c..9dd9f2cb074f 100644
--- a/drivers/platform/x86/dell/dell_rbu.c
+++ b/drivers/platform/x86/dell/dell_rbu.c
@@ -45,7 +45,7 @@
MODULE_AUTHOR("Abhay Salunke <abhay_salunke@dell.com>");
MODULE_DESCRIPTION("Driver for updating BIOS image on DELL systems");
MODULE_LICENSE("GPL");
-MODULE_VERSION("3.2");
+MODULE_VERSION("3.3");
#define BIOS_SCAN_LIMIT 0xffffffff
#define MAX_IMAGE_LENGTH 16
@@ -91,7 +91,7 @@ static void init_packet_head(void)
rbu_data.imagesize = 0;
}
-static int create_packet(void *data, size_t length)
+static int create_packet(void *data, size_t length) __must_hold(&rbu_data.lock)
{
struct packet_data *newpacket;
int ordernum = 0;
@@ -292,7 +292,7 @@ static int packet_read_list(char *data, size_t * pread_length)
remaining_bytes = *pread_length;
bytes_read = rbu_data.packet_read_count;
- list_for_each_entry(newpacket, (&packet_data_head.list)->next, list) {
+ list_for_each_entry(newpacket, &packet_data_head.list, list) {
bytes_copied = do_packet_read(pdest, newpacket,
remaining_bytes, bytes_read, &temp_count);
remaining_bytes -= bytes_copied;
@@ -315,14 +315,14 @@ static void packet_empty_list(void)
{
struct packet_data *newpacket, *tmp;
- list_for_each_entry_safe(newpacket, tmp, (&packet_data_head.list)->next, list) {
+ list_for_each_entry_safe(newpacket, tmp, &packet_data_head.list, list) {
list_del(&newpacket->list);
/*
* zero out the RBU packet memory before freeing
* to make sure there are no stale RBU packets left in memory
*/
- memset(newpacket->data, 0, rbu_data.packetsize);
+ memset(newpacket->data, 0, newpacket->length);
set_memory_wb((unsigned long)newpacket->data,
1 << newpacket->ordernum);
free_pages((unsigned long) newpacket->data,
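
The list_for_each_entry{,_safe}() fixes correct a subtle misuse: the head argument is the list head itself, not head->next; passing ->next made the macro treat a real entry as the head. The memset() change likewise scrubs each packet by its own recorded length instead of the current global packetsize. Sketch of the corrected iteration (freeing simplified to kfree() for brevity):

/*
 * Sketch: the iterator argument is the list head itself, never
 * head->next.  Each packet is scrubbed by its own recorded length.
 */
struct example_packet {
	struct list_head list;
	size_t length;
	void *data;
};

static void example_empty_list(struct list_head *head)
{
	struct example_packet *p, *tmp;

	list_for_each_entry_safe(p, tmp, head, list) {
		list_del(&p->list);
		memset(p->data, 0, p->length);	/* no stale image data */
		kfree(p->data);
		kfree(p);
	}
}
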
diff --git a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c
index 13237890fc92..5bfa7159f5bc 100644
--- a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c
+++ b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c
@@ -1034,7 +1034,7 @@ err_release_attributes_data:
release_attributes_data();
err_destroy_classdev:
- device_destroy(&firmware_attributes_class, MKDEV(0, 0));
+ device_unregister(bioscfg_drv.class_dev);
err_unregister_class:
hp_exit_attr_set_interface();
@@ -1045,7 +1045,7 @@ err_unregister_class:
static void __exit hp_exit(void)
{
release_attributes_data();
- device_destroy(&firmware_attributes_class, MKDEV(0, 0));
+ device_unregister(bioscfg_drv.class_dev);
hp_exit_attr_set_interface();
}
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index ede483573fe0..edb9d2fb02ec 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -15,6 +15,7 @@
#include <linux/bug.h>
#include <linux/cleanup.h>
#include <linux/debugfs.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/dmi.h>
#include <linux/i8042.h>
@@ -267,6 +268,20 @@ static void ideapad_shared_exit(struct ideapad_private *priv)
*/
#define IDEAPAD_EC_TIMEOUT 200 /* in ms */
+/*
+ * Some models (e.g., ThinkBook since 2024) have a low tolerance for being
+ * polled too frequently. Doing so may break the state machine in the EC,
+ * resulting in a hard shutdown.
+ *
+ * It is also observed that frequent polls may disturb the ongoing operation
+ * and notably delay the availability of the EC response.
+ *
+ * These values are used as the delay before the first poll and the interval
+ * between subsequent polls to solve the above issues.
+ */
+#define IDEAPAD_EC_POLL_MIN_US 150
+#define IDEAPAD_EC_POLL_MAX_US 300
+
static int eval_int(acpi_handle handle, const char *name, unsigned long *res)
{
unsigned long long result;
@@ -383,7 +398,7 @@ static int read_ec_data(acpi_handle handle, unsigned long cmd, unsigned long *da
end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1;
while (time_before(jiffies, end_jiffies)) {
- schedule();
+ usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US);
err = eval_vpcr(handle, 1, &val);
if (err)
@@ -414,7 +429,7 @@ static int write_ec_cmd(acpi_handle handle, unsigned long cmd, unsigned long dat
end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1;
while (time_before(jiffies, end_jiffies)) {
- schedule();
+ usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US);
err = eval_vpcr(handle, 1, &val);
if (err)
@@ -1654,7 +1669,7 @@ static int ideapad_kbd_bl_init(struct ideapad_private *priv)
priv->kbd_bl.led.name = "platform::" LED_FUNCTION_KBD_BACKLIGHT;
priv->kbd_bl.led.brightness_get = ideapad_kbd_bl_led_cdev_brightness_get;
priv->kbd_bl.led.brightness_set_blocking = ideapad_kbd_bl_led_cdev_brightness_set;
- priv->kbd_bl.led.flags = LED_BRIGHT_HW_CHANGED;
+ priv->kbd_bl.led.flags = LED_BRIGHT_HW_CHANGED | LED_RETAIN_AT_SHUTDOWN;
err = led_classdev_register(&priv->platform_device->dev, &priv->kbd_bl.led);
if (err)
@@ -1713,7 +1728,7 @@ static int ideapad_fn_lock_led_init(struct ideapad_private *priv)
priv->fn_lock.led.name = "platform::" LED_FUNCTION_FNLOCK;
priv->fn_lock.led.brightness_get = ideapad_fn_lock_led_cdev_get;
priv->fn_lock.led.brightness_set_blocking = ideapad_fn_lock_led_cdev_set;
- priv->fn_lock.led.flags = LED_BRIGHT_HW_CHANGED;
+ priv->fn_lock.led.flags = LED_BRIGHT_HW_CHANGED | LED_RETAIN_AT_SHUTDOWN;
err = led_classdev_register(&priv->platform_device->dev, &priv->fn_lock.led);
if (err)
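
Replacing schedule() with usleep_range() bounds the EC poll rate instead of re-polling as fast as the scheduler allows. A self-contained sketch of the resulting loop, using the timeout and interval values from above:

/*
 * Sketch: bounded-interval polling with a hard timeout.  The 150-300 us
 * window keeps the poll rate below what trips the EC state machine on
 * affected models; the readiness check is passed in as a callback so
 * the sketch stays self-contained.
 */
static int example_poll_ec(bool (*ready)(void *ctx), void *ctx)
{
	unsigned long end = jiffies + msecs_to_jiffies(200) + 1;

	while (time_before(jiffies, end)) {
		usleep_range(150, 300);
		if (ready(ctx))
			return 0;
	}
	return -ETIMEDOUT;
}
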
diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c
index 0b5e43444ed6..f25a427cccda 100644
--- a/drivers/platform/x86/intel/hid.c
+++ b/drivers/platform/x86/intel/hid.c
@@ -54,6 +54,7 @@ static const struct acpi_device_id intel_hid_ids[] = {
{ "INTC107B" },
{ "INTC10CB" },
{ "INTC10CC" },
+ { "INTC10F1" },
{ }
};
MODULE_DEVICE_TABLE(acpi, intel_hid_ids);
diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h
index e136d18b1d38..4a94a4ee031e 100644
--- a/drivers/platform/x86/intel/pmc/core.h
+++ b/drivers/platform/x86/intel/pmc/core.h
@@ -299,6 +299,13 @@ enum ppfear_regs {
#define PTL_PCD_PMC_MMIO_REG_LEN 0x31A8
/* SSRAM PMC Device ID */
+/* LNL */
+#define PMC_DEVID_LNL_SOCM 0xa87f
+
+/* PTL */
+#define PMC_DEVID_PTL_PCDH 0xe37f
+#define PMC_DEVID_PTL_PCDP 0xe47f
+
/* ARL */
#define PMC_DEVID_ARL_SOCM 0x777f
#define PMC_DEVID_ARL_SOCS 0xae7f
diff --git a/drivers/platform/x86/intel/pmc/ssram_telemetry.c b/drivers/platform/x86/intel/pmc/ssram_telemetry.c
index b207247eb5dd..93579152188e 100644
--- a/drivers/platform/x86/intel/pmc/ssram_telemetry.c
+++ b/drivers/platform/x86/intel/pmc/ssram_telemetry.c
@@ -187,6 +187,9 @@ static const struct pci_device_id intel_pmc_ssram_telemetry_pci_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_MTL_SOCM) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_ARL_SOCS) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_ARL_SOCM) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_LNL_SOCM) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_PTL_PCDH) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_PTL_PCDP) },
{ }
};
MODULE_DEVICE_TABLE(pci, intel_pmc_ssram_telemetry_pci_ids);
diff --git a/drivers/platform/x86/intel/tpmi_power_domains.c b/drivers/platform/x86/intel/tpmi_power_domains.c
index 0c5c88eb7baf..9d8247bb9cfa 100644
--- a/drivers/platform/x86/intel/tpmi_power_domains.c
+++ b/drivers/platform/x86/intel/tpmi_power_domains.c
@@ -228,8 +228,10 @@ static int __init tpmi_init(void)
domain_die_map = kcalloc(size_mul(topology_max_packages(), MAX_POWER_DOMAINS),
sizeof(*domain_die_map), GFP_KERNEL);
- if (!domain_die_map)
+ if (!domain_die_map) {
+ ret = -ENOMEM;
goto free_domain_mask;
+ }
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
"platform/x86/tpmi_power_domains:online",
diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c
index 0f8aea18275b..65897fae17df 100644
--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c
+++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c
@@ -58,7 +58,7 @@ static ssize_t show_agent_types(struct kobject *kobj, struct kobj_attribute *att
if (length)
length += sysfs_emit_at(buf, length, " ");
- length += sysfs_emit_at(buf, length, agent_name[agent]);
+ length += sysfs_emit_at(buf, length, "%s", agent_name[agent]);
}
length += sysfs_emit_at(buf, length, "\n");
diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
index 1c7b2f2716ca..44d9948ed224 100644
--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
+++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
@@ -511,10 +511,13 @@ static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_
/* Get the package ID from the TPMI core */
plat_info = tpmi_get_platform_data(auxdev);
- if (plat_info)
- pkg = plat_info->package_id;
- else
+ if (unlikely(!plat_info)) {
dev_info(&auxdev->dev, "Platform information is NULL\n");
+ ret = -ENODEV;
+ goto err_rem_common;
+ }
+
+ pkg = plat_info->package_id;
for (i = 0; i < num_resources; ++i) {
struct tpmi_uncore_power_domain_info *pd_info;
diff --git a/drivers/platform/x86/lenovo-wmi-hotkey-utilities.c b/drivers/platform/x86/lenovo-wmi-hotkey-utilities.c
index 89153afd7015..7b9bad1978ff 100644
--- a/drivers/platform/x86/lenovo-wmi-hotkey-utilities.c
+++ b/drivers/platform/x86/lenovo-wmi-hotkey-utilities.c
@@ -122,26 +122,35 @@ static int lenovo_super_hotkey_wmi_led_init(enum mute_led_type led_type, struct
return -EIO;
union acpi_object *obj __free(kfree) = output.pointer;
- if (obj && obj->type == ACPI_TYPE_INTEGER)
- led_version = obj->integer.value;
- else
+ if (!obj || obj->type != ACPI_TYPE_INTEGER)
return -EIO;
- wpriv->cdev[led_type].max_brightness = LED_ON;
- wpriv->cdev[led_type].flags = LED_CORE_SUSPENDRESUME;
+ led_version = obj->integer.value;
+
+ /*
+ * Output parameter definition: 0 means the mute LED is not supported,
+ * non-zero means it is supported.
+ */
+ if (led_version == 0)
+ return 0;
+
switch (led_type) {
case MIC_MUTE:
- if (led_version != WMI_LUD_SUPPORT_MICMUTE_LED_VER)
- return -EIO;
+ if (led_version != WMI_LUD_SUPPORT_MICMUTE_LED_VER) {
+ pr_warn("The MIC_MUTE LED of this device isn't supported.\n");
+ return 0;
+ }
wpriv->cdev[led_type].name = "platform::micmute";
wpriv->cdev[led_type].brightness_set_blocking = &lsh_wmi_micmute_led_set;
wpriv->cdev[led_type].default_trigger = "audio-micmute";
break;
case AUDIO_MUTE:
- if (led_version != WMI_LUD_SUPPORT_AUDIOMUTE_LED_VER)
- return -EIO;
+ if (led_version != WMI_LUD_SUPPORT_AUDIOMUTE_LED_VER) {
+ pr_warn("The AUDIO_MUTE LED of this device isn't supported.\n");
+ return 0;
+ }
wpriv->cdev[led_type].name = "platform::mute";
wpriv->cdev[led_type].brightness_set_blocking = &lsh_wmi_audiomute_led_set;
@@ -152,6 +161,9 @@ static int lenovo_super_hotkey_wmi_led_init(enum mute_led_type led_type, struct
return -EINVAL;
}
+ wpriv->cdev[led_type].max_brightness = LED_ON;
+ wpriv->cdev[led_type].flags = LED_CORE_SUSPENDRESUME;
+
err = devm_led_classdev_register(dev, &wpriv->cdev[led_type]);
if (err < 0) {
dev_err(dev, "Could not register mute LED %d : %d\n", led_type, err);
diff --git a/drivers/platform/x86/portwell-ec.c b/drivers/platform/x86/portwell-ec.c
index 8b788822237b..3e019c51913e 100644
--- a/drivers/platform/x86/portwell-ec.c
+++ b/drivers/platform/x86/portwell-ec.c
@@ -236,6 +236,7 @@ static int pwec_probe(struct platform_device *pdev)
return ret;
}
+ ec_wdt_dev.parent = &pdev->dev;
ret = devm_watchdog_register_device(&pdev->dev, &ec_wdt_dev);
if (ret < 0) {
dev_err(&pdev->dev, "failed to register Portwell EC Watchdog\n");
diff --git a/drivers/platform/x86/samsung-galaxybook.c b/drivers/platform/x86/samsung-galaxybook.c
index 5878a351993e..3c13e13d4885 100644
--- a/drivers/platform/x86/samsung-galaxybook.c
+++ b/drivers/platform/x86/samsung-galaxybook.c
@@ -1403,6 +1403,7 @@ static int galaxybook_probe(struct platform_device *pdev)
}
static const struct acpi_device_id galaxybook_device_ids[] = {
+ { "SAM0426" },
{ "SAM0427" },
{ "SAM0428" },
{ "SAM0429" },
diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index 00b1e7c79a3d..b73b84fdb15e 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -973,6 +973,7 @@ static const struct attribute_group auth_attr_group = {
.is_visible = auth_attr_is_visible,
.attrs = auth_attrs,
};
+__ATTRIBUTE_GROUPS(auth_attr);
/* ---- Attributes sysfs --------------------------------------------------------- */
static ssize_t display_name_show(struct kobject *kobj, struct kobj_attribute *attr,
@@ -1188,6 +1189,7 @@ static const struct attribute_group tlmi_attr_group = {
.is_visible = attr_is_visible,
.attrs = tlmi_attrs,
};
+__ATTRIBUTE_GROUPS(tlmi_attr);
static void tlmi_attr_setting_release(struct kobject *kobj)
{
@@ -1207,11 +1209,13 @@ static void tlmi_pwd_setting_release(struct kobject *kobj)
static const struct kobj_type tlmi_attr_setting_ktype = {
.release = &tlmi_attr_setting_release,
.sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = tlmi_attr_groups,
};
static const struct kobj_type tlmi_pwd_setting_ktype = {
.release = &tlmi_pwd_setting_release,
.sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = auth_attr_groups,
};
static ssize_t pending_reboot_show(struct kobject *kobj, struct kobj_attribute *attr,
@@ -1380,21 +1384,18 @@ static struct kobj_attribute debug_cmd = __ATTR_WO(debug_cmd);
/* ---- Initialisation --------------------------------------------------------- */
static void tlmi_release_attr(void)
{
- int i;
+ struct kobject *pos, *n;
/* Attribute structures */
- for (i = 0; i < TLMI_SETTINGS_COUNT; i++) {
- if (tlmi_priv.setting[i]) {
- sysfs_remove_group(&tlmi_priv.setting[i]->kobj, &tlmi_attr_group);
- kobject_put(&tlmi_priv.setting[i]->kobj);
- }
- }
sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr);
sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &save_settings.attr);
if (tlmi_priv.can_debug_cmd && debug_support)
sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &debug_cmd.attr);
+ list_for_each_entry_safe(pos, n, &tlmi_priv.attribute_kset->list, entry)
+ kobject_put(pos);
+
kset_unregister(tlmi_priv.attribute_kset);
/* Free up any saved signatures */
@@ -1402,19 +1403,8 @@ static void tlmi_release_attr(void)
kfree(tlmi_priv.pwd_admin->save_signature);
/* Authentication structures */
- sysfs_remove_group(&tlmi_priv.pwd_admin->kobj, &auth_attr_group);
- kobject_put(&tlmi_priv.pwd_admin->kobj);
- sysfs_remove_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group);
- kobject_put(&tlmi_priv.pwd_power->kobj);
-
- if (tlmi_priv.opcode_support) {
- sysfs_remove_group(&tlmi_priv.pwd_system->kobj, &auth_attr_group);
- kobject_put(&tlmi_priv.pwd_system->kobj);
- sysfs_remove_group(&tlmi_priv.pwd_hdd->kobj, &auth_attr_group);
- kobject_put(&tlmi_priv.pwd_hdd->kobj);
- sysfs_remove_group(&tlmi_priv.pwd_nvme->kobj, &auth_attr_group);
- kobject_put(&tlmi_priv.pwd_nvme->kobj);
- }
+ list_for_each_entry_safe(pos, n, &tlmi_priv.authentication_kset->list, entry)
+ kobject_put(pos);
kset_unregister(tlmi_priv.authentication_kset);
}
@@ -1455,6 +1445,14 @@ static int tlmi_sysfs_init(void)
goto fail_device_created;
}
+ tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL,
+ &tlmi_priv.class_dev->kobj);
+ if (!tlmi_priv.authentication_kset) {
+ kset_unregister(tlmi_priv.attribute_kset);
+ ret = -ENOMEM;
+ goto fail_device_created;
+ }
+
for (i = 0; i < TLMI_SETTINGS_COUNT; i++) {
/* Check if index is a valid setting - skip if it isn't */
if (!tlmi_priv.setting[i])
@@ -1471,12 +1469,8 @@ static int tlmi_sysfs_init(void)
/* Build attribute */
tlmi_priv.setting[i]->kobj.kset = tlmi_priv.attribute_kset;
- ret = kobject_add(&tlmi_priv.setting[i]->kobj, NULL,
- "%s", tlmi_priv.setting[i]->display_name);
- if (ret)
- goto fail_create_attr;
-
- ret = sysfs_create_group(&tlmi_priv.setting[i]->kobj, &tlmi_attr_group);
+ ret = kobject_init_and_add(&tlmi_priv.setting[i]->kobj, &tlmi_attr_setting_ktype,
+ NULL, "%s", tlmi_priv.setting[i]->display_name);
if (ret)
goto fail_create_attr;
}
@@ -1496,55 +1490,34 @@ static int tlmi_sysfs_init(void)
}
/* Create authentication entries */
- tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL,
- &tlmi_priv.class_dev->kobj);
- if (!tlmi_priv.authentication_kset) {
- ret = -ENOMEM;
- goto fail_create_attr;
- }
tlmi_priv.pwd_admin->kobj.kset = tlmi_priv.authentication_kset;
- ret = kobject_add(&tlmi_priv.pwd_admin->kobj, NULL, "%s", "Admin");
- if (ret)
- goto fail_create_attr;
-
- ret = sysfs_create_group(&tlmi_priv.pwd_admin->kobj, &auth_attr_group);
+ ret = kobject_init_and_add(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype,
+ NULL, "%s", "Admin");
if (ret)
goto fail_create_attr;
tlmi_priv.pwd_power->kobj.kset = tlmi_priv.authentication_kset;
- ret = kobject_add(&tlmi_priv.pwd_power->kobj, NULL, "%s", "Power-on");
- if (ret)
- goto fail_create_attr;
-
- ret = sysfs_create_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group);
+ ret = kobject_init_and_add(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype,
+ NULL, "%s", "Power-on");
if (ret)
goto fail_create_attr;
if (tlmi_priv.opcode_support) {
tlmi_priv.pwd_system->kobj.kset = tlmi_priv.authentication_kset;
- ret = kobject_add(&tlmi_priv.pwd_system->kobj, NULL, "%s", "System");
- if (ret)
- goto fail_create_attr;
-
- ret = sysfs_create_group(&tlmi_priv.pwd_system->kobj, &auth_attr_group);
+ ret = kobject_init_and_add(&tlmi_priv.pwd_system->kobj, &tlmi_pwd_setting_ktype,
+ NULL, "%s", "System");
if (ret)
goto fail_create_attr;
tlmi_priv.pwd_hdd->kobj.kset = tlmi_priv.authentication_kset;
- ret = kobject_add(&tlmi_priv.pwd_hdd->kobj, NULL, "%s", "HDD");
- if (ret)
- goto fail_create_attr;
-
- ret = sysfs_create_group(&tlmi_priv.pwd_hdd->kobj, &auth_attr_group);
+ ret = kobject_init_and_add(&tlmi_priv.pwd_hdd->kobj, &tlmi_pwd_setting_ktype,
+ NULL, "%s", "HDD");
if (ret)
goto fail_create_attr;
tlmi_priv.pwd_nvme->kobj.kset = tlmi_priv.authentication_kset;
- ret = kobject_add(&tlmi_priv.pwd_nvme->kobj, NULL, "%s", "NVMe");
- if (ret)
- goto fail_create_attr;
-
- ret = sysfs_create_group(&tlmi_priv.pwd_nvme->kobj, &auth_attr_group);
+ ret = kobject_init_and_add(&tlmi_priv.pwd_nvme->kobj, &tlmi_pwd_setting_ktype,
+ NULL, "%s", "NVMe");
if (ret)
goto fail_create_attr;
}
@@ -1554,7 +1527,7 @@ static int tlmi_sysfs_init(void)
fail_create_attr:
tlmi_release_attr();
fail_device_created:
- device_destroy(&firmware_attributes_class, MKDEV(0, 0));
+ device_unregister(tlmi_priv.class_dev);
fail_class_created:
return ret;
}
@@ -1577,8 +1550,6 @@ static struct tlmi_pwd_setting *tlmi_create_auth(const char *pwd_type,
new_pwd->maxlen = tlmi_priv.pwdcfg.core.max_length;
new_pwd->index = 0;
- kobject_init(&new_pwd->kobj, &tlmi_pwd_setting_ktype);
-
return new_pwd;
}
@@ -1683,7 +1654,6 @@ static int tlmi_analyze(struct wmi_device *wdev)
if (setting->possible_values)
strreplace(setting->possible_values, ',', ';');
- kobject_init(&setting->kobj, &tlmi_attr_setting_ktype);
tlmi_priv.setting[i] = setting;
kfree(item);
}
@@ -1781,7 +1751,7 @@ fail_clear_attr:
static void tlmi_remove(struct wmi_device *wdev)
{
tlmi_release_attr();
- device_destroy(&firmware_attributes_class, MKDEV(0, 0));
+ device_unregister(tlmi_priv.class_dev);
}
static int tlmi_probe(struct wmi_device *wdev, const void *context)
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index e7350c9fa3aa..b59b4d90b0c7 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -3295,6 +3295,7 @@ static const struct key_entry keymap_lenovo[] __initconst = {
*/
{ KE_KEY, 0x131d, { KEY_VENDOR } }, /* System debug info, similar to old ThinkPad key */
{ KE_KEY, 0x1320, { KEY_LINK_PHONE } },
+ { KE_KEY, 0x1402, { KEY_LINK_PHONE } },
{ KE_KEY, TP_HKEY_EV_TRACK_DOUBLETAP /* 0x8036 */, { KEY_PROG4 } },
{ KE_END }
};
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index e46453750d5f..03aecf8bb7f8 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -177,16 +177,22 @@ static int wmi_device_enable(struct wmi_device *wdev, bool enable)
acpi_handle handle;
acpi_status status;
- if (!(wblock->gblock.flags & ACPI_WMI_EXPENSIVE))
- return 0;
-
if (wblock->dev.dev.type == &wmi_type_method)
return 0;
- if (wblock->dev.dev.type == &wmi_type_event)
+ if (wblock->dev.dev.type == &wmi_type_event) {
+ /*
+ * Windows always enables/disables WMI events, even when they are
+ * not marked as being expensive. We follow this behavior for
+ * compatibility reasons.
+ */
snprintf(method, sizeof(method), "WE%02X", wblock->gblock.notify_id);
- else
+ } else {
+ if (!(wblock->gblock.flags & ACPI_WMI_EXPENSIVE))
+ return 0;
+
get_acpi_method_name(wblock, 'C', method);
+ }
/*
* Not all WMI devices marked as expensive actually implement the
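
The event path builds the per-event enable method name from the notify id, and it is now evaluated for every WMI event (as Windows does), while "..C" collection methods remain limited to expensive blocks. For reference, the naming scheme (hypothetical helper):

/*
 * Sketch: WMI event enable/disable methods are named "WE" followed by
 * two hex digits of the notify id, e.g. notify_id 0xD0 -> "WED0".
 */
static void example_event_method_name(char *buf, size_t len, u8 notify_id)
{
	snprintf(buf, len, "WE%02X", notify_id);
}
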
diff --git a/drivers/pmdomain/governor.c b/drivers/pmdomain/governor.c
index c1e148657c87..39359811a930 100644
--- a/drivers/pmdomain/governor.c
+++ b/drivers/pmdomain/governor.c
@@ -8,6 +8,7 @@
#include <linux/pm_domain.h>
#include <linux/pm_qos.h>
#include <linux/hrtimer.h>
+#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/cpumask.h>
#include <linux/ktime.h>
@@ -349,6 +350,8 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
struct cpuidle_device *dev;
ktime_t domain_wakeup, next_hrtimer;
ktime_t now = ktime_get();
+ struct device *cpu_dev;
+ s64 cpu_constraint, global_constraint;
s64 idle_duration_ns;
int cpu, i;
@@ -359,6 +362,7 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN))
return true;
+ global_constraint = cpu_latency_qos_limit();
/*
* Find the next wakeup for any of the online CPUs within the PM domain
* and its subdomains. Note, we only need the genpd->cpus, as it already
@@ -372,8 +376,16 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
if (ktime_before(next_hrtimer, domain_wakeup))
domain_wakeup = next_hrtimer;
}
+
+ cpu_dev = get_cpu_device(cpu);
+ if (cpu_dev) {
+ cpu_constraint = dev_pm_qos_raw_resume_latency(cpu_dev);
+ if (cpu_constraint < global_constraint)
+ global_constraint = cpu_constraint;
+ }
}
+ global_constraint *= NSEC_PER_USEC;
/* The minimum idle duration is from now - until the next wakeup. */
idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, now));
if (idle_duration_ns <= 0)
@@ -389,8 +401,10 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
*/
i = genpd->state_idx;
do {
- if (idle_duration_ns >= (genpd->states[i].residency_ns +
- genpd->states[i].power_off_latency_ns)) {
+ if ((idle_duration_ns >= (genpd->states[i].residency_ns +
+ genpd->states[i].power_off_latency_ns)) &&
+ (global_constraint >= (genpd->states[i].power_on_latency_ns +
+ genpd->states[i].power_off_latency_ns))) {
genpd->state_idx = i;
genpd->gd->last_enter = now;
genpd->gd->reflect_residency = true;
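
cpu_power_down_ok() now takes the tightest of cpu_latency_qos_limit() and each online CPU's dev_pm_qos_raw_resume_latency(), converts it to nanoseconds, and requires a candidate domain state's power_on + power_off latency to fit within it. A condensed sketch of the state selection (types hypothetical):

struct example_state {
	s64 residency_ns;
	s64 power_on_latency_ns;
	s64 power_off_latency_ns;
};

/*
 * Sketch: pick the deepest state whose residency fits the idle window
 * and whose on+off latency fits the tightest QoS constraint (in ns).
 */
static int example_pick_state(const struct example_state *states, int n,
			      s64 idle_ns, s64 constraint_ns)
{
	int i;

	for (i = n - 1; i >= 0; i--) {
		if (idle_ns >= states[i].residency_ns +
			       states[i].power_off_latency_ns &&
		    constraint_ns >= states[i].power_on_latency_ns +
				     states[i].power_off_latency_ns)
			return i;	/* deepest state meeting both bounds */
	}
	return -1;	/* no state fits: stay powered on */
}
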
diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index 33a5bfce4604..cfb0e3e0d4aa 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -1235,9 +1235,8 @@ bool power_supply_has_property(struct power_supply *psy,
return false;
}
-int power_supply_get_property(struct power_supply *psy,
- enum power_supply_property psp,
- union power_supply_propval *val)
+static int __power_supply_get_property(struct power_supply *psy, enum power_supply_property psp,
+ union power_supply_propval *val, bool use_extensions)
{
struct power_supply_ext_registration *reg;
@@ -1247,10 +1246,14 @@ int power_supply_get_property(struct power_supply *psy,
return -ENODEV;
}
- scoped_guard(rwsem_read, &psy->extensions_sem) {
- power_supply_for_each_extension(reg, psy) {
- if (power_supply_ext_has_property(reg->ext, psp))
+ if (use_extensions) {
+ scoped_guard(rwsem_read, &psy->extensions_sem) {
+ power_supply_for_each_extension(reg, psy) {
+ if (!power_supply_ext_has_property(reg->ext, psp))
+ continue;
+
return reg->ext->get_property(psy, reg->ext, reg->data, psp, val);
+ }
}
}
@@ -1261,20 +1264,49 @@ int power_supply_get_property(struct power_supply *psy,
else
return -EINVAL;
}
+
+int power_supply_get_property(struct power_supply *psy, enum power_supply_property psp,
+ union power_supply_propval *val)
+{
+ return __power_supply_get_property(psy, psp, val, true);
+}
EXPORT_SYMBOL_GPL(power_supply_get_property);
-int power_supply_set_property(struct power_supply *psy,
- enum power_supply_property psp,
- const union power_supply_propval *val)
+/**
+ * power_supply_get_property_direct - Read a power supply property without checking for extensions
+ * @psy: The power supply
+ * @psp: The power supply property to read
+ * @val: The resulting value of the power supply property
+ *
+ * Read a power supply property without taking into account any power supply extensions registered
+ * on the given power supply. This is mostly useful for power supply extensions that want to access
+ * their own power supply as using power_supply_get_property() directly will result in a potential
+ * deadlock.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int power_supply_get_property_direct(struct power_supply *psy, enum power_supply_property psp,
+ union power_supply_propval *val)
+{
+ return __power_supply_get_property(psy, psp, val, false);
+}
+EXPORT_SYMBOL_GPL(power_supply_get_property_direct);
+
+
+static int __power_supply_set_property(struct power_supply *psy, enum power_supply_property psp,
+ const union power_supply_propval *val, bool use_extensions)
{
struct power_supply_ext_registration *reg;
if (atomic_read(&psy->use_cnt) <= 0)
return -ENODEV;
- scoped_guard(rwsem_read, &psy->extensions_sem) {
- power_supply_for_each_extension(reg, psy) {
- if (power_supply_ext_has_property(reg->ext, psp)) {
+ if (use_extensions) {
+ scoped_guard(rwsem_read, &psy->extensions_sem) {
+ power_supply_for_each_extension(reg, psy) {
+ if (!power_supply_ext_has_property(reg->ext, psp))
+ continue;
+
if (reg->ext->set_property)
return reg->ext->set_property(psy, reg->ext, reg->data,
psp, val);
@@ -1289,8 +1321,34 @@ int power_supply_set_property(struct power_supply *psy,
return psy->desc->set_property(psy, psp, val);
}
+
+int power_supply_set_property(struct power_supply *psy, enum power_supply_property psp,
+ const union power_supply_propval *val)
+{
+ return __power_supply_set_property(psy, psp, val, true);
+}
EXPORT_SYMBOL_GPL(power_supply_set_property);
+/**
+ * power_supply_set_property_direct - Write a power supply property without checking for extensions
+ * @psy: The power supply
+ * @psp: The power supply property to write
+ * @val: The value to write to the power supply property
+ *
+ * Write a power supply property without taking into account any power supply extensions registered
+ * on the given power supply. This is mostly useful for power supply extensions that want to access
+ * their own power supply as using power_supply_set_property() directly will result in a potential
+ * deadlock.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int power_supply_set_property_direct(struct power_supply *psy, enum power_supply_property psp,
+ const union power_supply_propval *val)
+{
+ return __power_supply_set_property(psy, psp, val, false);
+}
+EXPORT_SYMBOL_GPL(power_supply_set_property_direct);
+
int power_supply_property_is_writeable(struct power_supply *psy,
enum power_supply_property psp)
{
diff --git a/drivers/power/supply/test_power.c b/drivers/power/supply/test_power.c
index 5bfdfcf6013b..2c0e9ad820c0 100644
--- a/drivers/power/supply/test_power.c
+++ b/drivers/power/supply/test_power.c
@@ -259,6 +259,7 @@ static const struct power_supply_config test_power_configs[] = {
static int test_power_battery_extmanufacture_year = 1234;
static int test_power_battery_exttemp_max = 1000;
static const enum power_supply_property test_power_battery_extprops[] = {
+ POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW,
POWER_SUPPLY_PROP_MANUFACTURE_YEAR,
POWER_SUPPLY_PROP_TEMP_MAX,
};
@@ -270,6 +271,9 @@ static int test_power_battery_extget_property(struct power_supply *psy,
union power_supply_propval *val)
{
switch (psp) {
+ case POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW:
+ return power_supply_get_property_direct(psy, POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG,
+ val);
case POWER_SUPPLY_PROP_MANUFACTURE_YEAR:
val->intval = test_power_battery_extmanufacture_year;
break;
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index e3be40adc0d7..faa0b6bc5b53 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -341,12 +341,28 @@ static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
{
struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
struct rapl_defaults *defaults = get_defaults(rd->rp);
+ u64 val;
int ret;
cpus_read_lock();
ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode);
- if (!ret && defaults->set_floor_freq)
+ if (ret)
+ goto end;
+
+ ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, false, &val);
+ if (ret)
+ goto end;
+
+ if (mode != val) {
+ pr_debug("%s cannot be %s\n", power_zone->name,
+ str_enabled_disabled(mode));
+ goto end;
+ }
+
+ if (defaults->set_floor_freq)
defaults->set_floor_freq(rd, mode);
+
+end:
cpus_read_unlock();
return ret;
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 35a5994bf64f..36f57d7b4a66 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -121,7 +121,8 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
struct ptp_clock_info *ops;
int err = -EOPNOTSUPP;
- if (ptp_clock_freerun(ptp)) {
+ if (tx->modes & (ADJ_SETOFFSET | ADJ_FREQUENCY | ADJ_OFFSET) &&
+ ptp_clock_freerun(ptp)) {
pr_err("ptp: physical clock is free running\n");
return -EBUSY;
}
diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
index 18934e28469e..a6aad743c282 100644
--- a/drivers/ptp/ptp_private.h
+++ b/drivers/ptp/ptp_private.h
@@ -100,10 +100,20 @@ static inline bool ptp_vclock_in_use(struct ptp_clock *ptp)
{
bool in_use = false;
+ /* Virtual clocks can't be stacked on top of virtual clocks.
+ * Avoid acquiring the n_vclocks_mux on virtual clocks, to allow this
+ * function to be called from code paths where the n_vclocks_mux of the
+ * parent physical clock is already held. Functionally that's not an
+ * issue, but lockdep would complain, because they have the same lock
+ * class.
+ */
+ if (ptp->is_virtual_clock)
+ return false;
+
if (mutex_lock_interruptible(&ptp->n_vclocks_mux))
return true;
- if (!ptp->is_virtual_clock && ptp->n_vclocks)
+ if (ptp->n_vclocks)
in_use = true;
mutex_unlock(&ptp->n_vclocks_mux);
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 4d842c692194..edf776b8ad53 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -596,7 +596,7 @@ static bool pwm_state_valid(const struct pwm_state *state)
* and supposed to be ignored. So also ignore any strange values and
* consider the state ok.
*/
- if (state->enabled)
+ if (!state->enabled)
return true;
if (!state->period)
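
The one-character fix inverts the intended logic: per the comment above it, a disabled state's settings are ignored, so disabled states must always be reported valid and only enabled states range-checked. Sketch of the corrected predicate (simplified):

/*
 * Sketch: the settings of a disabled state are ignored, so a disabled
 * state is always valid; only enabled states need range checks.
 */
static bool example_state_valid(const struct pwm_state *s)
{
	if (!s->enabled)
		return true;
	return s->period > 0 && s->duty_cycle <= s->period;
}
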
diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c
index 7eaab5831499..33d3554b9197 100644
--- a/drivers/pwm/pwm-mediatek.c
+++ b/drivers/pwm/pwm-mediatek.c
@@ -130,8 +130,10 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm,
return ret;
clk_rate = clk_get_rate(pc->clk_pwms[pwm->hwpwm]);
- if (!clk_rate)
- return -EINVAL;
+ if (!clk_rate) {
+ ret = -EINVAL;
+ goto out;
+ }
/* Make sure we use the bus clock and not the 26MHz clock */
if (pc->soc->has_ck_26m_sel)
@@ -150,9 +152,9 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm,
}
if (clkdiv > PWM_CLK_DIV_MAX) {
- pwm_mediatek_clk_disable(chip, pwm);
dev_err(pwmchip_parent(chip), "period of %d ns not supported\n", period_ns);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
if (pc->soc->pwm45_fixup && pwm->hwpwm > 2) {
@@ -169,9 +171,10 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm,
pwm_mediatek_writel(pc, pwm->hwpwm, reg_width, cnt_period);
pwm_mediatek_writel(pc, pwm->hwpwm, reg_thres, cnt_duty);
+out:
pwm_mediatek_clk_disable(chip, pwm);
- return 0;
+ return ret;
}
static int pwm_mediatek_enable(struct pwm_chip *chip, struct pwm_device *pwm)
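
After the rework, pwm_mediatek_clk_disable() runs exactly once on every exit from pwm_mediatek_config(): the !clk_rate path no longer leaks the enabled clock, and the clkdiv error path no longer disables it twice (once inline, once at return). The generic single-exit shape (all example_* helpers are hypothetical stubs):

struct example_pwm { void *clk; };

static int example_clk_enable(struct example_pwm *p) { return 0; }
static void example_clk_disable(struct example_pwm *p) { }
static bool example_rate_ok(struct example_pwm *p) { return true; }
static void example_write_regs(struct example_pwm *p) { }

static int example_config(struct example_pwm *p)
{
	int ret;

	ret = example_clk_enable(p);
	if (ret)
		return ret;

	if (!example_rate_ok(p)) {
		ret = -EINVAL;
		goto out;		/* do not return with the clock held */
	}

	example_write_regs(p);
	ret = 0;
out:
	example_clk_disable(p);	/* exactly one disable on every path */
	return ret;
}
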
diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c
index 97287e838ce1..66464674223f 100644
--- a/drivers/rapidio/rio_cm.c
+++ b/drivers/rapidio/rio_cm.c
@@ -783,6 +783,9 @@ static int riocm_ch_send(u16 ch_id, void *buf, int len)
if (buf == NULL || ch_id == 0 || len == 0 || len > RIO_MAX_MSG_SIZE)
return -EINVAL;
+ if (len < sizeof(struct rio_ch_chan_hdr))
+ return -EINVAL; /* insufficient data from user */
+
ch = riocm_get_channel(ch_id);
if (!ch) {
riocm_error("%s(%d) ch_%d not found", current->comm,
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 7a248dc8d2e2..cbd6d53ebfb5 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -5639,6 +5639,7 @@ static void regulator_remove_coupling(struct regulator_dev *rdev)
ERR_PTR(err));
}
+ rdev->coupling_desc.n_coupled = 0;
kfree(rdev->coupling_desc.coupled_rdevs);
rdev->coupling_desc.coupled_rdevs = NULL;
}
diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c
index bd9447dac596..c282236959b1 100644
--- a/drivers/regulator/fan53555.c
+++ b/drivers/regulator/fan53555.c
@@ -147,6 +147,7 @@ struct fan53555_device_info {
unsigned int slew_mask;
const unsigned int *ramp_delay_table;
unsigned int n_ramp_values;
+ unsigned int enable_time;
unsigned int slew_rate;
};
@@ -282,6 +283,7 @@ static int fan53526_voltages_setup_fairchild(struct fan53555_device_info *di)
di->slew_mask = CTL_SLEW_MASK;
di->ramp_delay_table = slew_rates;
di->n_ramp_values = ARRAY_SIZE(slew_rates);
+ di->enable_time = 250;
di->vsel_count = FAN53526_NVOLTAGES;
return 0;
@@ -296,10 +298,12 @@ static int fan53555_voltages_setup_fairchild(struct fan53555_device_info *di)
case FAN53555_CHIP_REV_00:
di->vsel_min = 600000;
di->vsel_step = 10000;
+ di->enable_time = 400;
break;
case FAN53555_CHIP_REV_13:
di->vsel_min = 800000;
di->vsel_step = 10000;
+ di->enable_time = 400;
break;
default:
dev_err(di->dev,
@@ -311,13 +315,19 @@ static int fan53555_voltages_setup_fairchild(struct fan53555_device_info *di)
case FAN53555_CHIP_ID_01:
case FAN53555_CHIP_ID_03:
case FAN53555_CHIP_ID_05:
+ di->vsel_min = 600000;
+ di->vsel_step = 10000;
+ di->enable_time = 400;
+ break;
case FAN53555_CHIP_ID_08:
di->vsel_min = 600000;
di->vsel_step = 10000;
+ di->enable_time = 175;
break;
case FAN53555_CHIP_ID_04:
di->vsel_min = 603000;
di->vsel_step = 12826;
+ di->enable_time = 400;
break;
default:
dev_err(di->dev,
@@ -350,6 +360,7 @@ static int fan53555_voltages_setup_rockchip(struct fan53555_device_info *di)
di->slew_mask = CTL_SLEW_MASK;
di->ramp_delay_table = slew_rates;
di->n_ramp_values = ARRAY_SIZE(slew_rates);
+ di->enable_time = 360;
di->vsel_count = FAN53555_NVOLTAGES;
return 0;
@@ -372,6 +383,7 @@ static int rk8602_voltages_setup_rockchip(struct fan53555_device_info *di)
di->slew_mask = CTL_SLEW_MASK;
di->ramp_delay_table = slew_rates;
di->n_ramp_values = ARRAY_SIZE(slew_rates);
+ di->enable_time = 360;
di->vsel_count = RK8602_NVOLTAGES;
return 0;
@@ -395,6 +407,7 @@ static int fan53555_voltages_setup_silergy(struct fan53555_device_info *di)
di->slew_mask = CTL_SLEW_MASK;
di->ramp_delay_table = slew_rates;
di->n_ramp_values = ARRAY_SIZE(slew_rates);
+ di->enable_time = 400;
di->vsel_count = FAN53555_NVOLTAGES;
return 0;
@@ -594,6 +607,7 @@ static int fan53555_regulator_register(struct fan53555_device_info *di,
rdesc->ramp_mask = di->slew_mask;
rdesc->ramp_delay_table = di->ramp_delay_table;
rdesc->n_ramp_values = di->n_ramp_values;
+ rdesc->enable_time = di->enable_time;
rdesc->owner = THIS_MODULE;
rdev = devm_regulator_register(di->dev, &di->desc, config);
diff --git a/drivers/regulator/gpio-regulator.c b/drivers/regulator/gpio-regulator.c
index 75bd53445ba7..6351ceefdb3e 100644
--- a/drivers/regulator/gpio-regulator.c
+++ b/drivers/regulator/gpio-regulator.c
@@ -260,8 +260,10 @@ static int gpio_regulator_probe(struct platform_device *pdev)
return -ENOMEM;
}
- drvdata->gpiods = devm_kzalloc(dev, sizeof(struct gpio_desc *),
- GFP_KERNEL);
+ drvdata->gpiods = devm_kcalloc(dev, config->ngpios,
+ sizeof(struct gpio_desc *), GFP_KERNEL);
+ if (!drvdata->gpiods)
+ return -ENOMEM;
if (config->input_supply) {
drvdata->desc.supply_name = devm_kstrdup(&pdev->dev,
@@ -274,8 +276,6 @@ static int gpio_regulator_probe(struct platform_device *pdev)
}
}
- if (!drvdata->gpiods)
- return -ENOMEM;
for (i = 0; i < config->ngpios; i++) {
drvdata->gpiods[i] = devm_gpiod_get_index(dev,
NULL,
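
The original devm_kzalloc() reserved room for exactly one gpio_desc pointer regardless of config->ngpios, so writing gpiods[i] for i > 0 corrupted adjacent memory; devm_kcalloc() sizes by element count and also guards against multiplication overflow. Sketch:

/*
 * Sketch: size the array by element count.  devm_kcalloc() also zeroes
 * the memory and rejects n * size overflow.
 */
static struct gpio_desc **example_alloc_gpiods(struct device *dev,
					       unsigned int ngpios)
{
	return devm_kcalloc(dev, ngpios, sizeof(struct gpio_desc *),
			    GFP_KERNEL);
}
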
diff --git a/drivers/regulator/max20086-regulator.c b/drivers/regulator/max20086-regulator.c
index b4fe76e33ff2..fcdd2d0317a5 100644
--- a/drivers/regulator/max20086-regulator.c
+++ b/drivers/regulator/max20086-regulator.c
@@ -5,6 +5,7 @@
// Copyright (C) 2022 Laurent Pinchart <laurent.pinchart@idesonboard.com>
// Copyright (C) 2018 Avnet, Inc.
+#include <linux/cleanup.h>
#include <linux/err.h>
#include <linux/gpio/consumer.h>
#include <linux/i2c.h>
@@ -133,11 +134,11 @@ static int max20086_regulators_register(struct max20086 *chip)
static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on)
{
struct of_regulator_match *matches;
- struct device_node *node;
unsigned int i;
int ret;
- node = of_get_child_by_name(chip->dev->of_node, "regulators");
+ struct device_node *node __free(device_node) =
+ of_get_child_by_name(chip->dev->of_node, "regulators");
if (!node) {
dev_err(chip->dev, "regulators node not found\n");
return -ENODEV;
@@ -153,7 +154,6 @@ static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on)
ret = of_regulator_match(chip->dev, node, matches,
chip->info->num_outputs);
- of_node_put(node);
if (ret < 0) {
dev_err(chip->dev, "Failed to match regulators\n");
return -EINVAL;
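
The __free(device_node) annotation attaches of_node_put() to the variable's scope, which is why the explicit put after of_regulator_match() disappears: every return path, including the early -ENODEV, now drops the reference automatically. Sketch of the idiom:

/*
 * Sketch: scope-based OF node cleanup from <linux/cleanup.h>;
 * of_node_put() runs automatically on every return path.
 */
static int example_parse_regulators(struct device *dev)
{
	struct device_node *node __free(device_node) =
		of_get_child_by_name(dev->of_node, "regulators");

	if (!node)
		return -ENODEV;

	/* ... match regulators; no explicit of_node_put() needed ... */
	return 0;
}
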
diff --git a/drivers/regulator/mp886x.c b/drivers/regulator/mp886x.c
index 48dcee5287f3..9ad16b04c913 100644
--- a/drivers/regulator/mp886x.c
+++ b/drivers/regulator/mp886x.c
@@ -348,7 +348,8 @@ static const struct of_device_id mp886x_dt_ids[] = {
MODULE_DEVICE_TABLE(of, mp886x_dt_ids);
static const struct i2c_device_id mp886x_id[] = {
- { "mp886x", (kernel_ulong_t)&mp8869_ci },
+ { "mp8867", (kernel_ulong_t)&mp8867_ci },
+ { "mp8869", (kernel_ulong_t)&mp8869_ci },
{ },
};
MODULE_DEVICE_TABLE(i2c, mp886x_id);
diff --git a/drivers/regulator/sy8824x.c b/drivers/regulator/sy8824x.c
index c05b67e26ac8..5bec84db25f1 100644
--- a/drivers/regulator/sy8824x.c
+++ b/drivers/regulator/sy8824x.c
@@ -213,7 +213,10 @@ static const struct of_device_id sy8824_dt_ids[] = {
MODULE_DEVICE_TABLE(of, sy8824_dt_ids);
static const struct i2c_device_id sy8824_id[] = {
- { "sy8824", (kernel_ulong_t)&sy8824c_cfg },
+ { "sy8824c", (kernel_ulong_t)&sy8824c_cfg },
+ { "sy8824e", (kernel_ulong_t)&sy8824e_cfg },
+ { "sy20276", (kernel_ulong_t)&sy20276_cfg },
+ { "sy20278", (kernel_ulong_t)&sy20278_cfg },
{ }
};
MODULE_DEVICE_TABLE(i2c, sy8824_id);
diff --git a/drivers/regulator/tps65219-regulator.c b/drivers/regulator/tps65219-regulator.c
index b16b300d7f45..5e67fdc88f49 100644
--- a/drivers/regulator/tps65219-regulator.c
+++ b/drivers/regulator/tps65219-regulator.c
@@ -436,46 +436,46 @@ static int tps65219_regulator_probe(struct platform_device *pdev)
pmic->rdesc[i].name);
}
- irq_data = devm_kmalloc(tps->dev, pmic->common_irq_size, GFP_KERNEL);
- if (!irq_data)
- return -ENOMEM;
-
for (i = 0; i < pmic->common_irq_size; ++i) {
irq_type = &pmic->common_irq_types[i];
irq = platform_get_irq_byname(pdev, irq_type->irq_name);
if (irq < 0)
return -EINVAL;
- irq_data[i].dev = tps->dev;
- irq_data[i].type = irq_type;
+ irq_data = devm_kmalloc(tps->dev, sizeof(*irq_data), GFP_KERNEL);
+ if (!irq_data)
+ return -ENOMEM;
+
+ irq_data->dev = tps->dev;
+ irq_data->type = irq_type;
error = devm_request_threaded_irq(tps->dev, irq, NULL,
tps65219_regulator_irq_handler,
IRQF_ONESHOT,
irq_type->irq_name,
- &irq_data[i]);
+ irq_data);
if (error)
return dev_err_probe(tps->dev, PTR_ERR(rdev),
"Failed to request %s IRQ %d: %d\n",
irq_type->irq_name, irq, error);
}
- irq_data = devm_kmalloc(tps->dev, pmic->dev_irq_size, GFP_KERNEL);
- if (!irq_data)
- return -ENOMEM;
-
for (i = 0; i < pmic->dev_irq_size; ++i) {
irq_type = &pmic->irq_types[i];
irq = platform_get_irq_byname(pdev, irq_type->irq_name);
if (irq < 0)
return -EINVAL;
- irq_data[i].dev = tps->dev;
- irq_data[i].type = irq_type;
+ irq_data = devm_kmalloc(tps->dev, sizeof(*irq_data), GFP_KERNEL);
+ if (!irq_data)
+ return -ENOMEM;
+
+ irq_data->dev = tps->dev;
+ irq_data->type = irq_type;
error = devm_request_threaded_irq(tps->dev, irq, NULL,
tps65219_regulator_irq_handler,
IRQF_ONESHOT,
irq_type->irq_name,
- &irq_data[i]);
+ irq_data);
if (error)
return dev_err_probe(tps->dev, PTR_ERR(rdev),
"Failed to request %s IRQ %d: %d\n",
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 8172869bd3d7..0743c6acd6e2 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -692,8 +692,12 @@ static irqreturn_t cmos_interrupt(int irq, void *p)
{
u8 irqstat;
u8 rtc_control;
+ unsigned long flags;
- spin_lock(&rtc_lock);
+ /* We cannot use spin_lock() here, as cmos_interrupt() is also called
+ * in a non-irq context.
+ */
+ spin_lock_irqsave(&rtc_lock, flags);
/* When the HPET interrupt handler calls us, the interrupt
* status is passed as arg1 instead of the irq number. But
@@ -727,7 +731,7 @@ static irqreturn_t cmos_interrupt(int irq, void *p)
hpet_mask_rtc_irq_bit(RTC_AIE);
CMOS_READ(RTC_INTR_FLAGS);
}
- spin_unlock(&rtc_lock);
+ spin_unlock_irqrestore(&rtc_lock, flags);
if (is_intr(irqstat)) {
rtc_update_irq(p, 1, irqstat);
@@ -1295,9 +1299,7 @@ static void cmos_check_wkalrm(struct device *dev)
* ACK the rtc irq here
*/
if (t_now >= cmos->alarm_expires && cmos_use_acpi_alarm()) {
- local_irq_disable();
cmos_interrupt(0, (void *)cmos->rtc);
- local_irq_enable();
return;
}
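
spin_lock_irqsave() is needed because cmos_interrupt() is now called from process context as well (cmos_check_wkalrm() invokes it directly, dropping the old local_irq_disable() bracket); taking a plain spin_lock() on rtc_lock from process context could deadlock against the hard IRQ. The general rule, sketched:

/*
 * Sketch: a handler reachable from both hard-IRQ and process context
 * must use the irqsave variant so the interrupt state of either caller
 * is preserved and restored.
 */
static void example_shared_path(spinlock_t *lock)
{
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	/* ... touch state shared with the interrupt handler ... */
	spin_unlock_irqrestore(lock, flags);
}
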
diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c
index 31c7dca8f469..2e1ac0c42e93 100644
--- a/drivers/rtc/rtc-pcf2127.c
+++ b/drivers/rtc/rtc-pcf2127.c
@@ -1538,7 +1538,12 @@ static int pcf2127_spi_probe(struct spi_device *spi)
variant = &pcf21xx_cfg[type];
}
- config.max_register = variant->max_register,
+ if (variant->type == PCF2131) {
+ config.read_flag_mask = 0x0;
+ config.write_flag_mask = 0x0;
+ }
+
+ config.max_register = variant->max_register;
regmap = devm_regmap_init_spi(spi, &config);
if (IS_ERR(regmap)) {
diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
index db5c9b641277..a7220b4d0e8d 100644
--- a/drivers/rtc/rtc-s5m.c
+++ b/drivers/rtc/rtc-s5m.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/i2c.h>
#include <linux/bcd.h>
+#include <linux/reboot.h>
#include <linux/regmap.h>
#include <linux/rtc.h>
#include <linux/platform_device.h>
@@ -53,6 +54,7 @@ enum {
* Device | Write time | Read time | Write alarm
* =================================================
* S5M8767 | UDR + TIME | | UDR
+ * S2MPG10 | WUDR | RUDR | AUDR
* S2MPS11/14 | WUDR | RUDR | WUDR + RUDR
* S2MPS13 | WUDR | RUDR | WUDR + AUDR
* S2MPS15 | WUDR | RUDR | AUDR
@@ -99,6 +101,20 @@ static const struct s5m_rtc_reg_config s5m_rtc_regs = {
.write_alarm_udr_mask = S5M_RTC_UDR_MASK,
};
+/* Register map for S2MPG10 */
+static const struct s5m_rtc_reg_config s2mpg10_rtc_regs = {
+ .regs_count = 7,
+ .time = S2MPG10_RTC_SEC,
+ .ctrl = S2MPG10_RTC_CTRL,
+ .alarm0 = S2MPG10_RTC_A0SEC,
+ .alarm1 = S2MPG10_RTC_A1SEC,
+ .udr_update = S2MPG10_RTC_UPDATE,
+ .autoclear_udr_mask = S2MPS15_RTC_WUDR_MASK | S2MPS15_RTC_AUDR_MASK,
+ .read_time_udr_mask = S2MPS_RTC_RUDR_MASK,
+ .write_time_udr_mask = S2MPS15_RTC_WUDR_MASK,
+ .write_alarm_udr_mask = S2MPS15_RTC_AUDR_MASK,
+};
+
/* Register map for S2MPS13 */
static const struct s5m_rtc_reg_config s2mps13_rtc_regs = {
.regs_count = 7,
@@ -227,8 +243,8 @@ static int s5m8767_wait_for_udr_update(struct s5m_rtc_info *info)
return ret;
}
-static int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info,
- struct rtc_wkalrm *alarm)
+static int s5m_check_pending_alarm_interrupt(struct s5m_rtc_info *info,
+ struct rtc_wkalrm *alarm)
{
int ret;
unsigned int val;
@@ -238,6 +254,7 @@ static int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info,
ret = regmap_read(info->regmap, S5M_RTC_STATUS, &val);
val &= S5M_ALARM0_STATUS;
break;
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -262,17 +279,9 @@ static int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info,
static int s5m8767_rtc_set_time_reg(struct s5m_rtc_info *info)
{
int ret;
- unsigned int data;
- ret = regmap_read(info->regmap, info->regs->udr_update, &data);
- if (ret < 0) {
- dev_err(info->dev, "failed to read update reg(%d)\n", ret);
- return ret;
- }
-
- data |= info->regs->write_time_udr_mask;
-
- ret = regmap_write(info->regmap, info->regs->udr_update, data);
+ ret = regmap_set_bits(info->regmap, info->regs->udr_update,
+ info->regs->write_time_udr_mask);
if (ret < 0) {
dev_err(info->dev, "failed to write update reg(%d)\n", ret);
return ret;
@@ -286,20 +295,14 @@ static int s5m8767_rtc_set_time_reg(struct s5m_rtc_info *info)
static int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info)
{
int ret;
- unsigned int data;
+ unsigned int udr_mask;
- ret = regmap_read(info->regmap, info->regs->udr_update, &data);
- if (ret < 0) {
- dev_err(info->dev, "%s: fail to read update reg(%d)\n",
- __func__, ret);
- return ret;
- }
-
- data |= info->regs->write_alarm_udr_mask;
+ udr_mask = info->regs->write_alarm_udr_mask;
switch (info->device_type) {
case S5M8767X:
- data &= ~S5M_RTC_TIME_EN_MASK;
+ udr_mask |= S5M_RTC_TIME_EN_MASK;
break;
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -309,7 +312,8 @@ static int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info)
return -EINVAL;
}
- ret = regmap_write(info->regmap, info->regs->udr_update, data);
+ ret = regmap_update_bits(info->regmap, info->regs->udr_update,
+ udr_mask, info->regs->write_alarm_udr_mask);
if (ret < 0) {
dev_err(info->dev, "%s: fail to write update reg(%d)\n",
__func__, ret);
@@ -320,8 +324,8 @@ static int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info)
/* On S2MPS13 the AUDR is not auto-cleared */
if (info->device_type == S2MPS13X)
- regmap_update_bits(info->regmap, info->regs->udr_update,
- S2MPS13_RTC_AUDR_MASK, 0);
+ regmap_clear_bits(info->regmap, info->regs->udr_update,
+ S2MPS13_RTC_AUDR_MASK);
return ret;
}
@@ -333,10 +337,8 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
int ret;
if (info->regs->read_time_udr_mask) {
- ret = regmap_update_bits(info->regmap,
- info->regs->udr_update,
- info->regs->read_time_udr_mask,
- info->regs->read_time_udr_mask);
+ ret = regmap_set_bits(info->regmap, info->regs->udr_update,
+ info->regs->read_time_udr_mask);
if (ret) {
dev_err(dev,
"Failed to prepare registers for time reading: %d\n",
@@ -351,6 +353,7 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
switch (info->device_type) {
case S5M8767X:
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -374,6 +377,7 @@ static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
switch (info->device_type) {
case S5M8767X:
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -411,6 +415,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
switch (info->device_type) {
case S5M8767X:
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -430,7 +435,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
dev_dbg(dev, "%s: %ptR(%d)\n", __func__, &alrm->time, alrm->time.tm_wday);
- return s5m_check_peding_alarm_interrupt(info, alrm);
+ return s5m_check_pending_alarm_interrupt(info, alrm);
}
static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
@@ -449,6 +454,7 @@ static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
switch (info->device_type) {
case S5M8767X:
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -487,6 +493,7 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
switch (info->device_type) {
case S5M8767X:
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -524,6 +531,7 @@ static int s5m_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
switch (info->device_type) {
case S5M8767X:
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -604,6 +612,7 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info)
ret = regmap_raw_write(info->regmap, S5M_ALARM0_CONF, data, 2);
break;
+ case S2MPG10:
case S2MPS15X:
case S2MPS14X:
case S2MPS13X:
@@ -634,59 +643,92 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info)
return ret;
}
+static int s5m_rtc_restart_s2mpg10(struct sys_off_data *data)
+{
+ struct s5m_rtc_info *info = data->cb_data;
+ int ret;
+
+ if (data->mode != REBOOT_COLD && data->mode != REBOOT_HARD)
+ return NOTIFY_DONE;
+
+ /*
+ * Arm watchdog with maximum timeout (2 seconds), and perform full reset
+ * on expiry.
+ */
+ ret = regmap_set_bits(info->regmap, S2MPG10_RTC_WTSR,
+ (S2MPG10_WTSR_COLDTIMER | S2MPG10_WTSR_COLDRST
+ | S2MPG10_WTSR_WTSRT | S2MPG10_WTSR_WTSR_EN));
+
+ return ret ? NOTIFY_BAD : NOTIFY_DONE;
+}
+
static int s5m_rtc_probe(struct platform_device *pdev)
{
struct sec_pmic_dev *s5m87xx = dev_get_drvdata(pdev->dev.parent);
+ enum sec_device_type device_type =
+ platform_get_device_id(pdev)->driver_data;
struct s5m_rtc_info *info;
- struct i2c_client *i2c;
- const struct regmap_config *regmap_cfg;
int ret, alarm_irq;
info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
- switch (platform_get_device_id(pdev)->driver_data) {
- case S2MPS15X:
- regmap_cfg = &s2mps14_rtc_regmap_config;
- info->regs = &s2mps15_rtc_regs;
- alarm_irq = S2MPS14_IRQ_RTCA0;
- break;
- case S2MPS14X:
- regmap_cfg = &s2mps14_rtc_regmap_config;
- info->regs = &s2mps14_rtc_regs;
- alarm_irq = S2MPS14_IRQ_RTCA0;
- break;
- case S2MPS13X:
- regmap_cfg = &s2mps14_rtc_regmap_config;
- info->regs = &s2mps13_rtc_regs;
- alarm_irq = S2MPS14_IRQ_RTCA0;
- break;
- case S5M8767X:
- regmap_cfg = &s5m_rtc_regmap_config;
- info->regs = &s5m_rtc_regs;
- alarm_irq = S5M8767_IRQ_RTCA1;
- break;
- default:
- return dev_err_probe(&pdev->dev, -ENODEV,
- "Device type %lu is not supported by RTC driver\n",
- platform_get_device_id(pdev)->driver_data);
- }
+ info->regmap = dev_get_regmap(pdev->dev.parent, "rtc");
+ if (!info->regmap) {
+ const struct regmap_config *regmap_cfg;
+ struct i2c_client *i2c;
- i2c = devm_i2c_new_dummy_device(&pdev->dev, s5m87xx->i2c->adapter,
- RTC_I2C_ADDR);
- if (IS_ERR(i2c))
- return dev_err_probe(&pdev->dev, PTR_ERR(i2c),
- "Failed to allocate I2C for RTC\n");
+ switch (device_type) {
+ case S2MPS15X:
+ regmap_cfg = &s2mps14_rtc_regmap_config;
+ info->regs = &s2mps15_rtc_regs;
+ alarm_irq = S2MPS14_IRQ_RTCA0;
+ break;
+ case S2MPS14X:
+ regmap_cfg = &s2mps14_rtc_regmap_config;
+ info->regs = &s2mps14_rtc_regs;
+ alarm_irq = S2MPS14_IRQ_RTCA0;
+ break;
+ case S2MPS13X:
+ regmap_cfg = &s2mps14_rtc_regmap_config;
+ info->regs = &s2mps13_rtc_regs;
+ alarm_irq = S2MPS14_IRQ_RTCA0;
+ break;
+ case S5M8767X:
+ regmap_cfg = &s5m_rtc_regmap_config;
+ info->regs = &s5m_rtc_regs;
+ alarm_irq = S5M8767_IRQ_RTCA1;
+ break;
+ default:
+ return dev_err_probe(&pdev->dev, -ENODEV,
+ "Unsupported device type %d\n",
+ device_type);
+ }
- info->regmap = devm_regmap_init_i2c(i2c, regmap_cfg);
- if (IS_ERR(info->regmap))
- return dev_err_probe(&pdev->dev, PTR_ERR(info->regmap),
- "Failed to allocate RTC register map\n");
+ i2c = devm_i2c_new_dummy_device(&pdev->dev,
+ s5m87xx->i2c->adapter,
+ RTC_I2C_ADDR);
+ if (IS_ERR(i2c))
+ return dev_err_probe(&pdev->dev, PTR_ERR(i2c),
+ "Failed to allocate I2C\n");
+
+ info->regmap = devm_regmap_init_i2c(i2c, regmap_cfg);
+ if (IS_ERR(info->regmap))
+ return dev_err_probe(&pdev->dev, PTR_ERR(info->regmap),
+ "Failed to allocate regmap\n");
+ } else if (device_type == S2MPG10) {
+ info->regs = &s2mpg10_rtc_regs;
+ alarm_irq = S2MPG10_IRQ_RTCA0;
+ } else {
+ return dev_err_probe(&pdev->dev, -ENODEV,
+ "Unsupported device type %d\n",
+ device_type);
+ }
info->dev = &pdev->dev;
info->s5m87xx = s5m87xx;
- info->device_type = platform_get_device_id(pdev)->driver_data;
+ info->device_type = device_type;
if (s5m87xx->irq_data) {
info->irq = regmap_irq_get_virq(s5m87xx->irq_data, alarm_irq);
@@ -721,7 +763,23 @@ static int s5m_rtc_probe(struct platform_device *pdev)
return dev_err_probe(&pdev->dev, ret,
"Failed to request alarm IRQ %d\n",
info->irq);
- device_init_wakeup(&pdev->dev, true);
+
+ ret = devm_device_init_wakeup(&pdev->dev);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret,
+ "Failed to init wakeup\n");
+ }
+
+ if (of_device_is_system_power_controller(pdev->dev.parent->of_node) &&
+ info->device_type == S2MPG10) {
+ ret = devm_register_sys_off_handler(&pdev->dev,
+ SYS_OFF_MODE_RESTART,
+ SYS_OFF_PRIO_HIGH + 1,
+ s5m_rtc_restart_s2mpg10,
+ info);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret,
+ "Failed to register restart handler\n");
}
return devm_rtc_register_device(info->rtc_dev);
@@ -755,6 +813,7 @@ static SIMPLE_DEV_PM_OPS(s5m_rtc_pm_ops, s5m_rtc_suspend, s5m_rtc_resume);
static const struct platform_device_id s5m_rtc_id[] = {
{ "s5m-rtc", S5M8767X },
+ { "s2mpg10-rtc", S2MPG10 },
{ "s2mps13-rtc", S2MPS13X },
{ "s2mps14-rtc", S2MPS14X },
{ "s2mps15-rtc", S2MPS15X },
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index cef60770f68b..b3fcdcae379e 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -86,7 +86,7 @@ static void *_copy_apqns_from_user(void __user *uapqns, size_t nr_apqns)
if (!uapqns || nr_apqns == 0)
return NULL;
- return memdup_user(uapqns, nr_apqns * sizeof(struct pkey_apqn));
+ return memdup_array_user(uapqns, nr_apqns, sizeof(struct pkey_apqn));
}
static int pkey_ioctl_genseck(struct pkey_genseck __user *ugs)
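
memdup_array_user() differs from open-coding nr_apqns * sizeof(...) in that the multiplication is overflow-checked before any copy happens, so a huge element count cannot wrap into a short allocation. A rough shape of the helper (the real one lives in include/linux/string.h):

    #include <linux/err.h>
    #include <linux/overflow.h>
    #include <linux/string.h>
    #include <linux/uaccess.h>

    /* Sketch only; the kernel already provides memdup_array_user(). */
    static inline void *memdup_array_user_sketch(const void __user *src,
                                                 size_t n, size_t size)
    {
            size_t nbytes;

            if (check_mul_overflow(n, size, &nbytes))
                    return ERR_PTR(-EOVERFLOW);

            return memdup_user(src, nbytes);
    }
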
diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
index b7f15f303ea2..967dc4f9eea8 100644
--- a/drivers/s390/net/ism_drv.c
+++ b/drivers/s390/net/ism_drv.c
@@ -130,6 +130,7 @@ static int ism_cmd(struct ism_dev *ism, void *cmd)
struct ism_req_hdr *req = cmd;
struct ism_resp_hdr *resp = cmd;
+ spin_lock(&ism->cmd_lock);
__ism_write_cmd(ism, req + 1, sizeof(*req), req->len - sizeof(*req));
__ism_write_cmd(ism, req, 0, sizeof(*req));
@@ -143,6 +144,7 @@ static int ism_cmd(struct ism_dev *ism, void *cmd)
}
__ism_read_cmd(ism, resp + 1, sizeof(*resp), resp->len - sizeof(*resp));
out:
+ spin_unlock(&ism->cmd_lock);
return resp->ret;
}
@@ -606,6 +608,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return -ENOMEM;
spin_lock_init(&ism->lock);
+ spin_lock_init(&ism->cmd_lock);
dev_set_drvdata(&pdev->dev, ism);
ism->pdev = pdev;
ism->dev.parent = &pdev->dev;
diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c
index 41e36af35488..90a84ae98b97 100644
--- a/drivers/s390/scsi/zfcp_sysfs.c
+++ b/drivers/s390/scsi/zfcp_sysfs.c
@@ -449,6 +449,8 @@ static ssize_t zfcp_sysfs_unit_add_store(struct device *dev,
if (kstrtoull(buf, 0, (unsigned long long *) &fcp_lun))
return -EINVAL;
+ flush_work(&port->rport_work);
+
retval = zfcp_unit_add(port, fcp_lun);
if (retval)
return retval;
diff --git a/drivers/scsi/elx/efct/efct_hw.c b/drivers/scsi/elx/efct/efct_hw.c
index 5a5525054d71..5b079b8b7a08 100644
--- a/drivers/scsi/elx/efct/efct_hw.c
+++ b/drivers/scsi/elx/efct/efct_hw.c
@@ -1120,7 +1120,7 @@ int
efct_hw_parse_filter(struct efct_hw *hw, void *value)
{
int rc = 0;
- char *p = NULL;
+ char *p = NULL, *pp = NULL;
char *token;
u32 idx = 0;
@@ -1132,6 +1132,7 @@ efct_hw_parse_filter(struct efct_hw *hw, void *value)
efc_log_err(hw->os, "p is NULL\n");
return -ENOMEM;
}
+ pp = p;
idx = 0;
while ((token = strsep(&p, ",")) && *token) {
@@ -1144,7 +1145,7 @@ efct_hw_parse_filter(struct efct_hw *hw, void *value)
if (idx == ARRAY_SIZE(hw->config.filter_def))
break;
}
- kfree(p);
+ kfree(pp);
return rc;
}
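
strsep() consumes the string by advancing its argument, which is why the efct hunk frees the saved pp rather than p: after the loop, p points past the last token or is NULL, so kfree(p) leaked the allocation. A userspace illustration of the same pitfall:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
            char *p = strdup("a,b,c");
            char *pp = p;   /* keep the original pointer for free() */
            char *token;

            /*
             * strsep() advances p past every token and leaves it NULL
             * at the end of the string, so free(p) after the loop would
             * free NULL and leak the buffer.
             */
            while ((token = strsep(&p, ",")) && *token)
                    printf("token: %s\n", token);

            free(pp);       /* not free(p) */
            return 0;
    }
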
diff --git a/drivers/scsi/fnic/fdls_disc.c b/drivers/scsi/fnic/fdls_disc.c
index f8ab69c51dab..ae37f85f618b 100644
--- a/drivers/scsi/fnic/fdls_disc.c
+++ b/drivers/scsi/fnic/fdls_disc.c
@@ -763,50 +763,86 @@ static void fdls_send_fabric_abts(struct fnic_iport_s *iport)
iport->fabric.timer_pending = 1;
}
-static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
+static uint8_t *fdls_alloc_init_fdmi_abts_frame(struct fnic_iport_s *iport,
+ uint16_t oxid)
{
- uint8_t *frame;
+ struct fc_frame_header *pfdmi_abts;
uint8_t d_id[3];
+ uint8_t *frame;
struct fnic *fnic = iport->fnic;
- struct fc_frame_header *pfabric_abts;
- unsigned long fdmi_tov;
- uint16_t oxid;
- uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
- sizeof(struct fc_frame_header);
frame = fdls_alloc_frame(iport);
if (frame == NULL) {
FNIC_FCS_DBG(KERN_ERR, fnic->host, fnic->fnic_num,
"Failed to allocate frame to send FDMI ABTS");
- return;
+ return NULL;
}
- pfabric_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET);
+ pfdmi_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET);
fdls_init_fabric_abts_frame(frame, iport);
hton24(d_id, FC_FID_MGMT_SERV);
- FNIC_STD_SET_D_ID(*pfabric_abts, d_id);
+ FNIC_STD_SET_D_ID(*pfdmi_abts, d_id);
+ FNIC_STD_SET_OX_ID(*pfdmi_abts, oxid);
+
+ return frame;
+}
+
+static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
+{
+ uint8_t *frame;
+ struct fnic *fnic = iport->fnic;
+ unsigned long fdmi_tov;
+ uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
+ sizeof(struct fc_frame_header);
if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) {
- oxid = iport->active_oxid_fdmi_plogi;
- FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
+ frame = fdls_alloc_init_fdmi_abts_frame(iport,
+ iport->active_oxid_fdmi_plogi);
+ if (frame == NULL)
+ return;
+
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "0x%x: FDLS send FDMI PLOGI abts. iport->fabric.state: %d oxid: 0x%x",
+ iport->fcid, iport->fabric.state, iport->active_oxid_fdmi_plogi);
fnic_send_fcoe_frame(iport, frame, frame_size);
} else {
if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) {
- oxid = iport->active_oxid_fdmi_rhba;
- FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
+ frame = fdls_alloc_init_fdmi_abts_frame(iport,
+ iport->active_oxid_fdmi_rhba);
+ if (frame == NULL)
+ return;
+
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "0x%x: FDLS send FDMI RHBA abts. iport->fabric.state: %d oxid: 0x%x",
+ iport->fcid, iport->fabric.state, iport->active_oxid_fdmi_rhba);
fnic_send_fcoe_frame(iport, frame, frame_size);
}
if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) {
- oxid = iport->active_oxid_fdmi_rpa;
- FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
+ frame = fdls_alloc_init_fdmi_abts_frame(iport,
+ iport->active_oxid_fdmi_rpa);
+ if (frame == NULL) {
+ if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING)
+ goto arm_timer;
+ else
+ return;
+ }
+
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "0x%x: FDLS send FDMI RPA abts. iport->fabric.state: %d oxid: 0x%x",
+ iport->fcid, iport->fabric.state, iport->active_oxid_fdmi_rpa);
fnic_send_fcoe_frame(iport, frame, frame_size);
}
}
+arm_timer:
fdmi_tov = jiffies + msecs_to_jiffies(2 * iport->e_d_tov);
mod_timer(&iport->fabric.fdmi_timer, round_jiffies(fdmi_tov));
iport->fabric.fdmi_pending |= FDLS_FDMI_ABORT_PENDING;
+
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "0x%x: iport->fabric.fdmi_pending: 0x%x",
+ iport->fcid, iport->fabric.fdmi_pending);
}
static void fdls_send_fabric_flogi(struct fnic_iport_s *iport)
@@ -2245,6 +2281,21 @@ void fdls_fabric_timer_callback(struct timer_list *t)
spin_unlock_irqrestore(&fnic->fnic_lock, flags);
}
+void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport)
+{
+ struct fnic *fnic = iport->fnic;
+
+ iport->fabric.fdmi_pending = 0;
+ /* If max retries not exhausted, start over from fdmi plogi */
+ if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) {
+ iport->fabric.fdmi_retry++;
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "Retry FDMI PLOGI. FDMI retry: %d",
+ iport->fabric.fdmi_retry);
+ fdls_send_fdmi_plogi(iport);
+ }
+}
+
void fdls_fdmi_timer_callback(struct timer_list *t)
{
struct fnic_fdls_fabric_s *fabric = timer_container_of(fabric, t,
@@ -2257,7 +2308,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t)
spin_lock_irqsave(&fnic->fnic_lock, flags);
FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
- "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
+ "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending);
if (!iport->fabric.fdmi_pending) {
/* timer expired after fdmi responses received. */
@@ -2265,7 +2316,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t)
return;
}
FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
- "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
+ "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending);
/* if not abort pending, send an abort */
if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) {
@@ -2274,33 +2325,37 @@ void fdls_fdmi_timer_callback(struct timer_list *t)
return;
}
FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
- "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
+ "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending);
/* ABTS pending for an active fdmi request that is pending.
* That means FDMI ABTS timed out
* Schedule to free the OXID after 2*r_a_tov and proceed
*/
if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) {
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "FDMI PLOGI ABTS timed out. Schedule oxid free: 0x%x\n",
+ iport->active_oxid_fdmi_plogi);
fdls_schedule_oxid_free(iport, &iport->active_oxid_fdmi_plogi);
} else {
- if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING)
+ if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) {
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "FDMI RHBA ABTS timed out. Schedule oxid free: 0x%x\n",
+ iport->active_oxid_fdmi_rhba);
fdls_schedule_oxid_free(iport, &iport->active_oxid_fdmi_rhba);
- if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING)
+ }
+ if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) {
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "FDMI RPA ABTS timed out. Schedule oxid free: 0x%x\n",
+ iport->active_oxid_fdmi_rpa);
fdls_schedule_oxid_free(iport, &iport->active_oxid_fdmi_rpa);
+ }
}
FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
- "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
+ "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending);
- iport->fabric.fdmi_pending = 0;
- /* If max retries not exhaused, start over from fdmi plogi */
- if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) {
- iport->fabric.fdmi_retry++;
- FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
- "retry fdmi timer %d", iport->fabric.fdmi_retry);
- fdls_send_fdmi_plogi(iport);
- }
+ fdls_fdmi_retry_plogi(iport);
FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
- "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
+ "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending);
spin_unlock_irqrestore(&fnic->fnic_lock, flags);
}
@@ -3715,13 +3770,60 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
switch (FNIC_FRAME_TYPE(oxid)) {
case FNIC_FRAME_TYPE_FDMI_PLOGI:
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "Received FDMI PLOGI ABTS rsp with oxid: 0x%x", oxid);
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "0x%x: iport->fabric.fdmi_pending: 0x%x",
+ iport->fcid, iport->fabric.fdmi_pending);
fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_plogi);
+
+ iport->fabric.fdmi_pending &= ~FDLS_FDMI_PLOGI_PENDING;
+ iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "0x%x: iport->fabric.fdmi_pending: 0x%x",
+ iport->fcid, iport->fabric.fdmi_pending);
break;
case FNIC_FRAME_TYPE_FDMI_RHBA:
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "Received FDMI RHBA ABTS rsp with oxid: 0x%x", oxid);
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "0x%x: iport->fabric.fdmi_pending: 0x%x",
+ iport->fcid, iport->fabric.fdmi_pending);
+
+ iport->fabric.fdmi_pending &= ~FDLS_FDMI_REG_HBA_PENDING;
+
+ /* If RPA is still pending, don't turn off ABORT PENDING.
+ * We count on the timer to detect the ABTS timeout and take
+ * corrective action.
+ */
+ if (!(iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING))
+ iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
+
fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rhba);
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "0x%x: iport->fabric.fdmi_pending: 0x%x",
+ iport->fcid, iport->fabric.fdmi_pending);
break;
case FNIC_FRAME_TYPE_FDMI_RPA:
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "Received FDMI RPA ABTS rsp with oxid: 0x%x", oxid);
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "0x%x: iport->fabric.fdmi_pending: 0x%x",
+ iport->fcid, iport->fabric.fdmi_pending);
+
+ iport->fabric.fdmi_pending &= ~FDLS_FDMI_RPA_PENDING;
+
+ /* If RHBA is still pending, don't turn off ABORT PENDING.
+ * We count on the timer to detect the ABTS timeout and take
+ * corrective action.
+ */
+ if (!(iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING))
+ iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
+
fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rpa);
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "0x%x: iport->fabric.fdmi_pending: 0x%x",
+ iport->fcid, iport->fabric.fdmi_pending);
break;
default:
FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
@@ -3730,10 +3832,16 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
break;
}
- timer_delete_sync(&iport->fabric.fdmi_timer);
- iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
-
- fdls_send_fdmi_plogi(iport);
+ /*
+ * If ABORT PENDING is off, delete the timer and, if no other
+ * operations are pending, retry FDMI.
+ * Otherwise, let the timer fire and take the appropriate action.
+ */
+ if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) {
+ timer_delete_sync(&iport->fabric.fdmi_timer);
+ if (!iport->fabric.fdmi_pending)
+ fdls_fdmi_retry_plogi(iport);
+ }
}
static void
@@ -4972,9 +5080,12 @@ void fnic_fdls_link_down(struct fnic_iport_s *iport)
fdls_delete_tport(iport, tport);
}
- if ((fnic_fdmi_support == 1) && (iport->fabric.fdmi_pending > 0)) {
- timer_delete_sync(&iport->fabric.fdmi_timer);
- iport->fabric.fdmi_pending = 0;
+ if (fnic_fdmi_support == 1) {
+ if (iport->fabric.fdmi_pending > 0) {
+ timer_delete_sync(&iport->fabric.fdmi_timer);
+ iport->fabric.fdmi_pending = 0;
+ }
+ iport->flags &= ~FNIC_FDMI_ACTIVE;
}
FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
index 6c5f6046b1f5..c2fdc6553e62 100644
--- a/drivers/scsi/fnic/fnic.h
+++ b/drivers/scsi/fnic/fnic.h
@@ -30,7 +30,7 @@
#define DRV_NAME "fnic"
#define DRV_DESCRIPTION "Cisco FCoE HBA Driver"
-#define DRV_VERSION "1.8.0.0"
+#define DRV_VERSION "1.8.0.2"
#define PFX DRV_NAME ": "
#define DFX DRV_NAME "%d: "
diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c
index 1e8cd64f9a5c..103ab6f1f7cd 100644
--- a/drivers/scsi/fnic/fnic_fcs.c
+++ b/drivers/scsi/fnic/fnic_fcs.c
@@ -636,6 +636,8 @@ static int fnic_send_frame(struct fnic *fnic, void *frame, int frame_len)
unsigned long flags;
pa = dma_map_single(&fnic->pdev->dev, frame, frame_len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&fnic->pdev->dev, pa))
+ return -ENOMEM;
if ((fnic_fc_trace_set_data(fnic->fnic_num,
FNIC_FC_SEND | 0x80, (char *) frame,
diff --git a/drivers/scsi/fnic/fnic_fdls.h b/drivers/scsi/fnic/fnic_fdls.h
index 8e610b65ad57..531d0b37e450 100644
--- a/drivers/scsi/fnic/fnic_fdls.h
+++ b/drivers/scsi/fnic/fnic_fdls.h
@@ -394,6 +394,7 @@ void fdls_send_tport_abts(struct fnic_iport_s *iport,
bool fdls_delete_tport(struct fnic_iport_s *iport,
struct fnic_tport_s *tport);
void fdls_fdmi_timer_callback(struct timer_list *t);
+void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport);
/* fnic_fcs.c */
void fnic_fdls_init(struct fnic *fnic, int usefip);
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 7133b254cbe4..75b29a018d1f 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -1046,7 +1046,7 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, unsigned int cq_ind
if (icmnd_cmpl->scsi_status == SAM_STAT_TASK_SET_FULL)
atomic64_inc(&fnic_stats->misc_stats.queue_fulls);
- FNIC_SCSI_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ FNIC_SCSI_DBG(KERN_DEBUG, fnic->host, fnic->fnic_num,
"xfer_len: %llu", xfer_len);
break;
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index e021f1106bea..cc5d05dc395c 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -473,10 +473,17 @@ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv
else
shost->max_sectors = SCSI_DEFAULT_MAX_SECTORS;
- if (sht->max_segment_size)
- shost->max_segment_size = sht->max_segment_size;
- else
- shost->max_segment_size = BLK_MAX_SEGMENT_SIZE;
+ shost->virt_boundary_mask = sht->virt_boundary_mask;
+ if (shost->virt_boundary_mask) {
+ WARN_ON_ONCE(sht->max_segment_size &&
+ sht->max_segment_size != UINT_MAX);
+ shost->max_segment_size = UINT_MAX;
+ } else {
+ if (sht->max_segment_size)
+ shost->max_segment_size = sht->max_segment_size;
+ else
+ shost->max_segment_size = BLK_MAX_SEGMENT_SIZE;
+ }
/* 32-byte (dword) is a common minimum for HBAs. */
if (sht->dma_alignment)
@@ -492,9 +499,6 @@ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv
else
shost->dma_boundary = 0xffffffff;
- if (sht->virt_boundary_mask)
- shost->virt_boundary_mask = sht->virt_boundary_mask;
-
device_initialize(&shost->shost_gendev);
dev_set_name(&shost->shost_gendev, "host%d", shost->host_no);
shost->shost_gendev.bus = &scsi_bus_type;
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 3aac0e17cb00..9179f8aee964 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -5910,7 +5910,11 @@ megasas_set_high_iops_queue_affinity_and_hint(struct megasas_instance *instance)
const struct cpumask *mask;
if (instance->perf_mode == MR_BALANCED_PERF_MODE) {
- mask = cpumask_of_node(dev_to_node(&instance->pdev->dev));
+ int nid = dev_to_node(&instance->pdev->dev);
+
+ if (nid == NUMA_NO_NODE)
+ nid = 0;
+ mask = cpumask_of_node(nid);
for (i = 0; i < instance->low_latency_index_start; i++) {
irq = pci_irq_vector(instance->pdev, i);
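
dev_to_node() returns NUMA_NO_NODE (-1) for devices with no known affinity, and indexing the per-node cpumask array with -1 is out of bounds, hence the fallback to node 0 in the megaraid hunk. The guard, extracted as a sketch:

    #include <linux/device.h>
    #include <linux/numa.h>
    #include <linux/topology.h>

    static const struct cpumask *dev_node_cpus(struct device *dev)
    {
            int nid = dev_to_node(dev);

            if (nid == NUMA_NO_NODE)
                    nid = 0;        /* no affinity: fall back to node 0 */
            return cpumask_of_node(nid);
    }
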
diff --git a/drivers/scsi/mvsas/mv_defs.h b/drivers/scsi/mvsas/mv_defs.h
index 8ef174cd4d37..3e4124177b2a 100644
--- a/drivers/scsi/mvsas/mv_defs.h
+++ b/drivers/scsi/mvsas/mv_defs.h
@@ -215,7 +215,7 @@ enum hw_register_bits {
/* MVS_Px_INT_STAT, MVS_Px_INT_MASK (per-phy events) */
PHYEV_DEC_ERR = (1U << 24), /* Phy Decoding Error */
- PHYEV_DCDR_ERR = (1U << 23), /* STP Deocder Error */
+ PHYEV_DCDR_ERR = (1U << 23), /* STP Decoder Error */
PHYEV_CRC_ERR = (1U << 22), /* STP CRC Error */
PHYEV_UNASSOC_FIS = (1U << 19), /* unassociated FIS rx'd */
PHYEV_AN = (1U << 18), /* SATA async notification */
@@ -347,7 +347,7 @@ enum sas_cmd_port_registers {
CMD_SATA_PORT_MEM_CTL0 = 0x158, /* SATA Port Memory Control 0 */
CMD_SATA_PORT_MEM_CTL1 = 0x15c, /* SATA Port Memory Control 1 */
CMD_XOR_MEM_BIST_CTL = 0x160, /* XOR Memory BIST Control */
- CMD_XOR_MEM_BIST_STAT = 0x164, /* XOR Memroy BIST Status */
+ CMD_XOR_MEM_BIST_STAT = 0x164, /* XOR Memory BIST Status */
CMD_DMA_MEM_BIST_CTL = 0x168, /* DMA Memory BIST Control */
CMD_DMA_MEM_BIST_STAT = 0x16c, /* DMA Memory BIST Status */
CMD_PORT_MEM_BIST_CTL = 0x170, /* Port Memory BIST Control */
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 0cd6f3e14882..13b6cb1b93ac 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -2147,7 +2147,7 @@ qla24xx_get_port_database(scsi_qla_host_t *vha, u16 nport_handle,
pdb_dma = dma_map_single(&vha->hw->pdev->dev, pdb,
sizeof(*pdb), DMA_FROM_DEVICE);
- if (!pdb_dma) {
+ if (dma_mapping_error(&vha->hw->pdev->dev, pdb_dma)) {
ql_log(ql_log_warn, vha, 0x1116, "Failed to map dma buffer.\n");
return QLA_MEMORY_ALLOC_FAILED;
}
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index d4141656b204..a39f1da4ce47 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -3420,6 +3420,8 @@ static int qla4xxx_alloc_pdu(struct iscsi_task *task, uint8_t opcode)
task_data->data_dma = dma_map_single(&ha->pdev->dev, task->data,
task->data_count,
DMA_TO_DEVICE);
+ if (dma_mapping_error(&ha->pdev->dev, task_data->data_dma))
+ return -ENOMEM;
}
DEBUG2(ql4_printk(KERN_INFO, ha, "%s: MaxRecvLen %u, iscsi hrd %d\n",
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 376b8897ab90..746ff6a1f309 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -665,7 +665,8 @@ enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd)
* if the device is in the process of becoming ready, we
* should retry.
*/
- if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01))
+ if ((sshdr.asc == 0x04) &&
+ (sshdr.ascq == 0x01 || sshdr.ascq == 0x0a))
return NEEDS_RETRY;
/*
* if the device is not started, we need to wake
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 0b8c91bf793f..c75a806496d6 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -3499,7 +3499,7 @@ static int iscsi_new_flashnode(struct iscsi_transport *transport,
pr_err("%s could not find host no %u\n",
__func__, ev->u.new_flashnode.host_no);
err = -ENODEV;
- goto put_host;
+ goto exit_new_fnode;
}
index = transport->new_flashnode(shost, data, len);
@@ -3509,7 +3509,6 @@ static int iscsi_new_flashnode(struct iscsi_transport *transport,
else
err = -EIO;
-put_host:
scsi_host_put(shost);
exit_new_fnode:
@@ -3534,7 +3533,7 @@ static int iscsi_del_flashnode(struct iscsi_transport *transport,
pr_err("%s could not find host no %u\n",
__func__, ev->u.del_flashnode.host_no);
err = -ENODEV;
- goto put_host;
+ goto exit_del_fnode;
}
idx = ev->u.del_flashnode.flashnode_idx;
@@ -3576,7 +3575,7 @@ static int iscsi_login_flashnode(struct iscsi_transport *transport,
pr_err("%s could not find host no %u\n",
__func__, ev->u.login_flashnode.host_no);
err = -ENODEV;
- goto put_host;
+ goto exit_login_fnode;
}
idx = ev->u.login_flashnode.flashnode_idx;
@@ -3628,7 +3627,7 @@ static int iscsi_logout_flashnode(struct iscsi_transport *transport,
pr_err("%s could not find host no %u\n",
__func__, ev->u.logout_flashnode.host_no);
err = -ENODEV;
- goto put_host;
+ goto exit_logout_fnode;
}
idx = ev->u.logout_flashnode.flashnode_idx;
@@ -3678,7 +3677,7 @@ static int iscsi_logout_flashnode_sid(struct iscsi_transport *transport,
pr_err("%s could not find host no %u\n",
__func__, ev->u.logout_flashnode.host_no);
err = -ENODEV;
- goto put_host;
+ goto exit_logout_sid;
}
session = iscsi_session_lookup(ev->u.logout_flashnode_sid.sid);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 3f6e87705b62..eeaa6af294b8 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3384,7 +3384,7 @@ static void sd_read_block_limits_ext(struct scsi_disk *sdkp)
rcu_read_lock();
vpd = rcu_dereference(sdkp->device->vpd_pgb7);
- if (vpd && vpd->len >= 2)
+ if (vpd && vpd->len >= 6)
sdkp->rscs = vpd->data[5] & 1;
rcu_read_unlock();
}
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 2e6b2412d2c9..d9e59204a9c3 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -362,7 +362,7 @@ MODULE_PARM_DESC(ring_avail_percent_lowater,
/*
* Timeout in seconds for all devices managed by this driver.
*/
-static int storvsc_timeout = 180;
+static const int storvsc_timeout = 180;
#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
static struct scsi_transport_template *fc_transport_template;
@@ -768,7 +768,7 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns)
return;
}
- t = wait_for_completion_timeout(&request->wait_event, 10*HZ);
+ t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ);
if (t == 0) {
dev_err(dev, "Failed to create sub-channel: timed out\n");
return;
@@ -833,7 +833,7 @@ static int storvsc_execute_vstor_op(struct hv_device *device,
if (ret != 0)
return ret;
- t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
+ t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ);
if (t == 0)
return -ETIMEDOUT;
@@ -1350,6 +1350,8 @@ static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size,
return ret;
ret = storvsc_channel_init(device, is_fc);
+ if (ret)
+ vmbus_close(device->channel);
return ret;
}
@@ -1668,7 +1670,7 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd)
if (ret != 0)
return FAILED;
- t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
+ t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ);
if (t == 0)
return TIMEOUT_ERROR;
diff --git a/drivers/soc/aspeed/aspeed-lpc-snoop.c b/drivers/soc/aspeed/aspeed-lpc-snoop.c
index ef8f355589a5..fc3a2c41cc10 100644
--- a/drivers/soc/aspeed/aspeed-lpc-snoop.c
+++ b/drivers/soc/aspeed/aspeed-lpc-snoop.c
@@ -58,6 +58,7 @@ struct aspeed_lpc_snoop_model_data {
};
struct aspeed_lpc_snoop_channel {
+ bool enabled;
struct kfifo fifo;
wait_queue_head_t wq;
struct miscdevice miscdev;
@@ -190,6 +191,9 @@ static int aspeed_lpc_enable_snoop(struct aspeed_lpc_snoop *lpc_snoop,
const struct aspeed_lpc_snoop_model_data *model_data =
of_device_get_match_data(dev);
+ if (WARN_ON(lpc_snoop->chan[channel].enabled))
+ return -EBUSY;
+
init_waitqueue_head(&lpc_snoop->chan[channel].wq);
/* Create FIFO datastructure */
rc = kfifo_alloc(&lpc_snoop->chan[channel].fifo,
@@ -236,6 +240,8 @@ static int aspeed_lpc_enable_snoop(struct aspeed_lpc_snoop *lpc_snoop,
regmap_update_bits(lpc_snoop->regmap, HICRB,
hicrb_en, hicrb_en);
+ lpc_snoop->chan[channel].enabled = true;
+
return 0;
err_misc_deregister:
@@ -248,6 +254,9 @@ err_free_fifo:
static void aspeed_lpc_disable_snoop(struct aspeed_lpc_snoop *lpc_snoop,
int channel)
{
+ if (!lpc_snoop->chan[channel].enabled)
+ return;
+
switch (channel) {
case 0:
regmap_update_bits(lpc_snoop->regmap, HICR5,
@@ -263,8 +272,10 @@ static void aspeed_lpc_disable_snoop(struct aspeed_lpc_snoop *lpc_snoop,
return;
}
- kfifo_free(&lpc_snoop->chan[channel].fifo);
+ lpc_snoop->chan[channel].enabled = false;
+ /* Consider improving safety wrt concurrent reader(s) */
misc_deregister(&lpc_snoop->chan[channel].miscdev);
+ kfifo_free(&lpc_snoop->chan[channel].fifo);
}
static int aspeed_lpc_snoop_probe(struct platform_device *pdev)
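
The new enabled flag makes teardown idempotent: disable can now be called for a channel whose enable failed or never ran (as the remove path does for both channels) without deregistering a miscdevice or freeing a FIFO twice. The shape of the pattern, with hypothetical names:

    #include <linux/types.h>

    struct chan {
            bool enabled;
            /* ... fifo, miscdev, ... */
    };

    static void chan_disable(struct chan *c)
    {
            if (!c->enabled)        /* enable failed or never ran */
                    return;
            c->enabled = false;
            /* deregister and free resources exactly once */
    }
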
diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c
index a12c68b93b1c..7a671a786197 100644
--- a/drivers/soundwire/amd_manager.c
+++ b/drivers/soundwire/amd_manager.c
@@ -238,7 +238,7 @@ static u64 amd_sdw_send_cmd_get_resp(struct amd_sdw_manager *amd_manager, u32 lo
if (sts & AMD_SDW_IMM_RES_VALID) {
dev_err(amd_manager->dev, "SDW%x manager is in bad state\n", amd_manager->instance);
- writel(0x00, amd_manager->mmio + ACP_SW_IMM_CMD_STS);
+ writel(AMD_SDW_IMM_RES_VALID, amd_manager->mmio + ACP_SW_IMM_CMD_STS);
}
writel(upper_data, amd_manager->mmio + ACP_SW_IMM_CMD_UPPER_WORD);
writel(lower_data, amd_manager->mmio + ACP_SW_IMM_CMD_LOWER_QWORD);
@@ -1209,6 +1209,7 @@ static int __maybe_unused amd_suspend(struct device *dev)
}
if (amd_manager->power_mode_mask & AMD_SDW_CLK_STOP_MODE) {
+ cancel_work_sync(&amd_manager->amd_sdw_work);
amd_sdw_wake_enable(amd_manager, false);
if (amd_manager->acp_rev >= ACP70_PCI_REV_ID) {
ret = amd_sdw_host_wake_enable(amd_manager, false);
@@ -1219,6 +1220,7 @@ static int __maybe_unused amd_suspend(struct device *dev)
if (ret)
return ret;
} else if (amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) {
+ cancel_work_sync(&amd_manager->amd_sdw_work);
amd_sdw_wake_enable(amd_manager, false);
if (amd_manager->acp_rev >= ACP70_PCI_REV_ID) {
ret = amd_sdw_host_wake_enable(amd_manager, false);
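
The first amd_manager hunk suggests ACP_SW_IMM_CMD_STS is a write-1-to-clear status register: writing 0x00 leaves AMD_SDW_IMM_RES_VALID set, so the stale response bit is cleared by writing the bit itself back. Assuming W1C semantics, the generic pattern looks like this sketch:

    #include <linux/io.h>
    #include <linux/types.h>

    static void ack_status(void __iomem *sts_reg, u32 bit)
    {
            u32 sts = readl(sts_reg);

            if (sts & bit)
                    writel(bit, sts_reg);   /* W1C: not writel(0, ...) */
    }
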
diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c
index 295a46dc2be7..0f45e3404756 100644
--- a/drivers/soundwire/qcom.c
+++ b/drivers/soundwire/qcom.c
@@ -156,7 +156,6 @@ struct qcom_swrm_port_config {
u8 word_length;
u8 blk_group_count;
u8 lane_control;
- u8 ch_mask;
};
/*
@@ -1049,13 +1048,9 @@ static int qcom_swrm_port_enable(struct sdw_bus *bus,
{
u32 reg = SWRM_DP_PORT_CTRL_BANK(enable_ch->port_num, bank);
struct qcom_swrm_ctrl *ctrl = to_qcom_sdw(bus);
- struct qcom_swrm_port_config *pcfg;
u32 val;
- pcfg = &ctrl->pconfig[enable_ch->port_num];
ctrl->reg_read(ctrl, reg, &val);
- if (pcfg->ch_mask != SWR_INVALID_PARAM && pcfg->ch_mask != 0)
- enable_ch->ch_mask = pcfg->ch_mask;
if (enable_ch->enable)
val |= (enable_ch->ch_mask << SWRM_DP_PORT_CTRL_EN_CHAN_SHFT);
@@ -1275,26 +1270,6 @@ static void *qcom_swrm_get_sdw_stream(struct snd_soc_dai *dai, int direction)
return ctrl->sruntime[dai->id];
}
-static int qcom_swrm_set_channel_map(struct snd_soc_dai *dai,
- unsigned int tx_num, const unsigned int *tx_slot,
- unsigned int rx_num, const unsigned int *rx_slot)
-{
- struct qcom_swrm_ctrl *ctrl = dev_get_drvdata(dai->dev);
- int i;
-
- if (tx_slot) {
- for (i = 0; i < tx_num; i++)
- ctrl->pconfig[i].ch_mask = tx_slot[i];
- }
-
- if (rx_slot) {
- for (i = 0; i < rx_num; i++)
- ctrl->pconfig[i].ch_mask = rx_slot[i];
- }
-
- return 0;
-}
-
static int qcom_swrm_startup(struct snd_pcm_substream *substream,
struct snd_soc_dai *dai)
{
@@ -1331,7 +1306,6 @@ static const struct snd_soc_dai_ops qcom_swrm_pdm_dai_ops = {
.shutdown = qcom_swrm_shutdown,
.set_stream = qcom_swrm_set_sdw_stream,
.get_stream = qcom_swrm_get_sdw_stream,
- .set_channel_map = qcom_swrm_set_channel_map,
};
static const struct snd_soc_component_driver qcom_swrm_dai_component = {
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index fe0f122f07b0..d3c78f59b22c 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -1958,12 +1958,7 @@ static int cqspi_probe(struct platform_device *pdev)
goto probe_setup_failed;
}
- ret = devm_pm_runtime_enable(dev);
- if (ret) {
- if (cqspi->rx_chan)
- dma_release_channel(cqspi->rx_chan);
- goto probe_setup_failed;
- }
+ pm_runtime_enable(dev);
pm_runtime_set_autosuspend_delay(dev, CQSPI_AUTOSUSPEND_TIMEOUT);
pm_runtime_use_autosuspend(dev);
@@ -1981,6 +1976,7 @@ static int cqspi_probe(struct platform_device *pdev)
return 0;
probe_setup_failed:
cqspi_controller_enable(cqspi, 0);
+ pm_runtime_disable(dev);
probe_reset_failed:
if (cqspi->is_jh7110)
cqspi_jh7110_disable_clk(pdev, cqspi);
@@ -1999,7 +1995,8 @@ static void cqspi_remove(struct platform_device *pdev)
if (cqspi->rx_chan)
dma_release_channel(cqspi->rx_chan);
- clk_disable_unprepare(cqspi->clk);
+ if (pm_runtime_get_sync(&pdev->dev) >= 0)
+ clk_disable(cqspi->clk);
if (cqspi->is_jh7110)
cqspi_jh7110_disable_clk(pdev, cqspi);
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 863781ba6c16..0dcd49114095 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -983,11 +983,20 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr,
if (dspi->devtype_data->trans_mode == DSPI_DMA_MODE) {
status = dspi_dma_xfer(dspi);
} else {
+ /*
+ * Reinitialize the completion before transferring data
+ * to avoid the case where it might remain in the done
+ * state due to a spurious interrupt from a previous
+ * transfer. This could falsely signal that the current
+ * transfer has completed.
+ */
+ if (dspi->irq)
+ reinit_completion(&dspi->xfer_done);
+
dspi_fifo_write(dspi);
if (dspi->irq) {
wait_for_completion(&dspi->xfer_done);
- reinit_completion(&dspi->xfer_done);
} else {
do {
status = dspi_poll(dspi);
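
The dspi hunk's own comment explains the race; in pattern form, the completion must be re-armed before the operation that will complete it is started. Re-initializing after the wait leaves a window in which a spurious interrupt from the previous transfer marks the completion done, so the next wait returns early:

    #include <linux/completion.h>

    static void start_and_wait(struct completion *done)
    {
            reinit_completion(done);        /* 1: arm */
            /* 2: kick the hardware; the IRQ handler calls complete(done) */
            wait_for_completion(done);      /* 3: wait */
    }
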
diff --git a/drivers/spi/spi-loongson-core.c b/drivers/spi/spi-loongson-core.c
index 4fec226456d1..b46f072a0387 100644
--- a/drivers/spi/spi-loongson-core.c
+++ b/drivers/spi/spi-loongson-core.c
@@ -5,6 +5,7 @@
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/err.h>
+#include <linux/export.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
diff --git a/drivers/spi/spi-offload.c b/drivers/spi/spi-offload.c
index e674097bf3be..d336f4d228d5 100644
--- a/drivers/spi/spi-offload.c
+++ b/drivers/spi/spi-offload.c
@@ -297,7 +297,7 @@ int spi_offload_trigger_enable(struct spi_offload *offload,
if (trigger->ops->enable) {
ret = trigger->ops->enable(trigger, config);
if (ret) {
- if (offload->ops->trigger_disable)
+ if (offload->ops && offload->ops->trigger_disable)
offload->ops->trigger_disable(offload);
return ret;
}
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 29c616e2c408..70bb74b3bd9c 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -134,6 +134,7 @@ struct omap2_mcspi {
size_t max_xfer_len;
u32 ref_clk_hz;
bool use_multi_mode;
+ bool last_msg_kept_cs;
};
struct omap2_mcspi_cs {
@@ -1269,6 +1270,10 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr,
* multi-mode is applicable.
*/
mcspi->use_multi_mode = true;
+
+ if (mcspi->last_msg_kept_cs)
+ mcspi->use_multi_mode = false;
+
list_for_each_entry(tr, &msg->transfers, transfer_list) {
if (!tr->bits_per_word)
bits_per_word = msg->spi->bits_per_word;
@@ -1287,18 +1292,19 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr,
mcspi->use_multi_mode = false;
}
- /* Check if transfer asks to change the CS status after the transfer */
- if (!tr->cs_change)
- mcspi->use_multi_mode = false;
-
- /*
- * If at least one message is not compatible, switch back to single mode
- *
- * The bits_per_word of certain transfer can be different, but it will have no
- * impact on the signal itself.
- */
- if (!mcspi->use_multi_mode)
- break;
+ if (list_is_last(&tr->transfer_list, &msg->transfers)) {
+ /* Check if transfer asks to keep the CS status after the whole message */
+ if (tr->cs_change) {
+ mcspi->use_multi_mode = false;
+ mcspi->last_msg_kept_cs = true;
+ } else {
+ mcspi->last_msg_kept_cs = false;
+ }
+ } else {
+ /* Check if transfer asks to change the CS status after the transfer */
+ if (!tr->cs_change)
+ mcspi->use_multi_mode = false;
+ }
}
omap2_mcspi_set_mode(ctlr);
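
cs_change means two different things depending on position (see the documentation in include/linux/spi/spi.h): on an intermediate transfer it briefly deasserts CS between transfers; on the last transfer it keeps CS asserted after the message ends. The mcspi hunk inspects the final transfer for the second meaning and remembers it in last_msg_kept_cs for the next message. A sketch of the final-transfer check:

    #include <linux/list.h>
    #include <linux/spi/spi.h>

    static bool msg_keeps_cs_asserted(struct spi_message *msg)
    {
            struct spi_transfer *last;

            if (list_empty(&msg->transfers))
                    return false;

            last = list_last_entry(&msg->transfers, struct spi_transfer,
                                   transfer_list);
            /* cs_change on the last transfer: leave CS asserted */
            return last->cs_change;
    }
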
diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c
index c6489e90b8b9..e27642c4dea4 100644
--- a/drivers/spi/spi-pci1xxxx.c
+++ b/drivers/spi/spi-pci1xxxx.c
@@ -762,10 +762,10 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
return -EINVAL;
num_vector = pci_alloc_irq_vectors(pdev, 1, hw_inst_cnt,
- PCI_IRQ_ALL_TYPES);
+ PCI_IRQ_INTX | PCI_IRQ_MSI);
if (num_vector < 0) {
dev_err(&pdev->dev, "Error allocating MSI vectors\n");
- return ret;
+ return num_vector;
}
init_completion(&spi_sub_ptr->spi_xfer_done);
diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c
index 77d9cc65477a..3b757e3d00c0 100644
--- a/drivers/spi/spi-qpic-snand.c
+++ b/drivers/spi/spi-qpic-snand.c
@@ -308,13 +308,29 @@ static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand)
ecc_cfg->bch_enabled = true;
ecc_cfg->bytes = ecc_cfg->ecc_bytes_hw + ecc_cfg->spare_bytes + ecc_cfg->bbm_size;
- ecc_cfg->steps = 4;
+ ecc_cfg->steps = cwperpage;
ecc_cfg->cw_data = 516;
ecc_cfg->cw_size = ecc_cfg->cw_data + ecc_cfg->bytes;
bad_block_byte = mtd->writesize - ecc_cfg->cw_size * (cwperpage - 1) + 1;
mtd_set_ooblayout(mtd, &qcom_spi_ooblayout);
+ /*
+ * Free the temporary BAM transaction allocated initially by
+ * qcom_nandc_alloc(), and allocate a new one based on the
+ * updated max_cwperpage value.
+ */
+ qcom_free_bam_transaction(snandc);
+
+ snandc->max_cwperpage = cwperpage;
+
+ snandc->bam_txn = qcom_alloc_bam_transaction(snandc);
+ if (!snandc->bam_txn) {
+ dev_err(snandc->dev, "failed to allocate BAM transaction\n");
+ ret = -ENOMEM;
+ goto err_free_ecc_cfg;
+ }
+
ecc_cfg->cfg0 = FIELD_PREP(CW_PER_PAGE_MASK, (cwperpage - 1)) |
FIELD_PREP(UD_SIZE_BYTES_MASK, ecc_cfg->cw_data) |
FIELD_PREP(DISABLE_STATUS_AFTER_WRITE, 1) |
diff --git a/drivers/spi/spi-stm32-ospi.c b/drivers/spi/spi-stm32-ospi.c
index 7c1fa55fbc47..4ab7e86f4bd5 100644
--- a/drivers/spi/spi-stm32-ospi.c
+++ b/drivers/spi/spi-stm32-ospi.c
@@ -804,7 +804,7 @@ static int stm32_ospi_get_resources(struct platform_device *pdev)
return ret;
}
- ospi->rstc = devm_reset_control_array_get_exclusive(dev);
+ ospi->rstc = devm_reset_control_array_get_exclusive_released(dev);
if (IS_ERR(ospi->rstc))
return dev_err_probe(dev, PTR_ERR(ospi->rstc),
"Can't get reset\n");
@@ -936,12 +936,16 @@ static int stm32_ospi_probe(struct platform_device *pdev)
if (ret < 0)
goto err_pm_enable;
- if (ospi->rstc) {
- reset_control_assert(ospi->rstc);
- udelay(2);
- reset_control_deassert(ospi->rstc);
+ ret = reset_control_acquire(ospi->rstc);
+ if (ret) {
+ dev_err_probe(dev, ret, "Cannot acquire reset %d\n", ret);
+ goto err_pm_resume;
}
+ reset_control_assert(ospi->rstc);
+ udelay(2);
+ reset_control_deassert(ospi->rstc);
+
ret = spi_register_controller(ctrl);
if (ret) {
/* Disable ospi */
@@ -987,6 +991,8 @@ static void stm32_ospi_remove(struct platform_device *pdev)
if (ospi->dma_chrx)
dma_release_channel(ospi->dma_chrx);
+ reset_control_release(ospi->rstc);
+
pm_runtime_put_sync_suspend(ospi->dev);
pm_runtime_force_suspend(ospi->dev);
}
@@ -997,6 +1003,8 @@ static int __maybe_unused stm32_ospi_suspend(struct device *dev)
pinctrl_pm_select_sleep_state(dev);
+ reset_control_release(ospi->rstc);
+
return pm_runtime_force_suspend(ospi->dev);
}
@@ -1016,6 +1024,12 @@ static int __maybe_unused stm32_ospi_resume(struct device *dev)
if (ret < 0)
return ret;
+ ret = reset_control_acquire(ospi->rstc);
+ if (ret) {
+ dev_err(dev, "Cannot acquire reset\n");
+ return ret;
+ }
+
writel_relaxed(ospi->cr_reg, regs_base + OSPI_CR);
writel_relaxed(ospi->dcr_reg, regs_base + OSPI_DCR1);
pm_runtime_mark_last_busy(ospi->dev);
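
The stm32-ospi driver now requests its reset in the "released" state and bounces it between reset_control_acquire() (probe and resume) and reset_control_release() (suspend and remove). A released reset control is only usable between acquire and release; presumably this lets another consumer sharing the same reset line own it while this controller is suspended. A sketch of one acquire-pulse-release cycle, under that assumption:

    #include <linux/delay.h>
    #include <linux/reset.h>

    static int reset_pulse(struct reset_control *rstc)
    {
            int ret;

            ret = reset_control_acquire(rstc);      /* take ownership */
            if (ret)
                    return ret;

            reset_control_assert(rstc);
            udelay(2);
            reset_control_deassert(rstc);

            reset_control_release(rstc);            /* let others acquire */
            return 0;
    }
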
diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index 3581757a269b..3be7499db21e 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -407,9 +407,6 @@ tegra_qspi_read_rx_fifo_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_tra
static void
tegra_qspi_copy_client_txbuf_to_qspi_txbuf(struct tegra_qspi *tqspi, struct spi_transfer *t)
{
- dma_sync_single_for_cpu(tqspi->dev, tqspi->tx_dma_phys,
- tqspi->dma_buf_size, DMA_TO_DEVICE);
-
/*
* In packed mode, each word in FIFO may contain multiple packets
* based on bits per word. So all bytes in each FIFO word are valid.
@@ -442,17 +439,11 @@ tegra_qspi_copy_client_txbuf_to_qspi_txbuf(struct tegra_qspi *tqspi, struct spi_
tqspi->cur_tx_pos += write_bytes;
}
-
- dma_sync_single_for_device(tqspi->dev, tqspi->tx_dma_phys,
- tqspi->dma_buf_size, DMA_TO_DEVICE);
}
static void
tegra_qspi_copy_qspi_rxbuf_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_transfer *t)
{
- dma_sync_single_for_cpu(tqspi->dev, tqspi->rx_dma_phys,
- tqspi->dma_buf_size, DMA_FROM_DEVICE);
-
if (tqspi->is_packed) {
tqspi->cur_rx_pos += tqspi->curr_dma_words * tqspi->bytes_per_word;
} else {
@@ -478,9 +469,6 @@ tegra_qspi_copy_qspi_rxbuf_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_
tqspi->cur_rx_pos += read_bytes;
}
-
- dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys,
- tqspi->dma_buf_size, DMA_FROM_DEVICE);
}
static void tegra_qspi_dma_complete(void *args)
@@ -701,8 +689,6 @@ static int tegra_qspi_start_dma_based_transfer(struct tegra_qspi *tqspi, struct
return ret;
}
- dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys,
- tqspi->dma_buf_size, DMA_FROM_DEVICE);
ret = tegra_qspi_start_rx_dma(tqspi, t, len);
if (ret < 0) {
dev_err(tqspi->dev, "failed to start RX DMA: %d\n", ret);
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 1bc0fdbb1bd7..0ffa3f9f2870 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -4138,10 +4138,13 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
xfer->tx_nbits != SPI_NBITS_OCTAL)
return -EINVAL;
if ((xfer->tx_nbits == SPI_NBITS_DUAL) &&
- !(spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD)))
+ !(spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL)))
return -EINVAL;
if ((xfer->tx_nbits == SPI_NBITS_QUAD) &&
- !(spi->mode & SPI_TX_QUAD))
+ !(spi->mode & (SPI_TX_QUAD | SPI_TX_OCTAL)))
+ return -EINVAL;
+ if ((xfer->tx_nbits == SPI_NBITS_OCTAL) &&
+ !(spi->mode & SPI_TX_OCTAL))
return -EINVAL;
}
/* Check transfer rx_nbits */
@@ -4154,10 +4157,13 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
xfer->rx_nbits != SPI_NBITS_OCTAL)
return -EINVAL;
if ((xfer->rx_nbits == SPI_NBITS_DUAL) &&
- !(spi->mode & (SPI_RX_DUAL | SPI_RX_QUAD)))
+ !(spi->mode & (SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL)))
return -EINVAL;
if ((xfer->rx_nbits == SPI_NBITS_QUAD) &&
- !(spi->mode & SPI_RX_QUAD))
+ !(spi->mode & (SPI_RX_QUAD | SPI_RX_OCTAL)))
+ return -EINVAL;
+ if ((xfer->rx_nbits == SPI_NBITS_OCTAL) &&
+ !(spi->mode & SPI_RX_OCTAL))
return -EINVAL;
}
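
Both __spi_validate() hunks encode the same hierarchy: a device whose mode advertises a wider bus width can also run narrower transfers, so each nbits value now accepts its own capability bit or any wider one. The tx side as a sketch:

    #include <linux/spi/spi.h>

    static bool tx_nbits_ok(struct spi_device *spi, u8 tx_nbits)
    {
            switch (tx_nbits) {
            case SPI_NBITS_SINGLE:
                    return true;
            case SPI_NBITS_DUAL:    /* dual works on dual, quad or octal buses */
                    return spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL);
            case SPI_NBITS_QUAD:    /* quad works on quad or octal buses */
                    return spi->mode & (SPI_TX_QUAD | SPI_TX_OCTAL);
            case SPI_NBITS_OCTAL:
                    return spi->mode & SPI_TX_OCTAL;
            default:
                    return false;
            }
    }
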
diff --git a/drivers/staging/gpib/common/gpib_os.c b/drivers/staging/gpib/common/gpib_os.c
index a193d64db033..38c6abc2ffd2 100644
--- a/drivers/staging/gpib/common/gpib_os.c
+++ b/drivers/staging/gpib/common/gpib_os.c
@@ -610,7 +610,7 @@ int ibclose(struct inode *inode, struct file *filep)
long ibioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
- unsigned int minor = iminor(filep->f_path.dentry->d_inode);
+ unsigned int minor = iminor(file_inode(filep));
struct gpib_board *board;
struct gpib_file_private *file_priv = filep->private_data;
long retval = -ENOTTY;
diff --git a/drivers/staging/rtl8723bs/core/rtw_security.c b/drivers/staging/rtl8723bs/core/rtw_security.c
index 1e9eff01b1aa..e9f382c280d9 100644
--- a/drivers/staging/rtl8723bs/core/rtw_security.c
+++ b/drivers/staging/rtl8723bs/core/rtw_security.c
@@ -868,29 +868,21 @@ static signed int aes_cipher(u8 *key, uint hdrlen,
num_blocks, payload_index;
u8 pn_vector[6];
- u8 mic_iv[16];
- u8 mic_header1[16];
- u8 mic_header2[16];
- u8 ctr_preload[16];
+ u8 mic_iv[16] = {};
+ u8 mic_header1[16] = {};
+ u8 mic_header2[16] = {};
+ u8 ctr_preload[16] = {};
/* Intermediate Buffers */
- u8 chain_buffer[16];
- u8 aes_out[16];
- u8 padded_buffer[16];
+ u8 chain_buffer[16] = {};
+ u8 aes_out[16] = {};
+ u8 padded_buffer[16] = {};
u8 mic[8];
uint frtype = GetFrameType(pframe);
uint frsubtype = GetFrameSubType(pframe);
frsubtype = frsubtype>>4;
- memset((void *)mic_iv, 0, 16);
- memset((void *)mic_header1, 0, 16);
- memset((void *)mic_header2, 0, 16);
- memset((void *)ctr_preload, 0, 16);
- memset((void *)chain_buffer, 0, 16);
- memset((void *)aes_out, 0, 16);
- memset((void *)padded_buffer, 0, 16);
-
if ((hdrlen == WLAN_HDR_A3_LEN) || (hdrlen == WLAN_HDR_A3_QOS_LEN))
a4_exists = 0;
else
@@ -1080,15 +1072,15 @@ static signed int aes_decipher(u8 *key, uint hdrlen,
num_blocks, payload_index;
signed int res = _SUCCESS;
u8 pn_vector[6];
- u8 mic_iv[16];
- u8 mic_header1[16];
- u8 mic_header2[16];
- u8 ctr_preload[16];
+ u8 mic_iv[16] = {};
+ u8 mic_header1[16] = {};
+ u8 mic_header2[16] = {};
+ u8 ctr_preload[16] = {};
/* Intermediate Buffers */
- u8 chain_buffer[16];
- u8 aes_out[16];
- u8 padded_buffer[16];
+ u8 chain_buffer[16] = {};
+ u8 aes_out[16] = {};
+ u8 padded_buffer[16] = {};
u8 mic[8];
uint frtype = GetFrameType(pframe);
@@ -1096,14 +1088,6 @@ static signed int aes_decipher(u8 *key, uint hdrlen,
frsubtype = frsubtype>>4;
- memset((void *)mic_iv, 0, 16);
- memset((void *)mic_header1, 0, 16);
- memset((void *)mic_header2, 0, 16);
- memset((void *)ctr_preload, 0, 16);
- memset((void *)chain_buffer, 0, 16);
- memset((void *)aes_out, 0, 16);
- memset((void *)padded_buffer, 0, 16);
-
/* start to decrypt the payload */
num_blocks = (plen-8) / 16; /* plen including LLC, payload_length and mic) */
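
In the rtl8723bs hunks, u8 buf[16] = {}; zero-initializes the array at its definition, making the block of memset() calls redundant; the empty-braces form is a GNU extension (standardized in C23) that the kernel relies on throughout. A standalone check of the equivalence:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            unsigned char a[16] = {};       /* zeroed at definition */
            unsigned char b[16];

            memset(b, 0, sizeof(b));        /* the pattern the hunk removes */

            printf("%d\n", memcmp(a, b, sizeof(a)));        /* prints: 0 */
            return 0;
    }
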
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
index 5dbf8d53db09..721b15b7e13b 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
@@ -97,6 +97,13 @@ struct vchiq_arm_state {
* tracked separately with the state.
*/
int peer_use_count;
+
+ /*
+ * Flag to indicate that the first vchiq connect has made it through.
+ * This means that both sides should be fully ready, and we should
+ * be able to suspend after this point.
+ */
+ int first_connect;
};
static int
@@ -273,6 +280,29 @@ static int vchiq_platform_init(struct platform_device *pdev, struct vchiq_state
return 0;
}
+int
+vchiq_platform_init_state(struct vchiq_state *state)
+{
+ struct vchiq_arm_state *platform_state;
+
+ platform_state = devm_kzalloc(state->dev, sizeof(*platform_state), GFP_KERNEL);
+ if (!platform_state)
+ return -ENOMEM;
+
+ rwlock_init(&platform_state->susp_res_lock);
+
+ init_completion(&platform_state->ka_evt);
+ atomic_set(&platform_state->ka_use_count, 0);
+ atomic_set(&platform_state->ka_use_ack_count, 0);
+ atomic_set(&platform_state->ka_release_count, 0);
+
+ platform_state->state = state;
+
+ state->platform_state = (struct opaque_platform_state *)platform_state;
+
+ return 0;
+}
+
static struct vchiq_arm_state *vchiq_platform_get_arm_state(struct vchiq_state *state)
{
return (struct vchiq_arm_state *)state->platform_state;
@@ -363,8 +393,7 @@ int vchiq_shutdown(struct vchiq_instance *instance)
struct vchiq_state *state = instance->state;
int ret = 0;
- if (mutex_lock_killable(&state->mutex))
- return -EAGAIN;
+ mutex_lock(&state->mutex);
/* Remove all services */
vchiq_shutdown_internal(state, instance);
@@ -982,39 +1011,6 @@ exit:
}
int
-vchiq_platform_init_state(struct vchiq_state *state)
-{
- struct vchiq_arm_state *platform_state;
- char threadname[16];
-
- platform_state = devm_kzalloc(state->dev, sizeof(*platform_state), GFP_KERNEL);
- if (!platform_state)
- return -ENOMEM;
-
- snprintf(threadname, sizeof(threadname), "vchiq-keep/%d",
- state->id);
- platform_state->ka_thread = kthread_create(&vchiq_keepalive_thread_func,
- (void *)state, threadname);
- if (IS_ERR(platform_state->ka_thread)) {
- dev_err(state->dev, "couldn't create thread %s\n", threadname);
- return PTR_ERR(platform_state->ka_thread);
- }
-
- rwlock_init(&platform_state->susp_res_lock);
-
- init_completion(&platform_state->ka_evt);
- atomic_set(&platform_state->ka_use_count, 0);
- atomic_set(&platform_state->ka_use_ack_count, 0);
- atomic_set(&platform_state->ka_release_count, 0);
-
- platform_state->state = state;
-
- state->platform_state = (struct opaque_platform_state *)platform_state;
-
- return 0;
-}
-
-int
vchiq_use_internal(struct vchiq_state *state, struct vchiq_service *service,
enum USE_TYPE_E use_type)
{
@@ -1329,19 +1325,37 @@ out:
return ret;
}
-void vchiq_platform_connected(struct vchiq_state *state)
-{
- struct vchiq_arm_state *arm_state = vchiq_platform_get_arm_state(state);
-
- wake_up_process(arm_state->ka_thread);
-}
-
void vchiq_platform_conn_state_changed(struct vchiq_state *state,
enum vchiq_connstate oldstate,
enum vchiq_connstate newstate)
{
+ struct vchiq_arm_state *arm_state = vchiq_platform_get_arm_state(state);
+ char threadname[16];
+
dev_dbg(state->dev, "suspend: %d: %s->%s\n",
state->id, get_conn_state_name(oldstate), get_conn_state_name(newstate));
+ if (state->conn_state != VCHIQ_CONNSTATE_CONNECTED)
+ return;
+
+ write_lock_bh(&arm_state->susp_res_lock);
+ if (arm_state->first_connect) {
+ write_unlock_bh(&arm_state->susp_res_lock);
+ return;
+ }
+
+ arm_state->first_connect = 1;
+ write_unlock_bh(&arm_state->susp_res_lock);
+ snprintf(threadname, sizeof(threadname), "vchiq-keep/%d",
+ state->id);
+ arm_state->ka_thread = kthread_create(&vchiq_keepalive_thread_func,
+ (void *)state,
+ threadname);
+ if (IS_ERR(arm_state->ka_thread)) {
+ dev_err(state->dev, "suspend: Couldn't create thread %s\n",
+ threadname);
+ } else {
+ wake_up_process(arm_state->ka_thread);
+ }
}
static const struct of_device_id vchiq_of_match[] = {
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c
index e7b0c800a205..e2cac0898b8f 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c
@@ -3343,7 +3343,6 @@ vchiq_connect_internal(struct vchiq_state *state, struct vchiq_instance *instanc
return -EAGAIN;
vchiq_set_conn_state(state, VCHIQ_CONNSTATE_CONNECTED);
- vchiq_platform_connected(state);
complete(&state->connect);
}
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h
index 3b5c0618e567..9b4e766990a4 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h
@@ -575,8 +575,6 @@ int vchiq_send_remote_use(struct vchiq_state *state);
int vchiq_send_remote_use_active(struct vchiq_state *state);
-void vchiq_platform_connected(struct vchiq_state *state);
-
void vchiq_platform_conn_state_changed(struct vchiq_state *state,
enum vchiq_connstate oldstate,
enum vchiq_connstate newstate);
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 34cf2c399b39..70905805cb17 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -1842,7 +1842,9 @@ out:
}
kmem_cache_free(t10_pr_reg_cache, dest_pr_reg);
- core_scsi3_lunacl_undepend_item(dest_se_deve);
+
+ if (dest_se_deve)
+ core_scsi3_lunacl_undepend_item(dest_se_deve);
if (is_local)
continue;
diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c
index f3af5666bb11..f9ef7d94cebd 100644
--- a/drivers/tee/optee/ffa_abi.c
+++ b/drivers/tee/optee/ffa_abi.c
@@ -728,12 +728,21 @@ static bool optee_ffa_exchange_caps(struct ffa_device *ffa_dev,
return true;
}
+static void notif_work_fn(struct work_struct *work)
+{
+ struct optee_ffa *optee_ffa = container_of(work, struct optee_ffa,
+ notif_work);
+ struct optee *optee = container_of(optee_ffa, struct optee, ffa);
+
+ optee_do_bottom_half(optee->ctx);
+}
+
static void notif_callback(int notify_id, void *cb_data)
{
struct optee *optee = cb_data;
if (notify_id == optee->ffa.bottom_half_value)
- optee_do_bottom_half(optee->ctx);
+ queue_work(optee->ffa.notif_wq, &optee->ffa.notif_work);
else
optee_notif_send(optee, notify_id);
}
@@ -817,9 +826,11 @@ static void optee_ffa_remove(struct ffa_device *ffa_dev)
struct optee *optee = ffa_dev_get_drvdata(ffa_dev);
u32 bottom_half_id = optee->ffa.bottom_half_value;
- if (bottom_half_id != U32_MAX)
+ if (bottom_half_id != U32_MAX) {
ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev,
bottom_half_id);
+ destroy_workqueue(optee->ffa.notif_wq);
+ }
optee_remove_common(optee);
mutex_destroy(&optee->ffa.mutex);
@@ -835,6 +846,13 @@ static int optee_ffa_async_notif_init(struct ffa_device *ffa_dev,
u32 notif_id = 0;
int rc;
+ INIT_WORK(&optee->ffa.notif_work, notif_work_fn);
+ optee->ffa.notif_wq = create_workqueue("optee_notification");
+ if (!optee->ffa.notif_wq) {
+ rc = -EINVAL;
+ goto err;
+ }
+
while (true) {
rc = ffa_dev->ops->notifier_ops->notify_request(ffa_dev,
is_per_vcpu,
@@ -851,19 +869,24 @@ static int optee_ffa_async_notif_init(struct ffa_device *ffa_dev,
* notifications in that case.
*/
if (rc != -EACCES)
- return rc;
+ goto err_wq;
notif_id++;
if (notif_id >= OPTEE_FFA_MAX_ASYNC_NOTIF_VALUE)
- return rc;
+ goto err_wq;
}
optee->ffa.bottom_half_value = notif_id;
rc = enable_async_notif(optee);
- if (rc < 0) {
- ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev,
- notif_id);
- optee->ffa.bottom_half_value = U32_MAX;
- }
+ if (rc < 0)
+ goto err_rel;
+
+ return 0;
+err_rel:
+ ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev, notif_id);
+err_wq:
+ destroy_workqueue(optee->ffa.notif_wq);
+err:
+ optee->ffa.bottom_half_value = U32_MAX;
return rc;
}
diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h
index dc0f355ef72a..9526087f0e68 100644
--- a/drivers/tee/optee/optee_private.h
+++ b/drivers/tee/optee/optee_private.h
@@ -165,6 +165,8 @@ struct optee_ffa {
/* Serializes access to @global_ids */
struct mutex mutex;
struct rhashtable global_ids;
+ struct workqueue_struct *notif_wq;
+ struct work_struct notif_work;
};
struct optee;
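The OP-TEE change above moves optee_do_bottom_half() out of the FF-A notifier callback, which may run in a context where sleeping is not allowed, and onto a dedicated workqueue. A minimal sketch of that deferral pattern, with hypothetical names (alloc_workqueue() is the modern spelling of the create_workqueue() used in the hunk):

#include <linux/workqueue.h>
#include <linux/errno.h>

struct notif_ctx {
	struct workqueue_struct *wq;
	struct work_struct work;
};

static void notif_bottom_half(struct work_struct *work)
{
	struct notif_ctx *ctx = container_of(work, struct notif_ctx, work);

	/* blocking bottom-half processing runs here, in process context */
	(void)ctx;
}

static int notif_init(struct notif_ctx *ctx)
{
	INIT_WORK(&ctx->work, notif_bottom_half);
	ctx->wq = alloc_workqueue("demo_notif", 0, 0);
	return ctx->wq ? 0 : -ENOMEM;
}

/* called from the (possibly atomic) notification callback */
static void notif_cb(struct notif_ctx *ctx)
{
	queue_work(ctx->wq, &ctx->work);
}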
diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
index 28febb95f8fa..b6c58a7e7b6a 100644
--- a/drivers/thunderbolt/switch.c
+++ b/drivers/thunderbolt/switch.c
@@ -1450,7 +1450,7 @@ int tb_dp_port_set_hops(struct tb_port *port, unsigned int video,
return ret;
data[0] &= ~ADP_DP_CS_0_VIDEO_HOPID_MASK;
- data[1] &= ~ADP_DP_CS_1_AUX_RX_HOPID_MASK;
+ data[1] &= ~ADP_DP_CS_1_AUX_TX_HOPID_MASK;
data[1] &= ~ADP_DP_CS_1_AUX_RX_HOPID_MASK;
data[0] |= (video << ADP_DP_CS_0_VIDEO_HOPID_SHIFT) &
@@ -3437,7 +3437,7 @@ void tb_sw_set_unplugged(struct tb_switch *sw)
}
}
-static int tb_switch_set_wake(struct tb_switch *sw, unsigned int flags)
+static int tb_switch_set_wake(struct tb_switch *sw, unsigned int flags, bool runtime)
{
if (flags)
tb_sw_dbg(sw, "enabling wakeup: %#x\n", flags);
@@ -3445,7 +3445,7 @@ static int tb_switch_set_wake(struct tb_switch *sw, unsigned int flags)
tb_sw_dbg(sw, "disabling wakeup\n");
if (tb_switch_is_usb4(sw))
- return usb4_switch_set_wake(sw, flags);
+ return usb4_switch_set_wake(sw, flags, runtime);
return tb_lc_set_wake(sw, flags);
}
@@ -3521,7 +3521,7 @@ int tb_switch_resume(struct tb_switch *sw, bool runtime)
tb_switch_check_wakes(sw);
/* Disable wakes */
- tb_switch_set_wake(sw, 0);
+ tb_switch_set_wake(sw, 0, true);
err = tb_switch_tmu_init(sw);
if (err)
@@ -3603,7 +3603,7 @@ void tb_switch_suspend(struct tb_switch *sw, bool runtime)
flags |= TB_WAKE_ON_USB4 | TB_WAKE_ON_USB3 | TB_WAKE_ON_PCIE;
}
- tb_switch_set_wake(sw, flags);
+ tb_switch_set_wake(sw, flags, runtime);
if (tb_switch_is_usb4(sw))
usb4_switch_set_sleep(sw);
diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h
index 87afd5a7c504..f503bad86413 100644
--- a/drivers/thunderbolt/tb.h
+++ b/drivers/thunderbolt/tb.h
@@ -1317,7 +1317,7 @@ int usb4_switch_read_uid(struct tb_switch *sw, u64 *uid);
int usb4_switch_drom_read(struct tb_switch *sw, unsigned int address, void *buf,
size_t size);
bool usb4_switch_lane_bonding_possible(struct tb_switch *sw);
-int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags);
+int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags, bool runtime);
int usb4_switch_set_sleep(struct tb_switch *sw);
int usb4_switch_nvm_sector_size(struct tb_switch *sw);
int usb4_switch_nvm_read(struct tb_switch *sw, unsigned int address, void *buf,
diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c
index fce3c0f2354a..fdae76c8f728 100644
--- a/drivers/thunderbolt/usb4.c
+++ b/drivers/thunderbolt/usb4.c
@@ -403,12 +403,12 @@ bool usb4_switch_lane_bonding_possible(struct tb_switch *sw)
* usb4_switch_set_wake() - Enable/disable wake
* @sw: USB4 router
* @flags: Wakeup flags (%0 to disable)
+ * @runtime: Wake is being programmed during system runtime
*
* Enables/disables router to wake up from sleep.
*/
-int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags)
+int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags, bool runtime)
{
- struct usb4_port *usb4;
struct tb_port *port;
u64 route = tb_route(sw);
u32 val;
@@ -438,13 +438,11 @@ int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags)
val |= PORT_CS_19_WOU4;
} else {
bool configured = val & PORT_CS_19_PC;
- usb4 = port->usb4;
+ bool wakeup = runtime || device_may_wakeup(&port->usb4->dev);
- if (((flags & TB_WAKE_ON_CONNECT) &&
- device_may_wakeup(&usb4->dev)) && !configured)
+ if ((flags & TB_WAKE_ON_CONNECT) && wakeup && !configured)
val |= PORT_CS_19_WOC;
- if (((flags & TB_WAKE_ON_DISCONNECT) &&
- device_may_wakeup(&usb4->dev)) && configured)
+ if ((flags & TB_WAKE_ON_DISCONNECT) && wakeup && configured)
val |= PORT_CS_19_WOD;
if ((flags & TB_WAKE_ON_USB4) && configured)
val |= PORT_CS_19_WOU4;
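The rewritten condition above folds the wake policy into one boolean: when the router is being suspended at runtime it should always be armed to wake, while for system sleep the decision follows the port's userspace wakeup setting. A one-line sketch, port device parameter hypothetical:

#include <linux/pm_wakeup.h>

static bool want_port_wake(bool runtime, struct device *port_dev)
{
	return runtime || device_may_wakeup(port_dev);
}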
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index bd02ee898f5d..500dfc009d03 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -235,6 +235,7 @@ struct imx_port {
enum imx_tx_state tx_state;
struct hrtimer trigger_start_tx;
struct hrtimer trigger_stop_tx;
+ unsigned int rxtl;
};
struct imx_port_ucrs {
@@ -1339,6 +1340,7 @@ static void imx_uart_clear_rx_errors(struct imx_port *sport)
#define TXTL_DEFAULT 8
#define RXTL_DEFAULT 8 /* 8 characters or aging timer */
+#define RXTL_CONSOLE_DEFAULT 1
#define TXTL_DMA 8 /* DMA burst setting */
#define RXTL_DMA 9 /* DMA burst setting */
@@ -1457,7 +1459,7 @@ static void imx_uart_disable_dma(struct imx_port *sport)
ucr1 &= ~(UCR1_RXDMAEN | UCR1_TXDMAEN | UCR1_ATDMAEN);
imx_uart_writel(sport, ucr1, UCR1);
- imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT);
+ imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl);
sport->dma_is_enabled = 0;
}
@@ -1482,7 +1484,12 @@ static int imx_uart_startup(struct uart_port *port)
return retval;
}
- imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT);
+ if (uart_console(&sport->port))
+ sport->rxtl = RXTL_CONSOLE_DEFAULT;
+ else
+ sport->rxtl = RXTL_DEFAULT;
+
+ imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl);
/* disable the DREN bit (Data Ready interrupt enable) before
* requesting IRQs
@@ -1948,7 +1955,7 @@ static int imx_uart_poll_init(struct uart_port *port)
if (retval)
clk_disable_unprepare(sport->clk_ipg);
- imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT);
+ imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl);
uart_port_lock_irqsave(&sport->port, &flags);
@@ -2040,7 +2047,7 @@ static int imx_uart_rs485_config(struct uart_port *port, struct ktermios *termio
/* If the receiver trigger is 0, set it to a default value */
ufcr = imx_uart_readl(sport, UFCR);
if ((ufcr & UFCR_RXTL_MASK) == 0)
- imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT);
+ imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl);
imx_uart_start_rx(port);
}
@@ -2302,7 +2309,7 @@ imx_uart_console_setup(struct console *co, char *options)
else
imx_uart_console_get_options(sport, &baud, &parity, &bits);
- imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT);
+ imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl);
retval = uart_set_options(&sport->port, co, baud, parity, bits, flow);
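The imx change above records the chosen RX trigger level in the port so every path that reprograms UFCR agrees: a console port interrupts on every character (trigger level 1) for prompt output, while other ports batch 8 characters. A minimal sketch of the selection, constant names as in the hunk but otherwise hypothetical:

#define RXTL_DEFAULT		8	/* characters before RX interrupt */
#define RXTL_CONSOLE_DEFAULT	1	/* byte-at-a-time for consoles */

static unsigned int pick_rxtl(bool is_console)
{
	return is_console ? RXTL_CONSOLE_DEFAULT : RXTL_DEFAULT;
}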
diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c
index 508e8c6f01d4..884fefbfd5a1 100644
--- a/drivers/tty/serial/pch_uart.c
+++ b/drivers/tty/serial/pch_uart.c
@@ -954,7 +954,7 @@ static unsigned int dma_handle_tx(struct eg20t_port *priv)
__func__);
return 0;
}
- dma_sync_sg_for_device(port->dev, priv->sg_tx_p, nent, DMA_TO_DEVICE);
+ dma_sync_sg_for_device(port->dev, priv->sg_tx_p, num, DMA_TO_DEVICE);
priv->desc_tx = desc;
desc->callback = pch_dma_tx_complete;
desc->callback_param = priv;
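The pch_uart fix above follows the documented DMA API rule: dma_sync_sg_for_device() must be passed the same nents that was given to dma_map_sg(), not the possibly smaller mapped count that dma_map_sg() returned. A sketch, function name hypothetical:

#include <linux/dma-mapping.h>

static void demo_tx_sync(struct device *dev, struct scatterlist *sg, int num)
{
	int nent;

	nent = dma_map_sg(dev, sg, num, DMA_TO_DEVICE);
	if (!nent)
		return;

	/* ... CPU fills the mapped buffers here ... */

	dma_sync_sg_for_device(dev, sg, num, DMA_TO_DEVICE); /* num, not nent */
}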
diff --git a/drivers/tty/serial/serial_base_bus.c b/drivers/tty/serial/serial_base_bus.c
index 5d1677f1b651..22749ab0428a 100644
--- a/drivers/tty/serial/serial_base_bus.c
+++ b/drivers/tty/serial/serial_base_bus.c
@@ -13,6 +13,7 @@
#include <linux/device.h>
#include <linux/idr.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/serial_core.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
@@ -72,6 +73,7 @@ static int serial_base_device_init(struct uart_port *port,
dev->parent = parent_dev;
dev->bus = &serial_base_bus_type;
dev->release = release;
+ device_set_of_node_from_dev(dev, parent_dev);
if (!serial_base_initialized) {
dev_dbg(port->dev, "uart_add_one_port() called before arch_initcall()?\n");
@@ -92,6 +94,7 @@ static void serial_base_ctrl_release(struct device *dev)
{
struct serial_ctrl_device *ctrl_dev = to_serial_base_ctrl_device(dev);
+ of_node_put(dev->of_node);
kfree(ctrl_dev);
}
@@ -139,6 +142,7 @@ static void serial_base_port_release(struct device *dev)
{
struct serial_port_device *port_dev = to_serial_base_port_device(dev);
+ of_node_put(dev->of_node);
kfree(port_dev);
}
diff --git a/drivers/tty/vt/ucs.c b/drivers/tty/vt/ucs.c
index 6ead622b7713..03877485dfb7 100644
--- a/drivers/tty/vt/ucs.c
+++ b/drivers/tty/vt/ucs.c
@@ -206,7 +206,7 @@ static int ucs_page_entry_cmp(const void *key, const void *element)
/**
* ucs_get_fallback() - Get a substitution for the provided Unicode character
- * @base: Base Unicode code point (UCS-4)
+ * @cp: Unicode code point (UCS-4)
*
* Get a simpler fallback character for the provided Unicode character.
* This is used for terminal display when the corresponding glyph is unavailable.
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index ed39d9cb4432..62049ceb34de 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -4650,6 +4650,7 @@ void do_unblank_screen(int leaving_gfx)
set_palette(vc);
set_cursor(vc);
vt_event_post(VT_EVENT_UNBLANK, vc->vc_num, vc->vc_num);
+ notify_update(vc);
}
EXPORT_SYMBOL(do_unblank_screen);
diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c
index de8b6acd4058..fcb4b14a710f 100644
--- a/drivers/ufs/core/ufs-sysfs.c
+++ b/drivers/ufs/core/ufs-sysfs.c
@@ -1808,7 +1808,7 @@ UFS_UNIT_DESC_PARAM(logical_block_size, _LOGICAL_BLK_SIZE, 1);
UFS_UNIT_DESC_PARAM(logical_block_count, _LOGICAL_BLK_COUNT, 8);
UFS_UNIT_DESC_PARAM(erase_block_size, _ERASE_BLK_SIZE, 4);
UFS_UNIT_DESC_PARAM(provisioning_type, _PROVISIONING_TYPE, 1);
-UFS_UNIT_DESC_PARAM(physical_memory_resourse_count, _PHY_MEM_RSRC_CNT, 8);
+UFS_UNIT_DESC_PARAM(physical_memory_resource_count, _PHY_MEM_RSRC_CNT, 8);
UFS_UNIT_DESC_PARAM(context_capabilities, _CTX_CAPABILITIES, 2);
UFS_UNIT_DESC_PARAM(large_unit_granularity, _LARGE_UNIT_SIZE_M1, 1);
UFS_UNIT_DESC_PARAM(wb_buf_alloc_units, _WB_BUF_ALLOC_UNITS, 4);
@@ -1825,7 +1825,7 @@ static struct attribute *ufs_sysfs_unit_descriptor[] = {
&dev_attr_logical_block_count.attr,
&dev_attr_erase_block_size.attr,
&dev_attr_provisioning_type.attr,
- &dev_attr_physical_memory_resourse_count.attr,
+ &dev_attr_physical_memory_resource_count.attr,
&dev_attr_context_capabilities.attr,
&dev_attr_large_unit_granularity.attr,
&dev_attr_wb_buf_alloc_units.attr,
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 4410e7d93b7d..50adfb8b335b 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -6623,9 +6623,14 @@ static void ufshcd_err_handler(struct work_struct *work)
up(&hba->host_sem);
return;
}
- ufshcd_set_eh_in_progress(hba);
spin_unlock_irqrestore(hba->host->host_lock, flags);
+
ufshcd_err_handling_prepare(hba);
+
+ spin_lock_irqsave(hba->host->host_lock, flags);
+ ufshcd_set_eh_in_progress(hba);
+ spin_unlock_irqrestore(hba->host->host_lock, flags);
+
/* Complete requests that have door-bell cleared by h/w */
ufshcd_complete_requests(hba, false);
spin_lock_irqsave(hba->host->host_lock, flags);
@@ -7802,7 +7807,8 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
hba->silence_err_logs = false;
/* scale up clocks to max frequency before full reinitialization */
- ufshcd_scale_clks(hba, ULONG_MAX, true);
+ if (ufshcd_is_clkscaling_supported(hba))
+ ufshcd_scale_clks(hba, ULONG_MAX, true);
err = ufshcd_hba_enable(hba);
diff --git a/drivers/usb/cdns3/cdnsp-debug.h b/drivers/usb/cdns3/cdnsp-debug.h
index cd138acdcce1..86860686d836 100644
--- a/drivers/usb/cdns3/cdnsp-debug.h
+++ b/drivers/usb/cdns3/cdnsp-debug.h
@@ -327,12 +327,13 @@ static inline const char *cdnsp_decode_trb(char *str, size_t size, u32 field0,
case TRB_RESET_EP:
case TRB_HALT_ENDPOINT:
ret = scnprintf(str, size,
- "%s: ep%d%s(%d) ctx %08x%08x slot %ld flags %c",
+ "%s: ep%d%s(%d) ctx %08x%08x slot %ld flags %c %c",
cdnsp_trb_type_string(type),
ep_num, ep_id % 2 ? "out" : "in",
TRB_TO_EP_INDEX(field3), field1, field0,
TRB_TO_SLOT_ID(field3),
- field3 & TRB_CYCLE ? 'C' : 'c');
+ field3 & TRB_CYCLE ? 'C' : 'c',
+ field3 & TRB_ESP ? 'P' : 'p');
break;
case TRB_STOP_RING:
ret = scnprintf(str, size,
diff --git a/drivers/usb/cdns3/cdnsp-ep0.c b/drivers/usb/cdns3/cdnsp-ep0.c
index f317d3c84781..5cd9b898ce97 100644
--- a/drivers/usb/cdns3/cdnsp-ep0.c
+++ b/drivers/usb/cdns3/cdnsp-ep0.c
@@ -414,6 +414,7 @@ static int cdnsp_ep0_std_request(struct cdnsp_device *pdev,
void cdnsp_setup_analyze(struct cdnsp_device *pdev)
{
struct usb_ctrlrequest *ctrl = &pdev->setup;
+ struct cdnsp_ep *pep;
int ret = -EINVAL;
u16 len;
@@ -427,10 +428,21 @@ void cdnsp_setup_analyze(struct cdnsp_device *pdev)
goto out;
}
+ pep = &pdev->eps[0];
+
/* Restore the ep0 to Stopped/Running state. */
- if (pdev->eps[0].ep_state & EP_HALTED) {
- trace_cdnsp_ep0_halted("Restore to normal state");
- cdnsp_halt_endpoint(pdev, &pdev->eps[0], 0);
+ if (pep->ep_state & EP_HALTED) {
+ if (GET_EP_CTX_STATE(pep->out_ctx) == EP_STATE_HALTED)
+ cdnsp_halt_endpoint(pdev, pep, 0);
+
+ /*
+ * The Halt Endpoint Command for ep0 on SSP2 preserves the current
+ * endpoint state, so the driver has to synchronize the software
+ * endpoint state with the endpoint output context state.
+ */
+ pep->ep_state &= ~EP_HALTED;
+ pep->ep_state |= EP_STOPPED;
}
/*
diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h
index 2afa3e558f85..a91cca509db0 100644
--- a/drivers/usb/cdns3/cdnsp-gadget.h
+++ b/drivers/usb/cdns3/cdnsp-gadget.h
@@ -987,6 +987,12 @@ enum cdnsp_setup_dev {
#define STREAM_ID_FOR_TRB(p) ((((p)) << 16) & GENMASK(31, 16))
#define SCT_FOR_TRB(p) (((p) << 1) & 0x7)
+/*
+ * Halt Endpoint Command TRB field.
+ * The ESP bit only exists in the SSP2 controller.
+ */
+#define TRB_ESP BIT(9)
+
/* Link TRB specific fields. */
#define TRB_TC BIT(1)
diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c
index fd06cb85c4ea..0758f171f73e 100644
--- a/drivers/usb/cdns3/cdnsp-ring.c
+++ b/drivers/usb/cdns3/cdnsp-ring.c
@@ -772,7 +772,9 @@ static int cdnsp_update_port_id(struct cdnsp_device *pdev, u32 port_id)
}
if (port_id != old_port) {
- cdnsp_disable_slot(pdev);
+ if (pdev->slot_id)
+ cdnsp_disable_slot(pdev);
+
pdev->active_port = port;
cdnsp_enable_slot(pdev);
}
@@ -2483,7 +2485,8 @@ void cdnsp_queue_halt_endpoint(struct cdnsp_device *pdev, unsigned int ep_index)
{
cdnsp_queue_command(pdev, 0, 0, 0, TRB_TYPE(TRB_HALT_ENDPOINT) |
SLOT_ID_FOR_TRB(pdev->slot_id) |
- EP_ID_FOR_TRB(ep_index));
+ EP_ID_FOR_TRB(ep_index) |
+ (!ep_index ? TRB_ESP : 0));
}
void cdnsp_force_header_wakeup(struct cdnsp_device *pdev, int intf_num)
diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index 8a9b31fd5c89..1a48e6440e6c 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -2374,6 +2374,10 @@ static void udc_suspend(struct ci_hdrc *ci)
*/
if (hw_read(ci, OP_ENDPTLISTADDR, ~0) == 0)
hw_write(ci, OP_ENDPTLISTADDR, ~0, ~0);
+
+ if (ci->gadget.connected &&
+ (!ci->suspended || !device_may_wakeup(ci->dev)))
+ usb_gadget_disconnect(&ci->gadget);
}
static void udc_resume(struct ci_hdrc *ci, bool power_lost)
@@ -2384,6 +2388,9 @@ static void udc_resume(struct ci_hdrc *ci, bool power_lost)
OTGSC_BSVIS | OTGSC_BSVIE);
if (ci->vbus_active)
usb_gadget_vbus_disconnect(&ci->gadget);
+ } else if (ci->vbus_active && ci->driver &&
+ !ci->gadget.connected) {
+ usb_gadget_connect(&ci->gadget);
}
/* Restore value 0 if it was set for power lost check */
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 770d1e91183c..256fe8c86828 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -68,6 +68,12 @@
*/
#define USB_SHORT_SET_ADDRESS_REQ_TIMEOUT 500 /* ms */
+/*
+ * Give SS hubs 200 ms after wake to train downstream links before
+ * assuming no port activity and letting the hub runtime suspend again.
+ */
+#define USB_SS_PORT_U0_WAKE_TIME 200 /* ms */
+
/* Protect struct usb_device->state and ->children members
* Note: Both are also protected by ->dev.sem, except that ->state can
* change to USB_STATE_NOTATTACHED even when the semaphore isn't held. */
@@ -1095,6 +1101,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
goto init2;
goto init3;
}
+
hub_get(hub);
/* The superspeed hub, except for the root hub, has to use Hub Depth
@@ -1343,6 +1350,17 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
device_unlock(&hdev->dev);
}
+ if (type == HUB_RESUME && hub_is_superspeed(hub->hdev)) {
+ /* give usb3 downstream links training time after hub resume */
+ usb_autopm_get_interface_no_resume(
+ to_usb_interface(hub->intfdev));
+
+ queue_delayed_work(system_power_efficient_wq,
+ &hub->post_resume_work,
+ msecs_to_jiffies(USB_SS_PORT_U0_WAKE_TIME));
+ return;
+ }
+
hub_put(hub);
}
@@ -1361,6 +1379,14 @@ static void hub_init_func3(struct work_struct *ws)
hub_activate(hub, HUB_INIT3);
}
+static void hub_post_resume(struct work_struct *ws)
+{
+ struct usb_hub *hub = container_of(ws, struct usb_hub, post_resume_work.work);
+
+ usb_autopm_put_interface_async(to_usb_interface(hub->intfdev));
+ hub_put(hub);
+}
+
enum hub_quiescing_type {
HUB_DISCONNECT, HUB_PRE_RESET, HUB_SUSPEND
};
@@ -1386,6 +1412,7 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type)
/* Stop hub_wq and related activity */
timer_delete_sync(&hub->irq_urb_retry);
+ flush_delayed_work(&hub->post_resume_work);
usb_kill_urb(hub->urb);
if (hub->has_indicators)
cancel_delayed_work_sync(&hub->leds);
@@ -1944,6 +1971,7 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id)
hub->hdev = hdev;
INIT_DELAYED_WORK(&hub->leds, led_work);
INIT_DELAYED_WORK(&hub->init_work, NULL);
+ INIT_DELAYED_WORK(&hub->post_resume_work, hub_post_resume);
INIT_WORK(&hub->events, hub_event);
INIT_LIST_HEAD(&hub->onboard_devs);
spin_lock_init(&hub->irq_urb_lock);
@@ -2337,6 +2365,9 @@ void usb_disconnect(struct usb_device **pdev)
usb_remove_ep_devs(&udev->ep0);
usb_unlock_device(udev);
+ if (udev->usb4_link)
+ device_link_del(udev->usb4_link);
+
/* Unregister the device. The device driver is responsible
* for de-configuring the device and invoking the remove-device
* notifier chain (used by usbfs and possibly others).
@@ -5720,6 +5751,7 @@ static void port_event(struct usb_hub *hub, int port1)
struct usb_device *hdev = hub->hdev;
u16 portstatus, portchange;
int i = 0;
+ int err;
connect_change = test_bit(port1, hub->change_bits);
clear_bit(port1, hub->event_bits);
@@ -5816,8 +5848,11 @@ static void port_event(struct usb_hub *hub, int port1)
} else if (!udev || !(portstatus & USB_PORT_STAT_CONNECTION)
|| udev->state == USB_STATE_NOTATTACHED) {
dev_dbg(&port_dev->dev, "do warm reset, port only\n");
- if (hub_port_reset(hub, port1, NULL,
- HUB_BH_RESET_TIME, true) < 0)
+ err = hub_port_reset(hub, port1, NULL,
+ HUB_BH_RESET_TIME, true);
+ if (!udev && err == -ENOTCONN)
+ connect_change = 0;
+ else if (err < 0)
hub_port_disable(hub, port1, 1);
} else {
dev_dbg(&port_dev->dev, "do warm reset, full device\n");
diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h
index e6ae73f8a95d..9ebc5ef54a32 100644
--- a/drivers/usb/core/hub.h
+++ b/drivers/usb/core/hub.h
@@ -70,6 +70,7 @@ struct usb_hub {
u8 indicator[USB_MAXCHILDREN];
struct delayed_work leds;
struct delayed_work init_work;
+ struct delayed_work post_resume_work;
struct work_struct events;
spinlock_t irq_urb_lock;
struct timer_list irq_urb_retry;
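The hub change above pins the interface awake across the post-resume link-training window and lets delayed work release the pin: usb_autopm_get_interface_no_resume() takes a runtime-PM usage reference without waking anything, and dropping that reference from the work item is what re-permits runtime suspend. A generic sketch of the hold-then-release shape, names hypothetical (the delayed_work is assumed to have been set up with INIT_DELAYED_WORK()):

#include <linux/pm_runtime.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

struct post_resume {
	struct device *dev;
	struct delayed_work dwork;
};

static void post_resume_fn(struct work_struct *ws)
{
	struct post_resume *pr =
		container_of(ws, struct post_resume, dwork.work);

	pm_runtime_put(pr->dev);		/* allow runtime suspend again */
}

static void arm_post_resume(struct post_resume *pr, unsigned int ms)
{
	pm_runtime_get_noresume(pr->dev);	/* block runtime suspend */
	queue_delayed_work(system_power_efficient_wq, &pr->dwork,
			   msecs_to_jiffies(ms));
}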
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 53d68d20fb62..0cf94c7a2c9c 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -227,7 +227,8 @@ static const struct usb_device_id usb_quirk_list[] = {
{ USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME },
/* Logitech HD Webcam C270 */
- { USB_DEVICE(0x046d, 0x0825), .driver_info = USB_QUIRK_RESET_RESUME },
+ { USB_DEVICE(0x046d, 0x0825), .driver_info = USB_QUIRK_RESET_RESUME |
+ USB_QUIRK_NO_LPM},
/* Logitech HD Pro Webcams C920, C920-C, C922, C925e and C930e */
{ USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c
index ea1ce8beb0cb..489dbdc96f94 100644
--- a/drivers/usb/core/usb-acpi.c
+++ b/drivers/usb/core/usb-acpi.c
@@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(usb_acpi_set_power_state);
*/
static int usb_acpi_add_usb4_devlink(struct usb_device *udev)
{
- const struct device_link *link;
+ struct device_link *link;
struct usb_port *port_dev;
struct usb_hub *hub;
@@ -188,6 +188,8 @@ static int usb_acpi_add_usb4_devlink(struct usb_device *udev)
dev_dbg(&port_dev->dev, "Created device link from %s to %s\n",
dev_name(&port_dev->child->dev), dev_name(nhi_fwnode->dev));
+ udev->usb4_link = link;
+
return 0;
}
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index d5b622f78cf3..0637bfbc054e 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -5389,20 +5389,34 @@ int dwc2_gadget_enter_hibernation(struct dwc2_hsotg *hsotg)
if (gusbcfg & GUSBCFG_ULPI_UTMI_SEL) {
/* ULPI interface */
gpwrdn |= GPWRDN_ULPI_LATCH_EN_DURING_HIB_ENTRY;
- }
- dwc2_writel(hsotg, gpwrdn, GPWRDN);
- udelay(10);
+ dwc2_writel(hsotg, gpwrdn, GPWRDN);
+ udelay(10);
- /* Suspend the Phy Clock */
- pcgcctl = dwc2_readl(hsotg, PCGCTL);
- pcgcctl |= PCGCTL_STOPPCLK;
- dwc2_writel(hsotg, pcgcctl, PCGCTL);
- udelay(10);
+ /* Suspend the Phy Clock */
+ pcgcctl = dwc2_readl(hsotg, PCGCTL);
+ pcgcctl |= PCGCTL_STOPPCLK;
+ dwc2_writel(hsotg, pcgcctl, PCGCTL);
+ udelay(10);
- gpwrdn = dwc2_readl(hsotg, GPWRDN);
- gpwrdn |= GPWRDN_PMUACTV;
- dwc2_writel(hsotg, gpwrdn, GPWRDN);
- udelay(10);
+ gpwrdn = dwc2_readl(hsotg, GPWRDN);
+ gpwrdn |= GPWRDN_PMUACTV;
+ dwc2_writel(hsotg, gpwrdn, GPWRDN);
+ udelay(10);
+ } else {
+ /* UTMI+ Interface */
+ dwc2_writel(hsotg, gpwrdn, GPWRDN);
+ udelay(10);
+
+ gpwrdn = dwc2_readl(hsotg, GPWRDN);
+ gpwrdn |= GPWRDN_PMUACTV;
+ dwc2_writel(hsotg, gpwrdn, GPWRDN);
+ udelay(10);
+
+ pcgcctl = dwc2_readl(hsotg, PCGCTL);
+ pcgcctl |= PCGCTL_STOPPCLK;
+ dwc2_writel(hsotg, pcgcctl, PCGCTL);
+ udelay(10);
+ }
/* Set flag to indicate that we are in hibernation */
hsotg->hibernated = 1;
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 2bc775a747f2..8002c23a5a02 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -2422,6 +2422,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
{
u32 reg;
int i;
+ int ret;
if (!pm_runtime_suspended(dwc->dev) && !PMSG_IS_AUTO(msg)) {
dwc->susphy_state = (dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)) &
@@ -2440,7 +2441,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
case DWC3_GCTL_PRTCAP_DEVICE:
if (pm_runtime_suspended(dwc->dev))
break;
- dwc3_gadget_suspend(dwc);
+ ret = dwc3_gadget_suspend(dwc);
+ if (ret)
+ return ret;
synchronize_irq(dwc->irq_gadget);
dwc3_core_exit(dwc);
break;
@@ -2475,7 +2478,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
break;
if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) {
- dwc3_gadget_suspend(dwc);
+ ret = dwc3_gadget_suspend(dwc);
+ if (ret)
+ return ret;
synchronize_irq(dwc->irq_gadget);
}
diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c
index 7334de85ad10..ca7e1c02773a 100644
--- a/drivers/usb/dwc3/dwc3-qcom.c
+++ b/drivers/usb/dwc3/dwc3-qcom.c
@@ -680,12 +680,12 @@ static int dwc3_qcom_probe(struct platform_device *pdev)
ret = reset_control_deassert(qcom->resets);
if (ret) {
dev_err(&pdev->dev, "failed to deassert resets, err=%d\n", ret);
- goto reset_assert;
+ return ret;
}
ret = clk_bulk_prepare_enable(qcom->num_clocks, qcom->clks);
if (ret < 0)
- goto reset_assert;
+ return ret;
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!r) {
@@ -755,8 +755,6 @@ remove_core:
dwc3_core_remove(&qcom->dwc);
clk_disable:
clk_bulk_disable_unprepare(qcom->num_clocks, qcom->clks);
-reset_assert:
- reset_control_assert(qcom->resets);
return ret;
}
@@ -771,7 +769,6 @@ static void dwc3_qcom_remove(struct platform_device *pdev)
clk_bulk_disable_unprepare(qcom->num_clocks, qcom->clks);
dwc3_qcom_interconnect_exit(qcom);
- reset_control_assert(qcom->resets);
}
static int dwc3_qcom_pm_suspend(struct device *dev)
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 321361288935..74968f93d4a3 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -3516,7 +3516,7 @@ static int dwc3_gadget_ep_reclaim_completed_trb(struct dwc3_ep *dep,
* We're going to do that here to avoid problems of HW trying
* to use bogus TRBs for transfers.
*/
- if (chain && (trb->ctrl & DWC3_TRB_CTRL_HWO))
+ if (trb->ctrl & DWC3_TRB_CTRL_HWO)
trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
/*
@@ -4821,26 +4821,22 @@ int dwc3_gadget_suspend(struct dwc3 *dwc)
int ret;
ret = dwc3_gadget_soft_disconnect(dwc);
- if (ret)
- goto err;
-
- spin_lock_irqsave(&dwc->lock, flags);
- if (dwc->gadget_driver)
- dwc3_disconnect_gadget(dwc);
- spin_unlock_irqrestore(&dwc->lock, flags);
-
- return 0;
-
-err:
/*
* Attempt to reset the controller's state. Likely no
* communication can be established until the host
* performs a port reset.
*/
- if (dwc->softconnect)
+ if (ret && dwc->softconnect) {
dwc3_gadget_soft_connect(dwc);
+ return -EAGAIN;
+ }
- return ret;
+ spin_lock_irqsave(&dwc->lock, flags);
+ if (dwc->gadget_driver)
+ dwc3_disconnect_gadget(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ return 0;
}
int dwc3_gadget_resume(struct dwc3 *dwc)
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index fba2a56dae97..f94ea196ce54 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -1065,6 +1065,8 @@ static ssize_t webusb_landingPage_store(struct config_item *item, const char *pa
unsigned int bytes_to_strip = 0;
int l = len;
+ if (!len)
+ return len;
if (page[l - 1] == '\n') {
--l;
++bytes_to_strip;
@@ -1188,6 +1190,8 @@ static ssize_t os_desc_qw_sign_store(struct config_item *item, const char *page,
struct gadget_info *gi = os_desc_item_to_gadget_info(item);
int res, l;
+ if (!len)
+ return len;
l = min_t(int, len, OS_STRING_QW_SIGN_LEN >> 1);
if (page[l - 1] == '\n')
--l;
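Both configfs stores above gained the same guard: a ->store() callback can be invoked with len == 0, and stripping a trailing newline via page[len - 1] would then read one byte before the buffer. A sketch of the safe shape, function name hypothetical:

#include <linux/types.h>

static ssize_t demo_store(const char *page, size_t len)
{
	size_t l = len;

	if (!len)
		return len;		/* nothing to parse */
	if (page[l - 1] == '\n')	/* now safe to strip a trailing NL */
		--l;
	/* ... parse page[0..l) ... */
	return len;
}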
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 2dea9e42a0f8..ea5f0af1e8d2 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -2369,8 +2369,7 @@ static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count)
for (; count; --count, ++epfile) {
BUG_ON(mutex_is_locked(&epfile->mutex));
if (epfile->dentry) {
- d_delete(epfile->dentry);
- dput(epfile->dentry);
+ simple_recursive_removal(epfile->dentry, NULL);
epfile->dentry = NULL;
}
}
diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c
index ab544f6824be..540dc5ab96fc 100644
--- a/drivers/usb/gadget/function/u_serial.c
+++ b/drivers/usb/gadget/function/u_serial.c
@@ -295,8 +295,8 @@ __acquires(&port->port_lock)
break;
}
- if (do_tty_wake && port->port.tty)
- tty_wakeup(port->port.tty);
+ if (do_tty_wake)
+ tty_port_tty_wakeup(&port->port);
return status;
}
@@ -544,20 +544,16 @@ static int gs_alloc_requests(struct usb_ep *ep, struct list_head *head,
static int gs_start_io(struct gs_port *port)
{
struct list_head *head = &port->read_pool;
- struct usb_ep *ep;
+ struct usb_ep *ep = port->port_usb->out;
int status;
unsigned started;
- if (!port->port_usb || !port->port.tty)
- return -EIO;
-
/* Allocate RX and TX I/O buffers. We can't easily do this much
* earlier (with GFP_KERNEL) because the requests are coupled to
* endpoints, as are the packet sizes we'll be using. Different
* configurations may use different endpoints with a given port;
* and high speed vs full speed changes packet sizes too.
*/
- ep = port->port_usb->out;
status = gs_alloc_requests(ep, head, gs_read_complete,
&port->read_allocated);
if (status)
@@ -578,7 +574,7 @@ static int gs_start_io(struct gs_port *port)
gs_start_tx(port);
/* Unblock any pending writes into our circular buffer, in case
* we didn't in gs_start_tx() */
- tty_wakeup(port->port.tty);
+ tty_port_tty_wakeup(&port->port);
} else {
/* Free reqs only if we are still connected */
if (port->port_usb) {
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index fcce84a726f2..b51e132b0cd2 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -1561,7 +1561,6 @@ static void destroy_ep_files (struct dev_data *dev)
spin_lock_irq (&dev->lock);
while (!list_empty(&dev->epfiles)) {
struct ep_data *ep;
- struct inode *parent;
struct dentry *dentry;
/* break link to FS */
@@ -1571,7 +1570,6 @@ static void destroy_ep_files (struct dev_data *dev)
dentry = ep->dentry;
ep->dentry = NULL;
- parent = d_inode(dentry->d_parent);
/* break link to controller */
mutex_lock(&ep->lock);
@@ -1586,10 +1584,7 @@ static void destroy_ep_files (struct dev_data *dev)
put_ep (ep);
/* break link to dcache */
- inode_lock(parent);
- d_delete (dentry);
- dput (dentry);
- inode_unlock(parent);
+ simple_recursive_removal(dentry, NULL);
spin_lock_irq (&dev->lock);
}
diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c
index 0d4ce5734165..06a2edb9e86e 100644
--- a/drivers/usb/host/xhci-dbgcap.c
+++ b/drivers/usb/host/xhci-dbgcap.c
@@ -652,6 +652,10 @@ static void xhci_dbc_stop(struct xhci_dbc *dbc)
case DS_DISABLED:
return;
case DS_CONFIGURED:
+ spin_lock(&dbc->lock);
+ xhci_dbc_flush_requests(dbc);
+ spin_unlock(&dbc->lock);
+
if (dbc->driver->disconnect)
dbc->driver->disconnect(dbc);
break;
diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c
index 60ed753c85bb..d894081d8d15 100644
--- a/drivers/usb/host/xhci-dbgtty.c
+++ b/drivers/usb/host/xhci-dbgtty.c
@@ -617,6 +617,7 @@ int dbc_tty_init(void)
dbc_tty_driver->type = TTY_DRIVER_TYPE_SERIAL;
dbc_tty_driver->subtype = SERIAL_TYPE_NORMAL;
dbc_tty_driver->init_termios = tty_std_termios;
+ dbc_tty_driver->init_termios.c_lflag &= ~ECHO;
dbc_tty_driver->init_termios.c_cflag =
B9600 | CS8 | CREAD | HUPCL | CLOCAL;
dbc_tty_driver->init_termios.c_ispeed = 9600;
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index bd745a0f2f78..6680afa4f596 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1449,6 +1449,10 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
/* Periodic endpoint bInterval limit quirk */
if (usb_endpoint_xfer_int(&ep->desc) ||
usb_endpoint_xfer_isoc(&ep->desc)) {
+ if ((xhci->quirks & XHCI_LIMIT_ENDPOINT_INTERVAL_9) &&
+ interval >= 9) {
+ interval = 8;
+ }
if ((xhci->quirks & XHCI_LIMIT_ENDPOINT_INTERVAL_7) &&
udev->speed >= USB_SPEED_HIGH &&
interval >= 7) {
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 0c481cbc8f08..00fac8b233d2 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -71,12 +71,22 @@
#define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_4C_XHCI 0x15ec
#define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI 0x15f0
+#define PCI_DEVICE_ID_AMD_ARIEL_TYPEC_XHCI 0x13ed
+#define PCI_DEVICE_ID_AMD_ARIEL_TYPEA_XHCI 0x13ee
+#define PCI_DEVICE_ID_AMD_STARSHIP_XHCI 0x148c
+#define PCI_DEVICE_ID_AMD_FIREFLIGHT_15D4_XHCI 0x15d4
+#define PCI_DEVICE_ID_AMD_FIREFLIGHT_15D5_XHCI 0x15d5
+#define PCI_DEVICE_ID_AMD_RAVEN_15E0_XHCI 0x15e0
+#define PCI_DEVICE_ID_AMD_RAVEN_15E1_XHCI 0x15e1
+#define PCI_DEVICE_ID_AMD_RAVEN2_XHCI 0x15e5
#define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639
#define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9
#define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba
#define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb
#define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc
+#define PCI_DEVICE_ID_ATI_NAVI10_7316_XHCI 0x7316
+
#define PCI_DEVICE_ID_ASMEDIA_1042_XHCI 0x1042
#define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142
#define PCI_DEVICE_ID_ASMEDIA_1142_XHCI 0x1242
@@ -280,6 +290,21 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
if (pdev->vendor == PCI_VENDOR_ID_NEC)
xhci->quirks |= XHCI_NEC_HOST;
+ if (pdev->vendor == PCI_VENDOR_ID_AMD &&
+ (pdev->device == PCI_DEVICE_ID_AMD_ARIEL_TYPEC_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_ARIEL_TYPEA_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_STARSHIP_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_FIREFLIGHT_15D4_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_FIREFLIGHT_15D5_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_RAVEN_15E0_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_RAVEN_15E1_XHCI ||
+ pdev->device == PCI_DEVICE_ID_AMD_RAVEN2_XHCI))
+ xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_9;
+
+ if (pdev->vendor == PCI_VENDOR_ID_ATI &&
+ pdev->device == PCI_DEVICE_ID_ATI_NAVI10_7316_XHCI)
+ xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_9;
+
if (pdev->vendor == PCI_VENDOR_ID_AMD && xhci->hci_version == 0x96)
xhci->quirks |= XHCI_AMD_0x96_HOST;
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index 6dab142e7278..c79d5ed48a08 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -328,7 +328,8 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s
}
usb3_hcd = xhci_get_usb3_hcd(xhci);
- if (usb3_hcd && HCC_MAX_PSA(xhci->hcc_params) >= 4)
+ if (usb3_hcd && HCC_MAX_PSA(xhci->hcc_params) >= 4 &&
+ !(xhci->quirks & XHCI_BROKEN_STREAMS))
usb3_hcd->can_do_streams = 1;
if (xhci->shared_hcd) {
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index e038ad3375dc..94c9c9271658 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -518,9 +518,8 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags)
* In the future we should distinguish between -ENODEV and -ETIMEDOUT
* and try to recover a -ETIMEDOUT with a host controller reset.
*/
- ret = xhci_handshake_check_state(xhci, &xhci->op_regs->cmd_ring,
- CMD_RING_RUNNING, 0, 5 * 1000 * 1000,
- XHCI_STATE_REMOVING);
+ ret = xhci_handshake(&xhci->op_regs->cmd_ring,
+ CMD_RING_RUNNING, 0, 5 * 1000 * 1000);
if (ret < 0) {
xhci_err(xhci, "Abort failed to stop command ring: %d\n", ret);
xhci_halt(xhci);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 4e6dbd2375c3..8a819e853288 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -85,29 +85,6 @@ int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us)
}
/*
- * xhci_handshake_check_state - same as xhci_handshake but takes an additional
- * exit_state parameter, and bails out with an error immediately when xhc_state
- * has exit_state flag set.
- */
-int xhci_handshake_check_state(struct xhci_hcd *xhci, void __iomem *ptr,
- u32 mask, u32 done, int usec, unsigned int exit_state)
-{
- u32 result;
- int ret;
-
- ret = readl_poll_timeout_atomic(ptr, result,
- (result & mask) == done ||
- result == U32_MAX ||
- xhci->xhc_state & exit_state,
- 1, usec);
-
- if (result == U32_MAX || xhci->xhc_state & exit_state)
- return -ENODEV;
-
- return ret;
-}
-
-/*
* Disable interrupts and begin the xHCI halting process.
*/
void xhci_quiesce(struct xhci_hcd *xhci)
@@ -227,8 +204,7 @@ int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us)
if (xhci->quirks & XHCI_INTEL_HOST)
udelay(1000);
- ret = xhci_handshake_check_state(xhci, &xhci->op_regs->command,
- CMD_RESET, 0, timeout_us, XHCI_STATE_REMOVING);
+ ret = xhci_handshake(&xhci->op_regs->command, CMD_RESET, 0, timeout_us);
if (ret)
return ret;
@@ -1182,7 +1158,10 @@ int xhci_resume(struct xhci_hcd *xhci, bool power_lost, bool is_auto_resume)
xhci_dbg(xhci, "Stop HCD\n");
xhci_halt(xhci);
xhci_zero_64b_regs(xhci);
- retval = xhci_reset(xhci, XHCI_RESET_LONG_USEC);
+ if (xhci->xhc_state & XHCI_STATE_REMOVING)
+ retval = -ENODEV;
+ else
+ retval = xhci_reset(xhci, XHCI_RESET_LONG_USEC);
spin_unlock_irq(&xhci->lock);
if (retval)
return retval;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 49887a303e43..a20f4e7cd43a 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1643,6 +1643,7 @@ struct xhci_hcd {
#define XHCI_WRITE_64_HI_LO BIT_ULL(47)
#define XHCI_CDNS_SCTX_QUIRK BIT_ULL(48)
#define XHCI_ETRON_HOST BIT_ULL(49)
+#define XHCI_LIMIT_ENDPOINT_INTERVAL_9 BIT_ULL(50)
unsigned int num_active_eps;
unsigned int limit_active_eps;
@@ -1868,8 +1869,6 @@ void xhci_skip_sec_intr_events(struct xhci_hcd *xhci,
/* xHCI host controller glue */
typedef void (*xhci_get_quirks_t)(struct device *, struct xhci_hcd *);
int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us);
-int xhci_handshake_check_state(struct xhci_hcd *xhci, void __iomem *ptr,
- u32 mask, u32 done, int usec, unsigned int exit_state);
void xhci_quiesce(struct xhci_hcd *xhci);
int xhci_halt(struct xhci_hcd *xhci);
int xhci_start(struct xhci_hcd *xhci);
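The removed xhci_handshake_check_state() wrapped the same iopoll idiom that the surviving xhci_handshake() keeps, just with an extra early-exit condition; after this change the callers check XHCI_STATE_REMOVING themselves, as the xhci_resume() hunk shows. The core idiom, sketched against a hypothetical register:

#include <linux/iopoll.h>
#include <linux/types.h>

static int demo_handshake(void __iomem *reg, u32 mask, u32 done, u64 timeout_us)
{
	u32 val;

	/* poll every 1 us until (val & mask) == done, the register reads
	 * as all-ones (device gone), or timeout_us elapses (-ETIMEDOUT) */
	return readl_poll_timeout_atomic(reg, val,
					 (val & mask) == done || val == U32_MAX,
					 1, timeout_us);
}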
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index 6869c58367f2..caf4d4cd4b75 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -1913,6 +1913,7 @@ static int musb_gadget_stop(struct usb_gadget *g)
* gadget driver here and have everything work;
* that currently misbehaves.
*/
+ usb_gadget_set_state(g, USB_STATE_NOTATTACHED);
/* Force check of devctl register for PM runtime */
pm_runtime_mark_last_busy(musb->controller);
@@ -2019,6 +2020,7 @@ void musb_g_disconnect(struct musb *musb)
case OTG_STATE_B_PERIPHERAL:
case OTG_STATE_B_IDLE:
musb_set_state(musb, OTG_STATE_B_IDLE);
+ usb_gadget_set_state(&musb->g, USB_STATE_NOTATTACHED);
break;
case OTG_STATE_B_SRP_INIT:
break;
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 6ac7a0a5cf07..abfcfca3f971 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -803,6 +803,8 @@ static const struct usb_device_id id_table_combined[] = {
.driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk },
{ USB_DEVICE(FTDI_VID, FTDI_NDI_AURORA_SCU_PID),
.driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk },
+ { USB_DEVICE(FTDI_NDI_VID, FTDI_NDI_EMGUIDE_GEMINI_PID),
+ .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk },
{ USB_DEVICE(TELLDUS_VID, TELLDUS_TELLSTICK_PID) },
{ USB_DEVICE(NOVITUS_VID, NOVITUS_BONO_E_PID) },
{ USB_DEVICE(FTDI_VID, RTSYSTEMS_USB_VX8_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 9acb6f837327..4cc1fae8acb9 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -204,6 +204,9 @@
#define FTDI_NDI_FUTURE_3_PID 0xDA73 /* NDI future device #3 */
#define FTDI_NDI_AURORA_SCU_PID 0xDA74 /* NDI Aurora SCU */
+#define FTDI_NDI_VID 0x23F2
+#define FTDI_NDI_EMGUIDE_GEMINI_PID 0x0003 /* NDI Emguide Gemini */
+
/*
* ChamSys Limited (www.chamsys.co.uk) USB wing/interface product IDs
*/
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 27879cc57536..147ca50c94be 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1415,6 +1415,9 @@ static const struct usb_device_id option_ids[] = {
.driver_info = NCTRL(5) },
{ USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d0, 0xff, 0xff, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d0, 0xff, 0xff, 0x60) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10c7, 0xff, 0xff, 0x30), /* Telit FE910C04 (ECM) */
+ .driver_info = NCTRL(4) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10c7, 0xff, 0xff, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d1, 0xff, 0xff, 0x30), /* Telit FN990B (MBIM) */
.driver_info = NCTRL(6) },
{ USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d1, 0xff, 0xff, 0x40) },
@@ -2343,6 +2346,8 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(3) },
{ USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe145, 0xff), /* Foxconn T99W651 RNDIS */
.driver_info = RSVD(5) | RSVD(6) },
+ { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe167, 0xff), /* Foxconn T99W640 MBIM */
+ .driver_info = RSVD(3) },
{ USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 (IOT version) */
.driver_info = RSVD(4) | RSVD(5) | RSVD(6) },
{ USB_DEVICE(0x1782, 0x4d10) }, /* Fibocom L610 (AT mode) */
diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c
index b09b58d7311d..d8b906ec4d1c 100644
--- a/drivers/usb/typec/altmodes/displayport.c
+++ b/drivers/usb/typec/altmodes/displayport.c
@@ -394,8 +394,7 @@ static int dp_altmode_vdm(struct typec_altmode *alt,
case CMDT_RSP_NAK:
switch (cmd) {
case DP_CMD_STATUS_UPDATE:
- if (typec_altmode_exit(alt))
- dev_err(&dp->alt->dev, "Exit Mode Failed!\n");
+ dp->state = DP_STATE_EXIT;
break;
case DP_CMD_CONFIGURE:
dp->data.conf = 0;
@@ -677,7 +676,7 @@ static ssize_t pin_assignment_show(struct device *dev,
assignments = get_current_pin_assignments(dp);
- for (i = 0; assignments; assignments >>= 1, i++) {
+ for (i = 0; assignments && i < DP_PIN_ASSIGN_MAX; assignments >>= 1, i++) {
if (assignments & 1) {
if (i == cur)
len += sprintf(buf + len, "[%s] ",
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 1a1f9e1f8e4e..1f6fdfaa34bf 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -4410,17 +4410,6 @@ static int tcpm_src_attach(struct tcpm_port *port)
tcpm_enable_auto_vbus_discharge(port, true);
- ret = tcpm_set_roles(port, true, TYPEC_STATE_USB,
- TYPEC_SOURCE, tcpm_data_role_for_source(port));
- if (ret < 0)
- return ret;
-
- if (port->pd_supported) {
- ret = port->tcpc->set_pd_rx(port->tcpc, true);
- if (ret < 0)
- goto out_disable_mux;
- }
-
/*
* USB Type-C specification, version 1.2,
* chapter 4.5.2.2.8.1 (Attached.SRC Requirements)
@@ -4430,13 +4419,24 @@ static int tcpm_src_attach(struct tcpm_port *port)
(polarity == TYPEC_POLARITY_CC2 && port->cc1 == TYPEC_CC_RA)) {
ret = tcpm_set_vconn(port, true);
if (ret < 0)
- goto out_disable_pd;
+ return ret;
}
ret = tcpm_set_vbus(port, true);
if (ret < 0)
goto out_disable_vconn;
+ ret = tcpm_set_roles(port, true, TYPEC_STATE_USB, TYPEC_SOURCE,
+ tcpm_data_role_for_source(port));
+ if (ret < 0)
+ goto out_disable_vbus;
+
+ if (port->pd_supported) {
+ ret = port->tcpc->set_pd_rx(port->tcpc, true);
+ if (ret < 0)
+ goto out_disable_mux;
+ }
+
port->pd_capable = false;
port->partner = NULL;
@@ -4447,14 +4447,14 @@ static int tcpm_src_attach(struct tcpm_port *port)
return 0;
-out_disable_vconn:
- tcpm_set_vconn(port, false);
-out_disable_pd:
- if (port->pd_supported)
- port->tcpc->set_pd_rx(port->tcpc, false);
out_disable_mux:
tcpm_mux_set(port, TYPEC_STATE_SAFE, USB_ROLE_NONE,
TYPEC_ORIENTATION_NONE);
+out_disable_vbus:
+ tcpm_set_vbus(port, false);
+out_disable_vconn:
+ tcpm_set_vconn(port, false);
+
return ret;
}
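The tcpm reordering above only works because the error labels unwind in exact reverse order of what succeeded: vbus is enabled after vconn, so out_disable_vbus falls through into out_disable_vconn, never the other way around. A self-contained sketch of the shape, with stubbed steps standing in for the real operations:

static int step_a(void) { return 0; }	/* e.g. enable vconn */
static int step_b(void) { return 0; }	/* e.g. enable vbus */
static int step_c(void) { return 0; }	/* e.g. set roles */
static void undo_a(void) { }
static void undo_b(void) { }

static int attach(void)
{
	int ret;

	ret = step_a();
	if (ret)
		return ret;
	ret = step_b();
	if (ret)
		goto out_undo_a;
	ret = step_c();
	if (ret)
		goto out_undo_b;
	return 0;

out_undo_b:
	undo_b();
out_undo_a:		/* labels fall through, newest first */
	undo_a();
	return ret;
}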
diff --git a/drivers/virt/coco/efi_secret/efi_secret.c b/drivers/virt/coco/efi_secret/efi_secret.c
index 1864f9f80617..5946c5abeae8 100644
--- a/drivers/virt/coco/efi_secret/efi_secret.c
+++ b/drivers/virt/coco/efi_secret/efi_secret.c
@@ -31,8 +31,6 @@
struct efi_secret {
struct dentry *secrets_dir;
- struct dentry *fs_dir;
- struct dentry *fs_files[EFI_SECRET_NUM_FILES];
void __iomem *secret_data;
u64 secret_data_len;
};
@@ -119,10 +117,8 @@ static void wipe_memory(void *addr, size_t size)
static int efi_secret_unlink(struct inode *dir, struct dentry *dentry)
{
- struct efi_secret *s = efi_secret_get();
struct inode *inode = d_inode(dentry);
struct secret_entry *e = (struct secret_entry *)inode->i_private;
- int i;
if (e) {
/* Zero out the secret data */
@@ -132,19 +128,7 @@ static int efi_secret_unlink(struct inode *dir, struct dentry *dentry)
inode->i_private = NULL;
- for (i = 0; i < EFI_SECRET_NUM_FILES; i++)
- if (s->fs_files[i] == dentry)
- s->fs_files[i] = NULL;
-
- /*
- * securityfs_remove tries to lock the directory's inode, but we reach
- * the unlink callback when it's already locked
- */
- inode_unlock(dir);
- securityfs_remove(dentry);
- inode_lock(dir);
-
- return 0;
+ return simple_unlink(inode, dentry);
}
static const struct inode_operations efi_secret_dir_inode_operations = {
@@ -194,15 +178,6 @@ unmap:
static void efi_secret_securityfs_teardown(struct platform_device *dev)
{
struct efi_secret *s = efi_secret_get();
- int i;
-
- for (i = (EFI_SECRET_NUM_FILES - 1); i >= 0; i--) {
- securityfs_remove(s->fs_files[i]);
- s->fs_files[i] = NULL;
- }
-
- securityfs_remove(s->fs_dir);
- s->fs_dir = NULL;
securityfs_remove(s->secrets_dir);
s->secrets_dir = NULL;
@@ -217,7 +192,7 @@ static int efi_secret_securityfs_setup(struct platform_device *dev)
unsigned char *ptr;
struct secret_header *h;
struct secret_entry *e;
- struct dentry *dent;
+ struct dentry *dent, *dir;
char guid_str[EFI_VARIABLE_GUID_LEN + 1];
ptr = (void __force *)s->secret_data;
@@ -240,8 +215,6 @@ static int efi_secret_securityfs_setup(struct platform_device *dev)
}
s->secrets_dir = NULL;
- s->fs_dir = NULL;
- memset(s->fs_files, 0, sizeof(s->fs_files));
dent = securityfs_create_dir("secrets", NULL);
if (IS_ERR(dent)) {
@@ -251,14 +224,13 @@ static int efi_secret_securityfs_setup(struct platform_device *dev)
}
s->secrets_dir = dent;
- dent = securityfs_create_dir("coco", s->secrets_dir);
- if (IS_ERR(dent)) {
+ dir = securityfs_create_dir("coco", s->secrets_dir);
+ if (IS_ERR(dir)) {
dev_err(&dev->dev, "Error creating coco securityfs directory entry err=%ld\n",
- PTR_ERR(dent));
- return PTR_ERR(dent);
+ PTR_ERR(dir));
+ return PTR_ERR(dir);
}
- d_inode(dent)->i_op = &efi_secret_dir_inode_operations;
- s->fs_dir = dent;
+ d_inode(dir)->i_op = &efi_secret_dir_inode_operations;
bytes_left = h->len - sizeof(*h);
ptr += sizeof(*h);
@@ -274,15 +246,14 @@ static int efi_secret_securityfs_setup(struct platform_device *dev)
if (efi_guidcmp(e->guid, NULL_GUID)) {
efi_guid_to_str(&e->guid, guid_str);
- dent = securityfs_create_file(guid_str, 0440, s->fs_dir, (void *)e,
+ dent = securityfs_create_file(guid_str, 0440, dir, (void *)e,
&efi_secret_bin_file_fops);
if (IS_ERR(dent)) {
dev_err(&dev->dev, "Error creating efi_secret securityfs entry\n");
ret = PTR_ERR(dent);
goto err_cleanup;
}
-
- s->fs_files[i++] = dent;
+ i++;
}
ptr += e->len;
bytes_left -= e->len;
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index b784aab66867..4397392bfef0 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2797,7 +2797,7 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
void (*recycle_done)(struct virtqueue *vq))
{
struct vring_virtqueue *vq = to_vvq(_vq);
- int err;
+ int err, err_reset;
if (num > vq->vq.num_max)
return -E2BIG;
@@ -2819,7 +2819,11 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
else
err = virtqueue_resize_split(_vq, num);
- return virtqueue_enable_after_reset(_vq);
+ err_reset = virtqueue_enable_after_reset(_vq);
+ if (err_reset)
+ return err_reset;
+
+ return err;
}
EXPORT_SYMBOL_GPL(virtqueue_resize);
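The virtqueue_resize() fix above encodes an error-priority rule: a failure to re-enable the queue after reset leaves it unusable and must win, while a resize failure on a queue that came back up is still worth reporting to the caller. Reduced to its essence:

static int combine_errors(int resize_err, int enable_err)
{
	if (enable_err)
		return enable_err;	/* queue unusable: report this first */
	return resize_err;		/* queue works: report the resize result */
}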
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 5061f192eafd..04795508a795 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -127,7 +127,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = {
};
const struct dentry_operations v9fs_dentry_operations = {
- .d_delete = always_delete_dentry,
.d_release = v9fs_dentry_release,
.d_unalias_trylock = v9fs_dentry_unalias_trylock,
.d_unalias_unlock = v9fs_dentry_unalias_unlock,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 489db161abc9..795c6388744c 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -134,10 +134,12 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
if (retval)
goto release_sb;
- if (v9ses->cache & (CACHE_META|CACHE_LOOSE))
- sb->s_d_op = &v9fs_cached_dentry_operations;
- else
- sb->s_d_op = &v9fs_dentry_operations;
+ if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) {
+ set_default_d_op(sb, &v9fs_cached_dentry_operations);
+ } else {
+ set_default_d_op(sb, &v9fs_dentry_operations);
+ sb->s_d_flags |= DCACHE_DONTCACHE;
+ }
inode = v9fs_get_new_inode_from_fid(v9ses, fid, sb);
if (IS_ERR(inode)) {
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 21527189e430..6830f8bc8d4e 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -53,13 +53,14 @@ static void adfs_write_failed(struct address_space *mapping, loff_t to)
truncate_pagecache(inode, inode->i_size);
}
-static int adfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct folio **foliop, void **fsdata)
+static int adfs_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct folio **foliop, void **fsdata)
{
int ret;
- ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata,
+ ret = cont_write_begin(iocb, mapping, pos, len, foliop, fsdata,
adfs_get_block,
&ADFS_I(mapping->host)->mmu_private);
if (unlikely(ret))
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 017c48a80203..fdccdbbfc213 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -397,7 +397,7 @@ static int adfs_fill_super(struct super_block *sb, struct fs_context *fc)
if (asb->s_ftsuffix)
asb->s_namelen += 4;
- sb->s_d_op = &adfs_dentry_operations;
+ set_default_d_op(sb, &adfs_dentry_operations);
root = adfs_iget(sb, &root_obj);
sb->s_root = d_make_root(root);
if (!sb->s_root) {
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 7a71018e3f67..219ea0353906 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -415,13 +415,14 @@ affs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
return ret;
}
-static int affs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct folio **foliop, void **fsdata)
+static int affs_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct folio **foliop, void **fsdata)
{
int ret;
- ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata,
+ ret = cont_write_begin(iocb, mapping, pos, len, foliop, fsdata,
affs_get_block,
&AFFS_I(mapping->host)->mmu_private);
if (unlikely(ret))
@@ -430,14 +431,15 @@ static int affs_write_begin(struct file *file, struct address_space *mapping,
return ret;
}
-static int affs_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned int len, unsigned int copied,
+static int affs_write_end(const struct kiocb *iocb,
+ struct address_space *mapping, loff_t pos,
+ unsigned int len, unsigned int copied,
struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
int ret;
- ret = generic_write_end(file, mapping, pos, len, copied, folio, fsdata);
+ ret = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata);
/* Clear Archived bit on file writes, as AmigaOS would do */
if (AFFS_I(inode)->i_protect & FIBF_ARCHIVED) {
@@ -645,7 +647,8 @@ static int affs_read_folio_ofs(struct file *file, struct folio *folio)
return err;
}
-static int affs_write_begin_ofs(struct file *file, struct address_space *mapping,
+static int affs_write_begin_ofs(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len,
struct folio **foliop, void **fsdata)
{
@@ -684,9 +687,10 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping
return err;
}
-static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct folio *folio, void *fsdata)
+static int affs_write_end_ofs(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
struct super_block *sb = inode->i_sb;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 2fa40337776d..44f8aa883100 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -500,9 +500,9 @@ got_root:
return PTR_ERR(root_inode);
if (affs_test_opt(AFFS_SB(sb)->s_flags, SF_INTL))
- sb->s_d_op = &affs_intl_dentry_operations;
+ set_default_d_op(sb, &affs_intl_dentry_operations);
else
- sb->s_d_op = &affs_dentry_operations;
+ set_default_d_op(sb, &affs_dentry_operations);
sb->s_root = d_make_root(root_inode);
if (!sb->s_root) {
diff --git a/fs/afs/addr_prefs.c b/fs/afs/addr_prefs.c
index c0384201b8fe..133736412c3d 100644
--- a/fs/afs/addr_prefs.c
+++ b/fs/afs/addr_prefs.c
@@ -48,7 +48,7 @@ static int afs_split_string(char **pbuf, char *strv[], unsigned int maxstrv)
strv[count++] = p;
/* Skip over word */
- while (!isspace(*p))
+ while (!isspace(*p) && *p)
p++;
if (!*p)
break;
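Without the added *p check, the word-skipping loop in afs_split_string() walks past the terminating NUL whenever the final word has no trailing whitespace. A reduction of the fixed scanner (standalone C, not the kernel function itself):

#include <ctype.h>

static int split_string(char *p, char *strv[], int maxstrv)
{
	int count = 0;

	while (*p) {
		while (isspace((unsigned char)*p))
			p++;				/* skip separators */
		if (!*p || count >= maxstrv)
			break;
		strv[count++] = p;
		while (!isspace((unsigned char)*p) && *p)
			p++;				/* skip over the word */
		if (!*p)
			break;				/* hit end of string */
		*p++ = '\0';				/* terminate this word */
	}
	return count;
}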
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 0168bbf53fe0..f31359922e98 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -177,6 +177,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
VL_SERVICE, AFS_VL_PORT);
if (IS_ERR(vllist)) {
ret = PTR_ERR(vllist);
+ vllist = NULL;
goto parse_failed;
}
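The afs_alloc_cell() fix above is standard ERR_PTR hygiene: once PTR_ERR() has been captured, the pointer is NULLed so that a shared error path can hand it to a put/free helper that tolerates NULL but would crash on an ERR_PTR-encoded value. A sketch with hypothetical type and helpers:

#include <linux/err.h>

static int demo_alloc(void)
{
	struct vllist *vllist;		/* hypothetical type */
	int ret = 0;

	vllist = vllist_alloc();	/* hypothetical allocator */
	if (IS_ERR(vllist)) {
		ret = PTR_ERR(vllist);
		vllist = NULL;		/* the common exit below may put it */
		goto out;
	}

	/* ... use vllist ... */
out:
	vllist_put(vllist);		/* must see NULL, never an ERR_PTR */
	return ret;
}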
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 25b306db6992..da407f2d6f0d 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -483,9 +483,9 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
goto error;
if (as->dyn_root) {
- sb->s_d_op = &afs_dynroot_dentry_operations;
+ set_default_d_op(sb, &afs_dynroot_dentry_operations);
} else {
- sb->s_d_op = &afs_fs_dentry_operations;
+ set_default_d_op(sb, &afs_fs_dentry_operations);
rcu_assign_pointer(as->volume->sb, sb);
}
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index e51e7d88980a..1d847a939f29 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -98,14 +98,25 @@ static struct file_system_type anon_inode_fs_type = {
.kill_sb = kill_anon_super,
};
-static struct inode *anon_inode_make_secure_inode(
- const char *name,
- const struct inode *context_inode)
+/**
+ * anon_inode_make_secure_inode - allocate an anonymous inode with security context
+ * @sb: [in] Superblock to allocate from
+ * @name: [in] Name of the class of the new file (e.g., "secretmem")
+ * @context_inode:
+ * [in] Optional parent inode for security inheritance
+ *
+ * The function ensures proper security initialization through the LSM hook
+ * security_inode_init_security_anon().
+ *
+ * Return: Pointer to new inode on success, ERR_PTR on failure.
+ */
+struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name,
+ const struct inode *context_inode)
{
struct inode *inode;
int error;
- inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
+ inode = alloc_anon_inode(sb);
if (IS_ERR(inode))
return inode;
inode->i_flags &= ~S_PRIVATE;
@@ -118,6 +129,7 @@ static struct inode *anon_inode_make_secure_inode(
}
return inode;
}
+EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm");
static struct file *__anon_inode_getfile(const char *name,
const struct file_operations *fops,
@@ -132,7 +144,8 @@ static struct file *__anon_inode_getfile(const char *name,
return ERR_PTR(-ENOENT);
if (make_inode) {
- inode = anon_inode_make_secure_inode(name, context_inode);
+ inode = anon_inode_make_secure_inode(anon_inode_mnt->mnt_sb,
+ name, context_inode);
if (IS_ERR(inode)) {
file = ERR_CAST(inode);
goto err;
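With the superblock made a parameter and the symbol exported to the "kvm" module namespace, a module can now place secure anonymous inodes on its own private mount instead of the shared anon_inode_mnt. A hedged sketch of such a caller (the mount and class name are hypothetical):

	struct inode *inode;

	inode = anon_inode_make_secure_inode(my_private_mnt->mnt_sb,
					     "kvm-gmem",	/* hypothetical class name */
					     NULL);		/* no context inode */
	if (IS_ERR(inode))
		return PTR_ERR(inode);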
diff --git a/fs/attr.c b/fs/attr.c
index 9caf63d20d03..5425c1dbbff9 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -230,7 +230,7 @@ EXPORT_SYMBOL(setattr_prepare);
* @inode: the inode to be truncated
* @offset: the new size to assign to the inode
*
- * inode_newsize_ok must be called with i_mutex held.
+ * inode_newsize_ok must be called with i_rwsem held exclusively.
*
* inode_newsize_ok will check filesystem limits and ulimits to check that the
* new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
@@ -318,7 +318,7 @@ static void setattr_copy_mgtime(struct inode *inode, const struct iattr *attr)
* @inode: the inode to be updated
* @attr: the new attributes
*
- * setattr_copy must be called with i_mutex held.
+ * setattr_copy must be called with i_rwsem held exclusively.
*
* setattr_copy updates the inode's metadata with that specified
* in attr on idmapped mounts. Necessary permission checks to determine
@@ -403,13 +403,13 @@ EXPORT_SYMBOL(may_setattr);
* @attr: new attributes
* @delegated_inode: returns inode, if the inode is delegated
*
- * The caller must hold the i_mutex on the affected object.
+ * The caller must hold the i_rwsem exclusively on the affected object.
*
* If notify_change discovers a delegation in need of breaking,
* it will return -EWOULDBLOCK and return a reference to the inode in
* delegated_inode. The caller should then break the delegation and
* retry. Because breaking a delegation may take a long time, the
- * caller should drop the i_mutex before doing so.
+ * caller should drop the i_rwsem before doing so.
*
* Alternatively, a caller may pass NULL for delegated_inode. This may
* be appropriate for callers that expect the underlying filesystem not
@@ -456,7 +456,7 @@ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
- /* Flag setting protected by i_mutex */
+ /* Flag setting protected by i_rwsem */
if (is_sxid(attr->ia_mode))
inode->i_flags &= ~S_NOSEC;
}
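The retry contract spelled out in the updated comments looks like this at a call site; a minimal sketch modeled on existing VFS callers such as chown_common():

	static int set_attrs(struct mnt_idmap *idmap, struct dentry *dentry,
			     struct iattr *attr)
	{
		struct inode *inode = d_inode(dentry);
		struct inode *delegated_inode = NULL;
		int error;

	retry:
		inode_lock(inode);		/* take i_rwsem exclusively */
		error = notify_change(idmap, dentry, attr, &delegated_inode);
		inode_unlock(inode);		/* drop it before breaking the lease */
		if (delegated_inode) {
			error = break_deleg_wait(&delegated_inode);
			if (!error)
				goto retry;
		}
		return error;
	}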
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index ee2edccaef70..f5c16ffba013 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -311,7 +311,7 @@ static int autofs_fill_super(struct super_block *s, struct fs_context *fc)
s->s_blocksize_bits = 10;
s->s_magic = AUTOFS_SUPER_MAGIC;
s->s_op = &autofs_sops;
- s->s_d_op = &autofs_dentry_operations;
+ set_default_d_op(s, &autofs_dentry_operations);
s->s_time_gran = 1;
/*
diff --git a/fs/backing-file.c b/fs/backing-file.c
index 763fbe9b72b2..8c7396bff121 100644
--- a/fs/backing-file.c
+++ b/fs/backing-file.c
@@ -41,7 +41,7 @@ struct file *backing_file_open(const struct path *user_path, int flags,
return f;
path_get(user_path);
- *backing_file_user_path(f) = *user_path;
+ backing_file_set_user_path(f, user_path);
error = vfs_open(real_path, f);
if (error) {
fput(f);
@@ -65,7 +65,7 @@ struct file *backing_tmpfile_open(const struct path *user_path, int flags,
return f;
path_get(user_path);
- *backing_file_user_path(f) = *user_path;
+ backing_file_set_user_path(f, user_path);
error = vfs_tmpfile(real_idmap, real_parentpath, f, mode);
if (error) {
fput(f);
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index b228a5a64479..66de46318620 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -1406,6 +1406,9 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite
: BCH_DATA_free;
struct printbuf buf = PRINTBUF;
+ unsigned fsck_flags = (async_repair ? FSCK_ERR_NO_LOG : 0)|
+ FSCK_CAN_FIX|FSCK_CAN_IGNORE;
+
struct bpos bucket = iter->pos;
bucket.offset &= ~(~0ULL << 56);
u64 genbits = iter->pos.offset & (~0ULL << 56);
@@ -1419,9 +1422,10 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite
return ret;
if (!bch2_dev_bucket_exists(c, bucket)) {
- if (fsck_err(trans, need_discard_freespace_key_to_invalid_dev_bucket,
- "entry in %s btree for nonexistant dev:bucket %llu:%llu",
- bch2_btree_id_str(iter->btree_id), bucket.inode, bucket.offset))
+ if (__fsck_err(trans, fsck_flags,
+ need_discard_freespace_key_to_invalid_dev_bucket,
+ "entry in %s btree for nonexistant dev:bucket %llu:%llu",
+ bch2_btree_id_str(iter->btree_id), bucket.inode, bucket.offset))
goto delete;
ret = 1;
goto out;
@@ -1433,7 +1437,8 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite
if (a->data_type != state ||
(state == BCH_DATA_free &&
genbits != alloc_freespace_genbits(*a))) {
- if (fsck_err(trans, need_discard_freespace_key_bad,
+ if (__fsck_err(trans, fsck_flags,
+ need_discard_freespace_key_bad,
"%s\nincorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)",
(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
bch2_btree_id_str(iter->btree_id),
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index b375ad610acd..b58525ec7b4d 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -511,7 +511,8 @@ again:
bch2_dev_usage_read_fast(ca, &req->usage);
avail = dev_buckets_free(ca, req->usage, req->watermark);
- if (req->usage.buckets[BCH_DATA_need_discard] > avail)
+ if (req->usage.buckets[BCH_DATA_need_discard] >
+ min(avail, ca->mi.nbuckets >> 7))
bch2_dev_do_discards(ca);
if (req->usage.buckets[BCH_DATA_need_gc_gens] > avail)
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index e76809e71858..77d93beb3c8f 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -353,7 +353,7 @@ static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans,
return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
} else {
struct btree *b = __bch2_backpointer_get_node(trans, bp, iter, last_flushed, commit);
- if (b == ERR_PTR(bch_err_throw(c, backpointer_to_overwritten_btree_node)))
+ if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node))
return bkey_s_c_null;
if (IS_ERR_OR_NULL(b))
return ((struct bkey_s_c) { .k = ERR_CAST(b) });
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 3651a296d506..ddfacad0f70c 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -296,7 +296,6 @@ do { \
#define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n")
void bch2_print_str(struct bch_fs *, const char *, const char *);
-void bch2_print_str_nonblocking(struct bch_fs *, const char *, const char *);
__printf(2, 3)
void bch2_print_opts(struct bch_opts *, const char *, ...);
@@ -768,7 +767,8 @@ struct btree_trans_buf {
x(sysfs) \
x(btree_write_buffer) \
x(btree_node_scrub) \
- x(async_recovery_passes)
+ x(async_recovery_passes) \
+ x(ioctl_data)
enum bch_write_ref {
#define x(n) BCH_WRITE_REF_##n,
@@ -863,9 +863,7 @@ struct bch_fs {
DARRAY(enum bcachefs_metadata_version)
incompat_versions_requested;
-#ifdef CONFIG_UNICODE
struct unicode_map *cf_encoding;
-#endif
struct bch_sb_handle disk_sb;
@@ -1285,4 +1283,13 @@ static inline bool bch2_discard_opt_enabled(struct bch_fs *c, struct bch_dev *ca
: ca->mi.discard;
}
+static inline bool bch2_fs_casefold_enabled(struct bch_fs *c)
+{
+#ifdef CONFIG_UNICODE
+ return !c->opts.casefold_disabled;
+#else
+ return false;
+#endif
+}
+
#endif /* _BCACHEFS_H */
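Call sites can test bch2_fs_casefold_enabled() instead of open-coding #ifdef CONFIG_UNICODE; since the helper is constant-false without unicode support, the compiler drops the branch and the now-unconditional cf_encoding member is never dereferenced. A sketch (the qstr and destination buffer names are illustrative):

	if (bch2_fs_casefold_enabled(c)) {
		/* only reachable with CONFIG_UNICODE, so using c->cf_encoding
		 * here is safe; compiled out otherwise */
		ret = utf8_casefold(c->cf_encoding, &name, cf_name, sizeof(cf_name));
	}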
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 91e0aa796e6b..83c9860e6b82 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -85,7 +85,7 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
six_unlock_intent(&b->c.lock);
}
-static void __btree_node_data_free(struct btree_cache *bc, struct btree *b)
+void __btree_node_data_free(struct btree *b)
{
BUG_ON(!list_empty(&b->list));
BUG_ON(btree_node_hashed(b));
@@ -112,16 +112,17 @@ static void __btree_node_data_free(struct btree_cache *bc, struct btree *b)
munmap(b->aux_data, btree_aux_data_bytes(b));
#endif
b->aux_data = NULL;
-
- btree_node_to_freedlist(bc, b);
}
static void btree_node_data_free(struct btree_cache *bc, struct btree *b)
{
BUG_ON(list_empty(&b->list));
list_del_init(&b->list);
+
+ __btree_node_data_free(b);
+
--bc->nr_freeable;
- __btree_node_data_free(bc, b);
+ btree_node_to_freedlist(bc, b);
}
static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
@@ -185,10 +186,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
{
- struct btree_cache *bc = &c->btree_cache;
- struct btree *b;
-
- b = __btree_node_mem_alloc(c, GFP_KERNEL);
+ struct btree *b = __btree_node_mem_alloc(c, GFP_KERNEL);
if (!b)
return NULL;
@@ -198,8 +196,6 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
}
bch2_btree_lock_init(&b->c, 0, GFP_KERNEL);
-
- __bch2_btree_node_to_freelist(bc, b);
return b;
}
@@ -524,7 +520,8 @@ restart:
--touched;
} else if (!btree_node_reclaim(c, b)) {
__bch2_btree_node_hash_remove(bc, b);
- __btree_node_data_free(bc, b);
+ __btree_node_data_free(b);
+ btree_node_to_freedlist(bc, b);
freed++;
bc->nr_freed++;
@@ -652,9 +649,12 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
bch2_recalc_btree_reserve(c);
- for (i = 0; i < bc->nr_reserve; i++)
- if (!__bch2_btree_node_mem_alloc(c))
+ for (i = 0; i < bc->nr_reserve; i++) {
+ struct btree *b = __bch2_btree_node_mem_alloc(c);
+ if (!b)
goto err;
+ __bch2_btree_node_to_freelist(bc, b);
+ }
list_splice_init(&bc->live[0].list, &bc->freeable);
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index ca3c1b145330..be275f87a60e 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -30,6 +30,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *, enum btree_id, unsig
void bch2_btree_cache_cannibalize_unlock(struct btree_trans *);
int bch2_btree_cache_cannibalize_lock(struct btree_trans *, struct closure *);
+void __btree_node_data_free(struct btree *);
struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool);
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 9ddcbe1bda78..bac108e93823 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -397,7 +397,11 @@ again:
continue;
}
- ret = btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan);
+ ret = lockrestart_do(trans,
+ btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan));
+ if (ret < 0)
+ goto err;
+
if (ret == DID_FILL_FROM_SCAN) {
new_pass = true;
ret = 0;
@@ -438,7 +442,8 @@ again:
if (!ret && !IS_ERR_OR_NULL(prev)) {
BUG_ON(cur);
- ret = btree_repair_node_end(trans, b, prev, pulled_from_scan);
+ ret = lockrestart_do(trans,
+ btree_repair_node_end(trans, b, prev, pulled_from_scan));
if (ret == DID_FILL_FROM_SCAN) {
new_pass = true;
ret = 0;
@@ -498,8 +503,14 @@ again:
prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ /*
+ * XXX: we're not passing the trans object here because we're not set up
+ * to handle a transaction restart - this code needs to be rewritten
+ * when we start doing online topology repair
+ */
+ bch2_trans_unlock_long(trans);
if (mustfix_fsck_err_on(!have_child,
- trans, btree_node_topology_interior_node_empty,
+ c, btree_node_topology_interior_node_empty,
"empty interior btree node at %s", buf.buf))
ret = DROP_THIS_NODE;
err:
@@ -519,49 +530,72 @@ fsck_err:
bch2_bkey_buf_exit(&prev_k, c);
bch2_bkey_buf_exit(&cur_k, c);
printbuf_exit(&buf);
+ bch_err_fn(c, ret);
return ret;
}
-int bch2_check_topology(struct bch_fs *c)
+static int bch2_check_root(struct btree_trans *trans, enum btree_id btree,
+ bool *reconstructed_root)
{
- struct btree_trans *trans = bch2_trans_get(c);
- struct bpos pulled_from_scan = POS_MIN;
+ struct bch_fs *c = trans->c;
+ struct btree_root *r = bch2_btree_id_root(c, btree);
struct printbuf buf = PRINTBUF;
int ret = 0;
- bch2_trans_srcu_unlock(trans);
+ bch2_btree_id_to_text(&buf, btree);
- for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
- struct btree_root *r = bch2_btree_id_root(c, i);
- bool reconstructed_root = false;
+ if (r->error) {
+ bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf);
- printbuf_reset(&buf);
- bch2_btree_id_to_text(&buf, i);
+ ret = bch2_btree_has_scanned_nodes(c, btree);
+ if (ret < 0)
+ goto err;
- if (r->error) {
-reconstruct_root:
- bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf);
+ if (!ret) {
+ __fsck_err(trans,
+ FSCK_CAN_FIX|(!btree_id_important(btree) ? FSCK_AUTOFIX : 0),
+ btree_root_unreadable_and_scan_found_nothing,
+ "no nodes found for btree %s, continue?", buf.buf);
r->alive = false;
r->error = 0;
+ bch2_btree_root_alloc_fake_trans(trans, btree, 0);
+ } else {
+ r->alive = false;
+ r->error = 0;
+ bch2_btree_root_alloc_fake_trans(trans, btree, 1);
- if (!bch2_btree_has_scanned_nodes(c, i)) {
- __fsck_err(trans,
- FSCK_CAN_FIX|(!btree_id_important(i) ? FSCK_AUTOFIX : 0),
- btree_root_unreadable_and_scan_found_nothing,
- "no nodes found for btree %s, continue?", buf.buf);
- bch2_btree_root_alloc_fake_trans(trans, i, 0);
- } else {
- bch2_btree_root_alloc_fake_trans(trans, i, 1);
- bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
- ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX);
- if (ret)
- break;
- }
-
- reconstructed_root = true;
+ bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ ret = bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX);
+ if (ret)
+ goto err;
}
+ *reconstructed_root = true;
+ }
+err:
+fsck_err:
+ printbuf_exit(&buf);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+int bch2_check_topology(struct bch_fs *c)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct bpos pulled_from_scan = POS_MIN;
+ int ret = 0;
+
+ bch2_trans_srcu_unlock(trans);
+
+ for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
+ bool reconstructed_root = false;
+recover:
+ ret = lockrestart_do(trans, bch2_check_root(trans, i, &reconstructed_root));
+ if (ret)
+ break;
+
+ struct btree_root *r = bch2_btree_id_root(c, i);
struct btree *b = r->b;
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
@@ -575,17 +609,21 @@ reconstruct_root:
r->b = NULL;
- if (!reconstructed_root)
- goto reconstruct_root;
+ if (!reconstructed_root) {
+ r->error = -EIO;
+ goto recover;
+ }
+ struct printbuf buf = PRINTBUF;
+ bch2_btree_id_to_text(&buf, i);
bch_err(c, "empty btree root %s", buf.buf);
+ printbuf_exit(&buf);
bch2_btree_root_alloc_fake_trans(trans, i, 0);
r->alive = false;
ret = 0;
}
}
-fsck_err:
- printbuf_exit(&buf);
+
bch2_trans_put(trans);
return ret;
}
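bch2_check_root() and btree_check_node_boundaries() now run under lockrestart_do(), which retries its expression whenever it fails with a transaction restart. Open-coded, the pattern is roughly (a sketch, not the exact macro body):

	int ret;

	do {
		bch2_trans_begin(trans);	/* reset paths/locks after a restart */
		ret = bch2_check_root(trans, i, &reconstructed_root);
	} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));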
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 57eff3012a7b..590cd29f3e86 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -557,7 +557,9 @@ static int __btree_err(int ret,
const char *fmt, ...)
{
if (c->recovery.curr_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes)
- return bch_err_throw(c, fsck_fix);
+ return ret == -BCH_ERR_btree_node_read_err_fixable
+ ? bch_err_throw(c, fsck_fix)
+ : ret;
bool have_retry = false;
int ret2;
@@ -566,9 +568,9 @@ static int __btree_err(int ret,
bch2_mark_btree_validate_failure(failed, ca->dev_idx);
struct extent_ptr_decoded pick;
- have_retry = !bch2_bkey_pick_read_device(c,
+ have_retry = bch2_bkey_pick_read_device(c,
bkey_i_to_s_c(&b->key),
- failed, &pick, -1);
+ failed, &pick, -1) == 1;
}
if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
@@ -613,7 +615,6 @@ static int __btree_err(int ret,
goto out;
case -BCH_ERR_btree_node_read_err_bad_node:
prt_str(&out, ", ");
- ret = __bch2_topology_error(c, &out);
break;
}
@@ -642,7 +643,6 @@ static int __btree_err(int ret,
goto out;
case -BCH_ERR_btree_node_read_err_bad_node:
prt_str(&out, ", ");
- ret = __bch2_topology_error(c, &out);
break;
}
print:
@@ -723,12 +723,11 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
struct btree *b, struct bset *i,
- unsigned offset, unsigned sectors, int write,
+ unsigned offset, int write,
struct bch_io_failures *failed,
struct printbuf *err_msg)
{
unsigned version = le16_to_cpu(i->version);
- unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key));
struct printbuf buf1 = PRINTBUF;
struct printbuf buf2 = PRINTBUF;
int ret = 0;
@@ -741,16 +740,22 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
BCH_VERSION_MAJOR(version),
BCH_VERSION_MINOR(version));
- if (btree_err_on(version < c->sb.version_min,
+ if (c->recovery.curr_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes &&
+ btree_err_on(version < c->sb.version_min,
-BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i, NULL,
btree_node_bset_older_than_sb_min,
"bset version %u older than superblock version_min %u",
version, c->sb.version_min)) {
- mutex_lock(&c->sb_lock);
- c->disk_sb.sb->version_min = cpu_to_le16(version);
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
+ if (bch2_version_compatible(version)) {
+ mutex_lock(&c->sb_lock);
+ c->disk_sb.sb->version_min = cpu_to_le16(version);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+ } else {
+ /* We have no idea what's going on: */
+ i->version = cpu_to_le16(c->sb.version);
+ }
}
if (btree_err_on(BCH_VERSION_MAJOR(version) >
@@ -772,15 +777,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
btree_node_unsupported_version,
"BSET_SEPARATE_WHITEOUTS no longer supported");
- if (!write &&
- btree_err_on(offset + sectors > (ptr_written ?: btree_sectors(c)),
- -BCH_ERR_btree_node_read_err_fixable,
- c, ca, b, i, NULL,
- bset_past_end_of_btree_node,
- "bset past end of btree node (offset %u len %u but written %zu)",
- offset, sectors, ptr_written ?: btree_sectors(c)))
- i->u64s = 0;
-
btree_err_on(offset && !i->u64s,
-BCH_ERR_btree_node_read_err_fixable,
c, ca, b, i, NULL,
@@ -1045,6 +1041,7 @@ got_good_key:
le16_add_cpu(&i->u64s, -next_good_key);
memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k);
set_btree_node_need_rewrite(b);
+ set_btree_node_need_rewrite_error(b);
}
fsck_err:
printbuf_exit(&buf);
@@ -1144,6 +1141,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
"unknown checksum type %llu", BSET_CSUM_TYPE(i));
if (first) {
+ sectors = vstruct_sectors(b->data, c->block_bits);
+ if (btree_err_on(b->written + sectors > (ptr_written ?: btree_sectors(c)),
+ -BCH_ERR_btree_node_read_err_fixable,
+ c, ca, b, i, NULL,
+ bset_past_end_of_btree_node,
+ "bset past end of btree node (offset %u len %u but written %zu)",
+ b->written, sectors, ptr_written ?: btree_sectors(c)))
+ i->u64s = 0;
if (good_csum_type) {
struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
bool csum_bad = bch2_crc_cmp(b->data->csum, csum);
@@ -1171,9 +1176,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
c, NULL, b, NULL, NULL,
btree_node_unsupported_version,
"btree node does not have NEW_EXTENT_OVERWRITE set");
-
- sectors = vstruct_sectors(b->data, c->block_bits);
} else {
+ sectors = vstruct_sectors(bne, c->block_bits);
+ if (btree_err_on(b->written + sectors > (ptr_written ?: btree_sectors(c)),
+ -BCH_ERR_btree_node_read_err_fixable,
+ c, ca, b, i, NULL,
+ bset_past_end_of_btree_node,
+ "bset past end of btree node (offset %u len %u but written %zu)",
+ b->written, sectors, ptr_written ?: btree_sectors(c)))
+ i->u64s = 0;
if (good_csum_type) {
struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
bool csum_bad = bch2_crc_cmp(bne->csum, csum);
@@ -1194,14 +1205,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
"decrypting btree node: %s", bch2_err_str(ret)))
goto fsck_err;
}
-
- sectors = vstruct_sectors(bne, c->block_bits);
}
b->version_ondisk = min(b->version_ondisk,
le16_to_cpu(i->version));
- ret = validate_bset(c, ca, b, i, b->written, sectors, READ, failed, err_msg);
+ ret = validate_bset(c, ca, b, i, b->written, READ, failed, err_msg);
if (ret)
goto fsck_err;
@@ -1286,9 +1295,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted);
- if (updated_range)
- bch2_btree_node_drop_keys_outside_node(b);
-
i = &b->data->keys;
for (k = i->start; k != vstruct_last(i);) {
struct bkey tmp;
@@ -1305,6 +1311,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
(u64 *) vstruct_end(i) - (u64 *) k);
set_btree_bset_end(b, b->set);
set_btree_node_need_rewrite(b);
+ set_btree_node_need_rewrite_error(b);
continue;
}
if (ret)
@@ -1325,16 +1332,50 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_node_reset_sib_u64s(b);
- scoped_guard(rcu)
- bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
- struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev);
+ if (updated_range)
+ bch2_btree_node_drop_keys_outside_node(b);
- if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw)
- set_btree_node_need_rewrite(b);
- }
+ /*
+ * XXX:
+ *
+ * We deadlock if too many btree updates require node rewrites while
+ * we're still in journal replay.
+ *
+ * This is because btree node rewrites generate more updates for the
+ * interior updates (alloc, backpointers), and if those updates touch
+ * new nodes and generate more rewrites - well, you see the problem.
+ *
+ * The biggest cause is that we don't use the btree write buffer (for
+ * the backpointer updates - this needs some real thought on locking in
+ * order to fix.
+ *
+ * The problem with this workaround (not doing the rewrite for degraded
+ * nodes in journal replay) is that those degraded nodes persist, and we
+ * don't want that (this is a real bug when a btree node write completes
+ * with fewer replicas than we wanted and leaves a degraded node due to
+ * device _removal_, i.e. the device went away mid write).
+ *
+ * It's less of a bug here, but still a problem because we don't yet
+ * have a way of tracking degraded data - we need another index (all
+ * extents/btree nodes, by replicas entry) in order to fix properly
+ * (re-replicate degraded data at the earliest possible time).
+ */
+ if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay)) {
+ scoped_guard(rcu)
+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
+ struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev);
+
+ if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) {
+ set_btree_node_need_rewrite(b);
+ set_btree_node_need_rewrite_degraded(b);
+ }
+ }
+ }
- if (!ptr_written)
+ if (!ptr_written) {
set_btree_node_need_rewrite(b);
+ set_btree_node_need_rewrite_ptr_written_zero(b);
+ }
fsck_err:
mempool_free(iter, &c->fill_iter);
printbuf_exit(&buf);
@@ -1365,7 +1406,7 @@ static void btree_node_read_work(struct work_struct *work)
ret = bch2_bkey_pick_read_device(c,
bkey_i_to_s_c(&b->key),
&failed, &rb->pick, -1);
- if (ret) {
+ if (ret <= 0) {
set_btree_node_read_error(b);
break;
}
@@ -1970,28 +2011,12 @@ static void btree_node_scrub_work(struct work_struct *work)
prt_newline(&err);
if (!btree_node_scrub_check(c, scrub->buf, scrub->written, &err)) {
- struct btree_trans *trans = bch2_trans_get(c);
-
- struct btree_iter iter;
- bch2_trans_node_iter_init(trans, &iter, scrub->btree,
- scrub->key.k->k.p, 0, scrub->level - 1, 0);
-
- struct btree *b;
- int ret = lockrestart_do(trans,
- PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(trans, &iter)));
- if (ret)
- goto err;
-
- if (bkey_i_to_btree_ptr_v2(&b->key)->v.seq == scrub->seq) {
- bch_err(c, "error validating btree node during scrub on %s at btree %s",
- scrub->ca->name, err.buf);
-
- ret = bch2_btree_node_rewrite(trans, &iter, b, 0, 0);
- }
-err:
- bch2_trans_iter_exit(trans, &iter);
- bch2_trans_begin(trans);
- bch2_trans_put(trans);
+ int ret = bch2_trans_do(c,
+ bch2_btree_node_rewrite_key(trans, scrub->btree, scrub->level - 1,
+ scrub->key.k, 0));
+ if (!bch2_err_matches(ret, ENOENT) &&
+ !bch2_err_matches(ret, EROFS))
+ bch_err_fn_ratelimited(c, ret);
}
printbuf_exit(&err);
@@ -2255,7 +2280,7 @@ static void btree_node_write_endio(struct bio *bio)
}
static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
- struct bset *i, unsigned sectors)
+ struct bset *i)
{
int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key),
(struct bkey_validate_context) {
@@ -2270,7 +2295,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
}
ret = validate_bset_keys(c, b, i, WRITE, NULL, NULL) ?:
- validate_bset(c, NULL, b, i, b->written, sectors, WRITE, NULL, NULL);
+ validate_bset(c, NULL, b, i, b->written, WRITE, NULL, NULL);
if (ret) {
bch2_inconsistent_error(c);
dump_stack();
@@ -2463,7 +2488,7 @@ do_write:
/* if we're going to be encrypting, check metadata validity first: */
if (validate_before_checksum &&
- validate_bset_for_write(c, b, i, sectors_to_write))
+ validate_bset_for_write(c, b, i))
goto err;
ret = bset_encrypt(c, i, b->written << 9);
@@ -2480,7 +2505,7 @@ do_write:
/* if we're not encrypting, check metadata after checksumming: */
if (!validate_before_checksum &&
- validate_bset_for_write(c, b, i, sectors_to_write))
+ validate_bset_for_write(c, b, i))
goto err;
/*
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index b78403376c07..f8829b667ad3 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2076,14 +2076,14 @@ inline bool bch2_btree_iter_rewind(struct btree_trans *trans, struct btree_iter
static noinline
void bch2_btree_trans_peek_prev_updates(struct btree_trans *trans, struct btree_iter *iter,
- struct bkey_s_c *k)
+ struct bpos search_key, struct bkey_s_c *k)
{
struct bpos end = path_l(btree_iter_path(trans, iter))->b->data->min_key;
trans_for_each_update(trans, i)
if (!i->key_cache_already_flushed &&
i->btree_id == iter->btree_id &&
- bpos_le(i->k->k.p, iter->pos) &&
+ bpos_le(i->k->k.p, search_key) &&
bpos_ge(i->k->k.p, k->k ? k->k->p : end)) {
iter->k = i->k->k;
*k = bkey_i_to_s_c(i->k);
@@ -2092,6 +2092,7 @@ void bch2_btree_trans_peek_prev_updates(struct btree_trans *trans, struct btree_
static noinline
void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter *iter,
+ struct bpos search_key,
struct bkey_s_c *k)
{
struct btree_path *path = btree_iter_path(trans, iter);
@@ -2100,7 +2101,7 @@ void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter
trans_for_each_update(trans, i)
if (!i->key_cache_already_flushed &&
i->btree_id == iter->btree_id &&
- bpos_ge(i->k->k.p, path->pos) &&
+ bpos_ge(i->k->k.p, search_key) &&
bpos_le(i->k->k.p, k->k ? k->k->p : end)) {
iter->k = i->k->k;
*k = bkey_i_to_s_c(i->k);
@@ -2122,13 +2123,14 @@ void bch2_btree_trans_peek_slot_updates(struct btree_trans *trans, struct btree_
static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans,
struct btree_iter *iter,
+ struct bpos search_pos,
struct bpos end_pos)
{
struct btree_path *path = btree_iter_path(trans, iter);
return bch2_journal_keys_peek_max(trans->c, iter->btree_id,
path->level,
- path->pos,
+ search_pos,
end_pos,
&iter->journal_idx);
}
@@ -2138,7 +2140,7 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
struct btree_iter *iter)
{
struct btree_path *path = btree_iter_path(trans, iter);
- struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos);
+ struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos, path->pos);
if (k) {
iter->k = k->k;
@@ -2151,11 +2153,12 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
static noinline
void btree_trans_peek_journal(struct btree_trans *trans,
struct btree_iter *iter,
+ struct bpos search_key,
struct bkey_s_c *k)
{
struct btree_path *path = btree_iter_path(trans, iter);
struct bkey_i *next_journal =
- bch2_btree_journal_peek(trans, iter,
+ bch2_btree_journal_peek(trans, iter, search_key,
k->k ? k->k->p : path_l(path)->b->key.k.p);
if (next_journal) {
iter->k = next_journal->k;
@@ -2165,13 +2168,14 @@ void btree_trans_peek_journal(struct btree_trans *trans,
static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans,
struct btree_iter *iter,
+ struct bpos search_key,
struct bpos end_pos)
{
struct btree_path *path = btree_iter_path(trans, iter);
return bch2_journal_keys_peek_prev_min(trans->c, iter->btree_id,
path->level,
- path->pos,
+ search_key,
end_pos,
&iter->journal_idx);
}
@@ -2179,12 +2183,13 @@ static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans,
static noinline
void btree_trans_peek_prev_journal(struct btree_trans *trans,
struct btree_iter *iter,
+ struct bpos search_key,
struct bkey_s_c *k)
{
struct btree_path *path = btree_iter_path(trans, iter);
struct bkey_i *next_journal =
- bch2_btree_journal_peek_prev(trans, iter,
- k->k ? k->k->p : path_l(path)->b->key.k.p);
+ bch2_btree_journal_peek_prev(trans, iter, search_key,
+ k->k ? k->k->p : path_l(path)->b->data->min_key);
if (next_journal) {
iter->k = next_journal->k;
@@ -2292,11 +2297,11 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct
}
if (unlikely(iter->flags & BTREE_ITER_with_journal))
- btree_trans_peek_journal(trans, iter, &k);
+ btree_trans_peek_journal(trans, iter, search_key, &k);
if (unlikely((iter->flags & BTREE_ITER_with_updates) &&
trans->nr_updates))
- bch2_btree_trans_peek_updates(trans, iter, &k);
+ bch2_btree_trans_peek_updates(trans, iter, search_key, &k);
if (k.k && bkey_deleted(k.k)) {
/*
@@ -2326,6 +2331,20 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct
}
bch2_btree_iter_verify(trans, iter);
+
+ if (trace___btree_iter_peek_enabled()) {
+ CLASS(printbuf, buf)();
+
+ int ret = bkey_err(k);
+ if (ret)
+ prt_str(&buf, bch2_err_str(ret));
+ else if (k.k)
+ bch2_bkey_val_to_text(&buf, trans->c, k);
+ else
+ prt_str(&buf, "(null)");
+ trace___btree_iter_peek(trans->c, buf.buf);
+ }
+
return k;
}
@@ -2484,6 +2503,19 @@ out_no_locked:
bch2_btree_iter_verify_entry_exit(iter);
+ if (trace_btree_iter_peek_max_enabled()) {
+ CLASS(printbuf, buf)();
+
+ int ret = bkey_err(k);
+ if (ret)
+ prt_str(&buf, bch2_err_str(ret));
+ else if (k.k)
+ bch2_bkey_val_to_text(&buf, trans->c, k);
+ else
+ prt_str(&buf, "(null)");
+ trace_btree_iter_peek_max(trans->c, buf.buf);
+ }
+
return k;
end:
bch2_btree_iter_set_pos(trans, iter, end);
@@ -2557,11 +2589,11 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_trans *trans, st
}
if (unlikely(iter->flags & BTREE_ITER_with_journal))
- btree_trans_peek_prev_journal(trans, iter, &k);
+ btree_trans_peek_prev_journal(trans, iter, search_key, &k);
if (unlikely((iter->flags & BTREE_ITER_with_updates) &&
trans->nr_updates))
- bch2_btree_trans_peek_prev_updates(trans, iter, &k);
+ bch2_btree_trans_peek_prev_updates(trans, iter, search_key, &k);
if (likely(k.k && !bkey_deleted(k.k))) {
break;
@@ -2724,6 +2756,19 @@ out_no_locked:
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(trans, iter);
+
+ if (trace_btree_iter_peek_prev_min_enabled()) {
+ CLASS(printbuf, buf)();
+
+ int ret = bkey_err(k);
+ if (ret)
+ prt_str(&buf, bch2_err_str(ret));
+ else if (k.k)
+ bch2_bkey_val_to_text(&buf, trans->c, k);
+ else
+ prt_str(&buf, "(null)");
+ trace_btree_iter_peek_prev_min(trans->c, buf.buf);
+ }
return k;
end:
bch2_btree_iter_set_pos(trans, iter, end);
@@ -2767,8 +2812,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre
/* extents can't span inode numbers: */
if ((iter->flags & BTREE_ITER_is_extents) &&
unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
- if (iter->pos.inode == KEY_INODE_MAX)
- return bkey_s_c_null;
+ if (iter->pos.inode == KEY_INODE_MAX) {
+ k = bkey_s_c_null;
+ goto out2;
+ }
bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(iter->pos));
}
@@ -2785,8 +2832,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre
}
struct btree_path *path = btree_iter_path(trans, iter);
- if (unlikely(!btree_path_node(path, path->level)))
- return bkey_s_c_null;
+ if (unlikely(!btree_path_node(path, path->level))) {
+ k = bkey_s_c_null;
+ goto out2;
+ }
btree_path_set_should_be_locked(trans, path);
@@ -2879,7 +2928,20 @@ out:
bch2_btree_iter_verify(trans, iter);
ret = bch2_btree_iter_verify_ret(trans, iter, k);
if (unlikely(ret))
- return bkey_s_c_err(ret);
+ k = bkey_s_c_err(ret);
+out2:
+ if (trace_btree_iter_peek_slot_enabled()) {
+ CLASS(printbuf, buf)();
+
+ int ret = bkey_err(k);
+ if (ret)
+ prt_str(&buf, bch2_err_str(ret));
+ else if (k.k)
+ bch2_bkey_val_to_text(&buf, trans->c, k);
+ else
+ prt_str(&buf, "(null)");
+ trace_btree_iter_peek_slot(trans->c, buf.buf);
+ }
return k;
}
@@ -3132,6 +3194,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) {
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+ prt_printf(&buf, "bump allocator exceeded BTREE_TRANS_MEM_MAX (%u)\n",
+ BTREE_TRANS_MEM_MAX);
+
bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace);
bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
@@ -3159,46 +3225,32 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
mutex_unlock(&s->lock);
}
- if (trans->used_mempool) {
- if (trans->mem_bytes >= new_bytes)
- goto out_change_top;
-
- /* No more space from mempool item, need malloc new one */
- new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN);
- if (unlikely(!new_mem)) {
- bch2_trans_unlock(trans);
-
- new_mem = kmalloc(new_bytes, GFP_KERNEL);
- if (!new_mem)
- return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);
+ if (trans->used_mempool || new_bytes > BTREE_TRANS_MEM_MAX) {
+ EBUG_ON(trans->mem_bytes >= new_bytes);
+ return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);
+ }
- ret = bch2_trans_relock(trans);
- if (ret) {
- kfree(new_mem);
- return ERR_PTR(ret);
- }
- }
- memcpy(new_mem, trans->mem, trans->mem_top);
- trans->used_mempool = false;
- mempool_free(trans->mem, &c->btree_trans_mem_pool);
- goto out_new_mem;
+ if (old_bytes) {
+ trans->realloc_bytes_required = new_bytes;
+ trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
+ return ERR_PTR(btree_trans_restart_ip(trans,
+ BCH_ERR_transaction_restart_mem_realloced, _RET_IP_));
}
- new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
+ EBUG_ON(trans->mem);
+
+ new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN);
if (unlikely(!new_mem)) {
bch2_trans_unlock(trans);
- new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL);
+ new_mem = kmalloc(new_bytes, GFP_KERNEL);
if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
new_bytes = BTREE_TRANS_MEM_MAX;
- memcpy(new_mem, trans->mem, trans->mem_top);
trans->used_mempool = true;
- kfree(trans->mem);
}
- if (!new_mem)
- return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);
+ EBUG_ON(!new_mem);
trans->mem = new_mem;
trans->mem_bytes = new_bytes;
@@ -3207,18 +3259,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long
if (ret)
return ERR_PTR(ret);
}
-out_new_mem:
+
trans->mem = new_mem;
trans->mem_bytes = new_bytes;
- if (old_bytes) {
- trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
- return ERR_PTR(btree_trans_restart_ip(trans,
- BCH_ERR_transaction_restart_mem_realloced, _RET_IP_));
- }
-out_change_top:
- bch2_trans_kmalloc_trace(trans, size, ip);
-
p = trans->mem + trans->mem_top;
trans->mem_top += size;
memset(p, 0, size);
@@ -3279,6 +3323,27 @@ u32 bch2_trans_begin(struct btree_trans *trans)
trans->restart_count++;
trans->mem_top = 0;
+ if (trans->restarted == BCH_ERR_transaction_restart_mem_realloced) {
+ EBUG_ON(!trans->mem || !trans->mem_bytes);
+ unsigned new_bytes = trans->realloc_bytes_required;
+ void *new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
+ if (unlikely(!new_mem)) {
+ bch2_trans_unlock(trans);
+ new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL);
+
+ EBUG_ON(new_bytes > BTREE_TRANS_MEM_MAX);
+
+ if (!new_mem) {
+ new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
+ new_bytes = BTREE_TRANS_MEM_MAX;
+ trans->used_mempool = true;
+ kfree(trans->mem);
+ }
+ }
+ trans->mem = new_mem;
+ trans->mem_bytes = new_bytes;
+ }
+
trans_for_each_path(trans, path, i) {
path->should_be_locked = false;
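__bch2_trans_kmalloc() no longer reallocates a live buffer in place: it records realloc_bytes_required, returns transaction_restart_mem_realloced, and the actual krealloc happens at the top of bch2_trans_begin(), where no pointers into the old buffer can still be live. Callers need nothing new as long as they already allocate inside the standard restart loop; a sketch with a hypothetical payload type:

	ret = lockrestart_do(trans, ({
		struct my_payload *p = bch2_trans_kmalloc(trans, sizeof(*p));
		PTR_ERR_OR_ZERO(p);	/* the restart error triggers the retry */
	}));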
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c
index cf7398751644..ea839560a136 100644
--- a/fs/bcachefs/btree_journal_iter.c
+++ b/fs/bcachefs/btree_journal_iter.c
@@ -137,12 +137,15 @@ struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id b
struct journal_key *k;
BUG_ON(*idx > keys->nr);
+
+ if (!keys->nr)
+ return NULL;
search:
if (!*idx)
*idx = __bch2_journal_key_search(keys, btree_id, level, pos);
- while (*idx &&
- __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) {
+ while (*idx < keys->nr &&
+ __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx)) >= 0) {
(*idx)++;
iters++;
if (iters == 10) {
@@ -151,18 +154,23 @@ search:
}
}
+ if (*idx == keys->nr)
+ --(*idx);
+
struct bkey_i *ret = NULL;
rcu_read_lock(); /* for overwritten_ranges */
- while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) {
+ while (true) {
+ k = idx_to_key(keys, *idx);
if (__journal_key_cmp(btree_id, level, end_pos, k) > 0)
break;
if (k->overwritten) {
if (k->overwritten_range)
- *idx = rcu_dereference(k->overwritten_range)->start - 1;
- else
- *idx -= 1;
+ *idx = rcu_dereference(k->overwritten_range)->start;
+ if (!*idx)
+ break;
+ --(*idx);
continue;
}
@@ -171,6 +179,8 @@ search:
break;
}
+ if (!*idx)
+ break;
--(*idx);
iters++;
if (iters == 10) {
@@ -641,10 +651,11 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
{
const struct journal_key *l = _l;
const struct journal_key *r = _r;
+ int rewind = l->rewind && r->rewind ? -1 : 1;
return journal_key_cmp(l, r) ?:
- cmp_int(l->journal_seq, r->journal_seq) ?:
- cmp_int(l->journal_offset, r->journal_offset);
+ ((cmp_int(l->journal_seq, r->journal_seq) ?:
+ cmp_int(l->journal_offset, r->journal_offset)) * rewind);
}
void bch2_journal_keys_put(struct bch_fs *c)
@@ -713,6 +724,8 @@ int bch2_journal_keys_sort(struct bch_fs *c)
struct journal_keys *keys = &c->journal_keys;
size_t nr_read = 0;
+ u64 rewind_seq = c->opts.journal_rewind ?: U64_MAX;
+
genradix_for_each(&c->journal_entries, iter, _i) {
i = *_i;
@@ -721,28 +734,43 @@ int bch2_journal_keys_sort(struct bch_fs *c)
cond_resched();
- for_each_jset_key(k, entry, &i->j) {
- struct journal_key n = (struct journal_key) {
- .btree_id = entry->btree_id,
- .level = entry->level,
- .k = k,
- .journal_seq = le64_to_cpu(i->j.seq),
- .journal_offset = k->_data - i->j._data,
- };
-
- if (darray_push(keys, n)) {
- __journal_keys_sort(keys);
-
- if (keys->nr * 8 > keys->size * 7) {
- bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu",
- keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq));
- return bch_err_throw(c, ENOMEM_journal_keys_sort);
+ vstruct_for_each(&i->j, entry) {
+ bool rewind = !entry->level &&
+ !btree_id_is_alloc(entry->btree_id) &&
+ le64_to_cpu(i->j.seq) >= rewind_seq;
+
+ if (entry->type != (rewind
+ ? BCH_JSET_ENTRY_overwrite
+ : BCH_JSET_ENTRY_btree_keys))
+ continue;
+
+ if (!rewind && le64_to_cpu(i->j.seq) < c->journal_replay_seq_start)
+ continue;
+
+ jset_entry_for_each_key(entry, k) {
+ struct journal_key n = (struct journal_key) {
+ .btree_id = entry->btree_id,
+ .level = entry->level,
+ .rewind = rewind,
+ .k = k,
+ .journal_seq = le64_to_cpu(i->j.seq),
+ .journal_offset = k->_data - i->j._data,
+ };
+
+ if (darray_push(keys, n)) {
+ __journal_keys_sort(keys);
+
+ if (keys->nr * 8 > keys->size * 7) {
+ bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu",
+ keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq));
+ return bch_err_throw(c, ENOMEM_journal_keys_sort);
+ }
+
+ BUG_ON(darray_push(keys, n));
}
- BUG_ON(darray_push(keys, n));
+ nr_read++;
}
-
- nr_read++;
}
}
diff --git a/fs/bcachefs/btree_journal_iter_types.h b/fs/bcachefs/btree_journal_iter_types.h
index 8b773823704f..86aacb254fb2 100644
--- a/fs/bcachefs/btree_journal_iter_types.h
+++ b/fs/bcachefs/btree_journal_iter_types.h
@@ -11,8 +11,9 @@ struct journal_key {
u32 journal_offset;
enum btree_id btree_id:8;
unsigned level:8;
- bool allocated;
- bool overwritten;
+ bool allocated:1;
+ bool overwritten:1;
+ bool rewind:1;
struct journal_key_range_overwritten __rcu *
overwritten_range;
struct bkey_i *k;
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index 47035aae232e..bed2b4b6ffb9 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -213,7 +213,7 @@ static noinline __noreturn void break_cycle_fail(struct lock_graph *g)
prt_newline(&buf);
}
- bch2_print_str_nonblocking(g->g->trans->c, KERN_ERR, buf.buf);
+ bch2_print_str(g->g->trans->c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
BUG();
}
@@ -771,7 +771,7 @@ static inline void __bch2_trans_unlock(struct btree_trans *trans)
}
static noinline __cold void bch2_trans_relock_fail(struct btree_trans *trans, struct btree_path *path,
- struct get_locks_fail *f, bool trace)
+ struct get_locks_fail *f, bool trace, ulong ip)
{
if (!trace)
goto out;
@@ -796,7 +796,7 @@ static noinline __cold void bch2_trans_relock_fail(struct btree_trans *trans, st
prt_printf(&buf, " total locked %u.%u.%u", c.n[0], c.n[1], c.n[2]);
}
- trace_trans_restart_relock(trans, _RET_IP_, buf.buf);
+ trace_trans_restart_relock(trans, ip, buf.buf);
printbuf_exit(&buf);
}
@@ -806,7 +806,7 @@ out:
bch2_trans_verify_locks(trans);
}
-static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace)
+static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace, ulong ip)
{
bch2_trans_verify_locks(trans);
@@ -825,7 +825,7 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace)
if (path->should_be_locked &&
(ret = btree_path_get_locks(trans, path, false, &f,
BCH_ERR_transaction_restart_relock))) {
- bch2_trans_relock_fail(trans, path, &f, trace);
+ bch2_trans_relock_fail(trans, path, &f, trace, ip);
return ret;
}
}
@@ -838,12 +838,12 @@ out:
int bch2_trans_relock(struct btree_trans *trans)
{
- return __bch2_trans_relock(trans, true);
+ return __bch2_trans_relock(trans, true, _RET_IP_);
}
int bch2_trans_relock_notrace(struct btree_trans *trans)
{
- return __bch2_trans_relock(trans, false);
+ return __bch2_trans_relock(trans, false, _RET_IP_);
}
void bch2_trans_unlock(struct btree_trans *trans)
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index 9adca77e2580..f2173a3316f4 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -417,8 +417,10 @@ static inline void btree_path_set_should_be_locked(struct btree_trans *trans, st
EBUG_ON(!btree_node_locked(path, path->level));
EBUG_ON(path->uptodate);
- path->should_be_locked = true;
- trace_btree_path_should_be_locked(trans, path);
+ if (!path->should_be_locked) {
+ path->should_be_locked = true;
+ trace_btree_path_should_be_locked(trans, path);
+ }
}
static inline void __btree_path_set_level_up(struct btree_trans *trans,
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index a35847734a60..a3fb07c60e25 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -75,39 +75,6 @@ static inline u64 bkey_journal_seq(struct bkey_s_c k)
}
}
-static bool found_btree_node_is_readable(struct btree_trans *trans,
- struct found_btree_node *f)
-{
- struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp;
-
- found_btree_node_to_key(&tmp.k, f);
-
- struct btree *b = bch2_btree_node_get_noiter(trans, &tmp.k, f->btree_id, f->level, false);
- bool ret = !IS_ERR_OR_NULL(b);
- if (!ret)
- return ret;
-
- f->sectors_written = b->written;
- f->journal_seq = le64_to_cpu(b->data->keys.journal_seq);
-
- struct bkey_s_c k;
- struct bkey unpacked;
- struct btree_node_iter iter;
- for_each_btree_node_key_unpack(b, k, &iter, &unpacked)
- f->journal_seq = max(f->journal_seq, bkey_journal_seq(k));
-
- six_unlock_read(&b->c.lock);
-
- /*
- * We might update this node's range; if that happens, we need the node
- * to be re-read so the read path can trim keys that are no longer in
- * this node
- */
- if (b != btree_node_root(trans->c, b))
- bch2_btree_node_evict(trans, &tmp.k);
- return ret;
-}
-
static int found_btree_node_cmp_cookie(const void *_l, const void *_r)
{
const struct found_btree_node *l = _l;
@@ -159,17 +126,17 @@ static const struct min_heap_callbacks found_btree_node_heap_cbs = {
};
static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
- struct bio *bio, struct btree_node *bn, u64 offset)
+ struct btree *b, struct bio *bio, u64 offset)
{
struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
+ struct btree_node *bn = b->data;
bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
bio->bi_iter.bi_sector = offset;
- bch2_bio_map(bio, bn, PAGE_SIZE);
+ bch2_bio_map(bio, b->data, c->opts.block_size);
u64 submit_time = local_clock();
submit_bio_wait(bio);
-
bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status);
if (bio->bi_status) {
@@ -217,7 +184,28 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
};
rcu_read_unlock();
- if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) {
+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
+ bio->bi_iter.bi_sector = offset;
+ bch2_bio_map(bio, b->data, c->opts.btree_node_size);
+
+ submit_time = local_clock();
+ submit_bio_wait(bio);
+ bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status);
+
+ found_btree_node_to_key(&b->key, &n);
+
+ CLASS(printbuf, buf)();
+ if (!bch2_btree_node_read_done(c, ca, b, NULL, &buf)) {
+ /* read_done will swap out b->data for another buffer */
+ bn = b->data;
+ /*
+ * Grab journal_seq here because we want the max journal_seq of
+ * any bset; read_done sorts down to a single set and picks the
+ * max journal_seq
+ */
+ n.journal_seq = le64_to_cpu(bn->keys.journal_seq);
+ n.sectors_written = b->written;
+
mutex_lock(&f->lock);
if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) {
bch_err(c, "try_read_btree_node() can't handle endian conversion");
@@ -237,12 +225,20 @@ static int read_btree_nodes_worker(void *p)
struct find_btree_nodes_worker *w = p;
struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes);
struct bch_dev *ca = w->ca;
- void *buf = (void *) __get_free_page(GFP_KERNEL);
- struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL);
unsigned long last_print = jiffies;
+ struct btree *b = NULL;
+ struct bio *bio = NULL;
+
+ b = __bch2_btree_node_mem_alloc(c);
+ if (!b) {
+ bch_err(c, "read_btree_nodes_worker: error allocating buf");
+ w->f->ret = -ENOMEM;
+ goto err;
+ }
- if (!buf || !bio) {
- bch_err(c, "read_btree_nodes_worker: error allocating bio/buf");
+ bio = bio_alloc(NULL, buf_pages(b->data, c->opts.btree_node_size), 0, GFP_KERNEL);
+ if (!bio) {
+ bch_err(c, "read_btree_nodes_worker: error allocating bio");
w->f->ret = -ENOMEM;
goto err;
}
@@ -266,11 +262,13 @@ static int read_btree_nodes_worker(void *p)
!bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c)))
continue;
- try_read_btree_node(w->f, ca, bio, buf, sector);
+ try_read_btree_node(w->f, ca, b, bio, sector);
}
err:
+ if (b)
+ __btree_node_data_free(b);
+ kfree(b);
bio_put(bio);
- free_page((unsigned long) buf);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
closure_put(w->cl);
kfree(w);
@@ -521,8 +519,12 @@ bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b)
return false;
}
-bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree)
+int bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree)
{
+ int ret = bch2_run_print_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+ if (ret)
+ return ret;
+
struct found_btree_node search = {
.btree_id = btree,
.level = 0,
diff --git a/fs/bcachefs/btree_node_scan.h b/fs/bcachefs/btree_node_scan.h
index 08687b209787..66e6f9ed19d0 100644
--- a/fs/bcachefs/btree_node_scan.h
+++ b/fs/bcachefs/btree_node_scan.h
@@ -4,7 +4,7 @@
int bch2_scan_for_btree_nodes(struct bch_fs *);
bool bch2_btree_node_is_stale(struct bch_fs *, struct btree *);
-bool bch2_btree_has_scanned_nodes(struct bch_fs *, enum btree_id);
+int bch2_btree_has_scanned_nodes(struct bch_fs *, enum btree_id);
int bch2_get_scanned_nodes(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos);
void bch2_find_btree_nodes_exit(struct find_btree_nodes *);
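The signature change turns bch2_btree_has_scanned_nodes() into the usual kernel tri-state: negative error (it may now need to run the scan pass itself), 0 for no nodes found, 1 for found. Callers move from a boolean test to the pattern used by bch2_check_root() above (the recovery helpers here are hypothetical):

	ret = bch2_btree_has_scanned_nodes(c, btree);
	if (ret < 0)
		goto err;			/* running the scan pass failed */
	if (!ret)
		reconstruct_empty_root();	/* nothing recoverable */
	else
		recover_from_scanned_nodes();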
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index d9710801e3ee..639ef75b3dbd 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -595,12 +595,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
int ret = 0;
bch2_trans_verify_not_unlocked_or_in_restart(trans);
-
+#if 0
+ /* todo: bring back dynamic fault injection */
if (race_fault()) {
trace_and_count(c, trans_restart_fault_inject, trans, trace_ip);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject);
}
-
+#endif
/*
* Check if the insert will fit in the leaf node with the write lock
* held, otherwise another thread could write the node changing the
@@ -757,6 +758,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
btree_trans_journal_entries_start(trans),
trans->journal_entries.u64s);
+ EBUG_ON(trans->journal_res.u64s < trans->journal_entries.u64s);
+
trans->journal_res.offset += trans->journal_entries.u64s;
trans->journal_res.u64s -= trans->journal_entries.u64s;
@@ -1003,6 +1006,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
{
struct btree_insert_entry *errored_at = NULL;
struct bch_fs *c = trans->c;
+ unsigned journal_u64s = 0;
int ret = 0;
bch2_trans_verify_not_unlocked_or_in_restart(trans);
@@ -1031,10 +1035,10 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
- trans->journal_u64s = trans->journal_entries.u64s + jset_u64s(trans->accounting.u64s);
+ journal_u64s = jset_u64s(trans->accounting.u64s);
trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
if (trans->journal_transaction_names)
- trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
+ journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
trans_for_each_update(trans, i) {
struct btree_path *path = trans->paths + i->path;
@@ -1054,11 +1058,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
continue;
/* we're going to journal the key being updated: */
- trans->journal_u64s += jset_u64s(i->k->k.u64s);
+ journal_u64s += jset_u64s(i->k->k.u64s);
/* and we're also going to log the overwrite: */
if (trans->journal_transaction_names)
- trans->journal_u64s += jset_u64s(i->old_k.u64s);
+ journal_u64s += jset_u64s(i->old_k.u64s);
}
if (trans->extra_disk_res) {
@@ -1076,6 +1080,8 @@ retry:
memset(&trans->journal_res, 0, sizeof(trans->journal_res));
memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta));
+ trans->journal_u64s = journal_u64s + trans->journal_entries.u64s;
+
ret = do_bch2_trans_commit(trans, flags, &errored_at, _RET_IP_);
/* make sure we didn't drop or screw up locks: */
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index c61c4171ae50..112170fd9c8f 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -497,6 +497,7 @@ struct btree_trans {
void *mem;
unsigned mem_top;
unsigned mem_bytes;
+ unsigned realloc_bytes_required;
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
darray_trans_kmalloc_trace trans_kmalloc_trace;
#endif
@@ -617,6 +618,9 @@ enum btree_write_type {
x(dying) \
x(fake) \
x(need_rewrite) \
+ x(need_rewrite_error) \
+ x(need_rewrite_degraded) \
+ x(need_rewrite_ptr_written_zero) \
x(never_write) \
x(pinned)
@@ -641,6 +645,32 @@ static inline void clear_btree_node_ ## flag(struct btree *b) \
BTREE_FLAGS()
#undef x
+#define BTREE_NODE_REWRITE_REASON() \
+ x(none) \
+ x(unknown) \
+ x(error) \
+ x(degraded) \
+ x(ptr_written_zero)
+
+enum btree_node_rewrite_reason {
+#define x(n) BTREE_NODE_REWRITE_##n,
+ BTREE_NODE_REWRITE_REASON()
+#undef x
+};
+
+static inline enum btree_node_rewrite_reason btree_node_rewrite_reason(struct btree *b)
+{
+ if (btree_node_need_rewrite_ptr_written_zero(b))
+ return BTREE_NODE_REWRITE_ptr_written_zero;
+ if (btree_node_need_rewrite_degraded(b))
+ return BTREE_NODE_REWRITE_degraded;
+ if (btree_node_need_rewrite_error(b))
+ return BTREE_NODE_REWRITE_error;
+ if (btree_node_need_rewrite(b))
+ return BTREE_NODE_REWRITE_unknown;
+ return BTREE_NODE_REWRITE_none;
+}
+
static inline struct btree_write *btree_current_write(struct btree *b)
{
return b->writes + btree_node_write_idx(b);
diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c
index e97e78c10f49..ee657b9f4b96 100644
--- a/fs/bcachefs/btree_update.c
+++ b/fs/bcachefs/btree_update.c
@@ -549,20 +549,26 @@ void *__bch2_trans_subbuf_alloc(struct btree_trans *trans,
unsigned u64s)
{
unsigned new_top = buf->u64s + u64s;
- unsigned old_size = buf->size;
+ unsigned new_size = buf->size;
- if (new_top > buf->size)
- buf->size = roundup_pow_of_two(new_top);
+ BUG_ON(roundup_pow_of_two(new_top) > U16_MAX);
- void *n = bch2_trans_kmalloc_nomemzero(trans, buf->size * sizeof(u64));
+ if (new_top > new_size)
+ new_size = roundup_pow_of_two(new_top);
+
+ void *n = bch2_trans_kmalloc_nomemzero(trans, new_size * sizeof(u64));
if (IS_ERR(n))
return n;
+ unsigned offset = (u64 *) n - (u64 *) trans->mem;
+ BUG_ON(offset > U16_MAX);
+
if (buf->u64s)
memcpy(n,
btree_trans_subbuf_base(trans, buf),
- old_size * sizeof(u64));
+ buf->size * sizeof(u64));
buf->base = (u64 *) n - (u64 *) trans->mem;
+ buf->size = new_size;
void *p = btree_trans_subbuf_top(trans, buf);
buf->u64s = new_top;
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 9feef1dc4de5..0b98ab959719 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -170,8 +170,7 @@ bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *);
-int bch2_btree_write_buffer_insert_err(struct btree_trans *,
- enum btree_id, struct bkey_i *);
+int bch2_btree_write_buffer_insert_err(struct bch_fs *, enum btree_id, struct bkey_i *);
static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
enum btree_id btree,
@@ -182,7 +181,7 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *tr
EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
if (unlikely(!btree_type_uses_write_buffer(btree))) {
- int ret = bch2_btree_write_buffer_insert_err(trans, btree, k);
+ int ret = bch2_btree_write_buffer_insert_err(trans->c, btree, k);
dump_stack();
return ret;
}
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index d2ecb782919b..553059b33bfd 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -1138,6 +1138,13 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
start_time);
}
+static const char * const btree_node_rewrite_reason_strs[] = {
+#define x(n) #n,
+ BTREE_NODE_REWRITE_REASON()
+#undef x
+ NULL,
+};
+
static struct btree_update *
bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
unsigned level_start, bool split,
@@ -1232,6 +1239,15 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
list_add_tail(&as->list, &c->btree_interior_update_list);
mutex_unlock(&c->btree_interior_update_lock);
+ struct btree *b = btree_path_node(path, path->level);
+ as->node_start = b->data->min_key;
+ as->node_end = b->data->max_key;
+ as->node_needed_rewrite = btree_node_rewrite_reason(b);
+ as->node_written = b->written;
+ as->node_sectors = btree_buf_bytes(b) >> 9;
+ as->node_remaining = __bch2_btree_u64s_remaining(b,
+ btree_bkey_last(b, bset_tree_last(b)));
+
/*
* We don't want to allocate if we're in an error state, that can cause
* deadlock on emergency shutdown due to open buckets getting stuck in
@@ -1271,10 +1287,11 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
do {
ret = bch2_btree_reserve_get(trans, as, nr_nodes, target, flags, &cl);
-
+ if (!bch2_err_matches(ret, BCH_ERR_operation_blocked))
+ break;
bch2_trans_unlock(trans);
bch2_wait_on_allocator(c, &cl);
- } while (bch2_err_matches(ret, BCH_ERR_operation_blocked));
+ } while (1);
}
if (ret) {
@@ -2108,6 +2125,9 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
if (ret)
goto err;
+ as->node_start = prev->data->min_key;
+ as->node_end = next->data->max_key;
+
trace_and_count(c, btree_node_merge, trans, b);
n = bch2_btree_node_alloc(as, trans, b->c.level);
@@ -2274,9 +2294,9 @@ err:
goto out;
}
-static int bch2_btree_node_rewrite_key(struct btree_trans *trans,
- enum btree_id btree, unsigned level,
- struct bkey_i *k, unsigned flags)
+int bch2_btree_node_rewrite_key(struct btree_trans *trans,
+ enum btree_id btree, unsigned level,
+ struct bkey_i *k, unsigned flags)
{
struct btree_iter iter;
bch2_trans_node_iter_init(trans, &iter,
@@ -2348,9 +2368,8 @@ static void async_btree_node_rewrite_work(struct work_struct *work)
int ret = bch2_trans_do(c, bch2_btree_node_rewrite_key(trans,
a->btree_id, a->level, a->key.k, 0));
- if (ret != -ENOENT &&
- !bch2_err_matches(ret, EROFS) &&
- ret != -BCH_ERR_journal_shutdown)
+ if (!bch2_err_matches(ret, ENOENT) &&
+ !bch2_err_matches(ret, EROFS))
bch_err_fn_ratelimited(c, ret);
spin_lock(&c->btree_node_rewrites_lock);
@@ -2681,9 +2700,19 @@ static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update
prt_str(out, " ");
bch2_btree_id_to_text(out, as->btree_id);
- prt_printf(out, " l=%u-%u mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
+ prt_printf(out, " l=%u-%u ",
as->update_level_start,
- as->update_level_end,
+ as->update_level_end);
+ bch2_bpos_to_text(out, as->node_start);
+ prt_char(out, ' ');
+ bch2_bpos_to_text(out, as->node_end);
+ prt_printf(out, "\nwritten %u/%u u64s_remaining %u need_rewrite %s",
+ as->node_written,
+ as->node_sectors,
+ as->node_remaining,
+		   btree_node_rewrite_reason_strs[as->node_needed_rewrite]);
+
+ prt_printf(out, "\nmode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
bch2_btree_update_modes[as->mode],
as->nodes_written,
closure_nr_remaining(&as->cl),
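
btree_node_rewrite_reason_strs above is generated with the kernel's usual x-macro trick: the reason list expands once to build the enum and once, with x redefined to the stringizer, to build the matching name table, so the two can never drift apart. A self-contained miniature of the pattern (REWRITE_REASONS() and its entries are hypothetical, standing in for BTREE_NODE_REWRITE_REASON()):

#include <stdio.h>

#define REWRITE_REASONS()	\
	x(none)			\
	x(unreadable)		\
	x(degraded)

enum rewrite_reason {
#define x(n)	REWRITE_REASON_##n,
	REWRITE_REASONS()
#undef x
};

static const char * const rewrite_reason_strs[] = {
#define x(n)	#n,
	REWRITE_REASONS()
#undef x
	NULL,
};

int main(void)
{
	printf("%s\n", rewrite_reason_strs[REWRITE_REASON_degraded]);
	return 0;
}
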
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index 7fe793788a79..ac04e45a8515 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -57,6 +57,13 @@ struct btree_update {
unsigned took_gc_lock:1;
enum btree_id btree_id;
+ struct bpos node_start;
+ struct bpos node_end;
+ enum btree_node_rewrite_reason node_needed_rewrite;
+ u16 node_written;
+ u16 node_sectors;
+ u16 node_remaining;
+
unsigned update_level_start;
unsigned update_level_end;
@@ -169,6 +176,9 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans,
int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *,
struct btree *, unsigned, unsigned);
+int bch2_btree_node_rewrite_key(struct btree_trans *,
+ enum btree_id, unsigned,
+ struct bkey_i *, unsigned);
int bch2_btree_node_rewrite_pos(struct btree_trans *,
enum btree_id, unsigned,
struct bpos, unsigned, unsigned);
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index 90b21e61d2b6..4b095235a0d2 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -267,10 +267,9 @@ out:
BUG_ON(wb->sorted.size < wb->flushing.keys.nr);
}
-int bch2_btree_write_buffer_insert_err(struct btree_trans *trans,
+int bch2_btree_write_buffer_insert_err(struct bch_fs *c,
enum btree_id btree, struct bkey_i *k)
{
- struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "attempting to do write buffer update on non wb btree=");
@@ -332,7 +331,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
if (unlikely(!btree_type_uses_write_buffer(k->btree))) {
- ret = bch2_btree_write_buffer_insert_err(trans, k->btree, &k->k);
+ ret = bch2_btree_write_buffer_insert_err(trans->c, k->btree, &k->k);
goto err;
}
@@ -676,6 +675,9 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans,
goto err;
bch2_bkey_buf_copy(last_flushed, c, tmp.k);
+
+ /* can we avoid the unconditional restart? */
+ trace_and_count(c, trans_restart_write_buffer_flush, trans, _RET_IP_);
ret = bch_err_throw(c, transaction_restart_write_buffer_flush);
}
err:
diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h
index 05f56fd1eed0..c351d21aca0b 100644
--- a/fs/bcachefs/btree_write_buffer.h
+++ b/fs/bcachefs/btree_write_buffer.h
@@ -89,6 +89,12 @@ static inline int bch2_journal_key_to_wb(struct bch_fs *c,
struct journal_keys_to_wb *dst,
enum btree_id btree, struct bkey_i *k)
{
+ if (unlikely(!btree_type_uses_write_buffer(btree))) {
+ int ret = bch2_btree_write_buffer_insert_err(c, btree, k);
+ dump_stack();
+ return ret;
+ }
+
EBUG_ON(!dst->seq);
return k->k.type == KEY_TYPE_accounting
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index 2d38466eddfd..5ea89aa2b0c4 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -319,6 +319,7 @@ static int bch2_data_thread(void *arg)
ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_done;
ctx->stats.data_type = (int) DATA_PROGRESS_DATA_TYPE_done;
}
+ enumerated_ref_put(&ctx->c->writes, BCH_WRITE_REF_ioctl_data);
return 0;
}
@@ -378,15 +379,24 @@ static long bch2_ioctl_data(struct bch_fs *c,
struct bch_data_ctx *ctx;
int ret;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
+ if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_ioctl_data))
+ return -EROFS;
- if (arg.op >= BCH_DATA_OP_NR || arg.flags)
- return -EINVAL;
+ if (!capable(CAP_SYS_ADMIN)) {
+ ret = -EPERM;
+ goto put_ref;
+ }
+
+ if (arg.op >= BCH_DATA_OP_NR || arg.flags) {
+ ret = -EINVAL;
+ goto put_ref;
+ }
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
+ if (!ctx) {
+ ret = -ENOMEM;
+ goto put_ref;
+ }
ctx->c = c;
ctx->arg = arg;
@@ -395,11 +405,16 @@ static long bch2_ioctl_data(struct bch_fs *c,
&bcachefs_data_ops,
bch2_data_thread);
if (ret < 0)
- kfree(ctx);
+ goto cleanup;
+ return ret;
+cleanup:
+ kfree(ctx);
+put_ref:
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_ioctl_data);
return ret;
}
-static long bch2_ioctl_fs_usage(struct bch_fs *c,
+static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c,
struct bch_ioctl_fs_usage __user *user_arg)
{
struct bch_ioctl_fs_usage arg = {};
@@ -469,7 +484,7 @@ err:
}
/* obsolete, didn't allow for new data types: */
-static long bch2_ioctl_dev_usage(struct bch_fs *c,
+static noinline_for_stack long bch2_ioctl_dev_usage(struct bch_fs *c,
struct bch_ioctl_dev_usage __user *user_arg)
{
struct bch_ioctl_dev_usage arg;
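
The reshaped bch2_ioctl_data() above is the standard acquire-then-unwind pattern: nothing is held before the enumerated_ref tryget, so that failure returns directly, while every later failure must funnel through put_ref. A compact sketch of the same control flow (ref_tryget()/ref_put() are hypothetical stand-ins; the errno values match the hunk):

#include <errno.h>
#include <stdlib.h>

static int  ref_tryget(void) { return 1; }
static void ref_put(void)    { }

static long do_ioctl(unsigned op, unsigned nr_ops)
{
	long ret;

	if (!ref_tryget())
		return -EROFS;		/* nothing held yet: plain return */

	if (op >= nr_ops) {
		ret = -EINVAL;		/* ref held: unwind through put_ref */
		goto put_ref;
	}

	void *ctx = calloc(1, 64);
	if (!ctx) {
		ret = -ENOMEM;
		goto put_ref;
	}

	/* success: the worker thread now owns both ctx and the ref */
	return 0;
put_ref:
	ref_put();
	return ret;
}
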
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 5f1174348974..e848e210a9bf 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -249,6 +249,7 @@ static int data_update_invalid_bkey(struct data_update *m,
bch2_bkey_val_to_text(&buf, c, k);
prt_str(&buf, "\nnew: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+ prt_newline(&buf);
bch2_fs_emergency_read_only2(c, &buf);
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index 901f643ead83..07c2a0f73cc2 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -153,8 +153,6 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
c->verify_data = __bch2_btree_node_mem_alloc(c);
if (!c->verify_data)
goto out;
-
- list_del_init(&c->verify_data->list);
}
BUG_ON(b->nsets != 1);
@@ -586,6 +584,8 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
i->ubuf = buf;
i->size = size;
i->ret = 0;
+
+ int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
restart:
seqmutex_lock(&c->btree_trans_lock);
list_sort(&c->btree_trans_list, list_ptr_order_cmp);
@@ -599,6 +599,11 @@ restart:
if (!closure_get_not_zero(&trans->ref))
continue;
+ if (!trans->srcu_held) {
+ closure_put(&trans->ref);
+ continue;
+ }
+
u32 seq = seqmutex_unlock(&c->btree_trans_lock);
bch2_btree_trans_to_text(&i->buf, trans);
@@ -620,6 +625,8 @@ restart:
}
seqmutex_unlock(&c->btree_trans_lock);
unlocked:
+ srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
+
if (i->buf.allocation_failure)
ret = -ENOMEM;
diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c
index 300f7cc8abdf..28875c5c86ad 100644
--- a/fs/bcachefs/dirent.c
+++ b/fs/bcachefs/dirent.c
@@ -13,12 +13,15 @@
#include <linux/dcache.h>
+#ifdef CONFIG_UNICODE
int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
const struct qstr *str, struct qstr *out_cf)
{
*out_cf = (struct qstr) QSTR_INIT(NULL, 0);
-#ifdef CONFIG_UNICODE
+ if (!bch2_fs_casefold_enabled(trans->c))
+ return -EOPNOTSUPP;
+
unsigned char *buf = bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1);
int ret = PTR_ERR_OR_ZERO(buf);
if (ret)
@@ -30,10 +33,8 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
*out_cf = (struct qstr) QSTR_INIT(buf, ret);
return 0;
-#else
- return -EOPNOTSUPP;
-#endif
}
+#endif
static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
{
@@ -231,7 +232,8 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
prt_printf(out, " type %s", bch2_d_type_str(d.v->d_type));
}
-int bch2_dirent_init_name(struct bkey_i_dirent *dirent,
+int bch2_dirent_init_name(struct bch_fs *c,
+ struct bkey_i_dirent *dirent,
const struct bch_hash_info *hash_info,
const struct qstr *name,
const struct qstr *cf_name)
@@ -251,6 +253,9 @@ int bch2_dirent_init_name(struct bkey_i_dirent *dirent,
offsetof(struct bch_dirent, d_name) -
name->len);
} else {
+ if (!bch2_fs_casefold_enabled(c))
+ return -EOPNOTSUPP;
+
#ifdef CONFIG_UNICODE
memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len);
@@ -277,8 +282,6 @@ int bch2_dirent_init_name(struct bkey_i_dirent *dirent,
dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_len);
EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_len);
-#else
- return -EOPNOTSUPP;
#endif
}
@@ -313,7 +316,7 @@ struct bkey_i_dirent *bch2_dirent_create_key(struct btree_trans *trans,
dirent->v.d_type = type;
dirent->v.d_unused = 0;
- int ret = bch2_dirent_init_name(dirent, hash_info, name, cf_name);
+ int ret = bch2_dirent_init_name(trans->c, dirent, hash_info, name, cf_name);
if (ret)
return ERR_PTR(ret);
diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h
index 70fb0b581221..0417608c18d5 100644
--- a/fs/bcachefs/dirent.h
+++ b/fs/bcachefs/dirent.h
@@ -23,8 +23,16 @@ struct bch_fs;
struct bch_hash_info;
struct bch_inode_info;
+#ifdef CONFIG_UNICODE
int bch2_casefold(struct btree_trans *, const struct bch_hash_info *,
const struct qstr *, struct qstr *);
+#else
+static inline int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
+ const struct qstr *str, struct qstr *out_cf)
+{
+ return -EOPNOTSUPP;
+}
+#endif
static inline int bch2_maybe_casefold(struct btree_trans *trans,
const struct bch_hash_info *info,
@@ -59,7 +67,8 @@ static inline void dirent_copy_target(struct bkey_i_dirent *dst,
dst->v.d_type = src.v->d_type;
}
-int bch2_dirent_init_name(struct bkey_i_dirent *,
+int bch2_dirent_init_name(struct bch_fs *,
+ struct bkey_i_dirent *,
const struct bch_hash_info *,
const struct qstr *,
const struct qstr *);
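
Moving the #ifdef CONFIG_UNICODE into the header, with a static inline stub returning -EOPNOTSUPP, is what lets the call sites stay ifdef-free. The same header-side pattern in miniature (CONFIG_FOO and foo_frob() are made up):

#include <errno.h>

#ifdef CONFIG_FOO
int foo_frob(int x);			/* real implementation in foo.c */
#else
static inline int foo_frob(int x)	/* fallback: callers need no #ifdef */
{
	(void)x;
	return -EOPNOTSUPP;
}
#endif
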
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index 3d59a57a5256..f7528cd69c73 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -618,7 +618,9 @@ int bch2_gc_accounting_done(struct bch_fs *c)
for (unsigned j = 0; j < nr; j++)
src_v[j] -= dst_v[j];
- if (fsck_err(trans, accounting_mismatch, "%s", buf.buf)) {
+ bch2_trans_unlock_long(trans);
+
+ if (fsck_err(c, accounting_mismatch, "%s", buf.buf)) {
percpu_up_write(&c->mark_lock);
ret = commit_do(trans, NULL, NULL, 0,
bch2_disk_accounting_mod(trans, &acc_k, src_v, nr, false));
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index ac3264134a15..acc3b7b67704 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -137,7 +137,6 @@
x(BCH_ERR_transaction_restart, transaction_restart_relock) \
x(BCH_ERR_transaction_restart, transaction_restart_relock_path) \
x(BCH_ERR_transaction_restart, transaction_restart_relock_path_intent) \
- x(BCH_ERR_transaction_restart, transaction_restart_relock_after_fill) \
x(BCH_ERR_transaction_restart, transaction_restart_too_many_iters) \
x(BCH_ERR_transaction_restart, transaction_restart_lock_node_reused) \
x(BCH_ERR_transaction_restart, transaction_restart_fill_relock) \
@@ -148,11 +147,8 @@
x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock_write)\
x(BCH_ERR_transaction_restart, transaction_restart_deadlock_recursion_limit)\
x(BCH_ERR_transaction_restart, transaction_restart_upgrade) \
- x(BCH_ERR_transaction_restart, transaction_restart_key_cache_upgrade) \
x(BCH_ERR_transaction_restart, transaction_restart_key_cache_fill) \
x(BCH_ERR_transaction_restart, transaction_restart_key_cache_raced) \
- x(BCH_ERR_transaction_restart, transaction_restart_key_cache_realloced)\
- x(BCH_ERR_transaction_restart, transaction_restart_journal_preres_get) \
x(BCH_ERR_transaction_restart, transaction_restart_split_race) \
x(BCH_ERR_transaction_restart, transaction_restart_write_buffer_flush) \
x(BCH_ERR_transaction_restart, transaction_restart_nested) \
@@ -241,7 +237,6 @@
x(BCH_ERR_journal_res_blocked, journal_buf_enomem) \
x(BCH_ERR_journal_res_blocked, journal_stuck) \
x(BCH_ERR_journal_res_blocked, journal_retry_open) \
- x(BCH_ERR_journal_res_blocked, journal_preres_get_blocked) \
x(BCH_ERR_journal_res_blocked, bucket_alloc_blocked) \
x(BCH_ERR_journal_res_blocked, stripe_alloc_blocked) \
x(BCH_ERR_invalid, invalid_sb) \
@@ -287,7 +282,6 @@
x(EIO, sb_not_downgraded) \
x(EIO, btree_node_write_all_failed) \
x(EIO, btree_node_read_error) \
- x(EIO, btree_node_read_validate_error) \
x(EIO, btree_need_topology_repair) \
x(EIO, bucket_ref_update) \
x(EIO, trigger_alloc) \
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 63951e293c47..267e73d9d7e6 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -69,7 +69,7 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra
if (trans)
bch2_trans_updates_to_text(&buf, trans);
bool ret = __bch2_inconsistent_error(c, &buf);
- bch2_print_str_nonblocking(c, KERN_ERR, buf.buf);
+ bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
return ret;
@@ -103,7 +103,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out)
return bch_err_throw(c, btree_need_topology_repair);
} else {
return bch2_run_explicit_recovery_pass(c, out, BCH_RECOVERY_PASS_check_topology, 0) ?:
- bch_err_throw(c, btree_node_read_validate_error);
+ bch_err_throw(c, btree_need_topology_repair);
}
}
@@ -620,6 +620,11 @@ print:
if (s)
s->ret = ret;
+
+ if (trans &&
+ !(flags & FSCK_ERR_NO_LOG) &&
+ ret == -BCH_ERR_fsck_fix)
+ ret = bch2_trans_log_str(trans, bch2_sb_error_strs[err]) ?: ret;
err_unlock:
mutex_unlock(&c->fsck_error_msgs_lock);
err:
@@ -628,7 +633,9 @@ err:
* log_fsck_err()s: that would require us to track for every error type
* which recovery pass corrects it, to get the fsck exit status correct:
*/
- if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) {
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ /* nothing */
+ } else if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) {
set_bit(BCH_FS_errors_fixed, &c->flags);
} else {
set_bit(BCH_FS_errors_not_fixed, &c->flags);
diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c
index b899ee75f5b9..e76e58a568bf 100644
--- a/fs/bcachefs/extent_update.c
+++ b/fs/bcachefs/extent_update.c
@@ -139,6 +139,17 @@ int bch2_extent_trim_atomic(struct btree_trans *trans,
if (ret)
return ret;
- bch2_cut_back(end, k);
+	/* only trim (and emit the tracepoint) when the key extends past @end: */
+ if (bpos_lt(end, k->k.p)) {
+ if (trace_extent_trim_atomic_enabled()) {
+ CLASS(printbuf, buf)();
+ bch2_bpos_to_text(&buf, end);
+ prt_newline(&buf);
+ bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(k));
+ trace_extent_trim_atomic(trans->c, buf.buf);
+ }
+ bch2_cut_back(end, k);
+ }
return 0;
}
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 036e4ad95987..83cbd77dcb9c 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -50,19 +50,17 @@ void bch2_io_failures_to_text(struct printbuf *out,
struct bch_io_failures *failed)
{
static const char * const error_types[] = {
- "io", "checksum", "ec reconstruct", NULL
+ "btree validate", "io", "checksum", "ec reconstruct", NULL
};
for (struct bch_dev_io_failures *f = failed->devs;
f < failed->devs + failed->nr;
f++) {
unsigned errflags =
- ((!!f->failed_io) << 0) |
- ((!!f->failed_csum_nr) << 1) |
- ((!!f->failed_ec) << 2);
-
- if (!errflags)
- continue;
+ ((!!f->failed_btree_validate) << 0) |
+ ((!!f->failed_io) << 1) |
+ ((!!f->failed_csum_nr) << 2) |
+ ((!!f->failed_ec) << 3);
bch2_printbuf_make_room(out, 1024);
out->atomic++;
@@ -77,7 +75,9 @@ void bch2_io_failures_to_text(struct printbuf *out,
prt_char(out, ' ');
- if (is_power_of_2(errflags)) {
+ if (!errflags) {
+ prt_str(out, "no error - confused");
+ } else if (is_power_of_2(errflags)) {
prt_bitflags(out, error_types, errflags);
prt_str(out, " error");
} else {
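
The failure flags above are a small bitmask, one bit per failure counter, with the new "btree validate" class at bit 0 shifting the others up, and is_power_of_2() distinguishing the single-error case. A standalone decode of the same encoding:

#include <stdio.h>

static int is_pow2(unsigned v) { return v && !(v & (v - 1)); }

int main(void)
{
	static const char * const error_types[] = {
		"btree validate", "io", "checksum", "ec reconstruct",
	};
	/* bit i set iff failure counter i is nonzero, as in the hunk above */
	unsigned errflags = (1 << 1) | (1 << 2);	/* io + checksum */

	if (!errflags)
		printf("no error - confused\n");
	else if (is_pow2(errflags))			/* exactly one kind */
		printf("single error type\n");
	else
		for (unsigned i = 0; i < 4; i++)
			if (errflags & (1U << i))
				printf("failed: %s\n", error_types[i]);
	return 0;
}
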
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index 66bacdd49f78..1c54b9b5bd69 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -674,7 +674,7 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc
/* buffered writes: */
-int bch2_write_begin(struct file *file, struct address_space *mapping,
+int bch2_write_begin(const struct kiocb *iocb, struct address_space *mapping,
loff_t pos, unsigned len,
struct folio **foliop, void **fsdata)
{
@@ -757,7 +757,7 @@ err_unlock:
return bch2_err_class(ret);
}
-int bch2_write_end(struct file *file, struct address_space *mapping,
+int bch2_write_end(const struct kiocb *iocb, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct folio *folio, void *fsdata)
{
diff --git a/fs/bcachefs/fs-io-buffered.h b/fs/bcachefs/fs-io-buffered.h
index 3207ebbb4ab4..14de91c27656 100644
--- a/fs/bcachefs/fs-io-buffered.h
+++ b/fs/bcachefs/fs-io-buffered.h
@@ -10,9 +10,9 @@ int bch2_read_folio(struct file *, struct folio *);
int bch2_writepages(struct address_space *, struct writeback_control *);
void bch2_readahead(struct readahead_control *);
-int bch2_write_begin(struct file *, struct address_space *, loff_t pos,
+int bch2_write_begin(const struct kiocb *, struct address_space *, loff_t pos,
unsigned len, struct folio **, void **);
-int bch2_write_end(struct file *, struct address_space *, loff_t,
+int bch2_write_end(const struct kiocb *, struct address_space *, loff_t,
unsigned len, unsigned copied, struct folio *, void *);
ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 85d13f800165..e54e4f255b22 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -722,7 +722,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
if (IS_ERR(inode))
inode = NULL;
-#ifdef CONFIG_UNICODE
if (!inode && IS_CASEFOLDED(vdir)) {
/*
* Do not cache a negative dentry in casefolded directories
@@ -737,7 +736,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
*/
return NULL;
}
-#endif
return d_splice_alias(&inode->v, dentry);
}
@@ -1732,7 +1730,8 @@ static int bch2_fileattr_set(struct mnt_idmap *idmap,
bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
- return ret;
+
+ return bch2_err_class(ret);
}
static const struct file_operations bch_file_operations = {
@@ -2490,6 +2489,14 @@ static int bch2_fs_get_tree(struct fs_context *fc)
if (ret)
goto err_stop_fs;
+ /*
+ * We might be doing a RO mount because other options required it, or we
+ * have no alloc info and it's a small image with no room to regenerate
+ * it
+ */
+ if (c->opts.read_only)
+ fc->sb_flags |= SB_RDONLY;
+
sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c);
ret = PTR_ERR_OR_ZERO(sb);
if (ret)
@@ -2557,9 +2564,10 @@ got_sb:
sb->s_shrink->seeks = 0;
#ifdef CONFIG_UNICODE
- sb->s_encoding = c->cf_encoding;
-#endif
+ if (bch2_fs_casefold_enabled(c))
+ sb->s_encoding = c->cf_encoding;
generic_set_sb_d_ops(sb);
+#endif
vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
ret = PTR_ERR_OR_ZERO(vinode);
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 68ed69a255e1..15c1e890d299 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -12,6 +12,7 @@
#include "fs.h"
#include "fsck.h"
#include "inode.h"
+#include "io_misc.h"
#include "keylist.h"
#include "namei.h"
#include "recovery_passes.h"
@@ -327,7 +328,8 @@ static inline bool inode_should_reattach(struct bch_inode_unpacked *inode)
(inode->bi_flags & BCH_INODE_has_child_snapshot))
return false;
- return !inode->bi_dir && !(inode->bi_flags & BCH_INODE_unlinked);
+ return !bch2_inode_has_backpointer(inode) &&
+ !(inode->bi_flags & BCH_INODE_unlinked);
}
static int maybe_delete_dirent(struct btree_trans *trans, struct bpos d_pos, u32 snapshot)
@@ -372,6 +374,18 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
if (inode->bi_subvol) {
inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL;
+ struct btree_iter subvol_iter;
+ struct bkey_i_subvolume *subvol =
+ bch2_bkey_get_mut_typed(trans, &subvol_iter,
+ BTREE_ID_subvolumes, POS(0, inode->bi_subvol),
+ 0, subvolume);
+ ret = PTR_ERR_OR_ZERO(subvol);
+ if (ret)
+ return ret;
+
+ subvol->v.fs_path_parent = BCACHEFS_ROOT_SUBVOL;
+ bch2_trans_iter_exit(trans, &subvol_iter);
+
u64 root_inum;
ret = subvol_lookup(trans, inode->bi_parent_subvol,
&dirent_snapshot, &root_inum);
@@ -387,6 +401,8 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
if (ret)
return ret;
+ bch_verbose(c, "got lostfound inum %llu", lostfound.bi_inum);
+
lostfound.bi_nlink += S_ISDIR(inode->bi_mode);
/* ensure lost+found inode is also present in inode snapshot */
@@ -423,6 +439,16 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
if (ret)
return ret;
+ {
+ CLASS(printbuf, buf)();
+ ret = bch2_inum_snapshot_to_path(trans, inode->bi_inum,
+ inode->bi_snapshot, NULL, &buf);
+ if (ret)
+ return ret;
+
+ bch_info(c, "reattached at %s", buf.buf);
+ }
+
/*
* Fix up inodes in child snapshots: if they should also be reattached
* update the backpointer field, if they should not be we need to emit
@@ -490,13 +516,21 @@ static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
static int remove_backpointer(struct btree_trans *trans,
struct bch_inode_unpacked *inode)
{
- if (!inode->bi_dir)
+ if (!bch2_inode_has_backpointer(inode))
return 0;
+ u32 snapshot = inode->bi_snapshot;
+
+ if (inode->bi_parent_subvol) {
+ int ret = bch2_subvolume_get_snapshot(trans, inode->bi_parent_subvol, &snapshot);
+ if (ret)
+ return ret;
+ }
+
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c_dirent d = dirent_get_by_pos(trans, &iter,
- SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot));
+ SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot));
int ret = bkey_err(d) ?:
dirent_points_to_inode(c, d, inode) ?:
bch2_fsck_remove_dirent(trans, d.k->p);
@@ -695,14 +729,8 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen,
u32 id, u32 ancestor)
{
- ssize_t i;
-
EBUG_ON(id > ancestor);
- /* @ancestor should be the snapshot most recently added to @seen */
- EBUG_ON(ancestor != seen->pos.snapshot);
- EBUG_ON(ancestor != darray_last(seen->ids));
-
if (id == ancestor)
return true;
@@ -718,11 +746,8 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see
* numerically, since snapshot ID lists are kept sorted, so if we find
* an id that's an ancestor of @id we're done:
*/
-
- for (i = seen->ids.nr - 2;
- i >= 0 && seen->ids.data[i] >= id;
- --i)
- if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i]))
+ darray_for_each_reverse(seen->ids, i)
+ if (*i != ancestor && bch2_snapshot_is_ancestor(c, id, *i))
return false;
return true;
@@ -806,7 +831,7 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w,
if (!n->whiteout) {
return bch2_inode_unpack(inode, &n->inode);
} else {
- n->inode.bi_inum = inode.k->p.inode;
+ n->inode.bi_inum = inode.k->p.offset;
n->inode.bi_snapshot = inode.k->p.snapshot;
return 0;
}
@@ -903,17 +928,15 @@ lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, str
w->last_pos.inode, k.k->p.snapshot, i->inode.bi_snapshot,
(bch2_bkey_val_to_text(&buf, c, k),
buf.buf))) {
- struct bch_inode_unpacked new = i->inode;
- struct bkey_i whiteout;
-
- new.bi_snapshot = k.k->p.snapshot;
-
if (!i->whiteout) {
+ struct bch_inode_unpacked new = i->inode;
+ new.bi_snapshot = k.k->p.snapshot;
ret = __bch2_fsck_write_inode(trans, &new);
} else {
+ struct bkey_i whiteout;
bkey_init(&whiteout.k);
whiteout.k.type = KEY_TYPE_whiteout;
- whiteout.k.p = SPOS(0, i->inode.bi_inum, i->inode.bi_snapshot);
+ whiteout.k.p = SPOS(0, i->inode.bi_inum, k.k->p.snapshot);
ret = bch2_btree_insert_nonextent(trans, BTREE_ID_inodes,
&whiteout,
BTREE_UPDATE_internal_snapshot_node);
@@ -1135,13 +1158,14 @@ static int check_inode(struct btree_trans *trans,
if (ret)
goto err;
- if (u.bi_dir || u.bi_dir_offset) {
+ if (bch2_inode_has_backpointer(&u)) {
ret = check_inode_dirent_inode(trans, &u, &do_update);
if (ret)
goto err;
}
- if (fsck_err_on(u.bi_dir && (u.bi_flags & BCH_INODE_unlinked),
+ if (fsck_err_on(bch2_inode_has_backpointer(&u) &&
+ (u.bi_flags & BCH_INODE_unlinked),
trans, inode_unlinked_but_has_dirent,
"inode unlinked but has dirent\n%s",
(printbuf_reset(&buf),
@@ -1438,6 +1462,7 @@ static int check_key_has_inode(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
+ struct btree_iter iter2 = {};
int ret = PTR_ERR_OR_ZERO(i);
if (ret)
return ret;
@@ -1447,40 +1472,105 @@ static int check_key_has_inode(struct btree_trans *trans,
bool have_inode = i && !i->whiteout;
- if (!have_inode && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
- ret = reconstruct_inode(trans, iter->btree_id, k.k->p.snapshot, k.k->p.inode) ?:
- bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
- if (ret)
- goto err;
+ if (!have_inode && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes)))
+ goto reconstruct;
- inode->last_pos.inode--;
- ret = bch_err_throw(c, transaction_restart_nested);
- goto err;
+ if (have_inode && btree_matches_i_mode(iter->btree_id, i->inode.bi_mode))
+ goto out;
+
+ prt_printf(&buf, ", ");
+
+ bool have_old_inode = false;
+ darray_for_each(inode->inodes, i2)
+ if (!i2->whiteout &&
+ bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i2->inode.bi_snapshot) &&
+ btree_matches_i_mode(iter->btree_id, i2->inode.bi_mode)) {
+ prt_printf(&buf, "but found good inode in older snapshot\n");
+ bch2_inode_unpacked_to_text(&buf, &i2->inode);
+ prt_newline(&buf);
+ have_old_inode = true;
+ break;
+ }
+
+ struct bkey_s_c k2;
+ unsigned nr_keys = 0;
+
+ prt_printf(&buf, "found keys:\n");
+
+ for_each_btree_key_max_norestart(trans, iter2, iter->btree_id,
+ SPOS(k.k->p.inode, 0, k.k->p.snapshot),
+ POS(k.k->p.inode, U64_MAX),
+ 0, k2, ret) {
+ nr_keys++;
+ if (nr_keys <= 10) {
+ bch2_bkey_val_to_text(&buf, c, k2);
+ prt_newline(&buf);
+ }
+ if (nr_keys >= 100)
+ break;
}
- if (fsck_err_on(!have_inode,
- trans, key_in_missing_inode,
- "key in missing inode:\n%s",
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
- goto delete;
+ if (ret)
+ goto err;
- if (fsck_err_on(have_inode && !btree_matches_i_mode(iter->btree_id, i->inode.bi_mode),
- trans, key_in_wrong_inode_type,
- "key for wrong inode mode %o:\n%s",
- i->inode.bi_mode,
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
- goto delete;
+	if (nr_keys >= 100)
+ prt_printf(&buf, "found > %u keys for this missing inode\n", nr_keys);
+ else if (nr_keys > 10)
+ prt_printf(&buf, "found %u keys for this missing inode\n", nr_keys);
+
+ if (!have_inode) {
+ if (fsck_err_on(!have_inode,
+ trans, key_in_missing_inode,
+ "key in missing inode%s", buf.buf)) {
+ /*
+ * Maybe a deletion that raced with data move, or something
+ * weird like that? But if we know the inode was deleted, or
+ * it's just a few keys, we can safely delete them.
+ *
+ * If it's many keys, we should probably recreate the inode
+ */
+ if (have_old_inode || nr_keys <= 2)
+ goto delete;
+ else
+ goto reconstruct;
+ }
+ } else {
+ /*
+ * not autofix, this one would be a giant wtf - bit error in the
+ * inode corrupting i_mode?
+ *
+ * may want to try repairing inode instead of deleting
+ */
+ if (fsck_err_on(!btree_matches_i_mode(iter->btree_id, i->inode.bi_mode),
+ trans, key_in_wrong_inode_type,
+ "key for wrong inode mode %o%s",
+ i->inode.bi_mode, buf.buf))
+ goto delete;
+ }
out:
err:
fsck_err:
+ bch2_trans_iter_exit(trans, &iter2);
printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
delete:
+ /*
+ * XXX: print out more info
+ * count up extents for this inode, check if we have different inode in
+ * an older snapshot version, perhaps decide if we want to reconstitute
+ */
ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_internal_snapshot_node);
goto out;
+reconstruct:
+ ret = reconstruct_inode(trans, iter->btree_id, k.k->p.snapshot, k.k->p.inode) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto err;
+
+ inode->last_pos.inode--;
+ ret = bch_err_throw(c, transaction_restart_nested);
+ goto out;
}
static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w)
@@ -1822,20 +1912,20 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
!key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot))
continue;
- if (fsck_err_on(k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
+ u64 last_block = round_up(i->inode.bi_size, block_bytes(c)) >> 9;
+
+ if (fsck_err_on(k.k->p.offset > last_block &&
!bkey_extent_is_reservation(k),
trans, extent_past_end_of_inode,
"extent type past end of inode %llu:%u, i_size %llu\n%s",
i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
- struct btree_iter iter2;
-
- bch2_trans_copy_iter(trans, &iter2, iter);
- bch2_btree_iter_set_snapshot(trans, &iter2, i->inode.bi_snapshot);
- ret = bch2_btree_iter_traverse(trans, &iter2) ?:
- bch2_btree_delete_at(trans, &iter2,
- BTREE_UPDATE_internal_snapshot_node);
- bch2_trans_iter_exit(trans, &iter2);
+ ret = snapshots_seen_add_inorder(c, s, i->inode.bi_snapshot) ?:
+ bch2_fpunch_snapshot(trans,
+ SPOS(i->inode.bi_inum,
+ last_block,
+ i->inode.bi_snapshot),
+ POS(i->inode.bi_inum, U64_MAX));
if (ret)
goto err;
@@ -1949,14 +2039,22 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_
continue;
}
- if (fsck_err_on(i->inode.bi_nlink != i->count,
- trans, inode_dir_wrong_nlink,
- "directory %llu:%u with wrong i_nlink: got %u, should be %llu",
- w->last_pos.inode, i->inode.bi_snapshot, i->inode.bi_nlink, i->count)) {
- i->inode.bi_nlink = i->count;
- ret = bch2_fsck_write_inode(trans, &i->inode);
- if (ret)
- break;
+ if (i->inode.bi_nlink != i->count) {
+ CLASS(printbuf, buf)();
+
+ lockrestart_do(trans,
+ bch2_inum_snapshot_to_path(trans, w->last_pos.inode,
+ i->inode.bi_snapshot, NULL, &buf));
+
+ if (fsck_err_on(i->inode.bi_nlink != i->count,
+ trans, inode_dir_wrong_nlink,
+ "directory with wrong i_nlink: got %u, should be %llu\n%s",
+ i->inode.bi_nlink, i->count, buf.buf)) {
+ i->inode.bi_nlink = i->count;
+ ret = bch2_fsck_write_inode(trans, &i->inode);
+ if (ret)
+ break;
+ }
}
}
fsck_err:
@@ -2184,9 +2282,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &i->inode);
dir->first_this_inode = false;
-#ifdef CONFIG_UNICODE
hash_info->cf_encoding = bch2_inode_casefold(c, &i->inode) ? c->cf_encoding : NULL;
-#endif
ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info,
iter, k, need_second_pass);
@@ -2493,6 +2589,11 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter,
if (k.k->type != KEY_TYPE_subvolume)
return 0;
+ subvol_inum start = {
+ .subvol = k.k->p.offset,
+ .inum = le64_to_cpu(bkey_s_c_to_subvolume(k).v->inode),
+ };
+
while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) {
ret = darray_push(&subvol_path, k.k->p.offset);
if (ret)
@@ -2511,11 +2612,11 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter,
if (darray_u32_has(&subvol_path, parent)) {
printbuf_reset(&buf);
- prt_printf(&buf, "subvolume loop:\n");
+ prt_printf(&buf, "subvolume loop: ");
- darray_for_each_reverse(subvol_path, i)
- prt_printf(&buf, "%u ", *i);
- prt_printf(&buf, "%u", parent);
+ ret = bch2_inum_to_path(trans, start, &buf);
+ if (ret)
+ goto err;
if (fsck_err(trans, subvol_loop, "%s", buf.buf))
ret = reattach_subvol(trans, s);
@@ -2559,19 +2660,13 @@ int bch2_check_subvolume_structure(struct bch_fs *c)
return ret;
}
-struct pathbuf_entry {
- u64 inum;
- u32 snapshot;
-};
-
-typedef DARRAY(struct pathbuf_entry) pathbuf;
-
-static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p,
+static int bch2_bi_depth_renumber_one(struct btree_trans *trans,
+ u64 inum, u32 snapshot,
u32 new_depth)
{
struct btree_iter iter;
struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
- SPOS(0, p->inum, p->snapshot), 0);
+ SPOS(0, inum, snapshot), 0);
struct bch_inode_unpacked inode;
int ret = bkey_err(k) ?:
@@ -2590,14 +2685,15 @@ err:
return ret;
}
-static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth)
+static int bch2_bi_depth_renumber(struct btree_trans *trans, darray_u64 *path,
+ u32 snapshot, u32 new_bi_depth)
{
u32 restart_count = trans->restart_count;
int ret = 0;
darray_for_each_reverse(*path, i) {
ret = nested_lockrestart_do(trans,
- bch2_bi_depth_renumber_one(trans, i, new_bi_depth));
+ bch2_bi_depth_renumber_one(trans, *i, snapshot, new_bi_depth));
bch_err_fn(trans->c, ret);
if (ret)
break;
@@ -2608,37 +2704,36 @@ static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32
return ret ?: trans_was_restarted(trans, restart_count);
}
-static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
-{
- darray_for_each(*p, i)
- if (i->inum == inum &&
- i->snapshot == snapshot)
- return true;
- return false;
-}
-
static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
{
struct bch_fs *c = trans->c;
struct btree_iter inode_iter = {};
- pathbuf path = {};
+ darray_u64 path = {};
struct printbuf buf = PRINTBUF;
u32 snapshot = inode_k.k->p.snapshot;
bool redo_bi_depth = false;
u32 min_bi_depth = U32_MAX;
int ret = 0;
+ struct bpos start = inode_k.k->p;
+
struct bch_inode_unpacked inode;
ret = bch2_inode_unpack(inode_k, &inode);
if (ret)
return ret;
- while (!inode.bi_subvol) {
+ /*
+	 * If we're running full fsck, check_dirents() will have already run,
+ * and we shouldn't see any missing backpointers here - otherwise that's
+ * handled separately, by check_unreachable_inodes
+ */
+ while (!inode.bi_subvol &&
+ bch2_inode_has_backpointer(&inode)) {
struct btree_iter dirent_iter;
struct bkey_s_c_dirent d;
- u32 parent_snapshot = snapshot;
- d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot);
+ d = dirent_get_by_pos(trans, &dirent_iter,
+ SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot));
ret = bkey_err(d.s_c);
if (ret && !bch2_err_matches(ret, ENOENT))
goto out;
@@ -2656,15 +2751,10 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
bch2_trans_iter_exit(trans, &dirent_iter);
- ret = darray_push(&path, ((struct pathbuf_entry) {
- .inum = inode.bi_inum,
- .snapshot = snapshot,
- }));
+ ret = darray_push(&path, inode.bi_inum);
if (ret)
return ret;
- snapshot = parent_snapshot;
-
bch2_trans_iter_exit(trans, &inode_iter);
inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
SPOS(0, inode.bi_dir, snapshot), 0);
@@ -2686,21 +2776,28 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
break;
inode = parent_inode;
- snapshot = inode_k.k->p.snapshot;
redo_bi_depth = true;
- if (path_is_dup(&path, inode.bi_inum, snapshot)) {
+ if (darray_find(path, inode.bi_inum)) {
printbuf_reset(&buf);
- prt_printf(&buf, "directory structure loop:\n");
- darray_for_each_reverse(path, i)
- prt_printf(&buf, "%llu:%u ", i->inum, i->snapshot);
- prt_printf(&buf, "%llu:%u", inode.bi_inum, snapshot);
+ prt_printf(&buf, "directory structure loop in snapshot %u: ",
+ snapshot);
+
+ ret = bch2_inum_snapshot_to_path(trans, start.offset, start.snapshot, NULL, &buf);
+ if (ret)
+ goto out;
+
+ if (c->opts.verbose) {
+ prt_newline(&buf);
+ darray_for_each(path, i)
+ prt_printf(&buf, "%llu ", *i);
+ }
if (fsck_err(trans, dir_loop, "%s", buf.buf)) {
ret = remove_backpointer(trans, &inode);
bch_err_msg(c, ret, "removing dirent");
if (ret)
- break;
+ goto out;
ret = reattach_inode(trans, &inode);
bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
@@ -2714,7 +2811,7 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
min_bi_depth = 0;
if (redo_bi_depth)
- ret = bch2_bi_depth_renumber(trans, &path, min_bi_depth);
+ ret = bch2_bi_depth_renumber(trans, &path, snapshot, min_bi_depth);
out:
fsck_err:
bch2_trans_iter_exit(trans, &inode_iter);
@@ -2731,7 +2828,7 @@ fsck_err:
int bch2_check_directory_structure(struct bch_fs *c)
{
int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN,
+ for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_inodes, POS_MIN,
BTREE_ITER_intent|
BTREE_ITER_prefetch|
BTREE_ITER_all_snapshots, k,
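
The diagnostics in check_key_has_inode() above are deliberately bounded: print the first ten keys verbatim, keep counting up to a cap of one hundred, then summarize. The same show-some/count-capped shape in isolation (data hypothetical):

#include <stdio.h>

/* print at most 10 items but count up to a cap of 100, so the summary
 * can say "found %u" vs "found > %u" without an unbounded scan */
static void summarize(const int *items, unsigned n)
{
	unsigned nr = 0;

	for (unsigned i = 0; i < n; i++) {
		nr++;
		if (nr <= 10)
			printf("%d\n", items[i]);
		if (nr >= 100)
			break;
	}

	if (nr >= 100)
		printf("found > %u items\n", nr);
	else if (nr > 10)
		printf("found %u items\n", nr);
}
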
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 53e5dc1f6ac1..ef4cc7395b86 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -1265,7 +1265,14 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum,
{
struct bch_fs *c = trans->c;
-#ifdef CONFIG_UNICODE
+#ifndef CONFIG_UNICODE
+ bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE");
+ return -EOPNOTSUPP;
+#endif
+
+ if (c->opts.casefold_disabled)
+ return -EOPNOTSUPP;
+
int ret = 0;
/* Not supported on individual files. */
if (!S_ISDIR(bi->bi_mode))
@@ -1289,10 +1296,6 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum,
bi->bi_fields_set |= BIT(Inode_opt_casefold);
return bch2_maybe_propagate_has_case_insensitive(trans, inum, bi);
-#else
- bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE");
- return -EOPNOTSUPP;
-#endif
}
static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index 82cec2836cbd..b8ec3e628d90 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -254,6 +254,11 @@ static inline bool bch2_inode_casefold(struct bch_fs *c, const struct bch_inode_
: c->opts.casefold;
}
+static inline bool bch2_inode_has_backpointer(const struct bch_inode_unpacked *bi)
+{
+ return bi->bi_dir || bi->bi_dir_offset;
+}
+
/* i_nlink: */
static inline unsigned nlink_bias(umode_t mode)
diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
index bf72b1d2e2cb..07023667a475 100644
--- a/fs/bcachefs/io_misc.c
+++ b/fs/bcachefs/io_misc.c
@@ -135,6 +135,33 @@ err_noprint:
return ret;
}
+/* For fsck */
+int bch2_fpunch_snapshot(struct btree_trans *trans, struct bpos start, struct bpos end)
+{
+ u32 restart_count = trans->restart_count;
+ struct bch_fs *c = trans->c;
+ struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0);
+ unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
+ struct bkey_i delete;
+
+ int ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents,
+ start, end, 0, k,
+ &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ bkey_init(&delete.k);
+ delete.k.p = iter.pos;
+
+ /* create the biggest key we can */
+ bch2_key_resize(&delete.k, max_sectors);
+ bch2_cut_back(end, &delete);
+
+ bch2_extent_trim_atomic(trans, &iter, &delete) ?:
+ bch2_trans_update(trans, &iter, &delete, 0);
+ }));
+
+ bch2_disk_reservation_put(c, &disk_res);
+ return ret ?: trans_was_restarted(trans, restart_count);
+}
+
/*
 * Returns -BCH_ERR_transaction_restart if we had to drop locks:
*/
diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h
index 9cb44a7c43c1..b93e4d4b3c0c 100644
--- a/fs/bcachefs/io_misc.h
+++ b/fs/bcachefs/io_misc.h
@@ -5,6 +5,8 @@
int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *,
u64, struct bch_io_opts, s64 *,
struct write_point_specifier);
+
+int bch2_fpunch_snapshot(struct btree_trans *, struct bpos, struct bpos);
int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
subvol_inum, u64, s64 *);
int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);
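
bch2_fpunch_snapshot() above walks the range creating the largest whiteout key it can, then trims it back to the range end and to what one transaction can atomically carry. Stripped of btree details, the chunked range delete looks like this (all names hypothetical):

/* delete [start, end) in bounded chunks, restarting each iteration from
 * wherever the previous one actually stopped */
static void range_delete(unsigned long start, unsigned long end,
			 unsigned long max_chunk)
{
	while (start < end) {
		unsigned long this_end = start + max_chunk;

		if (this_end > end || this_end < start)	/* clamp + overflow */
			this_end = end;			/* the cut_back() step */

		/* ... emit one whiteout covering [start, this_end) ... */
		start = this_end;
	}
}
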
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index a77779afad01..e0874ad9a6cf 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -166,6 +166,7 @@ static noinline void promote_free(struct bch_read_bio *rbio)
BUG_ON(ret);
async_object_list_del(c, promote, op->list_idx);
+ async_object_list_del(c, rbio, rbio->list_idx);
bch2_data_update_exit(&op->write);
@@ -343,6 +344,10 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans,
*bounce = true;
*read_full = promote_full;
+
+ if (have_io_error(failed))
+ orig->self_healing = true;
+
return promote;
nopromote:
trace_io_read_nopromote(c, ret);
@@ -452,6 +457,10 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
if (rbio->start_time)
bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
rbio->start_time);
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ if (rbio->list_idx)
+ async_object_list_del(rbio->c, rbio, rbio->list_idx);
+#endif
bio_endio(&rbio->bio);
}
@@ -635,12 +644,15 @@ static void bch2_rbio_retry(struct work_struct *work)
prt_str(&buf, "(internal move) ");
prt_str(&buf, "data read error, ");
- if (!ret)
+ if (!ret) {
prt_str(&buf, "successful retry");
- else
+ if (rbio->self_healing)
+ prt_str(&buf, ", self healing");
+ } else
prt_str(&buf, bch2_err_str(ret));
prt_newline(&buf);
+
if (!bkey_deleted(&sk.k->k)) {
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(sk.k));
prt_newline(&buf);
@@ -1484,7 +1496,12 @@ void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio)
prt_printf(out, "have_ioref:\t%u\n", rbio->have_ioref);
prt_printf(out, "narrow_crcs:\t%u\n", rbio->narrow_crcs);
prt_printf(out, "context:\t%u\n", rbio->context);
- prt_printf(out, "ret:\t%s\n", bch2_err_str(rbio->ret));
+
+ int ret = READ_ONCE(rbio->ret);
+ if (ret < 0)
+ prt_printf(out, "ret:\t%s\n", bch2_err_str(ret));
+ else
+ prt_printf(out, "ret:\t%i\n", ret);
prt_printf(out, "flags:\t");
bch2_prt_bitflags(out, bch2_read_bio_flags, rbio->flags);
diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h
index 45c959018919..9c5ddbf861b3 100644
--- a/fs/bcachefs/io_read.h
+++ b/fs/bcachefs/io_read.h
@@ -44,6 +44,7 @@ struct bch_read_bio {
have_ioref:1,
narrow_crcs:1,
saw_error:1,
+ self_healing:1,
context:2;
};
u16 _state;
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index dda802a656cf..ddfeb0dafc9d 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1082,6 +1082,7 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou
if (open && !*blocked) {
__bch2_journal_block(j);
+ s.v = atomic64_read_acquire(&j->reservations.counter);
*blocked = true;
}
@@ -1283,7 +1284,7 @@ static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca
ret = 0; /* wait and retry */
bch2_disk_reservation_put(c, &disk_res);
- closure_sync(&cl);
+ bch2_wait_on_allocator(c, &cl);
}
return ret;
@@ -1474,14 +1475,13 @@ void bch2_fs_journal_stop(struct journal *j)
clear_bit(JOURNAL_running, &j->flags);
}
-int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
+int bch2_fs_journal_start(struct journal *j, u64 last_seq, u64 cur_seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_entry_pin_list *p;
struct journal_replay *i, **_i;
struct genradix_iter iter;
bool had_entries = false;
- u64 last_seq = cur_seq, nr, seq;
/*
*
@@ -1495,17 +1495,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
return -EINVAL;
}
- genradix_for_each_reverse(&c->journal_entries, iter, _i) {
- i = *_i;
-
- if (journal_replay_ignore(i))
- continue;
+ /* Clean filesystem? */
+ if (!last_seq)
+ last_seq = cur_seq;
- last_seq = le64_to_cpu(i->j.last_seq);
- break;
- }
-
- nr = cur_seq - last_seq;
+ u64 nr = cur_seq - last_seq;
/*
* Extra fudge factor, in case we crashed when the journal pin fifo was
@@ -1532,6 +1526,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
j->pin.back = cur_seq;
atomic64_set(&j->seq, cur_seq - 1);
+ u64 seq;
fifo_for_each_entry_ptr(p, &j->pin, seq)
journal_pin_list_init(p, 1);
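
bch2_fs_journal_start() now receives last_seq from the journal read instead of re-deriving it from the replay entries, with zero meaning a clean filesystem. The resulting pin-fifo arithmetic, as a standalone sketch (names hypothetical, and without the fudge factor the real code adds):

#include <assert.h>

/* the pin fifo must cover [last_seq, cur_seq]; a clean filesystem has
 * nothing to replay, so last_seq == 0 collapses to cur_seq */
static unsigned long journal_pin_span(unsigned long last_seq,
				      unsigned long cur_seq)
{
	if (!last_seq)			/* clean filesystem? */
		last_seq = cur_seq;
	assert(last_seq <= cur_seq);
	return cur_seq - last_seq;	/* entries to initialize */
}
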
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 83734fe4331f..977907038d98 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -453,7 +453,7 @@ int bch2_fs_journal_alloc(struct bch_fs *);
void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
void bch2_fs_journal_stop(struct journal *);
-int bch2_fs_journal_start(struct journal *, u64);
+int bch2_fs_journal_start(struct journal *, u64, u64);
void bch2_journal_set_replay_done(struct journal *);
void bch2_dev_journal_exit(struct bch_dev *);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 0b15d71a8d2d..9e028dbcc3d0 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -160,6 +160,9 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
struct printbuf buf = PRINTBUF;
int ret = JOURNAL_ENTRY_ADD_OK;
+ if (last_seq && c->opts.journal_rewind)
+ last_seq = min(last_seq, c->opts.journal_rewind);
+
if (!c->journal.oldest_seq_found_ondisk ||
le64_to_cpu(j->seq) < c->journal.oldest_seq_found_ondisk)
c->journal.oldest_seq_found_ondisk = le64_to_cpu(j->seq);
@@ -1430,11 +1433,21 @@ int bch2_journal_read(struct bch_fs *c,
printbuf_reset(&buf);
prt_printf(&buf, "journal read done, replaying entries %llu-%llu",
*last_seq, *blacklist_seq - 1);
+
+ /*
+	 * Drop blacklisted entries and entries older than last_seq (or the
+	 * start of the journal rewind):
+ */
+ u64 drop_before = *last_seq;
+ if (c->opts.journal_rewind) {
+ drop_before = min(drop_before, c->opts.journal_rewind);
+ prt_printf(&buf, " (rewinding from %llu)", c->opts.journal_rewind);
+ }
+
+ *last_seq = drop_before;
if (*start_seq != *blacklist_seq)
prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1);
bch_info(c, "%s", buf.buf);
-
- /* Drop blacklisted entries and entries older than last_seq: */
genradix_for_each(&c->journal_entries, radix_iter, _i) {
i = *_i;
@@ -1442,7 +1455,7 @@ int bch2_journal_read(struct bch_fs *c,
continue;
seq = le64_to_cpu(i->j.seq);
- if (seq < *last_seq) {
+ if (seq < drop_before) {
journal_replay_free(c, i, false);
continue;
}
@@ -1455,7 +1468,7 @@ int bch2_journal_read(struct bch_fs *c,
}
}
- ret = bch2_journal_check_for_missing(c, *last_seq, *blacklist_seq - 1);
+ ret = bch2_journal_check_for_missing(c, drop_before, *blacklist_seq - 1);
if (ret)
goto err;
@@ -1703,9 +1716,10 @@ static CLOSURE_CALLBACK(journal_write_done)
bch2_log_msg_start(c, &buf);
if (err == -BCH_ERR_journal_write_err)
- prt_printf(&buf, "unable to write journal to sufficient devices");
+ prt_printf(&buf, "unable to write journal to sufficient devices\n");
else
- prt_printf(&buf, "journal write error marking replicas: %s", bch2_err_str(err));
+ prt_printf(&buf, "journal write error marking replicas: %s\n",
+ bch2_err_str(err));
bch2_fs_emergency_read_only2(c, &buf);
@@ -1753,6 +1767,7 @@ static CLOSURE_CALLBACK(journal_write_done)
closure_wake_up(&c->freelist_wait);
bch2_reset_alloc_cursors(c);
+ do_discards = true;
}
j->seq_ondisk = seq;
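
The journal read above computes a single cutoff: entries older than last_seq are normally freed before replay, but the journal_rewind option pulls the cutoff backwards so older entries survive to be replayed. The min() in isolation (hypothetical helper):

/* anything older than the cutoff is freed before replay; journal_rewind
 * pulls the cutoff backwards so rewound entries survive to be replayed */
static unsigned long drop_cutoff(unsigned long last_seq,
				 unsigned long journal_rewind)
{
	unsigned long drop_before = last_seq;

	if (journal_rewind && journal_rewind < drop_before)
		drop_before = journal_rewind;
	return drop_before;
}
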
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index cd6201741c59..0042d43b8e57 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -170,6 +170,12 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne
return (struct journal_space) { 0, 0 };
/*
+ * It's possible for bucket size to be misaligned w.r.t. the filesystem
+ * block size:
+ */
+ min_bucket_size = round_down(min_bucket_size, block_sectors(c));
+
+ /*
* We sorted largest to smallest, and we want the smallest out of the
* @nr_devs_want largest devices:
*/
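
round_down() here discards the tail of a journal bucket that is not a whole filesystem block, since bucket size need not be block-aligned. A tiny worked example with made-up numbers:

#include <stdio.h>

/* round_down() for a power-of-two multiple, as in include/linux/math.h */
#define round_down(x, y)	((x) & ~((__typeof__(x))((y) - 1)))

int main(void)
{
	unsigned bucket_sectors = 1022;	/* hypothetical, misaligned bucket */
	unsigned block_sectors  = 8;	/* 4KiB blocks on 512-byte sectors */

	/* only whole filesystem blocks within the bucket are usable */
	printf("%u\n", round_down(bucket_sectors, block_sectors)); /* 1016 */
	return 0;
}
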
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 6d7b1d5f7697..5e6de91a8763 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -28,7 +28,7 @@
#include <linux/wait.h>
struct buckets_in_flight {
- struct rhashtable table;
+ struct rhashtable *table;
struct move_bucket *first;
struct move_bucket *last;
size_t nr;
@@ -71,7 +71,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
if (ret)
return ret;
- struct bch_dev *ca = bch2_dev_tryget(c, k.k->p.inode);
+ struct bch_dev *ca = bch2_dev_bucket_tryget(c, k.k->p);
if (!ca)
goto out;
@@ -98,7 +98,7 @@ out:
static void move_bucket_free(struct buckets_in_flight *list,
struct move_bucket *b)
{
- int ret = rhashtable_remove_fast(&list->table, &b->hash,
+ int ret = rhashtable_remove_fast(list->table, &b->hash,
bch_move_bucket_params);
BUG_ON(ret);
kfree(b);
@@ -133,7 +133,7 @@ static void move_buckets_wait(struct moving_context *ctxt,
static bool bucket_in_flight(struct buckets_in_flight *list,
struct move_bucket_key k)
{
- return rhashtable_lookup_fast(&list->table, &k, bch_move_bucket_params);
+ return rhashtable_lookup_fast(list->table, &k, bch_move_bucket_params);
}
static int bch2_copygc_get_buckets(struct moving_context *ctxt,
@@ -185,7 +185,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
goto err;
}
- ret2 = rhashtable_lookup_insert_fast(&buckets_in_flight->table, &b_i->hash,
+ ret2 = rhashtable_lookup_insert_fast(buckets_in_flight->table, &b_i->hash,
bch_move_bucket_params);
BUG_ON(ret2);
@@ -350,10 +350,13 @@ static int bch2_copygc_thread(void *arg)
struct buckets_in_flight buckets = {};
u64 last, wait;
- int ret = rhashtable_init(&buckets.table, &bch_move_bucket_params);
+ buckets.table = kzalloc(sizeof(*buckets.table), GFP_KERNEL);
+ int ret = !buckets.table
+ ? -ENOMEM
+ : rhashtable_init(buckets.table, &bch_move_bucket_params);
bch_err_msg(c, ret, "allocating copygc buckets in flight");
if (ret)
- return ret;
+ goto err;
set_freezable();
@@ -421,11 +424,12 @@ static int bch2_copygc_thread(void *arg)
}
move_buckets_wait(&ctxt, &buckets, true);
- rhashtable_destroy(&buckets.table);
+ rhashtable_destroy(buckets.table);
bch2_moving_ctxt_exit(&ctxt);
bch2_move_stats_exit(&move_stats, c);
-
- return 0;
+err:
+ kfree(buckets.table);
+ return ret;
}
void bch2_copygc_stop(struct bch_fs *c)
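
The movinggc change converts the embedded rhashtable into a heap allocation, presumably because struct rhashtable is large for a structure living on the copygc thread's stack, and it reroutes all exits through one error path where freeing a NULL pointer is safe. A sketch of the same shape with hypothetical types:

#include <stdlib.h>

struct big_table { char buf[4096]; };	/* stand-in for struct rhashtable */

struct in_flight {
	struct big_table *table;	/* a pointer keeps the enclosing
					 * struct small enough for the stack */
	size_t nr;
};

static int copygc_thread(void)
{
	struct in_flight buckets = {0};
	int ret = 0;

	buckets.table = calloc(1, sizeof(*buckets.table));
	if (!buckets.table) {
		ret = -1;
		goto err;
	}

	/* ... main loop using buckets.table ... */
err:
	free(buckets.table);		/* free(NULL) is a harmless no-op */
	return ret;
}
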
diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c
index 24120037c031..c3f87c59922d 100644
--- a/fs/bcachefs/namei.c
+++ b/fs/bcachefs/namei.c
@@ -175,6 +175,16 @@ int bch2_create_trans(struct btree_trans *trans,
new_inode->bi_dir_offset = dir_offset;
}
+ if (S_ISDIR(mode)) {
+ ret = bch2_maybe_propagate_has_case_insensitive(trans,
+ (subvol_inum) {
+ new_inode->bi_subvol ?: dir.subvol,
+ new_inode->bi_inum },
+ new_inode);
+ if (ret)
+ goto err;
+ }
+
if (S_ISDIR(mode) &&
!new_inode->bi_subvol)
new_inode->bi_depth = dir_u->bi_depth + 1;
@@ -615,14 +625,26 @@ static int __bch2_inum_to_path(struct btree_trans *trans,
{
unsigned orig_pos = path->pos;
int ret = 0;
+ DARRAY(subvol_inum) inums = {};
+
+ if (!snapshot) {
+ ret = bch2_subvolume_get_snapshot(trans, subvol, &snapshot);
+ if (ret)
+ goto disconnected;
+ }
while (true) {
- if (!snapshot) {
- ret = bch2_subvolume_get_snapshot(trans, subvol, &snapshot);
- if (ret)
- goto disconnected;
+ subvol_inum n = (subvol_inum) { subvol ?: snapshot, inum };
+
+ if (darray_find_p(inums, i, i->subvol == n.subvol && i->inum == n.inum)) {
+ prt_str_reversed(path, "(loop)");
+ break;
}
+ ret = darray_push(&inums, n);
+ if (ret)
+ goto err;
+
struct bch_inode_unpacked inode;
ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &inode, 0);
if (ret)
@@ -640,7 +662,9 @@ static int __bch2_inum_to_path(struct btree_trans *trans,
inum = inode.bi_dir;
if (inode.bi_parent_subvol) {
subvol = inode.bi_parent_subvol;
- snapshot = 0;
+ ret = bch2_subvolume_get_snapshot(trans, inode.bi_parent_subvol, &snapshot);
+ if (ret)
+ goto disconnected;
}
struct btree_iter d_iter;
@@ -652,6 +676,7 @@ static int __bch2_inum_to_path(struct btree_trans *trans,
goto disconnected;
struct qstr dirent_name = bch2_dirent_get_name(d);
+
prt_bytes_reversed(path, dirent_name.name, dirent_name.len);
prt_char(path, '/');
@@ -667,8 +692,10 @@ out:
goto err;
reverse_bytes(path->buf + orig_pos, path->pos - orig_pos);
+ darray_exit(&inums);
return 0;
err:
+ darray_exit(&inums);
return ret;
disconnected:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -707,8 +734,7 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans,
if (inode_points_to_dirent(target, d))
return 0;
- if (!target->bi_dir &&
- !target->bi_dir_offset) {
+ if (!bch2_inode_has_backpointer(target)) {
fsck_err_on(S_ISDIR(target->bi_mode),
trans, inode_dir_missing_backpointer,
"directory with missing backpointer\n%s",
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index 2a02606254b3..63f8e254495c 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -234,6 +234,11 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH_SB_CASEFOLD, false, \
NULL, "Dirent lookups are casefolded") \
+ x(casefold_disabled, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Disable casefolding filesystem wide") \
x(inodes_32bit, u8, \
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
@@ -379,6 +384,11 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
NULL, "Exit recovery immediately prior to journal replay")\
+ x(journal_rewind, u64, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_UINT(0, U64_MAX), \
+ BCH2_NO_SB_OPT, 0, \
+ NULL, "Rewind journal") \
x(recovery_passes, u64, \
OPT_FS|OPT_MOUNT, \
OPT_BITFIELD(bch2_recovery_passes), \
diff --git a/fs/bcachefs/rcu_pending.c b/fs/bcachefs/rcu_pending.c
index bef2aa1b8bcd..b1438be9d690 100644
--- a/fs/bcachefs/rcu_pending.c
+++ b/fs/bcachefs/rcu_pending.c
@@ -182,11 +182,6 @@ static inline void kfree_bulk(size_t nr, void ** p)
while (nr--)
kfree(*p);
}
-
-#define local_irq_save(flags) \
-do { \
- flags = 0; \
-} while (0)
#endif
static noinline void __process_finished_items(struct rcu_pending *pending,
@@ -429,9 +424,15 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head,
BUG_ON((ptr != NULL) != (pending->process == RCU_PENDING_KVFREE_FN));
- local_irq_save(flags);
- p = this_cpu_ptr(pending->p);
- spin_lock(&p->lock);
+	/*
+	 * We could technically be scheduled before taking the lock and end up
+	 * using a different cpu's rcu_pending_pcpu: that's ok, it needs a lock
+	 * anyway.
+	 *
+	 * And we have to do it this way to avoid breaking PREEMPT_RT, which
+	 * redefines how spinlocks work:
+	 */
+ p = raw_cpu_ptr(pending->p);
+ spin_lock_irqsave(&p->lock, flags);
rcu_gp_poll_state_t seq = __get_state_synchronize_rcu(pending->srcu);
restart:
if (may_sleep &&
@@ -520,9 +521,8 @@ check_expired:
goto free_node;
}
- local_irq_save(flags);
- p = this_cpu_ptr(pending->p);
- spin_lock(&p->lock);
+ p = raw_cpu_ptr(pending->p);
+ spin_lock_irqsave(&p->lock, flags);
goto restart;
}
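
The rcu_pending change replaces this_cpu_ptr() under local_irq_save() with raw_cpu_ptr() plus spin_lock_irqsave(): on PREEMPT_RT spinlocks may sleep, so they cannot be taken with interrupts hard-disabled, and migrating to another CPU's structure before the lock is acquired is harmless because the lock, not CPU affinity, provides the exclusion. A userspace analogue of that "any shard works" reasoning (names hypothetical):

#include <pthread.h>

#define NR_SHARDS 8

struct shard {
	pthread_mutex_t	lock;
	unsigned long	nr;
};

static struct shard shards[NR_SHARDS] = {
	[0 ... NR_SHARDS - 1] = { PTHREAD_MUTEX_INITIALIZER, 0 },
};

/* analogous to raw_cpu_ptr() + spin_lock_irqsave(): any shard is fine,
 * because the lock - not which shard we happened to pick - provides
 * the exclusion, so no preemption-disabling is needed */
static void enqueue(unsigned hint)
{
	struct shard *s = &shards[hint % NR_SHARDS];

	pthread_mutex_lock(&s->lock);
	s->nr++;
	pthread_mutex_unlock(&s->lock);
}
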
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 1e68e61f08e8..c94debb12d2f 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -99,9 +99,11 @@ int bch2_btree_lost_data(struct bch_fs *c,
goto out;
case BTREE_ID_snapshots:
ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret;
ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret;
goto out;
default:
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret;
ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret;
goto out;
}
@@ -272,6 +274,28 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
struct btree_path *path = btree_iter_path(trans, &iter);
if (unlikely(!btree_path_node(path, k->level))) {
+ struct bch_fs *c = trans->c;
+
+ CLASS(printbuf, buf)();
+ prt_str(&buf, "btree=");
+ bch2_btree_id_to_text(&buf, k->btree_id);
+ prt_printf(&buf, " level=%u ", k->level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k->k));
+
+ if (!(c->recovery.passes_complete & (BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes)|
+ BIT_ULL(BCH_RECOVERY_PASS_check_topology)))) {
+ bch_err(c, "have key in journal replay for btree depth that does not exist, confused\n%s",
+ buf.buf);
+ ret = -EINVAL;
+ }
+
+ if (!k->allocated) {
+ bch_notice(c, "dropping key in journal replay for depth that does not exist because we're recovering from scan\n%s",
+ buf.buf);
+ k->overwritten = true;
+ goto out;
+ }
+
bch2_trans_iter_exit(trans, &iter);
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, 0, iter_flags);
@@ -594,6 +618,7 @@ static int read_btree_roots(struct bch_fs *c)
buf.buf, bch2_err_str(ret))) {
if (btree_id_is_alloc(i))
r->error = 0;
+ ret = 0;
}
}
@@ -679,7 +704,7 @@ static bool check_version_upgrade(struct bch_fs *c)
ret = true;
}
- if (new_version > c->sb.version_incompat &&
+ if (new_version > c->sb.version_incompat_allowed &&
c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) {
struct printbuf buf = PRINTBUF;
@@ -739,7 +764,24 @@ int bch2_fs_recovery(struct bch_fs *c)
? min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read)
: BCH_RECOVERY_PASS_snapshots_read;
c->opts.nochanges = true;
+ }
+
+ if (c->opts.nochanges)
c->opts.read_only = true;
+
+ if (c->opts.journal_rewind) {
+ bch_info(c, "rewinding journal, fsck required");
+ c->opts.fsck = true;
+ }
+
+ if (go_rw_in_recovery(c)) {
+ /*
+ * start workqueues/kworkers early - kthread creation checks for
+ * pending signals, which is _very_ annoying
+ */
+ ret = bch2_fs_init_rw(c);
+ if (ret)
+ goto err;
}
mutex_lock(&c->sb_lock);
@@ -950,7 +992,7 @@ use_clean:
ret = bch2_journal_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu",
journal_seq, last_seq, blacklist_seq - 1) ?:
- bch2_fs_journal_start(&c->journal, journal_seq);
+ bch2_fs_journal_start(&c->journal, last_seq, journal_seq);
if (ret)
goto err;
@@ -1093,9 +1135,6 @@ use_clean:
out:
bch2_flush_fsck_errs(c);
- if (!IS_ERR(clean))
- kfree(clean);
-
if (!ret &&
test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) &&
!c->opts.nochanges) {
@@ -1104,6 +1143,9 @@ out:
}
bch_err_fn(c, ret);
+final_out:
+ if (!IS_ERR(clean))
+ kfree(clean);
return ret;
err:
fsck_err:
@@ -1111,13 +1153,13 @@ fsck_err:
struct printbuf buf = PRINTBUF;
bch2_log_msg_start(c, &buf);
- prt_printf(&buf, "error in recovery: %s", bch2_err_str(ret));
+ prt_printf(&buf, "error in recovery: %s\n", bch2_err_str(ret));
bch2_fs_emergency_read_only2(c, &buf);
bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
}
- return ret;
+ goto final_out;
}
int bch2_fs_initialize(struct bch_fs *c)
@@ -1166,7 +1208,7 @@ int bch2_fs_initialize(struct bch_fs *c)
* journal_res_get() will crash if called before this has
* set up the journal.pin FIFO and journal.cur pointer:
*/
- ret = bch2_fs_journal_start(&c->journal, 1);
+ ret = bch2_fs_journal_start(&c->journal, 1, 1);
if (ret)
goto err;
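
The bch2_btree_lost_data() hunk near the top of this file's diff chains recovery passes with the `?:` idiom: `ret = f() ?: ret` records f()'s error when it fails and otherwise keeps any error already accumulated, so every pass is still attempted. A trivial illustration with made-up step names:

#include <linux/errno.h>

static int step_one(void) { return 0; }
static int step_two(void) { return -EINVAL; }

static int run_all_steps(void)
{
	int ret = 0;

	/* Each step runs regardless; a failure doesn't short-circuit. */
	ret = step_one() ?: ret;
	ret = step_two() ?: ret;	/* ret is now -EINVAL */
	return ret;
}
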
diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c
index 605588e33fb3..6a039e011064 100644
--- a/fs/bcachefs/recovery_passes.c
+++ b/fs/bcachefs/recovery_passes.c
@@ -217,11 +217,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c)
set_bit(BCH_FS_may_go_rw, &c->flags);
- if (keys->nr ||
- !c->opts.read_only ||
- !c->sb.clean ||
- c->opts.recovery_passes ||
- (c->opts.fsck && !(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)))) {
+ if (go_rw_in_recovery(c)) {
if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) {
bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate");
bch2_reconstruct_alloc(c);
@@ -294,8 +290,13 @@ static bool recovery_pass_needs_set(struct bch_fs *c,
enum bch_run_recovery_pass_flags *flags)
{
struct bch_fs_recovery *r = &c->recovery;
- bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags);
- bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent);
+
+ /*
+ * Never run scan_for_btree_nodes persistently: check_topology will run
+ * it if required
+ */
+ if (pass == BCH_RECOVERY_PASS_scan_for_btree_nodes)
+ *flags |= RUN_RECOVERY_PASS_nopersistent;
if ((*flags & RUN_RECOVERY_PASS_ratelimit) &&
!bch2_recovery_pass_want_ratelimit(c, pass))
@@ -310,6 +311,11 @@ static bool recovery_pass_needs_set(struct bch_fs *c,
* Otherwise, we run run_explicit_recovery_pass when we find damage, so
* it should run again even if it's already run:
*/
+ bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags);
+ bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent);
+ bool rewind = in_recovery &&
+ r->curr_pass > pass &&
+ !(r->passes_complete & BIT_ULL(pass));
if (persistent
? !(c->sb.recovery_passes_required & BIT_ULL(pass))
@@ -320,6 +326,9 @@ static bool recovery_pass_needs_set(struct bch_fs *c,
(r->passes_ratelimiting & BIT_ULL(pass)))
return true;
+ if (rewind)
+ return true;
+
return false;
}
@@ -351,7 +360,7 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
!(r->passes_complete & BIT_ULL(pass));
bool ratelimit = flags & RUN_RECOVERY_PASS_ratelimit;
- if (!(in_recovery && (flags & RUN_RECOVERY_PASS_nopersistent))) {
+ if (!(flags & RUN_RECOVERY_PASS_nopersistent)) {
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
__set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);
}
@@ -404,10 +413,8 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c,
{
int ret = 0;
- scoped_guard(mutex, &c->sb_lock) {
- if (!recovery_pass_needs_set(c, pass, &flags))
- return 0;
-
+ if (recovery_pass_needs_set(c, pass, &flags)) {
+ guard(mutex)(&c->sb_lock);
ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags);
bch2_write_super(c);
}
@@ -446,7 +453,7 @@ int bch2_require_recovery_pass(struct bch_fs *c,
int bch2_run_print_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
{
- enum bch_run_recovery_pass_flags flags = RUN_RECOVERY_PASS_nopersistent;
+ enum bch_run_recovery_pass_flags flags = 0;
if (!recovery_pass_needs_set(c, pass, &flags))
return 0;
diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h
index 260571c7105e..2117f0ce1922 100644
--- a/fs/bcachefs/recovery_passes.h
+++ b/fs/bcachefs/recovery_passes.h
@@ -17,6 +17,15 @@ enum bch_run_recovery_pass_flags {
RUN_RECOVERY_PASS_ratelimit = BIT(1),
};
+static inline bool go_rw_in_recovery(struct bch_fs *c)
+{
+ return (c->journal_keys.nr ||
+ !c->opts.read_only ||
+ !c->sb.clean ||
+ c->opts.recovery_passes ||
+ (c->opts.fsck && !(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))));
+}
+
int bch2_run_print_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);
int __bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *,
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index a535abd44df3..92b90cfe622b 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -64,6 +64,9 @@ void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c,
REFLINK_P_IDX(p.v),
le32_to_cpu(p.v->front_pad),
le32_to_cpu(p.v->back_pad));
+
+ if (REFLINK_P_ERROR(p.v))
+ prt_str(out, " error");
}
bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
@@ -269,13 +272,12 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans,
return k;
if (unlikely(!bkey_extent_is_reflink_data(k.k))) {
- unsigned size = min((u64) k.k->size,
- REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad) -
- reflink_offset);
- bch2_key_resize(&iter->k, size);
+ u64 missing_end = min(k.k->p.offset,
+ REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad));
+ BUG_ON(reflink_offset == missing_end);
int ret = bch2_indirect_extent_missing_error(trans, p, reflink_offset,
- k.k->p.offset, should_commit);
+ missing_end, should_commit);
if (ret) {
bch2_trans_iter_exit(trans, iter);
return bkey_s_c_err(ret);
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index b61f88450a6d..1506d05e0665 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -253,6 +253,7 @@ DOWNGRADE_TABLE()
static int downgrade_table_extra(struct bch_fs *c, darray_char *table)
{
+ unsigned dst_offset = table->nr;
struct bch_sb_field_downgrade_entry *dst = (void *) &darray_top(*table);
unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors);
int ret = 0;
@@ -268,6 +269,9 @@ static int downgrade_table_extra(struct bch_fs *c, darray_char *table)
if (ret)
return ret;
+ dst = (void *) &table->data[dst_offset];
+ dst->nr_errors = cpu_to_le16(nr_errors + 1);
+
/*
* Open-coded __set_bit_le64, as dst is packed and
* dst->recovery_passes is misaligned.
*/
unsigned b = BCH_RECOVERY_PASS_STABLE_check_allocations;
@@ -278,7 +282,6 @@ static int downgrade_table_extra(struct bch_fs *c, darray_char *table)
break;
}
- dst->nr_errors = cpu_to_le16(nr_errors);
return ret;
}
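
The "open-coded __set_bit_le64" comment above means setting the bit byte-by-byte, because the destination struct is packed and the field may be misaligned, so a 64-bit load/store can't be used. A hedged sketch of that idea (the helper name and layout here are illustrative, not the exact bcachefs code):

#include <linux/types.h>

/* Set bit @nr in a little-endian bitmap that may be misaligned. */
static void set_bit_le_bytewise(void *bitmap, unsigned nr)
{
	u8 *bytes = bitmap;

	/* In little-endian layout, bit @nr lives in byte nr / 8. */
	bytes[nr / 8] |= 1U << (nr % 8);
}
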
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 6fdbf265e4c0..d154b7651d28 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -3,9 +3,10 @@
#define _BCACHEFS_SB_ERRORS_FORMAT_H
enum bch_fsck_flags {
- FSCK_CAN_FIX = 1 << 0,
- FSCK_CAN_IGNORE = 1 << 1,
- FSCK_AUTOFIX = 1 << 2,
+ FSCK_CAN_FIX = BIT(0),
+ FSCK_CAN_IGNORE = BIT(1),
+ FSCK_AUTOFIX = BIT(2),
+ FSCK_ERR_NO_LOG = BIT(3),
};
#define BCH_SB_ERRS() \
@@ -134,7 +135,7 @@ enum bch_fsck_flags {
x(bucket_gens_to_invalid_buckets, 121, FSCK_AUTOFIX) \
x(bucket_gens_nonzero_for_invalid_buckets, 122, FSCK_AUTOFIX) \
x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \
- x(need_discard_freespace_key_bad, 124, 0) \
+ x(need_discard_freespace_key_bad, 124, FSCK_AUTOFIX) \
x(discarding_bucket_not_in_need_discard_btree, 291, 0) \
x(backpointer_bucket_offset_wrong, 125, 0) \
x(backpointer_level_bad, 294, 0) \
@@ -165,7 +166,7 @@ enum bch_fsck_flags {
x(ptr_to_missing_replicas_entry, 149, FSCK_AUTOFIX) \
x(ptr_to_missing_stripe, 150, 0) \
x(ptr_to_incorrect_stripe, 151, 0) \
- x(ptr_gen_newer_than_bucket_gen, 152, 0) \
+ x(ptr_gen_newer_than_bucket_gen, 152, FSCK_AUTOFIX) \
x(ptr_too_stale, 153, 0) \
x(stale_dirty_ptr, 154, FSCK_AUTOFIX) \
x(ptr_bucket_data_type_mismatch, 155, 0) \
@@ -217,7 +218,7 @@ enum bch_fsck_flags {
x(inode_str_hash_invalid, 194, 0) \
x(inode_v3_fields_start_bad, 195, 0) \
x(inode_snapshot_mismatch, 196, 0) \
- x(snapshot_key_missing_inode_snapshot, 314, 0) \
+ x(snapshot_key_missing_inode_snapshot, 314, FSCK_AUTOFIX) \
x(inode_unlinked_but_clean, 197, 0) \
x(inode_unlinked_but_nlink_nonzero, 198, 0) \
x(inode_unlinked_and_not_open, 281, 0) \
@@ -236,7 +237,7 @@ enum bch_fsck_flags {
x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \
x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \
x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \
- x(inode_has_child_snapshots_wrong, 287, 0) \
+ x(inode_has_child_snapshots_wrong, 287, FSCK_AUTOFIX) \
x(inode_unreachable, 210, FSCK_AUTOFIX) \
x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \
x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \
@@ -251,20 +252,20 @@ enum bch_fsck_flags {
x(deleted_inode_not_unlinked, 214, FSCK_AUTOFIX) \
x(deleted_inode_has_child_snapshots, 288, FSCK_AUTOFIX) \
x(extent_overlapping, 215, 0) \
- x(key_in_missing_inode, 216, 0) \
+ x(key_in_missing_inode, 216, FSCK_AUTOFIX) \
x(key_in_wrong_inode_type, 217, 0) \
- x(extent_past_end_of_inode, 218, 0) \
+ x(extent_past_end_of_inode, 218, FSCK_AUTOFIX) \
x(dirent_empty_name, 219, 0) \
x(dirent_val_too_big, 220, 0) \
x(dirent_name_too_long, 221, 0) \
x(dirent_name_embedded_nul, 222, 0) \
x(dirent_name_dot_or_dotdot, 223, 0) \
x(dirent_name_has_slash, 224, 0) \
- x(dirent_d_type_wrong, 225, 0) \
+ x(dirent_d_type_wrong, 225, FSCK_AUTOFIX) \
x(inode_bi_parent_wrong, 226, 0) \
x(dirent_in_missing_dir_inode, 227, 0) \
x(dirent_in_non_dir_inode, 228, 0) \
- x(dirent_to_missing_inode, 229, 0) \
+ x(dirent_to_missing_inode, 229, FSCK_AUTOFIX) \
x(dirent_to_overwritten_inode, 302, 0) \
x(dirent_to_missing_subvol, 230, 0) \
x(dirent_to_itself, 231, 0) \
@@ -279,8 +280,8 @@ enum bch_fsck_flags {
x(root_dir_missing, 239, 0) \
x(root_inode_not_dir, 240, 0) \
x(dir_loop, 241, 0) \
- x(hash_table_key_duplicate, 242, 0) \
- x(hash_table_key_wrong_offset, 243, 0) \
+ x(hash_table_key_duplicate, 242, FSCK_AUTOFIX) \
+ x(hash_table_key_wrong_offset, 243, FSCK_AUTOFIX) \
x(unlinked_inode_not_on_deleted_list, 244, FSCK_AUTOFIX) \
x(reflink_p_front_pad_bad, 245, 0) \
x(journal_entry_dup_same_device, 246, 0) \
@@ -300,7 +301,7 @@ enum bch_fsck_flags {
x(btree_node_bkey_bad_u64s, 260, 0) \
x(btree_node_topology_empty_interior_node, 261, 0) \
x(btree_ptr_v2_min_key_bad, 262, 0) \
- x(btree_root_unreadable_and_scan_found_nothing, 263, FSCK_AUTOFIX) \
+ x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \
x(snapshot_node_missing, 264, FSCK_AUTOFIX) \
x(dup_backpointer_to_bad_csum_extent, 265, 0) \
x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \
@@ -313,7 +314,7 @@ enum bch_fsck_flags {
x(accounting_mismatch, 272, FSCK_AUTOFIX) \
x(accounting_replicas_not_marked, 273, 0) \
x(accounting_to_invalid_device, 289, 0) \
- x(invalid_btree_id, 274, 0) \
+ x(invalid_btree_id, 274, FSCK_AUTOFIX) \
x(alloc_key_io_time_bad, 275, 0) \
x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \
x(accounting_key_junk_at_end, 277, FSCK_AUTOFIX) \
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index 363eb0c6eb7c..6245e342a8a8 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -325,9 +325,17 @@ static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb,
{
struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
- unsigned i;
- for (i = 0; i < sb->nr_devices; i++)
+ if (vstruct_end(&mi->field) <= (void *) &mi->_members[0]) {
+ prt_printf(out, "field ends before start of entries");
+ return;
+ }
+
+ unsigned nr = (vstruct_end(&mi->field) - (void *) &mi->_members[0]) / sizeof(mi->_members[0]);
+ if (nr != sb->nr_devices)
+ prt_printf(out, "nr_devices mismatch: have %i entries, should be %u", nr, sb->nr_devices);
+
+ for (unsigned i = 0; i < min(sb->nr_devices, nr); i++)
member_to_text(out, members_v1_get(mi, i), gi, sb, i);
}
@@ -341,9 +349,27 @@ static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb,
{
struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
- unsigned i;
- for (i = 0; i < sb->nr_devices; i++)
+ if (vstruct_end(&mi->field) <= (void *) &mi->_members[0]) {
+ prt_printf(out, "field ends before start of entries");
+ return;
+ }
+
+ if (!le16_to_cpu(mi->member_bytes)) {
+ prt_printf(out, "member_bytes 0");
+ return;
+ }
+
+ unsigned nr = (vstruct_end(&mi->field) - (void *) &mi->_members[0]) / le16_to_cpu(mi->member_bytes);
+ if (nr != sb->nr_devices)
+ prt_printf(out, "nr_devices mismatch: have %i entries, should be %u", nr, sb->nr_devices);
+
+ /*
+ * We call to_text() on superblock sections that haven't passed
+ * validate, so we can't trust sb->nr_devices.
+ */
+
+ for (unsigned i = 0; i < min(sb->nr_devices, nr); i++)
member_to_text(out, members_v2_get(mi, i), gi, sb, i);
}
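
Both to_text() hunks above apply the same defensive pattern: derive how many member entries actually fit in the on-disk section and iterate over the min() of that and the count the superblock claims, since to_text() can run on sections that failed validation. A generic sketch of the pattern, with made-up names:

#include <linux/minmax.h>

/*
 * Illustrative: clamp a header-claimed entry count to what the buffer
 * can actually hold, so a corrupt count can't walk off the end.
 */
static unsigned safe_entry_count(const void *start, const void *end,
				 size_t entry_size, unsigned claimed)
{
	if (end <= start || !entry_size)
		return 0;

	return min_t(unsigned, claimed,
		     ((const char *) end - (const char *) start) / entry_size);
}
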
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 23a332d76b32..4c43d2a2c1f5 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -135,7 +135,9 @@ static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor)
bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
- bool ret;
+#ifdef CONFIG_BCACHEFS_DEBUG
+ u32 orig_id = id;
+#endif
guard(rcu)();
struct snapshot_table *t = rcu_dereference(c->snapshots);
@@ -147,11 +149,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
while (id && id < ancestor - IS_ANCESTOR_BITMAP)
id = get_ancestor_below(t, id, ancestor);
- ret = id && id < ancestor
+ bool ret = id && id < ancestor
? test_ancestor_bitmap(t, id, ancestor)
: id == ancestor;
- EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
+ EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, orig_id, ancestor));
return ret;
}
@@ -869,7 +871,8 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id)
for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshot_trees, POS_MIN,
0, k, ret) {
- if (le32_to_cpu(bkey_s_c_to_snapshot_tree(k).v->root_snapshot) == id) {
+ if (k.k->type == KEY_TYPE_snapshot_tree &&
+ le32_to_cpu(bkey_s_c_to_snapshot_tree(k).v->root_snapshot) == id) {
tree_id = k.k->p.offset;
break;
}
@@ -897,7 +900,8 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id)
for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
0, k, ret) {
- if (le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot) == id) {
+ if (k.k->type == KEY_TYPE_subvolume &&
+ le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot) == id) {
snapshot->v.subvol = cpu_to_le32(k.k->p.offset);
SET_BCH_SNAPSHOT_SUBVOL(&snapshot->v, true);
break;
diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c
index 71b735a85026..3e9f59226bdf 100644
--- a/fs/bcachefs/str_hash.c
+++ b/fs/bcachefs/str_hash.c
@@ -38,6 +38,7 @@ static int bch2_fsck_rename_dirent(struct btree_trans *trans,
struct bkey_s_c_dirent old,
bool *updated_before_k_pos)
{
+ struct bch_fs *c = trans->c;
struct qstr old_name = bch2_dirent_get_name(old);
struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, BKEY_U64s_MAX * sizeof(u64));
int ret = PTR_ERR_OR_ZERO(new);
@@ -60,7 +61,7 @@ static int bch2_fsck_rename_dirent(struct btree_trans *trans,
sprintf(renamed_buf, "%.*s.fsck_renamed-%u",
old_name.len, old_name.name, i));
- ret = bch2_dirent_init_name(new, hash_info, &renamed_name, NULL);
+ ret = bch2_dirent_init_name(c, new, hash_info, &renamed_name, NULL);
if (ret)
return ret;
@@ -79,7 +80,7 @@ static int bch2_fsck_rename_dirent(struct btree_trans *trans,
}
ret = ret ?: bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i);
- bch_err_fn(trans->c, ret);
+ bch_err_fn(c, ret);
return ret;
}
diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h
index 79d51aef70aa..8979ac2d7a3b 100644
--- a/fs/bcachefs/str_hash.h
+++ b/fs/bcachefs/str_hash.h
@@ -48,9 +48,7 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
struct bch_hash_info info = {
.inum_snapshot = bi->bi_snapshot,
.type = INODE_STR_HASH(bi),
-#ifdef CONFIG_UNICODE
.cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL,
-#endif
.siphash_key = { .k0 = bi->bi_hash_seed }
};
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 397a69da5a75..c46b1053a02c 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -104,7 +104,7 @@ const char * const bch2_dev_write_refs[] = {
#undef x
static void __bch2_print_str(struct bch_fs *c, const char *prefix,
- const char *str, bool nonblocking)
+ const char *str)
{
#ifdef __KERNEL__
struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c);
@@ -114,17 +114,12 @@ static void __bch2_print_str(struct bch_fs *c, const char *prefix,
return;
}
#endif
- bch2_print_string_as_lines(KERN_ERR, str, nonblocking);
+ bch2_print_string_as_lines(KERN_ERR, str);
}
void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str)
{
- __bch2_print_str(c, prefix, str, false);
-}
-
-void bch2_print_str_nonblocking(struct bch_fs *c, const char *prefix, const char *str)
-{
- __bch2_print_str(c, prefix, str, true);
+ __bch2_print_str(c, prefix, str);
}
__printf(2, 0)
@@ -215,7 +210,6 @@ static int bch2_dev_alloc(struct bch_fs *, unsigned);
static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
static void bch2_dev_io_ref_stop(struct bch_dev *, int);
static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
-static int bch2_fs_init_rw(struct bch_fs *);
struct bch_fs *bch2_dev_to_fs(dev_t dev)
{
@@ -799,7 +793,7 @@ err:
return ret;
}
-static int bch2_fs_init_rw(struct bch_fs *c)
+int bch2_fs_init_rw(struct bch_fs *c)
{
if (test_bit(BCH_FS_rw_init_done, &c->flags))
return 0;
@@ -1020,16 +1014,28 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
if (ret)
goto err;
+ if (go_rw_in_recovery(c)) {
+ /*
+ * start workqueues/kworkers early - kthread creation checks for
+ * pending signals, which is _very_ annoying
+ */
+ ret = bch2_fs_init_rw(c);
+ if (ret)
+ goto err;
+ }
+
#ifdef CONFIG_UNICODE
- /* Default encoding until we can potentially have more as an option. */
- c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
- if (IS_ERR(c->cf_encoding)) {
- printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u",
- unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
- ret = -EINVAL;
- goto err;
+ if (bch2_fs_casefold_enabled(c)) {
+ /* Default encoding until we can potentially have more as an option. */
+ c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
+ if (IS_ERR(c->cf_encoding)) {
+ printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u",
+ unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
+ ret = -EINVAL;
+ goto err;
+ }
}
#else
if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) {
@@ -1072,12 +1078,13 @@ noinline_for_stack
static void print_mount_opts(struct bch_fs *c)
{
enum bch_opt_id i;
- struct printbuf p = PRINTBUF;
- bool first = true;
+ CLASS(printbuf, p)();
+ bch2_log_msg_start(c, &p);
prt_str(&p, "starting version ");
bch2_version_to_text(&p, c->sb.version);
+ bool first = true;
for (i = 0; i < bch2_opts_nr; i++) {
const struct bch_option *opt = &bch2_opt_table[i];
u64 v = bch2_opt_get_by_id(&c->opts, i);
@@ -1094,17 +1101,24 @@ static void print_mount_opts(struct bch_fs *c)
}
if (c->sb.version_incompat_allowed != c->sb.version) {
- prt_printf(&p, "\n allowing incompatible features above ");
+ prt_printf(&p, "\nallowing incompatible features above ");
bch2_version_to_text(&p, c->sb.version_incompat_allowed);
}
if (c->opts.verbose) {
- prt_printf(&p, "\n features: ");
+ prt_printf(&p, "\nfeatures: ");
prt_bitflags(&p, bch2_sb_features, c->sb.features);
}
- bch_info(c, "%s", p.buf);
- printbuf_exit(&p);
+ if (c->sb.multi_device) {
+ prt_printf(&p, "\nwith devices");
+ for_each_online_member(c, ca, BCH_DEV_READ_REF_bch2_online_devs) {
+ prt_char(&p, ' ');
+ prt_str(&p, ca->name);
+ }
+ }
+
+ bch2_print_str(c, KERN_INFO, p.buf);
}
static bool bch2_fs_may_start(struct bch_fs *c)
@@ -1148,12 +1162,11 @@ int bch2_fs_start(struct bch_fs *c)
print_mount_opts(c);
-#ifdef CONFIG_UNICODE
- bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
- unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
- unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
-#endif
+ if (c->cf_encoding)
+ bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u",
+ unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
if (!bch2_fs_may_start(c))
return bch_err_throw(c, insufficient_devices_to_start);
@@ -1995,6 +2008,22 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
goto err_late;
}
+ /*
+ * We just changed the superblock UUID, invalidate cache and send a
+ * uevent to update /dev/disk/by-uuid
+ */
+ invalidate_bdev(ca->disk_sb.bdev);
+
+ char uuid_str[UUID_STRING_LEN + 6];
+ snprintf(uuid_str, sizeof(uuid_str), "UUID=%pUb", &c->sb.uuid);
+
+ char *envp[] = {
+ "CHANGE=uuid",
+ uuid_str,
+ NULL,
+ };
+ kobject_uevent_env(&ca->disk_sb.bdev->bd_device.kobj, KOBJ_CHANGE, envp);
+
up_write(&c->state_lock);
out:
printbuf_exit(&label);
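
The bch2_dev_add() hunk above pushes a KOBJ_CHANGE uevent with a CHANGE=uuid / UUID=... environment so udev re-probes the device and refreshes /dev/disk/by-uuid. Stripped to its essentials, the mechanism looks roughly like this (the device pointer and UUID argument are placeholders):

#include <linux/device.h>
#include <linux/kobject.h>

/* Illustrative: notify udev that a device property changed. */
static void notify_uuid_change(struct device *dev, const char *new_uuid)
{
	char uuid_str[48];
	char *envp[] = { "CHANGE=uuid", uuid_str, NULL };

	snprintf(uuid_str, sizeof(uuid_str), "UUID=%s", new_uuid);
	kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp);
}
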
diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h
index dc52f06cb2b9..e90bab9afe78 100644
--- a/fs/bcachefs/super.h
+++ b/fs/bcachefs/super.h
@@ -46,6 +46,7 @@ void __bch2_fs_stop(struct bch_fs *);
void bch2_fs_free(struct bch_fs *);
void bch2_fs_stop(struct bch_fs *);
+int bch2_fs_init_rw(struct bch_fs *);
int bch2_fs_start(struct bch_fs *);
struct bch_fs *bch2_fs_open(darray_const_str *, struct bch_opts *);
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index dc09532796af..9c5a9c551f03 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -1080,34 +1080,14 @@ TRACE_EVENT(trans_blocked_journal_reclaim,
__entry->must_wait)
);
-TRACE_EVENT(trans_restart_journal_preres_get,
- TP_PROTO(struct btree_trans *trans,
- unsigned long caller_ip,
- unsigned flags),
- TP_ARGS(trans, caller_ip, flags),
-
- TP_STRUCT__entry(
- __array(char, trans_fn, 32 )
- __field(unsigned long, caller_ip )
- __field(unsigned, flags )
- ),
-
- TP_fast_assign(
- strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
- __entry->caller_ip = caller_ip;
- __entry->flags = flags;
- ),
-
- TP_printk("%s %pS %x", __entry->trans_fn,
- (void *) __entry->caller_ip,
- __entry->flags)
-);
-
+#if 0
+/* TODO: bring back dynamic fault injection */
DEFINE_EVENT(transaction_event, trans_restart_fault_inject,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip),
TP_ARGS(trans, caller_ip)
);
+#endif
DEFINE_EVENT(transaction_event, trans_traverse_all,
TP_PROTO(struct btree_trans *trans,
@@ -1195,19 +1175,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_parent_for_fill,
TP_ARGS(trans, caller_ip, path)
);
-DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_after_fill,
- TP_PROTO(struct btree_trans *trans,
- unsigned long caller_ip,
- struct btree_path *path),
- TP_ARGS(trans, caller_ip, path)
-);
-
-DEFINE_EVENT(transaction_event, trans_restart_key_cache_upgrade,
- TP_PROTO(struct btree_trans *trans,
- unsigned long caller_ip),
- TP_ARGS(trans, caller_ip)
-);
-
DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_key_cache_fill,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip,
@@ -1229,13 +1196,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path_intent,
TP_ARGS(trans, caller_ip, path)
);
-DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse,
- TP_PROTO(struct btree_trans *trans,
- unsigned long caller_ip,
- struct btree_path *path),
- TP_ARGS(trans, caller_ip, path)
-);
-
DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip,
@@ -1294,44 +1254,6 @@ TRACE_EVENT(trans_restart_mem_realloced,
__entry->bytes)
);
-TRACE_EVENT(trans_restart_key_cache_key_realloced,
- TP_PROTO(struct btree_trans *trans,
- unsigned long caller_ip,
- struct btree_path *path,
- unsigned old_u64s,
- unsigned new_u64s),
- TP_ARGS(trans, caller_ip, path, old_u64s, new_u64s),
-
- TP_STRUCT__entry(
- __array(char, trans_fn, 32 )
- __field(unsigned long, caller_ip )
- __field(enum btree_id, btree_id )
- TRACE_BPOS_entries(pos)
- __field(u32, old_u64s )
- __field(u32, new_u64s )
- ),
-
- TP_fast_assign(
- strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
- __entry->caller_ip = caller_ip;
-
- __entry->btree_id = path->btree_id;
- TRACE_BPOS_assign(pos, path->pos);
- __entry->old_u64s = old_u64s;
- __entry->new_u64s = new_u64s;
- ),
-
- TP_printk("%s %pS btree %s pos %llu:%llu:%u old_u64s %u new_u64s %u",
- __entry->trans_fn,
- (void *) __entry->caller_ip,
- bch2_btree_id_str(__entry->btree_id),
- __entry->pos_inode,
- __entry->pos_offset,
- __entry->pos_snapshot,
- __entry->old_u64s,
- __entry->new_u64s)
-);
-
DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip),
@@ -1490,6 +1412,31 @@ DEFINE_EVENT(fs_str, io_move_evacuate_bucket,
TP_ARGS(c, str)
);
+DEFINE_EVENT(fs_str, extent_trim_atomic,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
+DEFINE_EVENT(fs_str, btree_iter_peek_slot,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
+DEFINE_EVENT(fs_str, __btree_iter_peek,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
+DEFINE_EVENT(fs_str, btree_iter_peek_max,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
+DEFINE_EVENT(fs_str, btree_iter_peek_prev_min,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
#ifdef CONFIG_BCACHEFS_PATH_TRACEPOINTS
TRACE_EVENT(update_by_path,
@@ -1902,21 +1849,6 @@ TRACE_EVENT(btree_path_free,
__entry->dup_locked)
);
-TRACE_EVENT(btree_path_free_trans_begin,
- TP_PROTO(btree_path_idx_t path),
- TP_ARGS(path),
-
- TP_STRUCT__entry(
- __field(btree_path_idx_t, idx )
- ),
-
- TP_fast_assign(
- __entry->idx = path;
- ),
-
- TP_printk(" path %3u", __entry->idx)
-);
-
#else /* CONFIG_BCACHEFS_PATH_TRACEPOINTS */
#ifndef _TRACE_BCACHEFS_H
@@ -1934,7 +1866,6 @@ static inline void trace_btree_path_traverse_start(struct btree_trans *trans, st
static inline void trace_btree_path_traverse_end(struct btree_trans *trans, struct btree_path *path) {}
static inline void trace_btree_path_set_pos(struct btree_trans *trans, struct btree_path *path, struct bpos *new_pos) {}
static inline void trace_btree_path_free(struct btree_trans *trans, btree_path_idx_t path, struct btree_path *dup) {}
-static inline void trace_btree_path_free_trans_begin(btree_path_idx_t path) {}
#endif
#endif /* CONFIG_BCACHEFS_PATH_TRACEPOINTS */
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index dc3817f545fa..df9a6071fe18 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -262,8 +262,7 @@ static bool string_is_spaces(const char *str)
return true;
}
-void bch2_print_string_as_lines(const char *prefix, const char *lines,
- bool nonblocking)
+void bch2_print_string_as_lines(const char *prefix, const char *lines)
{
bool locked = false;
const char *p;
@@ -273,12 +272,7 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines,
return;
}
- if (!nonblocking) {
- console_lock();
- locked = true;
- } else {
- locked = console_trylock();
- }
+ locked = console_trylock();
while (*lines) {
p = strchrnul(lines, '\n');
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 0a4b1d433621..6488f098d140 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -214,7 +214,7 @@ u64 bch2_read_flag_list(const char *, const char * const[]);
void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned);
void bch2_prt_u64_base2(struct printbuf *, u64);
-void bch2_print_string_as_lines(const char *, const char *, bool);
+void bch2_print_string_as_lines(const char *, const char *);
typedef DARRAY(unsigned long) bch_stacktrace;
int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t);
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index fa66a09e496a..10dc0151ea55 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -170,9 +170,10 @@ static void bfs_write_failed(struct address_space *mapping, loff_t to)
truncate_pagecache(inode, inode->i_size);
}
-static int bfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct folio **foliop, void **fsdata)
+static int bfs_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct folio **foliop, void **fsdata)
{
int ret;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 432fbf4fc334..a839f960cd4a 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -675,44 +675,6 @@ static void bm_evict_inode(struct inode *inode)
}
/**
- * unlink_binfmt_dentry - remove the dentry for the binary type handler
- * @dentry: dentry associated with the binary type handler
- *
- * Do the actual filesystem work to remove a dentry for a registered binary
- * type handler. Since binfmt_misc only allows simple files to be created
- * directly under the root dentry of the filesystem we ensure that we are
- * indeed passed a dentry directly beneath the root dentry, that the inode
- * associated with the root dentry is locked, and that it is a regular file we
- * are asked to remove.
- */
-static void unlink_binfmt_dentry(struct dentry *dentry)
-{
- struct dentry *parent = dentry->d_parent;
- struct inode *inode, *parent_inode;
-
- /* All entries are immediate descendants of the root dentry. */
- if (WARN_ON_ONCE(dentry->d_sb->s_root != parent))
- return;
-
- /* We only expect to be called on regular files. */
- inode = d_inode(dentry);
- if (WARN_ON_ONCE(!S_ISREG(inode->i_mode)))
- return;
-
- /* The parent inode must be locked. */
- parent_inode = d_inode(parent);
- if (WARN_ON_ONCE(!inode_is_locked(parent_inode)))
- return;
-
- if (simple_positive(dentry)) {
- dget(dentry);
- simple_unlink(parent_inode, dentry);
- d_delete(dentry);
- dput(dentry);
- }
-}
-
-/**
* remove_binfmt_handler - remove a binary type handler
* @misc: handle to binfmt_misc instance
* @e: binary type handler to remove
@@ -729,7 +691,7 @@ static void remove_binfmt_handler(struct binfmt_misc *misc, Node *e)
write_lock(&misc->entries_lock);
list_del_init(&e->list);
write_unlock(&misc->entries_lock);
- unlink_binfmt_dentry(e->dentry);
+ locked_recursive_removal(e->dentry, NULL);
}
/* /<entry> */
@@ -772,7 +734,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
case 3:
/* Delete this handler. */
inode = d_inode(inode->i_sb->s_root);
- inode_lock(inode);
+ inode_lock_nested(inode, I_MUTEX_PARENT);
/*
* In order to add new element or remove elements from the list
@@ -922,7 +884,7 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer,
case 3:
/* Delete all handlers. */
inode = d_inode(file_inode(file)->i_sb->s_root);
- inode_lock(inode);
+ inode_lock_nested(inode, I_MUTEX_PARENT);
/*
* In order to add new element or remove elements from the list
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index c352f3ae0385..ea95c90c8474 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -114,6 +114,8 @@ config BTRFS_EXPERIMENTAL
- extent tree v2 - complex rework of extent tracking
+ - large folio support
+
If unsure, say N.
config BTRFS_FS_REF_VERIFY
diff --git a/fs/btrfs/accessors.c b/fs/btrfs/accessors.c
index e3716516ca38..861c7d92c437 100644
--- a/fs/btrfs/accessors.c
+++ b/fs/btrfs/accessors.c
@@ -9,27 +9,24 @@
#include "fs.h"
#include "accessors.h"
-static bool check_setget_bounds(const struct extent_buffer *eb,
- const void *ptr, unsigned off, int size)
+static void __cold report_setget_bounds(const struct extent_buffer *eb,
+ const void *ptr, unsigned off, int size)
{
- const unsigned long member_offset = (unsigned long)ptr + off;
+ unsigned long member_offset = (unsigned long)ptr + off;
- if (unlikely(member_offset + size > eb->len)) {
- btrfs_warn(eb->fs_info,
- "bad eb member %s: ptr 0x%lx start %llu member offset %lu size %d",
- (member_offset > eb->len ? "start" : "end"),
- (unsigned long)ptr, eb->start, member_offset, size);
- return false;
- }
-
- return true;
+ btrfs_warn(eb->fs_info,
+ "bad eb member %s: ptr 0x%lx start %llu member offset %lu size %d",
+ (member_offset > eb->len ? "start" : "end"),
+ (unsigned long)ptr, eb->start, member_offset, size);
}
-void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *eb)
+/* Copy bytes from @src1 and @src2 to @dest. */
+static __always_inline void memcpy_split_src(char *dest, const char *src1,
+ const char *src2, const size_t len1,
+ const size_t total)
{
- token->eb = eb;
- token->kaddr = folio_address(eb->folios[0]);
- token->offset = 0;
+ memcpy(dest, src1, len1);
+ memcpy(dest + len1, src2, total - len1);
}
/*
@@ -41,11 +38,6 @@ void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *e
* - btrfs_set_8 (for 8/16/32/64)
* - btrfs_get_8 (for 8/16/32/64)
*
- * Generic helpers with a token (cached address of the most recently accessed
- * page):
- * - btrfs_set_token_8 (for 8/16/32/64)
- * - btrfs_get_token_8 (for 8/16/32/64)
- *
* The set/get functions handle data spanning two pages transparently, in case
* metadata block size is larger than page. Every pointer to metadata items is
* an offset into the extent buffer page array, cast to a specific type. This
@@ -57,118 +49,66 @@ void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *e
*/
#define DEFINE_BTRFS_SETGET_BITS(bits) \
-u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \
- const void *ptr, unsigned long off) \
-{ \
- const unsigned long member_offset = (unsigned long)ptr + off; \
- const unsigned long idx = get_eb_folio_index(token->eb, member_offset); \
- const unsigned long oil = get_eb_offset_in_folio(token->eb, \
- member_offset);\
- const int unit_size = token->eb->folio_size; \
- const int unit_shift = token->eb->folio_shift; \
- const int size = sizeof(u##bits); \
- u8 lebytes[sizeof(u##bits)]; \
- const int part = unit_size - oil; \
- \
- ASSERT(token); \
- ASSERT(token->kaddr); \
- ASSERT(check_setget_bounds(token->eb, ptr, off, size)); \
- if (token->offset <= member_offset && \
- member_offset + size <= token->offset + unit_size) { \
- return get_unaligned_le##bits(token->kaddr + oil); \
- } \
- token->kaddr = folio_address(token->eb->folios[idx]); \
- token->offset = idx << unit_shift; \
- if (INLINE_EXTENT_BUFFER_PAGES == 1 || oil + size <= unit_size) \
- return get_unaligned_le##bits(token->kaddr + oil); \
- \
- memcpy(lebytes, token->kaddr + oil, part); \
- token->kaddr = folio_address(token->eb->folios[idx + 1]); \
- token->offset = (idx + 1) << unit_shift; \
- memcpy(lebytes + part, token->kaddr, size - part); \
- return get_unaligned_le##bits(lebytes); \
-} \
u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
const void *ptr, unsigned long off) \
{ \
const unsigned long member_offset = (unsigned long)ptr + off; \
const unsigned long idx = get_eb_folio_index(eb, member_offset);\
- const unsigned long oil = get_eb_offset_in_folio(eb, \
- member_offset);\
- const int unit_size = eb->folio_size; \
- char *kaddr = folio_address(eb->folios[idx]); \
- const int size = sizeof(u##bits); \
- const int part = unit_size - oil; \
- u8 lebytes[sizeof(u##bits)]; \
- \
- ASSERT(check_setget_bounds(eb, ptr, off, size)); \
- if (INLINE_EXTENT_BUFFER_PAGES == 1 || oil + size <= unit_size) \
- return get_unaligned_le##bits(kaddr + oil); \
- \
- memcpy(lebytes, kaddr + oil, part); \
- kaddr = folio_address(eb->folios[idx + 1]); \
- memcpy(lebytes + part, kaddr, size - part); \
- return get_unaligned_le##bits(lebytes); \
-} \
-void btrfs_set_token_##bits(struct btrfs_map_token *token, \
- const void *ptr, unsigned long off, \
- u##bits val) \
-{ \
- const unsigned long member_offset = (unsigned long)ptr + off; \
- const unsigned long idx = get_eb_folio_index(token->eb, member_offset); \
- const unsigned long oil = get_eb_offset_in_folio(token->eb, \
+ const unsigned long oif = get_eb_offset_in_folio(eb, \
member_offset);\
- const int unit_size = token->eb->folio_size; \
- const int unit_shift = token->eb->folio_shift; \
- const int size = sizeof(u##bits); \
+ char *kaddr = folio_address(eb->folios[idx]) + oif; \
+ const int part = eb->folio_size - oif; \
u8 lebytes[sizeof(u##bits)]; \
- const int part = unit_size - oil; \
\
- ASSERT(token); \
- ASSERT(token->kaddr); \
- ASSERT(check_setget_bounds(token->eb, ptr, off, size)); \
- if (token->offset <= member_offset && \
- member_offset + size <= token->offset + unit_size) { \
- put_unaligned_le##bits(val, token->kaddr + oil); \
- return; \
+ if (unlikely(member_offset + sizeof(u##bits) > eb->len)) { \
+ report_setget_bounds(eb, ptr, off, sizeof(u##bits)); \
+ return 0; \
} \
- token->kaddr = folio_address(token->eb->folios[idx]); \
- token->offset = idx << unit_shift; \
- if (INLINE_EXTENT_BUFFER_PAGES == 1 || \
- oil + size <= unit_size) { \
- put_unaligned_le##bits(val, token->kaddr + oil); \
- return; \
+ if (INLINE_EXTENT_BUFFER_PAGES == 1 || sizeof(u##bits) == 1 || \
+ likely(sizeof(u##bits) <= part)) \
+ return get_unaligned_le##bits(kaddr); \
+ \
+ if (sizeof(u##bits) == 2) { \
+ lebytes[0] = *kaddr; \
+ kaddr = folio_address(eb->folios[idx + 1]); \
+ lebytes[1] = *kaddr; \
+ } else { \
+ memcpy_split_src(lebytes, kaddr, \
+ folio_address(eb->folios[idx + 1]), \
+ part, sizeof(u##bits)); \
} \
- put_unaligned_le##bits(val, lebytes); \
- memcpy(token->kaddr + oil, lebytes, part); \
- token->kaddr = folio_address(token->eb->folios[idx + 1]); \
- token->offset = (idx + 1) << unit_shift; \
- memcpy(token->kaddr, lebytes + part, size - part); \
+ return get_unaligned_le##bits(lebytes); \
} \
void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \
unsigned long off, u##bits val) \
{ \
const unsigned long member_offset = (unsigned long)ptr + off; \
const unsigned long idx = get_eb_folio_index(eb, member_offset);\
- const unsigned long oil = get_eb_offset_in_folio(eb, \
+ const unsigned long oif = get_eb_offset_in_folio(eb, \
member_offset);\
- const int unit_size = eb->folio_size; \
- char *kaddr = folio_address(eb->folios[idx]); \
- const int size = sizeof(u##bits); \
- const int part = unit_size - oil; \
+ char *kaddr = folio_address(eb->folios[idx]) + oif; \
+ const int part = eb->folio_size - oif; \
u8 lebytes[sizeof(u##bits)]; \
\
- ASSERT(check_setget_bounds(eb, ptr, off, size)); \
- if (INLINE_EXTENT_BUFFER_PAGES == 1 || \
- oil + size <= unit_size) { \
- put_unaligned_le##bits(val, kaddr + oil); \
+ if (unlikely(member_offset + sizeof(u##bits) > eb->len)) { \
+ report_setget_bounds(eb, ptr, off, sizeof(u##bits)); \
+ return; \
+ } \
+ if (INLINE_EXTENT_BUFFER_PAGES == 1 || sizeof(u##bits) == 1 || \
+ likely(sizeof(u##bits) <= part)) { \
+ put_unaligned_le##bits(val, kaddr); \
return; \
} \
- \
put_unaligned_le##bits(val, lebytes); \
- memcpy(kaddr + oil, lebytes, part); \
- kaddr = folio_address(eb->folios[idx + 1]); \
- memcpy(kaddr, lebytes + part, size - part); \
+ if (sizeof(u##bits) == 2) { \
+ *kaddr = lebytes[0]; \
+ kaddr = folio_address(eb->folios[idx + 1]); \
+ *kaddr = lebytes[1]; \
+ } else { \
+ memcpy(kaddr, lebytes, part); \
+ kaddr = folio_address(eb->folios[idx + 1]); \
+ memcpy(kaddr, lebytes + part, sizeof(u##bits) - part); \
+ } \
}
DEFINE_BTRFS_SETGET_BITS(8)
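
The rewritten accessors above handle a metadata field that straddles a folio boundary by copying the leading bytes from one mapping and the trailing bytes from the next into a small stack buffer, then decoding the little-endian value from there. A reduced sketch of the read side (function name and parameters are illustrative):

#include <linux/string.h>
#include <linux/unaligned.h>

/* Read a u32 whose bytes may be split across two mapped buffers. */
static u32 get_le32_split(const u8 *first, size_t avail, const u8 *second)
{
	u8 lebytes[4];

	if (avail >= sizeof(lebytes))
		return get_unaligned_le32(first);

	/* Reassemble the value from the tail of one buffer and the head of the next. */
	memcpy(lebytes, first, avail);
	memcpy(lebytes + avail, second, sizeof(lebytes) - avail);
	return get_unaligned_le32(lebytes);
}
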
diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h
index 15ea6348800b..99b3ced12805 100644
--- a/fs/btrfs/accessors.h
+++ b/fs/btrfs/accessors.h
@@ -16,14 +16,6 @@
struct extent_buffer;
-struct btrfs_map_token {
- struct extent_buffer *eb;
- char *kaddr;
- unsigned long offset;
-};
-
-void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *eb);
-
/*
* Some macros to generate set/get functions for the struct fields. This
* assumes there is a lefoo_to_cpu for every type, so lets make a simple one
@@ -56,11 +48,6 @@ static inline void put_unaligned_le8(u8 val, void *p)
sizeof_field(type, member)))
#define DECLARE_BTRFS_SETGET_BITS(bits) \
-u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \
- const void *ptr, unsigned long off); \
-void btrfs_set_token_##bits(struct btrfs_map_token *token, \
- const void *ptr, unsigned long off, \
- u##bits val); \
u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
const void *ptr, unsigned long off); \
void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \
@@ -83,18 +70,6 @@ static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \
{ \
static_assert(sizeof(u##bits) == sizeof_field(type, member)); \
btrfs_set_##bits(eb, s, offsetof(type, member), val); \
-} \
-static inline u##bits btrfs_token_##name(struct btrfs_map_token *token, \
- const type *s) \
-{ \
- static_assert(sizeof(u##bits) == sizeof_field(type, member)); \
- return btrfs_get_token_##bits(token, s, offsetof(type, member));\
-} \
-static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
- type *s, u##bits val) \
-{ \
- static_assert(sizeof(u##bits) == sizeof_field(type, member)); \
- btrfs_set_token_##bits(token, s, offsetof(type, member), val); \
}
#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
@@ -479,18 +454,6 @@ static inline void btrfs_set_item_##member(const struct extent_buffer *eb, \
int slot, u32 val) \
{ \
btrfs_set_raw_item_##member(eb, btrfs_item_nr(eb, slot), val); \
-} \
-static inline u32 btrfs_token_item_##member(struct btrfs_map_token *token, \
- int slot) \
-{ \
- struct btrfs_item *item = btrfs_item_nr(token->eb, slot); \
- return btrfs_token_raw_item_##member(token, item); \
-} \
-static inline void btrfs_set_token_item_##member(struct btrfs_map_token *token, \
- int slot, u32 val) \
-{ \
- struct btrfs_item *item = btrfs_item_nr(token->eb, slot); \
- btrfs_set_token_raw_item_##member(token, item, val); \
}
BTRFS_ITEM_SETGET_FUNCS(offset)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index ed497f5f8d1b..6a450be293b1 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -733,7 +733,6 @@ static int resolve_indirect_refs(struct btrfs_backref_walk_ctx *ctx,
struct preftrees *preftrees,
struct share_check *sc)
{
- int err;
int ret = 0;
struct ulist *parents;
struct ulist_node *node;
@@ -752,6 +751,7 @@ static int resolve_indirect_refs(struct btrfs_backref_walk_ctx *ctx,
*/
while ((rnode = rb_first_cached(&preftrees->indirect.root))) {
struct prelim_ref *ref;
+ int ret2;
ref = rb_entry(rnode, struct prelim_ref, rbnode);
if (WARN(ref->parent,
@@ -773,18 +773,18 @@ static int resolve_indirect_refs(struct btrfs_backref_walk_ctx *ctx,
ret = BACKREF_FOUND_SHARED;
goto out;
}
- err = resolve_indirect_ref(ctx, path, preftrees, ref, parents);
+ ret2 = resolve_indirect_ref(ctx, path, preftrees, ref, parents);
/*
* We can only tolerate ENOENT; otherwise, we should catch the error
* and return directly.
*/
- if (err == -ENOENT) {
+ if (ret2 == -ENOENT) {
prelim_ref_insert(ctx->fs_info, &preftrees->direct, ref,
NULL);
continue;
- } else if (err) {
+ } else if (ret2) {
free_pref(ref);
- ret = err;
+ ret = ret2;
goto out;
}
@@ -2201,7 +2201,6 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
int ret;
u64 flags;
u64 size = 0;
- u32 item_size;
const struct extent_buffer *eb;
struct btrfs_extent_item *ei;
struct btrfs_key key;
@@ -2244,7 +2243,6 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
}
eb = path->nodes[0];
- item_size = btrfs_item_size(eb, path->slots[0]);
ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
flags = btrfs_extent_flags(eb, ei);
@@ -2252,7 +2250,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
btrfs_debug(fs_info,
"logical %llu is at position %llu within the extent (%llu EXTENT_ITEM %llu) flags %#llx size %u",
logical, logical - found_key->objectid, found_key->objectid,
- found_key->offset, flags, item_size);
+ found_key->offset, flags, btrfs_item_size(eb, path->slots[0]));
WARN_ON(!flags_ret);
if (flags_ret) {
@@ -2548,17 +2546,20 @@ static int build_ino_list(u64 inum, u64 offset, u64 num_bytes, u64 root, void *c
}
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
void *ctx, bool ignore_offset)
{
struct btrfs_backref_walk_ctx walk_ctx = { 0 };
int ret;
u64 flags = 0;
struct btrfs_key found_key;
- int search_commit_root = path->search_commit_root;
+ struct btrfs_path *path;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
ret = extent_from_logical(fs_info, logical, path, &found_key, &flags);
- btrfs_release_path(path);
+ btrfs_free_path(path);
if (ret < 0)
return ret;
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
@@ -2571,8 +2572,7 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
walk_ctx.extent_item_pos = logical - found_key.objectid;
walk_ctx.fs_info = fs_info;
- return iterate_extent_inodes(&walk_ctx, search_commit_root,
- build_ino_list, ctx);
+ return iterate_extent_inodes(&walk_ctx, false, build_ino_list, ctx);
}
static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
@@ -3161,18 +3161,14 @@ void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
ASSERT(!cache->nr_edges);
}
-void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
- struct btrfs_backref_node *lower,
- struct btrfs_backref_node *upper,
- int link_which)
+static void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
+ struct btrfs_backref_node *lower,
+ struct btrfs_backref_node *upper)
{
ASSERT(upper && lower && upper->level == lower->level + 1);
edge->node[LOWER] = lower;
edge->node[UPPER] = upper;
- if (link_which & LINK_LOWER)
- list_add_tail(&edge->list[LOWER], &lower->upper);
- if (link_which & LINK_UPPER)
- list_add_tail(&edge->list[UPPER], &upper->lower);
+ list_add_tail(&edge->list[LOWER], &lower->upper);
}
/*
* Handle direct tree backref
@@ -3242,7 +3238,7 @@ static int handle_direct_tree_backref(struct btrfs_backref_cache *cache,
ASSERT(upper->checked);
INIT_LIST_HEAD(&edge->list[UPPER]);
}
- btrfs_backref_link_edge(edge, cur, upper, LINK_LOWER);
+ btrfs_backref_link_edge(edge, cur, upper);
return 0;
}
@@ -3412,7 +3408,7 @@ static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans,
if (!upper->owner)
upper->owner = btrfs_header_owner(eb);
}
- btrfs_backref_link_edge(edge, lower, upper, LINK_LOWER);
+ btrfs_backref_link_edge(edge, lower, upper);
if (rb_node) {
btrfs_put_root(root);
@@ -3570,7 +3566,7 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
ASSERT(start->checked);
- rb_node = rb_simple_insert(&cache->rb_root, start->bytenr, &start->rb_node);
+ rb_node = rb_simple_insert(&cache->rb_root, &start->simple_node);
if (rb_node)
btrfs_backref_panic(cache->fs_info, start->bytenr, -EEXIST);
@@ -3621,8 +3617,7 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
return -EUCLEAN;
}
- rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr,
- &upper->rb_node);
+ rb_node = rb_simple_insert(&cache->rb_root, &upper->simple_node);
if (unlikely(rb_node)) {
btrfs_backref_panic(cache->fs_info, upper->bytenr, -EEXIST);
return -EUCLEAN;
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 953637115956..34b0193a181c 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -226,8 +226,7 @@ int iterate_extent_inodes(struct btrfs_backref_walk_ctx *ctx,
iterate_extent_inodes_t *iterate, void *user_ctx);
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
- struct btrfs_path *path, void *ctx,
- bool ignore_offset);
+ void *ctx, bool ignore_offset);
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
@@ -313,10 +312,15 @@ int btrfs_backref_iter_next(struct btrfs_backref_iter *iter);
* Represent a tree block in the backref cache
*/
struct btrfs_backref_node {
- struct {
- struct rb_node rb_node;
- u64 bytenr;
- }; /* Use rb_simple_node for search/insert */
+ union {
+ /* Use rb_simple_node for search/insert */
+ struct {
+ struct rb_node rb_node;
+ u64 bytenr;
+ };
+
+ struct rb_simple_node simple_node;
+ };
/*
* This is a sanity check, whenever we COW a block we will update
@@ -423,13 +427,6 @@ struct btrfs_backref_node *btrfs_backref_alloc_node(
struct btrfs_backref_edge *btrfs_backref_alloc_edge(
struct btrfs_backref_cache *cache);
-#define LINK_LOWER (1U << 0)
-#define LINK_UPPER (1U << 1)
-
-void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
- struct btrfs_backref_node *lower,
- struct btrfs_backref_node *upper,
- int link_which);
void btrfs_backref_free_node(struct btrfs_backref_cache *cache,
struct btrfs_backref_node *node);
void btrfs_backref_free_edge(struct btrfs_backref_cache *cache,
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index f7d8958b7327..50b5fc1c06d7 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -27,12 +27,12 @@ struct btrfs_failed_bio {
};
/* Is this a data path I/O that needs storage layer checksum and repair? */
-static inline bool is_data_bbio(struct btrfs_bio *bbio)
+static inline bool is_data_bbio(const struct btrfs_bio *bbio)
{
return bbio->inode && is_data_inode(bbio->inode);
}
-static bool bbio_has_ordered_extent(struct btrfs_bio *bbio)
+static bool bbio_has_ordered_extent(const struct btrfs_bio *bbio)
{
return is_data_bbio(bbio) && btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE;
}
@@ -134,14 +134,14 @@ void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
}
}
-static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
+static int next_repair_mirror(const struct btrfs_failed_bio *fbio, int cur_mirror)
{
if (cur_mirror == fbio->num_copies)
return cur_mirror + 1 - fbio->num_copies;
return cur_mirror + 1;
}
-static int prev_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
+static int prev_repair_mirror(const struct btrfs_failed_bio *fbio, int cur_mirror)
{
if (cur_mirror == 1)
return fbio->num_copies;
@@ -165,12 +165,6 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
struct bio_vec *bv = bio_first_bvec_all(&repair_bbio->bio);
int mirror = repair_bbio->mirror_num;
- /*
- * We can only trigger this for data bio, which doesn't support larger
- * folios yet.
- */
- ASSERT(folio_order(page_folio(bv->bv_page)) == 0);
-
if (repair_bbio->bio.bi_status ||
!btrfs_data_csum_ok(repair_bbio, dev, 0, bv)) {
bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ);
@@ -301,7 +295,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
btrfs_bio_end_io(bbio, bbio->bio.bi_status);
}
-static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
+static void btrfs_log_dev_io_error(const struct bio *bio, struct btrfs_device *dev)
{
if (!dev || !dev->bdev)
return;
@@ -316,8 +310,8 @@ static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS);
}
-static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_fs_info *fs_info,
- struct bio *bio)
+static struct workqueue_struct *btrfs_end_io_wq(const struct btrfs_fs_info *fs_info,
+ const struct bio *bio)
{
if (bio->bi_opf & REQ_META)
return fs_info->endio_meta_workers;
@@ -439,7 +433,7 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
ASSERT(btrfs_dev_is_sequential(dev, physical));
bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
}
- btrfs_debug_in_rcu(dev->fs_info,
+ btrfs_debug(dev->fs_info,
"%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
__func__, bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
(unsigned long)dev->bdev->bd_dev, btrfs_dev_name(dev),
@@ -845,7 +839,7 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
goto out_bio_uninit;
}
- btrfs_info_rl_in_rcu(fs_info,
+ btrfs_info_rl(fs_info,
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
ino, start, btrfs_dev_name(smap.dev),
smap.physical >> SECTOR_SHIFT);
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 5b0cb04b2b93..9bf282d2453c 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -34,6 +34,19 @@ int btrfs_should_fragment_free_space(const struct btrfs_block_group *block_group
}
#endif
+static inline bool has_unwritten_metadata(struct btrfs_block_group *block_group)
+{
+ /* The meta_write_pointer is available only on the zoned setup. */
+ if (!btrfs_is_zoned(block_group->fs_info))
+ return false;
+
+ if (block_group->flags & BTRFS_BLOCK_GROUP_DATA)
+ return false;
+
+ return block_group->start + block_group->alloc_offset >
+ block_group->meta_write_pointer;
+}
+
/*
* Return target flags in extended format or 0 if restripe for this chunk_type
* is not in progress
@@ -832,8 +845,8 @@ out:
static inline void btrfs_free_excluded_extents(const struct btrfs_block_group *bg)
{
- btrfs_clear_extent_bits(&bg->fs_info->excluded_extents, bg->start,
- bg->start + bg->length - 1, EXTENT_DIRTY);
+ btrfs_clear_extent_bit(&bg->fs_info->excluded_extents, bg->start,
+ bg->start + bg->length - 1, EXTENT_DIRTY, NULL);
}
static noinline void caching_thread(struct btrfs_work *work)
@@ -877,7 +890,7 @@ static noinline void caching_thread(struct btrfs_work *work)
*/
if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
!(test_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags)))
- ret = load_free_space_tree(caching_ctl);
+ ret = btrfs_load_free_space_tree(caching_ctl);
else
ret = load_extent_tree_free(caching_ctl);
done:
@@ -1235,7 +1248,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* another task to attempt to create another block group with the same
* item key (and failing with -EEXIST and a transaction abort).
*/
- ret = remove_block_group_free_space(trans, block_group);
+ ret = btrfs_remove_block_group_free_space(trans, block_group);
if (ret)
goto out;
@@ -1244,6 +1257,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
goto out;
spin_lock(&block_group->lock);
+ /*
+ * Hitting this WARN means we removed a block group with an unwritten
+ * region. It will cause "unable to find chunk map for logical" errors.
+ */
+ if (WARN_ON(has_unwritten_metadata(block_group)))
+ btrfs_warn(fs_info,
+ "block group %llu is removed before metadata write out",
+ block_group->start);
+
set_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags);
/*
@@ -1403,7 +1425,7 @@ out:
if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
btrfs_info(cache->fs_info,
"unable to make block group %llu ro", cache->start);
- btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
+ btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, false);
}
return ret;
}
@@ -1436,14 +1458,14 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
*/
mutex_lock(&fs_info->unused_bg_unpin_mutex);
if (prev_trans) {
- ret = btrfs_clear_extent_bits(&prev_trans->pinned_extents, start, end,
- EXTENT_DIRTY);
+ ret = btrfs_clear_extent_bit(&prev_trans->pinned_extents, start, end,
+ EXTENT_DIRTY, NULL);
if (ret)
goto out;
}
- ret = btrfs_clear_extent_bits(&trans->transaction->pinned_extents, start, end,
- EXTENT_DIRTY);
+ ret = btrfs_clear_extent_bit(&trans->transaction->pinned_extents, start, end,
+ EXTENT_DIRTY, NULL);
out:
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
if (prev_trans)
@@ -1586,8 +1608,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* needing to allocate extents from the block group.
*/
used = btrfs_space_info_used(space_info, true);
- if (space_info->total_bytes - block_group->length < used &&
- block_group->zone_unusable < block_group->length) {
+ if ((space_info->total_bytes - block_group->length < used &&
+ block_group->zone_unusable < block_group->length) ||
+ has_unwritten_metadata(block_group)) {
/*
* Add a reference for the list, compensate for the ref
* drop under the "next" label for the
@@ -1616,8 +1639,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
ret = btrfs_zone_finish(block_group);
if (ret < 0) {
btrfs_dec_block_group_ro(block_group);
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN) {
+ btrfs_link_bg_list(block_group, &retry_list);
ret = 0;
+ }
goto next;
}
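
/*
 * Editor's note: the hunk above stops dropping block groups whose zone
 * finish returns -EAGAIN and instead parks them on a retry list. A generic
 * userspace sketch of that defer-and-retry pattern (all names invented):
 */
#include <errno.h>
#include <stdio.h>

#define N 3

struct work { int id; int busy; };

static int try_finish(struct work *w)
{
	return w->busy ? -EAGAIN : 0; /* stand-in for btrfs_zone_finish() */
}

int main(void)
{
	struct work items[N] = { {1, 0}, {2, 1}, {3, 0} };
	struct work *retry[N];
	int nretry = 0;

	for (int i = 0; i < N; i++) {
		int ret = try_finish(&items[i]);

		if (ret == -EAGAIN) {
			retry[nretry++] = &items[i]; /* revisit on a later pass */
			continue;                    /* treated as success for now */
		}
		printf("finished %d\n", items[i].id);
	}
	/* Prints "retrying 2": deferred instead of silently skipped forever. */
	for (int i = 0; i < nretry; i++)
		printf("retrying %d\n", retry[i]->id);
	return 0;
}
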
@@ -1843,7 +1868,6 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
*/
list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp);
while (!list_empty(&fs_info->reclaim_bgs)) {
- u64 zone_unusable;
u64 used;
u64 reserved;
int ret = 0;
@@ -1910,16 +1934,6 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
goto next;
}
- /*
- * Cache the zone_unusable value before turning the block group
- * to read only. As soon as the block group is read only it's
- * zone_unusable value gets moved to the block group's read-only
- * bytes and isn't available for calculations anymore. We also
- * cache it before unlocking the block group, to prevent races
- * (reports from KCSAN and such tools) with tasks updating it.
- */
- zone_unusable = bg->zone_unusable;
-
spin_unlock(&bg->lock);
spin_unlock(&space_info->lock);
@@ -1963,14 +1977,8 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
reserved = bg->reserved;
spin_unlock(&bg->lock);
- btrfs_info(fs_info,
- "reclaiming chunk %llu with %llu%% used %llu%% reserved %llu%% unusable",
- bg->start,
- div64_u64(used * 100, bg->length),
- div64_u64(reserved * 100, bg->length),
- div64_u64(zone_unusable * 100, bg->length));
trace_btrfs_reclaim_block_group(bg);
- ret = btrfs_relocate_chunk(fs_info, bg->start);
+ ret = btrfs_relocate_chunk(fs_info, bg->start, false);
if (ret) {
btrfs_dec_block_group_ro(bg);
btrfs_err(fs_info, "error relocating chunk %llu",
@@ -2372,7 +2380,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi);
cache->space_info = btrfs_find_space_info(info, cache->flags);
- set_free_space_tree_thresholds(cache);
+ btrfs_set_free_space_tree_thresholds(cache);
if (need_clear) {
/*
@@ -2791,7 +2799,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
block_group->length);
if (ret)
btrfs_abort_transaction(trans, ret);
- add_block_group_free_space(trans, block_group);
+ btrfs_add_block_group_free_space(trans, block_group);
/*
* If we restriped during balance, we may have added a new raid
@@ -2889,7 +2897,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
set_bit(BLOCK_GROUP_FLAG_NEW, &cache->runtime_flags);
cache->length = size;
- set_free_space_tree_thresholds(cache);
+ btrfs_set_free_space_tree_thresholds(cache);
cache->flags = type;
cache->cached = BTRFS_CACHE_FINISHED;
cache->global_root_id = calculate_global_root_id(fs_info, cache->start);
@@ -3636,9 +3644,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
wait_event(cur_trans->writer_wait,
atomic_read(&cur_trans->num_writers) == 1);
ret = update_block_group_item(trans, path, cache);
- }
- if (ret)
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+ } else if (ret) {
btrfs_abort_transaction(trans, ret);
+ }
}
/* If it's not on the io list, we need to put the block group */
@@ -4298,7 +4308,7 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans,
if (left < bytes && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
left, bytes, type);
- btrfs_dump_space_info(fs_info, info, 0, 0);
+ btrfs_dump_space_info(fs_info, info, 0, false);
}
if (left < bytes) {
@@ -4443,7 +4453,7 @@ static void check_removing_space_info(struct btrfs_space_info *space_info)
* indicates a real bug if this happens.
*/
if (WARN_ON(space_info->bytes_pinned > 0 || space_info->bytes_may_use > 0))
- btrfs_dump_space_info(info, space_info, 0, 0);
+ btrfs_dump_space_info(info, space_info, 0, false);
/*
* If there was a failure to cleanup a log tree, very likely due to an
@@ -4454,7 +4464,7 @@ static void check_removing_space_info(struct btrfs_space_info *space_info)
if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) ||
!BTRFS_FS_LOG_CLEANUP_ERROR(info)) {
if (WARN_ON(space_info->bytes_reserved > 0))
- btrfs_dump_space_info(info, space_info, 0, 0);
+ btrfs_dump_space_info(info, space_info, 0, false);
}
WARN_ON(space_info->reclaim_size > 0);
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 9de356bcb411..a8bb8429c966 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -83,6 +83,8 @@ enum btrfs_block_group_flags {
BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
/* Does the block group need to be added to the free space tree? */
BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
+ /* Set after we add a new block group to the free space tree. */
+ BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
/* Indicate that the block group is placed on a sequential zone */
BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE,
/*
@@ -244,6 +246,11 @@ struct btrfs_block_group {
/* Lock for free space tree operations. */
struct mutex free_space_lock;
+ /* Protected by @free_space_lock. */
+ bool using_free_space_bitmaps;
+ /* Protected by @free_space_lock. */
+ bool using_free_space_bitmaps_cached;
+
/*
* Number of extents in this block group used for swap files.
* All accesses protected by the spinlock 'lock'.
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index a79fa0726f1d..b99fb0273292 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -525,6 +525,19 @@ static inline void btrfs_update_inode_mapping_flags(struct btrfs_inode *inode)
mapping_set_stable_writes(inode->vfs_inode.i_mapping);
}
+static inline void btrfs_set_inode_mapping_order(struct btrfs_inode *inode)
+{
+ /* Metadata inode should not reach here. */
+ ASSERT(is_data_inode(inode));
+
+ /* We only allow BITS_PER_LONG blocks for each bitmap. */
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ mapping_set_folio_order_range(inode->vfs_inode.i_mapping, 0,
+ ilog2(((BITS_PER_LONG << inode->root->fs_info->sectorsize_bits)
+ >> PAGE_SHIFT)));
+#endif
+}
+
/* Array of bytes with variable length, hexadecimal format 0x1234 */
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes
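
/*
 * Editor's note: a worked example of the folio-order arithmetic in
 * btrfs_set_inode_mapping_order() above, under assumed values (4 KiB
 * sectors and pages, 64-bit longs). Each free-space bitmap covers
 * BITS_PER_LONG blocks, so the largest useful folio spans
 * BITS_PER_LONG * sectorsize bytes; dividing by the page size and taking
 * ilog2 yields the maximum folio order.
 */
#include <stdio.h>

int main(void)
{
	const unsigned long bits_per_long = 64;
	const unsigned int sectorsize_bits = 12; /* 4 KiB blocks */
	const unsigned int page_shift = 12;      /* 4 KiB pages  */

	unsigned long pages = (bits_per_long << sectorsize_bits) >> page_shift;
	unsigned int order = 0;

	while ((1UL << (order + 1)) <= pages) /* open-coded ilog2() */
		order++;

	/* Prints "max order 6 (64 pages)", i.e. folios up to 256 KiB. */
	printf("max order %u (%lu pages)\n", order, pages);
	return 0;
}
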
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 48d07939fee4..d09d622016ef 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -282,8 +282,8 @@ static noinline void end_compressed_writeback(const struct compressed_bio *cb)
{
struct inode *inode = &cb->bbio.inode->vfs_inode;
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
- unsigned long index = cb->start >> PAGE_SHIFT;
- unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
+ pgoff_t index = cb->start >> PAGE_SHIFT;
+ const pgoff_t end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
struct folio_batch fbatch;
int i;
int ret;
@@ -415,7 +415,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
int *memstall, unsigned long *pflags)
{
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
- unsigned long end_index;
+ pgoff_t end_index;
struct bio *orig_bio = &cb->orig_bbio->bio;
u64 cur = cb->orig_bbio->file_offset + orig_bio->bi_iter.bi_size;
u64 isize = i_size_read(inode);
@@ -446,8 +446,8 @@ static noinline int add_ra_bio_pages(struct inode *inode,
end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
while (cur < compressed_end) {
- u64 page_end;
- u64 pg_index = cur >> PAGE_SHIFT;
+ pgoff_t page_end;
+ pgoff_t pg_index = cur >> PAGE_SHIFT;
u32 add_size;
if (pg_index > end_index)
@@ -789,8 +789,8 @@ static void btrfs_init_workspace_manager(int type)
*/
workspace = alloc_workspace(type, 0);
if (IS_ERR(workspace)) {
- pr_warn(
- "BTRFS: cannot preallocate compression workspace, will try later\n");
+ btrfs_warn(NULL,
+ "cannot preallocate compression workspace, will try later");
} else {
atomic_set(&wsm->total_ws, 1);
wsm->free_ws = 1;
@@ -888,9 +888,9 @@ again:
/* once per minute */ 60 * HZ,
/* no burst */ 1);
- if (__ratelimit(&_rs)) {
- pr_warn("BTRFS: no compression workspaces, low memory, retrying\n");
- }
+ if (__ratelimit(&_rs))
+ btrfs_warn(NULL,
+ "no compression workspaces, low memory, retrying");
}
goto again;
}
@@ -975,7 +975,7 @@ static int btrfs_compress_set_level(unsigned int type, int level)
if (level == 0)
level = ops->default_level;
else
- level = min(max(level, ops->min_level), ops->max_level);
+ level = clamp(level, ops->min_level, ops->max_level);
return level;
}
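
/*
 * Editor's note: min(max(level, lo), hi) and clamp(level, lo, hi) are
 * equivalent whenever lo <= hi, which compression level bounds satisfy.
 * A quick standalone check of the identity (range [1, 9] is an assumed
 * zlib-style example, not taken from the patch):
 */
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define CLAMP(x, lo, hi) MIN(MAX(x, lo), hi)

int main(void)
{
	/* Levels below, inside and above the range: -3 -> 1, 12 -> 9, etc. */
	for (int level = -3; level <= 12; level += 5)
		printf("%d -> %d\n", level, CLAMP(level, 1, 9));
	return 0;
}
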
@@ -1482,7 +1482,7 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
struct heuristic_ws *ws)
{
struct page *page;
- u64 index, index_end;
+ pgoff_t index, index_end;
u32 i, curr_sample_pos;
u8 *in_data;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index d34c4341eaf4..1b38e707bbd9 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -13,6 +13,7 @@
#include <linux/wait.h>
#include <linux/pagemap.h>
#include "bio.h"
+#include "fs.h"
#include "messages.h"
struct address_space;
@@ -77,12 +78,10 @@ struct compressed_bio {
/* @range_end must be exclusive. */
static inline u32 btrfs_calc_input_length(struct folio *folio, u64 range_end, u64 cur)
{
- const u64 folio_end = folio_pos(folio) + folio_size(folio);
-
/* @cur must be inside the folio. */
ASSERT(folio_pos(folio) <= cur);
- ASSERT(cur < folio_end);
- return min(range_end, folio_end) - cur;
+ ASSERT(cur < folio_end(folio));
+ return min(range_end, folio_end(folio)) - cur;
}
int __init btrfs_init_compress(void);
@@ -114,6 +113,8 @@ enum btrfs_compression_type {
BTRFS_COMPRESS_LZO = 2,
BTRFS_COMPRESS_ZSTD = 3,
BTRFS_NR_COMPRESS_TYPES = 4,
+
+ BTRFS_DEFRAG_DONT_COMPRESS,
};
struct workspace_manager {
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index a2e7979372cc..74e6d7f3d266 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -198,7 +198,7 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
* the inc_not_zero dance and if it doesn't work then
* synchronize_rcu and try again.
*/
- if (atomic_inc_not_zero(&eb->refs)) {
+ if (refcount_inc_not_zero(&eb->refs)) {
rcu_read_unlock();
break;
}
@@ -283,15 +283,26 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
- WARN_ON(btrfs_header_generation(buf) > trans->transid);
- if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
+ if (unlikely(btrfs_header_generation(buf) > trans->transid)) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
+ ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
+ if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_inc_ref(trans, root, cow, 1);
- else
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+ } else {
ret = btrfs_inc_ref(trans, root, cow, 0);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+ }
if (ret) {
btrfs_tree_unlock(cow);
free_extent_buffer(cow);
- btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -303,9 +314,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
/*
* check if the tree block can be shared by multiple trees
*/
-bool btrfs_block_can_be_shared(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *buf)
+bool btrfs_block_can_be_shared(const struct btrfs_trans_handle *trans,
+ const struct btrfs_root *root,
+ const struct extent_buffer *buf)
{
const u64 buf_gen = btrfs_header_generation(buf);
@@ -549,7 +560,7 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
- atomic_inc(&cow->refs);
+ refcount_inc(&cow->refs);
rcu_assign_pointer(root->node, cow);
ret = btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
@@ -602,9 +613,9 @@ error_unlock_cow:
return ret;
}
-static inline int should_cow_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *buf)
+static inline int should_cow_block(const struct btrfs_trans_handle *trans,
+ const struct btrfs_root *root,
+ const struct extent_buffer *buf)
{
if (btrfs_is_testing(root->fs_info))
return 0;
@@ -724,7 +735,7 @@ int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_ke
* Slot may point to the total number of items (i.e. one position beyond the last
* key) if the key is bigger than the last key in the extent buffer.
*/
-int btrfs_bin_search(struct extent_buffer *eb, int first_slot,
+int btrfs_bin_search(const struct extent_buffer *eb, int first_slot,
const struct btrfs_key *key, int *slot)
{
unsigned long p;
@@ -1081,7 +1092,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
/* update the path */
if (left) {
if (btrfs_header_nritems(left) > orig_slot) {
- atomic_inc(&left->refs);
+ refcount_inc(&left->refs);
/* left was locked after cow */
path->nodes[level] = left;
path->slots[level + 1] -= 1;
@@ -1268,7 +1279,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
* to the block in 'slot', and triggering ra on them.
*/
static void reada_for_search(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
+ const struct btrfs_path *path,
int level, int slot, u64 objectid)
{
struct extent_buffer *node;
@@ -1350,7 +1361,7 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
}
}
-static noinline void reada_for_balance(struct btrfs_path *path, int level)
+static noinline void reada_for_balance(const struct btrfs_path *path, int level)
{
struct extent_buffer *parent;
int slot;
@@ -1446,8 +1457,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
u64 blocknr;
struct extent_buffer *tmp = NULL;
int ret = 0;
+ int ret2;
int parent_level;
- int err;
bool read_tmp = false;
bool tmp_locked = false;
bool path_released = false;
@@ -1505,9 +1516,9 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
}
/* Now we're allowed to do a blocking uptodate check. */
- err = btrfs_read_extent_buffer(tmp, &check);
- if (err) {
- ret = err;
+ ret2 = btrfs_read_extent_buffer(tmp, &check);
+ if (ret2) {
+ ret = ret2;
goto out;
}
@@ -1548,9 +1559,9 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
}
/* Now we're allowed to do a blocking uptodate check. */
- err = btrfs_read_extent_buffer(tmp, &check);
- if (err) {
- ret = err;
+ ret2 = btrfs_read_extent_buffer(tmp, &check);
+ if (ret2) {
+ ret = ret2;
goto out;
}
@@ -1685,7 +1696,7 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
if (p->search_commit_root) {
b = root->commit_root;
- atomic_inc(&b->refs);
+ refcount_inc(&b->refs);
level = btrfs_header_level(b);
/*
* Ensure that all callers have set skip_locking when
@@ -1794,7 +1805,7 @@ static int finish_need_commit_sem_search(struct btrfs_path *path)
return 0;
}
-static inline int search_for_key_slot(struct extent_buffer *eb,
+static inline int search_for_key_slot(const struct extent_buffer *eb,
int search_low_slot,
const struct btrfs_key *key,
int prev_cmp,
@@ -1928,15 +1939,14 @@ static int search_leaf(struct btrfs_trans_handle *trans,
ASSERT(leaf_free_space >= 0);
if (leaf_free_space < ins_len) {
- int err;
-
- err = split_leaf(trans, root, key, path, ins_len,
- (ret == 0));
- ASSERT(err <= 0);
- if (WARN_ON(err > 0))
- err = -EUCLEAN;
- if (err)
- ret = err;
+ int ret2;
+
+ ret2 = split_leaf(trans, root, key, path, ins_len, (ret == 0));
+ ASSERT(ret2 <= 0);
+ if (WARN_ON(ret2 > 0))
+ ret2 = -EUCLEAN;
+ if (ret2)
+ ret = ret2;
}
}
@@ -1982,7 +1992,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *b;
int slot;
int ret;
- int err;
int level;
int lowest_unlock = 1;
/* everything at write_lock_level or lower must be write locked */
@@ -2053,6 +2062,7 @@ again:
while (b) {
int dec = 0;
+ int ret2;
level = btrfs_header_level(b);
@@ -2081,16 +2091,15 @@ again:
}
if (last_level)
- err = btrfs_cow_block(trans, root, b, NULL, 0,
- &b,
- BTRFS_NESTING_COW);
+ ret2 = btrfs_cow_block(trans, root, b, NULL, 0,
+ &b, BTRFS_NESTING_COW);
else
- err = btrfs_cow_block(trans, root, b,
- p->nodes[level + 1],
- p->slots[level + 1], &b,
- BTRFS_NESTING_COW);
- if (err) {
- ret = err;
+ ret2 = btrfs_cow_block(trans, root, b,
+ p->nodes[level + 1],
+ p->slots[level + 1], &b,
+ BTRFS_NESTING_COW);
+ if (ret2) {
+ ret = ret2;
goto done;
}
}
@@ -2138,12 +2147,12 @@ cow_done:
slot--;
}
p->slots[level] = slot;
- err = setup_nodes_for_search(trans, root, p, b, level, ins_len,
- &write_lock_level);
- if (err == -EAGAIN)
+ ret2 = setup_nodes_for_search(trans, root, p, b, level, ins_len,
+ &write_lock_level);
+ if (ret2 == -EAGAIN)
goto again;
- if (err) {
- ret = err;
+ if (ret2) {
+ ret = ret2;
goto done;
}
b = p->nodes[level];
@@ -2169,11 +2178,11 @@ cow_done:
goto done;
}
- err = read_block_for_search(root, p, &b, slot, key);
- if (err == -EAGAIN && !p->nowait)
+ ret2 = read_block_for_search(root, p, &b, slot, key);
+ if (ret2 == -EAGAIN && !p->nowait)
goto again;
- if (err) {
- ret = err;
+ if (ret2) {
+ ret = ret2;
goto done;
}
@@ -2236,7 +2245,6 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
struct extent_buffer *b;
int slot;
int ret;
- int err;
int level;
int lowest_unlock = 1;
u8 lowest_level = 0;
@@ -2261,6 +2269,7 @@ again:
while (b) {
int dec = 0;
+ int ret2;
level = btrfs_header_level(b);
p->nodes[level] = b;
@@ -2296,11 +2305,11 @@ again:
goto done;
}
- err = read_block_for_search(root, p, &b, slot, key);
- if (err == -EAGAIN && !p->nowait)
+ ret2 = read_block_for_search(root, p, &b, slot, key);
+ if (ret2 == -EAGAIN && !p->nowait)
goto again;
- if (err) {
- ret = err;
+ if (ret2) {
+ ret = ret2;
goto done;
}
@@ -2872,6 +2881,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
if (ret < 0) {
int ret2;
+ btrfs_clear_buffer_dirty(trans, c);
ret2 = btrfs_free_tree_block(trans, btrfs_root_id(root), c, 0, 1);
if (ret2 < 0)
btrfs_abort_transaction(trans, ret2);
@@ -2885,7 +2895,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
free_extent_buffer(old);
add_root_to_dirty_list(root);
- atomic_inc(&c->refs);
+ refcount_inc(&c->refs);
path->nodes[level] = c;
path->locks[level] = BTRFS_WRITE_LOCK;
path->slots[level] = 0;
@@ -3100,7 +3110,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = right->fs_info;
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *upper = path->nodes[1];
- struct btrfs_map_token token;
struct btrfs_disk_key disk_key;
int slot;
u32 i;
@@ -3174,13 +3183,12 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
copy_leaf_items(right, left, 0, left_nritems - push_items, push_items);
/* update the item pointers */
- btrfs_init_map_token(&token, right);
right_nritems += push_items;
btrfs_set_header_nritems(right, right_nritems);
push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
for (i = 0; i < right_nritems; i++) {
- push_space -= btrfs_token_item_size(&token, i);
- btrfs_set_token_item_offset(&token, i, push_space);
+ push_space -= btrfs_item_size(right, i);
+ btrfs_set_item_offset(right, i, push_space);
}
left_nritems -= push_items;
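
/*
 * Editor's note: this hunk and several later ones in ctree.c only swap the
 * map-token accessors for the plain btrfs_item_offset()/btrfs_set_item_offset()
 * helpers; the layout math is unchanged. A small userspace model of that
 * repacking loop: in a btrfs leaf, item headers grow from the front while
 * item data is packed downward from the end, so after moving items the data
 * offsets are recomputed back-to-front from the leaf data size. Sizes below
 * are invented for illustration.
 */
#include <stdio.h>

#define LEAF_DATA_SIZE 16384u /* assumed; the real value depends on nodesize */

struct item { unsigned int offset, size; };

static void repack_offsets(struct item *items, unsigned int nritems)
{
	unsigned int push_space = LEAF_DATA_SIZE;

	for (unsigned int i = 0; i < nritems; i++) {
		push_space -= items[i].size; /* data sits just below the previous item's data */
		items[i].offset = push_space;
	}
}

int main(void)
{
	struct item items[] = { { 0, 100 }, { 0, 50 }, { 0, 200 } };

	repack_offsets(items, 3);
	/* Prints offsets 16284, 16234, 16034: packed downward from the end. */
	for (int i = 0; i < 3; i++)
		printf("item %d: offset %u size %u\n", i, items[i].offset, items[i].size);
	return 0;
}
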
@@ -3323,7 +3331,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
int ret = 0;
u32 this_item_size;
u32 old_left_item_size;
- struct btrfs_map_token token;
if (empty)
nr = min(right_nritems, max_slot);
@@ -3371,13 +3378,12 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
old_left_nritems = btrfs_header_nritems(left);
BUG_ON(old_left_nritems <= 0);
- btrfs_init_map_token(&token, left);
old_left_item_size = btrfs_item_offset(left, old_left_nritems - 1);
for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
u32 ioff;
- ioff = btrfs_token_item_offset(&token, i);
- btrfs_set_token_item_offset(&token, i,
+ ioff = btrfs_item_offset(left, i);
+ btrfs_set_item_offset(left, i,
ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size));
}
btrfs_set_header_nritems(left, old_left_nritems + push_items);
@@ -3398,13 +3404,12 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
btrfs_header_nritems(right) - push_items);
}
- btrfs_init_map_token(&token, right);
right_nritems -= push_items;
btrfs_set_header_nritems(right, right_nritems);
push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
for (i = 0; i < right_nritems; i++) {
- push_space = push_space - btrfs_token_item_size(&token, i);
- btrfs_set_token_item_offset(&token, i, push_space);
+ push_space = push_space - btrfs_item_size(right, i);
+ btrfs_set_item_offset(right, i, push_space);
}
btrfs_mark_buffer_dirty(trans, left);
@@ -3518,7 +3523,6 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
int i;
int ret;
struct btrfs_disk_key disk_key;
- struct btrfs_map_token token;
nritems = nritems - mid;
btrfs_set_header_nritems(right, nritems);
@@ -3531,12 +3535,11 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_data_end(l, mid);
- btrfs_init_map_token(&token, right);
for (i = 0; i < nritems; i++) {
u32 ioff;
- ioff = btrfs_token_item_offset(&token, i);
- btrfs_set_token_item_offset(&token, i, ioff + rt_data_off);
+ ioff = btrfs_item_offset(right, i);
+ btrfs_set_item_offset(right, i, ioff + rt_data_off);
}
btrfs_set_header_nritems(l, mid);
@@ -4002,7 +4005,6 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
unsigned int old_size;
unsigned int size_diff;
int i;
- struct btrfs_map_token token;
leaf = path->nodes[0];
slot = path->slots[0];
@@ -4025,12 +4027,11 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
* item0..itemN ... dataN.offset..dataN.size .. data0.size
*/
/* first correct the data pointers */
- btrfs_init_map_token(&token, leaf);
for (i = slot; i < nritems; i++) {
u32 ioff;
- ioff = btrfs_token_item_offset(&token, i);
- btrfs_set_token_item_offset(&token, i, ioff + size_diff);
+ ioff = btrfs_item_offset(leaf, i);
+ btrfs_set_item_offset(leaf, i, ioff + size_diff);
}
/* shift the data */
@@ -4093,7 +4094,6 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
unsigned int old_data;
unsigned int old_size;
int i;
- struct btrfs_map_token token;
leaf = path->nodes[0];
@@ -4119,12 +4119,11 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
* item0..itemN ... dataN.offset..dataN.size .. data0.size
*/
/* first correct the data pointers */
- btrfs_init_map_token(&token, leaf);
for (i = slot; i < nritems; i++) {
u32 ioff;
- ioff = btrfs_token_item_offset(&token, i);
- btrfs_set_token_item_offset(&token, i, ioff - data_size);
+ ioff = btrfs_item_offset(leaf, i);
+ btrfs_set_item_offset(leaf, i, ioff - data_size);
}
/* shift the data */
@@ -4164,7 +4163,6 @@ static void setup_items_for_insert(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key;
struct extent_buffer *leaf;
int slot;
- struct btrfs_map_token token;
u32 total_size;
/*
@@ -4192,7 +4190,6 @@ static void setup_items_for_insert(struct btrfs_trans_handle *trans,
BUG();
}
- btrfs_init_map_token(&token, leaf);
if (slot != nritems) {
unsigned int old_data = btrfs_item_data_end(leaf, slot);
@@ -4210,8 +4207,8 @@ static void setup_items_for_insert(struct btrfs_trans_handle *trans,
for (i = slot; i < nritems; i++) {
u32 ioff;
- ioff = btrfs_token_item_offset(&token, i);
- btrfs_set_token_item_offset(&token, i,
+ ioff = btrfs_item_offset(leaf, i);
+ btrfs_set_item_offset(leaf, i,
ioff - batch->total_data_size);
}
/* shift the items */
@@ -4228,8 +4225,8 @@ static void setup_items_for_insert(struct btrfs_trans_handle *trans,
btrfs_cpu_key_to_disk(&disk_key, &batch->keys[i]);
btrfs_set_item_key(leaf, &disk_key, slot + i);
data_end -= batch->data_sizes[i];
- btrfs_set_token_item_offset(&token, slot + i, data_end);
- btrfs_set_token_item_size(&token, slot + i, batch->data_sizes[i]);
+ btrfs_set_item_offset(leaf, slot + i, data_end);
+ btrfs_set_item_size(leaf, slot + i, batch->data_sizes[i]);
}
btrfs_set_header_nritems(leaf, nritems + batch->nr);
@@ -4442,7 +4439,7 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
root_sub_used_bytes(root);
- atomic_inc(&leaf->refs);
+ refcount_inc(&leaf->refs);
ret = btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1);
free_extent_buffer_stale(leaf);
if (ret < 0)
@@ -4469,7 +4466,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (slot + nr != nritems) {
const u32 last_off = btrfs_item_offset(leaf, slot + nr - 1);
const int data_end = leaf_data_end(leaf);
- struct btrfs_map_token token;
u32 dsize = 0;
int i;
@@ -4479,12 +4475,11 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
memmove_leaf_data(leaf, data_end + dsize, data_end,
last_off - data_end);
- btrfs_init_map_token(&token, leaf);
for (i = slot + nr; i < nritems; i++) {
u32 ioff;
- ioff = btrfs_token_item_offset(&token, i);
- btrfs_set_token_item_offset(&token, i, ioff + dsize);
+ ioff = btrfs_item_offset(leaf, i);
+ btrfs_set_item_offset(leaf, i, ioff + dsize);
}
memmove_leaf_items(leaf, slot, slot + nr, nritems - slot - nr);
@@ -4527,7 +4522,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
* for possible call to btrfs_del_ptr below
*/
slot = path->slots[1];
- atomic_inc(&leaf->refs);
+ refcount_inc(&leaf->refs);
/*
* We want to be able to at least push one item to the
* left neighbour leaf, and that's the first item.
@@ -4585,16 +4580,13 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
/*
* A helper function to walk down the tree starting at min_key, and looking
- * for nodes or leaves that are have a minimum transaction id.
+ * for leaves that have a minimum transaction id.
* This is used by the btree defrag code, and tree logging
*
* This does not cow, but it does stuff the starting key it finds back
* into min_key, so you can call btrfs_search_slot with cow=1 on the
* key and get a writable path.
*
- * This honors path->lowest_level to prevent descent past a given level
- * of the tree.
- *
* min_trans indicates the oldest transaction that you are interested
* in walking through. Any nodes or leaves older than min_trans are
* skipped over (without reading them).
@@ -4615,6 +4607,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
int keep_locks = path->keep_locks;
ASSERT(!path->nowait);
+ ASSERT(path->lowest_level == 0);
path->keep_locks = 1;
again:
cur = btrfs_read_lock_root_node(root);
@@ -4636,8 +4629,8 @@ again:
goto out;
}
- /* at the lowest level, we're done, setup the path and exit */
- if (level == path->lowest_level) {
+ /* At level 0 we're done, setup the path and exit. */
+ if (level == 0) {
if (slot >= nritems)
goto find_next_key;
ret = 0;
@@ -4678,12 +4671,6 @@ find_next_key:
goto out;
}
}
- if (level == path->lowest_level) {
- ret = 0;
- /* Save our key for returning back. */
- btrfs_node_key_to_cpu(cur, min_key, slot);
- goto out;
- }
cur = btrfs_read_node_slot(cur, slot);
if (IS_ERR(cur)) {
ret = PTR_ERR(cur);
@@ -4699,7 +4686,7 @@ find_next_key:
out:
path->keep_locks = keep_locks;
if (ret == 0)
- btrfs_unlock_up_safe(path, path->lowest_level + 1);
+ btrfs_unlock_up_safe(path, 1);
return ret;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 71fa42ca04fe..fe70b593c7cd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -224,16 +224,10 @@ struct btrfs_root {
struct list_head root_list;
- /*
- * Xarray that keeps track of in-memory inodes, protected by the lock
- * @inode_lock.
- */
+ /* Xarray that keeps track of in-memory inodes. */
struct xarray inodes;
- /*
- * Xarray that keeps track of delayed nodes of every inode, protected
- * by @inode_lock.
- */
+ /* Xarray that keeps track of delayed nodes of every inode. */
struct xarray delayed_nodes;
/*
* right now this just gets used so that a root has its own devid
@@ -508,7 +502,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
int __init btrfs_ctree_init(void);
void __cold btrfs_ctree_exit(void);
-int btrfs_bin_search(struct extent_buffer *eb, int first_slot,
+int btrfs_bin_search(const struct extent_buffer *eb, int first_slot,
const struct btrfs_key *key, int *slot);
int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
@@ -576,9 +570,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
struct extent_buffer **cow_ret, u64 new_root_objectid);
-bool btrfs_block_can_be_shared(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *buf);
+bool btrfs_block_can_be_shared(const struct btrfs_trans_handle *trans,
+ const struct btrfs_root *root,
+ const struct extent_buffer *buf);
int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int level, int slot);
void btrfs_extend_item(struct btrfs_trans_handle *trans,
@@ -727,13 +721,18 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
}
int btrfs_leaf_free_space(const struct extent_buffer *leaf);
-static inline int is_fstree(u64 rootid)
+static inline bool btrfs_is_fstree(u64 rootid)
{
- if (rootid == BTRFS_FS_TREE_OBJECTID ||
- ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID &&
- !btrfs_qgroup_level(rootid)))
- return 1;
- return 0;
+ if (rootid == BTRFS_FS_TREE_OBJECTID)
+ return true;
+
+ if ((s64)rootid < (s64)BTRFS_FIRST_FREE_OBJECTID)
+ return false;
+
+ if (btrfs_qgroup_level(rootid) != 0)
+ return false;
+
+ return true;
}
static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
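
/*
 * Editor's note: a standalone sketch of the rewritten btrfs_is_fstree()
 * logic above, using the well-known btrfs objectid constants (FS_TREE = 5,
 * first free objectid = 256, qgroup level in the top 16 bits); treat the
 * exact values as assumptions of this illustration.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FS_TREE_OBJECTID    5ULL
#define FIRST_FREE_OBJECTID 256ULL
#define QGROUP_LEVEL_SHIFT  48

static inline uint16_t qgroup_level(uint64_t id) { return id >> QGROUP_LEVEL_SHIFT; }

static bool is_fstree(uint64_t rootid)
{
	if (rootid == FS_TREE_OBJECTID)
		return true;
	if ((int64_t)rootid < (int64_t)FIRST_FREE_OBJECTID)
		return false; /* internal trees and negative pseudo ids */
	if (qgroup_level(rootid) != 0)
		return false; /* a qgroup id, not a subvolume id */
	return true;
}

int main(void)
{
	const uint64_t samples[] = { 5, 7, 256, 1ULL << 48, (uint64_t)-5 };

	/* Expect: 5 yes, 7 no, 256 yes, 1<<48 no (qgroup level 1), -5 no. */
	for (int i = 0; i < 5; i++)
		printf("%llu -> %s\n", (unsigned long long)samples[i],
		       is_fstree(samples[i]) ? "fstree" : "not fstree");
	return 0;
}
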
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index 1831618579cb..738179a5e170 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -60,6 +60,14 @@ static int compare_inode_defrag(const struct inode_defrag *defrag1,
return 0;
}
+static int inode_defrag_cmp(struct rb_node *new, const struct rb_node *existing)
+{
+ const struct inode_defrag *new_defrag = rb_entry(new, struct inode_defrag, rb_node);
+ const struct inode_defrag *existing_defrag = rb_entry(existing, struct inode_defrag, rb_node);
+
+ return compare_inode_defrag(new_defrag, existing_defrag);
+}
+
/*
* Insert a record for an inode into the defrag tree. The lock must be held
* already.
@@ -71,37 +79,23 @@ static int btrfs_insert_inode_defrag(struct btrfs_inode *inode,
struct inode_defrag *defrag)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct inode_defrag *entry;
- struct rb_node **p;
- struct rb_node *parent = NULL;
- int ret;
+ struct rb_node *node;
- p = &fs_info->defrag_inodes.rb_node;
- while (*p) {
- parent = *p;
- entry = rb_entry(parent, struct inode_defrag, rb_node);
+ node = rb_find_add(&defrag->rb_node, &fs_info->defrag_inodes, inode_defrag_cmp);
+ if (node) {
+ struct inode_defrag *entry;
- ret = compare_inode_defrag(defrag, entry);
- if (ret < 0)
- p = &parent->rb_left;
- else if (ret > 0)
- p = &parent->rb_right;
- else {
- /*
- * If we're reinserting an entry for an old defrag run,
- * make sure to lower the transid of our existing
- * record.
- */
- if (defrag->transid < entry->transid)
- entry->transid = defrag->transid;
- entry->extent_thresh = min(defrag->extent_thresh,
- entry->extent_thresh);
- return -EEXIST;
- }
+ entry = rb_entry(node, struct inode_defrag, rb_node);
+ /*
+ * If we're reinserting an entry for an old defrag run, make
+ * sure to lower the transid of our existing record.
+ */
+ if (defrag->transid < entry->transid)
+ entry->transid = defrag->transid;
+ entry->extent_thresh = min(defrag->extent_thresh, entry->extent_thresh);
+ return -EEXIST;
}
set_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags);
- rb_link_node(&defrag->rb_node, parent, p);
- rb_insert_color(&defrag->rb_node, &fs_info->defrag_inodes);
return 0;
}
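
/*
 * Editor's note: the hunk above swaps an open-coded rbtree descent for
 * rb_find_add(), whose contract is "insert the node, or return the existing
 * match so the caller can merge into it". A minimal model of that contract
 * with an unbalanced binary search tree (the kernel's version also
 * rebalances):
 */
#include <stddef.h>
#include <stdio.h>

struct node { long key; struct node *left, *right; };

/* Mirrors rb_find_add(): NULL on insertion, the colliding node otherwise. */
static struct node *find_add(struct node **root, struct node *new)
{
	while (*root) {
		if (new->key < (*root)->key)
			root = &(*root)->left;
		else if (new->key > (*root)->key)
			root = &(*root)->right;
		else
			return *root; /* caller merges into the existing entry */
	}
	*root = new;
	return NULL;
}

int main(void)
{
	struct node a = { .key = 42 }, b = { .key = 42 };
	struct node *root = NULL;

	printf("first:  %p\n", (void *)find_add(&root, &a)); /* NULL: inserted  */
	printf("second: %p\n", (void *)find_add(&root, &b)); /* &a: a duplicate */
	return 0;
}
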
@@ -854,8 +848,8 @@ static struct folio *defrag_prepare_one_folio(struct btrfs_inode *inode, pgoff_t
{
struct address_space *mapping = inode->vfs_inode.i_mapping;
gfp_t mask = btrfs_alloc_write_mask(mapping);
- u64 folio_start;
- u64 folio_end;
+ u64 lock_start;
+ u64 lock_end;
struct extent_state *cached_state = NULL;
struct folio *folio;
int ret;
@@ -891,15 +885,15 @@ again:
return ERR_PTR(ret);
}
- folio_start = folio_pos(folio);
- folio_end = folio_pos(folio) + folio_size(folio) - 1;
+ lock_start = folio_pos(folio);
+ lock_end = folio_end(folio) - 1;
/* Wait for any existing ordered extent in the range */
while (1) {
struct btrfs_ordered_extent *ordered;
- btrfs_lock_extent(&inode->io_tree, folio_start, folio_end, &cached_state);
- ordered = btrfs_lookup_ordered_range(inode, folio_start, folio_size(folio));
- btrfs_unlock_extent(&inode->io_tree, folio_start, folio_end, &cached_state);
+ btrfs_lock_extent(&inode->io_tree, lock_start, lock_end, &cached_state);
+ ordered = btrfs_lookup_ordered_range(inode, lock_start, folio_size(folio));
+ btrfs_unlock_extent(&inode->io_tree, lock_start, lock_end, &cached_state);
if (!ordered)
break;
@@ -953,7 +947,7 @@ struct defrag_target_range {
* @extent_thresh: file extent size threshold, any extent size >= this value
* will be ignored
* @newer_than: only defrag extents newer than this value
- * @do_compress: whether the defrag is doing compression
+ * @do_compress: whether the defrag is applying compression or forcing no compression
* if true, @extent_thresh will be ignored and all regular
* file extents meeting @newer_than will be targets.
* @locked: if the range has already held extent lock
@@ -1184,8 +1178,7 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
if (!folio)
break;
- if (start >= folio_pos(folio) + folio_size(folio) ||
- start + len <= folio_pos(folio))
+ if (start >= folio_end(folio) || start + len <= folio_pos(folio))
continue;
btrfs_folio_clamp_clear_checked(fs_info, folio, start, len);
btrfs_folio_clamp_set_dirty(fs_info, folio, start, len);
@@ -1226,7 +1219,7 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
folios[i] = NULL;
goto free_folios;
}
- cur = folio_pos(folios[i]) + folio_size(folios[i]);
+ cur = folio_end(folios[i]);
}
for (int i = 0; i < nr_pages; i++) {
if (!folios[i])
@@ -1371,6 +1364,7 @@ int btrfs_defrag_file(struct btrfs_inode *inode, struct file_ra_state *ra,
u64 cur;
u64 last_byte;
bool do_compress = (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS);
+ bool no_compress = (range->flags & BTRFS_DEFRAG_RANGE_NOCOMPRESS);
int compress_type = BTRFS_COMPRESS_ZLIB;
int compress_level = 0;
int ret = 0;
@@ -1401,6 +1395,9 @@ int btrfs_defrag_file(struct btrfs_inode *inode, struct file_ra_state *ra,
if (range->compress_type)
compress_type = range->compress_type;
}
+ } else if (range->flags & BTRFS_DEFRAG_RANGE_NOCOMPRESS) {
+ compress_type = BTRFS_DEFRAG_DONT_COMPRESS;
+ compress_level = 1;
}
if (extent_thresh == 0)
@@ -1451,13 +1448,14 @@ int btrfs_defrag_file(struct btrfs_inode *inode, struct file_ra_state *ra,
btrfs_inode_unlock(inode, 0);
break;
}
- if (do_compress) {
+ if (do_compress || no_compress) {
inode->defrag_compress = compress_type;
inode->defrag_compress_level = compress_level;
}
ret = defrag_one_cluster(inode, ra, cur,
cluster_end + 1 - cur, extent_thresh,
- newer_than, do_compress, &sectors_defragged,
+ newer_than, do_compress || no_compress,
+ &sectors_defragged,
max_to_defrag, &last_scanned);
if (sectors_defragged > prev_sectors_defragged)
@@ -1496,7 +1494,7 @@ int btrfs_defrag_file(struct btrfs_inode *inode, struct file_ra_state *ra,
btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
ret = sectors_defragged;
}
- if (do_compress) {
+ if (do_compress || no_compress) {
btrfs_inode_lock(inode, 0);
inode->defrag_compress = BTRFS_COMPRESS_NONE;
btrfs_inode_unlock(inode, 0);
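
/*
 * Editor's note: a compact model of the flag handling the defrag hunks
 * above introduce. BTRFS_DEFRAG_RANGE_NOCOMPRESS rides the same
 * "rewrite every matching extent" path as compression by selecting the
 * sentinel BTRFS_DEFRAG_DONT_COMPRESS as the target type. Flag bit
 * positions here are illustrative, not the UAPI values.
 */
#include <stdbool.h>
#include <stdio.h>

#define RANGE_COMPRESS   (1u << 0) /* illustrative bits */
#define RANGE_NOCOMPRESS (1u << 1)

enum ctype { NONE, ZLIB, DONT_COMPRESS };

static enum ctype pick_type(unsigned int flags, bool *rewrite_all)
{
	bool do_compress = flags & RANGE_COMPRESS;
	bool no_compress = flags & RANGE_NOCOMPRESS;

	/* Either flag forces rewriting extents regardless of size threshold. */
	*rewrite_all = do_compress || no_compress;

	if (do_compress)
		return ZLIB;          /* or whatever type the range requested */
	if (no_compress)
		return DONT_COMPRESS; /* sentinel: rewrite without compressing */
	return NONE;
}

int main(void)
{
	bool rewrite;
	enum ctype t = pick_type(RANGE_NOCOMPRESS, &rewrite);

	/* Prints "type 2, rewrite 1": decompress-in-place via the defrag path. */
	printf("type %d, rewrite %d\n", t, rewrite);
	return 0;
}
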
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index c7cc24a5dd5e..0f8d8e275143 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -334,6 +334,20 @@ static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u16 data_len,
return item;
}
+static int delayed_item_index_cmp(const void *key, const struct rb_node *node)
+{
+ const u64 *index = key;
+ const struct btrfs_delayed_item *delayed_item = rb_entry(node,
+ struct btrfs_delayed_item, rb_node);
+
+ if (delayed_item->index < *index)
+ return 1;
+ else if (delayed_item->index > *index)
+ return -1;
+
+ return 0;
+}
+
/*
* Look up the delayed item by key.
*
@@ -347,21 +361,10 @@ static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
struct rb_root *root,
u64 index)
{
- struct rb_node *node = root->rb_node;
- struct btrfs_delayed_item *delayed_item = NULL;
+ struct rb_node *node;
- while (node) {
- delayed_item = rb_entry(node, struct btrfs_delayed_item,
- rb_node);
- if (delayed_item->index < index)
- node = node->rb_right;
- else if (delayed_item->index > index)
- node = node->rb_left;
- else
- return delayed_item;
- }
-
- return NULL;
+ node = rb_find(&index, root, delayed_item_index_cmp);
+ return rb_entry_safe(node, struct btrfs_delayed_item, rb_node);
}
static int btrfs_delayed_item_cmp(const struct rb_node *new,
@@ -369,14 +372,8 @@ static int btrfs_delayed_item_cmp(const struct rb_node *new,
{
const struct btrfs_delayed_item *new_item =
rb_entry(new, struct btrfs_delayed_item, rb_node);
- const struct btrfs_delayed_item *exist_item =
- rb_entry(exist, struct btrfs_delayed_item, rb_node);
- if (new_item->index < exist_item->index)
- return -1;
- if (new_item->index > exist_item->index)
- return 1;
- return 0;
+ return delayed_item_index_cmp(&new_item->index, exist);
}
static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
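
/*
 * Editor's note: the refactor above derives the node-vs-node comparator
 * from the key-vs-node one by passing &new_item->index as the key, so
 * rb_find() and rb_find_add() share a single comparison. A sketch of that
 * layering with plain integers, preserving the sign convention (negative
 * when the key sorts before the node, positive after, zero on a match):
 */
#include <stdio.h>

struct item { unsigned long long index; };

/* key-vs-item compare, shaped like delayed_item_index_cmp() above. */
static int index_cmp(const void *key, const struct item *item)
{
	const unsigned long long *index = key;

	if (item->index < *index)
		return 1;
	if (item->index > *index)
		return -1;
	return 0;
}

/* item-vs-item compare reuses the key-based one, as in the hunk above. */
static int item_cmp(const struct item *new, const struct item *existing)
{
	return index_cmp(&new->index, existing);
}

int main(void)
{
	struct item a = { 10 }, b = { 20 };

	/* Prints "-1 1 0": a before b, b after a, a equal to itself. */
	printf("%d %d %d\n", item_cmp(&a, &b), item_cmp(&b, &a), item_cmp(&a, &a));
	return 0;
}
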
@@ -1008,8 +1005,16 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
ret = btrfs_lookup_inode(trans, root, path, &key, mod);
if (ret > 0)
ret = -ENOENT;
- if (ret < 0)
+ if (ret < 0) {
+ /*
+ * If we fail to update the delayed inode we need to abort the
+ * transaction, because we could leave the inode with the
+ * improper counts behind.
+ */
+ if (ret != -ENOENT)
+ btrfs_abort_transaction(trans, ret);
goto out;
+ }
leaf = path->nodes[0];
inode_item = btrfs_item_ptr(leaf, path->slots[0],
@@ -1034,8 +1039,10 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
goto err_out;
+ }
ASSERT(ret > 0);
ASSERT(path->slots[0] > 0);
ret = 0;
@@ -1057,21 +1064,14 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
* in the same item doesn't exist.
*/
ret = btrfs_del_item(trans, root, path);
+ if (ret < 0)
+ btrfs_abort_transaction(trans, ret);
out:
btrfs_release_delayed_iref(node);
btrfs_release_path(path);
err_out:
btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0));
btrfs_release_delayed_inode(node);
-
- /*
- * If we fail to update the delayed inode we need to abort the
- * transaction, because we could leave the inode with the improper
- * counts behind.
- */
- if (ret && ret != -ENOENT)
- btrfs_abort_transaction(trans, ret);
-
return ret;
}
@@ -1377,7 +1377,10 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
{
- WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
+ struct btrfs_delayed_node *node = btrfs_first_delayed_node(fs_info->delayed_root);
+
+ if (WARN_ON(node))
+ refcount_dec(&node->refs);
}
static bool could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
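
/*
 * Editor's note: the change above fixes a reference leak in an assert-only
 * path: btrfs_first_delayed_node() returns its result with an elevated
 * refcount, so even a WARN_ON() probe must drop what it looked up. A
 * generic userspace shape of that rule (names invented):
 */
#include <stdio.h>

struct obj { int refs; };

static struct obj *lookup_first(struct obj *o) /* takes a reference */
{
	if (!o)
		return NULL;
	o->refs++;
	return o;
}

int main(void)
{
	struct obj thing = { .refs = 1 };
	struct obj *found = lookup_first(&thing);

	if (found) {           /* the WARN_ON(node) case */
		fprintf(stderr, "warn: not empty\n");
		found->refs--; /* the fix: drop the lookup's reference */
	}
	printf("refs now %d\n", thing.refs); /* back to 1, no leak */
	return 0;
}
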
@@ -1537,8 +1540,8 @@ release_node:
return ret;
}
-static int btrfs_delete_delayed_insertion_item(struct btrfs_delayed_node *node,
- u64 index)
+static bool btrfs_delete_delayed_insertion_item(struct btrfs_delayed_node *node,
+ u64 index)
{
struct btrfs_delayed_item *item;
@@ -1546,7 +1549,7 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_delayed_node *node,
item = __btrfs_lookup_delayed_item(&node->ins_root.rb_root, index);
if (!item) {
mutex_unlock(&node->mutex);
- return 1;
+ return false;
}
/*
@@ -1581,7 +1584,7 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_delayed_node *node,
}
mutex_unlock(&node->mutex);
- return 0;
+ return true;
}
int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
@@ -1595,9 +1598,10 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
if (IS_ERR(node))
return PTR_ERR(node);
- ret = btrfs_delete_delayed_insertion_item(node, index);
- if (!ret)
+ if (btrfs_delete_delayed_insertion_item(node, index)) {
+ ret = 0;
goto end;
+ }
item = btrfs_alloc_delayed_item(0, node, BTRFS_DELAYED_DELETION_ITEM);
if (!item) {
@@ -1614,7 +1618,8 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
*/
if (ret < 0) {
btrfs_err(trans->fs_info,
-"metadata reservation failed for delayed dir item deltiona, should have been reserved");
+"metadata reservation failed for delayed dir item deletion, index: %llu, root: %llu, inode: %llu, error: %d",
+ index, btrfs_root_id(node->root), node->inode_id, ret);
btrfs_release_delayed_item(item);
goto end;
}
@@ -1623,9 +1628,8 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
ret = __btrfs_add_delayed_item(node, item);
if (unlikely(ret)) {
btrfs_err(trans->fs_info,
- "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
- index, btrfs_root_id(node->root),
- node->inode_id, ret);
+"failed to add delayed dir index item, root: %llu, inode: %llu, index: %llu, error: %d",
+ index, btrfs_root_id(node->root), node->inode_id, ret);
btrfs_delayed_item_release_metadata(dir->root, item);
btrfs_release_delayed_item(item);
}
@@ -1730,17 +1734,16 @@ void btrfs_readdir_put_delayed_items(struct btrfs_inode *inode,
downgrade_write(&inode->vfs_inode.i_rwsem);
}
-int btrfs_should_delete_dir_index(const struct list_head *del_list,
- u64 index)
+bool btrfs_should_delete_dir_index(const struct list_head *del_list, u64 index)
{
struct btrfs_delayed_item *curr;
- int ret = 0;
+ bool ret = false;
list_for_each_entry(curr, del_list, readdir_list) {
if (curr->index > index)
break;
if (curr->index == index) {
- ret = 1;
+ ret = true;
break;
}
}
@@ -1750,15 +1753,14 @@ int btrfs_should_delete_dir_index(const struct list_head *del_list,
/*
* Read dir info stored in the delayed tree.
*/
-int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
- const struct list_head *ins_list)
+bool btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
+ const struct list_head *ins_list)
{
struct btrfs_dir_item *di;
struct btrfs_delayed_item *curr, *next;
struct btrfs_key location;
char *name;
int name_len;
- int over = 0;
unsigned char d_type;
/*
@@ -1767,6 +1769,8 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
* directory, nobody can delete any directory indexes now.
*/
list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
+ bool over;
+
list_del(&curr->readdir_list);
if (curr->index < ctx->pos) {
@@ -1784,17 +1788,16 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
d_type = fs_ftype_to_dtype(btrfs_dir_flags_to_ftype(di->type));
btrfs_disk_key_to_cpu(&location, &di->location);
- over = !dir_emit(ctx, name, name_len,
- location.objectid, d_type);
+ over = !dir_emit(ctx, name, name_len, location.objectid, d_type);
if (refcount_dec_and_test(&curr->refs))
kfree(curr);
if (over)
- return 1;
+ return true;
ctx->pos++;
}
- return 0;
+ return false;
}
static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index c4b4ba122beb..e6e763ad2d42 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -150,10 +150,9 @@ bool btrfs_readdir_get_delayed_items(struct btrfs_inode *inode,
void btrfs_readdir_put_delayed_items(struct btrfs_inode *inode,
struct list_head *ins_list,
struct list_head *del_list);
-int btrfs_should_delete_dir_index(const struct list_head *del_list,
- u64 index);
-int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
- const struct list_head *ins_list);
+bool btrfs_should_delete_dir_index(const struct list_head *del_list, u64 index);
+bool btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
+ const struct list_head *ins_list);
/* Used during directory logging. */
void btrfs_log_get_delayed_items(struct btrfs_inode *inode,
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 739c9e29aaa3..ca382c5b186f 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -928,7 +928,7 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
- if (is_fstree(generic_ref->ref_root))
+ if (btrfs_is_fstree(generic_ref->ref_root))
seq = atomic64_read(&fs_info->tree_mod_seq);
refcount_set(&ref->refs, 1);
@@ -958,8 +958,8 @@ void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 mod_root,
#endif
generic_ref->tree_ref.level = level;
generic_ref->type = BTRFS_REF_METADATA;
- if (skip_qgroup || !(is_fstree(generic_ref->ref_root) &&
- (!mod_root || is_fstree(mod_root))))
+ if (skip_qgroup || !(btrfs_is_fstree(generic_ref->ref_root) &&
+ (!mod_root || btrfs_is_fstree(mod_root))))
generic_ref->skip_qgroup = true;
else
generic_ref->skip_qgroup = false;
@@ -976,8 +976,8 @@ void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ino, u64 offset,
generic_ref->data_ref.objectid = ino;
generic_ref->data_ref.offset = offset;
generic_ref->type = BTRFS_REF_DATA;
- if (skip_qgroup || !(is_fstree(generic_ref->ref_root) &&
- (!mod_root || is_fstree(mod_root))))
+ if (skip_qgroup || !(btrfs_is_fstree(generic_ref->ref_root) &&
+ (!mod_root || btrfs_is_fstree(mod_root))))
generic_ref->skip_qgroup = true;
else
generic_ref->skip_qgroup = false;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 78cc23837610..552ec4fa645d 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -420,7 +420,7 @@ bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head,
u64 root, u64 parent);
void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans);
-static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node)
+static inline u64 btrfs_delayed_ref_owner(const struct btrfs_delayed_ref_node *node)
{
if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
node->type == BTRFS_SHARED_DATA_REF_KEY)
@@ -428,7 +428,7 @@ static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node)
return node->tree_ref.level;
}
-static inline u64 btrfs_delayed_ref_offset(struct btrfs_delayed_ref_node *node)
+static inline u64 btrfs_delayed_ref_offset(const struct btrfs_delayed_ref_node *node)
{
if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
node->type == BTRFS_SHARED_DATA_REF_KEY)
@@ -436,7 +436,7 @@ static inline u64 btrfs_delayed_ref_offset(struct btrfs_delayed_ref_node *node)
return 0;
}
-static inline u8 btrfs_ref_type(struct btrfs_ref *ref)
+static inline u8 btrfs_ref_type(const struct btrfs_ref *ref)
{
ASSERT(ref->type == BTRFS_REF_DATA || ref->type == BTRFS_REF_METADATA);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 2decb9fff445..4675bcd5f92e 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -250,7 +250,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
}
bdev_file = bdev_file_open_by_path(device_path, BLK_OPEN_WRITE,
- fs_info->bdev_holder, NULL);
+ fs_info->sb, &fs_holder_ops);
if (IS_ERR(bdev_file)) {
btrfs_err(fs_info, "target device %s is invalid!", device_path);
return PTR_ERR(bdev_file);
@@ -327,7 +327,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
return 0;
error:
- fput(bdev_file);
+ bdev_fput(bdev_file);
return ret;
}
@@ -600,7 +600,7 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
return PTR_ERR(src_device);
if (btrfs_pinned_by_swapfile(fs_info, src_device)) {
- btrfs_warn_in_rcu(fs_info,
+ btrfs_warn(fs_info,
"cannot replace device %s (devid %llu) due to active swapfile",
btrfs_dev_name(src_device), src_device->devid);
return -ETXTBSY;
@@ -647,7 +647,7 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
dev_replace->srcdev = src_device;
dev_replace->tgtdev = tgt_device;
- btrfs_info_in_rcu(fs_info,
+ btrfs_info(fs_info,
"dev_replace from %s (devid %llu) to %s started",
btrfs_dev_name(src_device),
src_device->devid,
@@ -943,7 +943,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
tgt_device);
} else {
if (scrub_ret != -ECANCELED)
- btrfs_err_in_rcu(fs_info,
+ btrfs_err(fs_info,
"btrfs_scrub_dev(%s, %llu, %s) failed %d",
btrfs_dev_name(src_device),
src_device->devid,
@@ -961,7 +961,7 @@ error:
return scrub_ret;
}
- btrfs_info_in_rcu(fs_info,
+ btrfs_info(fs_info,
"dev_replace from %s (devid %llu) to %s finished",
btrfs_dev_name(src_device),
src_device->devid,
@@ -1109,7 +1109,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
* btrfs_dev_replace_finishing() will handle the
* cleanup part
*/
- btrfs_info_in_rcu(fs_info,
+ btrfs_info(fs_info,
"dev_replace from %s (devid %llu) to %s canceled",
btrfs_dev_name(src_device), src_device->devid,
btrfs_dev_name(tgt_device));
@@ -1143,7 +1143,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
ret = btrfs_commit_transaction(trans);
WARN_ON(ret);
- btrfs_info_in_rcu(fs_info,
+ btrfs_info(fs_info,
"suspended dev_replace from %s (devid %llu) to %s canceled",
btrfs_dev_name(src_device), src_device->devid,
btrfs_dev_name(tgt_device));
@@ -1247,7 +1247,7 @@ static int btrfs_dev_replace_kthread(void *data)
progress = btrfs_dev_replace_progress(fs_info);
progress = div_u64(progress, 10);
- btrfs_info_in_rcu(fs_info,
+ btrfs_info(fs_info,
"continuing dev_replace from %s (devid %llu) to target %s @%u%%",
btrfs_dev_name(dev_replace->srcdev),
dev_replace->srcdev->devid,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index b29cc31a7c4a..69863e398e22 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -227,7 +227,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
return di;
}
-int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
+int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir_ino,
const struct fscrypt_str *name)
{
int ret;
@@ -242,7 +242,7 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
if (!path)
return -ENOMEM;
- key.objectid = dir;
+ key.objectid = dir_ino;
key.type = BTRFS_DIR_ITEM_KEY;
key.offset = btrfs_name_hash(name->name, name->len);
diff --git a/fs/btrfs/dir-item.h b/fs/btrfs/dir-item.h
index 8462579a95f4..e52174a8baf9 100644
--- a/fs/btrfs/dir-item.h
+++ b/fs/btrfs/dir-item.h
@@ -14,7 +14,7 @@ struct btrfs_inode;
struct btrfs_root;
struct btrfs_trans_handle;
-int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
+int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir_ino,
const struct fscrypt_str *name);
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
const struct fscrypt_str *name, struct btrfs_inode *dir,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1beb9458f622..9cc14ab35297 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -884,7 +884,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
btrfs_set_root_used(&root->root_item, leaf->len);
btrfs_set_root_last_snapshot(&root->root_item, 0);
btrfs_set_root_dirid(&root->root_item, 0);
- if (is_fstree(objectid))
+ if (btrfs_is_fstree(objectid))
generate_random_guid(root->root_item.uuid);
else
export_guid(root->root_item.uuid, &guid_null);
@@ -1104,7 +1104,7 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
if (btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID &&
!btrfs_is_data_reloc_root(root) &&
- is_fstree(btrfs_root_id(root))) {
+ btrfs_is_fstree(btrfs_root_id(root))) {
set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
btrfs_check_and_init_root_item(&root->root_item);
}
@@ -1113,7 +1113,7 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
* Don't assign anonymous block device to roots that are not exposed to
* userspace, the id pool is limited to 1M
*/
- if (is_fstree(btrfs_root_id(root)) &&
+ if (btrfs_is_fstree(btrfs_root_id(root)) &&
btrfs_root_refs(&root->root_item) > 0) {
if (!anon_dev) {
ret = get_anon_bdev(&root->anon_dev);
@@ -1246,6 +1246,8 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
{
struct percpu_counter *em_counter = &fs_info->evictable_extent_maps;
+ if (fs_info->fs_devices)
+ btrfs_close_devices(fs_info->fs_devices);
percpu_counter_destroy(&fs_info->stats_read_blocks);
percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
percpu_counter_destroy(&fs_info->delalloc_bytes);
@@ -1315,7 +1317,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
* This is namely for free-space-tree and quota tree, which can change
* at runtime and should only be grabbed from fs_info.
*/
- if (!is_fstree(objectid) && objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+ if (!btrfs_is_fstree(objectid) && objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
return ERR_PTR(-ENOENT);
again:
root = btrfs_lookup_fs_root(fs_info, objectid);
@@ -1835,6 +1837,8 @@ void btrfs_put_root(struct btrfs_root *root)
if (refcount_dec_and_test(&root->refs)) {
if (WARN_ON(!xa_empty(&root->inodes)))
xa_destroy(&root->inodes);
+ if (WARN_ON(!xa_empty(&root->delayed_nodes)))
+ xa_destroy(&root->delayed_nodes);
WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state));
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
@@ -1945,7 +1949,6 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
fs_info->qgroup_tree = RB_ROOT;
INIT_LIST_HEAD(&fs_info->dirty_qgroups);
fs_info->qgroup_seq = 1;
- fs_info->qgroup_ulist = NULL;
fs_info->qgroup_rescan_running = false;
fs_info->qgroup_drop_subtree_thres = BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT;
mutex_init(&fs_info->qgroup_rescan_lock);
@@ -2156,8 +2159,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
found = true;
root = read_tree_root_path(tree_root, path, &key);
if (IS_ERR(root)) {
- if (!btrfs_test_opt(fs_info, IGNOREBADROOTS))
- ret = PTR_ERR(root);
+ ret = PTR_ERR(root);
break;
}
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
@@ -3395,6 +3397,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
fs_info->nodesize = nodesize;
+ fs_info->nodesize_bits = ilog2(nodesize);
fs_info->sectorsize = sectorsize;
fs_info->sectorsize_bits = ilog2(sectorsize);
fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
@@ -3560,6 +3563,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
goto fail_sysfs;
}
+ btrfs_zoned_reserve_data_reloc_bg(fs_info);
btrfs_free_zone_cache(fs_info);
btrfs_check_active_zone_reservation(fs_info);
@@ -3680,7 +3684,6 @@ fail_alloc:
iput(fs_info->btree_inode);
fail:
- btrfs_close_devices(fs_info->fs_devices);
ASSERT(ret < 0);
return ret;
}
@@ -3693,7 +3696,7 @@ static void btrfs_end_super_write(struct bio *bio)
bio_for_each_folio_all(fi, bio) {
if (bio->bi_status) {
- btrfs_warn_rl_in_rcu(device->fs_info,
+ btrfs_warn_rl(device->fs_info,
"lost super block write due to IO error on %s (%d)",
btrfs_dev_name(device),
blk_status_to_errno(bio->bi_status));
@@ -3991,7 +3994,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
}
if (min_tolerated == INT_MAX) {
- pr_warn("BTRFS: unknown raid flag: %llu", flags);
+ btrfs_warn(NULL, "unknown raid flag: %llu", flags);
min_tolerated = 0;
}
@@ -4310,8 +4313,8 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
*
* So wait for all ongoing ordered extents to complete and then run
* delayed iputs. This works because once we reach this point no one
- * can either create new ordered extents nor create delayed iputs
- * through some other means.
+ * can create new ordered extents, but delayed iputs can still be added
+ * by a reclaim worker (see comments further below).
*
* Also note that btrfs_wait_ordered_roots() is not safe here, because
* it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent,
@@ -4322,15 +4325,29 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_flush_workqueue(fs_info->endio_write_workers);
/* Ordered extents for free space inodes. */
btrfs_flush_workqueue(fs_info->endio_freespace_worker);
+ /*
+ * Run delayed iputs in case an async reclaim worker is waiting for them
+ * to be run as mentioned above.
+ */
btrfs_run_delayed_iputs(fs_info);
- /* There should be no more workload to generate new delayed iputs. */
- set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state);
cancel_work_sync(&fs_info->async_reclaim_work);
cancel_work_sync(&fs_info->async_data_reclaim_work);
cancel_work_sync(&fs_info->preempt_reclaim_work);
cancel_work_sync(&fs_info->em_shrinker_work);
+ /*
+ * Run delayed iputs again because an async reclaim worker may have
+ * added new ones if it was flushing delalloc:
+ *
+ * shrink_delalloc() -> btrfs_start_delalloc_roots() ->
+ * start_delalloc_inodes() -> btrfs_add_delayed_iput()
+ */
+ btrfs_run_delayed_iputs(fs_info);
+
+ /* There should be no more workload to generate new delayed iputs. */
+ set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state);
+
/* Cancel or finish ongoing discard work */
btrfs_discard_cleanup(fs_info);
@@ -4413,7 +4430,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
iput(fs_info->btree_inode);
btrfs_mapping_tree_free(fs_info);
- btrfs_close_devices(fs_info->fs_devices);
}
void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans,
@@ -4625,7 +4641,7 @@ static void btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
while (btrfs_find_first_extent_bit(dirty_pages, start, &start, &end,
mark, NULL)) {
- btrfs_clear_extent_bits(dirty_pages, start, end, mark);
+ btrfs_clear_extent_bit(dirty_pages, start, end, mark, NULL);
while (start <= end) {
eb = find_extent_buffer(fs_info, start);
start += fs_info->nodesize;
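
[Editorial note] The close_ctree() hunks above reorder shutdown so that delayed iputs are drained once before the reclaim workers are cancelled and once more after, since a worker flushing delalloc can queue new iputs on its way out; only then is BTRFS_FS_STATE_NO_DELAYED_IPUT set. A minimal userspace sketch of that drain/stop/drain pattern, with hypothetical names standing in for the btrfs helpers:

#include <stdio.h>
#include <stdbool.h>

/* Stand-ins: drain_pending() plays btrfs_run_delayed_iputs() and
 * stop_workers() plays the cancel_work_sync() calls. Illustrative only. */
static int pending = 2;              /* iputs queued before shutdown */
static bool workers_running = true;

static void drain_pending(void)
{
	while (pending > 0) {
		pending--;
		printf("ran one delayed iput\n");
	}
}

static void stop_workers(void)
{
	/* A reclaim worker may queue more work while being cancelled. */
	pending += 1;
	workers_running = false;
}

int main(void)
{
	drain_pending();     /* 1) drain what was queued before          */
	stop_workers();      /* 2) stop producers (they may add more)    */
	drain_pending();     /* 3) drain what they added on the way out  */
	/* 4) only now is it safe to forbid new delayed iputs            */
	printf("pending at end: %d (workers %s)\n",
	       pending, workers_running ? "running" : "stopped");
	return 0;
}
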
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index b1b96eb5f64e..66361325f6dc 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -43,7 +43,8 @@ static inline void btrfs_extent_state_leak_debug_check(void)
while (!list_empty(&states)) {
state = list_first_entry(&states, struct extent_state, leak_list);
- pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
+ btrfs_err(NULL,
+ "state leak: start %llu end %llu state %u in tree %d refs %d",
state->start, state->end, state->state,
extent_state_in_tree(state),
refcount_read(&state->refs));
@@ -1882,12 +1883,11 @@ int btrfs_clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 e
bool btrfs_try_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_state **cached)
{
- int err;
+ int ret;
u64 failed_start;
- err = set_extent_bit(tree, start, end, bits, &failed_start, NULL,
- cached, NULL);
- if (err == -EEXIST) {
+ ret = set_extent_bit(tree, start, end, bits, &failed_start, NULL, cached, NULL);
+ if (ret == -EEXIST) {
if (failed_start > start)
btrfs_clear_extent_bit(tree, start, failed_start - 1,
bits, cached);
@@ -1904,21 +1904,21 @@ int btrfs_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, u32
struct extent_state **cached_state)
{
struct extent_state *failed_state = NULL;
- int err;
+ int ret;
u64 failed_start;
- err = set_extent_bit(tree, start, end, bits, &failed_start,
+ ret = set_extent_bit(tree, start, end, bits, &failed_start,
&failed_state, cached_state, NULL);
- while (err == -EEXIST) {
+ while (ret == -EEXIST) {
if (failed_start != start)
btrfs_clear_extent_bit(tree, start, failed_start - 1,
bits, cached_state);
wait_extent_bit(tree, failed_start, end, bits, &failed_state);
- err = set_extent_bit(tree, start, end, bits, &failed_start,
+ ret = set_extent_bit(tree, start, end, bits, &failed_start,
&failed_state, cached_state, NULL);
}
- return err;
+ return ret;
}
/*
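
[Editorial note] The btrfs_lock_extent_bits() hunk above keeps the same shape while renaming err to ret: on -EEXIST it clears whatever prefix of the range it managed to mark, waits for the conflicting bits, and retries. A userspace analogue of that rollback-and-retry loop, with an array standing in for the extent io tree:

#include <stdbool.h>
#include <stdio.h>

#define RANGE 8
static bool locked[RANGE];

static int try_lock_range(int start, int end, int *failed_start)
{
	for (int i = start; i <= end; i++) {
		if (locked[i]) {
			*failed_start = i;
			return -1;   /* plays -EEXIST in the kernel code */
		}
		locked[i] = true;
	}
	return 0;
}

static void unlock_range(int start, int end)
{
	for (int i = start; i <= end; i++)
		locked[i] = false;
}

int main(void)
{
	int failed_start;

	locked[5] = true;                    /* a conflicting holder     */
	while (try_lock_range(2, 7, &failed_start) != 0) {
		if (failed_start > 2)
			unlock_range(2, failed_start - 1); /* drop partial lock */
		locked[failed_start] = false;   /* holder releases; plays the wait */
	}
	printf("locked 2..7\n");
	return 0;
}
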
diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
index 0a18ca9c59c3..36facca37973 100644
--- a/fs/btrfs/extent-io-tree.h
+++ b/fs/btrfs/extent-io-tree.h
@@ -19,7 +19,8 @@ enum {
ENUM_BIT(EXTENT_DIRTY),
ENUM_BIT(EXTENT_LOCKED),
ENUM_BIT(EXTENT_DIO_LOCKED),
- ENUM_BIT(EXTENT_NEW),
+ ENUM_BIT(EXTENT_DIRTY_LOG1),
+ ENUM_BIT(EXTENT_DIRTY_LOG2),
ENUM_BIT(EXTENT_DELALLOC),
ENUM_BIT(EXTENT_DEFRAG),
ENUM_BIT(EXTENT_BOUNDARY),
@@ -191,12 +192,6 @@ static inline int btrfs_unlock_extent(struct extent_io_tree *tree, u64 start, u6
cached, NULL);
}
-static inline int btrfs_clear_extent_bits(struct extent_io_tree *tree, u64 start,
- u64 end, u32 bits)
-{
- return btrfs_clear_extent_bit(tree, start, end, bits, NULL);
-}
-
int btrfs_set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_changeset *changeset);
int btrfs_set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
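
[Editorial note] The renamed EXTENT_DIRTY_LOG1/LOG2 bits above slot into the same ENUM_BIT() enum. A plausible re-creation of that macro trick (the real macro lives in fs/btrfs/misc.h) showing how consecutive entries get consecutive power-of-two values:

#include <stdio.h>

/* Each ENUM_BIT() burns one enumerator for the bit index and one for
 * the mask, so the next entry's index is the previous index plus one. */
#define ENUM_BIT(name)                                  \
	__ ## name ## _BIT,                             \
	name = (1U << __ ## name ## _BIT),              \
	__ ## name ## _SEQ = __ ## name ## _BIT

enum {
	ENUM_BIT(EXTENT_DIRTY),
	ENUM_BIT(EXTENT_DIRTY_LOG1),
	ENUM_BIT(EXTENT_DIRTY_LOG2),
};

int main(void)
{
	printf("%x %x %x\n", EXTENT_DIRTY, EXTENT_DIRTY_LOG1, EXTENT_DIRTY_LOG2);
	return 0;   /* prints: 1 2 4 */
}
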
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cb6128778a83..97d517cdf2df 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -46,7 +46,7 @@
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *href,
- struct btrfs_delayed_ref_node *node,
+ const struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
struct extent_buffer *leaf,
@@ -56,12 +56,12 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 flags, u64 owner, u64 offset,
struct btrfs_key *ins, int ref_mod, u64 oref_root);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_node *node,
+ const struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op);
-static int find_next_key(struct btrfs_path *path, int level,
+static int find_next_key(const struct btrfs_path *path, int level,
struct btrfs_key *key);
-static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
+static int block_group_bits(const struct btrfs_block_group *cache, u64 bits)
{
return (cache->flags & bits) == bits;
}
@@ -329,7 +329,7 @@ search_again:
* is_data == BTRFS_REF_TYPE_ANY, either type is OK.
*/
int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
- struct btrfs_extent_inline_ref *iref,
+ const struct btrfs_extent_inline_ref *iref,
enum btrfs_inline_ref_type is_data)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
@@ -401,16 +401,16 @@ u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
return ((u64)high_crc << 31) ^ (u64)low_crc;
}
-static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
- struct btrfs_extent_data_ref *ref)
+static u64 hash_extent_data_ref_item(const struct extent_buffer *leaf,
+ const struct btrfs_extent_data_ref *ref)
{
return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
btrfs_extent_data_ref_objectid(leaf, ref),
btrfs_extent_data_ref_offset(leaf, ref));
}
-static bool match_extent_data_ref(struct extent_buffer *leaf,
- struct btrfs_extent_data_ref *ref,
+static bool match_extent_data_ref(const struct extent_buffer *leaf,
+ const struct btrfs_extent_data_ref *ref,
u64 root_objectid, u64 owner, u64 offset)
{
if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
@@ -497,7 +497,7 @@ fail:
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
- struct btrfs_delayed_ref_node *node,
+ const struct btrfs_delayed_ref_node *node,
u64 bytenr)
{
struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
@@ -617,13 +617,13 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
return ret;
}
-static noinline u32 extent_data_ref_count(struct btrfs_path *path,
- struct btrfs_extent_inline_ref *iref)
+static noinline u32 extent_data_ref_count(const struct btrfs_path *path,
+ const struct btrfs_extent_inline_ref *iref)
{
struct btrfs_key key;
struct extent_buffer *leaf;
- struct btrfs_extent_data_ref *ref1;
- struct btrfs_shared_data_ref *ref2;
+ const struct btrfs_extent_data_ref *ref1;
+ const struct btrfs_shared_data_ref *ref2;
u32 num_refs = 0;
int type;
@@ -638,10 +638,10 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
ASSERT(type != BTRFS_REF_TYPE_INVALID);
if (type == BTRFS_EXTENT_DATA_REF_KEY) {
- ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
+ ref1 = (const struct btrfs_extent_data_ref *)(&iref->offset);
num_refs = btrfs_extent_data_ref_count(leaf, ref1);
} else {
- ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
+ ref2 = (const struct btrfs_shared_data_ref *)(iref + 1);
num_refs = btrfs_shared_data_ref_count(leaf, ref2);
}
} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
@@ -684,7 +684,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
- struct btrfs_delayed_ref_node *node,
+ const struct btrfs_delayed_ref_node *node,
u64 bytenr)
{
struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
@@ -722,7 +722,7 @@ static inline int extent_ref_type(u64 parent, u64 owner)
return type;
}
-static int find_next_key(struct btrfs_path *path, int level,
+static int find_next_key(const struct btrfs_path *path, int level,
struct btrfs_key *key)
{
@@ -1480,7 +1480,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
*
*/
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_node *node,
+ const struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op)
{
BTRFS_PATH_AUTO_FREE(path);
@@ -1522,19 +1522,21 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
/* now insert the actual backref */
- if (owner < BTRFS_FIRST_FREE_OBJECTID)
+ if (owner < BTRFS_FIRST_FREE_OBJECTID) {
ret = insert_tree_block_ref(trans, path, node, bytenr);
- else
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+ } else {
ret = insert_extent_data_ref(trans, path, node, bytenr);
-
- if (ret)
- btrfs_abort_transaction(trans, ret);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+ }
return ret;
}
static void free_head_ref_squota_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_delayed_ref_head *href)
+ const struct btrfs_delayed_ref_head *href)
{
u64 root = href->owning_root;
@@ -1543,7 +1545,7 @@ static void free_head_ref_squota_rsv(struct btrfs_fs_info *fs_info,
* where it has already been unset.
*/
if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE ||
- !href->is_data || !is_fstree(root))
+ !href->is_data || !btrfs_is_fstree(root))
return;
btrfs_qgroup_free_refroot(fs_info, root, href->reserved_bytes,
@@ -1552,7 +1554,7 @@ static void free_head_ref_squota_rsv(struct btrfs_fs_info *fs_info,
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *href,
- struct btrfs_delayed_ref_node *node,
+ const struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
bool insert_reserved)
{
@@ -1620,7 +1622,7 @@ static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
}
static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_head *head,
+ const struct btrfs_delayed_ref_head *head,
struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -1707,7 +1709,7 @@ again:
static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *href,
- struct btrfs_delayed_ref_node *node,
+ const struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
bool insert_reserved)
{
@@ -1754,7 +1756,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
/* helper function to actually process a single delayed ref entry */
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *href,
- struct btrfs_delayed_ref_node *node,
+ const struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
bool insert_reserved)
{
@@ -2998,7 +3000,7 @@ static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
return ret;
}
- ret = add_to_free_space_tree(trans, bytenr, num_bytes);
+ ret = btrfs_add_to_free_space_tree(trans, bytenr, num_bytes);
if (ret) {
btrfs_abort_transaction(trans, ret);
return ret;
@@ -3079,7 +3081,7 @@ static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
*/
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *href,
- struct btrfs_delayed_ref_node *node,
+ const struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_fs_info *info = trans->fs_info;
@@ -3649,6 +3651,21 @@ btrfs_release_block_group(struct btrfs_block_group *cache,
btrfs_put_block_group(cache);
}
+static bool find_free_extent_check_size_class(const struct find_free_extent_ctl *ffe_ctl,
+ const struct btrfs_block_group *bg)
+{
+ if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED)
+ return true;
+ if (!btrfs_block_group_should_use_size_class(bg))
+ return true;
+ if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS)
+ return true;
+ if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS &&
+ bg->size_class == BTRFS_BG_SZ_NONE)
+ return true;
+ return ffe_ctl->size_class == bg->size_class;
+}
+
/*
* Helper function for find_free_extent().
*
@@ -3670,7 +3687,8 @@ static int find_free_extent_clustered(struct btrfs_block_group *bg,
if (!cluster_bg)
goto refill_cluster;
if (cluster_bg != bg && (cluster_bg->ro ||
- !block_group_bits(cluster_bg, ffe_ctl->flags)))
+ !block_group_bits(cluster_bg, ffe_ctl->flags) ||
+ !find_free_extent_check_size_class(ffe_ctl, cluster_bg)))
goto release_cluster;
offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
@@ -4227,21 +4245,6 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
return -ENOSPC;
}
-static bool find_free_extent_check_size_class(struct find_free_extent_ctl *ffe_ctl,
- struct btrfs_block_group *bg)
-{
- if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED)
- return true;
- if (!btrfs_block_group_should_use_size_class(bg))
- return true;
- if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS)
- return true;
- if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS &&
- bg->size_class == BTRFS_BG_SZ_NONE)
- return true;
- return ffe_ctl->size_class == bg->size_class;
-}
-
static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
struct find_free_extent_ctl *ffe_ctl,
struct btrfs_space_info *space_info,
@@ -4782,7 +4785,7 @@ static int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr,
struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
- ret = remove_from_free_space_tree(trans, bytenr, num_bytes);
+ ret = btrfs_remove_from_free_space_tree(trans, bytenr, num_bytes);
if (ret)
return ret;
@@ -4873,7 +4876,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
}
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_node *node,
+ const struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -4961,7 +4964,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
ASSERT(generic_ref.ref_root != BTRFS_TREE_LOG_OBJECTID);
- if (btrfs_is_data_reloc_root(root) && is_fstree(root->relocation_src_root))
+ if (btrfs_is_data_reloc_root(root) && btrfs_is_fstree(root->relocation_src_root))
generic_ref.owning_root = root->relocation_src_root;
btrfs_init_data_ref(&generic_ref, owner, offset, 0, false);
@@ -4983,7 +4986,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
int ret;
struct btrfs_block_group *block_group;
struct btrfs_space_info *space_info;
- struct btrfs_squota_delta delta = {
+ const struct btrfs_squota_delta delta = {
.root = root_objectid,
.num_bytes = ins->offset,
.generation = trans->transid,
@@ -5111,11 +5114,11 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (buf->log_index == 0)
btrfs_set_extent_bit(&root->dirty_log_pages, buf->start,
buf->start + buf->len - 1,
- EXTENT_DIRTY, NULL);
+ EXTENT_DIRTY_LOG1, NULL);
else
btrfs_set_extent_bit(&root->dirty_log_pages, buf->start,
buf->start + buf->len - 1,
- EXTENT_NEW, NULL);
+ EXTENT_DIRTY_LOG2, NULL);
} else {
buf->log_index = -1;
btrfs_set_extent_bit(&trans->transaction->dirty_pages, buf->start,
@@ -5552,7 +5555,7 @@ again:
goto again;
}
- exists = btrfs_find_delayed_tree_ref(head, root->root_key.objectid, parent);
+ exists = btrfs_find_delayed_tree_ref(head, btrfs_root_id(root), parent);
mutex_unlock(&head->mutex);
out:
spin_unlock(&delayed_refs->lock);
@@ -5872,15 +5875,20 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
if (wc->refs[level] == 1) {
if (level == 0) {
- if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+ if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
ret = btrfs_dec_ref(trans, root, eb, 1);
- else
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+ } else {
ret = btrfs_dec_ref(trans, root, eb, 0);
- if (ret) {
- btrfs_abort_transaction(trans, ret);
- return ret;
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
}
- if (is_fstree(btrfs_root_id(root))) {
+ if (btrfs_is_fstree(btrfs_root_id(root))) {
ret = btrfs_qgroup_trace_leaf_items(trans, eb);
if (ret) {
btrfs_err_rl(fs_info,
@@ -6341,7 +6349,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
btrfs_assert_tree_write_locked(parent);
parent_level = btrfs_header_level(parent);
- atomic_inc(&parent->refs);
+ refcount_inc(&parent->refs);
path->nodes[parent_level] = parent;
path->slots[parent_level] = btrfs_header_nritems(parent);
@@ -6442,7 +6450,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
/* Check if there are any CHUNK_* bits left */
if (start > device->total_bytes) {
DEBUG_WARN();
- btrfs_warn_in_rcu(fs_info,
+ btrfs_warn(fs_info,
"ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu",
start, end - start + 1,
btrfs_dev_name(device),
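
[Editorial note] A large share of the extent-tree.c changes above simply add const to parameters that are only read (delayed ref nodes, paths, block groups). The payoff is that the compiler rejects accidental writes through those pointers. A tiny sketch with illustrative types:

struct block_group {
	unsigned long flags;
};

static int block_group_bits(const struct block_group *cache, unsigned long bits)
{
	/* cache->flags = 0;   <-- would no longer compile */
	return (cache->flags & bits) == bits;
}

int main(void)
{
	struct block_group bg = { .flags = 0x3 };
	return !block_group_bits(&bg, 0x1);   /* exits 0: bit is set */
}
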
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
index 72914074c304..82d3a82dc712 100644
--- a/fs/btrfs/extent-tree.h
+++ b/fs/btrfs/extent-tree.h
@@ -97,7 +97,7 @@ enum btrfs_inline_ref_type {
};
int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
- struct btrfs_extent_inline_ref *iref,
+ const struct btrfs_extent_inline_ref *iref,
enum btrfs_inline_ref_type is_data);
u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 849199768664..835b0deef9bb 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -75,9 +75,9 @@ void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info)
while (!list_empty(&fs_info->allocated_ebs)) {
eb = list_first_entry(&fs_info->allocated_ebs,
struct extent_buffer, leak_list);
- pr_err(
- "BTRFS: buffer leak start %llu len %u refs %d bflags %lu owner %llu\n",
- eb->start, eb->len, atomic_read(&eb->refs), eb->bflags,
+ btrfs_err(fs_info,
+ "buffer leak start %llu len %u refs %d bflags %lu owner %llu",
+ eb->start, eb->len, refcount_read(&eb->refs), eb->bflags,
btrfs_header_owner(eb));
list_del(&eb->leak_list);
WARN_ON_ONCE(1);
@@ -110,6 +110,7 @@ struct btrfs_bio_ctrl {
* This is to avoid touching ranges covered by compression/inline.
*/
unsigned long submit_bitmap;
+ struct readahead_control *ractl;
};
static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
@@ -266,8 +267,7 @@ static noinline int lock_delalloc_folios(struct inode *inode,
goto out;
}
range_start = max_t(u64, folio_pos(folio), start);
- range_len = min_t(u64, folio_pos(folio) + folio_size(folio),
- end + 1) - range_start;
+ range_len = min_t(u64, folio_end(folio), end + 1) - range_start;
btrfs_folio_set_lock(fs_info, folio, range_start, range_len);
processed_end = range_start + range_len - 1;
@@ -321,7 +321,7 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
ASSERT(orig_end > orig_start);
/* The range should at least cover part of the folio */
- ASSERT(!(orig_start >= folio_pos(locked_folio) + folio_size(locked_folio) ||
+ ASSERT(!(orig_start >= folio_end(locked_folio) ||
orig_end <= folio_pos(locked_folio)));
again:
/* step one, find a bunch of delalloc bytes starting at start */
@@ -419,7 +419,7 @@ static void end_folio_read(struct folio *folio, bool uptodate, u64 start, u32 le
struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
ASSERT(folio_pos(folio) <= start &&
- start + len <= folio_pos(folio) + folio_size(folio));
+ start + len <= folio_end(folio));
if (uptodate && btrfs_verify_folio(folio, start, len))
btrfs_folio_set_uptodate(fs_info, folio, start, len);
@@ -782,7 +782,7 @@ static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl,
static int attach_extent_buffer_folio(struct extent_buffer *eb,
struct folio *folio,
- struct btrfs_subpage *prealloc)
+ struct btrfs_folio_state *prealloc)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
int ret = 0;
@@ -806,7 +806,7 @@ static int attach_extent_buffer_folio(struct extent_buffer *eb,
/* Already mapped, just free prealloc */
if (folio_test_private(folio)) {
- btrfs_free_subpage(prealloc);
+ btrfs_free_folio_state(prealloc);
return 0;
}
@@ -815,7 +815,7 @@ static int attach_extent_buffer_folio(struct extent_buffer *eb,
folio_attach_private(folio, prealloc);
else
/* Do new allocation to attach subpage */
- ret = btrfs_attach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA);
+ ret = btrfs_attach_folio_state(fs_info, folio, BTRFS_SUBPAGE_METADATA);
return ret;
}
@@ -831,7 +831,7 @@ int set_folio_extent_mapped(struct folio *folio)
fs_info = folio_to_fs_info(folio);
if (btrfs_is_subpage(fs_info, folio))
- return btrfs_attach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA);
+ return btrfs_attach_folio_state(fs_info, folio, BTRFS_SUBPAGE_DATA);
folio_attach_private(folio, (void *)EXTENT_FOLIO_PRIVATE);
return 0;
@@ -848,7 +848,7 @@ void clear_folio_extent_mapped(struct folio *folio)
fs_info = folio_to_fs_info(folio);
if (btrfs_is_subpage(fs_info, folio))
- return btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA);
+ return btrfs_detach_folio_state(fs_info, folio, BTRFS_SUBPAGE_DATA);
folio_detach_private(folio);
}
@@ -882,6 +882,25 @@ static struct extent_map *get_extent_map(struct btrfs_inode *inode,
return em;
}
+
+static void btrfs_readahead_expand(struct readahead_control *ractl,
+ const struct extent_map *em)
+{
+ const u64 ra_pos = readahead_pos(ractl);
+ const u64 ra_end = ra_pos + readahead_length(ractl);
+ const u64 em_end = em->start + em->ram_bytes;
+
+ /* No expansion for holes and inline extents. */
+ if (em->disk_bytenr > EXTENT_MAP_LAST_BYTE)
+ return;
+
+ ASSERT(em_end >= ra_pos,
+ "extent_map %llu %llu ends before current readahead position %llu",
+ em->start, em->len, ra_pos);
+ if (em_end > ra_end)
+ readahead_expand(ractl, ra_pos, em_end - ra_pos);
+}
+
/*
* basic readpage implementation. Locked extent state structs are inserted
* into the tree that are removed when the IO is done (by the end_io
@@ -945,6 +964,16 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
compress_type = btrfs_extent_map_compression(em);
+ /*
+	 * Only expand readahead for extents which already create the
+	 * pages anyway in add_ra_bio_pages(), i.e. compressed extents
+	 * in the non-subpage case.
+ */
+ if (bio_ctrl->ractl &&
+ !btrfs_is_subpage(fs_info, folio) &&
+ compress_type != BTRFS_COMPRESS_NONE)
+ btrfs_readahead_expand(bio_ctrl->ractl, em);
+
if (compress_type != BTRFS_COMPRESS_NONE)
disk_bytenr = em->disk_bytenr;
else
@@ -1086,7 +1115,7 @@ static bool can_skip_one_ordered_range(struct btrfs_inode *inode,
* finished our folio read and unlocked the folio.
*/
if (btrfs_folio_test_dirty(fs_info, folio, cur, blocksize)) {
- u64 range_len = min(folio_pos(folio) + folio_size(folio),
+ u64 range_len = min(folio_end(folio),
ordered->file_offset + ordered->num_bytes) - cur;
ret = true;
@@ -1108,7 +1137,7 @@ static bool can_skip_one_ordered_range(struct btrfs_inode *inode,
* So we return true and update @next_ret to the OE/folio boundary.
*/
if (btrfs_folio_test_uptodate(fs_info, folio, cur, blocksize)) {
- u64 range_len = min(folio_pos(folio) + folio_size(folio),
+ u64 range_len = min(folio_end(folio),
ordered->file_offset + ordered->num_bytes) - cur;
/*
@@ -1663,7 +1692,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
int ret;
size_t pg_offset;
loff_t i_size = i_size_read(&inode->vfs_inode);
- unsigned long end_index = i_size >> PAGE_SHIFT;
+ const pgoff_t end_index = i_size >> PAGE_SHIFT;
const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
trace_extent_writepage(folio, &inode->vfs_inode, bio_ctrl->wbc);
@@ -1704,7 +1733,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
btrfs_err_rl(fs_info,
"root %lld ino %llu folio %llu is marked dirty without notifying the fs",
- inode->root->root_key.objectid,
+ btrfs_root_id(inode->root),
btrfs_ino(inode), folio_pos(folio));
ret = -EUCLEAN;
goto done;
@@ -1774,7 +1803,7 @@ static noinline_for_stack bool lock_extent_buffer_for_io(struct extent_buffer *e
*/
spin_lock(&eb->refs_lock);
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
- XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->sectorsize_bits);
+ XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->nodesize_bits);
unsigned long flags;
set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
@@ -1874,7 +1903,7 @@ static void set_btree_ioerr(struct extent_buffer *eb)
static void buffer_tree_set_mark(const struct extent_buffer *eb, xa_mark_t mark)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
- XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->sectorsize_bits);
+ XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->nodesize_bits);
unsigned long flags;
xas_lock_irqsave(&xas, flags);
@@ -1886,7 +1915,7 @@ static void buffer_tree_set_mark(const struct extent_buffer *eb, xa_mark_t mark)
static void buffer_tree_clear_mark(const struct extent_buffer *eb, xa_mark_t mark)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
- XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->sectorsize_bits);
+ XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->nodesize_bits);
unsigned long flags;
xas_lock_irqsave(&xas, flags);
@@ -1961,7 +1990,7 @@ retry:
if (!eb)
return NULL;
- if (!atomic_inc_not_zero(&eb->refs)) {
+ if (!refcount_inc_not_zero(&eb->refs)) {
xas_reset(xas);
goto retry;
}
@@ -1986,7 +2015,7 @@ static unsigned int buffer_tree_get_ebs_tag(struct btrfs_fs_info *fs_info,
rcu_read_lock();
while ((eb = find_get_eb(&xas, end, tag)) != NULL) {
if (!eb_batch_add(batch, eb)) {
- *start = ((eb->start + eb->len) >> fs_info->sectorsize_bits);
+ *start = ((eb->start + eb->len) >> fs_info->nodesize_bits);
goto out;
}
}
@@ -2008,11 +2037,11 @@ static struct extent_buffer *find_extent_buffer_nolock(
struct btrfs_fs_info *fs_info, u64 start)
{
struct extent_buffer *eb;
- unsigned long index = (start >> fs_info->sectorsize_bits);
+ unsigned long index = (start >> fs_info->nodesize_bits);
rcu_read_lock();
eb = xa_load(&fs_info->buffer_tree, index);
- if (eb && !atomic_inc_not_zero(&eb->refs))
+ if (eb && !refcount_inc_not_zero(&eb->refs))
eb = NULL;
rcu_read_unlock();
return eb;
@@ -2031,10 +2060,7 @@ static void end_bbio_meta_write(struct btrfs_bio *bbio)
}
buffer_tree_clear_mark(eb, PAGECACHE_TAG_WRITEBACK);
- clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
- smp_mb__after_atomic();
- wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
-
+ clear_and_wake_up_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
bio_put(&bbio->bio);
}
@@ -2085,7 +2111,7 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
for (int i = 0; i < num_extent_folios(eb); i++) {
struct folio *folio = eb->folios[i];
u64 range_start = max_t(u64, eb->start, folio_pos(folio));
- u32 range_len = min_t(u64, folio_pos(folio) + folio_size(folio),
+ u32 range_len = min_t(u64, folio_end(folio),
eb->start + eb->len) - range_start;
folio_lock(folio);
@@ -2114,8 +2140,8 @@ void btrfs_btree_wait_writeback_range(struct btrfs_fs_info *fs_info, u64 start,
u64 end)
{
struct eb_batch batch;
- unsigned long start_index = (start >> fs_info->sectorsize_bits);
- unsigned long end_index = (end >> fs_info->sectorsize_bits);
+ unsigned long start_index = (start >> fs_info->nodesize_bits);
+ unsigned long end_index = (end >> fs_info->nodesize_bits);
eb_batch_init(&batch);
while (start_index <= end_index) {
@@ -2151,7 +2177,7 @@ int btree_write_cache_pages(struct address_space *mapping,
eb_batch_init(&batch);
if (wbc->range_cyclic) {
- index = ((mapping->writeback_index << PAGE_SHIFT) >> fs_info->sectorsize_bits);
+ index = ((mapping->writeback_index << PAGE_SHIFT) >> fs_info->nodesize_bits);
end = -1;
/*
@@ -2160,8 +2186,8 @@ int btree_write_cache_pages(struct address_space *mapping,
*/
scanned = (index == 0);
} else {
- index = (wbc->range_start >> fs_info->sectorsize_bits);
- end = (wbc->range_end >> fs_info->sectorsize_bits);
+ index = (wbc->range_start >> fs_info->nodesize_bits);
+ end = (wbc->range_end >> fs_info->nodesize_bits);
scanned = 1;
}
@@ -2489,7 +2515,7 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
continue;
}
- cur_end = min_t(u64, folio_pos(folio) + folio_size(folio) - 1, end);
+ cur_end = min_t(u64, folio_end(folio) - 1, end);
cur_len = cur_end + 1 - cur;
ASSERT(folio_test_locked(folio));
@@ -2541,7 +2567,10 @@ int btrfs_writepages(struct address_space *mapping, struct writeback_control *wb
void btrfs_readahead(struct readahead_control *rac)
{
- struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ | REQ_RAHEAD };
+ struct btrfs_bio_ctrl bio_ctrl = {
+ .opf = REQ_OP_READ | REQ_RAHEAD,
+		.ractl = rac,
+ };
struct folio *folio;
struct btrfs_inode *inode = BTRFS_I(rac->mapping->host);
const u64 start = readahead_pos(rac);
@@ -2731,13 +2760,13 @@ static int extent_buffer_under_io(const struct extent_buffer *eb)
static bool folio_range_has_eb(struct folio *folio)
{
- struct btrfs_subpage *subpage;
+ struct btrfs_folio_state *bfs;
lockdep_assert_held(&folio->mapping->i_private_lock);
if (folio_test_private(folio)) {
- subpage = folio_get_private(folio);
- if (atomic_read(&subpage->eb_refs))
+ bfs = folio_get_private(folio);
+ if (atomic_read(&bfs->eb_refs))
return true;
}
return false;
@@ -2787,7 +2816,7 @@ static void detach_extent_buffer_folio(const struct extent_buffer *eb, struct fo
* attached to one dummy eb, no sharing.
*/
if (!mapped) {
- btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA);
+ btrfs_detach_folio_state(fs_info, folio, BTRFS_SUBPAGE_METADATA);
return;
}
@@ -2798,7 +2827,7 @@ static void detach_extent_buffer_folio(const struct extent_buffer *eb, struct fo
* page range and no unfinished IO.
*/
if (!folio_range_has_eb(folio))
- btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA);
+ btrfs_detach_folio_state(fs_info, folio, BTRFS_SUBPAGE_METADATA);
spin_unlock(&mapping->i_private_lock);
}
@@ -2842,7 +2871,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct btrfs_fs_info *fs_info
btrfs_leak_debug_add_eb(eb);
spin_lock_init(&eb->refs_lock);
- atomic_set(&eb->refs, 1);
+ refcount_set(&eb->refs, 1);
ASSERT(eb->len <= BTRFS_MAX_METADATA_BLOCKSIZE);
@@ -2975,13 +3004,13 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
* once io is initiated, TREE_REF can no longer be cleared, so that is
* the moment at which any such race is best fixed.
*/
- refs = atomic_read(&eb->refs);
+ refs = refcount_read(&eb->refs);
if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
return;
spin_lock(&eb->refs_lock);
if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
- atomic_inc(&eb->refs);
+ refcount_inc(&eb->refs);
spin_unlock(&eb->refs_lock);
}
@@ -3038,7 +3067,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
eb->fs_info = fs_info;
again:
xa_lock_irq(&fs_info->buffer_tree);
- exists = __xa_cmpxchg(&fs_info->buffer_tree, start >> fs_info->sectorsize_bits,
+ exists = __xa_cmpxchg(&fs_info->buffer_tree, start >> fs_info->nodesize_bits,
NULL, eb, GFP_NOFS);
if (xa_is_err(exists)) {
ret = xa_err(exists);
@@ -3047,7 +3076,7 @@ again:
return ERR_PTR(ret);
}
if (exists) {
- if (!atomic_inc_not_zero(&exists->refs)) {
+ if (!refcount_inc_not_zero(&exists->refs)) {
/* The extent buffer is being freed, retry. */
xa_unlock_irq(&fs_info->buffer_tree);
goto again;
@@ -3092,7 +3121,7 @@ static struct extent_buffer *grab_extent_buffer(struct btrfs_fs_info *fs_info,
* just overwrite folio private.
*/
exists = folio_get_private(folio);
- if (atomic_inc_not_zero(&exists->refs))
+ if (refcount_inc_not_zero(&exists->refs))
return exists;
WARN_ON(folio_test_dirty(folio));
@@ -3141,13 +3170,13 @@ static bool check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
* The caller needs to free the existing folios and retry using the same order.
*/
static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
- struct btrfs_subpage *prealloc,
+ struct btrfs_folio_state *prealloc,
struct extent_buffer **found_eb_ret)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
struct address_space *mapping = fs_info->btree_inode->i_mapping;
- const unsigned long index = eb->start >> PAGE_SHIFT;
+ const pgoff_t index = eb->start >> PAGE_SHIFT;
struct folio *existing_folio;
int ret;
@@ -3224,7 +3253,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
int attached = 0;
struct extent_buffer *eb;
struct extent_buffer *existing_eb = NULL;
- struct btrfs_subpage *prealloc = NULL;
+ struct btrfs_folio_state *prealloc = NULL;
u64 lockdep_owner = owner_root;
bool page_contig = true;
int uptodate = 1;
@@ -3269,7 +3298,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
* manually if we exit earlier.
*/
if (btrfs_meta_is_subpage(fs_info)) {
- prealloc = btrfs_alloc_subpage(fs_info, PAGE_SIZE, BTRFS_SUBPAGE_METADATA);
+ prealloc = btrfs_alloc_folio_state(fs_info, PAGE_SIZE, BTRFS_SUBPAGE_METADATA);
if (IS_ERR(prealloc)) {
ret = PTR_ERR(prealloc);
goto out;
@@ -3280,7 +3309,7 @@ reallocate:
/* Allocate all pages first. */
ret = alloc_eb_folio_array(eb, true);
if (ret < 0) {
- btrfs_free_subpage(prealloc);
+ btrfs_free_folio_state(prealloc);
goto out;
}
@@ -3354,7 +3383,7 @@ reallocate:
again:
xa_lock_irq(&fs_info->buffer_tree);
existing_eb = __xa_cmpxchg(&fs_info->buffer_tree,
- start >> fs_info->sectorsize_bits, NULL, eb,
+ start >> fs_info->nodesize_bits, NULL, eb,
GFP_NOFS);
if (xa_is_err(existing_eb)) {
ret = xa_err(existing_eb);
@@ -3362,7 +3391,7 @@ again:
goto out;
}
if (existing_eb) {
- if (!atomic_inc_not_zero(&existing_eb->refs)) {
+ if (!refcount_inc_not_zero(&existing_eb->refs)) {
xa_unlock_irq(&fs_info->buffer_tree);
goto again;
}
@@ -3391,7 +3420,7 @@ again:
return eb;
out:
- WARN_ON(!atomic_dec_and_test(&eb->refs));
+ WARN_ON(!refcount_dec_and_test(&eb->refs));
/*
* Any attached folios need to be detached before we unlock them. This
@@ -3437,8 +3466,7 @@ static int release_extent_buffer(struct extent_buffer *eb)
{
lockdep_assert_held(&eb->refs_lock);
- WARN_ON(atomic_read(&eb->refs) == 0);
- if (atomic_dec_and_test(&eb->refs)) {
+ if (refcount_dec_and_test(&eb->refs)) {
struct btrfs_fs_info *fs_info = eb->fs_info;
spin_unlock(&eb->refs_lock);
@@ -3458,7 +3486,7 @@ static int release_extent_buffer(struct extent_buffer *eb)
* in this case.
*/
xa_cmpxchg_irq(&fs_info->buffer_tree,
- eb->start >> fs_info->sectorsize_bits, eb, NULL,
+ eb->start >> fs_info->nodesize_bits, eb, NULL,
GFP_ATOMIC);
btrfs_leak_debug_del_eb(eb);
@@ -3484,22 +3512,26 @@ void free_extent_buffer(struct extent_buffer *eb)
if (!eb)
return;
- refs = atomic_read(&eb->refs);
+ refs = refcount_read(&eb->refs);
while (1) {
- if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3)
- || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) &&
- refs == 1))
+ if (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags)) {
+ if (refs == 1)
+ break;
+ } else if (refs <= 3) {
break;
- if (atomic_try_cmpxchg(&eb->refs, &refs, refs - 1))
+ }
+
+ /* Optimization to avoid locking eb->refs_lock. */
+ if (atomic_try_cmpxchg(&eb->refs.refs, &refs, refs - 1))
return;
}
spin_lock(&eb->refs_lock);
- if (atomic_read(&eb->refs) == 2 &&
+ if (refcount_read(&eb->refs) == 2 &&
test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
!extent_buffer_under_io(eb) &&
test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
- atomic_dec(&eb->refs);
+ refcount_dec(&eb->refs);
/*
* I know this is terrible, but it's temporary until we stop tracking
@@ -3516,9 +3548,9 @@ void free_extent_buffer_stale(struct extent_buffer *eb)
spin_lock(&eb->refs_lock);
set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
- if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
+ if (refcount_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
- atomic_dec(&eb->refs);
+ refcount_dec(&eb->refs);
release_extent_buffer(eb);
}
@@ -3576,7 +3608,7 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
btree_clear_folio_dirty_tag(folio);
folio_unlock(folio);
}
- WARN_ON(atomic_read(&eb->refs) == 0);
+ WARN_ON(refcount_read(&eb->refs) == 0);
}
void set_extent_buffer_dirty(struct extent_buffer *eb)
@@ -3587,7 +3619,7 @@ void set_extent_buffer_dirty(struct extent_buffer *eb)
was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
- WARN_ON(atomic_read(&eb->refs) == 0);
+ WARN_ON(refcount_read(&eb->refs) == 0);
WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
WARN_ON(test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags));
@@ -3646,9 +3678,7 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
static void clear_extent_buffer_reading(struct extent_buffer *eb)
{
- clear_bit(EXTENT_BUFFER_READING, &eb->bflags);
- smp_mb__after_atomic();
- wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING);
+ clear_and_wake_up_bit(EXTENT_BUFFER_READING, &eb->bflags);
}
static void end_bbio_meta_read(struct btrfs_bio *bbio)
@@ -3713,7 +3743,7 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
eb->read_mirror = 0;
check_buffer_tree_ref(eb);
- atomic_inc(&eb->refs);
+ refcount_inc(&eb->refs);
bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES,
REQ_OP_READ | REQ_META, eb->fs_info,
@@ -3725,7 +3755,7 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
for (int i = 0; i < num_extent_folios(eb); i++) {
struct folio *folio = eb->folios[i];
u64 range_start = max_t(u64, eb->start, folio_pos(folio));
- u32 range_len = min_t(u64, folio_pos(folio) + folio_size(folio),
+ u32 range_len = min_t(u64, folio_end(folio),
eb->start + eb->len) - range_start;
bio_add_folio_nofail(&bbio->bio, folio, range_len,
@@ -4104,8 +4134,8 @@ static inline void eb_bitmap_offset(const struct extent_buffer *eb,
* @start: offset of the bitmap item in the extent buffer
* @nr: bit number to test
*/
-int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
- unsigned long nr)
+bool extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
+ unsigned long nr)
{
unsigned long i;
size_t offset;
@@ -4296,9 +4326,9 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
{
struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
struct extent_buffer *eb;
- unsigned long start = (folio_pos(folio) >> fs_info->sectorsize_bits);
+ unsigned long start = (folio_pos(folio) >> fs_info->nodesize_bits);
unsigned long index = start;
- unsigned long end = index + (PAGE_SIZE >> fs_info->sectorsize_bits) - 1;
+ unsigned long end = index + (PAGE_SIZE >> fs_info->nodesize_bits) - 1;
int ret;
xa_lock_irq(&fs_info->buffer_tree);
@@ -4308,11 +4338,10 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
* won't disappear out from under us.
*/
spin_lock(&eb->refs_lock);
- if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
+ if (refcount_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
spin_unlock(&eb->refs_lock);
continue;
}
- xa_unlock_irq(&fs_info->buffer_tree);
/*
* If tree ref isn't set then we know the ref on this eb is a
@@ -4329,6 +4358,7 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
* check the folio private at the end. And
* release_extent_buffer() will release the refs_lock.
*/
+ xa_unlock_irq(&fs_info->buffer_tree);
release_extent_buffer(eb);
xa_lock_irq(&fs_info->buffer_tree);
}
@@ -4374,7 +4404,7 @@ int try_release_extent_buffer(struct folio *folio)
* this page.
*/
spin_lock(&eb->refs_lock);
- if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
+ if (refcount_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
spin_unlock(&eb->refs_lock);
spin_unlock(&folio->mapping->i_private_lock);
return 0;
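
[Editorial note] The repeated sectorsize_bits to nodesize_bits switches above change the key used for fs_info->buffer_tree: extent buffers are now indexed at tree-block granularity rather than sector granularity. A tiny demonstration of the resulting index values, assuming 4K sectors and 16K nodes:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint64_t start = 16 * 1024 * 1024;   /* some eb->start   */
	const unsigned sectorsize_bits = 12;       /* 4K sectors       */
	const unsigned nodesize_bits = 14;         /* 16K tree blocks  */

	/* One xarray slot per tree block instead of four per block. */
	printf("sector-keyed index: %llu\n",
	       (unsigned long long)(start >> sectorsize_bits));
	printf("node-keyed index:   %llu\n",
	       (unsigned long long)(start >> nodesize_bits));
	return 0;
}
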
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index e36e8d6a00bc..61130786b9a3 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -98,7 +98,7 @@ struct extent_buffer {
void *addr;
spinlock_t refs_lock;
- atomic_t refs;
+ refcount_t refs;
int read_mirror;
/* >= 0 if eb belongs to a log tree, -1 otherwise */
s8 log_index;
@@ -345,8 +345,8 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
unsigned long len);
void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
unsigned long len);
-int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
- unsigned long pos);
+bool extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
+ unsigned long pos);
void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
unsigned long pos, unsigned long len);
void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
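
[Editorial note] eb->refs becoming refcount_t (here and throughout the extent_io.c hunks above) buys checked lifetime semantics: refcount_inc_not_zero() fails once the count has dropped to zero, so an RCU lookup cannot resurrect an object being freed. A userspace stand-in for that behavior, ignoring atomicity and the kernel's saturation/WARN handling:

#include <stdbool.h>
#include <stdio.h>

struct obj {
	int refs;
};

static bool inc_not_zero(struct obj *o)
{
	if (o->refs == 0)
		return false;   /* object is being freed; lookup must retry */
	o->refs++;
	return true;
}

int main(void)
{
	struct obj live = { .refs = 1 };
	struct obj dying = { .refs = 0 };

	printf("live:  %s\n", inc_not_zero(&live) ? "got ref" : "miss");
	printf("dying: %s\n", inc_not_zero(&dying) ? "got ref" : "miss");
	return 0;
}
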
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 02bfdb976e40..57f52585a6dd 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -84,7 +84,7 @@ static void remove_em(struct btrfs_inode *inode, struct extent_map *em)
rb_erase(&em->rb_node, &inode->extent_tree.root);
RB_CLEAR_NODE(&em->rb_node);
- if (!btrfs_is_testing(fs_info) && is_fstree(btrfs_root_id(inode->root)))
+ if (!btrfs_is_testing(fs_info) && btrfs_is_fstree(btrfs_root_id(inode->root)))
percpu_counter_dec(&fs_info->evictable_extent_maps);
}
@@ -502,7 +502,7 @@ static int add_extent_mapping(struct btrfs_inode *inode,
setup_extent_mapping(inode, em, modified);
- if (!btrfs_is_testing(fs_info) && is_fstree(btrfs_root_id(root)))
+ if (!btrfs_is_testing(fs_info) && btrfs_is_fstree(btrfs_root_id(root)))
percpu_counter_inc(&fs_info->evictable_extent_maps);
return 0;
@@ -1337,7 +1337,7 @@ static void btrfs_extent_map_shrinker_worker(struct work_struct *work)
if (!root)
continue;
- if (is_fstree(btrfs_root_id(root)))
+ if (btrfs_is_fstree(btrfs_root_id(root)))
nr_dropped += btrfs_scan_root(root, &ctx);
btrfs_put_root(root);
diff --git a/fs/btrfs/fiemap.c b/fs/btrfs/fiemap.c
index 43bf0979fd53..7935586a9dbd 100644
--- a/fs/btrfs/fiemap.c
+++ b/fs/btrfs/fiemap.c
@@ -320,7 +320,7 @@ static int fiemap_next_leaf_item(struct btrfs_inode *inode, struct btrfs_path *p
* the cost of allocating a new one.
*/
ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED, &clone->bflags));
- atomic_inc(&clone->refs);
+ refcount_inc(&clone->refs);
ret = btrfs_next_leaf(inode->root, path);
if (ret != 0)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 54d523d4f421..c09fbc257634 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -427,7 +427,7 @@ int btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
memset(csum_dst, 0, csum_size);
count = 1;
- if (btrfs_root_id(inode->root) == BTRFS_DATA_RELOC_TREE_OBJECTID) {
+ if (btrfs_is_data_reloc_root(inode->root)) {
u64 file_offset = bbio->file_offset + bio_offset;
btrfs_set_extent_bit(&inode->io_tree, file_offset,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 8ce6f45f45e0..bc1e00db96c9 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -89,8 +89,7 @@ int btrfs_dirty_folio(struct btrfs_inode *inode, struct folio *folio, loff_t pos
num_bytes = round_up(write_bytes + pos - start_pos,
fs_info->sectorsize);
ASSERT(num_bytes <= U32_MAX);
- ASSERT(folio_pos(folio) <= pos &&
- folio_pos(folio) + folio_size(folio) >= pos + write_bytes);
+ ASSERT(folio_pos(folio) <= pos && folio_end(folio) >= pos + write_bytes);
end_of_last_block = start_pos + num_bytes - 1;
@@ -801,7 +800,7 @@ static int prepare_uptodate_folio(struct inode *inode, struct folio *folio, u64
u64 len)
{
u64 clamp_start = max_t(u64, pos, folio_pos(folio));
- u64 clamp_end = min_t(u64, pos + len, folio_pos(folio) + folio_size(folio));
+ u64 clamp_end = min_t(u64, pos + len, folio_end(folio));
const u32 blocksize = inode_to_fs_info(inode)->sectorsize;
int ret = 0;
@@ -857,7 +856,7 @@ static noinline int prepare_one_folio(struct inode *inode, struct folio **folio_
loff_t pos, size_t write_bytes,
bool nowait)
{
- unsigned long index = pos >> PAGE_SHIFT;
+ const pgoff_t index = pos >> PAGE_SHIFT;
gfp_t mask = get_prepare_gfp_flags(inode, nowait);
fgf_t fgp_flags = (nowait ? FGP_WRITEBEGIN | FGP_NOWAIT : FGP_WRITEBEGIN) |
fgf_set_order(write_bytes);
@@ -963,6 +962,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct folio *folio,
* @pos: File offset.
* @write_bytes: The length to write, will be updated to the nocow writeable
* range.
+ * @nowait: If true, we are in a non-blocking IO context and must not block.
*
* This function will flush ordered extents in the range to ensure proper
* nocow checks.
@@ -971,7 +971,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct folio *folio,
* > 0 If we can nocow, and updates @write_bytes.
* 0 If we can't do a nocow write.
* -EAGAIN If we can't do a nocow write because snapshoting of the inode's
- * root is in progress.
+ * root is in progress or because we are in a non-blocking IO
+ * context and need to block (@nowait is true).
* < 0 If an error happened.
*
* NOTE: Callers need to call btrfs_check_nocow_unlock() if we return > 0.
@@ -983,8 +984,8 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
struct btrfs_root *root = inode->root;
struct extent_state *cached_state = NULL;
u64 lockstart, lockend;
- u64 num_bytes;
- int ret;
+ u64 cur_offset;
+ int ret = 0;
if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
return 0;
@@ -995,7 +996,6 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
lockstart = round_down(pos, fs_info->sectorsize);
lockend = round_up(pos + *write_bytes,
fs_info->sectorsize) - 1;
- num_bytes = lockend - lockstart + 1;
if (nowait) {
if (!btrfs_try_lock_ordered_range(inode, lockstart, lockend,
@@ -1007,14 +1007,36 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
btrfs_lock_and_flush_ordered_range(inode, lockstart, lockend,
&cached_state);
}
- ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, nowait);
- if (ret <= 0)
- btrfs_drew_write_unlock(&root->snapshot_lock);
- else
- *write_bytes = min_t(size_t, *write_bytes ,
- num_bytes - pos + lockstart);
+
+ cur_offset = lockstart;
+ while (cur_offset < lockend) {
+ u64 num_bytes = lockend - cur_offset + 1;
+
+ ret = can_nocow_extent(inode, cur_offset, &num_bytes, NULL, nowait);
+ if (ret <= 0) {
+ /*
+ * If cur_offset == lockstart it means we haven't found
+ * any extent against which we can NOCOW, so unlock the
+ * snapshot lock.
+ */
+ if (cur_offset == lockstart)
+ btrfs_drew_write_unlock(&root->snapshot_lock);
+ break;
+ }
+ cur_offset += num_bytes;
+ }
+
btrfs_unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ /*
+ * cur_offset > lockstart means there's at least a partial range we can
+ * NOCOW, and that range can cover one or more extents.
+ */
+ if (cur_offset > lockstart) {
+ *write_bytes = min_t(size_t, *write_bytes, cur_offset - pos);
+ return 1;
+ }
+
return ret;
}
@@ -1233,8 +1255,8 @@ again:
* The reserved range goes beyond the current folio, shrink the reserved
* space to the folio boundary.
*/
- if (reserved_start + reserved_len > folio_pos(folio) + folio_size(folio)) {
- const u64 last_block = folio_pos(folio) + folio_size(folio);
+ if (reserved_start + reserved_len > folio_end(folio)) {
+ const u64 last_block = folio_end(folio);
shrink_reserved_space(inode, *data_reserved, reserved_start,
reserved_len, last_block - reserved_start,
@@ -1832,9 +1854,9 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
struct folio *folio = page_folio(page);
- struct inode *inode = file_inode(vmf->vma->vm_file);
- struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct btrfs_inode *inode = BTRFS_I(file_inode(vmf->vma->vm_file));
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct extent_io_tree *io_tree = &inode->io_tree;
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL;
@@ -1842,6 +1864,7 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
loff_t size;
size_t fsize = folio_size(folio);
int ret;
+ bool only_release_metadata = false;
u64 reserved_space;
u64 page_start;
u64 page_end;
@@ -1849,7 +1872,7 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
reserved_space = fsize;
- sb_start_pagefault(inode->i_sb);
+ sb_start_pagefault(inode->vfs_inode.i_sb);
page_start = folio_pos(folio);
page_end = page_start + folio_size(folio) - 1;
end = page_end;
@@ -1862,20 +1885,43 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
* end up waiting indefinitely to get a lock on the page currently
* being processed by btrfs_page_mkwrite() function.
*/
- ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
- page_start, reserved_space);
- if (ret < 0)
+ ret = btrfs_check_data_free_space(inode, &data_reserved, page_start,
+ reserved_space, false);
+ if (ret < 0) {
+ size_t write_bytes = reserved_space;
+
+ if (btrfs_check_nocow_lock(inode, page_start, &write_bytes, false) <= 0)
+ goto out_noreserve;
+
+ only_release_metadata = true;
+
+ /*
+ * Can't write the whole range: there may be shared extents or
+ * holes in it. Bail out with @only_release_metadata set to true
+ * so that we unlock the nocow lock before returning the error.
+ */
+ if (write_bytes < reserved_space)
+ goto out_noreserve;
+ }
+ ret = btrfs_delalloc_reserve_metadata(inode, reserved_space,
+ reserved_space, false);
+ if (ret < 0) {
+ if (!only_release_metadata)
+ btrfs_free_reserved_data_space(inode, data_reserved,
+ page_start, reserved_space);
goto out_noreserve;
+ }
ret = file_update_time(vmf->vma->vm_file);
if (ret < 0)
goto out;
again:
- down_read(&BTRFS_I(inode)->i_mmap_lock);
+ down_read(&inode->i_mmap_lock);
folio_lock(folio);
- size = i_size_read(inode);
+ size = i_size_read(&inode->vfs_inode);
- if ((folio->mapping != inode->i_mapping) ||
+ if ((folio->mapping != inode->vfs_inode.i_mapping) ||
(page_start >= size)) {
/* Page got truncated out from underneath us. */
goto out_unlock;
@@ -1893,11 +1939,11 @@ again:
* We can't set the delalloc bits if there are pending ordered
* extents. Drop our locks and wait for them to finish.
*/
- ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start, fsize);
+ ordered = btrfs_lookup_ordered_range(inode, page_start, fsize);
if (ordered) {
btrfs_unlock_extent(io_tree, page_start, page_end, &cached_state);
folio_unlock(folio);
- up_read(&BTRFS_I(inode)->i_mmap_lock);
+ up_read(&inode->i_mmap_lock);
btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
goto again;
@@ -1906,10 +1952,14 @@ again:
if (folio_contains(folio, (size - 1) >> PAGE_SHIFT)) {
reserved_space = round_up(size - page_start, fs_info->sectorsize);
if (reserved_space < fsize) {
+ const u64 to_free = fsize - reserved_space;
+
end = page_start + reserved_space - 1;
- btrfs_delalloc_release_space(BTRFS_I(inode),
- data_reserved, end + 1,
- fsize - reserved_space, true);
+ if (only_release_metadata)
+ btrfs_delalloc_release_metadata(inode, to_free, true);
+ else
+ btrfs_delalloc_release_space(inode, data_reserved,
+ end + 1, to_free, true);
}
}
@@ -1920,12 +1970,11 @@ again:
* clear any delalloc bits within this page range since we have to
* reserve data&meta space before lock_page() (see above comments).
*/
- btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
+ btrfs_clear_extent_bit(io_tree, page_start, end,
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, &cached_state);
- ret = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0,
- &cached_state);
+ ret = btrfs_set_extent_delalloc(inode, page_start, end, 0, &cached_state);
if (ret < 0) {
btrfs_unlock_extent(io_tree, page_start, page_end, &cached_state);
goto out_unlock;
@@ -1944,26 +1993,38 @@ again:
btrfs_folio_set_dirty(fs_info, folio, page_start, end + 1 - page_start);
btrfs_folio_set_uptodate(fs_info, folio, page_start, end + 1 - page_start);
- btrfs_set_inode_last_sub_trans(BTRFS_I(inode));
+ btrfs_set_inode_last_sub_trans(inode);
+
+ if (only_release_metadata)
+ btrfs_set_extent_bit(io_tree, page_start, end, EXTENT_NORESERVE,
+ &cached_state);
btrfs_unlock_extent(io_tree, page_start, page_end, &cached_state);
- up_read(&BTRFS_I(inode)->i_mmap_lock);
+ up_read(&inode->i_mmap_lock);
- btrfs_delalloc_release_extents(BTRFS_I(inode), fsize);
- sb_end_pagefault(inode->i_sb);
+ btrfs_delalloc_release_extents(inode, fsize);
+ if (only_release_metadata)
+ btrfs_check_nocow_unlock(inode);
+ sb_end_pagefault(inode->vfs_inode.i_sb);
extent_changeset_free(data_reserved);
return VM_FAULT_LOCKED;
out_unlock:
folio_unlock(folio);
- up_read(&BTRFS_I(inode)->i_mmap_lock);
+ up_read(&inode->i_mmap_lock);
out:
- btrfs_delalloc_release_extents(BTRFS_I(inode), fsize);
- btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
- reserved_space, true);
+ btrfs_delalloc_release_extents(inode, fsize);
+ if (only_release_metadata)
+ btrfs_delalloc_release_metadata(inode, reserved_space, true);
+ else
+ btrfs_delalloc_release_space(inode, data_reserved, page_start,
+ reserved_space, true);
extent_changeset_free(data_reserved);
out_noreserve:
- sb_end_pagefault(inode->i_sb);
+ if (only_release_metadata)
+ btrfs_check_nocow_unlock(inode);
+
+ sb_end_pagefault(inode->vfs_inode.i_sb);
if (ret < 0)
return vmf_error(ret);
@@ -2195,7 +2256,7 @@ static bool check_range_has_page(struct inode *inode, u64 start, u64 end)
if (folio->index < start_index)
continue;
/* A large folio extends beyond the end. Not a target. */
- if (folio->index + folio_nr_pages(folio) > end_index)
+ if (folio_next_index(folio) > end_index)
continue;
/* A folio doesn't cover the head/tail index. Found a target. */
ret = true;
@@ -2341,7 +2402,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
u64 min_size = btrfs_calc_insert_metadata_size(fs_info, 1);
u64 ino_size = round_up(inode->vfs_inode.i_size, fs_info->sectorsize);
struct btrfs_trans_handle *trans = NULL;
- struct btrfs_block_rsv *rsv;
+ struct btrfs_block_rsv rsv;
unsigned int rsv_count;
u64 cur_offset;
u64 len = end - start;
@@ -2350,13 +2411,9 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
if (end <= start)
return -EINVAL;
- rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
- if (!rsv) {
- ret = -ENOMEM;
- goto out;
- }
- rsv->size = btrfs_calc_insert_metadata_size(fs_info, 1);
- rsv->failfast = true;
+ btrfs_init_metadata_block_rsv(fs_info, &rsv, BTRFS_BLOCK_RSV_TEMP);
+ rsv.size = btrfs_calc_insert_metadata_size(fs_info, 1);
+ rsv.failfast = true;
/*
* 1 - update the inode
@@ -2373,14 +2430,14 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
trans = NULL;
- goto out_free;
+ goto out_release;
}
- ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
+ ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, &rsv,
min_size, false);
if (WARN_ON(ret))
goto out_trans;
- trans->block_rsv = rsv;
+ trans->block_rsv = &rsv;
cur_offset = start;
drop_args.path = path;
@@ -2496,10 +2553,10 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
}
ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
- rsv, min_size, false);
+ &rsv, min_size, false);
if (WARN_ON(ret))
break;
- trans->block_rsv = rsv;
+ trans->block_rsv = &rsv;
cur_offset = drop_args.drop_end;
len = end - cur_offset;
@@ -2576,16 +2633,15 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
out_trans:
if (!trans)
- goto out_free;
+ goto out_release;
trans->block_rsv = &fs_info->trans_block_rsv;
if (ret)
btrfs_end_transaction(trans);
else
*trans_out = trans;
-out_free:
- btrfs_free_block_rsv(fs_info, rsv);
-out:
+out_release:
+ btrfs_block_rsv_release(fs_info, &rsv, (u64)-1, NULL);
return ret;
}
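Note: switching from a heap-allocated block reservation to an on-stack one removes an allocation failure path (the old -ENOMEM branch). A minimal sketch of the new lifetime, using only the helpers visible in this hunk:

    struct btrfs_block_rsv rsv;

    btrfs_init_metadata_block_rsv(fs_info, &rsv, BTRFS_BLOCK_RSV_TEMP);
    rsv.size = btrfs_calc_insert_metadata_size(fs_info, 1);
    rsv.failfast = true;
    /* ... migrate space in, point trans->block_rsv at &rsv ... */
    btrfs_block_rsv_release(fs_info, &rsv, (u64)-1, NULL); /* (u64)-1: release everything */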
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 4b34ea1f01c2..5d8d1570a5c9 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -366,7 +366,7 @@ fail:
static void readahead_cache(struct inode *inode)
{
struct file_ra_state ra;
- unsigned long last_index;
+ pgoff_t last_index;
file_ra_state_init(&ra, inode->i_mapping);
last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
@@ -3192,7 +3192,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
u64 *max_extent_size)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- int err;
+ int ret2;
u64 search_start = cluster->window_start;
u64 search_bytes = bytes;
u64 ret = 0;
@@ -3200,8 +3200,8 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
search_start = min_start;
search_bytes = bytes;
- err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
- if (err) {
+ ret2 = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
+ if (ret2) {
*max_extent_size = max(get_max_extent_size(entry),
*max_extent_size);
return 0;
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 0c573d46639a..eba7f22ae49c 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -35,7 +35,7 @@ static struct btrfs_root *btrfs_free_space_root(
return btrfs_global_root(block_group->fs_info, &key);
}
-void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
+void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache)
{
u32 bitmap_range;
size_t bitmap_size;
@@ -82,22 +82,19 @@ static int add_new_free_space_info(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
if (ret)
- goto out;
+ return ret;
leaf = path->nodes[0];
info = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_free_space_info);
btrfs_set_free_space_extent_count(leaf, info, 0);
btrfs_set_free_space_flags(leaf, info, 0);
-
- ret = 0;
-out:
btrfs_release_path(path);
- return ret;
+ return 0;
}
EXPORT_FOR_TESTS
-struct btrfs_free_space_info *search_free_space_info(
+struct btrfs_free_space_info *btrfs_search_free_space_info(
struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path, int cow)
@@ -201,9 +198,9 @@ static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
}
EXPORT_FOR_TESTS
-int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
- struct btrfs_block_group *block_group,
- struct btrfs_path *path)
+int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = btrfs_free_space_root(block_group);
@@ -281,7 +278,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
}
- info = search_free_space_info(trans, block_group, path, 1);
+ info = btrfs_search_free_space_info(trans, block_group, path, 1);
if (IS_ERR(info)) {
ret = PTR_ERR(info);
btrfs_abort_transaction(trans, ret);
@@ -290,6 +287,8 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
flags = btrfs_free_space_flags(leaf, info);
flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
+ block_group->using_free_space_bitmaps = true;
+ block_group->using_free_space_bitmaps_cached = true;
btrfs_set_free_space_flags(leaf, info, flags);
expected_extent_count = btrfs_free_space_extent_count(leaf, info);
btrfs_release_path(path);
@@ -343,9 +342,9 @@ out:
}
EXPORT_FOR_TESTS
-int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
- struct btrfs_block_group *block_group,
- struct btrfs_path *path)
+int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = btrfs_free_space_root(block_group);
@@ -409,12 +408,12 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
data_size = free_space_bitmap_size(fs_info,
found_key.offset);
- ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
+ path->slots[0]--;
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
read_extent_buffer(leaf, bitmap_cursor, ptr,
data_size);
nr++;
- path->slots[0]--;
} else {
ASSERT(0);
}
@@ -428,7 +427,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
}
- info = search_free_space_info(trans, block_group, path, 1);
+ info = btrfs_search_free_space_info(trans, block_group, path, 1);
if (IS_ERR(info)) {
ret = PTR_ERR(info);
btrfs_abort_transaction(trans, ret);
@@ -437,20 +436,22 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
flags = btrfs_free_space_flags(leaf, info);
flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
+ block_group->using_free_space_bitmaps = false;
+ block_group->using_free_space_bitmaps_cached = true;
btrfs_set_free_space_flags(leaf, info, flags);
expected_extent_count = btrfs_free_space_extent_count(leaf, info);
btrfs_release_path(path);
- nrbits = block_group->length >> block_group->fs_info->sectorsize_bits;
+ nrbits = block_group->length >> fs_info->sectorsize_bits;
start_bit = find_next_bit_le(bitmap, nrbits, 0);
while (start_bit < nrbits) {
end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
ASSERT(start_bit < end_bit);
- key.objectid = start + start_bit * block_group->fs_info->sectorsize;
+ key.objectid = start + start_bit * fs_info->sectorsize;
key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
- key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
+ key.offset = (end_bit - start_bit) * fs_info->sectorsize;
ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
if (ret) {
@@ -493,11 +494,10 @@ static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
if (new_extents == 0)
return 0;
- info = search_free_space_info(trans, block_group, path, 1);
- if (IS_ERR(info)) {
- ret = PTR_ERR(info);
- goto out;
- }
+ info = btrfs_search_free_space_info(trans, block_group, path, 1);
+ if (IS_ERR(info))
+ return PTR_ERR(info);
+
flags = btrfs_free_space_flags(path->nodes[0], info);
extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
@@ -507,19 +507,18 @@ static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
extent_count > block_group->bitmap_high_thresh) {
- ret = convert_free_space_to_bitmaps(trans, block_group, path);
+ ret = btrfs_convert_free_space_to_bitmaps(trans, block_group, path);
} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
extent_count < block_group->bitmap_low_thresh) {
- ret = convert_free_space_to_extents(trans, block_group, path);
+ ret = btrfs_convert_free_space_to_extents(trans, block_group, path);
}
-out:
return ret;
}
EXPORT_FOR_TESTS
-int free_space_test_bit(struct btrfs_block_group *block_group,
- struct btrfs_path *path, u64 offset)
+bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group,
+ struct btrfs_path *path, u64 offset)
{
struct extent_buffer *leaf;
struct btrfs_key key;
@@ -537,13 +536,13 @@ int free_space_test_bit(struct btrfs_block_group *block_group,
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
i = div_u64(offset - found_start,
block_group->fs_info->sectorsize);
- return !!extent_buffer_test_bit(leaf, ptr, i);
+ return extent_buffer_test_bit(leaf, ptr, i);
}
-static void free_space_set_bits(struct btrfs_trans_handle *trans,
- struct btrfs_block_group *block_group,
- struct btrfs_path *path, u64 *start, u64 *size,
- int bit)
+static void free_space_modify_bits(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path, u64 *start, u64 *size,
+ bool set_bits)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct extent_buffer *leaf;
@@ -567,7 +566,7 @@ static void free_space_set_bits(struct btrfs_trans_handle *trans,
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
first = (*start - found_start) >> fs_info->sectorsize_bits;
last = (end - found_start) >> fs_info->sectorsize_bits;
- if (bit)
+ if (set_bits)
extent_buffer_bitmap_set(leaf, ptr, first, last - first);
else
extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
@@ -611,13 +610,14 @@ static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path,
- u64 start, u64 size, int remove)
+ u64 start, u64 size, bool remove)
{
struct btrfs_root *root = btrfs_free_space_root(block_group);
struct btrfs_key key;
u64 end = start + size;
u64 cur_start, cur_size;
- int prev_bit, next_bit;
+ bool prev_bit_set = false;
+ bool next_bit_set = false;
int new_extents;
int ret;
@@ -634,16 +634,16 @@ static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
if (ret)
- goto out;
+ return ret;
- prev_bit = free_space_test_bit(block_group, path, prev_block);
+ prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block);
/* The previous block may have been in the previous bitmap. */
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (start >= key.objectid + key.offset) {
ret = free_space_next_bitmap(trans, root, path);
if (ret)
- goto out;
+ return ret;
}
} else {
key.objectid = start;
@@ -652,9 +652,7 @@ static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
if (ret)
- goto out;
-
- prev_bit = -1;
+ return ret;
}
/*
@@ -664,13 +662,13 @@ static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
cur_start = start;
cur_size = size;
while (1) {
- free_space_set_bits(trans, block_group, path, &cur_start, &cur_size,
- !remove);
+ free_space_modify_bits(trans, block_group, path, &cur_start,
+ &cur_size, !remove);
if (cur_size == 0)
break;
ret = free_space_next_bitmap(trans, root, path);
if (ret)
- goto out;
+ return ret;
}
/*
@@ -683,42 +681,36 @@ static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
if (end >= key.objectid + key.offset) {
ret = free_space_next_bitmap(trans, root, path);
if (ret)
- goto out;
+ return ret;
}
- next_bit = free_space_test_bit(block_group, path, end);
- } else {
- next_bit = -1;
+ next_bit_set = btrfs_free_space_test_bit(block_group, path, end);
}
if (remove) {
new_extents = -1;
- if (prev_bit == 1) {
+ if (prev_bit_set) {
/* Leftover on the left. */
new_extents++;
}
- if (next_bit == 1) {
+ if (next_bit_set) {
/* Leftover on the right. */
new_extents++;
}
} else {
new_extents = 1;
- if (prev_bit == 1) {
+ if (prev_bit_set) {
/* Merging with neighbor on the left. */
new_extents--;
}
- if (next_bit == 1) {
+ if (next_bit_set) {
/* Merging with neighbor on the right. */
new_extents--;
}
}
btrfs_release_path(path);
- ret = update_free_space_extent_count(trans, block_group, path,
- new_extents);
-
-out:
- return ret;
+ return update_free_space_extent_count(trans, block_group, path, new_extents);
}
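Note: the new_extents arithmetic above is easiest to verify case by case. A worked table, derived from the branches in this hunk (illustrative only):

    /*
     * remove, prev set, next set : -1 +1 +1 = +1  (range split, leftovers on both sides)
     * remove, one neighbor set   : -1 +1    =  0  (extent shrinks, count unchanged)
     * remove, no neighbors set   : -1             (whole extent disappears)
     * add,    prev set, next set : +1 -1 -1 = -1  (bridges two existing extents)
     * add,    one neighbor set   : +1 -1    =  0  (extends an existing extent)
     * add,    no neighbors set   : +1             (brand new extent)
     */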
static int remove_free_space_extent(struct btrfs_trans_handle *trans,
@@ -739,7 +731,7 @@ static int remove_free_space_extent(struct btrfs_trans_handle *trans,
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
if (ret)
- goto out;
+ return ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
@@ -771,7 +763,7 @@ static int remove_free_space_extent(struct btrfs_trans_handle *trans,
/* Delete the existing key (cases 1-4). */
ret = btrfs_del_item(trans, root, path);
if (ret)
- goto out;
+ return ret;
/* Add a key for leftovers at the beginning (cases 3 and 4). */
if (start > found_start) {
@@ -782,7 +774,7 @@ static int remove_free_space_extent(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
if (ret)
- goto out;
+ return ret;
new_extents++;
}
@@ -795,50 +787,58 @@ static int remove_free_space_extent(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
if (ret)
- goto out;
+ return ret;
new_extents++;
}
btrfs_release_path(path);
- ret = update_free_space_extent_count(trans, block_group, path,
- new_extents);
-
-out:
- return ret;
+ return update_free_space_extent_count(trans, block_group, path, new_extents);
}
-EXPORT_FOR_TESTS
-int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_block_group *block_group,
- struct btrfs_path *path, u64 start, u64 size)
+static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path)
{
struct btrfs_free_space_info *info;
u32 flags;
- int ret;
- if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
- ret = __add_block_group_free_space(trans, block_group, path);
- if (ret)
- return ret;
- }
+ if (bg->using_free_space_bitmaps_cached)
+ return bg->using_free_space_bitmaps;
- info = search_free_space_info(NULL, block_group, path, 0);
+ info = btrfs_search_free_space_info(NULL, bg, path, 0);
if (IS_ERR(info))
return PTR_ERR(info);
flags = btrfs_free_space_flags(path->nodes[0], info);
btrfs_release_path(path);
- if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
+ bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS);
+ bg->using_free_space_bitmaps_cached = true;
+
+ return bg->using_free_space_bitmaps;
+}
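Note: using_bitmaps() memoizes the USING_BITMAPS flag per block group, and the convert helpers earlier in this diff keep the cache coherent. The contract, summarized from the hunks above (illustrative):

    /*
     * btrfs_convert_free_space_to_bitmaps():
     *         bg->using_free_space_bitmaps        = true;
     *         bg->using_free_space_bitmaps_cached = true;
     * btrfs_convert_free_space_to_extents():
     *         bg->using_free_space_bitmaps        = false;
     *         bg->using_free_space_bitmaps_cached = true;
     *
     * using_bitmaps() reads the free space info item only on a cold
     * cache; it returns a negative errno on lookup failure, 0 or 1
     * otherwise, hence the "ret < 0" checks at the call sites.
     */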
+
+EXPORT_FOR_TESTS
+int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path, u64 start, u64 size)
+{
+ int ret;
+
+ ret = __add_block_group_free_space(trans, block_group, path);
+ if (ret)
+ return ret;
+
+ ret = using_bitmaps(block_group, path);
+ if (ret < 0)
+ return ret;
+
+ if (ret)
return modify_free_space_bitmap(trans, block_group, path,
- start, size, 1);
- } else {
- return remove_free_space_extent(trans, block_group, path,
- start, size);
- }
+ start, size, true);
+
+ return remove_free_space_extent(trans, block_group, path, start, size);
}
-int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
- u64 start, u64 size)
+int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
+ u64 start, u64 size)
{
struct btrfs_block_group *block_group;
struct btrfs_path *path;
@@ -863,8 +863,7 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
}
mutex_lock(&block_group->free_space_lock);
- ret = __remove_from_free_space_tree(trans, block_group, path, start,
- size);
+ ret = __btrfs_remove_from_free_space_tree(trans, block_group, path, start, size);
mutex_unlock(&block_group->free_space_lock);
if (ret)
btrfs_abort_transaction(trans, ret);
@@ -918,7 +917,7 @@ static int add_free_space_extent(struct btrfs_trans_handle *trans,
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
if (ret)
- goto out;
+ return ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
@@ -941,7 +940,7 @@ static int add_free_space_extent(struct btrfs_trans_handle *trans,
if (found_end == start) {
ret = btrfs_del_item(trans, root, path);
if (ret)
- goto out;
+ return ret;
new_key.objectid = found_start;
new_key.offset += key.offset;
new_extents--;
@@ -958,7 +957,7 @@ right:
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
if (ret)
- goto out;
+ return ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
@@ -982,7 +981,7 @@ right:
if (found_start == end) {
ret = btrfs_del_item(trans, root, path);
if (ret)
- goto out;
+ return ret;
new_key.offset += key.offset;
new_extents--;
}
@@ -992,48 +991,36 @@ insert:
/* Insert the new key (cases 1-4). */
ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
if (ret)
- goto out;
+ return ret;
btrfs_release_path(path);
- ret = update_free_space_extent_count(trans, block_group, path,
- new_extents);
-
-out:
- return ret;
+ return update_free_space_extent_count(trans, block_group, path, new_extents);
}
EXPORT_FOR_TESTS
-int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_block_group *block_group,
- struct btrfs_path *path, u64 start, u64 size)
+int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path, u64 start, u64 size)
{
- struct btrfs_free_space_info *info;
- u32 flags;
int ret;
- if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
- ret = __add_block_group_free_space(trans, block_group, path);
- if (ret)
- return ret;
- }
+ ret = __add_block_group_free_space(trans, block_group, path);
+ if (ret)
+ return ret;
- info = search_free_space_info(NULL, block_group, path, 0);
- if (IS_ERR(info))
- return PTR_ERR(info);
- flags = btrfs_free_space_flags(path->nodes[0], info);
- btrfs_release_path(path);
+ ret = using_bitmaps(block_group, path);
+ if (ret < 0)
+ return ret;
- if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
+ if (ret)
return modify_free_space_bitmap(trans, block_group, path,
- start, size, 0);
- } else {
- return add_free_space_extent(trans, block_group, path, start,
- size);
- }
+ start, size, false);
+
+ return add_free_space_extent(trans, block_group, path, start, size);
}
-int add_to_free_space_tree(struct btrfs_trans_handle *trans,
- u64 start, u64 size)
+int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
+ u64 start, u64 size)
{
struct btrfs_block_group *block_group;
struct btrfs_path *path;
@@ -1058,7 +1045,7 @@ int add_to_free_space_tree(struct btrfs_trans_handle *trans,
}
mutex_lock(&block_group->free_space_lock);
- ret = __add_to_free_space_tree(trans, block_group, path, start, size);
+ ret = __btrfs_add_to_free_space_tree(trans, block_group, path, start, size);
mutex_unlock(&block_group->free_space_lock);
if (ret)
btrfs_abort_transaction(trans, ret);
@@ -1115,11 +1102,21 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
if (ret < 0)
goto out_locked;
- ASSERT(ret == 0);
+ /*
+ * If ret is 1 (no key found), this is an empty block group: no extents
+ * were ever allocated from it, and there is no block group item (key
+ * BTRFS_BLOCK_GROUP_ITEM_KEY) in the extent tree because the block
+ * group tree feature is enabled, so block group items are stored in the
+ * block group tree instead. It also means no extents are allocated for
+ * block groups with a start offset beyond this block group's end offset
+ * (this is the last, highest block group).
+ */
+ if (!btrfs_fs_compat_ro(trans->fs_info, BLOCK_GROUP_TREE))
+ ASSERT(ret == 0);
start = block_group->start;
end = block_group->start + block_group->length;
- while (1) {
+ while (ret == 0) {
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (key.type == BTRFS_EXTENT_ITEM_KEY ||
@@ -1128,11 +1125,11 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
break;
if (start < key.objectid) {
- ret = __add_to_free_space_tree(trans,
- block_group,
- path2, start,
- key.objectid -
- start);
+ ret = __btrfs_add_to_free_space_tree(trans,
+ block_group,
+ path2, start,
+ key.objectid -
+ start);
if (ret)
goto out_locked;
}
@@ -1149,12 +1146,10 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
ret = btrfs_next_item(extent_root, path);
if (ret < 0)
goto out_locked;
- if (ret)
- break;
}
if (start < end) {
- ret = __add_to_free_space_tree(trans, block_group, path2,
- start, end - start);
+ ret = __btrfs_add_to_free_space_tree(trans, block_group, path2,
+ start, end - start);
if (ret)
goto out_locked;
}
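Note: with the relaxed assertion earlier in this function, an empty block group under the block group tree (ret == 1) skips the scan loop entirely, and the tail add above records the whole group as free. The net effect in that case is equivalent to (sketch):

    /* ret == 1: no extent items at or beyond this block group. */
    __btrfs_add_to_free_space_tree(trans, block_group, path2,
                                   block_group->start, block_group->length);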
@@ -1233,6 +1228,7 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans,
{
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
+ struct rb_node *node;
int nr;
int ret;
@@ -1261,6 +1257,16 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
}
+ node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
+ while (node) {
+ struct btrfs_block_group *bg;
+
+ bg = rb_entry(node, struct btrfs_block_group, cache_node);
+ clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
+ node = rb_next(node);
+ cond_resched();
+ }
+
return 0;
}
@@ -1350,12 +1356,18 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
block_group = rb_entry(node, struct btrfs_block_group,
cache_node);
+
+ if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
+ &block_group->runtime_flags))
+ goto next;
+
ret = populate_free_space_tree(trans, block_group);
if (ret) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
+next:
if (btrfs_should_end_transaction(trans)) {
btrfs_end_transaction(trans);
trans = btrfs_start_transaction(free_space_root, 1);
@@ -1378,51 +1390,79 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path)
{
+ bool own_path = false;
int ret;
- clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags);
+ if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
+ &block_group->runtime_flags))
+ return 0;
+
+ /*
+ * While rebuilding the free space tree we may allocate new metadata
+ * block groups while modifying the free space tree.
+ *
+ * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we
+ * can use multiple transactions, every time btrfs_end_transaction() is
+ * called at btrfs_rebuild_free_space_tree() we finish the creation of
+ * new block groups by calling btrfs_create_pending_block_groups(), and
+ * that in turn calls us, through btrfs_add_block_group_free_space(), to add
+ * a free space info item and a free space extent item for the block
+ * group.
+ *
+ * Then later btrfs_rebuild_free_space_tree() may find such new block
+ * groups and process them with populate_free_space_tree(), which can
+ * fail with EEXIST since there are already items for the block group in
+ * the free space tree. Notice that we say "may find" because a new
+ * block group may be added to the block groups rbtree at a position
+ * before or after the block group currently being processed by the
+ * rebuild. So signal the rebuild process to skip such new block groups
+ * if it finds them.
+ */
+ set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
+
+ if (!path) {
+ path = btrfs_alloc_path();
+ if (!path) {
+ btrfs_abort_transaction(trans, -ENOMEM);
+ return -ENOMEM;
+ }
+ own_path = true;
+ }
ret = add_new_free_space_info(trans, block_group, path);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+
+ ret = __btrfs_add_to_free_space_tree(trans, block_group, path,
+ block_group->start, block_group->length);
if (ret)
- return ret;
+ btrfs_abort_transaction(trans, ret);
- return __add_to_free_space_tree(trans, block_group, path,
- block_group->start,
- block_group->length);
+out:
+ if (own_path)
+ btrfs_free_path(path);
+
+ return ret;
}
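Note: the NULL-path convention above lets the one caller that already holds a path avoid a second allocation. The idiom in isolation, as a sketch:

    bool own_path = false;

    if (!path) {
            path = btrfs_alloc_path();
            if (!path)
                    return -ENOMEM;
            own_path = true;
    }
    /* ... do the work with path ... */
    if (own_path)
            btrfs_free_path(path);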
-int add_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_block_group *block_group)
+int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group)
{
- struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_path *path = NULL;
- int ret = 0;
+ int ret;
- if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+ if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
return 0;
mutex_lock(&block_group->free_space_lock);
- if (!test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags))
- goto out;
-
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = __add_block_group_free_space(trans, block_group, path);
-
-out:
- btrfs_free_path(path);
+ ret = __add_block_group_free_space(trans, block_group, NULL);
mutex_unlock(&block_group->free_space_lock);
- if (ret)
- btrfs_abort_transaction(trans, ret);
return ret;
}
-int remove_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_block_group *block_group)
+int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group)
{
struct btrfs_root *root = btrfs_free_space_root(block_group);
struct btrfs_path *path;
@@ -1443,6 +1483,7 @@ int remove_block_group_free_space(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -1455,8 +1496,10 @@ int remove_block_group_free_space(struct btrfs_trans_handle *trans,
while (!done) {
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
goto out;
+ }
leaf = path->nodes[0];
nr = 0;
@@ -1484,16 +1527,16 @@ int remove_block_group_free_space(struct btrfs_trans_handle *trans,
}
ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
goto out;
+ }
btrfs_release_path(path);
}
ret = 0;
out:
btrfs_free_path(path);
- if (ret)
- btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -1505,7 +1548,7 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
struct btrfs_fs_info *fs_info;
struct btrfs_root *root;
struct btrfs_key key;
- int prev_bit = 0, bit;
+ bool prev_bit_set = false;
/* Initialize to silence GCC. */
u64 extent_start = 0;
u64 end, offset;
@@ -1522,7 +1565,7 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
while (1) {
ret = btrfs_next_item(root, path);
if (ret < 0)
- goto out;
+ return ret;
if (ret)
break;
@@ -1536,10 +1579,12 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
offset = key.objectid;
while (offset < key.objectid + key.offset) {
- bit = free_space_test_bit(block_group, path, offset);
- if (prev_bit == 0 && bit == 1) {
+ bool bit_set;
+
+ bit_set = btrfs_free_space_test_bit(block_group, path, offset);
+ if (!prev_bit_set && bit_set) {
extent_start = offset;
- } else if (prev_bit == 1 && bit == 0) {
+ } else if (prev_bit_set && !bit_set) {
u64 space_added;
ret = btrfs_add_new_free_space(block_group,
@@ -1547,7 +1592,7 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
offset,
&space_added);
if (ret)
- goto out;
+ return ret;
total_found += space_added;
if (total_found > CACHING_CTL_WAKE_UP) {
total_found = 0;
@@ -1555,14 +1600,14 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
}
extent_count++;
}
- prev_bit = bit;
+ prev_bit_set = bit_set;
offset += fs_info->sectorsize;
}
}
- if (prev_bit == 1) {
+ if (prev_bit_set) {
ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
if (ret)
- goto out;
+ return ret;
extent_count++;
}
@@ -1572,13 +1617,10 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
block_group->start, extent_count,
expected_extent_count);
DEBUG_WARN();
- ret = -EIO;
- goto out;
+ return -EIO;
}
- ret = 0;
-out:
- return ret;
+ return 0;
}
static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
@@ -1605,7 +1647,7 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
ret = btrfs_next_item(root, path);
if (ret < 0)
- goto out;
+ return ret;
if (ret)
break;
@@ -1621,7 +1663,7 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
key.objectid + key.offset,
&space_added);
if (ret)
- goto out;
+ return ret;
total_found += space_added;
if (total_found > CACHING_CTL_WAKE_UP) {
total_found = 0;
@@ -1636,16 +1678,13 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
block_group->start, extent_count,
expected_extent_count);
DEBUG_WARN();
- ret = -EIO;
- goto out;
+ return -EIO;
}
- ret = 0;
-out:
- return ret;
+ return 0;
}
-int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
+int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl)
{
struct btrfs_block_group *block_group;
struct btrfs_free_space_info *info;
@@ -1666,7 +1705,7 @@ int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
path->search_commit_root = 1;
path->reada = READA_FORWARD;
- info = search_free_space_info(NULL, block_group, path, 0);
+ info = btrfs_search_free_space_info(NULL, block_group, path, 0);
if (IS_ERR(info))
return PTR_ERR(info);
diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h
index e6c6d6f4f221..3d9a5d4477fc 100644
--- a/fs/btrfs/free-space-tree.h
+++ b/fs/btrfs/free-space-tree.h
@@ -22,39 +22,39 @@ struct btrfs_trans_handle;
#define BTRFS_FREE_SPACE_BITMAP_SIZE 256
#define BTRFS_FREE_SPACE_BITMAP_BITS (BTRFS_FREE_SPACE_BITMAP_SIZE * BITS_PER_BYTE)
-void set_free_space_tree_thresholds(struct btrfs_block_group *block_group);
+void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *block_group);
int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info);
int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info);
int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info);
-int load_free_space_tree(struct btrfs_caching_control *caching_ctl);
-int add_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_block_group *block_group);
-int remove_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_block_group *block_group);
-int add_to_free_space_tree(struct btrfs_trans_handle *trans,
- u64 start, u64 size);
-int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
- u64 start, u64 size);
+int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl);
+int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group);
+int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group);
+int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
+ u64 start, u64 size);
+int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
+ u64 start, u64 size);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_free_space_info *
-search_free_space_info(struct btrfs_trans_handle *trans,
- struct btrfs_block_group *block_group,
- struct btrfs_path *path, int cow);
-int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
+btrfs_search_free_space_info(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
- struct btrfs_path *path, u64 start, u64 size);
-int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_block_group *block_group,
- struct btrfs_path *path, u64 start, u64 size);
-int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
- struct btrfs_block_group *block_group,
- struct btrfs_path *path);
-int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
- struct btrfs_block_group *block_group,
- struct btrfs_path *path);
-int free_space_test_bit(struct btrfs_block_group *block_group,
- struct btrfs_path *path, u64 offset);
+ struct btrfs_path *path, int cow);
+int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path, u64 start, u64 size);
+int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path, u64 start, u64 size);
+int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path);
+int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group,
+ struct btrfs_path *path);
+bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group,
+ struct btrfs_path *path, u64 offset);
#endif
#endif
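Note: for anyone tracking callers across this series, the header changes amount to a uniform btrfs_ prefix plus one return-type change (summary derived from the hunks above):

    /*
     * set_free_space_tree_thresholds -> btrfs_set_free_space_tree_thresholds
     * load_free_space_tree           -> btrfs_load_free_space_tree
     * add_block_group_free_space     -> btrfs_add_block_group_free_space
     * remove_block_group_free_space  -> btrfs_remove_block_group_free_space
     * add_to_free_space_tree         -> btrfs_add_to_free_space_tree
     * remove_from_free_space_tree    -> btrfs_remove_from_free_space_tree
     * search_free_space_info         -> btrfs_search_free_space_info
     * __add_to_free_space_tree       -> __btrfs_add_to_free_space_tree
     * __remove_from_free_space_tree  -> __btrfs_remove_from_free_space_tree
     * convert_free_space_to_bitmaps  -> btrfs_convert_free_space_to_bitmaps
     * convert_free_space_to_extents  -> btrfs_convert_free_space_to_extents
     * free_space_test_bit (int)      -> btrfs_free_space_test_bit (bool)
     */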
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 4394de12a767..8cc07cc70b12 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -420,6 +420,8 @@ struct btrfs_commit_stats {
u64 last_commit_dur;
/* The total commit duration in ns */
u64 total_commit_dur;
+ /* Start of the last critical section in ns. */
+ u64 critical_section_start_time;
};
struct btrfs_fs_info {
@@ -713,8 +715,6 @@ struct btrfs_fs_info {
u32 data_chunk_allocations;
u32 metadata_ratio;
- void *bdev_holder;
-
/* Private scrub information */
struct mutex scrub_lock;
atomic_t scrubs_running;
@@ -739,12 +739,6 @@ struct btrfs_fs_info {
spinlock_t qgroup_lock;
/*
- * Used to avoid frequently calling ulist_alloc()/ulist_free()
- * when doing qgroup accounting, it must be protected by qgroup_lock.
- */
- struct ulist *qgroup_ulist;
-
- /*
* Protect user change for quota operations. If a transaction is needed,
* it must be started before locking this lock.
*/
@@ -779,7 +773,7 @@ struct btrfs_fs_info {
struct btrfs_delayed_root *delayed_root;
- /* Entries are eb->start / sectorsize */
+ /* Entries are eb->start >> nodesize_bits */
struct xarray buffer_tree;
/* Next backup root to be overwritten */
@@ -811,6 +805,7 @@ struct btrfs_fs_info {
/* Cached block sizes */
u32 nodesize;
+ u32 nodesize_bits;
u32 sectorsize;
/* ilog2 of sectorsize, use to avoid 64bit division */
u32 sectorsize_bits;
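Note: nodesize_bits presumably caches ilog2(nodesize), mirroring sectorsize_bits, so the buffer_tree index noted above becomes a shift instead of a 64-bit division. A hypothetical lookup using it (fragment; eb and fs_info assumed in scope):

    /* Index an extent buffer in fs_info->buffer_tree (eb->start is a u64). */
    unsigned long index = eb->start >> fs_info->nodesize_bits;
    struct extent_buffer *found = xa_load(&fs_info->buffer_tree, index);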
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index a61c3540d67b..f06cf701ae5a 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -78,13 +78,10 @@ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
}
/* Returns NULL if no extref found */
-struct btrfs_inode_extref *
-btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- const struct fscrypt_str *name,
- u64 inode_objectid, u64 ref_objectid, int ins_len,
- int cow)
+struct btrfs_inode_extref *btrfs_lookup_inode_extref(struct btrfs_root *root,
+ struct btrfs_path *path,
+ const struct fscrypt_str *name,
+ u64 inode_objectid, u64 ref_objectid)
{
int ret;
struct btrfs_key key;
@@ -93,7 +90,7 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
key.type = BTRFS_INODE_EXTREF_KEY;
key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);
- ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
return ERR_PTR(ret);
if (ret > 0)
@@ -720,13 +717,12 @@ delete:
}
out:
if (ret >= 0 && pending_del_nr) {
- int err;
+ int ret2;
- err = btrfs_del_items(trans, root, path, pending_del_slot,
- pending_del_nr);
- if (err) {
- btrfs_abort_transaction(trans, err);
- ret = err;
+ ret2 = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr);
+ if (ret2) {
+ btrfs_abort_transaction(trans, ret2);
+ ret = ret2;
}
}
diff --git a/fs/btrfs/inode-item.h b/fs/btrfs/inode-item.h
index c11b97fdccc4..6d9f5ad20646 100644
--- a/fs/btrfs/inode-item.h
+++ b/fs/btrfs/inode-item.h
@@ -101,13 +101,10 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *location, int mod);
-struct btrfs_inode_extref *btrfs_lookup_inode_extref(
- struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- const struct fscrypt_str *name,
- u64 inode_objectid, u64 ref_objectid, int ins_len,
- int cow);
+struct btrfs_inode_extref *btrfs_lookup_inode_extref(struct btrfs_root *root,
+ struct btrfs_path *path,
+ const struct fscrypt_str *name,
+ u64 inode_objectid, u64 ref_objectid);
struct btrfs_inode_ref *btrfs_find_name_in_backref(const struct extent_buffer *leaf,
int slot,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c0c778243bf1..b77dd22b8cdb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -308,7 +308,7 @@ static void __cold btrfs_print_data_csum_error(struct btrfs_inode *inode,
const u32 csum_size = root->fs_info->csum_size;
/* For data reloc tree, it's better to do a backref lookup instead. */
- if (btrfs_root_id(root) == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ if (btrfs_is_data_reloc_root(root))
return print_data_reloc_error(inode, logical_start, csum,
csum_expected, mirror_num);
@@ -395,8 +395,8 @@ void btrfs_inode_unlock(struct btrfs_inode *inode, unsigned int ilock_flags)
static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
u64 offset, u64 bytes)
{
- unsigned long index = offset >> PAGE_SHIFT;
- unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
+ pgoff_t index = offset >> PAGE_SHIFT;
+ const pgoff_t end_index = (offset + bytes - 1) >> PAGE_SHIFT;
struct folio *folio;
while (index <= end_index) {
@@ -423,18 +423,18 @@ static int btrfs_dirty_inode(struct btrfs_inode *inode);
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
struct btrfs_new_inode_args *args)
{
- int err;
+ int ret;
if (args->default_acl) {
- err = __btrfs_set_acl(trans, args->inode, args->default_acl,
+ ret = __btrfs_set_acl(trans, args->inode, args->default_acl,
ACL_TYPE_DEFAULT);
- if (err)
- return err;
+ if (ret)
+ return ret;
}
if (args->acl) {
- err = __btrfs_set_acl(trans, args->inode, args->acl, ACL_TYPE_ACCESS);
- if (err)
- return err;
+ ret = __btrfs_set_acl(trans, args->inode, args->acl, ACL_TYPE_ACCESS);
+ if (ret)
+ return ret;
}
if (!args->default_acl && !args->acl)
cache_no_acl(args->inode);
@@ -781,12 +781,15 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
return 0;
}
+ /* Defrag ioctl takes precedence over mount options and properties. */
+ if (inode->defrag_compress == BTRFS_DEFRAG_DONT_COMPRESS)
+ return 0;
+ if (BTRFS_COMPRESS_NONE < inode->defrag_compress &&
+ inode->defrag_compress < BTRFS_NR_COMPRESS_TYPES)
+ return 1;
/* force compress */
if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
return 1;
- /* defrag ioctl */
- if (inode->defrag_compress)
- return 1;
/* bad compression ratios */
if (inode->flags & BTRFS_INODE_NOCOMPRESS)
return 0;
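Note: inode_need_compress() now treats the defrag request as a tri-state rather than a boolean. The precedence, condensed (illustrative; BTRFS_DEFRAG_DONT_COMPRESS is assumed to be a sentinel outside the valid compression-type range):

    if (inode->defrag_compress == BTRFS_DEFRAG_DONT_COMPRESS)
            return 0;   /* defrag explicitly asked for no compression */
    if (BTRFS_COMPRESS_NONE < inode->defrag_compress &&
        inode->defrag_compress < BTRFS_NR_COMPRESS_TYPES)
            return 1;   /* defrag picked a specific algorithm */
    /* otherwise fall through to mount options and per-inode flags */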
@@ -808,12 +811,11 @@ static inline void inode_should_defrag(struct btrfs_inode *inode,
static int extent_range_clear_dirty_for_io(struct btrfs_inode *inode, u64 start, u64 end)
{
- unsigned long end_index = end >> PAGE_SHIFT;
+ const pgoff_t end_index = end >> PAGE_SHIFT;
struct folio *folio;
int ret = 0;
- for (unsigned long index = start >> PAGE_SHIFT;
- index <= end_index; index++) {
+ for (pgoff_t index = start >> PAGE_SHIFT; index <= end_index; index++) {
folio = filemap_get_folio(inode->vfs_inode.i_mapping, index);
if (IS_ERR(folio)) {
if (!ret)
@@ -943,7 +945,7 @@ again:
goto cleanup_and_bail_uncompressed;
}
- if (inode->defrag_compress) {
+ if (0 < inode->defrag_compress && inode->defrag_compress < BTRFS_NR_COMPRESS_TYPES) {
compress_type = inode->defrag_compress;
compress_level = inode->defrag_compress_level;
} else if (inode->prop_compress) {
@@ -1755,7 +1757,8 @@ static int fallback_to_cow(struct btrfs_inode *inode,
spin_unlock(&sinfo->lock);
if (count > 0)
- btrfs_clear_extent_bits(io_tree, start, end, EXTENT_NORESERVE);
+ btrfs_clear_extent_bit(io_tree, start, end, EXTENT_NORESERVE,
+ &cached_state);
}
btrfs_unlock_extent(io_tree, start, end, &cached_state);
@@ -2328,8 +2331,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct folio *locked_fol
* The range must cover part of the @locked_folio, or a return of 1
* can confuse the caller.
*/
- ASSERT(!(end <= folio_pos(locked_folio) ||
- start >= folio_pos(locked_folio) + folio_size(locked_folio)));
+ ASSERT(!(end <= folio_pos(locked_folio) || start >= folio_end(locked_folio)));
if (should_nocow(inode, start, end)) {
ret = run_delalloc_nocow(inode, locked_folio, start, end);
@@ -2737,7 +2739,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
struct btrfs_inode *inode = fixup->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
u64 page_start = folio_pos(folio);
- u64 page_end = folio_pos(folio) + folio_size(folio) - 1;
+ u64 page_end = folio_end(folio) - 1;
int ret = 0;
bool free_delalloc_space = true;
@@ -2881,7 +2883,7 @@ int btrfs_writepage_cow_fixup(struct folio *folio)
DEBUG_WARN();
btrfs_err_rl(fs_info,
"root %lld ino %llu folio %llu is marked dirty without notifying the fs",
- BTRFS_I(inode)->root->root_key.objectid,
+ btrfs_root_id(BTRFS_I(inode)->root),
btrfs_ino(BTRFS_I(inode)),
folio_pos(folio));
return -EUCLEAN;
@@ -3375,8 +3377,8 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
btrfs_test_range_bit(&inode->io_tree, file_offset, end, EXTENT_NODATASUM,
NULL)) {
/* Skip the range without csum for data reloc inode */
- btrfs_clear_extent_bits(&inode->io_tree, file_offset, end,
- EXTENT_NODATASUM);
+ btrfs_clear_extent_bit(&inode->io_tree, file_offset, end,
+ EXTENT_NODATASUM, NULL);
return true;
}
@@ -3946,6 +3948,7 @@ static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path
btrfs_inode_split_flags(btrfs_inode_flags(leaf, inode_item),
&inode->flags, &inode->ro_flags);
btrfs_update_inode_mapping_flags(inode);
+ btrfs_set_inode_mapping_order(inode);
cache_index:
/*
@@ -4078,45 +4081,35 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode_item *item,
struct inode *inode)
{
- struct btrfs_map_token token;
u64 flags;
- btrfs_init_map_token(&token, leaf);
-
- btrfs_set_token_inode_uid(&token, item, i_uid_read(inode));
- btrfs_set_token_inode_gid(&token, item, i_gid_read(inode));
- btrfs_set_token_inode_size(&token, item, BTRFS_I(inode)->disk_i_size);
- btrfs_set_token_inode_mode(&token, item, inode->i_mode);
- btrfs_set_token_inode_nlink(&token, item, inode->i_nlink);
-
- btrfs_set_token_timespec_sec(&token, &item->atime,
- inode_get_atime_sec(inode));
- btrfs_set_token_timespec_nsec(&token, &item->atime,
- inode_get_atime_nsec(inode));
-
- btrfs_set_token_timespec_sec(&token, &item->mtime,
- inode_get_mtime_sec(inode));
- btrfs_set_token_timespec_nsec(&token, &item->mtime,
- inode_get_mtime_nsec(inode));
-
- btrfs_set_token_timespec_sec(&token, &item->ctime,
- inode_get_ctime_sec(inode));
- btrfs_set_token_timespec_nsec(&token, &item->ctime,
- inode_get_ctime_nsec(inode));
-
- btrfs_set_token_timespec_sec(&token, &item->otime, BTRFS_I(inode)->i_otime_sec);
- btrfs_set_token_timespec_nsec(&token, &item->otime, BTRFS_I(inode)->i_otime_nsec);
-
- btrfs_set_token_inode_nbytes(&token, item, inode_get_bytes(inode));
- btrfs_set_token_inode_generation(&token, item,
- BTRFS_I(inode)->generation);
- btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
- btrfs_set_token_inode_transid(&token, item, trans->transid);
- btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
+ btrfs_set_inode_uid(leaf, item, i_uid_read(inode));
+ btrfs_set_inode_gid(leaf, item, i_gid_read(inode));
+ btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
+ btrfs_set_inode_mode(leaf, item, inode->i_mode);
+ btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
+
+ btrfs_set_timespec_sec(leaf, &item->atime, inode_get_atime_sec(inode));
+ btrfs_set_timespec_nsec(leaf, &item->atime, inode_get_atime_nsec(inode));
+
+ btrfs_set_timespec_sec(leaf, &item->mtime, inode_get_mtime_sec(inode));
+ btrfs_set_timespec_nsec(leaf, &item->mtime, inode_get_mtime_nsec(inode));
+
+ btrfs_set_timespec_sec(leaf, &item->ctime, inode_get_ctime_sec(inode));
+ btrfs_set_timespec_nsec(leaf, &item->ctime, inode_get_ctime_nsec(inode));
+
+ btrfs_set_timespec_sec(leaf, &item->otime, BTRFS_I(inode)->i_otime_sec);
+ btrfs_set_timespec_nsec(leaf, &item->otime, BTRFS_I(inode)->i_otime_nsec);
+
+ btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode));
+ btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
+ btrfs_set_inode_sequence(leaf, item, inode_peek_iversion(inode));
+ btrfs_set_inode_transid(leaf, item, trans->transid);
+ btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
BTRFS_I(inode)->ro_flags);
- btrfs_set_token_inode_flags(&token, item, flags);
- btrfs_set_token_inode_block_group(&token, item, 0);
+ btrfs_set_inode_flags(leaf, item, flags);
+ btrfs_set_inode_block_group(leaf, item, 0);
}
/*
@@ -4215,20 +4208,22 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
u64 dir_ino = btrfs_ino(dir);
path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!path)
+ return -ENOMEM;
di = btrfs_lookup_dir_item(trans, root, path, dir_ino, name, -1);
if (IS_ERR_OR_NULL(di)) {
- ret = di ? PTR_ERR(di) : -ENOENT;
- goto err;
+ btrfs_free_path(path);
+ return di ? PTR_ERR(di) : -ENOENT;
}
ret = btrfs_delete_one_dir_name(trans, root, path, di);
+ /*
+ * Further down the call chain we'll also need to allocate a path, so
+ * there's no need to hold on to this one for longer than necessary.
+ */
+ btrfs_free_path(path);
if (ret)
- goto err;
- btrfs_release_path(path);
+ return ret;
/*
* If we don't have dir index, we have to get it by looking up
@@ -4250,11 +4245,11 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
ret = btrfs_del_inode_ref(trans, root, name, ino, dir_ino, &index);
if (ret) {
- btrfs_info(fs_info,
- "failed to delete reference to %.*s, inode %llu parent %llu",
- name->len, name->name, ino, dir_ino);
+ btrfs_crit(fs_info,
+ "failed to delete reference to %.*s, root %llu inode %llu parent %llu",
+ name->len, name->name, btrfs_root_id(root), ino, dir_ino);
btrfs_abort_transaction(trans, ret);
- goto err;
+ return ret;
}
skip_backref:
if (rename_ctx)
@@ -4263,7 +4258,7 @@ skip_backref:
ret = btrfs_delete_delayed_dir_index(trans, dir, index);
if (ret) {
btrfs_abort_transaction(trans, ret);
- goto err;
+ return ret;
}
/*
@@ -4287,19 +4282,14 @@ skip_backref:
* holding.
*/
btrfs_run_delayed_iput(fs_info, inode);
-err:
- btrfs_free_path(path);
- if (ret)
- goto out;
btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2);
inode_inc_iversion(&inode->vfs_inode);
inode_set_ctime_current(&inode->vfs_inode);
inode_inc_iversion(&dir->vfs_inode);
inode_set_mtime_to_ts(&dir->vfs_inode, inode_set_ctime_current(&dir->vfs_inode));
- ret = btrfs_update_inode(trans, dir);
-out:
- return ret;
+
+ return btrfs_update_inode(trans, dir);
}
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
@@ -4704,68 +4694,68 @@ out_up_write:
return ret;
}
-static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
+static int btrfs_rmdir(struct inode *vfs_dir, struct dentry *dentry)
{
- struct inode *inode = d_inode(dentry);
- struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct btrfs_inode *dir = BTRFS_I(vfs_dir);
+ struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
int ret = 0;
struct btrfs_trans_handle *trans;
- u64 last_unlink_trans;
struct fscrypt_name fname;
- if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
+ if (inode->vfs_inode.i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
- if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID) {
+ if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) {
if (unlikely(btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))) {
btrfs_err(fs_info,
"extent tree v2 doesn't support snapshot deletion yet");
return -EOPNOTSUPP;
}
- return btrfs_delete_subvolume(BTRFS_I(dir), dentry);
+ return btrfs_delete_subvolume(dir, dentry);
}
- ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname);
+ ret = fscrypt_setup_filename(vfs_dir, &dentry->d_name, 1, &fname);
if (ret)
return ret;
/* This needs to handle no-key deletions later on */
- trans = __unlink_start_trans(BTRFS_I(dir));
+ trans = __unlink_start_trans(dir);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_notrans;
}
- if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
- ret = btrfs_unlink_subvol(trans, BTRFS_I(dir), dentry);
+ /*
+ * Propagate the last_unlink_trans value of the deleted dir to its
+ * parent directory. This is to prevent an unrecoverable log tree in the
+ * case we do something like this:
+ * 1) create dir foo
+ * 2) create snapshot under dir foo
+ * 3) delete the snapshot
+ * 4) rmdir foo
+ * 5) mkdir foo
+ * 6) fsync foo or some file inside foo
+ *
+ * This is because we can't unlink other roots when replaying the dir
+ * deletes for directory foo.
+ */
+ if (inode->last_unlink_trans >= trans->transid)
+ btrfs_record_snapshot_destroy(trans, dir);
+
+ if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
+ ret = btrfs_unlink_subvol(trans, dir, dentry);
goto out;
}
- ret = btrfs_orphan_add(trans, BTRFS_I(inode));
+ ret = btrfs_orphan_add(trans, inode);
if (ret)
goto out;
- last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
-
/* now the directory is empty */
- ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
- &fname.disk_name);
- if (!ret) {
- btrfs_i_size_write(BTRFS_I(inode), 0);
- /*
- * Propagate the last_unlink_trans value of the deleted dir to
- * its parent directory. This is to prevent an unrecoverable
- * log tree in the case we do something like this:
- * 1) create dir foo
- * 2) create snapshot under dir foo
- * 3) delete the snapshot
- * 4) rmdir foo
- * 5) mkdir foo
- * 6) fsync foo or some file inside foo
- */
- if (last_unlink_trans >= trans->transid)
- BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
- }
+ ret = btrfs_unlink_inode(trans, dir, inode, &fname.disk_name);
+ if (!ret)
+ btrfs_i_size_write(inode, 0);
out:
btrfs_end_transaction(trans);
out_notrans:
@@ -4821,9 +4811,9 @@ again:
*/
zero_start = max_t(u64, folio_pos(folio), start);
- zero_end = folio_pos(folio) + folio_size(folio) - 1;
+ zero_end = folio_end(folio);
folio_zero_range(folio, zero_start - folio_pos(folio),
- zero_end - zero_start + 1);
+ zero_end - zero_start);
out_unlock:
folio_unlock(folio);
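Note: folio_end() replaces the repeated open-coded end calculation in this and later hunks; its behavior is equivalent to (sketch):

    static inline u64 folio_end_equiv(struct folio *folio)
    {
            return folio_pos(folio) + folio_size(folio);
    }

This also explains the off-by-one cleanup above: zero_end is now an exclusive bound, so the zeroed length is zero_end - zero_start with no +1.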
@@ -4861,7 +4851,6 @@ int btrfs_truncate_block(struct btrfs_inode *inode, u64 offset, u64 start, u64 e
pgoff_t index = (offset >> PAGE_SHIFT);
struct folio *folio;
gfp_t mask = btrfs_alloc_write_mask(mapping);
- size_t write_bytes = blocksize;
int ret = 0;
const bool in_head_block = is_inside_block(offset, round_down(start, blocksize),
blocksize);
@@ -4913,8 +4902,12 @@ int btrfs_truncate_block(struct btrfs_inode *inode, u64 offset, u64 start, u64 e
ret = btrfs_check_data_free_space(inode, &data_reserved, block_start,
blocksize, false);
if (ret < 0) {
+ size_t write_bytes = blocksize;
+
if (btrfs_check_nocow_lock(inode, block_start, &write_bytes, false) > 0) {
- /* For nocow case, no need to reserve data space */
+ /* For nocow case, no need to reserve data space. */
+ ASSERT(write_bytes == blocksize, "write_bytes=%zu blocksize=%u",
+ write_bytes, blocksize);
only_release_metadata = true;
} else {
goto out;
@@ -5001,8 +4994,7 @@ again:
* not reach disk, it still affects our page caches.
*/
zero_start = max_t(u64, folio_pos(folio), start);
- zero_end = min_t(u64, folio_pos(folio) + folio_size(folio) - 1,
- end);
+ zero_end = min_t(u64, folio_end(folio) - 1, end);
} else {
zero_start = max_t(u64, block_start, start);
zero_end = min_t(u64, block_end, end);
@@ -5014,11 +5006,12 @@ again:
block_end + 1 - block_start);
btrfs_folio_set_dirty(fs_info, folio, block_start,
block_end + 1 - block_start);
- btrfs_unlock_extent(io_tree, block_start, block_end, &cached_state);
if (only_release_metadata)
btrfs_set_extent_bit(&inode->io_tree, block_start, block_end,
- EXTENT_NORESERVE, NULL);
+ EXTENT_NORESERVE, &cached_state);
+
+ btrfs_unlock_extent(io_tree, block_start, block_end, &cached_state);
out_unlock:
if (ret) {
@@ -5256,7 +5249,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
ret = btrfs_truncate(BTRFS_I(inode), newsize == oldsize);
if (ret && inode->i_nlink) {
- int err;
+ int ret2;
/*
* Truncate failed, so fix up the in-memory size. We
@@ -5264,9 +5257,9 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
* wait for disk_i_size to be stable and then update the
* in-memory size to match.
*/
- err = btrfs_wait_ordered_range(BTRFS_I(inode), 0, (u64)-1);
- if (err)
- return err;
+ ret2 = btrfs_wait_ordered_range(BTRFS_I(inode), 0, (u64)-1);
+ if (ret2)
+ return ret2;
i_size_write(inode, BTRFS_I(inode)->disk_i_size);
}
}
@@ -5279,31 +5272,31 @@ static int btrfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
{
struct inode *inode = d_inode(dentry);
struct btrfs_root *root = BTRFS_I(inode)->root;
- int err;
+ int ret;
if (btrfs_root_readonly(root))
return -EROFS;
- err = setattr_prepare(idmap, dentry, attr);
- if (err)
- return err;
+ ret = setattr_prepare(idmap, dentry, attr);
+ if (ret)
+ return ret;
if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
- err = btrfs_setsize(inode, attr);
- if (err)
- return err;
+ ret = btrfs_setsize(inode, attr);
+ if (ret)
+ return ret;
}
if (attr->ia_valid) {
setattr_copy(idmap, inode, attr);
inode_inc_iversion(inode);
- err = btrfs_dirty_inode(BTRFS_I(inode));
+ ret = btrfs_dirty_inode(BTRFS_I(inode));
- if (!err && attr->ia_valid & ATTR_MODE)
- err = posix_acl_chmod(idmap, dentry, inode->i_mode);
+ if (!ret && attr->ia_valid & ATTR_MODE)
+ ret = posix_acl_chmod(idmap, dentry, inode->i_mode);
}
- return err;
+ return ret;
}
/*
@@ -5437,7 +5430,7 @@ void btrfs_evict_inode(struct inode *inode)
struct btrfs_fs_info *fs_info;
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_block_rsv *rsv = NULL;
+ struct btrfs_block_rsv rsv;
int ret;
trace_btrfs_inode_evict(inode);
@@ -5485,11 +5478,9 @@ void btrfs_evict_inode(struct inode *inode)
*/
btrfs_kill_delayed_inode_items(BTRFS_I(inode));
- rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
- if (!rsv)
- goto out;
- rsv->size = btrfs_calc_metadata_size(fs_info, 1);
- rsv->failfast = true;
+ btrfs_init_metadata_block_rsv(fs_info, &rsv, BTRFS_BLOCK_RSV_TEMP);
+ rsv.size = btrfs_calc_metadata_size(fs_info, 1);
+ rsv.failfast = true;
btrfs_i_size_write(BTRFS_I(inode), 0);
@@ -5501,11 +5492,11 @@ void btrfs_evict_inode(struct inode *inode)
.min_type = 0,
};
- trans = evict_refill_and_join(root, rsv);
+ trans = evict_refill_and_join(root, &rsv);
if (IS_ERR(trans))
- goto out;
+ goto out_release;
- trans->block_rsv = rsv;
+ trans->block_rsv = &rsv;
ret = btrfs_truncate_inode_items(trans, root, &control);
trans->block_rsv = &fs_info->trans_block_rsv;
@@ -5517,7 +5508,7 @@ void btrfs_evict_inode(struct inode *inode)
*/
btrfs_btree_balance_dirty_nodelay(fs_info);
if (ret && ret != -ENOSPC && ret != -EAGAIN)
- goto out;
+ goto out_release;
else if (!ret)
break;
}
@@ -5531,16 +5522,17 @@ void btrfs_evict_inode(struct inode *inode)
* If it turns out that we are dropping too many of these, we might want
* to add a mechanism for retrying these after a commit.
*/
- trans = evict_refill_and_join(root, rsv);
+ trans = evict_refill_and_join(root, &rsv);
if (!IS_ERR(trans)) {
- trans->block_rsv = rsv;
+ trans->block_rsv = &rsv;
btrfs_orphan_del(trans, BTRFS_I(inode));
trans->block_rsv = &fs_info->trans_block_rsv;
btrfs_end_transaction(trans);
}
+out_release:
+ btrfs_block_rsv_release(fs_info, &rsv, (u64)-1, NULL);
out:
- btrfs_free_block_rsv(fs_info, rsv);
/*
* If we didn't successfully delete, the orphan item will still be in
* the tree and we'll retry on the next mount. Again, we might also want
@@ -6173,8 +6165,7 @@ again:
if (ret)
goto nopos;
- ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
- if (ret)
+ if (btrfs_readdir_delayed_dir_index(ctx, &ins_list))
goto nopos;
/*
@@ -6467,6 +6458,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
BTRFS_INODE_NODATASUM;
btrfs_update_inode_mapping_flags(BTRFS_I(inode));
+ btrfs_set_inode_mapping_order(BTRFS_I(inode));
}
ret = btrfs_insert_inode_locked(inode);
@@ -6610,13 +6602,17 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
if (args->orphan) {
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto discard;
+ }
} else {
ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name,
0, BTRFS_I(inode)->dir_index);
- }
- if (ret) {
- btrfs_abort_transaction(trans, ret);
- goto discard;
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto discard;
+ }
}
return 0;
@@ -6703,20 +6699,18 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
fail_dir_item:
if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
u64 local_index;
- int err;
- err = btrfs_del_root_ref(trans, key.objectid,
- btrfs_root_id(root), parent_ino,
- &local_index, name);
- if (err)
- btrfs_abort_transaction(trans, err);
+ int ret2;
+
+ ret2 = btrfs_del_root_ref(trans, key.objectid, btrfs_root_id(root),
+ parent_ino, &local_index, name);
+ if (ret2)
+ btrfs_abort_transaction(trans, ret2);
} else if (add_backref) {
- u64 local_index;
- int err;
+ int ret2;
- err = btrfs_del_inode_ref(trans, root, name, ino, parent_ino,
- &local_index);
- if (err)
- btrfs_abort_transaction(trans, err);
+ ret2 = btrfs_del_inode_ref(trans, root, name, ino, parent_ino, NULL);
+ if (ret2)
+ btrfs_abort_transaction(trans, ret2);
}
/* Return the original error code */
@@ -6735,20 +6729,20 @@ static int btrfs_create_common(struct inode *dir, struct dentry *dentry,
};
unsigned int trans_num_items;
struct btrfs_trans_handle *trans;
- int err;
+ int ret;
- err = btrfs_new_inode_prepare(&new_inode_args, &trans_num_items);
- if (err)
+ ret = btrfs_new_inode_prepare(&new_inode_args, &trans_num_items);
+ if (ret)
goto out_inode;
trans = btrfs_start_transaction(root, trans_num_items);
if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
+ ret = PTR_ERR(trans);
goto out_new_inode_args;
}
- err = btrfs_create_new_inode(trans, &new_inode_args);
- if (!err)
+ ret = btrfs_create_new_inode(trans, &new_inode_args);
+ if (!ret)
d_instantiate_new(dentry, inode);
btrfs_end_transaction(trans);
@@ -6756,9 +6750,9 @@ static int btrfs_create_common(struct inode *dir, struct dentry *dentry,
out_new_inode_args:
btrfs_new_inode_args_destroy(&new_inode_args);
out_inode:
- if (err)
+ if (ret)
iput(inode);
- return err;
+ return ret;
}
static int btrfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
@@ -6799,7 +6793,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct fscrypt_name fname;
u64 index;
- int err;
+ int ret;
int drop_inode = 0;
/* do not allow sys_link's with other subvols of the same device */
@@ -6809,12 +6803,12 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
if (inode->i_nlink >= BTRFS_LINK_MAX)
return -EMLINK;
- err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname);
- if (err)
+ ret = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname);
+ if (ret)
goto fail;
- err = btrfs_set_inode_index(BTRFS_I(dir), &index);
- if (err)
+ ret = btrfs_set_inode_index(BTRFS_I(dir), &index);
+ if (ret)
goto fail;
/*
@@ -6825,7 +6819,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
*/
trans = btrfs_start_transaction(root, inode->i_nlink ? 5 : 6);
if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
+ ret = PTR_ERR(trans);
trans = NULL;
goto fail;
}
@@ -6838,24 +6832,24 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
ihold(inode);
set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
- err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
+ ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
&fname.disk_name, 1, index);
- if (err) {
+ if (ret) {
drop_inode = 1;
} else {
struct dentry *parent = dentry->d_parent;
- err = btrfs_update_inode(trans, BTRFS_I(inode));
- if (err)
+ ret = btrfs_update_inode(trans, BTRFS_I(inode));
+ if (ret)
goto fail;
if (inode->i_nlink == 1) {
/*
* If new hard link count is 1, it's a file created
* with open(2) O_TMPFILE flag.
*/
- err = btrfs_orphan_del(trans, BTRFS_I(inode));
- if (err)
+ ret = btrfs_orphan_del(trans, BTRFS_I(inode));
+ if (ret)
goto fail;
}
d_instantiate(dentry, inode);
@@ -6871,7 +6865,7 @@ fail:
iput(inode);
}
btrfs_btree_balance_dirty(fs_info);
- return err;
+ return ret;
}
static struct dentry *btrfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
@@ -7364,13 +7358,13 @@ struct extent_map *btrfs_create_io_em(struct btrfs_inode *inode, u64 start,
static void wait_subpage_spinlock(struct folio *folio)
{
struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
- struct btrfs_subpage *subpage;
+ struct btrfs_folio_state *bfs;
if (!btrfs_is_subpage(fs_info, folio))
return;
ASSERT(folio_test_private(folio) && folio_get_private(folio));
- subpage = folio_get_private(folio);
+ bfs = folio_get_private(folio);
/*
* This may look insane as we just acquire the spinlock and release it,
@@ -7383,8 +7377,8 @@ static void wait_subpage_spinlock(struct folio *folio)
* Here we just acquire the spinlock so that all existing callers
* should exit and we're safe to release/invalidate the page.
*/
- spin_lock_irq(&subpage->lock);
- spin_unlock_irq(&subpage->lock);
+ spin_lock_irq(&bfs->lock);
+ spin_unlock_irq(&bfs->lock);
}
static int btrfs_launder_folio(struct folio *folio)
@@ -7607,7 +7601,7 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
};
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_block_rsv *rsv;
+ struct btrfs_block_rsv rsv;
int ret;
struct btrfs_trans_handle *trans;
u64 mask = fs_info->sectorsize - 1;
@@ -7649,11 +7643,9 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
* 2) fs_info->trans_block_rsv - this will have 1 items worth left for
* updating the inode.
*/
- rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
- if (!rsv)
- return -ENOMEM;
- rsv->size = min_size;
- rsv->failfast = true;
+ btrfs_init_metadata_block_rsv(fs_info, &rsv, BTRFS_BLOCK_RSV_TEMP);
+ rsv.size = min_size;
+ rsv.failfast = true;
/*
* 1 for the truncate slack space
@@ -7666,7 +7658,7 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
}
/* Migrate the slack space for the truncate to our reserve */
- ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
+ ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, &rsv,
min_size, false);
/*
* We have reserved 2 metadata units when we started the transaction and
@@ -7678,7 +7670,7 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
goto out;
}
- trans->block_rsv = rsv;
+ trans->block_rsv = &rsv;
while (1) {
struct extent_state *cached_state = NULL;
@@ -7721,9 +7713,9 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
break;
}
- btrfs_block_rsv_release(fs_info, rsv, -1, NULL);
+ btrfs_block_rsv_release(fs_info, &rsv, -1, NULL);
ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
- rsv, min_size, false);
+ &rsv, min_size, false);
/*
* We have reserved 2 metadata units when we started the
* transaction and min_size matches 1 unit, so this should never
@@ -7732,7 +7724,7 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
if (WARN_ON(ret))
break;
- trans->block_rsv = rsv;
+ trans->block_rsv = &rsv;
}
/*
@@ -7771,7 +7763,7 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
btrfs_btree_balance_dirty(fs_info);
}
out:
- btrfs_free_block_rsv(fs_info, rsv);
+ btrfs_block_rsv_release(fs_info, &rsv, (u64)-1, NULL);
/*
* So if we truncate and then write and fsync we normally would just
* write the extents that changed, which is a problem if we need to
@@ -8026,7 +8018,7 @@ static int btrfs_getattr(struct mnt_idmap *idmap,
generic_fillattr(idmap, request_mask, inode, stat);
stat->dev = BTRFS_I(inode)->root->anon_dev;
- stat->subvol = BTRFS_I(inode)->root->root_key.objectid;
+ stat->subvol = btrfs_root_id(BTRFS_I(inode)->root);
stat->result_mask |= STATX_SUBVOL;
spin_lock(&BTRFS_I(inode)->lock);
@@ -8059,6 +8051,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
int ret;
int ret2;
bool need_abort = false;
+ bool logs_pinned = false;
struct fscrypt_name old_fname, new_fname;
struct fscrypt_str *old_name, *new_name;
@@ -8182,6 +8175,31 @@ static int btrfs_rename_exchange(struct inode *old_dir,
inode_inc_iversion(new_inode);
simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID &&
+ new_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ /*
+ * If we are renaming in the same directory (and it's not for
+ * root entries), pin the log early to prevent any concurrent
+ * task from logging the directory after we removed the old
+ * entries and before we add the new entries. Otherwise that
+ * task can sync a log without any entry for the inodes we are
+ * renaming, and replaying that log after a power failure would
+ * result in deleting the inodes.
+ *
+ * If the rename affects two different directories, we want to
+ * make sure that there's no log commit that contains
+ * updates for only one of the directories but not for the
+ * other.
+ *
+ * If we are renaming an entry for a root, we don't care about
+ * log updates since we called btrfs_set_log_full_commit().
+ */
+ btrfs_pin_log_trans(root);
+ btrfs_pin_log_trans(dest);
+ logs_pinned = true;
+ }
+
if (old_dentry->d_parent != new_dentry->d_parent) {
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
BTRFS_I(old_inode), true);
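The comment above amounts to a pin/unpin bracket around the window in which directory entries are transiently missing. Condensed ordering, with names taken from this hunk (sketch only; in the patch the unpin sits under out_fail so error paths unpin too):

	if (old_ino != BTRFS_FIRST_FREE_OBJECTID &&
	    new_ino != BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_pin_log_trans(root);	/* block log syncs in both roots */
		btrfs_pin_log_trans(dest);
		logs_pinned = true;
	}

	/* ... delete the old directory entries, insert the new ones ... */

	if (logs_pinned) {
		/* Record the new names, then allow log syncs again. */
		btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
				   old_rename_ctx.index, new_dentry->d_parent);
		btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir),
				   new_rename_ctx.index, old_dentry->d_parent);
		btrfs_end_log_trans(root);
		btrfs_end_log_trans(dest);
	}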
@@ -8253,30 +8271,23 @@ static int btrfs_rename_exchange(struct inode *old_dir,
BTRFS_I(new_inode)->dir_index = new_idx;
/*
- * Now pin the logs of the roots. We do it to ensure that no other task
- * can sync the logs while we are in progress with the rename, because
- * that could result in an inconsistency in case any of the inodes that
- * are part of this rename operation were logged before.
+ * Do the log updates for all inodes.
+ *
+ * If either entry is for a root we don't need to update the logs since
+ * we've called btrfs_set_log_full_commit() before.
*/
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
- btrfs_pin_log_trans(root);
- if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
- btrfs_pin_log_trans(dest);
-
- /* Do the log updates for all inodes. */
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+ if (logs_pinned) {
btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
old_rename_ctx.index, new_dentry->d_parent);
- if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir),
new_rename_ctx.index, old_dentry->d_parent);
+ }
- /* Now unpin the logs. */
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+out_fail:
+ if (logs_pinned) {
btrfs_end_log_trans(root);
- if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
btrfs_end_log_trans(dest);
-out_fail:
+ }
ret2 = btrfs_end_transaction(trans);
ret = ret ? ret : ret2;
out_notrans:
@@ -8326,6 +8337,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
int ret2;
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
struct fscrypt_name old_fname, new_fname;
+ bool logs_pinned = false;
if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return -EPERM;
@@ -8460,6 +8472,29 @@ static int btrfs_rename(struct mnt_idmap *idmap,
inode_inc_iversion(old_inode);
simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ /*
+ * If we are renaming in the same directory (and it's not a
+ * root entry), pin the log to prevent any concurrent task from
+ * logging the directory after we removed the old entry and
+ * before we add the new entry. Otherwise that task can sync
+ * a log without any entry for the inode we are renaming, and
+ * replaying that log after a power failure would result in
+ * deleting the inode.
+ *
+ * If the rename affects two different directories, we want to
+ * make sure that there's no log commit that contains
+ * updates for only one of the directories but not for the
+ * other.
+ *
+ * If we are renaming an entry for a root, we don't care about
+ * log updates since we called btrfs_set_log_full_commit().
+ */
+ btrfs_pin_log_trans(root);
+ btrfs_pin_log_trans(dest);
+ logs_pinned = true;
+ }
+
if (old_dentry->d_parent != new_dentry->d_parent)
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
BTRFS_I(old_inode), true);
@@ -8524,7 +8559,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
if (old_inode->i_nlink == 1)
BTRFS_I(old_inode)->dir_index = index;
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+ if (logs_pinned)
btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
rename_ctx.index, new_dentry->d_parent);
@@ -8540,6 +8575,10 @@ static int btrfs_rename(struct mnt_idmap *idmap,
}
}
out_fail:
+ if (logs_pinned) {
+ btrfs_end_log_trans(root);
+ btrfs_end_log_trans(dest);
+ }
ret2 = btrfs_end_transaction(trans);
ret = ret ? ret : ret2;
out_notrans:
@@ -8776,7 +8815,7 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
.dentry = dentry,
};
unsigned int trans_num_items;
- int err;
+ int ret;
int name_len;
int datasize;
unsigned long ptr;
@@ -8803,26 +8842,26 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
inode_set_bytes(inode, name_len);
new_inode_args.inode = inode;
- err = btrfs_new_inode_prepare(&new_inode_args, &trans_num_items);
- if (err)
+ ret = btrfs_new_inode_prepare(&new_inode_args, &trans_num_items);
+ if (ret)
goto out_inode;
/* 1 additional item for the inline extent */
trans_num_items++;
trans = btrfs_start_transaction(root, trans_num_items);
if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
+ ret = PTR_ERR(trans);
goto out_new_inode_args;
}
- err = btrfs_create_new_inode(trans, &new_inode_args);
- if (err)
+ ret = btrfs_create_new_inode(trans, &new_inode_args);
+ if (ret)
goto out;
path = btrfs_alloc_path();
if (!path) {
- err = -ENOMEM;
- btrfs_abort_transaction(trans, err);
+ ret = -ENOMEM;
+ btrfs_abort_transaction(trans, ret);
discard_new_inode(inode);
inode = NULL;
goto out;
@@ -8831,10 +8870,9 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = 0;
datasize = btrfs_file_extent_calc_inline_size(name_len);
- err = btrfs_insert_empty_item(trans, root, path, &key,
- datasize);
- if (err) {
- btrfs_abort_transaction(trans, err);
+ ret = btrfs_insert_empty_item(trans, root, path, &key, datasize);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
btrfs_free_path(path);
discard_new_inode(inode);
inode = NULL;
@@ -8856,16 +8894,16 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
btrfs_free_path(path);
d_instantiate_new(dentry, inode);
- err = 0;
+ ret = 0;
out:
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
out_new_inode_args:
btrfs_new_inode_args_destroy(&new_inode_args);
out_inode:
- if (err)
+ if (ret)
iput(inode);
- return err;
+ return ret;
}
static struct btrfs_trans_handle *insert_prealloc_file_extent(
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 913acef3f0a9..bf561be18885 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -666,14 +666,14 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
goto out;
}
+ btrfs_record_new_subvolume(trans, BTRFS_I(dir));
+
ret = btrfs_create_new_inode(trans, &new_inode_args);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
- btrfs_record_new_subvolume(trans, BTRFS_I(dir));
-
d_instantiate_new(dentry, new_inode_args.inode);
new_inode_args.inode = NULL;
@@ -841,7 +841,7 @@ free_pending:
static int btrfs_may_delete(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *victim, int isdir)
{
- int error;
+ int ret;
if (d_really_is_negative(victim))
return -ENOENT;
@@ -851,9 +851,9 @@ static int btrfs_may_delete(struct mnt_idmap *idmap,
return -EINVAL;
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
- error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
- if (error)
- return error;
+ ret = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
+ if (ret)
+ return ret;
if (IS_APPEND(dir))
return -EPERM;
if (check_sticky(idmap, dir, d_inode(victim)) ||
@@ -892,39 +892,37 @@ static inline int btrfs_may_create(struct mnt_idmap *idmap,
* sys_mkdirat and vfs_mkdir, but we only do a single component lookup
* inside this filesystem so it's quite a bit simpler.
*/
-static noinline int btrfs_mksubvol(const struct path *parent,
+static noinline int btrfs_mksubvol(struct dentry *parent,
struct mnt_idmap *idmap,
- const char *name, int namelen,
- struct btrfs_root *snap_src,
+ struct qstr *qname, struct btrfs_root *snap_src,
bool readonly,
struct btrfs_qgroup_inherit *inherit)
{
- struct inode *dir = d_inode(parent->dentry);
+ struct inode *dir = d_inode(parent);
struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
struct dentry *dentry;
- struct fscrypt_str name_str = FSTR_INIT((char *)name, namelen);
- int error;
+ struct fscrypt_str name_str = FSTR_INIT((char *)qname->name, qname->len);
+ int ret;
- error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
- if (error == -EINTR)
- return error;
+ ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
+ if (ret == -EINTR)
+ return ret;
- dentry = lookup_one(idmap, &QSTR_LEN(name, namelen), parent->dentry);
- error = PTR_ERR(dentry);
+ dentry = lookup_one(idmap, qname, parent);
+ ret = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_unlock;
- error = btrfs_may_create(idmap, dir, dentry);
- if (error)
+ ret = btrfs_may_create(idmap, dir, dentry);
+ if (ret)
goto out_dput;
/*
* even if this name doesn't exist, we may get hash collisions.
* check for them now when we can safely fail
*/
- error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root,
- dir->i_ino, &name_str);
- if (error)
+ ret = btrfs_check_dir_item_collision(BTRFS_I(dir)->root, dir->i_ino, &name_str);
+ if (ret)
goto out_dput;
down_read(&fs_info->subvol_sem);
@@ -933,11 +931,11 @@ static noinline int btrfs_mksubvol(const struct path *parent,
goto out_up_read;
if (snap_src)
- error = create_snapshot(snap_src, dir, dentry, readonly, inherit);
+ ret = create_snapshot(snap_src, dir, dentry, readonly, inherit);
else
- error = create_subvol(idmap, dir, dentry, inherit);
+ ret = create_subvol(idmap, dir, dentry, inherit);
- if (!error)
+ if (!ret)
fsnotify_mkdir(dir, dentry);
out_up_read:
up_read(&fs_info->subvol_sem);
@@ -945,12 +943,12 @@ out_dput:
dput(dentry);
out_unlock:
btrfs_inode_unlock(BTRFS_I(dir), 0);
- return error;
+ return ret;
}
-static noinline int btrfs_mksnapshot(const struct path *parent,
+static noinline int btrfs_mksnapshot(struct dentry *parent,
struct mnt_idmap *idmap,
- const char *name, int namelen,
+ struct qstr *qname,
struct btrfs_root *root,
bool readonly,
struct btrfs_qgroup_inherit *inherit)
@@ -977,8 +975,8 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
btrfs_wait_ordered_extents(root, U64_MAX, NULL);
- ret = btrfs_mksubvol(parent, idmap, name, namelen,
- root, readonly, inherit);
+ ret = btrfs_mksubvol(parent, idmap, qname, root, readonly, inherit);
+
atomic_dec(&root->snapshot_force_cow);
out:
btrfs_drew_read_unlock(&root->snapshot_lock);
@@ -1169,7 +1167,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
} /* equal, nothing need to do */
if (ret == 0 && new_size != old_size)
- btrfs_info_in_rcu(fs_info,
+ btrfs_info(fs_info,
"resize device %s (devid %llu) from %llu to %llu",
btrfs_dev_name(device), device->devid,
old_size, new_size);
@@ -1184,12 +1182,12 @@ out_drop:
static noinline int __btrfs_ioctl_snap_create(struct file *file,
struct mnt_idmap *idmap,
- const char *name, unsigned long fd, int subvol,
+ const char *name, unsigned long fd, bool subvol,
bool readonly,
struct btrfs_qgroup_inherit *inherit)
{
- int namelen;
int ret = 0;
+ struct qstr qname = QSTR_INIT(name, strlen(name));
if (!S_ISDIR(file_inode(file)->i_mode))
return -ENOTDIR;
@@ -1198,21 +1196,20 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
if (ret)
goto out;
- namelen = strlen(name);
if (strchr(name, '/')) {
ret = -EINVAL;
goto out_drop_write;
}
- if (name[0] == '.' &&
- (namelen == 1 || (name[1] == '.' && namelen == 2))) {
+ if (qname.name[0] == '.' &&
+ (qname.len == 1 || (qname.name[1] == '.' && qname.len == 2))) {
ret = -EEXIST;
goto out_drop_write;
}
if (subvol) {
- ret = btrfs_mksubvol(&file->f_path, idmap, name,
- namelen, NULL, readonly, inherit);
+ ret = btrfs_mksubvol(file_dentry(file), idmap, &qname, NULL,
+ readonly, inherit);
} else {
CLASS(fd, src)(fd);
struct inode *src_inode;
@@ -1242,8 +1239,7 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
*/
ret = -EINVAL;
} else {
- ret = btrfs_mksnapshot(&file->f_path, idmap,
- name, namelen,
+ ret = btrfs_mksnapshot(file_dentry(file), idmap, &qname,
BTRFS_I(src_inode)->root,
readonly, inherit);
}
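Both entry points above now take a struct qstr instead of a name/length pair, so callers build one with QSTR_INIT() and the dot checks read the embedded length. Caller-side sketch (names from this hunk):

	struct qstr qname = QSTR_INIT(name, strlen(name));

	/* "." and ".." are rejected via qname.len, no separate namelen. */
	if (qname.name[0] == '.' &&
	    (qname.len == 1 || (qname.name[1] == '.' && qname.len == 2)))
		return -EEXIST;

	ret = btrfs_mksubvol(file_dentry(file), idmap, &qname, NULL,
			     readonly, inherit);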
@@ -1280,7 +1276,7 @@ out:
}
static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
- void __user *arg, int subvol)
+ void __user *arg, bool subvol)
{
struct btrfs_ioctl_vol_args_v2 *vol_args;
int ret;
@@ -2558,8 +2554,14 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
ret = -EOPNOTSUPP;
goto out;
}
- /* compression requires us to start the IO */
- if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
+ if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS) &&
+ (range.flags & BTRFS_DEFRAG_RANGE_NOCOMPRESS)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ /* Compression or no-compression requires starting the IO. */
+ if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS) ||
+ (range.flags & BTRFS_DEFRAG_RANGE_NOCOMPRESS)) {
range.flags |= BTRFS_DEFRAG_RANGE_START_IO;
range.extent_thresh = (u32)-1;
}
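From userspace the contract is now explicit: the COMPRESS and NOCOMPRESS range flags are mutually exclusive, and either one makes the kernel start the IO. A hedged userspace sketch issuing a compressing defrag (UAPI names from linux/btrfs.h; the compress_type value is an assumption, 1 being zlib in the kernel's compression enum):

	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	static int defrag_compress(int fd)
	{
		struct btrfs_ioctl_defrag_range_args range;

		memset(&range, 0, sizeof(range));
		range.len = (__u64)-1;		/* whole file */
		/* Must not be combined with BTRFS_DEFRAG_RANGE_NOCOMPRESS. */
		range.flags = BTRFS_DEFRAG_RANGE_COMPRESS;
		range.compress_type = 1;	/* assumed: 1 == zlib */

		if (ioctl(fd, BTRFS_IOC_DEFRAG_RANGE, &range) < 0) {
			perror("BTRFS_IOC_DEFRAG_RANGE");
			return -1;
		}
		return 0;
	}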
@@ -2700,7 +2702,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
err_drop:
mnt_drop_write_file(file);
if (bdev_file)
- fput(bdev_file);
+ bdev_fput(bdev_file);
out:
btrfs_put_dev_args_from_path(&args);
kfree(vol_args);
@@ -2751,7 +2753,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
mnt_drop_write_file(file);
if (bdev_file)
- fput(bdev_file);
+ bdev_fput(bdev_file);
out:
btrfs_put_dev_args_from_path(&args);
out_free:
@@ -2890,7 +2892,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
ret = PTR_ERR(new_root);
goto out;
}
- if (!is_fstree(btrfs_root_id(new_root))) {
+ if (!btrfs_is_fstree(btrfs_root_id(new_root))) {
ret = -ENOENT;
goto out_free;
}
@@ -3139,7 +3141,7 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
return -EPERM;
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
- btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet");
+ btrfs_err(fs_info, "scrub: extent tree v2 not yet supported");
return -EINVAL;
}
@@ -3357,7 +3359,6 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
int size;
struct btrfs_ioctl_logical_ino_args *loi;
struct btrfs_data_container *inodes = NULL;
- struct btrfs_path *path = NULL;
bool ignore_offset;
if (!capable(CAP_SYS_ADMIN))
@@ -3391,14 +3392,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
goto out_loi;
}
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
- ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
- inodes, ignore_offset);
- btrfs_free_path(path);
+ ret = iterate_inodes_from_logical(loi->logical, fs_info, inodes, ignore_offset);
if (ret == -EINVAL)
ret = -ENOENT;
if (ret < 0)
@@ -3715,22 +3709,6 @@ drop_write:
return ret;
}
-/*
- * Quick check for ioctl handlers if quotas are enabled. Proper locking must be
- * done before any operations.
- */
-static bool qgroup_enabled(struct btrfs_fs_info *fs_info)
-{
- bool ret = true;
-
- mutex_lock(&fs_info->qgroup_ioctl_lock);
- if (!fs_info->quota_root)
- ret = false;
- mutex_unlock(&fs_info->qgroup_ioctl_lock);
-
- return ret;
-}
-
static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
@@ -3745,7 +3723,7 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!qgroup_enabled(root->fs_info))
+ if (!btrfs_qgroup_enabled(fs_info))
return -ENOTCONN;
ret = mnt_want_write_file(file);
@@ -3815,7 +3793,7 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!qgroup_enabled(root->fs_info))
+ if (!btrfs_qgroup_enabled(root->fs_info))
return -ENOTCONN;
ret = mnt_want_write_file(file);
@@ -3833,7 +3811,7 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
goto out;
}
- if (sa->create && is_fstree(sa->qgroupid)) {
+ if (sa->create && btrfs_is_fstree(sa->qgroupid)) {
ret = -EINVAL;
goto out;
}
@@ -3874,7 +3852,7 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!qgroup_enabled(root->fs_info))
+ if (!btrfs_qgroup_enabled(root->fs_info))
return -ENOTCONN;
ret = mnt_want_write_file(file);
@@ -3922,7 +3900,7 @@ static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!qgroup_enabled(fs_info))
+ if (!btrfs_qgroup_enabled(fs_info))
return -ENOTCONN;
ret = mnt_want_write_file(file);
@@ -4200,7 +4178,7 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
}
spin_lock(&fs_info->super_lock);
- strcpy(super_block->label, label);
+ strscpy(super_block->label, label);
spin_unlock(&fs_info->super_lock);
ret = btrfs_commit_transaction(trans);
@@ -4659,7 +4637,7 @@ static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int iss
struct btrfs_uring_priv *priv = bc->priv;
struct btrfs_inode *inode = BTRFS_I(file_inode(priv->iocb.ki_filp));
struct extent_io_tree *io_tree = &inode->io_tree;
- unsigned long index;
+ pgoff_t index;
u64 cur;
size_t page_offset;
ssize_t ret;
@@ -4829,7 +4807,8 @@ static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32, flags);
#else
- return -ENOTTY;
+ ret = -ENOTTY;
+ goto out_acct;
#endif
} else {
copy_end = copy_end_kernel;
diff --git a/fs/btrfs/messages.h b/fs/btrfs/messages.h
index 6abf81bb00c2..022ebc89af85 100644
--- a/fs/btrfs/messages.h
+++ b/fs/btrfs/messages.h
@@ -37,106 +37,46 @@ void _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...);
btrfs_no_printk(fs_info, fmt, ##args)
#endif
-#define btrfs_emerg(fs_info, fmt, args...) \
- btrfs_printk(fs_info, KERN_EMERG fmt, ##args)
-#define btrfs_alert(fs_info, fmt, args...) \
- btrfs_printk(fs_info, KERN_ALERT fmt, ##args)
-#define btrfs_crit(fs_info, fmt, args...) \
- btrfs_printk(fs_info, KERN_CRIT fmt, ##args)
-#define btrfs_err(fs_info, fmt, args...) \
- btrfs_printk(fs_info, KERN_ERR fmt, ##args)
-#define btrfs_warn(fs_info, fmt, args...) \
- btrfs_printk(fs_info, KERN_WARNING fmt, ##args)
-#define btrfs_notice(fs_info, fmt, args...) \
- btrfs_printk(fs_info, KERN_NOTICE fmt, ##args)
-#define btrfs_info(fs_info, fmt, args...) \
- btrfs_printk(fs_info, KERN_INFO fmt, ##args)
-
/*
- * Wrappers that use printk_in_rcu
+ * Print a message with filesystem info, enclosed in RCU protection.
*/
-#define btrfs_emerg_in_rcu(fs_info, fmt, args...) \
- btrfs_printk_in_rcu(fs_info, KERN_EMERG fmt, ##args)
-#define btrfs_alert_in_rcu(fs_info, fmt, args...) \
- btrfs_printk_in_rcu(fs_info, KERN_ALERT fmt, ##args)
-#define btrfs_crit_in_rcu(fs_info, fmt, args...) \
+#define btrfs_crit(fs_info, fmt, args...) \
btrfs_printk_in_rcu(fs_info, KERN_CRIT fmt, ##args)
-#define btrfs_err_in_rcu(fs_info, fmt, args...) \
+#define btrfs_err(fs_info, fmt, args...) \
btrfs_printk_in_rcu(fs_info, KERN_ERR fmt, ##args)
-#define btrfs_warn_in_rcu(fs_info, fmt, args...) \
+#define btrfs_warn(fs_info, fmt, args...) \
btrfs_printk_in_rcu(fs_info, KERN_WARNING fmt, ##args)
-#define btrfs_notice_in_rcu(fs_info, fmt, args...) \
- btrfs_printk_in_rcu(fs_info, KERN_NOTICE fmt, ##args)
-#define btrfs_info_in_rcu(fs_info, fmt, args...) \
+#define btrfs_info(fs_info, fmt, args...) \
btrfs_printk_in_rcu(fs_info, KERN_INFO fmt, ##args)
/*
- * Wrappers that use a ratelimited printk_in_rcu
- */
-#define btrfs_emerg_rl_in_rcu(fs_info, fmt, args...) \
- btrfs_printk_rl_in_rcu(fs_info, KERN_EMERG fmt, ##args)
-#define btrfs_alert_rl_in_rcu(fs_info, fmt, args...) \
- btrfs_printk_rl_in_rcu(fs_info, KERN_ALERT fmt, ##args)
-#define btrfs_crit_rl_in_rcu(fs_info, fmt, args...) \
- btrfs_printk_rl_in_rcu(fs_info, KERN_CRIT fmt, ##args)
-#define btrfs_err_rl_in_rcu(fs_info, fmt, args...) \
- btrfs_printk_rl_in_rcu(fs_info, KERN_ERR fmt, ##args)
-#define btrfs_warn_rl_in_rcu(fs_info, fmt, args...) \
- btrfs_printk_rl_in_rcu(fs_info, KERN_WARNING fmt, ##args)
-#define btrfs_notice_rl_in_rcu(fs_info, fmt, args...) \
- btrfs_printk_rl_in_rcu(fs_info, KERN_NOTICE fmt, ##args)
-#define btrfs_info_rl_in_rcu(fs_info, fmt, args...) \
- btrfs_printk_rl_in_rcu(fs_info, KERN_INFO fmt, ##args)
-
-/*
* Wrappers that use a ratelimited printk
*/
-#define btrfs_emerg_rl(fs_info, fmt, args...) \
- btrfs_printk_ratelimited(fs_info, KERN_EMERG fmt, ##args)
-#define btrfs_alert_rl(fs_info, fmt, args...) \
- btrfs_printk_ratelimited(fs_info, KERN_ALERT fmt, ##args)
#define btrfs_crit_rl(fs_info, fmt, args...) \
- btrfs_printk_ratelimited(fs_info, KERN_CRIT fmt, ##args)
+ btrfs_printk_rl_in_rcu(fs_info, KERN_CRIT fmt, ##args)
#define btrfs_err_rl(fs_info, fmt, args...) \
- btrfs_printk_ratelimited(fs_info, KERN_ERR fmt, ##args)
+ btrfs_printk_rl_in_rcu(fs_info, KERN_ERR fmt, ##args)
#define btrfs_warn_rl(fs_info, fmt, args...) \
- btrfs_printk_ratelimited(fs_info, KERN_WARNING fmt, ##args)
-#define btrfs_notice_rl(fs_info, fmt, args...) \
- btrfs_printk_ratelimited(fs_info, KERN_NOTICE fmt, ##args)
+ btrfs_printk_rl_in_rcu(fs_info, KERN_WARNING fmt, ##args)
#define btrfs_info_rl(fs_info, fmt, args...) \
- btrfs_printk_ratelimited(fs_info, KERN_INFO fmt, ##args)
+ btrfs_printk_rl_in_rcu(fs_info, KERN_INFO fmt, ##args)
#if defined(CONFIG_DYNAMIC_DEBUG)
#define btrfs_debug(fs_info, fmt, args...) \
- _dynamic_func_call_no_desc(fmt, btrfs_printk, \
- fs_info, KERN_DEBUG fmt, ##args)
-#define btrfs_debug_in_rcu(fs_info, fmt, args...) \
_dynamic_func_call_no_desc(fmt, btrfs_printk_in_rcu, \
fs_info, KERN_DEBUG fmt, ##args)
-#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
- _dynamic_func_call_no_desc(fmt, btrfs_printk_rl_in_rcu, \
- fs_info, KERN_DEBUG fmt, ##args)
#define btrfs_debug_rl(fs_info, fmt, args...) \
- _dynamic_func_call_no_desc(fmt, btrfs_printk_ratelimited, \
+ _dynamic_func_call_no_desc(fmt, btrfs_printk_rl_in_rcu, \
fs_info, KERN_DEBUG fmt, ##args)
#elif defined(DEBUG)
#define btrfs_debug(fs_info, fmt, args...) \
- btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
-#define btrfs_debug_in_rcu(fs_info, fmt, args...) \
btrfs_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
-#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
- btrfs_printk_rl_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
#define btrfs_debug_rl(fs_info, fmt, args...) \
- btrfs_printk_ratelimited(fs_info, KERN_DEBUG fmt, ##args)
+ btrfs_printk_rl_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
#else
-#define btrfs_debug(fs_info, fmt, args...) \
- btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
-#define btrfs_debug_in_rcu(fs_info, fmt, args...) \
- btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
-#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
- btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
-#define btrfs_debug_rl(fs_info, fmt, args...) \
- btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
+/* When printk() is no_printk(), expand to a no-op. */
+#define btrfs_debug(fs_info, fmt, args...) do { (void)(fs_info); } while (0)
+#define btrfs_debug_rl(fs_info, fmt, args...) do { (void)(fs_info); } while (0)
#endif
#define btrfs_printk_in_rcu(fs_info, fmt, args...) \
@@ -146,26 +86,15 @@ do { \
rcu_read_unlock(); \
} while (0)
-#define btrfs_no_printk_in_rcu(fs_info, fmt, args...) \
-do { \
- rcu_read_lock(); \
- btrfs_no_printk(fs_info, fmt, ##args); \
- rcu_read_unlock(); \
-} while (0)
-
-#define btrfs_printk_ratelimited(fs_info, fmt, args...) \
+#define btrfs_printk_rl_in_rcu(fs_info, fmt, args...) \
do { \
static DEFINE_RATELIMIT_STATE(_rs, \
DEFAULT_RATELIMIT_INTERVAL, \
DEFAULT_RATELIMIT_BURST); \
+ \
+ rcu_read_lock(); \
if (__ratelimit(&_rs)) \
btrfs_printk(fs_info, fmt, ##args); \
-} while (0)
-
-#define btrfs_printk_rl_in_rcu(fs_info, fmt, args...) \
-do { \
- rcu_read_lock(); \
- btrfs_printk_ratelimited(fs_info, fmt, ##args); \
rcu_read_unlock(); \
} while (0)
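After this consolidation every surviving helper is RCU-protected, which is why call sites such as the device-resize message in the ioctl.c hunk above drop the _in_rcu suffix. Caller usage is unchanged (both lines below appear in this patch):

	/* Device names are rcu-strings; no extra annotation needed now: */
	btrfs_info(fs_info, "resize device %s (devid %llu) from %llu to %llu",
		   btrfs_dev_name(device), device->devid, old_size, new_size);

	/* The _rl variants add ratelimiting on top of the RCU protection: */
	btrfs_warn_rl(fs_info, "qgroup marked inconsistent, %pV", &vaf);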
diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h
index 0d599fd847c9..ff5eac84d819 100644
--- a/fs/btrfs/misc.h
+++ b/fs/btrfs/misc.h
@@ -7,6 +7,8 @@
#include <linux/bitmap.h>
#include <linux/sched.h>
#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
#include <linux/math64.h>
#include <linux/rbtree.h>
@@ -119,28 +121,23 @@ static inline struct rb_node *rb_simple_search_first(const struct rb_root *root,
return ret;
}
-static inline struct rb_node *rb_simple_insert(struct rb_root *root, u64 bytenr,
- struct rb_node *node)
+static inline int rb_simple_node_bytenr_cmp(struct rb_node *new, const struct rb_node *existing)
{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct rb_simple_node *entry;
+ struct rb_simple_node *new_entry = rb_entry(new, struct rb_simple_node, rb_node);
+ struct rb_simple_node *existing_entry = rb_entry(existing, struct rb_simple_node, rb_node);
- while (*p) {
- parent = *p;
- entry = rb_entry(parent, struct rb_simple_node, rb_node);
+ if (new_entry->bytenr < existing_entry->bytenr)
+ return -1;
+ else if (new_entry->bytenr > existing_entry->bytenr)
+ return 1;
- if (bytenr < entry->bytenr)
- p = &(*p)->rb_left;
- else if (bytenr > entry->bytenr)
- p = &(*p)->rb_right;
- else
- return parent;
- }
+ return 0;
+}
- rb_link_node(node, parent, p);
- rb_insert_color(node, root);
- return NULL;
+static inline struct rb_node *rb_simple_insert(struct rb_root *root,
+ struct rb_simple_node *simple_node)
+{
+ return rb_find_add(&simple_node->rb_node, root, rb_simple_node_bytenr_cmp);
}
static inline bool bitmap_test_range_all_set(const unsigned long *addr,
@@ -163,4 +160,9 @@ static inline bool bitmap_test_range_all_zero(const unsigned long *addr,
return (found_set == start + nbits);
}
+static inline u64 folio_end(struct folio *folio)
+{
+ return folio_pos(folio) + folio_size(folio);
+}
+
#endif
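rb_simple_insert() now delegates to rb_find_add(), whose comparator orders the node being inserted against each existing node and which returns the colliding node, or NULL when the insert succeeded. A self-contained kernel-style sketch of the contract, with a hypothetical struct item:

	#include <linux/rbtree.h>
	#include <linux/types.h>

	struct item {
		u64 key;
		struct rb_node rb;
	};

	/* Return <0 if @new sorts left of @existing, >0 right, 0 on duplicate. */
	static int item_cmp(struct rb_node *new, const struct rb_node *existing)
	{
		const struct item *a = rb_entry(new, struct item, rb);
		const struct item *b = rb_entry(existing, struct item, rb);

		if (a->key < b->key)
			return -1;
		if (a->key > b->key)
			return 1;
		return 0;
	}

	/* Returns the preexisting duplicate, or NULL if @it was inserted. */
	static struct item *item_insert(struct rb_root *root, struct item *it)
	{
		struct rb_node *dup = rb_find_add(&it->rb, root, item_cmp);

		return dup ? rb_entry(dup, struct item, rb) : NULL;
	}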
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 9212ce110cde..2829f20d7bb5 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -359,7 +359,7 @@ static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
if (folio) {
ASSERT(folio->mapping);
ASSERT(folio_pos(folio) <= file_offset);
- ASSERT(file_offset + len <= folio_pos(folio) + folio_size(folio));
+ ASSERT(file_offset + len <= folio_end(folio));
/*
* Ordered flag indicates whether we still have
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index fc821aa446f0..74e38da9bd39 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -190,7 +190,7 @@ static void print_uuid_item(const struct extent_buffer *l, unsigned long offset,
u32 item_size)
{
if (!IS_ALIGNED(item_size, sizeof(u64))) {
- pr_warn("BTRFS: uuid item with illegal size %lu!\n",
+ btrfs_warn(l->fs_info, "uuid item with illegal size %lu",
(unsigned long)item_size);
return;
}
@@ -223,7 +223,7 @@ static void print_eb_refs_lock(const struct extent_buffer *eb)
{
#ifdef CONFIG_BTRFS_DEBUG
btrfs_info(eb->fs_info, "refs %u lock_owner %u current %u",
- atomic_read(&eb->refs), eb->lock_owner, current->pid);
+ refcount_read(&eb->refs), eb->lock_owner, current->pid);
#endif
}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b3176edbde82..1a5972178b3a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -160,23 +160,34 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
int init_flags);
static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
+static int btrfs_qgroup_qgroupid_key_cmp(const void *key, const struct rb_node *node)
+{
+ const u64 *qgroupid = key;
+ const struct btrfs_qgroup *qgroup = rb_entry(node, struct btrfs_qgroup, node);
+
+ if (qgroup->qgroupid < *qgroupid)
+ return -1;
+ else if (qgroup->qgroupid > *qgroupid)
+ return 1;
+
+ return 0;
+}
+
/* must be called with qgroup_ioctl_lock held */
static struct btrfs_qgroup *find_qgroup_rb(const struct btrfs_fs_info *fs_info,
u64 qgroupid)
{
- struct rb_node *n = fs_info->qgroup_tree.rb_node;
- struct btrfs_qgroup *qgroup;
+ struct rb_node *node;
- while (n) {
- qgroup = rb_entry(n, struct btrfs_qgroup, node);
- if (qgroup->qgroupid < qgroupid)
- n = n->rb_left;
- else if (qgroup->qgroupid > qgroupid)
- n = n->rb_right;
- else
- return qgroup;
- }
- return NULL;
+ node = rb_find(&qgroupid, &fs_info->qgroup_tree, btrfs_qgroup_qgroupid_key_cmp);
+ return rb_entry_safe(node, struct btrfs_qgroup, node);
+}
+
+static int btrfs_qgroup_qgroupid_cmp(struct rb_node *new, const struct rb_node *existing)
+{
+ const struct btrfs_qgroup *new_qgroup = rb_entry(new, struct btrfs_qgroup, node);
+
+ return btrfs_qgroup_qgroupid_key_cmp(&new_qgroup->qgroupid, existing);
}
/*
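Lookups use the companion rb_find(), whose comparator receives the search key directly, so the insert comparator can be a thin wrapper over the key comparator exactly as btrfs_qgroup_qgroupid_cmp() is above. A sketch continuing the hypothetical struct item from the misc.h note (the qgroup comparator appears to preserve the tree's historical descending order by comparing node against key, whereas this sketch uses conventional ascending order):

	/* cmp(key, node): <0 means the key sorts left of @node. */
	static int item_key_cmp(const void *key, const struct rb_node *node)
	{
		const u64 *k = key;
		const struct item *it = rb_entry(node, struct item, rb);

		if (*k < it->key)
			return -1;
		if (*k > it->key)
			return 1;
		return 0;
	}

	static struct item *item_lookup(struct rb_root *root, u64 key)
	{
		return rb_entry_safe(rb_find(&key, root, item_key_cmp),
				     struct item, rb);
	}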
@@ -191,39 +202,25 @@ static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *prealloc,
u64 qgroupid)
{
- struct rb_node **p = &fs_info->qgroup_tree.rb_node;
- struct rb_node *parent = NULL;
- struct btrfs_qgroup *qgroup;
+ struct rb_node *node;
/* Caller must have pre-allocated @prealloc. */
ASSERT(prealloc);
- while (*p) {
- parent = *p;
- qgroup = rb_entry(parent, struct btrfs_qgroup, node);
-
- if (qgroup->qgroupid < qgroupid) {
- p = &(*p)->rb_left;
- } else if (qgroup->qgroupid > qgroupid) {
- p = &(*p)->rb_right;
- } else {
- kfree(prealloc);
- return qgroup;
- }
+ prealloc->qgroupid = qgroupid;
+ node = rb_find_add(&prealloc->node, &fs_info->qgroup_tree, btrfs_qgroup_qgroupid_cmp);
+ if (node) {
+ kfree(prealloc);
+ return rb_entry(node, struct btrfs_qgroup, node);
}
- qgroup = prealloc;
- qgroup->qgroupid = qgroupid;
- INIT_LIST_HEAD(&qgroup->groups);
- INIT_LIST_HEAD(&qgroup->members);
- INIT_LIST_HEAD(&qgroup->dirty);
- INIT_LIST_HEAD(&qgroup->iterator);
- INIT_LIST_HEAD(&qgroup->nested_iterator);
+ INIT_LIST_HEAD(&prealloc->groups);
+ INIT_LIST_HEAD(&prealloc->members);
+ INIT_LIST_HEAD(&prealloc->dirty);
+ INIT_LIST_HEAD(&prealloc->iterator);
+ INIT_LIST_HEAD(&prealloc->nested_iterator);
- rb_link_node(&qgroup->node, parent, p);
- rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
-
- return qgroup;
+ return prealloc;
}
static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
@@ -349,13 +346,27 @@ int btrfs_verify_qgroup_counts(const struct btrfs_fs_info *fs_info, u64 qgroupid
}
#endif
-static void qgroup_mark_inconsistent(struct btrfs_fs_info *fs_info)
+__printf(2, 3)
+static void qgroup_mark_inconsistent(struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
+ const u64 old_flags = fs_info->qgroup_flags;
+
if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE)
return;
fs_info->qgroup_flags |= (BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT |
BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN |
BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING);
+ if (!(old_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)) {
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ btrfs_warn_rl(fs_info, "qgroup marked inconsistent, %pV", &vaf);
+ va_end(args);
+ }
}
static void qgroup_read_enable_gen(struct btrfs_fs_info *fs_info,
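qgroup_mark_inconsistent() now takes a printf-style reason, forwarded through struct va_format and %pV so it is logged exactly once, on the first transition to the inconsistent state. The general shape of such a variadic wrapper, as a sketch (struct my_ctx and MY_CTX_BAD are hypothetical; __printf() lets the compiler type-check callers):

	#include <linux/printk.h>
	#include <linux/bitops.h>
	#include <linux/stdarg.h>

	struct my_ctx {
		unsigned long flags;
	};
	#define MY_CTX_BAD	0

	__printf(2, 3)
	static void report_once(struct my_ctx *ctx, const char *fmt, ...)
	{
		struct va_format vaf;
		va_list args;

		if (test_and_set_bit(MY_CTX_BAD, &ctx->flags))
			return;		/* already reported once */

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		/* %pV expands the caller's format and arguments in place. */
		pr_warn("context marked inconsistent: %pV\n", &vaf);
		va_end(args);
	}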
@@ -386,12 +397,6 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
if (!fs_info->quota_root)
return 0;
- fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
- if (!fs_info->qgroup_ulist) {
- ret = -ENOMEM;
- goto out;
- }
-
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
@@ -434,13 +439,10 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
goto out;
}
fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, ptr);
- if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE) {
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE)
qgroup_read_enable_gen(fs_info, l, slot, ptr);
- } else if (btrfs_qgroup_status_generation(l, ptr) != fs_info->generation) {
- qgroup_mark_inconsistent(fs_info);
- btrfs_err(fs_info,
- "qgroup generation mismatch, marked as inconsistent");
- }
+ else if (btrfs_qgroup_status_generation(l, ptr) != fs_info->generation)
+ qgroup_mark_inconsistent(fs_info, "qgroup generation mismatch");
rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
goto next1;
}
@@ -451,10 +453,8 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
qgroup = find_qgroup_rb(fs_info, found_key.offset);
if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
- (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
- btrfs_err(fs_info, "inconsistent qgroup config");
- qgroup_mark_inconsistent(fs_info);
- }
+ (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY))
+ qgroup_mark_inconsistent(fs_info, "inconsistent qgroup config");
if (!qgroup) {
struct btrfs_qgroup *prealloc;
struct btrfs_root *tree_root = fs_info->tree_root;
@@ -476,7 +476,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
* during mount before we start doing things like creating
* subvolumes.
*/
- if (is_fstree(qgroup->qgroupid) &&
+ if (btrfs_is_fstree(qgroup->qgroupid) &&
qgroup->qgroupid > tree_root->free_objectid)
/*
* Don't need to check against BTRFS_LAST_FREE_OBJECTID,
@@ -581,8 +581,6 @@ out:
if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
} else {
- ulist_free(fs_info->qgroup_ulist);
- fs_info->qgroup_ulist = NULL;
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
btrfs_sysfs_del_qgroups(fs_info);
}
@@ -630,29 +628,30 @@ bool btrfs_check_quota_leak(const struct btrfs_fs_info *fs_info)
/*
* This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
- * first two are in single-threaded paths.And for the third one, we have set
- * quota_root to be null with qgroup_lock held before, so it is safe to clean
- * up the in-memory structures without qgroup_lock held.
+ * first two are in single-threaded paths.
*/
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
struct rb_node *n;
struct btrfs_qgroup *qgroup;
+ /*
+ * btrfs_quota_disable() can be called concurrently with
+ * btrfs_qgroup_rescan() -> qgroup_rescan_zero_tracking(), so take the
+ * lock.
+ */
+ spin_lock(&fs_info->qgroup_lock);
while ((n = rb_first(&fs_info->qgroup_tree))) {
qgroup = rb_entry(n, struct btrfs_qgroup, node);
rb_erase(n, &fs_info->qgroup_tree);
__del_qgroup_rb(qgroup);
+ spin_unlock(&fs_info->qgroup_lock);
btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
kfree(qgroup);
+ spin_lock(&fs_info->qgroup_lock);
}
- /*
- * We call btrfs_free_qgroup_config() when unmounting
- * filesystem and disabling quota, so we set qgroup_ulist
- * to be null here to avoid double free.
- */
- ulist_free(fs_info->qgroup_ulist);
- fs_info->qgroup_ulist = NULL;
+ spin_unlock(&fs_info->qgroup_lock);
+
btrfs_sysfs_del_qgroups(fs_info);
}
@@ -998,7 +997,6 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *qgroup = NULL;
struct btrfs_qgroup *prealloc = NULL;
struct btrfs_trans_handle *trans = NULL;
- struct ulist *ulist = NULL;
const bool simple = (quota_ctl_args->cmd == BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA);
int ret = 0;
int slot;
@@ -1021,12 +1019,6 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
if (fs_info->quota_root)
goto out;
- ulist = ulist_alloc(GFP_KERNEL);
- if (!ulist) {
- ret = -ENOMEM;
- goto out;
- }
-
ret = btrfs_sysfs_add_qgroups(fs_info);
if (ret < 0)
goto out;
@@ -1066,9 +1058,6 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
if (fs_info->quota_root)
goto out;
- fs_info->qgroup_ulist = ulist;
- ulist = NULL;
-
/*
* initially create the quota tree
*/
@@ -1155,11 +1144,6 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
qgroup = add_qgroup_rb(fs_info, prealloc, found_key.offset);
prealloc = NULL;
- if (IS_ERR(qgroup)) {
- ret = PTR_ERR(qgroup);
- btrfs_abort_transaction(trans, ret);
- goto out_free_path;
- }
ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
@@ -1272,17 +1256,13 @@ out_free_root:
if (ret)
btrfs_put_root(quota_root);
out:
- if (ret) {
- ulist_free(fs_info->qgroup_ulist);
- fs_info->qgroup_ulist = NULL;
+ if (ret)
btrfs_sysfs_del_qgroups(fs_info);
- }
mutex_unlock(&fs_info->qgroup_ioctl_lock);
if (ret && trans)
btrfs_end_transaction(trans);
else if (trans)
ret = btrfs_end_transaction(trans);
- ulist_free(ulist);
kfree(prealloc);
return ret;
}
@@ -1354,11 +1334,14 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
/*
* We have nothing held here and no trans handle, just return the error
- * if there is one.
+ * if there is one, and set the quota enabled bit back since we didn't
+ * actually disable quotas.
*/
ret = flush_reservations(fs_info);
- if (ret)
+ if (ret) {
+ set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
return ret;
+ }
/*
* 1 For the root item
@@ -1679,9 +1662,6 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
struct btrfs_qgroup *prealloc = NULL;
int ret = 0;
- if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED)
- return 0;
-
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!fs_info->quota_root) {
ret = -ENOTCONN;
@@ -1844,13 +1824,12 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
if (qgroup->rfer || qgroup->excl ||
qgroup->rfer_cmpr || qgroup->excl_cmpr) {
DEBUG_WARN();
- btrfs_warn_rl(fs_info,
-"to be deleted qgroup %u/%llu has non-zero numbers, rfer %llu rfer_cmpr %llu excl %llu excl_cmpr %llu",
- btrfs_qgroup_level(qgroup->qgroupid),
- btrfs_qgroup_subvolid(qgroup->qgroupid),
- qgroup->rfer, qgroup->rfer_cmpr,
- qgroup->excl, qgroup->excl_cmpr);
- qgroup_mark_inconsistent(fs_info);
+ qgroup_mark_inconsistent(fs_info,
+ "to be deleted qgroup %u/%llu has non-zero numbers, rfer %llu rfer_cmpr %llu excl %llu excl_cmpr %llu",
+ btrfs_qgroup_level(qgroup->qgroupid),
+ btrfs_qgroup_subvolid(qgroup->qgroupid),
+ qgroup->rfer, qgroup->rfer_cmpr,
+ qgroup->excl, qgroup->excl_cmpr);
}
}
del_qgroup_rb(fs_info, qgroupid);
@@ -1873,7 +1852,8 @@ int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 su
struct btrfs_trans_handle *trans;
int ret;
- if (!is_fstree(subvolid) || !btrfs_qgroup_enabled(fs_info) || !fs_info->quota_root)
+ if (!btrfs_is_fstree(subvolid) || !btrfs_qgroup_enabled(fs_info) ||
+ !fs_info->quota_root)
return 0;
/*
@@ -1968,11 +1948,8 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
spin_unlock(&fs_info->qgroup_lock);
ret = update_qgroup_limit_item(trans, qgroup);
- if (ret) {
- qgroup_mark_inconsistent(fs_info);
- btrfs_info(fs_info, "unable to update quota limit for %llu",
- qgroupid);
- }
+ if (ret)
+ qgroup_mark_inconsistent(fs_info, "qgroup item update error %d", ret);
out:
mutex_unlock(&fs_info->qgroup_ioctl_lock);
@@ -2027,7 +2004,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
ret = __xa_store(&delayed_refs->dirty_extents, index, record, GFP_ATOMIC);
xa_unlock(&delayed_refs->dirty_extents);
if (xa_is_err(ret)) {
- qgroup_mark_inconsistent(fs_info);
+ qgroup_mark_inconsistent(fs_info, "xarray insert error: %d", xa_err(ret));
return xa_err(ret);
}
@@ -2094,10 +2071,8 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
ret = btrfs_find_all_roots(&ctx, true);
if (ret < 0) {
- qgroup_mark_inconsistent(fs_info);
- btrfs_warn(fs_info,
-"error accounting new delayed refs extent (err code: %d), quota inconsistent",
- ret);
+ qgroup_mark_inconsistent(fs_info,
+ "error accounting new delayed refs extent: %d", ret);
return 0;
}
@@ -2341,7 +2316,7 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
btrfs_item_key_to_cpu(dst_path->nodes[dst_level], &key, 0);
/* For src_path */
- atomic_inc(&src_eb->refs);
+ refcount_inc(&src_eb->refs);
src_path->nodes[root_level] = src_eb;
src_path->slots[root_level] = dst_path->slots[root_level];
src_path->locks[root_level] = 0;
@@ -2574,7 +2549,7 @@ static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
goto out;
}
/* For dst_path */
- atomic_inc(&dst_eb->refs);
+ refcount_inc(&dst_eb->refs);
dst_path->nodes[level] = dst_eb;
dst_path->slots[level] = 0;
dst_path->locks[level] = 0;
@@ -2589,7 +2564,7 @@ static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
out:
btrfs_free_path(dst_path);
if (ret < 0)
- qgroup_mark_inconsistent(fs_info);
+ qgroup_mark_inconsistent(fs_info, "%s error: %d", __func__, ret);
return ret;
}
@@ -2633,7 +2608,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
* mark qgroup inconsistent.
*/
if (root_level >= drop_subptree_thres) {
- qgroup_mark_inconsistent(fs_info);
+ qgroup_mark_inconsistent(fs_info, "subtree level reached threshold");
return 0;
}
@@ -2666,7 +2641,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
* walk back up the tree (adjusting slot pointers as we go)
* and restart the search process.
*/
- atomic_inc(&root_eb->refs); /* For path */
+ refcount_inc(&root_eb->refs); /* For path */
path->nodes[root_level] = root_eb;
path->slots[root_level] = 0;
path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
@@ -2932,7 +2907,7 @@ static int maybe_fs_roots(struct ulist *roots)
* trees.
* If it contains a non-fs tree, it won't be shared with fs/subvol trees.
*/
- return is_fstree(unode->val);
+ return btrfs_is_fstree(unode->val);
}
int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
@@ -3133,10 +3108,12 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
spin_unlock(&fs_info->qgroup_lock);
ret = update_qgroup_info_item(trans, qgroup);
if (ret)
- qgroup_mark_inconsistent(fs_info);
+ qgroup_mark_inconsistent(fs_info,
+ "qgroup info item update error %d", ret);
ret = update_qgroup_limit_item(trans, qgroup);
if (ret)
- qgroup_mark_inconsistent(fs_info);
+ qgroup_mark_inconsistent(fs_info,
+ "qgroup limit item update error %d", ret);
spin_lock(&fs_info->qgroup_lock);
}
if (btrfs_qgroup_enabled(fs_info))
@@ -3147,7 +3124,8 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
ret = update_qgroup_status_item(trans);
if (ret)
- qgroup_mark_inconsistent(fs_info);
+ qgroup_mark_inconsistent(fs_info,
+ "qgroup status item update error %d", ret);
return ret;
}
@@ -3329,6 +3307,9 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
u32 level_size = 0;
u64 nums;
+ if (!btrfs_qgroup_enabled(fs_info))
+ return 0;
+
prealloc = kzalloc(sizeof(*prealloc), GFP_NOFS);
if (!prealloc)
return -ENOMEM;
@@ -3352,8 +3333,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
if (!committing)
mutex_lock(&fs_info->qgroup_ioctl_lock);
- if (!btrfs_qgroup_enabled(fs_info))
- goto out;
quota_root = fs_info->quota_root;
if (!quota_root) {
@@ -3554,7 +3533,7 @@ out:
if (!committing)
mutex_unlock(&fs_info->qgroup_ioctl_lock);
if (need_rescan)
- qgroup_mark_inconsistent(fs_info);
+ qgroup_mark_inconsistent(fs_info, "qgroup inherit needs a rescan");
if (qlist_prealloc) {
for (int i = 0; i < inherit->num_qgroups; i++)
kfree(qlist_prealloc[i]);
@@ -3588,7 +3567,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
int ret = 0;
LIST_HEAD(qgroup_list);
- if (!is_fstree(ref_root))
+ if (!btrfs_is_fstree(ref_root))
return 0;
if (num_bytes == 0)
@@ -3648,7 +3627,7 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *qgroup;
LIST_HEAD(qgroup_list);
- if (!is_fstree(ref_root))
+ if (!btrfs_is_fstree(ref_root))
return;
if (num_bytes == 0)
@@ -4036,12 +4015,21 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
qgroup_rescan_zero_tracking(fs_info);
mutex_lock(&fs_info->qgroup_rescan_lock);
- fs_info->qgroup_rescan_running = true;
- btrfs_queue_work(fs_info->qgroup_rescan_workers,
- &fs_info->qgroup_rescan_work);
+ /*
+ * The rescan worker is only for full accounting qgroups; check if it's
+ * enabled, as it is pointless to queue it otherwise. A concurrent quota
+ * disable may also have just cleared BTRFS_FS_QUOTA_ENABLED.
+ */
+ if (btrfs_qgroup_full_accounting(fs_info)) {
+ fs_info->qgroup_rescan_running = true;
+ btrfs_queue_work(fs_info->qgroup_rescan_workers,
+ &fs_info->qgroup_rescan_work);
+ } else {
+ ret = -ENOTCONN;
+ }
mutex_unlock(&fs_info->qgroup_rescan_lock);
- return 0;
+ return ret;
}
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
@@ -4128,8 +4116,8 @@ static int qgroup_unreserve_range(struct btrfs_inode *inode,
* Now the entry is in [start, start + len), revert the
* EXTENT_QGROUP_RESERVED bit.
*/
- clear_ret = btrfs_clear_extent_bits(&inode->io_tree, entry_start,
- entry_end, EXTENT_QGROUP_RESERVED);
+ clear_ret = btrfs_clear_extent_bit(&inode->io_tree, entry_start, entry_end,
+ EXTENT_QGROUP_RESERVED, NULL);
if (!ret && clear_ret < 0)
ret = clear_ret;
@@ -4216,7 +4204,7 @@ static int qgroup_reserve_data(struct btrfs_inode *inode,
int ret;
if (btrfs_qgroup_mode(root->fs_info) == BTRFS_QGROUP_MODE_DISABLED ||
- !is_fstree(btrfs_root_id(root)) || len == 0)
+ !btrfs_is_fstree(btrfs_root_id(root)) || len == 0)
return 0;
/* @reserved parameter is mandatory for qgroup */
@@ -4469,7 +4457,7 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
int ret;
if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED ||
- !is_fstree(btrfs_root_id(root)) || num_bytes == 0)
+ !btrfs_is_fstree(btrfs_root_id(root)) || num_bytes == 0)
return 0;
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
@@ -4514,7 +4502,7 @@ void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)
struct btrfs_fs_info *fs_info = root->fs_info;
if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED ||
- !is_fstree(btrfs_root_id(root)))
+ !btrfs_is_fstree(btrfs_root_id(root)))
return;
/* TODO: Update trace point to handle such free */
@@ -4530,7 +4518,7 @@ void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
struct btrfs_fs_info *fs_info = root->fs_info;
if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED ||
- !is_fstree(btrfs_root_id(root)))
+ !btrfs_is_fstree(btrfs_root_id(root)))
return;
/*
@@ -4589,7 +4577,7 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
struct btrfs_fs_info *fs_info = root->fs_info;
if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED ||
- !is_fstree(btrfs_root_id(root)))
+ !btrfs_is_fstree(btrfs_root_id(root)))
return;
/* Same as btrfs_qgroup_free_meta_prealloc() */
num_bytes = sub_root_meta_rsv(root, num_bytes,
@@ -4673,6 +4661,28 @@ out:
spin_unlock(&swapped_blocks->lock);
}
+static int qgroup_swapped_block_bytenr_key_cmp(const void *key, const struct rb_node *node)
+{
+ const u64 *bytenr = key;
+ const struct btrfs_qgroup_swapped_block *block = rb_entry(node,
+ struct btrfs_qgroup_swapped_block, node);
+
+ if (block->subvol_bytenr < *bytenr)
+ return -1;
+ else if (block->subvol_bytenr > *bytenr)
+ return 1;
+
+ return 0;
+}
+
+static int qgroup_swapped_block_bytenr_cmp(struct rb_node *new, const struct rb_node *existing)
+{
+ const struct btrfs_qgroup_swapped_block *new_block = rb_entry(new,
+ struct btrfs_qgroup_swapped_block, node);
+
+ return qgroup_swapped_block_bytenr_key_cmp(&new_block->subvol_bytenr, existing);
+}
+
/*
* Add subtree roots record into @subvol_root.
*
@@ -4692,8 +4702,7 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_root *subvol_root,
struct btrfs_fs_info *fs_info = subvol_root->fs_info;
struct btrfs_qgroup_swapped_blocks *blocks = &subvol_root->swapped_blocks;
struct btrfs_qgroup_swapped_block *block;
- struct rb_node **cur;
- struct rb_node *parent = NULL;
+ struct rb_node *node;
int level = btrfs_header_level(subvol_parent) - 1;
int ret = 0;
@@ -4742,46 +4751,32 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_root *subvol_root,
/* Insert @block into @blocks */
spin_lock(&blocks->lock);
- cur = &blocks->blocks[level].rb_node;
- while (*cur) {
+ node = rb_find_add(&block->node, &blocks->blocks[level], qgroup_swapped_block_bytenr_cmp);
+ if (node) {
struct btrfs_qgroup_swapped_block *entry;
- parent = *cur;
- entry = rb_entry(parent, struct btrfs_qgroup_swapped_block,
- node);
+ entry = rb_entry(node, struct btrfs_qgroup_swapped_block, node);
- if (entry->subvol_bytenr < block->subvol_bytenr) {
- cur = &(*cur)->rb_left;
- } else if (entry->subvol_bytenr > block->subvol_bytenr) {
- cur = &(*cur)->rb_right;
- } else {
- if (entry->subvol_generation !=
- block->subvol_generation ||
- entry->reloc_bytenr != block->reloc_bytenr ||
- entry->reloc_generation !=
- block->reloc_generation) {
- /*
- * Duplicated but mismatch entry found.
- * Shouldn't happen.
- *
- * Marking qgroup inconsistent should be enough
- * for end users.
- */
- DEBUG_WARN("duplicated but mismatched entry found");
- ret = -EEXIST;
- }
- kfree(block);
- goto out_unlock;
+ if (entry->subvol_generation != block->subvol_generation ||
+ entry->reloc_bytenr != block->reloc_bytenr ||
+ entry->reloc_generation != block->reloc_generation) {
+ /*
+ * Duplicated but mismatched entry found. Shouldn't happen.
+ * Marking qgroup inconsistent should be enough for end
+ * users.
+ */
+ DEBUG_WARN("duplicated but mismatched entry found");
+ ret = -EEXIST;
}
+ kfree(block);
+ goto out_unlock;
}
- rb_link_node(&block->node, parent, cur);
- rb_insert_color(&block->node, &blocks->blocks[level]);
blocks->swapped = true;
out_unlock:
spin_unlock(&blocks->lock);
out:
if (ret < 0)
- qgroup_mark_inconsistent(fs_info);
+ qgroup_mark_inconsistent(fs_info, "%s error: %d", __func__, ret);
return ret;
}
@@ -4801,7 +4796,6 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
struct btrfs_qgroup_swapped_block *block;
struct extent_buffer *reloc_eb = NULL;
struct rb_node *node;
- bool found = false;
bool swapped = false;
int level = btrfs_header_level(subvol_eb);
int ret = 0;
@@ -4809,7 +4803,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
if (!btrfs_qgroup_full_accounting(fs_info))
return 0;
- if (!is_fstree(btrfs_root_id(root)) || !root->reloc_root)
+ if (!btrfs_is_fstree(btrfs_root_id(root)) || !root->reloc_root)
return 0;
spin_lock(&blocks->lock);
@@ -4817,23 +4811,14 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
spin_unlock(&blocks->lock);
return 0;
}
- node = blocks->blocks[level].rb_node;
-
- while (node) {
- block = rb_entry(node, struct btrfs_qgroup_swapped_block, node);
- if (block->subvol_bytenr < subvol_eb->start) {
- node = node->rb_left;
- } else if (block->subvol_bytenr > subvol_eb->start) {
- node = node->rb_right;
- } else {
- found = true;
- break;
- }
- }
- if (!found) {
+ node = rb_find(&subvol_eb->start, &blocks->blocks[level],
+ qgroup_swapped_block_bytenr_key_cmp);
+ if (!node) {
spin_unlock(&blocks->lock);
goto out;
}
+ block = rb_entry(node, struct btrfs_qgroup_swapped_block, node);
+
/* Found one, remove it from @blocks first and update blocks->swapped */
rb_erase(&block->node, &blocks->blocks[level]);
for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
@@ -4869,10 +4854,9 @@ free_out:
free_extent_buffer(reloc_eb);
out:
if (ret < 0) {
- btrfs_err_rl(fs_info,
- "failed to account subtree at bytenr %llu: %d",
- subvol_eb->start, ret);
- qgroup_mark_inconsistent(fs_info);
+ qgroup_mark_inconsistent(fs_info,
+ "failed to account subtree at bytenr %llu: %d",
+ subvol_eb->start, ret);
}
return ret;
}
@@ -4903,7 +4887,7 @@ int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info,
if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE)
return 0;
- if (!is_fstree(root))
+ if (!btrfs_is_fstree(root))
return 0;
/* If the extent predates enabling quotas, don't count it. */
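
For reference, the rb_find()/rb_find_add() conversion above follows the generic <linux/rbtree.h> pattern: one comparator keyed on the search value for lookups, and a thin node-to-node wrapper for insertion. A minimal sketch with illustrative names (not from this patch), building an ascending tree:

#include <linux/rbtree.h>
#include <linux/types.h>

struct demo_item {
	struct rb_node node;
	u64 key;
};

/* Lookup comparator for rb_find(): sign of (key - node). */
static int demo_key_cmp(const void *key, const struct rb_node *node)
{
	const u64 *k = key;
	const struct demo_item *item = rb_entry(node, struct demo_item, node);

	if (*k < item->key)
		return -1;
	if (*k > item->key)
		return 1;
	return 0;
}

/* Insertion comparator for rb_find_add(): reuse the key comparator. */
static int demo_node_cmp(struct rb_node *new, const struct rb_node *existing)
{
	const struct demo_item *new_item = rb_entry(new, struct demo_item, node);

	return demo_key_cmp(&new_item->key, existing);
}

/* Returns the colliding entry, or NULL if @item was inserted. */
static struct demo_item *demo_insert(struct rb_root *root, struct demo_item *item)
{
	return rb_entry_safe(rb_find_add(&item->node, root, demo_node_cmp),
			     struct demo_item, node);
}

static struct demo_item *demo_lookup(struct rb_root *root, u64 key)
{
	return rb_entry_safe(rb_find(&key, root, demo_key_cmp),
			     struct demo_item, node);
}

Note the qgroup swapped-block tree above is kept in descending bytenr order (matching the open-coded walk it replaces), so its comparator intentionally returns the opposite sign.
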
diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c
index 1834011ccc49..cab0b291088c 100644
--- a/fs/btrfs/raid-stripe-tree.c
+++ b/fs/btrfs/raid-stripe-tree.c
@@ -329,11 +329,14 @@ int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
ret = btrfs_insert_item(trans, stripe_root, &stripe_key, stripe_extent,
item_size);
- if (ret == -EEXIST)
+ if (ret == -EEXIST) {
ret = update_raid_extent_item(trans, &stripe_key, stripe_extent,
item_size);
- if (ret)
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+ } else if (ret) {
btrfs_abort_transaction(trans, ret);
+ }
kfree(stripe_extent);
diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h
deleted file mode 100644
index 1c2d7cb1fe6f..000000000000
--- a/fs/btrfs/rcu-string.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2012 Red Hat. All rights reserved.
- */
-
-#ifndef BTRFS_RCU_STRING_H
-#define BTRFS_RCU_STRING_H
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/rcupdate.h>
-#include <linux/printk.h>
-
-struct rcu_string {
- struct rcu_head rcu;
- char str[];
-};
-
-static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask)
-{
- size_t len = strlen(src) + 1;
- struct rcu_string *ret = kzalloc(sizeof(struct rcu_string) +
- (len * sizeof(char)), mask);
- if (!ret)
- return ret;
- /* Warn if the source got unexpectedly truncated. */
- if (WARN_ON(strscpy(ret->str, src, len) < 0)) {
- kfree(ret);
- return NULL;
- }
- return ret;
-}
-
-static inline void rcu_string_free(struct rcu_string *str)
-{
- if (str)
- kfree_rcu(str, rcu);
-}
-
-#define printk_in_rcu(fmt, ...) do { \
- rcu_read_lock(); \
- printk(fmt, __VA_ARGS__); \
- rcu_read_unlock(); \
-} while (0)
-
-#define printk_ratelimited_in_rcu(fmt, ...) do { \
- rcu_read_lock(); \
- printk_ratelimited(fmt, __VA_ARGS__); \
- rcu_read_unlock(); \
-} while (0)
-
-#define rcu_str_deref(rcu_str) ({ \
- struct rcu_string *__str = rcu_dereference(rcu_str); \
- __str->str; \
-})
-
-#endif
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index 2928abf7eb82..3871c3a6c743 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -75,69 +75,70 @@ struct block_entry {
struct list_head actions;
};
+static int block_entry_bytenr_key_cmp(const void *key, const struct rb_node *node)
+{
+ const u64 *bytenr = key;
+ const struct block_entry *entry = rb_entry(node, struct block_entry, node);
+
+ if (entry->bytenr < *bytenr)
+ return 1;
+ else if (entry->bytenr > *bytenr)
+ return -1;
+
+ return 0;
+}
+
+static int block_entry_bytenr_cmp(struct rb_node *new, const struct rb_node *existing)
+{
+ const struct block_entry *new_entry = rb_entry(new, struct block_entry, node);
+
+ return block_entry_bytenr_key_cmp(&new_entry->bytenr, existing);
+}
+
static struct block_entry *insert_block_entry(struct rb_root *root,
struct block_entry *be)
{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent_node = NULL;
- struct block_entry *entry;
-
- while (*p) {
- parent_node = *p;
- entry = rb_entry(parent_node, struct block_entry, node);
- if (entry->bytenr > be->bytenr)
- p = &(*p)->rb_left;
- else if (entry->bytenr < be->bytenr)
- p = &(*p)->rb_right;
- else
- return entry;
- }
+ struct rb_node *node;
- rb_link_node(&be->node, parent_node, p);
- rb_insert_color(&be->node, root);
- return NULL;
+ node = rb_find_add(&be->node, root, block_entry_bytenr_cmp);
+ return rb_entry_safe(node, struct block_entry, node);
}
static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr)
{
- struct rb_node *n;
- struct block_entry *entry = NULL;
+ struct rb_node *node;
- n = root->rb_node;
- while (n) {
- entry = rb_entry(n, struct block_entry, node);
- if (entry->bytenr < bytenr)
- n = n->rb_right;
- else if (entry->bytenr > bytenr)
- n = n->rb_left;
- else
- return entry;
- }
- return NULL;
+ node = rb_find(&bytenr, root, block_entry_bytenr_key_cmp);
+ return rb_entry_safe(node, struct block_entry, node);
+}
+
+static int root_entry_root_objectid_key_cmp(const void *key, const struct rb_node *node)
+{
+ const u64 *objectid = key;
+ const struct root_entry *entry = rb_entry(node, struct root_entry, node);
+
+ if (entry->root_objectid < *objectid)
+ return 1;
+ else if (entry->root_objectid > *objectid)
+ return -1;
+
+ return 0;
+}
+
+static int root_entry_root_objectid_cmp(struct rb_node *new, const struct rb_node *existing)
+{
+ const struct root_entry *new_entry = rb_entry(new, struct root_entry, node);
+
+ return root_entry_root_objectid_key_cmp(&new_entry->root_objectid, existing);
}
static struct root_entry *insert_root_entry(struct rb_root *root,
struct root_entry *re)
{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent_node = NULL;
- struct root_entry *entry;
-
- while (*p) {
- parent_node = *p;
- entry = rb_entry(parent_node, struct root_entry, node);
- if (entry->root_objectid > re->root_objectid)
- p = &(*p)->rb_left;
- else if (entry->root_objectid < re->root_objectid)
- p = &(*p)->rb_right;
- else
- return entry;
- }
-
- rb_link_node(&re->node, parent_node, p);
- rb_insert_color(&re->node, root);
- return NULL;
+ struct rb_node *node;
+ node = rb_find_add(&re->node, root, root_entry_root_objectid_cmp);
+ return rb_entry_safe(node, struct root_entry, node);
}
static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
@@ -161,48 +162,29 @@ static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
return 0;
}
+static int ref_entry_cmp(struct rb_node *new, const struct rb_node *existing)
+{
+ struct ref_entry *new_entry = rb_entry(new, struct ref_entry, node);
+ struct ref_entry *existing_entry = rb_entry(existing, struct ref_entry, node);
+
+ return comp_refs(new_entry, existing_entry);
+}
+
static struct ref_entry *insert_ref_entry(struct rb_root *root,
struct ref_entry *ref)
{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent_node = NULL;
- struct ref_entry *entry;
- int cmp;
-
- while (*p) {
- parent_node = *p;
- entry = rb_entry(parent_node, struct ref_entry, node);
- cmp = comp_refs(entry, ref);
- if (cmp > 0)
- p = &(*p)->rb_left;
- else if (cmp < 0)
- p = &(*p)->rb_right;
- else
- return entry;
- }
-
- rb_link_node(&ref->node, parent_node, p);
- rb_insert_color(&ref->node, root);
- return NULL;
+ struct rb_node *node;
+ node = rb_find_add(&ref->node, root, ref_entry_cmp);
+ return rb_entry_safe(node, struct ref_entry, node);
}
static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
{
- struct rb_node *n;
- struct root_entry *entry = NULL;
+ struct rb_node *node;
- n = root->rb_node;
- while (n) {
- entry = rb_entry(n, struct root_entry, node);
- if (entry->root_objectid < objectid)
- n = n->rb_right;
- else if (entry->root_objectid > objectid)
- n = n->rb_left;
- else
- return entry;
- }
- return NULL;
+ node = rb_find(&objectid, root, root_entry_root_objectid_key_cmp);
+ return rb_entry_safe(node, struct root_entry, node);
}
#ifdef CONFIG_STACKTRACE
@@ -668,7 +650,7 @@ static void dump_block_entry(struct btrfs_fs_info *fs_info,
* our sanity checks pass as they are no longer needed.
*/
int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
- struct btrfs_ref *generic_ref)
+ const struct btrfs_ref *generic_ref)
{
struct ref_entry *ref = NULL, *exist;
struct ref_action *ra = NULL;
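
A comparator used with rb_find_add() only needs the sign of its result, which is why ref_entry_cmp() can return comp_refs() directly and chain multi-field comparisons without normalizing to -1/0/1. A minimal sketch of that chaining, with illustrative names:

#include <linux/string.h>
#include <linux/types.h>

struct demo_ref {
	u64 root;
	u64 objectid;
	const char *name;
};

static int demo_ref_cmp(const struct demo_ref *a, const struct demo_ref *b)
{
	/* Compare field by field; the first difference decides the order. */
	if (a->root != b->root)
		return a->root < b->root ? -1 : 1;
	if (a->objectid != b->objectid)
		return a->objectid < b->objectid ? -1 : 1;
	/* Only the sign matters, so strcmp()'s raw result is fine. */
	return strcmp(a->name, b->name);
}

The rbtree_ref_comp() cleanup in the send.c hunks below applies the same simplification by returning strcmp() directly.
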
diff --git a/fs/btrfs/ref-verify.h b/fs/btrfs/ref-verify.h
index 3511e1a5c96b..559bd25a2b7a 100644
--- a/fs/btrfs/ref-verify.h
+++ b/fs/btrfs/ref-verify.h
@@ -19,7 +19,7 @@ struct btrfs_ref;
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info);
void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info);
int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
- struct btrfs_ref *generic_ref);
+ const struct btrfs_ref *generic_ref);
void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
u64 len);
@@ -39,7 +39,7 @@ static inline void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
}
static inline int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
- struct btrfs_ref *generic_ref)
+ const struct btrfs_ref *generic_ref)
{
return 0;
}
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 62161beca559..ce25ab7f0e99 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -46,11 +46,9 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
if (ret) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
- goto out;
+ return ret;
}
- ret = btrfs_end_transaction(trans);
-out:
- return ret;
+ return btrfs_end_transaction(trans);
}
static int copy_inline_to_page(struct btrfs_inode *inode,
@@ -95,8 +93,8 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
if (ret < 0)
goto out_unlock;
- btrfs_clear_extent_bits(&inode->io_tree, file_offset, range_end,
- EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG);
+ btrfs_clear_extent_bit(&inode->io_tree, file_offset, range_end,
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, NULL);
ret = btrfs_set_extent_delalloc(inode, file_offset, range_end, 0, NULL);
if (ret)
goto out_unlock;
@@ -270,11 +268,15 @@ copy_inline_extent:
drop_args.end = aligned_end;
drop_args.drop_cache = true;
ret = btrfs_drop_extents(trans, root, inode, &drop_args);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
goto out;
+ }
ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
goto out;
+ }
write_extent_buffer(path->nodes[0], inline_data,
btrfs_item_ptr_offset(path->nodes[0],
@@ -283,6 +285,8 @@ copy_inline_extent:
btrfs_update_inode_bytes(inode, datal, drop_args.bytes_found);
btrfs_set_inode_full_sync(inode);
ret = btrfs_inode_set_file_extent_range(inode, 0, aligned_end);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
out:
if (!ret && !trans) {
/*
@@ -297,10 +301,8 @@ out:
trans = NULL;
}
}
- if (ret && trans) {
- btrfs_abort_transaction(trans, ret);
+ if (ret && trans)
btrfs_end_transaction(trans);
- }
if (!ret)
*trans_out = trans;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 02086191630d..e58151933844 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -90,10 +90,15 @@
* map address of tree root to tree
*/
struct mapping_node {
- struct {
- struct rb_node rb_node;
- u64 bytenr;
- }; /* Use rb_simle_node for search/insert */
+ union {
+ /* Use rb_simple_node for search/insert */
+ struct {
+ struct rb_node rb_node;
+ u64 bytenr;
+ };
+
+ struct rb_simple_node simple_node;
+ };
void *data;
};
@@ -106,10 +111,15 @@ struct mapping_tree {
* present a tree block to process
*/
struct tree_block {
- struct {
- struct rb_node rb_node;
- u64 bytenr;
- }; /* Use rb_simple_node for search/insert */
+ union {
+ /* Use rb_simple_node for search/insert */
+ struct {
+ struct rb_node rb_node;
+ u64 bytenr;
+ };
+
+ struct rb_simple_node simple_node;
+ };
u64 owner;
struct btrfs_key key;
u8 level;
@@ -480,8 +490,7 @@ static int __add_reloc_root(struct btrfs_root *root)
node->data = root;
spin_lock(&rc->reloc_root_tree.lock);
- rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
- node->bytenr, &node->rb_node);
+ rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root, &node->simple_node);
spin_unlock(&rc->reloc_root_tree.lock);
if (rb_node) {
btrfs_err(fs_info,
@@ -564,8 +573,7 @@ static int __update_reloc_root(struct btrfs_root *root)
spin_lock(&rc->reloc_root_tree.lock);
node->bytenr = root->node->start;
- rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
- node->bytenr, &node->rb_node);
+ rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root, &node->simple_node);
spin_unlock(&rc->reloc_root_tree.lock);
if (rb_node)
btrfs_backref_panic(fs_info, node->bytenr, -EEXIST);
@@ -1516,7 +1524,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
level = btrfs_root_level(root_item);
- atomic_inc(&reloc_root->node->refs);
+ refcount_inc(&reloc_root->node->refs);
path->nodes[level] = reloc_root->node;
path->slots[level] = 0;
} else {
@@ -2617,7 +2625,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
* tree.
*/
if (block->owner &&
- (!is_fstree(block->owner) ||
+ (!btrfs_is_fstree(block->owner) ||
block->owner == BTRFS_DATA_RELOC_TREE_OBJECTID)) {
ret = relocate_cowonly_block(trans, rc, block, path);
if (ret)
@@ -2658,66 +2666,24 @@ static noinline_for_stack int prealloc_file_extent_cluster(struct reloc_control
u64 num_bytes;
int nr;
int ret = 0;
- u64 i_size = i_size_read(&inode->vfs_inode);
u64 prealloc_start = cluster->start - offset;
u64 prealloc_end = cluster->end - offset;
u64 cur_offset = prealloc_start;
/*
- * For subpage case, previous i_size may not be aligned to PAGE_SIZE.
- * This means the range [i_size, PAGE_END + 1) is filled with zeros by
- * btrfs_do_readpage() call of previously relocated file cluster.
+ * For the blocksize < folio size case (either bs < page size or large
+ * folios), all blocks beyond i_size are filled with zero.
*
- * If the current cluster starts in the above range, btrfs_do_readpage()
+ * If the current cluster covers the above range, btrfs_do_readpage()
* will skip the read, and relocate_one_folio() will later writeback
* the padding zeros as new data, causing data corruption.
*
- * Here we have to manually invalidate the range (i_size, PAGE_END + 1).
+ * Here we have to invalidate the cache covering our cluster.
*/
- if (!PAGE_ALIGNED(i_size)) {
- struct address_space *mapping = inode->vfs_inode.i_mapping;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- const u32 sectorsize = fs_info->sectorsize;
- struct folio *folio;
-
- ASSERT(sectorsize < PAGE_SIZE);
- ASSERT(IS_ALIGNED(i_size, sectorsize));
-
- /*
- * Subpage can't handle page with DIRTY but without UPTODATE
- * bit as it can lead to the following deadlock:
- *
- * btrfs_read_folio()
- * | Page already *locked*
- * |- btrfs_lock_and_flush_ordered_range()
- * |- btrfs_start_ordered_extent()
- * |- extent_write_cache_pages()
- * |- lock_page()
- * We try to lock the page we already hold.
- *
- * Here we just writeback the whole data reloc inode, so that
- * we will be ensured to have no dirty range in the page, and
- * are safe to clear the uptodate bits.
- *
- * This shouldn't cause too much overhead, as we need to write
- * the data back anyway.
- */
- ret = filemap_write_and_wait(mapping);
- if (ret < 0)
- return ret;
-
- folio = filemap_lock_folio(mapping, i_size >> PAGE_SHIFT);
- /*
- * If page is freed we don't need to do anything then, as we
- * will re-read the whole page anyway.
- */
- if (!IS_ERR(folio)) {
- btrfs_subpage_clear_uptodate(fs_info, folio, i_size,
- round_up(i_size, PAGE_SIZE) - i_size);
- folio_unlock(folio);
- folio_put(folio);
- }
- }
+ ret = filemap_invalidate_inode(&inode->vfs_inode, true, prealloc_start,
+ prealloc_end);
+ if (ret < 0)
+ return ret;
BUG_ON(cluster->start != cluster->boundary[0]);
ret = btrfs_alloc_data_chunk_ondemand(inode,
@@ -2806,13 +2772,15 @@ static u64 get_cluster_boundary_end(const struct file_extent_cluster *cluster,
static int relocate_one_folio(struct reloc_control *rc,
struct file_ra_state *ra,
- int *cluster_nr, unsigned long index)
+ int *cluster_nr, u64 *file_offset_ret)
{
const struct file_extent_cluster *cluster = &rc->cluster;
struct inode *inode = rc->data_inode;
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
+ const u64 orig_file_offset = *file_offset_ret;
u64 offset = BTRFS_I(inode)->reloc_block_group_start;
- const unsigned long last_index = (cluster->end - offset) >> PAGE_SHIFT;
+ const pgoff_t last_index = (cluster->end - offset) >> PAGE_SHIFT;
+ const pgoff_t index = orig_file_offset >> PAGE_SHIFT;
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
struct folio *folio;
u64 folio_start;
@@ -2845,8 +2813,6 @@ again:
return PTR_ERR(folio);
}
- WARN_ON(folio_order(folio));
-
if (folio_test_readahead(folio) && !use_rst)
page_cache_async_readahead(inode->i_mapping, ra, NULL,
folio, last_index + 1 - index);
@@ -2875,7 +2841,7 @@ again:
goto release_folio;
folio_start = folio_pos(folio);
- folio_end = folio_start + PAGE_SIZE - 1;
+ folio_end = folio_start + folio_size(folio) - 1;
/*
* Start from the cluster, as for subpage case, the cluster can start
@@ -2923,7 +2889,8 @@ again:
* EXTENT_BOUNDARY bit prevents current extent from being merged
* with previous extent.
*/
- if (in_range(cluster->boundary[*cluster_nr] - offset, folio_start, PAGE_SIZE)) {
+ if (in_range(cluster->boundary[*cluster_nr] - offset,
+ folio_start, folio_size(folio))) {
u64 boundary_start = cluster->boundary[*cluster_nr] -
offset;
u64 boundary_end = boundary_start +
@@ -2953,6 +2920,7 @@ again:
btrfs_throttle(fs_info);
if (btrfs_should_cancel_balance(fs_info))
ret = -ECANCELED;
+ *file_offset_ret = folio_end + 1;
return ret;
release_folio:
@@ -2966,8 +2934,7 @@ static int relocate_file_extent_cluster(struct reloc_control *rc)
struct inode *inode = rc->data_inode;
const struct file_extent_cluster *cluster = &rc->cluster;
u64 offset = BTRFS_I(inode)->reloc_block_group_start;
- unsigned long index;
- unsigned long last_index;
+ u64 cur_file_offset = cluster->start - offset;
struct file_ra_state *ra;
int cluster_nr = 0;
int ret = 0;
@@ -2989,10 +2956,11 @@ static int relocate_file_extent_cluster(struct reloc_control *rc)
if (ret)
goto out;
- last_index = (cluster->end - offset) >> PAGE_SHIFT;
- for (index = (cluster->start - offset) >> PAGE_SHIFT;
- index <= last_index && !ret; index++)
- ret = relocate_one_folio(rc, ra, &cluster_nr, index);
+ while (cur_file_offset < cluster->end - offset) {
+ ret = relocate_one_folio(rc, ra, &cluster_nr, &cur_file_offset);
+ if (ret)
+ break;
+ }
if (ret == 0)
WARN_ON(cluster_nr != cluster->nr);
out:
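
With large folios the old fixed PAGE_SIZE stride no longer matches what the page cache returns, which is why the loop above advances by a file offset that relocate_one_folio() reports back. A condensed sketch of that iteration shape (illustrative names, processing elided), assuming the folio_end() helper used elsewhere in this series:

#include <linux/pagemap.h>

static int demo_walk_range(struct address_space *mapping, u64 start, u64 end)
{
	u64 cur = start;

	while (cur < end) {
		struct folio *folio = filemap_lock_folio(mapping, cur >> PAGE_SHIFT);

		if (IS_ERR(folio))
			return PTR_ERR(folio);
		/* ... process every block covered by this folio ... */
		cur = folio_end(folio);	/* folio_pos() + folio_size() */
		folio_unlock(folio);
		folio_put(folio);
	}
	return 0;
}
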
@@ -3155,7 +3123,7 @@ static int add_tree_block(struct reloc_control *rc,
block->key_ready = false;
block->owner = owner;
- rb_node = rb_simple_insert(blocks, block->bytenr, &block->rb_node);
+ rb_node = rb_simple_insert(blocks, &block->simple_node);
if (rb_node)
btrfs_backref_panic(rc->extent_root->fs_info, block->bytenr,
-EEXIST);
@@ -3643,7 +3611,7 @@ restart:
}
btrfs_release_path(path);
- btrfs_clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY);
+ btrfs_clear_extent_bit(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, NULL);
if (trans) {
btrfs_end_transaction_throttle(trans);
@@ -3880,7 +3848,7 @@ static void free_reloc_control(struct reloc_control *rc)
*/
static void describe_relocation(struct btrfs_block_group *block_group)
{
- char buf[128] = {'\0'};
+ char buf[128] = "NONE";
btrfs_describe_block_groups(block_group->flags, buf, sizeof(buf));
@@ -3900,7 +3868,8 @@ static const char *stage_to_string(enum reloc_stage stage)
/*
* function to relocate all extents in a block group.
*/
-int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
+int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start,
+ bool verbose)
{
struct btrfs_block_group *bg;
struct btrfs_root *extent_root = btrfs_extent_root(fs_info, group_start);
@@ -3992,7 +3961,8 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
goto out;
}
- describe_relocation(rc->block_group);
+ if (verbose)
+ describe_relocation(rc->block_group);
btrfs_wait_block_group_reservations(rc->block_group);
btrfs_wait_nocow_writers(rc->block_group);
@@ -4036,8 +4006,10 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
if (rc->extents_found == 0)
break;
- btrfs_info(fs_info, "found %llu extents, stage: %s",
- rc->extents_found, stage_to_string(finishes_stage));
+ if (verbose)
+ btrfs_info(fs_info, "found %llu extents, stage: %s",
+ rc->extents_found,
+ stage_to_string(finishes_stage));
}
WARN_ON(rc->block_group->pinned > 0);
@@ -4339,7 +4311,7 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
}
btrfs_backref_drop_node_buffer(node);
- atomic_inc(&cow->refs);
+ refcount_inc(&cow->refs);
node->eb = cow;
node->new_bytenr = cow->start;
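
The mapping_node/tree_block changes above keep the existing field names usable while letting rb_simple_insert() take the embedded helper struct directly: an anonymous union overlays the open-coded { rb_node, bytenr } pair with struct rb_simple_node. A condensed illustration, assuming rb_simple_node keeps its { rb_node, bytenr } layout from fs/btrfs/misc.h:

struct demo_block {
	union {
		/* Legacy view, keeps existing users compiling unchanged. */
		struct {
			struct rb_node rb_node;
			u64 bytenr;
		};
		/* Helper view handed to the rb_simple_* helpers. */
		struct rb_simple_node simple_node;
	};
	void *data;
};

Both views name the same storage, so block->bytenr and block->simple_node.bytenr are interchangeable, and &block->simple_node can be passed wherever the helper struct is expected.
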
diff --git a/fs/btrfs/relocation.h b/fs/btrfs/relocation.h
index 788c86d8633a..5c36b3f84b57 100644
--- a/fs/btrfs/relocation.h
+++ b/fs/btrfs/relocation.h
@@ -12,7 +12,8 @@ struct btrfs_trans_handle;
struct btrfs_ordered_extent;
struct btrfs_pending_snapshot;
-int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start);
+int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start,
+ bool verbose);
int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_root *root);
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ce36fafc771e..6776e6ab8d10 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -556,8 +556,8 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
* hold all of the paths here
*/
for (i = 0; i < ipath->fspath->elem_cnt; ++i)
- btrfs_warn_in_rcu(fs_info,
-"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %u, links %u (path: %s)",
+ btrfs_warn(fs_info,
+"scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu length %u links %u (path: %s)",
swarn->errstr, swarn->logical,
btrfs_dev_name(swarn->dev),
swarn->physical,
@@ -570,8 +570,8 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
return 0;
err:
- btrfs_warn_in_rcu(fs_info,
- "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
+ btrfs_warn(fs_info,
+ "scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu: path resolving failed with ret=%d",
swarn->errstr, swarn->logical,
btrfs_dev_name(swarn->dev),
swarn->physical,
@@ -596,7 +596,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
/* Super block error, no need to search extent tree. */
if (is_super) {
- btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu",
+ btrfs_warn(fs_info, "scrub: %s on device %s, physical %llu",
errstr, btrfs_dev_name(dev), physical);
return;
}
@@ -631,14 +631,14 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
&ref_level);
if (ret < 0) {
btrfs_warn(fs_info,
- "failed to resolve tree backref for logical %llu: %d",
- swarn.logical, ret);
+ "scrub: failed to resolve tree backref for logical %llu: %d",
+ swarn.logical, ret);
break;
}
if (ret > 0)
break;
- btrfs_warn_in_rcu(fs_info,
-"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
+ btrfs_warn(fs_info,
+"scrub: %s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
errstr, swarn.logical, btrfs_dev_name(dev),
swarn.physical, (ref_level ? "node" : "leaf"),
ref_level, ref_root);
@@ -718,7 +718,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
- "tree block %llu mirror %u has bad bytenr, has %llu want %llu",
+ "scrub: tree block %llu mirror %u has bad bytenr, has %llu want %llu",
logical, stripe->mirror_num,
btrfs_stack_header_bytenr(header), logical);
return;
@@ -728,7 +728,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
- "tree block %llu mirror %u has bad fsid, has %pU want %pU",
+ "scrub: tree block %llu mirror %u has bad fsid, has %pU want %pU",
logical, stripe->mirror_num,
header->fsid, fs_info->fs_devices->fsid);
return;
@@ -738,7 +738,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
- "tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU",
+ "scrub: tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU",
logical, stripe->mirror_num,
header->chunk_tree_uuid, fs_info->chunk_tree_uuid);
return;
@@ -760,7 +760,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
- "tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT,
+"scrub: tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT,
logical, stripe->mirror_num,
CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum),
CSUM_FMT_VALUE(fs_info->csum_size, calculated_csum));
@@ -771,7 +771,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
scrub_bitmap_set_meta_gen_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
- "tree block %llu mirror %u has bad generation, has %llu want %llu",
+ "scrub: tree block %llu mirror %u has bad generation, has %llu want %llu",
logical, stripe->mirror_num,
btrfs_stack_header_generation(header),
stripe->sectors[sector_nr].generation);
@@ -814,7 +814,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
*/
if (unlikely(sector_nr + sectors_per_tree > stripe->nr_sectors)) {
btrfs_warn_rl(fs_info,
- "tree block at %llu crosses stripe boundary %llu",
+ "scrub: tree block at %llu crosses stripe boundary %llu",
stripe->logical +
(sector_nr << fs_info->sectorsize_bits),
stripe->logical);
@@ -1045,13 +1045,13 @@ skip:
*/
if (repaired) {
if (dev) {
- btrfs_err_rl_in_rcu(fs_info,
- "fixed up error at logical %llu on dev %s physical %llu",
+ btrfs_err_rl(fs_info,
+ "scrub: fixed up error at logical %llu on dev %s physical %llu",
stripe->logical, btrfs_dev_name(dev),
physical);
} else {
- btrfs_err_rl_in_rcu(fs_info,
- "fixed up error at logical %llu on mirror %u",
+ btrfs_err_rl(fs_info,
+ "scrub: fixed up error at logical %llu on mirror %u",
stripe->logical, stripe->mirror_num);
}
continue;
@@ -1059,13 +1059,13 @@ skip:
/* The remaining are all for unrepaired. */
if (dev) {
- btrfs_err_rl_in_rcu(fs_info,
- "unable to fixup (regular) error at logical %llu on dev %s physical %llu",
+ btrfs_err_rl(fs_info,
+"scrub: unable to fixup (regular) error at logical %llu on dev %s physical %llu",
stripe->logical, btrfs_dev_name(dev),
physical);
} else {
- btrfs_err_rl_in_rcu(fs_info,
- "unable to fixup (regular) error at logical %llu on mirror %u",
+ btrfs_err_rl(fs_info,
+ "scrub: unable to fixup (regular) error at logical %llu on mirror %u",
stripe->logical, stripe->mirror_num);
}
@@ -1593,8 +1593,7 @@ static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical,
physical,
sctx->write_pointer);
if (ret)
- btrfs_err(fs_info,
- "zoned: failed to recover write pointer");
+ btrfs_err(fs_info, "scrub: zoned: failed to recover write pointer");
}
mutex_unlock(&sctx->wr_lock);
btrfs_dev_clear_zone_empty(sctx->wr_tgtdev, physical);
@@ -1658,7 +1657,7 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
int ret;
if (unlikely(!extent_root || !csum_root)) {
- btrfs_err(fs_info, "no valid extent or csum root for scrub");
+ btrfs_err(fs_info, "scrub: no valid extent or csum root found");
return -EUCLEAN;
}
memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) *
@@ -1807,7 +1806,7 @@ static void scrub_submit_extent_sector_read(struct scrub_stripe *stripe)
struct btrfs_io_context *bioc = NULL;
const u64 logical = stripe->logical +
(i << fs_info->sectorsize_bits);
- int err;
+ int ret;
io_stripe.rst_search_commit_root = true;
stripe_len = (nr_sectors - i) << fs_info->sectorsize_bits;
@@ -1815,11 +1814,11 @@ static void scrub_submit_extent_sector_read(struct scrub_stripe *stripe)
* For RST cases, we need to manually split the bbio to
* follow the RST boundary.
*/
- err = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
&stripe_len, &bioc, &io_stripe, &mirror);
btrfs_put_bioc(bioc);
- if (err < 0) {
- if (err != -ENODATA) {
+ if (ret < 0) {
+ if (ret != -ENODATA) {
/*
* Earlier btrfs_get_raid_extent_offset()
* returned -ENODATA, which means there's
@@ -1907,7 +1906,7 @@ static bool stripe_has_metadata_error(struct scrub_stripe *stripe)
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
btrfs_err(fs_info,
- "stripe %llu has unrepaired metadata sector at %llu",
+ "scrub: stripe %llu has unrepaired metadata sector at logical %llu",
stripe->logical,
stripe->logical + (i << fs_info->sectorsize_bits));
return true;
@@ -2167,7 +2166,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
bitmap_and(&error, &error, &has_extent, stripe->nr_sectors);
if (!bitmap_empty(&error, stripe->nr_sectors)) {
btrfs_err(fs_info,
-"unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl",
+"scrub: unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl",
full_stripe_start, i, stripe->nr_sectors,
&error);
ret = -EIO;
@@ -2789,14 +2788,14 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
ro_set = 0;
} else if (ret == -ETXTBSY) {
btrfs_warn(fs_info,
- "skipping scrub of block group %llu due to active swapfile",
+ "scrub: skipping scrub of block group %llu due to active swapfile",
cache->start);
scrub_pause_off(fs_info);
ret = 0;
goto skip_unfreeze;
} else {
- btrfs_warn(fs_info,
- "failed setting block group ro: %d", ret);
+ btrfs_warn(fs_info, "scrub: failed setting block group ro: %d",
+ ret);
btrfs_unfreeze_block_group(cache);
btrfs_put_block_group(cache);
scrub_pause_off(fs_info);
@@ -2892,13 +2891,13 @@ static int scrub_one_super(struct scrub_ctx *sctx, struct btrfs_device *dev,
ret = btrfs_check_super_csum(fs_info, sb);
if (ret != 0) {
btrfs_err_rl(fs_info,
- "super block at physical %llu devid %llu has bad csum",
+ "scrub: super block at physical %llu devid %llu has bad csum",
physical, dev->devid);
return -EIO;
}
if (btrfs_super_generation(sb) != generation) {
btrfs_err_rl(fs_info,
-"super block at physical %llu devid %llu has bad generation %llu expect %llu",
+"scrub: super block at physical %llu devid %llu has bad generation %llu expect %llu",
physical, dev->devid,
btrfs_super_generation(sb), generation);
return -EUCLEAN;
@@ -3058,8 +3057,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
if (!is_dev_replace && !readonly &&
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- btrfs_err_in_rcu(fs_info,
- "scrub on devid %llu: filesystem on %s is not writable",
+ btrfs_err(fs_info,
+ "scrub: devid %llu: filesystem on %s is not writable",
devid, btrfs_dev_name(dev));
ret = -EROFS;
goto out;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 2891ec4056c6..7664025a5af4 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4,6 +4,7 @@
*/
#include <linux/bsearch.h>
+#include <linux/falloc.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sort.h>
@@ -758,7 +759,7 @@ static int send_header(struct send_ctx *sctx)
{
struct btrfs_stream_header hdr;
- strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
+ strscpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
hdr.version = cpu_to_le32(sctx->proto);
return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
&sctx->send_off);
@@ -1804,7 +1805,7 @@ static int gen_unique_name(struct send_ctx *sctx,
ino, gen, idx);
ASSERT(len < sizeof(tmp));
tmp_name.name = tmp;
- tmp_name.len = strlen(tmp);
+ tmp_name.len = len;
di = btrfs_lookup_dir_item(NULL, sctx->send_root,
path, BTRFS_FIRST_FREE_OBJECTID,
@@ -1843,7 +1844,7 @@ static int gen_unique_name(struct send_ctx *sctx,
break;
}
- ret = fs_path_add(dest, tmp, strlen(tmp));
+ ret = fs_path_add(dest, tmp, len);
out:
btrfs_free_path(path);
@@ -4628,7 +4629,6 @@ static int rbtree_ref_comp(const void *k, const struct rb_node *node)
{
const struct recorded_ref *data = k;
const struct recorded_ref *ref = rb_entry(node, struct recorded_ref, node);
- int result;
if (data->dir > ref->dir)
return 1;
@@ -4642,12 +4642,7 @@ static int rbtree_ref_comp(const void *k, const struct rb_node *node)
return 1;
if (data->name_len < ref->name_len)
return -1;
- result = strcmp(data->name, ref->name);
- if (result > 0)
- return 1;
- if (result < 0)
- return -1;
- return 0;
+ return strcmp(data->name, ref->name);
}
static bool rbtree_ref_less(struct rb_node *node, const struct rb_node *parent)
@@ -5411,6 +5406,30 @@ tlv_put_failure:
return ret;
}
+static int send_fallocate(struct send_ctx *sctx, u32 mode, u64 offset, u64 len)
+{
+ struct fs_path *path;
+ int ret;
+
+ path = get_cur_inode_path(sctx);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ ret = begin_cmd(sctx, BTRFS_SEND_C_FALLOCATE);
+ if (ret < 0)
+ return ret;
+
+ TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
+ TLV_PUT_U32(sctx, BTRFS_SEND_A_FALLOCATE_MODE, mode);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
+
+ ret = send_cmd(sctx);
+
+tlv_put_failure:
+ return ret;
+}
+
static int send_hole(struct send_ctx *sctx, u64 end)
{
struct fs_path *p = NULL;
@@ -5419,6 +5438,14 @@ static int send_hole(struct send_ctx *sctx, u64 end)
int ret = 0;
/*
+ * Starting with send stream v2 we have fallocate and can use it to
+ * punch holes instead of sending writes full of zeroes.
+ */
+ if (proto_cmd_ok(sctx, BTRFS_SEND_C_FALLOCATE))
+ return send_fallocate(sctx, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ offset, end - offset);
+
+ /*
* A hole that starts at EOF or beyond it. Since we do not yet support
* fallocate (for extent preallocation and hole punching), sending a
* write of zeroes starting at EOF or beyond would later require issuing
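
On the receiving side the new BTRFS_SEND_C_FALLOCATE command maps onto the fallocate(2) syscall; receive-side handling lives in btrfs-progs, not in this patch. A minimal userspace sketch of the punch-hole call the stream now encodes (FALLOC_FL_PUNCH_HOLE must be combined with FALLOC_FL_KEEP_SIZE):

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>

/* Deallocate [offset, offset + len); the range reads back as zeroes. */
static int demo_punch_hole(int fd, off_t offset, off_t len)
{
	return fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			 offset, len);
}
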
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index d9087aa81b21..0481c693ac2e 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -615,7 +615,7 @@ static void __btrfs_dump_space_info(const struct btrfs_fs_info *fs_info,
void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *info, u64 bytes,
- int dump_block_groups)
+ bool dump_block_groups)
{
struct btrfs_block_group *cache;
u64 total_avail = 0;
@@ -1887,7 +1887,7 @@ int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
space_info->flags, orig_bytes, 1);
if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
- btrfs_dump_space_info(fs_info, space_info, orig_bytes, 0);
+ btrfs_dump_space_info(fs_info, space_info, orig_bytes, false);
}
return ret;
}
@@ -1918,7 +1918,7 @@ int btrfs_reserve_data_bytes(struct btrfs_space_info *space_info, u64 bytes,
trace_btrfs_space_reservation(fs_info, "space_info:enospc",
space_info->flags, bytes, 1);
if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
- btrfs_dump_space_info(fs_info, space_info, bytes, 0);
+ btrfs_dump_space_info(fs_info, space_info, bytes, false);
}
return ret;
}
@@ -1973,13 +1973,13 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
static u64 calc_pct_ratio(u64 x, u64 y)
{
- int err;
+ int ret;
if (!y)
return 0;
again:
- err = check_mul_overflow(100, x, &x);
- if (err)
+ ret = check_mul_overflow(100, x, &x);
+ if (ret)
goto lose_precision;
return div64_u64(x, y);
lose_precision:
@@ -2139,7 +2139,7 @@ void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool
}
}
-bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info)
+static bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info)
{
bool ret;
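
calc_pct_ratio() above scales by 100 before dividing and relies on check_mul_overflow() from <linux/overflow.h>, which stores the product through its third argument and returns true if the multiplication wrapped. A minimal sketch of that guard (illustrative names):

#include <linux/math64.h>
#include <linux/overflow.h>

static u64 demo_pct_ratio(u64 x, u64 y)
{
	u64 scaled;

	if (!y)
		return 0;
	if (check_mul_overflow(100ULL, x, &scaled)) {
		/* 100 * x would wrap: scale y down instead, losing precision. */
		y = div64_u64(y, 100);
		return y ? div64_u64(x, y) : 0;
	}
	return div64_u64(scaled, y);
}
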
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index 92b7f5e2b850..679f22efb407 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -278,7 +278,7 @@ u64 __pure btrfs_space_info_used(const struct btrfs_space_info *s_info,
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *info, u64 bytes,
- int dump_block_groups);
+ bool dump_block_groups);
int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
u64 orig_bytes,
@@ -306,7 +306,6 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes);
void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool ready);
-bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info);
int btrfs_calc_reclaim_threshold(const struct btrfs_space_info *space_info);
void btrfs_reclaim_sweep(const struct btrfs_fs_info *fs_info);
void btrfs_return_free_space(struct btrfs_space_info *space_info, u64 len);
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index d4f019233493..c9b3821957f7 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -49,7 +49,7 @@
* Implementation:
*
* - Common
- * Both metadata and data will use a new structure, btrfs_subpage, to
+ * Both metadata and data will use a new structure, btrfs_folio_state, to
* record the status of each sector inside a page. This provides the extra
* granularity needed.
*
@@ -63,10 +63,10 @@
* This means a slightly higher tree locking latency.
*/
-int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
- struct folio *folio, enum btrfs_subpage_type type)
+int btrfs_attach_folio_state(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, enum btrfs_folio_type type)
{
- struct btrfs_subpage *subpage;
+ struct btrfs_folio_state *bfs;
/* For metadata we don't support large folio yet. */
if (type == BTRFS_SUBPAGE_METADATA)
@@ -87,18 +87,18 @@ int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio))
return 0;
- subpage = btrfs_alloc_subpage(fs_info, folio_size(folio), type);
- if (IS_ERR(subpage))
- return PTR_ERR(subpage);
+ bfs = btrfs_alloc_folio_state(fs_info, folio_size(folio), type);
+ if (IS_ERR(bfs))
+ return PTR_ERR(bfs);
- folio_attach_private(folio, subpage);
+ folio_attach_private(folio, bfs);
return 0;
}
-void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio,
- enum btrfs_subpage_type type)
+void btrfs_detach_folio_state(const struct btrfs_fs_info *fs_info, struct folio *folio,
+ enum btrfs_folio_type type)
{
- struct btrfs_subpage *subpage;
+ struct btrfs_folio_state *bfs;
/* Either not subpage, or the folio already has private attached. */
if (!folio_test_private(folio))
@@ -108,15 +108,15 @@ void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *fol
if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio))
return;
- subpage = folio_detach_private(folio);
- ASSERT(subpage);
- btrfs_free_subpage(subpage);
+ bfs = folio_detach_private(folio);
+ ASSERT(bfs);
+ btrfs_free_folio_state(bfs);
}
-struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
- size_t fsize, enum btrfs_subpage_type type)
+struct btrfs_folio_state *btrfs_alloc_folio_state(const struct btrfs_fs_info *fs_info,
+ size_t fsize, enum btrfs_folio_type type)
{
- struct btrfs_subpage *ret;
+ struct btrfs_folio_state *ret;
unsigned int real_size;
ASSERT(fs_info->sectorsize < fsize);
@@ -136,11 +136,6 @@ struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
return ret;
}
-void btrfs_free_subpage(struct btrfs_subpage *subpage)
-{
- kfree(subpage);
-}
-
/*
* Increase the eb_refs of current subpage.
*
@@ -152,7 +147,7 @@ void btrfs_free_subpage(struct btrfs_subpage *subpage)
*/
void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
- struct btrfs_subpage *subpage;
+ struct btrfs_folio_state *bfs;
if (!btrfs_meta_is_subpage(fs_info))
return;
@@ -160,13 +155,13 @@ void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *
ASSERT(folio_test_private(folio) && folio->mapping);
lockdep_assert_held(&folio->mapping->i_private_lock);
- subpage = folio_get_private(folio);
- atomic_inc(&subpage->eb_refs);
+ bfs = folio_get_private(folio);
+ atomic_inc(&bfs->eb_refs);
}
void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
- struct btrfs_subpage *subpage;
+ struct btrfs_folio_state *bfs;
if (!btrfs_meta_is_subpage(fs_info))
return;
@@ -174,9 +169,9 @@ void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *
ASSERT(folio_test_private(folio) && folio->mapping);
lockdep_assert_held(&folio->mapping->i_private_lock);
- subpage = folio_get_private(folio);
- ASSERT(atomic_read(&subpage->eb_refs));
- atomic_dec(&subpage->eb_refs);
+ bfs = folio_get_private(folio);
+ ASSERT(atomic_read(&bfs->eb_refs));
+ atomic_dec(&bfs->eb_refs);
}
static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
@@ -191,8 +186,9 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
* unmapped page like dummy extent buffer pages.
*/
if (folio->mapping)
- ASSERT(folio_pos(folio) <= start &&
- start + len <= folio_pos(folio) + folio_size(folio));
+ ASSERT(folio_pos(folio) <= start && start + len <= folio_end(folio),
+ "start=%llu len=%u folio_pos=%llu folio_size=%zu",
+ start, len, folio_pos(folio), folio_size(folio));
}
#define subpage_calc_start_bit(fs_info, folio, name, start, len) \
@@ -221,14 +217,13 @@ static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len)
if (folio_pos(folio) >= orig_start + orig_len)
*len = 0;
else
- *len = min_t(u64, folio_pos(folio) + folio_size(folio),
- orig_start + orig_len) - *start;
+ *len = min_t(u64, folio_end(folio), orig_start + orig_len) - *start;
}
static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
- struct btrfs_subpage *subpage = folio_get_private(folio);
+ struct btrfs_folio_state *bfs = folio_get_private(folio);
const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
const int nbits = (len >> fs_info->sectorsize_bits);
unsigned long flags;
@@ -238,7 +233,7 @@ static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info,
btrfs_subpage_assert(fs_info, folio, start, len);
- spin_lock_irqsave(&subpage->lock, flags);
+ spin_lock_irqsave(&bfs->lock, flags);
/*
* We have call sites passing @lock_page into
* extent_clear_unlock_delalloc() for compression path.
@@ -246,18 +241,18 @@ static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info,
* This @locked_page is locked by plain lock_page(), thus its
* subpage::locked is 0. Handle them in a special way.
*/
- if (atomic_read(&subpage->nr_locked) == 0) {
- spin_unlock_irqrestore(&subpage->lock, flags);
+ if (atomic_read(&bfs->nr_locked) == 0) {
+ spin_unlock_irqrestore(&bfs->lock, flags);
return true;
}
- for_each_set_bit_from(bit, subpage->bitmaps, start_bit + nbits) {
- clear_bit(bit, subpage->bitmaps);
+ for_each_set_bit_from(bit, bfs->bitmaps, start_bit + nbits) {
+ clear_bit(bit, bfs->bitmaps);
cleared++;
}
- ASSERT(atomic_read(&subpage->nr_locked) >= cleared);
- last = atomic_sub_and_test(cleared, &subpage->nr_locked);
- spin_unlock_irqrestore(&subpage->lock, flags);
+ ASSERT(atomic_read(&bfs->nr_locked) >= cleared);
+ last = atomic_sub_and_test(cleared, &bfs->nr_locked);
+ spin_unlock_irqrestore(&bfs->lock, flags);
return last;
}
@@ -280,7 +275,7 @@ static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info,
void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
- struct btrfs_subpage *subpage = folio_get_private(folio);
+ struct btrfs_folio_state *bfs = folio_get_private(folio);
ASSERT(folio_test_locked(folio));
@@ -296,7 +291,7 @@ void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info,
* Since we own the page lock, no one else could touch subpage::locked
* and we are safe to do several atomic operations without spinlock.
*/
- if (atomic_read(&subpage->nr_locked) == 0) {
+ if (atomic_read(&bfs->nr_locked) == 0) {
/* No subpage lock, locked by plain lock_page(). */
folio_unlock(folio);
return;
@@ -310,7 +305,7 @@ void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info,
void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info,
struct folio *folio, unsigned long bitmap)
{
- struct btrfs_subpage *subpage = folio_get_private(folio);
+ struct btrfs_folio_state *bfs = folio_get_private(folio);
const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
const int start_bit = blocks_per_folio * btrfs_bitmap_nr_locked;
unsigned long flags;
@@ -323,42 +318,42 @@ void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info,
return;
}
- if (atomic_read(&subpage->nr_locked) == 0) {
+ if (atomic_read(&bfs->nr_locked) == 0) {
/* No subpage lock, locked by plain lock_page(). */
folio_unlock(folio);
return;
}
- spin_lock_irqsave(&subpage->lock, flags);
+ spin_lock_irqsave(&bfs->lock, flags);
for_each_set_bit(bit, &bitmap, blocks_per_folio) {
- if (test_and_clear_bit(bit + start_bit, subpage->bitmaps))
+ if (test_and_clear_bit(bit + start_bit, bfs->bitmaps))
cleared++;
}
- ASSERT(atomic_read(&subpage->nr_locked) >= cleared);
- last = atomic_sub_and_test(cleared, &subpage->nr_locked);
- spin_unlock_irqrestore(&subpage->lock, flags);
+ ASSERT(atomic_read(&bfs->nr_locked) >= cleared);
+ last = atomic_sub_and_test(cleared, &bfs->nr_locked);
+ spin_unlock_irqrestore(&bfs->lock, flags);
if (last)
folio_unlock(folio);
}
#define subpage_test_bitmap_all_set(fs_info, folio, name) \
({ \
- struct btrfs_subpage *subpage = folio_get_private(folio); \
+ struct btrfs_folio_state *bfs = folio_get_private(folio); \
const unsigned int blocks_per_folio = \
btrfs_blocks_per_folio(fs_info, folio); \
\
- bitmap_test_range_all_set(subpage->bitmaps, \
+ bitmap_test_range_all_set(bfs->bitmaps, \
blocks_per_folio * btrfs_bitmap_nr_##name, \
blocks_per_folio); \
})
#define subpage_test_bitmap_all_zero(fs_info, folio, name) \
({ \
- struct btrfs_subpage *subpage = folio_get_private(folio); \
+ struct btrfs_folio_state *bfs = folio_get_private(folio); \
const unsigned int blocks_per_folio = \
btrfs_blocks_per_folio(fs_info, folio); \
\
- bitmap_test_range_all_zero(subpage->bitmaps, \
+ bitmap_test_range_all_zero(bfs->bitmaps, \
blocks_per_folio * btrfs_bitmap_nr_##name, \
blocks_per_folio); \
})
@@ -366,43 +361,43 @@ void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info,
void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
- struct btrfs_subpage *subpage = folio_get_private(folio);
+ struct btrfs_folio_state *bfs = folio_get_private(folio);
unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
uptodate, start, len);
unsigned long flags;
- spin_lock_irqsave(&subpage->lock, flags);
- bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ spin_lock_irqsave(&bfs->lock, flags);
+ bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
if (subpage_test_bitmap_all_set(fs_info, folio, uptodate))
folio_mark_uptodate(folio);
- spin_unlock_irqrestore(&subpage->lock, flags);
+ spin_unlock_irqrestore(&bfs->lock, flags);
}
void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
- struct btrfs_subpage *subpage = folio_get_private(folio);
+ struct btrfs_folio_state *bfs = folio_get_private(folio);
unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
uptodate, start, len);
unsigned long flags;
- spin_lock_irqsave(&subpage->lock, flags);
- bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ spin_lock_irqsave(&bfs->lock, flags);
+ bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
folio_clear_uptodate(folio);
- spin_unlock_irqrestore(&subpage->lock, flags);
+ spin_unlock_irqrestore(&bfs->lock, flags);
}
void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
- struct btrfs_subpage *subpage = folio_get_private(folio);
+ struct btrfs_folio_state *bfs = folio_get_private(folio);
unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
dirty, start, len);
unsigned long flags;
- spin_lock_irqsave(&subpage->lock, flags);
- bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
- spin_unlock_irqrestore(&subpage->lock, flags);
+ spin_lock_irqsave(&bfs->lock, flags);
+ bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ spin_unlock_irqrestore(&bfs->lock, flags);
folio_mark_dirty(folio);
}
@@ -419,17 +414,17 @@ void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
- struct btrfs_subpage *subpage = folio_get_private(folio);
+ struct btrfs_folio_state *bfs = folio_get_private(folio);
unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
dirty, start, len);
unsigned long flags;
bool last = false;
- spin_lock_irqsave(&subpage->lock, flags);
- bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ spin_lock_irqsave(&bfs->lock, flags);
+ bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
if (subpage_test_bitmap_all_zero(fs_info, folio, dirty))
last = true;
- spin_unlock_irqrestore(&subpage->lock, flags);
+ spin_unlock_irqrestore(&bfs->lock, flags);
return last;
}
@@ -446,91 +441,91 @@ void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info,
void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
- struct btrfs_subpage *subpage = folio_get_private(folio);
+ struct btrfs_folio_state *bfs = folio_get_private(folio);
unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
writeback, start, len);
unsigned long flags;
- spin_lock_irqsave(&subpage->lock, flags);
- bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ spin_lock_irqsave(&bfs->lock, flags);
+ bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
if (!folio_test_writeback(folio))
folio_start_writeback(folio);
- spin_unlock_irqrestore(&subpage->lock, flags);
+ spin_unlock_irqrestore(&bfs->lock, flags);
}
void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
- struct btrfs_subpage *subpage = folio_get_private(folio);
+ struct btrfs_folio_state *bfs = folio_get_private(folio);
unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
writeback, start, len);
unsigned long flags;
- spin_lock_irqsave(&subpage->lock, flags);
- bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ spin_lock_irqsave(&bfs->lock, flags);
+ bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
if (subpage_test_bitmap_all_zero(fs_info, folio, writeback)) {
ASSERT(folio_test_writeback(folio));
folio_end_writeback(folio);
}
- spin_unlock_irqrestore(&subpage->lock, flags);
+ spin_unlock_irqrestore(&bfs->lock, flags);
}
void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
- struct btrfs_subpage *subpage = folio_get_private(folio);
+ struct btrfs_folio_state *bfs = folio_get_private(folio);
unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
ordered, start, len);
unsigned long flags;
- spin_lock_irqsave(&subpage->lock, flags);
- bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ spin_lock_irqsave(&bfs->lock, flags);
+ bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
folio_set_ordered(folio);
- spin_unlock_irqrestore(&subpage->lock, flags);
+ spin_unlock_irqrestore(&bfs->lock, flags);
}
void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
- struct btrfs_subpage *subpage = folio_get_private(folio);
+ struct btrfs_folio_state *bfs = folio_get_private(folio);
unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
ordered, start, len);
unsigned long flags;
- spin_lock_irqsave(&subpage->lock, flags);
- bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ spin_lock_irqsave(&bfs->lock, flags);
+ bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
if (subpage_test_bitmap_all_zero(fs_info, folio, ordered))
folio_clear_ordered(folio);
- spin_unlock_irqrestore(&subpage->lock, flags);
+ spin_unlock_irqrestore(&bfs->lock, flags);
}
void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
- struct btrfs_subpage *subpage = folio_get_private(folio);
+ struct btrfs_folio_state *bfs = folio_get_private(folio);
unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
checked, start, len);
unsigned long flags;
- spin_lock_irqsave(&subpage->lock, flags);
- bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ spin_lock_irqsave(&bfs->lock, flags);
+ bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
if (subpage_test_bitmap_all_set(fs_info, folio, checked))
folio_set_checked(folio);
- spin_unlock_irqrestore(&subpage->lock, flags);
+ spin_unlock_irqrestore(&bfs->lock, flags);
}
void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
- struct btrfs_subpage *subpage = folio_get_private(folio);
+ struct btrfs_folio_state *bfs = folio_get_private(folio);
unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
checked, start, len);
unsigned long flags;
- spin_lock_irqsave(&subpage->lock, flags);
- bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ spin_lock_irqsave(&bfs->lock, flags);
+ bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
folio_clear_checked(folio);
- spin_unlock_irqrestore(&subpage->lock, flags);
+ spin_unlock_irqrestore(&bfs->lock, flags);
}
/*
@@ -541,16 +536,16 @@ void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info,
bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \
struct folio *folio, u64 start, u32 len) \
{ \
- struct btrfs_subpage *subpage = folio_get_private(folio); \
+ struct btrfs_folio_state *bfs = folio_get_private(folio); \
unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, \
name, start, len); \
unsigned long flags; \
bool ret; \
\
- spin_lock_irqsave(&subpage->lock, flags); \
- ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit, \
+ spin_lock_irqsave(&bfs->lock, flags); \
+ ret = bitmap_test_range_all_set(bfs->bitmaps, start_bit, \
len >> fs_info->sectorsize_bits); \
- spin_unlock_irqrestore(&subpage->lock, flags); \
+ spin_unlock_irqrestore(&bfs->lock, flags); \
return ret; \
}
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate);
@@ -662,10 +657,10 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
{ \
const unsigned int blocks_per_folio = \
btrfs_blocks_per_folio(fs_info, folio); \
- const struct btrfs_subpage *subpage = folio_get_private(folio); \
+ const struct btrfs_folio_state *bfs = folio_get_private(folio); \
\
ASSERT(blocks_per_folio <= BITS_PER_LONG); \
- *dst = bitmap_read(subpage->bitmaps, \
+ *dst = bitmap_read(bfs->bitmaps, \
blocks_per_folio * btrfs_bitmap_nr_##name, \
blocks_per_folio); \
}
@@ -690,7 +685,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
- struct btrfs_subpage *subpage;
+ struct btrfs_folio_state *bfs;
unsigned int start_bit;
unsigned int nbits;
unsigned long flags;
@@ -705,15 +700,15 @@ void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len);
nbits = len >> fs_info->sectorsize_bits;
- subpage = folio_get_private(folio);
- ASSERT(subpage);
- spin_lock_irqsave(&subpage->lock, flags);
- if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) {
+ bfs = folio_get_private(folio);
+ ASSERT(bfs);
+ spin_lock_irqsave(&bfs->lock, flags);
+ if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) {
SUBPAGE_DUMP_BITMAP(fs_info, folio, dirty, start, len);
- ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
+ ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits));
}
- ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
- spin_unlock_irqrestore(&subpage->lock, flags);
+ ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits));
+ spin_unlock_irqrestore(&bfs->lock, flags);
}
/*
@@ -726,7 +721,7 @@ void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
- struct btrfs_subpage *subpage;
+ struct btrfs_folio_state *bfs;
unsigned long flags;
unsigned int start_bit;
unsigned int nbits;
@@ -736,19 +731,19 @@ void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info,
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio))
return;
- subpage = folio_get_private(folio);
+ bfs = folio_get_private(folio);
start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
nbits = len >> fs_info->sectorsize_bits;
- spin_lock_irqsave(&subpage->lock, flags);
+ spin_lock_irqsave(&bfs->lock, flags);
/* Target range should not yet be locked. */
- if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) {
+ if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) {
SUBPAGE_DUMP_BITMAP(fs_info, folio, locked, start, len);
- ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
+ ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits));
}
- bitmap_set(subpage->bitmaps, start_bit, nbits);
- ret = atomic_add_return(nbits, &subpage->nr_locked);
+ bitmap_set(bfs->bitmaps, start_bit, nbits);
+ ret = atomic_add_return(nbits, &bfs->nr_locked);
ASSERT(ret <= btrfs_blocks_per_folio(fs_info, folio));
- spin_unlock_irqrestore(&subpage->lock, flags);
+ spin_unlock_irqrestore(&bfs->lock, flags);
}
/*
@@ -776,7 +771,7 @@ bool btrfs_meta_folio_clear_and_test_dirty(struct folio *folio, const struct ext
void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
- struct btrfs_subpage *subpage;
+ struct btrfs_folio_state *bfs;
const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
unsigned long uptodate_bitmap;
unsigned long dirty_bitmap;
@@ -788,18 +783,18 @@ void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
ASSERT(folio_test_private(folio) && folio_get_private(folio));
ASSERT(blocks_per_folio > 1);
- subpage = folio_get_private(folio);
+ bfs = folio_get_private(folio);
- spin_lock_irqsave(&subpage->lock, flags);
+ spin_lock_irqsave(&bfs->lock, flags);
GET_SUBPAGE_BITMAP(fs_info, folio, uptodate, &uptodate_bitmap);
GET_SUBPAGE_BITMAP(fs_info, folio, dirty, &dirty_bitmap);
GET_SUBPAGE_BITMAP(fs_info, folio, writeback, &writeback_bitmap);
GET_SUBPAGE_BITMAP(fs_info, folio, ordered, &ordered_bitmap);
GET_SUBPAGE_BITMAP(fs_info, folio, checked, &checked_bitmap);
GET_SUBPAGE_BITMAP(fs_info, folio, locked, &locked_bitmap);
- spin_unlock_irqrestore(&subpage->lock, flags);
+ spin_unlock_irqrestore(&bfs->lock, flags);
- dump_page(folio_page(folio, 0), "btrfs subpage dump");
+ dump_page(folio_page(folio, 0), "btrfs folio state dump");
btrfs_warn(fs_info,
"start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl locked=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl",
start, len, folio_pos(folio),
@@ -815,14 +810,14 @@ void btrfs_get_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info,
struct folio *folio,
unsigned long *ret_bitmap)
{
- struct btrfs_subpage *subpage;
+ struct btrfs_folio_state *bfs;
unsigned long flags;
ASSERT(folio_test_private(folio) && folio_get_private(folio));
ASSERT(btrfs_blocks_per_folio(fs_info, folio) > 1);
- subpage = folio_get_private(folio);
+ bfs = folio_get_private(folio);
- spin_lock_irqsave(&subpage->lock, flags);
+ spin_lock_irqsave(&bfs->lock, flags);
GET_SUBPAGE_BITMAP(fs_info, folio, dirty, ret_bitmap);
- spin_unlock_irqrestore(&subpage->lock, flags);
+ spin_unlock_irqrestore(&bfs->lock, flags);
}
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index 3042c5ea840a..ee0710eb13fd 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -32,9 +32,31 @@ struct folio;
enum {
btrfs_bitmap_nr_uptodate = 0,
btrfs_bitmap_nr_dirty,
+
+ /*
+	 * This can eventually be changed to an atomic, but that depends on
+	 * the async delalloc range rework for the locked bitmap, as async
+	 * delalloc can unlock its range and mark blocks writeback at any
+	 * time.
+ */
btrfs_bitmap_nr_writeback,
+
+ /*
+ * The ordered and checked flags are for COW fixup, already marked
+ * deprecated, and will be removed eventually.
+ */
btrfs_bitmap_nr_ordered,
btrfs_bitmap_nr_checked,
+
+ /*
+	 * The locked bit is for the async delalloc range (compression);
+	 * currently an async extent is queued with its range locked until
+	 * the compression is done, so an async extent can unlock the range
+	 * at any time.
+	 *
+	 * Deprecating this flag will require a rework of the async extent
+	 * lifespan (mark writeback and do compression).
+ */
btrfs_bitmap_nr_locked,
btrfs_bitmap_nr_max
};
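
The enum above doubles as the layout key for btrfs_folio_state::bitmaps[]: each state owns a contiguous run of blocks_per_folio bits, so the bit for a given block is blocks_per_folio * state_nr + block_nr — the same indexing subpage_calc_start_bit() and GET_SUBPAGE_BITMAP() compute in the hunks above. A minimal user-space sketch of that indexing, assuming a 64K folio with 4K blocks; the names here are illustrative stand-ins, not the kernel helpers:

#include <stdio.h>

enum { NR_UPTODATE, NR_DIRTY, NR_WRITEBACK, NR_ORDERED, NR_CHECKED,
       NR_LOCKED, NR_MAX };

/* Bit index of @block for state @nr when the folio has @blocks blocks. */
static unsigned int state_bit(unsigned int blocks, unsigned int nr,
			      unsigned int block)
{
	return blocks * nr + block;
}

int main(void)
{
	const unsigned int blocks = 16;	/* 64K folio / 4K blocks */

	/* The dirty bit of block 3 is bit 16 * 1 + 3 = 19. */
	printf("dirty bit of block 3: %u\n", state_bit(blocks, NR_DIRTY, 3));
	printf("bits needed in total: %u\n", blocks * NR_MAX);	/* 96 */
	return 0;
}
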
@@ -43,7 +65,7 @@ enum {
* Structure to trace status of each sector inside a page, attached to
* page::private for both data and metadata inodes.
*/
-struct btrfs_subpage {
+struct btrfs_folio_state {
/* Common members for both data and metadata pages */
spinlock_t lock;
union {
@@ -51,7 +73,7 @@ struct btrfs_subpage {
* Structures only used by metadata
*
* @eb_refs should only be operated under private_lock, as it
- * manages whether the subpage can be detached.
+ * manages whether the btrfs_folio_state can be detached.
*/
atomic_t eb_refs;
@@ -65,12 +87,11 @@ struct btrfs_subpage {
unsigned long bitmaps[];
};
-enum btrfs_subpage_type {
+enum btrfs_folio_type {
BTRFS_SUBPAGE_METADATA,
BTRFS_SUBPAGE_DATA,
};
-#if PAGE_SIZE > BTRFS_MIN_BLOCKSIZE
/*
* Subpage support for metadata is more complex, as we can have dummy extent
* buffers, where folios have no mapping to determine the owning inode.
@@ -91,29 +112,19 @@ static inline bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info,
ASSERT(is_data_inode(BTRFS_I(folio->mapping->host)));
return fs_info->sectorsize < folio_size(folio);
}
-#else
-static inline bool btrfs_meta_is_subpage(const struct btrfs_fs_info *fs_info)
-{
- return false;
-}
-static inline bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info,
- struct folio *folio)
-{
- if (folio->mapping && folio->mapping->host)
- ASSERT(is_data_inode(BTRFS_I(folio->mapping->host)));
- return false;
-}
-#endif
-int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
- struct folio *folio, enum btrfs_subpage_type type);
-void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio,
- enum btrfs_subpage_type type);
+int btrfs_attach_folio_state(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, enum btrfs_folio_type type);
+void btrfs_detach_folio_state(const struct btrfs_fs_info *fs_info, struct folio *folio,
+ enum btrfs_folio_type type);
/* Allocate additional data where page represents more than one sector */
-struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
- size_t fsize, enum btrfs_subpage_type type);
-void btrfs_free_subpage(struct btrfs_subpage *subpage);
+struct btrfs_folio_state *btrfs_alloc_folio_state(const struct btrfs_fs_info *fs_info,
+ size_t fsize, enum btrfs_folio_type type);
+static inline void btrfs_free_folio_state(struct btrfs_folio_state *bfs)
+{
+ kfree(bfs);
+}
void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio);
void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a0c65adce1ab..68e35a3700ff 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -261,10 +261,65 @@ static const struct fs_parameter_spec btrfs_fs_parameters[] = {
{}
};
-/* No support for restricting writes to btrfs devices yet... */
-static inline blk_mode_t btrfs_open_mode(struct fs_context *fc)
+static bool btrfs_match_compress_type(const char *string, const char *type, bool may_have_level)
{
- return sb_open_mode(fc->sb_flags) & ~BLK_OPEN_RESTRICT_WRITES;
+ const int len = strlen(type);
+
+ return (strncmp(string, type, len) == 0) &&
+ ((may_have_level && string[len] == ':') || string[len] == '\0');
+}
+
+static int btrfs_parse_compress(struct btrfs_fs_context *ctx,
+ const struct fs_parameter *param, int opt)
+{
+ const char *string = param->string;
+
+ /*
+	 * Provide the same semantics as older kernels that don't use fs
+	 * context: specifying the "compress" option clears "force-compress"
+ * without the need to pass "compress-force=[no|none]" before
+ * specifying "compress".
+ */
+ if (opt != Opt_compress_force && opt != Opt_compress_force_type)
+ btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS);
+
+ if (opt == Opt_compress || opt == Opt_compress_force) {
+ ctx->compress_type = BTRFS_COMPRESS_ZLIB;
+ ctx->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL;
+ btrfs_set_opt(ctx->mount_opt, COMPRESS);
+ btrfs_clear_opt(ctx->mount_opt, NODATACOW);
+ btrfs_clear_opt(ctx->mount_opt, NODATASUM);
+ } else if (btrfs_match_compress_type(string, "zlib", true)) {
+ ctx->compress_type = BTRFS_COMPRESS_ZLIB;
+ ctx->compress_level = btrfs_compress_str2level(BTRFS_COMPRESS_ZLIB,
+ string + 4);
+ btrfs_set_opt(ctx->mount_opt, COMPRESS);
+ btrfs_clear_opt(ctx->mount_opt, NODATACOW);
+ btrfs_clear_opt(ctx->mount_opt, NODATASUM);
+ } else if (btrfs_match_compress_type(string, "lzo", false)) {
+ ctx->compress_type = BTRFS_COMPRESS_LZO;
+ ctx->compress_level = 0;
+ btrfs_set_opt(ctx->mount_opt, COMPRESS);
+ btrfs_clear_opt(ctx->mount_opt, NODATACOW);
+ btrfs_clear_opt(ctx->mount_opt, NODATASUM);
+ } else if (btrfs_match_compress_type(string, "zstd", true)) {
+ ctx->compress_type = BTRFS_COMPRESS_ZSTD;
+ ctx->compress_level = btrfs_compress_str2level(BTRFS_COMPRESS_ZSTD,
+ string + 4);
+ btrfs_set_opt(ctx->mount_opt, COMPRESS);
+ btrfs_clear_opt(ctx->mount_opt, NODATACOW);
+ btrfs_clear_opt(ctx->mount_opt, NODATASUM);
+ } else if (btrfs_match_compress_type(string, "no", false) ||
+ btrfs_match_compress_type(string, "none", false)) {
+ ctx->compress_level = 0;
+ ctx->compress_type = 0;
+ btrfs_clear_opt(ctx->mount_opt, COMPRESS);
+ btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS);
+ } else {
+ btrfs_err(NULL, "unrecognized compression value %s", string);
+ return -EINVAL;
+ }
+ return 0;
}
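
One point of the new helper over the bare strncmp() calls it replaces: a prefix alone no longer matches — the type name must be followed by ':' (only where a level is allowed) or by the end of the string. A quick user-space check of those semantics, with the helper's logic lifted verbatim from the hunk above:

#include <assert.h>
#include <stdbool.h>
#include <string.h>

static bool match_compress_type(const char *string, const char *type,
				bool may_have_level)
{
	const int len = strlen(type);

	return (strncmp(string, type, len) == 0) &&
	       ((may_have_level && string[len] == ':') || string[len] == '\0');
}

int main(void)
{
	assert(match_compress_type("zstd", "zstd", true));	/* exact name */
	assert(match_compress_type("zstd:3", "zstd", true));	/* with level */
	assert(!match_compress_type("zstdX", "zstd", true));	/* prefix no longer matches */
	assert(!match_compress_type("lzo:1", "lzo", false));	/* lzo takes no level */
	return 0;
}
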
static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
@@ -303,10 +358,9 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
break;
case Opt_device: {
struct btrfs_device *device;
- blk_mode_t mode = btrfs_open_mode(fc);
mutex_lock(&uuid_mutex);
- device = btrfs_scan_one_device(param->string, mode, false);
+ device = btrfs_scan_one_device(param->string, false);
mutex_unlock(&uuid_mutex);
if (IS_ERR(device))
return PTR_ERR(device);
@@ -336,53 +390,8 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
fallthrough;
case Opt_compress:
case Opt_compress_type:
- /*
- * Provide the same semantics as older kernels that don't use fs
- * context, specifying the "compress" option clears
- * "force-compress" without the need to pass
- * "compress-force=[no|none]" before specifying "compress".
- */
- if (opt != Opt_compress_force && opt != Opt_compress_force_type)
- btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS);
-
- if (opt == Opt_compress || opt == Opt_compress_force) {
- ctx->compress_type = BTRFS_COMPRESS_ZLIB;
- ctx->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL;
- btrfs_set_opt(ctx->mount_opt, COMPRESS);
- btrfs_clear_opt(ctx->mount_opt, NODATACOW);
- btrfs_clear_opt(ctx->mount_opt, NODATASUM);
- } else if (strncmp(param->string, "zlib", 4) == 0) {
- ctx->compress_type = BTRFS_COMPRESS_ZLIB;
- ctx->compress_level =
- btrfs_compress_str2level(BTRFS_COMPRESS_ZLIB,
- param->string + 4);
- btrfs_set_opt(ctx->mount_opt, COMPRESS);
- btrfs_clear_opt(ctx->mount_opt, NODATACOW);
- btrfs_clear_opt(ctx->mount_opt, NODATASUM);
- } else if (strncmp(param->string, "lzo", 3) == 0) {
- ctx->compress_type = BTRFS_COMPRESS_LZO;
- ctx->compress_level = 0;
- btrfs_set_opt(ctx->mount_opt, COMPRESS);
- btrfs_clear_opt(ctx->mount_opt, NODATACOW);
- btrfs_clear_opt(ctx->mount_opt, NODATASUM);
- } else if (strncmp(param->string, "zstd", 4) == 0) {
- ctx->compress_type = BTRFS_COMPRESS_ZSTD;
- ctx->compress_level =
- btrfs_compress_str2level(BTRFS_COMPRESS_ZSTD,
- param->string + 4);
- btrfs_set_opt(ctx->mount_opt, COMPRESS);
- btrfs_clear_opt(ctx->mount_opt, NODATACOW);
- btrfs_clear_opt(ctx->mount_opt, NODATASUM);
- } else if (strncmp(param->string, "no", 2) == 0) {
- ctx->compress_level = 0;
- ctx->compress_type = 0;
- btrfs_clear_opt(ctx->mount_opt, COMPRESS);
- btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS);
- } else {
- btrfs_err(NULL, "unrecognized compression value %s",
- param->string);
+ if (btrfs_parse_compress(ctx, param, opt))
return -EINVAL;
- }
break;
case Opt_ssd:
if (result.negated) {
@@ -945,12 +954,12 @@ static int btrfs_fill_super(struct super_block *sb,
{
struct btrfs_inode *inode;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
- int err;
+ int ret;
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_magic = BTRFS_SUPER_MAGIC;
sb->s_op = &btrfs_super_ops;
- sb->s_d_op = &btrfs_dentry_operations;
+ set_default_d_op(sb, &btrfs_dentry_operations);
sb->s_export_op = &btrfs_export_ops;
#ifdef CONFIG_FS_VERITY
sb->s_vop = &btrfs_verityops;
@@ -959,28 +968,28 @@ static int btrfs_fill_super(struct super_block *sb,
sb->s_time_gran = 1;
sb->s_iflags |= SB_I_CGROUPWB | SB_I_ALLOW_HSM;
- err = super_setup_bdi(sb);
- if (err) {
+ ret = super_setup_bdi(sb);
+ if (ret) {
btrfs_err(fs_info, "super_setup_bdi failed");
- return err;
+ return ret;
}
- err = open_ctree(sb, fs_devices);
- if (err) {
- btrfs_err(fs_info, "open_ctree failed: %d", err);
- return err;
+ ret = open_ctree(sb, fs_devices);
+ if (ret) {
+ btrfs_err(fs_info, "open_ctree failed: %d", ret);
+ return ret;
}
inode = btrfs_iget(BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root);
if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- btrfs_handle_fs_error(fs_info, err, NULL);
+ ret = PTR_ERR(inode);
+ btrfs_handle_fs_error(fs_info, ret, NULL);
goto fail_close;
}
sb->s_root = d_make_root(&inode->vfs_inode);
if (!sb->s_root) {
- err = -ENOMEM;
+ ret = -ENOMEM;
goto fail_close;
}
@@ -989,7 +998,7 @@ static int btrfs_fill_super(struct super_block *sb,
fail_close:
close_ctree(fs_info);
- return err;
+ return ret;
}
int btrfs_sync_fs(struct super_block *sb, int wait)
@@ -1826,10 +1835,9 @@ static int btrfs_get_tree_super(struct fs_context *fc)
struct btrfs_fs_info *fs_info = fc->s_fs_info;
struct btrfs_fs_context *ctx = fc->fs_private;
struct btrfs_fs_devices *fs_devices = NULL;
- struct block_device *bdev;
struct btrfs_device *device;
struct super_block *sb;
- blk_mode_t mode = btrfs_open_mode(fc);
+ blk_mode_t mode = sb_open_mode(fc->sb_flags);
int ret;
btrfs_ctx_to_info(fs_info, ctx);
@@ -1839,47 +1847,60 @@ static int btrfs_get_tree_super(struct fs_context *fc)
* With 'true' passed to btrfs_scan_one_device() (mount time) we expect
* either a valid device or an error.
*/
- device = btrfs_scan_one_device(fc->source, mode, true);
+ device = btrfs_scan_one_device(fc->source, true);
ASSERT(device != NULL);
if (IS_ERR(device)) {
mutex_unlock(&uuid_mutex);
return PTR_ERR(device);
}
-
fs_devices = device->fs_devices;
+ /*
+	 * We cannot hold uuid_mutex while calling sget_fc(), as that would
+	 * lead to a lock order reversal with s_umount.
+	 *
+	 * So here we increase the holder count of fs_devices, which ensures
+	 * the fs_devices itself won't be freed.
+ */
+ btrfs_fs_devices_inc_holding(fs_devices);
fs_info->fs_devices = fs_devices;
-
- ret = btrfs_open_devices(fs_devices, mode, &btrfs_fs_type);
mutex_unlock(&uuid_mutex);
- if (ret)
- return ret;
-
- if (!(fc->sb_flags & SB_RDONLY) && fs_devices->rw_devices == 0) {
- ret = -EACCES;
- goto error;
- }
- bdev = fs_devices->latest_dev->bdev;
- /*
- * From now on the error handling is not straightforward.
- *
- * If successful, this will transfer the fs_info into the super block,
- * and fc->s_fs_info will be NULL. However if there's an existing
- * super, we'll still have fc->s_fs_info populated. If we error
- * completely out it'll be cleaned up when we drop the fs_context,
- * otherwise it's tied to the lifetime of the super_block.
- */
sb = sget_fc(fc, btrfs_fc_test_super, set_anon_super_fc);
if (IS_ERR(sb)) {
- ret = PTR_ERR(sb);
- goto error;
+ mutex_lock(&uuid_mutex);
+ btrfs_fs_devices_dec_holding(fs_devices);
+ /*
+ * Since the fs_devices is not opened, it can be freed at any
+	 * time after unlocking uuid_mutex. We need to avoid a double
+	 * free through put_fs_context()->btrfs_free_fs_info().
+	 * So here we reset fs_info->fs_devices to NULL and let the
+	 * regular fs_devices reclaim path handle it.
+ *
+ * This applies to all later branches where no fs_devices is
+ * opened.
+ */
+ fs_info->fs_devices = NULL;
+ mutex_unlock(&uuid_mutex);
+ return PTR_ERR(sb);
}
set_device_specific_options(fs_info);
if (sb->s_root) {
- btrfs_close_devices(fs_devices);
+ /*
+	 * Not the first mount of the fs, thus we got an existing super block.
+ * Will reuse the returned super block, fs_info and fs_devices.
+ *
+ * fc->s_fs_info is not touched and will be later freed by
+ * put_fs_context() through btrfs_free_fs_context().
+ */
+ ASSERT(fc->s_fs_info == fs_info);
+
+ mutex_lock(&uuid_mutex);
+ btrfs_fs_devices_dec_holding(fs_devices);
+ fs_info->fs_devices = NULL;
+ mutex_unlock(&uuid_mutex);
/*
* At this stage we may have RO flag mismatch between
* fc->sb_flags and sb->s_flags. Caller should detect such
@@ -1887,9 +1908,32 @@ static int btrfs_get_tree_super(struct fs_context *fc)
* needed.
*/
} else {
+ struct block_device *bdev;
+
+ /*
+		 * The first mount of the fs, thus a new super block; fc->s_fs_info
+		 * must be NULL, and ownership of our fs_info and fs_devices is
+		 * transferred to the super block.
+ */
+ ASSERT(fc->s_fs_info == NULL);
+
+ mutex_lock(&uuid_mutex);
+ btrfs_fs_devices_dec_holding(fs_devices);
+ ret = btrfs_open_devices(fs_devices, mode, sb);
+ if (ret < 0)
+ fs_info->fs_devices = NULL;
+ mutex_unlock(&uuid_mutex);
+ if (ret < 0) {
+ deactivate_locked_super(sb);
+ return ret;
+ }
+ if (!(fc->sb_flags & SB_RDONLY) && fs_devices->rw_devices == 0) {
+ deactivate_locked_super(sb);
+ return -EACCES;
+ }
+ bdev = fs_devices->latest_dev->bdev;
snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev);
shrinker_debugfs_rename(sb->s_shrink, "sb-btrfs:%s", sb->s_id);
- btrfs_sb(sb)->bdev_holder = &btrfs_fs_type;
ret = btrfs_fill_super(sb, fs_devices);
if (ret) {
deactivate_locked_super(sb);
@@ -1901,10 +1945,6 @@ static int btrfs_get_tree_super(struct fs_context *fc)
fc->root = dget(sb->s_root);
return 0;
-
-error:
- btrfs_close_devices(fs_devices);
- return ret;
}
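
The shape of the fix above is a plain pin/unpin handoff: take a holder reference on fs_devices under uuid_mutex, drop the mutex for the blocking sget_fc() call, then re-take it to drop the holder (and on failure clear fs_info->fs_devices so the regular reclaim path frees it). A generic user-space sketch of that pattern, with illustrative names rather than the kernel API:

#include <pthread.h>

struct devs {
	pthread_mutex_t lock;	/* stands in for uuid_mutex */
	int holders;		/* stands in for the fs_devices holder count */
};

/* Pin @d so it survives a blocking call made with the lock dropped. */
static void devs_pin(struct devs *d)
{
	pthread_mutex_lock(&d->lock);
	d->holders++;
	pthread_mutex_unlock(&d->lock);
}

static void devs_unpin(struct devs *d)
{
	pthread_mutex_lock(&d->lock);
	d->holders--;	/* object may be freed once this drops to zero */
	pthread_mutex_unlock(&d->lock);
}

int main(void)
{
	struct devs d = { .lock = PTHREAD_MUTEX_INITIALIZER, .holders = 0 };

	devs_pin(&d);
	/* ... blocking call such as sget_fc() runs here, lock dropped ... */
	devs_unpin(&d);
	return 0;
}
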
/*
@@ -1980,17 +2020,13 @@ error:
* btrfs or not, setting the whole super block RO. To make per-subvolume mounting
* work with different options work we need to keep backward compatibility.
*/
-static int btrfs_reconfigure_for_mount(struct fs_context *fc, struct vfsmount *mnt)
+static int btrfs_reconfigure_for_mount(struct fs_context *fc)
{
int ret = 0;
- if (fc->sb_flags & SB_RDONLY)
- return ret;
-
- down_write(&mnt->mnt_sb->s_umount);
- if (!(fc->sb_flags & SB_RDONLY) && (mnt->mnt_sb->s_flags & SB_RDONLY))
+ if (!(fc->sb_flags & SB_RDONLY) && (fc->root->d_sb->s_flags & SB_RDONLY))
ret = btrfs_reconfigure(fc);
- up_write(&mnt->mnt_sb->s_umount);
+
return ret;
}
@@ -2035,25 +2071,18 @@ static int btrfs_get_tree_subvol(struct fs_context *fc)
*/
dup_fc->s_fs_info = fs_info;
- /*
- * We'll do the security settings in our btrfs_get_tree_super() mount
- * loop, they were duplicated into dup_fc, we can drop the originals
- * here.
- */
- security_free_mnt_opts(&fc->security);
- fc->security = NULL;
+ ret = btrfs_get_tree_super(dup_fc);
+ if (ret)
+ goto error;
- mnt = fc_mount(dup_fc);
- if (IS_ERR(mnt)) {
- put_fs_context(dup_fc);
- return PTR_ERR(mnt);
- }
- ret = btrfs_reconfigure_for_mount(dup_fc, mnt);
+ ret = btrfs_reconfigure_for_mount(dup_fc);
+ up_write(&dup_fc->root->d_sb->s_umount);
+ if (ret)
+ goto error;
+ mnt = vfs_create_mount(dup_fc);
put_fs_context(dup_fc);
- if (ret) {
- mntput(mnt);
- return ret;
- }
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
/*
	 * This frees ->subvol_name, because if it isn't set we have to
@@ -2067,25 +2096,15 @@ static int btrfs_get_tree_subvol(struct fs_context *fc)
fc->root = dentry;
return 0;
+error:
+ put_fs_context(dup_fc);
+ return ret;
}
static int btrfs_get_tree(struct fs_context *fc)
{
- /*
- * Since we use mount_subtree to mount the default/specified subvol, we
- * have to do mounts in two steps.
- *
- * First pass through we call btrfs_get_tree_subvol(), this is just a
- * wrapper around fc_mount() to call back into here again, and this time
- * we'll call btrfs_get_tree_super(). This will do the open_ctree() and
- * everything to open the devices and file system. Then we return back
- * with a fully constructed vfsmount in btrfs_get_tree_subvol(), and
- * from there we can do our mount_subvol() call, which will lookup
- * whichever subvol we're mounting and setup this fc with the
- * appropriate dentry for the subvol.
- */
- if (fc->s_fs_info)
- return btrfs_get_tree_super(fc);
+ ASSERT(fc->s_fs_info == NULL);
+
return btrfs_get_tree_subvol(fc);
}
@@ -2217,7 +2236,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
* Scanning outside of mount can return NULL which would turn
* into 0 error code.
*/
- device = btrfs_scan_one_device(vol->name, BLK_OPEN_READ, false);
+ device = btrfs_scan_one_device(vol->name, false);
ret = PTR_ERR_OR_ZERO(device);
mutex_unlock(&uuid_mutex);
break;
@@ -2235,7 +2254,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
* Scanning outside of mount can return NULL which would turn
* into 0 error code.
*/
- device = btrfs_scan_one_device(vol->name, BLK_OPEN_READ, false);
+ device = btrfs_scan_one_device(vol->name, false);
if (IS_ERR_OR_NULL(device)) {
mutex_unlock(&uuid_mutex);
if (IS_ERR(device))
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 5d93d9dd2c12..9d398f7a36ad 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -160,8 +160,7 @@ static int can_modify_feature(struct btrfs_feature_attr *fa)
clear = BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR;
break;
default:
- pr_warn("btrfs: sysfs: unknown feature set %d\n",
- fa->feature_set);
+ btrfs_warn(NULL, "sysfs: unknown feature set %d", fa->feature_set);
return 0;
}
@@ -1138,13 +1137,21 @@ static ssize_t btrfs_commit_stats_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf)
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+ u64 now = ktime_get_ns();
+ u64 start_time = fs_info->commit_stats.critical_section_start_time;
+ u64 pending = 0;
+
+ if (start_time)
+ pending = now - start_time;
return sysfs_emit(buf,
"commits %llu\n"
+ "cur_commit_ms %llu\n"
"last_commit_ms %llu\n"
"max_commit_ms %llu\n"
"total_commit_ms %llu\n",
fs_info->commit_stats.commit_count,
+ div_u64(pending, NSEC_PER_MSEC),
div_u64(fs_info->commit_stats.last_commit_dur, NSEC_PER_MSEC),
div_u64(fs_info->commit_stats.max_commit_dur, NSEC_PER_MSEC),
div_u64(fs_info->commit_stats.total_commit_dur, NSEC_PER_MSEC));
@@ -1202,7 +1209,7 @@ static ssize_t quota_override_store(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
unsigned long knob;
- int err;
+ int ret;
if (!fs_info)
return -EPERM;
@@ -1210,9 +1217,9 @@ static ssize_t quota_override_store(struct kobject *kobj,
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
- err = kstrtoul(buf, 10, &knob);
- if (err)
- return err;
+ ret = kstrtoul(buf, 10, &knob);
+ if (ret)
+ return ret;
if (knob > 1)
return -EINVAL;
@@ -2239,7 +2246,7 @@ void btrfs_kobject_uevent(struct block_device *bdev, enum kobject_action action)
ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action);
if (ret)
- pr_warn("BTRFS: Sending event '%d' to kobject: '%s' (%p): failed\n",
+ btrfs_warn(NULL, "sending event %d to kobject: '%s' (%p): failed",
action, kobject_name(&disk_to_dev(bdev->bd_disk)->kobj),
&disk_to_dev(bdev->bd_disk)->kobj);
}
@@ -2282,15 +2289,15 @@ static struct kset *btrfs_kset;
*/
int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs)
{
- int error;
+ int ret;
init_completion(&fs_devs->kobj_unregister);
fs_devs->fsid_kobj.kset = btrfs_kset;
- error = kobject_init_and_add(&fs_devs->fsid_kobj, &btrfs_ktype, NULL,
- "%pU", fs_devs->fsid);
- if (error) {
+ ret = kobject_init_and_add(&fs_devs->fsid_kobj, &btrfs_ktype, NULL,
+ "%pU", fs_devs->fsid);
+ if (ret) {
kobject_put(&fs_devs->fsid_kobj);
- return error;
+ return ret;
}
fs_devs->devices_kobj = kobject_create_and_add("devices",
@@ -2316,71 +2323,70 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs)
int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
{
- int error;
+ int ret;
struct btrfs_fs_devices *fs_devs = fs_info->fs_devices;
struct kobject *fsid_kobj = &fs_devs->fsid_kobj;
- error = btrfs_sysfs_add_fs_devices(fs_devs);
- if (error)
- return error;
+ ret = btrfs_sysfs_add_fs_devices(fs_devs);
+ if (ret)
+ return ret;
- error = sysfs_create_files(fsid_kobj, btrfs_attrs);
- if (error) {
+ ret = sysfs_create_files(fsid_kobj, btrfs_attrs);
+ if (ret) {
btrfs_sysfs_remove_fs_devices(fs_devs);
- return error;
+ return ret;
}
- error = sysfs_create_group(fsid_kobj,
- &btrfs_feature_attr_group);
- if (error)
+ ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
+ if (ret)
goto failure;
#ifdef CONFIG_BTRFS_DEBUG
fs_info->debug_kobj = kobject_create_and_add("debug", fsid_kobj);
if (!fs_info->debug_kobj) {
- error = -ENOMEM;
+ ret = -ENOMEM;
goto failure;
}
- error = sysfs_create_files(fs_info->debug_kobj, btrfs_debug_mount_attrs);
- if (error)
+ ret = sysfs_create_files(fs_info->debug_kobj, btrfs_debug_mount_attrs);
+ if (ret)
goto failure;
#endif
/* Discard directory */
fs_info->discard_kobj = kobject_create_and_add("discard", fsid_kobj);
if (!fs_info->discard_kobj) {
- error = -ENOMEM;
+ ret = -ENOMEM;
goto failure;
}
- error = sysfs_create_files(fs_info->discard_kobj, discard_attrs);
- if (error)
+ ret = sysfs_create_files(fs_info->discard_kobj, discard_attrs);
+ if (ret)
goto failure;
- error = addrm_unknown_feature_attrs(fs_info, true);
- if (error)
+ ret = addrm_unknown_feature_attrs(fs_info, true);
+ if (ret)
goto failure;
- error = sysfs_create_link(fsid_kobj, &fs_info->sb->s_bdi->dev->kobj, "bdi");
- if (error)
+ ret = sysfs_create_link(fsid_kobj, &fs_info->sb->s_bdi->dev->kobj, "bdi");
+ if (ret)
goto failure;
fs_info->space_info_kobj = kobject_create_and_add("allocation",
fsid_kobj);
if (!fs_info->space_info_kobj) {
- error = -ENOMEM;
+ ret = -ENOMEM;
goto failure;
}
- error = sysfs_create_files(fs_info->space_info_kobj, allocation_attrs);
- if (error)
+ ret = sysfs_create_files(fs_info->space_info_kobj, allocation_attrs);
+ if (ret)
goto failure;
return 0;
failure:
btrfs_sysfs_remove_mounted(fs_info);
- return error;
+ return ret;
}
static ssize_t qgroup_enabled_show(struct kobject *qgroups_kobj,
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 00da54f0164c..b19328d077d3 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -23,8 +23,8 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
{
int ret;
struct folio_batch fbatch;
- unsigned long index = start >> PAGE_SHIFT;
- unsigned long end_index = end >> PAGE_SHIFT;
+ pgoff_t index = start >> PAGE_SHIFT;
+ pgoff_t end_index = end >> PAGE_SHIFT;
int i;
int count = 0;
int loops = 0;
@@ -75,7 +75,8 @@ static void extent_flag_to_str(const struct extent_state *state, char *dest)
dest[0] = 0;
PRINT_ONE_FLAG(state, dest, cur, DIRTY);
PRINT_ONE_FLAG(state, dest, cur, LOCKED);
- PRINT_ONE_FLAG(state, dest, cur, NEW);
+ PRINT_ONE_FLAG(state, dest, cur, DIRTY_LOG1);
+ PRINT_ONE_FLAG(state, dest, cur, DIRTY_LOG2);
PRINT_ONE_FLAG(state, dest, cur, DELALLOC);
PRINT_ONE_FLAG(state, dest, cur, DEFRAG);
PRINT_ONE_FLAG(state, dest, cur, BOUNDARY);
@@ -113,7 +114,6 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize)
struct extent_io_tree *tmp;
struct page *page;
struct page *locked_page = NULL;
- unsigned long index = 0;
/* In this test we need at least 2 file extents at its maximum size */
u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
u64 total_dirty = 2 * max_bytes;
@@ -156,7 +156,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize)
* everything to make sure our pages don't get evicted and screw up our
* test.
*/
- for (index = 0; index < (total_dirty >> PAGE_SHIFT); index++) {
+ for (pgoff_t index = 0; index < (total_dirty >> PAGE_SHIFT); index++) {
page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
if (!page) {
test_err("failed to allocate test page");
@@ -326,7 +326,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize)
out_bits:
if (ret)
dump_extent_io_tree(tmp);
- btrfs_clear_extent_bits(tmp, 0, total_dirty - 1, (unsigned)-1);
+ btrfs_clear_extent_bit(tmp, 0, total_dirty - 1, (unsigned)-1, NULL);
out:
if (locked_page)
put_page(locked_page);
@@ -343,11 +343,11 @@ static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb)
unsigned long i;
for (i = 0; i < eb->len * BITS_PER_BYTE; i++) {
- int bit, bit1;
+ bool bit_set, bit1_set;
- bit = !!test_bit(i, bitmap);
- bit1 = !!extent_buffer_test_bit(eb, 0, i);
- if (bit1 != bit) {
+ bit_set = test_bit(i, bitmap);
+ bit1_set = extent_buffer_test_bit(eb, 0, i);
+ if (bit1_set != bit_set) {
u8 has;
u8 expect;
@@ -360,9 +360,9 @@ static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb)
return -EINVAL;
}
- bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
- i % BITS_PER_BYTE);
- if (bit1 != bit) {
+ bit1_set = extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
+ i % BITS_PER_BYTE);
+ if (bit1_set != bit_set) {
u8 has;
u8 expect;
@@ -662,7 +662,7 @@ static int test_find_first_clear_extent_bit(void)
out:
if (ret)
dump_extent_io_tree(&tree);
- btrfs_clear_extent_bits(&tree, 0, (u64)-1, CHUNK_TRIMMED | CHUNK_ALLOCATED);
+ btrfs_clear_extent_bit(&tree, 0, (u64)-1, CHUNK_TRIMMED | CHUNK_ALLOCATED, NULL);
return ret;
}
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index b61972046feb..c8822edd32e2 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -32,7 +32,7 @@ static int __check_free_space_extents(struct btrfs_trans_handle *trans,
unsigned int i;
int ret;
- info = search_free_space_info(trans, cache, path, 0);
+ info = btrfs_search_free_space_info(trans, cache, path, 0);
if (IS_ERR(info)) {
test_err("could not find free space info");
ret = PTR_ERR(info);
@@ -57,7 +57,7 @@ static int __check_free_space_extents(struct btrfs_trans_handle *trans,
goto invalid;
offset = key.objectid;
while (offset < key.objectid + key.offset) {
- bit = free_space_test_bit(cache, path, offset);
+ bit = btrfs_free_space_test_bit(cache, path, offset);
if (prev_bit == 0 && bit == 1) {
extent_start = offset;
} else if (prev_bit == 1 && bit == 0) {
@@ -115,7 +115,7 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans,
u32 flags;
int ret;
- info = search_free_space_info(trans, cache, path, 0);
+ info = btrfs_search_free_space_info(trans, cache, path, 0);
if (IS_ERR(info)) {
test_err("could not find free space info");
btrfs_release_path(path);
@@ -131,13 +131,13 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans,
/* Flip it to the other format and check that for good measure. */
if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
- ret = convert_free_space_to_extents(trans, cache, path);
+ ret = btrfs_convert_free_space_to_extents(trans, cache, path);
if (ret) {
test_err("could not convert to extents");
return ret;
}
} else {
- ret = convert_free_space_to_bitmaps(trans, cache, path);
+ ret = btrfs_convert_free_space_to_bitmaps(trans, cache, path);
if (ret) {
test_err("could not convert to bitmaps");
return ret;
@@ -170,9 +170,8 @@ static int test_remove_all(struct btrfs_trans_handle *trans,
const struct free_space_extent extents[] = {};
int ret;
- ret = __remove_from_free_space_tree(trans, cache, path,
- cache->start,
- cache->length);
+ ret = __btrfs_remove_from_free_space_tree(trans, cache, path,
+ cache->start, cache->length);
if (ret) {
test_err("could not remove free space");
return ret;
@@ -193,8 +192,8 @@ static int test_remove_beginning(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, cache, path,
- cache->start, alignment);
+ ret = __btrfs_remove_from_free_space_tree(trans, cache, path,
+ cache->start, alignment);
if (ret) {
test_err("could not remove free space");
return ret;
@@ -216,7 +215,7 @@ static int test_remove_end(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, cache, path,
+ ret = __btrfs_remove_from_free_space_tree(trans, cache, path,
cache->start + cache->length - alignment,
alignment);
if (ret) {
@@ -240,9 +239,9 @@ static int test_remove_middle(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, cache, path,
- cache->start + alignment,
- alignment);
+ ret = __btrfs_remove_from_free_space_tree(trans, cache, path,
+ cache->start + alignment,
+ alignment);
if (ret) {
test_err("could not remove free space");
return ret;
@@ -263,23 +262,22 @@ static int test_merge_left(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, cache, path,
- cache->start, cache->length);
+ ret = __btrfs_remove_from_free_space_tree(trans, cache, path,
+ cache->start, cache->length);
if (ret) {
test_err("could not remove free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, cache, path, cache->start,
- alignment);
+ ret = __btrfs_add_to_free_space_tree(trans, cache, path, cache->start,
+ alignment);
if (ret) {
test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, cache, path,
- cache->start + alignment,
- alignment);
+ ret = __btrfs_add_to_free_space_tree(trans, cache, path,
+ cache->start + alignment, alignment);
if (ret) {
test_err("could not add free space");
return ret;
@@ -300,24 +298,23 @@ static int test_merge_right(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, cache, path,
- cache->start, cache->length);
+ ret = __btrfs_remove_from_free_space_tree(trans, cache, path,
+ cache->start, cache->length);
if (ret) {
test_err("could not remove free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, cache, path,
- cache->start + 2 * alignment,
- alignment);
+ ret = __btrfs_add_to_free_space_tree(trans, cache, path,
+ cache->start + 2 * alignment,
+ alignment);
if (ret) {
test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, cache, path,
- cache->start + alignment,
- alignment);
+ ret = __btrfs_add_to_free_space_tree(trans, cache, path,
+ cache->start + alignment, alignment);
if (ret) {
test_err("could not add free space");
return ret;
@@ -338,29 +335,29 @@ static int test_merge_both(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, cache, path,
- cache->start, cache->length);
+ ret = __btrfs_remove_from_free_space_tree(trans, cache, path,
+ cache->start, cache->length);
if (ret) {
test_err("could not remove free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, cache, path, cache->start,
- alignment);
+ ret = __btrfs_add_to_free_space_tree(trans, cache, path, cache->start,
+ alignment);
if (ret) {
test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, cache, path,
- cache->start + 2 * alignment, alignment);
+ ret = __btrfs_add_to_free_space_tree(trans, cache, path,
+ cache->start + 2 * alignment, alignment);
if (ret) {
test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, cache, path,
- cache->start + alignment, alignment);
+ ret = __btrfs_add_to_free_space_tree(trans, cache, path,
+ cache->start + alignment, alignment);
if (ret) {
test_err("could not add free space");
return ret;
@@ -383,29 +380,29 @@ static int test_merge_none(struct btrfs_trans_handle *trans,
};
int ret;
- ret = __remove_from_free_space_tree(trans, cache, path,
- cache->start, cache->length);
+ ret = __btrfs_remove_from_free_space_tree(trans, cache, path,
+ cache->start, cache->length);
if (ret) {
test_err("could not remove free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, cache, path, cache->start,
- alignment);
+ ret = __btrfs_add_to_free_space_tree(trans, cache, path, cache->start,
+ alignment);
if (ret) {
test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, cache, path,
- cache->start + 4 * alignment, alignment);
+ ret = __btrfs_add_to_free_space_tree(trans, cache, path,
+ cache->start + 4 * alignment, alignment);
if (ret) {
test_err("could not add free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, cache, path,
- cache->start + 2 * alignment, alignment);
+ ret = __btrfs_add_to_free_space_tree(trans, cache, path,
+ cache->start + 2 * alignment, alignment);
if (ret) {
test_err("could not add free space");
return ret;
@@ -483,14 +480,14 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
goto out;
}
- ret = add_block_group_free_space(&trans, cache);
+ ret = btrfs_add_block_group_free_space(&trans, cache);
if (ret) {
test_err("could not add block group free space");
goto out;
}
if (bitmaps) {
- ret = convert_free_space_to_bitmaps(&trans, cache, path);
+ ret = btrfs_convert_free_space_to_bitmaps(&trans, cache, path);
if (ret) {
test_err("could not convert block group to bitmaps");
goto out;
@@ -501,7 +498,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
if (ret)
goto out;
- ret = remove_block_group_free_space(&trans, cache);
+ ret = btrfs_remove_block_group_free_space(&trans, cache);
if (ret) {
test_err("could not remove block group free space");
goto out;
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index a29d2c02c2c8..a4c2b7748b95 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -950,10 +950,10 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
}
/* [BTRFS_MAX_EXTENT_SIZE/2][sectorsize HOLE][the rest] */
- ret = btrfs_clear_extent_bits(&BTRFS_I(inode)->io_tree,
- BTRFS_MAX_EXTENT_SIZE >> 1,
- (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
- EXTENT_DELALLOC | EXTENT_DELALLOC_NEW);
+ ret = btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree,
+ BTRFS_MAX_EXTENT_SIZE >> 1,
+ (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
+ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW, NULL);
if (ret) {
test_err("clear_extent_bit returned %d", ret);
goto out;
@@ -1017,10 +1017,10 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
}
/* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */
- ret = btrfs_clear_extent_bits(&BTRFS_I(inode)->io_tree,
- BTRFS_MAX_EXTENT_SIZE + sectorsize,
- BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
- EXTENT_DELALLOC | EXTENT_DELALLOC_NEW);
+ ret = btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree,
+ BTRFS_MAX_EXTENT_SIZE + sectorsize,
+ BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
+ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW, NULL);
if (ret) {
test_err("clear_extent_bit returned %d", ret);
goto out;
@@ -1051,8 +1051,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
}
/* Empty */
- ret = btrfs_clear_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
- EXTENT_DELALLOC | EXTENT_DELALLOC_NEW);
+ ret = btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
+ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW, NULL);
if (ret) {
test_err("clear_extent_bit returned %d", ret);
goto out;
@@ -1066,8 +1066,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
ret = 0;
out:
if (ret)
- btrfs_clear_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
- EXTENT_DELALLOC | EXTENT_DELALLOC_NEW);
+ btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
+ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW, NULL);
iput(inode);
btrfs_free_dummy_root(root);
btrfs_free_dummy_fs_info(fs_info);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index b96195d6480f..c5c0d9cf1a80 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1211,15 +1211,15 @@ static int btrfs_wait_extents(struct btrfs_fs_info *fs_info,
struct extent_io_tree *dirty_pages)
{
bool errors = false;
- int err;
+ int ret;
- err = __btrfs_wait_marked_extents(fs_info, dirty_pages);
+ ret = __btrfs_wait_marked_extents(fs_info, dirty_pages);
if (test_and_clear_bit(BTRFS_FS_BTREE_ERR, &fs_info->flags))
errors = true;
- if (errors && !err)
- err = -EIO;
- return err;
+ if (errors && !ret)
+ ret = -EIO;
+ return ret;
}
int btrfs_wait_tree_log_extents(struct btrfs_root *log_root, int mark)
@@ -1227,22 +1227,22 @@ int btrfs_wait_tree_log_extents(struct btrfs_root *log_root, int mark)
struct btrfs_fs_info *fs_info = log_root->fs_info;
struct extent_io_tree *dirty_pages = &log_root->dirty_log_pages;
bool errors = false;
- int err;
+ int ret;
ASSERT(btrfs_root_id(log_root) == BTRFS_TREE_LOG_OBJECTID);
- err = __btrfs_wait_marked_extents(fs_info, dirty_pages);
- if ((mark & EXTENT_DIRTY) &&
+ ret = __btrfs_wait_marked_extents(fs_info, dirty_pages);
+ if ((mark & EXTENT_DIRTY_LOG1) &&
test_and_clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags))
errors = true;
- if ((mark & EXTENT_NEW) &&
+ if ((mark & EXTENT_DIRTY_LOG2) &&
test_and_clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags))
errors = true;
- if (errors && !err)
- err = -EIO;
- return err;
+ if (errors && !ret)
+ ret = -EIO;
+ return ret;
}
/*
@@ -1735,8 +1735,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ret = btrfs_create_qgroup(trans, objectid);
if (ret && ret != -EEXIST) {
- btrfs_abort_transaction(trans, ret);
- goto fail;
+ if (ret != -ENOTCONN || btrfs_qgroup_enabled(fs_info)) {
+ btrfs_abort_transaction(trans, ret);
+ goto fail;
+ }
}
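
The new condition tolerates exactly one case: btrfs_create_qgroup() returning -ENOTCONN while qgroups are disabled, i.e. the subsystem simply isn't on. A small user-space check of that predicate, mirroring the condition above (note -EEXIST never reaches it, since the outer check filters it out first):

#include <assert.h>
#include <errno.h>
#include <stdbool.h>

/* Mirrors the check above: abort unless the error is -ENOTCONN and
 * qgroups are disabled. */
static bool must_abort(int ret, bool qgroup_enabled)
{
	return ret != -ENOTCONN || qgroup_enabled;
}

int main(void)
{
	assert(!must_abort(-ENOTCONN, false));	/* qgroups off: tolerated */
	assert(must_abort(-ENOTCONN, true));	/* qgroups on: real error */
	assert(must_abort(-EIO, false));	/* any other error aborts */
	return 0;
}
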
/*
@@ -2163,13 +2165,19 @@ static void add_pending_snapshot(struct btrfs_trans_handle *trans)
list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
}
-static void update_commit_stats(struct btrfs_fs_info *fs_info, ktime_t interval)
+static void update_commit_stats(struct btrfs_fs_info *fs_info)
{
+ ktime_t now = ktime_get_ns();
+ ktime_t interval = now - fs_info->commit_stats.critical_section_start_time;
+
+ ASSERT(fs_info->commit_stats.critical_section_start_time);
+
fs_info->commit_stats.commit_count++;
fs_info->commit_stats.last_commit_dur = interval;
fs_info->commit_stats.max_commit_dur =
max_t(u64, fs_info->commit_stats.max_commit_dur, interval);
fs_info->commit_stats.total_commit_dur += interval;
+ fs_info->commit_stats.critical_section_start_time = 0;
}
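
Together with the sysfs change earlier in this series, the stats now track a live critical section: the start timestamp is stamped when the commit's critical section begins, sysfs reports now - start as cur_commit_ms while it is nonzero, and update_commit_stats() folds the interval into the counters and clears it. A compact user-space sketch of that lifecycle; the field names are analogues, not the kernel struct:

#include <stdint.h>
#include <stdio.h>

struct commit_stats {
	uint64_t commit_count;
	uint64_t last_ns, max_ns, total_ns;
	uint64_t start_ns;	/* critical_section_start_time analogue */
};

/* Commit critical section begins: stamp the start time. */
static void commit_begin(struct commit_stats *s, uint64_t now_ns)
{
	s->start_ns = now_ns;
}

/* sysfs cur_commit_ms analogue: elapsed time of an in-flight commit,
 * 0 when no commit is running. */
static uint64_t commit_pending_ms(const struct commit_stats *s, uint64_t now_ns)
{
	return s->start_ns ? (now_ns - s->start_ns) / 1000000 : 0;
}

/* update_commit_stats() analogue: fold the finished interval in. */
static void commit_end(struct commit_stats *s, uint64_t now_ns)
{
	const uint64_t interval = now_ns - s->start_ns;

	s->commit_count++;
	s->last_ns = interval;
	if (interval > s->max_ns)
		s->max_ns = interval;
	s->total_ns += interval;
	s->start_ns = 0;	/* nothing in flight anymore */
}

int main(void)
{
	struct commit_stats s = { 0 };

	commit_begin(&s, 1000000000ULL);
	printf("pending: %llu ms\n",	/* 500 */
	       (unsigned long long)commit_pending_ms(&s, 1500000000ULL));
	commit_end(&s, 2000000000ULL);
	printf("pending: %llu ms\n",	/* 0 */
	       (unsigned long long)commit_pending_ms(&s, 2500000000ULL));
	return 0;
}
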
int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
@@ -2178,8 +2186,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_transaction *prev_trans = NULL;
int ret;
- ktime_t start_time;
- ktime_t interval;
ASSERT(refcount_read(&trans->use_count) == 1);
btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
@@ -2312,8 +2318,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* Get the time spent on the work done by the commit thread and not
* the time spent waiting on a previous commit
*/
- start_time = ktime_get_ns();
-
+ fs_info->commit_stats.critical_section_start_time = ktime_get_ns();
extwriter_counter_dec(cur_trans, trans->type);
ret = btrfs_start_delalloc_flush(fs_info);
@@ -2545,6 +2550,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (ret)
goto scrub_continue;
+ update_commit_stats(fs_info);
/*
* We needn't acquire the lock here because there is no other task
* which can change it.
@@ -2581,8 +2587,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
trace_btrfs_transaction_commit(fs_info);
- interval = ktime_get_ns() - start_time;
-
btrfs_scrub_continue(fs_info);
if (current->journal_info == trans)
@@ -2590,8 +2594,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
kmem_cache_free(btrfs_trans_handle_cachep, trans);
- update_commit_stats(fs_info, interval);
-
return ret;
unlock_reloc:
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 8f4703b488b7..0f556f4de3f9 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -191,7 +191,7 @@ static bool check_prev_ino(struct extent_buffer *leaf,
* Only subvolume trees along with their reloc trees need this check.
* Things like log tree doesn't follow this ino requirement.
*/
- if (!is_fstree(btrfs_header_owner(leaf)))
+ if (!btrfs_is_fstree(btrfs_header_owner(leaf)))
return true;
if (key->objectid == prev_key->objectid)
@@ -475,7 +475,7 @@ static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key,
* to be COWed to be relocated.
*/
if (unlikely(is_root_item && key->objectid == BTRFS_TREE_RELOC_OBJECTID &&
- !is_fstree(key->offset))) {
+ !btrfs_is_fstree(key->offset))) {
generic_err(leaf, slot,
"invalid reloc tree for root %lld, root id is not a subvolume tree",
key->offset);
@@ -493,7 +493,7 @@ static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key,
}
/* DIR_ITEM/INDEX/INODE_REF is not allowed to point to non-fs trees */
- if (unlikely(!is_fstree(key->objectid) && !is_root_item)) {
+ if (unlikely(!btrfs_is_fstree(key->objectid) && !is_root_item)) {
dir_item_err(leaf, slot,
"invalid location key objectid, have %llu expect [%llu, %llu]",
key->objectid, BTRFS_FIRST_FREE_OBJECTID,
@@ -1311,7 +1311,7 @@ static bool is_valid_dref_root(u64 rootid)
* - tree root
* For v1 space cache
*/
- return is_fstree(rootid) || rootid == BTRFS_DATA_RELOC_TREE_OBJECTID ||
+ return btrfs_is_fstree(rootid) || rootid == BTRFS_DATA_RELOC_TREE_OBJECTID ||
rootid == BTRFS_ROOT_TREE_OBJECTID;
}
@@ -2167,7 +2167,7 @@ ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO);
int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner)
{
- const bool is_subvol = is_fstree(root_owner);
+ const bool is_subvol = btrfs_is_fstree(root_owner);
const u64 eb_owner = btrfs_header_owner(eb);
/*
@@ -2209,7 +2209,7 @@ int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner)
* For subvolume trees, owners can mismatch, but they should all belong
* to subvolume trees.
*/
- if (unlikely(is_subvol != is_fstree(eb_owner))) {
+ if (unlikely(is_subvol != btrfs_is_fstree(eb_owner))) {
btrfs_crit(eb->fs_info,
"corrupted %s, root=%llu block=%llu owner mismatch, have %llu expect [%llu, %llu]",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 97e933113b82..9f05d454b9df 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -112,7 +112,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_root *log,
struct btrfs_path *path,
- u64 dirid, int del_all);
+ u64 dirid, bool del_all);
static void wait_log_commit(struct btrfs_root *root, int transid);
/*
@@ -143,6 +143,9 @@ static struct btrfs_inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *r
unsigned int nofs_flag;
struct btrfs_inode *inode;
+ /* Only meant to be called for subvolume roots and not for log roots. */
+ ASSERT(btrfs_is_fstree(btrfs_root_id(root)));
+
/*
* We're holding a transaction handle whether we are logging or
* replaying a log tree, so we must make sure NOFS semantics apply
@@ -604,21 +607,6 @@ static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len,
return 0;
}
-/*
- * simple helper to read an inode off the disk from a given root
- * This can only be called for subvolume roots and not for the log
- */
-static noinline struct btrfs_inode *read_one_inode(struct btrfs_root *root,
- u64 objectid)
-{
- struct btrfs_inode *inode;
-
- inode = btrfs_iget_logging(objectid, root);
- if (IS_ERR(inode))
- return NULL;
- return inode;
-}
-
/* replays a single extent in 'eb' at 'slot' with 'key' into the
* subvolume 'root'. path is released on entry and should be released
* on exit.
@@ -668,15 +656,15 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
extent_end = ALIGN(start + size,
fs_info->sectorsize);
} else {
- ret = 0;
- goto out;
+ btrfs_err(fs_info,
+ "unexpected extent type=%d root=%llu inode=%llu offset=%llu",
+ found_type, btrfs_root_id(root), key->objectid, key->offset);
+ return -EUCLEAN;
}
- inode = read_one_inode(root, key->objectid);
- if (!inode) {
- ret = -EIO;
- goto out;
- }
+ inode = btrfs_iget_logging(key->objectid, root);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
/*
* first check to see if we already have this extent in the
@@ -948,9 +936,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
- inode = read_one_inode(root, location.objectid);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(location.objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ inode = NULL;
goto out;
}
@@ -961,7 +950,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
ret = unlink_inode_for_log_replay(trans, dir, inode, &name);
out:
kfree(name.name);
- iput(&inode->vfs_inode);
+ if (inode)
+ iput(&inode->vfs_inode);
return ret;
}
@@ -1051,6 +1041,126 @@ out:
return ret;
}
+static int unlink_refs_not_in_log(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct btrfs_root *log_root,
+ struct btrfs_key *search_key,
+ struct btrfs_inode *dir,
+ struct btrfs_inode *inode,
+ u64 parent_objectid)
+{
+ struct extent_buffer *leaf = path->nodes[0];
+ unsigned long ptr;
+ unsigned long ptr_end;
+
+ /*
+ * Check all the names in this back reference to see if they are in the
+ * log. If so, we allow them to stay otherwise they must be unlinked as
+ * a conflict.
+ */
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ ptr_end = ptr + btrfs_item_size(leaf, path->slots[0]);
+ while (ptr < ptr_end) {
+ struct fscrypt_str victim_name;
+ struct btrfs_inode_ref *victim_ref;
+ int ret;
+
+ victim_ref = (struct btrfs_inode_ref *)ptr;
+ ret = read_alloc_one_name(leaf, (victim_ref + 1),
+ btrfs_inode_ref_name_len(leaf, victim_ref),
+ &victim_name);
+ if (ret)
+ return ret;
+
+ ret = backref_in_log(log_root, search_key, parent_objectid, &victim_name);
+ if (ret) {
+ kfree(victim_name.name);
+ if (ret < 0)
+ return ret;
+ ptr = (unsigned long)(victim_ref + 1) + victim_name.len;
+ continue;
+ }
+
+ inc_nlink(&inode->vfs_inode);
+ btrfs_release_path(path);
+
+ ret = unlink_inode_for_log_replay(trans, dir, inode, &victim_name);
+ kfree(victim_name.name);
+ if (ret)
+ return ret;
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+static int unlink_extrefs_not_in_log(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct btrfs_root *root,
+ struct btrfs_root *log_root,
+ struct btrfs_key *search_key,
+ struct btrfs_inode *inode,
+ u64 inode_objectid,
+ u64 parent_objectid)
+{
+ struct extent_buffer *leaf = path->nodes[0];
+ const unsigned long base = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ const u32 item_size = btrfs_item_size(leaf, path->slots[0]);
+ u32 cur_offset = 0;
+
+ while (cur_offset < item_size) {
+ struct btrfs_inode_extref *extref;
+ struct btrfs_inode *victim_parent;
+ struct fscrypt_str victim_name;
+ int ret;
+
+ extref = (struct btrfs_inode_extref *)(base + cur_offset);
+ victim_name.len = btrfs_inode_extref_name_len(leaf, extref);
+
+ if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
+ goto next;
+
+ ret = read_alloc_one_name(leaf, &extref->name, victim_name.len,
+ &victim_name);
+ if (ret)
+ return ret;
+
+ search_key->objectid = inode_objectid;
+ search_key->type = BTRFS_INODE_EXTREF_KEY;
+ search_key->offset = btrfs_extref_hash(parent_objectid,
+ victim_name.name,
+ victim_name.len);
+ ret = backref_in_log(log_root, search_key, parent_objectid, &victim_name);
+ if (ret) {
+ kfree(victim_name.name);
+ if (ret < 0)
+ return ret;
+next:
+ cur_offset += victim_name.len + sizeof(*extref);
+ continue;
+ }
+
+ victim_parent = btrfs_iget_logging(parent_objectid, root);
+ if (IS_ERR(victim_parent)) {
+ kfree(victim_name.name);
+ return PTR_ERR(victim_parent);
+ }
+
+ inc_nlink(&inode->vfs_inode);
+ btrfs_release_path(path);
+
+ ret = unlink_inode_for_log_replay(trans, victim_parent, inode,
+ &victim_name);
+ iput(&victim_parent->vfs_inode);
+ kfree(victim_name.name);
+ if (ret)
+ return ret;
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
@@ -1061,7 +1171,6 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
u64 ref_index, struct fscrypt_str *name)
{
int ret;
- struct extent_buffer *leaf;
struct btrfs_dir_item *di;
struct btrfs_key search_key;
struct btrfs_inode_extref *extref;
@@ -1072,121 +1181,37 @@ again:
search_key.type = BTRFS_INODE_REF_KEY;
search_key.offset = parent_objectid;
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
- if (ret == 0) {
- struct btrfs_inode_ref *victim_ref;
- unsigned long ptr;
- unsigned long ptr_end;
-
- leaf = path->nodes[0];
-
- /* are we trying to overwrite a back ref for the root directory
- * if so, just jump out, we're done
+ if (ret < 0) {
+ return ret;
+ } else if (ret == 0) {
+ /*
+ * Are we trying to overwrite a back ref for the root directory?
+ * If so, we're done.
*/
if (search_key.objectid == search_key.offset)
return 1;
- /* check all the names in this back reference to see
- * if they are in the log. if so, we allow them to stay
- * otherwise they must be unlinked as a conflict
- */
- ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
- ptr_end = ptr + btrfs_item_size(leaf, path->slots[0]);
- while (ptr < ptr_end) {
- struct fscrypt_str victim_name;
-
- victim_ref = (struct btrfs_inode_ref *)ptr;
- ret = read_alloc_one_name(leaf, (victim_ref + 1),
- btrfs_inode_ref_name_len(leaf, victim_ref),
- &victim_name);
- if (ret)
- return ret;
-
- ret = backref_in_log(log_root, &search_key,
- parent_objectid, &victim_name);
- if (ret < 0) {
- kfree(victim_name.name);
- return ret;
- } else if (!ret) {
- inc_nlink(&inode->vfs_inode);
- btrfs_release_path(path);
-
- ret = unlink_inode_for_log_replay(trans, dir, inode,
- &victim_name);
- kfree(victim_name.name);
- if (ret)
- return ret;
- goto again;
- }
- kfree(victim_name.name);
-
- ptr = (unsigned long)(victim_ref + 1) + victim_name.len;
- }
+ ret = unlink_refs_not_in_log(trans, path, log_root, &search_key,
+ dir, inode, parent_objectid);
+ if (ret == -EAGAIN)
+ goto again;
+ else if (ret)
+ return ret;
}
btrfs_release_path(path);
/* Same search but for extended refs */
- extref = btrfs_lookup_inode_extref(NULL, root, path, name,
- inode_objectid, parent_objectid, 0,
- 0);
+ extref = btrfs_lookup_inode_extref(root, path, name, inode_objectid, parent_objectid);
if (IS_ERR(extref)) {
return PTR_ERR(extref);
} else if (extref) {
- u32 item_size;
- u32 cur_offset = 0;
- unsigned long base;
- struct btrfs_inode *victim_parent;
-
- leaf = path->nodes[0];
-
- item_size = btrfs_item_size(leaf, path->slots[0]);
- base = btrfs_item_ptr_offset(leaf, path->slots[0]);
-
- while (cur_offset < item_size) {
- struct fscrypt_str victim_name;
-
- extref = (struct btrfs_inode_extref *)(base + cur_offset);
-
- if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
- goto next;
-
- ret = read_alloc_one_name(leaf, &extref->name,
- btrfs_inode_extref_name_len(leaf, extref),
- &victim_name);
- if (ret)
- return ret;
-
- search_key.objectid = inode_objectid;
- search_key.type = BTRFS_INODE_EXTREF_KEY;
- search_key.offset = btrfs_extref_hash(parent_objectid,
- victim_name.name,
- victim_name.len);
- ret = backref_in_log(log_root, &search_key,
- parent_objectid, &victim_name);
- if (ret < 0) {
- kfree(victim_name.name);
- return ret;
- } else if (!ret) {
- ret = -ENOENT;
- victim_parent = read_one_inode(root,
- parent_objectid);
- if (victim_parent) {
- inc_nlink(&inode->vfs_inode);
- btrfs_release_path(path);
-
- ret = unlink_inode_for_log_replay(trans,
- victim_parent,
- inode, &victim_name);
- }
- iput(&victim_parent->vfs_inode);
- kfree(victim_name.name);
- if (ret)
- return ret;
- goto again;
- }
- kfree(victim_name.name);
-next:
- cur_offset += victim_name.len + sizeof(*extref);
- }
+ ret = unlink_extrefs_not_in_log(trans, path, root, log_root,
+ &search_key, inode,
+ inode_objectid, parent_objectid);
+ if (ret == -EAGAIN)
+ goto again;
+ else if (ret)
+ return ret;
}
btrfs_release_path(path);
@@ -1314,9 +1339,9 @@ again:
struct btrfs_inode *dir;
btrfs_release_path(path);
- dir = read_one_inode(root, parent_id);
- if (!dir) {
- ret = -ENOENT;
+ dir = btrfs_iget_logging(parent_id, root);
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
kfree(name.name);
goto out;
}
@@ -1360,7 +1385,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
unsigned long ref_end;
struct fscrypt_str name = { 0 };
int ret;
- int log_ref_ver = 0;
+ const bool is_extref_item = (key->type == BTRFS_INODE_EXTREF_KEY);
u64 parent_objectid;
u64 inode_objectid;
u64 ref_index = 0;
@@ -1369,11 +1394,10 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
ref_ptr = btrfs_item_ptr_offset(eb, slot);
ref_end = ref_ptr + btrfs_item_size(eb, slot);
- if (key->type == BTRFS_INODE_EXTREF_KEY) {
+ if (is_extref_item) {
struct btrfs_inode_extref *r;
ref_struct_size = sizeof(struct btrfs_inode_extref);
- log_ref_ver = 1;
r = (struct btrfs_inode_extref *)ref_ptr;
parent_objectid = btrfs_inode_extref_parent(eb, r);
} else {
@@ -1388,37 +1412,61 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
* copy the back ref in. The link count fixup code will take
* care of the rest
*/
- dir = read_one_inode(root, parent_objectid);
- if (!dir) {
- ret = -ENOENT;
+ dir = btrfs_iget_logging(parent_objectid, root);
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
+ if (ret == -ENOENT)
+ ret = 0;
+ dir = NULL;
goto out;
}
- inode = read_one_inode(root, inode_objectid);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(inode_objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ inode = NULL;
goto out;
}
while (ref_ptr < ref_end) {
- if (log_ref_ver) {
+ if (is_extref_item) {
ret = extref_get_fields(eb, ref_ptr, &name,
&ref_index, &parent_objectid);
+ if (ret)
+ goto out;
/*
* parent object can change from one array
* item to another.
*/
- if (!dir)
- dir = read_one_inode(root, parent_objectid);
if (!dir) {
- ret = -ENOENT;
- goto out;
+ dir = btrfs_iget_logging(parent_objectid, root);
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
+ dir = NULL;
+ /*
+ * A new parent dir may not have been
+ * logged and may not exist in the
+ * subvolume tree; see the comment
+ * above, before the loop, about
+ * getting the first parent dir.
+ */
+ if (ret == -ENOENT) {
+ /*
+ * The next extref may refer to
+ * another parent dir that
+ * exists, so continue.
+ */
+ ret = 0;
+ goto next;
+ }
+ goto out;
+ }
}
} else {
ret = ref_get_fields(eb, ref_ptr, &name, &ref_index);
+ if (ret)
+ goto out;
}
- if (ret)
- goto out;
ret = inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
ref_index, &name);
@@ -1452,10 +1500,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
}
/* Else, ret == 1, we already have a perfect match, we're done. */
+next:
ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + name.len;
kfree(name.name);
name.name = NULL;
- if (log_ref_ver) {
+ if (is_extref_item && dir) {
iput(&dir->vfs_inode);
dir = NULL;
}
@@ -1632,8 +1681,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
if (inode->vfs_inode.i_nlink == 0) {
if (S_ISDIR(inode->vfs_inode.i_mode)) {
- ret = replay_dir_deletes(trans, root, NULL, path,
- ino, 1);
+ ret = replay_dir_deletes(trans, root, NULL, path, ino, true);
if (ret)
goto out;
}
@@ -1681,9 +1729,9 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
break;
btrfs_release_path(path);
- inode = read_one_inode(root, key.offset);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(key.offset, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
break;
}
@@ -1719,9 +1767,9 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode;
struct inode *vfs_inode;
- inode = read_one_inode(root, objectid);
- if (!inode)
- return -EIO;
+ inode = btrfs_iget_logging(objectid, root);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
vfs_inode = &inode->vfs_inode;
key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
@@ -1760,14 +1808,14 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir;
int ret;
- inode = read_one_inode(root, location->objectid);
- if (!inode)
- return -ENOENT;
+ inode = btrfs_iget_logging(location->objectid, root);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
- dir = read_one_inode(root, dirid);
- if (!dir) {
+ dir = btrfs_iget_logging(dirid, root);
+ if (IS_ERR(dir)) {
iput(&inode->vfs_inode);
- return -EIO;
+ return PTR_ERR(dir);
}
ret = btrfs_add_link(trans, dir, inode, name, 1, index);
@@ -1844,9 +1892,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
bool update_size = true;
bool name_added = false;
- dir = read_one_inode(root, key->objectid);
- if (!dir)
- return -EIO;
+ dir = btrfs_iget_logging(key->objectid, root);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name);
if (ret)
@@ -2146,9 +2194,10 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
btrfs_dir_item_key_to_cpu(eb, di, &location);
btrfs_release_path(path);
btrfs_release_path(log_path);
- inode = read_one_inode(root, location.objectid);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(location.objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ inode = NULL;
goto out;
}
@@ -2284,7 +2333,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_root *log,
struct btrfs_path *path,
- u64 dirid, int del_all)
+ u64 dirid, bool del_all)
{
u64 range_start;
u64 range_end;
@@ -2300,14 +2349,17 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
if (!log_path)
return -ENOMEM;
- dir = read_one_inode(root, dirid);
- /* it isn't an error if the inode isn't there, that can happen
- * because we replay the deletes before we copy in the inode item
- * from the log
+ dir = btrfs_iget_logging(dirid, root);
+ /*
+ * It isn't an error if the inode isn't there, that can happen because
+ * we replay the deletes before we copy in the inode item from the log.
*/
- if (!dir) {
+ if (IS_ERR(dir)) {
btrfs_free_path(log_path);
- return 0;
+ ret = PTR_ERR(dir);
+ if (ret == -ENOENT)
+ ret = 0;
+ return ret;
}
range_start = 0;
@@ -2443,8 +2495,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
break;
mode = btrfs_inode_mode(eb, inode_item);
if (S_ISDIR(mode)) {
- ret = replay_dir_deletes(wc->trans,
- root, log, path, key.objectid, 0);
+ ret = replay_dir_deletes(wc->trans, root, log, path,
+ key.objectid, false);
if (ret)
break;
}
@@ -2466,9 +2518,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
struct btrfs_inode *inode;
u64 from;
- inode = read_one_inode(root, key.objectid);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(key.objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
break;
}
from = ALIGN(i_size_read(&inode->vfs_inode),
@@ -2519,9 +2571,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
key.type == BTRFS_INODE_EXTREF_KEY) {
ret = add_inode_ref(wc->trans, root, log, path,
eb, i, &key);
- if (ret && ret != -ENOENT)
+ if (ret)
break;
- ret = 0;
} else if (key.type == BTRFS_EXTENT_DATA_KEY) {
ret = replay_one_extent(wc->trans, root, path,
eb, i, &key);
@@ -2720,7 +2771,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
level = btrfs_header_level(log->node);
orig_level = level;
path->nodes[level] = log->node;
- atomic_inc(&log->node->refs);
+ refcount_inc(&log->node->refs);
path->slots[level] = 0;
while (1) {
@@ -2961,9 +3012,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
}
if (log_transid % 2 == 0)
- mark = EXTENT_DIRTY;
+ mark = EXTENT_DIRTY_LOG1;
else
- mark = EXTENT_NEW;
+ mark = EXTENT_DIRTY_LOG2;
/* we start IO on all the marked extents here, but we don't actually
* wait for them until later.
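The mark selection alternates with the parity of the log transaction id, so two consecutive log commits track their dirty extents with disjoint bits and their writeback accounting never overlaps. A minimal sketch of the mapping (illustrative only, not part of the patch):

	static inline u32 log_dirty_mark(u64 log_transid)
	{
		/* Even log transids use LOG1, odd ones use LOG2. */
		return (log_transid % 2 == 0) ? EXTENT_DIRTY_LOG1 : EXTENT_DIRTY_LOG2;
	}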
@@ -3094,7 +3145,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
ret = btrfs_write_marked_extents(fs_info,
&log_root_tree->dirty_log_pages,
- EXTENT_DIRTY | EXTENT_NEW);
+ EXTENT_DIRTY_LOG1 | EXTENT_DIRTY_LOG2);
blk_finish_plug(&plug);
/*
* As described above, -EAGAIN indicates a hole in the extents. We
@@ -3114,7 +3165,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
ret = btrfs_wait_tree_log_extents(log, mark);
if (!ret)
ret = btrfs_wait_tree_log_extents(log_root_tree,
- EXTENT_NEW | EXTENT_DIRTY);
+ EXTENT_DIRTY_LOG1 | EXTENT_DIRTY_LOG2);
if (ret) {
btrfs_set_log_full_commit(trans);
mutex_unlock(&log_root_tree->log_mutex);
@@ -3240,9 +3291,9 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
*/
btrfs_write_marked_extents(log->fs_info,
&log->dirty_log_pages,
- EXTENT_DIRTY | EXTENT_NEW);
+ EXTENT_DIRTY_LOG1 | EXTENT_DIRTY_LOG2);
btrfs_wait_tree_log_extents(log,
- EXTENT_DIRTY | EXTENT_NEW);
+ EXTENT_DIRTY_LOG1 | EXTENT_DIRTY_LOG2);
if (trans)
btrfs_abort_transaction(trans, ret);
@@ -3432,7 +3483,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans,
* inode item because on log replay we update the field to reflect
* all existing entries in the directory (see overwrite_item()).
*/
- return btrfs_delete_one_dir_name(trans, log, path, di);
+ return btrfs_del_item(trans, log, path);
}
/*
@@ -3472,26 +3523,27 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
return;
}
- ret = join_running_log_trans(root);
- if (ret)
- return;
-
- mutex_lock(&dir->log_mutex);
-
path = btrfs_alloc_path();
if (!path) {
- ret = -ENOMEM;
- goto out_unlock;
+ btrfs_set_log_full_commit(trans);
+ return;
}
+ ret = join_running_log_trans(root);
+ ASSERT(ret == 0, "join_running_log_trans() ret=%d", ret);
+ if (WARN_ON(ret))
+ goto out;
+
+ mutex_lock(&dir->log_mutex);
+
ret = del_logged_dentry(trans, root->log_root, path, btrfs_ino(dir),
name, index);
- btrfs_free_path(path);
-out_unlock:
mutex_unlock(&dir->log_mutex);
if (ret < 0)
btrfs_set_log_full_commit(trans);
btrfs_end_log_trans(root);
+out:
+ btrfs_free_path(path);
}
/* see comments for btrfs_del_dir_entries_in_log */
@@ -3501,7 +3553,6 @@ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, u64 dirid)
{
struct btrfs_root *log;
- u64 index;
int ret;
ret = inode_logged(trans, inode, NULL);
@@ -3513,13 +3564,13 @@ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
}
ret = join_running_log_trans(root);
- if (ret)
+ ASSERT(ret == 0, "join_running_log_trans() ret=%d", ret);
+ if (WARN_ON(ret))
return;
log = root->log_root;
mutex_lock(&inode->log_mutex);
- ret = btrfs_del_inode_ref(trans, log, name, btrfs_ino(inode),
- dirid, &index);
+ ret = btrfs_del_inode_ref(trans, log, name, btrfs_ino(inode), dirid, NULL);
mutex_unlock(&inode->log_mutex);
if (ret < 0 && ret != -ENOENT)
btrfs_set_log_full_commit(trans);
@@ -3684,7 +3735,7 @@ static int clone_leaf(struct btrfs_path *path, struct btrfs_log_ctx *ctx)
* Add extra ref to scratch eb so that it is not freed when callers
* release the path, so we can reuse it later if needed.
*/
- atomic_inc(&ctx->scratch_eb->refs);
+ refcount_inc(&ctx->scratch_eb->refs);
return 0;
}
@@ -4172,44 +4223,37 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
struct inode *inode, int log_inode_only,
u64 logged_isize)
{
- struct btrfs_map_token token;
u64 flags;
- btrfs_init_map_token(&token, leaf);
-
if (log_inode_only) {
 /* set the generation to zero so the recovery code
 * can tell the difference between a logging
 * just to say 'this inode exists' and a logging
 * to say 'update this inode with these values'
 */
- btrfs_set_token_inode_generation(&token, item, 0);
- btrfs_set_token_inode_size(&token, item, logged_isize);
+ btrfs_set_inode_generation(leaf, item, 0);
+ btrfs_set_inode_size(leaf, item, logged_isize);
} else {
- btrfs_set_token_inode_generation(&token, item,
- BTRFS_I(inode)->generation);
- btrfs_set_token_inode_size(&token, item, inode->i_size);
+ btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
+ btrfs_set_inode_size(leaf, item, inode->i_size);
}
- btrfs_set_token_inode_uid(&token, item, i_uid_read(inode));
- btrfs_set_token_inode_gid(&token, item, i_gid_read(inode));
- btrfs_set_token_inode_mode(&token, item, inode->i_mode);
- btrfs_set_token_inode_nlink(&token, item, inode->i_nlink);
+ btrfs_set_inode_uid(leaf, item, i_uid_read(inode));
+ btrfs_set_inode_gid(leaf, item, i_gid_read(inode));
+ btrfs_set_inode_mode(leaf, item, inode->i_mode);
+ btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
- btrfs_set_token_timespec_sec(&token, &item->atime,
- inode_get_atime_sec(inode));
- btrfs_set_token_timespec_nsec(&token, &item->atime,
- inode_get_atime_nsec(inode));
+ btrfs_set_timespec_sec(leaf, &item->atime, inode_get_atime_sec(inode));
+ btrfs_set_timespec_nsec(leaf, &item->atime, inode_get_atime_nsec(inode));
- btrfs_set_token_timespec_sec(&token, &item->mtime,
- inode_get_mtime_sec(inode));
- btrfs_set_token_timespec_nsec(&token, &item->mtime,
- inode_get_mtime_nsec(inode));
+ btrfs_set_timespec_sec(leaf, &item->mtime, inode_get_mtime_sec(inode));
+ btrfs_set_timespec_nsec(leaf, &item->mtime, inode_get_mtime_nsec(inode));
- btrfs_set_token_timespec_sec(&token, &item->ctime,
- inode_get_ctime_sec(inode));
- btrfs_set_token_timespec_nsec(&token, &item->ctime,
- inode_get_ctime_nsec(inode));
+ btrfs_set_timespec_sec(leaf, &item->ctime, inode_get_ctime_sec(inode));
+ btrfs_set_timespec_nsec(leaf, &item->ctime, inode_get_ctime_nsec(inode));
+
+ btrfs_set_timespec_sec(leaf, &item->otime, BTRFS_I(inode)->i_otime_sec);
+ btrfs_set_timespec_nsec(leaf, &item->otime, BTRFS_I(inode)->i_otime_nsec);
/*
* We do not need to set the nbytes field, in fact during a fast fsync
@@ -4220,13 +4264,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
* inode item in subvolume tree as needed (see overwrite_item()).
*/
- btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
- btrfs_set_token_inode_transid(&token, item, trans->transid);
- btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
+ btrfs_set_inode_sequence(leaf, item, inode_peek_iversion(inode));
+ btrfs_set_inode_transid(leaf, item, trans->transid);
+ btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
BTRFS_I(inode)->ro_flags);
- btrfs_set_token_inode_flags(&token, item, flags);
- btrfs_set_token_inode_block_group(&token, item, 0);
+ btrfs_set_inode_flags(leaf, item, flags);
+ btrfs_set_inode_block_group(leaf, item, 0);
}
static int log_inode_item(struct btrfs_trans_handle *trans,
@@ -7192,8 +7236,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
struct btrfs_path *path;
struct btrfs_trans_handle *trans;
struct btrfs_key key;
- struct btrfs_key found_key;
- struct btrfs_root *log;
struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
struct walk_control wc = {
.process_func = process_one_buffer,
@@ -7227,6 +7269,9 @@ again:
key.offset = (u64)-1;
while (1) {
+ struct btrfs_root *log;
+ struct btrfs_key found_key;
+
ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
if (ret < 0) {
@@ -7255,6 +7300,12 @@ again:
true);
if (IS_ERR(wc.replay_dest)) {
ret = PTR_ERR(wc.replay_dest);
+ wc.replay_dest = NULL;
+ if (ret != -ENOENT) {
+ btrfs_put_root(log);
+ btrfs_abort_transaction(trans, ret);
+ goto error;
+ }
/*
* We didn't find the subvol, likely because it was
@@ -7267,36 +7318,36 @@ again:
* block from being modified, and we'll just bail for
* each subsequent pass.
*/
- if (ret == -ENOENT)
- ret = btrfs_pin_extent_for_log_replay(trans, log->node);
- btrfs_put_root(log);
-
- if (!ret)
- goto next;
- btrfs_abort_transaction(trans, ret);
- goto error;
+ ret = btrfs_pin_extent_for_log_replay(trans, log->node);
+ if (ret) {
+ btrfs_put_root(log);
+ btrfs_abort_transaction(trans, ret);
+ goto error;
+ }
+ goto next;
}
wc.replay_dest->log_root = log;
ret = btrfs_record_root_in_trans(trans, wc.replay_dest);
- if (ret)
- /* The loop needs to continue due to the root refs */
+ if (ret) {
btrfs_abort_transaction(trans, ret);
- else
- ret = walk_log_tree(trans, log, &wc);
+ goto next;
+ }
- if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
- ret = fixup_inode_link_counts(trans, wc.replay_dest,
- path);
- if (ret)
- btrfs_abort_transaction(trans, ret);
+ ret = walk_log_tree(trans, log, &wc);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto next;
}
- if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
+ if (wc.stage == LOG_WALK_REPLAY_ALL) {
struct btrfs_root *root = wc.replay_dest;
- btrfs_release_path(path);
-
+ ret = fixup_inode_link_counts(trans, wc.replay_dest, path);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto next;
+ }
/*
* We have just replayed everything, and the highest
* objectid of fs roots probably has changed in case
@@ -7306,17 +7357,20 @@ again:
* could only happen during mount.
*/
ret = btrfs_init_root_free_objectid(root);
- if (ret)
+ if (ret) {
btrfs_abort_transaction(trans, ret);
+ goto next;
+ }
+ }
+next:
+ if (wc.replay_dest) {
+ wc.replay_dest->log_root = NULL;
+ btrfs_put_root(wc.replay_dest);
}
-
- wc.replay_dest->log_root = NULL;
- btrfs_put_root(wc.replay_dest);
btrfs_put_root(log);
if (ret)
goto error;
-next:
if (found_key.offset == 0)
break;
key.offset = found_key.offset - 1;
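The restructured loop funnels every per-root failure through the new next: label, so the root references are dropped exactly once no matter where the error happened. Simplified control flow after this patch (a sketch; details elided):

	while (1) {
		...
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto next;	/* still drop the refs below */
		}
		...
	next:
		if (wc.replay_dest) {
			wc.replay_dest->log_root = NULL;
			btrfs_put_root(wc.replay_dest);
		}
		btrfs_put_root(log);
		if (ret)
			goto error;	/* propagate only after cleanup */
		...
	}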
@@ -7447,6 +7501,8 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
* full log sync.
* Also we don't need to worry with renames, since btrfs_rename() marks the log
* for full commit when renaming a subvolume.
+ *
+ * Must be called before creating the subvolume entry in its parent directory.
*/
void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans,
struct btrfs_inode *dir)
@@ -7483,6 +7539,9 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
bool log_pinned = false;
int ret;
+ btrfs_init_log_ctx(&ctx, inode);
+ ctx.logging_new_name = true;
+
/*
* this will force the logging code to walk the dentry chain
* up for the file
@@ -7514,6 +7573,13 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
ret = 0;
/*
+ * Now that we know we need to update the log, allocate the scratch eb
+ * for the context before joining a log transaction below, as this can
+ * take time and therefore we could delay log commits from other tasks.
+ */
+ btrfs_init_log_ctx_scratch_eb(&ctx);
+
+ /*
* If we are doing a rename (old_dir is not NULL) from a directory that
* was previously logged, make sure that on log replay we get the old
* dir entry deleted. This is needed because we will also log the new
@@ -7531,6 +7597,14 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
&old_dentry->d_name, 0, &fname);
if (ret)
goto out;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ fscrypt_free_filename(&fname);
+ goto out;
+ }
+
/*
* We have two inodes to update in the log, the old directory and
* the inode that got renamed, so we must pin the log to prevent
@@ -7544,19 +7618,13 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
* mark the log for a full commit.
*/
if (WARN_ON_ONCE(ret < 0)) {
+ btrfs_free_path(path);
fscrypt_free_filename(&fname);
goto out;
}
log_pinned = true;
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- fscrypt_free_filename(&fname);
- goto out;
- }
-
/*
* Other concurrent task might be logging the old directory,
* as it can be triggered when logging other inode that had or
@@ -7588,9 +7656,6 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
goto out;
}
- btrfs_init_log_ctx(&ctx, inode);
- ctx.logging_new_name = true;
- btrfs_init_log_ctx_scratch_eb(&ctx);
/*
* We don't care about the return value. If we fail to log the new name
* then we know the next attempt to sync the log will fallback to a full
@@ -7599,7 +7664,6 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
* inconsistent state after a rename operation.
*/
btrfs_log_inode_parent(trans, inode, parent, LOG_INODE_EXISTS, &ctx);
- free_extent_buffer(ctx.scratch_eb);
ASSERT(list_empty(&ctx.conflict_inodes));
out:
/*
@@ -7612,5 +7676,6 @@ out:
btrfs_set_log_full_commit(trans);
if (log_pinned)
btrfs_end_log_trans(root);
+ free_extent_buffer(ctx.scratch_eb);
}
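The reordering in btrfs_log_new_name() moves all potentially slow setup (context init, path allocation, scratch extent buffer allocation) ahead of pinning the log transaction, so log commits by other tasks are not delayed, and the scratch buffer is now freed on every exit path. Simplified ordering after this patch (a sketch; error handling elided):

	btrfs_init_log_ctx(&ctx, inode);
	ctx.logging_new_name = true;
	...
	btrfs_init_log_ctx_scratch_eb(&ctx);	/* slow allocation first */
	path = btrfs_alloc_path();		/* also before pinning */
	/* ... pin the log transaction ... */
out:
	...
	free_extent_buffer(ctx.scratch_eb);	/* freed on all paths */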
diff --git a/fs/btrfs/tree-mod-log.c b/fs/btrfs/tree-mod-log.c
index 1ac2678fc4ca..9e8cb3b7c064 100644
--- a/fs/btrfs/tree-mod-log.c
+++ b/fs/btrfs/tree-mod-log.c
@@ -27,18 +27,29 @@ struct tree_mod_elem {
/* This is used for BTRFS_MOD_LOG_KEY* and BTRFS_MOD_LOG_ROOT_REPLACE. */
u64 generation;
- /* Those are used for op == BTRFS_MOD_LOG_KEY_{REPLACE,REMOVE}. */
- struct btrfs_disk_key key;
- u64 blockptr;
-
- /* This is used for op == BTRFS_MOD_LOG_MOVE_KEYS. */
- struct {
- int dst_slot;
- int nr_items;
- } move;
-
- /* This is used for op == BTRFS_MOD_LOG_ROOT_REPLACE. */
- struct tree_mod_root old_root;
+ union {
+ /*
+ * This is used for the following op types:
+ *
+ * BTRFS_MOD_LOG_KEY_REMOVE_WHILE_FREEING
+ * BTRFS_MOD_LOG_KEY_REMOVE_WHILE_MOVING
+ * BTRFS_MOD_LOG_KEY_REMOVE
+ * BTRFS_MOD_LOG_KEY_REPLACE
+ */
+ struct {
+ struct btrfs_disk_key key;
+ u64 blockptr;
+ } slot_change;
+
+ /* This is used for op == BTRFS_MOD_LOG_MOVE_KEYS. */
+ struct {
+ int dst_slot;
+ int nr_items;
+ } move;
+
+ /* This is used for op == BTRFS_MOD_LOG_ROOT_REPLACE. */
+ struct tree_mod_root old_root;
+ };
};
/*
@@ -164,6 +175,30 @@ static noinline int tree_mod_log_insert(struct btrfs_fs_info *fs_info,
return 0;
}
+static inline bool skip_eb_logging(const struct extent_buffer *eb)
+{
+ const u64 owner = btrfs_header_owner(eb);
+
+ if (btrfs_header_level(eb) == 0)
+ return true;
+
+ /*
+ * Tree mod logging exists so that there's a consistent view of the
+ * extents and backrefs of inodes even if, while a task is iterating
+ * over them, other tasks are modifying subvolume trees and the extent
+ * tree (including running delayed refs). So we only need to log extent
+ * buffers from the extent tree and subvolume trees.
+ */
+
+ if (owner == BTRFS_EXTENT_TREE_OBJECTID)
+ return false;
+
+ if (btrfs_is_fstree(owner))
+ return false;
+
+ return true;
+}
+
/*
* Determines if logging can be omitted. Returns true if it can. Otherwise, it
* returns false with the tree_mod_log_lock acquired. The caller must hold
@@ -174,7 +209,7 @@ static bool tree_mod_dont_log(struct btrfs_fs_info *fs_info, const struct extent
{
if (!test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
return true;
- if (eb && btrfs_header_level(eb) == 0)
+ if (eb && skip_eb_logging(eb))
return true;
write_lock(&fs_info->tree_mod_log_lock);
@@ -192,7 +227,7 @@ static bool tree_mod_need_log(const struct btrfs_fs_info *fs_info,
{
if (!test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
return false;
- if (eb && btrfs_header_level(eb) == 0)
+ if (eb && skip_eb_logging(eb))
return false;
return true;
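With skip_eb_logging() in place, both gates now filter on the buffer's owner as well as its level. Illustrative outcomes, assuming the standard tree objectids:

	/*
	 * level == 0 (any leaf)                      -> skipped
	 * BTRFS_EXTENT_TREE_OBJECTID                 -> logged
	 * subvolume trees (btrfs_is_fstree(owner))   -> logged
	 * chunk, device, csum, ... (all other trees) -> skipped
	 */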
@@ -204,15 +239,17 @@ static struct tree_mod_elem *alloc_tree_mod_elem(const struct extent_buffer *eb,
{
struct tree_mod_elem *tm;
+ /* Can't be one of these ops, due to the union in struct tree_mod_elem. */
+ ASSERT(op != BTRFS_MOD_LOG_MOVE_KEYS);
+ ASSERT(op != BTRFS_MOD_LOG_ROOT_REPLACE);
+
tm = kzalloc(sizeof(*tm), GFP_NOFS);
if (!tm)
return NULL;
tm->logical = eb->start;
- if (op != BTRFS_MOD_LOG_KEY_ADD) {
- btrfs_node_key(eb, &tm->key, slot);
- tm->blockptr = btrfs_node_blockptr(eb, slot);
- }
+ btrfs_node_key(eb, &tm->slot_change.key, slot);
+ tm->slot_change.blockptr = btrfs_node_blockptr(eb, slot);
tm->op = op;
tm->slot = slot;
tm->generation = btrfs_node_ptr_generation(eb, slot);
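Since the three op families never touch each other's fields, overlaying them in an anonymous union shrinks every tree_mod_elem allocation. The new ASSERTs are the safety net for that layout; a sketch of the hypothetical misuse they catch:

	tm = alloc_tree_mod_elem(eb, slot, BTRFS_MOD_LOG_MOVE_KEYS);
	/*
	 * Would be a bug: alloc_tree_mod_elem() writes tm->slot_change,
	 * which now overlaps tm->move and tm->old_root in the union.
	 */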
@@ -830,8 +867,8 @@ static void tree_mod_log_rewind(struct btrfs_fs_info *fs_info,
fallthrough;
case BTRFS_MOD_LOG_KEY_REMOVE_WHILE_MOVING:
case BTRFS_MOD_LOG_KEY_REMOVE:
- btrfs_set_node_key(eb, &tm->key, tm->slot);
- btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
+ btrfs_set_node_key(eb, &tm->slot_change.key, tm->slot);
+ btrfs_set_node_blockptr(eb, tm->slot, tm->slot_change.blockptr);
btrfs_set_node_ptr_generation(eb, tm->slot,
tm->generation);
n++;
@@ -840,8 +877,8 @@ static void tree_mod_log_rewind(struct btrfs_fs_info *fs_info,
break;
case BTRFS_MOD_LOG_KEY_REPLACE:
BUG_ON(tm->slot >= n);
- btrfs_set_node_key(eb, &tm->key, tm->slot);
- btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
+ btrfs_set_node_key(eb, &tm->slot_change.key, tm->slot);
+ btrfs_set_node_blockptr(eb, tm->slot, tm->slot_change.blockptr);
btrfs_set_node_ptr_generation(eb, tm->slot,
tm->generation);
break;
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
index fc59b57257d6..7e16a253fb35 100644
--- a/fs/btrfs/ulist.c
+++ b/fs/btrfs/ulist.c
@@ -129,21 +129,25 @@ void ulist_free(struct ulist *ulist)
kfree(ulist);
}
+static int ulist_node_val_key_cmp(const void *key, const struct rb_node *node)
+{
+ const u64 *val = key;
+ const struct ulist_node *unode = rb_entry(node, struct ulist_node, rb_node);
+
+ if (unode->val < *val)
+ return 1;
+ else if (unode->val > *val)
+ return -1;
+
+ return 0;
+}
+
static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val)
{
- struct rb_node *n = ulist->root.rb_node;
- struct ulist_node *u = NULL;
-
- while (n) {
- u = rb_entry(n, struct ulist_node, rb_node);
- if (u->val < val)
- n = n->rb_right;
- else if (u->val > val)
- n = n->rb_left;
- else
- return u;
- }
- return NULL;
+ struct rb_node *node;
+
+ node = rb_find(&val, &ulist->root, ulist_node_val_key_cmp);
+ return rb_entry_safe(node, struct ulist_node, rb_node);
}
static void ulist_rbtree_erase(struct ulist *ulist, struct ulist_node *node)
@@ -155,25 +159,20 @@ static void ulist_rbtree_erase(struct ulist *ulist, struct ulist_node *node)
ulist->nnodes--;
}
+static int ulist_node_val_cmp(struct rb_node *new, const struct rb_node *existing)
+{
+ const struct ulist_node *unode = rb_entry(new, struct ulist_node, rb_node);
+
+ return ulist_node_val_key_cmp(&unode->val, existing);
+}
+
static int ulist_rbtree_insert(struct ulist *ulist, struct ulist_node *ins)
{
- struct rb_node **p = &ulist->root.rb_node;
- struct rb_node *parent = NULL;
- struct ulist_node *cur = NULL;
-
- while (*p) {
- parent = *p;
- cur = rb_entry(parent, struct ulist_node, rb_node);
-
- if (cur->val < ins->val)
- p = &(*p)->rb_right;
- else if (cur->val > ins->val)
- p = &(*p)->rb_left;
- else
- return -EEXIST;
- }
- rb_link_node(&ins->rb_node, parent, p);
- rb_insert_color(&ins->rb_node, &ulist->root);
+ struct rb_node *node;
+
+ node = rb_find_add(&ins->rb_node, &ulist->root, ulist_node_val_cmp);
+ if (node)
+ return -EEXIST;
return 0;
}
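The rewrite relies on the generic rb_find()/rb_find_add() helpers from <linux/rbtree.h>. Their comparator conventions differ: the lookup variant takes the search key first and an existing node second, while the insert variant compares a new node against an existing one, which is why ulist_node_val_cmp() can simply delegate to the key comparator. A condensed sketch of the pairing used above:

	/* Lookup: negative means "go left", positive means "go right". */
	node = rb_find(&val, &ulist->root, ulist_node_val_key_cmp);

	/* Insert: returns the clashing node, or NULL when inserted. */
	node = rb_find_add(&ins->rb_node, &ulist->root, ulist_node_val_cmp);
	if (node)
		return -EEXIST;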
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 89835071cfea..fa7a929a0461 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -18,7 +18,6 @@
#include "transaction.h"
#include "volumes.h"
#include "raid56.h"
-#include "rcu-string.h"
#include "dev-replace.h"
#include "sysfs.h"
#include "tree-checker.h"
@@ -214,10 +213,8 @@ void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf)
u64 flags = bg_flags;
u32 size_bp = size_buf;
- if (!flags) {
- strcpy(bp, "NONE");
+ if (!flags)
return;
- }
#define DESCRIBE_FLAG(flag, desc) \
do { \
@@ -403,7 +400,11 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
static void btrfs_free_device(struct btrfs_device *device)
{
WARN_ON(!list_empty(&device->post_commit_list));
- rcu_string_free(device->name);
+ /*
+ * No need to call kfree_rcu() or take the RCU read lock here, as
+ * nothing is reading the device name.
+ */
+ kfree(rcu_dereference_raw(device->name));
btrfs_extent_io_tree_release(&device->alloc_state);
btrfs_destroy_dev_zone_info(device);
kfree(device);
@@ -414,6 +415,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
struct btrfs_device *device;
WARN_ON(fs_devices->opened);
+ WARN_ON(fs_devices->holding);
while (!list_empty(&fs_devices->devices)) {
device = list_first_entry(&fs_devices->devices,
struct btrfs_device, dev_list);
@@ -473,7 +475,7 @@ btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder,
struct block_device *bdev;
int ret;
- *bdev_file = bdev_file_open_by_path(device_path, flags, holder, NULL);
+ *bdev_file = bdev_file_open_by_path(device_path, flags, holder, &fs_holder_ops);
if (IS_ERR(*bdev_file)) {
ret = PTR_ERR(*bdev_file);
@@ -488,7 +490,7 @@ btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder,
if (holder) {
ret = set_blocksize(*bdev_file, BTRFS_BDEV_BLOCKSIZE);
if (ret) {
- fput(*bdev_file);
+ bdev_fput(*bdev_file);
goto error;
}
}
@@ -496,7 +498,7 @@ btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder,
*disk_super = btrfs_read_disk_super(bdev, 0, false);
if (IS_ERR(*disk_super)) {
ret = PTR_ERR(*disk_super);
- fput(*bdev_file);
+ bdev_fput(*bdev_file);
goto error;
}
@@ -541,7 +543,7 @@ static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device
continue;
if (devt && devt != device->devt)
continue;
- if (fs_devices->opened) {
+ if (fs_devices->opened || fs_devices->holding) {
if (devt)
ret = -EBUSY;
break;
@@ -657,7 +659,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
if (!device->name)
return -EINVAL;
- ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
+ ret = btrfs_get_bdev_and_sb(rcu_dereference_raw(device->name), flags, holder, 1,
&bdev_file, &disk_super);
if (ret)
return ret;
@@ -674,8 +676,8 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
if (btrfs_super_incompat_flags(disk_super) &
BTRFS_FEATURE_INCOMPAT_METADATA_UUID) {
- pr_err(
- "BTRFS: Invalid seeding and uuid-changed device detected\n");
+ btrfs_err(NULL,
+ "invalid seeding and uuid-changed device detected");
goto error_free_page;
}
@@ -701,7 +703,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
if (device->devt != device->bdev->bd_dev) {
btrfs_warn(NULL,
"device %s maj:min changed from %d:%d to %d:%d",
- device->name->str, MAJOR(device->devt),
+ rcu_dereference_raw(device->name), MAJOR(device->devt),
MINOR(device->devt), MAJOR(device->bdev->bd_dev),
MINOR(device->bdev->bd_dev));
@@ -720,7 +722,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
error_free_page:
btrfs_release_disk_super(disk_super);
- fput(bdev_file);
+ bdev_fput(bdev_file);
return -EINVAL;
}
@@ -749,7 +751,7 @@ static bool is_same_device(struct btrfs_device *device, const char *new_path)
goto out;
rcu_read_lock();
- ret = strscpy(old_path, rcu_str_deref(device->name), PATH_MAX);
+ ret = strscpy(old_path, rcu_dereference(device->name), PATH_MAX);
rcu_read_unlock();
if (ret < 0)
goto out;
@@ -782,11 +784,11 @@ static noinline struct btrfs_device *device_list_add(const char *path,
{
struct btrfs_device *device;
struct btrfs_fs_devices *fs_devices = NULL;
- struct rcu_string *name;
+ const char *name;
u64 found_transid = btrfs_super_generation(disk_super);
u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
dev_t path_devt;
- int error;
+ int ret;
bool same_fsid_diff_dev = false;
bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
@@ -798,11 +800,11 @@ static noinline struct btrfs_device *device_list_add(const char *path,
return ERR_PTR(-EAGAIN);
}
- error = lookup_bdev(path, &path_devt);
- if (error) {
+ ret = lookup_bdev(path, &path_devt);
+ if (ret) {
btrfs_err(NULL, "failed to lookup block device for path %s: %d",
- path, error);
- return ERR_PTR(error);
+ path, ret);
+ return ERR_PTR(ret);
}
fs_devices = find_fsid_by_device(disk_super, path_devt, &same_fsid_diff_dev);
@@ -819,7 +821,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (same_fsid_diff_dev) {
generate_random_uuid(fs_devices->fsid);
fs_devices->temp_fsid = true;
- pr_info("BTRFS: device %s (%d:%d) using temp-fsid %pU\n",
+ btrfs_info(NULL, "device %s (%d:%d) using temp-fsid %pU",
path, MAJOR(path_devt), MINOR(path_devt),
fs_devices->fsid);
}
@@ -890,6 +892,8 @@ static noinline struct btrfs_device *device_list_add(const char *path,
current->comm, task_pid_nr(current));
} else if (!device->name || !is_same_device(device, path)) {
+ const char *old_name;
+
/*
* When FS is already mounted.
* 1. If you are here and if the device->name is NULL that
@@ -943,27 +947,31 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (device->bdev) {
if (device->devt != path_devt) {
mutex_unlock(&fs_devices->device_list_mutex);
- btrfs_warn_in_rcu(NULL,
+ btrfs_warn(NULL,
"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
path, devid, found_transid,
current->comm,
task_pid_nr(current));
return ERR_PTR(-EEXIST);
}
- btrfs_info_in_rcu(NULL,
+ btrfs_info(NULL,
"devid %llu device path %s changed to %s scanned by %s (%d)",
devid, btrfs_dev_name(device),
path, current->comm,
task_pid_nr(current));
}
- name = rcu_string_strdup(path, GFP_NOFS);
+ name = kstrdup(path, GFP_NOFS);
if (!name) {
mutex_unlock(&fs_devices->device_list_mutex);
return ERR_PTR(-ENOMEM);
}
- rcu_string_free(device->name);
+ rcu_read_lock();
+ old_name = rcu_dereference(device->name);
+ rcu_read_unlock();
rcu_assign_pointer(device->name, name);
+ kfree_rcu_mightsleep(old_name);
+
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
fs_devices->missing_devices--;
clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
@@ -1012,7 +1020,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
* uuid mutex so nothing we touch in here is going to disappear.
*/
if (orig_dev->name)
- dev_path = orig_dev->name->str;
+ dev_path = rcu_dereference_raw(orig_dev->name);
device = btrfs_alloc_device(NULL, &orig_dev->devid,
orig_dev->uuid, dev_path);
@@ -1070,7 +1078,7 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
continue;
if (device->bdev_file) {
- fput(device->bdev_file);
+ bdev_fput(device->bdev_file);
device->bdev = NULL;
device->bdev_file = NULL;
fs_devices->open_devices--;
@@ -1117,7 +1125,7 @@ static void btrfs_close_bdev(struct btrfs_device *device)
invalidate_bdev(device->bdev);
}
- fput(device->bdev_file);
+ bdev_fput(device->bdev_file);
}
static void btrfs_close_one_device(struct btrfs_device *device)
@@ -1197,7 +1205,7 @@ void btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
mutex_lock(&uuid_mutex);
close_fs_devices(fs_devices);
- if (!fs_devices->opened) {
+ if (!fs_devices->opened && !fs_devices->holding) {
list_splice_init(&fs_devices->seed_list, &list);
/*
@@ -1414,7 +1422,7 @@ static bool btrfs_skip_registration(struct btrfs_super_block *disk_super,
list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (device->bdev && (device->bdev->bd_dev == devt) &&
- strcmp(device->name->str, path) != 0) {
+ strcmp(rcu_dereference_raw(device->name), path) != 0) {
mutex_unlock(&fs_devices->device_list_mutex);
/* Do not skip registration. */
@@ -1440,7 +1448,7 @@ static bool btrfs_skip_registration(struct btrfs_super_block *disk_super,
* the device or return an error. Multi-device and seeding devices are registered
* in both cases.
*/
-struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
+struct btrfs_device *btrfs_scan_one_device(const char *path,
bool mount_arg_dev)
{
struct btrfs_super_block *disk_super;
@@ -1461,7 +1469,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
* values temporarily, as the device paths of the fsid are the only
* required information for assembling the volume.
*/
- bdev_file = bdev_file_open_by_path(path, flags, NULL, NULL);
+ bdev_file = bdev_file_open_by_path(path, BLK_OPEN_READ, NULL, NULL);
if (IS_ERR(bdev_file))
return ERR_CAST(bdev_file);
@@ -1473,7 +1481,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
devt = file_bdev(bdev_file)->bd_dev;
if (btrfs_skip_registration(disk_super, path, devt, mount_arg_dev)) {
- pr_debug("BTRFS: skip registering single non-seed device %s (%d:%d)\n",
+ btrfs_debug(NULL, "skip registering single non-seed device %s (%d:%d)",
path, MAJOR(devt), MINOR(devt));
btrfs_free_stale_devices(devt, NULL);
@@ -1490,7 +1498,7 @@ free_disk_super:
btrfs_release_disk_super(disk_super);
error_bdev_put:
- fput(bdev_file);
+ bdev_fput(bdev_file);
return device;
}
@@ -2164,7 +2172,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct btrfs_devic
btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
/* Update ctime/mtime for device path for libblkid */
- update_dev_time(device->name->str);
+ update_dev_time(rcu_dereference_raw(device->name));
}
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
@@ -2204,7 +2212,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
}
if (btrfs_pinned_by_swapfile(fs_info, device)) {
- btrfs_warn_in_rcu(fs_info,
+ btrfs_warn(fs_info,
"cannot remove device %s (devid %llu) due to active swapfile",
btrfs_dev_name(device), device->devid);
return -ETXTBSY;
@@ -2294,7 +2302,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
* free the device.
*
* We cannot call btrfs_close_bdev() here because we're holding the sb
- * write lock, and fput() on the block device will pull in the
+ * write lock, and bdev_fput() on the block device will pull in the
 * ->open_mutex on the block device and its dependencies. Instead
* just flush the device and let the caller do the final bdev_release.
*/
@@ -2473,7 +2481,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info,
else
memcpy(args->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
btrfs_release_disk_super(disk_super);
- fput(bdev_file);
+ bdev_fput(bdev_file);
return 0;
}
@@ -2705,7 +2713,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
return -EROFS;
bdev_file = bdev_file_open_by_path(device_path, BLK_OPEN_WRITE,
- fs_info->bdev_holder, NULL);
+ fs_info->sb, &fs_holder_ops);
if (IS_ERR(bdev_file))
return PTR_ERR(bdev_file);
@@ -2921,7 +2929,7 @@ error_free_zone:
error_free_device:
btrfs_free_device(device);
error:
- fput(bdev_file);
+ bdev_fput(bdev_file);
if (locked) {
mutex_unlock(&uuid_mutex);
up_write(&sb->s_umount);
@@ -3282,6 +3290,12 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
device->bytes_used - dev_extent_len);
atomic64_add(dev_extent_len, &fs_info->free_chunk_space);
btrfs_clear_space_info_full(fs_info);
+
+ if (list_empty(&device->post_commit_list)) {
+ list_add_tail(&device->post_commit_list,
+ &trans->transaction->dev_update_list);
+ }
+
mutex_unlock(&fs_info->chunk_mutex);
}
}
@@ -3398,7 +3412,8 @@ out:
return ret;
}
-int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
+int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset,
+ bool verbose)
{
struct btrfs_root *root = fs_info->chunk_root;
struct btrfs_trans_handle *trans;
@@ -3428,7 +3443,7 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
/* step one, relocate all the extents inside this chunk */
btrfs_scrub_pause(fs_info);
- ret = btrfs_relocate_block_group(fs_info, chunk_offset);
+ ret = btrfs_relocate_block_group(fs_info, chunk_offset, true);
btrfs_scrub_continue(fs_info);
if (ret) {
/*
@@ -3538,7 +3553,8 @@ again:
btrfs_release_path(path);
if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
- ret = btrfs_relocate_chunk(fs_info, found_key.offset);
+ ret = btrfs_relocate_chunk(fs_info, found_key.offset,
+ true);
if (ret == -ENOSPC)
failed++;
else
@@ -4203,7 +4219,7 @@ again:
}
}
- ret = btrfs_relocate_chunk(fs_info, found_key.offset);
+ ret = btrfs_relocate_chunk(fs_info, found_key.offset, true);
mutex_unlock(&fs_info->reclaim_bgs_lock);
if (ret == -ENOSPC) {
enospc_errors++;
@@ -4971,7 +4987,7 @@ again:
goto done;
}
- ret = btrfs_relocate_chunk(fs_info, chunk_offset);
+ ret = btrfs_relocate_chunk(fs_info, chunk_offset, true);
mutex_unlock(&fs_info->reclaim_bgs_lock);
if (ret == -ENOSPC) {
failed++;
@@ -5003,8 +5019,8 @@ again:
mutex_lock(&fs_info->chunk_mutex);
/* Clear all state bits beyond the shrunk device size */
- btrfs_clear_extent_bits(&device->alloc_state, new_size, (u64)-1,
- CHUNK_STATE_MASK);
+ btrfs_clear_extent_bit(&device->alloc_state, new_size, (u64)-1,
+ CHUNK_STATE_MASK, NULL);
btrfs_device_set_disk_total_bytes(device, new_size);
if (list_empty(&device->post_commit_list))
@@ -5431,9 +5447,9 @@ static void chunk_map_device_clear_bits(struct btrfs_chunk_map *map, unsigned in
struct btrfs_io_stripe *stripe = &map->stripes[i];
struct btrfs_device *device = stripe->dev;
- btrfs_clear_extent_bits(&device->alloc_state, stripe->physical,
- stripe->physical + map->stripe_size - 1,
- bits | EXTENT_NOWAIT);
+ btrfs_clear_extent_bit(&device->alloc_state, stripe->physical,
+ stripe->physical + map->stripe_size - 1,
+ bits | EXTENT_NOWAIT, NULL);
}
}
@@ -6917,9 +6933,9 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
generate_random_uuid(dev->uuid);
if (path) {
- struct rcu_string *name;
+ const char *name;
- name = rcu_string_strdup(path, GFP_KERNEL);
+ name = kstrdup(path, GFP_KERNEL);
if (!name) {
btrfs_free_device(dev);
return ERR_PTR(-ENOMEM);
@@ -7168,7 +7184,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
if (IS_ERR(fs_devices))
return fs_devices;
- ret = open_fs_devices(fs_devices, BLK_OPEN_READ, fs_info->bdev_holder);
+ ret = open_fs_devices(fs_devices, BLK_OPEN_READ, fs_info->sb);
if (ret) {
free_fs_devices(fs_devices);
return ERR_PTR(ret);
@@ -7700,7 +7716,7 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
return -ENOMEM;
ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
if (ret < 0) {
- btrfs_warn_in_rcu(fs_info,
+ btrfs_warn(fs_info,
"error %d while searching for dev_stats item for device %s",
ret, btrfs_dev_name(device));
goto out;
@@ -7711,7 +7727,7 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
/* need to delete old one and insert a new one */
ret = btrfs_del_item(trans, dev_root, path);
if (ret != 0) {
- btrfs_warn_in_rcu(fs_info,
+ btrfs_warn(fs_info,
"delete too small dev_stats item for device %s failed %d",
btrfs_dev_name(device), ret);
goto out;
@@ -7725,7 +7741,7 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, dev_root, path,
&key, sizeof(*ptr));
if (ret < 0) {
- btrfs_warn_in_rcu(fs_info,
+ btrfs_warn(fs_info,
"insert dev_stats item for device %s failed %d",
btrfs_dev_name(device), ret);
goto out;
@@ -7788,7 +7804,7 @@ void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
if (!dev->dev_stats_valid)
return;
- btrfs_err_rl_in_rcu(dev->fs_info,
+ btrfs_err_rl(dev->fs_info,
"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
btrfs_dev_name(dev),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
@@ -7808,7 +7824,7 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
if (i == BTRFS_DEV_STAT_VALUES_MAX)
return; /* all values == 0, suppress message */
- btrfs_info_in_rcu(dev->fs_info,
+ btrfs_info(dev->fs_info,
"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
btrfs_dev_name(dev),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
@@ -7932,7 +7948,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
}
/*
- * Very old mkfs.btrfs (before v4.1) will not respect the reserved
+ * Very old mkfs.btrfs (before v4.15) will not respect the reserved
 * space. Although the kernel can handle it without problems, better to warn
* the users.
*/
@@ -8184,7 +8200,7 @@ static int relocating_repair_kthread(void *data)
btrfs_info(fs_info,
"zoned: relocating block group %llu to repair IO failure",
target);
- ret = btrfs_relocate_chunk(fs_info, target);
+ ret = btrfs_relocate_chunk(fs_info, target, true);
out:
if (cache)
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 137cc232f58e..a56e873a3029 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -21,7 +21,6 @@
#include <uapi/linux/btrfs.h>
#include <uapi/linux/btrfs_tree.h>
#include "messages.h"
-#include "rcu-string.h"
#include "extent-io-tree.h"
struct block_device;
@@ -114,7 +113,8 @@ struct btrfs_device {
struct btrfs_fs_devices *fs_devices;
struct btrfs_fs_info *fs_info;
- struct rcu_string __rcu *name;
+ /* Device path or NULL if missing. */
+ const char __rcu *name;
u64 generation;
@@ -422,6 +422,16 @@ struct btrfs_fs_devices {
/* Count fs-devices opened. */
int opened;
+ /*
+ * Counter of the processes that are holding this fs_devices but not
+ * yet opened.
+ * This is for mount handling, as we can only open the fs_devices
+ * after a super block is created. But we cannot take uuid_mutex
+ * during sget_fc(), thus we have to hold the fs_devices (meaning it
+ * cannot be released) until a super block is returned.
+ */
+ int holding;
+
/* Set when we find or add a device that doesn't have the nonrot flag set. */
bool rotating;
/* Devices support TRIM/discard commands. */
@@ -667,7 +677,7 @@ enum btrfs_map_op {
BTRFS_MAP_GET_READ_MIRRORS,
};
-static inline enum btrfs_map_op btrfs_op(struct bio *bio)
+static inline enum btrfs_map_op btrfs_op(const struct bio *bio)
{
switch (bio_op(bio)) {
case REQ_OP_WRITE:
@@ -719,8 +729,7 @@ struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
void btrfs_mapping_tree_free(struct btrfs_fs_info *fs_info);
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
blk_mode_t flags, void *holder);
-struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
- bool mount_arg_dev);
+struct btrfs_device *btrfs_scan_one_device(const char *path, bool mount_arg_dev);
int btrfs_forget_devices(dev_t devt);
void btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices);
@@ -754,7 +763,8 @@ void btrfs_describe_block_groups(u64 flags, char *buf, u32 size_buf);
int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
-int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset);
+int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset,
+ bool verbose);
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset);
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
@@ -846,7 +856,7 @@ static inline const char *btrfs_dev_name(const struct btrfs_device *device)
if (!device || test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
return "<missing disk>";
else
- return rcu_str_deref(device->name);
+ return rcu_dereference(device->name);
}
static inline void btrfs_warn_unknown_chunk_allocation(enum btrfs_chunk_allocation_policy pol)
@@ -854,6 +864,20 @@ static inline void btrfs_warn_unknown_chunk_allocation(enum btrfs_chunk_allocati
WARN_ONCE(1, "unknown allocation policy %d, fallback to regular", pol);
}
+static inline void btrfs_fs_devices_inc_holding(struct btrfs_fs_devices *fs_devices)
+{
+ lockdep_assert_held(&uuid_mutex);
+ ASSERT(fs_devices->holding >= 0);
+ fs_devices->holding++;
+}
+
+static inline void btrfs_fs_devices_dec_holding(struct btrfs_fs_devices *fs_devices)
+{
+ lockdep_assert_held(&uuid_mutex);
+ ASSERT(fs_devices->holding > 0);
+ fs_devices->holding--;
+}
+
void btrfs_commit_device_sizes(struct btrfs_transaction *trans);
struct list_head * __attribute_const__ btrfs_get_fs_uuids(void);
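The holding counter closes a mount-time window: device scanning and btrfs_free_stale_devices() must not tear down an fs_devices that a mount in progress has found but not yet opened. A hypothetical caller sketch (the super.c side is not part of this hunk), assuming the uuid_mutex rules stated in the comment above:

	mutex_lock(&uuid_mutex);
	btrfs_fs_devices_inc_holding(fs_devices);
	mutex_unlock(&uuid_mutex);

	sb = sget_fc(fc, test, set);	/* must not hold uuid_mutex here */

	mutex_lock(&uuid_mutex);
	btrfs_fs_devices_dec_holding(fs_devices);
	/* now safe to open, or to let the fs_devices be released */
	mutex_unlock(&uuid_mutex);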
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 3e0edbcf73e1..79fb1614bd0c 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -510,14 +510,15 @@ static int btrfs_initxattrs(struct inode *inode,
*/
nofs_flag = memalloc_nofs_save();
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
- name = kmalloc(XATTR_SECURITY_PREFIX_LEN +
- strlen(xattr->name) + 1, GFP_KERNEL);
+ const size_t name_len = XATTR_SECURITY_PREFIX_LEN +
+ strlen(xattr->name) + 1;
+
+ name = kmalloc(name_len, GFP_KERNEL);
if (!name) {
ret = -ENOMEM;
break;
}
- strcpy(name, XATTR_SECURITY_PREFIX);
- strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name);
+ scnprintf(name, name_len, "%s%s", XATTR_SECURITY_PREFIX, xattr->name);
if (strcmp(name, XATTR_NAME_CAPS) == 0)
clear_bit(BTRFS_INODE_NO_CAP_XATTR, &BTRFS_I(inode)->runtime_flags);
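The switch from two strcpy() calls to one scnprintf() keeps the concatenation bounded by the size that was just allocated. Unlike snprintf(), scnprintf() returns the number of characters actually written (excluding the NUL), never the would-be length, e.g.:

	char buf[8];
	int n;

	n = scnprintf(buf, sizeof(buf), "%s%s", "security.", "selinux");
	/* buf = "securit", n = 7 -- truncated, but n never exceeds 7 */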
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index b5b0156d5b95..245e813ecd78 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -9,7 +9,6 @@
#include "ctree.h"
#include "volumes.h"
#include "zoned.h"
-#include "rcu-string.h"
#include "disk-io.h"
#include "block-group.h"
#include "dev-replace.h"
@@ -17,6 +16,7 @@
#include "fs.h"
#include "accessors.h"
#include "bio.h"
+#include "transaction.h"
/* Maximum number of zones to report per blkdev_report_zones() call */
#define BTRFS_REPORT_NR_ZONES 4096
@@ -263,9 +263,9 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones,
copy_zone_info_cb, zones);
if (ret < 0) {
- btrfs_err_in_rcu(device->fs_info,
+ btrfs_err(device->fs_info,
"zoned: failed to read zone %llu on %s (devid %llu)",
- pos, rcu_str_deref(device->name),
+ pos, rcu_dereference(device->name),
device->devid);
return ret;
}
@@ -395,16 +395,16 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
/* We reject devices with a zone size larger than 8GB */
if (zone_info->zone_size > BTRFS_MAX_ZONE_SIZE) {
- btrfs_err_in_rcu(fs_info,
+ btrfs_err(fs_info,
"zoned: %s: zone size %llu larger than supported maximum %llu",
- rcu_str_deref(device->name),
+ rcu_dereference(device->name),
zone_info->zone_size, BTRFS_MAX_ZONE_SIZE);
ret = -EINVAL;
goto out;
} else if (zone_info->zone_size < BTRFS_MIN_ZONE_SIZE) {
- btrfs_err_in_rcu(fs_info,
+ btrfs_err(fs_info,
"zoned: %s: zone size %llu smaller than supported minimum %u",
- rcu_str_deref(device->name),
+ rcu_dereference(device->name),
zone_info->zone_size, BTRFS_MIN_ZONE_SIZE);
ret = -EINVAL;
goto out;
@@ -418,9 +418,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
max_active_zones = bdev_max_active_zones(bdev);
if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) {
- btrfs_err_in_rcu(fs_info,
+ btrfs_err(fs_info,
"zoned: %s: max active zones %u is too small, need at least %u active zones",
- rcu_str_deref(device->name), max_active_zones,
+ rcu_dereference(device->name), max_active_zones,
BTRFS_MIN_ACTIVE_ZONES);
ret = -EINVAL;
goto out;
@@ -460,9 +460,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
zone_info->zone_cache = vcalloc(zone_info->nr_zones,
sizeof(struct blk_zone));
if (!zone_info->zone_cache) {
- btrfs_err_in_rcu(device->fs_info,
+ btrfs_err(device->fs_info,
"zoned: failed to allocate zone cache for %s",
- rcu_str_deref(device->name));
+ rcu_dereference(device->name));
ret = -ENOMEM;
goto out;
}
@@ -497,9 +497,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
}
if (nreported != zone_info->nr_zones) {
- btrfs_err_in_rcu(device->fs_info,
+ btrfs_err(device->fs_info,
"inconsistent number of zones on %s (%u/%u)",
- rcu_str_deref(device->name), nreported,
+ rcu_dereference(device->name), nreported,
zone_info->nr_zones);
ret = -EIO;
goto out;
@@ -507,9 +507,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
if (max_active_zones) {
if (nactive > max_active_zones) {
- btrfs_err_in_rcu(device->fs_info,
+ btrfs_err(device->fs_info,
"zoned: %u active zones on %s exceeds max_active_zones %u",
- nactive, rcu_str_deref(device->name),
+ nactive, rcu_dereference(device->name),
max_active_zones);
ret = -EIO;
goto out;
@@ -538,7 +538,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
goto out;
if (nr_zones != BTRFS_NR_SB_LOG_ZONES) {
- btrfs_err_in_rcu(device->fs_info,
+ btrfs_err(device->fs_info,
"zoned: failed to read super block log zone info at devid %llu zone %u",
device->devid, sb_zone);
ret = -EUCLEAN;
@@ -556,7 +556,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
ret = sb_write_pointer(device->bdev,
&zone_info->sb_zones[sb_pos], &sb_wp);
if (ret != -ENOENT && ret) {
- btrfs_err_in_rcu(device->fs_info,
+ btrfs_err(device->fs_info,
"zoned: super block log zone corrupted devid %llu zone %u",
device->devid, sb_zone);
ret = -EUCLEAN;
@@ -575,9 +575,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
emulated = "emulated ";
}
- btrfs_info_in_rcu(fs_info,
+ btrfs_info(fs_info,
"%s block device %s, %u %szones of %llu bytes",
- model, rcu_str_deref(device->name), zone_info->nr_zones,
+ model, rcu_dereference(device->name), zone_info->nr_zones,
emulated, zone_info->zone_size);
return 0;
@@ -1182,10 +1182,10 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
continue;
/* Free regions should be empty */
- btrfs_warn_in_rcu(
+ btrfs_warn(
device->fs_info,
"zoned: resetting device %s (devid %llu) zone %llu for allocation",
- rcu_str_deref(device->name), device->devid, pos >> shift);
+ rcu_dereference(device->name), device->devid, pos >> shift);
WARN_ON_ONCE(1);
ret = btrfs_reset_device_zone(device, pos, zinfo->zone_size,
@@ -1345,9 +1345,9 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
}
if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
- btrfs_err_in_rcu(fs_info,
+ btrfs_err(fs_info,
"zoned: unexpected conventional zone %llu on device %s (devid %llu)",
- zone.start << SECTOR_SHIFT, rcu_str_deref(device->name),
+ zone.start << SECTOR_SHIFT, rcu_dereference(device->name),
device->devid);
up_read(&dev_replace->rwsem);
return -EIO;
@@ -1358,10 +1358,10 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
switch (zone.cond) {
case BLK_ZONE_COND_OFFLINE:
case BLK_ZONE_COND_READONLY:
- btrfs_err_in_rcu(fs_info,
+ btrfs_err(fs_info,
"zoned: offline/readonly zone %llu on device %s (devid %llu)",
(info->physical >> device->zone_info->zone_size_shift),
- rcu_str_deref(device->name), device->devid);
+ rcu_dereference(device->name), device->devid);
info->alloc_offset = WP_MISSING_DEV;
break;
case BLK_ZONE_COND_EMPTY:
@@ -1403,7 +1403,8 @@ static int btrfs_load_block_group_single(struct btrfs_block_group *bg,
static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
- unsigned long *active)
+ unsigned long *active,
+ u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
@@ -1426,6 +1427,13 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
zone_info[1].physical);
return -EIO;
}
+
+ if (zone_info[0].alloc_offset == WP_CONVENTIONAL)
+ zone_info[0].alloc_offset = last_alloc;
+
+ if (zone_info[1].alloc_offset == WP_CONVENTIONAL)
+ zone_info[1].alloc_offset = last_alloc;
+
if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) {
btrfs_err(bg->fs_info,
"zoned: write pointer offset mismatch of zones in DUP profile");
@@ -1446,7 +1454,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
- unsigned long *active)
+ unsigned long *active,
+ u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
int i;
@@ -1461,10 +1470,12 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
for (i = 0; i < map->num_stripes; i++) {
- if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
- zone_info[i].alloc_offset == WP_CONVENTIONAL)
+ if (zone_info[i].alloc_offset == WP_MISSING_DEV)
continue;
+ if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
+ zone_info[i].alloc_offset = last_alloc;
+
if ((zone_info[0].alloc_offset != zone_info[i].alloc_offset) &&
!btrfs_test_opt(fs_info, DEGRADED)) {
btrfs_err(fs_info,
@@ -1494,7 +1505,8 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
- unsigned long *active)
+ unsigned long *active,
+ u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
@@ -1505,10 +1517,29 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
}
for (int i = 0; i < map->num_stripes; i++) {
- if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
- zone_info[i].alloc_offset == WP_CONVENTIONAL)
+ if (zone_info[i].alloc_offset == WP_MISSING_DEV)
continue;
+ if (zone_info[i].alloc_offset == WP_CONVENTIONAL) {
+ u64 stripe_nr, full_stripe_nr;
+ u64 stripe_offset;
+			u32 stripe_index;
+
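+			/*
+			 * Conventional zones have no write pointer, so
+			 * reconstruct this stripe's offset from the
+			 * chunk-relative last_alloc: every device holds
+			 * full_stripe_nr complete stripes, devices before
+			 * stripe_index hold one extra full stripe, and the
+			 * device at stripe_index holds the partial tail.
+			 */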
+ stripe_nr = div64_u64(last_alloc, map->stripe_size);
+ stripe_offset = stripe_nr * map->stripe_size;
+ full_stripe_nr = div_u64(stripe_nr, map->num_stripes);
+ div_u64_rem(stripe_nr, map->num_stripes, &stripe_index);
+
+ zone_info[i].alloc_offset =
+ full_stripe_nr * map->stripe_size;
+
+ if (stripe_index > i)
+ zone_info[i].alloc_offset += map->stripe_size;
+ else if (stripe_index == i)
+ zone_info[i].alloc_offset +=
+ (last_alloc - stripe_offset);
+ }
+
if (test_bit(0, active) != test_bit(i, active)) {
if (!btrfs_zone_activate(bg))
return -EIO;
@@ -1526,7 +1557,8 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
- unsigned long *active)
+ unsigned long *active,
+ u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
@@ -1537,8 +1569,7 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
}
for (int i = 0; i < map->num_stripes; i++) {
- if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
- zone_info[i].alloc_offset == WP_CONVENTIONAL)
+ if (zone_info[i].alloc_offset == WP_MISSING_DEV)
continue;
if (test_bit(0, active) != test_bit(i, active)) {
@@ -1549,6 +1580,29 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
}
+ if (zone_info[i].alloc_offset == WP_CONVENTIONAL) {
+ u64 stripe_nr, full_stripe_nr;
+ u64 stripe_offset;
+			u32 stripe_index;
+
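+			/*
+			 * As in the RAID0 case, but stripes advance per
+			 * mirror group: there are num_stripes / sub_stripes
+			 * groups, each replicated sub_stripes times, hence
+			 * the comparisons against i / sub_stripes.
+			 */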
+ stripe_nr = div64_u64(last_alloc, map->stripe_size);
+ stripe_offset = stripe_nr * map->stripe_size;
+ full_stripe_nr = div_u64(stripe_nr,
+ map->num_stripes / map->sub_stripes);
+ div_u64_rem(stripe_nr,
+ (map->num_stripes / map->sub_stripes),
+ &stripe_index);
+
+ zone_info[i].alloc_offset =
+ full_stripe_nr * map->stripe_size;
+
+ if (stripe_index > (i / map->sub_stripes))
+ zone_info[i].alloc_offset += map->stripe_size;
+ else if (stripe_index == (i / map->sub_stripes))
+ zone_info[i].alloc_offset +=
+ (last_alloc - stripe_offset);
+ }
+
if ((i % map->sub_stripes) == 0) {
bg->zone_capacity += zone_info[i].capacity;
bg->alloc_offset += zone_info[i].alloc_offset;
@@ -1637,18 +1691,22 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
break;
case BTRFS_BLOCK_GROUP_DUP:
- ret = btrfs_load_block_group_dup(cache, map, zone_info, active);
+ ret = btrfs_load_block_group_dup(cache, map, zone_info, active,
+ last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID1:
case BTRFS_BLOCK_GROUP_RAID1C3:
case BTRFS_BLOCK_GROUP_RAID1C4:
- ret = btrfs_load_block_group_raid1(cache, map, zone_info, active);
+ ret = btrfs_load_block_group_raid1(cache, map, zone_info,
+ active, last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID0:
- ret = btrfs_load_block_group_raid0(cache, map, zone_info, active);
+ ret = btrfs_load_block_group_raid0(cache, map, zone_info,
+ active, last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID10:
- ret = btrfs_load_block_group_raid10(cache, map, zone_info, active);
+ ret = btrfs_load_block_group_raid10(cache, map, zone_info,
+ active, last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID5:
case BTRFS_BLOCK_GROUP_RAID6:
@@ -2427,7 +2485,7 @@ void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
/* For the work */
btrfs_get_block_group(bg);
- atomic_inc(&eb->refs);
+ refcount_inc(&eb->refs);
bg->last_eb = eb;
INIT_WORK(&bg->zone_finish_work, btrfs_zone_finish_endio_workfn);
queue_work(system_unbound_wq, &bg->zone_finish_work);
@@ -2443,6 +2501,66 @@ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
spin_unlock(&fs_info->relocation_bg_lock);
}
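+/*
+ * Pick an empty data block group to serve as the dedicated zoned data
+ * relocation block group, force-allocating a new chunk once if none is
+ * found. Note that the first empty group encountered is deliberately
+ * skipped and left available for regular data allocation.
+ */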
+void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
+ struct btrfs_space_info *space_info = data_sinfo->sub_group[0];
+ struct btrfs_trans_handle *trans;
+ struct btrfs_block_group *bg;
+ struct list_head *bg_list;
+ u64 alloc_flags;
+ bool initial = false;
+ bool did_chunk_alloc = false;
+ int index;
+ int ret;
+
+ if (!btrfs_is_zoned(fs_info))
+ return;
+
+ if (fs_info->data_reloc_bg)
+ return;
+
+ if (sb_rdonly(fs_info->sb))
+ return;
+
+ ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
+ alloc_flags = btrfs_get_alloc_profile(fs_info, space_info->flags);
+ index = btrfs_bg_flags_to_raid_index(alloc_flags);
+
+ bg_list = &data_sinfo->block_groups[index];
+again:
+ list_for_each_entry(bg, bg_list, list) {
+ if (bg->used > 0)
+ continue;
+
+ if (!initial) {
+ initial = true;
+ continue;
+ }
+
+ fs_info->data_reloc_bg = bg->start;
+ set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &bg->runtime_flags);
+ btrfs_zone_activate(bg);
+
+ return;
+ }
+
+ if (did_chunk_alloc)
+ return;
+
+ trans = btrfs_join_transaction(fs_info->tree_root);
+ if (IS_ERR(trans))
+ return;
+
+ ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE);
+ btrfs_end_transaction(trans);
+ if (ret == 1) {
+ did_chunk_alloc = true;
+ bg_list = &space_info->block_groups[index];
+ goto again;
+ }
+}
+
void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info)
{
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
@@ -2465,8 +2583,8 @@ bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info)
{
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
+ u64 total = btrfs_super_total_bytes(fs_info->super_copy);
u64 used = 0;
- u64 total = 0;
u64 factor;
ASSERT(btrfs_is_zoned(fs_info));
@@ -2479,7 +2597,6 @@ bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info)
if (!device->bdev)
continue;
- total += device->disk_total_bytes;
used += device->bytes_used;
}
mutex_unlock(&fs_devices->device_list_mutex);
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 9672bf4c3335..6e11533b8e14 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -88,6 +88,7 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
struct extent_buffer *eb);
void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
+void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info);
void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info);
void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
@@ -241,6 +242,8 @@ static inline void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
+static inline void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info) { }
+
static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }
static inline bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index 4a796a049b5a..ff0292615e1f 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -200,8 +200,7 @@ void zstd_init_workspace_manager(void)
ws = zstd_alloc_workspace(ZSTD_BTRFS_MAX_LEVEL);
if (IS_ERR(ws)) {
- pr_warn(
- "BTRFS: cannot preallocate zstd compression workspace\n");
+ btrfs_warn(NULL, "cannot preallocate zstd compression workspace");
} else {
set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &wsm.active_map);
list_add(ws, &wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
diff --git a/fs/buffer.c b/fs/buffer.c
index 8cf4a1dc481e..ead4dc85debd 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1122,15 +1122,9 @@ __getblk_slow(struct block_device *bdev, sector_t block,
{
bool blocking = gfpflags_allow_blocking(gfp);
- /* Size must be multiple of hard sectorsize */
- if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
- (size < 512 || size > PAGE_SIZE))) {
- printk(KERN_ERR "getblk(): invalid block size %d requested\n",
- size);
- printk(KERN_ERR "logical block size: %d\n",
- bdev_logical_block_size(bdev));
-
- dump_stack();
+ if (WARN_ON_ONCE(!IS_ALIGNED(size, bdev_logical_block_size(bdev)))) {
+ printk(KERN_ERR "getblk(): block size %d not aligned to logical block size %d\n",
+ size, bdev_logical_block_size(bdev));
return NULL;
}
@@ -2271,9 +2265,8 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
}
EXPORT_SYMBOL(block_write_begin);
-int block_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct folio *folio, void *fsdata)
+int block_write_end(loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio)
{
size_t start = pos - folio_pos(folio);
@@ -2304,15 +2297,15 @@ int block_write_end(struct file *file, struct address_space *mapping,
}
EXPORT_SYMBOL(block_write_end);
-int generic_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct folio *folio, void *fsdata)
+int generic_write_end(const struct kiocb *iocb, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
loff_t old_size = inode->i_size;
bool i_size_changed = false;
- copied = block_write_end(file, mapping, pos, len, copied, folio, fsdata);
+ copied = block_write_end(pos, len, copied, folio);
/*
* No need to use i_size_read() here, the i_size cannot change under us
@@ -2501,7 +2494,8 @@ out:
}
EXPORT_SYMBOL(generic_cont_expand_simple);
-static int cont_expand_zero(struct file *file, struct address_space *mapping,
+static int cont_expand_zero(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, loff_t *bytes)
{
struct inode *inode = mapping->host;
@@ -2525,12 +2519,12 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
}
len = PAGE_SIZE - zerofrom;
- err = aops->write_begin(file, mapping, curpos, len,
+ err = aops->write_begin(iocb, mapping, curpos, len,
&folio, &fsdata);
if (err)
goto out;
folio_zero_range(folio, offset_in_folio(folio, curpos), len);
- err = aops->write_end(file, mapping, curpos, len, len,
+ err = aops->write_end(iocb, mapping, curpos, len, len,
folio, fsdata);
if (err < 0)
goto out;
@@ -2558,12 +2552,12 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
}
len = offset - zerofrom;
- err = aops->write_begin(file, mapping, curpos, len,
+ err = aops->write_begin(iocb, mapping, curpos, len,
&folio, &fsdata);
if (err)
goto out;
folio_zero_range(folio, offset_in_folio(folio, curpos), len);
- err = aops->write_end(file, mapping, curpos, len, len,
+ err = aops->write_end(iocb, mapping, curpos, len, len,
folio, fsdata);
if (err < 0)
goto out;
@@ -2578,17 +2572,16 @@ out:
* For moronic filesystems that do not allow holes in file.
* We may have to extend the file.
*/
-int cont_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct folio **foliop, void **fsdata,
- get_block_t *get_block, loff_t *bytes)
+int cont_write_begin(const struct kiocb *iocb, struct address_space *mapping,
+ loff_t pos, unsigned len, struct folio **foliop,
+ void **fsdata, get_block_t *get_block, loff_t *bytes)
{
struct inode *inode = mapping->host;
unsigned int blocksize = i_blocksize(inode);
unsigned int zerofrom;
int err;
- err = cont_expand_zero(file, mapping, pos, bytes);
+ err = cont_expand_zero(iocb, mapping, pos, bytes);
if (err)
return err;
@@ -2610,7 +2603,7 @@ EXPORT_SYMBOL(cont_write_begin);
* holes and correct delalloc and unwritten extent mapping on filesystems that
* support these features.
*
- * We are not allowed to take the i_mutex here so we have to play games to
+ * We are not allowed to take the i_rwsem here so we have to play games to
* protect against truncate races as the page could now be beyond EOF. Because
* truncate writes the inode size before removing pages, once we have the
* page lock we can determine safely if the page is beyond EOF. If it is not
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index c08e4a66ac07..3e0576d9db1d 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -347,8 +347,6 @@ int __cachefiles_write(struct cachefiles_object *object,
default:
ki->was_async = false;
cachefiles_write_complete(&ki->iocb, ret);
- if (ret > 0)
- ret = 0;
break;
}
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index aecfc5c37b49..91dfd0231877 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -388,10 +388,10 @@ try_again:
} else {
struct renamedata rd = {
.old_mnt_idmap = &nop_mnt_idmap,
- .old_dir = d_inode(dir),
+ .old_parent = dir,
.old_dentry = rep,
.new_mnt_idmap = &nop_mnt_idmap,
- .new_dir = d_inode(cache->graveyard),
+ .new_parent = cache->graveyard,
.new_dentry = grave,
};
trace_cachefiles_rename(object, d_inode(rep)->i_ino, why);
diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
index d9bc67176128..a7ed86fa98bb 100644
--- a/fs/cachefiles/ondemand.c
+++ b/fs/cachefiles/ondemand.c
@@ -83,10 +83,8 @@ static ssize_t cachefiles_ondemand_fd_write_iter(struct kiocb *kiocb,
trace_cachefiles_ondemand_fd_write(object, file_inode(file), pos, len);
ret = __cachefiles_write(object, file, pos, iter, NULL, NULL);
- if (!ret) {
- ret = len;
+ if (ret > 0)
kiocb->ki_pos += ret;
- }
out:
fput(file);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 60a621b00c65..02468c848cce 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1864,10 +1864,12 @@ static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned
* We are only allowed to write into/dirty the page if the page is
* clean, or already dirty within the same snap context.
*/
-static int ceph_write_begin(struct file *file, struct address_space *mapping,
+static int ceph_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len,
struct folio **foliop, void **fsdata)
{
+ struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
int r;
@@ -1885,10 +1887,12 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
* we don't do anything in here that simple_write_end doesn't do
* except adjust dirty page accounting
*/
-static int ceph_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
+static int ceph_write_end(const struct kiocb *iocb,
+ struct address_space *mapping, loff_t pos,
+ unsigned len, unsigned copied,
struct folio *folio, void *fsdata)
{
+ struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct ceph_client *cl = ceph_inode_to_client(inode);
bool check_cap = false;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a8d8b56cf9d2..b1a8ff612c41 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4957,24 +4957,20 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
cl = ceph_inode_to_client(dir);
spin_lock(&dentry->d_lock);
if (ret && di->lease_session && di->lease_session->s_mds == mds) {
+ int len = dentry->d_name.len;
doutc(cl, "%p mds%d seq %d\n", dentry, mds,
(int)di->lease_seq);
rel->dname_seq = cpu_to_le32(di->lease_seq);
__ceph_mdsc_drop_dentry_lease(dentry);
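+		/* Copy the name while d_lock is held; it may change once dropped. */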
+ memcpy(*p, dentry->d_name.name, len);
spin_unlock(&dentry->d_lock);
if (IS_ENCRYPTED(dir) && fscrypt_has_encryption_key(dir)) {
- int ret2 = ceph_encode_encrypted_fname(dir, dentry, *p);
-
- if (ret2 < 0)
- return ret2;
-
- rel->dname_len = cpu_to_le32(ret2);
- *p += ret2;
- } else {
- rel->dname_len = cpu_to_le32(dentry->d_name.len);
- memcpy(*p, dentry->d_name.name, dentry->d_name.len);
- *p += dentry->d_name.len;
+ len = ceph_encode_encrypted_dname(dir, *p, len);
+ if (len < 0)
+ return len;
}
+ rel->dname_len = cpu_to_le32(len);
+ *p += len;
} else {
spin_unlock(&dentry->d_lock);
}
diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c
index 3b3c4d8d401e..e312f52f48e4 100644
--- a/fs/ceph/crypto.c
+++ b/fs/ceph/crypto.c
@@ -215,35 +215,31 @@ static struct inode *parse_longname(const struct inode *parent,
struct ceph_client *cl = ceph_inode_to_client(parent);
struct inode *dir = NULL;
struct ceph_vino vino = { .snap = CEPH_NOSNAP };
- char *inode_number;
- char *name_end;
- int orig_len = *name_len;
+ char *name_end, *inode_number;
int ret = -EIO;
-
+ /* NUL-terminate */
+ char *str __free(kfree) = kmemdup_nul(name, *name_len, GFP_KERNEL);
+ if (!str)
+ return ERR_PTR(-ENOMEM);
/* Skip initial '_' */
- name++;
- name_end = strrchr(name, '_');
+	/* don't advance str itself: the __free() handler must kfree() it */
+	name_end = strrchr(str + 1, '_');
if (!name_end) {
- doutc(cl, "failed to parse long snapshot name: %s\n", name);
+ doutc(cl, "failed to parse long snapshot name: %s\n", str);
return ERR_PTR(-EIO);
}
- *name_len = (name_end - name);
+	*name_len = name_end - (str + 1);
if (*name_len <= 0) {
pr_err_client(cl, "failed to parse long snapshot name\n");
return ERR_PTR(-EIO);
}
/* Get the inode number */
- inode_number = kmemdup_nul(name_end + 1,
- orig_len - *name_len - 2,
- GFP_KERNEL);
- if (!inode_number)
- return ERR_PTR(-ENOMEM);
+ inode_number = name_end + 1;
ret = kstrtou64(inode_number, 10, &vino.ino);
if (ret) {
- doutc(cl, "failed to parse inode number: %s\n", name);
- dir = ERR_PTR(ret);
- goto out;
+ doutc(cl, "failed to parse inode number: %s\n", str);
+ return ERR_PTR(ret);
}
/* And finally the inode */
@@ -254,42 +250,29 @@ static struct inode *parse_longname(const struct inode *parent,
if (IS_ERR(dir))
doutc(cl, "can't find inode %s (%s)\n", inode_number, name);
}
-
-out:
- kfree(inode_number);
return dir;
}
-int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name,
- char *buf)
+int ceph_encode_encrypted_dname(struct inode *parent, char *buf, int elen)
{
struct ceph_client *cl = ceph_inode_to_client(parent);
struct inode *dir = parent;
- struct qstr iname;
+ char *p = buf;
u32 len;
- int name_len;
- int elen;
+ int name_len = elen;
int ret;
u8 *cryptbuf = NULL;
- iname.name = d_name->name;
- name_len = d_name->len;
-
/* Handle the special case of snapshot names that start with '_' */
- if ((ceph_snap(dir) == CEPH_SNAPDIR) && (name_len > 0) &&
- (iname.name[0] == '_')) {
- dir = parse_longname(parent, iname.name, &name_len);
+ if (ceph_snap(dir) == CEPH_SNAPDIR && *p == '_') {
+ dir = parse_longname(parent, p, &name_len);
if (IS_ERR(dir))
return PTR_ERR(dir);
- iname.name++; /* skip initial '_' */
+ p++; /* skip initial '_' */
}
- iname.len = name_len;
- if (!fscrypt_has_encryption_key(dir)) {
- memcpy(buf, d_name->name, d_name->len);
- elen = d_name->len;
+ if (!fscrypt_has_encryption_key(dir))
goto out;
- }
/*
* Convert cleartext d_name to ciphertext. If result is longer than
@@ -297,7 +280,7 @@ int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name,
*
* See: fscrypt_setup_filename
*/
- if (!fscrypt_fname_encrypted_size(dir, iname.len, NAME_MAX, &len)) {
+ if (!fscrypt_fname_encrypted_size(dir, name_len, NAME_MAX, &len)) {
elen = -ENAMETOOLONG;
goto out;
}
@@ -310,7 +293,9 @@ int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name,
goto out;
}
- ret = fscrypt_fname_encrypt(dir, &iname, cryptbuf, len);
+ ret = fscrypt_fname_encrypt(dir,
+ &(struct qstr)QSTR_INIT(p, name_len),
+ cryptbuf, len);
if (ret) {
elen = ret;
goto out;
@@ -331,18 +316,13 @@ int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name,
}
/* base64 encode the encrypted name */
- elen = ceph_base64_encode(cryptbuf, len, buf);
- doutc(cl, "base64-encoded ciphertext name = %.*s\n", elen, buf);
+ elen = ceph_base64_encode(cryptbuf, len, p);
+ doutc(cl, "base64-encoded ciphertext name = %.*s\n", elen, p);
/* To understand the 240 limit, see CEPH_NOHASH_NAME_MAX comments */
WARN_ON(elen > 240);
- if ((elen > 0) && (dir != parent)) {
- char tmp_buf[NAME_MAX];
-
- elen = snprintf(tmp_buf, sizeof(tmp_buf), "_%.*s_%ld",
- elen, buf, dir->i_ino);
- memcpy(buf, tmp_buf, elen);
- }
+	if (dir != parent) /* leading '_' is already there; append _<inum> */
+		elen += 1 + sprintf(p + elen, "_%lu", dir->i_ino);
out:
kfree(cryptbuf);
@@ -355,14 +335,6 @@ out:
return elen;
}
-int ceph_encode_encrypted_fname(struct inode *parent, struct dentry *dentry,
- char *buf)
-{
- WARN_ON_ONCE(!fscrypt_has_encryption_key(parent));
-
- return ceph_encode_encrypted_dname(parent, &dentry->d_name, buf);
-}
-
/**
* ceph_fname_to_usr - convert a filename for userland presentation
* @fname: ceph_fname to be converted
diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h
index d0768239a1c9..f752bbb2eb06 100644
--- a/fs/ceph/crypto.h
+++ b/fs/ceph/crypto.h
@@ -102,10 +102,7 @@ int ceph_fscrypt_prepare_context(struct inode *dir, struct inode *inode,
struct ceph_acl_sec_ctx *as);
void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req,
struct ceph_acl_sec_ctx *as);
-int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name,
- char *buf);
-int ceph_encode_encrypted_fname(struct inode *parent, struct dentry *dentry,
- char *buf);
+int ceph_encode_encrypted_dname(struct inode *parent, char *buf, int len);
static inline int ceph_fname_alloc_buffer(struct inode *parent,
struct fscrypt_str *fname)
@@ -194,17 +191,10 @@ static inline void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req,
{
}
-static inline int ceph_encode_encrypted_dname(struct inode *parent,
- struct qstr *d_name, char *buf)
+static inline int ceph_encode_encrypted_dname(struct inode *parent, char *buf,
+ int len)
{
- memcpy(buf, d_name->name, d_name->len);
- return d_name->len;
-}
-
-static inline int ceph_encode_encrypted_fname(struct inode *parent,
- struct dentry *dentry, char *buf)
-{
- return -EOPNOTSUPP;
+ return len;
}
static inline int ceph_fname_alloc_buffer(struct inode *parent,
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index a321aa6d0ed2..8478e7e75df6 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -423,17 +423,16 @@ more:
req->r_inode_drop = CEPH_CAP_FILE_EXCL;
}
if (dfi->last_name) {
- struct qstr d_name = { .name = dfi->last_name,
- .len = strlen(dfi->last_name) };
+ int len = strlen(dfi->last_name);
req->r_path2 = kzalloc(NAME_MAX + 1, GFP_KERNEL);
if (!req->r_path2) {
ceph_mdsc_put_request(req);
return -ENOMEM;
}
+ memcpy(req->r_path2, dfi->last_name, len);
- err = ceph_encode_encrypted_dname(inode, &d_name,
- req->r_path2);
+ err = ceph_encode_encrypted_dname(inode, req->r_path2, len);
if (err < 0) {
ceph_mdsc_put_request(req);
return err;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 230e0c3f341f..0f497c39ff82 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2766,8 +2766,8 @@ retry:
}
if (fscrypt_has_encryption_key(d_inode(parent))) {
- len = ceph_encode_encrypted_fname(d_inode(parent),
- cur, buf);
+ len = ceph_encode_encrypted_dname(d_inode(parent),
+ buf, len);
if (len < 0) {
dput(parent);
dput(cur);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 2b8438d8a324..c3eb651862c5 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1219,7 +1219,7 @@ static int ceph_set_super(struct super_block *s, struct fs_context *fc)
fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */
s->s_op = &ceph_super_ops;
- s->s_d_op = &ceph_dentry_ops;
+ set_default_d_op(s, &ceph_dentry_ops);
s->s_export_op = &ceph_export_ops;
s->s_time_gran = 1;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 6896fce122e1..08450d006016 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -230,7 +230,7 @@ static int coda_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_blocksize_bits = 12;
sb->s_magic = CODA_SUPER_MAGIC;
sb->s_op = &coda_super_operations;
- sb->s_d_op = &coda_dentry_operations;
+ set_default_d_op(sb, &coda_dentry_operations);
sb->s_time_gran = 1;
sb->s_time_min = S64_MIN;
sb->s_time_max = S64_MAX;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index ebf32822e29b..f327fbb9a0ca 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -67,7 +67,6 @@ static void configfs_d_iput(struct dentry * dentry,
const struct dentry_operations configfs_dentry_ops = {
.d_iput = configfs_d_iput,
- .d_delete = always_delete_dentry,
};
#ifdef CONFIG_LOCKDEP
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index c2d820063ec4..740f18b60c9d 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -92,7 +92,8 @@ static int configfs_fill_super(struct super_block *sb, struct fs_context *fc)
configfs_root_group.cg_item.ci_dentry = root;
root->d_fsdata = &configfs_root;
sb->s_root = root;
- sb->s_d_op = &configfs_dentry_ops; /* the rest get that */
+ set_default_d_op(sb, &configfs_dentry_ops); /* the rest get that */
+ sb->s_d_flags |= DCACHE_DONTCACHE;
return 0;
}
diff --git a/fs/coredump.c b/fs/coredump.c
index f217ebf2b3b6..fadf9d4be2e1 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -51,6 +51,7 @@
#include <net/sock.h>
#include <uapi/linux/pidfd.h>
#include <uapi/linux/un.h>
+#include <uapi/linux/coredump.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -81,17 +82,22 @@ static unsigned int core_sort_vma;
static char core_pattern[CORENAME_MAX_SIZE] = "core";
static int core_name_size = CORENAME_MAX_SIZE;
unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT;
+static atomic_t core_pipe_count = ATOMIC_INIT(0);
enum coredump_type_t {
- COREDUMP_FILE = 1,
- COREDUMP_PIPE = 2,
- COREDUMP_SOCK = 3,
+ COREDUMP_FILE = 1,
+ COREDUMP_PIPE = 2,
+ COREDUMP_SOCK = 3,
+ COREDUMP_SOCK_REQ = 4,
};
struct core_name {
char *corename;
int used, size;
+ unsigned int core_pipe_limit;
+ bool core_dumped;
enum coredump_type_t core_type;
+ u64 mask;
};
static int expand_corename(struct core_name *cn, int size)
@@ -222,11 +228,12 @@ put_exe_file:
return ret;
}
-/* format_corename will inspect the pattern parameter, and output a
- * name into corename, which must have space for at least
- * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
+/*
+ * coredump_parse will inspect the pattern parameter, and output a name
+ * into corename, which must have space for at least CORENAME_MAX_SIZE
+ * bytes plus one byte for the zero terminator.
*/
-static int format_corename(struct core_name *cn, struct coredump_params *cprm,
+static bool coredump_parse(struct core_name *cn, struct coredump_params *cprm,
size_t **argv, int *argc)
{
const struct cred *cred = current_cred();
@@ -235,8 +242,13 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
int pid_in_pattern = 0;
int err = 0;
+ cn->mask = COREDUMP_KERNEL;
+ if (core_pipe_limit)
+ cn->mask |= COREDUMP_WAIT;
cn->used = 0;
cn->corename = NULL;
+ cn->core_pipe_limit = 0;
+ cn->core_dumped = false;
if (*pat_ptr == '|')
cn->core_type = COREDUMP_PIPE;
else if (*pat_ptr == '@')
@@ -244,7 +256,7 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
else
cn->core_type = COREDUMP_FILE;
if (expand_corename(cn, core_name_size))
- return -ENOMEM;
+ return false;
cn->corename[0] = '\0';
switch (cn->core_type) {
@@ -252,26 +264,33 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
int argvs = sizeof(core_pattern) / 2;
(*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL);
if (!(*argv))
- return -ENOMEM;
+ return false;
(*argv)[(*argc)++] = 0;
++pat_ptr;
if (!(*pat_ptr))
- return -ENOMEM;
+ return false;
break;
}
case COREDUMP_SOCK: {
/* skip the @ */
pat_ptr++;
if (!(*pat_ptr))
- return -ENOMEM;
+ return false;
+ if (*pat_ptr == '@') {
+ pat_ptr++;
+ if (!(*pat_ptr))
+ return false;
+
+ cn->core_type = COREDUMP_SOCK_REQ;
+ }
err = cn_printf(cn, "%s", pat_ptr);
if (err)
- return err;
+ return false;
/* Require absolute paths. */
if (cn->corename[0] != '/')
- return -EINVAL;
+ return false;
/*
		 * Ensure we can use spaces to indicate additional
@@ -279,7 +298,18 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
*/
if (strchr(cn->corename, ' ')) {
coredump_report_failure("Coredump socket may not %s contain spaces", cn->corename);
- return -EINVAL;
+ return false;
+ }
+
+ /* Must not contain ".." in the path. */
+ if (name_contains_dotdot(cn->corename)) {
+			coredump_report_failure("Coredump socket may not %s contain '..'", cn->corename);
+ return false;
+ }
+
+ if (strlen(cn->corename) >= UNIX_PATH_MAX) {
+ coredump_report_failure("Coredump socket path %s too long", cn->corename);
+ return false;
}
/*
@@ -289,13 +319,13 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
* via /proc/<pid>, using the SO_PEERPIDFD to guard
* against pid recycling when opening /proc/<pid>.
*/
- return 0;
+ return true;
}
case COREDUMP_FILE:
break;
default:
WARN_ON_ONCE(true);
- return -EINVAL;
+ return false;
}
/* Repeat as long as we have more pattern to process and more output
@@ -433,7 +463,7 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
}
if (err)
- return err;
+ return false;
}
out:
@@ -443,9 +473,9 @@ out:
* and core_uses_pid is set, then .%pid will be appended to
* the filename. Do not do this for piped commands. */
if (cn->core_type == COREDUMP_FILE && !pid_in_pattern && core_uses_pid)
- return cn_printf(cn, ".%d", task_tgid_vnr(current));
+ return cn_printf(cn, ".%d", task_tgid_vnr(current)) == 0;
- return 0;
+ return true;
}
static int zap_process(struct signal_struct *signal, int exit_code)
@@ -632,21 +662,445 @@ static int umh_coredump_setup(struct subprocess_info *info, struct cred *new)
return 0;
}
-void do_coredump(const kernel_siginfo_t *siginfo)
+#ifdef CONFIG_UNIX
+static bool coredump_sock_connect(struct core_name *cn, struct coredump_params *cprm)
+{
+ struct file *file __free(fput) = NULL;
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ };
+ ssize_t addr_len;
+ int retval;
+ struct socket *socket;
+
+ addr_len = strscpy(addr.sun_path, cn->corename);
+ if (addr_len < 0)
+ return false;
+ addr_len += offsetof(struct sockaddr_un, sun_path) + 1;
+
+ /*
+ * It is possible that the userspace process which is supposed
+ * to handle the coredump and is listening on the AF_UNIX socket
+	 * coredumps. Userspace should just mark itself non-dumpable.
+ */
+
+ retval = sock_create_kern(&init_net, AF_UNIX, SOCK_STREAM, 0, &socket);
+ if (retval < 0)
+ return false;
+
+ file = sock_alloc_file(socket, 0, NULL);
+ if (IS_ERR(file))
+ return false;
+
+ /*
+ * Set the thread-group leader pid which is used for the peer
+ * credentials during connect() below. Then immediately register
+ * it in pidfs...
+ */
+ cprm->pid = task_tgid(current);
+ retval = pidfs_register_pid(cprm->pid);
+ if (retval)
+ return false;
+
+ /*
+ * ... and set the coredump information so userspace has it
+ * available after connect()...
+ */
+ pidfs_coredump(cprm);
+
+ retval = kernel_connect(socket, (struct sockaddr *)(&addr), addr_len,
+ O_NONBLOCK | SOCK_COREDUMP);
+ /*
+ * ... Make sure to only put our reference after connect() took
+ * its own reference keeping the pidfs entry alive ...
+ */
+ pidfs_put_pid(cprm->pid);
+
+ if (retval) {
+ if (retval == -EAGAIN)
+ coredump_report_failure("Coredump socket %s receive queue full", addr.sun_path);
+ else
+ coredump_report_failure("Coredump socket connection %s failed %d", addr.sun_path, retval);
+ return false;
+ }
+
+ /* ... and validate that @sk_peer_pid matches @cprm.pid. */
+ if (WARN_ON_ONCE(unix_peer(socket->sk)->sk_peer_pid != cprm->pid))
+ return false;
+
+ cprm->limit = RLIM_INFINITY;
+ cprm->file = no_free_ptr(file);
+
+ return true;
+}
+
+static inline bool coredump_sock_recv(struct file *file, struct coredump_ack *ack, size_t size, int flags)
+{
+ struct msghdr msg = {};
+ struct kvec iov = { .iov_base = ack, .iov_len = size };
+ ssize_t ret;
+
+ memset(ack, 0, size);
+ ret = kernel_recvmsg(sock_from_file(file), &msg, &iov, 1, size, flags);
+ return ret == size;
+}
+
+static inline bool coredump_sock_send(struct file *file, struct coredump_req *req)
+{
+ struct msghdr msg = { .msg_flags = MSG_NOSIGNAL };
+ struct kvec iov = { .iov_base = req, .iov_len = sizeof(*req) };
+ ssize_t ret;
+
+ ret = kernel_sendmsg(sock_from_file(file), &msg, &iov, 1, sizeof(*req));
+ return ret == sizeof(*req);
+}
+
+static_assert(sizeof(enum coredump_mark) == sizeof(__u32));
+
+static inline bool coredump_sock_mark(struct file *file, enum coredump_mark mark)
+{
+ struct msghdr msg = { .msg_flags = MSG_NOSIGNAL };
+ struct kvec iov = { .iov_base = &mark, .iov_len = sizeof(mark) };
+ ssize_t ret;
+
+ ret = kernel_sendmsg(sock_from_file(file), &msg, &iov, 1, sizeof(mark));
+ return ret == sizeof(mark);
+}
+
+static inline void coredump_sock_wait(struct file *file)
+{
+ ssize_t n;
+
+ /*
+ * We use a simple read to wait for the coredump processing to
+ * finish. Either the socket is closed or we get sent unexpected
+ * data. In both cases, we're done.
+ */
+ n = __kernel_read(file, &(char){ 0 }, 1, NULL);
+ if (n > 0)
+ coredump_report_failure("Coredump socket had unexpected data");
+ else if (n < 0)
+ coredump_report_failure("Coredump socket failed");
+}
+
+static inline void coredump_sock_shutdown(struct file *file)
+{
+ struct socket *socket;
+
+ socket = sock_from_file(file);
+ if (!socket)
+ return;
+
+ /* Let userspace know we're done processing the coredump. */
+ kernel_sock_shutdown(socket, SHUT_WR);
+}
+
+static bool coredump_sock_request(struct core_name *cn, struct coredump_params *cprm)
+{
+ struct coredump_req req = {
+ .size = sizeof(struct coredump_req),
+ .mask = COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT,
+ .size_ack = sizeof(struct coredump_ack),
+ };
+ struct coredump_ack ack = {};
+ ssize_t usize;
+
+ if (cn->core_type != COREDUMP_SOCK_REQ)
+ return true;
+
+ /* Let userspace know what we support. */
+ if (!coredump_sock_send(cprm->file, &req))
+ return false;
+
+ /* Peek the size of the coredump_ack. */
+ if (!coredump_sock_recv(cprm->file, &ack, sizeof(ack.size),
+ MSG_PEEK | MSG_WAITALL))
+ return false;
+
+ /* Refuse unknown coredump_ack sizes. */
+ usize = ack.size;
+ if (usize < COREDUMP_ACK_SIZE_VER0) {
+ coredump_sock_mark(cprm->file, COREDUMP_MARK_MINSIZE);
+ return false;
+ }
+
+ if (usize > sizeof(ack)) {
+ coredump_sock_mark(cprm->file, COREDUMP_MARK_MAXSIZE);
+ return false;
+ }
+
+ /* Now retrieve the coredump_ack. */
+ if (!coredump_sock_recv(cprm->file, &ack, usize, MSG_WAITALL))
+ return false;
+ if (ack.size != usize)
+ return false;
+
+ /* Refuse unknown coredump_ack flags. */
+ if (ack.mask & ~req.mask) {
+ coredump_sock_mark(cprm->file, COREDUMP_MARK_UNSUPPORTED);
+ return false;
+ }
+
+ /* Refuse mutually exclusive options. */
+ if (hweight64(ack.mask & (COREDUMP_USERSPACE | COREDUMP_KERNEL |
+ COREDUMP_REJECT)) != 1) {
+ coredump_sock_mark(cprm->file, COREDUMP_MARK_CONFLICTING);
+ return false;
+ }
+
+ if (ack.spare) {
+ coredump_sock_mark(cprm->file, COREDUMP_MARK_UNSUPPORTED);
+ return false;
+ }
+
+ cn->mask = ack.mask;
+ return coredump_sock_mark(cprm->file, COREDUMP_MARK_REQACK);
+}
+
+static bool coredump_socket(struct core_name *cn, struct coredump_params *cprm)
+{
+ if (!coredump_sock_connect(cn, cprm))
+ return false;
+
+ return coredump_sock_request(cn, cprm);
+}
+#else
+static inline void coredump_sock_wait(struct file *file) { }
+static inline void coredump_sock_shutdown(struct file *file) { }
+static inline bool coredump_socket(struct core_name *cn, struct coredump_params *cprm) { return false; }
+#endif
+
+/* cprm->mm_flags contains a stable snapshot of dumpability flags. */
+static inline bool coredump_force_suid_safe(const struct coredump_params *cprm)
+{
+ /* Require nonrelative corefile path and be extra careful. */
+ return __get_dumpable(cprm->mm_flags) == SUID_DUMP_ROOT;
+}
+
+static bool coredump_file(struct core_name *cn, struct coredump_params *cprm,
+ const struct linux_binfmt *binfmt)
+{
+ struct mnt_idmap *idmap;
+ struct inode *inode;
+ struct file *file __free(fput) = NULL;
+ int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW | O_LARGEFILE | O_EXCL;
+
+ if (cprm->limit < binfmt->min_coredump)
+ return false;
+
+ if (coredump_force_suid_safe(cprm) && cn->corename[0] != '/') {
+ coredump_report_failure("this process can only dump core to a fully qualified path, skipping core dump");
+ return false;
+ }
+
+ /*
+ * Unlink the file if it exists unless this is a SUID
+ * binary - in that case, we're running around with root
+ * privs and don't want to unlink another user's coredump.
+ */
+ if (!coredump_force_suid_safe(cprm)) {
+ /*
+ * If it doesn't exist, that's fine. If there's some
+ * other problem, we'll catch it at the filp_open().
+ */
+ do_unlinkat(AT_FDCWD, getname_kernel(cn->corename));
+ }
+
+ /*
+ * There is a race between unlinking and creating the
+ * file, but if that causes an EEXIST here, that's
+ * fine - another process raced with us while creating
+ * the corefile, and the other process won. To userspace,
+ * what matters is that at least one of the two processes
+ * writes its coredump successfully, not which one.
+ */
+ if (coredump_force_suid_safe(cprm)) {
+ /*
+ * Using user namespaces, normal user tasks can change
+ * their current->fs->root to point to arbitrary
+ * directories. Since the intention of the "only dump
+ * with a fully qualified path" rule is to control where
+ * coredumps may be placed using root privileges,
+ * current->fs->root must not be used. Instead, use the
+ * root directory of init_task.
+ */
+ struct path root;
+
+ task_lock(&init_task);
+ get_fs_root(init_task.fs, &root);
+ task_unlock(&init_task);
+ file = file_open_root(&root, cn->corename, open_flags, 0600);
+ path_put(&root);
+ } else {
+ file = filp_open(cn->corename, open_flags, 0600);
+ }
+ if (IS_ERR(file))
+ return false;
+
+ inode = file_inode(file);
+ if (inode->i_nlink > 1)
+ return false;
+ if (d_unhashed(file->f_path.dentry))
+ return false;
+ /*
+ * AK: actually i see no reason to not allow this for named
+ * pipes etc, but keep the previous behaviour for now.
+ */
+ if (!S_ISREG(inode->i_mode))
+ return false;
+ /*
+ * Don't dump core if the filesystem changed owner or mode
+ * of the file during file creation. This is an issue when
+ * a process dumps core while its cwd is e.g. on a vfat
+ * filesystem.
+ */
+ idmap = file_mnt_idmap(file);
+ if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) {
+ coredump_report_failure("Core dump to %s aborted: cannot preserve file owner", cn->corename);
+ return false;
+ }
+ if ((inode->i_mode & 0677) != 0600) {
+ coredump_report_failure("Core dump to %s aborted: cannot preserve file permissions", cn->corename);
+ return false;
+ }
+ if (!(file->f_mode & FMODE_CAN_WRITE))
+ return false;
+ if (do_truncate(idmap, file->f_path.dentry, 0, 0, file))
+ return false;
+
+ cprm->file = no_free_ptr(file);
+ return true;
+}
+
+static bool coredump_pipe(struct core_name *cn, struct coredump_params *cprm,
+ size_t *argv, int argc)
{
+ int argi;
+ char **helper_argv __free(kfree) = NULL;
+ struct subprocess_info *sub_info;
+
+ if (cprm->limit == 1) {
+ /* See umh_coredump_setup() which sets RLIMIT_CORE = 1.
+ *
+ * Normally core limits are irrelevant to pipes, since
+ * we're not writing to the file system, but we use
+ * cprm.limit of 1 here as a special value, this is a
+ * consistent way to catch recursive crashes.
+ * We can still crash if the core_pattern binary sets
+ * RLIM_CORE = !1, but it runs as root, and can do
+ * lots of stupid things.
+ *
+ * Note that we use task_tgid_vnr here to grab the pid
+ * of the process group leader. That way we get the
+ * right pid if a thread in a multi-threaded
+ * core_pattern process dies.
+ */
+ coredump_report_failure("RLIMIT_CORE is set to 1, aborting core");
+ return false;
+ }
+ cprm->limit = RLIM_INFINITY;
+
+ cn->core_pipe_limit = atomic_inc_return(&core_pipe_count);
+ if (core_pipe_limit && (core_pipe_limit < cn->core_pipe_limit)) {
+ coredump_report_failure("over core_pipe_limit, skipping core dump");
+ return false;
+ }
+
+ helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), GFP_KERNEL);
+ if (!helper_argv) {
+ coredump_report_failure("%s failed to allocate memory", __func__);
+ return false;
+ }
+ for (argi = 0; argi < argc; argi++)
+ helper_argv[argi] = cn->corename + argv[argi];
+ helper_argv[argi] = NULL;
+
+ sub_info = call_usermodehelper_setup(helper_argv[0], helper_argv, NULL,
+ GFP_KERNEL, umh_coredump_setup,
+ NULL, cprm);
+ if (!sub_info)
+ return false;
+
+ if (call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC)) {
+ coredump_report_failure("|%s pipe failed", cn->corename);
+ return false;
+ }
+
+ /*
+ * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would
+ * have this set to NULL.
+ */
+ if (!cprm->file) {
+ coredump_report_failure("Core dump to |%s disabled", cn->corename);
+ return false;
+ }
+
+ return true;
+}
+
+static bool coredump_write(struct core_name *cn,
+ struct coredump_params *cprm,
+ struct linux_binfmt *binfmt)
+{
+ if (dump_interrupted())
+ return true;
+
+ if (!dump_vma_snapshot(cprm))
+ return false;
+
+ file_start_write(cprm->file);
+ cn->core_dumped = binfmt->core_dump(cprm);
+ /*
+ * Ensures that file size is big enough to contain the current
+ * file postion. This prevents gdb from complaining about
+	 * file position. This prevents gdb from complaining about
+ * dump_skip.
+ */
+ if (cprm->to_skip) {
+ cprm->to_skip--;
+ dump_emit(cprm, "", 1);
+ }
+ file_end_write(cprm->file);
+ free_vma_snapshot(cprm);
+ return true;
+}
+
+static void coredump_cleanup(struct core_name *cn, struct coredump_params *cprm)
+{
+ if (cprm->file)
+ filp_close(cprm->file, NULL);
+ if (cn->core_pipe_limit) {
+ VFS_WARN_ON_ONCE(cn->core_type != COREDUMP_PIPE);
+ atomic_dec(&core_pipe_count);
+ }
+ kfree(cn->corename);
+ coredump_finish(cn->core_dumped);
+}
+
+static inline bool coredump_skip(const struct coredump_params *cprm,
+ const struct linux_binfmt *binfmt)
+{
+ if (!binfmt)
+ return true;
+ if (!binfmt->core_dump)
+ return true;
+ if (!__get_dumpable(cprm->mm_flags))
+ return true;
+ return false;
+}
+
+void vfs_coredump(const kernel_siginfo_t *siginfo)
+{
+ struct cred *cred __free(put_cred) = NULL;
+ size_t *argv __free(kfree) = NULL;
struct core_state core_state;
struct core_name cn;
struct mm_struct *mm = current->mm;
- struct linux_binfmt * binfmt;
+ struct linux_binfmt *binfmt = mm->binfmt;
const struct cred *old_cred;
- struct cred *cred;
- int retval = 0;
- size_t *argv = NULL;
int argc = 0;
- /* require nonrelative corefile path and be extra careful */
- bool need_suid_safe = false;
- bool core_dumped = false;
- static atomic_t core_dump_count = ATOMIC_INIT(0);
struct coredump_params cprm = {
.siginfo = siginfo,
.limit = rlimit(RLIMIT_CORE),
@@ -662,357 +1116,92 @@ void do_coredump(const kernel_siginfo_t *siginfo)
audit_core_dumps(siginfo->si_signo);
- binfmt = mm->binfmt;
- if (!binfmt || !binfmt->core_dump)
- goto fail;
- if (!__get_dumpable(cprm.mm_flags))
- goto fail;
+ if (coredump_skip(&cprm, binfmt))
+ return;
cred = prepare_creds();
if (!cred)
- goto fail;
+ return;
/*
* We cannot trust fsuid as being the "true" uid of the process
* nor do we know its entire history. We only know it was tainted
* so we dump it as root in mode 2, and only into a controlled
* environment (pipe handler or fully qualified path).
*/
- if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {
- /* Setuid core dump mode */
- cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
- need_suid_safe = true;
- }
+ if (coredump_force_suid_safe(&cprm))
+ cred->fsuid = GLOBAL_ROOT_UID;
- retval = coredump_wait(siginfo->si_signo, &core_state);
- if (retval < 0)
- goto fail_creds;
+ if (coredump_wait(siginfo->si_signo, &core_state) < 0)
+ return;
old_cred = override_creds(cred);
- retval = format_corename(&cn, &cprm, &argv, &argc);
- if (retval < 0) {
+ if (!coredump_parse(&cn, &cprm, &argv, &argc)) {
coredump_report_failure("format_corename failed, aborting core");
- goto fail_unlock;
+ goto close_fail;
}
switch (cn.core_type) {
- case COREDUMP_FILE: {
- struct mnt_idmap *idmap;
- struct inode *inode;
- int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW |
- O_LARGEFILE | O_EXCL;
-
- if (cprm.limit < binfmt->min_coredump)
- goto fail_unlock;
-
- if (need_suid_safe && cn.corename[0] != '/') {
- coredump_report_failure(
- "this process can only dump core to a fully qualified path, skipping core dump");
- goto fail_unlock;
- }
-
- /*
- * Unlink the file if it exists unless this is a SUID
- * binary - in that case, we're running around with root
- * privs and don't want to unlink another user's coredump.
- */
- if (!need_suid_safe) {
- /*
- * If it doesn't exist, that's fine. If there's some
- * other problem, we'll catch it at the filp_open().
- */
- do_unlinkat(AT_FDCWD, getname_kernel(cn.corename));
- }
-
- /*
- * There is a race between unlinking and creating the
- * file, but if that causes an EEXIST here, that's
- * fine - another process raced with us while creating
- * the corefile, and the other process won. To userspace,
- * what matters is that at least one of the two processes
- * writes its coredump successfully, not which one.
- */
- if (need_suid_safe) {
- /*
- * Using user namespaces, normal user tasks can change
- * their current->fs->root to point to arbitrary
- * directories. Since the intention of the "only dump
- * with a fully qualified path" rule is to control where
- * coredumps may be placed using root privileges,
- * current->fs->root must not be used. Instead, use the
- * root directory of init_task.
- */
- struct path root;
-
- task_lock(&init_task);
- get_fs_root(init_task.fs, &root);
- task_unlock(&init_task);
- cprm.file = file_open_root(&root, cn.corename,
- open_flags, 0600);
- path_put(&root);
- } else {
- cprm.file = filp_open(cn.corename, open_flags, 0600);
- }
- if (IS_ERR(cprm.file))
- goto fail_unlock;
-
- inode = file_inode(cprm.file);
- if (inode->i_nlink > 1)
- goto close_fail;
- if (d_unhashed(cprm.file->f_path.dentry))
- goto close_fail;
- /*
- * AK: actually i see no reason to not allow this for named
- * pipes etc, but keep the previous behaviour for now.
- */
- if (!S_ISREG(inode->i_mode))
- goto close_fail;
- /*
- * Don't dump core if the filesystem changed owner or mode
- * of the file during file creation. This is an issue when
- * a process dumps core while its cwd is e.g. on a vfat
- * filesystem.
- */
- idmap = file_mnt_idmap(cprm.file);
- if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode),
- current_fsuid())) {
- coredump_report_failure("Core dump to %s aborted: "
- "cannot preserve file owner", cn.corename);
- goto close_fail;
- }
- if ((inode->i_mode & 0677) != 0600) {
- coredump_report_failure("Core dump to %s aborted: "
- "cannot preserve file permissions", cn.corename);
- goto close_fail;
- }
- if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
- goto close_fail;
- if (do_truncate(idmap, cprm.file->f_path.dentry,
- 0, 0, cprm.file))
+ case COREDUMP_FILE:
+ if (!coredump_file(&cn, &cprm, binfmt))
goto close_fail;
break;
- }
- case COREDUMP_PIPE: {
- int argi;
- int dump_count;
- char **helper_argv;
- struct subprocess_info *sub_info;
-
- if (cprm.limit == 1) {
- /* See umh_coredump_setup() which sets RLIMIT_CORE = 1.
- *
- * Normally core limits are irrelevant to pipes, since
- * we're not writing to the file system, but we use
- * cprm.limit of 1 here as a special value, this is a
- * consistent way to catch recursive crashes.
- * We can still crash if the core_pattern binary sets
- * RLIM_CORE = !1, but it runs as root, and can do
- * lots of stupid things.
- *
- * Note that we use task_tgid_vnr here to grab the pid
- * of the process group leader. That way we get the
- * right pid if a thread in a multi-threaded
- * core_pattern process dies.
- */
- coredump_report_failure("RLIMIT_CORE is set to 1, aborting core");
- goto fail_unlock;
- }
- cprm.limit = RLIM_INFINITY;
-
- dump_count = atomic_inc_return(&core_dump_count);
- if (core_pipe_limit && (core_pipe_limit < dump_count)) {
- coredump_report_failure("over core_pipe_limit, skipping core dump");
- goto fail_dropcount;
- }
-
- helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv),
- GFP_KERNEL);
- if (!helper_argv) {
- coredump_report_failure("%s failed to allocate memory", __func__);
- goto fail_dropcount;
- }
- for (argi = 0; argi < argc; argi++)
- helper_argv[argi] = cn.corename + argv[argi];
- helper_argv[argi] = NULL;
-
- retval = -ENOMEM;
- sub_info = call_usermodehelper_setup(helper_argv[0],
- helper_argv, NULL, GFP_KERNEL,
- umh_coredump_setup, NULL, &cprm);
- if (sub_info)
- retval = call_usermodehelper_exec(sub_info,
- UMH_WAIT_EXEC);
-
- kfree(helper_argv);
- if (retval) {
- coredump_report_failure("|%s pipe failed", cn.corename);
+ case COREDUMP_PIPE:
+ if (!coredump_pipe(&cn, &cprm, argv, argc))
goto close_fail;
- }
break;
- }
- case COREDUMP_SOCK: {
-#ifdef CONFIG_UNIX
- struct file *file __free(fput) = NULL;
- struct sockaddr_un addr = {
- .sun_family = AF_UNIX,
- };
- ssize_t addr_len;
- struct socket *socket;
-
- addr_len = strscpy(addr.sun_path, cn.corename);
- if (addr_len < 0)
- goto close_fail;
- addr_len += offsetof(struct sockaddr_un, sun_path) + 1;
-
- /*
- * It is possible that the userspace process which is
- * supposed to handle the coredump and is listening on
- * the AF_UNIX socket coredumps. Userspace should just
- * mark itself non dumpable.
- */
-
- retval = sock_create_kern(&init_net, AF_UNIX, SOCK_STREAM, 0, &socket);
- if (retval < 0)
- goto close_fail;
-
- file = sock_alloc_file(socket, 0, NULL);
- if (IS_ERR(file))
- goto close_fail;
-
- /*
- * Set the thread-group leader pid which is used for the
- * peer credentials during connect() below. Then
- * immediately register it in pidfs...
- */
- cprm.pid = task_tgid(current);
- retval = pidfs_register_pid(cprm.pid);
- if (retval)
+ case COREDUMP_SOCK_REQ:
+ fallthrough;
+ case COREDUMP_SOCK:
+ if (!coredump_socket(&cn, &cprm))
goto close_fail;
-
- /*
- * ... and set the coredump information so userspace
- * has it available after connect()...
- */
- pidfs_coredump(&cprm);
-
- retval = kernel_connect(socket, (struct sockaddr *)(&addr),
- addr_len, O_NONBLOCK | SOCK_COREDUMP);
-
- /*
- * ... Make sure to only put our reference after connect() took
- * its own reference keeping the pidfs entry alive ...
- */
- pidfs_put_pid(cprm.pid);
-
- if (retval) {
- if (retval == -EAGAIN)
- coredump_report_failure("Coredump socket %s receive queue full", addr.sun_path);
- else
- coredump_report_failure("Coredump socket connection %s failed %d", addr.sun_path, retval);
- goto close_fail;
- }
-
- /* ... and validate that @sk_peer_pid matches @cprm.pid. */
- if (WARN_ON_ONCE(unix_peer(socket->sk)->sk_peer_pid != cprm.pid))
- goto close_fail;
-
- cprm.limit = RLIM_INFINITY;
- cprm.file = no_free_ptr(file);
-#else
- coredump_report_failure("Core dump socket support %s disabled", cn.corename);
- goto close_fail;
-#endif
break;
- }
default:
WARN_ON_ONCE(true);
goto close_fail;
}
+ /* Don't even generate the coredump. */
+ if (cn.mask & COREDUMP_REJECT)
+ goto close_fail;
+
/* get us an unshared descriptor table; almost always a no-op */
/* The cell spufs coredump code reads the file descriptor tables */
- retval = unshare_files();
- if (retval)
+ if (unshare_files())
goto close_fail;
- if (!dump_interrupted()) {
- /*
- * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would
- * have this set to NULL.
- */
- if (!cprm.file) {
- coredump_report_failure("Core dump to |%s disabled", cn.corename);
- goto close_fail;
- }
- if (!dump_vma_snapshot(&cprm))
- goto close_fail;
- file_start_write(cprm.file);
- core_dumped = binfmt->core_dump(&cprm);
- /*
- * Ensures that file size is big enough to contain the current
- * file postion. This prevents gdb from complaining about
- * a truncated file if the last "write" to the file was
- * dump_skip.
- */
- if (cprm.to_skip) {
- cprm.to_skip--;
- dump_emit(&cprm, "", 1);
- }
- file_end_write(cprm.file);
- free_vma_snapshot(&cprm);
- }
+ if ((cn.mask & COREDUMP_KERNEL) && !coredump_write(&cn, &cprm, binfmt))
+ goto close_fail;
-#ifdef CONFIG_UNIX
- /* Let userspace know we're done processing the coredump. */
- if (sock_from_file(cprm.file))
- kernel_sock_shutdown(sock_from_file(cprm.file), SHUT_WR);
-#endif
+ coredump_sock_shutdown(cprm.file);
+
+ /* Let the parent know that a coredump was generated. */
+ if (cn.mask & COREDUMP_USERSPACE)
+ cn.core_dumped = true;
/*
* When core_pipe_limit is set we wait for the coredump server
* or usermodehelper to finish before exiting so it can e.g.,
* inspect /proc/<pid>.
*/
- if (core_pipe_limit) {
+ if (cn.mask & COREDUMP_WAIT) {
switch (cn.core_type) {
case COREDUMP_PIPE:
wait_for_dump_helpers(cprm.file);
break;
-#ifdef CONFIG_UNIX
- case COREDUMP_SOCK: {
- ssize_t n;
-
- /*
- * We use a simple read to wait for the coredump
- * processing to finish. Either the socket is
- * closed or we get sent unexpected data. In
- * both cases, we're done.
- */
- n = __kernel_read(cprm.file, &(char){ 0 }, 1, NULL);
- if (n != 0)
- coredump_report_failure("Unexpected data on coredump socket");
+ case COREDUMP_SOCK_REQ:
+ fallthrough;
+ case COREDUMP_SOCK:
+ coredump_sock_wait(cprm.file);
break;
- }
-#endif
default:
break;
}
}
close_fail:
- if (cprm.file)
- filp_close(cprm.file, NULL);
-fail_dropcount:
- if (cn.core_type == COREDUMP_PIPE)
- atomic_dec(&core_dump_count);
-fail_unlock:
- kfree(argv);
- kfree(cn.corename);
- coredump_finish(core_dumped);
+ coredump_cleanup(&cn, &cprm);
revert_creds(old_cred);
-fail_creds:
- put_cred(cred);
-fail:
return;
}
@@ -1238,6 +1427,8 @@ void validate_coredump_safety(void)
static inline bool check_coredump_socket(void)
{
+ const char *p;
+
if (core_pattern[0] != '@')
return true;
@@ -1249,8 +1440,25 @@ static inline bool check_coredump_socket(void)
if (current->nsproxy->mnt_ns != init_task.nsproxy->mnt_ns)
return false;
- /* Must be an absolute path. */
- if (*(core_pattern + 1) != '/')
+ /* Must be an absolute path... */
+ if (core_pattern[1] != '/') {
+ /* ... or the socket request protocol... */
+ if (core_pattern[1] != '@')
+ return false;
+ /* ... and if so must be an absolute path. */
+ if (core_pattern[2] != '/')
+ return false;
+ p = &core_pattern[2];
+ } else {
+ p = &core_pattern[1];
+ }
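+	/* e.g. "@/run/dumpd.sock" (plain) vs "@@/run/dumpd.sock" (request protocol) */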
+
+ /* The path obviously cannot exceed UNIX_PATH_MAX. */
+ if (strlen(p) >= UNIX_PATH_MAX)
+ return false;
+
+ /* Must not contain ".." in the path. */
+ if (name_contains_dotdot(core_pattern))
return false;
return true;
diff --git a/fs/dcache.c b/fs/dcache.c
index 03d58b2d4fa3..60046ae23d51 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1436,7 +1436,7 @@ int d_set_mounted(struct dentry *dentry)
{
struct dentry *p;
int ret = -ENOENT;
- write_seqlock(&rename_lock);
+ read_seqlock_excl(&rename_lock);
for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) {
/* Need exclusion wrt. d_invalidate() */
spin_lock(&p->d_lock);
@@ -1456,7 +1456,7 @@ int d_set_mounted(struct dentry *dentry)
}
spin_unlock(&dentry->d_lock);
out:
- write_sequnlock(&rename_lock);
+ read_sequnlock_excl(&rename_lock);
return ret;
}
@@ -1731,14 +1731,14 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
dentry->d_inode = NULL;
dentry->d_parent = dentry;
dentry->d_sb = sb;
- dentry->d_op = NULL;
+ dentry->d_op = sb->__s_d_op;
+ dentry->d_flags = sb->s_d_flags;
dentry->d_fsdata = NULL;
INIT_HLIST_BL_NODE(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_HLIST_HEAD(&dentry->d_children);
INIT_HLIST_NODE(&dentry->d_u.d_alias);
INIT_HLIST_NODE(&dentry->d_sib);
- d_set_d_op(dentry, dentry->d_sb->s_d_op);
if (dentry->d_op && dentry->d_op->d_init) {
err = dentry->d_op->d_init(dentry);
@@ -1821,8 +1821,9 @@ struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
struct dentry *dentry = __d_alloc(sb, name);
if (likely(dentry)) {
dentry->d_flags |= DCACHE_NORCU;
- if (!sb->s_d_op)
- d_set_d_op(dentry, &anon_ops);
+ /* d_op_flags(&anon_ops) is 0 */
+ if (!dentry->d_op)
+ dentry->d_op = &anon_ops;
}
return dentry;
}
@@ -1837,35 +1838,50 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
}
EXPORT_SYMBOL(d_alloc_name);
-void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
+#define DCACHE_OP_FLAGS \
+ (DCACHE_OP_HASH | DCACHE_OP_COMPARE | DCACHE_OP_REVALIDATE | \
+ DCACHE_OP_WEAK_REVALIDATE | DCACHE_OP_DELETE | DCACHE_OP_PRUNE | \
+ DCACHE_OP_REAL)
+
+static unsigned int d_op_flags(const struct dentry_operations *op)
{
+ unsigned int flags = 0;
+ if (op) {
+ if (op->d_hash)
+ flags |= DCACHE_OP_HASH;
+ if (op->d_compare)
+ flags |= DCACHE_OP_COMPARE;
+ if (op->d_revalidate)
+ flags |= DCACHE_OP_REVALIDATE;
+ if (op->d_weak_revalidate)
+ flags |= DCACHE_OP_WEAK_REVALIDATE;
+ if (op->d_delete)
+ flags |= DCACHE_OP_DELETE;
+ if (op->d_prune)
+ flags |= DCACHE_OP_PRUNE;
+ if (op->d_real)
+ flags |= DCACHE_OP_REAL;
+ }
+ return flags;
+}
+
+static void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
+{
+ unsigned int flags = d_op_flags(op);
WARN_ON_ONCE(dentry->d_op);
- WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH |
- DCACHE_OP_COMPARE |
- DCACHE_OP_REVALIDATE |
- DCACHE_OP_WEAK_REVALIDATE |
- DCACHE_OP_DELETE |
- DCACHE_OP_REAL));
+ WARN_ON_ONCE(dentry->d_flags & DCACHE_OP_FLAGS);
dentry->d_op = op;
- if (!op)
- return;
- if (op->d_hash)
- dentry->d_flags |= DCACHE_OP_HASH;
- if (op->d_compare)
- dentry->d_flags |= DCACHE_OP_COMPARE;
- if (op->d_revalidate)
- dentry->d_flags |= DCACHE_OP_REVALIDATE;
- if (op->d_weak_revalidate)
- dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE;
- if (op->d_delete)
- dentry->d_flags |= DCACHE_OP_DELETE;
- if (op->d_prune)
- dentry->d_flags |= DCACHE_OP_PRUNE;
- if (op->d_real)
- dentry->d_flags |= DCACHE_OP_REAL;
-
-}
-EXPORT_SYMBOL(d_set_d_op);
+ if (flags)
+ dentry->d_flags |= flags;
+}
+
+void set_default_d_op(struct super_block *s, const struct dentry_operations *ops)
+{
+ unsigned int flags = d_op_flags(ops);
+ s->__s_d_op = ops;
+ s->s_d_flags = (s->s_d_flags & ~DCACHE_OP_FLAGS) | flags;
+}
+EXPORT_SYMBOL(set_default_d_op);
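With d_set_d_op() now static, the per-superblock path is the only way in: a filesystem registers its default dentry_operations once, and __d_alloc() copies both __s_d_op and the precomputed s_d_flags into every new dentry. A minimal, hypothetical fill_super sketch (the "examplefs" names are illustrative, not part of this patch):

static const struct dentry_operations examplefs_dops = {
	.d_revalidate	= examplefs_d_revalidate,	/* hypothetical */
};

static int examplefs_fill_super(struct super_block *sb, struct fs_context *fc)
{
	sb->s_op = &examplefs_sops;			/* hypothetical */
	set_default_d_op(sb, &examplefs_dops);		/* was: sb->s_d_op = ... */
	sb->s_d_flags |= DCACHE_DONTCACHE;		/* optional extra sb-wide flags */
	return 0;
}

This is the conversion applied to debugfs, devpts, ecryptfs and efivarfs later in this patch.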
static unsigned d_flags_for_inode(struct inode *inode)
{
@@ -2530,13 +2546,19 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
unsigned int hash = name->hash;
struct hlist_bl_head *b = in_lookup_hash(parent, hash);
struct hlist_bl_node *node;
- struct dentry *new = d_alloc(parent, name);
+ struct dentry *new = __d_alloc(parent->d_sb, name);
struct dentry *dentry;
unsigned seq, r_seq, d_seq;
if (unlikely(!new))
return ERR_PTR(-ENOMEM);
+ new->d_flags |= DCACHE_PAR_LOOKUP;
+ spin_lock(&parent->d_lock);
+ new->d_parent = dget_dlock(parent);
+ hlist_add_head(&new->d_sib, &parent->d_children);
+ spin_unlock(&parent->d_lock);
+
retry:
rcu_read_lock();
seq = smp_load_acquire(&parent->d_inode->i_dir_seq);
@@ -2620,8 +2642,6 @@ retry:
return dentry;
}
rcu_read_unlock();
- /* we can't take ->d_lock here; it's OK, though. */
- new->d_flags |= DCACHE_PAR_LOOKUP;
new->d_wait = wq;
hlist_bl_add_head(&new->d_u.d_in_lookup_hash, b);
hlist_bl_unlock(b);
@@ -2667,7 +2687,8 @@ EXPORT_SYMBOL(__d_lookup_unhash_wake);
/* inode->i_lock held if inode is non-NULL */
-static inline void __d_add(struct dentry *dentry, struct inode *inode)
+static inline void __d_add(struct dentry *dentry, struct inode *inode,
+ const struct dentry_operations *ops)
{
wait_queue_head_t *d_wait;
struct inode *dir = NULL;
@@ -2678,6 +2699,8 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode)
n = start_dir_add(dir);
d_wait = __d_lookup_unhash(dentry);
}
+ if (unlikely(ops))
+ d_set_d_op(dentry, ops);
if (inode) {
unsigned add_flags = d_flags_for_inode(inode);
hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
@@ -2709,7 +2732,7 @@ void d_add(struct dentry *entry, struct inode *inode)
security_d_instantiate(entry, inode);
spin_lock(&inode->i_lock);
}
- __d_add(entry, inode);
+ __d_add(entry, inode, NULL);
}
EXPORT_SYMBOL(d_add);
@@ -2774,10 +2797,10 @@ static void copy_name(struct dentry *dentry, struct dentry *target)
* @target: new dentry
* @exchange: exchange the two dentries
*
- * Update the dcache to reflect the move of a file name. Negative
- * dcache entries should not be moved in this way. Caller must hold
- * rename_lock, the i_mutex of the source and target directories,
- * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
+ * Update the dcache to reflect the move of a file name. Negative dcache
+ * entries should not be moved in this way. Caller must hold rename_lock,
+ * the i_rwsem of the source and target directories (exclusively), and
+ * the sb->s_vfs_rename_mutex if they differ. See lock_rename().
PLACEHOLDER
*/
static void __d_move(struct dentry *dentry, struct dentry *target,
bool exchange)
@@ -2923,7 +2946,7 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
* This helper attempts to cope with remotely renamed directories
*
* It assumes that the caller is already holding
- * dentry->d_parent->d_inode->i_mutex, and rename_lock
+ * dentry->d_parent->d_inode->i_rwsem, and rename_lock
*
* Note: If ever the locking in lock_rename() changes, then please
* remember to update this too...
@@ -2961,30 +2984,8 @@ out_err:
return ret;
}
-/**
- * d_splice_alias - splice a disconnected dentry into the tree if one exists
- * @inode: the inode which may have a disconnected dentry
- * @dentry: a negative dentry which we want to point to the inode.
- *
- * If inode is a directory and has an IS_ROOT alias, then d_move that in
- * place of the given dentry and return it, else simply d_add the inode
- * to the dentry and return NULL.
- *
- * If a non-IS_ROOT directory is found, the filesystem is corrupt, and
- * we should error out: directories can't have multiple aliases.
- *
- * This is needed in the lookup routine of any filesystem that is exportable
- * (via knfsd) so that we can build dcache paths to directories effectively.
- *
- * If a dentry was found and moved, then it is returned. Otherwise NULL
- * is returned. This matches the expected return value of ->lookup.
- *
- * Cluster filesystems may call this function with a negative, hashed dentry.
- * In that case, we know that the inode will be a regular file, and also this
- * will only occur during atomic_open. So we need to check for the dentry
- * being already hashed only in the final case.
- */
-struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
+struct dentry *d_splice_alias_ops(struct inode *inode, struct dentry *dentry,
+ const struct dentry_operations *ops)
{
if (IS_ERR(inode))
return ERR_CAST(inode);
@@ -3030,9 +3031,37 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
}
}
out:
- __d_add(dentry, inode);
+ __d_add(dentry, inode, ops);
return NULL;
}
+
+/**
+ * d_splice_alias - splice a disconnected dentry into the tree if one exists
+ * @inode: the inode which may have a disconnected dentry
+ * @dentry: a negative dentry which we want to point to the inode.
+ *
+ * If inode is a directory and has an IS_ROOT alias, then d_move that in
+ * place of the given dentry and return it, else simply d_add the inode
+ * to the dentry and return NULL.
+ *
+ * If a non-IS_ROOT directory is found, the filesystem is corrupt, and
+ * we should error out: directories can't have multiple aliases.
+ *
+ * This is needed in the lookup routine of any filesystem that is exportable
+ * (via knfsd) so that we can build dcache paths to directories effectively.
+ *
+ * If a dentry was found and moved, then it is returned. Otherwise NULL
+ * is returned. This matches the expected return value of ->lookup.
+ *
+ * Cluster filesystems may call this function with a negative, hashed dentry.
+ * In that case, we know that the inode will be a regular file, and also this
+ * will only occur during atomic_open. So we need to check for the dentry
+ * being already hashed only in the final case.
+ */
+struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
+{
+ return d_splice_alias_ops(inode, dentry, NULL);
+}
EXPORT_SYMBOL(d_splice_alias);
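d_splice_alias_ops() lets a lookup attach per-dentry operations at splice time, inside __d_add() while the dentry is still in-lookup, instead of calling the now-static d_set_d_op() beforehand. A hedged sketch (the examplefs helpers are hypothetical):

static struct dentry *examplefs_lookup(struct inode *dir,
				       struct dentry *dentry,
				       unsigned int flags)
{
	/* hypothetical inode lookup by name */
	struct inode *inode = examplefs_iget(dir->i_sb, dentry->d_name.name);

	/* attach special ops to this dentry only; NULL and ERR_PTR()
	 * inodes are handled exactly as d_splice_alias() handles them */
	return d_splice_alias_ops(inode, dentry, &examplefs_special_dops);
}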
/*
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 30c4944e1862..6677991c7e4b 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -258,7 +258,6 @@ static struct vfsmount *debugfs_automount(struct path *path)
}
static const struct dentry_operations debugfs_dops = {
- .d_delete = always_delete_dentry,
.d_release = debugfs_release_dentry,
.d_automount = debugfs_automount,
};
@@ -273,7 +272,8 @@ static int debugfs_fill_super(struct super_block *sb, struct fs_context *fc)
return err;
sb->s_op = &debugfs_super_operations;
- sb->s_d_op = &debugfs_dops;
+ set_default_d_op(sb, &debugfs_dops);
+ sb->s_d_flags |= DCACHE_DONTCACHE;
debugfs_apply_options(sb);
@@ -384,27 +384,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
if (!parent)
parent = debugfs_mount->mnt_root;
- inode_lock(d_inode(parent));
- if (unlikely(IS_DEADDIR(d_inode(parent))))
- dentry = ERR_PTR(-ENOENT);
- else
- dentry = lookup_noperm(&QSTR(name), parent);
- if (!IS_ERR(dentry) && d_really_is_positive(dentry)) {
- if (d_is_dir(dentry))
- pr_err("Directory '%s' with parent '%s' already present!\n",
- name, parent->d_name.name);
- else
- pr_err("File '%s' in directory '%s' already present!\n",
- name, parent->d_name.name);
- dput(dentry);
- dentry = ERR_PTR(-EEXIST);
- }
-
+ dentry = simple_start_creating(parent, name);
if (IS_ERR(dentry)) {
- inode_unlock(d_inode(parent));
+ if (dentry == ERR_PTR(-EEXIST))
+ pr_err("'%s' already exists in '%pd'\n", name, parent);
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
}
-
return dentry;
}
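The open-coded lock-and-lookup sequence collapses into simple_start_creating(). Judging from this call site, the helper locks the parent, performs a no-permission lookup, and hands back either a negative dentry (parent still locked) or an ERR_PTR() such as -EEXIST/-ENOENT with the parent already unlocked. A usage sketch under those assumptions:

	struct dentry *dentry;

	dentry = simple_start_creating(parent, "stats");	/* name illustrative */
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);		/* parent not held on error */
	/* ... instantiate an inode on the negative dentry ... */
	inode_unlock(d_inode(parent));		/* caller drops the lock */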
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 9c20d78e41f6..fdf22264a8e9 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -381,7 +381,7 @@ static int devpts_fill_super(struct super_block *s, struct fs_context *fc)
s->s_blocksize_bits = 10;
s->s_magic = DEVPTS_SUPER_MAGIC;
s->s_op = &devpts_sops;
- s->s_d_op = &simple_dentry_operations;
+ s->s_d_flags = DCACHE_DONTCACHE;
s->s_time_gran = 1;
fsi->sb = s;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index bbd05f1a2145..1694ee9a9382 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1083,8 +1083,8 @@ static inline int drop_refcount(struct dio *dio)
* The locking rules are governed by the flags parameter:
* - if the flags value contains DIO_LOCKING we use a fancy locking
* scheme for dumb filesystems.
- * For writes this function is called under i_mutex and returns with
- * i_mutex held, for reads, i_mutex is not held on entry, but it is
+ * For writes this function is called under i_rwsem and returns with
+ * i_rwsem held, for reads, i_rwsem is not held on entry, but it is
* taken and dropped again before returning.
* - if the flags value does NOT contain DIO_LOCKING we don't use any
* internal locking but rather rely on the filesystem to synchronize
@@ -1094,7 +1094,7 @@ static inline int drop_refcount(struct dio *dio)
* counter before starting direct I/O, and decrement it once we are done.
* Truncate can wait for it to reach zero to provide exclusion. It is
* expected that filesystem provide exclusion between new direct I/O
- * and truncates. For DIO_LOCKING filesystems this is done by i_mutex,
+ * and truncates. For DIO_LOCKING filesystems this is done by i_rwsem,
* but other filesystems need to take care of this on their own.
*
* NOTE: if you pass "sdio" to anything by pointer make sure that function
@@ -1279,7 +1279,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
/*
* All block lookups have been performed. For READ requests
- * we can let i_mutex go now that its achieved its purpose
+ * we can let i_rwsem go now that it's achieved its purpose
* of protecting us from looking up uninitialized blocks.
*/
if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING))
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 493d7f194956..bd317d943d62 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -635,10 +635,10 @@ ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
}
rd.old_mnt_idmap = &nop_mnt_idmap;
- rd.old_dir = d_inode(lower_old_dir_dentry);
+ rd.old_parent = lower_old_dir_dentry;
rd.old_dentry = lower_old_dentry;
rd.new_mnt_idmap = &nop_mnt_idmap;
- rd.new_dir = d_inode(lower_new_dir_dentry);
+ rd.new_parent = lower_new_dir_dentry;
rd.new_dentry = lower_new_dentry;
rc = vfs_rename(&rd);
if (rc)
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 8dd1d7189c3b..eab1beb846d3 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -20,6 +20,7 @@
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/fs_stack.h>
+#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/magic.h>
#include "ecryptfs_kernel.h"
@@ -471,7 +472,7 @@ static int ecryptfs_get_tree(struct fs_context *fc)
sbi = NULL;
s->s_op = &ecryptfs_sops;
s->s_xattr = ecryptfs_xattr_handlers;
- s->s_d_op = &ecryptfs_dops;
+ set_default_d_op(s, &ecryptfs_dops);
err = "Reading sb failed";
rc = kern_path(fc->source, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
@@ -764,7 +765,7 @@ static struct kobject *ecryptfs_kobj;
static ssize_t version_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- return snprintf(buff, PAGE_SIZE, "%d\n", ECRYPTFS_VERSIONING_MASK);
+ return sysfs_emit(buff, "%d\n", ECRYPTFS_VERSIONING_MASK);
}
static struct kobj_attribute version_attr = __ATTR_RO(version);
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 60f0ac8744b5..2c2b12fedeae 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -228,7 +228,7 @@ out:
/**
* ecryptfs_write_begin
- * @file: The eCryptfs file
+ * @iocb: I/O control block for the eCryptfs file
* @mapping: The eCryptfs object
* @pos: The file offset at which to start writing
* @len: Length of the write
@@ -239,7 +239,7 @@ out:
*
* Returns zero on success; non-zero otherwise
*/
-static int ecryptfs_write_begin(struct file *file,
+static int ecryptfs_write_begin(const struct kiocb *iocb,
struct address_space *mapping,
loff_t pos, unsigned len,
struct folio **foliop, void **fsdata)
@@ -322,7 +322,7 @@ static int ecryptfs_write_begin(struct file *file,
* Note, this will increase i_size. */
if (index != 0) {
if (prev_page_end_size > i_size_read(mapping->host)) {
- rc = ecryptfs_truncate(file->f_path.dentry,
+ rc = ecryptfs_truncate(iocb->ki_filp->f_path.dentry,
prev_page_end_size);
if (rc) {
printk(KERN_ERR "%s: Error on attempt to "
@@ -429,7 +429,7 @@ int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode)
/**
* ecryptfs_write_end
- * @file: The eCryptfs file object
+ * @iocb: I/O control block for the eCryptfs file
* @mapping: The eCryptfs object
* @pos: The file position
* @len: The length of the data (unused)
@@ -437,7 +437,7 @@ int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode)
* @folio: The eCryptfs folio
* @fsdata: The fsdata (unused)
*/
-static int ecryptfs_write_end(struct file *file,
+static int ecryptfs_write_end(const struct kiocb *iocb,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct folio *folio, void *fsdata)
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index c900d98bf494..c4a139911356 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -183,7 +183,6 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr)
static const struct dentry_operations efivarfs_d_ops = {
.d_compare = efivarfs_d_compare,
.d_hash = efivarfs_d_hash,
- .d_delete = always_delete_dentry,
};
static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name)
@@ -350,7 +349,8 @@ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = EFIVARFS_MAGIC;
sb->s_op = &efivarfs_ops;
- sb->s_d_op = &efivarfs_d_ops;
+ set_default_d_op(sb, &efivarfs_d_ops);
+ sb->s_d_flags |= DCACHE_DONTCACHE;
sb->s_time_gran = 1;
if (!efivar_supports_writes())
@@ -390,10 +390,16 @@ static int efivarfs_reconfigure(struct fs_context *fc)
return 0;
}
+static void efivarfs_free(struct fs_context *fc)
+{
+ kfree(fc->s_fs_info);
+}
+
static const struct fs_context_operations efivarfs_context_ops = {
.get_tree = efivarfs_get_tree,
.parse_param = efivarfs_parse_param,
.reconfigure = efivarfs_reconfigure,
+ .free = efivarfs_free,
};
static int efivarfs_check_missing(efi_char16_t *name16, efi_guid_t vendor,
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index 6beeb7063871..7b26efc271ee 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -147,6 +147,8 @@ config EROFS_FS_ZIP_ZSTD
config EROFS_FS_ZIP_ACCEL
bool "EROFS hardware decompression support"
depends on EROFS_FS_ZIP
+ select CRYPTO
+ select CRYPTO_DEFLATE
help
Saying Y here includes hardware accelerator support for reading
EROFS file systems containing compressed data. It gives better
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 6a329c329f43..f46c47335b9c 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -49,11 +49,18 @@ void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap)
return buf->base + (offset & ~PAGE_MASK);
}
-void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
+int erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb,
+ bool in_metabox)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
buf->file = NULL;
+ if (in_metabox) {
+ if (unlikely(!sbi->metabox_inode))
+ return -EFSCORRUPTED;
+ buf->mapping = sbi->metabox_inode->i_mapping;
+ return 0;
+ }
buf->off = sbi->dif0.fsoff;
if (erofs_is_fileio_mode(sbi)) {
buf->file = sbi->dif0.file; /* some fs like FUSE needs it */
@@ -62,13 +69,18 @@ void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
buf->mapping = sbi->dif0.fscache->inode->i_mapping;
else
buf->mapping = sb->s_bdev->bd_mapping;
+ return 0;
}
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
- erofs_off_t offset, bool need_kmap)
+ erofs_off_t offset, bool in_metabox)
{
- erofs_init_metabuf(buf, sb);
- return erofs_bread(buf, offset, need_kmap);
+ int err;
+
+ err = erofs_init_metabuf(buf, sb, in_metabox);
+ if (err)
+ return ERR_PTR(err);
+ return erofs_bread(buf, offset, true);
}
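Metadata may now live either in the raw device range (as before) or inside the dedicated metabox inode's page cache, so erofs_init_metabuf() can fail (-EFSCORRUPTED when the metabox inode is absent) and erofs_read_metabuf() takes an in_metabox flag in place of need_kmap. The typical caller pattern, mirroring the conversions throughout this patch:

	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	void *ptr;

	ptr = erofs_read_metabuf(&buf, sb, pos, erofs_inode_in_metabox(inode));
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);	/* includes the missing-metabox case */
	/* ... parse on-disk metadata at ptr ... */
	erofs_put_metabuf(&buf);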
int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
@@ -118,7 +130,7 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
pos = ALIGN(erofs_iloc(inode) + vi->inode_isize +
vi->xattr_isize, unit) + unit * chunknr;
- idx = erofs_read_metabuf(&buf, sb, pos, true);
+ idx = erofs_read_metabuf(&buf, sb, pos, erofs_inode_in_metabox(inode));
if (IS_ERR(idx)) {
err = PTR_ERR(idx);
goto out;
@@ -214,9 +226,11 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
/*
* bit 30: I/O error occurred on this folio
+ * bit 29: CPU has dirty data in D-cache (needs aliasing handling);
* bit 0 - 28: remaining parts to complete this folio
*/
-#define EROFS_ONLINEFOLIO_EIO (1 << 30)
+#define EROFS_ONLINEFOLIO_EIO 30
+#define EROFS_ONLINEFOLIO_DIRTY 29
void erofs_onlinefolio_init(struct folio *folio)
{
@@ -233,19 +247,23 @@ void erofs_onlinefolio_split(struct folio *folio)
atomic_inc((atomic_t *)&folio->private);
}
-void erofs_onlinefolio_end(struct folio *folio, int err)
+void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty)
{
int orig, v;
do {
orig = atomic_read((atomic_t *)&folio->private);
- v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0);
+ DBG_BUGON(orig <= 0);
+ v = dirty << EROFS_ONLINEFOLIO_DIRTY;
+ v |= (orig - 1) | (!!err << EROFS_ONLINEFOLIO_EIO);
} while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);
- if (v & ~EROFS_ONLINEFOLIO_EIO)
+ if (v & (BIT(EROFS_ONLINEFOLIO_DIRTY) - 1))
return;
folio->private = 0;
- folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO));
+ if (v & BIT(EROFS_ONLINEFOLIO_DIRTY))
+ flush_dcache_folio(folio);
+ folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO)));
}
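The folio->private word now packs three things: bits 0-28 count outstanding sub-reads, bit 29 latches "dirty data in D-cache", and bit 30 latches an I/O error. The folio completes once the low 29 bits reach zero, flushing the D-cache first if the dirty bit stuck. A standalone userspace model of the same cmpxchg loop:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define DIRTY_BIT 29
#define EIO_BIT   30

static void part_end(atomic_int *priv, int err, bool dirty)
{
	int orig, v;

	do {
		orig = atomic_load(priv);
		v = dirty << DIRTY_BIT;			/* latch dirty */
		v |= (orig - 1) | (!!err << EIO_BIT);	/* drop one part, latch EIO */
	} while (!atomic_compare_exchange_strong(priv, &orig, v));

	if (v & ((1 << DIRTY_BIT) - 1))
		return;					/* parts still outstanding */
	if (v & (1 << DIRTY_BIT))
		puts("flush_dcache_folio()");		/* aliasing handling */
	printf("folio_end_read(uptodate=%d)\n", !(v & (1 << EIO_BIT)));
}

int main(void)
{
	atomic_int priv = 2;		/* a folio split into two sub-reads */

	part_end(&priv, 0, false);	/* clean part: count 2 -> 1 */
	part_end(&priv, 0, true);	/* dirty part: flush, then complete */
	return 0;
}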
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
@@ -258,51 +276,51 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
map.m_la = offset;
map.m_llen = length;
-
ret = erofs_map_blocks(inode, &map);
if (ret < 0)
return ret;
- mdev = (struct erofs_map_dev) {
- .m_deviceid = map.m_deviceid,
- .m_pa = map.m_pa,
- };
- ret = erofs_map_dev(sb, &mdev);
- if (ret)
- return ret;
-
iomap->offset = map.m_la;
- if (flags & IOMAP_DAX)
- iomap->dax_dev = mdev.m_dif->dax_dev;
- else
- iomap->bdev = mdev.m_bdev;
iomap->length = map.m_llen;
iomap->flags = 0;
iomap->private = NULL;
-
+ iomap->addr = IOMAP_NULL_ADDR;
if (!(map.m_flags & EROFS_MAP_MAPPED)) {
iomap->type = IOMAP_HOLE;
- iomap->addr = IOMAP_NULL_ADDR;
- if (!iomap->length)
- iomap->length = length;
return 0;
}
+ if (!(map.m_flags & EROFS_MAP_META) || !erofs_inode_in_metabox(inode)) {
+ mdev = (struct erofs_map_dev) {
+ .m_deviceid = map.m_deviceid,
+ .m_pa = map.m_pa,
+ };
+ ret = erofs_map_dev(sb, &mdev);
+ if (ret)
+ return ret;
+
+ if (flags & IOMAP_DAX)
+ iomap->dax_dev = mdev.m_dif->dax_dev;
+ else
+ iomap->bdev = mdev.m_bdev;
+ iomap->addr = mdev.m_dif->fsoff + mdev.m_pa;
+ if (flags & IOMAP_DAX)
+ iomap->addr += mdev.m_dif->dax_part_off;
+ }
+
if (map.m_flags & EROFS_MAP_META) {
void *ptr;
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
iomap->type = IOMAP_INLINE;
- ptr = erofs_read_metabuf(&buf, sb, mdev.m_pa, true);
+ ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
+ erofs_inode_in_metabox(inode));
if (IS_ERR(ptr))
return PTR_ERR(ptr);
iomap->inline_data = ptr;
iomap->private = buf.base;
} else {
iomap->type = IOMAP_MAPPED;
- iomap->addr = mdev.m_dif->fsoff + mdev.m_pa;
- if (flags & IOMAP_DAX)
- iomap->addr += mdev.m_dif->dax_part_off;
}
return 0;
}
@@ -351,11 +369,16 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
*/
static int erofs_read_folio(struct file *file, struct folio *folio)
{
+ trace_erofs_read_folio(folio, true);
+
return iomap_read_folio(folio, &erofs_iomap_ops);
}
static void erofs_readahead(struct readahead_control *rac)
{
+ trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
+ readahead_count(rac), true);
+
return iomap_readahead(rac, &erofs_iomap_ops);
}
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index bf62e2836b60..354762c9723f 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -301,13 +301,11 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
cur = min(cur, rq->outputsize);
if (cur && rq->out[0]) {
kin = kmap_local_page(rq->in[nrpages_in - 1]);
- if (rq->out[0] == rq->in[nrpages_in - 1]) {
+ if (rq->out[0] == rq->in[nrpages_in - 1])
memmove(kin + rq->pageofs_out, kin + pi, cur);
- flush_dcache_page(rq->out[0]);
- } else {
+ else
memcpy_to_page(rq->out[0], rq->pageofs_out,
kin + pi, cur);
- }
kunmap_local(kin);
}
rq->outputsize -= cur;
@@ -325,14 +323,12 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
po = (rq->pageofs_out + cur + pi) & ~PAGE_MASK;
DBG_BUGON(no >= nrpages_out);
cnt = min(insz - pi, PAGE_SIZE - po);
- if (rq->out[no] == rq->in[ni]) {
+ if (rq->out[no] == rq->in[ni])
memmove(kin + po,
kin + rq->pageofs_in + pi, cnt);
- flush_dcache_page(rq->out[no]);
- } else if (rq->out[no]) {
+ else if (rq->out[no])
memcpy_to_page(rq->out[no], po,
kin + rq->pageofs_in + pi, cnt);
- }
pi += cnt;
} while (pi < insz);
kunmap_local(kin);
@@ -471,7 +467,7 @@ int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb)
return -EOPNOTSUPP;
}
- erofs_init_metabuf(&buf, sb);
+ (void)erofs_init_metabuf(&buf, sb, false);
offset = EROFS_SUPER_OFFSET + sbi->sb_size;
alg = 0;
for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) {
diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c
index 2fae209d0274..debf469ad6bd 100644
--- a/fs/erofs/dir.c
+++ b/fs/erofs/dir.c
@@ -34,7 +34,8 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
}
if (!dir_emit(ctx, de_name, de_namelen,
- le64_to_cpu(de->nid), d_type))
+ erofs_nid_to_ino64(EROFS_SB(dir->i_sb),
+ le64_to_cpu(de->nid)), d_type))
return 1;
++de;
ctx->pos += sizeof(struct erofs_dirent);
@@ -47,8 +48,12 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
struct inode *dir = file_inode(f);
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct super_block *sb = dir->i_sb;
+ struct file_ra_state *ra = &f->f_ra;
unsigned long bsz = sb->s_blocksize;
unsigned int ofs = erofs_blkoff(sb, ctx->pos);
+ pgoff_t ra_pages = DIV_ROUND_UP_POW2(
+ EROFS_I_SB(dir)->dir_ra_bytes, PAGE_SIZE);
+ pgoff_t nr_pages = DIV_ROUND_UP_POW2(dir->i_size, PAGE_SIZE);
int err = 0;
bool initial = true;
@@ -58,6 +63,21 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
struct erofs_dirent *de;
unsigned int nameoff, maxsize;
+ if (fatal_signal_pending(current)) {
+ err = -ERESTARTSYS;
+ break;
+ }
+
+ /* readahead blocks to enhance performance for large directories */
+ if (ra_pages) {
+ pgoff_t idx = DIV_ROUND_UP_POW2(ctx->pos, PAGE_SIZE);
+ pgoff_t pages = min(nr_pages - idx, ra_pages);
+
+ if (pages > 1 && !ra_has_index(ra, idx))
+ page_cache_sync_readahead(dir->i_mapping, ra,
+ f, idx, pages);
+ }
+
de = erofs_bread(&buf, dbstart, true);
if (IS_ERR(de)) {
erofs_err(sb, "failed to readdir of logical block %llu of nid %llu",
@@ -88,6 +108,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
break;
ctx->pos = dbstart + maxsize;
ofs = 0;
+ cond_resched();
}
erofs_put_metabuf(&buf);
if (EROFS_I(dir)->dot_omitted && ctx->pos == dir->i_size) {
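Two tweaks for large directories here: the loop now bails out on fatal signals and cond_resched()s per block, and it issues page-cache readahead sized by the new dir_ra_bytes knob. With the 16 KiB default and 4 KiB pages that is four pages ahead of ctx->pos, a quick sanity check (PAGE_SIZE value assumed):

#include <stdio.h>

int main(void)
{
	unsigned long dir_ra_bytes = 16384;	/* EROFS_DIR_RA_BYTES default */
	unsigned long page_size = 4096;		/* assumed PAGE_SIZE */

	/* same rounding as DIV_ROUND_UP_POW2() for a power-of-two divisor */
	printf("%lu pages of directory readahead\n",
	       (dir_ra_bytes + page_size - 1) / page_size);	/* prints 4 */
	return 0;
}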
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 767fb4acdc93..377ee12b8b96 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -15,6 +15,7 @@
#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
#define EROFS_FEATURE_COMPAT_MTIME 0x00000002
#define EROFS_FEATURE_COMPAT_XATTR_FILTER 0x00000004
+#define EROFS_FEATURE_COMPAT_SHARED_EA_IN_METABOX 0x00000008
/*
* Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
@@ -31,8 +32,9 @@
#define EROFS_FEATURE_INCOMPAT_DEDUPE 0x00000020
#define EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES 0x00000040
#define EROFS_FEATURE_INCOMPAT_48BIT 0x00000080
+#define EROFS_FEATURE_INCOMPAT_METABOX 0x00000100
#define EROFS_ALL_FEATURE_INCOMPAT \
- ((EROFS_FEATURE_INCOMPAT_48BIT << 1) - 1)
+ ((EROFS_FEATURE_INCOMPAT_METABOX << 1) - 1)
#define EROFS_SB_EXTSLOT_SIZE 16
@@ -46,7 +48,7 @@ struct erofs_deviceslot {
};
#define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot)
-/* erofs on-disk super block (currently 128 bytes) */
+/* erofs on-disk super block (currently 144 bytes at maximum) */
struct erofs_super_block {
__le32 magic; /* file system magic number */
__le32 checksum; /* crc32c to avoid unexpected on-disk overlap */
@@ -82,7 +84,9 @@ struct erofs_super_block {
__u8 reserved[3];
__le32 build_time; /* seconds added to epoch for mkfs time */
__le64 rootnid_8b; /* (48BIT on) nid of root directory */
- __u8 reserved2[8];
+ __le64 reserved2;
+ __le64 metabox_nid; /* (METABOX on) nid of the metabox inode */
+ __le64 reserved3; /* [align to extslot 1] */
};
/*
@@ -267,6 +271,9 @@ struct erofs_inode_chunk_index {
__le32 startblk_lo; /* starting block number of this chunk */
};
+#define EROFS_DIRENT_NID_METABOX_BIT 63
+#define EROFS_DIRENT_NID_MASK (BIT_ULL(EROFS_DIRENT_NID_METABOX_BIT) - 1)
+
/* dirent sorts in alphabet order, thus we can do binary search */
struct erofs_dirent {
__le64 nid; /* node number */
@@ -434,7 +441,7 @@ static inline void erofs_check_ondisk_layout_definitions(void)
.h_clusterbits = 1 << Z_EROFS_FRAGMENT_INODE_BIT
};
- BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128);
+ BUILD_BUG_ON(sizeof(struct erofs_super_block) != 144);
BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32);
BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64);
BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12);
diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
index 7d81f504bff0..b7b3432a9882 100644
--- a/fs/erofs/fileio.c
+++ b/fs/erofs/fileio.c
@@ -38,7 +38,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
} else {
bio_for_each_folio_all(fi, &rq->bio) {
DBG_BUGON(folio_test_uptodate(fi.folio));
- erofs_onlinefolio_end(fi.folio, ret);
+ erofs_onlinefolio_end(fi.folio, ret, false);
}
}
bio_uninit(&rq->bio);
@@ -47,6 +47,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
{
+ const struct cred *old_cred;
struct iov_iter iter;
int ret;
@@ -60,7 +61,9 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
rq->iocb.ki_flags = IOCB_DIRECT;
iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt,
rq->bio.bi_iter.bi_size);
+ old_cred = override_creds(rq->iocb.ki_filp->f_cred);
ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter);
+ revert_creds(old_cred);
if (ret != -EIOCBQUEUED)
erofs_fileio_ki_complete(&rq->iocb, ret);
}
@@ -93,8 +96,6 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio)
struct erofs_map_blocks *map = &io->map;
unsigned int cur = 0, end = folio_size(folio), len, attached = 0;
loff_t pos = folio_pos(folio), ofs;
- struct iov_iter iter;
- struct bio_vec bv;
int err = 0;
erofs_onlinefolio_init(folio);
@@ -114,18 +115,12 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio)
void *src;
src = erofs_read_metabuf(&buf, inode->i_sb,
- map->m_pa + ofs, true);
+ map->m_pa + ofs, erofs_inode_in_metabox(inode));
if (IS_ERR(src)) {
err = PTR_ERR(src);
break;
}
- bvec_set_folio(&bv, folio, len, cur);
- iov_iter_bvec(&iter, ITER_DEST, &bv, 1, len);
- if (copy_to_iter(src, len, &iter) != len) {
- erofs_put_metabuf(&buf);
- err = -EIO;
- break;
- }
+ memcpy_to_folio(folio, cur, src, len);
erofs_put_metabuf(&buf);
} else if (!(map->m_flags & EROFS_MAP_MAPPED)) {
folio_zero_segment(folio, cur, cur + len);
@@ -159,7 +154,7 @@ io_retry:
}
cur += len;
}
- erofs_onlinefolio_end(folio, err);
+ erofs_onlinefolio_end(folio, err, false);
return err;
}
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index 34517ca9df91..362acf828279 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -274,7 +274,8 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
size_t size = map.m_llen;
void *src;
- src = erofs_read_metabuf(&buf, sb, map.m_pa, true);
+ src = erofs_read_metabuf(&buf, sb, map.m_pa,
+ erofs_inode_in_metabox(inode));
if (IS_ERR(src))
return PTR_ERR(src);
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index a0ae0b4f7b01..9a2f59721522 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -29,6 +29,7 @@ static int erofs_read_inode(struct inode *inode)
struct super_block *sb = inode->i_sb;
erofs_blk_t blkaddr = erofs_blknr(sb, erofs_iloc(inode));
unsigned int ofs = erofs_blkoff(sb, erofs_iloc(inode));
+ bool in_mbox = erofs_inode_in_metabox(inode);
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct erofs_sb_info *sbi = EROFS_SB(sb);
erofs_blk_t addrmask = BIT_ULL(48) - 1;
@@ -39,10 +40,10 @@ static int erofs_read_inode(struct inode *inode)
void *ptr;
int err = 0;
- ptr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), true);
+ ptr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), in_mbox);
if (IS_ERR(ptr)) {
err = PTR_ERR(ptr);
- erofs_err(sb, "failed to get inode (nid: %llu) page, err %d",
+ erofs_err(sb, "failed to read inode meta block (nid: %llu): %d",
vi->nid, err);
goto err_out;
}
@@ -78,10 +79,10 @@ static int erofs_read_inode(struct inode *inode)
memcpy(&copied, dic, gotten);
ptr = erofs_read_metabuf(&buf, sb,
- erofs_pos(sb, blkaddr + 1), true);
+ erofs_pos(sb, blkaddr + 1), in_mbox);
if (IS_ERR(ptr)) {
err = PTR_ERR(ptr);
- erofs_err(sb, "failed to get inode payload block (nid: %llu), err %d",
+ erofs_err(sb, "failed to read inode payload block (nid: %llu): %d",
vi->nid, err);
goto err_out;
}
@@ -264,13 +265,13 @@ static int erofs_fill_inode(struct inode *inode)
* ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
* so that it will fit.
*/
-static ino_t erofs_squash_ino(erofs_nid_t nid)
+static ino_t erofs_squash_ino(struct super_block *sb, erofs_nid_t nid)
{
- ino_t ino = (ino_t)nid;
+ u64 ino64 = erofs_nid_to_ino64(EROFS_SB(sb), nid);
if (sizeof(ino_t) < sizeof(erofs_nid_t))
- ino ^= nid >> (sizeof(erofs_nid_t) - sizeof(ino_t)) * 8;
- return ino;
+ ino64 ^= ino64 >> (sizeof(erofs_nid_t) - sizeof(ino_t)) * 8;
+ return (ino_t)ino64;
}
static int erofs_iget5_eq(struct inode *inode, void *opaque)
@@ -282,7 +283,7 @@ static int erofs_iget5_set(struct inode *inode, void *opaque)
{
const erofs_nid_t nid = *(erofs_nid_t *)opaque;
- inode->i_ino = erofs_squash_ino(nid);
+ inode->i_ino = erofs_squash_ino(inode->i_sb, nid);
EROFS_I(inode)->nid = nid;
return 0;
}
@@ -291,7 +292,7 @@ struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid)
{
struct inode *inode;
- inode = iget5_locked(sb, erofs_squash_ino(nid), erofs_iget5_eq,
+ inode = iget5_locked(sb, erofs_squash_ino(sb, nid), erofs_iget5_eq,
erofs_iget5_set, &nid);
if (!inode)
return ERR_PTR(-ENOMEM);
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index a32c03a80c70..4ccc5f0ee8df 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -125,6 +125,7 @@ struct erofs_sb_info {
struct erofs_sb_lz4_info lz4;
#endif /* CONFIG_EROFS_FS_ZIP */
struct inode *packed_inode;
+ struct inode *metabox_inode;
struct erofs_dev_context *devs;
u64 total_blocks;
@@ -148,6 +149,7 @@ struct erofs_sb_info {
/* what we really care is nid, rather than ino.. */
erofs_nid_t root_nid;
erofs_nid_t packed_nid;
+ erofs_nid_t metabox_nid;
/* used for statfs, f_files - f_favail */
u64 inos;
@@ -157,6 +159,7 @@ struct erofs_sb_info {
/* sysfs support */
struct kobject s_kobj; /* /sys/fs/erofs/<devname> */
struct completion s_kobj_unregister;
+ erofs_off_t dir_ra_bytes;
/* fscache support */
struct fscache_volume *volume;
@@ -227,8 +230,27 @@ EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS)
EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE)
EROFS_FEATURE_FUNCS(xattr_prefixes, incompat, INCOMPAT_XATTR_PREFIXES)
EROFS_FEATURE_FUNCS(48bit, incompat, INCOMPAT_48BIT)
+EROFS_FEATURE_FUNCS(metabox, incompat, INCOMPAT_METABOX)
EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER)
+EROFS_FEATURE_FUNCS(shared_ea_in_metabox, compat, COMPAT_SHARED_EA_IN_METABOX)
+
+static inline u64 erofs_nid_to_ino64(struct erofs_sb_info *sbi, erofs_nid_t nid)
+{
+ if (!erofs_sb_has_metabox(sbi))
+ return nid;
+
+ /*
+ * When metadata compression is enabled, avoid generating excessively
+ * large inode numbers for metadata-compressed inodes. Shift NIDs in
+ * the 31-62 bit range left by one and move the metabox flag to bit 31.
+ *
+ * Note: on-disk NIDs remain unchanged as they are primarily used for
+ * compatibility with non-LFS 32-bit applications.
+ */
+ return ((nid << 1) & GENMASK_ULL(63, 32)) | (nid & GENMASK(30, 0)) |
+ ((nid >> EROFS_DIRENT_NID_METABOX_BIT) << 31);
+}
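A standalone replica of this mapping (GENMASK_ULL() expanded by hand), showing how a metabox NID keeps a small inode number visible to 32-bit applications:

#include <stdint.h>
#include <stdio.h>

#define METABOX_BIT 63

static uint64_t nid_to_ino64(uint64_t nid)
{
	uint64_t hi = (nid << 1) & 0xffffffff00000000ULL; /* bits 31..62 -> 32..63 */
	uint64_t lo = nid & 0x7fffffffULL;		  /* bits 0..30 unchanged */
	uint64_t mb = (nid >> METABOX_BIT) << 31;	  /* metabox flag -> bit 31 */

	return hi | lo | mb;
}

int main(void)
{
	/* metabox inode, on-disk nid 5: flag moves from bit 63 down to bit 31 */
	printf("%#llx\n", (unsigned long long)nid_to_ino64((1ULL << 63) | 5));
	return 0;	/* prints 0x80000005 */
}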
/* atomic flag definitions */
#define EROFS_I_EA_INITED_BIT 0
@@ -238,6 +260,9 @@ EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER)
#define EROFS_I_BL_XATTR_BIT (BITS_PER_LONG - 1)
#define EROFS_I_BL_Z_BIT (BITS_PER_LONG - 2)
+/* default readahead size of directories */
+#define EROFS_DIR_RA_BYTES 16384
+
struct erofs_inode {
erofs_nid_t nid;
@@ -279,12 +304,20 @@ struct erofs_inode {
#define EROFS_I(ptr) container_of(ptr, struct erofs_inode, vfs_inode)
+static inline bool erofs_inode_in_metabox(struct inode *inode)
+{
+ return EROFS_I(inode)->nid & BIT_ULL(EROFS_DIRENT_NID_METABOX_BIT);
+}
+
static inline erofs_off_t erofs_iloc(struct inode *inode)
{
struct erofs_sb_info *sbi = EROFS_I_SB(inode);
+ erofs_nid_t nid_lo = EROFS_I(inode)->nid & EROFS_DIRENT_NID_MASK;
+ if (erofs_inode_in_metabox(inode))
+ return nid_lo << sbi->islotbits;
return erofs_pos(inode->i_sb, sbi->meta_blkaddr) +
- (EROFS_I(inode)->nid << sbi->islotbits);
+ (nid_lo << sbi->islotbits);
}
static inline unsigned int erofs_inode_version(unsigned int ifmt)
@@ -315,10 +348,12 @@ static inline struct folio *erofs_grab_folio_nowait(struct address_space *as,
/* The length of extent is full */
#define EROFS_MAP_FULL_MAPPED 0x0008
/* Located in the special packed inode */
-#define EROFS_MAP_FRAGMENT 0x0010
+#define __EROFS_MAP_FRAGMENT 0x0010
/* The extent refers to partial decompressed data */
#define EROFS_MAP_PARTIAL_REF 0x0020
+#define EROFS_MAP_FRAGMENT (EROFS_MAP_MAPPED | __EROFS_MAP_FRAGMENT)
+
struct erofs_map_blocks {
struct erofs_buf buf;
@@ -381,16 +416,17 @@ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf,
void erofs_unmap_metabuf(struct erofs_buf *buf);
void erofs_put_metabuf(struct erofs_buf *buf);
void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap);
-void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb);
+int erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb,
+ bool in_metabox);
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
- erofs_off_t offset, bool need_kmap);
+ erofs_off_t offset, bool in_metabox);
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev);
int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map);
void erofs_onlinefolio_init(struct folio *folio);
void erofs_onlinefolio_split(struct folio *folio);
-void erofs_onlinefolio_end(struct folio *folio, int err);
+void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty);
struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid);
int erofs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask,
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index e1e9f06e8342..e1020aa60771 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -141,7 +141,7 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
struct erofs_deviceslot *dis;
struct file *file;
- dis = erofs_read_metabuf(buf, sb, *pos, true);
+ dis = erofs_read_metabuf(buf, sb, *pos, false);
if (IS_ERR(dis))
return PTR_ERR(dis);
@@ -258,7 +258,7 @@ static int erofs_read_superblock(struct super_block *sb)
void *data;
int ret;
- data = erofs_read_metabuf(&buf, sb, 0, true);
+ data = erofs_read_metabuf(&buf, sb, 0, false);
if (IS_ERR(data)) {
erofs_err(sb, "cannot read erofs superblock");
return PTR_ERR(data);
@@ -319,6 +319,14 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->root_nid = le16_to_cpu(dsb->rb.rootnid_2b);
}
sbi->packed_nid = le64_to_cpu(dsb->packed_nid);
+ if (erofs_sb_has_metabox(sbi)) {
+ if (sbi->sb_size <= offsetof(struct erofs_super_block,
+ metabox_nid))
+ return -EFSCORRUPTED;
+ sbi->metabox_nid = le64_to_cpu(dsb->metabox_nid);
+ if (sbi->metabox_nid & BIT_ULL(EROFS_DIRENT_NID_METABOX_BIT))
+ return -EFSCORRUPTED; /* self-loop detection */
+ }
sbi->inos = le64_to_cpu(dsb->inos);
sbi->epoch = (s64)le64_to_cpu(dsb->epoch);
@@ -335,6 +343,8 @@ static int erofs_read_superblock(struct super_block *sb)
if (erofs_sb_has_48bit(sbi))
erofs_info(sb, "EXPERIMENTAL 48-bit layout support in use. Use at your own risk!");
+ if (erofs_sb_has_metabox(sbi))
+ erofs_info(sb, "EXPERIMENTAL metadata compression support in use. Use at your own risk!");
if (erofs_is_fscache_mode(sb))
erofs_info(sb, "[deprecated] fscache-based on-demand read feature in use. Use at your own risk!");
out:
@@ -690,6 +700,12 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
return PTR_ERR(inode);
sbi->packed_inode = inode;
}
+ if (erofs_sb_has_metabox(sbi)) {
+ inode = erofs_iget(sb, sbi->metabox_nid);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+ sbi->metabox_inode = inode;
+ }
inode = erofs_iget(sb, sbi->root_nid);
if (IS_ERR(inode))
@@ -715,6 +731,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
if (err)
return err;
+ sbi->dir_ra_bytes = EROFS_DIR_RA_BYTES;
erofs_info(sb, "mounted with root inode @ nid %llu.", sbi->root_nid);
return 0;
}
@@ -845,6 +862,8 @@ static void erofs_drop_internal_inodes(struct erofs_sb_info *sbi)
{
iput(sbi->packed_inode);
sbi->packed_inode = NULL;
+ iput(sbi->metabox_inode);
+ sbi->metabox_inode = NULL;
#ifdef CONFIG_EROFS_FS_ZIP
iput(sbi->managed_cache);
sbi->managed_cache = NULL;
diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c
index eed8797a193f..1e0658a1d95b 100644
--- a/fs/erofs/sysfs.c
+++ b/fs/erofs/sysfs.c
@@ -65,12 +65,14 @@ EROFS_ATTR_FUNC(drop_caches, 0200);
#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
EROFS_ATTR_FUNC(accel, 0644);
#endif
+EROFS_ATTR_RW_UI(dir_ra_bytes, erofs_sb_info);
static struct attribute *erofs_sb_attrs[] = {
#ifdef CONFIG_EROFS_FS_ZIP
ATTR_LIST(sync_decompress),
ATTR_LIST(drop_caches),
#endif
+ ATTR_LIST(dir_ra_bytes),
NULL,
};
ATTRIBUTE_GROUPS(erofs_sb);
@@ -95,6 +97,7 @@ EROFS_ATTR_FEATURE(ztailpacking);
EROFS_ATTR_FEATURE(fragments);
EROFS_ATTR_FEATURE(dedupe);
EROFS_ATTR_FEATURE(48bit);
+EROFS_ATTR_FEATURE(metabox);
static struct attribute *erofs_feat_attrs[] = {
ATTR_LIST(zero_padding),
@@ -108,6 +111,7 @@ static struct attribute *erofs_feat_attrs[] = {
ATTR_LIST(fragments),
ATTR_LIST(dedupe),
ATTR_LIST(48bit),
+ ATTR_LIST(metabox),
NULL,
};
ATTRIBUTE_GROUPS(erofs_feat);
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index 9cf84717a92e..eaa9efd766ee 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -72,12 +72,14 @@ static int erofs_init_inode_xattrs(struct inode *inode)
ret = -EFSCORRUPTED;
goto out_unlock; /* xattr ondisk layout error */
}
- ret = -ENOATTR;
+ ret = -ENODATA;
goto out_unlock;
}
it.buf = __EROFS_BUF_INITIALIZER;
- erofs_init_metabuf(&it.buf, sb);
+ ret = erofs_init_metabuf(&it.buf, sb, erofs_inode_in_metabox(inode));
+ if (ret)
+ goto out_unlock;
it.pos = erofs_iloc(inode) + vi->inode_isize;
/* read in shared xattr array (non-atomic, see kmalloc below) */
@@ -266,20 +268,20 @@ static int erofs_getxattr_foreach(struct erofs_xattr_iter *it)
(entry.e_name_index & EROFS_XATTR_LONG_PREFIX_MASK);
if (pf >= sbi->xattr_prefixes + sbi->xattr_prefix_count)
- return -ENOATTR;
+ return -ENODATA;
if (it->index != pf->prefix->base_index ||
it->name.len != entry.e_name_len + pf->infix_len)
- return -ENOATTR;
+ return -ENODATA;
if (memcmp(it->name.name, pf->prefix->infix, pf->infix_len))
- return -ENOATTR;
+ return -ENODATA;
it->infix_len = pf->infix_len;
} else {
if (it->index != entry.e_name_index ||
it->name.len != entry.e_name_len)
- return -ENOATTR;
+ return -ENODATA;
it->infix_len = 0;
}
@@ -295,7 +297,7 @@ static int erofs_getxattr_foreach(struct erofs_xattr_iter *it)
entry.e_name_len - processed);
if (memcmp(it->name.name + it->infix_len + processed,
it->kaddr, slice))
- return -ENOATTR;
+ return -ENODATA;
it->pos += slice;
}
@@ -323,9 +325,12 @@ static int erofs_xattr_iter_inline(struct erofs_xattr_iter *it,
sizeof(u32) * vi->xattr_shared_count;
if (xattr_header_sz >= vi->xattr_isize) {
DBG_BUGON(xattr_header_sz > vi->xattr_isize);
- return -ENOATTR;
+ return -ENODATA;
}
+ ret = erofs_init_metabuf(&it->buf, it->sb, erofs_inode_in_metabox(inode));
+ if (ret)
+ return ret;
remaining = vi->xattr_isize - xattr_header_sz;
it->pos = erofs_iloc(inode) + vi->inode_isize + xattr_header_sz;
@@ -347,7 +352,7 @@ static int erofs_xattr_iter_inline(struct erofs_xattr_iter *it,
ret = erofs_getxattr_foreach(it);
else
ret = erofs_listxattr_foreach(it);
- if ((getxattr && ret != -ENOATTR) || (!getxattr && ret))
+ if ((getxattr && ret != -ENODATA) || (!getxattr && ret))
break;
it->pos = next_pos;
@@ -361,12 +366,17 @@ static int erofs_xattr_iter_shared(struct erofs_xattr_iter *it,
struct erofs_inode *const vi = EROFS_I(inode);
struct super_block *const sb = it->sb;
struct erofs_sb_info *sbi = EROFS_SB(sb);
- unsigned int i;
- int ret = -ENOATTR;
+ unsigned int i = 0;
+ int ret;
- for (i = 0; i < vi->xattr_shared_count; ++i) {
+ ret = erofs_init_metabuf(&it->buf, sb,
+ erofs_sb_has_shared_ea_in_metabox(sbi));
+ if (ret)
+ return ret;
+
+ while (i < vi->xattr_shared_count) {
it->pos = erofs_pos(sb, sbi->xattr_blkaddr) +
- vi->xattr_shared_xattrs[i] * sizeof(__le32);
+ vi->xattr_shared_xattrs[i++] * sizeof(__le32);
it->kaddr = erofs_bread(&it->buf, it->pos, true);
if (IS_ERR(it->kaddr))
return PTR_ERR(it->kaddr);
@@ -375,10 +385,10 @@ static int erofs_xattr_iter_shared(struct erofs_xattr_iter *it,
ret = erofs_getxattr_foreach(it);
else
ret = erofs_listxattr_foreach(it);
- if ((getxattr && ret != -ENOATTR) || (!getxattr && ret))
+ if ((getxattr && ret != -ENODATA) || (!getxattr && ret))
break;
}
- return ret;
+ return i ? ret : -ENODATA;
}
int erofs_getxattr(struct inode *inode, int index, const char *name,
@@ -403,7 +413,7 @@ int erofs_getxattr(struct inode *inode, int index, const char *name,
EROFS_XATTR_FILTER_SEED + index);
hashbit &= EROFS_XATTR_FILTER_BITS - 1;
if (vi->xattr_name_filter & (1U << hashbit))
- return -ENOATTR;
+ return -ENODATA;
}
it.index = index;
@@ -413,13 +423,12 @@ int erofs_getxattr(struct inode *inode, int index, const char *name,
it.sb = inode->i_sb;
it.buf = __EROFS_BUF_INITIALIZER;
- erofs_init_metabuf(&it.buf, it.sb);
it.buffer = buffer;
it.buffer_size = buffer_size;
it.buffer_ofs = 0;
ret = erofs_xattr_iter_inline(&it, inode, true);
- if (ret == -ENOATTR)
+ if (ret == -ENODATA)
ret = erofs_xattr_iter_shared(&it, inode, true);
erofs_put_metabuf(&it.buf);
return ret ? ret : it.buffer_ofs;
@@ -432,23 +441,22 @@ ssize_t erofs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
struct inode *inode = d_inode(dentry);
ret = erofs_init_inode_xattrs(inode);
- if (ret == -ENOATTR)
+ if (ret == -ENODATA)
return 0;
if (ret)
return ret;
it.sb = dentry->d_sb;
it.buf = __EROFS_BUF_INITIALIZER;
- erofs_init_metabuf(&it.buf, it.sb);
it.dentry = dentry;
it.buffer = buffer;
it.buffer_size = buffer_size;
it.buffer_ofs = 0;
ret = erofs_xattr_iter_inline(&it, inode, false);
- if (!ret || ret == -ENOATTR)
+ if (!ret || ret == -ENODATA)
ret = erofs_xattr_iter_shared(&it, inode, false);
- if (ret == -ENOATTR)
+ if (ret == -ENODATA)
ret = 0;
erofs_put_metabuf(&it.buf);
return ret ? ret : it.buffer_ofs;
@@ -485,7 +493,7 @@ int erofs_xattr_prefixes_init(struct super_block *sb)
if (sbi->packed_inode)
buf.mapping = sbi->packed_inode->i_mapping;
else
- erofs_init_metabuf(&buf, sb);
+ (void)erofs_init_metabuf(&buf, sb, false);
for (i = 0; i < sbi->xattr_prefix_count; i++) {
void *ptr = erofs_read_metadata(sb, &buf, &pos, &len);
@@ -539,7 +547,7 @@ struct posix_acl *erofs_get_acl(struct inode *inode, int type, bool rcu)
rc = erofs_getxattr(inode, prefix, "", value, rc);
}
- if (rc == -ENOATTR)
+ if (rc == -ENODATA)
acl = NULL;
else if (rc < 0)
acl = ERR_PTR(rc);
diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h
index b246cd0e135e..6317caa8413e 100644
--- a/fs/erofs/xattr.h
+++ b/fs/erofs/xattr.h
@@ -10,9 +10,6 @@
#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>
-/* Attribute not found */
-#define ENOATTR ENODATA
-
#ifdef CONFIG_EROFS_FS_XATTR
extern const struct xattr_handler erofs_xattr_user_handler;
extern const struct xattr_handler erofs_xattr_trusted_handler;
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index fe8071844724..792f20888a8f 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -805,6 +805,7 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
struct erofs_map_blocks *map = &fe->map;
struct super_block *sb = fe->inode->i_sb;
struct z_erofs_pcluster *pcl = NULL;
+ void *ptr;
int ret;
DBG_BUGON(fe->pcl);
@@ -854,15 +855,17 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
/* bind cache first when cached decompression is preferred */
z_erofs_bind_cache(fe);
} else {
- void *mptr;
-
- mptr = erofs_read_metabuf(&map->buf, sb, map->m_pa, false);
- if (IS_ERR(mptr)) {
- ret = PTR_ERR(mptr);
- erofs_err(sb, "failed to get inline data %d", ret);
+ ret = erofs_init_metabuf(&map->buf, sb,
+ erofs_inode_in_metabox(fe->inode));
+ if (ret)
+ return ret;
+ ptr = erofs_bread(&map->buf, map->m_pa, false);
+ if (IS_ERR(ptr)) {
+ ret = PTR_ERR(ptr);
+ erofs_err(sb, "failed to get inline folio %d", ret);
return ret;
}
- get_page(map->buf.page);
+ folio_get(page_folio(map->buf.page));
WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, map->buf.page);
fe->pcl->pageofs_in = map->m_pa & ~PAGE_MASK;
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
@@ -1034,7 +1037,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f,
if (!(map->m_flags & EROFS_MAP_MAPPED)) {
folio_zero_segment(folio, cur, end);
tight = false;
- } else if (map->m_flags & EROFS_MAP_FRAGMENT) {
+ } else if (map->m_flags & __EROFS_MAP_FRAGMENT) {
erofs_off_t fpos = offset + cur - map->m_la;
err = z_erofs_read_fragment(inode->i_sb, folio, cur,
@@ -1091,7 +1094,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f,
tight = (bs == PAGE_SIZE);
}
} while ((end = cur) > 0);
- erofs_onlinefolio_end(folio, err);
+ erofs_onlinefolio_end(folio, err, false);
return err;
}
@@ -1196,7 +1199,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_backend *be, int err)
cur += len;
}
kunmap_local(dst);
- erofs_onlinefolio_end(page_folio(bvi->bvec.page), err);
+ erofs_onlinefolio_end(page_folio(bvi->bvec.page), err, true);
list_del(p);
kfree(bvi);
}
@@ -1325,9 +1328,8 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
/* must handle all compressed pages before actual file pages */
if (pcl->from_meta) {
- page = pcl->compressed_bvecs[0].page;
+ folio_put(page_folio(pcl->compressed_bvecs[0].page));
WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL);
- put_page(page);
} else {
/* managed folios are still left in compressed_bvecs[] */
for (i = 0; i < pclusterpages; ++i) {
@@ -1355,7 +1357,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
DBG_BUGON(z_erofs_page_is_invalidated(page));
if (!z_erofs_is_shortlived_page(page)) {
- erofs_onlinefolio_end(page_folio(page), err);
+ erofs_onlinefolio_end(page_folio(page), err, true);
continue;
}
if (pcl->algorithmformat != Z_EROFS_COMPRESSION_LZ4) {
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 14ea47f954f5..a93efd95c555 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -17,7 +17,7 @@ struct z_erofs_maprecorder {
u16 delta[2];
erofs_blk_t pblk, compressedblks;
erofs_off_t nextpackoff;
- bool partialref;
+ bool partialref, in_mbox;
};
static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m,
@@ -31,7 +31,7 @@ static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m,
struct z_erofs_lcluster_index *di;
unsigned int advise;
- di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, true);
+ di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, m->in_mbox);
if (IS_ERR(di))
return PTR_ERR(di);
m->lcn = lcn;
@@ -146,7 +146,7 @@ static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
else
return -EOPNOTSUPP;
- in = erofs_read_metabuf(&m->map->buf, m->inode->i_sb, pos, true);
+ in = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, m->in_mbox);
if (IS_ERR(in))
return PTR_ERR(in);
@@ -240,6 +240,13 @@ static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m,
unsigned int lcn, bool lookahead)
{
+ if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) {
+ erofs_err(m->inode->i_sb, "unknown type %u @ lcn %u of nid %llu",
+ m->type, lcn, EROFS_I(m->inode)->nid);
+ DBG_BUGON(1);
+ return -EOPNOTSUPP;
+ }
+
switch (EROFS_I(m->inode)->datalayout) {
case EROFS_INODE_COMPRESSED_FULL:
return z_erofs_load_full_lcluster(m, lcn);
@@ -265,12 +272,7 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
if (err)
return err;
- if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) {
- erofs_err(sb, "unknown type %u @ lcn %lu of nid %llu",
- m->type, lcn, vi->nid);
- DBG_BUGON(1);
- return -EOPNOTSUPP;
- } else if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
+ if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
lookback_distance = m->delta[0];
if (!lookback_distance)
break;
@@ -325,25 +327,18 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
DBG_BUGON(lcn == initial_lcn &&
m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);
- if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
- if (m->delta[0] != 1) {
- erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid);
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
- if (m->compressedblks)
- goto out;
- } else if (m->type < Z_EROFS_LCLUSTER_TYPE_MAX) {
- /*
- * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
- * rather than CBLKCNT, it's a 1 block-sized pcluster.
- */
- m->compressedblks = 1;
- goto out;
+ if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD && m->delta[0] != 1) {
+ erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid);
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
}
- erofs_err(sb, "cannot found CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid);
- DBG_BUGON(1);
- return -EFSCORRUPTED;
+
+ /*
+ * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type rather
+ * than CBLKCNT, it's a 1 block-sized pcluster.
+ */
+ if (m->type != Z_EROFS_LCLUSTER_TYPE_NONHEAD || !m->compressedblks)
+ m->compressedblks = 1;
out:
m->map->m_plen = erofs_pos(sb, m->compressedblks);
return 0;
@@ -379,11 +374,6 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
if (lcn != headlcn)
break; /* ends at the next HEAD lcluster */
m->delta[1] = 1;
- } else {
- erofs_err(inode->i_sb, "unknown type %u @ lcn %llu of nid %llu",
- m->type, lcn, vi->nid);
- DBG_BUGON(1);
- return -EOPNOTSUPP;
}
lcn += m->delta[1];
}
@@ -402,6 +392,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode,
struct z_erofs_maprecorder m = {
.inode = inode,
.map = map,
+ .in_mbox = erofs_inode_in_metabox(inode),
};
int err = 0;
unsigned int endoff, afmt;
@@ -413,8 +404,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode,
!vi->z_tailextent_headlcn) {
map->m_la = 0;
map->m_llen = inode->i_size;
- map->m_flags = EROFS_MAP_MAPPED |
- EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT;
+ map->m_flags = EROFS_MAP_FRAGMENT;
return 0;
}
initial_lcn = ofs >> lclusterbits;
@@ -429,44 +419,33 @@ static int z_erofs_map_blocks_fo(struct inode *inode,
map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
end = (m.lcn + 1ULL) << lclusterbits;
- switch (m.type) {
- case Z_EROFS_LCLUSTER_TYPE_PLAIN:
- case Z_EROFS_LCLUSTER_TYPE_HEAD1:
- case Z_EROFS_LCLUSTER_TYPE_HEAD2:
- if (endoff >= m.clusterofs) {
- m.headtype = m.type;
- map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
- /*
- * For ztailpacking files, in order to inline data more
- * effectively, special EOF lclusters are now supported
- * which can have three parts at most.
- */
- if (ztailpacking && end > inode->i_size)
- end = inode->i_size;
- break;
- }
- /* m.lcn should be >= 1 if endoff < m.clusterofs */
- if (!m.lcn) {
- erofs_err(sb, "invalid logical cluster 0 at nid %llu",
- vi->nid);
- err = -EFSCORRUPTED;
- goto unmap_out;
+ if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD && endoff >= m.clusterofs) {
+ m.headtype = m.type;
+ map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
+ /*
+ * For ztailpacking files, in order to inline data more
+ * effectively, special EOF lclusters are now supported
+ * which can have three parts at most.
+ */
+ if (ztailpacking && end > inode->i_size)
+ end = inode->i_size;
+ } else {
+ if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
+ /* m.lcn should be >= 1 if endoff < m.clusterofs */
+ if (!m.lcn) {
+ erofs_err(sb, "invalid logical cluster 0 at nid %llu",
+ vi->nid);
+ err = -EFSCORRUPTED;
+ goto unmap_out;
+ }
+ end = (m.lcn << lclusterbits) | m.clusterofs;
+ map->m_flags |= EROFS_MAP_FULL_MAPPED;
+ m.delta[0] = 1;
}
- end = (m.lcn << lclusterbits) | m.clusterofs;
- map->m_flags |= EROFS_MAP_FULL_MAPPED;
- m.delta[0] = 1;
- fallthrough;
- case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
/* get the corresponding first chunk */
err = z_erofs_extent_lookback(&m, m.delta[0]);
if (err)
goto unmap_out;
- break;
- default:
- erofs_err(sb, "unknown type %u @ offset %llu of nid %llu",
- m.type, ofs, vi->nid);
- err = -EOPNOTSUPP;
- goto unmap_out;
}
if (m.partialref)
map->m_flags |= EROFS_MAP_PARTIAL_REF;
@@ -489,7 +468,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode,
goto unmap_out;
}
} else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
- map->m_flags |= EROFS_MAP_FRAGMENT;
+ map->m_flags = EROFS_MAP_FRAGMENT;
} else {
map->m_pa = erofs_pos(sb, m.pblk);
err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
@@ -543,6 +522,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode,
unsigned int recsz = z_erofs_extent_recsize(vi->z_advise);
erofs_off_t pos = round_up(Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) +
vi->inode_isize + vi->xattr_isize), recsz);
+ bool in_mbox = erofs_inode_in_metabox(inode);
erofs_off_t lend = inode->i_size;
erofs_off_t l, r, mid, pa, la, lstart;
struct z_erofs_extent *ext;
@@ -552,7 +532,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode,
map->m_flags = 0;
if (recsz <= offsetof(struct z_erofs_extent, pstart_hi)) {
if (recsz <= offsetof(struct z_erofs_extent, pstart_lo)) {
- ext = erofs_read_metabuf(&map->buf, sb, pos, true);
+ ext = erofs_read_metabuf(&map->buf, sb, pos, in_mbox);
if (IS_ERR(ext))
return PTR_ERR(ext);
pa = le64_to_cpu(*(__le64 *)ext);
@@ -565,7 +545,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode,
}
for (; lstart <= map->m_la; lstart += 1 << vi->z_lclusterbits) {
- ext = erofs_read_metabuf(&map->buf, sb, pos, true);
+ ext = erofs_read_metabuf(&map->buf, sb, pos, in_mbox);
if (IS_ERR(ext))
return PTR_ERR(ext);
map->m_plen = le32_to_cpu(ext->plen);
@@ -585,7 +565,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode,
for (l = 0, r = vi->z_extents; l < r; ) {
mid = l + (r - l) / 2;
ext = erofs_read_metabuf(&map->buf, sb,
- pos + mid * recsz, true);
+ pos + mid * recsz, in_mbox);
if (IS_ERR(ext))
return PTR_ERR(ext);
@@ -597,6 +577,10 @@ static int z_erofs_map_blocks_ext(struct inode *inode,
if (la > map->m_la) {
r = mid;
+ if (la > lend) {
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
lend = la;
} else {
l = mid + 1;
@@ -613,7 +597,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode,
if (lstart < lend) {
map->m_la = lstart;
if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) {
- map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FRAGMENT;
+ map->m_flags = EROFS_MAP_FRAGMENT;
vi->z_fragmentoff = map->m_plen;
if (recsz > offsetof(struct z_erofs_extent, pstart_lo))
vi->z_fragmentoff |= map->m_pa << 32;
@@ -635,22 +619,15 @@ static int z_erofs_map_blocks_ext(struct inode *inode,
}
}
map->m_llen = lend - map->m_la;
- if (!last && map->m_llen < sb->s_blocksize) {
- erofs_err(sb, "extent too small %llu @ offset %llu of nid %llu",
- map->m_llen, map->m_la, vi->nid);
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
return 0;
}
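The extent lookup in the hunk above is a plain lower-bound binary search over fixed-size extent records; a standalone model of that loop (illustrative only, not part of the patch):

/* Returns the first index whose logical address exceeds 'want';
 * record idx-1 (if any) is the one covering 'want'. */
static unsigned int find_extent_idx(const unsigned long *la, unsigned int n,
				    unsigned long want)
{
	unsigned int l = 0, r = n;

	while (l < r) {
		unsigned int mid = l + (r - l) / 2;

		if (la[mid] > want)
			r = mid;
		else
			l = mid + 1;
	}
	return l;
}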
-static int z_erofs_fill_inode_lazy(struct inode *inode)
+static int z_erofs_fill_inode(struct inode *inode, struct erofs_map_blocks *map)
{
struct erofs_inode *const vi = EROFS_I(inode);
struct super_block *const sb = inode->i_sb;
int err, headnr;
erofs_off_t pos;
- struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct z_erofs_map_header *h;
if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) {
@@ -670,7 +647,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
goto out_unlock;
pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
- h = erofs_read_metabuf(&buf, sb, pos, true);
+ h = erofs_read_metabuf(&map->buf, sb, pos, erofs_inode_in_metabox(inode));
if (IS_ERR(h)) {
err = PTR_ERR(h);
goto out_unlock;
@@ -708,7 +685,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel",
headnr + 1, vi->z_algorithmtype[headnr], vi->nid);
err = -EOPNOTSUPP;
- goto out_put_metabuf;
+ goto out_unlock;
}
if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
@@ -717,7 +694,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu",
vi->nid);
err = -EFSCORRUPTED;
- goto out_put_metabuf;
+ goto out_unlock;
}
if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT &&
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
@@ -725,27 +702,25 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu",
vi->nid);
err = -EFSCORRUPTED;
- goto out_put_metabuf;
+ goto out_unlock;
}
if (vi->z_idata_size ||
(vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) {
- struct erofs_map_blocks map = {
+ struct erofs_map_blocks tm = {
.buf = __EROFS_BUF_INITIALIZER
};
- err = z_erofs_map_blocks_fo(inode, &map,
+ err = z_erofs_map_blocks_fo(inode, &tm,
EROFS_GET_BLOCKS_FINDTAIL);
- erofs_put_metabuf(&map.buf);
+ erofs_put_metabuf(&tm.buf);
if (err < 0)
- goto out_put_metabuf;
+ goto out_unlock;
}
done:
/* paired with smp_mb() at the beginning of the function */
smp_mb();
set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
-out_put_metabuf:
- erofs_put_metabuf(&buf);
out_unlock:
clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
return err;
@@ -763,7 +738,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
map->m_la = inode->i_size;
map->m_flags = 0;
} else {
- err = z_erofs_fill_inode_lazy(inode);
+ err = z_erofs_fill_inode(inode, map);
if (!err) {
if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL &&
(vi->z_advise & Z_EROFS_ADVISE_EXTENTS))
@@ -799,7 +774,7 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
iomap->length = map.m_llen;
if (map.m_flags & EROFS_MAP_MAPPED) {
iomap->type = IOMAP_MAPPED;
- iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ?
+ iomap->addr = map.m_flags & __EROFS_MAP_FRAGMENT ?
IOMAP_NULL_ADDR : map.m_pa;
} else {
iomap->type = IOMAP_HOLE;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index d4dbffdedd08..b22d6f819f78 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -218,6 +218,7 @@ struct eventpoll {
/* used to optimize loop detection check */
u64 gen;
struct hlist_head refs;
+ u8 loop_check_depth;
/*
* usage count, used together with epitem->dying to
@@ -883,7 +884,7 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
kfree_rcu(epi, rcu);
percpu_counter_dec(&ep->user->epoll_watches);
- return ep_refcount_dec_and_test(ep);
+ return true;
}
/*
@@ -891,14 +892,14 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
*/
static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi)
{
- WARN_ON_ONCE(__ep_remove(ep, epi, false));
+ if (__ep_remove(ep, epi, false))
+ WARN_ON_ONCE(ep_refcount_dec_and_test(ep));
}
static void ep_clear_and_put(struct eventpoll *ep)
{
struct rb_node *rbp, *next;
struct epitem *epi;
- bool dispose;
/* We need to release all tasks waiting for these file */
if (waitqueue_active(&ep->poll_wait))
@@ -931,10 +932,8 @@ static void ep_clear_and_put(struct eventpoll *ep)
cond_resched();
}
- dispose = ep_refcount_dec_and_test(ep);
mutex_unlock(&ep->mtx);
-
- if (dispose)
+ if (ep_refcount_dec_and_test(ep))
ep_free(ep);
}
@@ -1137,7 +1136,7 @@ again:
dispose = __ep_remove(ep, epi, true);
mutex_unlock(&ep->mtx);
- if (dispose)
+ if (dispose && ep_refcount_dec_and_test(ep))
ep_free(ep);
goto again;
}
@@ -2142,23 +2141,24 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
}
/**
- * ep_loop_check_proc - verify that adding an epoll file inside another
- * epoll structure does not violate the constraints, in
- * terms of closed loops, or too deep chains (which can
- * result in excessive stack usage).
+ * ep_loop_check_proc - verify that adding an epoll file @ep inside another
+ * epoll file does not create closed loops, and
+ * determine the depth of the subtree starting at @ep
*
* @ep: the &struct eventpoll to be currently checked.
* @depth: Current depth of the path being checked.
*
- * Return: %zero if adding the epoll @file inside current epoll
- * structure @ep does not violate the constraints, or %-1 otherwise.
+ * Return: depth of the subtree, or INT_MAX if we found a loop or went too deep.
*/
static int ep_loop_check_proc(struct eventpoll *ep, int depth)
{
- int error = 0;
+ int result = 0;
struct rb_node *rbp;
struct epitem *epi;
+ if (ep->gen == loop_check_gen)
+ return ep->loop_check_depth;
+
mutex_lock_nested(&ep->mtx, depth + 1);
ep->gen = loop_check_gen;
for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
@@ -2166,13 +2166,11 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
if (unlikely(is_file_epoll(epi->ffd.file))) {
struct eventpoll *ep_tovisit;
ep_tovisit = epi->ffd.file->private_data;
- if (ep_tovisit->gen == loop_check_gen)
- continue;
if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS)
- error = -1;
+ result = INT_MAX;
else
- error = ep_loop_check_proc(ep_tovisit, depth + 1);
- if (error != 0)
+ result = max(result, ep_loop_check_proc(ep_tovisit, depth + 1) + 1);
+ if (result > EP_MAX_NESTS)
break;
} else {
/*
@@ -2186,9 +2184,25 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
list_file(epi->ffd.file);
}
}
+ ep->loop_check_depth = result;
mutex_unlock(&ep->mtx);
- return error;
+ return result;
+}
+
+/* ep_get_upwards_depth_proc - determine depth of @ep when traversed upwards */
+static int ep_get_upwards_depth_proc(struct eventpoll *ep, int depth)
+{
+ int result = 0;
+ struct epitem *epi;
+
+ if (ep->gen == loop_check_gen)
+ return ep->loop_check_depth;
+ hlist_for_each_entry_rcu(epi, &ep->refs, fllink)
+ result = max(result, ep_get_upwards_depth_proc(epi->ep, depth + 1) + 1);
+ ep->gen = loop_check_gen;
+ ep->loop_check_depth = result;
+ return result;
}
/**
@@ -2204,8 +2218,22 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
*/
static int ep_loop_check(struct eventpoll *ep, struct eventpoll *to)
{
+ int depth, upwards_depth;
+
inserting_into = ep;
- return ep_loop_check_proc(to, 0);
+ /*
+ * Check how deep down we can get from @to, and whether it is possible
+ * to loop up to @ep.
+ */
+ depth = ep_loop_check_proc(to, 0);
+ if (depth > EP_MAX_NESTS)
+ return -1;
+ /* Check how far up we can go from @ep. */
+ rcu_read_lock();
+ upwards_depth = ep_get_upwards_depth_proc(ep, 0);
+ rcu_read_unlock();
+
+ return (depth+1+upwards_depth > EP_MAX_NESTS) ? -1 : 0;
}
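The combined check above caps the total chain length: the longest downward chain under @to, plus the new edge, plus the longest upward chain of watchers over @ep must stay within EP_MAX_NESTS. A minimal standalone sketch of that rule (names are illustrative, not part of the patch):

#include <stdbool.h>

#define EP_MAX_NESTS 4

/*
 * Hypothetical model of the insertion rule: depth_below is what
 * ep_loop_check_proc() computes for @to, depth_above is what
 * ep_get_upwards_depth_proc() computes for @ep; +1 is the new edge.
 */
static bool epoll_nesting_allowed(int depth_below, int depth_above)
{
	return depth_below + 1 + depth_above <= EP_MAX_NESTS;
}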
static void clear_tfile_check_list(void)
diff --git a/fs/exec.c b/fs/exec.c
index 1f5fdd2e096e..ba400aafd640 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -114,6 +114,9 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
bool path_noexec(const struct path *path)
{
+ /* If it's an anonymous inode, make sure that we catch any shenanigans. */
+ VFS_WARN_ON_ONCE(IS_ANON_FILE(d_inode(path->dentry)) &&
+ !(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC));
return (path->mnt->mnt_flags & MNT_NOEXEC) ||
(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
}
@@ -781,13 +784,15 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
if (IS_ERR(file))
return file;
+ if (path_noexec(&file->f_path))
+ return ERR_PTR(-EACCES);
+
/*
* In the past the regular type check was here. It moved to may_open() in
* 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is
* an invariant that all non-regular files error out before we get here.
*/
- if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
- path_noexec(&file->f_path))
+ if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)))
return ERR_PTR(-EACCES);
err = exe_file_deny_write_access(file);
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 841a5b18e3df..70f53edd0a10 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -532,11 +532,10 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
return blkdev_issue_flush(inode->i_sb->s_bdev);
}
-static int exfat_extend_valid_size(struct file *file, loff_t new_valid_size)
+static int exfat_extend_valid_size(struct inode *inode, loff_t new_valid_size)
{
int err;
loff_t pos;
- struct inode *inode = file_inode(file);
struct exfat_inode_info *ei = EXFAT_I(inode);
struct address_space *mapping = inode->i_mapping;
const struct address_space_operations *ops = mapping->a_ops;
@@ -551,14 +550,14 @@ static int exfat_extend_valid_size(struct file *file, loff_t new_valid_size)
if (pos + len > new_valid_size)
len = new_valid_size - pos;
- err = ops->write_begin(file, mapping, pos, len, &folio, NULL);
+ err = ops->write_begin(NULL, mapping, pos, len, &folio, NULL);
if (err)
goto out;
off = offset_in_folio(folio, pos);
folio_zero_new_buffers(folio, off, off + len);
- err = ops->write_end(file, mapping, pos, len, len, folio, NULL);
+ err = ops->write_end(NULL, mapping, pos, len, len, folio, NULL);
if (err < 0)
goto out;
pos += len;
@@ -604,7 +603,7 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
}
if (pos > valid_size) {
- ret = exfat_extend_valid_size(file, pos);
+ ret = exfat_extend_valid_size(inode, pos);
if (ret < 0 && ret != -ENOSPC) {
exfat_err(inode->i_sb,
"write: fail to zero from %llu to %llu(%zd)",
@@ -665,7 +664,7 @@ static vm_fault_t exfat_page_mkwrite(struct vm_fault *vmf)
start + vma->vm_end - vma->vm_start);
if (ei->valid_size < end) {
- err = exfat_extend_valid_size(file, end);
+ err = exfat_extend_valid_size(inode, end);
if (err < 0) {
inode_unlock(inode);
return vmf_fs_error(err);
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index b22c02d6000f..c10844e1e16c 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -446,9 +446,10 @@ static void exfat_write_failed(struct address_space *mapping, loff_t to)
}
}
-static int exfat_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned int len,
- struct folio **foliop, void **fsdata)
+static int exfat_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned int len,
+ struct folio **foliop, void **fsdata)
{
int ret;
@@ -463,15 +464,16 @@ static int exfat_write_begin(struct file *file, struct address_space *mapping,
return ret;
}
-static int exfat_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned int len, unsigned int copied,
- struct folio *folio, void *fsdata)
+static int exfat_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned int len, unsigned int copied,
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
struct exfat_inode_info *ei = EXFAT_I(inode);
int err;
- err = generic_write_end(file, mapping, pos, len, copied, folio, fsdata);
+ err = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata);
if (err < len)
exfat_write_failed(mapping, pos+len);
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 7ed858937d45..ea5c1334a214 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -667,9 +667,9 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc)
}
if (sbi->options.utf8)
- sb->s_d_op = &exfat_utf8_dentry_ops;
+ set_default_d_op(sb, &exfat_utf8_dentry_ops);
else
- sb->s_d_op = &exfat_dentry_ops;
+ set_default_d_op(sb, &exfat_dentry_ops);
root_inode = new_inode(sb);
if (!root_inode) {
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 402fecf90a44..b07b3b369710 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -87,7 +87,7 @@ static void ext2_commit_chunk(struct folio *folio, loff_t pos, unsigned len)
struct inode *dir = mapping->host;
inode_inc_iversion(dir);
- block_write_end(NULL, mapping, pos, len, len, folio, NULL);
+ block_write_end(pos, len, len, folio);
if (pos+len > dir->i_size) {
i_size_write(dir, pos+len);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 30f8201c155f..d35ca26eee3c 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -915,7 +915,7 @@ static void ext2_readahead(struct readahead_control *rac)
}
static int
-ext2_write_begin(struct file *file, struct address_space *mapping,
+ext2_write_begin(const struct kiocb *iocb, struct address_space *mapping,
loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
{
int ret;
@@ -926,13 +926,14 @@ ext2_write_begin(struct file *file, struct address_space *mapping,
return ret;
}
-static int ext2_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct folio *folio, void *fsdata)
+static int ext2_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio, void *fsdata)
{
int ret;
- ret = generic_write_end(file, mapping, pos, len, copied, folio, fsdata);
+ ret = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata);
if (ret < len)
ext2_write_failed(mapping, pos + len);
return ret;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 21df81347147..274b41a476c8 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -977,7 +977,8 @@ const struct file_operations ext4_file_operations = {
.splice_write = iter_file_splice_write,
.fallocate = ext4_fallocate,
.fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC |
- FOP_DIO_PARALLEL_WRITE,
+ FOP_DIO_PARALLEL_WRITE |
+ FOP_DONTCACHE,
};
const struct inode_operations ext4_file_inode_operations = {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be9a4cba35fd..5c7024051f1e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1252,7 +1252,8 @@ int ext4_block_write_begin(handle_t *handle, struct folio *folio,
* and the ext4_write_end(). So doing the jbd2_journal_start at the start of
* ext4_write_begin() is the right place.
*/
-static int ext4_write_begin(struct file *file, struct address_space *mapping,
+static int ext4_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len,
struct folio **foliop, void **fsdata)
{
@@ -1263,7 +1264,6 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
struct folio *folio;
pgoff_t index;
unsigned from, to;
- fgf_t fgp = FGP_WRITEBEGIN;
ret = ext4_emergency_state(inode->i_sb);
if (unlikely(ret))
@@ -1287,16 +1287,14 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
}
/*
- * __filemap_get_folio() can take a long time if the
+ * write_begin_get_folio() can take a long time if the
* system is thrashing due to memory pressure, or if the folio
* is being written back. So grab it first before we start
* the transaction handle. This also allows us to allocate
* the folio (if needed) without using GFP_NOFS.
*/
retry_grab:
- fgp |= fgf_set_order(len);
- folio = __filemap_get_folio(mapping, index, fgp,
- mapping_gfp_mask(mapping));
+ folio = write_begin_get_folio(iocb, mapping, index, len);
if (IS_ERR(folio))
return PTR_ERR(folio);
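write_begin_get_folio() is a new helper defined outside this hunk; judging from the code it replaces, its shape is roughly the following (a sketch, not the actual definition — the IOCB_DONTCACHE handling in particular is an assumption tied to the FOP_DONTCACHE flag added above):

static inline struct folio *write_begin_get_folio(const struct kiocb *iocb,
		struct address_space *mapping, pgoff_t index, size_t len)
{
	fgf_t fgp_flags = FGP_WRITEBEGIN | fgf_set_order(len);

	/* presumably lets uncached (dontcache) writes opt out of the LRU */
	if (iocb && (iocb->ki_flags & IOCB_DONTCACHE))
		fgp_flags |= FGP_DONTCACHE;

	return __filemap_get_folio(mapping, index, fgp_flags,
				   mapping_gfp_mask(mapping));
}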
@@ -1400,12 +1398,12 @@ static int write_end_fn(handle_t *handle, struct inode *inode,
/*
* We need to pick up the new inode size which generic_commit_write gave us
- * `file' can be NULL - eg, when called from page_symlink().
+ * `iocb` can be NULL - e.g., when called from page_symlink().
*
* ext4 never places buffers on inode->i_mapping->i_private_list. metadata
* buffers are managed internally.
*/
-static int ext4_write_end(struct file *file,
+static int ext4_write_end(const struct kiocb *iocb,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct folio *folio, void *fsdata)
@@ -1424,7 +1422,7 @@ static int ext4_write_end(struct file *file,
return ext4_write_inline_data_end(inode, pos, len, copied,
folio);
- copied = block_write_end(file, mapping, pos, len, copied, folio, fsdata);
+ copied = block_write_end(pos, len, copied, folio);
/*
* it's important to update i_size while still holding folio lock:
* page writeout could otherwise come in and zero beyond i_size.
@@ -1510,7 +1508,7 @@ static void ext4_journalled_zero_new_buffers(handle_t *handle,
} while (bh != head);
}
-static int ext4_journalled_write_end(struct file *file,
+static int ext4_journalled_write_end(const struct kiocb *iocb,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct folio *folio, void *fsdata)
@@ -3036,7 +3034,8 @@ static int ext4_nonda_switch(struct super_block *sb)
return 0;
}
-static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
+static int ext4_da_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len,
struct folio **foliop, void **fsdata)
{
@@ -3044,7 +3043,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
struct folio *folio;
pgoff_t index;
struct inode *inode = mapping->host;
- fgf_t fgp = FGP_WRITEBEGIN;
ret = ext4_emergency_state(inode->i_sb);
if (unlikely(ret))
@@ -3054,7 +3052,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
if (ext4_nonda_switch(inode->i_sb) || ext4_verity_in_progress(inode)) {
*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
- return ext4_write_begin(file, mapping, pos,
+ return ext4_write_begin(iocb, mapping, pos,
len, foliop, fsdata);
}
*fsdata = (void *)0;
@@ -3070,9 +3068,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
}
retry:
- fgp |= fgf_set_order(len);
- folio = __filemap_get_folio(mapping, index, fgp,
- mapping_gfp_mask(mapping));
+ folio = write_begin_get_folio(iocb, mapping, index, len);
if (IS_ERR(folio))
return PTR_ERR(folio);
@@ -3144,8 +3140,7 @@ static int ext4_da_do_write_end(struct address_space *mapping,
* block_write_end() will mark the inode as dirty with I_DIRTY_PAGES
* flag, which is all that's needed to trigger page writeback.
*/
- copied = block_write_end(NULL, mapping, pos, len, copied,
- folio, NULL);
+ copied = block_write_end(pos, len, copied, folio);
new_i_size = pos + copied;
/*
@@ -3196,7 +3191,7 @@ static int ext4_da_do_write_end(struct address_space *mapping,
return copied;
}
-static int ext4_da_write_end(struct file *file,
+static int ext4_da_write_end(const struct kiocb *iocb,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct folio *folio, void *fsdata)
@@ -3205,7 +3200,7 @@ static int ext4_da_write_end(struct file *file,
int write_mode = (int)(unsigned long)fsdata;
if (write_mode == FALL_BACK_TO_NONDELALLOC)
- return ext4_write_end(file, mapping, pos,
+ return ext4_write_end(iocb, mapping, pos,
len, copied, folio, fsdata);
trace_ext4_da_write_end(inode, pos, len, copied);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 31e892842625..711ad80b38d0 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3519,8 +3519,10 @@ reserve_block:
return 0;
}
-static int f2fs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
+static int f2fs_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, struct folio **foliop,
+ void **fsdata)
{
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -3656,7 +3658,7 @@ fail:
return err;
}
-static int f2fs_write_end(struct file *file,
+static int f2fs_write_end(const struct kiocb *iocb,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct folio *folio, void *fsdata)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 6bd3de64f2a8..696131e655ed 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -35,6 +35,17 @@
#include <trace/events/f2fs.h>
#include <uapi/linux/f2fs.h>
+static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size)
+{
+ loff_t old_size = i_size_read(inode);
+
+ if (old_size >= new_size)
+ return;
+
+ /* zero or drop pages only in range of [old_size, new_size] */
+ truncate_pagecache(inode, old_size);
+}
+
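Worked example of the range the helper above cleans (illustrative numbers, not from the patch):

/*
 * With 4 KiB pages, old_size = 5000 and new_size = 9000:
 * truncate_pagecache(inode, 5000) zeroes bytes 5000..8191 of the
 * straddling page and drops pages at offset >= 8192, so no stale
 * post-EOF data can surface in [old_size, new_size) once i_size grows.
 */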
static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
@@ -103,8 +114,13 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
+ filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT);
+ filemap_invalidate_unlock(inode->i_mapping);
+
file_update_time(vmf->vma->vm_file);
filemap_invalidate_lock_shared(inode->i_mapping);
+
folio_lock(folio);
if (unlikely(folio->mapping != inode->i_mapping ||
folio_pos(folio) > i_size_read(inode) ||
@@ -1109,6 +1125,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
+ if (attr->ia_size > old_size)
+ f2fs_zero_post_eof_page(inode, attr->ia_size);
truncate_setsize(inode, attr->ia_size);
if (attr->ia_size <= old_size)
@@ -1227,6 +1245,10 @@ static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
+ filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len);
+ filemap_invalidate_unlock(inode->i_mapping);
+
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1510,6 +1532,8 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len);
+
f2fs_lock_op(sbi);
f2fs_drop_extent_tree(inode);
truncate_pagecache(inode, offset);
@@ -1631,6 +1655,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
+ filemap_invalidate_lock(mapping);
+ f2fs_zero_post_eof_page(inode, offset + len);
+ filemap_invalidate_unlock(mapping);
+
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1762,6 +1790,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
/* avoid gc operation during block exchange */
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(mapping);
+
+ f2fs_zero_post_eof_page(inode, offset + len);
truncate_pagecache(inode, offset);
while (!ret && idx > pg_start) {
@@ -1819,6 +1849,10 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
if (err)
return err;
+ filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len);
+ filemap_invalidate_unlock(inode->i_mapping);
+
f2fs_balance_fs(sbi, true);
pg_start = ((unsigned long long)offset) >> PAGE_SHIFT;
@@ -4860,6 +4894,10 @@ static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
err = file_modified(file);
if (err)
return err;
+
+ filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from));
+ filemap_invalidate_unlock(inode->i_mapping);
return count;
}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 1cb4cba7f961..bfe104db284e 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -2078,7 +2078,6 @@ write_node:
if (!__write_node_folio(folio, false, &submitted,
wbc, do_balance, io_type, NULL)) {
- folio_unlock(folio);
folio_batch_release(&fbatch);
ret = -EIO;
goto out;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 3852bb66358c..9648ed097816 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -219,13 +219,14 @@ static void fat_write_failed(struct address_space *mapping, loff_t to)
}
}
-static int fat_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct folio **foliop, void **fsdata)
+static int fat_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct folio **foliop, void **fsdata)
{
int err;
- err = cont_write_begin(file, mapping, pos, len,
+ err = cont_write_begin(iocb, mapping, pos, len,
foliop, fsdata, fat_get_block,
&MSDOS_I(mapping->host)->mmu_private);
if (err < 0)
@@ -233,13 +234,14 @@ static int fat_write_begin(struct file *file, struct address_space *mapping,
return err;
}
-static int fat_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct folio *folio, void *fsdata)
+static int fat_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
int err;
- err = generic_write_end(file, mapping, pos, len, copied, folio, fsdata);
+ err = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata);
if (err < len)
fat_write_failed(mapping, pos + len);
if (!(err < 0) && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) {
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 23e9b9371ec3..0b920ee40a7f 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -646,7 +646,7 @@ static const struct inode_operations msdos_dir_inode_operations = {
static void setup(struct super_block *sb)
{
MSDOS_SB(sb)->dir_ops = &msdos_dir_inode_operations;
- sb->s_d_op = &msdos_dentry_operations;
+ set_default_d_op(sb, &msdos_dentry_operations);
sb->s_flags |= SB_NOATIME;
}
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index dd910edd2404..5dbc4cbb8fce 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1187,9 +1187,9 @@ static void setup(struct super_block *sb)
{
MSDOS_SB(sb)->dir_ops = &vfat_dir_inode_operations;
if (MSDOS_SB(sb)->options.name_check != 's')
- sb->s_d_op = &vfat_ci_dentry_ops;
+ set_default_d_op(sb, &vfat_ci_dentry_ops);
else
- sb->s_d_op = &vfat_dentry_ops;
+ set_default_d_op(sb, &vfat_dentry_ops);
}
static int vfat_fill_super(struct super_block *sb, struct fs_context *fc)
diff --git a/fs/file.c b/fs/file.c
index 3a3146664cf3..6d2275c3be9c 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -197,6 +197,21 @@ static struct fdtable *alloc_fdtable(unsigned int slots_wanted)
return ERR_PTR(-EMFILE);
}
+ /*
+ * Check if the allocation size would exceed INT_MAX. kvmalloc_array()
+ * and kvmalloc() will warn if the allocation size is greater than
+ * INT_MAX, as filp_cachep objects are not __GFP_NOWARN.
+ *
+ * This can happen when sysctl_nr_open is set to a very high value and
+ * a process tries to use a file descriptor near that limit. For example,
+ * if sysctl_nr_open is set to 1073741816 (0x3ffffff8) - which is what
+ * systemd typically sets it to - then trying to use a file descriptor
+ * close to that value will require allocating a file descriptor table
+ * that exceeds 8GB in size.
+ */
+ if (unlikely(nr > INT_MAX / sizeof(struct file *)))
+ return ERR_PTR(-EMFILE);
+
fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT);
if (!fdt)
goto out;
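To see why the new guard matters with systemd's usual nr_open of 0x3ffffff8, a quick back-of-the-envelope check (assumes 8-byte pointers; illustrative, not part of the patch):

#include <stdio.h>

int main(void)
{
	unsigned long nr = 0x3ffffff8UL;	/* 1073741816 fds */
	unsigned long limit = 2147483647UL / 8;	/* INT_MAX / sizeof(struct file *) */

	/* 1073741816 * 8 = 8589934528 bytes, i.e. an ~8 GiB fd array */
	printf("array bytes: %lu, INT_MAX cap: %lu entries\n", nr * 8, limit);
	return 0;
}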
@@ -1198,8 +1213,12 @@ bool file_seek_cur_needs_f_lock(struct file *file)
if (!(file->f_mode & FMODE_ATOMIC_POS) && !file->f_op->iterate_shared)
return false;
- VFS_WARN_ON_ONCE((file_count(file) > 1) &&
- !mutex_is_locked(&file->f_pos_lock));
+ /*
+ * Note that we are not guaranteed to be called after fdget_pos() on
+ * this file object, in which case the caller is expected to provide the
+ * appropriate locking.
+ */
+
return true;
}
diff --git a/fs/file_table.c b/fs/file_table.c
index 138114d64307..f09d79a98111 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -52,17 +52,20 @@ struct backing_file {
};
};
-static inline struct backing_file *backing_file(struct file *f)
-{
- return container_of(f, struct backing_file, file);
-}
+#define backing_file(f) container_of(f, struct backing_file, file)
-struct path *backing_file_user_path(struct file *f)
+struct path *backing_file_user_path(const struct file *f)
{
return &backing_file(f)->user_path;
}
EXPORT_SYMBOL_GPL(backing_file_user_path);
+void backing_file_set_user_path(struct file *f, const struct path *path)
+{
+ backing_file(f)->user_path = *path;
+}
+EXPORT_SYMBOL_GPL(backing_file_set_user_path);
+
static inline void file_free(struct file *f)
{
security_file_free(f);
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 2a730d88cc3b..bb407705603c 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -11,6 +11,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs_context.h>
+#include <linux/namei.h>
#define FUSE_CTL_SUPER_MAGIC 0x65735543
@@ -212,7 +213,6 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
struct dentry *dentry;
struct inode *inode;
- BUG_ON(fc->ctl_ndents >= FUSE_CTL_NUM_DENTRIES);
dentry = d_alloc_name(parent, name);
if (!dentry)
return NULL;
@@ -236,8 +236,6 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
inode->i_private = fc;
d_add(dentry, inode);
- fc->ctl_dentry[fc->ctl_ndents++] = dentry;
-
return dentry;
}
@@ -280,27 +278,29 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
return -ENOMEM;
}
+static void remove_one(struct dentry *dentry)
+{
+ d_inode(dentry)->i_private = NULL;
+}
+
/*
* Remove a connection from the control filesystem (if it exists).
* Caller must hold fuse_mutex
*/
void fuse_ctl_remove_conn(struct fuse_conn *fc)
{
- int i;
+ struct dentry *dentry;
+ char name[32];
if (!fuse_control_sb || fc->no_control)
return;
- for (i = fc->ctl_ndents - 1; i >= 0; i--) {
- struct dentry *dentry = fc->ctl_dentry[i];
- d_inode(dentry)->i_private = NULL;
- if (!i) {
- /* Get rid of submounts: */
- d_invalidate(dentry);
- }
- dput(dentry);
+ sprintf(name, "%u", fc->dev);
+ dentry = lookup_noperm_positive_unlocked(&QSTR(name), fuse_control_sb->s_root);
+ if (!IS_ERR(dentry)) {
+ simple_recursive_removal(dentry, remove_one);
+ dput(dentry); // paired with lookup_noperm_positive_unlocked()
}
- drop_nlink(d_inode(fuse_control_sb->s_root));
}
static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fsc)
@@ -346,12 +346,8 @@ static int fuse_ctl_init_fs_context(struct fs_context *fsc)
static void fuse_ctl_kill_sb(struct super_block *sb)
{
- struct fuse_conn *fc;
-
mutex_lock(&fuse_mutex);
fuse_control_sb = NULL;
- list_for_each_entry(fc, &fuse_conn_list, entry)
- fc->ctl_ndents = 0;
mutex_unlock(&fuse_mutex);
kill_litter_super(sb);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 45b4c3cc1396..2d817d7cab26 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -338,13 +338,6 @@ const struct dentry_operations fuse_dentry_operations = {
.d_automount = fuse_dentry_automount,
};
-const struct dentry_operations fuse_root_dentry_operations = {
-#if BITS_PER_LONG < 64
- .d_init = fuse_dentry_init,
- .d_release = fuse_dentry_release,
-#endif
-};
-
int fuse_valid_type(int m)
{
return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f102afc03359..2ddfb3bb6483 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1147,7 +1147,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
struct address_space *mapping,
struct iov_iter *ii, loff_t pos,
- unsigned int max_pages)
+ unsigned int max_folios)
{
struct fuse_args_pages *ap = &ia->ap;
struct fuse_conn *fc = get_fuse_conn(mapping->host);
@@ -1157,12 +1157,11 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
int err = 0;
num = min(iov_iter_count(ii), fc->max_write);
- num = min(num, max_pages << PAGE_SHIFT);
ap->args.in_pages = true;
ap->descs[0].offset = offset;
- while (num) {
+ while (num && ap->num_folios < max_folios) {
size_t tmp;
struct folio *folio;
pgoff_t index = pos >> PAGE_SHIFT;
@@ -2213,10 +2212,13 @@ out:
* It's worth making sure that space is reserved on disk for the write,
* but how to implement that without killing performance needs more thought.
*/
-static int fuse_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
+static int fuse_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, struct folio **foliop,
+ void **fsdata)
{
pgoff_t index = pos >> PAGE_SHIFT;
+ struct file *file = iocb->ki_filp;
struct fuse_conn *fc = get_fuse_conn(file_inode(file));
struct folio *folio;
loff_t fsize;
@@ -2256,9 +2258,10 @@ error:
return err;
}
-static int fuse_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct folio *folio, void *fsdata)
+static int fuse_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio, void *fsdata)
{
struct inode *inode = folio->mapping->host;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index b54f4f57789f..989f37abe138 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -913,12 +913,6 @@ struct fuse_conn {
/** Device ID from the root super block */
dev_t dev;
- /** Dentries in the control filesystem */
- struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES];
-
- /** number of dentries used in the above array */
- int ctl_ndents;
-
/** Key for lock owner ID scrambling */
u32 scramble_key[4];
@@ -1109,7 +1103,6 @@ static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket)
extern const struct file_operations fuse_dev_operations;
extern const struct dentry_operations fuse_dentry_operations;
-extern const struct dentry_operations fuse_root_dentry_operations;
/**
* Get a filled in inode
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index bfe8d8af46f3..ecb869e895ab 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -9,6 +9,7 @@
#include "fuse_i.h"
#include "dev_uring_i.h"
+#include <linux/dax.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/file.h>
@@ -162,6 +163,9 @@ static void fuse_evict_inode(struct inode *inode)
/* Will write inode on close/munmap and in all other dirtiers */
WARN_ON(inode->i_state & I_DIRTY_INODE);
+ if (FUSE_IS_DAX(inode))
+ dax_break_layout_final(inode);
+
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
if (inode->i_sb->s_flags & SB_ACTIVE) {
@@ -1715,7 +1719,7 @@ static int fuse_fill_super_submount(struct super_block *sb,
fi = get_fuse_inode(root);
fi->nlookup--;
- sb->s_d_op = &fuse_dentry_operations;
+ set_default_d_op(sb, &fuse_dentry_operations);
sb->s_root = d_make_root(root);
if (!sb->s_root)
return -ENOMEM;
@@ -1850,12 +1854,10 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
err = -ENOMEM;
root = fuse_get_root_inode(sb, ctx->rootmode);
- sb->s_d_op = &fuse_root_dentry_operations;
+ set_default_d_op(sb, &fuse_dentry_operations);
root_dentry = d_make_root(root);
if (!root_dentry)
goto err_dev_free;
- /* Root dentry doesn't have .d_revalidate */
- sb->s_d_op = &fuse_dentry_operations;
mutex_lock(&fuse_mutex);
err = -EINVAL;
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index dbf1aede744c..509e2f0d97e7 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -60,6 +60,7 @@
#include <linux/crc32.h>
#include <linux/vmalloc.h>
#include <linux/bio.h>
+#include <linux/log2.h>
#include "gfs2.h"
#include "incore.h"
@@ -912,7 +913,6 @@ static int dir_make_exhash(struct inode *inode)
struct qstr args;
struct buffer_head *bh, *dibh;
struct gfs2_leaf *leaf;
- int y;
u32 x;
__be64 *lp;
u64 bn;
@@ -979,9 +979,7 @@ static int dir_make_exhash(struct inode *inode)
i_size_write(inode, sdp->sd_sb.sb_bsize / 2);
gfs2_add_inode_blocks(&dip->i_inode, 1);
dip->i_diskflags |= GFS2_DIF_EXHASH;
-
- for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
- dip->i_depth = y;
+ dip->i_depth = ilog2(sdp->sd_hash_ptrs);
gfs2_dinode_out(dip, dibh->b_data);
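The deleted shift loop was an open-coded floor(log2()): starting from y = -1 and shifting x right until it reaches zero leaves y == ilog2(x) for the power-of-two sd_hash_ptrs. A standalone sketch of the equivalence (illustrative, not from the patch):

/* Returns floor(log2(x)) for x > 0, matching ilog2(); e.g. 64 -> 6. */
static int log2_by_shifting(unsigned long x)
{
	int y = -1;

	while (x) {
		x >>= 1;
		y++;
	}
	return y;
}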
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ba25b884169e..b6fd1cb17de7 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -590,35 +590,31 @@ static void gfs2_demote_wake(struct gfs2_glock *gl)
static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
- struct gfs2_holder *gh;
- unsigned state = ret & LM_OUT_ST_MASK;
- trace_gfs2_glock_state_change(gl, state);
- state_change(gl, state);
- gh = find_first_waiter(gl);
+ if (!(ret & ~LM_OUT_ST_MASK)) {
+ unsigned state = ret & LM_OUT_ST_MASK;
+
+ trace_gfs2_glock_state_change(gl, state);
+ state_change(gl, state);
+ }
+
/* Demote to UN request arrived during demote to SH or DF */
if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
- state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED)
+ gl->gl_state != LM_ST_UNLOCKED &&
+ gl->gl_demote_state == LM_ST_UNLOCKED)
gl->gl_target = LM_ST_UNLOCKED;
/* Check for state != intended state */
- if (unlikely(state != gl->gl_target)) {
- if (gh && (ret & LM_OUT_CANCELED))
- gfs2_holder_wake(gh);
+ if (unlikely(gl->gl_state != gl->gl_target)) {
+ struct gfs2_holder *gh = find_first_waiter(gl);
+
if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
if (ret & LM_OUT_CANCELED) {
list_del_init(&gh->gh_list);
trace_gfs2_glock_queue(gh, 0);
+ gfs2_holder_wake(gh);
gl->gl_target = gl->gl_state;
- gh = find_first_waiter(gl);
- if (gh) {
- gl->gl_target = gh->gh_state;
- if (do_promote(gl))
- goto out;
- do_xmote(gl, gh, gl->gl_target);
- return;
- }
goto out;
}
/* Some error or failed "try lock" - report it */
@@ -629,7 +625,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
goto out;
}
}
- switch(state) {
+ switch(gl->gl_state) {
/* Unlocked due to conversion deadlock, try again */
case LM_ST_UNLOCKED:
do_xmote(gl, gh, gl->gl_target);
@@ -640,8 +636,10 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
do_xmote(gl, gh, LM_ST_UNLOCKED);
break;
default: /* Everything else */
- fs_err(gl->gl_name.ln_sbd, "wanted %u got %u\n",
- gl->gl_target, state);
+ fs_err(gl->gl_name.ln_sbd,
+ "glock %u:%llu requested=%u ret=%u\n",
+ gl->gl_name.ln_type, gl->gl_name.ln_number,
+ gl->gl_req, ret);
GLOCK_BUG_ON(gl, 1);
}
return;
@@ -650,7 +648,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
/* Fast path - we got what we asked for */
if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags))
gfs2_demote_wake(gl);
- if (state != LM_ST_UNLOCKED) {
+ if (gl->gl_state != LM_ST_UNLOCKED) {
if (glops->go_xmote_bh) {
int rv;
@@ -802,7 +800,8 @@ skip_inval:
* We skip telling dlm to do the locking, so we won't get a
* reply that would otherwise clear GLF_LOCK. So we clear it here.
*/
- clear_bit(GLF_LOCK, &gl->gl_flags);
+ if (!test_bit(GLF_CANCELING, &gl->gl_flags))
+ clear_bit(GLF_LOCK, &gl->gl_flags);
clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
return;
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index c171f745650f..9339a3bff6ee 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -92,12 +92,22 @@ enum {
* LM_OUT_ST_MASK
* Masks the lower two bits of lock state in the returned value.
*
+ * LM_OUT_TRY_AGAIN
+ * The trylock request failed.
+ *
+ * LM_OUT_DEADLOCK
+ * The lock request failed because it would deadlock.
+ *
* LM_OUT_CANCELED
* The lock request was canceled.
*
+ * LM_OUT_ERROR
+ * The lock request timed out or failed.
*/
#define LM_OUT_ST_MASK 0x00000003
+#define LM_OUT_TRY_AGAIN 0x00000020
+#define LM_OUT_DEADLOCK 0x00000010
#define LM_OUT_CANCELED 0x00000008
#define LM_OUT_ERROR 0x00000004
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index cebd66b22694..fe0faad4892f 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -11,6 +11,7 @@
#include <linux/bio.h>
#include <linux/posix_acl.h>
#include <linux/security.h>
+#include <linux/log2.h>
#include "gfs2.h"
#include "incore.h"
@@ -450,6 +451,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
gfs2_consist_inode(ip);
return -EIO;
}
+ if ((ip->i_diskflags & GFS2_DIF_EXHASH) &&
+ depth < ilog2(sdp->sd_hash_ptrs)) {
+ gfs2_consist_inode(ip);
+ return -EIO;
+ }
ip->i_depth = (u8)depth;
ip->i_entries = be32_to_cpu(str->di_entries);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 0a41c4e76b32..d4ad82f47eee 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -375,7 +375,6 @@ struct gfs2_glock {
enum {
GIF_QD_LOCKED = 1,
- GIF_ALLOC_FAILED = 2,
GIF_SW_PAGED = 3,
GIF_FREE_VFS_INODE = 5,
GIF_GLOP_PENDING = 6,
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 187d789a8f1e..8760e7e20c9d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -444,11 +444,9 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip)
struct inode *inode = &ip->i_inode;
struct gfs2_glock *gl = ip->i_gl;
- if (unlikely(!gl)) {
- /* This can only happen during incomplete inode creation. */
- BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
+ /* This can only happen during incomplete inode creation. */
+ if (unlikely(!gl))
return;
- }
truncate_inode_pages(gfs2_glock2aspace(gl), 0);
truncate_inode_pages(&inode->i_data, 0);
@@ -902,7 +900,6 @@ fail_gunlock3:
fail_gunlock2:
gfs2_glock_put(io_gl);
fail_dealloc_inode:
- set_bit(GIF_ALLOC_FAILED, &ip->i_flags);
dealloc_error = 0;
if (ip->i_eattr)
dealloc_error = gfs2_ea_dealloc(ip, xattr_initialized);
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index eafe123617e6..811a0bd3792c 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -44,17 +44,17 @@ static inline int gfs2_is_dir(const struct gfs2_inode *ip)
static inline void gfs2_set_inode_blocks(struct inode *inode, u64 blocks)
{
- inode->i_blocks = blocks << (inode->i_blkbits - 9);
+ inode->i_blocks = blocks << (inode->i_blkbits - SECTOR_SHIFT);
}
static inline u64 gfs2_get_inode_blocks(const struct inode *inode)
{
- return inode->i_blocks >> (inode->i_blkbits - 9);
+ return inode->i_blocks >> (inode->i_blkbits - SECTOR_SHIFT);
}
static inline void gfs2_add_inode_blocks(struct inode *inode, s64 change)
{
- change <<= inode->i_blkbits - 9;
+ change <<= inode->i_blkbits - SECTOR_SHIFT;
gfs2_assert(GFS2_SB(inode), (change >= 0 || inode->i_blocks >= -change));
inode->i_blocks += change;
}
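Since SECTOR_SHIFT is 9, these conversions are unchanged in behavior: i_blocks stays in 512-byte sector units. A worked example under a 4 KiB block size (illustrative, not from the patch):

/* With i_blkbits = 12 (4 KiB blocks), one fs block is 1 << (12 - 9) = 8
 * sectors, so 10 blocks -> 80 units in i_blocks, and back. */
static inline unsigned long blocks_to_i_blocks(unsigned long blocks,
					       unsigned int blkbits)
{
	return blocks << (blkbits - 9 /* SECTOR_SHIFT */);
}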
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 7cb9d216d8bb..cee5d199d2d8 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -119,7 +119,7 @@ static inline void gfs2_update_request_times(struct gfs2_glock *gl)
static void gdlm_ast(void *arg)
{
struct gfs2_glock *gl = arg;
- unsigned ret = gl->gl_state;
+ unsigned ret;
/* If the glock is dead, we only react to a dlm_unlock() reply. */
if (__lockref_is_dead(&gl->gl_lockref) &&
@@ -139,13 +139,16 @@ static void gdlm_ast(void *arg)
gfs2_glock_free(gl);
return;
case -DLM_ECANCEL: /* Cancel while getting lock */
- ret |= LM_OUT_CANCELED;
+ ret = LM_OUT_CANCELED;
goto out;
case -EAGAIN: /* Try lock fails */
+ ret = LM_OUT_TRY_AGAIN;
+ goto out;
case -EDEADLK: /* Deadlock detected */
+ ret = LM_OUT_DEADLOCK;
goto out;
case -ETIMEDOUT: /* Canceled due to timeout */
- ret |= LM_OUT_ERROR;
+ ret = LM_OUT_ERROR;
goto out;
case 0: /* Success */
break;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 9dc8885c95d0..7fb11ff71b5a 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -103,6 +103,7 @@ const struct address_space_operations gfs2_meta_aops = {
.invalidate_folio = block_invalidate_folio,
.writepages = gfs2_aspace_writepages,
.release_folio = gfs2_release_folio,
+ .migrate_folio = buffer_migrate_folio_norefs,
};
const struct address_space_operations gfs2_rgrp_aops = {
@@ -110,6 +111,7 @@ const struct address_space_operations gfs2_rgrp_aops = {
.invalidate_folio = block_invalidate_folio,
.writepages = gfs2_aspace_writepages,
.release_folio = gfs2_release_folio,
+ .migrate_folio = buffer_migrate_folio_norefs,
};
/**
@@ -228,7 +230,7 @@ static void gfs2_submit_bhs(blk_opf_t opf, struct buffer_head *bhs[], int num)
struct bio *bio;
bio = bio_alloc(bh->b_bdev, num, opf, GFP_NOIO);
- bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+ bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> SECTOR_SHIFT);
while (num > 0) {
bh = *bhs;
if (!bio_add_folio(bio, bh->b_folio, bh->b_size, bh_offset(bh))) {
@@ -443,11 +445,9 @@ void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
struct buffer_head *bh;
int ty;
- if (!ip->i_gl) {
- /* This can only happen during incomplete inode creation. */
- BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
+ /* This can only happen during incomplete inode creation. */
+ if (!ip->i_gl)
return;
- }
gfs2_ail1_wipe(sdp, bstart, blen);
while (blen) {
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 85c491fcf1a3..efe99b732551 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -163,7 +163,7 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
return -EINVAL;
}
- if (sb->sb_bsize < 512 || sb->sb_bsize > PAGE_SIZE ||
+ if (sb->sb_bsize < SECTOR_SIZE || sb->sb_bsize > PAGE_SIZE ||
(sb->sb_bsize & (sb->sb_bsize - 1))) {
pr_warn("Invalid block size\n");
return -EINVAL;
@@ -224,8 +224,8 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
if (unlikely(!sb))
return -ENOMEM;
err = bdev_rw_virt(sdp->sd_vfs->s_bdev,
- sector * (sdp->sd_vfs->s_blocksize >> 9), sb, PAGE_SIZE,
- REQ_OP_READ | REQ_META);
+ sector << (sdp->sd_vfs->s_blocksize_bits - SECTOR_SHIFT),
+ sb, PAGE_SIZE, REQ_OP_READ | REQ_META);
if (err) {
pr_warn("error %d reading superblock\n", err);
kfree(sb);
@@ -257,7 +257,7 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
return error;
}
- sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - 9;
+ sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - SECTOR_SHIFT;
sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift);
sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
sizeof(struct gfs2_dinode)) / sizeof(u64);
@@ -1145,7 +1145,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_magic = GFS2_MAGIC;
sb->s_op = &gfs2_super_ops;
- sb->s_d_op = &gfs2_dops;
+ set_default_d_op(sb, &gfs2_dops);
sb->s_export_op = &gfs2_export_ops;
sb->s_qcop = &gfs2_quotactl_ops;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
@@ -1155,12 +1155,12 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
/* Set up the buffer cache and fill in some fake block size values
to allow us to read-in the on-disk superblock. */
- sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, 512);
+ sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, SECTOR_SIZE);
error = -EINVAL;
if (!sdp->sd_sb.sb_bsize)
goto fail_free;
sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
- sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - 9;
+ sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - SECTOR_SHIFT;
sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift);
sdp->sd_tune.gt_logd_secs = sdp->sd_args.ar_commit;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 7c518c4ff638..b42e2110084b 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -487,11 +487,9 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
int need_endtrans = 0;
int ret;
- if (unlikely(!ip->i_gl)) {
- /* This can only happen during incomplete inode creation. */
- BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
+ /* This can only happen during incomplete inode creation. */
+ if (unlikely(!ip->i_gl))
return;
- }
if (gfs2_withdrawing_or_withdrawn(sdp))
return;
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index d5a1e63fa257..24864a66074b 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -232,32 +232,23 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
*/
ret = gfs2_glock_nq(&sdp->sd_live_gh);
+ gfs2_glock_put(live_gl); /* drop extra reference we acquired */
+ clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
+
/*
* If we actually got the "live" lock in EX mode, there are no other
- * nodes available to replay our journal. So we try to replay it
- * ourselves. We hold the "live" glock to prevent other mounters
- * during recovery, then just dequeue it and reacquire it in our
- * normal SH mode. Just in case the problem that caused us to
- * withdraw prevents us from recovering our journal (e.g. io errors
- * and such) we still check if the journal is clean before proceeding
- * but we may wait forever until another mounter does the recovery.
+ * nodes available to replay our journal.
*/
if (ret == 0) {
- fs_warn(sdp, "No other mounters found. Trying to recover our "
- "own journal jid %d.\n", sdp->sd_lockstruct.ls_jid);
- if (gfs2_recover_journal(sdp->sd_jdesc, 1))
- fs_warn(sdp, "Unable to recover our journal jid %d.\n",
- sdp->sd_lockstruct.ls_jid);
- gfs2_glock_dq_wait(&sdp->sd_live_gh);
- gfs2_holder_reinit(LM_ST_SHARED,
- LM_FLAG_NOEXP | GL_EXACT | GL_NOPID,
- &sdp->sd_live_gh);
- gfs2_glock_nq(&sdp->sd_live_gh);
+ fs_warn(sdp, "No other mounters found.\n");
+ /*
+ * We are about to release the lockspace. By keeping live_gl
+ * locked here, we ensure that the next mounter coming along
+ * will be a "first" mounter which will perform recovery.
+ */
+ goto skip_recovery;
}
- gfs2_glock_put(live_gl); /* drop extra reference we acquired */
- clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
-
/*
* At this point our journal is evicted, so we need to get a new inode
* for it. Once done, we need to call gfs2_find_jhead which
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index a0c7cb0f79fc..c3fd3172fdd6 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -201,7 +201,7 @@ extern int hfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern const struct address_space_operations hfs_aops;
extern const struct address_space_operations hfs_btree_aops;
-int hfs_write_begin(struct file *file, struct address_space *mapping,
+int hfs_write_begin(const struct kiocb *iocb, struct address_space *mapping,
loff_t pos, unsigned len, struct folio **foliop, void **fsdata);
extern struct inode *hfs_new_inode(struct inode *, const struct qstr *, umode_t);
extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index a81ce7a740b9..096f338134f9 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -44,12 +44,12 @@ static void hfs_write_failed(struct address_space *mapping, loff_t to)
}
}
-int hfs_write_begin(struct file *file, struct address_space *mapping,
+int hfs_write_begin(const struct kiocb *iocb, struct address_space *mapping,
loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
{
int ret;
- ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata,
+ ret = cont_write_begin(iocb, mapping, pos, len, foliop, fsdata,
hfs_get_block,
&HFS_I(mapping->host)->phys_size);
if (unlikely(ret))
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index fe09c2093a93..388a318297ec 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -365,7 +365,7 @@ static int hfs_fill_super(struct super_block *sb, struct fs_context *fc)
if (!root_inode)
goto bail_no_root;
- sb->s_d_op = &hfs_dentry_operations;
+ set_default_d_op(sb, &hfs_dentry_operations);
res = -ENOMEM;
sb->s_root = d_make_root(root_inode);
if (!sb->s_root)
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 2f089bff0095..3d5c65aef3b2 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -473,8 +473,10 @@ extern const struct address_space_operations hfsplus_aops;
extern const struct address_space_operations hfsplus_btree_aops;
extern const struct dentry_operations hfsplus_dentry_operations;
-int hfsplus_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct folio **foliop, void **fsdata);
+int hfsplus_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, struct folio **foliop,
+ void **fsdata);
struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir,
umode_t mode);
void hfsplus_delete_inode(struct inode *inode);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index f331e9574217..97d75bb2c388 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -38,12 +38,14 @@ static void hfsplus_write_failed(struct address_space *mapping, loff_t to)
}
}
-int hfsplus_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
+int hfsplus_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping, loff_t pos,
+ unsigned len, struct folio **foliop,
+ void **fsdata)
{
int ret;
- ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata,
+ ret = cont_write_begin(iocb, mapping, pos, len, foliop, fsdata,
hfsplus_get_block,
&HFSPLUS_I(mapping->host)->phys_size);
if (unlikely(ret))
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 948b8aaee33e..0caf7aa1c249 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -508,7 +508,7 @@ static int hfsplus_fill_super(struct super_block *sb, struct fs_context *fc)
goto out_put_alloc_file;
}
- sb->s_d_op = &hfsplus_dentry_operations;
+ set_default_d_op(sb, &hfsplus_dentry_operations);
sb->s_root = d_make_root(root);
if (!sb->s_root) {
err = -ENOMEM;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 702c41317589..2ac60d5d7314 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -445,7 +445,8 @@ static int hostfs_read_folio(struct file *file, struct folio *folio)
return ret;
}
-static int hostfs_write_begin(struct file *file, struct address_space *mapping,
+static int hostfs_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len,
struct folio **foliop, void **fsdata)
{
@@ -458,7 +459,8 @@ static int hostfs_write_begin(struct file *file, struct address_space *mapping,
return 0;
}
-static int hostfs_write_end(struct file *file, struct address_space *mapping,
+static int hostfs_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct folio *folio, void *fsdata)
{
@@ -468,7 +470,7 @@ static int hostfs_write_end(struct file *file, struct address_space *mapping,
int err;
buffer = kmap_local_folio(folio, from);
- err = write_file(FILE_HOSTFS_I(file)->fd, &pos, buffer, copied);
+ err = write_file(FILE_HOSTFS_I(iocb->ki_filp)->fd, &pos, buffer, copied);
kunmap_local(buffer);
if (!folio_test_uptodate(folio) && err == folio_size(folio))
@@ -933,7 +935,7 @@ static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_blocksize_bits = 10;
sb->s_magic = HOSTFS_SUPER_MAGIC;
sb->s_op = &hostfs_sbops;
- sb->s_d_op = &simple_dentry_operations;
+ sb->s_d_flags = DCACHE_DONTCACHE;
sb->s_maxbytes = MAX_LFS_FILESIZE;
err = super_setup_bdi(sb);
if (err)
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 449a3fc1b8d9..7b95a3d2e2a6 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -188,13 +188,14 @@ static void hpfs_write_failed(struct address_space *mapping, loff_t to)
hpfs_unlock(inode->i_sb);
}
-static int hpfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct folio **foliop, void **fsdata)
+static int hpfs_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct folio **foliop, void **fsdata)
{
int ret;
- ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata,
+ ret = cont_write_begin(iocb, mapping, pos, len, foliop, fsdata,
hpfs_get_block,
&hpfs_i(mapping->host)->mmu_private);
if (unlikely(ret))
@@ -203,13 +204,14 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping,
return ret;
}
-static int hpfs_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct folio *folio, void *fsdata)
+static int hpfs_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
int err;
- err = generic_write_end(file, mapping, pos, len, copied, folio, fsdata);
+ err = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata);
if (err < len)
hpfs_write_failed(mapping, pos + len);
if (!(err < 0)) {
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 27567920abe4..42b779b4d87f 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -554,7 +554,7 @@ static int hpfs_fill_super(struct super_block *s, struct fs_context *fc)
/* Fill superblock stuff */
s->s_magic = HPFS_SUPER_MAGIC;
s->s_op = &hpfs_sops;
- s->s_d_op = &hpfs_dentry_operations;
+ set_default_d_op(s, &hpfs_dentry_operations);
s->s_time_min = local_to_gmt(s, 0);
s->s_time_max = local_to_gmt(s, U32_MAX);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index e4de5425838d..9ddd67da0eeb 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -311,7 +311,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
return retval;
}
-static int hugetlbfs_write_begin(struct file *file,
+static int hugetlbfs_write_begin(const struct kiocb *iocb,
struct address_space *mapping,
loff_t pos, unsigned len,
struct folio **foliop, void **fsdata)
@@ -319,9 +319,10 @@ static int hugetlbfs_write_begin(struct file *file,
return -EINVAL;
}
-static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct folio *folio, void *fsdata)
+static int hugetlbfs_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio, void *fsdata)
{
BUG();
return -EINVAL;
@@ -1433,6 +1434,7 @@ hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_blocksize_bits = huge_page_shift(ctx->hstate);
sb->s_magic = HUGETLBFS_MAGIC;
sb->s_op = &hugetlbfs_ops;
+ sb->s_d_flags = DCACHE_DONTCACHE;
sb->s_time_gran = 1;
/*
@@ -1587,7 +1589,7 @@ static struct vfsmount *__init mount_one_hugetlbfs(struct hstate *h)
} else {
struct hugetlbfs_fs_context *ctx = fc->fs_private;
ctx->hstate = h;
- mnt = fc_mount(fc);
+ mnt = fc_mount_longterm(fc);
put_fs_context(fc);
}
if (IS_ERR(mnt))
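[note: fc_mount_longterm(), added in the fs/namespace.c hunk further down, is fc_mount() plus marking the result as a kernel-internal long-term mount (mnt_ns = MNT_NS_INTERNAL). A hypothetical caller follows the same shape as mount_one_hugetlbfs() above:]

	/* sketch: mounting a filesystem for kernel-internal, long-lived use */
	static struct vfsmount *examplefs_kern_mount(void)
	{
		struct fs_context *fc;
		struct vfsmount *mnt;

		fc = fs_context_for_mount(&examplefs_type, 0);	/* hypothetical type */
		if (IS_ERR(fc))
			return ERR_CAST(fc);
		mnt = fc_mount_longterm(fc);	/* fc_mount() + MNT_NS_INTERNAL */
		put_fs_context(fc);
		return mnt;
	}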
diff --git a/fs/inode.c b/fs/inode.c
index 99318b157a9a..01ebdc40021e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -865,12 +865,12 @@ static void dispose_list(struct list_head *head)
*/
void evict_inodes(struct super_block *sb)
{
- struct inode *inode, *next;
+ struct inode *inode;
LIST_HEAD(dispose);
again:
spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
if (atomic_read(&inode->i_count))
continue;
@@ -1158,9 +1158,8 @@ void lockdep_annotate_inode_mutex_key(struct inode *inode)
/* Set new key only if filesystem hasn't already changed it */
if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) {
/*
- * ensure nobody is actually holding i_mutex
+ * ensure nobody is actually holding i_rwsem
*/
- // mutex_destroy(&inode->i_mutex);
init_rwsem(&inode->i_rwsem);
lockdep_set_class(&inode->i_rwsem,
&type->i_mutex_dir_key);
@@ -2615,7 +2614,7 @@ EXPORT_SYMBOL(inode_dio_finished);
* proceed with a truncate or equivalent operation.
*
* Must be called under a lock that serializes taking new references
- * to i_dio_count, usually by inode->i_mutex.
+ * to i_dio_count, usually by inode->i_rwsem.
*/
void inode_dio_wait(struct inode *inode)
{
@@ -2633,7 +2632,7 @@ EXPORT_SYMBOL(inode_dio_wait_interruptible);
/*
* inode_set_flags - atomically set some inode flags
*
- * Note: the caller should be holding i_mutex, or else be sure that
+ * Note: the caller should be holding i_rwsem exclusively, or else be sure that
* they have exclusive access to the inode structure (i.e., while the
* inode is being instantiated). The reason for the cmpxchg() loop
* --- which wouldn't be necessary if all code paths which modify
@@ -2641,7 +2640,7 @@ EXPORT_SYMBOL(inode_dio_wait_interruptible);
* code path which doesn't today so we use cmpxchg() out of an abundance
* of caution.
*
- * In the long run, i_mutex is overkill, and we should probably look
+ * In the long run, i_rwsem is overkill, and we should probably look
* at using the i_lock spinlock to protect i_flags, and then make sure
* it is so documented in include/linux/fs.h and that all code follows
* the locking convention!!
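[note: the evict_inodes() hunk drops the _safe iterator because the loop body never unlinks the current inode from i_sb_list itself; whenever it disposes of anything it drops s_inode_list_lock and restarts from the "again:" label, so no saved next pointer is ever relied on. A generic illustration of when the _safe variant is and is not needed, with hypothetical names:]

	/* sketch only, not from this patch */
	static void prune_all(struct list_head *head, spinlock_t *lock)
	{
		struct item *pos;	/* hypothetical struct containing a list_head 'node' */
	again:
		spin_lock(lock);
		list_for_each_entry(pos, head, node) {
			if (!needs_work(pos))	/* hypothetical predicate */
				continue;
			spin_unlock(lock);
			do_work(pos);		/* may unlink or free pos */
			goto again;		/* restart: no stale 'next' pointer */
		}
		spin_unlock(lock);
	}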
diff --git a/fs/internal.h b/fs/internal.h
index 393f6c5c24f6..d733d8bb3d1f 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -101,6 +101,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
struct file *alloc_empty_file(int flags, const struct cred *cred);
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
+void backing_file_set_user_path(struct file *f, const struct path *path);
static inline void file_put_write_access(struct file *file)
{
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 3729391a18f3..8f29be98a46e 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -71,6 +71,9 @@ static void iomap_set_range_uptodate(struct folio *folio, size_t off,
unsigned long flags;
bool uptodate = true;
+ if (folio_test_uptodate(folio))
+ return;
+
if (ifs) {
spin_lock_irqsave(&ifs->state_lock, flags);
uptodate = ifs_set_range_uptodate(folio, ifs, off, len);
@@ -923,8 +926,7 @@ static bool iomap_write_end(struct iomap_iter *iter, size_t len, size_t copied,
if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
size_t bh_written;
- bh_written = block_write_end(NULL, iter->inode->i_mapping, pos,
- len, copied, folio, NULL);
+ bh_written = block_write_end(pos, len, copied, folio);
WARN_ON_ONCE(bh_written != copied && bh_written != 0);
return bh_written == copied;
}
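[note: block_write_end() loses its unused file/mapping/fsdata arguments in this series; the iomap hunk above and the fs/minix/dir.c hunk further down both switch to the four-argument form. A condensed caller sketch, assuming a folio prepared by the matching write_begin path:]

	/* sketch: committing 'copied' bytes at 'pos' into a prepared folio */
	static void example_commit(struct folio *folio, loff_t pos,
				   unsigned len, unsigned copied)
	{
		/* was: block_write_end(NULL, mapping, pos, len, copied, folio, NULL); */
		size_t written = block_write_end(pos, len, copied, folio);

		/* a short copy into a !uptodate folio is rolled back to 0 */
		WARN_ON_ONCE(written != copied && written != 0);
	}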
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index d5da9817df9b..6f0e6b19383c 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -939,7 +939,7 @@ root_found:
sbi->s_check = opt->check;
if (table)
- s->s_d_op = &isofs_dentry_ops[table - 1];
+ set_default_d_op(s, &isofs_dentry_ops[table - 1]);
/* get the root dentry */
s->s_root = d_make_root(inode);
@@ -1440,9 +1440,16 @@ static int isofs_read_inode(struct inode *inode, int relocated)
inode->i_op = &page_symlink_inode_operations;
inode_nohighmem(inode);
inode->i_data.a_ops = &isofs_symlink_aops;
- } else
+ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
/* XXX - parse_rock_ridge_inode() had already set i_rdev. */
init_special_inode(inode, inode->i_mode, inode->i_rdev);
+ } else {
+ printk(KERN_DEBUG "ISOFS: Invalid file type 0%04o for inode %lu.\n",
+ inode->i_mode, inode->i_ino);
+ ret = -EIO;
+ goto fail;
+ }
ret = 0;
out:
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 13c18ccc13b0..adec3af9bf8d 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -21,12 +21,14 @@
#include <linux/jffs2.h>
#include "nodelist.h"
-static int jffs2_write_end(struct file *filp, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct folio *folio, void *fsdata);
-static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct folio **foliop, void **fsdata);
+static int jffs2_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio, void *fsdata);
+static int jffs2_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct folio **foliop, void **fsdata);
static int jffs2_read_folio(struct file *filp, struct folio *folio);
int jffs2_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
@@ -121,9 +123,10 @@ static int jffs2_read_folio(struct file *file, struct folio *folio)
return ret;
}
-static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct folio **foliop, void **fsdata)
+static int jffs2_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct folio **foliop, void **fsdata)
{
struct folio *folio;
struct inode *inode = mapping->host;
@@ -235,9 +238,10 @@ out_err:
return ret;
}
-static int jffs2_write_end(struct file *filp, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct folio *folio, void *fsdata)
+static int jffs2_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio, void *fsdata)
{
/* Actually commit the write from the page cache page we're looking at.
* For now, we write the full page out each time. It sucks, but it's simple
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 60fc92dee24d..083e7fa54709 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -290,9 +290,10 @@ static void jfs_write_failed(struct address_space *mapping, loff_t to)
}
}
-static int jfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct folio **foliop, void **fsdata)
+static int jfs_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct folio **foliop, void **fsdata)
{
int ret;
@@ -303,13 +304,14 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping,
return ret;
}
-static int jfs_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied, struct folio *folio,
- void *fsdata)
+static int jfs_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio, void *fsdata)
{
int ret;
- ret = generic_write_end(file, mapping, pos, len, copied, folio, fsdata);
+ ret = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata);
if (ret < len)
jfs_write_failed(mapping, pos + len);
return ret;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 10368c188c5e..3cfb86c5a36e 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -542,7 +542,7 @@ static int jfs_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_magic = JFS_SUPER_MAGIC;
if (sbi->mntflag & JFS_OS2)
- sb->s_d_op = &jfs_ci_dentry_operations;
+ set_default_d_op(sb, &jfs_ci_dentry_operations);
inode = jfs_iget(sb, ROOT_I);
if (IS_ERR(inode)) {
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index c1719b5778a1..e384a69fbece 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -318,7 +318,7 @@ static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *k
return -ENOMEM;
}
sb->s_root = root;
- sb->s_d_op = &kernfs_dops;
+ set_default_d_op(sb, &kernfs_dops);
return 0;
}
diff --git a/fs/libfs.c b/fs/libfs.c
index 9ea0ecc325a8..67bd8eea4af1 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -62,11 +62,6 @@ int always_delete_dentry(const struct dentry *dentry)
}
EXPORT_SYMBOL(always_delete_dentry);
-const struct dentry_operations simple_dentry_operations = {
- .d_delete = always_delete_dentry,
-};
-EXPORT_SYMBOL(simple_dentry_operations);
-
/*
* Lookup the data. This is trivial - if the dentry didn't already
* exist, we know it is negative. Set d_op to delete negative dentries.
@@ -75,9 +70,11 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned
{
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
- if (!dentry->d_sb->s_d_op)
- d_set_d_op(dentry, &simple_dentry_operations);
-
+ if (!dentry->d_op && !(dentry->d_flags & DCACHE_DONTCACHE)) {
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags |= DCACHE_DONTCACHE;
+ spin_unlock(&dentry->d_lock);
+ }
if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
return NULL;
@@ -605,15 +602,16 @@ struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
}
EXPORT_SYMBOL(find_next_child);
-void simple_recursive_removal(struct dentry *dentry,
- void (*callback)(struct dentry *))
+static void __simple_recursive_removal(struct dentry *dentry,
+ void (*callback)(struct dentry *),
+ bool locked)
{
struct dentry *this = dget(dentry);
while (true) {
struct dentry *victim = NULL, *child;
struct inode *inode = this->d_inode;
- inode_lock(inode);
+ inode_lock_nested(inode, I_MUTEX_CHILD);
if (d_is_dir(this))
inode->i_flags |= S_DEAD;
while ((child = find_next_child(this, victim)) == NULL) {
@@ -625,15 +623,13 @@ void simple_recursive_removal(struct dentry *dentry,
victim = this;
this = this->d_parent;
inode = this->d_inode;
- inode_lock(inode);
+ if (!locked || victim != dentry)
+ inode_lock_nested(inode, I_MUTEX_CHILD);
if (simple_positive(victim)) {
d_invalidate(victim); // avoid lost mounts
- if (d_is_dir(victim))
- fsnotify_rmdir(inode, victim);
- else
- fsnotify_unlink(inode, victim);
if (callback)
callback(victim);
+ fsnotify_delete(inode, d_inode(victim), victim);
dput(victim); // unpin it
}
if (victim == dentry) {
@@ -641,7 +637,8 @@ void simple_recursive_removal(struct dentry *dentry,
inode_set_ctime_current(inode));
if (d_is_dir(dentry))
drop_nlink(inode);
- inode_unlock(inode);
+ if (!locked)
+ inode_unlock(inode);
dput(dentry);
return;
}
@@ -650,8 +647,22 @@ void simple_recursive_removal(struct dentry *dentry,
this = child;
}
}
+
+void simple_recursive_removal(struct dentry *dentry,
+ void (*callback)(struct dentry *))
+{
+ return __simple_recursive_removal(dentry, callback, false);
+}
EXPORT_SYMBOL(simple_recursive_removal);
+/* caller holds parent directory with I_MUTEX_PARENT */
+void locked_recursive_removal(struct dentry *dentry,
+ void (*callback)(struct dentry *))
+{
+ return __simple_recursive_removal(dentry, callback, true);
+}
+EXPORT_SYMBOL(locked_recursive_removal);
+
static const struct super_operations simple_super_operations = {
.statfs = simple_statfs,
};
@@ -684,7 +695,7 @@ static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
s->s_root = d_make_root(root);
if (!s->s_root)
return -ENOMEM;
- s->s_d_op = ctx->dops;
+ set_default_d_op(s, ctx->dops);
return 0;
}
@@ -910,7 +921,7 @@ static int simple_read_folio(struct file *file, struct folio *folio)
return 0;
}
-int simple_write_begin(struct file *file, struct address_space *mapping,
+int simple_write_begin(const struct kiocb *iocb, struct address_space *mapping,
loff_t pos, unsigned len,
struct folio **foliop, void **fsdata)
{
@@ -935,7 +946,7 @@ EXPORT_SYMBOL(simple_write_begin);
/**
* simple_write_end - .write_end helper for non-block-device FSes
- * @file: See .write_end of address_space_operations
+ * @iocb: kernel I/O control block
* @mapping: "
* @pos: "
* @len: "
@@ -946,7 +957,8 @@ EXPORT_SYMBOL(simple_write_begin);
* simple_write_end does the minimum needed for updating a folio after
* writing is done. It has the same API signature as the .write_end of
* address_space_operations vector. So it can just be set onto .write_end for
- * FSes that don't need any other processing. i_mutex is assumed to be held.
+ * FSes that don't need any other processing. i_rwsem is assumed to be held
+ * exclusively.
* Block based filesystems should use generic_write_end().
* NOTE: Even though i_size might get updated by this function, mark_inode_dirty
* is not called, so a filesystem that actually does store data in .write_inode
@@ -955,9 +967,10 @@ EXPORT_SYMBOL(simple_write_begin);
*
* Use *ONLY* with simple_read_folio()
*/
-static int simple_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct folio *folio, void *fsdata)
+static int simple_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio, void *fsdata)
{
struct inode *inode = folio->mapping->host;
loff_t last_pos = pos + copied;
@@ -973,7 +986,7 @@ static int simple_write_end(struct file *file, struct address_space *mapping,
}
/*
* No need to use i_size_read() here, the i_size
- * cannot change under us because we hold the i_mutex.
+ * cannot change under us because we hold the i_rwsem.
*/
if (last_pos > inode->i_size)
i_size_write(inode, last_pos);
@@ -1583,13 +1596,17 @@ EXPORT_SYMBOL(generic_file_fsync);
int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks)
{
u64 last_fs_block = num_blocks - 1;
- u64 last_fs_page =
- last_fs_block >> (PAGE_SHIFT - blocksize_bits);
+ u64 last_fs_page, max_bytes;
+
+ if (check_shl_overflow(num_blocks, blocksize_bits, &max_bytes))
+ return -EFBIG;
+
+ last_fs_page = (max_bytes >> PAGE_SHIFT) - 1;
if (unlikely(num_blocks == 0))
return 0;
- if ((blocksize_bits < 9) || (blocksize_bits > PAGE_SHIFT))
+ if (blocksize_bits < 9)
return -EINVAL;
if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) ||
@@ -1649,12 +1666,10 @@ struct inode *alloc_anon_inode(struct super_block *s)
*/
inode->i_state = I_DIRTY;
/*
- * Historically anonymous inodes didn't have a type at all and
- * userspace has come to rely on this. Internally they're just
- * regular files but S_IFREG is masked off when reporting
- * information to userspace.
+ * Historically anonymous inodes don't have a type at all and
+ * userspace has come to rely on this.
*/
- inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
+ inode->i_mode = S_IRUSR | S_IWUSR;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_flags |= S_PRIVATE | S_ANON_INODE;
@@ -1950,22 +1965,22 @@ static const struct dentry_operations generic_encrypted_dentry_ops = {
* @sb: superblock to be configured
*
* Filesystems supporting casefolding and/or fscrypt can call this
- * helper at mount-time to configure sb->s_d_op to best set of dentry
- * operations required for the enabled features. The helper must be
- * called after these have been configured, but before the root dentry
- * is created.
+ * helper at mount-time to configure default dentry_operations to the
+ * best set of dentry operations required for the enabled features.
+ * The helper must be called after these have been configured, but
+ * before the root dentry is created.
*/
void generic_set_sb_d_ops(struct super_block *sb)
{
#if IS_ENABLED(CONFIG_UNICODE)
if (sb->s_encoding) {
- sb->s_d_op = &generic_ci_dentry_ops;
+ set_default_d_op(sb, &generic_ci_dentry_ops);
return;
}
#endif
#ifdef CONFIG_FS_ENCRYPTION
if (sb->s_cop) {
- sb->s_d_op = &generic_encrypted_dentry_ops;
+ set_default_d_op(sb, &generic_encrypted_dentry_ops);
return;
}
#endif
@@ -2263,3 +2278,28 @@ void stashed_dentry_prune(struct dentry *dentry)
*/
cmpxchg(stashed, dentry, NULL);
}
+
+/* parent must be held exclusive */
+struct dentry *simple_start_creating(struct dentry *parent, const char *name)
+{
+ struct dentry *dentry;
+ struct inode *dir = d_inode(parent);
+
+ inode_lock(dir);
+ if (unlikely(IS_DEADDIR(dir))) {
+ inode_unlock(dir);
+ return ERR_PTR(-ENOENT);
+ }
+ dentry = lookup_noperm(&QSTR(name), parent);
+ if (IS_ERR(dentry)) {
+ inode_unlock(dir);
+ return dentry;
+ }
+ if (dentry->d_inode) {
+ dput(dentry);
+ inode_unlock(dir);
+ return ERR_PTR(-EEXIST);
+ }
+ return dentry;
+}
+EXPORT_SYMBOL(simple_start_creating);
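[note: simple_start_creating() bundles the lock-parent/lookup/reject-existing dance for libfs-style filesystems. On success it returns a negative dentry with the parent's i_rwsem still held; on every failure path it has already unlocked. A hypothetical caller:]

	/* sketch only, not part of this patch */
	static struct dentry *examplefs_create_file(struct dentry *parent,
						    const char *name,
						    struct inode *inode)
	{
		struct dentry *dentry = simple_start_creating(parent, name);

		if (IS_ERR(dentry)) {	/* -ENOENT, -EEXIST, ...; parent already unlocked */
			iput(inode);
			return dentry;
		}
		d_instantiate(dentry, inode);	/* consumes the inode reference */
		inode_unlock(d_inode(parent));	/* success path: caller unlocks */
		return dentry;
	}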
diff --git a/fs/locks.c b/fs/locks.c
index 1619cddfa7a4..559f02aa4172 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -712,7 +712,7 @@ static void __locks_wake_up_blocks(struct file_lock_core *blocker)
fl->fl_lmops && fl->fl_lmops->lm_notify)
fl->fl_lmops->lm_notify(fl);
else
- locks_wake_up(fl);
+ locks_wake_up_waiter(waiter);
/*
* The setting of flc_blocker to NULL marks the "done"
@@ -1794,7 +1794,7 @@ generic_add_lease(struct file *filp, int arg, struct file_lease **flp, void **pr
/*
* In the delegation case we need mutual exclusion with
- * a number of operations that take the i_mutex. We trylock
+ * a number of operations that take the i_rwsem. We trylock
* because delegations are an optional optimization, and if
* there's some chance of a conflict--we'd rather not
* bother, maybe that's a sign this just isn't a good file to
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index dd2a425b41f0..19052fc47e9e 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -45,7 +45,7 @@ static void dir_commit_chunk(struct folio *folio, loff_t pos, unsigned len)
struct address_space *mapping = folio->mapping;
struct inode *dir = mapping->host;
- block_write_end(NULL, mapping, pos, len, len, folio, NULL);
+ block_write_end(pos, len, len, folio);
if (pos+len > dir->i_size) {
i_size_write(dir, pos+len);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index f007e389d5d2..df9d11479caf 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -442,9 +442,10 @@ static void minix_write_failed(struct address_space *mapping, loff_t to)
}
}
-static int minix_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct folio **foliop, void **fsdata)
+static int minix_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct folio **foliop, void **fsdata)
{
int ret;
diff --git a/fs/mount.h b/fs/mount.h
index ad7173037924..97737051a8b9 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -44,7 +44,6 @@ struct mountpoint {
struct hlist_node m_hash;
struct dentry *m_dentry;
struct hlist_head m_list;
- int m_count;
};
struct mount {
@@ -70,8 +69,8 @@ struct mount {
struct list_head mnt_list;
struct list_head mnt_expire; /* link in fs-specific expiry list */
struct list_head mnt_share; /* circular list of shared mounts */
- struct list_head mnt_slave_list;/* list of slave mounts */
- struct list_head mnt_slave; /* slave list entry */
+ struct hlist_head mnt_slave_list;/* list of slave mounts */
+ struct hlist_node mnt_slave; /* slave list entry */
struct mount *mnt_master; /* slave is on master->mnt_slave_list */
struct mnt_namespace *mnt_ns; /* containing namespace */
struct mountpoint *mnt_mp; /* where is it mounted */
@@ -79,21 +78,38 @@ struct mount {
struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */
struct hlist_node mnt_umount;
};
- struct list_head mnt_umounting; /* list entry for umount propagation */
#ifdef CONFIG_FSNOTIFY
struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
__u32 mnt_fsnotify_mask;
struct list_head to_notify; /* need to queue notification */
struct mnt_namespace *prev_ns; /* previous namespace (NULL if none) */
#endif
+ int mnt_t_flags; /* namespace_sem-protected flags */
int mnt_id; /* mount identifier, reused */
u64 mnt_id_unique; /* mount ID unique until reboot */
int mnt_group_id; /* peer group identifier */
int mnt_expiry_mark; /* true if marked for expiry */
struct hlist_head mnt_pins;
struct hlist_head mnt_stuck_children;
+ struct mount *overmount; /* mounted on ->mnt_root */
} __randomize_layout;
+enum {
+ T_SHARED = 1, /* mount is shared */
+ T_UNBINDABLE = 2, /* mount is unbindable */
+ T_MARKED = 4, /* internal mark for propagate_... */
+ T_UMOUNT_CANDIDATE = 8, /* for propagate_umount */
+
+ /*
+ * T_SHARED_MASK is the set of flags that should be cleared when a
+ * mount becomes shared. Currently, this is only the flag that says a
+ * mount cannot be bind mounted, since this is how we create a mount
+ * that shares events with another mount. If you add a new T_*
+ * flag, consider how it interacts with shared mounts.
+ */
+ T_SHARED_MASK = T_UNBINDABLE,
+};
+
#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
static inline struct mount *real_mount(struct vfsmount *mnt)
@@ -101,7 +117,7 @@ static inline struct mount *real_mount(struct vfsmount *mnt)
return container_of(mnt, struct mount, mnt);
}
-static inline int mnt_has_parent(struct mount *mnt)
+static inline int mnt_has_parent(const struct mount *mnt)
{
return mnt != mnt->mnt_parent;
}
@@ -146,8 +162,8 @@ struct proc_mounts {
extern const struct seq_operations mounts_op;
-extern bool __is_local_mountpoint(struct dentry *dentry);
-static inline bool is_local_mountpoint(struct dentry *dentry)
+extern bool __is_local_mountpoint(const struct dentry *dentry);
+static inline bool is_local_mountpoint(const struct dentry *dentry)
{
if (!d_mountpoint(dentry))
return false;
@@ -160,6 +176,13 @@ static inline bool is_anon_ns(struct mnt_namespace *ns)
return ns->seq == 0;
}
+static inline bool anon_ns_root(const struct mount *m)
+{
+ struct mnt_namespace *ns = READ_ONCE(m->mnt_ns);
+
+ return !IS_ERR_OR_NULL(ns) && is_anon_ns(ns) && m == ns->root;
+}
+
static inline bool mnt_ns_attached(const struct mount *mnt)
{
return !RB_EMPTY_NODE(&mnt->mnt_node);
@@ -170,7 +193,7 @@ static inline bool mnt_ns_empty(const struct mnt_namespace *ns)
return RB_EMPTY_ROOT(&ns->mounts);
}
-static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
+static inline void move_from_ns(struct mount *mnt)
{
struct mnt_namespace *ns = mnt->mnt_ns;
WARN_ON(!mnt_ns_attached(mnt));
@@ -180,7 +203,6 @@ static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
ns->mnt_first_node = rb_next(&mnt->mnt_node);
rb_erase(&mnt->mnt_node, &ns->mounts);
RB_CLEAR_NODE(&mnt->mnt_node);
- list_add_tail(&mnt->mnt_list, dt_list);
}
bool has_locked_children(struct mount *mnt, struct dentry *dentry);
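[note: struct mount's slave linkage moves from a circular list_head pair to an hlist (smaller head, and a node can tell whether it is first), and several propagation state bits move out of mnt->mnt.mnt_flags into the new namespace_sem-protected mnt_t_flags. A hedged sketch of what this looks like to the propagation code; the helper names are hypothetical:]

	/* sketch only, not from this patch */
	static void for_each_slave(struct mount *master)
	{
		struct mount *m;

		/* before: list_for_each_entry(m, &master->mnt_slave_list, mnt_slave) */
		hlist_for_each_entry(m, &master->mnt_slave_list, mnt_slave)
			handle_slave(m);	/* hypothetical per-slave work */
	}

	/* the new bits are tested under namespace_sem, e.g. (hypothetically): */
	static bool mnt_is_shared(const struct mount *mnt)
	{
		return mnt->mnt_t_flags & T_SHARED;
	}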
diff --git a/fs/namei.c b/fs/namei.c
index 4bb889fc980b..ae95bea02bbc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1469,7 +1469,7 @@ static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped,
int ret = 0;
while (flags & DCACHE_MANAGED_DENTRY) {
- /* Allow the filesystem to manage the transit without i_mutex
+ /* Allow the filesystem to manage the transit without i_rwsem
* being held. */
if (flags & DCACHE_MANAGE_TRANSIT) {
ret = path->dentry->d_op->d_manage(path, false);
@@ -2917,7 +2917,8 @@ static int lookup_one_common(struct mnt_idmap *idmap,
* @base: base directory to lookup from
*
* Look up a dentry by name in the dcache, returning NULL if it does not
- * currently exist. The function does not try to create a dentry.
+ * currently exist. The function does not try to create a dentry and if one
+ * is found it doesn't try to revalidate it.
*
* Note that this routine is purely a helper for filesystem usage and should
* not be called by generic code. It does no permission checking.
@@ -2933,7 +2934,7 @@ struct dentry *try_lookup_noperm(struct qstr *name, struct dentry *base)
if (err)
return ERR_PTR(err);
- return lookup_dcache(name, base, 0);
+ return d_lookup(base, name);
}
EXPORT_SYMBOL(try_lookup_noperm);
@@ -2945,7 +2946,7 @@ EXPORT_SYMBOL(try_lookup_noperm);
* Note that this routine is purely a helper for filesystem usage and should
* not be called by generic code. It does no permission checking.
*
- * The caller must hold base->i_mutex.
+ * The caller must hold base->i_rwsem.
*/
struct dentry *lookup_noperm(struct qstr *name, struct dentry *base)
{
@@ -2971,7 +2972,7 @@ EXPORT_SYMBOL(lookup_noperm);
*
* This can be used for in-kernel filesystem clients such as file servers.
*
- * The caller must hold base->i_mutex.
+ * The caller must hold base->i_rwsem.
*/
struct dentry *lookup_one(struct mnt_idmap *idmap, struct qstr *name,
struct dentry *base)
@@ -3057,14 +3058,22 @@ EXPORT_SYMBOL(lookup_one_positive_unlocked);
* Note that this routine is purely a helper for filesystem usage and should
* not be called by generic code. It does no permission checking.
*
- * Unlike lookup_noperm, it should be called without the parent
+ * Unlike lookup_noperm(), it should be called without the parent
* i_rwsem held, and will take the i_rwsem itself if necessary.
+ *
+ * Unlike try_lookup_noperm() it *does* revalidate the dentry if it already
+ * existed.
*/
struct dentry *lookup_noperm_unlocked(struct qstr *name, struct dentry *base)
{
struct dentry *ret;
+ int err;
- ret = try_lookup_noperm(name, base);
+ err = lookup_noperm_common(name, base);
+ if (err)
+ return ERR_PTR(err);
+
+ ret = lookup_dcache(name, base, 0);
if (!ret)
ret = lookup_slow(name, base, 0);
return ret;
@@ -3471,7 +3480,7 @@ static int may_open(struct mnt_idmap *idmap, const struct path *path,
return -EACCES;
break;
default:
- VFS_BUG_ON_INODE(1, inode);
+ VFS_BUG_ON_INODE(!IS_ANON_FILE(inode), inode);
}
error = inode_permission(idmap, inode, MAY_OPEN | acc_mode);
@@ -4542,13 +4551,13 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
* @dentry: victim
* @delegated_inode: returns victim inode, if the inode is delegated.
*
- * The caller must hold dir->i_mutex.
+ * The caller must hold dir->i_rwsem exclusively.
*
* If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
* return a reference to the inode in delegated_inode. The caller
* should then break the delegation on that inode and retry. Because
* breaking a delegation may take a long time, the caller should drop
- * dir->i_mutex before doing so.
+ * dir->i_rwsem before doing so.
*
* Alternatively, a caller may pass NULL for delegated_inode. This may
* be appropriate for callers that expect the underlying filesystem not
@@ -4607,7 +4616,7 @@ EXPORT_SYMBOL(vfs_unlink);
/*
* Make sure that the actual truncation of the file will occur outside its
- * directory's i_mutex. Truncate can take a long time if there is a lot of
+ * directory's i_rwsem. Truncate can take a long time if there is a lot of
* writeout happening, and we don't want to prevent access to the directory
* while waiting on the I/O.
*/
@@ -4785,13 +4794,13 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
* @new_dentry: where to create the new link
* @delegated_inode: returns inode needing a delegation break
*
- * The caller must hold dir->i_mutex
+ * The caller must hold dir->i_rwsem exclusively.
*
* If vfs_link discovers a delegation on the to-be-linked file in need
* of breaking, it will return -EWOULDBLOCK and return a reference to the
* inode in delegated_inode. The caller should then break the delegation
* and retry. Because breaking a delegation may take a long time, the
- * caller should drop the i_mutex before doing so.
+ * caller should drop the i_rwsem before doing so.
*
* Alternatively, a caller may pass NULL for delegated_inode. This may
* be appropriate for callers that expect the underlying filesystem not
@@ -4987,7 +4996,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
* c) we may have to lock up to _four_ objects - parents and victim (if it exists),
* and source (if it's a non-directory or a subdirectory that moves to
* different parent).
- * And that - after we got ->i_mutex on parents (until then we don't know
+ * And that - after we got ->i_rwsem on parents (until then we don't know
* whether the target exists). Solution: try to be smart with locking
* order for inodes. We rely on the fact that tree topology may change
* only under ->s_vfs_rename_mutex _and_ that parent of the object we
@@ -4999,15 +5008,16 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
* has no more than 1 dentry. If "hybrid" objects will ever appear,
* we'd better make sure that there's no link(2) for them.
* d) conversion from fhandle to dentry may come in the wrong moment - when
- * we are removing the target. Solution: we will have to grab ->i_mutex
+ * we are removing the target. Solution: we will have to grab ->i_rwsem
* in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
- * ->i_mutex on parents, which works but leads to some truly excessive
+ * ->i_rwsem on parents, which works but leads to some truly excessive
* locking].
*/
int vfs_rename(struct renamedata *rd)
{
int error;
- struct inode *old_dir = rd->old_dir, *new_dir = rd->new_dir;
+ struct inode *old_dir = d_inode(rd->old_parent);
+ struct inode *new_dir = d_inode(rd->new_parent);
struct dentry *old_dentry = rd->old_dentry;
struct dentry *new_dentry = rd->new_dentry;
struct inode **delegated_inode = rd->delegated_inode;
@@ -5266,10 +5276,10 @@ retry_deleg:
if (error)
goto exit5;
- rd.old_dir = old_path.dentry->d_inode;
+ rd.old_parent = old_path.dentry;
rd.old_dentry = old_dentry;
rd.old_mnt_idmap = mnt_idmap(old_path.mnt);
- rd.new_dir = new_path.dentry->d_inode;
+ rd.new_parent = new_path.dentry;
rd.new_dentry = new_dentry;
rd.new_mnt_idmap = mnt_idmap(new_path.mnt);
rd.delegated_inode = &delegated_inode;
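[note: struct renamedata now carries the parent dentries (old_parent/new_parent) instead of the parent inodes; vfs_rename() derives the inodes itself via d_inode(). A sketch of a hypothetical in-kernel caller after the change, locking elided:]

	/* sketch only; parents locked via lock_rename() beforehand */
	static int example_rename(struct path *oldp, struct dentry *od,
				  struct path *newp, struct dentry *nd)
	{
		struct renamedata rd = {
			.old_mnt_idmap	= mnt_idmap(oldp->mnt),
			.old_parent	= oldp->dentry,	/* was .old_dir = d_inode(...) */
			.old_dentry	= od,
			.new_mnt_idmap	= mnt_idmap(newp->mnt),
			.new_parent	= newp->dentry,	/* was .new_dir = d_inode(...) */
			.new_dentry	= nd,
		};

		return vfs_rename(&rd);
	}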
diff --git a/fs/namespace.c b/fs/namespace.c
index 7ca4612c7ae9..ddfd4457d338 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -79,6 +79,7 @@ static struct kmem_cache *mnt_cache __ro_after_init;
static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
+static struct mnt_namespace *emptied_ns; /* protected by namespace_sem */
static DEFINE_SEQLOCK(mnt_ns_tree_lock);
#ifdef CONFIG_FSNOTIFY
@@ -380,10 +381,9 @@ static struct mount *alloc_vfsmnt(const char *name)
INIT_LIST_HEAD(&mnt->mnt_list);
INIT_LIST_HEAD(&mnt->mnt_expire);
INIT_LIST_HEAD(&mnt->mnt_share);
- INIT_LIST_HEAD(&mnt->mnt_slave_list);
- INIT_LIST_HEAD(&mnt->mnt_slave);
+ INIT_HLIST_HEAD(&mnt->mnt_slave_list);
+ INIT_HLIST_NODE(&mnt->mnt_slave);
INIT_HLIST_NODE(&mnt->mnt_mp_list);
- INIT_LIST_HEAD(&mnt->mnt_umounting);
INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
RB_CLEAR_NODE(&mnt->mnt_node);
mnt->mnt.mnt_idmap = &nop_mnt_idmap;
@@ -894,7 +894,7 @@ struct vfsmount *lookup_mnt(const struct path *path)
* namespace not just a mount that happens to have some specified
* parent mount.
*/
-bool __is_local_mountpoint(struct dentry *dentry)
+bool __is_local_mountpoint(const struct dentry *dentry)
{
struct mnt_namespace *ns = current->nsproxy->mnt_ns;
struct mount *mnt, *n;
@@ -911,42 +911,48 @@ bool __is_local_mountpoint(struct dentry *dentry)
return is_covered;
}
-static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
+struct pinned_mountpoint {
+ struct hlist_node node;
+ struct mountpoint *mp;
+};
+
+static bool lookup_mountpoint(struct dentry *dentry, struct pinned_mountpoint *m)
{
struct hlist_head *chain = mp_hash(dentry);
struct mountpoint *mp;
hlist_for_each_entry(mp, chain, m_hash) {
if (mp->m_dentry == dentry) {
- mp->m_count++;
- return mp;
+ hlist_add_head(&m->node, &mp->m_list);
+ m->mp = mp;
+ return true;
}
}
- return NULL;
+ return false;
}
-static struct mountpoint *get_mountpoint(struct dentry *dentry)
+static int get_mountpoint(struct dentry *dentry, struct pinned_mountpoint *m)
{
- struct mountpoint *mp, *new = NULL;
+ struct mountpoint *mp __free(kfree) = NULL;
+ bool found;
int ret;
if (d_mountpoint(dentry)) {
/* might be worth a WARN_ON() */
if (d_unlinked(dentry))
- return ERR_PTR(-ENOENT);
+ return -ENOENT;
mountpoint:
read_seqlock_excl(&mount_lock);
- mp = lookup_mountpoint(dentry);
+ found = lookup_mountpoint(dentry, m);
read_sequnlock_excl(&mount_lock);
- if (mp)
- goto done;
+ if (found)
+ return 0;
}
- if (!new)
- new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
- if (!new)
- return ERR_PTR(-ENOMEM);
-
+ if (!mp)
+ mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
+ if (!mp)
+ return -ENOMEM;
/* Exactly one process may set d_mounted */
ret = d_set_mounted(dentry);
@@ -956,34 +962,28 @@ mountpoint:
goto mountpoint;
/* The dentry is not available as a mountpoint? */
- mp = ERR_PTR(ret);
if (ret)
- goto done;
+ return ret;
/* Add the new mountpoint to the hash table */
read_seqlock_excl(&mount_lock);
- new->m_dentry = dget(dentry);
- new->m_count = 1;
- hlist_add_head(&new->m_hash, mp_hash(dentry));
- INIT_HLIST_HEAD(&new->m_list);
+ mp->m_dentry = dget(dentry);
+ hlist_add_head(&mp->m_hash, mp_hash(dentry));
+ INIT_HLIST_HEAD(&mp->m_list);
+ hlist_add_head(&m->node, &mp->m_list);
+ m->mp = no_free_ptr(mp);
read_sequnlock_excl(&mount_lock);
-
- mp = new;
- new = NULL;
-done:
- kfree(new);
- return mp;
+ return 0;
}
/*
* vfsmount lock must be held. Additionally, the caller is responsible
* for serializing calls for given disposal list.
*/
-static void __put_mountpoint(struct mountpoint *mp, struct list_head *list)
+static void maybe_free_mountpoint(struct mountpoint *mp, struct list_head *list)
{
- if (!--mp->m_count) {
+ if (hlist_empty(&mp->m_list)) {
struct dentry *dentry = mp->m_dentry;
- BUG_ON(!hlist_empty(&mp->m_list));
spin_lock(&dentry->d_lock);
dentry->d_flags &= ~DCACHE_MOUNTED;
spin_unlock(&dentry->d_lock);
@@ -993,10 +993,15 @@ static void __put_mountpoint(struct mountpoint *mp, struct list_head *list)
}
}
-/* called with namespace_lock and vfsmount lock */
-static void put_mountpoint(struct mountpoint *mp)
+/*
+ * locks: mount_lock [read_seqlock_excl], namespace_sem [excl]
+ */
+static void unpin_mountpoint(struct pinned_mountpoint *m)
{
- __put_mountpoint(mp, &ex_mountpoints);
+ if (m->mp) {
+ hlist_del(&m->node);
+ maybe_free_mountpoint(m->mp, &ex_mountpoints);
+ }
}
static inline int check_mnt(struct mount *mnt)
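[note: struct mountpoint loses its m_count refcount above; a user now pins a mountpoint by hanging an on-stack pinned_mountpoint node on the mountpoint's m_list, and the mountpoint is freed once that list drains. A condensed sketch of the new discipline inside fs/namespace.c, locking as in the function comments above:]

	/* sketch only, not part of this patch */
	static int example_use_mountpoint(struct dentry *dentry)
	{
		struct pinned_mountpoint mp = {};
		int err = get_mountpoint(dentry, &mp);

		if (err)
			return err;
		/* mp.mp cannot be freed while mp.node sits on its m_list */
		do_something(mp.mp);	/* hypothetical */
		unpin_mountpoint(&mp);	/* unhooks mp.node; frees mp.mp if last pin */
		return 0;
	}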
@@ -1038,11 +1043,14 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
}
/*
- * vfsmount lock must be held for write
+ * locks: mount_lock[write_seqlock]
*/
-static struct mountpoint *unhash_mnt(struct mount *mnt)
+static void __umount_mnt(struct mount *mnt, struct list_head *shrink_list)
{
struct mountpoint *mp;
+ struct mount *parent = mnt->mnt_parent;
+ if (unlikely(parent->overmount == mnt))
+ parent->overmount = NULL;
mnt->mnt_parent = mnt;
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
list_del_init(&mnt->mnt_child);
@@ -1050,15 +1058,15 @@ static struct mountpoint *unhash_mnt(struct mount *mnt)
hlist_del_init(&mnt->mnt_mp_list);
mp = mnt->mnt_mp;
mnt->mnt_mp = NULL;
- return mp;
+ maybe_free_mountpoint(mp, shrink_list);
}
/*
- * vfsmount lock must be held for write
+ * locks: mount_lock[write_seqlock], namespace_sem[excl] (for ex_mountpoints)
*/
static void umount_mnt(struct mount *mnt)
{
- put_mountpoint(unhash_mnt(mnt));
+ __umount_mnt(mnt, &ex_mountpoints);
}
/*
@@ -1068,43 +1076,17 @@ void mnt_set_mountpoint(struct mount *mnt,
struct mountpoint *mp,
struct mount *child_mnt)
{
- mp->m_count++;
- mnt_add_count(mnt, 1); /* essentially, that's mntget */
child_mnt->mnt_mountpoint = mp->m_dentry;
child_mnt->mnt_parent = mnt;
child_mnt->mnt_mp = mp;
hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
}
-/**
- * mnt_set_mountpoint_beneath - mount a mount beneath another one
- *
- * @new_parent: the source mount
- * @top_mnt: the mount beneath which @new_parent is mounted
- * @new_mp: the new mountpoint of @top_mnt on @new_parent
- *
- * Remove @top_mnt from its current mountpoint @top_mnt->mnt_mp and
- * parent @top_mnt->mnt_parent and mount it on top of @new_parent at
- * @new_mp. And mount @new_parent on the old parent and old
- * mountpoint of @top_mnt.
- *
- * Context: This function expects namespace_lock() and lock_mount_hash()
- * to have been acquired in that order.
- */
-static void mnt_set_mountpoint_beneath(struct mount *new_parent,
- struct mount *top_mnt,
- struct mountpoint *new_mp)
-{
- struct mount *old_top_parent = top_mnt->mnt_parent;
- struct mountpoint *old_top_mp = top_mnt->mnt_mp;
-
- mnt_set_mountpoint(old_top_parent, old_top_mp, new_parent);
- mnt_change_mountpoint(new_parent, new_mp, top_mnt);
-}
-
-
-static void __attach_mnt(struct mount *mnt, struct mount *parent)
+static void make_visible(struct mount *mnt)
{
+ struct mount *parent = mnt->mnt_parent;
+ if (unlikely(mnt->mnt_mountpoint == parent->mnt.mnt_root))
+ parent->overmount = mnt;
hlist_add_head_rcu(&mnt->mnt_hash,
m_hash(&parent->mnt, mnt->mnt_mountpoint));
list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
@@ -1116,51 +1098,34 @@ static void __attach_mnt(struct mount *mnt, struct mount *parent)
* @parent: the parent
* @mnt: the new mount
* @mp: the new mountpoint
- * @beneath: whether to mount @mnt beneath or on top of @parent
*
- * If @beneath is false, mount @mnt at @mp on @parent. Then attach @mnt
+ * Mount @mnt at @mp on @parent. Then attach @mnt
* to @parent's child mount list and to @mount_hashtable.
*
- * If @beneath is true, remove @mnt from its current parent and
- * mountpoint and mount it on @mp on @parent, and mount @parent on the
- * old parent and old mountpoint of @mnt. Finally, attach @parent to
- * @mnt_hashtable and @parent->mnt_parent->mnt_mounts.
- *
- * Note, when __attach_mnt() is called @mnt->mnt_parent already points
+ * Note, when make_visible() is called @mnt->mnt_parent already points
* to the correct parent.
*
* Context: This function expects namespace_lock() and lock_mount_hash()
* to have been acquired in that order.
*/
static void attach_mnt(struct mount *mnt, struct mount *parent,
- struct mountpoint *mp, bool beneath)
+ struct mountpoint *mp)
{
- if (beneath)
- mnt_set_mountpoint_beneath(mnt, parent, mp);
- else
- mnt_set_mountpoint(parent, mp, mnt);
- /*
- * Note, @mnt->mnt_parent has to be used. If @mnt was mounted
- * beneath @parent then @mnt will need to be attached to
- * @parent's old parent, not @parent. IOW, @mnt->mnt_parent
- * isn't the same mount as @parent.
- */
- __attach_mnt(mnt, mnt->mnt_parent);
+ mnt_set_mountpoint(parent, mp, mnt);
+ make_visible(mnt);
}
void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
{
struct mountpoint *old_mp = mnt->mnt_mp;
- struct mount *old_parent = mnt->mnt_parent;
list_del_init(&mnt->mnt_child);
hlist_del_init(&mnt->mnt_mp_list);
hlist_del_init_rcu(&mnt->mnt_hash);
- attach_mnt(mnt, parent, mp, false);
+ attach_mnt(mnt, parent, mp);
- put_mountpoint(old_mp);
- mnt_add_count(old_parent, -1);
+ maybe_free_mountpoint(old_mp, &ex_mountpoints);
}
static inline struct mount *node_to_mount(struct rb_node *node)
@@ -1197,32 +1162,6 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt)
mnt_notify_add(mnt);
}
-/*
- * vfsmount lock must be held for write
- */
-static void commit_tree(struct mount *mnt)
-{
- struct mount *parent = mnt->mnt_parent;
- struct mount *m;
- LIST_HEAD(head);
- struct mnt_namespace *n = parent->mnt_ns;
-
- BUG_ON(parent == mnt);
-
- list_add_tail(&head, &mnt->mnt_list);
- while (!list_empty(&head)) {
- m = list_first_entry(&head, typeof(*m), mnt_list);
- list_del(&m->mnt_list);
-
- mnt_add_to_ns(n, m);
- }
- n->nr_mounts += n->pending_mounts;
- n->pending_mounts = 0;
-
- __attach_mnt(mnt, parent);
- touch_mnt_namespace(n);
-}
-
static struct mount *next_mnt(struct mount *p, struct mount *root)
{
struct list_head *next = p->mnt_mounts.next;
@@ -1249,6 +1188,27 @@ static struct mount *skip_mnt_tree(struct mount *p)
return p;
}
+/*
+ * vfsmount lock must be held for write
+ */
+static void commit_tree(struct mount *mnt)
+{
+ struct mnt_namespace *n = mnt->mnt_parent->mnt_ns;
+
+ if (!mnt_ns_attached(mnt)) {
+ for (struct mount *m = mnt; m; m = next_mnt(m, mnt))
+ if (unlikely(mnt_ns_attached(m)))
+ m = skip_mnt_tree(m);
+ else
+ mnt_add_to_ns(n, m);
+ n->nr_mounts += n->pending_mounts;
+ n->pending_mounts = 0;
+ }
+
+ make_visible(mnt);
+ touch_mnt_namespace(n);
+}
+
/**
* vfs_create_mount - Create a mount for a configured superblock
* @fc: The configuration context with the superblock attached
@@ -1296,6 +1256,15 @@ struct vfsmount *fc_mount(struct fs_context *fc)
}
EXPORT_SYMBOL(fc_mount);
+struct vfsmount *fc_mount_longterm(struct fs_context *fc)
+{
+ struct vfsmount *mnt = fc_mount(fc);
+ if (!IS_ERR(mnt))
+ real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
+ return mnt;
+}
+EXPORT_SYMBOL(fc_mount_longterm);
+
struct vfsmount *vfs_kern_mount(struct file_system_type *type,
int flags, const char *name,
void *data)
@@ -1337,7 +1306,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
if (!mnt)
return ERR_PTR(-ENOMEM);
- if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
+ mnt->mnt.mnt_flags = READ_ONCE(old->mnt.mnt_flags) &
+ ~MNT_INTERNAL_FLAGS;
+
+ if (flag & (CL_SLAVE | CL_PRIVATE))
mnt->mnt_group_id = 0; /* not a peer of original */
else
mnt->mnt_group_id = old->mnt_group_id;
@@ -1348,8 +1320,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
goto out_free;
}
- mnt->mnt.mnt_flags = old->mnt.mnt_flags;
- mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
+ if (mnt->mnt_group_id)
+ set_mnt_shared(mnt);
atomic_inc(&sb->s_active);
mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt));
@@ -1362,30 +1334,19 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
unlock_mount_hash();
- if ((flag & CL_SLAVE) ||
- ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
- list_add(&mnt->mnt_slave, &old->mnt_slave_list);
+ if (flag & CL_PRIVATE) // we are done with it
+ return mnt;
+
+ if (peers(mnt, old))
+ list_add(&mnt->mnt_share, &old->mnt_share);
+
+ if ((flag & CL_SLAVE) && old->mnt_group_id) {
+ hlist_add_head(&mnt->mnt_slave, &old->mnt_slave_list);
mnt->mnt_master = old;
- CLEAR_MNT_SHARED(mnt);
- } else if (!(flag & CL_PRIVATE)) {
- if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
- list_add(&mnt->mnt_share, &old->mnt_share);
- if (IS_MNT_SLAVE(old))
- list_add(&mnt->mnt_slave, &old->mnt_slave);
+ } else if (IS_MNT_SLAVE(old)) {
+ hlist_add_behind(&mnt->mnt_slave, &old->mnt_slave);
mnt->mnt_master = old->mnt_master;
- } else {
- CLEAR_MNT_SHARED(mnt);
- }
- if (flag & CL_MAKE_SHARED)
- set_mnt_shared(mnt);
-
- /* stick the duplicate mount on the same expiry list
- * as the original if that was on one */
- if (flag & CL_EXPIRE) {
- if (!list_empty(&old->mnt_expire))
- list_add(&mnt->mnt_expire, &old->mnt_expire);
}
-
return mnt;
out_free:
@@ -1478,11 +1439,13 @@ static void mntput_no_expire(struct mount *mnt)
rcu_read_unlock();
list_del(&mnt->mnt_instance);
+ if (unlikely(!list_empty(&mnt->mnt_expire)))
+ list_del(&mnt->mnt_expire);
if (unlikely(!list_empty(&mnt->mnt_mounts))) {
struct mount *p, *tmp;
list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
- __put_mountpoint(unhash_mnt(p), &list);
+ __umount_mnt(p, &list);
hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children);
}
}
@@ -1679,23 +1642,19 @@ const struct seq_operations mounts_op = {
int may_umount_tree(struct vfsmount *m)
{
struct mount *mnt = real_mount(m);
- int actual_refs = 0;
- int minimum_refs = 0;
- struct mount *p;
- BUG_ON(!m);
+ bool busy = false;
/* write lock needed for mnt_get_count */
lock_mount_hash();
- for (p = mnt; p; p = next_mnt(p, mnt)) {
- actual_refs += mnt_get_count(p);
- minimum_refs += 2;
+ for (struct mount *p = mnt; p; p = next_mnt(p, mnt)) {
+ if (mnt_get_count(p) > (p == mnt ? 2 : 1)) {
+ busy = true;
+ break;
+ }
}
unlock_mount_hash();
- if (actual_refs > minimum_refs)
- return 0;
-
- return 1;
+ return !busy;
}
EXPORT_SYMBOL(may_umount_tree);
@@ -1771,15 +1730,18 @@ static bool need_notify_mnt_list(void)
}
#endif
+static void free_mnt_ns(struct mnt_namespace *);
static void namespace_unlock(void)
{
struct hlist_head head;
struct hlist_node *p;
struct mount *m;
+ struct mnt_namespace *ns = emptied_ns;
LIST_HEAD(list);
hlist_move_list(&unmounted, &head);
list_splice_init(&ex_mountpoints, &list);
+ emptied_ns = NULL;
if (need_notify_mnt_list()) {
/*
@@ -1793,6 +1755,11 @@ static void namespace_unlock(void)
} else {
up_write(&namespace_sem);
}
+ if (unlikely(ns)) {
+ /* Make sure we notice when we leak mounts. */
+ VFS_WARN_ON_ONCE(!mnt_ns_empty(ns));
+ free_mnt_ns(ns);
+ }
shrink_dentry_list(&list);
@@ -1865,9 +1832,8 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
for (p = mnt; p; p = next_mnt(p, mnt)) {
p->mnt.mnt_flags |= MNT_UMOUNT;
if (mnt_ns_attached(p))
- move_from_ns(p, &tmp_list);
- else
- list_move(&p->mnt_list, &tmp_list);
+ move_from_ns(p);
+ list_add_tail(&p->mnt_list, &tmp_list);
}
/* Hide the mounts from mnt_mounts */
@@ -1896,7 +1862,6 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
disconnect = disconnect_mount(p, how);
if (mnt_has_parent(p)) {
- mnt_add_count(p->mnt_parent, -1);
if (!disconnect) {
/* Don't forget about p */
list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
@@ -1973,7 +1938,7 @@ static int do_umount(struct mount *mnt, int flags)
* all race cases, but it's a slowpath.
*/
lock_mount_hash();
- if (mnt_get_count(mnt) != 2) {
+ if (!list_empty(&mnt->mnt_mounts) || mnt_get_count(mnt) != 2) {
unlock_mount_hash();
return -EBUSY;
}
@@ -2019,23 +1984,27 @@ static int do_umount(struct mount *mnt, int flags)
namespace_lock();
lock_mount_hash();
- /* Recheck MNT_LOCKED with the locks held */
+ /* Repeat the earlier racy checks, now that we are holding the locks */
retval = -EINVAL;
+ if (!check_mnt(mnt))
+ goto out;
+
if (mnt->mnt.mnt_flags & MNT_LOCKED)
goto out;
+ if (!mnt_has_parent(mnt)) /* not the absolute root */
+ goto out;
+
event++;
if (flags & MNT_DETACH) {
- if (mnt_ns_attached(mnt) || !list_empty(&mnt->mnt_list))
- umount_tree(mnt, UMOUNT_PROPAGATE);
+ umount_tree(mnt, UMOUNT_PROPAGATE);
retval = 0;
} else {
smp_mb(); // paired with __legitimize_mnt()
shrink_submounts(mnt);
retval = -EBUSY;
if (!propagate_mount_busy(mnt, 2)) {
- if (mnt_ns_attached(mnt) || !list_empty(&mnt->mnt_list))
- umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
+ umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
retval = 0;
}
}
@@ -2053,29 +2022,28 @@ out:
* detach_mounts allows lazily unmounting those mounts instead of
* leaking them.
*
- * The caller may hold dentry->d_inode->i_mutex.
+ * The caller may hold dentry->d_inode->i_rwsem.
*/
void __detach_mounts(struct dentry *dentry)
{
- struct mountpoint *mp;
+ struct pinned_mountpoint mp = {};
struct mount *mnt;
namespace_lock();
lock_mount_hash();
- mp = lookup_mountpoint(dentry);
- if (!mp)
+ if (!lookup_mountpoint(dentry, &mp))
goto out_unlock;
event++;
- while (!hlist_empty(&mp->m_list)) {
- mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
+ while (mp.node.next) {
+ mnt = hlist_entry(mp.node.next, struct mount, mnt_mp_list);
if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
umount_mnt(mnt);
hlist_add_head(&mnt->mnt_umount, &unmounted);
}
else umount_tree(mnt, UMOUNT_CONNECTED);
}
- put_mountpoint(mp);
+ unpin_mountpoint(&mp);
out_unlock:
unlock_mount_hash();
namespace_unlock();
@@ -2259,7 +2227,6 @@ struct mount *copy_tree(struct mount *src_root, struct dentry *dentry,
return dst_mnt;
src_parent = src_root;
- dst_mnt->mnt_mountpoint = src_root->mnt_mountpoint;
list_for_each_entry(src_root_child, &src_root->mnt_mounts, mnt_child) {
if (!is_subdir(src_root_child->mnt_mountpoint, dentry))
@@ -2294,8 +2261,16 @@ struct mount *copy_tree(struct mount *src_root, struct dentry *dentry,
if (IS_ERR(dst_mnt))
goto out;
lock_mount_hash();
- list_add_tail(&dst_mnt->mnt_list, &res->mnt_list);
- attach_mnt(dst_mnt, dst_parent, src_parent->mnt_mp, false);
+ if (src_mnt->mnt.mnt_flags & MNT_LOCKED)
+ dst_mnt->mnt.mnt_flags |= MNT_LOCKED;
+ if (unlikely(flag & CL_EXPIRE)) {
+ /* stick the duplicate mount on the same expiry
+ * list as the original if that was on one */
+ if (!list_empty(&src_mnt->mnt_expire))
+ list_add(&dst_mnt->mnt_expire,
+ &src_mnt->mnt_expire);
+ }
+ attach_mnt(dst_mnt, dst_parent, src_parent->mnt_mp);
unlock_mount_hash();
}
}
@@ -2310,104 +2285,94 @@ out:
return dst_mnt;
}
-/* Caller should check returned pointer for errors */
-
-struct vfsmount *collect_mounts(const struct path *path)
+static inline bool extend_array(struct path **res, struct path **to_free,
+ unsigned n, unsigned *count, unsigned new_count)
{
- struct mount *tree;
- namespace_lock();
- if (!check_mnt(real_mount(path->mnt)))
- tree = ERR_PTR(-EINVAL);
- else
- tree = copy_tree(real_mount(path->mnt), path->dentry,
- CL_COPY_ALL | CL_PRIVATE);
- namespace_unlock();
- if (IS_ERR(tree))
- return ERR_CAST(tree);
- return &tree->mnt;
-}
+ struct path *p;
-static void free_mnt_ns(struct mnt_namespace *);
-static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool);
+ if (likely(n < *count))
+ return true;
+ p = kmalloc_array(new_count, sizeof(struct path), GFP_KERNEL);
+ if (p && *count)
+ memcpy(p, *res, *count * sizeof(struct path));
+ *count = new_count;
+ kfree(*to_free);
+ *to_free = *res = p;
+ return p;
+}
-static inline bool must_dissolve(struct mnt_namespace *mnt_ns)
+struct path *collect_paths(const struct path *path,
+ struct path *prealloc, unsigned count)
{
- /*
- * This mount belonged to an anonymous mount namespace
- * but was moved to a non-anonymous mount namespace and
- * then unmounted.
- */
- if (unlikely(!mnt_ns))
- return false;
+ struct mount *root = real_mount(path->mnt);
+ struct mount *child;
+ struct path *res = prealloc, *to_free = NULL;
+ unsigned n = 0;
- /*
- * This mount belongs to a non-anonymous mount namespace
- * and we know that such a mount can never transition to
- * an anonymous mount namespace again.
- */
- if (!is_anon_ns(mnt_ns)) {
- /*
- * A detached mount either belongs to an anonymous mount
- * namespace or a non-anonymous mount namespace. It
- * should never belong to something purely internal.
- */
- VFS_WARN_ON_ONCE(mnt_ns == MNT_NS_INTERNAL);
- return false;
+ guard(rwsem_read)(&namespace_sem);
+
+ if (!check_mnt(root))
+ return ERR_PTR(-EINVAL);
+ if (!extend_array(&res, &to_free, 0, &count, 32))
+ return ERR_PTR(-ENOMEM);
+ res[n++] = *path;
+ list_for_each_entry(child, &root->mnt_mounts, mnt_child) {
+ if (!is_subdir(child->mnt_mountpoint, path->dentry))
+ continue;
+ for (struct mount *m = child; m; m = next_mnt(m, child)) {
+ if (!extend_array(&res, &to_free, n, &count, 2 * count))
+ return ERR_PTR(-ENOMEM);
+ res[n].mnt = &m->mnt;
+ res[n].dentry = m->mnt.mnt_root;
+ n++;
+ }
}
+ if (!extend_array(&res, &to_free, n, &count, count + 1))
+ return ERR_PTR(-ENOMEM);
+ memset(res + n, 0, (count - n) * sizeof(struct path));
+ for (struct path *p = res; p->mnt; p++)
+ path_get(p);
+ return res;
+}
- return true;
+void drop_collected_paths(struct path *paths, struct path *prealloc)
+{
+ for (struct path *p = paths; p->mnt; p++)
+ path_put(p);
+ if (paths != prealloc)
+ kfree(paths);
}
+static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool);
+
void dissolve_on_fput(struct vfsmount *mnt)
{
- struct mnt_namespace *ns;
struct mount *m = real_mount(mnt);
+ /*
+ * m used to be the root of anon namespace; if it still is one,
+ * we need to dissolve the mount tree and free that namespace.
+ * Let's try to avoid taking namespace_sem if we can determine
+ * that there's nothing to do without it - rcu_read_lock() is
+ * enough to make anon_ns_root() memory-safe and once m has
+ * left its namespace, it's no longer our concern, since it will
+ * never become a root of anon ns again.
+ */
+
scoped_guard(rcu) {
- if (!must_dissolve(READ_ONCE(m->mnt_ns)))
+ if (!anon_ns_root(m))
return;
}
scoped_guard(namespace_lock, &namespace_sem) {
- ns = m->mnt_ns;
- if (!must_dissolve(ns))
- return;
-
- /*
- * After must_dissolve() we know that this is a detached
- * mount in an anonymous mount namespace.
- *
- * Now when mnt_has_parent() reports that this mount
- * tree has a parent, we know that this anonymous mount
- * tree has been moved to another anonymous mount
- * namespace.
- *
- * So when closing this file we cannot unmount the mount
- * tree. This will be done when the file referring to
- * the root of the anonymous mount namespace will be
- * closed (It could already be closed but it would sync
- * on @namespace_sem and wait for us to finish.).
- */
- if (mnt_has_parent(m))
+ if (!anon_ns_root(m))
return;
+ emptied_ns = m->mnt_ns;
lock_mount_hash();
umount_tree(m, UMOUNT_CONNECTED);
unlock_mount_hash();
}
-
- /* Make sure we notice when we leak mounts. */
- VFS_WARN_ON_ONCE(!mnt_ns_empty(ns));
- free_mnt_ns(ns);
-}
-
-void drop_collected_mounts(struct vfsmount *mnt)
-{
- namespace_lock();
- lock_mount_hash();
- umount_tree(real_mount(mnt), 0);
- unlock_mount_hash();
- namespace_unlock();
}
static bool __has_locked_children(struct mount *mnt, struct dentry *dentry)
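[note: collect_mounts()/drop_collected_mounts()/iterate_mounts() are replaced by collect_paths()/drop_collected_paths() above: instead of a private clone of the mount tree, the caller gets a NULL-terminated array of referenced struct path entries, optionally placed in a caller-supplied prealloc buffer. A sketch of a hypothetical user:]

	/* sketch: walking everything mounted under 'path' */
	static int example_walk(const struct path *path)
	{
		struct path prealloc[16], *paths;
		int err = 0;

		paths = collect_paths(path, prealloc, ARRAY_SIZE(prealloc));
		if (IS_ERR(paths))
			return PTR_ERR(paths);

		for (struct path *p = paths; p->mnt; p++)	/* NULL-terminated */
			err = err ?: visit(p);			/* hypothetical visitor */

		drop_collected_paths(paths, prealloc);	/* puts refs, frees if heap-allocated */
		return err;
	}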
@@ -2486,9 +2451,7 @@ struct vfsmount *clone_private_mount(const struct path *path)
* loops get created.
*/
if (!check_mnt(old_mnt)) {
- if (!is_mounted(&old_mnt->mnt) ||
- !is_anon_ns(old_mnt->mnt_ns) ||
- mnt_has_parent(old_mnt))
+ if (!anon_ns_root(old_mnt))
return ERR_PTR(-EINVAL);
if (!check_for_nsfs_mounts(old_mnt))
@@ -2511,21 +2474,6 @@ struct vfsmount *clone_private_mount(const struct path *path)
}
EXPORT_SYMBOL_GPL(clone_private_mount);
-int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
- struct vfsmount *root)
-{
- struct mount *mnt;
- int res = f(root, arg);
- if (res)
- return res;
- list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
- res = f(&mnt->mnt, arg);
- if (res)
- return res;
- }
- return 0;
-}
-
static void lock_mnt_tree(struct mount *mnt)
{
struct mount *p;
@@ -2547,7 +2495,7 @@ static void lock_mnt_tree(struct mount *mnt)
if (flags & MNT_NOEXEC)
flags |= MNT_LOCK_NOEXEC;
/* Don't allow unprivileged users to reveal what is under a mount */
- if (list_empty(&p->mnt_expire))
+ if (list_empty(&p->mnt_expire) && p != mnt)
flags |= MNT_LOCKED;
p->mnt.mnt_flags = flags;
}
@@ -2568,7 +2516,7 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
struct mount *p;
for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
- if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
+ if (!p->mnt_group_id) {
int err = mnt_alloc_group_id(p);
if (err) {
cleanup_group_ids(mnt, p);
@@ -2604,17 +2552,15 @@ int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
}
enum mnt_tree_flags_t {
- MNT_TREE_MOVE = BIT(0),
- MNT_TREE_BENEATH = BIT(1),
- MNT_TREE_PROPAGATION = BIT(2),
+ MNT_TREE_BENEATH = BIT(0),
+ MNT_TREE_PROPAGATION = BIT(1),
};
/**
* attach_recursive_mnt - attach a source mount tree
* @source_mnt: mount tree to be attached
- * @top_mnt: mount that @source_mnt will be mounted on or mounted beneath
+ * @dest_mnt: mount that @source_mnt will be mounted on
* @dest_mp: the mountpoint @source_mnt will be mounted at
- * @flags: modify how @source_mnt is supposed to be attached
*
 * NOTE: the table below explains the semantics when a source mount
* of a given type is attached to a destination mount of a given type.
@@ -2677,26 +2623,31 @@ enum mnt_tree_flags_t {
* Otherwise a negative error code is returned.
*/
static int attach_recursive_mnt(struct mount *source_mnt,
- struct mount *top_mnt,
- struct mountpoint *dest_mp,
- enum mnt_tree_flags_t flags)
+ struct mount *dest_mnt,
+ struct mountpoint *dest_mp)
{
struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
HLIST_HEAD(tree_list);
- struct mnt_namespace *ns = top_mnt->mnt_ns;
- struct mountpoint *smp;
- struct mount *child, *dest_mnt, *p;
+ struct mnt_namespace *ns = dest_mnt->mnt_ns;
+ struct pinned_mountpoint root = {};
+ struct mountpoint *shorter = NULL;
+ struct mount *child, *p;
+ struct mount *top;
struct hlist_node *n;
int err = 0;
- bool moving = flags & MNT_TREE_MOVE, beneath = flags & MNT_TREE_BENEATH;
+ bool moving = mnt_has_parent(source_mnt);
/*
* Preallocate a mountpoint in case the new mounts need to be
* mounted beneath mounts on the same mountpoint.
*/
- smp = get_mountpoint(source_mnt->mnt.mnt_root);
- if (IS_ERR(smp))
- return PTR_ERR(smp);
+ for (top = source_mnt; unlikely(top->overmount); top = top->overmount) {
+ if (!shorter && is_mnt_ns_file(top->mnt.mnt_root))
+ shorter = top->mnt_mp;
+ }
+ err = get_mountpoint(top->mnt.mnt_root, &root);
+ if (err)
+ return err;
/* Is there space to add these mounts to the mount namespace? */
if (!moving) {
@@ -2705,11 +2656,6 @@ static int attach_recursive_mnt(struct mount *source_mnt,
goto out;
}
- if (beneath)
- dest_mnt = top_mnt->mnt_parent;
- else
- dest_mnt = top_mnt;
-
if (IS_MNT_SHARED(dest_mnt)) {
err = invent_group_ids(source_mnt, true);
if (err)
@@ -2726,42 +2672,50 @@ static int attach_recursive_mnt(struct mount *source_mnt,
}
if (moving) {
- if (beneath)
- dest_mp = smp;
- unhash_mnt(source_mnt);
- attach_mnt(source_mnt, top_mnt, dest_mp, beneath);
+ umount_mnt(source_mnt);
mnt_notify_add(source_mnt);
- touch_mnt_namespace(source_mnt->mnt_ns);
+ /* if the mount is moved, it should no longer be expired
+ * automatically */
+ list_del_init(&source_mnt->mnt_expire);
} else {
if (source_mnt->mnt_ns) {
- LIST_HEAD(head);
-
/* move from anon - the caller will destroy */
+ emptied_ns = source_mnt->mnt_ns;
for (p = source_mnt; p; p = next_mnt(p, source_mnt))
- move_from_ns(p, &head);
- list_del_init(&head);
+ move_from_ns(p);
}
- if (beneath)
- mnt_set_mountpoint_beneath(source_mnt, top_mnt, smp);
- else
- mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
- commit_tree(source_mnt);
}
+ mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
+ /*
+ * Now the original copy is in the same state as the secondaries -
+ * its root attached to the mountpoint, but not hashed, and all
+ * mounts in it either in our namespace or in no namespace at all.
+ * Add the original to the list of copies and deal with the rest
+ * of the work for all of them uniformly.
+ */
+ hlist_add_head(&source_mnt->mnt_hash, &tree_list);
+
hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
struct mount *q;
hlist_del_init(&child->mnt_hash);
- q = __lookup_mnt(&child->mnt_parent->mnt,
- child->mnt_mountpoint);
- if (q)
- mnt_change_mountpoint(child, smp, q);
/* Notice when we are propagating across user namespaces */
if (child->mnt_parent->mnt_ns->user_ns != user_ns)
lock_mnt_tree(child);
- child->mnt.mnt_flags &= ~MNT_LOCKED;
+ q = __lookup_mnt(&child->mnt_parent->mnt,
+ child->mnt_mountpoint);
+ if (q) {
+ struct mountpoint *mp = root.mp;
+ struct mount *r = child;
+ while (unlikely(r->overmount))
+ r = r->overmount;
+ if (unlikely(shorter) && child != source_mnt)
+ mp = shorter;
+ mnt_change_mountpoint(r, mp, q);
+ }
commit_tree(child);
}
- put_mountpoint(smp);
+ unpin_mountpoint(&root);
unlock_mount_hash();
return 0;
@@ -2778,7 +2732,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
ns->pending_mounts = 0;
read_seqlock_excl(&mount_lock);
- put_mountpoint(smp);
+ unpin_mountpoint(&root);
read_sequnlock_excl(&mount_lock);
return err;
@@ -2818,12 +2772,12 @@ static int attach_recursive_mnt(struct mount *source_mnt,
* Return: Either the target mountpoint on the top mount or the top
* mount's mountpoint.
*/
-static struct mountpoint *do_lock_mount(struct path *path, bool beneath)
+static int do_lock_mount(struct path *path, struct pinned_mountpoint *pinned, bool beneath)
{
struct vfsmount *mnt = path->mnt;
struct dentry *dentry;
- struct mountpoint *mp = ERR_PTR(-ENOENT);
struct path under = {};
+ int err = -ENOENT;
for (;;) {
struct mount *m = real_mount(mnt);
@@ -2861,8 +2815,8 @@ static struct mountpoint *do_lock_mount(struct path *path, bool beneath)
path->dentry = dget(mnt->mnt_root);
continue; // got overmounted
}
- mp = get_mountpoint(dentry);
- if (IS_ERR(mp))
+ err = get_mountpoint(dentry, pinned);
+ if (err)
break;
if (beneath) {
/*
@@ -2873,25 +2827,25 @@ static struct mountpoint *do_lock_mount(struct path *path, bool beneath)
*/
path_put(&under);
}
- return mp;
+ return 0;
}
namespace_unlock();
inode_unlock(dentry->d_inode);
if (beneath)
path_put(&under);
- return mp;
+ return err;
}
-static inline struct mountpoint *lock_mount(struct path *path)
+static inline int lock_mount(struct path *path, struct pinned_mountpoint *m)
{
- return do_lock_mount(path, false);
+ return do_lock_mount(path, m, false);
}
-static void unlock_mount(struct mountpoint *where)
+static void unlock_mount(struct pinned_mountpoint *m)
{
- inode_unlock(where->m_dentry->d_inode);
+ inode_unlock(m->mp->m_dentry->d_inode);
read_seqlock_excl(&mount_lock);
- put_mountpoint(where);
+ unpin_mountpoint(m);
read_sequnlock_excl(&mount_lock);
namespace_unlock();
}
@@ -2905,7 +2859,7 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
d_is_dir(mnt->mnt.mnt_root))
return -ENOTDIR;
- return attach_recursive_mnt(mnt, p, mp, 0);
+ return attach_recursive_mnt(mnt, p, mp);
}
/*
@@ -2954,10 +2908,8 @@ static int do_change_type(struct path *path, int ms_flags)
goto out_unlock;
}
- lock_mount_hash();
for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
change_mnt_propagation(m, type);
- unlock_mount_hash();
out_unlock:
namespace_unlock();
@@ -3031,26 +2983,21 @@ static inline bool may_copy_tree(struct path *path)
static struct mount *__do_loopback(struct path *old_path, int recurse)
{
- struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);
+ struct mount *old = real_mount(old_path->mnt);
if (IS_MNT_UNBINDABLE(old))
- return mnt;
+ return ERR_PTR(-EINVAL);
if (!may_copy_tree(old_path))
- return mnt;
+ return ERR_PTR(-EINVAL);
if (!recurse && __has_locked_children(old, old_path->dentry))
- return mnt;
+ return ERR_PTR(-EINVAL);
if (recurse)
- mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
+ return copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
else
- mnt = clone_mnt(old, old_path->dentry, 0);
-
- if (!IS_ERR(mnt))
- mnt->mnt.mnt_flags &= ~MNT_LOCKED;
-
- return mnt;
+ return clone_mnt(old, old_path->dentry, 0);
}
/*
@@ -3061,7 +3008,7 @@ static int do_loopback(struct path *path, const char *old_name,
{
struct path old_path;
struct mount *mnt = NULL, *parent;
- struct mountpoint *mp;
+ struct pinned_mountpoint mp = {};
int err;
if (!old_name || !*old_name)
return -EINVAL;
@@ -3073,11 +3020,9 @@ static int do_loopback(struct path *path, const char *old_name,
if (mnt_ns_loop(old_path.dentry))
goto out;
- mp = lock_mount(path);
- if (IS_ERR(mp)) {
- err = PTR_ERR(mp);
+ err = lock_mount(path, &mp);
+ if (err)
goto out;
- }
parent = real_mount(path->mnt);
if (!check_mnt(parent))
@@ -3089,14 +3034,14 @@ static int do_loopback(struct path *path, const char *old_name,
goto out2;
}
- err = graft_tree(mnt, parent, mp);
+ err = graft_tree(mnt, parent, mp.mp);
if (err) {
lock_mount_hash();
umount_tree(mnt, UMOUNT_SYNC);
unlock_mount_hash();
}
out2:
- unlock_mount(mp);
+ unlock_mount(&mp);
out:
path_put(&old_path);
return err;
@@ -3444,18 +3389,14 @@ static int do_set_group(struct path *from_path, struct path *to_path)
goto out;
if (IS_MNT_SLAVE(from)) {
- struct mount *m = from->mnt_master;
-
- list_add(&to->mnt_slave, &from->mnt_slave);
- to->mnt_master = m;
+ hlist_add_behind(&to->mnt_slave, &from->mnt_slave);
+ to->mnt_master = from->mnt_master;
}
if (IS_MNT_SHARED(from)) {
to->mnt_group_id = from->mnt_group_id;
list_add(&to->mnt_share, &from->mnt_share);
- lock_mount_hash();
set_mnt_shared(to);
- unlock_mount_hash();
}
err = 0;
@@ -3492,6 +3433,17 @@ static inline bool path_overmounted(const struct path *path)
return unlikely(!no_child);
}
+/*
+ * Check if there is a possibly empty chain of descent from p1 to p2.
+ * Locks: namespace_sem (shared) or mount_lock (read_seqlock_excl).
+ */
+static bool mount_is_ancestor(const struct mount *p1, const struct mount *p2)
+{
+ while (p2 != p1 && mnt_has_parent(p2))
+ p2 = p2->mnt_parent;
+ return p2 == p1;
+}
+
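
mount_is_ancestor() replaces two open-coded parent walks later in this
patch. A self-contained model of the walk over a parent-pointer tree;
struct mnt here is illustrative, not the kernel's struct mount:

	#include <assert.h>
	#include <stdbool.h>
	#include <stddef.h>

	struct mnt { struct mnt *parent; };

	static bool has_parent(const struct mnt *m)	/* mnt_has_parent() stand-in */
	{
		return m->parent != NULL;
	}

	static bool is_ancestor(const struct mnt *p1, const struct mnt *p2)
	{
		/* walk p2 upward until we hit p1 or fall off the root */
		while (p2 != p1 && has_parent(p2))
			p2 = p2->parent;
		return p2 == p1;	/* true for p1 == p2: "possibly empty chain" */
	}

	int main(void)
	{
		struct mnt root = { NULL }, mid = { &root }, leaf = { &mid };

		assert(is_ancestor(&root, &leaf));
		assert(is_ancestor(&leaf, &leaf));
		assert(!is_ancestor(&leaf, &root));
		return 0;
	}
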
/**
* can_move_mount_beneath - check that we can mount beneath the top mount
* @from: mount to mount beneath
@@ -3543,9 +3495,8 @@ static int can_move_mount_beneath(const struct path *from,
if (parent_mnt_to == current->nsproxy->mnt_ns->root)
return -EINVAL;
- for (struct mount *p = mnt_from; mnt_has_parent(p); p = p->mnt_parent)
- if (p == mnt_to)
- return -EINVAL;
+ if (mount_is_ancestor(mnt_to, mnt_from))
+ return -EINVAL;
/*
* If the parent mount propagates to the child mount this would
@@ -3630,27 +3581,20 @@ static int do_move_mount(struct path *old_path,
struct mount *p;
struct mount *old;
struct mount *parent;
- struct mountpoint *mp, *old_mp;
+ struct pinned_mountpoint mp;
int err;
- bool attached, beneath = flags & MNT_TREE_BENEATH;
+ bool beneath = flags & MNT_TREE_BENEATH;
- mp = do_lock_mount(new_path, beneath);
- if (IS_ERR(mp))
- return PTR_ERR(mp);
+ err = do_lock_mount(new_path, &mp, beneath);
+ if (err)
+ return err;
old = real_mount(old_path->mnt);
p = real_mount(new_path->mnt);
parent = old->mnt_parent;
- attached = mnt_has_parent(old);
- if (attached)
- flags |= MNT_TREE_MOVE;
- old_mp = old->mnt_mp;
ns = old->mnt_ns;
err = -EINVAL;
- /* The thing moved must be mounted... */
- if (!is_mounted(&old->mnt))
- goto out;
if (check_mnt(old)) {
/* if the source is in our namespace... */
@@ -3660,13 +3604,14 @@ static int do_move_mount(struct path *old_path,
/* ... and the target should be in our namespace */
if (!check_mnt(p))
goto out;
+ /* parent of the source should not be shared */
+ if (IS_MNT_SHARED(parent))
+ goto out;
} else {
/*
* otherwise the source must be the root of some anon namespace.
- * AV: check for mount being root of an anon namespace is worth
- * an inlined predicate...
*/
- if (!is_anon_ns(ns) || mnt_has_parent(old))
+ if (!anon_ns_root(old))
goto out;
/*
* Bail out early if the target is within the same namespace -
@@ -3689,20 +3634,14 @@ static int do_move_mount(struct path *old_path,
if (d_is_dir(new_path->dentry) !=
d_is_dir(old_path->dentry))
goto out;
- /*
- * Don't move a mount residing in a shared parent.
- */
- if (attached && IS_MNT_SHARED(parent))
- goto out;
if (beneath) {
- err = can_move_mount_beneath(old_path, new_path, mp);
+ err = can_move_mount_beneath(old_path, new_path, mp.mp);
if (err)
goto out;
err = -EINVAL;
p = p->mnt_parent;
- flags |= MNT_TREE_BENEATH;
}
/*
@@ -3714,30 +3653,12 @@ static int do_move_mount(struct path *old_path,
err = -ELOOP;
if (!check_for_nsfs_mounts(old))
goto out;
- for (; mnt_has_parent(p); p = p->mnt_parent)
- if (p == old)
- goto out;
-
- err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp, flags);
- if (err)
+ if (mount_is_ancestor(old, p))
goto out;
- /* if the mount is moved, it should no longer be expire
- * automatically */
- list_del_init(&old->mnt_expire);
- if (attached)
- put_mountpoint(old_mp);
+ err = attach_recursive_mnt(old, p, mp.mp);
out:
- unlock_mount(mp);
- if (!err) {
- if (attached) {
- mntput_no_expire(parent);
- } else {
- /* Make sure we notice when we leak mounts. */
- VFS_WARN_ON_ONCE(!mnt_ns_empty(ns));
- free_mnt_ns(ns);
- }
- }
+ unlock_mount(&mp);
return err;
}
@@ -3798,7 +3719,7 @@ static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
unsigned int mnt_flags)
{
struct vfsmount *mnt;
- struct mountpoint *mp;
+ struct pinned_mountpoint mp = {};
struct super_block *sb = fc->root->d_sb;
int error;
@@ -3819,13 +3740,12 @@ static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
mnt_warn_timestamp_expiry(mountpoint, mnt);
- mp = lock_mount(mountpoint);
- if (IS_ERR(mp)) {
- mntput(mnt);
- return PTR_ERR(mp);
+ error = lock_mount(mountpoint, &mp);
+ if (!error) {
+ error = do_add_mount(real_mount(mnt), mp.mp,
+ mountpoint, mnt_flags);
+ unlock_mount(&mp);
}
- error = do_add_mount(real_mount(mnt), mp, mountpoint, mnt_flags);
- unlock_mount(mp);
if (error < 0)
mntput(mnt);
return error;
@@ -3893,7 +3813,7 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
int finish_automount(struct vfsmount *m, const struct path *path)
{
struct dentry *dentry = path->dentry;
- struct mountpoint *mp;
+ struct pinned_mountpoint mp = {};
struct mount *mnt;
int err;
@@ -3925,14 +3845,13 @@ int finish_automount(struct vfsmount *m, const struct path *path)
err = 0;
goto discard_locked;
}
- mp = get_mountpoint(dentry);
- if (IS_ERR(mp)) {
- err = PTR_ERR(mp);
+ err = get_mountpoint(dentry, &mp);
+ if (err)
goto discard_locked;
- }
- err = do_add_mount(mnt, mp, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
- unlock_mount(mp);
+ err = do_add_mount(mnt, mp.mp, path,
+ path->mnt->mnt_flags | MNT_SHRINKABLE);
+ unlock_mount(&mp);
if (unlikely(err))
goto discard;
return 0;
@@ -3941,12 +3860,6 @@ discard_locked:
namespace_unlock();
inode_unlock(dentry->d_inode);
discard:
- /* remove m from any expiration list it may be on */
- if (!list_empty(&mnt->mnt_expire)) {
- namespace_lock();
- list_del_init(&mnt->mnt_expire);
- namespace_unlock();
- }
mntput(m);
return err;
}
@@ -3958,11 +3871,9 @@ discard:
*/
void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
{
- namespace_lock();
-
+ read_seqlock_excl(&mount_lock);
list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
-
- namespace_unlock();
+ read_sequnlock_excl(&mount_lock);
}
EXPORT_SYMBOL(mnt_set_expiry);
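
mnt_set_expiry() now serializes on mount_lock taken in its exclusive,
spinlock-like mode (read_seqlock_excl() grabs the underlying lock without
bumping the sequence) instead of the far heavier namespace_sem: a single
list_add_tail() only needs a short non-sleeping critical section. A
user-space sketch of that trade-off, with a spinlock guarding a short list
insertion; all names are illustrative:

	#include <pthread.h>

	struct node { struct node *next; };

	static pthread_spinlock_t list_lock;
	static struct node *expiry_head;

	static void set_expiry(struct node *n)
	{
		/* short, non-sleeping critical section: a spinlock suffices */
		pthread_spin_lock(&list_lock);
		n->next = expiry_head;
		expiry_head = n;
		pthread_spin_unlock(&list_lock);
	}

	int main(void)
	{
		struct node a = { NULL }, b = { NULL };

		pthread_spin_init(&list_lock, PTHREAD_PROCESS_PRIVATE);
		set_expiry(&a);
		set_expiry(&b);
		pthread_spin_destroy(&list_lock);
		return 0;
	}
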
@@ -4316,7 +4227,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
/* First pass: copy the tree topology */
copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
if (user_ns != ns->user_ns)
- copy_flags |= CL_SHARED_TO_SLAVE;
+ copy_flags |= CL_SLAVE;
new = copy_tree(old, old->mnt.mnt_root, copy_flags);
if (IS_ERR(new)) {
namespace_unlock();
@@ -4741,7 +4652,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
{
struct path new, old, root;
struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent;
- struct mountpoint *old_mp, *root_mp;
+ struct pinned_mountpoint old_mp = {};
int error;
if (!may_mount())
@@ -4762,9 +4673,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
goto out2;
get_fs_root(current->fs, &root);
- old_mp = lock_mount(&old);
- error = PTR_ERR(old_mp);
- if (IS_ERR(old_mp))
+ error = lock_mount(&old, &old_mp);
+ if (error)
goto out3;
error = -EINVAL;
@@ -4791,11 +4701,11 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
if (!path_mounted(&root))
goto out4; /* not a mountpoint */
if (!mnt_has_parent(root_mnt))
- goto out4; /* not attached */
+ goto out4; /* absolute root */
if (!path_mounted(&new))
goto out4; /* not a mountpoint */
if (!mnt_has_parent(new_mnt))
- goto out4; /* not attached */
+ goto out4; /* absolute root */
/* make sure we can reach put_old from new_root */
if (!is_path_reachable(old_mnt, old.dentry, &new))
goto out4;
@@ -4804,29 +4714,25 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
goto out4;
lock_mount_hash();
umount_mnt(new_mnt);
- root_mp = unhash_mnt(root_mnt); /* we'll need its mountpoint */
if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
new_mnt->mnt.mnt_flags |= MNT_LOCKED;
root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
}
- /* mount old root on put_old */
- attach_mnt(root_mnt, old_mnt, old_mp, false);
/* mount new_root on / */
- attach_mnt(new_mnt, root_parent, root_mp, false);
- mnt_add_count(root_parent, -1);
+ attach_mnt(new_mnt, root_parent, root_mnt->mnt_mp);
+ umount_mnt(root_mnt);
+ /* mount old root on put_old */
+ attach_mnt(root_mnt, old_mnt, old_mp.mp);
touch_mnt_namespace(current->nsproxy->mnt_ns);
/* A moved mount should not expire automatically */
list_del_init(&new_mnt->mnt_expire);
- put_mountpoint(root_mp);
unlock_mount_hash();
mnt_notify_add(root_mnt);
mnt_notify_add(new_mnt);
chroot_fs_refs(&root, &new);
error = 0;
out4:
- unlock_mount(old_mp);
- if (!error)
- mntput_no_expire(ex_parent);
+ unlock_mount(&old_mp);
out3:
path_put(&root);
out2:
@@ -5028,22 +4934,7 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
err = -EINVAL;
lock_mount_hash();
- /* Ensure that this isn't anything purely vfs internal. */
- if (!is_mounted(&mnt->mnt))
- goto out;
-
- /*
- * If this is an attached mount make sure it's located in the callers
- * mount namespace. If it's not don't let the caller interact with it.
- *
- * If this mount doesn't have a parent it's most often simply a
- * detached mount with an anonymous mount namespace. IOW, something
- * that's simply not attached yet. But there are apparently also users
- * that do change mount properties on the rootfs itself. That obviously
- * neither has a parent nor is it a detached mount so we cannot
- * unconditionally check for detached mounts.
- */
- if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt))
+ if (!anon_ns_root(mnt) && !check_mnt(mnt))
goto out;
/*
@@ -5290,16 +5181,12 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename,
kattr.kflags |= MOUNT_KATTR_RECURSE;
ret = wants_mount_setattr(uattr, usize, &kattr);
- if (ret < 0)
- return ret;
-
- if (ret) {
+ if (ret > 0) {
ret = do_mount_setattr(&file->f_path, &kattr);
- if (ret)
- return ret;
-
finish_mount_kattr(&kattr);
}
+ if (ret)
+ return ret;
}
fd = get_unused_fd_flags(flags & O_CLOEXEC);
@@ -5411,7 +5298,7 @@ static void statmount_mnt_basic(struct kstatmount *s)
s->sm.mnt_parent_id_old = m->mnt_parent->mnt_id;
s->sm.mnt_attr = mnt_to_attr_flags(&m->mnt);
s->sm.mnt_propagation = mnt_to_propagation_flags(m);
- s->sm.mnt_peer_group = IS_MNT_SHARED(m) ? m->mnt_group_id : 0;
+ s->sm.mnt_peer_group = m->mnt_group_id;
s->sm.mnt_master = IS_MNT_SLAVE(m) ? m->mnt_master->mnt_group_id : 0;
}
@@ -6217,7 +6104,6 @@ static void __init init_mount_tree(void)
root.mnt = mnt;
root.dentry = mnt->mnt_root;
- mnt->mnt_flags |= MNT_LOCKED;
set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root);
@@ -6264,8 +6150,12 @@ void put_mnt_ns(struct mnt_namespace *ns)
{
if (!refcount_dec_and_test(&ns->ns.count))
return;
- drop_collected_mounts(&ns->root->mnt);
- free_mnt_ns(ns);
+ namespace_lock();
+ emptied_ns = ns;
+ lock_mount_hash();
+ umount_tree(ns->root, 0);
+ unlock_mount_hash();
+ namespace_unlock();
}
struct vfsmount *kern_mount(struct file_system_type *type)
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 72a3e6db2524..f27ea5099a68 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -53,30 +53,40 @@ static struct folio *netfs_grab_folio_for_write(struct address_space *mapping,
* data written into the pagecache until we can find out from the server what
* the values actually are.
*/
-static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
- loff_t i_size, loff_t pos, size_t copied)
+void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
+ loff_t pos, size_t copied)
{
+ loff_t i_size, end = pos + copied;
blkcnt_t add;
size_t gap;
+ if (end <= i_size_read(inode))
+ return;
+
if (ctx->ops->update_i_size) {
- ctx->ops->update_i_size(inode, pos);
+ ctx->ops->update_i_size(inode, end);
return;
}
- i_size_write(inode, pos);
+ spin_lock(&inode->i_lock);
+
+ i_size = i_size_read(inode);
+ if (end > i_size) {
+ i_size_write(inode, end);
#if IS_ENABLED(CONFIG_FSCACHE)
- fscache_update_cookie(ctx->cache, NULL, &pos);
+ fscache_update_cookie(ctx->cache, NULL, &end);
#endif
- gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1));
- if (copied > gap) {
- add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE);
+ gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1));
+ if (copied > gap) {
+ add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE);
- inode->i_blocks = min_t(blkcnt_t,
- DIV_ROUND_UP(pos, SECTOR_SIZE),
- inode->i_blocks + add);
+ inode->i_blocks = min_t(blkcnt_t,
+ DIV_ROUND_UP(end, SECTOR_SIZE),
+ inode->i_blocks + add);
+ }
}
+ spin_unlock(&inode->i_lock);
}
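
The i_blocks update in netfs_update_i_size() charges only the bytes that
spill past the final, partially used 512-byte sector, capped at the block
count implied by the new size. A stand-alone rendering of that arithmetic
with illustrative values; SECTOR_SIZE and DIV_ROUND_UP are spelled out
locally:

	#include <stdio.h>

	#define SECTOR_SIZE 512ULL
	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	int main(void)
	{
		unsigned long long i_size = 1000;	/* old file size */
		unsigned long long copied = 600;	/* bytes appended at EOF */
		unsigned long long end = i_size + copied;
		unsigned long long i_blocks = DIV_ROUND_UP(i_size, SECTOR_SIZE);

		/* bytes left in the partially used final sector */
		unsigned long long gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1));
		if (copied > gap) {
			unsigned long long add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE);
			/* never exceed the block count implied by the new size */
			unsigned long long cap = DIV_ROUND_UP(end, SECTOR_SIZE);

			i_blocks = i_blocks + add < cap ? i_blocks + add : cap;
		}
		printf("end=%llu blocks=%llu\n", end, i_blocks);
		return 0;
	}
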
/**
@@ -111,7 +121,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
struct folio *folio = NULL, *writethrough = NULL;
unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
ssize_t written = 0, ret, ret2;
- loff_t i_size, pos = iocb->ki_pos;
+ loff_t pos = iocb->ki_pos;
size_t max_chunk = mapping_max_folio_size(mapping);
bool maybe_trouble = false;
@@ -344,10 +354,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
flush_dcache_folio(folio);
/* Update the inode size if we moved the EOF marker */
+ netfs_update_i_size(ctx, inode, pos, copied);
pos += copied;
- i_size = i_size_read(inode);
- if (pos > i_size)
- netfs_update_i_size(ctx, inode, i_size, pos, copied);
written += copied;
if (likely(!wreq)) {
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index fa9a5bf3c6d5..a16660ab7f83 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -9,20 +9,6 @@
#include <linux/uio.h>
#include "internal.h"
-static void netfs_cleanup_dio_write(struct netfs_io_request *wreq)
-{
- struct inode *inode = wreq->inode;
- unsigned long long end = wreq->start + wreq->transferred;
-
- if (!wreq->error &&
- i_size_read(inode) < end) {
- if (wreq->netfs_ops->update_i_size)
- wreq->netfs_ops->update_i_size(inode, end);
- else
- i_size_write(inode, end);
- }
-}
-
/*
* Perform an unbuffered write where we may have to do an RMW operation on an
* encrypted file. This can also be used for direct I/O writes.
@@ -98,7 +84,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
if (async)
wreq->iocb = iocb;
wreq->len = iov_iter_count(&wreq->buffer.iter);
- wreq->cleanup = netfs_cleanup_dio_write;
ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
if (ret < 0) {
_debug("begin = %zd", ret);
@@ -106,7 +91,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
}
if (!async) {
- trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip);
ret = netfs_wait_for_write(wreq);
if (ret > 0)
iocb->ki_pos += ret;
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index e2ee9183392b..d4f16fefd965 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -28,6 +28,12 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t offset, size_t len);
/*
+ * buffered_write.c
+ */
+void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
+ loff_t pos, size_t copied);
+
+/*
* main.c
*/
extern unsigned int netfs_debug;
@@ -267,14 +273,32 @@ static inline void netfs_wake_rreq_flag(struct netfs_io_request *rreq,
enum netfs_rreq_trace trace)
{
if (test_bit(rreq_flag, &rreq->flags)) {
- trace_netfs_rreq(rreq, trace);
clear_bit_unlock(rreq_flag, &rreq->flags);
smp_mb__after_atomic(); /* Set flag before task state */
+ trace_netfs_rreq(rreq, trace);
wake_up(&rreq->waitq);
}
}
/*
+ * Test the NETFS_RREQ_IN_PROGRESS flag, inserting an appropriate barrier.
+ */
+static inline bool netfs_check_rreq_in_progress(const struct netfs_io_request *rreq)
+{
+ /* Order read of flags before read of anything else, such as error. */
+ return test_bit_acquire(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
+}
+
+/*
+ * Test the NETFS_SREQ_IN_PROGRESS flag, inserting an appropriate barrier.
+ */
+static inline bool netfs_check_subreq_in_progress(const struct netfs_io_subrequest *subreq)
+{
+ /* Order read of flags before read of anything else, such as error. */
+ return test_bit_acquire(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+}
+
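
netfs_check_rreq_in_progress() exists so the read of the IN_PROGRESS flag
carries acquire semantics, ordering it before later reads of fields such
as rreq->error. A C11 sketch of the same contract, assuming the completion
side publishes the error with a release store before clearing the flag;
the types and names are stand-ins:

	#include <stdatomic.h>
	#include <stdbool.h>

	struct req {
		atomic_bool in_progress;
		int error;		/* plain field, published via the flag */
	};

	static void complete(struct req *r, int err)
	{
		r->error = err;
		/* release: error is visible before in_progress reads false */
		atomic_store_explicit(&r->in_progress, false, memory_order_release);
	}

	static bool check_in_progress(const struct req *r)
	{
		/* acquire: order this load before later reads of r->error */
		return atomic_load_explicit(&r->in_progress, memory_order_acquire);
	}

	static int collect(struct req *r)
	{
		if (check_in_progress(r))
			return -1;	/* still running: come back later */
		return r->error;	/* safe: acquire pairs with release */
	}

	int main(void)
	{
		struct req r = { .in_progress = true, .error = -1 };

		complete(&r, 0);
		return collect(&r);
	}
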
+/*
* fscache-cache.c
*/
#ifdef CONFIG_PROC_FS
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 3db401d269e7..73da6c9f5777 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -58,15 +58,15 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v)
if (v == &netfs_io_requests) {
seq_puts(m,
- "REQUEST OR REF FL ERR OPS COVERAGE\n"
- "======== == === == ==== === =========\n"
+ "REQUEST OR REF FLAG ERR OPS COVERAGE\n"
+ "======== == === ==== ==== === =========\n"
);
return 0;
}
rreq = list_entry(v, struct netfs_io_request, proc_link);
seq_printf(m,
- "%08x %s %3d %2lx %4ld %3d @%04llx %llx/%llx",
+ "%08x %s %3d %4lx %4ld %3d @%04llx %llx/%llx",
rreq->debug_id,
netfs_origins[rreq->origin],
refcount_read(&rreq->ref),
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 43b67a28a8fa..20748bcfbf59 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -356,22 +356,22 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
DEFINE_WAIT(myself);
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ if (!netfs_check_subreq_in_progress(subreq))
continue;
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_quiesce);
for (;;) {
prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
- if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ if (!netfs_check_subreq_in_progress(subreq))
break;
trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for);
schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
}
}
+ trace_netfs_rreq(rreq, netfs_rreq_trace_waited_quiesce);
finish_wait(&rreq->waitq, &myself);
}
@@ -381,7 +381,12 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
static int netfs_collect_in_app(struct netfs_io_request *rreq,
bool (*collector)(struct netfs_io_request *rreq))
{
- bool need_collect = false, inactive = true;
+ bool need_collect = false, inactive = true, done = true;
+
+ if (!netfs_check_rreq_in_progress(rreq)) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_recollect);
+ return 1; /* Done */
+ }
for (int i = 0; i < NR_IO_STREAMS; i++) {
struct netfs_io_subrequest *subreq;
@@ -395,14 +400,16 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq,
struct netfs_io_subrequest,
rreq_link);
if (subreq &&
- (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
+ (!netfs_check_subreq_in_progress(subreq) ||
test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
need_collect = true;
break;
}
+ if (subreq || !test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
+ done = false;
}
- if (!need_collect && !inactive)
+ if (!need_collect && !inactive && !done)
return 0; /* Sleep */
__set_current_state(TASK_RUNNING);
@@ -423,14 +430,13 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq,
/*
* Wait for a request to complete, successfully or otherwise.
*/
-static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq,
- bool (*collector)(struct netfs_io_request *rreq))
+static ssize_t netfs_wait_for_in_progress(struct netfs_io_request *rreq,
+ bool (*collector)(struct netfs_io_request *rreq))
{
DEFINE_WAIT(myself);
ssize_t ret;
for (;;) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
@@ -440,18 +446,22 @@ static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq,
case 1:
goto all_collected;
case 2:
+ if (!netfs_check_rreq_in_progress(rreq))
+ break;
+ cond_resched();
continue;
}
}
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
+ if (!netfs_check_rreq_in_progress(rreq))
break;
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
}
all_collected:
+ trace_netfs_rreq(rreq, netfs_rreq_trace_waited_ip);
finish_wait(&rreq->waitq, &myself);
ret = rreq->error;
@@ -478,12 +488,12 @@ all_collected:
ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
{
- return netfs_wait_for_request(rreq, netfs_read_collection);
+ return netfs_wait_for_in_progress(rreq, netfs_read_collection);
}
ssize_t netfs_wait_for_write(struct netfs_io_request *rreq)
{
- return netfs_wait_for_request(rreq, netfs_write_collection);
+ return netfs_wait_for_in_progress(rreq, netfs_write_collection);
}
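
netfs_wait_for_in_progress() uses the kernel's prepare_to_wait() /
schedule() / finish_wait() idiom: re-test the condition after joining the
waitqueue so a wakeup landing between the test and the sleep is never
lost. A condition-variable sketch of the same loop shape in user-space C;
the names are stand-ins for the kernel primitives:

	#include <pthread.h>
	#include <stdbool.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
	static bool in_progress = true;

	static int wait_for_done(void)
	{
		pthread_mutex_lock(&lock);
		/* always re-test the predicate after waking: wakeups can be
		 * spurious, and the condition is only stable under the lock */
		while (in_progress)
			pthread_cond_wait(&cond, &lock);
		pthread_mutex_unlock(&lock);
		return 0;
	}

	static void mark_done(void)
	{
		pthread_mutex_lock(&lock);
		in_progress = false;
		pthread_cond_signal(&cond);	/* wake_up() stand-in */
		pthread_mutex_unlock(&lock);
	}

	int main(void)
	{
		mark_done();
		return wait_for_done();
	}
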
/*
@@ -494,10 +504,8 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq,
{
DEFINE_WAIT(myself);
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
-
for (;;) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
@@ -507,19 +515,23 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq,
case 1:
goto all_collected;
case 2:
+ if (!netfs_check_rreq_in_progress(rreq) ||
+ !test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
+ break;
+ cond_resched();
continue;
}
}
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) ||
+ if (!netfs_check_rreq_in_progress(rreq) ||
!test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
break;
schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
}
all_collected:
+ trace_netfs_rreq(rreq, netfs_rreq_trace_waited_pause);
finish_wait(&rreq->waitq, &myself);
}
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index 96ee18af28ef..3e804da1e1eb 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -218,7 +218,7 @@ reassess:
stream->collected_to = front->start;
}
- if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags))
+ if (netfs_check_subreq_in_progress(front))
notes |= HIT_PENDING;
smp_rmb(); /* Read counters after IN_PROGRESS flag. */
transferred = READ_ONCE(front->transferred);
@@ -293,7 +293,9 @@ reassess:
spin_lock(&rreq->lock);
remove = front;
- trace_netfs_sreq(front, netfs_sreq_trace_discard);
+ trace_netfs_sreq(front,
+ notes & ABANDON_SREQ ?
+ netfs_sreq_trace_abandoned : netfs_sreq_trace_consumed);
list_del_init(&front->rreq_link);
front = list_first_entry_or_null(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
@@ -353,9 +355,11 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
if (rreq->iocb) {
rreq->iocb->ki_pos += rreq->transferred;
- if (rreq->iocb->ki_complete)
+ if (rreq->iocb->ki_complete) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete);
rreq->iocb->ki_complete(
rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
+ }
}
if (rreq->netfs_ops->done)
rreq->netfs_ops->done(rreq);
@@ -379,9 +383,11 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq)
if (rreq->iocb) {
rreq->iocb->ki_pos += rreq->transferred;
- if (rreq->iocb->ki_complete)
+ if (rreq->iocb->ki_complete) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete);
rreq->iocb->ki_complete(
rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
+ }
}
if (rreq->netfs_ops->done)
rreq->netfs_ops->done(rreq);
@@ -445,7 +451,7 @@ void netfs_read_collection_worker(struct work_struct *work)
struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
netfs_see_request(rreq, netfs_rreq_trace_see_work);
- if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
+ if (netfs_check_rreq_in_progress(rreq)) {
if (netfs_read_collection(rreq))
/* Drop the ref from the IN_PROGRESS flag. */
netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
diff --git a/fs/netfs/read_pgpriv2.c b/fs/netfs/read_pgpriv2.c
index 5bbe906a551d..8097bc069c1d 100644
--- a/fs/netfs/read_pgpriv2.c
+++ b/fs/netfs/read_pgpriv2.c
@@ -110,6 +110,8 @@ static struct netfs_io_request *netfs_pgpriv2_begin_copy_to_cache(
if (!creq->io_streams[1].avail)
goto cancel_put;
+ __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &creq->flags);
+ trace_netfs_copy2cache(rreq, creq);
trace_netfs_write(creq, netfs_write_trace_copy_to_cache);
netfs_stat(&netfs_n_wh_copy_to_cache);
rreq->copy_to_cache = creq;
@@ -154,6 +156,9 @@ void netfs_pgpriv2_end_copy_to_cache(struct netfs_io_request *rreq)
netfs_issue_write(creq, &creq->io_streams[1]);
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &creq->flags);
+ trace_netfs_rreq(rreq, netfs_rreq_trace_end_copy_to_cache);
+ if (list_empty_careful(&creq->io_streams[1].subrequests))
+ netfs_wake_collector(creq);
netfs_put_request(creq, netfs_rreq_trace_put_return);
creq->copy_to_cache = NULL;
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index e2b102ffb768..0f3a36852a4d 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -240,7 +240,7 @@ reassess_streams:
}
/* Stall if the front is still undergoing I/O. */
- if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) {
+ if (netfs_check_subreq_in_progress(front)) {
notes |= HIT_PENDING;
break;
}
@@ -393,8 +393,10 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
ictx->ops->invalidate_cache(wreq);
}
- if (wreq->cleanup)
- wreq->cleanup(wreq);
+ if ((wreq->origin == NETFS_UNBUFFERED_WRITE ||
+ wreq->origin == NETFS_DIO_WRITE) &&
+ !wreq->error)
+ netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred);
if (wreq->origin == NETFS_DIO_WRITE &&
wreq->mapping->nrpages) {
@@ -419,9 +421,11 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
if (wreq->iocb) {
size_t written = min(wreq->transferred, wreq->len);
wreq->iocb->ki_pos += written;
- if (wreq->iocb->ki_complete)
+ if (wreq->iocb->ki_complete) {
+ trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete);
wreq->iocb->ki_complete(
wreq->iocb, wreq->error ? wreq->error : written);
+ }
wreq->iocb = VFS_PTR_POISON;
}
@@ -434,7 +438,7 @@ void netfs_write_collection_worker(struct work_struct *work)
struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
netfs_see_request(rreq, netfs_rreq_trace_see_work);
- if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
+ if (netfs_check_rreq_in_progress(rreq)) {
if (netfs_write_collection(rreq))
/* Drop the ref from the IN_PROGRESS flag. */
netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c
index 9d1d8a8bab72..fc9c3e0d34d8 100644
--- a/fs/netfs/write_retry.c
+++ b/fs/netfs/write_retry.c
@@ -146,14 +146,13 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
subreq = netfs_alloc_subrequest(wreq);
subreq->source = to->source;
subreq->start = start;
- subreq->debug_index = atomic_inc_return(&wreq->subreq_counter);
subreq->stream_nr = to->stream_nr;
subreq->retry_count = 1;
trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
netfs_sreq_trace_new);
- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_split);
list_add(&subreq->rreq_link, &to->rreq_link);
to = list_next_entry(to, rreq_link);
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index d8d50a88de04..d526f5ba7887 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -141,24 +141,18 @@ static const struct rpc_pipe_ops bl_upcall_ops = {
.destroy_msg = bl_pipe_destroy_msg,
};
-static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb,
+static int nfs4blocklayout_register_sb(struct super_block *sb,
struct rpc_pipe *pipe)
{
- struct dentry *dir, *dentry;
+ struct dentry *dir;
+ int err;
dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME);
if (dir == NULL)
- return ERR_PTR(-ENOENT);
- dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe);
+ return -ENOENT;
+ err = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe);
dput(dir);
- return dentry;
-}
-
-static void nfs4blocklayout_unregister_sb(struct super_block *sb,
- struct rpc_pipe *pipe)
-{
- if (pipe->dentry)
- rpc_unlink(pipe->dentry);
+ return err;
}
static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
@@ -167,7 +161,6 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
struct super_block *sb = ptr;
struct net *net = sb->s_fs_info;
struct nfs_net *nn = net_generic(net, nfs_net_id);
- struct dentry *dentry;
int ret = 0;
if (!try_module_get(THIS_MODULE))
@@ -180,16 +173,10 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
switch (event) {
case RPC_PIPEFS_MOUNT:
- dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe);
- if (IS_ERR(dentry)) {
- ret = PTR_ERR(dentry);
- break;
- }
- nn->bl_device_pipe->dentry = dentry;
+ ret = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe);
break;
case RPC_PIPEFS_UMOUNT:
- if (nn->bl_device_pipe->dentry)
- nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe);
+ rpc_unlink(nn->bl_device_pipe);
break;
default:
ret = -ENOTSUPP;
@@ -203,18 +190,17 @@ static struct notifier_block nfs4blocklayout_block = {
.notifier_call = rpc_pipefs_event,
};
-static struct dentry *nfs4blocklayout_register_net(struct net *net,
- struct rpc_pipe *pipe)
+static int nfs4blocklayout_register_net(struct net *net, struct rpc_pipe *pipe)
{
struct super_block *pipefs_sb;
- struct dentry *dentry;
+ int ret;
pipefs_sb = rpc_get_sb_net(net);
if (!pipefs_sb)
- return NULL;
- dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe);
+ return 0;
+ ret = nfs4blocklayout_register_sb(pipefs_sb, pipe);
rpc_put_sb_net(net);
- return dentry;
+ return ret;
}
static void nfs4blocklayout_unregister_net(struct net *net,
@@ -224,7 +210,7 @@ static void nfs4blocklayout_unregister_net(struct net *net,
pipefs_sb = rpc_get_sb_net(net);
if (pipefs_sb) {
- nfs4blocklayout_unregister_sb(pipefs_sb, pipe);
+ rpc_unlink(pipe);
rpc_put_sb_net(net);
}
}
@@ -232,20 +218,17 @@ static void nfs4blocklayout_unregister_net(struct net *net,
static int nfs4blocklayout_net_init(struct net *net)
{
struct nfs_net *nn = net_generic(net, nfs_net_id);
- struct dentry *dentry;
+ int err;
mutex_init(&nn->bl_mutex);
init_waitqueue_head(&nn->bl_wq);
nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0);
if (IS_ERR(nn->bl_device_pipe))
return PTR_ERR(nn->bl_device_pipe);
- dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe);
- if (IS_ERR(dentry)) {
+ err = nfs4blocklayout_register_net(net, nn->bl_device_pipe);
+ if (unlikely(err))
rpc_destroy_pipe_data(nn->bl_device_pipe);
- return PTR_ERR(dentry);
- }
- nn->bl_device_pipe->dentry = dentry;
- return 0;
+ return err;
}
static void nfs4blocklayout_net_exit(struct net *net)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 033feeab8c34..2bd557ca1af9 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -342,12 +342,14 @@ static bool nfs_want_read_modify_write(struct file *file, struct folio *folio,
* If the writer ends up delaying the write, the writer needs to
* increment the page use counts until he is done with the page.
*/
-static int nfs_write_begin(struct file *file, struct address_space *mapping,
+static int nfs_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len, struct folio **foliop,
void **fsdata)
{
fgf_t fgp = FGP_WRITEBEGIN;
struct folio *folio;
+ struct file *file = iocb->ki_filp;
int once_thru = 0;
int ret;
@@ -377,10 +379,12 @@ start:
return ret;
}
-static int nfs_write_end(struct file *file, struct address_space *mapping,
+static int nfs_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct folio *folio, void *fsdata)
{
+ struct file *file = iocb->ki_filp;
struct nfs_open_context *ctx = nfs_file_open_context(file);
unsigned offset = offset_in_folio(folio, pos);
int status;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index df4807460596..4bea008dbebd 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1105,6 +1105,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
}
static int ff_layout_async_handle_error_v4(struct rpc_task *task,
+ u32 op_status,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
@@ -1115,34 +1116,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
- switch (task->tk_status) {
- case -NFS4ERR_BADSESSION:
- case -NFS4ERR_BADSLOT:
- case -NFS4ERR_BAD_HIGH_SLOT:
- case -NFS4ERR_DEADSESSION:
- case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
- case -NFS4ERR_SEQ_FALSE_RETRY:
- case -NFS4ERR_SEQ_MISORDERED:
+ switch (op_status) {
+ case NFS4_OK:
+ case NFS4ERR_NXIO:
+ break;
+ case NFSERR_PERM:
+ if (!task->tk_xprt)
+ break;
+ xprt_force_disconnect(task->tk_xprt);
+ goto out_retry;
+ case NFS4ERR_BADSESSION:
+ case NFS4ERR_BADSLOT:
+ case NFS4ERR_BAD_HIGH_SLOT:
+ case NFS4ERR_DEADSESSION:
+ case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ case NFS4ERR_SEQ_FALSE_RETRY:
+ case NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session. Exchangeid "
"flags 0x%x\n", __func__, task->tk_status,
clp->cl_exchange_flags);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
- break;
- case -NFS4ERR_DELAY:
+ goto out_retry;
+ case NFS4ERR_DELAY:
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
fallthrough;
- case -NFS4ERR_GRACE:
+ case NFS4ERR_GRACE:
rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
- break;
- case -NFS4ERR_RETRY_UNCACHED_REP:
- break;
+ goto out_retry;
+ case NFS4ERR_RETRY_UNCACHED_REP:
+ goto out_retry;
/* Invalidate Layout errors */
- case -NFS4ERR_PNFS_NO_LAYOUT:
- case -ESTALE: /* mapped NFS4ERR_STALE */
- case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
- case -EISDIR: /* mapped NFS4ERR_ISDIR */
- case -NFS4ERR_FHEXPIRED:
- case -NFS4ERR_WRONG_TYPE:
+ case NFS4ERR_PNFS_NO_LAYOUT:
+ case NFS4ERR_STALE:
+ case NFS4ERR_BADHANDLE:
+ case NFS4ERR_ISDIR:
+ case NFS4ERR_FHEXPIRED:
+ case NFS4ERR_WRONG_TYPE:
dprintk("%s Invalid layout error %d\n", __func__,
task->tk_status);
/*
@@ -1155,6 +1164,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
pnfs_destroy_layout(NFS_I(inode));
rpc_wake_up(&tbl->slot_tbl_waitq);
goto reset;
+ default:
+ break;
+ }
+
+ switch (task->tk_status) {
/* RPC connection errors */
case -ENETDOWN:
case -ENETUNREACH:
@@ -1174,27 +1188,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
rpc_wake_up(&tbl->slot_tbl_waitq);
- fallthrough;
+ break;
default:
- if (ff_layout_avoid_mds_available_ds(lseg))
- return -NFS4ERR_RESET_TO_PNFS;
-reset:
- dprintk("%s Retry through MDS. Error %d\n", __func__,
- task->tk_status);
- return -NFS4ERR_RESET_TO_MDS;
+ break;
}
+
+ if (ff_layout_avoid_mds_available_ds(lseg))
+ return -NFS4ERR_RESET_TO_PNFS;
+reset:
+ dprintk("%s Retry through MDS. Error %d\n", __func__,
+ task->tk_status);
+ return -NFS4ERR_RESET_TO_MDS;
+
+out_retry:
task->tk_status = 0;
return -EAGAIN;
}
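
The reworked v4 handler dispatches in two layers: first on the NFSv4
operation status returned by the server, then, for statuses that do not
settle the matter, on the local RPC transport error in task->tk_status. A
compact sketch of that shape; the numeric values and actions below are
illustrative, not the kernel's full tables:

	#include <stdio.h>

	enum action { RETRY, RESET_TO_MDS, RESET_TO_PNFS };

	static enum action handle_error(int op_status, int rpc_status)
	{
		switch (op_status) {	/* layer 1: protocol status */
		case 0:			/* NFS4_OK: fall through to layer 2 */
			break;
		case 10008:		/* e.g. NFS4ERR_DELAY */
			return RETRY;
		default:
			break;
		}

		switch (rpc_status) {	/* layer 2: transport errno */
		case -111:		/* e.g. -ECONNREFUSED */
			return RESET_TO_PNFS;
		default:
			return RESET_TO_MDS;
		}
	}

	int main(void)
	{
		printf("%d\n", handle_error(0, -111));
		return 0;
	}
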
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
+ u32 op_status,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
u32 idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
+ switch (op_status) {
+ case NFS_OK:
+ case NFSERR_NXIO:
+ break;
+ case NFSERR_PERM:
+ if (!task->tk_xprt)
+ break;
+ xprt_force_disconnect(task->tk_xprt);
+ goto out_retry;
+ case NFSERR_ACCES:
+ case NFSERR_BADHANDLE:
+ case NFSERR_FBIG:
+ case NFSERR_IO:
+ case NFSERR_NOSPC:
+ case NFSERR_ROFS:
+ case NFSERR_STALE:
+ goto out_reset_to_pnfs;
+ case NFSERR_JUKEBOX:
+ nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
+ goto out_retry;
+ default:
+ break;
+ }
+
switch (task->tk_status) {
/* File access problems. Don't mark the device as unavailable */
case -EACCES:
@@ -1218,6 +1261,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
}
+out_reset_to_pnfs:
/* FIXME: Need to prevent infinite looping here. */
return -NFS4ERR_RESET_TO_PNFS;
out_retry:
@@ -1228,6 +1272,7 @@ out_retry:
}
static int ff_layout_async_handle_error(struct rpc_task *task,
+ u32 op_status,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
@@ -1246,10 +1291,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
switch (vers) {
case 3:
- return ff_layout_async_handle_error_v3(task, clp, lseg, idx);
- case 4:
- return ff_layout_async_handle_error_v4(task, state, clp,
+ return ff_layout_async_handle_error_v3(task, op_status, clp,
lseg, idx);
+ case 4:
+ return ff_layout_async_handle_error_v4(task, op_status, state,
+ clp, lseg, idx);
default:
/* should never happen */
WARN_ON_ONCE(1);
@@ -1302,6 +1348,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
switch (status) {
case NFS4ERR_DELAY:
case NFS4ERR_GRACE:
+ case NFS4ERR_PERM:
break;
case NFS4ERR_NXIO:
ff_layout_mark_ds_unreachable(lseg, idx);
@@ -1334,7 +1381,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
trace_ff_layout_read_error(hdr, task->tk_status);
}
- err = ff_layout_async_handle_error(task, hdr->args.context->state,
+ err = ff_layout_async_handle_error(task, hdr->res.op_status,
+ hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
@@ -1507,7 +1555,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
trace_ff_layout_write_error(hdr, task->tk_status);
}
- err = ff_layout_async_handle_error(task, hdr->args.context->state,
+ err = ff_layout_async_handle_error(task, hdr->res.op_status,
+ hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
@@ -1556,8 +1605,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
trace_ff_layout_commit_error(data, task->tk_status);
}
- err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
- data->lseg, data->ds_commit_index);
+ err = ff_layout_async_handle_error(task, data->res.op_status,
+ NULL, data->ds_clp, data->lseg,
+ data->ds_commit_index);
trace_nfs4_pnfs_commit_ds(data, err);
switch (err) {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8ab7868807a7..a2fa6bc4d74e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2589,15 +2589,26 @@ EXPORT_SYMBOL_GPL(nfs_net_id);
static int nfs_net_init(struct net *net)
{
struct nfs_net *nn = net_generic(net, nfs_net_id);
+ int err;
nfs_clients_init(net);
if (!rpc_proc_register(net, &nn->rpcstats)) {
- nfs_clients_exit(net);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_proc_rpc;
}
- return nfs_fs_proc_net_init(net);
+ err = nfs_fs_proc_net_init(net);
+ if (err)
+ goto err_proc_nfs;
+
+ return 0;
+
+err_proc_nfs:
+ rpc_proc_unregister(net, "nfs");
+err_proc_rpc:
+ nfs_clients_exit(net);
+ return err;
}
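
nfs_net_init() now unwinds in reverse order of setup through labelled
gotos, the usual kernel idiom for multi-step initialization. A generic
sketch of the ladder with placeholder steps:

	#include <stdio.h>

	static int step_a(void) { return 0; }
	static void undo_a(void) { }
	static int step_b(void) { return -1; }	/* pretend this one fails */

	static int net_init(void)
	{
		int err;

		err = step_a();
		if (err)
			goto err_a;
		err = step_b();
		if (err)
			goto err_b;
		return 0;

	err_b:
		undo_a();	/* undo only what succeeded, in reverse order */
	err_a:
		return err;
	}

	int main(void)
	{
		printf("init: %d\n", net_init());
		return 0;
	}
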
static void nfs_net_exit(struct net *net)
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 25a7c771cfd8..00932500fce4 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -424,26 +424,16 @@ static void nfs_idmap_pipe_destroy(struct dentry *dir,
struct rpc_pipe_dir_object *pdo)
{
struct idmap *idmap = pdo->pdo_data;
- struct rpc_pipe *pipe = idmap->idmap_pipe;
- if (pipe->dentry) {
- rpc_unlink(pipe->dentry);
- pipe->dentry = NULL;
- }
+ rpc_unlink(idmap->idmap_pipe);
}
static int nfs_idmap_pipe_create(struct dentry *dir,
struct rpc_pipe_dir_object *pdo)
{
struct idmap *idmap = pdo->pdo_data;
- struct rpc_pipe *pipe = idmap->idmap_pipe;
- struct dentry *dentry;
- dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
- pipe->dentry = dentry;
- return 0;
+ return rpc_mkpipe_dentry(dir, "idmap", idmap, idmap->idmap_pipe);
}
static const struct rpc_pipe_dir_object_ops nfs_idmap_pipe_dir_object_ops = {
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 3adb7d0dbec7..1a7ec68bde15 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2059,8 +2059,10 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
static void nfs_layoutget_end(struct pnfs_layout_hdr *lo)
{
if (atomic_dec_and_test(&lo->plh_outstanding) &&
- test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags))
+ test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) {
+ smp_mb__after_atomic();
wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN);
+ }
}
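
The smp_mb__after_atomic() added here makes the cleared NFS_LAYOUT_DRAIN
bit globally visible before wake_up_bit() examines the waitqueue, closing
the classic lost-wakeup window between "clear flag" and "is anyone
waiting?". A C11 analogue with an explicit fence between the flag update
and the wake-side check; every name below is a stand-in, not kernel API:

	#include <stdatomic.h>
	#include <stdbool.h>

	static atomic_bool drain_flag = true;
	static atomic_bool waiter_queued;	/* set by the sleeping side */

	static void wake_waiter(void) { /* wake_up_bit() stand-in */ }

	static void layoutget_end(void)
	{
		atomic_store_explicit(&drain_flag, false, memory_order_relaxed);
		/* full fence: the flag clear must be visible before we read
		 * whether anyone queued themselves to wait on it */
		atomic_thread_fence(memory_order_seq_cst);
		if (atomic_load_explicit(&waiter_queued, memory_order_relaxed))
			wake_waiter();
	}

	int main(void)
	{
		layoutget_end();
		return 0;
	}
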
static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 91b5503b6f74..72dee6f3050e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1183,7 +1183,7 @@ static int nfs_set_super(struct super_block *s, struct fs_context *fc)
struct nfs_server *server = fc->s_fs_info;
int ret;
- s->s_d_op = server->nfs_client->rpc_ops->dentry_ops;
+ set_default_d_op(s, server->nfs_client->rpc_ops->dentry_ops);
ret = set_anon_super(s, server);
if (ret == 0)
server->s_dev = s->s_dev;
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 08a20e5bcf7f..19078a043e85 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -178,11 +178,13 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
{
struct iomap *iomaps;
int nr_iomaps;
+ __be32 nfserr;
- nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
- lcp->lc_up_len, &iomaps, i_blocksize(inode));
- if (nr_iomaps < 0)
- return nfserrno(nr_iomaps);
+ nfserr = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
+ lcp->lc_up_len, &iomaps, &nr_iomaps,
+ i_blocksize(inode));
+ if (nfserr != nfs_ok)
+ return nfserr;
return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
}
@@ -316,11 +318,13 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode,
{
struct iomap *iomaps;
int nr_iomaps;
+ __be32 nfserr;
- nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
- lcp->lc_up_len, &iomaps, i_blocksize(inode));
- if (nr_iomaps < 0)
- return nfserrno(nr_iomaps);
+ nfserr = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
+ lcp->lc_up_len, &iomaps, &nr_iomaps,
+ i_blocksize(inode));
+ if (nfserr != nfs_ok)
+ return nfserr;
return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
}
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index ce78f74715ee..bcf21fde9120 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -112,35 +112,46 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
return 0;
}
-int
+/**
+ * nfsd4_block_decode_layoutupdate - decode the block layout extent array
+ * @p: pointer to the xdr data
+ * @len: number of bytes to decode
+ * @iomapp: pointer to store the decoded extent array
+ * @nr_iomapsp: pointer to store the number of extents
+ * @block_size: alignment of extent offset and length
+ *
+ * This function decodes the opaque field of the layoutupdate4 structure
+ * in a layoutcommit request for the block layout driver. The field is
+ * actually an array of extents sent by the client. It also checks that
+ * the file offset, storage offset and length of each extent are aligned
+ * by @block_size.
+ *
+ * Return values:
+ * %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
+ * %nfserr_bad_xdr: The encoded array in @p is invalid
+ * %nfserr_inval: An unaligned extent found
+ * %nfserr_delay: Failed to allocate memory for @iomapp
+ */
+__be32
nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
- u32 block_size)
+ int *nr_iomapsp, u32 block_size)
{
struct iomap *iomaps;
u32 nr_iomaps, i;
- if (len < sizeof(u32)) {
- dprintk("%s: extent array too small: %u\n", __func__, len);
- return -EINVAL;
- }
+ if (len < sizeof(u32))
+ return nfserr_bad_xdr;
len -= sizeof(u32);
- if (len % PNFS_BLOCK_EXTENT_SIZE) {
- dprintk("%s: extent array invalid: %u\n", __func__, len);
- return -EINVAL;
- }
+ if (len % PNFS_BLOCK_EXTENT_SIZE)
+ return nfserr_bad_xdr;
nr_iomaps = be32_to_cpup(p++);
- if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) {
- dprintk("%s: extent array size mismatch: %u/%u\n",
- __func__, len, nr_iomaps);
- return -EINVAL;
- }
+ if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE)
+ return nfserr_bad_xdr;
iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
- if (!iomaps) {
- dprintk("%s: failed to allocate extent array\n", __func__);
- return -ENOMEM;
- }
+ if (!iomaps)
+ return nfserr_delay;
for (i = 0; i < nr_iomaps; i++) {
struct pnfs_block_extent bex;
@@ -150,26 +161,18 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
p = xdr_decode_hyper(p, &bex.foff);
if (bex.foff & (block_size - 1)) {
- dprintk("%s: unaligned offset 0x%llx\n",
- __func__, bex.foff);
goto fail;
}
p = xdr_decode_hyper(p, &bex.len);
if (bex.len & (block_size - 1)) {
- dprintk("%s: unaligned length 0x%llx\n",
- __func__, bex.foff);
goto fail;
}
p = xdr_decode_hyper(p, &bex.soff);
if (bex.soff & (block_size - 1)) {
- dprintk("%s: unaligned disk offset 0x%llx\n",
- __func__, bex.soff);
goto fail;
}
bex.es = be32_to_cpup(p++);
if (bex.es != PNFS_BLOCK_READWRITE_DATA) {
- dprintk("%s: incorrect extent state %d\n",
- __func__, bex.es);
goto fail;
}
@@ -178,59 +181,71 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
}
*iomapp = iomaps;
- return nr_iomaps;
+ *nr_iomapsp = nr_iomaps;
+ return nfs_ok;
fail:
kfree(iomaps);
- return -EINVAL;
+ return nfserr_inval;
}
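
The alignment tests in both decoders use the mask form
val & (block_size - 1), which is equivalent to val % block_size == 0 only
when block_size is a power of two (true for the block sizes passed in
here). A tiny demonstration of the idiom:

	#include <assert.h>
	#include <stdint.h>

	static int is_aligned(uint64_t val, uint32_t block_size)
	{
		/* equivalent to val % block_size == 0 for power-of-two sizes */
		return (val & (block_size - 1)) == 0;
	}

	int main(void)
	{
		assert(is_aligned(8192, 4096));
		assert(!is_aligned(8193, 4096));
		assert(is_aligned(0, 512));
		return 0;
	}
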
-int
+/**
+ * nfsd4_scsi_decode_layoutupdate - decode the scsi layout extent array
+ * @p: pointer to the xdr data
+ * @len: number of bytes to decode
+ * @iomapp: pointer to store the decoded extent array
+ * @nr_iomapsp: pointer to store the number of extents
+ * @block_size: alignment of extent offset and length
+ *
+ * This function decodes the opaque field of the layoutupdate4 structure
+ * in a layoutcommit request for the scsi layout driver. The field is
+ * actually an array of extents sent by the client. It also checks that
+ * the offset and length of each extent are aligned by @block_size.
+ *
+ * Return values:
+ * %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
+ * %nfserr_bad_xdr: The encoded array in @p is invalid
+ * %nfserr_inval: An unaligned extent found
+ * %nfserr_delay: Failed to allocate memory for @iomapp
+ */
+__be32
nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
- u32 block_size)
+ int *nr_iomapsp, u32 block_size)
{
struct iomap *iomaps;
u32 nr_iomaps, expected, i;
- if (len < sizeof(u32)) {
- dprintk("%s: extent array too small: %u\n", __func__, len);
- return -EINVAL;
- }
+ if (len < sizeof(u32))
+ return nfserr_bad_xdr;
nr_iomaps = be32_to_cpup(p++);
expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE;
- if (len != expected) {
- dprintk("%s: extent array size mismatch: %u/%u\n",
- __func__, len, expected);
- return -EINVAL;
- }
+ if (len != expected)
+ return nfserr_bad_xdr;
iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
- if (!iomaps) {
- dprintk("%s: failed to allocate extent array\n", __func__);
- return -ENOMEM;
- }
+ if (!iomaps)
+ return nfserr_delay;
for (i = 0; i < nr_iomaps; i++) {
u64 val;
p = xdr_decode_hyper(p, &val);
if (val & (block_size - 1)) {
- dprintk("%s: unaligned offset 0x%llx\n", __func__, val);
goto fail;
}
iomaps[i].offset = val;
p = xdr_decode_hyper(p, &val);
if (val & (block_size - 1)) {
- dprintk("%s: unaligned length 0x%llx\n", __func__, val);
goto fail;
}
iomaps[i].length = val;
}
*iomapp = iomaps;
- return nr_iomaps;
+ *nr_iomapsp = nr_iomaps;
+ return nfs_ok;
fail:
kfree(iomaps);
- return -EINVAL;
+ return nfserr_inval;
}
diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h
index 4e28ac8f1127..15b3569f3d9a 100644
--- a/fs/nfsd/blocklayoutxdr.h
+++ b/fs/nfsd/blocklayoutxdr.h
@@ -54,9 +54,9 @@ __be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
const struct nfsd4_getdeviceinfo *gdp);
__be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
const struct nfsd4_layoutget *lgp);
-int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
- u32 block_size);
-int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
- u32 block_size);
+__be32 nfsd4_block_decode_layoutupdate(__be32 *p, u32 len,
+ struct iomap **iomapp, int *nr_iomapsp, u32 block_size);
+__be32 nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len,
+ struct iomap **iomapp, int *nr_iomapsp, u32 block_size);
#endif /* _NFSD_BLOCKLAYOUTXDR_H */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 88ae410b4113..cadfc2bae60e 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -82,8 +82,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
int len;
struct auth_domain *dom = NULL;
int err;
- int fsidtype;
- char *ep;
+ u8 fsidtype;
struct svc_expkey key;
struct svc_expkey *ek = NULL;
@@ -109,10 +108,9 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
err = -EINVAL;
if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out;
- fsidtype = simple_strtoul(buf, &ep, 10);
- if (*ep)
+ if (kstrtou8(buf, 10, &fsidtype))
goto out;
- dprintk("found fsidtype %d\n", fsidtype);
+ dprintk("found fsidtype %u\n", fsidtype);
if (key_len(fsidtype)==0) /* invalid type */
goto out;
if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index 4d92b99c1ffd..b9c0adb3ce09 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -88,7 +88,7 @@ struct svc_expkey {
struct cache_head h;
struct auth_domain * ek_client;
- int ek_fsidtype;
+ u8 ek_fsidtype;
u32 ek_fsid[6];
struct path ek_path;
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index e108b6c705b4..732abf6b92a5 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -372,7 +372,7 @@ nfsd_file_put(struct nfsd_file *nf)
/**
* nfsd_file_put_local - put nfsd_file reference and arm nfsd_net_put in caller
- * @nf: nfsd_file of which to put the reference
+ * @pnf: nfsd_file of which to put the reference
*
* First save the associated net to return to caller, then put
* the reference of the nfsd_file.
diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
index 80d9ff6608a7..4f6468eb2adf 100644
--- a/fs/nfsd/localio.c
+++ b/fs/nfsd/localio.c
@@ -32,7 +32,7 @@
* @rpc_clnt: rpc_clnt that the client established
* @cred: cred that the client established
* @nfs_fh: filehandle to lookup
- * @nfp: place to find the nfsd_file, or store it if it was non-NULL
+ * @pnf: place to find the nfsd_file, or store it if it was non-NULL
* @fmode: fmode_t to use for open
*
* This function maps a local fh to a path on a local filesystem.
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index a817d8485d21..b6d03e1ef5f7 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -561,7 +561,7 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp,
buf->pages = rqstp->rq_next_page;
rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
- xdr_init_encode_pages(xdr, buf, buf->pages, NULL);
+ xdr_init_encode_pages(xdr, buf);
}
/*
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index ccb00aa93be0..e00b2aea8da2 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1409,6 +1409,7 @@ void nfsd41_cb_referring_call(struct nfsd4_callback *cb,
out:
if (!rcl->__nr_referring_calls) {
cb->cb_nr_referring_call_list--;
+ list_del(&rcl->__list);
kfree(rcl);
}
}
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 290271ac4245..aea905fcaf87 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -65,7 +65,7 @@ nfsd4_alloc_devid_map(const struct svc_fh *fhp)
return;
map->fsid_type = fh->fh_fsid_type;
- memcpy(&map->fsid, fh->fh_fsid, fsid_len);
+ memcpy(&map->fsid, fh_fsid(fh), fsid_len);
spin_lock(&nfsd_devid_lock);
if (fhp->fh_export->ex_devid_map)
@@ -75,7 +75,7 @@ nfsd4_alloc_devid_map(const struct svc_fh *fhp)
list_for_each_entry(old, &nfsd_devid_hash[i], hash) {
if (old->fsid_type != fh->fh_fsid_type)
continue;
- if (memcmp(old->fsid, fh->fh_fsid,
+ if (memcmp(old->fsid, fh_fsid(fh),
key_len(old->fsid_type)))
continue;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index f13abbb13b38..71b428efcbb5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1917,13 +1917,6 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd42_write_res *result;
__be32 status;
- /*
- * Currently, async COPY is not reliable. Force all COPY
- * requests to be synchronous to avoid client application
- * hangs waiting for COPY completion.
- */
- nfsd4_copy_set_sync(copy, true);
-
result = &copy->cp_res;
nfsd_copy_write_verifier((__be32 *)&result->wr_verifier.data, nn);
@@ -2842,20 +2835,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
rqstp->rq_lease_breaker = (void **)&cstate->clp;
- trace_nfsd_compound(rqstp, args->tag, args->taglen, args->client_opcnt);
+ trace_nfsd_compound(rqstp, args->tag, args->taglen, args->opcnt);
while (!status && resp->opcnt < args->opcnt) {
op = &args->ops[resp->opcnt++];
- if (unlikely(resp->opcnt == NFSD_MAX_OPS_PER_COMPOUND)) {
- /* If there are still more operations to process,
- * stop here and report NFS4ERR_RESOURCE. */
- if (cstate->minorversion == 0 &&
- args->client_opcnt > resp->opcnt) {
- op->status = nfserr_resource;
- goto encode_op;
- }
- }
-
/*
* The XDR decode routines may have pre-set op->status;
* for example, if there is a miscellaneous XDR error
@@ -2932,7 +2915,7 @@ encode_op:
status = op->status;
}
- trace_nfsd_compound_status(args->client_opcnt, resp->opcnt,
+ trace_nfsd_compound_status(args->opcnt, resp->opcnt,
status, nfsd4_op_name(op->opnum));
nfsd4_cstate_clear_replay(cstate);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 82785db730d9..2231192ec33f 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -950,38 +950,32 @@ static const struct rpc_pipe_ops cld_upcall_ops = {
.destroy_msg = cld_pipe_destroy_msg,
};
-static struct dentry *
+static int
nfsd4_cld_register_sb(struct super_block *sb, struct rpc_pipe *pipe)
{
- struct dentry *dir, *dentry;
+ struct dentry *dir;
+ int err;
dir = rpc_d_lookup_sb(sb, NFSD_PIPE_DIR);
if (dir == NULL)
- return ERR_PTR(-ENOENT);
- dentry = rpc_mkpipe_dentry(dir, NFSD_CLD_PIPE, NULL, pipe);
+ return -ENOENT;
+ err = rpc_mkpipe_dentry(dir, NFSD_CLD_PIPE, NULL, pipe);
dput(dir);
- return dentry;
+ return err;
}
-static void
-nfsd4_cld_unregister_sb(struct rpc_pipe *pipe)
-{
- if (pipe->dentry)
- rpc_unlink(pipe->dentry);
-}
-
-static struct dentry *
+static int
nfsd4_cld_register_net(struct net *net, struct rpc_pipe *pipe)
{
struct super_block *sb;
- struct dentry *dentry;
+ int err;
sb = rpc_get_sb_net(net);
if (!sb)
- return NULL;
- dentry = nfsd4_cld_register_sb(sb, pipe);
+ return 0;
+ err = nfsd4_cld_register_sb(sb, pipe);
rpc_put_sb_net(net);
- return dentry;
+ return err;
}
static void
@@ -991,7 +985,7 @@ nfsd4_cld_unregister_net(struct net *net, struct rpc_pipe *pipe)
sb = rpc_get_sb_net(net);
if (sb) {
- nfsd4_cld_unregister_sb(pipe);
+ rpc_unlink(pipe);
rpc_put_sb_net(net);
}
}
@@ -1001,7 +995,6 @@ static int
__nfsd4_init_cld_pipe(struct net *net)
{
int ret;
- struct dentry *dentry;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct cld_net *cn;
@@ -1022,13 +1015,10 @@ __nfsd4_init_cld_pipe(struct net *net)
spin_lock_init(&cn->cn_lock);
INIT_LIST_HEAD(&cn->cn_list);
- dentry = nfsd4_cld_register_net(net, cn->cn_pipe);
- if (IS_ERR(dentry)) {
- ret = PTR_ERR(dentry);
+ ret = nfsd4_cld_register_net(net, cn->cn_pipe);
+ if (unlikely(ret))
goto err_destroy_data;
- }
- cn->cn_pipe->dentry = dentry;
#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
cn->cn_has_legacy = false;
#endif
@@ -2121,7 +2111,6 @@ rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr)
struct net *net = sb->s_fs_info;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct cld_net *cn = nn->cld_net;
- struct dentry *dentry;
int ret = 0;
if (!try_module_get(THIS_MODULE))
@@ -2134,16 +2123,10 @@ rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr)
switch (event) {
case RPC_PIPEFS_MOUNT:
- dentry = nfsd4_cld_register_sb(sb, cn->cn_pipe);
- if (IS_ERR(dentry)) {
- ret = PTR_ERR(dentry);
- break;
- }
- cn->cn_pipe->dentry = dentry;
+ ret = nfsd4_cld_register_sb(sb, cn->cn_pipe);
break;
case RPC_PIPEFS_UMOUNT:
- if (cn->cn_pipe->dentry)
- nfsd4_cld_unregister_sb(cn->cn_pipe);
+ rpc_unlink(cn->cn_pipe);
break;
default:
ret = -ENOTSUPP;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d5694987f86f..88c347957da5 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -633,18 +633,6 @@ find_readable_file(struct nfs4_file *f)
return ret;
}
-static struct nfsd_file *
-find_rw_file(struct nfs4_file *f)
-{
- struct nfsd_file *ret;
-
- spin_lock(&f->fi_lock);
- ret = nfsd_file_get(f->fi_fds[O_RDWR]);
- spin_unlock(&f->fi_lock);
-
- return ret;
-}
-
struct nfsd_file *
find_any_file(struct nfs4_file *f)
{
@@ -1218,15 +1206,20 @@ nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid)
static void put_deleg_file(struct nfs4_file *fp)
{
+ struct nfsd_file *rnf = NULL;
struct nfsd_file *nf = NULL;
spin_lock(&fp->fi_lock);
- if (--fp->fi_delegees == 0)
+ if (--fp->fi_delegees == 0) {
swap(nf, fp->fi_deleg_file);
+ swap(rnf, fp->fi_rdeleg_file);
+ }
spin_unlock(&fp->fi_lock);
if (nf)
nfsd_file_put(nf);
+ if (rnf)
+ nfs4_file_put_access(fp, NFS4_SHARE_ACCESS_READ);
}
static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
@@ -3872,7 +3865,6 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
ca->headerpadsz = 0;
ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc);
ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc);
- ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND);
ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
@@ -4697,10 +4689,16 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
}
status = nfs_ok;
if (conf) {
- old = unconf;
- unhash_client_locked(old);
- nfsd4_change_callback(conf, &unconf->cl_cb_conn);
- } else {
+ if (get_client_locked(conf) == nfs_ok) {
+ old = unconf;
+ unhash_client_locked(old);
+ nfsd4_change_callback(conf, &unconf->cl_cb_conn);
+ } else {
+ conf = NULL;
+ }
+ }
+
+ if (!conf) {
old = find_confirmed_client_by_name(&unconf->cl_name, nn);
if (old) {
status = nfserr_clid_inuse;
@@ -4717,10 +4715,14 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
}
trace_nfsd_clid_replaced(&old->cl_clientid);
}
+ status = get_client_locked(unconf);
+ if (status != nfs_ok) {
+ old = NULL;
+ goto out;
+ }
move_to_confirmed(unconf);
conf = unconf;
}
- get_client_locked(conf);
spin_unlock(&nn->client_lock);
if (conf == unconf)
fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY);
@@ -4750,6 +4752,7 @@ static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp)
INIT_LIST_HEAD(&fp->fi_clnt_odstate);
fh_copy_shallow(&fp->fi_fhandle, &fh->fh_handle);
fp->fi_deleg_file = NULL;
+ fp->fi_rdeleg_file = NULL;
fp->fi_had_conflict = false;
fp->fi_share_deny = 0;
memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
@@ -6000,14 +6003,19 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
* "An OPEN_DELEGATE_WRITE delegation allows the client to handle,
* on its own, all opens."
*
- * Furthermore the client can use a write delegation for most READ
- * operations as well, so we require a O_RDWR file here.
+ * Furthermore, section 9.1.2 says:
+ *
+ * "In the case of READ, the server may perform the corresponding
+ * check on the access mode, or it may choose to allow READ for
+ * OPEN4_SHARE_ACCESS_WRITE, to accommodate clients whose WRITE
+ * implementation may unavoidably do reads (e.g., due to buffer
+ * cache constraints)."
*
- * Offer a write delegation in the case of a BOTH open, and ensure
- * we get the O_RDWR descriptor.
+ * We choose to offer a write delegation for OPEN with the
+ * OPEN4_SHARE_ACCESS_WRITE access mode to accommodate such clients.
*/
- if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) {
- nf = find_rw_file(fp);
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
+ nf = find_writeable_file(fp);
dl_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : OPEN_DELEGATE_WRITE;
}
@@ -6138,7 +6146,7 @@ static bool
nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh,
struct kstat *stat)
{
- struct nfsd_file *nf = find_rw_file(dp->dl_stid.sc_file);
+ struct nfsd_file *nf = find_writeable_file(dp->dl_stid.sc_file);
struct path path;
int rc;
@@ -6157,6 +6165,34 @@ nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh,
}
/*
+ * Add NFS4_SHARE_ACCESS_READ to the write delegation granted on OPEN
+ * with NFS4_SHARE_ACCESS_WRITE by allocating a separate nfsd_file and
+ * struct file to be used for reads with the delegation stateid.
+ *
+ */
+static bool
+nfsd4_add_rdaccess_to_wrdeleg(struct svc_rqst *rqstp, struct nfsd4_open *open,
+ struct svc_fh *fh, struct nfs4_ol_stateid *stp)
+{
+ struct nfs4_file *fp;
+ struct nfsd_file *nf = NULL;
+
+ if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) ==
+ NFS4_SHARE_ACCESS_WRITE) {
+ if (nfsd_file_acquire_opened(rqstp, fh, NFSD_MAY_READ, NULL, &nf))
+ return false;
+ fp = stp->st_stid.sc_file;
+ spin_lock(&fp->fi_lock);
+ __nfs4_file_get_access(fp, NFS4_SHARE_ACCESS_READ);
+ fp = stp->st_stid.sc_file;
+ fp->fi_fds[O_RDONLY] = nf;
+ fp->fi_rdeleg_file = nf;
+ spin_unlock(&fp->fi_lock);
+ }
+ return true;
+}
+
+/*
* The Linux NFS server does not offer write delegations to NFSv4.0
* clients in order to avoid conflicts between write delegations and
* GETATTRs requesting CHANGE or SIZE attributes.
@@ -6181,8 +6217,9 @@ nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh,
* open or lock state.
*/
static void
-nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
- struct svc_fh *currentfh)
+nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open,
+ struct nfs4_ol_stateid *stp, struct svc_fh *currentfh,
+ struct svc_fh *fh)
{
struct nfs4_openowner *oo = openowner(stp->st_stateowner);
bool deleg_ts = nfsd4_want_deleg_timestamps(open);
@@ -6227,7 +6264,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
- if (!nfs4_delegation_stat(dp, currentfh, &stat)) {
+ if (!nfsd4_add_rdaccess_to_wrdeleg(rqstp, open, fh, stp) ||
+ !nfs4_delegation_stat(dp, currentfh, &stat)) {
nfs4_put_stid(&dp->dl_stid);
destroy_delegation(dp);
goto out_no_deleg;
@@ -6322,6 +6360,20 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
status = nfs4_check_deleg(cl, open, &dp);
if (status)
goto out;
+ if (dp && nfsd4_is_deleg_cur(open) &&
+ (dp->dl_stid.sc_file != fp)) {
+ /*
+ * RFC 8881 section 8.2.4 requires the server to return
+ * NFS4ERR_BAD_STATEID if the selected table entry does
+ * not match the current filehandle. However, returning
+ * NFS4ERR_BAD_STATEID from OPEN can cause the client
+ * to repeatedly retry the operation with the same
+ * stateid, since the stateid itself is valid. To avoid
+ * this situation, NFSD returns NFS4ERR_INVAL instead.
+ */
+ status = nfserr_inval;
+ goto out;
+ }
stp = nfsd4_find_and_lock_existing_open(fp, open);
} else {
open->op_file = NULL;
@@ -6383,7 +6435,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
* Attempt to hand out a delegation. No error return, because the
* OPEN succeeds even if we fail.
*/
- nfs4_open_delegation(open, stp, &resp->cstate.current_fh);
+ nfs4_open_delegation(rqstp, open, stp,
+ &resp->cstate.current_fh, current_fh);
/*
* If there is an existing open stateid, it must be updated and
@@ -7076,7 +7129,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
return_revoked = true;
if (typemask & SC_TYPE_DELEG)
/* Always allow REVOKED for DELEG so we can
- * retturn the appropriate error.
+ * return the appropriate error.
*/
statusmask |= SC_STATUS_REVOKED;
@@ -7119,10 +7172,6 @@ nfs4_find_file(struct nfs4_stid *s, int flags)
switch (s->sc_type) {
case SC_TYPE_DELEG:
- spin_lock(&s->sc_file->fi_lock);
- ret = nfsd_file_get(s->sc_file->fi_deleg_file);
- spin_unlock(&s->sc_file->fi_lock);
- break;
case SC_TYPE_OPEN:
case SC_TYPE_LOCK:
if (flags & RD_STATE)
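
Several hunks above turn on fine points of the OPEN share-access bitmask: masking with NFS4_SHARE_ACCESS_WRITE answers "was any write access requested?", while masking with NFS4_SHARE_ACCESS_BOTH and comparing against WRITE isolates the write-only OPEN that nfsd4_add_rdaccess_to_wrdeleg must augment with a read-capable nfsd_file. A small self-checking sketch; the flag values follow RFC 8881, everything else is illustrative.

    #include <assert.h>

    #define SHARE_ACCESS_READ  0x1
    #define SHARE_ACCESS_WRITE 0x2
    #define SHARE_ACCESS_BOTH  0x3

    /* any write access at all: matches WRITE and BOTH */
    static int wants_write(unsigned int a)
    {
        return (a & SHARE_ACCESS_WRITE) != 0;
    }

    /* write-only OPEN: matches WRITE but not BOTH */
    static int write_only(unsigned int a)
    {
        return (a & SHARE_ACCESS_BOTH) == SHARE_ACCESS_WRITE;
    }

    int main(void)
    {
        assert(wants_write(SHARE_ACCESS_WRITE));
        assert(wants_write(SHARE_ACCESS_BOTH));
        assert(!wants_write(SHARE_ACCESS_READ));

        assert(write_only(SHARE_ACCESS_WRITE));
        assert(!write_only(SHARE_ACCESS_BOTH));
        assert(!write_only(SHARE_ACCESS_READ));
        return 0;
    }
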
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 3afcdbed6e14..ea91bad4eee2 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2500,10 +2500,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0)
return false;
- if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0)
+ if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0)
return false;
- argp->opcnt = min_t(u32, argp->client_opcnt,
- NFSD_MAX_OPS_PER_COMPOUND);
if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
argp->ops = vcalloc(argp->opcnt, sizeof(*argp->ops));
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3f3e9f6c4250..2909d70de559 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1436,7 +1436,7 @@ unsigned int nfsd_net_id;
static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb,
struct netlink_callback *cb,
- struct nfsd_genl_rqstp *rqstp)
+ struct nfsd_genl_rqstp *genl_rqstp)
{
void *hdr;
u32 i;
@@ -1446,22 +1446,22 @@ static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb,
if (!hdr)
return -ENOBUFS;
- if (nla_put_be32(skb, NFSD_A_RPC_STATUS_XID, rqstp->rq_xid) ||
- nla_put_u32(skb, NFSD_A_RPC_STATUS_FLAGS, rqstp->rq_flags) ||
- nla_put_u32(skb, NFSD_A_RPC_STATUS_PROG, rqstp->rq_prog) ||
- nla_put_u32(skb, NFSD_A_RPC_STATUS_PROC, rqstp->rq_proc) ||
- nla_put_u8(skb, NFSD_A_RPC_STATUS_VERSION, rqstp->rq_vers) ||
+ if (nla_put_be32(skb, NFSD_A_RPC_STATUS_XID, genl_rqstp->rq_xid) ||
+ nla_put_u32(skb, NFSD_A_RPC_STATUS_FLAGS, genl_rqstp->rq_flags) ||
+ nla_put_u32(skb, NFSD_A_RPC_STATUS_PROG, genl_rqstp->rq_prog) ||
+ nla_put_u32(skb, NFSD_A_RPC_STATUS_PROC, genl_rqstp->rq_proc) ||
+ nla_put_u8(skb, NFSD_A_RPC_STATUS_VERSION, genl_rqstp->rq_vers) ||
nla_put_s64(skb, NFSD_A_RPC_STATUS_SERVICE_TIME,
- ktime_to_us(rqstp->rq_stime),
+ ktime_to_us(genl_rqstp->rq_stime),
NFSD_A_RPC_STATUS_PAD))
return -ENOBUFS;
- switch (rqstp->rq_saddr.sa_family) {
+ switch (genl_rqstp->rq_saddr.sa_family) {
case AF_INET: {
const struct sockaddr_in *s_in, *d_in;
- s_in = (const struct sockaddr_in *)&rqstp->rq_saddr;
- d_in = (const struct sockaddr_in *)&rqstp->rq_daddr;
+ s_in = (const struct sockaddr_in *)&genl_rqstp->rq_saddr;
+ d_in = (const struct sockaddr_in *)&genl_rqstp->rq_daddr;
if (nla_put_in_addr(skb, NFSD_A_RPC_STATUS_SADDR4,
s_in->sin_addr.s_addr) ||
nla_put_in_addr(skb, NFSD_A_RPC_STATUS_DADDR4,
@@ -1476,8 +1476,8 @@ static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb,
case AF_INET6: {
const struct sockaddr_in6 *s_in, *d_in;
- s_in = (const struct sockaddr_in6 *)&rqstp->rq_saddr;
- d_in = (const struct sockaddr_in6 *)&rqstp->rq_daddr;
+ s_in = (const struct sockaddr_in6 *)&genl_rqstp->rq_saddr;
+ d_in = (const struct sockaddr_in6 *)&genl_rqstp->rq_daddr;
if (nla_put_in6_addr(skb, NFSD_A_RPC_STATUS_SADDR6,
&s_in->sin6_addr) ||
nla_put_in6_addr(skb, NFSD_A_RPC_STATUS_DADDR6,
@@ -1491,9 +1491,9 @@ static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb,
}
}
- for (i = 0; i < rqstp->rq_opcnt; i++)
+ for (i = 0; i < genl_rqstp->rq_opcnt; i++)
if (nla_put_u32(skb, NFSD_A_RPC_STATUS_COMPOUND_OPS,
- rqstp->rq_opnum[i]))
+ genl_rqstp->rq_opnum[i]))
return -ENOBUFS;
genlmsg_end(skb, hdr);
@@ -1569,7 +1569,8 @@ int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb,
int j;
args = rqstp->rq_argp;
- genl_rqstp.rq_opcnt = args->opcnt;
+ genl_rqstp.rq_opcnt = min_t(u32, args->opcnt,
+ ARRAY_SIZE(genl_rqstp.rq_opnum));
for (j = 0; j < genl_rqstp.rq_opcnt; j++)
genl_rqstp.rq_opnum[j] =
args->ops[j].opnum;
@@ -1611,7 +1612,7 @@ out_unlock:
*/
int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info)
{
- int *nthreads, count = 0, nrpools, i, ret = -EOPNOTSUPP, rem;
+ int *nthreads, nrpools = 0, i, ret = -EOPNOTSUPP, rem;
struct net *net = genl_info_net(info);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
const struct nlattr *attr;
@@ -1623,12 +1624,11 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info)
/* count number of SERVER_THREADS values */
nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) {
if (nla_type(attr) == NFSD_A_SERVER_THREADS)
- count++;
+ nrpools++;
}
mutex_lock(&nfsd_mutex);
- nrpools = max(count, nfsd_nrpools(net));
nthreads = kcalloc(nrpools, sizeof(int), GFP_KERNEL);
if (!nthreads) {
ret = -ENOMEM;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 1bfd0b4e9af7..1cd0bed57bc2 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -57,9 +57,6 @@ struct readdir_cd {
__be32 err; /* 0, nfserr, or nfserr_eof */
};
-/* Maximum number of operations per session compound */
-#define NFSD_MAX_OPS_PER_COMPOUND 50
-
struct nfsd_genl_rqstp {
struct sockaddr rq_daddr;
struct sockaddr rq_saddr;
@@ -72,7 +69,7 @@ struct nfsd_genl_rqstp {
/* NFSv4 compound */
u32 rq_opcnt;
- u32 rq_opnum[NFSD_MAX_OPS_PER_COMPOUND];
+ u32 rq_opnum[16];
};
extern struct svc_program nfsd_programs[];
@@ -283,6 +280,7 @@ void nfsd_lockd_shutdown(void);
#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
#define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
+#define nfserr_delay cpu_to_be32(NFS4ERR_DELAY)
#define nfserr_badiomode cpu_to_be32(NFS4ERR_BADIOMODE)
#define nfserr_badlayout cpu_to_be32(NFS4ERR_BADLAYOUT)
#define nfserr_bad_session_digest cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST)
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index aef474f1b84b..74cf1f4de174 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -172,6 +172,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net,
if (len == 0)
return error;
if (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
+ u32 *fsid = fh_fsid(fh);
+
/* deprecated, convert to type 3 */
len = key_len(FSID_ENCODE_DEV)/4;
fh->fh_fsid_type = FSID_ENCODE_DEV;
@@ -181,17 +183,17 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net,
* confuses sparse, so we must use __force here to
* keep it from complaining.
*/
- fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]),
- ntohl((__force __be32)fh->fh_fsid[1])));
- fh->fh_fsid[1] = fh->fh_fsid[2];
+ fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fsid[0]),
+ ntohl((__force __be32)fsid[1])));
+ fsid[1] = fsid[2];
}
data_left -= len;
if (data_left < 0)
return error;
exp = rqst_exp_find(rqstp ? &rqstp->rq_chandle : NULL,
net, client, gssclient,
- fh->fh_fsid_type, fh->fh_fsid);
- fid = (struct fid *)(fh->fh_fsid + len);
+ fh->fh_fsid_type, fh_fsid(fh));
+ fid = (struct fid *)(fh_fsid(fh) + len);
error = nfserr_stale;
if (IS_ERR(exp)) {
@@ -463,7 +465,7 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp,
{
if (dentry != exp->ex_path.dentry) {
struct fid *fid = (struct fid *)
- (fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1);
+ (fh_fsid(&fhp->fh_handle) + fhp->fh_handle.fh_size/4 - 1);
int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4;
int fh_flags = (exp->ex_flags & NFSEXP_NOSUBTREECHECK) ? 0 :
EXPORT_FH_CONNECTABLE;
@@ -614,7 +616,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
fhp->fh_handle.fh_auth_type = 0;
mk_fsid(fhp->fh_handle.fh_fsid_type,
- fhp->fh_handle.fh_fsid,
+ fh_fsid(&fhp->fh_handle),
ex_dev,
d_inode(exp->ex_path.dentry)->i_ino,
exp->ex_fsid, exp->ex_uuid);
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 5103c2f4d225..1cf979722521 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -49,18 +49,19 @@ struct knfsd_fh {
* Points to the current size while
* building a new file handle.
*/
- union {
- char fh_raw[NFS4_FHSIZE];
- struct {
- u8 fh_version; /* == 1 */
- u8 fh_auth_type; /* deprecated */
- u8 fh_fsid_type;
- u8 fh_fileid_type;
- u32 fh_fsid[]; /* flexible-array member */
- };
- };
+ u8 fh_raw[NFS4_FHSIZE];
};
+#define fh_version fh_raw[0]
+#define fh_auth_type fh_raw[1]
+#define fh_fsid_type fh_raw[2]
+#define fh_fileid_type fh_raw[3]
+
+static inline u32 *fh_fsid(const struct knfsd_fh *fh)
+{
+ return (u32 *)&fh->fh_raw[4];
+}
+
static inline __u32 ino_t_to_u32(ino_t ino)
{
return (__u32) ino;
@@ -260,9 +261,12 @@ static inline bool fh_match(const struct knfsd_fh *fh1,
static inline bool fh_fsid_match(const struct knfsd_fh *fh1,
const struct knfsd_fh *fh2)
{
+ u32 *fsid1 = fh_fsid(fh1);
+ u32 *fsid2 = fh_fsid(fh2);
+
if (fh1->fh_fsid_type != fh2->fh_fsid_type)
return false;
- if (memcmp(fh1->fh_fsid, fh2->fh_fsid, key_len(fh1->fh_fsid_type)) != 0)
+ if (memcmp(fsid1, fsid2, key_len(fh1->fh_fsid_type)) != 0)
return false;
return true;
}
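
The knfsd_fh rework above replaces the union-with-flexible-array with a plain NFS4_FHSIZE byte buffer plus byte-offset accessors, so every field access goes through one well-defined view of the handle. A standalone sketch showing that the two views name the same bytes; the struct definitions are simplified local copies, and the pointers are only compared, never dereferenced, to sidestep alignment concerns.

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    #define NFS4_FHSIZE 128

    struct fh_old {                 /* old layout, simplified */
        uint8_t fh_version;
        uint8_t fh_auth_type;
        uint8_t fh_fsid_type;
        uint8_t fh_fileid_type;
        uint32_t fh_fsid[];
    };

    struct fh_new {                 /* new layout: raw bytes only */
        uint8_t fh_raw[NFS4_FHSIZE];
    };

    static uint32_t *fh_fsid(struct fh_new *fh)
    {
        return (uint32_t *)&fh->fh_raw[4];  /* fsid words start at byte 4 */
    }

    int main(void)
    {
        struct fh_new fh = { .fh_raw = { 1, 0, 3, 1 } };

        /* the fsid array began at byte 4 in the old layout too */
        assert(offsetof(struct fh_old, fh_fsid) == 4);
        assert(fh.fh_raw[2] == 3);  /* fh_fsid_type is now fh_raw[2] */
        assert((void *)fh_fsid(&fh) == (void *)&fh.fh_raw[4]);
        return 0;
    }
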
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index c10fa8128a8a..8f71f5748c75 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -575,7 +575,7 @@ static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp,
buf->pages = rqstp->rq_next_page;
rqstp->rq_next_page++;
- xdr_init_encode_pages(xdr, buf, buf->pages, NULL);
+ xdr_init_encode_pages(xdr, buf);
}
/*
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 1995bca158b8..8adc2550129e 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -665,6 +665,7 @@ struct nfs4_file {
atomic_t fi_access[2];
u32 fi_share_deny;
struct nfsd_file *fi_deleg_file;
+ struct nfsd_file *fi_rdeleg_file;
int fi_delegees;
struct knfsd_fh fi_fhandle;
bool fi_had_conflict;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 3c5505ef5e3a..a664fdf1161e 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -344,7 +344,7 @@ TRACE_EVENT(nfsd_exp_find_key,
int status),
TP_ARGS(key, status),
TP_STRUCT__entry(
- __field(int, fsidtype)
+ __field(u8, fsidtype)
__array(u32, fsid, 6)
__string(auth_domain, key->ek_client->name)
__field(int, status)
@@ -367,7 +367,7 @@ TRACE_EVENT(nfsd_expkey_update,
TP_PROTO(const struct svc_expkey *key, const char *exp_path),
TP_ARGS(key, exp_path),
TP_STRUCT__entry(
- __field(int, fsidtype)
+ __field(u8, fsidtype)
__array(u32, fsid, 6)
__string(auth_domain, key->ek_client->name)
__string(path, exp_path)
@@ -1108,7 +1108,6 @@ DEFINE_NFSD_FILE_EVENT(nfsd_file_free);
DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
DEFINE_NFSD_FILE_EVENT(nfsd_file_closing);
-DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue);
TRACE_EVENT(nfsd_file_alloc,
TP_PROTO(
@@ -1344,9 +1343,7 @@ DEFINE_EVENT(nfsd_file_gc_class, name, \
TP_ARGS(nf))
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add);
-DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add_disposed);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del);
-DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced);
@@ -1380,7 +1377,6 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \
TP_ARGS(removed, remaining))
DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed);
-DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_recent);
DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed);
TRACE_EVENT(nfsd_file_close,
@@ -2103,25 +2099,6 @@ TRACE_EVENT(nfsd_ctl_maxblksize,
)
);
-TRACE_EVENT(nfsd_ctl_maxconn,
- TP_PROTO(
- const struct net *net,
- int maxconn
- ),
- TP_ARGS(net, maxconn),
- TP_STRUCT__entry(
- __field(unsigned int, netns_ino)
- __field(int, maxconn)
- ),
- TP_fast_assign(
- __entry->netns_ino = net->ns.inum;
- __entry->maxconn = maxconn;
- ),
- TP_printk("maxconn=%d",
- __entry->maxconn
- )
-);
-
TRACE_EVENT(nfsd_ctl_time,
TP_PROTO(
const struct net *net,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index cd689df2ca5d..98ab55ba3ced 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1086,10 +1086,13 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
{
unsigned long v, total;
struct iov_iter iter;
- loff_t ppos = offset;
+ struct kiocb kiocb;
ssize_t host_err;
size_t len;
+ init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = offset;
+
v = 0;
total = *count;
while (total) {
@@ -1104,7 +1107,7 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
trace_nfsd_read_vector(rqstp, fhp, offset, *count);
iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v, *count);
- host_err = vfs_iter_read(file, &iter, &ppos, 0);
+ host_err = vfs_iocb_iter_read(file, &kiocb, &iter);
return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
@@ -1170,15 +1173,14 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct file *file = nf->nf_file;
struct super_block *sb = file_inode(file)->i_sb;
+ struct kiocb kiocb;
struct svc_export *exp;
struct iov_iter iter;
errseq_t since;
__be32 nfserr;
int host_err;
- loff_t pos = offset;
unsigned long exp_op_flags = 0;
unsigned int pflags = current->flags;
- rwf_t flags = 0;
bool restore_flags = false;
unsigned int nvecs;
@@ -1204,16 +1206,17 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (!EX_ISSYNC(exp))
stable = NFS_UNSTABLE;
-
+ init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = offset;
if (stable && !fhp->fh_use_wgather)
- flags |= RWF_SYNC;
+ kiocb.ki_flags |= IOCB_DSYNC;
nvecs = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, payload);
iov_iter_bvec(&iter, ITER_SOURCE, rqstp->rq_bvec, nvecs, *cnt);
since = READ_ONCE(file->f_wb_err);
if (verf)
nfsd_copy_write_verifier(verf, nn);
- host_err = vfs_iter_write(file, &iter, &pos, flags);
+ host_err = vfs_iocb_iter_write(file, &kiocb, &iter);
if (host_err < 0) {
commit_reset_write_verifier(nn, rqstp, host_err);
goto out_nfserr;
@@ -1864,7 +1867,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
struct svc_fh *tfhp, char *tname, int tlen)
{
struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap;
- struct inode *fdir, *tdir;
int type = S_IFDIR;
__be32 err;
int host_err;
@@ -1880,10 +1882,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
goto out;
fdentry = ffhp->fh_dentry;
- fdir = d_inode(fdentry);
tdentry = tfhp->fh_dentry;
- tdir = d_inode(tdentry);
err = nfserr_perm;
if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
@@ -1944,10 +1944,10 @@ retry:
} else {
struct renamedata rd = {
.old_mnt_idmap = &nop_mnt_idmap,
- .old_dir = fdir,
+ .old_parent = fdentry,
.old_dentry = odentry,
.new_mnt_idmap = &nop_mnt_idmap,
- .new_dir = tdir,
+ .new_parent = tdentry,
.new_dentry = ndentry,
};
int retries;
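
The vfs.c hunks above stop threading a bare loff_t and rwf_t through the I/O path and instead fill a struct kiocb, carrying the file position in ki_pos and mapping the old RWF_SYNC stable-write flag to IOCB_DSYNC. As a rough userspace analogue of "per-call position plus per-call sync flag", pwritev2() with RWF_DSYNC behaves similarly; pwritev2 and RWF_DSYNC are real Linux interfaces, the file path below is just an example.

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/uio.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("/tmp/kiocb-sketch.dat", O_CREAT | O_WRONLY, 0600);
        if (fd < 0) {
            perror("open");
            return 1;
        }

        char a[] = "stable ", b[] = "write\n";
        struct iovec iov[2] = {
            { .iov_base = a, .iov_len = strlen(a) },
            { .iov_base = b, .iov_len = strlen(b) },
        };

        /* offset passed per call, like kiocb.ki_pos; RWF_DSYNC plays
         * the role IOCB_DSYNC plays for a stable NFS WRITE */
        ssize_t n = pwritev2(fd, iov, 2, 0, RWF_DSYNC);
        if (n < 0)
            perror("pwritev2");
        else
            printf("wrote %zd bytes\n", n);
        close(fd);
        return n < 0;
    }
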
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index aa2a356da784..a23bc56051ca 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -870,7 +870,6 @@ struct nfsd4_compoundargs {
char * tag;
u32 taglen;
u32 minorversion;
- u32 client_opcnt;
u32 opcnt;
bool splice_ok;
struct nfsd4_op *ops;
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 9b7f8e9655a2..6ca3d74be1e1 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -96,7 +96,7 @@ static void nilfs_commit_chunk(struct folio *folio,
int err;
nr_dirty = nilfs_page_count_clean_buffers(folio, from, to);
- copied = block_write_end(NULL, mapping, pos, len, len, folio, NULL);
+ copied = block_write_end(pos, len, len, folio);
if (pos + copied > dir->i_size)
i_size_write(dir, pos + copied);
if (IS_DIRSYNC(dir))
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 6613b8fcceb0..87ddde159f0c 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -218,7 +218,8 @@ void nilfs_write_failed(struct address_space *mapping, loff_t to)
}
}
-static int nilfs_write_begin(struct file *file, struct address_space *mapping,
+static int nilfs_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len,
struct folio **foliop, void **fsdata)
@@ -237,7 +238,8 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping,
return err;
}
-static int nilfs_write_end(struct file *file, struct address_space *mapping,
+static int nilfs_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct folio *folio, void *fsdata)
{
@@ -248,7 +250,7 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping,
nr_dirty = nilfs_page_count_clean_buffers(folio, start,
start + copied);
- copied = generic_write_end(file, mapping, pos, len, copied, folio,
+ copied = generic_write_end(iocb, mapping, pos, len, copied, folio,
fsdata);
nilfs_set_file_dirty(inode, nr_dirty);
err = nilfs_transaction_commit(inode->i_sb);
@@ -472,11 +474,18 @@ static int __nilfs_read_inode(struct super_block *sb,
inode->i_op = &nilfs_symlink_inode_operations;
inode_nohighmem(inode);
inode->i_mapping->a_ops = &nilfs_aops;
- } else {
+ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
inode->i_op = &nilfs_special_inode_operations;
init_special_inode(
inode, inode->i_mode,
huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
+ } else {
+ nilfs_error(sb,
+ "invalid file type bits in mode 0%o for inode %lu",
+ inode->i_mode, ino);
+ err = -EIO;
+ goto failed_unmap;
}
nilfs_ifile_unmap_inode(raw_inode);
brelse(bh);
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 22aecf6e2344..a9c61d0492cb 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -560,8 +560,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
if (unlikely(err))
goto failed_folio;
- block_write_end(NULL, inode->i_mapping, pos, blocksize,
- blocksize, folio, NULL);
+ block_write_end(pos, blocksize, blocksize, folio);
folio_unlock(folio);
folio_put(folio);
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index c4cdaf5fa7ed..9fb73bafd41d 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -308,6 +308,10 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
goto out_err;
}
+ error = file_f_owner_allocate(filp);
+ if (error)
+ goto out_err;
+
/* new fsnotify mark, we expect most fcntl calls to add a new mark */
new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL);
if (!new_dn_mark) {
@@ -315,10 +319,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
goto out_err;
}
- error = file_f_owner_allocate(filp);
- if (error)
- goto out_err;
-
/* set up the new_fsn_mark and new_dn_mark */
new_fsn_mark = &new_dn_mark->fsn_mark;
fsnotify_init_mark(new_fsn_mark, dnotify_group);
diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c
index b6da80c69ca6..1b5c865a0339 100644
--- a/fs/ntfs3/dir.c
+++ b/fs/ntfs3/dir.c
@@ -304,6 +304,9 @@ static inline bool ntfs_dir_emit(struct ntfs_sb_info *sbi,
if (sbi->options->nohidden && (fname->dup.fa & FILE_ATTRIBUTE_HIDDEN))
return true;
+ if (fname->name_len + sizeof(struct NTFS_DE) > le16_to_cpu(e->size))
+ return true;
+
name_len = ntfs_utf16_to_nls(sbi, fname->name, fname->name_len, name,
PATH_MAX);
if (name_len <= 0) {
@@ -329,8 +332,7 @@ static inline bool ntfs_dir_emit(struct ntfs_sb_info *sbi,
* It does additional locks/reads just to get the type of name.
* Should we use additional mount option to enable branch below?
*/
- if (((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) ||
- fname->dup.ea_size) &&
+ if (fname->dup.extend_data &&
ino != ni->mi.rno) {
struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL);
if (!IS_ERR_OR_NULL(inode)) {
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 1e99a35691cd..c12ae1fb8b37 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -57,6 +57,10 @@ long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg)
struct inode *inode = file_inode(filp);
struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info;
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ntfs_i(inode))))
+ return -EINVAL;
+
switch (cmd) {
case FITRIM:
return ntfs_ioctl_fitrim(sbi, arg);
@@ -81,6 +85,10 @@ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct inode *inode = d_inode(path->dentry);
struct ntfs_inode *ni = ntfs_i(inode);
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ni)))
+ return -EINVAL;
+
stat->result_mask |= STATX_BTIME;
stat->btime = ni->i_crtime;
stat->blksize = ni->mi.sbi->cluster_size; /* 512, 1K, ..., 2M */
@@ -154,13 +162,13 @@ static int ntfs_extend_initialized_size(struct file *file,
if (pos + len > new_valid)
len = new_valid - pos;
- err = ntfs_write_begin(file, mapping, pos, len, &folio, NULL);
+ err = ntfs_write_begin(NULL, mapping, pos, len, &folio, NULL);
if (err)
goto out;
folio_zero_range(folio, zerofrom, folio_size(folio) - zerofrom);
- err = ntfs_write_end(file, mapping, pos, len, len, folio, NULL);
+ err = ntfs_write_end(NULL, mapping, pos, len, len, folio, NULL);
if (err < 0)
goto out;
pos += len;
@@ -271,6 +279,10 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma)
bool rw = vma->vm_flags & VM_WRITE;
int err;
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ni)))
+ return -EINVAL;
+
if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
return -EIO;
@@ -310,7 +322,10 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma)
}
if (ni->i_valid < to) {
- inode_lock(inode);
+ if (!inode_trylock(inode)) {
+ err = -EAGAIN;
+ goto out;
+ }
err = ntfs_extend_initialized_size(file, ni,
ni->i_valid, to);
inode_unlock(inode);
@@ -735,6 +750,10 @@ int ntfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
umode_t mode = inode->i_mode;
int err;
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ni)))
+ return -EINVAL;
+
if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
return -EIO;
@@ -795,6 +814,10 @@ static int check_read_restriction(struct inode *inode)
{
struct ntfs_inode *ni = ntfs_i(inode);
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ni)))
+ return -EINVAL;
+
if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
return -EIO;
@@ -1130,6 +1153,10 @@ static int check_write_restriction(struct inode *inode)
{
struct ntfs_inode *ni = ntfs_i(inode);
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ni)))
+ return -EINVAL;
+
if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
return -EIO;
@@ -1212,6 +1239,10 @@ int ntfs_file_open(struct inode *inode, struct file *file)
{
struct ntfs_inode *ni = ntfs_i(inode);
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ni)))
+ return -EINVAL;
+
if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
return -EIO;
@@ -1281,6 +1312,10 @@ int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
int err;
struct ntfs_inode *ni = ntfs_i(inode);
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ni)))
+ return -EINVAL;
+
err = fiemap_prep(inode, fieinfo, start, &len, ~FIEMAP_FLAG_XATTR);
if (err)
return err;
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index 756e1306fe6c..8f9fe1d7a690 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -3003,8 +3003,7 @@ int ni_add_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni,
* ni_rename - Remove one name and insert new name.
*/
int ni_rename(struct ntfs_inode *dir_ni, struct ntfs_inode *new_dir_ni,
- struct ntfs_inode *ni, struct NTFS_DE *de, struct NTFS_DE *new_de,
- bool *is_bad)
+ struct ntfs_inode *ni, struct NTFS_DE *de, struct NTFS_DE *new_de)
{
int err;
struct NTFS_DE *de2 = NULL;
@@ -3027,8 +3026,8 @@ int ni_rename(struct ntfs_inode *dir_ni, struct ntfs_inode *new_dir_ni,
err = ni_add_name(new_dir_ni, ni, new_de);
if (!err) {
err = ni_remove_name(dir_ni, ni, de, &de2, &undo);
- if (err && ni_remove_name(new_dir_ni, ni, new_de, &de2, &undo))
- *is_bad = true;
+ WARN_ON(err && ni_remove_name(new_dir_ni, ni, new_de, &de2,
+ &undo));
}
/*
@@ -3119,11 +3118,21 @@ static bool ni_update_parent(struct ntfs_inode *ni, struct NTFS_DUP_INFO *dup,
}
}
- /* TODO: Fill reparse info. */
- dup->reparse = 0;
- dup->ea_size = 0;
+ dup->extend_data = 0;
- if (ni->ni_flags & NI_FLAG_EA) {
+ if (dup->fa & FILE_ATTRIBUTE_REPARSE_POINT) {
+ attr = ni_find_attr(ni, NULL, NULL, ATTR_REPARSE, NULL, 0, NULL,
+ NULL);
+
+ if (attr) {
+ const struct REPARSE_POINT *rp;
+
+ rp = resident_data_ex(attr, sizeof(struct REPARSE_POINT));
+ /* If ATTR_REPARSE exists 'rp' can't be NULL. */
+ if (rp)
+ dup->extend_data = rp->ReparseTag;
+ }
+ } else if (ni->ni_flags & NI_FLAG_EA) {
attr = ni_find_attr(ni, attr, &le, ATTR_EA_INFO, NULL, 0, NULL,
NULL);
if (attr) {
@@ -3132,7 +3141,7 @@ static bool ni_update_parent(struct ntfs_inode *ni, struct NTFS_DUP_INFO *dup,
info = resident_data_ex(attr, sizeof(struct EA_INFO));
/* If ATTR_EA_INFO exists 'info' can't be NULL. */
if (info)
- dup->ea_size = info->size_pack;
+ dup->extend_data = info->size;
}
}
@@ -3199,6 +3208,10 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
if (is_bad_inode(inode) || sb_rdonly(sb))
return 0;
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ni)))
+ return -EINVAL;
+
if (unlikely(ntfs3_forced_shutdown(sb)))
return -EIO;
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index df81f1f7330c..c7a2f191254d 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -905,9 +905,13 @@ void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait)
void ntfs_bad_inode(struct inode *inode, const char *hint)
{
struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info;
+ struct ntfs_inode *ni = ntfs_i(inode);
ntfs_inode_err(inode, "%s", hint);
- make_bad_inode(inode);
+
+ /* Do not call make_bad_inode()! */
+ ni->ni_bad = true;
+
/* Avoid recursion if bad inode is $Volume. */
if (inode->i_ino != MFT_REC_VOL &&
!(sbi->flags & NTFS_FLAGS_LOG_REPLAYING)) {
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 0f0d27d4644a..37cbbee7fa58 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -878,6 +878,10 @@ static int ntfs_resident_writepage(struct folio *folio,
struct ntfs_inode *ni = ntfs_i(inode);
int ret;
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ni)))
+ return -EINVAL;
+
if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
return -EIO;
@@ -896,6 +900,10 @@ static int ntfs_writepages(struct address_space *mapping,
{
struct inode *inode = mapping->host;
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ntfs_i(inode))))
+ return -EINVAL;
+
if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
return -EIO;
@@ -912,13 +920,17 @@ static int ntfs_get_block_write_begin(struct inode *inode, sector_t vbn,
bh_result, create, GET_BLOCK_WRITE_BEGIN);
}
-int ntfs_write_begin(struct file *file, struct address_space *mapping,
+int ntfs_write_begin(const struct kiocb *iocb, struct address_space *mapping,
loff_t pos, u32 len, struct folio **foliop, void **fsdata)
{
int err;
struct inode *inode = mapping->host;
struct ntfs_inode *ni = ntfs_i(inode);
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ni)))
+ return -EINVAL;
+
if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
return -EIO;
@@ -957,7 +969,8 @@ out:
/*
* ntfs_write_end - Address_space_operations::write_end.
*/
-int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
+int ntfs_write_end(const struct kiocb *iocb,
+ struct address_space *mapping, loff_t pos,
u32 len, u32 copied, struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
@@ -989,7 +1002,7 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
folio_unlock(folio);
folio_put(folio);
} else {
- err = generic_write_end(file, mapping, pos, len, copied, folio,
+ err = generic_write_end(iocb, mapping, pos, len, copied, folio,
fsdata);
}
@@ -1062,10 +1075,10 @@ int inode_read_data(struct inode *inode, void *data, size_t bytes)
* Number of bytes for REPARSE_DATA_BUFFER(IO_REPARSE_TAG_SYMLINK)
* for unicode string of @uni_len length.
*/
-static inline u32 ntfs_reparse_bytes(u32 uni_len)
+static inline u32 ntfs_reparse_bytes(u32 uni_len, bool is_absolute)
{
/* Header + unicode string + decorated unicode string. */
- return sizeof(short) * (2 * uni_len + 4) +
+ return sizeof(short) * (2 * uni_len + (is_absolute ? 4 : 0)) +
offsetof(struct REPARSE_DATA_BUFFER,
SymbolicLinkReparseBuffer.PathBuffer);
}
@@ -1078,8 +1091,11 @@ ntfs_create_reparse_buffer(struct ntfs_sb_info *sbi, const char *symname,
struct REPARSE_DATA_BUFFER *rp;
__le16 *rp_name;
typeof(rp->SymbolicLinkReparseBuffer) *rs;
+ bool is_absolute;
- rp = kzalloc(ntfs_reparse_bytes(2 * size + 2), GFP_NOFS);
+ is_absolute = (strlen(symname) > 1 && symname[1] == ':');
+
+ rp = kzalloc(ntfs_reparse_bytes(2 * size + 2, is_absolute), GFP_NOFS);
if (!rp)
return ERR_PTR(-ENOMEM);
@@ -1094,7 +1110,7 @@ ntfs_create_reparse_buffer(struct ntfs_sb_info *sbi, const char *symname,
goto out;
/* err = the length of unicode name of symlink. */
- *nsize = ntfs_reparse_bytes(err);
+ *nsize = ntfs_reparse_bytes(err, is_absolute);
if (*nsize > sbi->reparse.max_size) {
err = -EFBIG;
@@ -1114,7 +1130,7 @@ ntfs_create_reparse_buffer(struct ntfs_sb_info *sbi, const char *symname,
/* PrintName + SubstituteName. */
rs->SubstituteNameOffset = cpu_to_le16(sizeof(short) * err);
- rs->SubstituteNameLength = cpu_to_le16(sizeof(short) * err + 8);
+ rs->SubstituteNameLength = cpu_to_le16(sizeof(short) * err + (is_absolute ? 8 : 0));
rs->PrintNameLength = rs->SubstituteNameOffset;
/*
@@ -1122,16 +1138,18 @@ ntfs_create_reparse_buffer(struct ntfs_sb_info *sbi, const char *symname,
* parse this path.
* 0-absolute path 1- relative path (SYMLINK_FLAG_RELATIVE).
*/
- rs->Flags = 0;
+ rs->Flags = cpu_to_le32(is_absolute ? 0 : SYMLINK_FLAG_RELATIVE);
- memmove(rp_name + err + 4, rp_name, sizeof(short) * err);
+ memmove(rp_name + err + (is_absolute ? 4 : 0), rp_name, sizeof(short) * err);
- /* Decorate SubstituteName. */
- rp_name += err;
- rp_name[0] = cpu_to_le16('\\');
- rp_name[1] = cpu_to_le16('?');
- rp_name[2] = cpu_to_le16('?');
- rp_name[3] = cpu_to_le16('\\');
+ if (is_absolute) {
+ /* Decorate SubstituteName. */
+ rp_name += err;
+ rp_name[0] = cpu_to_le16('\\');
+ rp_name[1] = cpu_to_le16('?');
+ rp_name[2] = cpu_to_le16('?');
+ rp_name[3] = cpu_to_le16('\\');
+ }
return rp;
out:
@@ -1260,6 +1278,12 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
goto out1;
}
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(dir_ni))) {
+ err = -EINVAL;
+ goto out2;
+ }
+
if (unlikely(ntfs3_forced_shutdown(sb))) {
err = -EIO;
goto out2;
@@ -1350,7 +1374,7 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
fname->dup.a_time = std5->cr_time;
fname->dup.alloc_size = fname->dup.data_size = 0;
fname->dup.fa = std5->fa;
- fname->dup.ea_size = fname->dup.reparse = 0;
+ fname->dup.extend_data = S_ISLNK(mode) ? IO_REPARSE_TAG_SYMLINK : 0;
dsize = le16_to_cpu(new_de->key_size);
asize = ALIGN(SIZEOF_RESIDENT + dsize, 8);
@@ -1590,27 +1614,29 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
inode->i_flags |= S_NOSEC;
}
- /*
- * ntfs_init_acl and ntfs_save_wsl_perm update extended attribute.
- * The packed size of extended attribute is stored in direntry too.
- * 'fname' here points to inside new_de.
- */
- err = ntfs_save_wsl_perm(inode, &fname->dup.ea_size);
- if (err)
- goto out6;
+ if (!S_ISLNK(mode)) {
+ /*
+ * ntfs_init_acl and ntfs_save_wsl_perm update extended attribute.
+ * The packed size of extended attribute is stored in direntry too.
+ * 'fname' here points to inside new_de.
+ */
+ err = ntfs_save_wsl_perm(inode, &fname->dup.extend_data);
+ if (err)
+ goto out6;
- /*
- * update ea_size in file_name attribute too.
- * Use ni_find_attr cause layout of MFT record may be changed
- * in ntfs_init_acl and ntfs_save_wsl_perm.
- */
- attr = ni_find_attr(ni, NULL, NULL, ATTR_NAME, NULL, 0, NULL, NULL);
- if (attr) {
- struct ATTR_FILE_NAME *fn;
+ /*
+ * update ea_size in file_name attribute too.
+ * Use ni_find_attr cause layout of MFT record may be changed
+ * in ntfs_init_acl and ntfs_save_wsl_perm.
+ */
+ attr = ni_find_attr(ni, NULL, NULL, ATTR_NAME, NULL, 0, NULL, NULL);
+ if (attr) {
+ struct ATTR_FILE_NAME *fn;
- fn = resident_data_ex(attr, SIZEOF_ATTRIBUTE_FILENAME);
- if (fn)
- fn->dup.ea_size = fname->dup.ea_size;
+ fn = resident_data_ex(attr, SIZEOF_ATTRIBUTE_FILENAME);
+ if (fn)
+ fn->dup.extend_data = fname->dup.extend_data;
+ }
}
/* We do not need to update parent directory later */
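
The symlink changes above size the reparse payload from two UTF-16 copies of the target (PrintName plus SubstituteName) and now add the four-character \??\ decoration only for absolute targets. A worked sizing sketch; the 20-byte header constant is an assumption standing in for offsetof(struct REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer), which the driver computes from the real structure.

    #include <stdio.h>

    #define SYMLINK_HDR 20  /* assumed header size up to PathBuffer */

    /* two UTF-16 copies of the target (2 bytes per char); absolute
     * targets add the 4-char "\??\" prefix to the SubstituteName copy */
    static unsigned int reparse_bytes(unsigned int uni_len, int is_absolute)
    {
        return 2 * (2 * uni_len + (is_absolute ? 4 : 0)) + SYMLINK_HDR;
    }

    int main(void)
    {
        /* "C:\dir" is 6 UTF-16 characters */
        printf("absolute: %u bytes\n", reparse_bytes(6, 1)); /* 52 */
        printf("relative: %u bytes\n", reparse_bytes(6, 0)); /* 44 */
        return 0;
    }
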
diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
index b807744fc6a9..82c8ae56beee 100644
--- a/fs/ntfs3/namei.c
+++ b/fs/ntfs3/namei.c
@@ -171,6 +171,10 @@ static int ntfs_unlink(struct inode *dir, struct dentry *dentry)
struct ntfs_inode *ni = ntfs_i(dir);
int err;
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ni)))
+ return -EINVAL;
+
if (unlikely(ntfs3_forced_shutdown(dir->i_sb)))
return -EIO;
@@ -191,6 +195,10 @@ static int ntfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
{
u32 size = strlen(symname);
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ntfs_i(dir))))
+ return -EINVAL;
+
if (unlikely(ntfs3_forced_shutdown(dir->i_sb)))
return -EIO;
@@ -216,6 +224,10 @@ static int ntfs_rmdir(struct inode *dir, struct dentry *dentry)
struct ntfs_inode *ni = ntfs_i(dir);
int err;
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ni)))
+ return -EINVAL;
+
if (unlikely(ntfs3_forced_shutdown(dir->i_sb)))
return -EIO;
@@ -244,7 +256,7 @@ static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir,
struct ntfs_inode *ni = ntfs_i(inode);
struct inode *new_inode = d_inode(new_dentry);
struct NTFS_DE *de, *new_de;
- bool is_same, is_bad;
+ bool is_same;
/*
* de - memory of PATH_MAX bytes:
* [0-1024) - original name (dentry->d_name)
@@ -256,6 +268,10 @@ static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir,
1024);
static_assert(PATH_MAX >= 4 * 1024);
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ni)))
+ return -EINVAL;
+
if (unlikely(ntfs3_forced_shutdown(sb)))
return -EIO;
@@ -313,12 +329,8 @@ static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir,
if (dir_ni != new_dir_ni)
ni_lock_dir2(new_dir_ni);
- is_bad = false;
- err = ni_rename(dir_ni, new_dir_ni, ni, de, new_de, &is_bad);
- if (is_bad) {
- /* Restore after failed rename failed too. */
- _ntfs_bad_inode(inode);
- } else if (!err) {
+ err = ni_rename(dir_ni, new_dir_ni, ni, de, new_de);
+ if (!err) {
simple_rename_timestamp(dir, dentry, new_dir, new_dentry);
mark_inode_dirty(inode);
mark_inode_dirty(dir);
diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h
index 1ff13b6f9613..552b97905813 100644
--- a/fs/ntfs3/ntfs.h
+++ b/fs/ntfs3/ntfs.h
@@ -561,8 +561,7 @@ struct NTFS_DUP_INFO {
__le64 alloc_size; // 0x20: Data attribute allocated size, multiple of cluster size.
__le64 data_size; // 0x28: Data attribute size <= Dataalloc_size.
enum FILE_ATTRIBUTE fa; // 0x30: Standard DOS attributes & more.
- __le16 ea_size; // 0x34: Packed EAs.
- __le16 reparse; // 0x36: Used by Reparse.
+ __le32 extend_data; // 0x34: Extended data.
}; // 0x38
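
The NTFS_DUP_INFO change above folds the two 16-bit fields at 0x34 (packed EA size) and 0x36 (reparse) into a single 32-bit extend_data at 0x34, leaving the on-disk record at 0x38 bytes. A compile-time layout check under those assumptions; the structs are packed local copies, with the four leading timestamp fields taken from the driver source.

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    #pragma pack(push, 1)
    struct dup_old {
        uint64_t cr_time, m_time, c_time, a_time;  /* 0x00..0x1f */
        uint64_t alloc_size, data_size;            /* 0x20..0x2f */
        uint32_t fa;                               /* 0x30 */
        uint16_t ea_size;                          /* 0x34 */
        uint16_t reparse;                          /* 0x36 */
    };

    struct dup_new {
        uint64_t cr_time, m_time, c_time, a_time;
        uint64_t alloc_size, data_size;
        uint32_t fa;
        uint32_t extend_data;                      /* 0x34 */
    };
    #pragma pack(pop)

    int main(void)
    {
        assert(sizeof(struct dup_old) == 0x38);
        assert(sizeof(struct dup_new) == 0x38);
        assert(offsetof(struct dup_old, ea_size) == 0x34);
        assert(offsetof(struct dup_new, extend_data) == 0x34);
        return 0;
    }
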
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index 36b8052660d5..1296e6fcc779 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -377,6 +377,13 @@ struct ntfs_inode {
*/
u8 mi_loaded;
+ /*
+ * Use this field to avoid any writes.
+ * If the inode goes bad during initialization, use make_bad_inode();
+ * if it goes bad during later operations, set this field instead.
+ */
+ u8 ni_bad;
+
union {
struct ntfs_index dir;
struct {
@@ -577,8 +584,7 @@ int ni_add_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni,
struct NTFS_DE *de);
int ni_rename(struct ntfs_inode *dir_ni, struct ntfs_inode *new_dir_ni,
- struct ntfs_inode *ni, struct NTFS_DE *de, struct NTFS_DE *new_de,
- bool *is_bad);
+ struct ntfs_inode *ni, struct NTFS_DE *de, struct NTFS_DE *new_de);
bool ni_is_dirty(struct inode *inode);
@@ -702,10 +708,12 @@ struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref,
int ntfs_set_size(struct inode *inode, u64 new_size);
int ntfs_get_block(struct inode *inode, sector_t vbn,
struct buffer_head *bh_result, int create);
-int ntfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, u32 len, struct folio **foliop, void **fsdata);
-int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
- u32 len, u32 copied, struct folio *folio, void *fsdata);
+int ntfs_write_begin(const struct kiocb *iocb, struct address_space *mapping,
+ loff_t pos, u32 len, struct folio **foliop,
+ void **fsdata);
+int ntfs_write_end(const struct kiocb *iocb, struct address_space *mapping,
+ loff_t pos, u32 len, u32 copied, struct folio *folio,
+ void *fsdata);
int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc);
int ntfs_sync_inode(struct inode *inode);
int inode_read_data(struct inode *inode, void *data, size_t bytes);
@@ -874,7 +882,7 @@ int ntfs_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry);
ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
extern const struct xattr_handler *const ntfs_xattr_handlers[];
-int ntfs_save_wsl_perm(struct inode *inode, __le16 *ea_size);
+int ntfs_save_wsl_perm(struct inode *inode, __le32 *ea_size);
void ntfs_get_wsl_perm(struct inode *inode);
/* globals from lznt.c */
@@ -1025,6 +1033,11 @@ static inline bool is_compressed(const struct ntfs_inode *ni)
(ni->ni_flags & NI_FLAG_COMPRESSED_MASK);
}
+static inline bool is_bad_ni(const struct ntfs_inode *ni)
+{
+ return ni->ni_bad;
+}
+
static inline int ni_ext_compress_bits(const struct ntfs_inode *ni)
{
return 0xb + (ni->ni_flags & NI_FLAG_COMPRESSED_MASK);
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 920a1ab47b63..ddff94c091b8 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -1223,7 +1223,8 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_export_op = &ntfs_export_ops;
sb->s_time_gran = NTFS_TIME_GRAN; // 100 nsec
sb->s_xattr = ntfs_xattr_handlers;
- sb->s_d_op = options->nocase ? &ntfs_dentry_ops : NULL;
+ if (options->nocase)
+ set_default_d_op(sb, &ntfs_dentry_ops);
options->nls = ntfs_load_nls(options->nls_name);
if (IS_ERR(options->nls)) {
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index e0055dcf8fe3..e519e21596a7 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -313,7 +313,7 @@ out:
static noinline int ntfs_set_ea(struct inode *inode, const char *name,
size_t name_len, const void *value,
size_t val_size, int flags, bool locked,
- __le16 *ea_size)
+ __le32 *ea_size)
{
struct ntfs_inode *ni = ntfs_i(inode);
struct ntfs_sb_info *sbi = ni->mi.sbi;
@@ -522,7 +522,7 @@ update_ea:
if (ea_info.size_pack != size_pack)
ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
if (ea_size)
- *ea_size = ea_info.size_pack;
+ *ea_size = ea_info.size;
mark_inode_dirty(&ni->vfs_inode);
out:
@@ -552,6 +552,10 @@ struct posix_acl *ntfs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry,
int err;
void *buf;
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ni)))
+ return ERR_PTR(-EINVAL);
+
/* Allocate PATH_MAX bytes. */
buf = __getname();
if (!buf)
@@ -600,6 +604,10 @@ static noinline int ntfs_set_acl_ex(struct mnt_idmap *idmap,
int flags;
umode_t mode;
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ntfs_i(inode))))
+ return -EINVAL;
+
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
@@ -730,6 +738,10 @@ ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
struct ntfs_inode *ni = ntfs_i(inode);
ssize_t ret;
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ni)))
+ return -EINVAL;
+
if (!(ni->ni_flags & NI_FLAG_EA)) {
/* no xattr in file */
return 0;
@@ -751,6 +763,10 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de,
int err;
struct ntfs_inode *ni = ntfs_i(inode);
+ /* Avoid any operation if inode is bad. */
+ if (unlikely(is_bad_ni(ni)))
+ return -EINVAL;
+
if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
return -EIO;
@@ -950,7 +966,7 @@ out:
*
* save uid/gid/mode in xattr
*/
-int ntfs_save_wsl_perm(struct inode *inode, __le16 *ea_size)
+int ntfs_save_wsl_perm(struct inode *inode, __le32 *ea_size)
{
int err;
__le32 value;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 40b6bce12951..2203438738f6 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1856,7 +1856,8 @@ out:
return ret;
}
-static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
+static int ocfs2_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len,
struct folio **foliop, void **fsdata)
{
@@ -2047,7 +2048,8 @@ out:
return copied;
}
-static int ocfs2_write_end(struct file *file, struct address_space *mapping,
+static int ocfs2_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct folio *folio, void *fsdata)
{
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 3d2533950bae..53daa4482406 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1962,7 +1962,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
sb->s_fs_info = osb;
sb->s_op = &ocfs2_sops;
- sb->s_d_op = &ocfs2_dentry_ops;
+ set_default_d_op(sb, &ocfs2_dentry_ops);
sb->s_export_op = &ocfs2_export_ops;
sb->s_qcop = &dquot_quotactl_sysfile_ops;
sb->dq_op = &ocfs2_quota_operations;
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 98358d405b6a..8d70f816b0c9 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -310,9 +310,10 @@ static void omfs_write_failed(struct address_space *mapping, loff_t to)
}
}
-static int omfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct folio **foliop, void **fsdata)
+static int omfs_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct folio **foliop, void **fsdata)
{
int ret;
diff --git a/fs/open.c b/fs/open.c
index 7828234a7caa..b29d1e077164 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1204,14 +1204,11 @@ struct file *kernel_file_open(const struct path *path, int flags,
if (IS_ERR(f))
return f;
- f->f_path = *path;
- error = do_dentry_open(f, NULL);
+ error = vfs_open(path, f);
if (error) {
fput(f);
return ERR_PTR(error);
}
-
- fsnotify_open(f);
return f;
}
EXPORT_SYMBOL_GPL(kernel_file_open);
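
The replacement is behavior-preserving: vfs_open() sets f_path and calls do_dentry_open(), and do_dentry_open() nowadays raises the fsnotify open event itself, which is why the explicit fsnotify_open() call can go. Roughly (a simplified sketch; the real helper in fs/open.c does a little more, e.g. for backing files):

    int vfs_open(const struct path *path, struct file *file)
    {
            file->f_path = *path;
            return do_dentry_open(file, NULL);  /* includes fsnotify_open() */
    }
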
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 08a6f372a352..a7ab63776735 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -285,9 +285,10 @@ static int orangefs_read_folio(struct file *file, struct folio *folio)
return ret;
}
-static int orangefs_write_begin(struct file *file,
- struct address_space *mapping, loff_t pos, unsigned len,
- struct folio **foliop, void **fsdata)
+static int orangefs_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping, loff_t pos,
+ unsigned len, struct folio **foliop,
+ void **fsdata)
{
struct orangefs_write_range *wr;
struct folio *folio;
@@ -340,9 +341,10 @@ okay:
return 0;
}
-static int orangefs_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied, struct folio *folio,
- void *fsdata)
+static int orangefs_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio, void *fsdata)
{
struct inode *inode = folio->mapping->host;
loff_t last_pos = pos + copied;
@@ -372,7 +374,7 @@ static int orangefs_write_end(struct file *file, struct address_space *mapping,
folio_unlock(folio);
folio_put(folio);
- mark_inode_dirty_sync(file_inode(file));
+ mark_inode_dirty_sync(file_inode(iocb->ki_filp));
return copied;
}
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index 64ca9498f550..f3da840758e7 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -416,7 +416,7 @@ static int orangefs_fill_sb(struct super_block *sb,
sb->s_xattr = orangefs_xattr_handlers;
sb->s_magic = ORANGEFS_SUPER_MAGIC;
sb->s_op = &orangefs_s_ops;
- sb->s_d_op = &orangefs_dentry_operations;
+ set_default_d_op(sb, &orangefs_dentry_operations);
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index d7310fcf3888..c4d7c281d473 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -517,15 +517,12 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
/*
* Create and install index entry.
- *
- * Caller must hold i_mutex on indexdir.
*/
static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh,
struct dentry *upper)
{
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
- struct inode *dir = d_inode(indexdir);
struct dentry *index = NULL;
struct dentry *temp = NULL;
struct qstr name = { };
@@ -559,16 +556,20 @@ static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh,
if (err)
goto out;
+ err = ovl_parent_lock(indexdir, temp);
+ if (err)
+ goto out;
index = ovl_lookup_upper(ofs, name.name, indexdir, name.len);
if (IS_ERR(index)) {
err = PTR_ERR(index);
} else {
- err = ovl_do_rename(ofs, dir, temp, dir, index, 0);
+ err = ovl_do_rename(ofs, indexdir, temp, indexdir, index, 0);
dput(index);
}
+ ovl_parent_unlock(indexdir);
out:
if (err)
- ovl_cleanup(ofs, dir, temp);
+ ovl_cleanup(ofs, indexdir, temp);
dput(temp);
free_name:
kfree(name.name);
@@ -762,7 +763,6 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
{
struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
struct inode *inode;
- struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
struct path path = { .mnt = ovl_upper_mnt(ofs) };
struct dentry *temp, *upper, *trap;
struct ovl_cu_creds cc;
@@ -779,9 +779,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
return err;
ovl_start_write(c->dentry);
- inode_lock(wdir);
temp = ovl_create_temp(ofs, c->workdir, &cattr);
- inode_unlock(wdir);
ovl_end_write(c->dentry);
ovl_revert_cu_creds(&cc);
@@ -794,45 +792,47 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
*/
path.dentry = temp;
err = ovl_copy_up_data(c, &path);
+ ovl_start_write(c->dentry);
+ if (err)
+ goto cleanup_unlocked;
+
+ if (S_ISDIR(c->stat.mode) && c->indexed) {
+ err = ovl_create_index(c->dentry, c->origin_fh, temp);
+ if (err)
+ goto cleanup_unlocked;
+ }
+
/*
* We cannot hold lock_rename() throughout this helper, because of
* lock ordering with sb_writers, which shouldn't be held when calling
* ovl_copy_up_data(), so lock workdir and destdir and make sure that
* temp wasn't moved before copy up completion or cleanup.
*/
- ovl_start_write(c->dentry);
trap = lock_rename(c->workdir, c->destdir);
if (trap || temp->d_parent != c->workdir) {
/* temp or workdir moved underneath us? abort without cleanup */
dput(temp);
err = -EIO;
- if (IS_ERR(trap))
- goto out;
- goto unlock;
- } else if (err) {
- goto cleanup;
+ if (!IS_ERR(trap))
+ unlock_rename(c->workdir, c->destdir);
+ goto out;
}
err = ovl_copy_up_metadata(c, temp);
if (err)
goto cleanup;
- if (S_ISDIR(c->stat.mode) && c->indexed) {
- err = ovl_create_index(c->dentry, c->origin_fh, temp);
- if (err)
- goto cleanup;
- }
-
upper = ovl_lookup_upper(ofs, c->destname.name, c->destdir,
c->destname.len);
err = PTR_ERR(upper);
if (IS_ERR(upper))
goto cleanup;
- err = ovl_do_rename(ofs, wdir, temp, udir, upper, 0);
+ err = ovl_do_rename(ofs, c->workdir, temp, c->destdir, upper, 0);
+ unlock_rename(c->workdir, c->destdir);
dput(upper);
if (err)
- goto cleanup;
+ goto cleanup_unlocked;
inode = d_inode(c->dentry);
if (c->metacopy_digest)
@@ -846,17 +846,17 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
ovl_inode_update(inode, temp);
if (S_ISDIR(inode->i_mode))
ovl_set_flag(OVL_WHITEOUTS, inode);
-unlock:
- unlock_rename(c->workdir, c->destdir);
out:
ovl_end_write(c->dentry);
return err;
cleanup:
- ovl_cleanup(ofs, wdir, temp);
+ unlock_rename(c->workdir, c->destdir);
+cleanup_unlocked:
+ ovl_cleanup(ofs, c->workdir, temp);
dput(temp);
- goto unlock;
+ goto out;
}
/* Copyup using O_TMPFILE which does not require cross dir locking */
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index fe493f3ed6b6..70b8687dc45e 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -24,7 +24,8 @@ MODULE_PARM_DESC(redirect_max,
static int ovl_set_redirect(struct dentry *dentry, bool samedir);
-int ovl_cleanup(struct ovl_fs *ofs, struct inode *wdir, struct dentry *wdentry)
+static int ovl_cleanup_locked(struct ovl_fs *ofs, struct inode *wdir,
+ struct dentry *wdentry)
{
int err;
@@ -43,6 +44,21 @@ int ovl_cleanup(struct ovl_fs *ofs, struct inode *wdir, struct dentry *wdentry)
return err;
}
+int ovl_cleanup(struct ovl_fs *ofs, struct dentry *workdir,
+ struct dentry *wdentry)
+{
+ int err;
+
+ err = ovl_parent_lock(workdir, wdentry);
+ if (err)
+ return err;
+
+ ovl_cleanup_locked(ofs, workdir->d_inode, wdentry);
+ ovl_parent_unlock(workdir);
+
+ return 0;
+}
+
struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir)
{
struct dentry *temp;
@@ -62,7 +78,6 @@ struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir)
return temp;
}
-/* caller holds i_mutex on workdir */
static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
{
int err;
@@ -70,47 +85,52 @@ static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
struct dentry *workdir = ofs->workdir;
struct inode *wdir = workdir->d_inode;
+ guard(mutex)(&ofs->whiteout_lock);
+
if (!ofs->whiteout) {
+ inode_lock_nested(wdir, I_MUTEX_PARENT);
whiteout = ovl_lookup_temp(ofs, workdir);
- if (IS_ERR(whiteout))
- goto out;
-
- err = ovl_do_whiteout(ofs, wdir, whiteout);
- if (err) {
- dput(whiteout);
- whiteout = ERR_PTR(err);
- goto out;
+ if (!IS_ERR(whiteout)) {
+ err = ovl_do_whiteout(ofs, wdir, whiteout);
+ if (err) {
+ dput(whiteout);
+ whiteout = ERR_PTR(err);
+ }
}
+ inode_unlock(wdir);
+ if (IS_ERR(whiteout))
+ return whiteout;
ofs->whiteout = whiteout;
}
if (!ofs->no_shared_whiteout) {
+ inode_lock_nested(wdir, I_MUTEX_PARENT);
whiteout = ovl_lookup_temp(ofs, workdir);
- if (IS_ERR(whiteout))
- goto out;
-
- err = ovl_do_link(ofs, ofs->whiteout, wdir, whiteout);
- if (!err)
- goto out;
-
- if (err != -EMLINK) {
- pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
- ofs->whiteout->d_inode->i_nlink, err);
+ if (!IS_ERR(whiteout)) {
+ err = ovl_do_link(ofs, ofs->whiteout, wdir, whiteout);
+ if (err) {
+ dput(whiteout);
+ whiteout = ERR_PTR(err);
+ }
+ }
+ inode_unlock(wdir);
+ if (!IS_ERR(whiteout))
+ return whiteout;
+ if (PTR_ERR(whiteout) != -EMLINK) {
+ pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%lu)\n",
+ ofs->whiteout->d_inode->i_nlink,
+ PTR_ERR(whiteout));
ofs->no_shared_whiteout = true;
}
- dput(whiteout);
}
whiteout = ofs->whiteout;
ofs->whiteout = NULL;
-out:
return whiteout;
}
-/* Caller must hold i_mutex on both workdir and dir */
-int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir,
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir,
struct dentry *dentry)
{
- struct inode *wdir = ofs->workdir->d_inode;
struct dentry *whiteout;
int err;
int flags = 0;
@@ -123,24 +143,29 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir,
if (d_is_dir(dentry))
flags = RENAME_EXCHANGE;
- err = ovl_do_rename(ofs, wdir, whiteout, dir, dentry, flags);
+ err = ovl_lock_rename_workdir(ofs->workdir, whiteout, dir, dentry);
+ if (!err) {
+ err = ovl_do_rename(ofs, ofs->workdir, whiteout, dir, dentry, flags);
+ unlock_rename(ofs->workdir, dir);
+ }
if (err)
goto kill_whiteout;
if (flags)
- ovl_cleanup(ofs, wdir, dentry);
+ ovl_cleanup(ofs, ofs->workdir, dentry);
out:
dput(whiteout);
return err;
kill_whiteout:
- ovl_cleanup(ofs, wdir, whiteout);
+ ovl_cleanup(ofs, ofs->workdir, whiteout);
goto out;
}
-struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir,
+struct dentry *ovl_create_real(struct ovl_fs *ofs, struct dentry *parent,
struct dentry *newdentry, struct ovl_cattr *attr)
{
+ struct inode *dir = parent->d_inode;
int err;
if (IS_ERR(newdentry))
@@ -199,8 +224,12 @@ out:
struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
struct ovl_cattr *attr)
{
- return ovl_create_real(ofs, d_inode(workdir),
- ovl_lookup_temp(ofs, workdir), attr);
+ struct dentry *ret;
+ inode_lock(workdir->d_inode);
+ ret = ovl_create_real(ofs, workdir,
+ ovl_lookup_temp(ofs, workdir), attr);
+ inode_unlock(workdir->d_inode);
+ return ret;
}
static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
@@ -303,13 +332,13 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
int err;
inode_lock_nested(udir, I_MUTEX_PARENT);
- newdentry = ovl_create_real(ofs, udir,
+ newdentry = ovl_create_real(ofs, upperdir,
ovl_lookup_upper(ofs, dentry->d_name.name,
upperdir, dentry->d_name.len),
attr);
- err = PTR_ERR(newdentry);
+ inode_unlock(udir);
if (IS_ERR(newdentry))
- goto out_unlock;
+ return PTR_ERR(newdentry);
if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) &&
!ovl_allow_offline_changes(ofs)) {
@@ -321,14 +350,12 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
err = ovl_instantiate(dentry, inode, newdentry, !!attr->hardlink, NULL);
if (err)
goto out_cleanup;
-out_unlock:
- inode_unlock(udir);
- return err;
+ return 0;
out_cleanup:
- ovl_cleanup(ofs, udir, newdentry);
+ ovl_cleanup(ofs, upperdir, newdentry);
dput(newdentry);
- goto out_unlock;
+ return err;
}
static struct dentry *ovl_clear_empty(struct dentry *dentry,
@@ -336,9 +363,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
{
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *workdir = ovl_workdir(dentry);
- struct inode *wdir = workdir->d_inode;
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
- struct inode *udir = upperdir->d_inode;
struct path upperpath;
struct dentry *upper;
struct dentry *opaquedir;
@@ -348,27 +373,25 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
if (WARN_ON(!workdir))
return ERR_PTR(-EROFS);
- err = ovl_lock_rename_workdir(workdir, upperdir);
- if (err)
- goto out;
-
ovl_path_upper(dentry, &upperpath);
err = vfs_getattr(&upperpath, &stat,
STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
if (err)
- goto out_unlock;
+ goto out;
err = -ESTALE;
if (!S_ISDIR(stat.mode))
- goto out_unlock;
+ goto out;
upper = upperpath.dentry;
- if (upper->d_parent->d_inode != udir)
- goto out_unlock;
opaquedir = ovl_create_temp(ofs, workdir, OVL_CATTR(stat.mode));
err = PTR_ERR(opaquedir);
if (IS_ERR(opaquedir))
- goto out_unlock;
+ goto out;
+
+ err = ovl_lock_rename_workdir(workdir, opaquedir, upperdir, upper);
+ if (err)
+ goto out_cleanup_unlocked;
err = ovl_copy_xattr(dentry->d_sb, &upperpath, opaquedir);
if (err)
@@ -384,13 +407,13 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
if (err)
goto out_cleanup;
- err = ovl_do_rename(ofs, wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
+ err = ovl_do_rename(ofs, workdir, opaquedir, upperdir, upper, RENAME_EXCHANGE);
+ unlock_rename(workdir, upperdir);
if (err)
- goto out_cleanup;
+ goto out_cleanup_unlocked;
ovl_cleanup_whiteouts(ofs, upper, list);
- ovl_cleanup(ofs, wdir, upper);
- unlock_rename(workdir, upperdir);
+ ovl_cleanup(ofs, workdir, upper);
/* dentry's upper doesn't match now, get rid of it */
d_drop(dentry);
@@ -398,10 +421,10 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
return opaquedir;
out_cleanup:
- ovl_cleanup(ofs, wdir, opaquedir);
- dput(opaquedir);
-out_unlock:
unlock_rename(workdir, upperdir);
+out_cleanup_unlocked:
+ ovl_cleanup(ofs, workdir, opaquedir);
+ dput(opaquedir);
out:
return ERR_PTR(err);
}
@@ -420,9 +443,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
{
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *workdir = ovl_workdir(dentry);
- struct inode *wdir = workdir->d_inode;
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
- struct inode *udir = upperdir->d_inode;
struct dentry *upper;
struct dentry *newdentry;
int err;
@@ -439,15 +460,11 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
return err;
}
- err = ovl_lock_rename_workdir(workdir, upperdir);
- if (err)
- goto out;
-
- upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
- dentry->d_name.len);
+ upper = ovl_lookup_upper_unlocked(ofs, dentry->d_name.name, upperdir,
+ dentry->d_name.len);
err = PTR_ERR(upper);
if (IS_ERR(upper))
- goto out_unlock;
+ goto out;
err = -ESTALE;
if (d_is_negative(upper) || !ovl_upper_is_whiteout(ofs, upper))
@@ -458,6 +475,10 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
if (IS_ERR(newdentry))
goto out_dput;
+ err = ovl_lock_rename_workdir(workdir, newdentry, upperdir, upper);
+ if (err)
+ goto out_cleanup_unlocked;
+
/*
* mode could have been mutilated due to umask (e.g. sgid directory)
*/
@@ -491,27 +512,27 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
if (err)
goto out_cleanup;
- err = ovl_do_rename(ofs, wdir, newdentry, udir, upper,
+ err = ovl_do_rename(ofs, workdir, newdentry, upperdir, upper,
RENAME_EXCHANGE);
+ unlock_rename(workdir, upperdir);
if (err)
- goto out_cleanup;
+ goto out_cleanup_unlocked;
- ovl_cleanup(ofs, wdir, upper);
+ ovl_cleanup(ofs, workdir, upper);
} else {
- err = ovl_do_rename(ofs, wdir, newdentry, udir, upper, 0);
+ err = ovl_do_rename(ofs, workdir, newdentry, upperdir, upper, 0);
+ unlock_rename(workdir, upperdir);
if (err)
- goto out_cleanup;
+ goto out_cleanup_unlocked;
}
ovl_dir_modified(dentry->d_parent, false);
err = ovl_instantiate(dentry, inode, newdentry, hardlink, NULL);
if (err) {
- ovl_cleanup(ofs, udir, newdentry);
+ ovl_cleanup(ofs, upperdir, newdentry);
dput(newdentry);
}
out_dput:
dput(upper);
-out_unlock:
- unlock_rename(workdir, upperdir);
out:
if (!hardlink) {
posix_acl_release(acl);
@@ -520,7 +541,9 @@ out:
return err;
out_cleanup:
- ovl_cleanup(ofs, wdir, newdentry);
+ unlock_rename(workdir, upperdir);
+out_cleanup_unlocked:
+ ovl_cleanup(ofs, workdir, newdentry);
dput(newdentry);
goto out_dput;
}
@@ -757,15 +780,11 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
goto out;
}
- err = ovl_lock_rename_workdir(workdir, upperdir);
- if (err)
- goto out_dput;
-
- upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
- dentry->d_name.len);
+ upper = ovl_lookup_upper_unlocked(ofs, dentry->d_name.name, upperdir,
+ dentry->d_name.len);
err = PTR_ERR(upper);
if (IS_ERR(upper))
- goto out_unlock;
+ goto out_dput;
err = -ESTALE;
if ((opaquedir && upper != opaquedir) ||
@@ -774,17 +793,13 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
goto out_dput_upper;
}
- err = ovl_cleanup_and_whiteout(ofs, d_inode(upperdir), upper);
- if (err)
- goto out_d_drop;
+ err = ovl_cleanup_and_whiteout(ofs, upperdir, upper);
+ if (!err)
+ ovl_dir_modified(dentry->d_parent, true);
- ovl_dir_modified(dentry->d_parent, true);
-out_d_drop:
d_drop(dentry);
out_dput_upper:
dput(upper);
-out_unlock:
- unlock_rename(workdir, upperdir);
out_dput:
dput(opaquedir);
out:
@@ -1069,9 +1084,9 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
int err;
struct dentry *old_upperdir;
struct dentry *new_upperdir;
- struct dentry *olddentry;
- struct dentry *newdentry;
- struct dentry *trap;
+ struct dentry *olddentry = NULL;
+ struct dentry *newdentry = NULL;
+ struct dentry *trap, *de;
bool old_opaque;
bool new_opaque;
bool cleanup_whiteout = false;
@@ -1184,21 +1199,23 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
goto out_revert_creds;
}
- olddentry = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir,
- old->d_name.len);
- err = PTR_ERR(olddentry);
- if (IS_ERR(olddentry))
+ de = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir,
+ old->d_name.len);
+ err = PTR_ERR(de);
+ if (IS_ERR(de))
goto out_unlock;
+ olddentry = de;
err = -ESTALE;
if (!ovl_matches_upper(old, olddentry))
- goto out_dput_old;
+ goto out_unlock;
- newdentry = ovl_lookup_upper(ofs, new->d_name.name, new_upperdir,
- new->d_name.len);
- err = PTR_ERR(newdentry);
- if (IS_ERR(newdentry))
- goto out_dput_old;
+ de = ovl_lookup_upper(ofs, new->d_name.name, new_upperdir,
+ new->d_name.len);
+ err = PTR_ERR(de);
+ if (IS_ERR(de))
+ goto out_unlock;
+ newdentry = de;
old_opaque = ovl_dentry_is_opaque(old);
new_opaque = ovl_dentry_is_opaque(new);
@@ -1207,28 +1224,28 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
if (d_inode(new) && ovl_dentry_upper(new)) {
if (opaquedir) {
if (newdentry != opaquedir)
- goto out_dput;
+ goto out_unlock;
} else {
if (!ovl_matches_upper(new, newdentry))
- goto out_dput;
+ goto out_unlock;
}
} else {
if (!d_is_negative(newdentry)) {
if (!new_opaque || !ovl_upper_is_whiteout(ofs, newdentry))
- goto out_dput;
+ goto out_unlock;
} else {
if (flags & RENAME_EXCHANGE)
- goto out_dput;
+ goto out_unlock;
}
}
if (olddentry == trap)
- goto out_dput;
+ goto out_unlock;
if (newdentry == trap)
- goto out_dput;
+ goto out_unlock;
if (olddentry->d_inode == newdentry->d_inode)
- goto out_dput;
+ goto out_unlock;
err = 0;
if (ovl_type_merge_or_lower(old))
@@ -1236,7 +1253,7 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
else if (is_dir && !old_opaque && ovl_type_merge(new->d_parent))
err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
if (err)
- goto out_dput;
+ goto out_unlock;
if (!overwrite && ovl_type_merge_or_lower(new))
err = ovl_set_redirect(new, samedir);
@@ -1244,15 +1261,16 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
ovl_type_merge(old->d_parent))
err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
if (err)
- goto out_dput;
+ goto out_unlock;
- err = ovl_do_rename(ofs, old_upperdir->d_inode, olddentry,
- new_upperdir->d_inode, newdentry, flags);
+ err = ovl_do_rename(ofs, old_upperdir, olddentry,
+ new_upperdir, newdentry, flags);
+ unlock_rename(new_upperdir, old_upperdir);
if (err)
- goto out_dput;
+ goto out_revert_creds;
if (cleanup_whiteout)
- ovl_cleanup(ofs, old_upperdir->d_inode, newdentry);
+ ovl_cleanup(ofs, old_upperdir, newdentry);
if (overwrite && d_inode(new)) {
if (new_is_dir)
@@ -1271,12 +1289,6 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
if (d_inode(new) && ovl_dentry_upper(new))
ovl_copyattr(d_inode(new));
-out_dput:
- dput(newdentry);
-out_dput_old:
- dput(olddentry);
-out_unlock:
- unlock_rename(new_upperdir, old_upperdir);
out_revert_creds:
ovl_revert_creds(old_cred);
if (update_nlink)
@@ -1284,9 +1296,15 @@ out_revert_creds:
else
ovl_drop_write(old);
out:
+ dput(newdentry);
+ dput(olddentry);
dput(opaquedir);
ovl_cache_free(&list);
return err;
+
+out_unlock:
+ unlock_rename(new_upperdir, old_upperdir);
+ goto out_revert_creds;
}
static int ovl_create_tmpfile(struct file *file, struct dentry *dentry,
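
The ovl_whiteout() rewrite above leans on scope-based locking: the whiteout cache is serialized by the new ofs->whiteout_lock, and guard(mutex) from <linux/cleanup.h> releases it automatically on every return path. A minimal sketch of the idiom (demo_lock and demo are placeholders):

    #include <linux/cleanup.h>
    #include <linux/mutex.h>

    static DEFINE_MUTEX(demo_lock);

    static int demo(void)
    {
            guard(mutex)(&demo_lock);       /* dropped when the scope is left */
            /* ... critical section; early returns unlock automatically ... */
            return 0;
    }
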
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index dfea7bd800cb..f5b8877d5fe2 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -48,7 +48,7 @@ static struct file *ovl_open_realfile(const struct file *file,
if (!inode_owner_or_capable(real_idmap, realinode))
flags &= ~O_NOATIME;
- realfile = backing_file_open(file_user_path((struct file *) file),
+ realfile = backing_file_open(file_user_path(file),
flags, realpath, current_cred());
}
ovl_revert_creds(old_cred);
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 0b8b28392eb7..76d6248b625e 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -230,13 +230,26 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
struct dentry **ret, bool drop_negative)
{
struct ovl_fs *ofs = OVL_FS(d->sb);
- struct dentry *this;
+ struct dentry *this = NULL;
+ const char *warn;
struct path path;
int err;
bool last_element = !post[0];
bool is_upper = d->layer->idx == 0;
char val;
+ /*
+ * We allow filesystems that are case-folding capable but deny composing
+ * an ovl stack from case-folded directories. If someone has enabled case
+ * folding on a directory in an underlying layer, the guarantees of the
+ * ovl stack are void.
+ */
+ if (ovl_dentry_casefolded(base)) {
+ warn = "case folded parent";
+ err = -ESTALE;
+ goto out_warn;
+ }
+
this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative);
if (IS_ERR(this)) {
err = PTR_ERR(this);
@@ -246,10 +259,17 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
goto out_err;
}
+ if (ovl_dentry_casefolded(this)) {
+ warn = "case folded child";
+ err = -EREMOTE;
+ goto out_warn;
+ }
+
if (ovl_dentry_weird(this)) {
/* Don't support traversing automounts and other weirdness */
+ warn = "unsupported object type";
err = -EREMOTE;
- goto out_err;
+ goto out_warn;
}
path.dentry = this;
@@ -283,8 +303,9 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
} else {
if (ovl_lookup_trap_inode(d->sb, this)) {
/* Caught in a trap of overlapping layers */
+ warn = "overlapping layers";
err = -ELOOP;
- goto out_err;
+ goto out_warn;
}
if (last_element)
@@ -316,6 +337,10 @@ put_and_out:
this = NULL;
goto out;
+out_warn:
+ pr_warn_ratelimited("failed lookup in %s (%pd2, name='%.*s', err=%i): %s\n",
+ is_upper ? "upper" : "lower", base,
+ namelen, name, err, warn);
out_err:
dput(this);
return err;
@@ -1393,7 +1418,7 @@ out:
bool ovl_lower_positive(struct dentry *dentry)
{
struct ovl_entry *poe = OVL_E(dentry->d_parent);
- struct qstr *name = &dentry->d_name;
+ const struct qstr *name = &dentry->d_name;
const struct cred *old_cred;
unsigned int i;
bool positive = false;
@@ -1416,9 +1441,15 @@ bool ovl_lower_positive(struct dentry *dentry)
struct dentry *this;
struct ovl_path *parentpath = &ovl_lowerstack(poe)[i];
+ /*
+ * We need to make a non-const copy of dentry->d_name,
+ * because lookup_one_positive_unlocked() will hash the name
+ * with the parentpath base, which is on another (lower) fs.
+ */
this = lookup_one_positive_unlocked(
mnt_idmap(parentpath->layer->mnt),
- name, parentpath->dentry);
+ &QSTR_LEN(name->name, name->len),
+ parentpath->dentry);
if (IS_ERR(this)) {
switch (PTR_ERR(this)) {
case -ENOENT:
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 8baaba0a3fe5..d0518bdd1134 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -246,9 +246,11 @@ static inline struct dentry *ovl_do_mkdir(struct ovl_fs *ofs,
struct dentry *dentry,
umode_t mode)
{
- dentry = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode);
- pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(dentry));
- return dentry;
+ struct dentry *ret;
+
+ ret = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode);
+ pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(ret));
+ return ret;
}
static inline int ovl_do_mknod(struct ovl_fs *ofs,
@@ -353,19 +355,19 @@ static inline int ovl_do_remove_acl(struct ovl_fs *ofs, struct dentry *dentry,
return vfs_remove_acl(ovl_upper_mnt_idmap(ofs), dentry, acl_name);
}
-static inline int ovl_do_rename(struct ovl_fs *ofs, struct inode *olddir,
- struct dentry *olddentry, struct inode *newdir,
+static inline int ovl_do_rename(struct ovl_fs *ofs, struct dentry *olddir,
+ struct dentry *olddentry, struct dentry *newdir,
struct dentry *newdentry, unsigned int flags)
{
int err;
struct renamedata rd = {
.old_mnt_idmap = ovl_upper_mnt_idmap(ofs),
- .old_dir = olddir,
- .old_dentry = olddentry,
+ .old_parent = olddir,
+ .old_dentry = olddentry,
.new_mnt_idmap = ovl_upper_mnt_idmap(ofs),
- .new_dir = newdir,
- .new_dentry = newdentry,
- .flags = flags,
+ .new_parent = newdir,
+ .new_dentry = newdentry,
+ .flags = flags,
};
pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags);
@@ -405,6 +407,15 @@ static inline struct dentry *ovl_lookup_upper(struct ovl_fs *ofs,
return lookup_one(ovl_upper_mnt_idmap(ofs), &QSTR_LEN(name, len), base);
}
+static inline struct dentry *ovl_lookup_upper_unlocked(struct ovl_fs *ofs,
+ const char *name,
+ struct dentry *base,
+ int len)
+{
+ return lookup_one_unlocked(ovl_upper_mnt_idmap(ofs),
+ &QSTR_LEN(name, len), base);
+}
+
static inline bool ovl_open_flags_need_copy_up(int flags)
{
if (!flags)
@@ -414,6 +425,11 @@ static inline bool ovl_open_flags_need_copy_up(int flags)
}
/* util.c */
+int ovl_parent_lock(struct dentry *parent, struct dentry *child);
+static inline void ovl_parent_unlock(struct dentry *parent)
+{
+ inode_unlock(parent->d_inode);
+}
int ovl_get_write_access(struct dentry *dentry);
void ovl_put_write_access(struct dentry *dentry);
void ovl_start_write(struct dentry *dentry);
@@ -446,6 +462,12 @@ void ovl_dentry_init_reval(struct dentry *dentry, struct dentry *upperdentry,
void ovl_dentry_init_flags(struct dentry *dentry, struct dentry *upperdentry,
struct ovl_entry *oe, unsigned int mask);
bool ovl_dentry_weird(struct dentry *dentry);
+
+static inline bool ovl_dentry_casefolded(struct dentry *dentry)
+{
+ return sb_has_encoding(dentry->d_sb) && IS_CASEFOLDED(d_inode(dentry));
+}
+
enum ovl_path_type ovl_path_type(struct dentry *dentry);
void ovl_path_upper(struct dentry *dentry, struct path *path);
void ovl_path_lower(struct dentry *dentry, struct path *path);
@@ -533,7 +555,8 @@ bool ovl_is_inuse(struct dentry *dentry);
bool ovl_need_index(struct dentry *dentry);
int ovl_nlink_start(struct dentry *dentry);
void ovl_nlink_end(struct dentry *dentry);
-int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
+int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *work,
+ struct dentry *upperdir, struct dentry *upper);
int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path,
struct ovl_metacopy *data);
int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d,
@@ -721,7 +744,7 @@ void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
void ovl_cache_free(struct list_head *list);
void ovl_dir_cache_free(struct inode *inode);
int ovl_check_d_type_supported(const struct path *realpath);
-int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir,
+int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent,
struct vfsmount *mnt, struct dentry *dentry, int level);
int ovl_indexdir_cleanup(struct ovl_fs *ofs);
@@ -826,7 +849,7 @@ static inline void ovl_copyflags(struct inode *from, struct inode *to)
/* dir.c */
extern const struct inode_operations ovl_dir_inode_operations;
-int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir,
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir,
struct dentry *dentry);
struct ovl_cattr {
dev_t rdev;
@@ -838,9 +861,9 @@ struct ovl_cattr {
#define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) })
struct dentry *ovl_create_real(struct ovl_fs *ofs,
- struct inode *dir, struct dentry *newdentry,
+ struct dentry *parent, struct dentry *newdentry,
struct ovl_cattr *attr);
-int ovl_cleanup(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry);
+int ovl_cleanup(struct ovl_fs *ofs, struct dentry *workdir, struct dentry *dentry);
struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir);
struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
struct ovl_cattr *attr);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index afb7762f873f..4c1bae935ced 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -88,6 +88,7 @@ struct ovl_fs {
/* Shared whiteout cache */
struct dentry *whiteout;
bool no_shared_whiteout;
+ struct mutex whiteout_lock;
/* r/o snapshot of upperdir sb's only taken on volatile mounts */
errseq_t errseq;
};
diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
index f42488c01957..f4e7fff909ac 100644
--- a/fs/overlayfs/params.c
+++ b/fs/overlayfs/params.c
@@ -282,13 +282,11 @@ static int ovl_mount_dir_check(struct fs_context *fc, const struct path *path,
return invalfc(fc, "%s is not a directory", name);
/*
- * Root dentries of case-insensitive capable filesystems might
- * not have the dentry operations set, but still be incompatible
- * with overlayfs. Check explicitly to prevent post-mount
- * failures.
+ * Allow filesystems that are case-folding capable but deny composing
+ * an ovl stack from case-folded directories.
*/
- if (sb_has_encoding(path->mnt->mnt_sb))
- return invalfc(fc, "case-insensitive capable filesystem on %s not supported", name);
+ if (ovl_dentry_casefolded(path->dentry))
+ return invalfc(fc, "case-insensitive directory on %s not supported", name);
if (ovl_dentry_weird(path->dentry))
return invalfc(fc, "filesystem on %s not supported", name);
@@ -797,6 +795,8 @@ int ovl_init_fs_context(struct fs_context *fc)
fc->s_fs_info = ofs;
fc->fs_private = ctx;
fc->ops = &ovl_context_ops;
+
+ mutex_init(&ofs->whiteout_lock);
return 0;
out_err:
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 474c80d210d1..b65cdfce31ce 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -1034,14 +1034,13 @@ void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
{
struct ovl_cache_entry *p;
- inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
list_for_each_entry(p, list, l_node) {
struct dentry *dentry;
if (WARN_ON(!p->is_whiteout || !p->is_upper))
continue;
- dentry = ovl_lookup_upper(ofs, p->name, upper, p->len);
+ dentry = ovl_lookup_upper_unlocked(ofs, p->name, upper, p->len);
if (IS_ERR(dentry)) {
pr_err("lookup '%s/%.*s' failed (%i)\n",
upper->d_name.name, p->len, p->name,
@@ -1049,10 +1048,9 @@ void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
continue;
}
if (dentry->d_inode)
- ovl_cleanup(ofs, upper->d_inode, dentry);
+ ovl_cleanup(ofs, upper, dentry);
dput(dentry);
}
- inode_unlock(upper->d_inode);
}
static bool ovl_check_d_type(struct dir_context *ctx, const char *name,
@@ -1098,7 +1096,6 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa
int level)
{
int err;
- struct inode *dir = path->dentry->d_inode;
LIST_HEAD(list);
struct ovl_cache_entry *p;
struct ovl_readdir_data rdd = {
@@ -1124,7 +1121,6 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa
if (err)
goto out;
- inode_lock_nested(dir, I_MUTEX_PARENT);
list_for_each_entry(p, &list, l_node) {
struct dentry *dentry;
@@ -1139,39 +1135,40 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa
err = -EINVAL;
break;
}
- dentry = ovl_lookup_upper(ofs, p->name, path->dentry, p->len);
+ dentry = ovl_lookup_upper_unlocked(ofs, p->name, path->dentry, p->len);
if (IS_ERR(dentry))
continue;
if (dentry->d_inode)
- err = ovl_workdir_cleanup(ofs, dir, path->mnt, dentry, level);
+ err = ovl_workdir_cleanup(ofs, path->dentry, path->mnt,
+ dentry, level);
dput(dentry);
if (err)
break;
}
- inode_unlock(dir);
out:
ovl_cache_free(&list);
return err;
}
-int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir,
+int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent,
struct vfsmount *mnt, struct dentry *dentry, int level)
{
int err;
- if (!d_is_dir(dentry) || level > 1) {
- return ovl_cleanup(ofs, dir, dentry);
- }
+ if (!d_is_dir(dentry) || level > 1)
+ return ovl_cleanup(ofs, parent, dentry);
- err = ovl_do_rmdir(ofs, dir, dentry);
+ err = ovl_parent_lock(parent, dentry);
+ if (err)
+ return err;
+ err = ovl_do_rmdir(ofs, parent->d_inode, dentry);
+ ovl_parent_unlock(parent);
if (err) {
struct path path = { .mnt = mnt, .dentry = dentry };
- inode_unlock(dir);
err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1);
- inode_lock_nested(dir, I_MUTEX_PARENT);
if (!err)
- err = ovl_cleanup(ofs, dir, dentry);
+ err = ovl_cleanup(ofs, parent, dentry);
}
return err;
@@ -1182,7 +1179,6 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
int err;
struct dentry *indexdir = ofs->workdir;
struct dentry *index = NULL;
- struct inode *dir = indexdir->d_inode;
struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
LIST_HEAD(list);
struct ovl_cache_entry *p;
@@ -1196,7 +1192,6 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
if (err)
goto out;
- inode_lock_nested(dir, I_MUTEX_PARENT);
list_for_each_entry(p, &list, l_node) {
if (p->name[0] == '.') {
if (p->len == 1)
@@ -1204,7 +1199,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
if (p->len == 2 && p->name[1] == '.')
continue;
}
- index = ovl_lookup_upper(ofs, p->name, indexdir, p->len);
+ index = ovl_lookup_upper_unlocked(ofs, p->name, indexdir, p->len);
if (IS_ERR(index)) {
err = PTR_ERR(index);
index = NULL;
@@ -1212,7 +1207,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
}
/* Cleanup leftover from index create/cleanup attempt */
if (index->d_name.name[0] == '#') {
- err = ovl_workdir_cleanup(ofs, dir, path.mnt, index, 1);
+ err = ovl_workdir_cleanup(ofs, indexdir, path.mnt, index, 1);
if (err)
break;
goto next;
@@ -1222,7 +1217,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
goto next;
} else if (err == -ESTALE) {
/* Cleanup stale index entries */
- err = ovl_cleanup(ofs, dir, index);
+ err = ovl_cleanup(ofs, indexdir, index);
} else if (err != -ENOENT) {
/*
* Abort mount to avoid corrupting the index if
@@ -1235,10 +1230,10 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
* Whiteout orphan index to block future open by
* handle after overlay nlink dropped to zero.
*/
- err = ovl_cleanup_and_whiteout(ofs, dir, index);
+ err = ovl_cleanup_and_whiteout(ofs, indexdir, index);
} else {
/* Cleanup orphan index entries */
- err = ovl_cleanup(ofs, dir, index);
+ err = ovl_cleanup(ofs, indexdir, index);
}
if (err)
@@ -1249,7 +1244,6 @@ next:
index = NULL;
}
dput(index);
- inode_unlock(dir);
out:
ovl_cache_free(&list);
if (err)
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index e19940d649ca..df85a76597e9 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -299,8 +299,8 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
int err;
bool retried = false;
- inode_lock_nested(dir, I_MUTEX_PARENT);
retry:
+ inode_lock_nested(dir, I_MUTEX_PARENT);
work = ovl_lookup_upper(ofs, name, ofs->workbasedir, strlen(name));
if (!IS_ERR(work)) {
@@ -311,23 +311,24 @@ retry:
if (work->d_inode) {
err = -EEXIST;
+ inode_unlock(dir);
if (retried)
goto out_dput;
if (persist)
- goto out_unlock;
+ return work;
retried = true;
- err = ovl_workdir_cleanup(ofs, dir, mnt, work, 0);
+ err = ovl_workdir_cleanup(ofs, ofs->workbasedir, mnt, work, 0);
dput(work);
- if (err == -EINVAL) {
- work = ERR_PTR(err);
- goto out_unlock;
- }
+ if (err == -EINVAL)
+ return ERR_PTR(err);
+
goto retry;
}
work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode);
+ inode_unlock(dir);
err = PTR_ERR(work);
if (IS_ERR(work))
goto out_err;
@@ -365,11 +366,10 @@ retry:
if (err)
goto out_dput;
} else {
+ inode_unlock(dir);
err = PTR_ERR(work);
goto out_err;
}
-out_unlock:
- inode_unlock(dir);
return work;
out_dput:
@@ -377,8 +377,7 @@ out_dput:
out_err:
pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
ofs->config.workdir, name, -err);
- work = NULL;
- goto out_unlock;
+ return NULL;
}
static int ovl_check_namelen(const struct path *path, struct ovl_fs *ofs,
@@ -557,37 +556,42 @@ out:
static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
{
struct dentry *workdir = ofs->workdir;
- struct inode *dir = d_inode(workdir);
struct dentry *temp;
struct dentry *dest;
struct dentry *whiteout;
struct name_snapshot name;
int err;
- inode_lock_nested(dir, I_MUTEX_PARENT);
-
temp = ovl_create_temp(ofs, workdir, OVL_CATTR(S_IFREG | 0));
err = PTR_ERR(temp);
if (IS_ERR(temp))
- goto out_unlock;
+ return err;
+ err = ovl_parent_lock(workdir, temp);
+ if (err) {
+ dput(temp);
+ return err;
+ }
dest = ovl_lookup_temp(ofs, workdir);
err = PTR_ERR(dest);
if (IS_ERR(dest)) {
dput(temp);
- goto out_unlock;
+ ovl_parent_unlock(workdir);
+ return err;
}
/* Name is inline and stable - using snapshot as a copy helper */
take_dentry_name_snapshot(&name, temp);
- err = ovl_do_rename(ofs, dir, temp, dir, dest, RENAME_WHITEOUT);
+ err = ovl_do_rename(ofs, workdir, temp, workdir, dest, RENAME_WHITEOUT);
+ ovl_parent_unlock(workdir);
if (err) {
if (err == -EINVAL)
err = 0;
goto cleanup_temp;
}
- whiteout = ovl_lookup_upper(ofs, name.name.name, workdir, name.name.len);
+ whiteout = ovl_lookup_upper_unlocked(ofs, name.name.name,
+ workdir, name.name.len);
err = PTR_ERR(whiteout);
if (IS_ERR(whiteout))
goto cleanup_temp;
@@ -596,18 +600,15 @@ static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
/* Best effort cleanup of whiteout and temp file */
if (err)
- ovl_cleanup(ofs, dir, whiteout);
+ ovl_cleanup(ofs, workdir, whiteout);
dput(whiteout);
cleanup_temp:
- ovl_cleanup(ofs, dir, temp);
+ ovl_cleanup(ofs, workdir, temp);
release_dentry_name_snapshot(&name);
dput(temp);
dput(dest);
-out_unlock:
- inode_unlock(dir);
-
return err;
}
@@ -621,8 +622,7 @@ static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs,
inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
child = ovl_lookup_upper(ofs, name, parent, len);
if (!IS_ERR(child) && !child->d_inode)
- child = ovl_create_real(ofs, parent->d_inode, child,
- OVL_CATTR(mode));
+ child = ovl_create_real(ofs, parent, child, OVL_CATTR(mode));
inode_unlock(parent->d_inode);
dput(parent);
@@ -1322,7 +1322,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc)
if (WARN_ON(fc->user_ns != current_user_ns()))
goto out_err;
- sb->s_d_op = &ovl_dentry_operations;
+ set_default_d_op(sb, &ovl_dentry_operations);
err = -ENOMEM;
if (!ofs->creator_cred)
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index dcccb4b4a66c..71674b633bc4 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -206,10 +206,17 @@ bool ovl_dentry_weird(struct dentry *dentry)
if (!d_can_lookup(dentry) && !d_is_file(dentry) && !d_is_symlink(dentry))
return true;
- return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT |
- DCACHE_MANAGE_TRANSIT |
- DCACHE_OP_HASH |
- DCACHE_OP_COMPARE);
+ if (dentry->d_flags & (DCACHE_NEED_AUTOMOUNT | DCACHE_MANAGE_TRANSIT))
+ return true;
+
+ /*
+ * Allow filesystems that are case-folding capable but deny composing
+ * an ovl stack from case-folded directories.
+ */
+ if (sb_has_encoding(dentry->d_sb))
+ return IS_CASEFOLDED(d_inode(dentry));
+
+ return dentry->d_flags & (DCACHE_OP_HASH | DCACHE_OP_COMPARE);
}
enum ovl_path_type ovl_path_type(struct dentry *dentry)
@@ -1071,7 +1078,6 @@ static void ovl_cleanup_index(struct dentry *dentry)
{
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
- struct inode *dir = indexdir->d_inode;
struct dentry *lowerdentry = ovl_dentry_lower(dentry);
struct dentry *upperdentry = ovl_dentry_upper(dentry);
struct dentry *index = NULL;
@@ -1107,21 +1113,18 @@ static void ovl_cleanup_index(struct dentry *dentry)
goto out;
}
- inode_lock_nested(dir, I_MUTEX_PARENT);
- index = ovl_lookup_upper(ofs, name.name, indexdir, name.len);
+ index = ovl_lookup_upper_unlocked(ofs, name.name, indexdir, name.len);
err = PTR_ERR(index);
if (IS_ERR(index)) {
index = NULL;
} else if (ovl_index_all(dentry->d_sb)) {
/* Whiteout orphan index to block future open by handle */
err = ovl_cleanup_and_whiteout(OVL_FS(dentry->d_sb),
- dir, index);
+ indexdir, index);
} else {
/* Cleanup orphan index entries */
- err = ovl_cleanup(ofs, dir, index);
+ err = ovl_cleanup(ofs, indexdir, index);
}
-
- inode_unlock(dir);
if (err)
goto fail;
@@ -1220,20 +1223,21 @@ void ovl_nlink_end(struct dentry *dentry)
ovl_inode_unlock(inode);
}
-int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
+int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *work,
+ struct dentry *upperdir, struct dentry *upper)
{
struct dentry *trap;
- /* Workdir should not be the same as upperdir */
- if (workdir == upperdir)
- goto err;
-
/* Workdir should not be subdir of upperdir and vice versa */
trap = lock_rename(workdir, upperdir);
if (IS_ERR(trap))
goto err;
if (trap)
goto err_unlock;
+ if (work && work->d_parent != workdir)
+ goto err_unlock;
+ if (upper && upper->d_parent != upperdir)
+ goto err_unlock;
return 0;
@@ -1544,3 +1548,13 @@ void ovl_copyattr(struct inode *inode)
i_size_write(inode, i_size_read(realinode));
spin_unlock(&inode->i_lock);
}
+
+int ovl_parent_lock(struct dentry *parent, struct dentry *child)
+{
+ inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
+ if (!child || child->d_parent == parent)
+ return 0;
+
+ inode_unlock(parent->d_inode);
+ return -EINVAL;
+}
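
Taken together, the helper pair gives every directory-modifying path the same shape: lock the parent, bail out with -EINVAL if the child was moved away underneath us, operate, unlock. A sketch of the calling convention (demo_remove is a placeholder):

    static int demo_remove(struct ovl_fs *ofs, struct dentry *parent,
                           struct dentry *child)
    {
            int err = ovl_parent_lock(parent, child);

            if (err)
                    return err;
            /* ... modify the directory under the parent's i_rwsem ... */
            ovl_parent_unlock(parent);
            return 0;
    }
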
diff --git a/fs/pidfs.c b/fs/pidfs.c
index c1f0a067be40..4625e097e3a0 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -319,7 +319,7 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
if (!c)
return -ESRCH;
- if (!(kinfo.mask & PIDFD_INFO_COREDUMP)) {
+ if ((kinfo.mask & PIDFD_INFO_COREDUMP) && !(kinfo.coredump_mask)) {
task_lock(task);
if (task->mm)
kinfo.coredump_mask = pidfs_coredump_mask(task->mm->flags);
@@ -366,7 +366,7 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
kinfo.pid = task_pid_vnr(task);
kinfo.mask |= PIDFD_INFO_PID;
- if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1))
+ if (kinfo.pid == 0 || kinfo.tgid == 0)
return -ESRCH;
copy_out:
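
Seen from userspace, the first hunk means coredump state is only gathered when the caller asked for it in the request mask. A hedged usage sketch (assumes a kernel exposing PIDFD_GET_INFO and PIDFD_INFO_COREDUMP via <linux/pidfd.h>; query_coredump is a placeholder):

    #include <linux/pidfd.h>
    #include <sys/ioctl.h>
    #include <stdio.h>

    int query_coredump(int pidfd)
    {
            struct pidfd_info info = { .mask = PIDFD_INFO_COREDUMP };

            if (ioctl(pidfd, PIDFD_GET_INFO, &info) < 0)
                    return -1;
            if (info.mask & PIDFD_INFO_COREDUMP)
                    printf("coredump_mask: 0x%x\n", info.coredump_mask);
            return 0;
    }
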
diff --git a/fs/pipe.c b/fs/pipe.c
index 45077c37bad1..731622d0738d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -963,6 +963,11 @@ int create_pipe_files(struct file **res, int flags)
res[1] = f;
stream_open(inode, res[0]);
stream_open(inode, res[1]);
+
+ /* pipe groks IOCB_NOWAIT */
+ res[0]->f_mode |= FMODE_NOWAIT;
+ res[1]->f_mode |= FMODE_NOWAIT;
+
/*
* Disable permission and pre-content events, but enable legacy
* inotify events for legacy users.
@@ -997,9 +1002,6 @@ static int __do_pipe_flags(int *fd, struct file **files, int flags)
audit_fd_pair(fdr, fdw);
fd[0] = fdr;
fd[1] = fdw;
- /* pipe groks IOCB_NOWAIT */
- files[0]->f_mode |= FMODE_NOWAIT;
- files[1]->f_mode |= FMODE_NOWAIT;
return 0;
err_fdr:
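
Moving the FMODE_NOWAIT setup into create_pipe_files() makes every pipe honor IOCB_NOWAIT, not only those created via pipe(2)/pipe2(2). From userspace the flag is observable through RWF_NOWAIT (sketch; try_read is a placeholder):

    #define _GNU_SOURCE
    #include <sys/uio.h>
    #include <errno.h>

    ssize_t try_read(int pipefd, char *buf, size_t len)
    {
            struct iovec iov = { .iov_base = buf, .iov_len = len };
            ssize_t n = preadv2(pipefd, &iov, 1, -1, RWF_NOWAIT);

            if (n < 0 && errno == EAGAIN)
                    return 0;       /* empty pipe: would have blocked */
            return n;
    }
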
diff --git a/fs/pnode.c b/fs/pnode.c
index ffd429b760d5..81f7599bdac4 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -21,17 +21,12 @@ static inline struct mount *next_peer(struct mount *p)
static inline struct mount *first_slave(struct mount *p)
{
- return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
-}
-
-static inline struct mount *last_slave(struct mount *p)
-{
- return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave);
+ return hlist_entry(p->mnt_slave_list.first, struct mount, mnt_slave);
}
static inline struct mount *next_slave(struct mount *p)
{
- return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
+ return hlist_entry(p->mnt_slave.next, struct mount, mnt_slave);
}
static struct mount *get_peer_under_root(struct mount *mnt,
@@ -70,69 +65,90 @@ int get_dominating_id(struct mount *mnt, const struct path *root)
return 0;
}
-static int do_make_slave(struct mount *mnt)
+static inline bool will_be_unmounted(struct mount *m)
{
- struct mount *master, *slave_mnt;
+ return m->mnt.mnt_flags & MNT_UMOUNT;
+}
- if (list_empty(&mnt->mnt_share)) {
- if (IS_MNT_SHARED(mnt)) {
- mnt_release_group_id(mnt);
- CLEAR_MNT_SHARED(mnt);
- }
- master = mnt->mnt_master;
- if (!master) {
- struct list_head *p = &mnt->mnt_slave_list;
- while (!list_empty(p)) {
- slave_mnt = list_first_entry(p,
- struct mount, mnt_slave);
- list_del_init(&slave_mnt->mnt_slave);
- slave_mnt->mnt_master = NULL;
- }
- return 0;
- }
- } else {
+static struct mount *propagation_source(struct mount *mnt)
+{
+ do {
struct mount *m;
- /*
- * slave 'mnt' to a peer mount that has the
- * same root dentry. If none is available then
- * slave it to anything that is available.
- */
- for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) {
- if (m->mnt.mnt_root == mnt->mnt.mnt_root) {
- master = m;
- break;
- }
+ for (m = next_peer(mnt); m != mnt; m = next_peer(m)) {
+ if (!will_be_unmounted(m))
+ return m;
}
- list_del_init(&mnt->mnt_share);
- mnt->mnt_group_id = 0;
- CLEAR_MNT_SHARED(mnt);
+ mnt = mnt->mnt_master;
+ } while (mnt && will_be_unmounted(mnt));
+ return mnt;
+}
+
+static void transfer_propagation(struct mount *mnt, struct mount *to)
+{
+ struct hlist_node *p = NULL, *n;
+ struct mount *m;
+
+ hlist_for_each_entry_safe(m, n, &mnt->mnt_slave_list, mnt_slave) {
+ m->mnt_master = to;
+ if (!to)
+ hlist_del_init(&m->mnt_slave);
+ else
+ p = &m->mnt_slave;
}
- list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
- slave_mnt->mnt_master = master;
- list_move(&mnt->mnt_slave, &master->mnt_slave_list);
- list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
- INIT_LIST_HEAD(&mnt->mnt_slave_list);
- mnt->mnt_master = master;
- return 0;
+ if (p)
+ hlist_splice_init(&mnt->mnt_slave_list, p, &to->mnt_slave_list);
}
/*
- * vfsmount lock must be held for write
+ * EXCL[namespace_sem]
*/
void change_mnt_propagation(struct mount *mnt, int type)
{
+ struct mount *m = mnt->mnt_master;
+
if (type == MS_SHARED) {
set_mnt_shared(mnt);
return;
}
- do_make_slave(mnt);
- if (type != MS_SLAVE) {
- list_del_init(&mnt->mnt_slave);
+ if (IS_MNT_SHARED(mnt)) {
+ m = propagation_source(mnt);
+ if (list_empty(&mnt->mnt_share)) {
+ mnt_release_group_id(mnt);
+ } else {
+ list_del_init(&mnt->mnt_share);
+ mnt->mnt_group_id = 0;
+ }
+ CLEAR_MNT_SHARED(mnt);
+ transfer_propagation(mnt, m);
+ }
+ hlist_del_init(&mnt->mnt_slave);
+ if (type == MS_SLAVE) {
+ mnt->mnt_master = m;
+ if (m)
+ hlist_add_head(&mnt->mnt_slave, &m->mnt_slave_list);
+ } else {
mnt->mnt_master = NULL;
if (type == MS_UNBINDABLE)
- mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
+ mnt->mnt_t_flags |= T_UNBINDABLE;
else
- mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE;
+ mnt->mnt_t_flags &= ~T_UNBINDABLE;
+ }
+}
+
+static struct mount *__propagation_next(struct mount *m,
+ struct mount *origin)
+{
+ while (1) {
+ struct mount *master = m->mnt_master;
+
+ if (master == origin->mnt_master) {
+ struct mount *next = next_peer(m);
+ return (next == origin) ? NULL : next;
+ } else if (m->mnt_slave.next)
+ return next_slave(m);
+
+ /* back at master */
+ m = master;
}
}
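
For reference, this function is driven from userspace by mount(2) with one of the propagation-type flags and otherwise-empty arguments (sketch; make_slave is a placeholder):

    #include <sys/mount.h>

    int make_slave(const char *mountpoint)
    {
            /* MS_SLAVE, MS_PRIVATE, MS_SHARED or MS_UNBINDABLE;
             * add MS_REC to apply the change to the whole subtree */
            return mount(NULL, mountpoint, NULL, MS_SLAVE, NULL);
    }
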
@@ -150,34 +166,24 @@ static struct mount *propagation_next(struct mount *m,
struct mount *origin)
{
/* are there any slaves of this mount? */
- if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
+ if (!IS_MNT_NEW(m) && !hlist_empty(&m->mnt_slave_list))
return first_slave(m);
- while (1) {
- struct mount *master = m->mnt_master;
-
- if (master == origin->mnt_master) {
- struct mount *next = next_peer(m);
- return (next == origin) ? NULL : next;
- } else if (m->mnt_slave.next != &master->mnt_slave_list)
- return next_slave(m);
-
- /* back at master */
- m = master;
- }
+ return __propagation_next(m, origin);
}
static struct mount *skip_propagation_subtree(struct mount *m,
struct mount *origin)
{
/*
- * Advance m such that propagation_next will not return
- * the slaves of m.
+ * Advance m past everything that gets propagation from it.
*/
- if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
- m = last_slave(m);
+ struct mount *p = __propagation_next(m, origin);
+
+ while (p && peers(m, p))
+ p = __propagation_next(p, origin);
- return m;
+ return p;
}
static struct mount *next_group(struct mount *m, struct mount *origin)
@@ -185,7 +191,7 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
while (1) {
while (1) {
struct mount *next;
- if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
+ if (!IS_MNT_NEW(m) && !hlist_empty(&m->mnt_slave_list))
return first_slave(m);
next = next_peer(m);
if (m->mnt_group_id == origin->mnt_group_id) {
@@ -198,7 +204,7 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
/* m is the last peer */
while (1) {
struct mount *master = m->mnt_master;
- if (m->mnt_slave.next != &master->mnt_slave_list)
+ if (m->mnt_slave.next)
return next_slave(m);
m = next_peer(master);
if (master->mnt_group_id == origin->mnt_group_id)
@@ -212,142 +218,113 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
}
}
-/* all accesses are serialized by namespace_sem */
-static struct mount *last_dest, *first_source, *last_source, *dest_master;
-static struct hlist_head *list;
-
-static inline bool peers(const struct mount *m1, const struct mount *m2)
-{
- return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
-}
-
-static int propagate_one(struct mount *m, struct mountpoint *dest_mp)
+static bool need_secondary(struct mount *m, struct mountpoint *dest_mp)
{
- struct mount *child;
- int type;
/* skip ones added by this propagate_mnt() */
if (IS_MNT_NEW(m))
- return 0;
+ return false;
/* skip if mountpoint isn't visible in m */
if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root))
- return 0;
+ return false;
/* skip if m is in the anon_ns */
if (is_anon_ns(m->mnt_ns))
- return 0;
+ return false;
+ return true;
+}
- if (peers(m, last_dest)) {
- type = CL_MAKE_SHARED;
- } else {
- struct mount *n, *p;
- bool done;
- for (n = m; ; n = p) {
- p = n->mnt_master;
- if (p == dest_master || IS_MNT_MARKED(p))
- break;
+static struct mount *find_master(struct mount *m,
+ struct mount *last_copy,
+ struct mount *original)
+{
+ struct mount *p;
+
+ // ascend until there's a copy for something with the same master
+ for (;;) {
+ p = m->mnt_master;
+ if (!p || IS_MNT_MARKED(p))
+ break;
+ m = p;
+ }
+ while (!peers(last_copy, original)) {
+ struct mount *parent = last_copy->mnt_parent;
+ if (parent->mnt_master == p) {
+ if (!peers(parent, m))
+ last_copy = last_copy->mnt_master;
+ break;
}
- do {
- struct mount *parent = last_source->mnt_parent;
- if (peers(last_source, first_source))
- break;
- done = parent->mnt_master == p;
- if (done && peers(n, parent))
- break;
- last_source = last_source->mnt_master;
- } while (!done);
-
- type = CL_SLAVE;
- /* beginning of peer group among the slaves? */
- if (IS_MNT_SHARED(m))
- type |= CL_MAKE_SHARED;
+ last_copy = last_copy->mnt_master;
}
-
- child = copy_tree(last_source, last_source->mnt.mnt_root, type);
- if (IS_ERR(child))
- return PTR_ERR(child);
- read_seqlock_excl(&mount_lock);
- mnt_set_mountpoint(m, dest_mp, child);
- if (m->mnt_master != dest_master)
- SET_MNT_MARK(m->mnt_master);
- read_sequnlock_excl(&mount_lock);
- last_dest = m;
- last_source = child;
- hlist_add_head(&child->mnt_hash, list);
- return count_mounts(m->mnt_ns, child);
+ return last_copy;
}
-/*
- * mount 'source_mnt' under the destination 'dest_mnt' at
- * dentry 'dest_dentry'. And propagate that mount to
- * all the peer and slave mounts of 'dest_mnt'.
- * Link all the new mounts into a propagation tree headed at
- * source_mnt. Also link all the new mounts using ->mnt_list
- * headed at source_mnt's ->mnt_list
+/**
+ * propagate_mnt() - create secondary copies for tree attachment
+ * @dest_mnt: destination mount.
+ * @dest_mp: destination mountpoint.
+ * @source_mnt: source mount.
+ * @tree_list: list of secondaries to be attached.
*
- * @dest_mnt: destination mount.
- * @dest_dentry: destination dentry.
- * @source_mnt: source mount.
- * @tree_list : list of heads of trees to be attached.
+ * Create secondary copies for attaching a tree with root @source_mnt
+ * at mount @dest_mnt with mountpoint @dest_mp. Link all new mounts
+ * into a propagation graph. Set mountpoints for all secondaries,
+ * link their roots into @tree_list via ->mnt_hash.
*/
int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
- struct mount *source_mnt, struct hlist_head *tree_list)
+ struct mount *source_mnt, struct hlist_head *tree_list)
{
- struct mount *m, *n;
- int ret = 0;
-
- /*
- * we don't want to bother passing tons of arguments to
- * propagate_one(); everything is serialized by namespace_sem,
- * so globals will do just fine.
- */
- last_dest = dest_mnt;
- first_source = source_mnt;
- last_source = source_mnt;
- list = tree_list;
- dest_master = dest_mnt->mnt_master;
-
- /* all peers of dest_mnt, except dest_mnt itself */
- for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
- ret = propagate_one(n, dest_mp);
- if (ret)
- goto out;
- }
-
- /* all slave groups */
- for (m = next_group(dest_mnt, dest_mnt); m;
- m = next_group(m, dest_mnt)) {
- /* everything in that slave group */
- n = m;
+ struct mount *m, *n, *copy, *this;
+ int err = 0, type;
+
+ if (dest_mnt->mnt_master)
+ SET_MNT_MARK(dest_mnt->mnt_master);
+
+ /* iterate over peer groups, depth first */
+ for (m = dest_mnt; m && !err; m = next_group(m, dest_mnt)) {
+ if (m == dest_mnt) { // have one for dest_mnt itself
+ copy = source_mnt;
+ type = CL_MAKE_SHARED;
+ n = next_peer(m);
+ if (n == m)
+ continue;
+ } else {
+ type = CL_SLAVE;
+ /* beginning of peer group among the slaves? */
+ if (IS_MNT_SHARED(m))
+ type |= CL_MAKE_SHARED;
+ n = m;
+ }
do {
- ret = propagate_one(n, dest_mp);
- if (ret)
- goto out;
- n = next_peer(n);
- } while (n != m);
+ if (!need_secondary(n, dest_mp))
+ continue;
+ if (type & CL_SLAVE) // first in this peer group
+ copy = find_master(n, copy, source_mnt);
+ this = copy_tree(copy, copy->mnt.mnt_root, type);
+ if (IS_ERR(this)) {
+ err = PTR_ERR(this);
+ break;
+ }
+ read_seqlock_excl(&mount_lock);
+ mnt_set_mountpoint(n, dest_mp, this);
+ read_sequnlock_excl(&mount_lock);
+ if (n->mnt_master)
+ SET_MNT_MARK(n->mnt_master);
+ copy = this;
+ hlist_add_head(&this->mnt_hash, tree_list);
+ err = count_mounts(n->mnt_ns, this);
+ if (err)
+ break;
+ type = CL_MAKE_SHARED;
+ } while ((n = next_peer(n)) != m);
}
-out:
- read_seqlock_excl(&mount_lock);
+
hlist_for_each_entry(n, tree_list, mnt_hash) {
m = n->mnt_parent;
- if (m->mnt_master != dest_mnt->mnt_master)
+ if (m->mnt_master)
CLEAR_MNT_MARK(m->mnt_master);
}
- read_sequnlock_excl(&mount_lock);
- return ret;
-}
-
-static struct mount *find_topper(struct mount *mnt)
-{
- /* If there is exactly one mount covering mnt completely return it. */
- struct mount *child;
-
- if (!list_is_singular(&mnt->mnt_mounts))
- return NULL;
-
- child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
- if (child->mnt_mountpoint != mnt->mnt.mnt_root)
- return NULL;
-
- return child;
+ if (dest_mnt->mnt_master)
+ CLEAR_MNT_MARK(dest_mnt->mnt_master);
+ return err;
}
/*
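
A concrete scenario for the rewritten propagate_mnt(): with a shared mount and a bind-mounted peer, mounting under one peer produces a secondary copy under the other. A hedged userspace sketch (needs root; assumes /mnt, /mnt2 and /mnt/sub exist; demo is a placeholder):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <sys/mount.h>

    int demo(void)
    {
            if (unshare(CLONE_NEWNS) < 0)
                    return -1;
            if (mount(NULL, "/mnt", NULL, MS_SHARED, NULL) < 0)
                    return -1;
            /* a bind of a shared mount joins its peer group */
            if (mount("/mnt", "/mnt2", NULL, MS_BIND, NULL) < 0)
                    return -1;
            /* propagate_mnt() creates the copy that appears at /mnt2/sub */
            return mount("tmpfs", "/mnt/sub", "tmpfs", 0, NULL);
    }
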
@@ -407,12 +384,8 @@ bool propagation_would_overmount(const struct mount *from,
*/
int propagate_mount_busy(struct mount *mnt, int refcnt)
{
- struct mount *m, *child, *topper;
struct mount *parent = mnt->mnt_parent;
- if (mnt == parent)
- return do_refcount_check(mnt, refcnt);
-
/*
* quickly check if the current mount can be unmounted.
* If not, we don't have to go checking for all other
@@ -421,23 +394,27 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt))
return 1;
- for (m = propagation_next(parent, parent); m;
+ if (mnt == parent)
+ return 0;
+
+ for (struct mount *m = propagation_next(parent, parent); m;
m = propagation_next(m, parent)) {
- int count = 1;
- child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
- if (!child)
- continue;
+ struct list_head *head;
+ struct mount *child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
- /* Is there exactly one mount on the child that covers
- * it completely whose reference should be ignored?
- */
- topper = find_topper(child);
- if (topper)
- count += 1;
- else if (!list_empty(&child->mnt_mounts))
+ if (!child)
continue;
- if (do_refcount_check(child, count))
+ head = &child->mnt_mounts;
+ if (!list_empty(head)) {
+ /*
+ * a mount that covers child completely wouldn't prevent
+ * it from being pulled out; any other would.
+ */
+ if (!list_is_singular(head) || !child->overmount)
+ continue;
+ }
+ if (do_refcount_check(child, 1))
return 1;
}
return 0;
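A hedged userspace sketch of the busy test above, with illustrative field names (nr_children models list_empty()/list_is_singular(), child_covers_root models child->overmount): a propagated copy carrying anything other than one full overmount is simply left mounted (the continue path) and can never make the operation busy, while a copy that would be unmounted reports busy when it holds extra references.

#include <assert.h>
#include <stdbool.h>

struct copy {
	int nr_children;
	bool child_covers_root;	/* models child->overmount */
	int refcnt;
};

static bool copy_makes_parent_busy(const struct copy *c)
{
	if (c->nr_children > 0 &&
	    (c->nr_children > 1 || !c->child_covers_root))
		return false;	/* stays mounted: the 'continue' path */
	return c->refcnt > 1;	/* do_refcount_check(child, 1) stand-in */
}

int main(void)
{
	struct copy bare = { 0, false, 2 };	/* extra reference */
	struct copy covered = { 1, true, 1 };	/* one covering overmount */
	struct copy subtree = { 2, false, 9 };	/* left alone */

	assert(copy_makes_parent_busy(&bare));
	assert(!copy_makes_parent_busy(&covered));
	assert(!copy_makes_parent_busy(&subtree));
	return 0;
}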
@@ -463,181 +440,209 @@ void propagate_mount_unlock(struct mount *mnt)
}
}
-static void umount_one(struct mount *mnt, struct list_head *to_umount)
+static inline bool is_candidate(struct mount *m)
{
- CLEAR_MNT_MARK(mnt);
- mnt->mnt.mnt_flags |= MNT_UMOUNT;
- list_del_init(&mnt->mnt_child);
- list_del_init(&mnt->mnt_umounting);
- move_from_ns(mnt, to_umount);
+ return m->mnt_t_flags & T_UMOUNT_CANDIDATE;
}
-/*
- * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
- * parent propagates to.
- */
-static bool __propagate_umount(struct mount *mnt,
- struct list_head *to_umount,
- struct list_head *to_restore)
+static void umount_one(struct mount *m, struct list_head *to_umount)
{
- bool progress = false;
- struct mount *child;
-
- /*
- * The state of the parent won't change if this mount is
- * already unmounted or marked as without children.
- */
- if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
- goto out;
+ m->mnt.mnt_flags |= MNT_UMOUNT;
+ list_del_init(&m->mnt_child);
+ move_from_ns(m);
+ list_add_tail(&m->mnt_list, to_umount);
+}
- /* Verify topper is the only grandchild that has not been
- * speculatively unmounted.
- */
- list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
- if (child->mnt_mountpoint == mnt->mnt.mnt_root)
- continue;
- if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
- continue;
- /* Found a mounted child */
- goto children;
- }
+static void remove_from_candidate_list(struct mount *m)
+{
+ m->mnt_t_flags &= ~(T_MARKED | T_UMOUNT_CANDIDATE);
+ list_del_init(&m->mnt_list);
+}
- /* Mark mounts that can be unmounted if not locked */
- SET_MNT_MARK(mnt);
- progress = true;
+static void gather_candidates(struct list_head *set,
+ struct list_head *candidates)
+{
+ struct mount *m, *p, *q;
- /* If a mount is without children and not locked umount it. */
- if (!IS_MNT_LOCKED(mnt)) {
- umount_one(mnt, to_umount);
- } else {
-children:
- list_move_tail(&mnt->mnt_umounting, to_restore);
+ list_for_each_entry(m, set, mnt_list) {
+ if (is_candidate(m))
+ continue;
+ m->mnt_t_flags |= T_UMOUNT_CANDIDATE;
+ p = m->mnt_parent;
+ q = propagation_next(p, p);
+ while (q) {
+ struct mount *child = __lookup_mnt(&q->mnt,
+ m->mnt_mountpoint);
+ if (child) {
+ /*
+ * We might've already run into this one. That
+ * must've happened on an earlier iteration of the
+ * outer loop; in that case we can skip those
+ * parents that get propagation from q - there
+ * will be nothing new on those as well.
+ */
+ if (is_candidate(child)) {
+ q = skip_propagation_subtree(q, p);
+ continue;
+ }
+ child->mnt_t_flags |= T_UMOUNT_CANDIDATE;
+ if (!will_be_unmounted(child))
+ list_add(&child->mnt_list, candidates);
+ }
+ q = propagation_next(q, p);
+ }
}
-out:
- return progress;
+ list_for_each_entry(m, set, mnt_list)
+ m->mnt_t_flags &= ~T_UMOUNT_CANDIDATE;
}
-static void umount_list(struct list_head *to_umount,
- struct list_head *to_restore)
+/*
+ * We know that some child of @m can't be unmounted. In all places where the
+ * chain of descent of @m has a child that does not overmount the root
+ * of its parent, that parent can't be unmounted either.
+ */
+static void trim_ancestors(struct mount *m)
{
- struct mount *mnt, *child, *tmp;
- list_for_each_entry(mnt, to_umount, mnt_list) {
- list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) {
- /* topper? */
- if (child->mnt_mountpoint == mnt->mnt.mnt_root)
- list_move_tail(&child->mnt_umounting, to_restore);
- else
- umount_one(child, to_umount);
- }
+ struct mount *p;
+
+ for (p = m->mnt_parent; is_candidate(p); m = p, p = p->mnt_parent) {
+ if (IS_MNT_MARKED(m)) // all candidates beneath are overmounts
+ return;
+ SET_MNT_MARK(m);
+ if (m != p->overmount)
+ p->mnt_t_flags &= ~T_UMOUNT_CANDIDATE;
}
}
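A self-contained model of trim_ancestors(), assuming boolean candidate/marked fields in place of the kernel's flag bits and list membership (the extra p != NULL guard is an artifact of the model; the kernel relies on the root never being a candidate): climbing from a known-unremovable mount, each parent keeps its candidacy only when reached through its own overmount.

#include <stdbool.h>
#include <stdio.h>

struct node {
	struct node *parent;
	struct node *overmount;	/* the child covering our root, if any */
	bool candidate, marked;
	const char *name;
};

static void trim_ancestors(struct node *m)
{
	for (struct node *p = m->parent; p && p->candidate;
	     m = p, p = p->parent) {
		if (m->marked)	/* all candidates beneath are overmounts */
			return;
		m->marked = true;
		if (m != p->overmount)
			p->candidate = false;
	}
}

int main(void)
{
	struct node gp = { NULL, NULL, true, false, "gp" };
	struct node p  = { &gp,  NULL, true, false, "p"  };
	struct node m  = { &p,   NULL, true, false, "m"  };

	gp.overmount = &p;	/* p covers gp's root... */
	/* ...but m is an ordinary child of p, so p loses candidacy */
	trim_ancestors(&m);
	printf("p candidate: %d, gp candidate: %d\n",
	       p.candidate, gp.candidate);	/* prints 0, 1 */
	return 0;
}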
-static void restore_mounts(struct list_head *to_restore)
+/*
+ * Find and exclude all umount candidates forbidden by @m
+ * (see Documentation/filesystems/propagate_umount.txt).
+ * If we can immediately tell that @m is OK to unmount (unlocked
+ * and all children are already committed to unmounting), commit
+ * to unmounting it.
+ * Only @m itself might be taken from the candidates list;
+ * anything found by trim_ancestors() is marked non-candidate
+ * and left on the list.
+ */
+static void trim_one(struct mount *m, struct list_head *to_umount)
{
- /* Restore mounts to a clean working state */
- while (!list_empty(to_restore)) {
- struct mount *mnt, *parent;
- struct mountpoint *mp;
-
- mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
- CLEAR_MNT_MARK(mnt);
- list_del_init(&mnt->mnt_umounting);
-
- /* Should this mount be reparented? */
- mp = mnt->mnt_mp;
- parent = mnt->mnt_parent;
- while (parent->mnt.mnt_flags & MNT_UMOUNT) {
- mp = parent->mnt_mp;
- parent = parent->mnt_parent;
- }
- if (parent != mnt->mnt_parent) {
- mnt_change_mountpoint(parent, mp, mnt);
- mnt_notify_add(mnt);
+ bool remove_this = false, found = false, umount_this = false;
+ struct mount *n;
+
+ if (!is_candidate(m)) { // trim_ancestors() left it on list
+ remove_from_candidate_list(m);
+ return;
+ }
+
+ list_for_each_entry(n, &m->mnt_mounts, mnt_child) {
+ if (!is_candidate(n)) {
+ found = true;
+ if (n != m->overmount) {
+ remove_this = true;
+ break;
+ }
}
}
+ if (found) {
+ trim_ancestors(m);
+ } else if (!IS_MNT_LOCKED(m) && list_empty(&m->mnt_mounts)) {
+ remove_this = true;
+ umount_this = true;
+ }
+ if (remove_this) {
+ remove_from_candidate_list(m);
+ if (umount_this)
+ umount_one(m, to_umount);
+ }
}
-static void cleanup_umount_visitations(struct list_head *visited)
+static void handle_locked(struct mount *m, struct list_head *to_umount)
{
- while (!list_empty(visited)) {
- struct mount *mnt =
- list_first_entry(visited, struct mount, mnt_umounting);
- list_del_init(&mnt->mnt_umounting);
+ struct mount *cutoff = m, *p;
+
+ if (!is_candidate(m)) { // trim_ancestors() left it on list
+ remove_from_candidate_list(m);
+ return;
+ }
+ for (p = m; is_candidate(p); p = p->mnt_parent) {
+ remove_from_candidate_list(p);
+ if (!IS_MNT_LOCKED(p))
+ cutoff = p->mnt_parent;
+ }
+ if (will_be_unmounted(p))
+ cutoff = p;
+ while (m != cutoff) {
+ umount_one(m, to_umount);
+ m = m->mnt_parent;
}
}
/*
- * collect all mounts that receive propagation from the mount in @list,
- * and return these additional mounts in the same list.
- * @list: the list of mounts to be unmounted.
+ * @m is not going away, and it overmounts the top of a stack of mounts
+ * that are going away. We know that all of those are fully overmounted
+ * by the one above (@m being the topmost of the chain), so @m can be slid
+ * into the place where the bottom of the stack is attached.
*
- * vfsmount lock must be held for write
+ * NOTE: here we temporarily violate a constraint - two mounts end up with
+ * the same parent and mountpoint; that will be remedied as soon as we
+ * return from propagate_umount() - its caller (umount_tree()) will detach
+ * the stack from the parent it (and now @m) is attached to. umount_tree()
+ * might choose to keep unmounted pieces stuck to each other, but it always
+ * detaches them from the mounts that remain in the tree.
*/
-int propagate_umount(struct list_head *list)
+static void reparent(struct mount *m)
{
- struct mount *mnt;
- LIST_HEAD(to_restore);
- LIST_HEAD(to_umount);
- LIST_HEAD(visited);
-
- /* Find candidates for unmounting */
- list_for_each_entry_reverse(mnt, list, mnt_list) {
- struct mount *parent = mnt->mnt_parent;
- struct mount *m;
+ struct mount *p = m;
+ struct mountpoint *mp;
- /*
- * If this mount has already been visited it is known that it's
- * entire peer group and all of their slaves in the propagation
- * tree for the mountpoint has already been visited and there is
- * no need to visit them again.
- */
- if (!list_empty(&mnt->mnt_umounting))
- continue;
+ do {
+ mp = p->mnt_mp;
+ p = p->mnt_parent;
+ } while (will_be_unmounted(p));
- list_add_tail(&mnt->mnt_umounting, &visited);
- for (m = propagation_next(parent, parent); m;
- m = propagation_next(m, parent)) {
- struct mount *child = __lookup_mnt(&m->mnt,
- mnt->mnt_mountpoint);
- if (!child)
- continue;
+ mnt_change_mountpoint(p, mp, m);
+ mnt_notify_add(m);
+}
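The climb in reparent() can be modeled in a few lines of plain C; here mountpoint is an int stand-in for struct mountpoint * and dying models will_be_unmounted(). The surviving mount inherits the attachment point of the bottom of the dying stack:

#include <stdbool.h>
#include <stdio.h>

struct mnt {
	struct mnt *parent;
	int mountpoint;	/* stand-in for struct mountpoint * */
	bool dying;	/* models will_be_unmounted() */
};

static void reparent(struct mnt *m)
{
	struct mnt *p = m;
	int mp;

	do {			/* climb past everything going away */
		mp = p->mountpoint;
		p = p->parent;
	} while (p->dying);
	m->parent = p;		/* mnt_change_mountpoint() stand-in */
	m->mountpoint = mp;
}

int main(void)
{
	struct mnt root = { NULL, 0, false };
	struct mnt a = { &root, 10, true };	/* bottom of dying stack */
	struct mnt b = { &a, 20, true };
	struct mnt m = { &b, 30, false };	/* survives */

	reparent(&m);
	printf("m now at mountpoint %d under root\n", m.mountpoint); /* 10 */
	return 0;
}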
- if (!list_empty(&child->mnt_umounting)) {
- /*
- * If the child has already been visited it is
- * know that it's entire peer group and all of
- * their slaves in the propgation tree for the
- * mountpoint has already been visited and there
- * is no need to visit this subtree again.
- */
- m = skip_propagation_subtree(m, parent);
- continue;
- } else if (child->mnt.mnt_flags & MNT_UMOUNT) {
- /*
- * We have come across a partially unmounted
- * mount in a list that has not been visited
- * yet. Remember it has been visited and
- * continue about our merry way.
- */
- list_add_tail(&child->mnt_umounting, &visited);
- continue;
- }
+/**
+ * propagate_umount - apply propagation rules to the set of mounts for umount()
+ * @set: the list of mounts to be unmounted.
+ *
+ * Collect all mounts that receive propagation from the mounts in @set and have
+ * no obstacles to being unmounted. Add these additional mounts to the set.
+ *
+ * See Documentation/filesystems/propagate_umount.txt before changing
+ * anything in this area.
+ *
+ * Locks held:
+ * mount_lock (write_seqlock), namespace_sem (exclusive).
+ */
+void propagate_umount(struct list_head *set)
+{
+ struct mount *m, *p;
+ LIST_HEAD(to_umount); // committed to unmounting
+ LIST_HEAD(candidates); // undecided umount candidates
- /* Check the child and parents while progress is made */
- while (__propagate_umount(child,
- &to_umount, &to_restore)) {
- /* Is the parent a umount candidate? */
- child = child->mnt_parent;
- if (list_empty(&child->mnt_umounting))
- break;
- }
- }
+ // collect all candidates
+ gather_candidates(set, &candidates);
+
+ // reduce the set until it's non-shifting
+ list_for_each_entry_safe(m, p, &candidates, mnt_list)
+ trim_one(m, &to_umount);
+
+ // ... and non-revealing
+ while (!list_empty(&candidates)) {
m = list_first_entry(&candidates, struct mount, mnt_list);
+ handle_locked(m, &to_umount);
}
- umount_list(&to_umount, &to_restore);
- restore_mounts(&to_restore);
- cleanup_umount_visitations(&visited);
- list_splice_tail(&to_umount, list);
+ // now to_umount consists of all acceptable candidates
+ // deal with reparenting of remaining overmounts on those
+ list_for_each_entry(m, &to_umount, mnt_list) {
+ if (m->overmount)
+ reparent(m->overmount);
+ }
- return 0;
+ // and fold them into the set
+ list_splice_tail_init(&to_umount, set);
}
diff --git a/fs/pnode.h b/fs/pnode.h
index 34b6247af01d..00ab153e3e9d 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -10,14 +10,14 @@
#include <linux/list.h>
#include "mount.h"
-#define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED)
+#define IS_MNT_SHARED(m) ((m)->mnt_t_flags & T_SHARED)
#define IS_MNT_SLAVE(m) ((m)->mnt_master)
#define IS_MNT_NEW(m) (!(m)->mnt_ns)
-#define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
-#define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
-#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
-#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED)
-#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)
+#define CLEAR_MNT_SHARED(m) ((m)->mnt_t_flags &= ~T_SHARED)
+#define IS_MNT_UNBINDABLE(m) ((m)->mnt_t_flags & T_UNBINDABLE)
+#define IS_MNT_MARKED(m) ((m)->mnt_t_flags & T_MARKED)
+#define SET_MNT_MARK(m) ((m)->mnt_t_flags |= T_MARKED)
+#define CLEAR_MNT_MARK(m) ((m)->mnt_t_flags &= ~T_MARKED)
#define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED)
#define CL_EXPIRE 0x01
@@ -25,21 +25,26 @@
#define CL_COPY_UNBINDABLE 0x04
#define CL_MAKE_SHARED 0x08
#define CL_PRIVATE 0x10
-#define CL_SHARED_TO_SLAVE 0x20
#define CL_COPY_MNT_NS_FILE 0x40
-#define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE)
-
+/*
+ * EXCL[namespace_sem]
+ */
static inline void set_mnt_shared(struct mount *mnt)
{
- mnt->mnt.mnt_flags &= ~MNT_SHARED_MASK;
- mnt->mnt.mnt_flags |= MNT_SHARED;
+ mnt->mnt_t_flags &= ~T_SHARED_MASK;
+ mnt->mnt_t_flags |= T_SHARED;
+}
+
+static inline bool peers(const struct mount *m1, const struct mount *m2)
+{
+ return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
}
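The trailing && m1->mnt_group_id is what keeps two unshared mounts (group id 0 on both) from comparing as peers. A tiny standalone check, with struct m standing in for struct mount:

#include <assert.h>
#include <stdbool.h>

struct m { int mnt_group_id; };	/* stand-in for struct mount */

static bool peers(const struct m *a, const struct m *b)
{
	return a->mnt_group_id == b->mnt_group_id && a->mnt_group_id;
}

int main(void)
{
	struct m a = { 0 }, b = { 0 }, c = { 7 }, d = { 7 };

	assert(!peers(&a, &b));	/* both unshared (id 0): not peers */
	assert(peers(&c, &d));	/* same non-zero group: peers */
	return 0;
}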
void change_mnt_propagation(struct mount *, int);
int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
struct hlist_head *);
-int propagate_umount(struct list_head *);
+void propagate_umount(struct list_head *);
int propagate_mount_busy(struct mount *, int);
void propagate_mount_unlock(struct mount *);
void mnt_release_group_id(struct mount *);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c667702dc69b..e93149a01341 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2704,8 +2704,7 @@ static struct dentry *proc_pident_instantiate(struct dentry *dentry,
inode->i_fop = p->fop;
ei->op = p->op;
pid_update_inode(task, inode);
- d_set_d_op(dentry, &pid_dentry_operations);
- return d_splice_alias(inode, dentry);
+ return d_splice_alias_ops(inode, dentry, &pid_dentry_operations);
}
static struct dentry *proc_pident_lookup(struct inode *dir,
@@ -3501,8 +3500,7 @@ static struct dentry *proc_pid_instantiate(struct dentry * dentry,
set_nlink(inode, nlink_tgid);
pid_update_inode(task, inode);
- d_set_d_op(dentry, &pid_dentry_operations);
- return d_splice_alias(inode, dentry);
+ return d_splice_alias_ops(inode, dentry, &pid_dentry_operations);
}
struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
@@ -3804,8 +3802,7 @@ static struct dentry *proc_task_instantiate(struct dentry *dentry,
set_nlink(inode, nlink_tid);
pid_update_inode(task, inode);
- d_set_d_op(dentry, &pid_dentry_operations);
- return d_splice_alias(inode, dentry);
+ return d_splice_alias_ops(inode, dentry, &pid_dentry_operations);
}
static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
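The recurring change in the fs/proc hunks above folds the d_set_d_op() + d_splice_alias() pair into one d_splice_alias_ops() call, so the dentry operations are supplied together with the splice. A kernel-context sketch of the before/after shape — not standalone-buildable, and my_dops is a hypothetical dentry_operations instance:

static const struct dentry_operations my_dops;	/* hypothetical */

static struct dentry *old_way(struct inode *inode, struct dentry *dentry)
{
	d_set_d_op(dentry, &my_dops);	/* two steps: set ops, then splice */
	return d_splice_alias(inode, dentry);
}

static struct dentry *new_way(struct inode *inode, struct dentry *dentry)
{
	/* one step: the ops travel with the splice */
	return d_splice_alias_ops(inode, dentry, &my_dops);
}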
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 37aa778d1af7..9eeccff49b2a 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -352,18 +352,9 @@ static int proc_fd_getattr(struct mnt_idmap *idmap,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
- int rv = 0;
generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
-
- /* If it's a directory, put the number of open fds there */
- if (S_ISDIR(inode->i_mode)) {
- rv = proc_readfd_count(inode, &stat->size);
- if (rv < 0)
- return rv;
- }
-
- return rv;
+ return proc_readfd_count(inode, &stat->size);
}
const struct inode_operations proc_fd_inode_operations = {
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index a3e22803cddf..5635453cd476 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -254,8 +254,11 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry,
inode = proc_get_inode(dir->i_sb, de);
if (!inode)
return ERR_PTR(-ENOMEM);
- d_set_d_op(dentry, de->proc_dops);
- return d_splice_alias(inode, dentry);
+ if (de->flags & PROC_ENTRY_FORCE_LOOKUP)
+ return d_splice_alias_ops(inode, dentry,
+ &proc_net_dentry_ops);
+ return d_splice_alias_ops(inode, dentry,
+ &proc_misc_dentry_ops);
}
read_unlock(&proc_subdir_lock);
return ERR_PTR(-ENOENT);
@@ -448,9 +451,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
INIT_LIST_HEAD(&ent->pde_openers);
proc_set_user(ent, (*parent)->uid, (*parent)->gid);
- ent->proc_dops = &proc_misc_dentry_ops;
/* Revalidate everything under /proc/${pid}/net */
- if ((*parent)->proc_dops == &proc_net_dentry_ops)
+ if ((*parent)->flags & PROC_ENTRY_FORCE_LOOKUP)
pde_force_lookup(ent);
out:
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index a3eb3b740f76..3604b616311c 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -42,7 +42,7 @@ static void proc_evict_inode(struct inode *inode)
head = ei->sysctl;
if (head) {
- RCU_INIT_POINTER(ei->sysctl, NULL);
+ WRITE_ONCE(ei->sysctl, NULL);
proc_sys_evict_inode(inode, head);
}
}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 96122e91c645..520c4742101d 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -44,7 +44,6 @@ struct proc_dir_entry {
const struct proc_ops *proc_ops;
const struct file_operations *proc_dir_ops;
};
- const struct dentry_operations *proc_dops;
union {
const struct seq_operations *seq_ops;
int (*single_show)(struct seq_file *, void *);
@@ -403,7 +402,7 @@ extern const struct dentry_operations proc_net_dentry_ops;
static inline void pde_force_lookup(struct proc_dir_entry *pde)
{
/* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
- pde->proc_dops = &proc_net_dentry_ops;
+ pde->flags |= PROC_ENTRY_FORCE_LOOKUP;
}
/*
@@ -414,7 +413,6 @@ static inline void pde_force_lookup(struct proc_dir_entry *pde)
static inline struct dentry *proc_splice_unmountable(struct inode *inode,
struct dentry *dentry, const struct dentry_operations *d_ops)
{
- d_set_d_op(dentry, d_ops);
dont_mount(dentry);
- return d_splice_alias(inode, dentry);
+ return d_splice_alias_ops(inode, dentry, d_ops);
}
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index c610224faf10..4403a2e20c16 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -111,8 +111,7 @@ static struct dentry *proc_ns_instantiate(struct dentry *dentry,
ei->ns_ops = ns_ops;
pid_update_inode(task, inode);
- d_set_d_op(dentry, &pid_dentry_operations);
- return d_splice_alias(inode, dentry);
+ return d_splice_alias_ops(inode, dentry, &pid_dentry_operations);
}
static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index cc9d74a06ff0..49ab74e0bfde 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -540,9 +540,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
goto out;
}
- d_set_d_op(dentry, &proc_sys_dentry_operations);
inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
- err = d_splice_alias(inode, dentry);
+ err = d_splice_alias_ops(inode, dentry, &proc_sys_dentry_operations);
out:
if (h)
@@ -699,9 +698,9 @@ static bool proc_sys_fill_cache(struct file *file,
return false;
if (d_in_lookup(child)) {
struct dentry *res;
- d_set_d_op(child, &proc_sys_dentry_operations);
inode = proc_sys_make_inode(dir->d_sb, head, table);
- res = d_splice_alias(inode, child);
+ res = d_splice_alias_ops(inode, child,
+ &proc_sys_dentry_operations);
d_lookup_done(child);
if (unlikely(res)) {
dput(child);
@@ -918,17 +917,21 @@ static int proc_sys_compare(const struct dentry *dentry,
struct ctl_table_header *head;
struct inode *inode;
- /* Although proc doesn't have negative dentries, rcu-walk means
- * that inode here can be NULL */
- /* AV: can it, indeed? */
- inode = d_inode_rcu(dentry);
- if (!inode)
- return 1;
if (name->len != len)
return 1;
if (memcmp(name->name, str, len))
return 1;
- head = rcu_dereference(PROC_I(inode)->sysctl);
+
+ // false positive is fine here - we'll recheck anyway
+ if (d_in_lookup(dentry))
+ return 0;
+
+ inode = d_inode_rcu(dentry);
+ // we just might have run into a dentry in the middle of __dentry_kill()
+ if (!inode)
+ return 1;
+
+ head = READ_ONCE(PROC_I(inode)->sysctl);
return !head || !sysctl_is_seen(head);
}
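The rewritten proc_sys_compare() errs toward "match" when the dentry is still in lookup (a false positive is rechecked later) and toward "no match" when the dentry is provably dead. A runnable userspace model, with boolean stand-ins for d_in_lookup(), d_inode_rcu() and sysctl_is_seen():

#include <assert.h>
#include <stdbool.h>

struct dentry_model {
	bool in_lookup;		/* d_in_lookup() */
	bool has_inode;		/* d_inode_rcu() != NULL */
	bool sysctl_visible;	/* sysctl_is_seen() */
};

static int compare(const struct dentry_model *d, bool name_matches)
{
	if (!name_matches)
		return 1;
	if (d->in_lookup)
		return 0;	/* false positive OK - rechecked later */
	if (!d->has_inode)
		return 1;	/* mid-__dentry_kill(): cannot match */
	return d->sysctl_visible ? 0 : 1;
}

int main(void)
{
	struct dentry_model in_lookup = { true, false, false };
	struct dentry_model dead = { false, false, false };

	assert(compare(&in_lookup, true) == 0);	/* err toward match */
	assert(compare(&dead, true) == 1);	/* provably not a match */
	return 0;
}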
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 27972c0749e7..751479eb128f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -36,9 +36,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
unsigned long text, lib, swap, anon, file, shmem;
unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
- anon = get_mm_counter(mm, MM_ANONPAGES);
- file = get_mm_counter(mm, MM_FILEPAGES);
- shmem = get_mm_counter(mm, MM_SHMEMPAGES);
+ anon = get_mm_counter_sum(mm, MM_ANONPAGES);
+ file = get_mm_counter_sum(mm, MM_FILEPAGES);
+ shmem = get_mm_counter_sum(mm, MM_SHMEMPAGES);
/*
* Note: to minimize their overhead, mm maintains hiwater_vm and
@@ -59,7 +59,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
text = min(text, mm->exec_vm << PAGE_SHIFT);
lib = (mm->exec_vm << PAGE_SHIFT) - text;
- swap = get_mm_counter(mm, MM_SWAPENTS);
+ swap = get_mm_counter_sum(mm, MM_SWAPENTS);
SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
@@ -92,12 +92,12 @@ unsigned long task_statm(struct mm_struct *mm,
unsigned long *shared, unsigned long *text,
unsigned long *data, unsigned long *resident)
{
- *shared = get_mm_counter(mm, MM_FILEPAGES) +
- get_mm_counter(mm, MM_SHMEMPAGES);
+ *shared = get_mm_counter_sum(mm, MM_FILEPAGES) +
+ get_mm_counter_sum(mm, MM_SHMEMPAGES);
*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
>> PAGE_SHIFT;
*data = mm->data_vm + mm->stack_vm;
- *resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
+ *resident = *shared + get_mm_counter_sum(mm, MM_ANONPAGES);
return mm->total_vm;
}
@@ -2182,7 +2182,7 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
categories |= PAGE_IS_FILE;
}
- if (is_zero_pfn(pmd_pfn(pmd)))
+ if (is_huge_zero_pmd(pmd))
categories |= PAGE_IS_PFNZERO;
if (pmd_soft_dirty(pmd))
categories |= PAGE_IS_SOFT_DIRTY;
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index bb3b769edc71..1a2e1185426c 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -300,7 +300,7 @@ static struct dentry *psinfo_lock_root(void)
return NULL;
root = pstore_sb->s_root;
- inode_lock(d_inode(root));
+ inode_lock_nested(d_inode(root), I_MUTEX_PARENT);
return root;
}
@@ -318,8 +318,7 @@ int pstore_put_backend_records(struct pstore_info *psi)
list_for_each_entry_safe(pos, tmp, &records_list, list) {
if (pos->record->psi == psi) {
list_del_init(&pos->list);
- d_invalidate(pos->dentry);
- simple_unlink(d_inode(root), pos->dentry);
+ locked_recursive_removal(pos->dentry, NULL);
pos->dentry = NULL;
}
}
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 775fa905fda0..f8874c3b8c1e 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -269,6 +269,7 @@ static int ramfs_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = RAMFS_MAGIC;
sb->s_op = &ramfs_ops;
+ sb->s_d_flags = DCACHE_DONTCACHE;
sb->s_time_gran = 1;
inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0);
diff --git a/fs/read_write.c b/fs/read_write.c
index 0ef70e128c4a..81a9474dc396 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -237,7 +237,7 @@ EXPORT_SYMBOL(generic_llseek_cookie);
* @offset: file offset to seek to
* @whence: type of seek
*
- * This is a generic implemenation of ->llseek useable for all normal local
+ * This is a generic implementation of ->llseek usable for all normal local
* filesystems. It just updates the file offset to the value specified by
* @offset and @whence.
*/
diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
index 6ed2dfd4dbbd..d98e0d2de09f 100644
--- a/fs/resctrl/ctrlmondata.c
+++ b/fs/resctrl/ctrlmondata.c
@@ -594,9 +594,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
struct rmid_read rr = {0};
struct rdt_mon_domain *d;
struct rdtgroup *rdtgrp;
+ int domid, cpu, ret = 0;
struct rdt_resource *r;
+ struct cacheinfo *ci;
struct mon_data *md;
- int domid, ret = 0;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (!rdtgrp) {
@@ -623,10 +624,14 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
* one that matches this cache id.
*/
list_for_each_entry(d, &r->mon_domains, hdr.list) {
- if (d->ci->id == domid) {
- rr.ci = d->ci;
+ if (d->ci_id == domid) {
+ rr.ci_id = d->ci_id;
+ cpu = cpumask_any(&d->hdr.cpu_mask);
+ ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
+ if (!ci)
+ continue;
mon_event_read(&rr, r, NULL, rdtgrp,
- &d->ci->shared_cpu_map, evtid, false);
+ &ci->shared_cpu_map, evtid, false);
goto checkresult;
}
}
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index 9a8cf6f11151..0a1eedba2b03 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -98,7 +98,7 @@ struct mon_data {
* domains in @r sharing L3 @ci.id
* @evtid: Which monitor event to read.
* @first: Initialize MBM counter when true.
- * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
+ * @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains.
* @err: Error encountered when reading counter.
* @val: Returned value of event counter. If @rgrp is a parent resource group,
* @val includes the sum of event counts from its child resource groups.
@@ -112,7 +112,7 @@ struct rmid_read {
struct rdt_mon_domain *d;
enum resctrl_event_id evtid;
bool first;
- struct cacheinfo *ci;
+ unsigned int ci_id;
int err;
u64 val;
void *arch_mon_ctx;
diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
index bde2801289d3..f5637855c3ac 100644
--- a/fs/resctrl/monitor.c
+++ b/fs/resctrl/monitor.c
@@ -361,6 +361,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
int cpu = smp_processor_id();
struct rdt_mon_domain *d;
+ struct cacheinfo *ci;
struct mbm_state *m;
int err, ret;
u64 tval = 0;
@@ -388,7 +389,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
}
/* Summing domains that share a cache, must be on a CPU for that cache. */
- if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
+ ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
+ if (!ci || ci->id != rr->ci_id)
return -EINVAL;
/*
@@ -400,7 +402,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
*/
ret = -EINVAL;
list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
- if (d->ci->id != rr->ci->id)
+ if (d->ci_id != rr->ci_id)
continue;
err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
rr->evtid, &tval, rr->arch_mon_ctx);
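The resctrl hunks in this series replace a cached struct cacheinfo pointer with the cache id, re-resolving the object on each use so CPU hotplug cannot leave a dangling pointer behind. A userspace model of the "store a stable key, not a pointer" pattern, with invented types:

#include <stddef.h>
#include <stdio.h>

struct cacheinfo_model { unsigned int id; int nr_cpus; };

static struct cacheinfo_model table[2];	/* may be rebuilt on hotplug */

static struct cacheinfo_model *resolve(unsigned int id)
{
	for (size_t i = 0; i < 2; i++)
		if (table[i].id == id)
			return &table[i];
	return NULL;	/* domain went away: caller skips the read */
}

int main(void)
{
	table[0] = (struct cacheinfo_model){ .id = 3, .nr_cpus = 8 };

	struct cacheinfo_model *ci = resolve(3);	/* per-use lookup */
	printf("cache id 3 %s\n", ci ? "found" : "gone");
	return 0;
}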
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index 1beb124e25f6..77d08229d855 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -3036,7 +3036,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
char name[32];
snc_mode = r->mon_scope == RESCTRL_L3_NODE;
- sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
if (snc_mode)
sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
@@ -3061,7 +3061,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
return -EPERM;
list_for_each_entry(mevt, &r->evt_list, list) {
- domid = do_sum ? d->ci->id : d->hdr.id;
+ domid = do_sum ? d->ci_id : d->hdr.id;
priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum);
if (WARN_ON_ONCE(!priv))
return -EINVAL;
@@ -3089,7 +3089,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
lockdep_assert_held(&rdtgroup_mutex);
snc_mode = r->mon_scope == RESCTRL_L3_NODE;
- sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
kn = kernfs_find_and_get(parent_kn, name);
if (kn) {
/*
diff --git a/fs/select.c b/fs/select.c
index 9fb650d03d52..082cf60c7e23 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -192,7 +192,7 @@ static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *k
* and is paired with smp_store_mb() in poll_schedule_timeout.
*/
smp_wmb();
- pwq->triggered = 1;
+ WRITE_ONCE(pwq->triggered, 1);
/*
* Perform the default wake up operation using a dummy
@@ -237,7 +237,7 @@ static int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
int rc = -EINTR;
set_current_state(state);
- if (!pwq->triggered)
+ if (!READ_ONCE(pwq->triggered))
rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
__set_current_state(TASK_RUNNING);
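pwq->triggered is written by the waker and read by the sleeper with no common lock, hence the READ_ONCE()/WRITE_ONCE() annotations. In C11 terms the pairing looks like the sketch below; the release/acquire pair stands in for the kernel's smp_wmb()/smp_store_mb() pairing, as an analogy rather than a model of the kernel memory model:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool triggered;

static void pollwake(void)		/* waker: WRITE_ONCE() side */
{
	atomic_store_explicit(&triggered, true, memory_order_release);
}

static bool should_sleep(void)		/* sleeper: READ_ONCE() side */
{
	return !atomic_load_explicit(&triggered, memory_order_acquire);
}

int main(void)
{
	pollwake();
	return should_sleep();	/* 0: the wakeup was observed */
}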
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 5200a0f3cafc..368e870624da 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -509,8 +509,17 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb)
spin_lock(&cfids->cfid_list_lock);
list_for_each_entry(cfid, &cfids->entries, entry) {
tmp_list = kmalloc(sizeof(*tmp_list), GFP_ATOMIC);
- if (tmp_list == NULL)
- break;
+ if (tmp_list == NULL) {
+ /*
+ * If the kmalloc() fails, we won't drop all
+ * dentries, and unmounting is likely to trigger
+ * a 'Dentry still in use' error.
+ */
+ cifs_tcon_dbg(VFS, "Out of memory while dropping dentries\n");
+ spin_unlock(&cfids->cfid_list_lock);
+ spin_unlock(&cifs_sb->tlink_tree_lock);
+ goto done;
+ }
spin_lock(&cfid->fid_lock);
tmp_list->dentry = cfid->dentry;
cfid->dentry = NULL;
@@ -522,6 +531,7 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb)
}
spin_unlock(&cifs_sb->tlink_tree_lock);
+done:
list_for_each_entry_safe(tmp_list, q, &entry, entry) {
list_del(&tmp_list->entry);
dput(tmp_list->dentry);
diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
index 1dfe79d947a6..a28f7cae3caa 100644
--- a/fs/smb/client/cached_dir.h
+++ b/fs/smb/client/cached_dir.h
@@ -21,12 +21,12 @@ struct cached_dirent {
struct cached_dirents {
bool is_valid:1;
bool is_failed:1;
- struct dir_context *ctx; /*
- * Only used to make sure we only take entries
- * from a single context. Never dereferenced.
- */
+ struct file *file; /*
+ * Used to associate the cache with a single
+ * open file instance.
+ */
struct mutex de_mutex;
- int pos; /* Expected ctx->pos */
+ loff_t pos; /* Expected ctx->pos */
struct list_head entries;
};
diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c
index c0196be0e65f..3fdf75737d43 100644
--- a/fs/smb/client/cifs_debug.c
+++ b/fs/smb/client/cifs_debug.c
@@ -1105,7 +1105,7 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
if ((count < 1) || (count > 11))
return -EINVAL;
- memset(flags_string, 0, 12);
+ memset(flags_string, 0, sizeof(flags_string));
if (copy_from_user(flags_string, buffer, count))
return -EFAULT;
diff --git a/fs/smb/client/cifs_ioctl.h b/fs/smb/client/cifs_ioctl.h
index 26327442e383..b51ce64fcccf 100644
--- a/fs/smb/client/cifs_ioctl.h
+++ b/fs/smb/client/cifs_ioctl.h
@@ -61,7 +61,7 @@ struct smb_query_info {
struct smb3_key_debug_info {
__u64 Suid;
__u16 cipher_type;
- __u8 auth_key[16]; /* SMB2_NTLMV2_SESSKEY_SIZE */
+ __u8 auth_key[SMB2_NTLMV2_SESSKEY_SIZE];
__u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
__u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
} __packed;
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 0a5266ecfd15..d4ec73359922 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -260,9 +260,9 @@ cifs_read_super(struct super_block *sb)
}
if (tcon->nocase)
- sb->s_d_op = &cifs_ci_dentry_ops;
+ set_default_d_op(sb, &cifs_ci_dentry_ops);
else
- sb->s_d_op = &cifs_dentry_ops;
+ set_default_d_op(sb, &cifs_dentry_ops);
sb->s_root = d_make_root(inode);
if (!sb->s_root) {
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 45e94e18f4d5..89160bc34d35 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -709,6 +709,7 @@ inc_rfc1001_len(void *buf, int count)
struct TCP_Server_Info {
struct list_head tcp_ses_list;
struct list_head smb_ses_list;
+ struct list_head rlist; /* reconnect list */
spinlock_t srv_lock; /* protect anything here that is not protected */
__u64 conn_id; /* connection identifier (useful for debugging) */
int srv_count; /* reference counter */
@@ -776,6 +777,7 @@ struct TCP_Server_Info {
__le32 session_key_id; /* retrieved from negotiate response and send in session setup request */
struct session_key session_key;
unsigned long lstrp; /* when we got last response from this server */
+ unsigned long neg_start; /* when negotiate started (jiffies) */
struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
#define CIFS_NEGFLAVOR_UNENCAP 1 /* wct == 17, but no ext_sec */
#define CIFS_NEGFLAVOR_EXTENDED 2 /* wct == 17, ext_sec bit set */
@@ -1302,6 +1304,7 @@ struct cifs_tcon {
bool use_persistent:1; /* use persistent instead of durable handles */
bool no_lease:1; /* Do not request leases on files or directories */
bool use_witness:1; /* use witness protocol */
+ bool dummy:1; /* dummy tcon used for reconnecting channels */
__le32 capabilities;
__u32 share_flags;
__u32 maximal_access;
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index 66093fa78aed..045227ed4efc 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -136,6 +136,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
struct smb_hdr *out_buf,
int *bytes_returned);
+void smb2_query_server_interfaces(struct work_struct *work);
void
cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
bool all_channels);
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 7216fcec79e8..75142f49d65d 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -1334,7 +1334,12 @@ cifs_readv_callback(struct mid_q_entry *mid)
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
case MID_REQUEST_SUBMITTED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted);
+ goto do_retry;
case MID_RETRY_NEEDED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed);
+do_retry:
+ __set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags);
rdata->result = -EAGAIN;
if (server->sign && rdata->got_bytes)
/* reset bytes number since we can not check a sign */
@@ -1343,8 +1348,14 @@ cifs_readv_callback(struct mid_q_entry *mid)
task_io_account_read(rdata->got_bytes);
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
+ case MID_RESPONSE_MALFORMED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed);
+ rdata->result = -EIO;
+ break;
default:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown);
rdata->result = -EIO;
+ break;
}
if (rdata->result == -ENODATA) {
@@ -1713,10 +1724,21 @@ cifs_writev_callback(struct mid_q_entry *mid)
}
break;
case MID_REQUEST_SUBMITTED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted);
+ __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
+ result = -EAGAIN;
+ break;
case MID_RETRY_NEEDED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed);
+ __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
result = -EAGAIN;
break;
+ case MID_RESPONSE_MALFORMED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed);
+ result = -EIO;
+ break;
default:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown);
result = -EIO;
break;
}
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 28bc33496623..205f547ca49e 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -97,7 +97,7 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server)
return rc;
}
-static void smb2_query_server_interfaces(struct work_struct *work)
+void smb2_query_server_interfaces(struct work_struct *work)
{
int rc;
int xid;
@@ -124,6 +124,14 @@ static void smb2_query_server_interfaces(struct work_struct *work)
(SMB_INTERFACE_POLL_INTERVAL * HZ));
}
+#define set_need_reco(server) \
+do { \
+ spin_lock(&server->srv_lock); \
+ if (server->tcpStatus != CifsExiting) \
+ server->tcpStatus = CifsNeedReconnect; \
+ spin_unlock(&server->srv_lock); \
+} while (0)
+
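set_need_reco() relies on the classic do { ... } while (0) wrapper so that the multi-statement macro behaves as a single statement. A standalone demonstration of why that matters with an unbraced if/else:

#include <stdio.h>

#define set_flag(x)				\
	do {					\
		(x)->flag = 1;			\
		printf("flag set\n");		\
	} while (0)

struct s { int flag; };

int main(void)
{
	struct s v = { 0 };

	if (v.flag == 0)
		set_flag(&v);	/* expands to exactly one statement */
	else
		printf("already set\n");
	return 0;
}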
/*
* Update the tcpStatus for the server.
* This is used to signal the cifsd thread to call cifs_reconnect
@@ -137,39 +145,45 @@ void
cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
bool all_channels)
{
- struct TCP_Server_Info *pserver;
+ struct TCP_Server_Info *nserver;
struct cifs_ses *ses;
+ LIST_HEAD(reco);
int i;
- /* If server is a channel, select the primary channel */
- pserver = SERVER_IS_CHAN(server) ? server->primary_server : server;
-
/* if we need to signal just this channel */
if (!all_channels) {
- spin_lock(&server->srv_lock);
- if (server->tcpStatus != CifsExiting)
- server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&server->srv_lock);
+ set_need_reco(server);
return;
}
- spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
- if (cifs_ses_exiting(ses))
- continue;
- spin_lock(&ses->chan_lock);
- for (i = 0; i < ses->chan_count; i++) {
- if (!ses->chans[i].server)
+ if (SERVER_IS_CHAN(server))
+ server = server->primary_server;
+ scoped_guard(spinlock, &cifs_tcp_ses_lock) {
+ set_need_reco(server);
+ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ spin_lock(&ses->ses_lock);
+ if (ses->ses_status == SES_EXITING) {
+ spin_unlock(&ses->ses_lock);
continue;
-
- spin_lock(&ses->chans[i].server->srv_lock);
- if (ses->chans[i].server->tcpStatus != CifsExiting)
- ses->chans[i].server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&ses->chans[i].server->srv_lock);
+ }
+ spin_lock(&ses->chan_lock);
+ for (i = 1; i < ses->chan_count; i++) {
+ nserver = ses->chans[i].server;
+ if (!nserver)
+ continue;
+ nserver->srv_count++;
+ list_add(&nserver->rlist, &reco);
+ }
+ spin_unlock(&ses->chan_lock);
+ spin_unlock(&ses->ses_lock);
}
- spin_unlock(&ses->chan_lock);
}
- spin_unlock(&cifs_tcp_ses_lock);
+
+ list_for_each_entry_safe(server, nserver, &reco, rlist) {
+ list_del_init(&server->rlist);
+ set_need_reco(server);
+ cifs_put_tcp_session(server, 0);
+ }
}
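The rewritten cifs_signal_cifsd_for_reconnect() uses a two-phase pattern: pin each secondary channel (srv_count++) onto a private rlist under the lock, then mark and unpin them after dropping it, so the marking never runs under the spinlock. A runnable userspace model with a pthread mutex in place of the spinlock and illustrative types:

#include <pthread.h>
#include <stdio.h>

struct chan { int refs, need_reco; };	/* illustrative */

static pthread_mutex_t ses_lock = PTHREAD_MUTEX_INITIALIZER;
static struct chan channels[3];

static void signal_all(void)
{
	struct chan *todo[3];
	int n = 0;

	pthread_mutex_lock(&ses_lock);		/* phase 1: collect + pin */
	for (int i = 0; i < 3; i++) {
		channels[i].refs++;
		todo[n++] = &channels[i];
	}
	pthread_mutex_unlock(&ses_lock);

	for (int i = 0; i < n; i++) {		/* phase 2: act unlocked */
		todo[i]->need_reco = 1;
		todo[i]->refs--;	/* cifs_put_tcp_session() stand-in */
	}
}

int main(void)
{
	signal_all();
	printf("need_reco: %d %d %d\n", channels[0].need_reco,
	       channels[1].need_reco, channels[2].need_reco);
	return 0;
}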
/*
@@ -665,12 +679,12 @@ server_unresponsive(struct TCP_Server_Info *server)
/*
* If we're in the process of mounting a share or reconnecting a session
* and the server abruptly shut down (e.g. socket wasn't closed, packet
- * had been ACK'ed but no SMB response), don't wait longer than 20s to
- * negotiate protocol.
+ * had been ACK'ed but no SMB response), don't wait longer than 20s from
+ * when negotiate actually started.
*/
spin_lock(&server->srv_lock);
if (server->tcpStatus == CifsInNegotiate &&
- time_after(jiffies, server->lstrp + 20 * HZ)) {
+ time_after(jiffies, server->neg_start + 20 * HZ)) {
spin_unlock(&server->srv_lock);
cifs_reconnect(server, false);
return true;
@@ -2866,20 +2880,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
tcon->max_cached_dirs = ctx->max_cached_dirs;
tcon->nodelete = ctx->nodelete;
tcon->local_lease = ctx->local_lease;
- INIT_LIST_HEAD(&tcon->pending_opens);
tcon->status = TID_GOOD;
- INIT_DELAYED_WORK(&tcon->query_interfaces,
- smb2_query_server_interfaces);
if (ses->server->dialect >= SMB30_PROT_ID &&
(ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
/* schedule query interfaces poll */
queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
(SMB_INTERFACE_POLL_INTERVAL * HZ));
}
-#ifdef CONFIG_CIFS_DFS_UPCALL
- INIT_DELAYED_WORK(&tcon->dfs_cache_work, dfs_cache_refresh);
-#endif
spin_lock(&cifs_tcp_ses_lock);
list_add(&tcon->tcon_list, &ses->tcon_list);
spin_unlock(&cifs_tcp_ses_lock);
@@ -3718,9 +3726,15 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx)
goto out;
}
- /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
- if (tcon->posix_extensions)
+ /*
+ * if new SMB3.11 POSIX extensions are supported, do not change anything in the
+ * path (i.e., do not remap / and \ and do not map any special characters)
+ */
+ if (tcon->posix_extensions) {
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
+ cifs_sb->mnt_cifs_flags &= ~(CIFS_MOUNT_MAP_SFM_CHR |
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+ }
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/* tell server which Unix caps we support */
@@ -4193,7 +4207,9 @@ retry:
return 0;
}
+ server->lstrp = jiffies;
server->tcpStatus = CifsInNegotiate;
+ server->neg_start = jiffies;
spin_unlock(&server->srv_lock);
rc = server->ops->negotiate(xid, ses, server);
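The fix records neg_start when negotiation begins and times out 20s from that point, rather than from the last response time (lstrp), which keeps a slow-but-alive exchange from being misread as a hung negotiate. A userspace analogue using time(2), with struct srv as an invented stand-in:

#include <stdbool.h>
#include <time.h>

struct srv { time_t neg_start; };	/* invented stand-in */

static bool negotiate_timed_out(const struct srv *s)
{
	return time(NULL) > s->neg_start + 20;	/* 20s from the start */
}

int main(void)
{
	struct srv s = { .neg_start = time(NULL) - 30 };

	return !negotiate_timed_out(&s);	/* 0: 30s in, timed out */
}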
diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c
index 1c6e5389c51f..5223edf6d11a 100644
--- a/fs/smb/client/dir.c
+++ b/fs/smb/client/dir.c
@@ -190,6 +190,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
int disposition;
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
+ struct cached_fid *parent_cfid = NULL;
int rdwr_for_fscache = 0;
__le32 lease_flags = 0;
@@ -313,10 +314,10 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
if (!tcon->unix_ext && (mode & S_IWUGO) == 0)
create_options |= CREATE_OPTION_READONLY;
+
retry_open:
if (tcon->cfids && direntry->d_parent && server->dialect >= SMB30_PROT_ID) {
- struct cached_fid *parent_cfid;
-
+ parent_cfid = NULL;
spin_lock(&tcon->cfids->cfid_list_lock);
list_for_each_entry(parent_cfid, &tcon->cfids->entries, entry) {
if (parent_cfid->dentry == direntry->d_parent) {
@@ -327,6 +328,7 @@ retry_open:
memcpy(fid->parent_lease_key,
parent_cfid->fid.lease_key,
SMB2_LEASE_KEY_SIZE);
+ parent_cfid->dirents.is_valid = false;
}
break;
}
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index d2df10b8e6fd..1421bde045c2 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -52,6 +52,7 @@ static void cifs_prepare_write(struct netfs_io_subrequest *subreq)
struct netfs_io_stream *stream = &req->rreq.io_streams[subreq->stream_nr];
struct TCP_Server_Info *server;
struct cifsFileInfo *open_file = req->cfile;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(wdata->rreq->inode->i_sb);
size_t wsize = req->rreq.wsize;
int rc;
@@ -63,6 +64,10 @@ static void cifs_prepare_write(struct netfs_io_subrequest *subreq)
server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
wdata->server = server;
+ if (cifs_sb->ctx->wsize == 0)
+ cifs_negotiate_wsize(server, cifs_sb->ctx,
+ tlink_tcon(req->cfile->tlink));
+
retry:
if (open_file->invalidHandle) {
rc = cifs_reopen_file(open_file, false);
@@ -160,10 +165,9 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq)
server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses);
rdata->server = server;
- if (cifs_sb->ctx->rsize == 0) {
+ if (cifs_sb->ctx->rsize == 0)
cifs_negotiate_rsize(server, cifs_sb->ctx,
tlink_tcon(req->cfile->tlink));
- }
rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
&size, &rdata->credits);
@@ -999,15 +1003,18 @@ int cifs_open(struct inode *inode, struct file *file)
rc = cifs_get_readable_path(tcon, full_path, &cfile);
}
if (rc == 0) {
- if (file->f_flags == cfile->f_flags) {
+ unsigned int oflags = file->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC);
+ unsigned int cflags = cfile->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC);
+
+ if (cifs_convert_flags(oflags, 0) == cifs_convert_flags(cflags, 0) &&
+ (oflags & (O_SYNC|O_DIRECT)) == (cflags & (O_SYNC|O_DIRECT))) {
file->private_data = cfile;
spin_lock(&CIFS_I(inode)->deferred_lock);
cifs_del_deferred_close(cfile);
spin_unlock(&CIFS_I(inode)->deferred_lock);
goto use_cache;
- } else {
- _cifsFileInfo_put(cfile, true, false);
}
+ _cifsFileInfo_put(cfile, true, false);
} else {
/* hard link on the deferred close file */
rc = cifs_get_hardlink_path(tcon, inode, file);
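The reuse test above masks out creation-time flags (O_CREAT, O_EXCL, O_TRUNC) that are meaningless for an already-open handle, while still requiring O_SYNC/O_DIRECT to agree. A compilable approximation; the access-mode comparison below stands in for cifs_convert_flags(), which the real code uses:

#define _GNU_SOURCE	/* for O_DIRECT */
#include <fcntl.h>
#include <stdbool.h>

static bool can_reuse_handle(unsigned int new_flags, unsigned int cached_flags)
{
	const unsigned int ignore = O_CREAT | O_EXCL | O_TRUNC;
	const unsigned int strict = O_SYNC | O_DIRECT;
	unsigned int o = new_flags & ~ignore;
	unsigned int c = cached_flags & ~ignore;

	/* access-mode check approximates cifs_convert_flags() */
	return (o & O_ACCMODE) == (c & O_ACCMODE) &&
	       (o & strict) == (c & strict);
}

int main(void)
{
	/* O_CREAT on the new open does not defeat reuse */
	return !can_reuse_handle(O_RDWR | O_CREAT, O_RDWR);
}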
@@ -3081,7 +3088,8 @@ void cifs_oplock_break(struct work_struct *work)
struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
oplock_break);
struct inode *inode = d_inode(cfile->dentry);
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct super_block *sb = inode->i_sb;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct cifsInodeInfo *cinode = CIFS_I(inode);
struct cifs_tcon *tcon;
struct TCP_Server_Info *server;
@@ -3091,6 +3099,12 @@ void cifs_oplock_break(struct work_struct *work)
__u64 persistent_fid, volatile_fid;
__u16 net_fid;
+ /*
+ * Hold a reference to the superblock to prevent it and its inodes from
+ * being freed while we are accessing cinode. Otherwise, _cifsFileInfo_put()
+ * may release the last reference to the sb and trigger inode eviction.
+ */
+ cifs_sb_active(sb);
wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
TASK_UNINTERRUPTIBLE);
@@ -3163,6 +3177,7 @@ oplock_break_ack:
cifs_put_tlink(tlink);
out:
cifs_done_oplock_break(cinode);
+ cifs_sb_deactive(sb);
}
static int cifs_swap_activate(struct swap_info_struct *sis,
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index a634a34d4086..59ccc2229ab3 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -1824,10 +1824,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
cifs_errorf(fc, "symlinkroot mount options must be absolute path\n");
goto cifs_parse_mount_err;
}
- kfree(ctx->symlinkroot);
- ctx->symlinkroot = kstrdup(param->string, GFP_KERNEL);
- if (!ctx->symlinkroot)
+ if (strnlen(param->string, PATH_MAX) == PATH_MAX) {
+ cifs_errorf(fc, "symlinkroot path too long (max path length: %u)\n",
+ PATH_MAX - 1);
goto cifs_parse_mount_err;
+ }
+ kfree(ctx->symlinkroot);
+ ctx->symlinkroot = param->string;
+ param->string = NULL;
break;
}
/* case Opt_ignore: - is ignored as expected ... */
@@ -1837,13 +1841,6 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
goto cifs_parse_mount_err;
}
- /*
- * By default resolve all native absolute symlinks relative to "/mnt/".
- * Same default has drvfs driver running in WSL for resolving SMB shares.
- */
- if (!ctx->symlinkroot)
- ctx->symlinkroot = kstrdup("/mnt/", GFP_KERNEL);
-
return 0;
cifs_parse_mount_err:
diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c
index 56439da4f119..0a9935ce05a5 100644
--- a/fs/smb/client/ioctl.c
+++ b/fs/smb/client/ioctl.c
@@ -506,7 +506,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
le16_to_cpu(tcon->ses->server->cipher_type);
pkey_inf.Suid = tcon->ses->Suid;
memcpy(pkey_inf.auth_key, tcon->ses->auth_key.response,
- 16 /* SMB2_NTLMV2_SESSKEY_SIZE */);
+ SMB2_NTLMV2_SESSKEY_SIZE);
memcpy(pkey_inf.smb3decryptionkey,
tcon->ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE);
memcpy(pkey_inf.smb3encryptionkey,
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index e77017f47084..da23cc12a52c 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -151,6 +151,12 @@ tcon_info_alloc(bool dir_leases_enabled, enum smb3_tcon_ref_trace trace)
#ifdef CONFIG_CIFS_DFS_UPCALL
INIT_LIST_HEAD(&ret_buf->dfs_ses_list);
#endif
+ INIT_LIST_HEAD(&ret_buf->pending_opens);
+ INIT_DELAYED_WORK(&ret_buf->query_interfaces,
+ smb2_query_server_interfaces);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ INIT_DELAYED_WORK(&ret_buf->dfs_cache_work, dfs_cache_refresh);
+#endif
return ret_buf;
}
diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c
index f9f11cbf89be..4e5460206397 100644
--- a/fs/smb/client/readdir.c
+++ b/fs/smb/client/readdir.c
@@ -264,7 +264,7 @@ cifs_posix_to_fattr(struct cifs_fattr *fattr, struct smb2_posix_info *info,
/* The Mode field in the response can now include the file type as well */
fattr->cf_mode = wire_mode_to_posix(le32_to_cpu(info->Mode),
fattr->cf_cifsattrs & ATTR_DIRECTORY);
- fattr->cf_dtype = S_DT(le32_to_cpu(info->Mode));
+ fattr->cf_dtype = S_DT(fattr->cf_mode);
switch (fattr->cf_mode & S_IFMT) {
case S_IFLNK:
@@ -851,9 +851,9 @@ static bool emit_cached_dirents(struct cached_dirents *cde,
}
static void update_cached_dirents_count(struct cached_dirents *cde,
- struct dir_context *ctx)
+ struct file *file)
{
- if (cde->ctx != ctx)
+ if (cde->file != file)
return;
if (cde->is_valid || cde->is_failed)
return;
@@ -862,9 +862,9 @@ static void update_cached_dirents_count(struct cached_dirents *cde,
}
static void finished_cached_dirents_count(struct cached_dirents *cde,
- struct dir_context *ctx)
+ struct dir_context *ctx, struct file *file)
{
- if (cde->ctx != ctx)
+ if (cde->file != file)
return;
if (cde->is_valid || cde->is_failed)
return;
@@ -877,11 +877,12 @@ static void finished_cached_dirents_count(struct cached_dirents *cde,
static void add_cached_dirent(struct cached_dirents *cde,
struct dir_context *ctx,
const char *name, int namelen,
- struct cifs_fattr *fattr)
+ struct cifs_fattr *fattr,
+ struct file *file)
{
struct cached_dirent *de;
- if (cde->ctx != ctx)
+ if (cde->file != file)
return;
if (cde->is_valid || cde->is_failed)
return;
@@ -911,7 +912,8 @@ static void add_cached_dirent(struct cached_dirents *cde,
static bool cifs_dir_emit(struct dir_context *ctx,
const char *name, int namelen,
struct cifs_fattr *fattr,
- struct cached_fid *cfid)
+ struct cached_fid *cfid,
+ struct file *file)
{
bool rc;
ino_t ino = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid);
@@ -923,7 +925,7 @@ static bool cifs_dir_emit(struct dir_context *ctx,
if (cfid) {
mutex_lock(&cfid->dirents.de_mutex);
add_cached_dirent(&cfid->dirents, ctx, name, namelen,
- fattr);
+ fattr, file);
mutex_unlock(&cfid->dirents.de_mutex);
}
@@ -1023,7 +1025,7 @@ static int cifs_filldir(char *find_entry, struct file *file,
cifs_prime_dcache(file_dentry(file), &name, &fattr);
return !cifs_dir_emit(ctx, name.name, name.len,
- &fattr, cfid);
+ &fattr, cfid, file);
}
@@ -1074,8 +1076,8 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
* we need to initialize scanning and storing the
* directory content.
*/
- if (ctx->pos == 0 && cfid->dirents.ctx == NULL) {
- cfid->dirents.ctx = ctx;
+ if (ctx->pos == 0 && cfid->dirents.file == NULL) {
+ cfid->dirents.file = file;
cfid->dirents.pos = 2;
}
/*
@@ -1143,7 +1145,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
} else {
if (cfid) {
mutex_lock(&cfid->dirents.de_mutex);
- finished_cached_dirents_count(&cfid->dirents, ctx);
+ finished_cached_dirents_count(&cfid->dirents, ctx, file);
mutex_unlock(&cfid->dirents.de_mutex);
}
cifs_dbg(FYI, "Could not find entry\n");
@@ -1184,7 +1186,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
ctx->pos++;
if (cfid) {
mutex_lock(&cfid->dirents.de_mutex);
- update_cached_dirents_count(&cfid->dirents, ctx);
+ update_cached_dirents_count(&cfid->dirents, file);
mutex_unlock(&cfid->dirents.de_mutex);
}
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index bb25e77c5540..5fa29a97ac15 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -57,6 +57,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
struct reparse_symlink_data_buffer *buf = NULL;
struct cifs_open_info_data data = {};
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ const char *symroot = cifs_sb->ctx->symlinkroot;
struct inode *new;
struct kvec iov;
__le16 *path = NULL;
@@ -82,7 +83,8 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
.symlink_target = symlink_target,
};
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') {
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) &&
+ symroot && symname[0] == '/') {
/*
* This is a request to create an absolute symlink on the server
* which does not support POSIX paths, and expects symlink in
@@ -92,7 +94,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
* ensure compatibility of this symlink stored in absolute form
* on the SMB server.
*/
- if (!strstarts(symname, cifs_sb->ctx->symlinkroot)) {
+ if (!strstarts(symname, symroot)) {
/*
* If the absolute Linux symlink target path is not
* inside "symlinkroot" location then there is no way
@@ -101,12 +103,12 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
cifs_dbg(VFS,
"absolute symlink '%s' cannot be converted to NT format "
"because it is outside of symlinkroot='%s'\n",
- symname, cifs_sb->ctx->symlinkroot);
+ symname, symroot);
rc = -EINVAL;
goto out;
}
- len = strlen(cifs_sb->ctx->symlinkroot);
- if (cifs_sb->ctx->symlinkroot[len-1] != '/')
+ len = strlen(symroot);
+ if (symroot[len - 1] != '/')
len++;
if (symname[len] >= 'a' && symname[len] <= 'z' &&
(symname[len+1] == '/' || symname[len+1] == '\0')) {
@@ -782,6 +784,7 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
const char *full_path,
struct cifs_sb_info *cifs_sb)
{
+ const char *symroot = cifs_sb->ctx->symlinkroot;
char sep = CIFS_DIR_SEP(cifs_sb);
char *linux_target = NULL;
char *smb_target = NULL;
@@ -815,7 +818,8 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
goto out;
}
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && !relative) {
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) &&
+ symroot && !relative) {
/*
* This is an absolute symlink from the server which does not
* support POSIX paths, so the symlink is in NT-style path.
@@ -875,15 +879,8 @@ globalroot:
abs_path += sizeof("\\DosDevices\\")-1;
else if (strstarts(abs_path, "\\GLOBAL??\\"))
abs_path += sizeof("\\GLOBAL??\\")-1;
- else {
- /* Unhandled absolute symlink, points outside of DOS/Win32 */
- cifs_dbg(VFS,
- "absolute symlink '%s' cannot be converted from NT format "
- "because points to unknown target\n",
- smb_target);
- rc = -EIO;
- goto out;
- }
+ else
+ goto out_unhandled_target;
/* Sometimes path separator after \?? is double backslash */
if (abs_path[0] == '\\')
@@ -910,25 +907,19 @@ globalroot:
abs_path++;
abs_path[0] = drive_letter;
} else {
- /* Unhandled absolute symlink. Report an error. */
- cifs_dbg(VFS,
- "absolute symlink '%s' cannot be converted from NT format "
- "because points to unknown target\n",
- smb_target);
- rc = -EIO;
- goto out;
+ goto out_unhandled_target;
}
abs_path_len = strlen(abs_path)+1;
- symlinkroot_len = strlen(cifs_sb->ctx->symlinkroot);
- if (cifs_sb->ctx->symlinkroot[symlinkroot_len-1] == '/')
+ symlinkroot_len = strlen(symroot);
+ if (symroot[symlinkroot_len - 1] == '/')
symlinkroot_len--;
linux_target = kmalloc(symlinkroot_len + 1 + abs_path_len, GFP_KERNEL);
if (!linux_target) {
rc = -ENOMEM;
goto out;
}
- memcpy(linux_target, cifs_sb->ctx->symlinkroot, symlinkroot_len);
+ memcpy(linux_target, symroot, symlinkroot_len);
linux_target[symlinkroot_len] = '/';
memcpy(linux_target + symlinkroot_len + 1, abs_path, abs_path_len);
} else if (smb_target[0] == sep && relative) {
@@ -966,6 +957,7 @@ globalroot:
* These paths have same format as Linux symlinks, so no
* conversion is needed.
*/
+out_unhandled_target:
linux_target = smb_target;
smb_target = NULL;
}
@@ -1172,7 +1164,6 @@ out:
if (!have_xattr_dev && (tag == IO_REPARSE_TAG_LX_CHR || tag == IO_REPARSE_TAG_LX_BLK))
return false;
- fattr->cf_dtype = S_DT(fattr->cf_mode);
return true;
}
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index ec0db32c7d98..330bc3d25bad 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -498,8 +498,7 @@ cifs_ses_add_channel(struct cifs_ses *ses,
ctx->domainauto = ses->domainAuto;
ctx->domainname = ses->domainName;
- /* no hostname for extra channels */
- ctx->server_hostname = "";
+ ctx->server_hostname = ses->server->hostname;
ctx->username = ses->user_name;
ctx->password = ses->password;
diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c
index 2a3e46b8e15a..a11a2a693c51 100644
--- a/fs/smb/client/smb2inode.c
+++ b/fs/smb/client/smb2inode.c
@@ -1346,7 +1346,8 @@ struct inode *smb2_get_reparse_inode(struct cifs_open_info_data *data,
* empty object on the server.
*/
if (!(le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS))
- return ERR_PTR(-EOPNOTSUPP);
+ if (!tcon->posix_extensions)
+ return ERR_PTR(-EOPNOTSUPP);
oparms = CIFS_OPARMS(cifs_sb, tcon, full_path,
SYNCHRONIZE | DELETE |
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 1468c16ea9b8..938a8a7c5d21 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -4316,6 +4316,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
u8 key[SMB3_ENC_DEC_KEY_SIZE];
struct aead_request *req;
u8 *iv;
+ DECLARE_CRYPTO_WAIT(wait);
unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
void *creq;
size_t sensitive_size;
@@ -4366,7 +4367,11 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
aead_request_set_crypt(req, sg, sg, crypt_len, iv);
aead_request_set_ad(req, assoc_data_len);
- rc = enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req);
+ aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done, &wait);
+
+ rc = crypto_wait_req(enc ? crypto_aead_encrypt(req)
+ : crypto_aead_decrypt(req), &wait);
if (!rc && enc)
memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE);
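crypt_message() now submits the AEAD request with a completion callback and parks in crypto_wait_req() until an asynchronous engine finishes; a synchronous completion just returns directly. A userspace model of that wait protocol using pthreads, where -115/-16 stand in for Linux -EINPROGRESS/-EBUSY:

#include <pthread.h>

struct waiter {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done, result;
};

static void complete_req(struct waiter *w, int result)	/* callback side */
{
	pthread_mutex_lock(&w->lock);
	w->result = result;
	w->done = 1;
	pthread_cond_signal(&w->cond);
	pthread_mutex_unlock(&w->lock);
}

static int wait_req(int err, struct waiter *w)		/* caller side */
{
	if (err != -115 /* EINPROGRESS */ && err != -16 /* EBUSY */)
		return err;	/* completed synchronously */
	pthread_mutex_lock(&w->lock);
	while (!w->done)
		pthread_cond_wait(&w->cond, &w->lock);
	err = w->result;
	pthread_mutex_unlock(&w->lock);
	return err;
}

int main(void)
{
	struct waiter w = { PTHREAD_MUTEX_INITIALIZER,
			    PTHREAD_COND_INITIALIZER, 0, 0 };

	complete_req(&w, 0);		/* pretend the engine finished */
	return wait_req(-115, &w);	/* "-EINPROGRESS" resolves to 0 */
}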
@@ -5255,7 +5260,8 @@ static int smb2_make_node(unsigned int xid, struct inode *inode,
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
rc = cifs_sfu_make_node(xid, inode, dentry, tcon,
full_path, mode, dev);
- } else if (le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS) {
+ } else if ((le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS)
+ || (tcon->posix_extensions)) {
rc = smb2_mknod_reparse(xid, inode, dentry, tcon,
full_path, mode, dev);
}
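
The crypt_message() hunk above switches from calling the AEAD primitives directly to the crypto_wait_req() pattern: asynchronous AEAD implementations may return -EINPROGRESS or -EBUSY, and the helper parks on a completion until the request really finishes. A minimal sketch of that pattern, with the request setup elided:

	/* Drive a possibly-asynchronous AEAD request to completion. */
	DECLARE_CRYPTO_WAIT(wait);	/* on-stack completion + error slot */

	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				  crypto_req_done, &wait);
	/*
	 * A synchronous return code passes straight through; for
	 * -EINPROGRESS/-EBUSY, crypto_wait_req() sleeps until
	 * crypto_req_done() fires and then returns the final status.
	 */
	rc = crypto_wait_req(crypto_aead_encrypt(req), &wait);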
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index a717be1626a3..2df93a75e3b8 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -424,9 +424,9 @@ skip_sess_setup:
free_xid(xid);
ses->flags &= ~CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES;
- /* regardless of rc value, setup polling */
- queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
- (SMB_INTERFACE_POLL_INTERVAL * HZ));
+ if (!tcon->ipc && !tcon->dummy)
+ queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
+ (SMB_INTERFACE_POLL_INTERVAL * HZ));
mutex_unlock(&ses->session_mutex);
@@ -4229,10 +4229,8 @@ void smb2_reconnect_server(struct work_struct *work)
}
goto done;
}
-
tcon->status = TID_GOOD;
- tcon->retry = false;
- tcon->need_reconnect = false;
+ tcon->dummy = true;
/* now reconnect sessions for necessary channels */
list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) {
@@ -4567,7 +4565,11 @@ smb2_readv_callback(struct mid_q_entry *mid)
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
case MID_REQUEST_SUBMITTED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted);
+ goto do_retry;
case MID_RETRY_NEEDED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed);
+do_retry:
__set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags);
rdata->result = -EAGAIN;
if (server->sign && rdata->got_bytes)
@@ -4578,11 +4580,15 @@ smb2_readv_callback(struct mid_q_entry *mid)
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
case MID_RESPONSE_MALFORMED:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed);
credits.value = le16_to_cpu(shdr->CreditRequest);
credits.instance = server->reconnect_instance;
- fallthrough;
+ rdata->result = -EIO;
+ break;
default:
+ trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown);
rdata->result = -EIO;
+ break;
}
#ifdef CONFIG_CIFS_SMB_DIRECT
/*
@@ -4835,11 +4841,14 @@ smb2_writev_callback(struct mid_q_entry *mid)
switch (mid->mid_state) {
case MID_RESPONSE_RECEIVED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress);
credits.value = le16_to_cpu(rsp->hdr.CreditRequest);
credits.instance = server->reconnect_instance;
result = smb2_check_receive(mid, server, 0);
- if (result != 0)
+ if (result != 0) {
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_bad);
break;
+ }
written = le32_to_cpu(rsp->DataLength);
/*
@@ -4861,14 +4870,23 @@ smb2_writev_callback(struct mid_q_entry *mid)
}
break;
case MID_REQUEST_SUBMITTED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted);
+ __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
+ result = -EAGAIN;
+ break;
case MID_RETRY_NEEDED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed);
+ __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
result = -EAGAIN;
break;
case MID_RESPONSE_MALFORMED:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed);
credits.value = le16_to_cpu(rsp->hdr.CreditRequest);
credits.instance = server->reconnect_instance;
- fallthrough;
+ result = -EIO;
+ break;
default:
+ trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown);
result = -EIO;
break;
}
@@ -4908,7 +4926,6 @@ smb2_writev_callback(struct mid_q_entry *mid)
server->credits, server->in_flight,
0, cifs_trace_rw_credits_write_response_clear);
wdata->credits.value = 0;
- trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress);
cifs_write_subrequest_terminated(wdata, result ?: written);
release_mid(mid);
trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0,
diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c
index 5ae847919da5..754e94a0e07f 100644
--- a/fs/smb/client/smbdirect.c
+++ b/fs/smb/client/smbdirect.c
@@ -907,8 +907,10 @@ wait_send_queue:
.local_dma_lkey = sc->ib.pd->local_dma_lkey,
.direction = DMA_TO_DEVICE,
};
+ size_t payload_len = umin(*_remaining_data_length,
+ sp->max_send_size - sizeof(*packet));
- rc = smb_extract_iter_to_rdma(iter, *_remaining_data_length,
+ rc = smb_extract_iter_to_rdma(iter, payload_len,
&extract);
if (rc < 0)
goto err_dma;
@@ -1013,6 +1015,27 @@ static int smbd_post_send_empty(struct smbd_connection *info)
return smbd_post_send_iter(info, NULL, &remaining_data_length);
}
+static int smbd_post_send_full_iter(struct smbd_connection *info,
+ struct iov_iter *iter,
+ int *_remaining_data_length)
+{
+ int rc = 0;
+
+ /*
+ * smbd_post_send_iter() respects the negotiated max_send_size,
+ * so we need to loop until the full iter is posted.
+ */
+
+ while (iov_iter_count(iter) > 0) {
+ rc = smbd_post_send_iter(info, iter, _remaining_data_length);
+ if (rc < 0)
+ break;
+ }
+
+ return rc;
+}
+
/*
* Post a receive request to the transport
* The remote peer can only send data when a receive request is posted
@@ -1452,6 +1475,9 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info)
char name[MAX_NAME_LEN];
int rc;
+ if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer)))
+ return -ENOMEM;
+
scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info);
info->request_cache =
kmem_cache_create(
@@ -1469,12 +1495,17 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info)
goto out1;
scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info);
+
+ struct kmem_cache_args response_args = {
+ .align = __alignof__(struct smbd_response),
+ .useroffset = (offsetof(struct smbd_response, packet) +
+ sizeof(struct smbdirect_data_transfer)),
+ .usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer),
+ };
info->response_cache =
- kmem_cache_create(
- name,
- sizeof(struct smbd_response) +
- sp->max_recv_size,
- 0, SLAB_HWCACHE_ALIGN, NULL);
+ kmem_cache_create(name,
+ sizeof(struct smbd_response) + sp->max_recv_size,
+ &response_args, SLAB_HWCACHE_ALIGN);
if (!info->response_cache)
goto out2;
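
The response-cache hunk above also moves to the struct-argument form of kmem_cache_create(), where alignment and the hardened-usercopy window travel in a struct kmem_cache_args instead of requiring kmem_cache_create_usercopy(). A reduced sketch of the same shape, using a hypothetical demo_object type:

	struct demo_object {
		long header;
		char payload[256];
	};

	struct kmem_cache_args args = {
		.align      = __alignof__(struct demo_object),
		/* only .payload may be copied to or from user space */
		.useroffset = offsetof(struct demo_object, payload),
		.usersize   = sizeof_field(struct demo_object, payload),
	};
	struct kmem_cache *cache =
		kmem_cache_create("demo_cache", sizeof(struct demo_object),
				  &args, SLAB_HWCACHE_ALIGN);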
@@ -1747,35 +1778,39 @@ try_again:
}
/*
- * Receive data from receive reassembly queue
+ * Receive data from the transport's receive reassembly queue
* All the incoming data packets are placed in reassembly queue
- * buf: the buffer to read data into
+ * iter: the buffer to read data into
* size: the length of data to read
* return value: actual data read
- * Note: this implementation copies the data from reassebmly queue to receive
+ *
+ * Note: this implementation copies the data from reassembly queue to receive
* buffers used by upper layer. This is not the optimal code path. A better way
* to do it is to not have upper layer allocate its receive buffers but rather
* borrow the buffer from reassembly queue, and return it after data is
* consumed. But this will require more changes to upper layer code, and also
* need to consider packet boundaries while they are still being reassembled.
*/
-static int smbd_recv_buf(struct smbd_connection *info, char *buf,
- unsigned int size)
+int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
{
struct smbdirect_socket *sc = &info->socket;
struct smbd_response *response;
struct smbdirect_data_transfer *data_transfer;
+ size_t size = iov_iter_count(&msg->msg_iter);
int to_copy, to_read, data_read, offset;
u32 data_length, remaining_data_length, data_offset;
int rc;
+ if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) == WRITE))
+ return -EINVAL; /* It's a bug in upper layer to get there */
+
again:
/*
* No need to hold the reassembly queue lock all the time as we are
* the only one reading from the front of the queue. The transport
* may add more entries to the back of the queue at the same time
*/
- log_read(INFO, "size=%d info->reassembly_data_length=%d\n", size,
+ log_read(INFO, "size=%zd info->reassembly_data_length=%d\n", size,
info->reassembly_data_length);
if (info->reassembly_data_length >= size) {
int queue_length;
@@ -1813,7 +1848,10 @@ again:
if (response->first_segment && size == 4) {
unsigned int rfc1002_len =
data_length + remaining_data_length;
- *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
+ __be32 rfc1002_hdr = cpu_to_be32(rfc1002_len);
+ if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr),
+ &msg->msg_iter) != sizeof(rfc1002_hdr))
+ return -EFAULT;
data_read = 4;
response->first_segment = false;
log_read(INFO, "returning rfc1002 length %d\n",
@@ -1822,10 +1860,9 @@ again:
}
to_copy = min_t(int, data_length - offset, to_read);
- memcpy(
- buf + data_read,
- (char *)data_transfer + data_offset + offset,
- to_copy);
+ if (copy_to_iter((char *)data_transfer + data_offset + offset,
+ to_copy, &msg->msg_iter) != to_copy)
+ return -EFAULT;
/* move on to the next buffer? */
if (to_copy == data_length - offset) {
@@ -1891,90 +1928,6 @@ read_rfc1002_done:
}
/*
- * Receive a page from receive reassembly queue
- * page: the page to read data into
- * to_read: the length of data to read
- * return value: actual data read
- */
-static int smbd_recv_page(struct smbd_connection *info,
- struct page *page, unsigned int page_offset,
- unsigned int to_read)
-{
- struct smbdirect_socket *sc = &info->socket;
- int ret;
- char *to_address;
- void *page_address;
-
- /* make sure we have the page ready for read */
- ret = wait_event_interruptible(
- info->wait_reassembly_queue,
- info->reassembly_data_length >= to_read ||
- sc->status != SMBDIRECT_SOCKET_CONNECTED);
- if (ret)
- return ret;
-
- /* now we can read from reassembly queue and not sleep */
- page_address = kmap_atomic(page);
- to_address = (char *) page_address + page_offset;
-
- log_read(INFO, "reading from page=%p address=%p to_read=%d\n",
- page, to_address, to_read);
-
- ret = smbd_recv_buf(info, to_address, to_read);
- kunmap_atomic(page_address);
-
- return ret;
-}
-
-/*
- * Receive data from transport
- * msg: a msghdr point to the buffer, can be ITER_KVEC or ITER_BVEC
- * return: total bytes read, or 0. SMB Direct will not do partial read.
- */
-int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
-{
- char *buf;
- struct page *page;
- unsigned int to_read, page_offset;
- int rc;
-
- if (iov_iter_rw(&msg->msg_iter) == WRITE) {
- /* It's a bug in upper layer to get there */
- cifs_dbg(VFS, "Invalid msg iter dir %u\n",
- iov_iter_rw(&msg->msg_iter));
- rc = -EINVAL;
- goto out;
- }
-
- switch (iov_iter_type(&msg->msg_iter)) {
- case ITER_KVEC:
- buf = msg->msg_iter.kvec->iov_base;
- to_read = msg->msg_iter.kvec->iov_len;
- rc = smbd_recv_buf(info, buf, to_read);
- break;
-
- case ITER_BVEC:
- page = msg->msg_iter.bvec->bv_page;
- page_offset = msg->msg_iter.bvec->bv_offset;
- to_read = msg->msg_iter.bvec->bv_len;
- rc = smbd_recv_page(info, page, page_offset, to_read);
- break;
-
- default:
- /* It's a bug in upper layer to get there */
- cifs_dbg(VFS, "Invalid msg type %d\n",
- iov_iter_type(&msg->msg_iter));
- rc = -EINVAL;
- }
-
-out:
- /* SMBDirect will read it all or nothing */
- if (rc > 0)
- msg->msg_iter.count = 0;
- return rc;
-}
-
-/*
* Send data to transport
* Each rqst is transported as a SMBDirect payload
* rqst: the data to write
@@ -2032,14 +1985,14 @@ int smbd_send(struct TCP_Server_Info *server,
klen += rqst->rq_iov[i].iov_len;
iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen);
- rc = smbd_post_send_iter(info, &iter, &remaining_data_length);
+ rc = smbd_post_send_full_iter(info, &iter, &remaining_data_length);
if (rc < 0)
break;
if (iov_iter_count(&rqst->rq_iter) > 0) {
/* And then the data pages if there are any */
- rc = smbd_post_send_iter(info, &rqst->rq_iter,
- &remaining_data_length);
+ rc = smbd_post_send_full_iter(info, &rqst->rq_iter,
+ &remaining_data_length);
if (rc < 0)
break;
}
@@ -2589,13 +2542,14 @@ static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter,
size_t fsize = folioq_folio_size(folioq, slot);
if (offset < fsize) {
- size_t part = umin(maxsize - ret, fsize - offset);
+ size_t part = umin(maxsize, fsize - offset);
if (!smb_set_sge(rdma, folio_page(folio, 0), offset, part))
return -EIO;
offset += part;
ret += part;
+ maxsize -= part;
}
if (offset >= fsize) {
@@ -2610,7 +2564,7 @@ static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter,
slot = 0;
}
}
- } while (rdma->nr_sge < rdma->max_sge || maxsize > 0);
+ } while (rdma->nr_sge < rdma->max_sge && maxsize > 0);
iter->folioq = folioq;
iter->folioq_slot = slot;
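
The smbd_recv() rewrite above is what allows the old kvec- and page-specific receive paths (deleted above) to collapse into a single function: copy_to_iter() writes into whatever memory the iov_iter describes, so ITER_KVEC and ITER_BVEC callers need no separate handling. The core idea, sketched with a hypothetical helper:

	/* Copy one reassembled chunk into the caller-supplied iterator. */
	static int demo_deliver(struct msghdr *msg, const void *src, size_t len)
	{
		/* copy_to_iter() advances msg->msg_iter, returns bytes copied */
		if (copy_to_iter(src, len, &msg->msg_iter) != len)
			return -EFAULT;	/* iterator ran out of room */
		return 0;
	}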
diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h
index 52bcb55d9952..93e5b2bb9f28 100644
--- a/fs/smb/client/trace.h
+++ b/fs/smb/client/trace.h
@@ -140,7 +140,7 @@ DECLARE_EVENT_CLASS(smb3_rw_err_class,
__entry->len = len;
__entry->rc = rc;
),
- TP_printk("\tR=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d",
+ TP_printk("R=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d",
__entry->rreq_debug_id, __entry->rreq_debug_index,
__entry->xid, __entry->sesid, __entry->tid, __entry->fid,
__entry->offset, __entry->len, __entry->rc)
@@ -190,7 +190,7 @@ DECLARE_EVENT_CLASS(smb3_other_err_class,
__entry->len = len;
__entry->rc = rc;
),
- TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d",
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d",
__entry->xid, __entry->sesid, __entry->tid, __entry->fid,
__entry->offset, __entry->len, __entry->rc)
)
@@ -247,7 +247,7 @@ DECLARE_EVENT_CLASS(smb3_copy_range_err_class,
__entry->len = len;
__entry->rc = rc;
),
- TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x rc=%d",
+ TP_printk("xid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x rc=%d",
__entry->xid, __entry->sesid, __entry->tid, __entry->target_fid,
__entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len, __entry->rc)
)
@@ -298,7 +298,7 @@ DECLARE_EVENT_CLASS(smb3_copy_range_done_class,
__entry->target_offset = target_offset;
__entry->len = len;
),
- TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x",
+ TP_printk("xid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x",
__entry->xid, __entry->sesid, __entry->tid, __entry->target_fid,
__entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len)
)
@@ -482,7 +482,7 @@ DECLARE_EVENT_CLASS(smb3_fd_class,
__entry->tid = tid;
__entry->sesid = sesid;
),
- TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx",
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx",
__entry->xid, __entry->sesid, __entry->tid, __entry->fid)
)
@@ -521,7 +521,7 @@ DECLARE_EVENT_CLASS(smb3_fd_err_class,
__entry->sesid = sesid;
__entry->rc = rc;
),
- TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d",
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d",
__entry->xid, __entry->sesid, __entry->tid, __entry->fid,
__entry->rc)
)
@@ -794,7 +794,7 @@ DECLARE_EVENT_CLASS(smb3_cmd_err_class,
__entry->status = status;
__entry->rc = rc;
),
- TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d",
+ TP_printk("sid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d",
__entry->sesid, __entry->tid, __entry->cmd, __entry->mid,
__entry->status, __entry->rc)
)
@@ -829,7 +829,7 @@ DECLARE_EVENT_CLASS(smb3_cmd_done_class,
__entry->cmd = cmd;
__entry->mid = mid;
),
- TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu",
+ TP_printk("sid=0x%llx tid=0x%x cmd=%u mid=%llu",
__entry->sesid, __entry->tid,
__entry->cmd, __entry->mid)
)
@@ -867,7 +867,7 @@ DECLARE_EVENT_CLASS(smb3_mid_class,
__entry->when_sent = when_sent;
__entry->when_received = when_received;
),
- TP_printk("\tcmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu",
+ TP_printk("cmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu",
__entry->cmd, __entry->mid, __entry->pid, __entry->when_sent,
__entry->when_received)
)
@@ -898,7 +898,7 @@ DECLARE_EVENT_CLASS(smb3_exit_err_class,
__assign_str(func_name);
__entry->rc = rc;
),
- TP_printk("\t%s: xid=%u rc=%d",
+ TP_printk("%s: xid=%u rc=%d",
__get_str(func_name), __entry->xid, __entry->rc)
)
@@ -924,7 +924,7 @@ DECLARE_EVENT_CLASS(smb3_sync_err_class,
__entry->ino = ino;
__entry->rc = rc;
),
- TP_printk("\tino=%lu rc=%d",
+ TP_printk("ino=%lu rc=%d",
__entry->ino, __entry->rc)
)
@@ -950,7 +950,7 @@ DECLARE_EVENT_CLASS(smb3_enter_exit_class,
__entry->xid = xid;
__assign_str(func_name);
),
- TP_printk("\t%s: xid=%u",
+ TP_printk("%s: xid=%u",
__get_str(func_name), __entry->xid)
)
diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c
index 83764c230e9d..3f04a2977ba8 100644
--- a/fs/smb/server/connection.c
+++ b/fs/smb/server/connection.c
@@ -40,7 +40,7 @@ void ksmbd_conn_free(struct ksmbd_conn *conn)
kvfree(conn->request_buf);
kfree(conn->preauth_info);
if (atomic_dec_and_test(&conn->refcnt)) {
- ksmbd_free_transport(conn->transport);
+ conn->transport->ops->free_transport(conn->transport);
kfree(conn);
}
}
diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h
index 6efed923bd68..dd3e0e3f7bf0 100644
--- a/fs/smb/server/connection.h
+++ b/fs/smb/server/connection.h
@@ -133,6 +133,7 @@ struct ksmbd_transport_ops {
void *buf, unsigned int len,
struct smb2_buffer_desc_v1 *desc,
unsigned int desc_len);
+ void (*free_transport)(struct ksmbd_transport *kt);
};
struct ksmbd_transport {
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 1a308171b599..63d17cea2e95 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -1607,17 +1607,18 @@ static int krb5_authenticate(struct ksmbd_work *work,
out_len = work->response_sz -
(le16_to_cpu(rsp->SecurityBufferOffset) + 4);
- /* Check previous session */
- prev_sess_id = le64_to_cpu(req->PreviousSessionId);
- if (prev_sess_id && prev_sess_id != sess->id)
- destroy_previous_session(conn, sess->user, prev_sess_id);
-
retval = ksmbd_krb5_authenticate(sess, in_blob, in_len,
out_blob, &out_len);
if (retval) {
ksmbd_debug(SMB, "krb5 authentication failed\n");
return -EINVAL;
}
+
+ /* Check previous session */
+ prev_sess_id = le64_to_cpu(req->PreviousSessionId);
+ if (prev_sess_id && prev_sess_id != sess->id)
+ destroy_previous_session(conn, sess->user, prev_sess_id);
+
rsp->SecurityBufferLength = cpu_to_le16(out_len);
if ((conn->sign || server_conf.enforced_signing) ||
@@ -4871,8 +4872,13 @@ static int get_file_standard_info(struct smb2_query_info_rsp *rsp,
sinfo = (struct smb2_file_standard_info *)rsp->Buffer;
delete_pending = ksmbd_inode_pending_delete(fp);
- sinfo->AllocationSize = cpu_to_le64(stat.blocks << 9);
- sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ if (ksmbd_stream_fd(fp) == false) {
+ sinfo->AllocationSize = cpu_to_le64(stat.blocks << 9);
+ sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ } else {
+ sinfo->AllocationSize = cpu_to_le64(fp->stream.size);
+ sinfo->EndOfFile = cpu_to_le64(fp->stream.size);
+ }
sinfo->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending);
sinfo->DeletePending = delete_pending;
sinfo->Directory = S_ISDIR(stat.mode) ? 1 : 0;
@@ -4935,9 +4941,14 @@ static int get_file_all_info(struct ksmbd_work *work,
file_info->ChangeTime = cpu_to_le64(time);
file_info->Attributes = fp->f_ci->m_fattr;
file_info->Pad1 = 0;
- file_info->AllocationSize =
- cpu_to_le64(stat.blocks << 9);
- file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ if (ksmbd_stream_fd(fp) == false) {
+ file_info->AllocationSize =
+ cpu_to_le64(stat.blocks << 9);
+ file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ } else {
+ file_info->AllocationSize = cpu_to_le64(fp->stream.size);
+ file_info->EndOfFile = cpu_to_le64(fp->stream.size);
+ }
file_info->NumberOfLinks =
cpu_to_le32(get_nlink(&stat) - delete_pending);
file_info->DeletePending = delete_pending;
@@ -4946,7 +4957,10 @@ static int get_file_all_info(struct ksmbd_work *work,
file_info->IndexNumber = cpu_to_le64(stat.ino);
file_info->EASize = 0;
file_info->AccessFlags = fp->daccess;
- file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos);
+ if (ksmbd_stream_fd(fp) == false)
+ file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos);
+ else
+ file_info->CurrentByteOffset = cpu_to_le64(fp->stream.pos);
file_info->Mode = fp->coption;
file_info->AlignmentRequirement = 0;
conv_len = smbConvertToUTF16((__le16 *)file_info->FileName, filename,
@@ -5134,8 +5148,13 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp,
time = ksmbd_UnixTimeToNT(stat.ctime);
file_info->ChangeTime = cpu_to_le64(time);
file_info->Attributes = fp->f_ci->m_fattr;
- file_info->AllocationSize = cpu_to_le64(stat.blocks << 9);
- file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ if (ksmbd_stream_fd(fp) == false) {
+ file_info->AllocationSize = cpu_to_le64(stat.blocks << 9);
+ file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ } else {
+ file_info->AllocationSize = cpu_to_le64(fp->stream.size);
+ file_info->EndOfFile = cpu_to_le64(fp->stream.size);
+ }
file_info->Reserved = cpu_to_le32(0);
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_ntwrk_info));
@@ -5158,7 +5177,11 @@ static void get_file_position_info(struct smb2_query_info_rsp *rsp,
struct smb2_file_pos_info *file_info;
file_info = (struct smb2_file_pos_info *)rsp->Buffer;
- file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos);
+ if (ksmbd_stream_fd(fp) == false)
+ file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos);
+ else
+ file_info->CurrentByteOffset = cpu_to_le64(fp->stream.pos);
+
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_pos_info));
}
@@ -5247,8 +5270,13 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp,
file_info->ChangeTime = cpu_to_le64(time);
file_info->DosAttributes = fp->f_ci->m_fattr;
file_info->Inode = cpu_to_le64(stat.ino);
- file_info->EndOfFile = cpu_to_le64(stat.size);
- file_info->AllocationSize = cpu_to_le64(stat.blocks << 9);
+ if (ksmbd_stream_fd(fp) == false) {
+ file_info->EndOfFile = cpu_to_le64(stat.size);
+ file_info->AllocationSize = cpu_to_le64(stat.blocks << 9);
+ } else {
+ file_info->EndOfFile = cpu_to_le64(fp->stream.size);
+ file_info->AllocationSize = cpu_to_le64(fp->stream.size);
+ }
file_info->HardLinks = cpu_to_le32(stat.nlink);
file_info->Mode = cpu_to_le32(stat.mode & 0777);
switch (stat.mode & S_IFMT) {
@@ -6190,6 +6218,9 @@ static int set_file_allocation_info(struct ksmbd_work *work,
if (!(fp->daccess & FILE_WRITE_DATA_LE))
return -EACCES;
+ if (ksmbd_stream_fd(fp) == true)
+ return 0;
+
rc = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS,
AT_STATX_SYNC_AS_STAT);
if (rc)
@@ -6248,7 +6279,8 @@ static int set_end_of_file_info(struct ksmbd_work *work, struct ksmbd_file *fp,
* truncate on some filesystems like FAT32 fills zero data in the
* truncated range.
*/
- if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC) {
+ if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC &&
+ ksmbd_stream_fd(fp) == false) {
ksmbd_debug(SMB, "truncated to newsize %lld\n", newsize);
rc = ksmbd_vfs_truncate(work, fp, newsize);
if (rc) {
@@ -6321,7 +6353,13 @@ static int set_file_position_info(struct ksmbd_file *fp,
return -EINVAL;
}
- fp->filp->f_pos = current_byte_offset;
+ if (ksmbd_stream_fd(fp) == false)
+ fp->filp->f_pos = current_byte_offset;
+ else {
+ if (current_byte_offset > XATTR_SIZE_MAX)
+ current_byte_offset = XATTR_SIZE_MAX;
+ fp->stream.pos = current_byte_offset;
+ }
return 0;
}
@@ -8535,11 +8573,6 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work)
goto err_out;
}
- opinfo->op_state = OPLOCK_STATE_NONE;
- wake_up_interruptible_all(&opinfo->oplock_q);
- opinfo_put(opinfo);
- ksmbd_fd_put(work, fp);
-
rsp->StructureSize = cpu_to_le16(24);
rsp->OplockLevel = rsp_oplevel;
rsp->Reserved = 0;
@@ -8547,16 +8580,15 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work)
rsp->VolatileFid = volatile_id;
rsp->PersistentFid = persistent_id;
ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_oplock_break));
- if (!ret)
- return;
-
+ if (ret) {
err_out:
+ smb2_set_err_rsp(work);
+ }
+
opinfo->op_state = OPLOCK_STATE_NONE;
wake_up_interruptible_all(&opinfo->oplock_q);
-
opinfo_put(opinfo);
ksmbd_fd_put(work, fp);
- smb2_set_err_rsp(work);
}
static int check_lease_state(struct lease *lease, __le32 req_state)
@@ -8686,11 +8718,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
}
lease_state = lease->state;
- opinfo->op_state = OPLOCK_STATE_NONE;
- wake_up_interruptible_all(&opinfo->oplock_q);
- atomic_dec(&opinfo->breaking_cnt);
- wake_up_interruptible_all(&opinfo->oplock_brk);
- opinfo_put(opinfo);
rsp->StructureSize = cpu_to_le16(36);
rsp->Reserved = 0;
@@ -8699,16 +8726,16 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
rsp->LeaseState = lease_state;
rsp->LeaseDuration = 0;
ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_lease_ack));
- if (!ret)
- return;
-
+ if (ret) {
err_out:
+ smb2_set_err_rsp(work);
+ }
+
+ opinfo->op_state = OPLOCK_STATE_NONE;
wake_up_interruptible_all(&opinfo->oplock_q);
atomic_dec(&opinfo->breaking_cnt);
wake_up_interruptible_all(&opinfo->oplock_brk);
-
opinfo_put(opinfo);
- smb2_set_err_rsp(work);
}
/**
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index 4998df04ab95..c6cbe0d56e32 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -159,7 +159,8 @@ struct smb_direct_transport {
};
#define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport))
-
+#define SMBD_TRANS(t) ((struct smb_direct_transport *)container_of(t, \
+ struct smb_direct_transport, transport))
enum {
SMB_DIRECT_MSG_NEGOTIATE_REQ = 0,
SMB_DIRECT_MSG_DATA_TRANSFER
@@ -410,6 +411,11 @@ err:
return NULL;
}
+static void smb_direct_free_transport(struct ksmbd_transport *kt)
+{
+ kfree(SMBD_TRANS(kt));
+}
+
static void free_transport(struct smb_direct_transport *t)
{
struct smb_direct_recvmsg *recvmsg;
@@ -427,7 +433,8 @@ static void free_transport(struct smb_direct_transport *t)
if (t->qp) {
ib_drain_qp(t->qp);
ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs);
- ib_destroy_qp(t->qp);
+ t->qp = NULL;
+ rdma_destroy_qp(t->cm_id);
}
ksmbd_debug(RDMA, "drain the reassembly queue\n");
@@ -455,7 +462,6 @@ static void free_transport(struct smb_direct_transport *t)
smb_direct_destroy_pools(t);
ksmbd_conn_free(KSMBD_TRANS(t)->conn);
- kfree(t);
}
static struct smb_direct_sendmsg
@@ -1935,8 +1941,8 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
return 0;
err:
if (t->qp) {
- ib_destroy_qp(t->qp);
t->qp = NULL;
+ rdma_destroy_qp(t->cm_id);
}
if (t->recv_cq) {
ib_destroy_cq(t->recv_cq);
@@ -2281,4 +2287,5 @@ static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
.read = smb_direct_read,
.rdma_read = smb_direct_rdma_read,
.rdma_write = smb_direct_rdma_write,
+ .free_transport = smb_direct_free_transport,
};
diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c
index abedf510899a..4e9f98db9ff4 100644
--- a/fs/smb/server/transport_tcp.c
+++ b/fs/smb/server/transport_tcp.c
@@ -93,7 +93,7 @@ static struct tcp_transport *alloc_transport(struct socket *client_sk)
return t;
}
-void ksmbd_free_transport(struct ksmbd_transport *kt)
+static void ksmbd_tcp_free_transport(struct ksmbd_transport *kt)
{
struct tcp_transport *t = TCP_TRANS(kt);
@@ -656,4 +656,5 @@ static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops = {
.read = ksmbd_tcp_read,
.writev = ksmbd_tcp_writev,
.disconnect = ksmbd_tcp_disconnect,
+ .free_transport = ksmbd_tcp_free_transport,
};
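
Together, the connection.[ch] and transport hunks replace a hard-coded call to the TCP-only ksmbd_free_transport() with a per-transport destructor in the ops table, so an RDMA connection now frees its own wrapper structure. The dispatch shape, sketched with hypothetical demo names:

	struct demo_transport {
		struct ksmbd_transport transport;	/* embedded generic part */
		/* transport-private state follows */
	};

	static void demo_free_transport(struct ksmbd_transport *kt)
	{
		/* recover the wrapper from the embedded member */
		struct demo_transport *t =
			container_of(kt, struct demo_transport, transport);

		kfree(t);
	}

	/* generic code only ever calls through the vtable: */
	conn->transport->ops->free_transport(conn->transport);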
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index ba45e809555a..34871a7f3e4b 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -293,6 +293,7 @@ static int ksmbd_vfs_stream_read(struct ksmbd_file *fp, char *buf, loff_t *pos,
if (v_len - *pos < count)
count = v_len - *pos;
+ fp->stream.pos = v_len;
memcpy(buf, &stream_buf[*pos], count);
@@ -456,8 +457,8 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
true);
if (err < 0)
goto out;
-
- fp->filp->f_pos = *pos;
+ else
+ fp->stream.pos = size;
err = 0;
out:
kvfree(stream_buf);
@@ -764,10 +765,10 @@ retry:
}
rd.old_mnt_idmap = mnt_idmap(old_path->mnt),
- rd.old_dir = d_inode(old_parent),
+ rd.old_parent = old_parent,
rd.old_dentry = old_child,
rd.new_mnt_idmap = mnt_idmap(new_path.mnt),
- rd.new_dir = new_path.dentry->d_inode,
+ rd.new_parent = new_path.dentry,
rd.new_dentry = new_dentry,
rd.flags = flags,
rd.delegated_inode = NULL,
@@ -1281,6 +1282,7 @@ out1:
err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry);
if (err) {
+ mnt_drop_write(parent_path->mnt);
path_put(path);
path_put(parent_path);
}
diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h
index 5bbb179736c2..0708155b5caf 100644
--- a/fs/smb/server/vfs_cache.h
+++ b/fs/smb/server/vfs_cache.h
@@ -44,6 +44,7 @@ struct ksmbd_lock {
struct stream {
char *name;
ssize_t size;
+ loff_t pos;
};
struct ksmbd_inode {
diff --git a/fs/stack.c b/fs/stack.c
index f18920119944..d8c782e064e3 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -3,7 +3,7 @@
#include <linux/fs.h>
#include <linux/fs_stack.h>
-/* does _NOT_ require i_mutex to be held.
+/* does _NOT_ require i_rwsem to be held.
*
* This function cannot be inlined since i_size_{read,write} is rather
* heavy-weight on 32-bit systems
@@ -41,7 +41,7 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
* If CONFIG_SMP or CONFIG_PREEMPTION on 32-bit, it's vital for
* fsstack_copy_inode_size() to hold some lock around
* i_size_write(), otherwise i_size_read() may spin forever (see
- * include/linux/fs.h). We don't necessarily hold i_mutex when this
+ * include/linux/fs.h). We don't necessarily hold i_rwsem when this
* is called, so take i_lock for that case.
*
* And if on 32-bit, continue our effort to keep the two halves of
diff --git a/fs/super.c b/fs/super.c
index 21799e213fd7..80418ca8e215 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -964,8 +964,10 @@ void iterate_supers_type(struct file_system_type *type,
spin_unlock(&sb_lock);
locked = super_lock_shared(sb);
- if (locked)
+ if (locked) {
f(sb, arg);
+ super_unlock_shared(sb);
+ }
spin_lock(&sb_lock);
if (p)
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index a3fd3cc591bd..0c023941a316 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -465,9 +465,20 @@ static int tracefs_d_revalidate(struct inode *inode, const struct qstr *name,
return !(ei && ei->is_freed);
}
+static int tracefs_d_delete(const struct dentry *dentry)
+{
+ /*
+ * We want to keep eventfs dentries around but not tracefs
+ * ones. eventfs dentries have content in d_fsdata.
+ * Use d_fsdata to determine if it's an eventfs dentry or not.
+ */
+ return dentry->d_fsdata == NULL;
+}
+
static const struct dentry_operations tracefs_dentry_operations = {
.d_revalidate = tracefs_d_revalidate,
.d_release = tracefs_d_release,
+ .d_delete = tracefs_d_delete,
};
static int tracefs_fill_super(struct super_block *sb, struct fs_context *fc)
@@ -480,7 +491,7 @@ static int tracefs_fill_super(struct super_block *sb, struct fs_context *fc)
return err;
sb->s_op = &tracefs_super_operations;
- sb->s_d_op = &tracefs_dentry_operations;
+ set_default_d_op(sb, &tracefs_dentry_operations);
return 0;
}
@@ -551,20 +562,9 @@ struct dentry *tracefs_start_creating(const char *name, struct dentry *parent)
if (!parent)
parent = tracefs_mount->mnt_root;
- inode_lock(d_inode(parent));
- if (unlikely(IS_DEADDIR(d_inode(parent))))
- dentry = ERR_PTR(-ENOENT);
- else
- dentry = lookup_noperm(&QSTR(name), parent);
- if (!IS_ERR(dentry) && d_inode(dentry)) {
- dput(dentry);
- dentry = ERR_PTR(-EEXIST);
- }
-
- if (IS_ERR(dentry)) {
- inode_unlock(d_inode(parent));
+ dentry = simple_start_creating(parent, name);
+ if (IS_ERR(dentry))
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
- }
return dentry;
}
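
For reference, the open-coded sequence that simple_start_creating() replaces in the hunk above, reconstructed from the removed lines (on success the parent directory is returned locked; on error it is unlocked again):

	static struct dentry *start_creating_open_coded(struct dentry *parent,
							const char *name)
	{
		struct dentry *dentry;

		inode_lock(d_inode(parent));
		if (unlikely(IS_DEADDIR(d_inode(parent))))
			dentry = ERR_PTR(-ENOENT);
		else
			dentry = lookup_noperm(&QSTR(name), parent);
		if (!IS_ERR(dentry) && d_inode(dentry)) {
			dput(dentry);		/* name already exists */
			dentry = ERR_PTR(-EEXIST);
		}
		if (IS_ERR(dentry))
			inode_unlock(d_inode(parent));
		return dentry;
	}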
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index bf311c38d9a8..cee8bec1ea26 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -404,7 +404,8 @@ static int allocate_budget(struct ubifs_info *c, struct folio *folio,
* there is a plenty of flash space and the budget will be acquired quickly,
* without forcing write-back. The slow path does not make this assumption.
*/
-static int ubifs_write_begin(struct file *file, struct address_space *mapping,
+static int ubifs_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len,
struct folio **foliop, void **fsdata)
{
@@ -514,8 +515,9 @@ static void cancel_budget(struct ubifs_info *c, struct folio *folio,
}
}
-static int ubifs_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
+static int ubifs_write_end(const struct kiocb *iocb,
+ struct address_space *mapping, loff_t pos,
+ unsigned len, unsigned copied,
struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 4386dd845e40..356b75676fa9 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -244,10 +244,12 @@ static void udf_readahead(struct readahead_control *rac)
mpage_readahead(rac, udf_get_block);
}
-static int udf_write_begin(struct file *file, struct address_space *mapping,
+static int udf_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len,
struct folio **foliop, void **fsdata)
{
+ struct file *file = iocb->ki_filp;
struct udf_inode_info *iinfo = UDF_I(file_inode(file));
struct folio *folio;
int ret;
@@ -271,15 +273,16 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
return 0;
}
-static int udf_write_end(struct file *file, struct address_space *mapping,
+static int udf_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct folio *folio, void *fsdata)
{
- struct inode *inode = file_inode(file);
+ struct inode *inode = file_inode(iocb->ki_filp);
loff_t last_pos;
if (UDF_I(inode)->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB)
- return generic_write_end(file, mapping, pos, len, copied, folio,
+ return generic_write_end(iocb, mapping, pos, len, copied, folio,
fsdata);
last_pos = pos + copied;
if (last_pos > inode->i_size)
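
The ubifs and udf conversions above, like the ufs and vboxsf ones below, are all instances of one address_space_operations signature change: write_begin/write_end now take a const struct kiocb * and reach the file through iocb->ki_filp. The converted callback shape, sketched for a hypothetical filesystem:

	static int demo_write_begin(const struct kiocb *iocb,
				    struct address_space *mapping,
				    loff_t pos, unsigned len,
				    struct folio **foliop, void **fsdata)
	{
		/* the struct file argument is gone; fetch it from the kiocb */
		struct inode *inode = file_inode(iocb->ki_filp);

		/* ... find or create the folio covering [pos, pos + len) ... */
		return 0;
	}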
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 88d0062cfdb9..0388a1bae326 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -48,7 +48,7 @@ static void ufs_commit_chunk(struct folio *folio, loff_t pos, unsigned len)
struct inode *dir = mapping->host;
inode_inc_iversion(dir);
- block_write_end(NULL, mapping, pos, len, len, folio, NULL);
+ block_write_end(pos, len, len, folio);
if (pos+len > dir->i_size) {
i_size_write(dir, pos+len);
mark_inode_dirty(dir);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 7dc38fdef2ea..8361c00e8fa6 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -474,9 +474,10 @@ static void ufs_write_failed(struct address_space *mapping, loff_t to)
}
}
-static int ufs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct folio **foliop, void **fsdata)
+static int ufs_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct folio **foliop, void **fsdata)
{
int ret;
@@ -487,13 +488,14 @@ static int ufs_write_begin(struct file *file, struct address_space *mapping,
return ret;
}
-static int ufs_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct folio *folio, void *fsdata)
+static int ufs_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio, void *fsdata)
{
int ret;
- ret = generic_write_end(file, mapping, pos, len, copied, folio, fsdata);
+ ret = generic_write_end(iocb, mapping, pos, len, copied, folio, fsdata);
if (ret < len)
ufs_write_failed(mapping, pos + len);
return ret;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index eea718ac66b4..6e4585169f94 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -397,7 +397,7 @@ static int ufs_parse_param(struct fs_context *fc, struct fs_parameter *param)
pr_err("ufstype can't be changed during remount\n");
return -EINVAL;
}
- if (!ctx->flavour) {
+ if (ctx->flavour) {
pr_err("conflicting ufstype options\n");
return -EINVAL;
}
diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c
index b492794f8e9a..af01e3beaa42 100644
--- a/fs/vboxsf/file.c
+++ b/fs/vboxsf/file.c
@@ -300,12 +300,13 @@ static int vboxsf_writepages(struct address_space *mapping,
return error;
}
-static int vboxsf_write_end(struct file *file, struct address_space *mapping,
+static int vboxsf_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned int len, unsigned int copied,
struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
- struct vboxsf_handle *sf_handle = file->private_data;
+ struct vboxsf_handle *sf_handle = iocb->ki_filp->private_data;
size_t from = offset_in_folio(folio, pos);
u32 nwritten = len;
u8 *buf;
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index 0bc96ab6580b..241647b060ee 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -189,7 +189,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_blocksize = 1024;
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_op = &vboxsf_super_ops;
- sb->s_d_op = &vboxsf_dentry_ops;
+ set_default_d_op(sb, &vboxsf_dentry_ops);
iroot = iget_locked(sb, 0);
if (!iroot) {
diff --git a/fs/xattr.c b/fs/xattr.c
index 8ec5b0204bfd..8851a5ef34f5 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -215,7 +215,7 @@ EXPORT_SYMBOL(__vfs_setxattr);
*
* returns the result of the internal setxattr or setsecurity operations.
*
- * This function requires the caller to lock the inode's i_mutex before it
+ * This function requires the caller to lock the inode's i_rwsem before it
* is executed. It also assumes that the caller will make the appropriate
* permission checks.
*/
@@ -1479,6 +1479,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
buffer += err;
}
remaining_size -= err;
+ err = 0;
read_lock(&xattrs->lock);
for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) {
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 7839efe050bf..000cc7f4a3ce 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -3444,16 +3444,41 @@ xfs_alloc_read_agf(
set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
}
+
#ifdef DEBUG
- else if (!xfs_is_shutdown(mp)) {
- ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
- ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
- ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
- ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
- ASSERT(pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level));
- ASSERT(pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level));
+ /*
+ * It's possible for the AGF to be out of sync if the block device is
+ * silently dropping writes. This can happen in fstests with dmflakey
+ * enabled, which allows the buffer to be cleaned and reclaimed by
+ * memory pressure and then re-read from disk here. We will get a
+ * stale version of the AGF from disk, and nothing good can happen from
+ * here. Hence if we detect this situation, immediately shut down the
+ * filesystem.
+ *
+ * This can also happen if we are already in the middle of a forced
+ * shutdown, so don't bother checking if we are already shut down.
+ */
+ if (!xfs_is_shutdown(pag_mount(pag))) {
+ bool ok = true;
+
+ ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks);
+ ok &= pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks);
+ ok &= pag->pagf_flcount == be32_to_cpu(agf->agf_flcount);
+ ok &= pag->pagf_longest == be32_to_cpu(agf->agf_longest);
+ ok &= pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level);
+ ok &= pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level);
+
+ if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) {
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
+ xfs_trans_brelse(tp, agfbp);
+ xfs_force_shutdown(pag_mount(pag),
+ SHUTDOWN_CORRUPT_ONDISK);
+ return -EFSCORRUPTED;
+ }
}
-#endif
+#endif /* DEBUG */
+
if (agfbpp)
*agfbpp = agfbp;
else
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index a4ac37ba5d51..fa1f03c1331e 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -186,35 +186,32 @@ xfs_allocbt_init_ptr_from_cur(
ptr->s = agf->agf_cnt_root;
}
-STATIC int64_t
-xfs_bnobt_key_diff(
+STATIC int
+xfs_bnobt_cmp_key_with_cur(
struct xfs_btree_cur *cur,
const union xfs_btree_key *key)
{
struct xfs_alloc_rec_incore *rec = &cur->bc_rec.a;
const struct xfs_alloc_rec *kp = &key->alloc;
- return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
+ return cmp_int(be32_to_cpu(kp->ar_startblock),
+ rec->ar_startblock);
}
-STATIC int64_t
-xfs_cntbt_key_diff(
+STATIC int
+xfs_cntbt_cmp_key_with_cur(
struct xfs_btree_cur *cur,
const union xfs_btree_key *key)
{
struct xfs_alloc_rec_incore *rec = &cur->bc_rec.a;
const struct xfs_alloc_rec *kp = &key->alloc;
- int64_t diff;
- diff = (int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
- if (diff)
- return diff;
-
- return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
+ return cmp_int(be32_to_cpu(kp->ar_blockcount), rec->ar_blockcount) ?:
+ cmp_int(be32_to_cpu(kp->ar_startblock), rec->ar_startblock);
}
-STATIC int64_t
-xfs_bnobt_diff_two_keys(
+STATIC int
+xfs_bnobt_cmp_two_keys(
struct xfs_btree_cur *cur,
const union xfs_btree_key *k1,
const union xfs_btree_key *k2,
@@ -222,29 +219,24 @@ xfs_bnobt_diff_two_keys(
{
ASSERT(!mask || mask->alloc.ar_startblock);
- return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
- be32_to_cpu(k2->alloc.ar_startblock);
+ return cmp_int(be32_to_cpu(k1->alloc.ar_startblock),
+ be32_to_cpu(k2->alloc.ar_startblock));
}
-STATIC int64_t
-xfs_cntbt_diff_two_keys(
+STATIC int
+xfs_cntbt_cmp_two_keys(
struct xfs_btree_cur *cur,
const union xfs_btree_key *k1,
const union xfs_btree_key *k2,
const union xfs_btree_key *mask)
{
- int64_t diff;
-
ASSERT(!mask || (mask->alloc.ar_blockcount &&
mask->alloc.ar_startblock));
- diff = be32_to_cpu(k1->alloc.ar_blockcount) -
- be32_to_cpu(k2->alloc.ar_blockcount);
- if (diff)
- return diff;
-
- return be32_to_cpu(k1->alloc.ar_startblock) -
- be32_to_cpu(k2->alloc.ar_startblock);
+ return cmp_int(be32_to_cpu(k1->alloc.ar_blockcount),
+ be32_to_cpu(k2->alloc.ar_blockcount)) ?:
+ cmp_int(be32_to_cpu(k1->alloc.ar_startblock),
+ be32_to_cpu(k2->alloc.ar_startblock));
}
static xfs_failaddr_t
@@ -438,9 +430,9 @@ const struct xfs_btree_ops xfs_bnobt_ops = {
.init_high_key_from_rec = xfs_bnobt_init_high_key_from_rec,
.init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
- .key_diff = xfs_bnobt_key_diff,
+ .cmp_key_with_cur = xfs_bnobt_cmp_key_with_cur,
.buf_ops = &xfs_bnobt_buf_ops,
- .diff_two_keys = xfs_bnobt_diff_two_keys,
+ .cmp_two_keys = xfs_bnobt_cmp_two_keys,
.keys_inorder = xfs_bnobt_keys_inorder,
.recs_inorder = xfs_bnobt_recs_inorder,
.keys_contiguous = xfs_allocbt_keys_contiguous,
@@ -468,9 +460,9 @@ const struct xfs_btree_ops xfs_cntbt_ops = {
.init_high_key_from_rec = xfs_cntbt_init_high_key_from_rec,
.init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
- .key_diff = xfs_cntbt_key_diff,
+ .cmp_key_with_cur = xfs_cntbt_cmp_key_with_cur,
.buf_ops = &xfs_cntbt_buf_ops,
- .diff_two_keys = xfs_cntbt_diff_two_keys,
+ .cmp_two_keys = xfs_cntbt_cmp_two_keys,
.keys_inorder = xfs_cntbt_keys_inorder,
.recs_inorder = xfs_cntbt_recs_inorder,
.keys_contiguous = NULL, /* not needed right now */
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 908d7b050e9c..188feac04b60 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -369,38 +369,26 @@ xfs_bmbt_init_rec_from_cur(
xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b);
}
-STATIC int64_t
-xfs_bmbt_key_diff(
+STATIC int
+xfs_bmbt_cmp_key_with_cur(
struct xfs_btree_cur *cur,
const union xfs_btree_key *key)
{
- return (int64_t)be64_to_cpu(key->bmbt.br_startoff) -
- cur->bc_rec.b.br_startoff;
+ return cmp_int(be64_to_cpu(key->bmbt.br_startoff),
+ cur->bc_rec.b.br_startoff);
}
-STATIC int64_t
-xfs_bmbt_diff_two_keys(
+STATIC int
+xfs_bmbt_cmp_two_keys(
struct xfs_btree_cur *cur,
const union xfs_btree_key *k1,
const union xfs_btree_key *k2,
const union xfs_btree_key *mask)
{
- uint64_t a = be64_to_cpu(k1->bmbt.br_startoff);
- uint64_t b = be64_to_cpu(k2->bmbt.br_startoff);
-
ASSERT(!mask || mask->bmbt.br_startoff);
- /*
- * Note: This routine previously casted a and b to int64 and subtracted
- * them to generate a result. This lead to problems if b was the
- * "maximum" key value (all ones) being signed incorrectly, hence this
- * somewhat less efficient version.
- */
- if (a > b)
- return 1;
- if (b > a)
- return -1;
- return 0;
+ return cmp_int(be64_to_cpu(k1->bmbt.br_startoff),
+ be64_to_cpu(k2->bmbt.br_startoff));
}
static xfs_failaddr_t
@@ -647,8 +635,8 @@ const struct xfs_btree_ops xfs_bmbt_ops = {
.init_key_from_rec = xfs_bmbt_init_key_from_rec,
.init_high_key_from_rec = xfs_bmbt_init_high_key_from_rec,
.init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
- .key_diff = xfs_bmbt_key_diff,
- .diff_two_keys = xfs_bmbt_diff_two_keys,
+ .cmp_key_with_cur = xfs_bmbt_cmp_key_with_cur,
+ .cmp_two_keys = xfs_bmbt_cmp_two_keys,
.buf_ops = &xfs_bmbt_buf_ops,
.keys_inorder = xfs_bmbt_keys_inorder,
.recs_inorder = xfs_bmbt_recs_inorder,
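
The key_diff to cmp_key_with_cur/cmp_two_keys conversions in these btree hunks all trade pointer-width subtraction for cmp_int(), avoiding exactly the sign problem the deleted xfs_bmbt comment described. A stand-alone user-space demonstration, assuming cmp_int() has its usual ((l) > (r)) - ((l) < (r)) definition:

	#include <stdint.h>
	#include <stdio.h>

	#define cmp_int(l, r)	(((l) > (r)) - ((l) < (r)))

	int main(void)
	{
		uint64_t a = 0, b = UINT64_MAX;	/* b: the all-ones "maximum" key */

		/* 0 - 0xffff...f wraps to 1, so subtraction claims a > b */
		printf("subtraction: %lld\n", (long long)(int64_t)(a - b));
		/* the comparison helper correctly reports a < b */
		printf("cmp_int:     %d\n", cmp_int(a, b));
		return 0;
	}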
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 299ce7fd11b0..a61211d253f1 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -1985,7 +1985,7 @@ xfs_btree_lookup(
int *stat) /* success/failure */
{
struct xfs_btree_block *block; /* current btree block */
- int64_t diff; /* difference for the current key */
+ int cmp_r; /* current key comparison result */
int error; /* error return value */
int keyno; /* current key number */
int level; /* level in the btree */
@@ -2013,13 +2013,13 @@ xfs_btree_lookup(
* on the lookup record, then follow the corresponding block
* pointer down to the next level.
*/
- for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
+ for (level = cur->bc_nlevels - 1, cmp_r = 1; level >= 0; level--) {
/* Get the block we need to do the lookup on. */
error = xfs_btree_lookup_get_block(cur, level, pp, &block);
if (error)
goto error0;
- if (diff == 0) {
+ if (cmp_r == 0) {
/*
* If we already had a key match at a higher level, we
* know we need to use the first entry in this block.
@@ -2065,15 +2065,16 @@ xfs_btree_lookup(
keyno, block, &key);
/*
- * Compute difference to get next direction:
+ * Compute comparison result to get next
+ * direction:
* - less than, move right
* - greater than, move left
* - equal, we're done
*/
- diff = cur->bc_ops->key_diff(cur, kp);
- if (diff < 0)
+ cmp_r = cur->bc_ops->cmp_key_with_cur(cur, kp);
+ if (cmp_r < 0)
low = keyno + 1;
- else if (diff > 0)
+ else if (cmp_r > 0)
high = keyno - 1;
else
break;
@@ -2089,7 +2090,7 @@ xfs_btree_lookup(
* If we moved left, need the previous key number,
* unless there isn't one.
*/
- if (diff > 0 && --keyno < 1)
+ if (cmp_r > 0 && --keyno < 1)
keyno = 1;
pp = xfs_btree_ptr_addr(cur, keyno, block);
@@ -2102,7 +2103,7 @@ xfs_btree_lookup(
}
/* Done with the search. See if we need to adjust the results. */
- if (dir != XFS_LOOKUP_LE && diff < 0) {
+ if (dir != XFS_LOOKUP_LE && cmp_r < 0) {
keyno++;
/*
* If ge search and we went off the end of the block, but it's
@@ -2125,14 +2126,14 @@ xfs_btree_lookup(
*stat = 1;
return 0;
}
- } else if (dir == XFS_LOOKUP_LE && diff > 0)
+ } else if (dir == XFS_LOOKUP_LE && cmp_r > 0)
keyno--;
cur->bc_levels[0].ptr = keyno;
/* Return if we succeeded or not. */
if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
*stat = 0;
- else if (dir != XFS_LOOKUP_EQ || diff == 0)
+ else if (dir != XFS_LOOKUP_EQ || cmp_r == 0)
*stat = 1;
else
*stat = 0;
@@ -5058,7 +5059,7 @@ xfs_btree_simple_query_range(
int error;
ASSERT(cur->bc_ops->init_high_key_from_rec);
- ASSERT(cur->bc_ops->diff_two_keys);
+ ASSERT(cur->bc_ops->cmp_two_keys);
/*
* Find the leftmost record. The btree cursor must be set
@@ -5352,15 +5353,15 @@ xfs_btree_count_blocks(
}
/* Compare two btree pointers. */
-int64_t
-xfs_btree_diff_two_ptrs(
+int
+xfs_btree_cmp_two_ptrs(
struct xfs_btree_cur *cur,
const union xfs_btree_ptr *a,
const union xfs_btree_ptr *b)
{
if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
- return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
- return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
+ return cmp_int(be64_to_cpu(a->l), be64_to_cpu(b->l));
+ return cmp_int(be32_to_cpu(a->s), be32_to_cpu(b->s));
}
struct xfs_btree_has_records {
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 355b304696e6..60e78572e725 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -171,20 +171,23 @@ struct xfs_btree_ops {
void (*init_high_key_from_rec)(union xfs_btree_key *key,
const union xfs_btree_rec *rec);
- /* difference between key value and cursor value */
- int64_t (*key_diff)(struct xfs_btree_cur *cur,
- const union xfs_btree_key *key);
+ /*
+ * Compare key value and cursor value -- positive if key > cur,
+ * negative if key < cur, and zero if equal.
+ */
+ int (*cmp_key_with_cur)(struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key);
/*
- * Difference between key2 and key1 -- positive if key1 > key2,
- * negative if key1 < key2, and zero if equal. If the @mask parameter
- * is non NULL, each key field to be used in the comparison must
- * contain a nonzero value.
+ * Compare key1 and key2 -- positive if key1 > key2, negative if
+ * key1 < key2, and zero if equal. If the @mask parameter is non NULL,
+ * each key field to be used in the comparison must contain a nonzero
+ * value.
*/
- int64_t (*diff_two_keys)(struct xfs_btree_cur *cur,
- const union xfs_btree_key *key1,
- const union xfs_btree_key *key2,
- const union xfs_btree_key *mask);
+ int (*cmp_two_keys)(struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2,
+ const union xfs_btree_key *mask);
const struct xfs_buf_ops *buf_ops;
@@ -516,9 +519,9 @@ struct xfs_btree_block *xfs_btree_get_block(struct xfs_btree_cur *cur,
int level, struct xfs_buf **bpp);
bool xfs_btree_ptr_is_null(struct xfs_btree_cur *cur,
const union xfs_btree_ptr *ptr);
-int64_t xfs_btree_diff_two_ptrs(struct xfs_btree_cur *cur,
- const union xfs_btree_ptr *a,
- const union xfs_btree_ptr *b);
+int xfs_btree_cmp_two_ptrs(struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *a,
+ const union xfs_btree_ptr *b);
void xfs_btree_get_sibling(struct xfs_btree_cur *cur,
struct xfs_btree_block *block,
union xfs_btree_ptr *ptr, int lr);
@@ -546,7 +549,7 @@ xfs_btree_keycmp_lt(
const union xfs_btree_key *key1,
const union xfs_btree_key *key2)
{
- return cur->bc_ops->diff_two_keys(cur, key1, key2, NULL) < 0;
+ return cur->bc_ops->cmp_two_keys(cur, key1, key2, NULL) < 0;
}
static inline bool
@@ -555,7 +558,7 @@ xfs_btree_keycmp_gt(
const union xfs_btree_key *key1,
const union xfs_btree_key *key2)
{
- return cur->bc_ops->diff_two_keys(cur, key1, key2, NULL) > 0;
+ return cur->bc_ops->cmp_two_keys(cur, key1, key2, NULL) > 0;
}
static inline bool
@@ -564,7 +567,7 @@ xfs_btree_keycmp_eq(
const union xfs_btree_key *key1,
const union xfs_btree_key *key2)
{
- return cur->bc_ops->diff_two_keys(cur, key1, key2, NULL) == 0;
+ return cur->bc_ops->cmp_two_keys(cur, key1, key2, NULL) == 0;
}
static inline bool
@@ -602,7 +605,7 @@ xfs_btree_masked_keycmp_lt(
const union xfs_btree_key *key2,
const union xfs_btree_key *mask)
{
- return cur->bc_ops->diff_two_keys(cur, key1, key2, mask) < 0;
+ return cur->bc_ops->cmp_two_keys(cur, key1, key2, mask) < 0;
}
static inline bool
@@ -612,7 +615,7 @@ xfs_btree_masked_keycmp_gt(
const union xfs_btree_key *key2,
const union xfs_btree_key *mask)
{
- return cur->bc_ops->diff_two_keys(cur, key1, key2, mask) > 0;
+ return cur->bc_ops->cmp_two_keys(cur, key1, key2, mask) > 0;
}
static inline bool
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 9566a7623365..779dac59b1f3 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -112,7 +112,7 @@ typedef struct xfs_sb {
uint16_t sb_sectsize; /* volume sector size, bytes */
uint16_t sb_inodesize; /* inode size, bytes */
uint16_t sb_inopblock; /* inodes per block */
- char sb_fname[XFSLABEL_MAX]; /* file system name */
+ char sb_fname[XFSLABEL_MAX] __nonstring; /* file system name */
uint8_t sb_blocklog; /* log2 of sb_blocksize */
uint8_t sb_sectlog; /* log2 of sb_sectsize */
uint8_t sb_inodelog; /* log2 of sb_inodesize */
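
__nonstring above marks sb_fname as a fixed-size byte array that is not required to be NUL-terminated, so compilers that understand the attribute stop applying C-string diagnostics to it. Illustrative use (label contents hypothetical):

	char label[XFSLABEL_MAX] __nonstring;	/* 12 bytes, no terminator */

	/* filling the whole array is fine; no NUL needs to fit */
	memcpy(label, "production01", XFSLABEL_MAX);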
diff --git a/fs/xfs/libxfs/xfs_group.c b/fs/xfs/libxfs/xfs_group.c
index e9d76bcdc820..792f76d2e2a0 100644
--- a/fs/xfs/libxfs/xfs_group.c
+++ b/fs/xfs/libxfs/xfs_group.c
@@ -163,7 +163,8 @@ xfs_group_free(
xfs_defer_drain_free(&xg->xg_intents_drain);
#ifdef __KERNEL__
- kfree(xg->xg_busy_extents);
+ if (xfs_group_has_extent_busy(xg->xg_mount, xg->xg_type))
+ kfree(xg->xg_busy_extents);
#endif
if (uninit)
@@ -171,7 +172,8 @@ xfs_group_free(
/* drop the mount's active reference */
xfs_group_rele(xg);
- XFS_IS_CORRUPT(mp, atomic_read(&xg->xg_active_ref) != 0);
+ XFS_IS_CORRUPT(mp, atomic_read(&xg->xg_active_ref) > 0);
+ XFS_IS_CORRUPT(mp, atomic_read(&xg->xg_active_ref) < 0);
kfree_rcu_mightsleep(xg);
}
@@ -189,9 +191,11 @@ xfs_group_insert(
xg->xg_type = type;
#ifdef __KERNEL__
- xg->xg_busy_extents = xfs_extent_busy_alloc();
- if (!xg->xg_busy_extents)
- return -ENOMEM;
+ if (xfs_group_has_extent_busy(mp, type)) {
+ xg->xg_busy_extents = xfs_extent_busy_alloc();
+ if (!xg->xg_busy_extents)
+ return -ENOMEM;
+ }
spin_lock_init(&xg->xg_state_lock);
xfs_hooks_init(&xg->xg_rmap_update_hooks);
#endif
@@ -210,7 +214,8 @@ xfs_group_insert(
out_drain:
xfs_defer_drain_free(&xg->xg_intents_drain);
#ifdef __KERNEL__
- kfree(xg->xg_busy_extents);
+ if (xfs_group_has_extent_busy(xg->xg_mount, xg->xg_type))
+ kfree(xg->xg_busy_extents);
#endif
return error;
}
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 0c47b5c6ca7d..750111634d9f 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2801,12 +2801,35 @@ xfs_ialloc_read_agi(
set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
}
+#ifdef DEBUG
/*
- * It's possible for these to be out of sync if
- * we are in the middle of a forced shutdown.
+ * It's possible for the AGI to be out of sync if the block device is
+ * silently dropping writes. This can happen in fstests with dmflakey
+ * enabled, which allows the buffer to be cleaned and reclaimed by
+ * memory pressure and then re-read from disk here. We will get a
+ * stale version of the AGI from disk, and nothing good can happen from
+ * here. Hence if we detect this situation, immediately shut down the
+ * filesystem.
+ *
+ * This can also happen if we are already in the middle of a forced
+ * shutdown, so don't bother checking if we are already shut down.
*/
- ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
- xfs_is_shutdown(pag_mount(pag)));
+ if (!xfs_is_shutdown(pag_mount(pag))) {
+ bool ok = true;
+
+ ok &= pag->pagi_freecount == be32_to_cpu(agi->agi_freecount);
+ ok &= pag->pagi_count == be32_to_cpu(agi->agi_count);
+
+ if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) {
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
+ xfs_trans_brelse(tp, agibp);
+ xfs_force_shutdown(pag_mount(pag),
+ SHUTDOWN_CORRUPT_ONDISK);
+ return -EFSCORRUPTED;
+ }
+ }
+#endif /* DEBUG */
+
if (agibpp)
*agibpp = agibp;
else
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 6f270d8f4270..100afdd66cdd 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -265,17 +265,17 @@ xfs_finobt_init_ptr_from_cur(
ptr->s = agi->agi_free_root;
}
-STATIC int64_t
-xfs_inobt_key_diff(
+STATIC int
+xfs_inobt_cmp_key_with_cur(
struct xfs_btree_cur *cur,
const union xfs_btree_key *key)
{
- return (int64_t)be32_to_cpu(key->inobt.ir_startino) -
- cur->bc_rec.i.ir_startino;
+ return cmp_int(be32_to_cpu(key->inobt.ir_startino),
+ cur->bc_rec.i.ir_startino);
}
-STATIC int64_t
-xfs_inobt_diff_two_keys(
+STATIC int
+xfs_inobt_cmp_two_keys(
struct xfs_btree_cur *cur,
const union xfs_btree_key *k1,
const union xfs_btree_key *k2,
@@ -283,8 +283,8 @@ xfs_inobt_diff_two_keys(
{
ASSERT(!mask || mask->inobt.ir_startino);
- return (int64_t)be32_to_cpu(k1->inobt.ir_startino) -
- be32_to_cpu(k2->inobt.ir_startino);
+ return cmp_int(be32_to_cpu(k1->inobt.ir_startino),
+ be32_to_cpu(k2->inobt.ir_startino));
}
static xfs_failaddr_t
@@ -430,9 +430,9 @@ const struct xfs_btree_ops xfs_inobt_ops = {
.init_high_key_from_rec = xfs_inobt_init_high_key_from_rec,
.init_rec_from_cur = xfs_inobt_init_rec_from_cur,
.init_ptr_from_cur = xfs_inobt_init_ptr_from_cur,
- .key_diff = xfs_inobt_key_diff,
+ .cmp_key_with_cur = xfs_inobt_cmp_key_with_cur,
.buf_ops = &xfs_inobt_buf_ops,
- .diff_two_keys = xfs_inobt_diff_two_keys,
+ .cmp_two_keys = xfs_inobt_cmp_two_keys,
.keys_inorder = xfs_inobt_keys_inorder,
.recs_inorder = xfs_inobt_recs_inorder,
.keys_contiguous = xfs_inobt_keys_contiguous,
@@ -460,9 +460,9 @@ const struct xfs_btree_ops xfs_finobt_ops = {
.init_high_key_from_rec = xfs_inobt_init_high_key_from_rec,
.init_rec_from_cur = xfs_inobt_init_rec_from_cur,
.init_ptr_from_cur = xfs_finobt_init_ptr_from_cur,
- .key_diff = xfs_inobt_key_diff,
+ .cmp_key_with_cur = xfs_inobt_cmp_key_with_cur,
.buf_ops = &xfs_finobt_buf_ops,
- .diff_two_keys = xfs_inobt_diff_two_keys,
+ .cmp_two_keys = xfs_inobt_cmp_two_keys,
.keys_inorder = xfs_inobt_keys_inorder,
.recs_inorder = xfs_inobt_recs_inorder,
.keys_contiguous = xfs_inobt_keys_contiguous,
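Editor's note: the recurring change in these btree hunks deserves a standalone illustration. Subtraction-based three-way compares are only safe when the operands are narrower than the result type, which is why the 64-bit owner/offset comparisons used explicit if-chains before this series. cmp_int(), commonly defined as ((l) > (r)) - ((l) < (r)), avoids the overflow entirely. A minimal userspace sketch (the macro definition here is an assumption mirroring the kernel's):

    #include <stdint.h>
    #include <stdio.h>

    /* assumed definition, mirroring the kernel's cmp_int() */
    #define cmp_int(l, r)  (((l) > (r)) - ((l) < (r)))

    int main(void)
    {
        uint64_t a = 1, b = UINT64_MAX;

        /* subtraction wraps: 1 - (2^64 - 1) == 2, wrongly claiming a > b */
        printf("diff: %lld\n", (long long)(int64_t)(a - b));

        /* three-way compare: -1, correctly reporting a < b */
        printf("cmp:  %d\n", cmp_int(a, b));
        return 0;
    }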
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 66c7916fb5cd..95de23095030 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -104,7 +104,7 @@ struct xlog_recover_item {
struct list_head ri_list;
int ri_cnt; /* count of regions found */
int ri_total; /* total regions */
- struct xfs_log_iovec *ri_buf; /* ptr to regions buffer */
+ struct kvec *ri_buf; /* ptr to regions buffer */
const struct xlog_recover_item_ops *ri_ops;
};
@@ -117,7 +117,7 @@ struct xlog_recover {
struct list_head r_itemq; /* q for items */
};
-#define ITEM_TYPE(i) (*(unsigned short *)(i)->ri_buf[0].i_addr)
+#define ITEM_TYPE(i) (*(unsigned short *)(i)->ri_buf[0].iov_base)
#define XLOG_RECOVER_CRCPASS 0
#define XLOG_RECOVER_PASS1 1
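Editor's note: for readers unfamiliar with the replacement type, struct kvec is the generic kernel-space scatter element from include/linux/uio.h, so the field renames in these hunks are mechanical. The one loss is the per-region type tag, which callers now pass explicitly (see the xlog_copy_iovec() calls in the xfs_attr_item.c hunks below):

    /* include/linux/uio.h */
    struct kvec {
        void   *iov_base;   /* replaces xfs_log_iovec.i_addr */
        size_t  iov_len;    /* replaces xfs_log_iovec.i_len */
    };
    /* xfs_log_iovec.i_type (XLOG_REG_TYPE_*) has no kvec equivalent and
     * now travels as an explicit function argument instead */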
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index cebe83f7842a..897784037483 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -2099,9 +2099,7 @@ xfs_refcount_recover_cow_leftovers(
* recording the CoW debris we cancel the (empty) transaction
* and everything goes away cleanly.
*/
- error = xfs_trans_alloc_empty(mp, &tp);
- if (error)
- return error;
+ tp = xfs_trans_alloc_empty(mp);
if (isrt) {
xfs_rtgroup_lock(to_rtg(xg), XFS_RTGLOCK_REFCOUNT);
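Editor's note: this hunk is the template for the many call sites converted below. xfs_trans_alloc_empty() now returns the transaction directly, presumably because allocating an empty (reservation-less) transaction cannot fail, so the error plumbing collapses at every caller. The before/after call pattern, for reference:

    /* before: fallible two-step allocation */
    error = xfs_trans_alloc_empty(mp, &tp);
    if (error)
        return error;

    /* after: the allocation cannot fail, so the helper returns the
     * transaction and the error branch disappears */
    tp = xfs_trans_alloc_empty(mp);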
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 54505fee1852..06da3ca14727 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -174,8 +174,8 @@ xfs_refcountbt_init_ptr_from_cur(
ptr->s = agf->agf_refcount_root;
}
-STATIC int64_t
-xfs_refcountbt_key_diff(
+STATIC int
+xfs_refcountbt_cmp_key_with_cur(
struct xfs_btree_cur *cur,
const union xfs_btree_key *key)
{
@@ -185,11 +185,11 @@ xfs_refcountbt_key_diff(
start = xfs_refcount_encode_startblock(irec->rc_startblock,
irec->rc_domain);
- return (int64_t)be32_to_cpu(kp->rc_startblock) - start;
+ return cmp_int(be32_to_cpu(kp->rc_startblock), start);
}
-STATIC int64_t
-xfs_refcountbt_diff_two_keys(
+STATIC int
+xfs_refcountbt_cmp_two_keys(
struct xfs_btree_cur *cur,
const union xfs_btree_key *k1,
const union xfs_btree_key *k2,
@@ -197,8 +197,8 @@ xfs_refcountbt_diff_two_keys(
{
ASSERT(!mask || mask->refc.rc_startblock);
- return (int64_t)be32_to_cpu(k1->refc.rc_startblock) -
- be32_to_cpu(k2->refc.rc_startblock);
+ return cmp_int(be32_to_cpu(k1->refc.rc_startblock),
+ be32_to_cpu(k2->refc.rc_startblock));
}
STATIC xfs_failaddr_t
@@ -339,9 +339,9 @@ const struct xfs_btree_ops xfs_refcountbt_ops = {
.init_high_key_from_rec = xfs_refcountbt_init_high_key_from_rec,
.init_rec_from_cur = xfs_refcountbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_refcountbt_init_ptr_from_cur,
- .key_diff = xfs_refcountbt_key_diff,
+ .cmp_key_with_cur = xfs_refcountbt_cmp_key_with_cur,
.buf_ops = &xfs_refcountbt_buf_ops,
- .diff_two_keys = xfs_refcountbt_diff_two_keys,
+ .cmp_two_keys = xfs_refcountbt_cmp_two_keys,
.keys_inorder = xfs_refcountbt_keys_inorder,
.recs_inorder = xfs_refcountbt_recs_inorder,
.keys_contiguous = xfs_refcountbt_keys_contiguous,
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 2cab694ac58a..bf16aee50d73 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -243,38 +243,22 @@ static inline uint64_t offset_keymask(uint64_t offset)
return offset & ~XFS_RMAP_OFF_UNWRITTEN;
}
-STATIC int64_t
-xfs_rmapbt_key_diff(
+STATIC int
+xfs_rmapbt_cmp_key_with_cur(
struct xfs_btree_cur *cur,
const union xfs_btree_key *key)
{
struct xfs_rmap_irec *rec = &cur->bc_rec.r;
const struct xfs_rmap_key *kp = &key->rmap;
- __u64 x, y;
- int64_t d;
- d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
- if (d)
- return d;
-
- x = be64_to_cpu(kp->rm_owner);
- y = rec->rm_owner;
- if (x > y)
- return 1;
- else if (y > x)
- return -1;
-
- x = offset_keymask(be64_to_cpu(kp->rm_offset));
- y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
- if (x > y)
- return 1;
- else if (y > x)
- return -1;
- return 0;
+ return cmp_int(be32_to_cpu(kp->rm_startblock), rec->rm_startblock) ?:
+ cmp_int(be64_to_cpu(kp->rm_owner), rec->rm_owner) ?:
+ cmp_int(offset_keymask(be64_to_cpu(kp->rm_offset)),
+ offset_keymask(xfs_rmap_irec_offset_pack(rec)));
}
-STATIC int64_t
-xfs_rmapbt_diff_two_keys(
+STATIC int
+xfs_rmapbt_cmp_two_keys(
struct xfs_btree_cur *cur,
const union xfs_btree_key *k1,
const union xfs_btree_key *k2,
@@ -282,36 +266,31 @@ xfs_rmapbt_diff_two_keys(
{
const struct xfs_rmap_key *kp1 = &k1->rmap;
const struct xfs_rmap_key *kp2 = &k2->rmap;
- int64_t d;
- __u64 x, y;
+ int d;
/* Doesn't make sense to mask off the physical space part */
ASSERT(!mask || mask->rmap.rm_startblock);
- d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
- be32_to_cpu(kp2->rm_startblock);
+ d = cmp_int(be32_to_cpu(kp1->rm_startblock),
+ be32_to_cpu(kp2->rm_startblock));
if (d)
return d;
if (!mask || mask->rmap.rm_owner) {
- x = be64_to_cpu(kp1->rm_owner);
- y = be64_to_cpu(kp2->rm_owner);
- if (x > y)
- return 1;
- else if (y > x)
- return -1;
+ d = cmp_int(be64_to_cpu(kp1->rm_owner),
+ be64_to_cpu(kp2->rm_owner));
+ if (d)
+ return d;
}
if (!mask || mask->rmap.rm_offset) {
/* Doesn't make sense to allow offset but not owner */
ASSERT(!mask || mask->rmap.rm_owner);
- x = offset_keymask(be64_to_cpu(kp1->rm_offset));
- y = offset_keymask(be64_to_cpu(kp2->rm_offset));
- if (x > y)
- return 1;
- else if (y > x)
- return -1;
+ d = cmp_int(offset_keymask(be64_to_cpu(kp1->rm_offset)),
+ offset_keymask(be64_to_cpu(kp2->rm_offset)));
+ if (d)
+ return d;
}
return 0;
@@ -515,9 +494,9 @@ const struct xfs_btree_ops xfs_rmapbt_ops = {
.init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec,
.init_rec_from_cur = xfs_rmapbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_rmapbt_init_ptr_from_cur,
- .key_diff = xfs_rmapbt_key_diff,
+ .cmp_key_with_cur = xfs_rmapbt_cmp_key_with_cur,
.buf_ops = &xfs_rmapbt_buf_ops,
- .diff_two_keys = xfs_rmapbt_diff_two_keys,
+ .cmp_two_keys = xfs_rmapbt_cmp_two_keys,
.keys_inorder = xfs_rmapbt_keys_inorder,
.recs_inorder = xfs_rmapbt_recs_inorder,
.keys_contiguous = xfs_rmapbt_keys_contiguous,
@@ -632,9 +611,9 @@ const struct xfs_btree_ops xfs_rmapbt_mem_ops = {
.init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec,
.init_rec_from_cur = xfs_rmapbt_init_rec_from_cur,
.init_ptr_from_cur = xfbtree_init_ptr_from_cur,
- .key_diff = xfs_rmapbt_key_diff,
+ .cmp_key_with_cur = xfs_rmapbt_cmp_key_with_cur,
.buf_ops = &xfs_rmapbt_mem_buf_ops,
- .diff_two_keys = xfs_rmapbt_diff_two_keys,
+ .cmp_two_keys = xfs_rmapbt_cmp_two_keys,
.keys_inorder = xfs_rmapbt_keys_inorder,
.recs_inorder = xfs_rmapbt_recs_inorder,
.keys_contiguous = xfs_rmapbt_keys_contiguous,
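Editor's note: the `?:` chains above rely on the GNU C conditional-with-omitted-operand extension: `a ?: b` yields `a` when `a` is nonzero and `b` otherwise, evaluating `a` only once. Chained with cmp_int() it gives a compact lexicographic compare. A standalone sketch:

    #include <stdio.h>

    #define cmp_int(l, r)  (((l) > (r)) - ((l) < (r)))

    struct key { unsigned start; unsigned long long owner; };

    /* lexicographic compare: later fields only break ties */
    static int cmp_key(const struct key *a, const struct key *b)
    {
        return cmp_int(a->start, b->start) ?:
               cmp_int(a->owner, b->owner);
    }

    int main(void)
    {
        struct key a = { 5, 10 }, b = { 5, 12 };
        printf("%d\n", cmp_key(&a, &b));  /* -1: starts tie, owner decides */
        return 0;
    }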
diff --git a/fs/xfs/libxfs/xfs_rtrefcount_btree.c b/fs/xfs/libxfs/xfs_rtrefcount_btree.c
index 3db5e7a4a945..ac11e94b42ae 100644
--- a/fs/xfs/libxfs/xfs_rtrefcount_btree.c
+++ b/fs/xfs/libxfs/xfs_rtrefcount_btree.c
@@ -156,8 +156,8 @@ xfs_rtrefcountbt_init_ptr_from_cur(
ptr->l = 0;
}
-STATIC int64_t
-xfs_rtrefcountbt_key_diff(
+STATIC int
+xfs_rtrefcountbt_cmp_key_with_cur(
struct xfs_btree_cur *cur,
const union xfs_btree_key *key)
{
@@ -167,11 +167,11 @@ xfs_rtrefcountbt_key_diff(
start = xfs_refcount_encode_startblock(irec->rc_startblock,
irec->rc_domain);
- return (int64_t)be32_to_cpu(kp->rc_startblock) - start;
+ return cmp_int(be32_to_cpu(kp->rc_startblock), start);
}
-STATIC int64_t
-xfs_rtrefcountbt_diff_two_keys(
+STATIC int
+xfs_rtrefcountbt_cmp_two_keys(
struct xfs_btree_cur *cur,
const union xfs_btree_key *k1,
const union xfs_btree_key *k2,
@@ -179,8 +179,8 @@ xfs_rtrefcountbt_diff_two_keys(
{
ASSERT(!mask || mask->refc.rc_startblock);
- return (int64_t)be32_to_cpu(k1->refc.rc_startblock) -
- be32_to_cpu(k2->refc.rc_startblock);
+ return cmp_int(be32_to_cpu(k1->refc.rc_startblock),
+ be32_to_cpu(k2->refc.rc_startblock));
}
static xfs_failaddr_t
@@ -387,9 +387,9 @@ const struct xfs_btree_ops xfs_rtrefcountbt_ops = {
.init_high_key_from_rec = xfs_rtrefcountbt_init_high_key_from_rec,
.init_rec_from_cur = xfs_rtrefcountbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_rtrefcountbt_init_ptr_from_cur,
- .key_diff = xfs_rtrefcountbt_key_diff,
+ .cmp_key_with_cur = xfs_rtrefcountbt_cmp_key_with_cur,
.buf_ops = &xfs_rtrefcountbt_buf_ops,
- .diff_two_keys = xfs_rtrefcountbt_diff_two_keys,
+ .cmp_two_keys = xfs_rtrefcountbt_cmp_two_keys,
.keys_inorder = xfs_rtrefcountbt_keys_inorder,
.recs_inorder = xfs_rtrefcountbt_recs_inorder,
.keys_contiguous = xfs_rtrefcountbt_keys_contiguous,
diff --git a/fs/xfs/libxfs/xfs_rtrmap_btree.c b/fs/xfs/libxfs/xfs_rtrmap_btree.c
index 9bdc2cbfc113..55f903165769 100644
--- a/fs/xfs/libxfs/xfs_rtrmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rtrmap_btree.c
@@ -185,38 +185,22 @@ static inline uint64_t offset_keymask(uint64_t offset)
return offset & ~XFS_RMAP_OFF_UNWRITTEN;
}
-STATIC int64_t
-xfs_rtrmapbt_key_diff(
+STATIC int
+xfs_rtrmapbt_cmp_key_with_cur(
struct xfs_btree_cur *cur,
const union xfs_btree_key *key)
{
struct xfs_rmap_irec *rec = &cur->bc_rec.r;
const struct xfs_rmap_key *kp = &key->rmap;
- __u64 x, y;
- int64_t d;
- d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
- if (d)
- return d;
-
- x = be64_to_cpu(kp->rm_owner);
- y = rec->rm_owner;
- if (x > y)
- return 1;
- else if (y > x)
- return -1;
-
- x = offset_keymask(be64_to_cpu(kp->rm_offset));
- y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
- if (x > y)
- return 1;
- else if (y > x)
- return -1;
- return 0;
+ return cmp_int(be32_to_cpu(kp->rm_startblock), rec->rm_startblock) ?:
+ cmp_int(be64_to_cpu(kp->rm_owner), rec->rm_owner) ?:
+ cmp_int(offset_keymask(be64_to_cpu(kp->rm_offset)),
+ offset_keymask(xfs_rmap_irec_offset_pack(rec)));
}
-STATIC int64_t
-xfs_rtrmapbt_diff_two_keys(
+STATIC int
+xfs_rtrmapbt_cmp_two_keys(
struct xfs_btree_cur *cur,
const union xfs_btree_key *k1,
const union xfs_btree_key *k2,
@@ -224,36 +208,31 @@ xfs_rtrmapbt_diff_two_keys(
{
const struct xfs_rmap_key *kp1 = &k1->rmap;
const struct xfs_rmap_key *kp2 = &k2->rmap;
- int64_t d;
- __u64 x, y;
+ int d;
/* Doesn't make sense to mask off the physical space part */
ASSERT(!mask || mask->rmap.rm_startblock);
- d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
- be32_to_cpu(kp2->rm_startblock);
+ d = cmp_int(be32_to_cpu(kp1->rm_startblock),
+ be32_to_cpu(kp2->rm_startblock));
if (d)
return d;
if (!mask || mask->rmap.rm_owner) {
- x = be64_to_cpu(kp1->rm_owner);
- y = be64_to_cpu(kp2->rm_owner);
- if (x > y)
- return 1;
- else if (y > x)
- return -1;
+ d = cmp_int(be64_to_cpu(kp1->rm_owner),
+ be64_to_cpu(kp2->rm_owner));
+ if (d)
+ return d;
}
if (!mask || mask->rmap.rm_offset) {
/* Doesn't make sense to allow offset but not owner */
ASSERT(!mask || mask->rmap.rm_owner);
- x = offset_keymask(be64_to_cpu(kp1->rm_offset));
- y = offset_keymask(be64_to_cpu(kp2->rm_offset));
- if (x > y)
- return 1;
- else if (y > x)
- return -1;
+ d = cmp_int(offset_keymask(be64_to_cpu(kp1->rm_offset)),
+ offset_keymask(be64_to_cpu(kp2->rm_offset)));
+ if (d)
+ return d;
}
return 0;
@@ -511,9 +490,9 @@ const struct xfs_btree_ops xfs_rtrmapbt_ops = {
.init_high_key_from_rec = xfs_rtrmapbt_init_high_key_from_rec,
.init_rec_from_cur = xfs_rtrmapbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_rtrmapbt_init_ptr_from_cur,
- .key_diff = xfs_rtrmapbt_key_diff,
+ .cmp_key_with_cur = xfs_rtrmapbt_cmp_key_with_cur,
.buf_ops = &xfs_rtrmapbt_buf_ops,
- .diff_two_keys = xfs_rtrmapbt_diff_two_keys,
+ .cmp_two_keys = xfs_rtrmapbt_cmp_two_keys,
.keys_inorder = xfs_rtrmapbt_keys_inorder,
.recs_inorder = xfs_rtrmapbt_recs_inorder,
.keys_contiguous = xfs_rtrmapbt_keys_contiguous,
@@ -620,9 +599,9 @@ const struct xfs_btree_ops xfs_rtrmapbt_mem_ops = {
.init_high_key_from_rec = xfs_rtrmapbt_init_high_key_from_rec,
.init_rec_from_cur = xfs_rtrmapbt_init_rec_from_cur,
.init_ptr_from_cur = xfbtree_init_ptr_from_cur,
- .key_diff = xfs_rtrmapbt_key_diff,
+ .cmp_key_with_cur = xfs_rtrmapbt_cmp_key_with_cur,
.buf_ops = &xfs_rtrmapbt_mem_buf_ops,
- .diff_two_keys = xfs_rtrmapbt_diff_two_keys,
+ .cmp_two_keys = xfs_rtrmapbt_cmp_two_keys,
.keys_inorder = xfs_rtrmapbt_keys_inorder,
.recs_inorder = xfs_rtrmapbt_recs_inorder,
.keys_contiguous = xfs_rtrmapbt_keys_contiguous,
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index fe678a0438bc..cd6f0ff382a7 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -306,7 +306,7 @@ xchk_btree_block_check_sibling(
if (pbp)
xchk_buffer_recheck(bs->sc, pbp);
- if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
+ if (xfs_btree_cmp_two_ptrs(cur, pp, sibling))
xchk_btree_set_corrupt(bs->sc, cur, level);
out:
xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 28ad341df8ee..2ef7742be7d3 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -866,11 +866,11 @@ xchk_trans_cancel(
sc->tp = NULL;
}
-int
+void
xchk_trans_alloc_empty(
struct xfs_scrub *sc)
{
- return xfs_trans_alloc_empty(sc->mp, &sc->tp);
+ sc->tp = xfs_trans_alloc_empty(sc->mp);
}
/*
@@ -892,7 +892,8 @@ xchk_trans_alloc(
return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
resblks, 0, 0, &sc->tp);
- return xchk_trans_alloc_empty(sc);
+ xchk_trans_alloc_empty(sc);
+ return 0;
}
/* Set us up with a transaction and an empty context. */
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 19877d99f255..ddbc065c798c 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -7,7 +7,7 @@
#define __XFS_SCRUB_COMMON_H__
int xchk_trans_alloc(struct xfs_scrub *sc, uint resblks);
-int xchk_trans_alloc_empty(struct xfs_scrub *sc);
+void xchk_trans_alloc_empty(struct xfs_scrub *sc);
void xchk_trans_cancel(struct xfs_scrub *sc);
bool xchk_process_error(struct xfs_scrub *sc, xfs_agnumber_t agno,
diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c
index 249313882108..8d3b550990b5 100644
--- a/fs/xfs/scrub/dir_repair.c
+++ b/fs/xfs/scrub/dir_repair.c
@@ -1289,9 +1289,7 @@ xrep_dir_scan_dirtree(
if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))
xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED |
XFS_ILOCK_EXCL));
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(sc);
while ((error = xchk_iscan_iter(&rd->pscan.iscan, &ip)) == 1) {
bool flush;
@@ -1317,9 +1315,7 @@ xrep_dir_scan_dirtree(
if (error)
break;
- error = xchk_trans_alloc_empty(sc);
- if (error)
- break;
+ xchk_trans_alloc_empty(sc);
}
if (xchk_should_terminate(sc, &error))
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 9b598c5790ad..cebd0d526926 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -237,7 +237,8 @@ xchk_setup_fscounters(
return error;
}
- return xchk_trans_alloc_empty(sc);
+ xchk_trans_alloc_empty(sc);
+ return 0;
}
/*
diff --git a/fs/xfs/scrub/metapath.c b/fs/xfs/scrub/metapath.c
index e21c16fbd15d..14939d7de349 100644
--- a/fs/xfs/scrub/metapath.c
+++ b/fs/xfs/scrub/metapath.c
@@ -318,9 +318,7 @@ xchk_metapath(
return 0;
}
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(sc);
error = xchk_metapath_ilock_both(mpath);
if (error)
diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c
index 4a47d0aabf73..26721fab5cab 100644
--- a/fs/xfs/scrub/nlinks.c
+++ b/fs/xfs/scrub/nlinks.c
@@ -555,9 +555,7 @@ xchk_nlinks_collect(
* do not take sb_internal.
*/
xchk_trans_cancel(sc);
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(sc);
while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) {
if (S_ISDIR(VFS_I(ip)->i_mode))
@@ -880,9 +878,7 @@ xchk_nlinks_compare(
* inactivation workqueue.
*/
xchk_trans_cancel(sc);
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(sc);
/*
* Use the inobt to walk all allocated inodes to compare the link
diff --git a/fs/xfs/scrub/nlinks_repair.c b/fs/xfs/scrub/nlinks_repair.c
index 4ebdee095428..6ef2ee9c3814 100644
--- a/fs/xfs/scrub/nlinks_repair.c
+++ b/fs/xfs/scrub/nlinks_repair.c
@@ -340,9 +340,7 @@ xrep_nlinks(
* We can only push the inactivation workqueues with an empty
* transaction.
*/
- error = xchk_trans_alloc_empty(sc);
- if (error)
- break;
+ xchk_trans_alloc_empty(sc);
}
xchk_iscan_iter_finish(&xnc->compare_iscan);
xchk_iscan_teardown(&xnc->compare_iscan);
diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c
index 31bfe10be22a..2949feda6271 100644
--- a/fs/xfs/scrub/parent_repair.c
+++ b/fs/xfs/scrub/parent_repair.c
@@ -569,9 +569,7 @@ xrep_parent_scan_dirtree(
if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))
xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED |
XFS_ILOCK_EXCL));
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(sc);
while ((error = xchk_iscan_iter(&rp->pscan.iscan, &ip)) == 1) {
bool flush;
@@ -597,9 +595,7 @@ xrep_parent_scan_dirtree(
if (error)
break;
- error = xchk_trans_alloc_empty(sc);
- if (error)
- break;
+ xchk_trans_alloc_empty(sc);
}
if (xchk_should_terminate(sc, &error))
@@ -1099,9 +1095,7 @@ xrep_parent_flush_xattrs(
xrep_tempfile_iounlock(rp->sc);
/* Recreate the empty transaction and relock the inode. */
- error = xchk_trans_alloc_empty(rp->sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(rp->sc);
xchk_ilock(rp->sc, XFS_ILOCK_EXCL);
return 0;
}
diff --git a/fs/xfs/scrub/quotacheck.c b/fs/xfs/scrub/quotacheck.c
index dc4033b91e44..e4105aaafe84 100644
--- a/fs/xfs/scrub/quotacheck.c
+++ b/fs/xfs/scrub/quotacheck.c
@@ -505,9 +505,7 @@ xqcheck_collect_counts(
* transactions do not take sb_internal.
*/
xchk_trans_cancel(sc);
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(sc);
while ((error = xchk_iscan_iter(&xqc->iscan, &ip)) == 1) {
error = xqcheck_collect_inode(xqc, ip);
diff --git a/fs/xfs/scrub/rcbag_btree.c b/fs/xfs/scrub/rcbag_btree.c
index 709356dc6256..9a4ef823c5a7 100644
--- a/fs/xfs/scrub/rcbag_btree.c
+++ b/fs/xfs/scrub/rcbag_btree.c
@@ -47,29 +47,20 @@ rcbagbt_init_rec_from_cur(
bag_rec->rbg_refcount = bag_irec->rbg_refcount;
}
-STATIC int64_t
-rcbagbt_key_diff(
+STATIC int
+rcbagbt_cmp_key_with_cur(
struct xfs_btree_cur *cur,
const union xfs_btree_key *key)
{
struct rcbag_rec *rec = (struct rcbag_rec *)&cur->bc_rec;
const struct rcbag_key *kp = (const struct rcbag_key *)key;
- if (kp->rbg_startblock > rec->rbg_startblock)
- return 1;
- if (kp->rbg_startblock < rec->rbg_startblock)
- return -1;
-
- if (kp->rbg_blockcount > rec->rbg_blockcount)
- return 1;
- if (kp->rbg_blockcount < rec->rbg_blockcount)
- return -1;
-
- return 0;
+ return cmp_int(kp->rbg_startblock, rec->rbg_startblock) ?:
+ cmp_int(kp->rbg_blockcount, rec->rbg_blockcount);
}
-STATIC int64_t
-rcbagbt_diff_two_keys(
+STATIC int
+rcbagbt_cmp_two_keys(
struct xfs_btree_cur *cur,
const union xfs_btree_key *k1,
const union xfs_btree_key *k2,
@@ -80,17 +71,8 @@ rcbagbt_diff_two_keys(
ASSERT(mask == NULL);
- if (kp1->rbg_startblock > kp2->rbg_startblock)
- return 1;
- if (kp1->rbg_startblock < kp2->rbg_startblock)
- return -1;
-
- if (kp1->rbg_blockcount > kp2->rbg_blockcount)
- return 1;
- if (kp1->rbg_blockcount < kp2->rbg_blockcount)
- return -1;
-
- return 0;
+ return cmp_int(kp1->rbg_startblock, kp2->rbg_startblock) ?:
+ cmp_int(kp1->rbg_blockcount, kp2->rbg_blockcount);
}
STATIC int
@@ -201,9 +183,9 @@ static const struct xfs_btree_ops rcbagbt_mem_ops = {
.init_key_from_rec = rcbagbt_init_key_from_rec,
.init_rec_from_cur = rcbagbt_init_rec_from_cur,
.init_ptr_from_cur = xfbtree_init_ptr_from_cur,
- .key_diff = rcbagbt_key_diff,
+ .cmp_key_with_cur = rcbagbt_cmp_key_with_cur,
.buf_ops = &rcbagbt_mem_buf_ops,
- .diff_two_keys = rcbagbt_diff_two_keys,
+ .cmp_two_keys = rcbagbt_cmp_two_keys,
.keys_inorder = rcbagbt_keys_inorder,
.recs_inorder = rcbagbt_recs_inorder,
};
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index f8f9ed30f56b..d00c18954a26 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -1269,42 +1269,6 @@ xrep_setup_xfbtree(
}
/*
- * Create a dummy transaction for use in a live update hook function. This
- * function MUST NOT be called from regular repair code because the current
- * process' transaction is saved via the cookie.
- */
-int
-xrep_trans_alloc_hook_dummy(
- struct xfs_mount *mp,
- void **cookiep,
- struct xfs_trans **tpp)
-{
- int error;
-
- *cookiep = current->journal_info;
- current->journal_info = NULL;
-
- error = xfs_trans_alloc_empty(mp, tpp);
- if (!error)
- return 0;
-
- current->journal_info = *cookiep;
- *cookiep = NULL;
- return error;
-}
-
-/* Cancel a dummy transaction used by a live update hook function. */
-void
-xrep_trans_cancel_hook_dummy(
- void **cookiep,
- struct xfs_trans *tp)
-{
- xfs_trans_cancel(tp);
- current->journal_info = *cookiep;
- *cookiep = NULL;
-}
-
-/*
* See if this buffer can pass the given ->verify_struct() function.
*
* If the buffer already has ops attached and they're not the ones that were
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index af0a3a9e5ed9..9c04295742c8 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -180,10 +180,6 @@ int xrep_quotacheck(struct xfs_scrub *sc);
int xrep_reinit_pagf(struct xfs_scrub *sc);
int xrep_reinit_pagi(struct xfs_scrub *sc);
-int xrep_trans_alloc_hook_dummy(struct xfs_mount *mp, void **cookiep,
- struct xfs_trans **tpp);
-void xrep_trans_cancel_hook_dummy(void **cookiep, struct xfs_trans *tp);
-
bool xrep_buf_verify_struct(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
void xrep_inode_set_nblocks(struct xfs_scrub *sc, int64_t new_blocks);
int xrep_reset_metafile_resv(struct xfs_scrub *sc);
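Editor's note: the dummy-transaction helpers deleted above existed only to hide the hook caller's own transaction via current->journal_info. Their removal makes sense if empty transactions no longer claim current->journal_info (an assumption inferred from the xfs_trans_alloc_empty() conversions in this series), letting the live update hooks below allocate one directly:

    /* before: stash and clear the hook caller's journal_info cookie */
    *cookiep = current->journal_info;
    current->journal_info = NULL;
    error = xfs_trans_alloc_empty(mp, tpp);

    /* after: empty transactions (assumed) never touch journal_info,
     * so the hook simply allocates and later cancels one */
    tp = xfs_trans_alloc_empty(mp);
    ...
    xfs_trans_cancel(tp);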
diff --git a/fs/xfs/scrub/rmap_repair.c b/fs/xfs/scrub/rmap_repair.c
index f5f73078ffe2..17d4a38d735c 100644
--- a/fs/xfs/scrub/rmap_repair.c
+++ b/fs/xfs/scrub/rmap_repair.c
@@ -951,9 +951,7 @@ end_agscan:
sa->agf_bp = NULL;
sa->agi_bp = NULL;
xchk_trans_cancel(sc);
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(sc);
/* Iterate all AGs for inodes rmaps. */
while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
@@ -1612,7 +1610,6 @@ xrep_rmapbt_live_update(
struct xfs_mount *mp;
struct xfs_btree_cur *mcur;
struct xfs_trans *tp;
- void *txcookie;
int error;
rr = container_of(nb, struct xrep_rmap, rhook.rmap_hook.nb);
@@ -1623,9 +1620,7 @@ xrep_rmapbt_live_update(
trace_xrep_rmap_live_update(pag_group(rr->sc->sa.pag), action, p);
- error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp);
- if (error)
- goto out_abort;
+ tp = xfs_trans_alloc_empty(mp);
mutex_lock(&rr->lock);
mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, tp, &rr->rmap_btree);
@@ -1639,14 +1634,13 @@ xrep_rmapbt_live_update(
if (error)
goto out_cancel;
- xrep_trans_cancel_hook_dummy(&txcookie, tp);
+ xfs_trans_cancel(tp);
mutex_unlock(&rr->lock);
return NOTIFY_DONE;
out_cancel:
xfbtree_trans_cancel(&rr->rmap_btree, tp);
- xrep_trans_cancel_hook_dummy(&txcookie, tp);
-out_abort:
+ xfs_trans_cancel(tp);
mutex_unlock(&rr->lock);
xchk_iscan_abort(&rr->iscan);
out_unlock:
diff --git a/fs/xfs/scrub/rtrmap_repair.c b/fs/xfs/scrub/rtrmap_repair.c
index fc2592c53af5..7561941a337a 100644
--- a/fs/xfs/scrub/rtrmap_repair.c
+++ b/fs/xfs/scrub/rtrmap_repair.c
@@ -580,9 +580,7 @@ xrep_rtrmap_find_rmaps(
*/
xchk_trans_cancel(sc);
xchk_rtgroup_unlock(&sc->sr);
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(sc);
while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
error = xrep_rtrmap_scan_inode(rr, ip);
@@ -846,7 +844,6 @@ xrep_rtrmapbt_live_update(
struct xfs_mount *mp;
struct xfs_btree_cur *mcur;
struct xfs_trans *tp;
- void *txcookie;
int error;
rr = container_of(nb, struct xrep_rtrmap, rhook.rmap_hook.nb);
@@ -857,9 +854,7 @@ xrep_rtrmapbt_live_update(
trace_xrep_rmap_live_update(rtg_group(rr->sc->sr.rtg), action, p);
- error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp);
- if (error)
- goto out_abort;
+ tp = xfs_trans_alloc_empty(mp);
mutex_lock(&rr->lock);
mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, tp, &rr->rtrmap_btree);
@@ -873,14 +868,13 @@ xrep_rtrmapbt_live_update(
if (error)
goto out_cancel;
- xrep_trans_cancel_hook_dummy(&txcookie, tp);
+ xfs_trans_cancel(tp);
mutex_unlock(&rr->lock);
return NOTIFY_DONE;
out_cancel:
xfbtree_trans_cancel(&rr->rtrmap_btree, tp);
- xrep_trans_cancel_hook_dummy(&txcookie, tp);
-out_abort:
+ xfs_trans_cancel(tp);
xchk_iscan_abort(&rr->iscan);
mutex_unlock(&rr->lock);
out_unlock:
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 76e24032e99a..3c3b0d25006f 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -876,10 +876,7 @@ xchk_scrubv_open_by_handle(
struct xfs_inode *ip;
int error;
- error = xfs_trans_alloc_empty(mp, &tp);
- if (error)
- return NULL;
-
+ tp = xfs_trans_alloc_empty(mp);
error = xfs_iget(mp, tp, head->svh_ino, XCHK_IGET_FLAGS, 0, &ip);
xfs_trans_cancel(tp);
if (error)
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index d7c4ced47c15..1e6e9c10cea2 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -2996,7 +2996,7 @@ DEFINE_EVENT(xrep_pptr_salvage_class, name, \
DEFINE_XREP_PPTR_SALVAGE_EVENT(xrep_xattr_salvage_pptr);
DEFINE_XREP_PPTR_SALVAGE_EVENT(xrep_xattr_insert_pptr);
-TRACE_EVENT(xrep_xattr_class,
+DECLARE_EVENT_CLASS(xrep_xattr_class,
TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip),
TP_ARGS(ip, arg_ip),
TP_STRUCT__entry(
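Editor's note: the one-line fix above matters because TRACE_EVENT() defines a standalone tracepoint, whereas xrep_xattr_class is meant to be a template that DEFINE_EVENT() instantiates. The usual shape, with an illustrative (not necessarily real) event name:

    DECLARE_EVENT_CLASS(xrep_xattr_class,
        TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip),
        TP_ARGS(ip, arg_ip),
        ...        /* TP_STRUCT__entry / TP_fast_assign / TP_printk elided */
    );

    /* each DEFINE_EVENT stamps out a real tracepoint from the class */
    DEFINE_EVENT(xrep_xattr_class, xrep_xattr_example_event,
        TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip),
        TP_ARGS(ip, arg_ip));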
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index f683b7a9323f..5eef3bc30bda 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -91,41 +91,37 @@ xfs_attri_log_nameval_alloc(
name_len + new_name_len + value_len +
new_value_len);
- nv->name.i_addr = nv + 1;
- nv->name.i_len = name_len;
- nv->name.i_type = XLOG_REG_TYPE_ATTR_NAME;
- memcpy(nv->name.i_addr, name, name_len);
+ nv->name.iov_base = nv + 1;
+ nv->name.iov_len = name_len;
+ memcpy(nv->name.iov_base, name, name_len);
if (new_name_len) {
- nv->new_name.i_addr = nv->name.i_addr + name_len;
- nv->new_name.i_len = new_name_len;
- memcpy(nv->new_name.i_addr, new_name, new_name_len);
+ nv->new_name.iov_base = nv->name.iov_base + name_len;
+ nv->new_name.iov_len = new_name_len;
+ memcpy(nv->new_name.iov_base, new_name, new_name_len);
} else {
- nv->new_name.i_addr = NULL;
- nv->new_name.i_len = 0;
+ nv->new_name.iov_base = NULL;
+ nv->new_name.iov_len = 0;
}
- nv->new_name.i_type = XLOG_REG_TYPE_ATTR_NEWNAME;
if (value_len) {
- nv->value.i_addr = nv->name.i_addr + name_len + new_name_len;
- nv->value.i_len = value_len;
- memcpy(nv->value.i_addr, value, value_len);
+ nv->value.iov_base = nv->name.iov_base + name_len + new_name_len;
+ nv->value.iov_len = value_len;
+ memcpy(nv->value.iov_base, value, value_len);
} else {
- nv->value.i_addr = NULL;
- nv->value.i_len = 0;
+ nv->value.iov_base = NULL;
+ nv->value.iov_len = 0;
}
- nv->value.i_type = XLOG_REG_TYPE_ATTR_VALUE;
if (new_value_len) {
- nv->new_value.i_addr = nv->name.i_addr + name_len +
+ nv->new_value.iov_base = nv->name.iov_base + name_len +
new_name_len + value_len;
- nv->new_value.i_len = new_value_len;
- memcpy(nv->new_value.i_addr, new_value, new_value_len);
+ nv->new_value.iov_len = new_value_len;
+ memcpy(nv->new_value.iov_base, new_value, new_value_len);
} else {
- nv->new_value.i_addr = NULL;
- nv->new_value.i_len = 0;
+ nv->new_value.iov_base = NULL;
+ nv->new_value.iov_len = 0;
}
- nv->new_value.i_type = XLOG_REG_TYPE_ATTR_NEWVALUE;
refcount_set(&nv->refcount, 1);
return nv;
@@ -170,21 +166,21 @@ xfs_attri_item_size(
*nvecs += 2;
*nbytes += sizeof(struct xfs_attri_log_format) +
- xlog_calc_iovec_len(nv->name.i_len);
+ xlog_calc_iovec_len(nv->name.iov_len);
- if (nv->new_name.i_len) {
+ if (nv->new_name.iov_len) {
*nvecs += 1;
- *nbytes += xlog_calc_iovec_len(nv->new_name.i_len);
+ *nbytes += xlog_calc_iovec_len(nv->new_name.iov_len);
}
- if (nv->value.i_len) {
+ if (nv->value.iov_len) {
*nvecs += 1;
- *nbytes += xlog_calc_iovec_len(nv->value.i_len);
+ *nbytes += xlog_calc_iovec_len(nv->value.iov_len);
}
- if (nv->new_value.i_len) {
+ if (nv->new_value.iov_len) {
*nvecs += 1;
- *nbytes += xlog_calc_iovec_len(nv->new_value.i_len);
+ *nbytes += xlog_calc_iovec_len(nv->new_value.iov_len);
}
}
@@ -212,31 +208,36 @@ xfs_attri_item_format(
* the log recovery.
*/
- ASSERT(nv->name.i_len > 0);
+ ASSERT(nv->name.iov_len > 0);
attrip->attri_format.alfi_size++;
- if (nv->new_name.i_len > 0)
+ if (nv->new_name.iov_len > 0)
attrip->attri_format.alfi_size++;
- if (nv->value.i_len > 0)
+ if (nv->value.iov_len > 0)
attrip->attri_format.alfi_size++;
- if (nv->new_value.i_len > 0)
+ if (nv->new_value.iov_len > 0)
attrip->attri_format.alfi_size++;
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRI_FORMAT,
&attrip->attri_format,
sizeof(struct xfs_attri_log_format));
- xlog_copy_from_iovec(lv, &vecp, &nv->name);
- if (nv->new_name.i_len > 0)
- xlog_copy_from_iovec(lv, &vecp, &nv->new_name);
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NAME, nv->name.iov_base,
+ nv->name.iov_len);
- if (nv->value.i_len > 0)
- xlog_copy_from_iovec(lv, &vecp, &nv->value);
+ if (nv->new_name.iov_len > 0)
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NEWNAME,
+ nv->new_name.iov_base, nv->new_name.iov_len);
- if (nv->new_value.i_len > 0)
- xlog_copy_from_iovec(lv, &vecp, &nv->new_value);
+ if (nv->value.iov_len > 0)
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_VALUE,
+ nv->value.iov_base, nv->value.iov_len);
+
+ if (nv->new_value.iov_len > 0)
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NEWVALUE,
+ nv->new_value.iov_base, nv->new_value.iov_len);
}
/*
@@ -383,22 +384,22 @@ xfs_attr_log_item(
attrp->alfi_ino = args->dp->i_ino;
ASSERT(!(attr->xattri_op_flags & ~XFS_ATTRI_OP_FLAGS_TYPE_MASK));
attrp->alfi_op_flags = attr->xattri_op_flags;
- attrp->alfi_value_len = nv->value.i_len;
+ attrp->alfi_value_len = nv->value.iov_len;
switch (xfs_attr_log_item_op(attrp)) {
case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE:
- ASSERT(nv->value.i_len == nv->new_value.i_len);
+ ASSERT(nv->value.iov_len == nv->new_value.iov_len);
attrp->alfi_igen = VFS_I(args->dp)->i_generation;
- attrp->alfi_old_name_len = nv->name.i_len;
- attrp->alfi_new_name_len = nv->new_name.i_len;
+ attrp->alfi_old_name_len = nv->name.iov_len;
+ attrp->alfi_new_name_len = nv->new_name.iov_len;
break;
case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE:
case XFS_ATTRI_OP_FLAGS_PPTR_SET:
attrp->alfi_igen = VFS_I(args->dp)->i_generation;
fallthrough;
default:
- attrp->alfi_name_len = nv->name.i_len;
+ attrp->alfi_name_len = nv->name.iov_len;
break;
}
@@ -616,10 +617,7 @@ xfs_attri_iread_extents(
struct xfs_trans *tp;
int error;
- error = xfs_trans_alloc_empty(ip->i_mount, &tp);
- if (error)
- return error;
-
+ tp = xfs_trans_alloc_empty(ip->i_mount);
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_iread_extents(tp, ip, XFS_ATTR_FORK);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -690,14 +688,14 @@ xfs_attri_recover_work(
args->dp = ip;
args->geo = mp->m_attr_geo;
args->whichfork = XFS_ATTR_FORK;
- args->name = nv->name.i_addr;
- args->namelen = nv->name.i_len;
- args->new_name = nv->new_name.i_addr;
- args->new_namelen = nv->new_name.i_len;
- args->value = nv->value.i_addr;
- args->valuelen = nv->value.i_len;
- args->new_value = nv->new_value.i_addr;
- args->new_valuelen = nv->new_value.i_len;
+ args->name = nv->name.iov_base;
+ args->namelen = nv->name.iov_len;
+ args->new_name = nv->new_name.iov_base;
+ args->new_namelen = nv->new_name.iov_len;
+ args->value = nv->value.iov_base;
+ args->valuelen = nv->value.iov_len;
+ args->new_value = nv->new_value.iov_base;
+ args->new_valuelen = nv->new_value.iov_len;
args->attr_filter = attrp->alfi_attr_filter & XFS_ATTRI_FILTER_MASK;
args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT |
XFS_DA_OP_LOGGED;
@@ -754,8 +752,8 @@ xfs_attr_recover_work(
*/
attrp = &attrip->attri_format;
if (!xfs_attri_validate(mp, attrp) ||
- !xfs_attr_namecheck(attrp->alfi_attr_filter, nv->name.i_addr,
- nv->name.i_len))
+ !xfs_attr_namecheck(attrp->alfi_attr_filter, nv->name.iov_base,
+ nv->name.iov_len))
return -EFSCORRUPTED;
attr = xfs_attri_recover_work(mp, dfp, attrp, &ip, nv);
@@ -953,50 +951,50 @@ static inline void *
xfs_attri_validate_name_iovec(
struct xfs_mount *mp,
struct xfs_attri_log_format *attri_formatp,
- const struct xfs_log_iovec *iovec,
+ const struct kvec *iovec,
unsigned int name_len)
{
- if (iovec->i_len != xlog_calc_iovec_len(name_len)) {
+ if (iovec->iov_len != xlog_calc_iovec_len(name_len)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
attri_formatp, sizeof(*attri_formatp));
return NULL;
}
- if (!xfs_attr_namecheck(attri_formatp->alfi_attr_filter, iovec->i_addr,
+ if (!xfs_attr_namecheck(attri_formatp->alfi_attr_filter, iovec->iov_base,
name_len)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
attri_formatp, sizeof(*attri_formatp));
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- iovec->i_addr, iovec->i_len);
+ iovec->iov_base, iovec->iov_len);
return NULL;
}
- return iovec->i_addr;
+ return iovec->iov_base;
}
static inline void *
xfs_attri_validate_value_iovec(
struct xfs_mount *mp,
struct xfs_attri_log_format *attri_formatp,
- const struct xfs_log_iovec *iovec,
+ const struct kvec *iovec,
unsigned int value_len)
{
- if (iovec->i_len != xlog_calc_iovec_len(value_len)) {
+ if (iovec->iov_len != xlog_calc_iovec_len(value_len)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
attri_formatp, sizeof(*attri_formatp));
return NULL;
}
if ((attri_formatp->alfi_attr_filter & XFS_ATTR_PARENT) &&
- !xfs_parent_valuecheck(mp, iovec->i_addr, value_len)) {
+ !xfs_parent_valuecheck(mp, iovec->iov_base, value_len)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
attri_formatp, sizeof(*attri_formatp));
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- iovec->i_addr, iovec->i_len);
+ iovec->iov_base, iovec->iov_len);
return NULL;
}
- return iovec->i_addr;
+ return iovec->iov_base;
}
STATIC int
@@ -1023,13 +1021,13 @@ xlog_recover_attri_commit_pass2(
/* Validate xfs_attri_log_format before the large memory allocation */
len = sizeof(struct xfs_attri_log_format);
- if (item->ri_buf[i].i_len != len) {
+ if (item->ri_buf[i].iov_len != len) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
- attri_formatp = item->ri_buf[i].i_addr;
+ attri_formatp = item->ri_buf[i].iov_base;
if (!xfs_attri_validate(mp, attri_formatp)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
attri_formatp, len);
@@ -1218,10 +1216,10 @@ xlog_recover_attrd_commit_pass2(
{
struct xfs_attrd_log_format *attrd_formatp;
- attrd_formatp = item->ri_buf[0].i_addr;
- if (item->ri_buf[0].i_len != sizeof(struct xfs_attrd_log_format)) {
+ attrd_formatp = item->ri_buf[0].iov_base;
+ if (item->ri_buf[0].iov_len != sizeof(struct xfs_attrd_log_format)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
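Editor's note on the xlog_calc_iovec_len() comparisons in the validators above: log iovec payloads are padded, so a name or value length must be checked against its rounded region size rather than its raw byte count. A userspace sketch, under the assumption that the rounding is to 4-byte granularity:

    #include <stdint.h>
    #include <stdio.h>

    /* assumed behaviour of xlog_calc_iovec_len(): round up to u32 */
    static uint32_t calc_iovec_len(uint32_t len)
    {
        return (len + 3) & ~3u;
    }

    int main(void)
    {
        /* a 5-byte attr name occupies an 8-byte log region, so the
         * validators test iov_len against the rounded length */
        printf("%u\n", calc_iovec_len(5));  /* prints 8 */
        return 0;
    }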
diff --git a/fs/xfs/xfs_attr_item.h b/fs/xfs/xfs_attr_item.h
index e74128cbb722..d108a11b55ae 100644
--- a/fs/xfs/xfs_attr_item.h
+++ b/fs/xfs/xfs_attr_item.h
@@ -12,10 +12,10 @@ struct xfs_mount;
struct kmem_zone;
struct xfs_attri_log_nameval {
- struct xfs_log_iovec name;
- struct xfs_log_iovec new_name; /* PPTR_REPLACE only */
- struct xfs_log_iovec value;
- struct xfs_log_iovec new_value; /* PPTR_REPLACE only */
+ struct kvec name;
+ struct kvec new_name; /* PPTR_REPLACE only */
+ struct kvec value;
+ struct kvec new_value; /* PPTR_REPLACE only */
refcount_t refcount;
/* name and value follow the end of this struct */
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 646c515ee355..80f0c4bcc483 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -654,24 +654,24 @@ xlog_recover_bui_commit_pass2(
struct xfs_bui_log_format *bui_formatp;
size_t len;
- bui_formatp = item->ri_buf[0].i_addr;
+ bui_formatp = item->ri_buf[0].iov_base;
- if (item->ri_buf[0].i_len < xfs_bui_log_format_sizeof(0)) {
+ if (item->ri_buf[0].iov_len < xfs_bui_log_format_sizeof(0)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
len = xfs_bui_log_format_sizeof(bui_formatp->bui_nextents);
- if (item->ri_buf[0].i_len != len) {
+ if (item->ri_buf[0].iov_len != len) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
@@ -705,10 +705,10 @@ xlog_recover_bud_commit_pass2(
{
struct xfs_bud_log_format *bud_formatp;
- bud_formatp = item->ri_buf[0].i_addr;
- if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) {
+ bud_formatp = item->ri_buf[0].iov_base;
+ if (item->ri_buf[0].iov_len != sizeof(struct xfs_bud_log_format)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8af83bd161f9..f9ef3b2a332a 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1683,7 +1683,7 @@ xfs_free_buftarg(
fs_put_dax(btp->bt_daxdev, btp->bt_mount);
/* the main block device is closed by kill_block_super */
if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev)
- bdev_fput(btp->bt_bdev_file);
+ bdev_fput(btp->bt_file);
kfree(btp);
}
@@ -1712,8 +1712,8 @@ xfs_configure_buftarg_atomic_writes(
max_bytes = 0;
}
- btp->bt_bdev_awu_min = min_bytes;
- btp->bt_bdev_awu_max = max_bytes;
+ btp->bt_awu_min = min_bytes;
+ btp->bt_awu_max = max_bytes;
}
/* Configure a buffer target that abstracts a block device. */
@@ -1738,14 +1738,9 @@ xfs_configure_buftarg(
return -EINVAL;
}
- /*
- * Flush the block device pagecache so our bios see anything dirtied
- * before mount.
- */
if (bdev_can_atomic_write(btp->bt_bdev))
xfs_configure_buftarg_atomic_writes(btp);
-
- return sync_blockdev(btp->bt_bdev);
+ return 0;
}
int
@@ -1803,7 +1798,7 @@ xfs_alloc_buftarg(
btp = kzalloc(sizeof(*btp), GFP_KERNEL | __GFP_NOFAIL);
btp->bt_mount = mp;
- btp->bt_bdev_file = bdev_file;
+ btp->bt_file = bdev_file;
btp->bt_bdev = file_bdev(bdev_file);
btp->bt_dev = btp->bt_bdev->bd_dev;
btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off,
@@ -2082,44 +2077,6 @@ xfs_buf_delwri_submit(
return error;
}
-/*
- * Push a single buffer on a delwri queue.
- *
- * The purpose of this function is to submit a single buffer of a delwri queue
- * and return with the buffer still on the original queue.
- *
- * The buffer locking and queue management logic between _delwri_pushbuf() and
- * _delwri_queue() guarantee that the buffer cannot be queued to another list
- * before returning.
- */
-int
-xfs_buf_delwri_pushbuf(
- struct xfs_buf *bp,
- struct list_head *buffer_list)
-{
- int error;
-
- ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-
- trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);
-
- xfs_buf_lock(bp);
- bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC);
- bp->b_flags |= XBF_WRITE;
- xfs_buf_submit(bp);
-
- /*
- * The buffer is now locked, under I/O but still on the original delwri
- * queue. Wait for I/O completion, restore the DELWRI_Q flag and
- * return with the buffer unlocked and still on the original queue.
- */
- error = xfs_buf_iowait(bp);
- bp->b_flags |= _XBF_DELWRI_Q;
- xfs_buf_unlock(bp);
-
- return error;
-}
-
void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
{
/*
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 9d2ab567cf81..b269e115d9ac 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -94,7 +94,6 @@ void xfs_buf_cache_destroy(struct xfs_buf_cache *bch);
*/
struct xfs_buftarg {
dev_t bt_dev;
- struct file *bt_bdev_file;
struct block_device *bt_bdev;
struct dax_device *bt_daxdev;
struct file *bt_file;
@@ -112,9 +111,9 @@ struct xfs_buftarg {
struct percpu_counter bt_readahead_count;
struct ratelimit_state bt_ioerror_rl;
- /* Atomic write unit values, bytes */
- unsigned int bt_bdev_awu_min;
- unsigned int bt_bdev_awu_max;
+ /* Hardware atomic write unit values, bytes */
+ unsigned int bt_awu_min;
+ unsigned int bt_awu_max;
/* built-in cache, if we're not using the perag one */
struct xfs_buf_cache bt_cache[];
@@ -326,7 +325,6 @@ extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl);
extern int xfs_buf_delwri_submit(struct list_head *);
extern int xfs_buf_delwri_submit_nowait(struct list_head *);
-extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *);
static inline xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp)
{
@@ -376,7 +374,6 @@ extern void xfs_buftarg_wait(struct xfs_buftarg *);
extern void xfs_buftarg_drain(struct xfs_buftarg *);
int xfs_configure_buftarg(struct xfs_buftarg *btp, unsigned int sectorsize);
-#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 90139e0f3271..8d85b5eee444 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -32,19 +32,74 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
return container_of(lip, struct xfs_buf_log_item, bli_item);
}
+static void
+xfs_buf_item_get_format(
+ struct xfs_buf_log_item *bip,
+ int count)
+{
+ ASSERT(bip->bli_formats == NULL);
+ bip->bli_format_count = count;
+
+ if (count == 1) {
+ bip->bli_formats = &bip->__bli_format;
+ return;
+ }
+
+ bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format),
+ GFP_KERNEL | __GFP_NOFAIL);
+}
+
+static void
+xfs_buf_item_free_format(
+ struct xfs_buf_log_item *bip)
+{
+ if (bip->bli_formats != &bip->__bli_format) {
+ kfree(bip->bli_formats);
+ bip->bli_formats = NULL;
+ }
+}
+
+static void
+xfs_buf_item_free(
+ struct xfs_buf_log_item *bip)
+{
+ xfs_buf_item_free_format(bip);
+ kvfree(bip->bli_item.li_lv_shadow);
+ kmem_cache_free(xfs_buf_item_cache, bip);
+}
+
+/*
+ * xfs_buf_item_relse() is called when the buf log item is no longer needed.
+ */
+static void
+xfs_buf_item_relse(
+ struct xfs_buf_log_item *bip)
+{
+ struct xfs_buf *bp = bip->bli_buf;
+
+ trace_xfs_buf_item_relse(bp, _RET_IP_);
+
+ ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
+ ASSERT(atomic_read(&bip->bli_refcount) == 0);
+
+ bp->b_log_item = NULL;
+ xfs_buf_rele(bp);
+ xfs_buf_item_free(bip);
+}
+
/* Is this log iovec plausibly large enough to contain the buffer log format? */
bool
xfs_buf_log_check_iovec(
- struct xfs_log_iovec *iovec)
+ struct kvec *iovec)
{
- struct xfs_buf_log_format *blfp = iovec->i_addr;
+ struct xfs_buf_log_format *blfp = iovec->iov_base;
char *bmp_end;
char *item_end;
- if (offsetof(struct xfs_buf_log_format, blf_data_map) > iovec->i_len)
+ if (offsetof(struct xfs_buf_log_format, blf_data_map) > iovec->iov_len)
return false;
- item_end = (char *)iovec->i_addr + iovec->i_len;
+ item_end = (char *)iovec->iov_base + iovec->iov_len;
bmp_end = (char *)&blfp->blf_data_map[blfp->blf_map_size];
return bmp_end <= item_end;
}
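Editor's note: the check above guards a flexible array whose length field comes from the (untrusted) log, so it computes the end of the array and compares it against the end of the buffer. A self-contained sketch of the same pattern:

    #include <stdbool.h>
    #include <stddef.h>

    struct rec {
        unsigned int map_size;          /* untrusted element count */
        unsigned int data_map[];
    };

    /* same shape as xfs_buf_log_check_iovec(): the fixed header must fit,
     * and the end of the flex array must not run past the buffer's end */
    static bool rec_fits(const void *buf, size_t len)
    {
        const struct rec *r = buf;
        const char *item_end = (const char *)buf + len;
        const char *map_end;

        if (offsetof(struct rec, data_map) > len)
            return false;
        map_end = (const char *)&r->data_map[r->map_size];
        return map_end <= item_end;
    }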
@@ -390,6 +445,42 @@ xfs_buf_item_pin(
}
/*
+ * For a stale BLI, process all the necessary completions that must be
+ * performed when the final BLI reference goes away. The buffer will be
+ * referenced and locked here - we return to the caller with the buffer
+ * still referenced and locked so it can finalise processing.
+ */
+static void
+xfs_buf_item_finish_stale(
+ struct xfs_buf_log_item *bip)
+{
+ struct xfs_buf *bp = bip->bli_buf;
+ struct xfs_log_item *lip = &bip->bli_item;
+
+ ASSERT(bip->bli_flags & XFS_BLI_STALE);
+ ASSERT(xfs_buf_islocked(bp));
+ ASSERT(bp->b_flags & XBF_STALE);
+ ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
+ ASSERT(list_empty(&lip->li_trans));
+ ASSERT(!bp->b_transp);
+
+ if (bip->bli_flags & XFS_BLI_STALE_INODE) {
+ xfs_buf_item_done(bp);
+ xfs_buf_inode_iodone(bp);
+ ASSERT(list_empty(&bp->b_li_list));
+ return;
+ }
+
+ /*
+ * We may or may not be on the AIL here; xfs_trans_ail_delete() will do
+ * the right thing regardless of the situation in which we are called.
+ */
+ xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
+ xfs_buf_item_relse(bip);
+ ASSERT(bp->b_log_item == NULL);
+}
+
+/*
* This is called to unpin the buffer associated with the buf log item which was
* previously pinned with a call to xfs_buf_item_pin(). We enter this function
* with a buffer pin count, a buffer reference and a BLI reference.
@@ -438,13 +529,6 @@ xfs_buf_item_unpin(
}
if (stale) {
- ASSERT(bip->bli_flags & XFS_BLI_STALE);
- ASSERT(xfs_buf_islocked(bp));
- ASSERT(bp->b_flags & XBF_STALE);
- ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
- ASSERT(list_empty(&lip->li_trans));
- ASSERT(!bp->b_transp);
-
trace_xfs_buf_item_unpin_stale(bip);
/*
@@ -455,22 +539,7 @@ xfs_buf_item_unpin(
* processing is complete.
*/
xfs_buf_rele(bp);
-
- /*
- * If we get called here because of an IO error, we may or may
- * not have the item on the AIL. xfs_trans_ail_delete() will
- * take care of that situation. xfs_trans_ail_delete() drops
- * the AIL lock.
- */
- if (bip->bli_flags & XFS_BLI_STALE_INODE) {
- xfs_buf_item_done(bp);
- xfs_buf_inode_iodone(bp);
- ASSERT(list_empty(&bp->b_li_list));
- } else {
- xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
- xfs_buf_item_relse(bp);
- ASSERT(bp->b_log_item == NULL);
- }
+ xfs_buf_item_finish_stale(bip);
xfs_buf_relse(bp);
return;
}
@@ -543,43 +612,42 @@ xfs_buf_item_push(
* Drop the buffer log item refcount and take appropriate action. This helper
* determines whether the bli must be freed or not, since a decrement to zero
* does not necessarily mean the bli is unused.
- *
- * Return true if the bli is freed, false otherwise.
*/
-bool
+void
xfs_buf_item_put(
struct xfs_buf_log_item *bip)
{
- struct xfs_log_item *lip = &bip->bli_item;
- bool aborted;
- bool dirty;
+
+ ASSERT(xfs_buf_islocked(bip->bli_buf));
/* drop the bli ref and return if it wasn't the last one */
if (!atomic_dec_and_test(&bip->bli_refcount))
- return false;
+ return;
- /*
- * We dropped the last ref and must free the item if clean or aborted.
- * If the bli is dirty and non-aborted, the buffer was clean in the
- * transaction but still awaiting writeback from previous changes. In
- * that case, the bli is freed on buffer writeback completion.
- */
- aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) ||
- xlog_is_shutdown(lip->li_log);
- dirty = bip->bli_flags & XFS_BLI_DIRTY;
- if (dirty && !aborted)
- return false;
+ /* If the BLI is in the AIL, then it is still dirty and in use */
+ if (test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)) {
+ ASSERT(bip->bli_flags & XFS_BLI_DIRTY);
+ return;
+ }
/*
- * The bli is aborted or clean. An aborted item may be in the AIL
- * regardless of dirty state. For example, consider an aborted
- * transaction that invalidated a dirty bli and cleared the dirty
- * state.
+ * In shutdown conditions, we can be asked to free a dirty BLI that
+ * isn't in the AIL. This can occur due to a checkpoint aborting a BLI
+ * instead of inserting it into the AIL at checkpoint IO completion. If
+ * there's another bli reference (e.g. a btree cursor holds a clean
+ * reference) and it is released via xfs_trans_brelse(), we can get here
+ * with that aborted, dirty BLI. In this case, it is safe to free the
+ * dirty BLI immediately, as it is not in the AIL and there are no
+ * other references to it.
+ *
+ * We should never get here with a stale BLI via that path as
+ * xfs_trans_brelse() specifically holds onto stale buffers rather than
+ * releasing them.
*/
- if (aborted)
- xfs_trans_ail_delete(lip, 0);
- xfs_buf_item_relse(bip->bli_buf);
- return true;
+ ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY) ||
+ test_bit(XFS_LI_ABORTED, &bip->bli_item.li_flags));
+ ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
+ xfs_buf_item_relse(bip);
}
/*
@@ -600,6 +668,15 @@ xfs_buf_item_put(
* if necessary but do not unlock the buffer. This is for support of
* xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't
* free the item.
+ *
+ * If the XFS_BLI_STALE flag is set, the last reference to the BLI *must*
+ * perform a completion abort of any objects attached to the buffer for IO
+ * tracking purposes. This generally only happens in shutdown situations;
+ * normally xfs_buf_item_unpin() drops the last BLI reference and performs
+ * completion processing. However, because transaction completion can race with
+ * checkpoint completion during a shutdown, this release context may end up
+ * being the last active reference to the BLI and so needs to perform this
+ * cleanup.
*/
STATIC void
xfs_buf_item_release(
@@ -607,18 +684,19 @@ xfs_buf_item_release(
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
- bool released;
bool hold = bip->bli_flags & XFS_BLI_HOLD;
bool stale = bip->bli_flags & XFS_BLI_STALE;
-#if defined(DEBUG) || defined(XFS_WARN)
- bool ordered = bip->bli_flags & XFS_BLI_ORDERED;
- bool dirty = bip->bli_flags & XFS_BLI_DIRTY;
bool aborted = test_bit(XFS_LI_ABORTED,
&lip->li_flags);
+ bool dirty = bip->bli_flags & XFS_BLI_DIRTY;
+#if defined(DEBUG) || defined(XFS_WARN)
+ bool ordered = bip->bli_flags & XFS_BLI_ORDERED;
#endif
trace_xfs_buf_item_release(bip);
+ ASSERT(xfs_buf_islocked(bp));
+
/*
* The bli dirty state should match whether the blf has logged segments
* except for ordered buffers, where only the bli should be dirty.
@@ -634,16 +712,56 @@ xfs_buf_item_release(
bp->b_transp = NULL;
bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);
+ /* If there are other references, then we have nothing to do. */
+ if (!atomic_dec_and_test(&bip->bli_refcount))
+ goto out_release;
+
+ /*
+ * Stale buffer completion frees the BLI, unlocks and releases the
+ * buffer. Neither the BLI nor the buffer is safe to reference after this
+ * call, so there's nothing more we need to do here.
+ *
+ * If we get here with a stale buffer and references to the BLI remain,
+ * we must not unlock the buffer as the last BLI reference owns lock
+ * context, not us.
+ */
+ if (stale) {
+ xfs_buf_item_finish_stale(bip);
+ xfs_buf_relse(bp);
+ ASSERT(!hold);
+ return;
+ }
+
/*
- * Unref the item and unlock the buffer unless held or stale. Stale
- * buffers remain locked until final unpin unless the bli is freed by
- * the unref call. The latter implies shutdown because buffer
- * invalidation dirties the bli and transaction.
+ * Dirty or clean, aborted items are done and need to be removed from
+ * the AIL and released. This frees the BLI, but leaves the buffer
+ * locked and referenced.
*/
- released = xfs_buf_item_put(bip);
- if (hold || (stale && !released))
+ if (aborted || xlog_is_shutdown(lip->li_log)) {
+ ASSERT(list_empty(&bip->bli_buf->b_li_list));
+ xfs_buf_item_done(bp);
+ goto out_release;
+ }
+
+ /*
+ * Clean, unreferenced BLIs can be immediately freed, leaving the buffer
+ * locked and referenced.
+ *
+ * Dirty, unreferenced BLIs *must* be in the AIL awaiting writeback.
+ */
+ if (!dirty)
+ xfs_buf_item_relse(bip);
+ else
+ ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags));
+
+ /* Not safe to reference the BLI from here */
+out_release:
+ /*
+ * If we get here with a stale buffer, we must not unlock the
+ * buffer as the last BLI reference owns lock context, not us.
+ */
+ if (stale || hold)
return;
- ASSERT(!stale || aborted);
xfs_buf_relse(bp);
}
@@ -729,33 +847,6 @@ static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_push = xfs_buf_item_push,
};
-STATIC void
-xfs_buf_item_get_format(
- struct xfs_buf_log_item *bip,
- int count)
-{
- ASSERT(bip->bli_formats == NULL);
- bip->bli_format_count = count;
-
- if (count == 1) {
- bip->bli_formats = &bip->__bli_format;
- return;
- }
-
- bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format),
- GFP_KERNEL | __GFP_NOFAIL);
-}
-
-STATIC void
-xfs_buf_item_free_format(
- struct xfs_buf_log_item *bip)
-{
- if (bip->bli_formats != &bip->__bli_format) {
- kfree(bip->bli_formats);
- bip->bli_formats = NULL;
- }
-}
-
/*
* Allocate a new buf log item to go with the given buffer.
* Set the buffer's b_log_item field to point to the new
@@ -976,34 +1067,6 @@ xfs_buf_item_dirty_format(
return false;
}
-STATIC void
-xfs_buf_item_free(
- struct xfs_buf_log_item *bip)
-{
- xfs_buf_item_free_format(bip);
- kvfree(bip->bli_item.li_lv_shadow);
- kmem_cache_free(xfs_buf_item_cache, bip);
-}
-
-/*
- * xfs_buf_item_relse() is called when the buf log item is no longer needed.
- */
-void
-xfs_buf_item_relse(
- struct xfs_buf *bp)
-{
- struct xfs_buf_log_item *bip = bp->b_log_item;
-
- trace_xfs_buf_item_relse(bp, _RET_IP_);
- ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
-
- if (atomic_read(&bip->bli_refcount))
- return;
- bp->b_log_item = NULL;
- xfs_buf_rele(bp);
- xfs_buf_item_free(bip);
-}
-
void
xfs_buf_item_done(
struct xfs_buf *bp)
@@ -1023,5 +1086,5 @@ xfs_buf_item_done(
xfs_trans_ail_delete(&bp->b_log_item->bli_item,
(bp->b_flags & _XBF_LOGRECOVERY) ? 0 :
SHUTDOWN_CORRUPT_INCORE);
- xfs_buf_item_relse(bp);
+ xfs_buf_item_relse(bp->b_log_item);
}
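Editor's note: pulling the restructured release logic together, the last BLI reference is now dispatched by state rather than by the old released/hold flag dance. An editorial summary of the branches in xfs_buf_item_release() and xfs_buf_item_unpin() above:

    /*
     * Disposition of the final BLI reference (summary, not kernel code):
     *
     *   XFS_BLI_STALE set        -> xfs_buf_item_finish_stale(); the
     *                               buffer is unlocked and released too
     *   aborted / log shut down  -> xfs_buf_item_done()
     *   clean                    -> xfs_buf_item_relse() frees the BLI;
     *                               the buffer stays locked for the caller
     *   dirty                    -> BLI stays alive and must already be
     *                               in the AIL awaiting writeback
     */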
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index e10e324cd245..3159325dd17b 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -49,8 +49,7 @@ struct xfs_buf_log_item {
int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
void xfs_buf_item_done(struct xfs_buf *bp);
-void xfs_buf_item_relse(struct xfs_buf *);
-bool xfs_buf_item_put(struct xfs_buf_log_item *);
+void xfs_buf_item_put(struct xfs_buf_log_item *bip);
void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
void xfs_buf_inode_iodone(struct xfs_buf *);
@@ -62,7 +61,7 @@ static inline void xfs_buf_dquot_iodone(struct xfs_buf *bp)
}
#endif /* CONFIG_XFS_QUOTA */
void xfs_buf_iodone(struct xfs_buf *);
-bool xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec);
+bool xfs_buf_log_check_iovec(struct kvec *iovec);
unsigned int xfs_buf_inval_log_space(unsigned int map_count,
unsigned int blocksize);
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index d4c5cef5bc43..5d58e2ae4972 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -159,7 +159,7 @@ STATIC enum xlog_recover_reorder
xlog_recover_buf_reorder(
struct xlog_recover_item *item)
{
- struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr;
+ struct xfs_buf_log_format *buf_f = item->ri_buf[0].iov_base;
if (buf_f->blf_flags & XFS_BLF_CANCEL)
return XLOG_REORDER_CANCEL_LIST;
@@ -173,7 +173,7 @@ xlog_recover_buf_ra_pass2(
struct xlog *log,
struct xlog_recover_item *item)
{
- struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr;
+ struct xfs_buf_log_format *buf_f = item->ri_buf[0].iov_base;
xlog_buf_readahead(log, buf_f->blf_blkno, buf_f->blf_len, NULL);
}
@@ -187,11 +187,11 @@ xlog_recover_buf_commit_pass1(
struct xlog *log,
struct xlog_recover_item *item)
{
- struct xfs_buf_log_format *bf = item->ri_buf[0].i_addr;
+ struct xfs_buf_log_format *bf = item->ri_buf[0].iov_base;
if (!xfs_buf_log_check_iovec(&item->ri_buf[0])) {
- xfs_err(log->l_mp, "bad buffer log item size (%d)",
- item->ri_buf[0].i_len);
+ xfs_err(log->l_mp, "bad buffer log item size (%zd)",
+ item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
@@ -487,8 +487,8 @@ xlog_recover_do_reg_buffer(
nbits = xfs_contig_bits(buf_f->blf_data_map,
buf_f->blf_map_size, bit);
ASSERT(nbits > 0);
- ASSERT(item->ri_buf[i].i_addr != NULL);
- ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
+ ASSERT(item->ri_buf[i].iov_base != NULL);
+ ASSERT(item->ri_buf[i].iov_len % XFS_BLF_CHUNK == 0);
ASSERT(BBTOB(bp->b_length) >=
((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
@@ -500,8 +500,8 @@ xlog_recover_do_reg_buffer(
* the log. Hence we need to trim nbits back to the length of
* the current region being copied out of the log.
*/
- if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
- nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
+ if (item->ri_buf[i].iov_len < (nbits << XFS_BLF_SHIFT))
+ nbits = item->ri_buf[i].iov_len >> XFS_BLF_SHIFT;
/*
* Do a sanity check if this is a dquot buffer. Just checking
@@ -511,18 +511,18 @@ xlog_recover_do_reg_buffer(
fa = NULL;
if (buf_f->blf_flags &
(XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
- if (item->ri_buf[i].i_addr == NULL) {
+ if (item->ri_buf[i].iov_base == NULL) {
xfs_alert(mp,
"XFS: NULL dquot in %s.", __func__);
goto next;
}
- if (item->ri_buf[i].i_len < size_disk_dquot) {
+ if (item->ri_buf[i].iov_len < size_disk_dquot) {
xfs_alert(mp,
- "XFS: dquot too small (%d) in %s.",
- item->ri_buf[i].i_len, __func__);
+ "XFS: dquot too small (%zd) in %s.",
+ item->ri_buf[i].iov_len, __func__);
goto next;
}
- fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr, -1);
+ fa = xfs_dquot_verify(mp, item->ri_buf[i].iov_base, -1);
if (fa) {
xfs_alert(mp,
"dquot corrupt at %pS trying to replay into block 0x%llx",
@@ -533,7 +533,7 @@ xlog_recover_do_reg_buffer(
memcpy(xfs_buf_offset(bp,
(uint)bit << XFS_BLF_SHIFT), /* dest */
- item->ri_buf[i].i_addr, /* source */
+ item->ri_buf[i].iov_base, /* source */
nbits<<XFS_BLF_SHIFT); /* length */
next:
i++;
@@ -669,8 +669,8 @@ xlog_recover_do_inode_buffer(
if (next_unlinked_offset < reg_buf_offset)
continue;
- ASSERT(item->ri_buf[item_index].i_addr != NULL);
- ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
+ ASSERT(item->ri_buf[item_index].iov_base != NULL);
+ ASSERT((item->ri_buf[item_index].iov_len % XFS_BLF_CHUNK) == 0);
ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length));
/*
@@ -678,7 +678,7 @@ xlog_recover_do_inode_buffer(
* current di_next_unlinked field. Extract its value
* and copy it to the buffer copy.
*/
- logged_nextp = item->ri_buf[item_index].i_addr +
+ logged_nextp = item->ri_buf[item_index].iov_base +
next_unlinked_offset - reg_buf_offset;
if (XFS_IS_CORRUPT(mp, *logged_nextp == 0)) {
xfs_alert(mp,
@@ -1002,7 +1002,7 @@ xlog_recover_buf_commit_pass2(
struct xlog_recover_item *item,
xfs_lsn_t current_lsn)
{
- struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr;
+ struct xfs_buf_log_format *buf_f = item->ri_buf[0].iov_base;
struct xfs_mount *mp = log->l_mp;
struct xfs_buf *bp;
int error;
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 94d0873bcd62..ee49f20875af 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -103,24 +103,6 @@ xfs_discard_endio(
bio_put(bio);
}
-static inline struct block_device *
-xfs_group_bdev(
- const struct xfs_group *xg)
-{
- struct xfs_mount *mp = xg->xg_mount;
-
- switch (xg->xg_type) {
- case XG_TYPE_AG:
- return mp->m_ddev_targp->bt_bdev;
- case XG_TYPE_RTG:
- return mp->m_rtdev_targp->bt_bdev;
- default:
- ASSERT(0);
- break;
- }
- return NULL;
-}
-
/*
* Walk the discard list and issue discards on all the busy extents in the
* list. We plug and chain the bios so that we only need a single completion
@@ -138,11 +120,14 @@ xfs_discard_extents(
blk_start_plug(&plug);
list_for_each_entry(busyp, &extents->extent_list, list) {
- trace_xfs_discard_extent(busyp->group, busyp->bno,
- busyp->length);
+ struct xfs_group *xg = busyp->group;
+ struct xfs_buftarg *btp =
+ xfs_group_type_buftarg(xg->xg_mount, xg->xg_type);
- error = __blkdev_issue_discard(xfs_group_bdev(busyp->group),
- xfs_gbno_to_daddr(busyp->group, busyp->bno),
+ trace_xfs_discard_extent(xg, busyp->bno, busyp->length);
+
+ error = __blkdev_issue_discard(btp->bt_bdev,
+ xfs_gbno_to_daddr(xg, busyp->bno),
XFS_FSB_TO_BB(mp, busyp->length),
GFP_KERNEL, &bio);
if (error && error != -EOPNOTSUPP) {
@@ -204,9 +189,7 @@ xfs_trim_gather_extents(
*/
xfs_log_force(mp, XFS_LOG_SYNC);
- error = xfs_trans_alloc_empty(mp, &tp);
- if (error)
- return error;
+ tp = xfs_trans_alloc_empty(mp);
error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
if (error)
@@ -598,9 +581,7 @@ xfs_trim_rtextents(
struct xfs_trans *tp;
int error;
- error = xfs_trans_alloc_empty(mp, &tp);
- if (error)
- return error;
+ tp = xfs_trans_alloc_empty(mp);
/*
* Walk the free ranges between low and high. The query_range function
@@ -716,9 +697,7 @@ xfs_trim_rtgroup_extents(
struct xfs_trans *tp;
int error;
- error = xfs_trans_alloc_empty(mp, &tp);
- if (error)
- return error;
+ tp = xfs_trans_alloc_empty(mp);
/*
* Walk the free ranges between low and high. The query_range function
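
This file shows the first of many identical conversions in this series: xfs_trans_alloc_empty() now hands back the transaction directly instead of an error code. A sketch of the convention change, assuming empty transactions carry no log reservation so the underlying allocation can use __GFP_NOFAIL and never fails:

	/* before: three lines of dead error handling per call site */
	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	/* after: the allocation cannot fail, so just take the transaction */
	tp = xfs_trans_alloc_empty(mp);

Callers still release the transaction with xfs_trans_cancel(tp) as before; only the allocation path changes.
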
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index b4e32f0860b7..0bd8022e47b4 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -1398,11 +1398,9 @@ xfs_qm_dqflush(
ASSERT(XFS_DQ_IS_LOCKED(dqp));
ASSERT(!completion_done(&dqp->q_flush));
+ ASSERT(atomic_read(&dqp->q_pincount) == 0);
trace_xfs_dqflush(dqp);
-
- xfs_qm_dqunpin_wait(dqp);
-
fa = xfs_qm_dqflush_check(dqp);
if (fa) {
xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
index 2c2720ce6923..89bc9bcaf51e 100644
--- a/fs/xfs/xfs_dquot_item_recover.c
+++ b/fs/xfs/xfs_dquot_item_recover.c
@@ -34,10 +34,10 @@ xlog_recover_dquot_ra_pass2(
if (mp->m_qflags == 0)
return;
- recddq = item->ri_buf[1].i_addr;
+ recddq = item->ri_buf[1].iov_base;
if (recddq == NULL)
return;
- if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot))
+ if (item->ri_buf[1].iov_len < sizeof(struct xfs_disk_dquot))
return;
type = recddq->d_type & XFS_DQTYPE_REC_MASK;
@@ -45,7 +45,7 @@ xlog_recover_dquot_ra_pass2(
if (log->l_quotaoffs_flag & type)
return;
- dq_f = item->ri_buf[0].i_addr;
+ dq_f = item->ri_buf[0].iov_base;
ASSERT(dq_f);
ASSERT(dq_f->qlf_len == 1);
@@ -79,14 +79,14 @@ xlog_recover_dquot_commit_pass2(
if (mp->m_qflags == 0)
return 0;
- recddq = item->ri_buf[1].i_addr;
+ recddq = item->ri_buf[1].iov_base;
if (recddq == NULL) {
xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
return -EFSCORRUPTED;
}
- if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) {
- xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
- item->ri_buf[1].i_len, __func__);
+ if (item->ri_buf[1].iov_len < sizeof(struct xfs_disk_dquot)) {
+ xfs_alert(log->l_mp, "dquot too small (%zd) in %s.",
+ item->ri_buf[1].iov_len, __func__);
return -EFSCORRUPTED;
}
@@ -108,7 +108,7 @@ xlog_recover_dquot_commit_pass2(
* The other possibility, of course, is that the quota subsystem was
* removed since the last mount - ENOSYS.
*/
- dq_f = item->ri_buf[0].i_addr;
+ dq_f = item->ri_buf[0].iov_base;
ASSERT(dq_f);
fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id);
if (fa) {
@@ -147,7 +147,7 @@ xlog_recover_dquot_commit_pass2(
}
}
- memcpy(ddq, recddq, item->ri_buf[1].i_len);
+ memcpy(ddq, recddq, item->ri_buf[1].iov_len);
if (xfs_has_crc(mp)) {
xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
XFS_DQUOT_CRC_OFF);
@@ -192,7 +192,7 @@ xlog_recover_quotaoff_commit_pass1(
struct xlog *log,
struct xlog_recover_item *item)
{
- struct xfs_qoff_logformat *qoff_f = item->ri_buf[0].i_addr;
+ struct xfs_qoff_logformat *qoff_f = item->ri_buf[0].iov_base;
ASSERT(qoff_f);
/*
diff --git a/fs/xfs/xfs_exchmaps_item.c b/fs/xfs/xfs_exchmaps_item.c
index 264a121c5e16..229cbe0adf17 100644
--- a/fs/xfs/xfs_exchmaps_item.c
+++ b/fs/xfs/xfs_exchmaps_item.c
@@ -558,12 +558,12 @@ xlog_recover_xmi_commit_pass2(
size_t len;
len = sizeof(struct xfs_xmi_log_format);
- if (item->ri_buf[0].i_len != len) {
+ if (item->ri_buf[0].iov_len != len) {
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
return -EFSCORRUPTED;
}
- xmi_formatp = item->ri_buf[0].i_addr;
+ xmi_formatp = item->ri_buf[0].iov_base;
if (xmi_formatp->__pad != 0) {
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
return -EFSCORRUPTED;
@@ -598,8 +598,8 @@ xlog_recover_xmd_commit_pass2(
{
struct xfs_xmd_log_format *xmd_formatp;
- xmd_formatp = item->ri_buf[0].i_addr;
- if (item->ri_buf[0].i_len != sizeof(struct xfs_xmd_log_format)) {
+ xmd_formatp = item->ri_buf[0].iov_base;
+ if (item->ri_buf[0].iov_len != sizeof(struct xfs_xmd_log_format)) {
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h
index f069b04e8ea1..3e6e019b6146 100644
--- a/fs/xfs/xfs_extent_busy.h
+++ b/fs/xfs/xfs_extent_busy.h
@@ -68,4 +68,12 @@ static inline void xfs_extent_busy_sort(struct list_head *list)
list_sort(NULL, list, xfs_extent_busy_ag_cmp);
}
+/*
+ * Zoned RTGs don't need to track busy extents, as the actual block freeing only
+ * happens by a zone reset, which forces out all transactions that touched the
+ * to-be-reset zone first.
+ */
+#define xfs_group_has_extent_busy(mp, type) \
+ ((type) == XG_TYPE_AG || !xfs_has_zoned((mp)))
+
#endif /* __XFS_EXTENT_BUSY_H__ */
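
A hedged sketch of how the new predicate is meant to gate the free path; the call site below is illustrative only, and the xfs_extent_busy_insert() signature is assumed from context rather than taken from this series:

	/* only track busy extents where block reuse needs fencing */
	if (xfs_group_has_extent_busy(mp, xg->xg_type))
		xfs_extent_busy_insert(tp, xg, bno, len, 0);
	/* zoned RTGs skip tracking: the zone reset that actually frees
	 * the blocks already forces out every transaction that touched
	 * the zone, so there is nothing left to fence */
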
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index d574f5f639fa..47ee598a9827 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -182,15 +182,18 @@ xfs_efi_init(
* It will handle the conversion of formats if necessary.
*/
STATIC int
-xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
+xfs_efi_copy_format(
+ struct kvec *buf,
+ struct xfs_efi_log_format *dst_efi_fmt)
{
- xfs_efi_log_format_t *src_efi_fmt = buf->i_addr;
- uint i;
- uint len = xfs_efi_log_format_sizeof(src_efi_fmt->efi_nextents);
- uint len32 = xfs_efi_log_format32_sizeof(src_efi_fmt->efi_nextents);
- uint len64 = xfs_efi_log_format64_sizeof(src_efi_fmt->efi_nextents);
+ struct xfs_efi_log_format *src_efi_fmt = buf->iov_base;
+ uint len, len32, len64, i;
- if (buf->i_len == len) {
+ len = xfs_efi_log_format_sizeof(src_efi_fmt->efi_nextents);
+ len32 = xfs_efi_log_format32_sizeof(src_efi_fmt->efi_nextents);
+ len64 = xfs_efi_log_format64_sizeof(src_efi_fmt->efi_nextents);
+
+ if (buf->iov_len == len) {
memcpy(dst_efi_fmt, src_efi_fmt,
offsetof(struct xfs_efi_log_format, efi_extents));
for (i = 0; i < src_efi_fmt->efi_nextents; i++)
@@ -198,8 +201,8 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
&src_efi_fmt->efi_extents[i],
sizeof(struct xfs_extent));
return 0;
- } else if (buf->i_len == len32) {
- xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->i_addr;
+ } else if (buf->iov_len == len32) {
+ xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->iov_base;
dst_efi_fmt->efi_type = src_efi_fmt_32->efi_type;
dst_efi_fmt->efi_size = src_efi_fmt_32->efi_size;
@@ -212,8 +215,8 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
src_efi_fmt_32->efi_extents[i].ext_len;
}
return 0;
- } else if (buf->i_len == len64) {
- xfs_efi_log_format_64_t *src_efi_fmt_64 = buf->i_addr;
+ } else if (buf->iov_len == len64) {
+ xfs_efi_log_format_64_t *src_efi_fmt_64 = buf->iov_base;
dst_efi_fmt->efi_type = src_efi_fmt_64->efi_type;
dst_efi_fmt->efi_size = src_efi_fmt_64->efi_size;
@@ -227,8 +230,8 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
}
return 0;
}
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, NULL, buf->i_addr,
- buf->i_len);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, NULL, buf->iov_base,
+ buf->iov_len);
return -EFSCORRUPTED;
}
@@ -865,11 +868,11 @@ xlog_recover_efi_commit_pass2(
struct xfs_efi_log_format *efi_formatp;
int error;
- efi_formatp = item->ri_buf[0].i_addr;
+ efi_formatp = item->ri_buf[0].iov_base;
- if (item->ri_buf[0].i_len < xfs_efi_log_format_sizeof(0)) {
+ if (item->ri_buf[0].iov_len < xfs_efi_log_format_sizeof(0)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
@@ -904,11 +907,11 @@ xlog_recover_rtefi_commit_pass2(
struct xfs_efi_log_format *efi_formatp;
int error;
- efi_formatp = item->ri_buf[0].i_addr;
+ efi_formatp = item->ri_buf[0].iov_base;
- if (item->ri_buf[0].i_len < xfs_efi_log_format_sizeof(0)) {
+ if (item->ri_buf[0].iov_len < xfs_efi_log_format_sizeof(0)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
@@ -933,7 +936,7 @@ xlog_recover_rtefi_commit_pass2(
xfs_lsn_t lsn)
{
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
#endif
@@ -958,9 +961,9 @@ xlog_recover_efd_commit_pass2(
xfs_lsn_t lsn)
{
struct xfs_efd_log_format *efd_formatp;
- int buflen = item->ri_buf[0].i_len;
+ int buflen = item->ri_buf[0].iov_len;
- efd_formatp = item->ri_buf[0].i_addr;
+ efd_formatp = item->ri_buf[0].iov_base;
if (buflen < sizeof(struct xfs_efd_log_format)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
@@ -968,9 +971,9 @@ xlog_recover_efd_commit_pass2(
return -EFSCORRUPTED;
}
- if (item->ri_buf[0].i_len != xfs_efd_log_format32_sizeof(
+ if (item->ri_buf[0].iov_len != xfs_efd_log_format32_sizeof(
efd_formatp->efd_nextents) &&
- item->ri_buf[0].i_len != xfs_efd_log_format64_sizeof(
+ item->ri_buf[0].iov_len != xfs_efd_log_format64_sizeof(
efd_formatp->efd_nextents)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
efd_formatp, buflen);
@@ -995,9 +998,9 @@ xlog_recover_rtefd_commit_pass2(
xfs_lsn_t lsn)
{
struct xfs_efd_log_format *efd_formatp;
- int buflen = item->ri_buf[0].i_len;
+ int buflen = item->ri_buf[0].iov_len;
- efd_formatp = item->ri_buf[0].i_addr;
+ efd_formatp = item->ri_buf[0].iov_base;
if (buflen < sizeof(struct xfs_efd_log_format)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
@@ -1005,9 +1008,9 @@ xlog_recover_rtefd_commit_pass2(
return -EFSCORRUPTED;
}
- if (item->ri_buf[0].i_len != xfs_efd_log_format32_sizeof(
+ if (item->ri_buf[0].iov_len != xfs_efd_log_format32_sizeof(
efd_formatp->efd_nextents) &&
- item->ri_buf[0].i_len != xfs_efd_log_format64_sizeof(
+ item->ri_buf[0].iov_len != xfs_efd_log_format64_sizeof(
efd_formatp->efd_nextents)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
efd_formatp, buflen);
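
xfs_efi_copy_format() distinguishes three on-disk encodings of the same variable-length record purely by length. A sketch of the sizing pattern behind the len/len32/len64 checks (the offsetof-plus-flex-array idiom; the exact macro bodies live in the XFS log format headers):

	/* size of an EFI log format carrying 'nextents' extents */
	len = offsetof(struct xfs_efi_log_format, efi_extents) +
	      nextents * sizeof(struct xfs_extent);

	/* len32 and len64 apply the same formula to the packed 32-bit
	 * and padded 64-bit extent layouts, so the three candidate
	 * sizes differ and the incoming iov_len selects the decoder */
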
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 48254a72071b..ed69a65f56d7 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -497,7 +497,7 @@ restart:
static ssize_t
xfs_zoned_write_space_reserve(
- struct xfs_inode *ip,
+ struct xfs_mount *mp,
struct kiocb *iocb,
struct iov_iter *from,
unsigned int flags,
@@ -533,8 +533,8 @@ xfs_zoned_write_space_reserve(
*
* Any remaining block will be returned after the write.
*/
- return xfs_zoned_space_reserve(ip,
- XFS_B_TO_FSB(ip->i_mount, count) + 1 + 2, flags, ac);
+ return xfs_zoned_space_reserve(mp, XFS_B_TO_FSB(mp, count) + 1 + 2,
+ flags, ac);
}
static int
@@ -718,13 +718,13 @@ xfs_file_dio_write_zoned(
struct xfs_zone_alloc_ctx ac = { };
ssize_t ret;
- ret = xfs_zoned_write_space_reserve(ip, iocb, from, 0, &ac);
+ ret = xfs_zoned_write_space_reserve(ip->i_mount, iocb, from, 0, &ac);
if (ret < 0)
return ret;
ret = xfs_file_dio_write_aligned(ip, iocb, from,
&xfs_zoned_direct_write_iomap_ops,
&xfs_dio_zoned_write_ops, &ac);
- xfs_zoned_space_unreserve(ip, &ac);
+ xfs_zoned_space_unreserve(ip->i_mount, &ac);
return ret;
}
@@ -752,7 +752,7 @@ xfs_file_dio_write_atomic(
* HW offload should be faster, so try that first if it is already
* known that the write length is not too large.
*/
- if (ocount > xfs_inode_buftarg(ip)->bt_bdev_awu_max)
+ if (ocount > xfs_inode_buftarg(ip)->bt_awu_max)
dops = &xfs_atomic_write_cow_iomap_ops;
else
dops = &xfs_direct_write_iomap_ops;
@@ -1032,7 +1032,7 @@ xfs_file_buffered_write_zoned(
struct xfs_zone_alloc_ctx ac = { };
ssize_t ret;
- ret = xfs_zoned_write_space_reserve(ip, iocb, from, XFS_ZR_GREEDY, &ac);
+ ret = xfs_zoned_write_space_reserve(mp, iocb, from, XFS_ZR_GREEDY, &ac);
if (ret < 0)
return ret;
@@ -1073,7 +1073,7 @@ retry:
out_unlock:
xfs_iunlock(ip, iolock);
out_unreserve:
- xfs_zoned_space_unreserve(ip, &ac);
+ xfs_zoned_space_unreserve(ip->i_mount, &ac);
if (ret > 0) {
XFS_STATS_ADD(mp, xs_write_bytes, ret);
ret = generic_write_sync(iocb, ret);
@@ -1335,9 +1335,10 @@ xfs_falloc_allocate_range(
}
#define XFS_FALLOC_FL_SUPPORTED \
- (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
- FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
- FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE)
+ (FALLOC_FL_ALLOCATE_RANGE | FALLOC_FL_KEEP_SIZE | \
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | \
+ FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE | \
+ FALLOC_FL_UNSHARE_RANGE)
STATIC long
__xfs_file_fallocate(
@@ -1413,11 +1414,11 @@ xfs_file_zoned_fallocate(
struct xfs_inode *ip = XFS_I(file_inode(file));
int error;
- error = xfs_zoned_space_reserve(ip, 2, XFS_ZR_RESERVED, &ac);
+ error = xfs_zoned_space_reserve(ip->i_mount, 2, XFS_ZR_RESERVED, &ac);
if (error)
return error;
error = __xfs_file_fallocate(file, mode, offset, len, &ac);
- xfs_zoned_space_unreserve(ip, &ac);
+ xfs_zoned_space_unreserve(ip->i_mount, &ac);
return error;
}
@@ -1827,12 +1828,12 @@ xfs_write_fault_zoned(
* But as the overallocation is limited to less than a folio and will be
* released instantly, that's just fine.
*/
- error = xfs_zoned_space_reserve(ip, XFS_B_TO_FSB(ip->i_mount, len), 0,
- &ac);
+ error = xfs_zoned_space_reserve(ip->i_mount,
+ XFS_B_TO_FSB(ip->i_mount, len), 0, &ac);
if (error < 0)
return vmf_fs_error(error);
ret = __xfs_write_fault(vmf, order, &ac);
- xfs_zoned_space_unreserve(ip, &ac);
+ xfs_zoned_space_unreserve(ip->i_mount, &ac);
return ret;
}
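
The XFS_FALLOC_FL_SUPPORTED hunk above looks like it enables a new operation, but FALLOC_FL_ALLOCATE_RANGE is the uapi name for plain mode-0 preallocation. Assuming the current include/uapi/linux/falloc.h definition, the change is numerically a no-op that makes default-mode support explicit:

	#include <linux/falloc.h>

	/* FALLOC_FL_ALLOCATE_RANGE is defined as 0x00, so OR-ing it
	 * into the supported mask adds no bits; it only documents
	 * that ordinary fallocate(fd, 0, off, len) is handled */
	_Static_assert(FALLOC_FL_ALLOCATE_RANGE == 0, "mode-0 alias");
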
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 414b27a86458..af68c7de8ee8 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -1270,9 +1270,7 @@ xfs_getfsmap(
* buffer locking abilities to detect cycles in the rmapbt
* without deadlocking.
*/
- error = xfs_trans_alloc_empty(mp, &tp);
- if (error)
- break;
+ tp = xfs_trans_alloc_empty(mp);
info.dev = handlers[i].dev;
info.last = false;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 726e29b837e6..4cf7abe50143 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -893,10 +893,7 @@ xfs_metafile_iget(
struct xfs_trans *tp;
int error;
- error = xfs_trans_alloc_empty(mp, &tp);
- if (error)
- return error;
-
+ tp = xfs_trans_alloc_empty(mp);
error = xfs_trans_metafile_iget(tp, ino, metafile_type, ipp);
xfs_trans_cancel(tp);
return error;
@@ -979,7 +976,15 @@ xfs_reclaim_inode(
*/
if (xlog_is_shutdown(ip->i_mount->m_log)) {
xfs_iunpin_wait(ip);
+ /*
+ * Avoid an ABBA deadlock on the inode cluster buffer vs
+ * concurrent xfs_ifree_cluster() trying to mark the inode
+ * stale. We don't need the inode locked to run the flush abort
+ * code, but the flush abort needs to lock the cluster buffer.
+ */
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_iflush_shutdown_abort(ip);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
goto reclaim;
}
if (xfs_ipincount(ip))
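
The unlock/relock pair added to xfs_reclaim_inode() breaks a classic two-lock inversion. Restating the comment above schematically, with the fix as it appears in the hunk:

	/* reclaim (old):        ILOCK held   -> wants cluster buffer lock
	 * xfs_ifree_cluster(): buffer held  -> wants the inode, to mark
	 *                                      it stale
	 *
	 * fix: reclaim drops the ILOCK around the one call that takes
	 * the buffer lock, since the flush abort does not need it */
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	xfs_iflush_shutdown_abort(ip);	/* may lock the cluster buffer */
	xfs_ilock(ip, XFS_ILOCK_EXCL);
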
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
index 4345db501714..f83ec2bd0583 100644
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -158,7 +158,7 @@ xlog_recover_icreate_commit_pass2(
int nbufs;
int i;
- icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
+ icl = (struct xfs_icreate_log *)item->ri_buf[0].iov_base;
if (icl->icl_type != XFS_LI_ICREATE) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
return -EINVAL;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ee3e0f284287..9c39251961a3 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1635,7 +1635,7 @@ retry:
iip = ip->i_itemp;
if (__xfs_iflags_test(ip, XFS_IFLUSHING)) {
ASSERT(!list_empty(&iip->ili_item.li_bio_list));
- ASSERT(iip->ili_last_fields);
+ ASSERT(iip->ili_last_fields || xlog_is_shutdown(mp->m_log));
goto out_iunlock;
}
@@ -2932,12 +2932,9 @@ xfs_inode_reload_unlinked(
struct xfs_inode *ip)
{
struct xfs_trans *tp;
- int error;
-
- error = xfs_trans_alloc_empty(ip->i_mount, &tp);
- if (error)
- return error;
+ int error = 0;
+ tp = xfs_trans_alloc_empty(ip->i_mount);
xfs_ilock(ip, XFS_ILOCK_SHARED);
if (xfs_inode_unlinked_incomplete(ip))
error = xfs_inode_reload_unlinked_bucket(tp, ip);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index d7e2b902ef5c..07fbdcc4cbf5 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -358,7 +358,7 @@ static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip)
static inline bool xfs_inode_can_hw_atomic_write(const struct xfs_inode *ip)
{
- return xfs_inode_buftarg(ip)->bt_bdev_awu_max > 0;
+ return xfs_inode_buftarg(ip)->bt_awu_max > 0;
}
/*
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index c6cb0b6b9e46..829675700fcd 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -758,11 +758,14 @@ xfs_inode_item_push(
* completed and items removed from the AIL before the next push
* attempt.
*/
+ trace_xfs_inode_push_stale(ip, _RET_IP_);
return XFS_ITEM_PINNED;
}
- if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp))
+ if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) {
+ trace_xfs_inode_push_pinned(ip, _RET_IP_);
return XFS_ITEM_PINNED;
+ }
if (xfs_iflags_test(ip, XFS_IFLUSHING))
return XFS_ITEM_FLUSHING;
@@ -1179,12 +1182,12 @@ xfs_iflush_shutdown_abort(
*/
int
xfs_inode_item_format_convert(
- struct xfs_log_iovec *buf,
+ struct kvec *buf,
struct xfs_inode_log_format *in_f)
{
- struct xfs_inode_log_format_32 *in_f32 = buf->i_addr;
+ struct xfs_inode_log_format_32 *in_f32 = buf->iov_base;
- if (buf->i_len != sizeof(*in_f32)) {
+ if (buf->iov_len != sizeof(*in_f32)) {
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 377e06007804..ba92ce11a011 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -46,8 +46,8 @@ extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
extern void xfs_inode_item_destroy(struct xfs_inode *);
extern void xfs_iflush_abort(struct xfs_inode *);
extern void xfs_iflush_shutdown_abort(struct xfs_inode *);
-extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
- struct xfs_inode_log_format *);
+int xfs_inode_item_format_convert(struct kvec *buf,
+ struct xfs_inode_log_format *in_f);
extern struct kmem_cache *xfs_ili_cache;
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index 7205fd14f6b3..9d1999d41be1 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -30,13 +30,13 @@ xlog_recover_inode_ra_pass2(
struct xlog *log,
struct xlog_recover_item *item)
{
- if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
- struct xfs_inode_log_format *ilfp = item->ri_buf[0].i_addr;
+ if (item->ri_buf[0].iov_len == sizeof(struct xfs_inode_log_format)) {
+ struct xfs_inode_log_format *ilfp = item->ri_buf[0].iov_base;
xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
&xfs_inode_buf_ra_ops);
} else {
- struct xfs_inode_log_format_32 *ilfp = item->ri_buf[0].i_addr;
+ struct xfs_inode_log_format_32 *ilfp = item->ri_buf[0].iov_base;
xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
&xfs_inode_buf_ra_ops);
@@ -326,8 +326,8 @@ xlog_recover_inode_commit_pass2(
int need_free = 0;
xfs_failaddr_t fa;
- if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
- in_f = item->ri_buf[0].i_addr;
+ if (item->ri_buf[0].iov_len == sizeof(struct xfs_inode_log_format)) {
+ in_f = item->ri_buf[0].iov_base;
} else {
in_f = kmalloc(sizeof(struct xfs_inode_log_format),
GFP_KERNEL | __GFP_NOFAIL);
@@ -366,7 +366,7 @@ xlog_recover_inode_commit_pass2(
error = -EFSCORRUPTED;
goto out_release;
}
- ldip = item->ri_buf[1].i_addr;
+ ldip = item->ri_buf[1].iov_base;
if (XFS_IS_CORRUPT(mp, ldip->di_magic != XFS_DINODE_MAGIC)) {
xfs_alert(mp,
"%s: Bad inode log record, rec ptr "PTR_FMT", ino %lld",
@@ -472,12 +472,12 @@ xlog_recover_inode_commit_pass2(
goto out_release;
}
isize = xfs_log_dinode_size(mp);
- if (unlikely(item->ri_buf[1].i_len > isize)) {
+ if (unlikely(item->ri_buf[1].iov_len > isize)) {
XFS_CORRUPTION_ERROR("Bad log dinode size", XFS_ERRLEVEL_LOW,
mp, ldip, sizeof(*ldip));
xfs_alert(mp,
- "Bad inode 0x%llx log dinode size 0x%x",
- in_f->ilf_ino, item->ri_buf[1].i_len);
+ "Bad inode 0x%llx log dinode size 0x%zx",
+ in_f->ilf_ino, item->ri_buf[1].iov_len);
error = -EFSCORRUPTED;
goto out_release;
}
@@ -500,8 +500,8 @@ xlog_recover_inode_commit_pass2(
if (in_f->ilf_size == 2)
goto out_owner_change;
- len = item->ri_buf[2].i_len;
- src = item->ri_buf[2].i_addr;
+ len = item->ri_buf[2].iov_len;
+ src = item->ri_buf[2].iov_base;
ASSERT(in_f->ilf_size <= 4);
ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
ASSERT(!(fields & XFS_ILOG_DFORK) ||
@@ -538,8 +538,8 @@ xlog_recover_inode_commit_pass2(
} else {
attr_index = 2;
}
- len = item->ri_buf[attr_index].i_len;
- src = item->ri_buf[attr_index].i_addr;
+ len = item->ri_buf[attr_index].iov_len;
+ src = item->ri_buf[attr_index].iov_base;
ASSERT(len == xlog_calc_iovec_len(in_f->ilf_asize));
switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d250f7f74e3b..c3e8c5c1084f 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -990,9 +990,8 @@ xfs_ioc_getlabel(
BUILD_BUG_ON(sizeof(sbp->sb_fname) > FSLABEL_MAX);
/* 1 larger than sb_fname, so this ensures a trailing NUL char */
- memset(label, 0, sizeof(label));
spin_lock(&mp->m_sb_lock);
- strncpy(label, sbp->sb_fname, XFSLABEL_MAX);
+ memtostr_pad(label, sbp->sb_fname);
spin_unlock(&mp->m_sb_lock);
if (copy_to_user(user_label, label, sizeof(label)))
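
memtostr_pad() collapses the old memset-then-strncpy pair into one helper built for fixed-size, possibly unterminated source arrays. A userspace sketch of the semantics being relied on, assuming memtostr_pad(dest, src) copies at most the size of the source array, always NUL-terminates, and zero-pads the remainder of dest:

	char label[FSLABEL_MAX + 1];	/* 1 larger than sb_fname */
	size_t n = strnlen(sbp->sb_fname, sizeof(sbp->sb_fname));

	memcpy(label, sbp->sb_fname, n);		/* the label bytes */
	memset(label + n, 0, sizeof(label) - n);	/* NUL + padding */

Unlike strncpy(), this cannot leave label unterminated when sb_fname uses all of its bytes, which is exactly the case the old code guarded against with the extra memset().
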
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ff05e6b1b0bb..ec30b78bf5c4 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -827,7 +827,7 @@ xfs_bmap_hw_atomic_write_possible(
/*
* The ->iomap_begin caller should ensure this, but check anyway.
*/
- return len <= xfs_inode_buftarg(ip)->bt_bdev_awu_max;
+ return len <= xfs_inode_buftarg(ip)->bt_awu_max;
}
static int
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 8cddbb7c149b..149b5460fbfd 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -665,7 +665,7 @@ xfs_get_atomic_write_max_opt(
* less than our out of place write limit, but we don't want to exceed
* the awu_max.
*/
- return min(awu_max, xfs_inode_buftarg(ip)->bt_bdev_awu_max);
+ return min(awu_max, xfs_inode_buftarg(ip)->bt_awu_max);
}
static void
@@ -970,7 +970,7 @@ xfs_setattr_size(
* change.
*/
if (xfs_is_zoned_inode(ip)) {
- error = xfs_zoned_space_reserve(ip, 1,
+ error = xfs_zoned_space_reserve(mp, 1,
XFS_ZR_NOWAIT | XFS_ZR_RESERVED, &ac);
if (error) {
if (error == -EAGAIN)
@@ -998,7 +998,7 @@ xfs_setattr_size(
}
if (xfs_is_zoned_inode(ip))
- xfs_zoned_space_unreserve(ip, &ac);
+ xfs_zoned_space_unreserve(mp, &ac);
if (error)
return error;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 1fa1c0564b0c..c8c9b8d8309f 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -239,14 +239,10 @@ xfs_bulkstat_one(
* Grab an empty transaction so that we can use its recursive buffer
* locking abilities to detect cycles in the inobt without deadlocking.
*/
- error = xfs_trans_alloc_empty(breq->mp, &tp);
- if (error)
- goto out;
-
+ tp = xfs_trans_alloc_empty(breq->mp);
error = xfs_bulkstat_one_int(breq->mp, breq->idmap, tp,
breq->startino, &bc);
xfs_trans_cancel(tp);
-out:
kfree(bc.buf);
/*
@@ -331,17 +327,13 @@ xfs_bulkstat(
* Grab an empty transaction so that we can use its recursive buffer
* locking abilities to detect cycles in the inobt without deadlocking.
*/
- error = xfs_trans_alloc_empty(breq->mp, &tp);
- if (error)
- goto out;
-
+ tp = xfs_trans_alloc_empty(breq->mp);
if (breq->flags & XFS_IBULK_SAME_AG)
iwalk_flags |= XFS_IWALK_SAME_AG;
error = xfs_iwalk(breq->mp, tp, breq->startino, iwalk_flags,
xfs_bulkstat_iwalk, breq->icount, &bc);
xfs_trans_cancel(tp);
-out:
kfree(bc.buf);
/*
@@ -464,14 +456,10 @@ xfs_inumbers(
* Grab an empty transaction so that we can use its recursive buffer
* locking abilities to detect cycles in the inobt without deadlocking.
*/
- error = xfs_trans_alloc_empty(breq->mp, &tp);
- if (error)
- goto out;
-
+ tp = xfs_trans_alloc_empty(breq->mp);
error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags,
xfs_inumbers_walk, breq->icount, &ic);
xfs_trans_cancel(tp);
-out:
/*
* We found some inode groups, so clear the error status and return
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
index 7db3ece370b1..c1c31d1a8e21 100644
--- a/fs/xfs/xfs_iwalk.c
+++ b/fs/xfs/xfs_iwalk.c
@@ -377,11 +377,8 @@ xfs_iwalk_run_callbacks(
if (!has_more)
return 0;
- if (iwag->drop_trans) {
- error = xfs_trans_alloc_empty(mp, &iwag->tp);
- if (error)
- return error;
- }
+ if (iwag->drop_trans)
+ iwag->tp = xfs_trans_alloc_empty(mp);
/* ...and recreate the cursor just past where we left off. */
error = xfs_ialloc_read_agi(iwag->pag, iwag->tp, 0, agi_bpp);
@@ -617,9 +614,7 @@ xfs_iwalk_ag_work(
* Grab an empty transaction so that we can use its recursive buffer
* locking abilities to detect cycles in the inobt without deadlocking.
*/
- error = xfs_trans_alloc_empty(mp, &iwag->tp);
- if (error)
- goto out;
+ iwag->tp = xfs_trans_alloc_empty(mp);
iwag->drop_trans = 1;
error = xfs_iwalk_ag(iwag);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 793468b4d30d..c8a57e21a1d3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -109,14 +109,14 @@ xlog_prepare_iovec(
vec = &lv->lv_iovecp[0];
}
- len = lv->lv_buf_len + sizeof(struct xlog_op_header);
+ len = lv->lv_buf_used + sizeof(struct xlog_op_header);
if (!IS_ALIGNED(len, sizeof(uint64_t))) {
- lv->lv_buf_len = round_up(len, sizeof(uint64_t)) -
+ lv->lv_buf_used = round_up(len, sizeof(uint64_t)) -
sizeof(struct xlog_op_header);
}
vec->i_type = type;
- vec->i_addr = lv->lv_buf + lv->lv_buf_len;
+ vec->i_addr = lv->lv_buf + lv->lv_buf_used;
oph = vec->i_addr;
oph->oh_clientid = XFS_TRANSACTION;
@@ -1931,9 +1931,9 @@ xlog_print_trans(
if (!lv)
continue;
xfs_warn(mp, " niovecs = %d", lv->lv_niovecs);
- xfs_warn(mp, " size = %d", lv->lv_size);
+ xfs_warn(mp, " alloc_size = %d", lv->lv_alloc_size);
xfs_warn(mp, " bytes = %d", lv->lv_bytes);
- xfs_warn(mp, " buf len = %d", lv->lv_buf_len);
+ xfs_warn(mp, " buf used= %d", lv->lv_buf_used);
/* dump each iovec for the log item */
vec = lv->lv_iovecp;
@@ -3092,16 +3092,16 @@ xfs_log_force_seq(
*/
void
xfs_log_ticket_put(
- xlog_ticket_t *ticket)
+ struct xlog_ticket *ticket)
{
ASSERT(atomic_read(&ticket->t_ref) > 0);
if (atomic_dec_and_test(&ticket->t_ref))
kmem_cache_free(xfs_log_ticket_cache, ticket);
}
-xlog_ticket_t *
+struct xlog_ticket *
xfs_log_ticket_get(
- xlog_ticket_t *ticket)
+ struct xlog_ticket *ticket)
{
ASSERT(atomic_read(&ticket->t_ref) > 0);
atomic_inc(&ticket->t_ref);
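
The lv_buf_used rounding in xlog_prepare_iovec() exists to keep each iovec payload 8-byte aligned after its opheader. A worked example, assuming a 12-byte struct xlog_op_header:

	/* suppose lv_buf_used is 50 when the next iovec is prepared */
	len = 50 + 12;				/* 62: not 8-byte aligned */
	lv_buf_used = round_up(62, 8) - 12;	/* 64 - 12 = 52 */

	/* vec->i_addr = lv_buf + 52; the opheader spans offsets 52..63,
	 * so the payload that follows starts at offset 64, a multiple
	 * of 8 as required */

The renames from lv_buf_len to lv_buf_used and lv_size to lv_alloc_size in the header hunk below make it clearer that these fields track consumption of the allocation, not its extent.
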
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 13455854365f..af6daf4f6792 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -16,8 +16,8 @@ struct xfs_log_vec {
struct xfs_log_item *lv_item; /* owner */
char *lv_buf; /* formatted buffer */
int lv_bytes; /* accounted space in buffer */
- int lv_buf_len; /* aligned size of buffer */
- int lv_size; /* size of allocated lv */
+ int lv_buf_used; /* buffer space used so far */
+ int lv_alloc_size; /* size of allocated lv */
};
#define XFS_LOG_VEC_ORDERED (-1)
@@ -64,12 +64,13 @@ xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec,
oph->oh_len = cpu_to_be32(len);
len += sizeof(struct xlog_op_header);
- lv->lv_buf_len += len;
+ lv->lv_buf_used += len;
lv->lv_bytes += len;
vec->i_len = len;
/* Catch buffer overruns */
- ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lv + lv->lv_size);
+ ASSERT((void *)lv->lv_buf + lv->lv_bytes <=
+ (void *)lv + lv->lv_alloc_size);
}
/*
@@ -87,13 +88,6 @@ xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
return buf;
}
-static inline void *
-xlog_copy_from_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
- const struct xfs_log_iovec *src)
-{
- return xlog_copy_iovec(lv, vecp, src->i_type, src->i_addr, src->i_len);
-}
-
/*
* By comparing each component, we don't have to worry about extra
* endian issues in treating two 32 bit numbers as one 64 bit number
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index f66d2d430e4f..f443757e93c2 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -275,7 +275,7 @@ xlog_cil_alloc_shadow_bufs(
struct xfs_log_vec *lv;
int niovecs = 0;
int nbytes = 0;
- int buf_size;
+ int alloc_size;
bool ordered = false;
/* Skip items which aren't dirty in this transaction. */
@@ -316,14 +316,14 @@ xlog_cil_alloc_shadow_bufs(
* that space to ensure we can align it appropriately and not
* overrun the buffer.
*/
- buf_size = nbytes + xlog_cil_iovec_space(niovecs);
+ alloc_size = nbytes + xlog_cil_iovec_space(niovecs);
/*
* if we have no shadow buffer, or it is too small, we need to
* reallocate it.
*/
if (!lip->li_lv_shadow ||
- buf_size > lip->li_lv_shadow->lv_size) {
+ alloc_size > lip->li_lv_shadow->lv_alloc_size) {
/*
* We free and allocate here as a realloc would copy
* unnecessary data. We don't use kvzalloc() for the
@@ -332,15 +332,15 @@ xlog_cil_alloc_shadow_bufs(
* storage.
*/
kvfree(lip->li_lv_shadow);
- lv = xlog_kvmalloc(buf_size);
+ lv = xlog_kvmalloc(alloc_size);
memset(lv, 0, xlog_cil_iovec_space(niovecs));
INIT_LIST_HEAD(&lv->lv_list);
lv->lv_item = lip;
- lv->lv_size = buf_size;
+ lv->lv_alloc_size = alloc_size;
if (ordered)
- lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
+ lv->lv_buf_used = XFS_LOG_VEC_ORDERED;
else
lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];
lip->li_lv_shadow = lv;
@@ -348,9 +348,9 @@ xlog_cil_alloc_shadow_bufs(
/* same or smaller, optimise common overwrite case */
lv = lip->li_lv_shadow;
if (ordered)
- lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
+ lv->lv_buf_used = XFS_LOG_VEC_ORDERED;
else
- lv->lv_buf_len = 0;
+ lv->lv_buf_used = 0;
lv->lv_bytes = 0;
}
@@ -370,30 +370,30 @@ xlog_cil_alloc_shadow_bufs(
STATIC void
xfs_cil_prepare_item(
struct xlog *log,
+ struct xfs_log_item *lip,
struct xfs_log_vec *lv,
- struct xfs_log_vec *old_lv,
int *diff_len)
{
/* Account for the new LV being passed in */
- if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED)
+ if (lv->lv_buf_used != XFS_LOG_VEC_ORDERED)
*diff_len += lv->lv_bytes;
/*
* If there is no old LV, this is the first time we've seen the item in
* this CIL context and so we need to pin it. If we are replacing the
- * old_lv, then remove the space it accounts for and make it the shadow
+ * old lv, then remove the space it accounts for and make it the shadow
* buffer for later freeing. In both cases we are now switching to the
* shadow buffer, so update the pointer to it appropriately.
*/
- if (!old_lv) {
+ if (!lip->li_lv) {
if (lv->lv_item->li_ops->iop_pin)
lv->lv_item->li_ops->iop_pin(lv->lv_item);
lv->lv_item->li_lv_shadow = NULL;
- } else if (old_lv != lv) {
- ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
+ } else if (lip->li_lv != lv) {
+ ASSERT(lv->lv_buf_used != XFS_LOG_VEC_ORDERED);
- *diff_len -= old_lv->lv_bytes;
- lv->lv_item->li_lv_shadow = old_lv;
+ *diff_len -= lip->li_lv->lv_bytes;
+ lv->lv_item->li_lv_shadow = lip->li_lv;
}
/* attach new log vector to log item */
@@ -452,10 +452,8 @@ xlog_cil_insert_format_items(
}
list_for_each_entry(lip, &tp->t_items, li_trans) {
- struct xfs_log_vec *lv;
- struct xfs_log_vec *old_lv = NULL;
- struct xfs_log_vec *shadow;
- bool ordered = false;
+ struct xfs_log_vec *lv = lip->li_lv;
+ struct xfs_log_vec *shadow = lip->li_lv_shadow;
/* Skip items which aren't dirty in this transaction. */
if (!test_bit(XFS_LI_DIRTY, &lip->li_flags))
@@ -465,22 +463,23 @@ xlog_cil_insert_format_items(
* The formatting size information is already attached to
* the shadow lv on the log item.
*/
- shadow = lip->li_lv_shadow;
- if (shadow->lv_buf_len == XFS_LOG_VEC_ORDERED)
- ordered = true;
+ if (shadow->lv_buf_used == XFS_LOG_VEC_ORDERED) {
+ if (!lv) {
+ lv = shadow;
+ lv->lv_item = lip;
+ }
+ ASSERT(shadow->lv_alloc_size == lv->lv_alloc_size);
+ xfs_cil_prepare_item(log, lip, lv, diff_len);
+ continue;
+ }
/* Skip items that do not have any vectors for writing */
- if (!shadow->lv_niovecs && !ordered)
+ if (!shadow->lv_niovecs)
continue;
/* compare to existing item size */
- old_lv = lip->li_lv;
- if (lip->li_lv && shadow->lv_size <= lip->li_lv->lv_size) {
+ if (lv && shadow->lv_alloc_size <= lv->lv_alloc_size) {
/* same or smaller, optimise common overwrite case */
- lv = lip->li_lv;
-
- if (ordered)
- goto insert;
/*
* set the item up as though it is a new insertion so
@@ -492,7 +491,7 @@ xlog_cil_insert_format_items(
lv->lv_niovecs = shadow->lv_niovecs;
/* reset the lv buffer information for new formatting */
- lv->lv_buf_len = 0;
+ lv->lv_buf_used = 0;
lv->lv_bytes = 0;
lv->lv_buf = (char *)lv +
xlog_cil_iovec_space(lv->lv_niovecs);
@@ -500,17 +499,11 @@ xlog_cil_insert_format_items(
/* switch to shadow buffer! */
lv = shadow;
lv->lv_item = lip;
- if (ordered) {
- /* track as an ordered logvec */
- ASSERT(lip->li_lv == NULL);
- goto insert;
- }
}
ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));
lip->li_ops->iop_format(lip, lv);
-insert:
- xfs_cil_prepare_item(log, lv, old_lv, diff_len);
+ xfs_cil_prepare_item(log, lip, lv, diff_len);
}
}
@@ -793,8 +786,10 @@ xlog_cil_ail_insert(
struct xfs_log_item *lip = lv->lv_item;
xfs_lsn_t item_lsn;
- if (aborted)
+ if (aborted) {
+ trace_xlog_ail_insert_abort(lip);
set_bit(XFS_LI_ABORTED, &lip->li_flags);
+ }
if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
lip->li_ops->iop_release(lip);
@@ -1243,7 +1238,7 @@ xlog_cil_build_lv_chain(
lv->lv_order_id = item->li_order_id;
/* we don't write ordered log vectors */
- if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED)
+ if (lv->lv_buf_used != XFS_LOG_VEC_ORDERED)
*num_bytes += lv->lv_bytes;
*num_iovecs += lv->lv_niovecs;
list_add_tail(&lv->lv_list, &ctx->lv_chain);
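
The CIL rework threads the log item itself into xfs_cil_prepare_item() instead of passing the old vector separately. The double-buffering invariant it maintains, condensed into one sketch from the hunks above:

	/* each dirty log item juggles two formatting buffers:
	 *   lip->li_lv         vector currently queued in the CIL
	 *   lip->li_lv_shadow  preallocated spare for the next relog
	 * after formatting into the (possibly new) lv: */
	if (!lip->li_lv) {			/* first CIL insertion: pin */
		if (lip->li_ops->iop_pin)
			lip->li_ops->iop_pin(lip);
		lip->li_lv_shadow = NULL;	/* spare became the active lv */
	} else if (lip->li_lv != lv) {		/* replaced the old lv */
		*diff_len -= lip->li_lv->lv_bytes;
		lip->li_lv_shadow = lip->li_lv;	/* old lv is the new spare */
	}
	lip->li_lv = lv;			/* attach the active vector */
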
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 39a102cc1b43..a9a7a271c15b 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -144,7 +144,7 @@ enum xlog_iclog_state {
#define XLOG_COVER_OPS 5
-typedef struct xlog_ticket {
+struct xlog_ticket {
struct list_head t_queue; /* reserve/write queue */
struct task_struct *t_task; /* task that owns this ticket */
xlog_tid_t t_tid; /* transaction identifier */
@@ -155,7 +155,7 @@ typedef struct xlog_ticket {
char t_cnt; /* current unit count */
uint8_t t_flags; /* properties of reservation */
int t_iclog_hdrs; /* iclog hdrs in t_curr_res */
-} xlog_ticket_t;
+};
/*
* - A log record header is 512 bytes. There is plenty of room to grow the
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 2f76531842f8..e6ed9e09c027 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2131,15 +2131,15 @@ xlog_recover_add_to_cont_trans(
item = list_entry(trans->r_itemq.prev, struct xlog_recover_item,
ri_list);
- old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
- old_len = item->ri_buf[item->ri_cnt-1].i_len;
+ old_ptr = item->ri_buf[item->ri_cnt-1].iov_base;
+ old_len = item->ri_buf[item->ri_cnt-1].iov_len;
ptr = kvrealloc(old_ptr, len + old_len, GFP_KERNEL);
if (!ptr)
return -ENOMEM;
memcpy(&ptr[old_len], dp, len);
- item->ri_buf[item->ri_cnt-1].i_len += len;
- item->ri_buf[item->ri_cnt-1].i_addr = ptr;
+ item->ri_buf[item->ri_cnt-1].iov_len += len;
+ item->ri_buf[item->ri_cnt-1].iov_base = ptr;
trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
return 0;
}
@@ -2223,7 +2223,7 @@ xlog_recover_add_to_trans(
}
item->ri_total = in_f->ilf_size;
- item->ri_buf = kzalloc(item->ri_total * sizeof(xfs_log_iovec_t),
+ item->ri_buf = kcalloc(item->ri_total, sizeof(*item->ri_buf),
GFP_KERNEL | __GFP_NOFAIL);
}
@@ -2237,8 +2237,8 @@ xlog_recover_add_to_trans(
}
/* Description region is ri_buf[0] */
- item->ri_buf[item->ri_cnt].i_addr = ptr;
- item->ri_buf[item->ri_cnt].i_len = len;
+ item->ri_buf[item->ri_cnt].iov_base = ptr;
+ item->ri_buf[item->ri_cnt].iov_len = len;
item->ri_cnt++;
trace_xfs_log_recover_item_add(log, trans, item, 0);
return 0;
@@ -2262,7 +2262,7 @@ xlog_recover_free_trans(
/* Free the regions in the item. */
list_del(&item->ri_list);
for (i = 0; i < item->ri_cnt; i++)
- kvfree(item->ri_buf[i].i_addr);
+ kvfree(item->ri_buf[i].iov_base);
/* Free the item itself */
kfree(item->ri_buf);
kfree(item);
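
The allocation hunk above also swaps an open-coded multiplication for kcalloc(). The point is overflow safety, which matters when the element count comes from an on-disk log record:

	/* before: n * size can wrap silently, yielding a short buffer */
	item->ri_buf = kzalloc(item->ri_total * sizeof(*item->ri_buf),
			GFP_KERNEL | __GFP_NOFAIL);

	/* after: kcalloc() checks the multiplication for overflow and
	 * fails the request instead of allocating a truncated array */
	item->ri_buf = kcalloc(item->ri_total, sizeof(*item->ri_buf),
			GFP_KERNEL | __GFP_NOFAIL);
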
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 29276fe60df9..0b690bc119d7 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -171,19 +171,16 @@ xfs_readsb(
ASSERT(mp->m_ddev_targp != NULL);
/*
- * For the initial read, we must guess at the sector
- * size based on the block device. It's enough to
- * get the sb_sectsize out of the superblock and
- * then reread with the proper length.
- * We don't verify it yet, because it may not be complete.
+ * In the first pass, use the device sector size to just read enough
+ * of the superblock to extract the XFS sector size.
+ *
+ * The device sector size must be smaller than or equal to the XFS
+ * sector size and thus we can always read the superblock. Once we know
+ * the XFS sector size, re-read it and run the buffer verifier.
*/
- sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
+ sector_size = mp->m_ddev_targp->bt_logical_sectorsize;
buf_ops = NULL;
- /*
- * Allocate a (locked) buffer to hold the superblock. This will be kept
- * around at all times to optimize access to the superblock.
- */
reread:
error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
BTOBB(sector_size), &bp, buf_ops);
@@ -247,6 +244,10 @@ reread:
/* no need to be quiet anymore, so reset the buf ops */
bp->b_ops = &xfs_sb_buf_ops;
+ /*
+ * Keep a pointer to the sb buffer around instead of caching it in the
+ * buffer cache because we access it frequently.
+ */
mp->m_sb_bp = bp;
xfs_buf_unlock(bp);
return 0;
@@ -678,68 +679,46 @@ static inline unsigned int max_pow_of_two_factor(const unsigned int nr)
}
/*
- * If the data device advertises atomic write support, limit the size of data
- * device atomic writes to the greatest power-of-two factor of the AG size so
- * that every atomic write unit aligns with the start of every AG. This is
- * required so that the per-AG allocations for an atomic write will always be
+ * If the underlying device advertises atomic write support, limit the size of
+ * atomic writes to the greatest power-of-two factor of the group size so
+ * that every atomic write unit aligns with the start of every group. This is
+ * required so that the allocations for an atomic write will always be
* aligned compatibly with the alignment requirements of the storage.
*
- * If the data device doesn't advertise atomic writes, then there are no
- * alignment restrictions and the largest out-of-place write we can do
- * ourselves is the number of blocks that user files can allocate from any AG.
- */
-static inline xfs_extlen_t xfs_calc_perag_awu_max(struct xfs_mount *mp)
-{
- if (mp->m_ddev_targp->bt_bdev_awu_min > 0)
- return max_pow_of_two_factor(mp->m_sb.sb_agblocks);
- return rounddown_pow_of_two(mp->m_ag_max_usable);
-}
-
-/*
- * Reflink on the realtime device requires rtgroups, and atomic writes require
- * reflink.
- *
- * If the realtime device advertises atomic write support, limit the size of
- * data device atomic writes to the greatest power-of-two factor of the rtgroup
- * size so that every atomic write unit aligns with the start of every rtgroup.
- * This is required so that the per-rtgroup allocations for an atomic write
- * will always be aligned compatibly with the alignment requirements of the
- * storage.
- *
- * If the rt device doesn't advertise atomic writes, then there are no
- * alignment restrictions and the largest out-of-place write we can do
- * ourselves is the number of blocks that user files can allocate from any
- * rtgroup.
+ * If the device doesn't advertise atomic writes, then there are no alignment
+ * restrictions and the largest out-of-place write we can do ourselves is the
+ * number of blocks that user files can allocate from any group.
*/
-static inline xfs_extlen_t xfs_calc_rtgroup_awu_max(struct xfs_mount *mp)
+static xfs_extlen_t
+xfs_calc_group_awu_max(
+ struct xfs_mount *mp,
+ enum xfs_group_type type)
{
- struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG];
+ struct xfs_groups *g = &mp->m_groups[type];
+ struct xfs_buftarg *btp = xfs_group_type_buftarg(mp, type);
- if (rgs->blocks == 0)
+ if (g->blocks == 0)
return 0;
- if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_bdev_awu_min > 0)
- return max_pow_of_two_factor(rgs->blocks);
- return rounddown_pow_of_two(rgs->blocks);
+ if (btp && btp->bt_awu_min > 0)
+ return max_pow_of_two_factor(g->blocks);
+ return rounddown_pow_of_two(g->blocks);
}
/* Compute the maximum atomic write unit size for each section. */
static inline void
xfs_calc_atomic_write_unit_max(
- struct xfs_mount *mp)
+ struct xfs_mount *mp,
+ enum xfs_group_type type)
{
- struct xfs_groups *ags = &mp->m_groups[XG_TYPE_AG];
- struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG];
+ struct xfs_groups *g = &mp->m_groups[type];
const xfs_extlen_t max_write = xfs_calc_atomic_write_max(mp);
const xfs_extlen_t max_ioend = xfs_reflink_max_atomic_cow(mp);
- const xfs_extlen_t max_agsize = xfs_calc_perag_awu_max(mp);
- const xfs_extlen_t max_rgsize = xfs_calc_rtgroup_awu_max(mp);
-
- ags->awu_max = min3(max_write, max_ioend, max_agsize);
- rgs->awu_max = min3(max_write, max_ioend, max_rgsize);
+ const xfs_extlen_t max_gsize = xfs_calc_group_awu_max(mp, type);
- trace_xfs_calc_atomic_write_unit_max(mp, max_write, max_ioend,
- max_agsize, max_rgsize);
+ g->awu_max = min3(max_write, max_ioend, max_gsize);
+ trace_xfs_calc_atomic_write_unit_max(mp, type, max_write, max_ioend,
+ max_gsize, g->awu_max);
}
/*
@@ -757,7 +736,8 @@ xfs_set_max_atomic_write_opt(
max(mp->m_groups[XG_TYPE_AG].blocks,
mp->m_groups[XG_TYPE_RTG].blocks);
const xfs_extlen_t max_group_write =
- max(xfs_calc_perag_awu_max(mp), xfs_calc_rtgroup_awu_max(mp));
+ max(xfs_calc_group_awu_max(mp, XG_TYPE_AG),
+ xfs_calc_group_awu_max(mp, XG_TYPE_RTG));
int error;
if (new_max_bytes == 0)
@@ -813,7 +793,8 @@ set_limit:
return error;
}
- xfs_calc_atomic_write_unit_max(mp);
+ xfs_calc_atomic_write_unit_max(mp, XG_TYPE_AG);
+ xfs_calc_atomic_write_unit_max(mp, XG_TYPE_RTG);
mp->m_awu_max_bytes = new_max_bytes;
return 0;
}
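
The consolidated xfs_calc_group_awu_max() picks between two very different rounding helpers. A worked example, assuming max_pow_of_two_factor() returns the largest power of two that divides its argument (equivalently nr & -nr for nonzero nr):

	/* group size of 1000000 blocks = 2^6 * 15625 */
	max_pow_of_two_factor(1000000);	/* -> 64: every group boundary is
					 * 64-block aligned, as required
					 * when the device does HW atomic
					 * writes */
	rounddown_pow_of_two(1000000);	/* -> 524288: the largest power-
					 * of-two write that fits in any
					 * group when no alignment
					 * guarantee is needed */
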
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index d85084f9f317..97de44c32272 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -802,4 +802,21 @@ static inline void xfs_mod_sb_delalloc(struct xfs_mount *mp, int64_t delta)
int xfs_set_max_atomic_write_opt(struct xfs_mount *mp,
unsigned long long new_max_bytes);
+static inline struct xfs_buftarg *
+xfs_group_type_buftarg(
+ struct xfs_mount *mp,
+ enum xfs_group_type type)
+{
+ switch (type) {
+ case XG_TYPE_AG:
+ return mp->m_ddev_targp;
+ case XG_TYPE_RTG:
+ return mp->m_rtdev_targp;
+ default:
+ ASSERT(0);
+ break;
+ }
+ return NULL;
+}
+
#endif /* __XFS_MOUNT_H__ */
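
This helper centralizes the group-type-to-device mapping that was previously open-coded twice: as xfs_group_bdev() in xfs_discard.c (removed above) and as a ternary in xfs_notify_failure.c. Typical use, as in the discard path earlier in this diff:

	struct xfs_buftarg	*btp =
		xfs_group_type_buftarg(xg->xg_mount, xg->xg_type);

	/* AGs resolve to the data device, rtgroups to the RT device */
	error = __blkdev_issue_discard(btp->bt_bdev,
			xfs_gbno_to_daddr(xg, busyp->bno),
			XFS_FSB_TO_BB(mp, busyp->length),
			GFP_KERNEL, &bio);
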
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 08443ceec329..866c71d9fbae 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -320,7 +320,7 @@ xfs_mru_cache_create(
xfs_mru_cache_free_func_t free_func)
{
struct xfs_mru_cache *mru = NULL;
- int err = 0, grp;
+ int grp;
unsigned int grp_time;
if (mrup)
@@ -341,8 +341,8 @@ xfs_mru_cache_create(
mru->lists = kzalloc(mru->grp_count * sizeof(*mru->lists),
GFP_KERNEL | __GFP_NOFAIL);
if (!mru->lists) {
- err = -ENOMEM;
- goto exit;
+ kfree(mru);
+ return -ENOMEM;
}
for (grp = 0; grp < mru->grp_count; grp++)
@@ -361,14 +361,7 @@ xfs_mru_cache_create(
mru->free_func = free_func;
mru->data = data;
*mrup = mru;
-
-exit:
- if (err && mru && mru->lists)
- kfree(mru->lists);
- if (err && mru)
- kfree(mru);
-
- return err;
+ return 0;
}
/*
@@ -425,10 +418,6 @@ xfs_mru_cache_insert(
{
int error = -EINVAL;
- ASSERT(mru && mru->lists);
- if (!mru || !mru->lists)
- goto out_free;
-
error = -ENOMEM;
if (radix_tree_preload(GFP_KERNEL))
goto out_free;
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index 3545dc1d953c..fbeddcac4792 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -253,8 +253,7 @@ xfs_dax_notify_dev_failure(
return -EOPNOTSUPP;
}
- error = xfs_dax_translate_range(type == XG_TYPE_RTG ?
- mp->m_rtdev_targp : mp->m_ddev_targp,
+ error = xfs_dax_translate_range(xfs_group_type_buftarg(mp, type),
offset, len, &daddr, &bblen);
if (error)
return error;
@@ -280,10 +279,7 @@ xfs_dax_notify_dev_failure(
kernel_frozen = xfs_dax_notify_failure_freeze(mp) == 0;
}
- error = xfs_trans_alloc_empty(mp, &tp);
- if (error)
- goto out;
-
+ tp = xfs_trans_alloc_empty(mp);
start_gno = xfs_fsb_to_gno(mp, start_bno, type);
end_gno = xfs_fsb_to_gno(mp, end_bno, type);
while ((xg = xfs_group_next_range(mp, xg, start_gno, end_gno, type))) {
@@ -354,7 +350,6 @@ xfs_dax_notify_dev_failure(
error = -EFSCORRUPTED;
}
-out:
/* Thaw the fs if it has been frozen before. */
if (mf_flags & MF_MEM_PRE_REMOVE)
xfs_dax_notify_failure_thaw(mp, kernel_frozen);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 417439b58785..23ba84ec919a 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -134,6 +134,7 @@ xfs_qm_dqpurge(
dqp->q_flags |= XFS_DQFLAG_FREEING;
+ xfs_qm_dqunpin_wait(dqp);
xfs_dqflock(dqp);
/*
@@ -465,6 +466,7 @@ xfs_qm_dquot_isolate(
struct xfs_dquot *dqp = container_of(item,
struct xfs_dquot, q_lru);
struct xfs_qm_isolate *isol = arg;
+ enum lru_status ret = LRU_SKIP;
if (!xfs_dqlock_nowait(dqp))
goto out_miss_busy;
@@ -478,6 +480,16 @@ xfs_qm_dquot_isolate(
goto out_miss_unlock;
/*
+ * If the dquot is pinned or dirty, rotate it to the end of the LRU to
+ * give some time for it to be cleaned before we try to isolate it
+ * again.
+ */
+ ret = LRU_ROTATE;
+ if (XFS_DQ_IS_DIRTY(dqp) || atomic_read(&dqp->q_pincount) > 0)
+ goto out_miss_unlock;
+
+ /*
* This dquot has acquired a reference in the meantime; remove it from
* the freelist and try again.
*/
@@ -492,41 +504,14 @@ xfs_qm_dquot_isolate(
}
/*
- * If the dquot is dirty, flush it. If it's already being flushed, just
- * skip it so there is time for the IO to complete before we try to
- * reclaim it again on the next LRU pass.
+ * The dquot may still be under IO, in which case the flush lock will be
+ * held. If we can't get the flush lock now, just skip over the dquot as
+ * if it was dirty.
*/
if (!xfs_dqflock_nowait(dqp))
goto out_miss_unlock;
- if (XFS_DQ_IS_DIRTY(dqp)) {
- struct xfs_buf *bp = NULL;
- int error;
-
- trace_xfs_dqreclaim_dirty(dqp);
-
- /* we have to drop the LRU lock to flush the dquot */
- spin_unlock(&lru->lock);
-
- error = xfs_dquot_use_attached_buf(dqp, &bp);
- if (!bp || error == -EAGAIN) {
- xfs_dqfunlock(dqp);
- goto out_unlock_dirty;
- }
-
- /*
- * dqflush completes dqflock on error, and the delwri ioend
- * does it on success.
- */
- error = xfs_qm_dqflush(dqp, bp);
- if (error)
- goto out_unlock_dirty;
-
- xfs_buf_delwri_queue(bp, &isol->buffers);
- xfs_buf_relse(bp);
- goto out_unlock_dirty;
- }
-
+ ASSERT(!XFS_DQ_IS_DIRTY(dqp));
xfs_dquot_detach_buf(dqp);
xfs_dqfunlock(dqp);
@@ -548,13 +533,7 @@ out_miss_unlock:
out_miss_busy:
trace_xfs_dqreclaim_busy(dqp);
XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
- return LRU_SKIP;
-
-out_unlock_dirty:
- trace_xfs_dqreclaim_busy(dqp);
- XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
- xfs_dqunlock(dqp);
- return LRU_RETRY;
+ return ret;
}
static unsigned long
@@ -681,10 +660,7 @@ xfs_qm_load_metadir_qinos(
struct xfs_trans *tp;
int error;
- error = xfs_trans_alloc_empty(mp, &tp);
- if (error)
- return error;
-
+ tp = xfs_trans_alloc_empty(mp);
error = xfs_dqinode_load_parent(tp, &qi->qi_dirip);
if (error == -ENOENT) {
/* no quota dir directory, but we'll create one later */
@@ -1486,7 +1462,6 @@ xfs_qm_flush_one(
struct xfs_dquot *dqp,
void *data)
{
- struct xfs_mount *mp = dqp->q_mount;
struct list_head *buffer_list = data;
struct xfs_buf *bp = NULL;
int error = 0;
@@ -1497,34 +1472,8 @@ xfs_qm_flush_one(
if (!XFS_DQ_IS_DIRTY(dqp))
goto out_unlock;
- /*
- * The only way the dquot is already flush locked by the time quotacheck
- * gets here is if reclaim flushed it before the dqadjust walk dirtied
- * it for the final time. Quotacheck collects all dquot bufs in the
- * local delwri queue before dquots are dirtied, so reclaim can't have
- * possibly queued it for I/O. The only way out is to push the buffer to
- * cycle the flush lock.
- */
- if (!xfs_dqflock_nowait(dqp)) {
- /* buf is pinned in-core by delwri list */
- error = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen, 0, &bp);
- if (error)
- goto out_unlock;
-
- if (!(bp->b_flags & _XBF_DELWRI_Q)) {
- error = -EAGAIN;
- xfs_buf_relse(bp);
- goto out_unlock;
- }
- xfs_buf_unlock(bp);
-
- xfs_buf_delwri_pushbuf(bp, buffer_list);
- xfs_buf_rele(bp);
-
- error = -EAGAIN;
- goto out_unlock;
- }
+ xfs_qm_dqunpin_wait(dqp);
+ xfs_dqflock(dqp);
error = xfs_dquot_use_attached_buf(dqp, &bp);
if (error)
@@ -1803,10 +1752,7 @@ xfs_qm_qino_load(
struct xfs_inode *dp = NULL;
int error;
- error = xfs_trans_alloc_empty(mp, &tp);
- if (error)
- return error;
-
+ tp = xfs_trans_alloc_empty(mp);
if (xfs_has_metadir(mp)) {
error = xfs_dqinode_load_parent(tp, &dp);
if (error)
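
xfs_qm_dquot_isolate() is an isolate callback for list_lru_walk(), and the rework above leans on that API's return vocabulary. A sketch of the contract, assuming the standard enum lru_status from include/linux/list_lru.h:

	enum lru_status {
		LRU_REMOVED,		/* item removed from the list */
		LRU_REMOVED_RETRY,	/* removed; lock dropped, retry walk */
		LRU_ROTATE,		/* item moved to the list tail */
		LRU_SKIP,		/* item left in place */
		LRU_RETRY,		/* could not process; retry later */
		/* newer kernels also define LRU_STOP to abort the walk */
	};

Dirty or pinned dquots now return LRU_ROTATE so they drift to the tail and get time to be cleaned before the next scan reaches them, while dquots that cannot even be locked still return LRU_SKIP; the old dirty-flush path that returned LRU_RETRY is gone because reclaim no longer writes dquots back itself.
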
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 076501123d89..3728234699a2 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -717,18 +717,18 @@ xlog_recover_cui_commit_pass2(
struct xfs_cui_log_format *cui_formatp;
size_t len;
- cui_formatp = item->ri_buf[0].i_addr;
+ cui_formatp = item->ri_buf[0].iov_base;
- if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) {
+ if (item->ri_buf[0].iov_len < xfs_cui_log_format_sizeof(0)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents);
- if (item->ri_buf[0].i_len != len) {
+ if (item->ri_buf[0].iov_len != len) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
@@ -759,18 +759,18 @@ xlog_recover_rtcui_commit_pass2(
struct xfs_cui_log_format *cui_formatp;
size_t len;
- cui_formatp = item->ri_buf[0].i_addr;
+ cui_formatp = item->ri_buf[0].iov_base;
- if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) {
+ if (item->ri_buf[0].iov_len < xfs_cui_log_format_sizeof(0)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents);
- if (item->ri_buf[0].i_len != len) {
+ if (item->ri_buf[0].iov_len != len) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
@@ -791,7 +791,7 @@ xlog_recover_rtcui_commit_pass2(
xfs_lsn_t lsn)
{
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
#endif
@@ -817,10 +817,10 @@ xlog_recover_cud_commit_pass2(
{
struct xfs_cud_log_format *cud_formatp;
- cud_formatp = item->ri_buf[0].i_addr;
- if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
+ cud_formatp = item->ri_buf[0].iov_base;
+ if (item->ri_buf[0].iov_len != sizeof(struct xfs_cud_log_format)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
@@ -843,10 +843,10 @@ xlog_recover_rtcud_commit_pass2(
{
struct xfs_cud_log_format *cud_formatp;
- cud_formatp = item->ri_buf[0].i_addr;
- if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
+ cud_formatp = item->ri_buf[0].iov_base;
+ if (item->ri_buf[0].iov_len != sizeof(struct xfs_cud_log_format)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index c99700318ec2..15f0903f6fd4 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -746,18 +746,18 @@ xlog_recover_rui_commit_pass2(
struct xfs_rui_log_format *rui_formatp;
size_t len;
- rui_formatp = item->ri_buf[0].i_addr;
+ rui_formatp = item->ri_buf[0].iov_base;
- if (item->ri_buf[0].i_len < xfs_rui_log_format_sizeof(0)) {
+ if (item->ri_buf[0].iov_len < xfs_rui_log_format_sizeof(0)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
len = xfs_rui_log_format_sizeof(rui_formatp->rui_nextents);
- if (item->ri_buf[0].i_len != len) {
+ if (item->ri_buf[0].iov_len != len) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
@@ -788,18 +788,18 @@ xlog_recover_rtrui_commit_pass2(
struct xfs_rui_log_format *rui_formatp;
size_t len;
- rui_formatp = item->ri_buf[0].i_addr;
+ rui_formatp = item->ri_buf[0].iov_base;
- if (item->ri_buf[0].i_len < xfs_rui_log_format_sizeof(0)) {
+ if (item->ri_buf[0].iov_len < xfs_rui_log_format_sizeof(0)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
len = xfs_rui_log_format_sizeof(rui_formatp->rui_nextents);
- if (item->ri_buf[0].i_len != len) {
+ if (item->ri_buf[0].iov_len != len) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
@@ -820,7 +820,7 @@ xlog_recover_rtrui_commit_pass2(
xfs_lsn_t lsn)
{
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
#endif
@@ -846,10 +846,10 @@ xlog_recover_rud_commit_pass2(
{
struct xfs_rud_log_format *rud_formatp;
- rud_formatp = item->ri_buf[0].i_addr;
- if (item->ri_buf[0].i_len != sizeof(struct xfs_rud_log_format)) {
+ rud_formatp = item->ri_buf[0].iov_base;
+ if (item->ri_buf[0].iov_len != sizeof(struct xfs_rud_log_format)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
- rud_formatp, item->ri_buf[0].i_len);
+ rud_formatp, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
@@ -872,10 +872,10 @@ xlog_recover_rtrud_commit_pass2(
{
struct xfs_rud_log_format *rud_formatp;
- rud_formatp = item->ri_buf[0].i_addr;
- if (item->ri_buf[0].i_len != sizeof(struct xfs_rud_log_format)) {
+ rud_formatp = item->ri_buf[0].iov_base;
+ if (item->ri_buf[0].iov_len != sizeof(struct xfs_rud_log_format)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
- rud_formatp, item->ri_buf[0].i_len);
+ rud_formatp, item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 6484c596ecea..6907e871fa15 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -729,9 +729,7 @@ xfs_rtginode_ensure(
if (rtg->rtg_inodes[type])
return 0;
- error = xfs_trans_alloc_empty(rtg_mount(rtg), &tp);
- if (error)
- return error;
+ tp = xfs_trans_alloc_empty(rtg_mount(rtg));
error = xfs_rtginode_load(rtg, type, tp);
xfs_trans_cancel(tp);
@@ -1259,6 +1257,8 @@ xfs_growfs_check_rtgeom(
kfree(nmp);
+ trace_xfs_growfs_check_rtgeom(mp, min_logfsbs);
+
if (min_logfsbs > mp->m_sb.sb_logblocks)
return -EINVAL;
@@ -1303,9 +1303,7 @@ xfs_growfs_rt_prep_groups(
if (!mp->m_rtdirip) {
struct xfs_trans *tp;
- error = xfs_trans_alloc_empty(mp, &tp);
- if (error)
- return error;
+ tp = xfs_trans_alloc_empty(mp);
error = xfs_rtginode_load_parent(tp);
xfs_trans_cancel(tp);
@@ -1672,10 +1670,7 @@ xfs_rtmount_inodes(
struct xfs_rtgroup *rtg = NULL;
int error;
- error = xfs_trans_alloc_empty(mp, &tp);
- if (error)
- return error;
-
+ tp = xfs_trans_alloc_empty(mp);
if (xfs_has_rtgroups(mp) && mp->m_sb.sb_rgcount > 0) {
error = xfs_rtginode_load_parent(tp);
if (error)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 0bc4b5489078..bb0a82635a77 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -2020,14 +2020,13 @@ xfs_remount_rw(
int error;
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp &&
- bdev_read_only(mp->m_logdev_targp->bt_bdev)) {
+ xfs_readonly_buftarg(mp->m_logdev_targp)) {
xfs_warn(mp,
"ro->rw transition prohibited by read-only logdev");
return -EACCES;
}
- if (mp->m_rtdev_targp &&
- bdev_read_only(mp->m_rtdev_targp->bt_bdev)) {
+ if (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp)) {
xfs_warn(mp,
"ro->rw transition prohibited by read-only rtdev");
return -EACCES;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 01d284a1c759..e1794e3e3156 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -171,36 +171,33 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_leaf_list);
DEFINE_ATTR_LIST_EVENT(xfs_attr_node_list);
TRACE_EVENT(xfs_calc_atomic_write_unit_max,
- TP_PROTO(struct xfs_mount *mp, unsigned int max_write,
- unsigned int max_ioend, unsigned int max_agsize,
- unsigned int max_rgsize),
- TP_ARGS(mp, max_write, max_ioend, max_agsize, max_rgsize),
+ TP_PROTO(struct xfs_mount *mp, enum xfs_group_type type,
+ unsigned int max_write, unsigned int max_ioend,
+ unsigned int max_gsize, unsigned int awu_max),
+ TP_ARGS(mp, type, max_write, max_ioend, max_gsize, awu_max),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(enum xfs_group_type, type)
__field(unsigned int, max_write)
__field(unsigned int, max_ioend)
- __field(unsigned int, max_agsize)
- __field(unsigned int, max_rgsize)
- __field(unsigned int, data_awu_max)
- __field(unsigned int, rt_awu_max)
+ __field(unsigned int, max_gsize)
+ __field(unsigned int, awu_max)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
+ __entry->type = type;
__entry->max_write = max_write;
__entry->max_ioend = max_ioend;
- __entry->max_agsize = max_agsize;
- __entry->max_rgsize = max_rgsize;
- __entry->data_awu_max = mp->m_groups[XG_TYPE_AG].awu_max;
- __entry->rt_awu_max = mp->m_groups[XG_TYPE_RTG].awu_max;
+ __entry->max_gsize = max_gsize;
+ __entry->awu_max = awu_max;
),
- TP_printk("dev %d:%d max_write %u max_ioend %u max_agsize %u max_rgsize %u data_awu_max %u rt_awu_max %u",
+ TP_printk("dev %d:%d %s max_write %u max_ioend %u max_gsize %u awu_max %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->max_write,
__entry->max_ioend,
- __entry->max_agsize,
- __entry->max_rgsize,
- __entry->data_awu_max,
- __entry->rt_awu_max)
+ __entry->max_gsize,
+ __entry->awu_max)
);
TRACE_EVENT(xfs_calc_max_atomic_write_fsblocks,
@@ -428,8 +425,8 @@ DECLARE_EVENT_CLASS(xfs_zone_alloc_class,
__field(dev_t, dev)
__field(xfs_rgnumber_t, rgno)
__field(xfs_rgblock_t, used)
+ __field(xfs_rgblock_t, allocated)
__field(xfs_rgblock_t, written)
- __field(xfs_rgblock_t, write_pointer)
__field(xfs_rgblock_t, rgbno)
__field(xfs_extlen_t, len)
),
@@ -437,17 +434,17 @@ DECLARE_EVENT_CLASS(xfs_zone_alloc_class,
__entry->dev = rtg_mount(oz->oz_rtg)->m_super->s_dev;
__entry->rgno = rtg_rgno(oz->oz_rtg);
__entry->used = rtg_rmap(oz->oz_rtg)->i_used_blocks;
+ __entry->allocated = oz->oz_allocated;
__entry->written = oz->oz_written;
- __entry->write_pointer = oz->oz_write_pointer;
__entry->rgbno = rgbno;
__entry->len = len;
),
- TP_printk("dev %d:%d rgno 0x%x used 0x%x written 0x%x wp 0x%x rgbno 0x%x len 0x%x",
+ TP_printk("dev %d:%d rgno 0x%x used 0x%x alloced 0x%x written 0x%x rgbno 0x%x len 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rgno,
__entry->used,
+ __entry->allocated,
__entry->written,
- __entry->write_pointer,
__entry->rgbno,
__entry->len)
);
@@ -778,7 +775,6 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done);
DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
DEFINE_BUF_EVENT(xfs_buf_delwri_queued);
DEFINE_BUF_EVENT(xfs_buf_delwri_split);
-DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf);
DEFINE_BUF_EVENT(xfs_buf_get_uncached);
DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_iodone_async);
@@ -1083,7 +1079,9 @@ DEFINE_INODE_EVENT(xfs_get_acl);
#endif
DEFINE_INODE_EVENT(xfs_vm_bmap);
DEFINE_INODE_EVENT(xfs_file_ioctl);
+#ifdef CONFIG_COMPAT
DEFINE_INODE_EVENT(xfs_file_compat_ioctl);
+#endif
DEFINE_INODE_EVENT(xfs_ioctl_setattr);
DEFINE_INODE_EVENT(xfs_dir_fsync);
DEFINE_INODE_EVENT(xfs_file_fsync);
@@ -1147,6 +1145,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
__field(xfs_ino_t, ino)
__field(int, count)
__field(int, pincount)
+ __field(unsigned long, iflags)
__field(unsigned long, caller_ip)
),
TP_fast_assign(
@@ -1154,13 +1153,15 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
__entry->ino = ip->i_ino;
__entry->count = atomic_read(&VFS_I(ip)->i_count);
__entry->pincount = atomic_read(&ip->i_pincount);
+ __entry->iflags = ip->i_flags;
__entry->caller_ip = caller_ip;
),
- TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pS",
+ TP_printk("dev %d:%d ino 0x%llx count %d pincount %d iflags 0x%lx caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->count,
__entry->pincount,
+ __entry->iflags,
(char *)__entry->caller_ip)
)
@@ -1250,6 +1251,8 @@ DEFINE_IREF_EVENT(xfs_irele);
DEFINE_IREF_EVENT(xfs_inode_pin);
DEFINE_IREF_EVENT(xfs_inode_unpin);
DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
+DEFINE_IREF_EVENT(xfs_inode_push_pinned);
+DEFINE_IREF_EVENT(xfs_inode_push_stale);
DECLARE_EVENT_CLASS(xfs_namespace_class,
TP_PROTO(struct xfs_inode *dp, const struct xfs_name *name),
@@ -1393,7 +1396,6 @@ DEFINE_EVENT(xfs_dquot_class, name, \
TP_ARGS(dqp))
DEFINE_DQUOT_EVENT(xfs_dqadjust);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_busy);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_done);
DEFINE_DQUOT_EVENT(xfs_dqattach_found);
@@ -1598,7 +1600,6 @@ DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant);
DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_sub);
DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_cil_wait);
-DEFINE_LOGGRANT_EVENT(xfs_log_cil_return);
DECLARE_EVENT_CLASS(xfs_log_item_class,
TP_PROTO(struct xfs_log_item *lip),
@@ -1654,6 +1655,8 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin);
+DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort);
+DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort);
DECLARE_EVENT_CLASS(xfs_ail_class,
TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn),
@@ -1859,8 +1862,6 @@ DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof);
DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write);
-DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_unwritten);
-DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_append);
DEFINE_SIMPLE_IO_EVENT(xfs_file_splice_read);
DEFINE_SIMPLE_IO_EVENT(xfs_zoned_map_blocks);
@@ -1893,31 +1894,6 @@ DEFINE_EVENT(xfs_itrunc_class, name, \
DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_start);
DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_end);
-TRACE_EVENT(xfs_pagecache_inval,
- TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
- TP_ARGS(ip, start, finish),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(xfs_fsize_t, size)
- __field(xfs_off_t, start)
- __field(xfs_off_t, finish)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- __entry->size = ip->i_disk_size;
- __entry->start = start;
- __entry->finish = finish;
- ),
- TP_printk("dev %d:%d ino 0x%llx disize 0x%llx start 0x%llx finish 0x%llx",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->size,
- __entry->start,
- __entry->finish)
-);
-
TRACE_EVENT(xfs_bunmap,
TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t fileoff, xfs_filblks_t len,
int flags, unsigned long caller_ip),
@@ -2263,14 +2239,12 @@ DEFINE_EVENT(xfs_alloc_class, name, \
DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
DEFINE_ALLOC_EVENT(xfs_alloc_cur);
DEFINE_ALLOC_EVENT(xfs_alloc_cur_right);
DEFINE_ALLOC_EVENT(xfs_alloc_cur_left);
DEFINE_ALLOC_EVENT(xfs_alloc_cur_lookup);
DEFINE_ALLOC_EVENT(xfs_alloc_cur_lookup_done);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
@@ -2465,13 +2439,8 @@ DEFINE_ATTR_EVENT(xfs_attr_leaf_toosmall);
DEFINE_ATTR_EVENT(xfs_attr_node_addname);
DEFINE_ATTR_EVENT(xfs_attr_node_get);
DEFINE_ATTR_EVENT(xfs_attr_node_replace);
-DEFINE_ATTR_EVENT(xfs_attr_node_removename);
-
-DEFINE_ATTR_EVENT(xfs_attr_fillstate);
-DEFINE_ATTR_EVENT(xfs_attr_refillstate);
DEFINE_ATTR_EVENT(xfs_attr_rmtval_get);
-DEFINE_ATTR_EVENT(xfs_attr_rmtval_set);
#define DEFINE_DA_EVENT(name) \
DEFINE_EVENT(xfs_da_class, name, \
@@ -2889,7 +2858,6 @@ DEFINE_EVENT(xfs_rtdiscard_class, name, \
TP_ARGS(mp, rtbno, len))
DEFINE_RTDISCARD_EVENT(xfs_discard_rtextent);
DEFINE_RTDISCARD_EVENT(xfs_discard_rttoosmall);
-DEFINE_RTDISCARD_EVENT(xfs_discard_rtrelax);
DECLARE_EVENT_CLASS(xfs_btree_cur_class,
TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp),
@@ -4206,36 +4174,6 @@ DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_extent_error);
DEFINE_INODE_IREC_EVENT(xfs_reflink_remap_extent_src);
DEFINE_INODE_IREC_EVENT(xfs_reflink_remap_extent_dest);
-/* dedupe tracepoints */
-DEFINE_DOUBLE_IO_EVENT(xfs_reflink_compare_extents);
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_compare_extents_error);
-
-/* ioctl tracepoints */
-TRACE_EVENT(xfs_ioctl_clone,
- TP_PROTO(struct inode *src, struct inode *dest),
- TP_ARGS(src, dest),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(unsigned long, src_ino)
- __field(loff_t, src_isize)
- __field(unsigned long, dest_ino)
- __field(loff_t, dest_isize)
- ),
- TP_fast_assign(
- __entry->dev = src->i_sb->s_dev;
- __entry->src_ino = src->i_ino;
- __entry->src_isize = i_size_read(src);
- __entry->dest_ino = dest->i_ino;
- __entry->dest_isize = i_size_read(dest);
- ),
- TP_printk("dev %d:%d ino 0x%lx isize 0x%llx -> ino 0x%lx isize 0x%llx",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->src_ino,
- __entry->src_isize,
- __entry->dest_ino,
- __entry->dest_isize)
-);
-
/* unshare tracepoints */
DEFINE_SIMPLE_IO_EVENT(xfs_reflink_unshare);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error);
@@ -4243,7 +4181,6 @@ DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error);
/* copy on write */
DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
-DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow);
DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
@@ -5028,7 +4965,6 @@ DEFINE_ICLOG_EVENT(xlog_iclog_switch);
DEFINE_ICLOG_EVENT(xlog_iclog_sync);
DEFINE_ICLOG_EVENT(xlog_iclog_syncing);
DEFINE_ICLOG_EVENT(xlog_iclog_sync_done);
-DEFINE_ICLOG_EVENT(xlog_iclog_want_sync);
DEFINE_ICLOG_EVENT(xlog_iclog_wait_on);
DEFINE_ICLOG_EVENT(xlog_iclog_write);
@@ -5077,7 +5013,6 @@ DEFINE_DAS_STATE_EVENT(xfs_attr_sf_addname_return);
DEFINE_DAS_STATE_EVENT(xfs_attr_set_iter_return);
DEFINE_DAS_STATE_EVENT(xfs_attr_leaf_addname_return);
DEFINE_DAS_STATE_EVENT(xfs_attr_node_addname_return);
-DEFINE_DAS_STATE_EVENT(xfs_attr_remove_iter_return);
DEFINE_DAS_STATE_EVENT(xfs_attr_rmtval_alloc);
DEFINE_DAS_STATE_EVENT(xfs_attr_rmtval_remove_return);
DEFINE_DAS_STATE_EVENT(xfs_attr_defer_add);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c6657072361a..ece374d622b3 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -134,18 +134,14 @@ xfs_trans_dup(
}
/*
- * This is called to reserve free disk blocks and log space for the
- * given transaction. This must be done before allocating any resources
- * within the transaction.
+ * This is called to reserve free disk blocks and log space for the given
+ * transaction before allocating any resources within the transaction.
*
* This will return ENOSPC if there are not enough blocks available.
* It will sleep waiting for available log space.
- * The only valid value for the flags parameter is XFS_RES_LOG_PERM, which
- * is used by long running transactions. If any one of the reservations
- * fails then they will all be backed out.
*
- * This does not do quota reservations. That typically is done by the
- * caller afterwards.
+ * This does not do quota reservations. That typically is done by the caller
+ * afterwards.
*/
static int
xfs_trans_reserve(
@@ -158,10 +154,12 @@ xfs_trans_reserve(
int error = 0;
bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
+ ASSERT(resp->tr_logres > 0);
+
/*
- * Attempt to reserve the needed disk blocks by decrementing
- * the number needed from the number available. This will
- * fail if the count would go below zero.
+ * Attempt to reserve the needed disk blocks by decrementing the number
+ * needed from the number available. This will fail if the count would
+ * go below zero.
*/
if (blocks > 0) {
error = xfs_dec_fdblocks(mp, blocks, rsvd);
@@ -173,42 +171,20 @@ xfs_trans_reserve(
/*
* Reserve the log space needed for this transaction.
*/
- if (resp->tr_logres > 0) {
- bool permanent = false;
-
- ASSERT(tp->t_log_res == 0 ||
- tp->t_log_res == resp->tr_logres);
- ASSERT(tp->t_log_count == 0 ||
- tp->t_log_count == resp->tr_logcount);
-
- if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) {
- tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
- permanent = true;
- } else {
- ASSERT(tp->t_ticket == NULL);
- ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
- }
-
- if (tp->t_ticket != NULL) {
- ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES);
- error = xfs_log_regrant(mp, tp->t_ticket);
- } else {
- error = xfs_log_reserve(mp, resp->tr_logres,
- resp->tr_logcount,
- &tp->t_ticket, permanent);
- }
-
- if (error)
- goto undo_blocks;
+ if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES)
+ tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
+ error = xfs_log_reserve(mp, resp->tr_logres, resp->tr_logcount,
+ &tp->t_ticket, (tp->t_flags & XFS_TRANS_PERM_LOG_RES));
+ if (error)
+ goto undo_blocks;
- tp->t_log_res = resp->tr_logres;
- tp->t_log_count = resp->tr_logcount;
- }
+ tp->t_log_res = resp->tr_logres;
+ tp->t_log_count = resp->tr_logcount;
/*
- * Attempt to reserve the needed realtime extents by decrementing
- * the number needed from the number available. This will
- * fail if the count would go below zero.
+ * Attempt to reserve the needed realtime extents by decrementing the
+ * number needed from the number available. This will fail if the
+ * count would go below zero.
*/
if (rtextents > 0) {
error = xfs_dec_frextents(mp, rtextents);
@@ -221,18 +197,11 @@ xfs_trans_reserve(
return 0;
- /*
- * Error cases jump to one of these labels to undo any
- * reservations which have already been performed.
- */
undo_log:
- if (resp->tr_logres > 0) {
- xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
- tp->t_ticket = NULL;
- tp->t_log_res = 0;
- tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES;
- }
-
+ xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
+ tp->t_ticket = NULL;
+ tp->t_log_res = 0;
+ tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES;
undo_blocks:
if (blocks > 0) {
xfs_add_fdblocks(mp, blocks);
@@ -241,6 +210,28 @@ undo_blocks:
return error;
}
+static struct xfs_trans *
+__xfs_trans_alloc(
+ struct xfs_mount *mp,
+ uint flags)
+{
+ struct xfs_trans *tp;
+
+ ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) || xfs_has_lazysbcount(mp));
+
+ tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL);
+ if (!(flags & XFS_TRANS_NO_WRITECOUNT))
+ sb_start_intwrite(mp->m_super);
+ xfs_trans_set_context(tp);
+ tp->t_flags = flags;
+ tp->t_mountp = mp;
+ INIT_LIST_HEAD(&tp->t_items);
+ INIT_LIST_HEAD(&tp->t_busy);
+ INIT_LIST_HEAD(&tp->t_dfops);
+ tp->t_highest_agno = NULLAGNUMBER;
+ return tp;
+}
+
int
xfs_trans_alloc(
struct xfs_mount *mp,
@@ -254,33 +245,16 @@ xfs_trans_alloc(
bool want_retry = true;
int error;
+ ASSERT(resp->tr_logres > 0);
+
/*
* Allocate the handle before we do our freeze accounting and setting up
* GFP_NOFS allocation context so that we avoid lockdep false positives
* by doing GFP_KERNEL allocations inside sb_start_intwrite().
*/
retry:
- tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL);
- if (!(flags & XFS_TRANS_NO_WRITECOUNT))
- sb_start_intwrite(mp->m_super);
- xfs_trans_set_context(tp);
-
- /*
- * Zero-reservation ("empty") transactions can't modify anything, so
- * they're allowed to run while we're frozen.
- */
- WARN_ON(resp->tr_logres > 0 &&
- mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
- ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) ||
- xfs_has_lazysbcount(mp));
-
- tp->t_flags = flags;
- tp->t_mountp = mp;
- INIT_LIST_HEAD(&tp->t_items);
- INIT_LIST_HEAD(&tp->t_busy);
- INIT_LIST_HEAD(&tp->t_dfops);
- tp->t_highest_agno = NULLAGNUMBER;
-
+ WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
+ tp = __xfs_trans_alloc(mp, flags);
error = xfs_trans_reserve(tp, resp, blocks, rtextents);
if (error == -ENOSPC && want_retry) {
xfs_trans_cancel(tp);
@@ -324,14 +298,11 @@ retry:
* where we can be grabbing buffers at the same time that freeze is trying to
* drain the buffer LRU list.
*/
-int
+struct xfs_trans *
xfs_trans_alloc_empty(
- struct xfs_mount *mp,
- struct xfs_trans **tpp)
+ struct xfs_mount *mp)
{
- struct xfs_trans_res resv = {0};
-
- return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp);
+ return __xfs_trans_alloc(mp, XFS_TRANS_NO_WRITECOUNT);
}
/*
@@ -742,8 +713,10 @@ xfs_trans_free_items(
list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
xfs_trans_del_item(lip);
- if (abort)
+ if (abort) {
+ trace_xfs_trans_free_abort(lip);
set_bit(XFS_LI_ABORTED, &lip->li_flags);
+ }
if (lip->li_ops->iop_release)
lip->li_ops->iop_release(lip);
}
@@ -1024,51 +997,57 @@ xfs_trans_cancel(
}
/*
- * Roll from one trans in the sequence of PERMANENT transactions to
- * the next: permanent transactions are only flushed out when
- * committed with xfs_trans_commit(), but we still want as soon
- * as possible to let chunks of it go to the log. So we commit the
- * chunk we've been working on and get a new transaction to continue.
+ * Roll from one trans in the sequence of PERMANENT transactions to the next:
+ * permanent transactions are only flushed out when committed with
+ * xfs_trans_commit(), but we still want to let chunks of it go to the log as
+ * soon as possible. So we commit the chunk we've been working on and get a new
+ * transaction to continue.
*/
int
xfs_trans_roll(
struct xfs_trans **tpp)
{
- struct xfs_trans *trans = *tpp;
- struct xfs_trans_res tres;
+ struct xfs_trans *tp = *tpp;
+ unsigned int log_res = tp->t_log_res;
+ unsigned int log_count = tp->t_log_count;
int error;
- trace_xfs_trans_roll(trans, _RET_IP_);
+ trace_xfs_trans_roll(tp, _RET_IP_);
+
+ ASSERT(log_res > 0);
/*
* Copy the critical parameters from one trans to the next.
*/
- tres.tr_logres = trans->t_log_res;
- tres.tr_logcount = trans->t_log_count;
-
- *tpp = xfs_trans_dup(trans);
+ *tpp = xfs_trans_dup(tp);
/*
* Commit the current transaction.
- * If this commit failed, then it'd just unlock those items that
- * are not marked ihold. That also means that a filesystem shutdown
- * is in progress. The caller takes the responsibility to cancel
- * the duplicate transaction that gets returned.
+ *
+ * If this commit failed, then it'd just unlock those items that are not
+ * marked ihold. That also means that a filesystem shutdown is in
+ * progress. The caller takes the responsibility to cancel the
+ * duplicate transaction that gets returned.
*/
- error = __xfs_trans_commit(trans, true);
+ error = __xfs_trans_commit(tp, true);
if (error)
return error;
/*
* Reserve space in the log for the next transaction.
- * This also pushes items in the "AIL", the list of logged items,
- * out to disk if they are taking up space at the tail of the log
- * that we want to use. This requires that either nothing be locked
- * across this call, or that anything that is locked be logged in
- * the prior and the next transactions.
+ *
+ * This also pushes items in the AIL out to disk if they are taking up
+ * space at the tail of the log that we want to use. This requires that
+ * either nothing be locked across this call, or that anything that is
+ * locked be logged in the prior and the next transactions.
*/
- tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
- return xfs_trans_reserve(*tpp, &tres, 0, 0);
+ tp = *tpp;
+ error = xfs_log_regrant(tp->t_mountp, tp->t_ticket);
+ if (error)
+ return error;
+ tp->t_log_res = log_res;
+ tp->t_log_count = log_count;
+ return 0;
}
/*
@@ -1144,9 +1123,18 @@ xfs_trans_reserve_more(
unsigned int blocks,
unsigned int rtextents)
{
- struct xfs_trans_res resv = { };
-
- return xfs_trans_reserve(tp, &resv, blocks, rtextents);
+ bool rsvd = tp->t_flags & XFS_TRANS_RESERVE;
+
+ if (blocks && xfs_dec_fdblocks(tp->t_mountp, blocks, rsvd))
+ return -ENOSPC;
+ if (rtextents && xfs_dec_frextents(tp->t_mountp, rtextents)) {
+ if (blocks)
+ xfs_add_fdblocks(tp->t_mountp, blocks);
+ return -ENOSPC;
+ }
+ tp->t_blk_res += blocks;
+ tp->t_rtx_res += rtextents;
+ return 0;
}
/*
@@ -1161,14 +1149,13 @@ xfs_trans_reserve_more_inode(
unsigned int rblocks,
bool force_quota)
{
- struct xfs_trans_res resv = { };
struct xfs_mount *mp = ip->i_mount;
unsigned int rtx = xfs_extlen_to_rtxlen(mp, rblocks);
int error;
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
- error = xfs_trans_reserve(tp, &resv, dblocks, rtx);
+ error = xfs_trans_reserve_more(tp, dblocks, rtx);
if (error)
return error;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 2b366851e9a4..7fb860f645a3 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -15,7 +15,6 @@ struct xfs_efd_log_item;
struct xfs_efi_log_item;
struct xfs_inode;
struct xfs_item_ops;
-struct xfs_log_iovec;
struct xfs_mount;
struct xfs_trans;
struct xfs_trans_res;
@@ -168,8 +167,7 @@ int xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp,
struct xfs_trans **tpp);
int xfs_trans_reserve_more(struct xfs_trans *tp,
unsigned int blocks, unsigned int rtextents);
-int xfs_trans_alloc_empty(struct xfs_mount *mp,
- struct xfs_trans **tpp);
+struct xfs_trans *xfs_trans_alloc_empty(struct xfs_mount *mp);
void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
int xfs_trans_get_buf_map(struct xfs_trans *tp, struct xfs_buftarg *target,
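The prototype change above makes xfs_trans_alloc_empty() infallible, returning the transaction directly instead of an error code plus out-parameter; that is why every caller in this series drops its error check. A minimal sketch of the old and new calling conventions, grounded in the call sites updated above:

	struct xfs_trans	*tp;
	int			error;

	/* before: allocate-and-check */
	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	/* after: the empty (read-only) transaction cannot fail */
	tp = xfs_trans_alloc_empty(mp);
	/* ... read-only work under tp ... */
	xfs_trans_cancel(tp);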
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 0f641a9091ec..ac5cecec9aa1 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -243,7 +243,7 @@ __xfs_xattr_put_listent(
offset = context->buffer + context->count;
memcpy(offset, prefix, prefix_len);
offset += prefix_len;
- strncpy(offset, (char *)name, namelen); /* real name */
+ memcpy(offset, (char *)name, namelen); /* real name */
offset += namelen;
*offset = '\0';
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index 80add26c0111..33f7eee521a8 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -434,7 +434,7 @@ xfs_init_open_zone(
spin_lock_init(&oz->oz_alloc_lock);
atomic_set(&oz->oz_ref, 1);
oz->oz_rtg = rtg;
- oz->oz_write_pointer = write_pointer;
+ oz->oz_allocated = write_pointer;
oz->oz_written = write_pointer;
oz->oz_write_hint = write_hint;
oz->oz_is_gc = is_gc;
@@ -569,7 +569,7 @@ xfs_try_use_zone(
struct xfs_open_zone *oz,
bool lowspace)
{
- if (oz->oz_write_pointer == rtg_blocks(oz->oz_rtg))
+ if (oz->oz_allocated == rtg_blocks(oz->oz_rtg))
return false;
if (!lowspace && !xfs_good_hint_match(oz, file_hint))
return false;
@@ -654,13 +654,6 @@ static inline bool xfs_zoned_pack_tight(struct xfs_inode *ip)
!(ip->i_diflags & XFS_DIFLAG_APPEND);
}
-/*
- * Pick a new zone for writes.
- *
- * If we aren't using up our budget of open zones just open a new one from the
- * freelist. Else try to find one that matches the expected data lifetime. If
- * we don't find one that is good pick any zone that is available.
- */
static struct xfs_open_zone *
xfs_select_zone_nowait(
struct xfs_mount *mp,
@@ -688,7 +681,8 @@ xfs_select_zone_nowait(
goto out_unlock;
/*
- * See if we can open a new zone and use that.
+ * See if we can open a new zone and use that so that data for different
+ * files is mixed as little as possible.
*/
oz = xfs_try_open_zone(mp, write_hint);
if (oz)
@@ -727,7 +721,7 @@ xfs_select_zone(
for (;;) {
prepare_to_wait(&zi->zi_zone_wait, &wait, TASK_UNINTERRUPTIBLE);
oz = xfs_select_zone_nowait(mp, write_hint, pack_tight);
- if (oz)
+ if (oz || xfs_is_shutdown(mp))
break;
schedule();
}
@@ -744,25 +738,25 @@ xfs_zone_alloc_blocks(
{
struct xfs_rtgroup *rtg = oz->oz_rtg;
struct xfs_mount *mp = rtg_mount(rtg);
- xfs_rgblock_t rgbno;
+ xfs_rgblock_t allocated;
spin_lock(&oz->oz_alloc_lock);
count_fsb = min3(count_fsb, XFS_MAX_BMBT_EXTLEN,
- (xfs_filblks_t)rtg_blocks(rtg) - oz->oz_write_pointer);
+ (xfs_filblks_t)rtg_blocks(rtg) - oz->oz_allocated);
if (!count_fsb) {
spin_unlock(&oz->oz_alloc_lock);
return 0;
}
- rgbno = oz->oz_write_pointer;
- oz->oz_write_pointer += count_fsb;
+ allocated = oz->oz_allocated;
+ oz->oz_allocated += count_fsb;
spin_unlock(&oz->oz_alloc_lock);
- trace_xfs_zone_alloc_blocks(oz, rgbno, count_fsb);
+ trace_xfs_zone_alloc_blocks(oz, allocated, count_fsb);
*sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0);
*is_seq = bdev_zone_is_seq(mp->m_rtdev_targp->bt_bdev, *sector);
if (!*is_seq)
- *sector += XFS_FSB_TO_BB(mp, rgbno);
+ *sector += XFS_FSB_TO_BB(mp, allocated);
return XFS_FSB_TO_B(mp, count_fsb);
}
@@ -777,26 +771,6 @@ xfs_mark_rtg_boundary(
ioend->io_flags |= IOMAP_IOEND_BOUNDARY;
}
-static void
-xfs_submit_zoned_bio(
- struct iomap_ioend *ioend,
- struct xfs_open_zone *oz,
- bool is_seq)
-{
- ioend->io_bio.bi_iter.bi_sector = ioend->io_sector;
- ioend->io_private = oz;
- atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */
-
- if (is_seq) {
- ioend->io_bio.bi_opf &= ~REQ_OP_WRITE;
- ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND;
- } else {
- xfs_mark_rtg_boundary(ioend);
- }
-
- submit_bio(&ioend->io_bio);
-}
-
/*
* Cache the last zone written to for an inode so that it is considered first
* for subsequent writes.
@@ -891,6 +865,26 @@ xfs_zone_cache_create_association(
xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru);
}
+static void
+xfs_submit_zoned_bio(
+ struct iomap_ioend *ioend,
+ struct xfs_open_zone *oz,
+ bool is_seq)
+{
+ ioend->io_bio.bi_iter.bi_sector = ioend->io_sector;
+ ioend->io_private = oz;
+ atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */
+
+ if (is_seq) {
+ ioend->io_bio.bi_opf &= ~REQ_OP_WRITE;
+ ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND;
+ } else {
+ xfs_mark_rtg_boundary(ioend);
+ }
+
+ submit_bio(&ioend->io_bio);
+}
+
void
xfs_zone_alloc_and_submit(
struct iomap_ioend *ioend,
@@ -983,7 +977,7 @@ xfs_zone_rgbno_is_valid(
lockdep_assert_held(&rtg_rmap(rtg)->i_lock);
if (rtg->rtg_open_zone)
- return rgbno < rtg->rtg_open_zone->oz_write_pointer;
+ return rgbno < rtg->rtg_open_zone->oz_allocated;
return !xa_get_mark(&rtg_mount(rtg)->m_groups[XG_TYPE_RTG].xa,
rtg_rgno(rtg), XFS_RTG_FREE);
}
@@ -1017,7 +1011,7 @@ xfs_init_zone(
{
struct xfs_mount *mp = rtg_mount(rtg);
struct xfs_zone_info *zi = mp->m_zone_info;
- uint64_t used = rtg_rmap(rtg)->i_used_blocks;
+ uint32_t used = rtg_rmap(rtg)->i_used_blocks;
xfs_rgblock_t write_pointer, highest_rgbno;
int error;
@@ -1114,24 +1108,27 @@ xfs_get_zone_info_cb(
}
/*
- * Calculate the max open zone limit based on the of number of
- * backing zones available
+ * Calculate the max open zone limit based on the number of backing zones
+ * available.
*/
static inline uint32_t
xfs_max_open_zones(
struct xfs_mount *mp)
{
unsigned int max_open, max_open_data_zones;
+
/*
- * We need two zones for every open data zone,
- * one in reserve as we don't reclaim open zones. One data zone
- * and its spare is included in XFS_MIN_ZONES.
+ * We need two zones for every open data zone, one in reserve as we
+ * don't reclaim open zones. One data zone and its spare are included
+ * in XFS_MIN_ZONES to support at least one user data writer.
*/
max_open_data_zones = (mp->m_sb.sb_rgcount - XFS_MIN_ZONES) / 2 + 1;
max_open = max_open_data_zones + XFS_OPEN_GC_ZONES;
/*
- * Cap the max open limit to 1/4 of available space
+ * Cap the max open limit to 1/4 of available space. Without this we'd
+ * run out of easy reclaim targets too quickly and storage devices don't
+ * handle huge numbers of concurrent write streams overly well.
*/
max_open = min(max_open, mp->m_sb.sb_rgcount / 4);
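Worked example of the limit computed in xfs_max_open_zones() above. XFS_MIN_ZONES and XFS_OPEN_GC_ZONES are defined outside this hunk, so the concrete numbers below are assumptions for illustration only:

	/*
	 * Sketch only: assume XFS_MIN_ZONES == 4, XFS_OPEN_GC_ZONES == 1,
	 * sb_rgcount == 100.
	 *
	 *   max_open_data_zones = (100 - 4) / 2 + 1 = 49
	 *   max_open            = 49 + 1            = 50
	 *   1/4-of-space cap    = 100 / 4           = 25
	 *   result              = min(50, 25)       = 25
	 */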
diff --git a/fs/xfs/xfs_zone_alloc.h b/fs/xfs/xfs_zone_alloc.h
index ecf39106704c..4db02816d0fd 100644
--- a/fs/xfs/xfs_zone_alloc.h
+++ b/fs/xfs/xfs_zone_alloc.h
@@ -23,9 +23,9 @@ struct xfs_zone_alloc_ctx {
*/
#define XFS_ZR_RESERVED (1U << 2)
-int xfs_zoned_space_reserve(struct xfs_inode *ip, xfs_filblks_t count_fsb,
+int xfs_zoned_space_reserve(struct xfs_mount *mp, xfs_filblks_t count_fsb,
unsigned int flags, struct xfs_zone_alloc_ctx *ac);
-void xfs_zoned_space_unreserve(struct xfs_inode *ip,
+void xfs_zoned_space_unreserve(struct xfs_mount *mp,
struct xfs_zone_alloc_ctx *ac);
void xfs_zoned_add_available(struct xfs_mount *mp, xfs_filblks_t count_fsb);
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index 9c00fc5baa30..064cd1a857a0 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -328,10 +328,7 @@ xfs_zone_gc_query(
iter->rec_idx = 0;
iter->rec_count = 0;
- error = xfs_trans_alloc_empty(mp, &tp);
- if (error)
- return error;
-
+ tp = xfs_trans_alloc_empty(mp);
xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
cur = xfs_rtrmapbt_init_cursor(tp, rtg);
error = xfs_rmap_query_range(cur, &ri_low, &ri_high,
@@ -533,8 +530,7 @@ xfs_zone_gc_steal_open(
spin_lock(&zi->zi_open_zones_lock);
list_for_each_entry(oz, &zi->zi_open_zones, oz_entry) {
- if (!found ||
- oz->oz_write_pointer < found->oz_write_pointer)
+ if (!found || oz->oz_allocated < found->oz_allocated)
found = oz;
}
@@ -584,7 +580,7 @@ xfs_zone_gc_ensure_target(
{
struct xfs_open_zone *oz = mp->m_zone_info->zi_open_gc_zone;
- if (!oz || oz->oz_write_pointer == rtg_blocks(oz->oz_rtg))
+ if (!oz || oz->oz_allocated == rtg_blocks(oz->oz_rtg))
return xfs_zone_gc_select_target(mp);
return oz;
}
@@ -605,7 +601,7 @@ xfs_zone_gc_space_available(
oz = xfs_zone_gc_ensure_target(data->mp);
if (!oz)
return false;
- return oz->oz_write_pointer < rtg_blocks(oz->oz_rtg) &&
+ return oz->oz_allocated < rtg_blocks(oz->oz_rtg) &&
xfs_zone_gc_scratch_available(data);
}
@@ -647,7 +643,7 @@ xfs_zone_gc_alloc_blocks(
*/
spin_lock(&mp->m_sb_lock);
*count_fsb = min(*count_fsb,
- rtg_blocks(oz->oz_rtg) - oz->oz_write_pointer);
+ rtg_blocks(oz->oz_rtg) - oz->oz_allocated);
*count_fsb = min3(*count_fsb,
mp->m_free[XC_FREE_RTEXTENTS].res_avail,
mp->m_free[XC_FREE_RTAVAILABLE].res_avail);
@@ -661,8 +657,8 @@ xfs_zone_gc_alloc_blocks(
*daddr = xfs_gbno_to_daddr(&oz->oz_rtg->rtg_group, 0);
*is_seq = bdev_zone_is_seq(mp->m_rtdev_targp->bt_bdev, *daddr);
if (!*is_seq)
- *daddr += XFS_FSB_TO_BB(mp, oz->oz_write_pointer);
- oz->oz_write_pointer += *count_fsb;
+ *daddr += XFS_FSB_TO_BB(mp, oz->oz_allocated);
+ oz->oz_allocated += *count_fsb;
atomic_inc(&oz->oz_ref);
return oz;
}
diff --git a/fs/xfs/xfs_zone_info.c b/fs/xfs/xfs_zone_info.c
index 733bcc2f8645..07e30c596975 100644
--- a/fs/xfs/xfs_zone_info.c
+++ b/fs/xfs/xfs_zone_info.c
@@ -32,7 +32,7 @@ xfs_show_open_zone(
{
seq_printf(m, "\t zone %d, wp %u, written %u, used %u, hint %s\n",
rtg_rgno(oz->oz_rtg),
- oz->oz_write_pointer, oz->oz_written,
+ oz->oz_allocated, oz->oz_written,
rtg_rmap(oz->oz_rtg)->i_used_blocks,
xfs_write_hint_to_str(oz->oz_write_hint));
}
diff --git a/fs/xfs/xfs_zone_priv.h b/fs/xfs/xfs_zone_priv.h
index ab696975a993..35e6de3d25ed 100644
--- a/fs/xfs/xfs_zone_priv.h
+++ b/fs/xfs/xfs_zone_priv.h
@@ -11,18 +11,18 @@ struct xfs_open_zone {
atomic_t oz_ref;
/*
- * oz_write_pointer is the write pointer at which space is handed out
- * for conventional zones, or simple the count of blocks handed out
- * so far for sequential write required zones and is protected by
- * oz_alloc_lock/
+ * oz_allocated is the amount of space already allocated out of the zone
+ * and is protected by oz_alloc_lock.
+ *
+ * For conventional zones it is also the offset of the next write.
*/
spinlock_t oz_alloc_lock;
- xfs_rgblock_t oz_write_pointer;
+ xfs_rgblock_t oz_allocated;
/*
- * oz_written is the number of blocks for which we've received a
- * write completion. oz_written must always be <= oz_write_pointer
- * and is protected by the ILOCK of the rmap inode.
+ * oz_written is the number of blocks for which we've received a write
+ * completion. oz_written must always be <= oz_allocated and is
+ * protected by the ILOCK of the rmap inode.
*/
xfs_rgblock_t oz_written;
diff --git a/fs/xfs/xfs_zone_space_resv.c b/fs/xfs/xfs_zone_space_resv.c
index 93c9a7721139..1313c55b8cbe 100644
--- a/fs/xfs/xfs_zone_space_resv.c
+++ b/fs/xfs/xfs_zone_space_resv.c
@@ -117,11 +117,10 @@ xfs_zoned_space_wait_error(
static int
xfs_zoned_reserve_available(
- struct xfs_inode *ip,
+ struct xfs_mount *mp,
xfs_filblks_t count_fsb,
unsigned int flags)
{
- struct xfs_mount *mp = ip->i_mount;
struct xfs_zone_info *zi = mp->m_zone_info;
struct xfs_zone_reservation reservation = {
.task = current,
@@ -198,11 +197,10 @@ xfs_zoned_reserve_available(
*/
static int
xfs_zoned_reserve_extents_greedy(
- struct xfs_inode *ip,
+ struct xfs_mount *mp,
xfs_filblks_t *count_fsb,
unsigned int flags)
{
- struct xfs_mount *mp = ip->i_mount;
struct xfs_zone_info *zi = mp->m_zone_info;
s64 len = *count_fsb;
int error = -ENOSPC;
@@ -220,12 +218,11 @@ xfs_zoned_reserve_extents_greedy(
int
xfs_zoned_space_reserve(
- struct xfs_inode *ip,
+ struct xfs_mount *mp,
xfs_filblks_t count_fsb,
unsigned int flags,
struct xfs_zone_alloc_ctx *ac)
{
- struct xfs_mount *mp = ip->i_mount;
int error;
ASSERT(ac->reserved_blocks == 0);
@@ -234,11 +231,11 @@ xfs_zoned_space_reserve(
error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
flags & XFS_ZR_RESERVED);
if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1)
- error = xfs_zoned_reserve_extents_greedy(ip, &count_fsb, flags);
+ error = xfs_zoned_reserve_extents_greedy(mp, &count_fsb, flags);
if (error)
return error;
- error = xfs_zoned_reserve_available(ip, count_fsb, flags);
+ error = xfs_zoned_reserve_available(mp, count_fsb, flags);
if (error) {
xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb);
return error;
@@ -249,12 +246,10 @@ xfs_zoned_space_reserve(
void
xfs_zoned_space_unreserve(
- struct xfs_inode *ip,
+ struct xfs_mount *mp,
struct xfs_zone_alloc_ctx *ac)
{
if (ac->reserved_blocks > 0) {
- struct xfs_mount *mp = ip->i_mount;
-
xfs_zoned_add_available(mp, ac->reserved_blocks);
xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, ac->reserved_blocks);
}
diff --git a/include/asm-generic/param.h b/include/asm-generic/param.h
index 8d3009dd28ff..8348c116aa3b 100644
--- a/include/asm-generic/param.h
+++ b/include/asm-generic/param.h
@@ -6,6 +6,6 @@
# undef HZ
# define HZ CONFIG_HZ /* Internal kernel timer frequency */
-# define USER_HZ 100 /* some user interfaces are */
+# define USER_HZ __USER_HZ /* some user interfaces are */
# define CLOCKS_PER_SEC (USER_HZ) /* in "ticks" like times() */
#endif /* __ASM_GENERIC_PARAM_H */
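USER_HZ is the tick unit the kernel exposes to userspace regardless of CONFIG_HZ; the hunk above merely sources it from __USER_HZ instead of hardcoding 100. A runnable userspace sketch of how the constant is consumed:

#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

int main(void)
{
	struct tms t;
	long hz = sysconf(_SC_CLK_TCK);	/* the kernel's USER_HZ */
	clock_t elapsed = times(&t);	/* ticks since an arbitrary epoch */

	printf("ticks/sec: %ld\n", hz);
	printf("cpu time:  %.2fs\n",
	       (double)(t.tms_utime + t.tms_stime) / hz);
	return elapsed == (clock_t)-1;
}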
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index 6f6b9de12cd3..db294d452e8c 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -202,6 +202,8 @@ struct shash_desc {
#define HASH_REQUEST_CLONE(name, gfp) \
hash_request_clone(name, sizeof(__##name##_req), gfp)
+#define CRYPTO_HASH_STATESIZE(coresize, blocksize) (coresize + blocksize + 1)
+
/**
* struct shash_alg - synchronous message digest definition
* @init: see struct ahash_alg
diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h
index b9bccd3ff57f..21a27fd5e198 100644
--- a/include/crypto/internal/sha2.h
+++ b/include/crypto/internal/sha2.h
@@ -25,7 +25,7 @@ void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS],
const u8 *data, size_t nblocks);
-static inline void sha256_choose_blocks(
+static __always_inline void sha256_choose_blocks(
u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks,
bool force_generic, bool force_simd)
{
diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h
index 7e7f1ac3b7fd..9e338e7aafbd 100644
--- a/include/crypto/internal/simd.h
+++ b/include/crypto/internal/simd.h
@@ -44,9 +44,11 @@ void simd_unregister_aeads(struct aead_alg *algs, int count,
*
* This delegates to may_use_simd(), except that this also returns false if SIMD
* in crypto code has been temporarily disabled on this CPU by the crypto
- * self-tests, in order to test the no-SIMD fallback code.
+ * self-tests, in order to test the no-SIMD fallback code. This override is
+ * currently limited to configurations where the "full" self-tests are enabled,
+ * because it might be a bit too invasive to be part of the "fast" self-tests.
*/
-#ifdef CONFIG_CRYPTO_SELFTESTS
+#ifdef CONFIG_CRYPTO_SELFTESTS_FULL
DECLARE_PER_CPU(bool, crypto_simd_disabled_for_test);
#define crypto_simd_usable() \
(may_use_simd() && !this_cpu_read(crypto_simd_disabled_for_test))
diff --git a/include/crypto/md5.h b/include/crypto/md5.h
index 198b5d69b92f..28ee533a0507 100644
--- a/include/crypto/md5.h
+++ b/include/crypto/md5.h
@@ -2,6 +2,7 @@
#ifndef _CRYPTO_MD5_H
#define _CRYPTO_MD5_H
+#include <crypto/hash.h>
#include <linux/types.h>
#define MD5_DIGEST_SIZE 16
@@ -15,6 +16,9 @@
#define MD5_H2 0x98badcfeUL
#define MD5_H3 0x10325476UL
+#define CRYPTO_MD5_STATESIZE \
+ CRYPTO_HASH_STATESIZE(MD5_STATE_SIZE, MD5_HMAC_BLOCK_SIZE)
+
extern const u8 md5_zero_message_hash[MD5_DIGEST_SIZE];
struct md5_state {
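CRYPTO_MD5_STATESIZE above expands through the new CRYPTO_HASH_STATESIZE() helper added to crypto/hash.h earlier in this diff. A comment-form sketch of the arithmetic; MD5_STATE_SIZE is defined outside this hunk, and the purpose of the trailing byte is an assumption here:

	/*
	 * CRYPTO_HASH_STATESIZE(coresize, blocksize) == coresize + blocksize + 1,
	 * and MD5_HMAC_BLOCK_SIZE == 64, so:
	 *
	 *   CRYPTO_MD5_STATESIZE == MD5_STATE_SIZE + 64 + 1
	 *
	 * i.e. the exported state carries the core hash state, one buffered
	 * block, and one extra byte (presumably buffered-length bookkeeping).
	 */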
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index 9689a7c5dd36..513837632b7d 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -160,6 +160,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
u64 new_size,
struct list_head *blocks);
+void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear);
+
void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
void drm_buddy_free_list(struct drm_buddy *mm,
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index 5c3b2aa3e69d..d344d41e6cfe 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -300,6 +300,9 @@ struct drm_file {
*
* Mapping of mm object handles to object pointers. Used by the GEM
* subsystem. Protected by @table_lock.
+ *
+ * Note that allocated entries might be NULL as a transient state when
+ * creating or deleting a handle.
*/
struct idr object_idr;
diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h
index 668077009fce..38b24fc8978d 100644
--- a/include/drm/drm_framebuffer.h
+++ b/include/drm/drm_framebuffer.h
@@ -23,6 +23,7 @@
#ifndef __DRM_FRAMEBUFFER_H__
#define __DRM_FRAMEBUFFER_H__
+#include <linux/bits.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/sched.h>
@@ -100,6 +101,8 @@ struct drm_framebuffer_funcs {
unsigned num_clips);
};
+#define DRM_FRAMEBUFFER_HAS_HANDLE_REF(_i) BIT(0u + (_i))
+
/**
* struct drm_framebuffer - frame buffer object
*
@@ -189,6 +192,10 @@ struct drm_framebuffer {
*/
int flags;
/**
+ * @internal_flags: Framebuffer flags like DRM_FRAMEBUFFER_HAS_HANDLE_REF.
+ */
+ unsigned int internal_flags;
+ /**
* @filp_head: Placed on &drm_file.fbs, protected by &drm_file.fbs_lock.
*/
struct list_head filp_head;
diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index b37860f4a895..6d2c08e81101 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h
@@ -223,6 +223,9 @@ struct mipi_dsi_multi_context {
#define to_mipi_dsi_device(__dev) container_of_const(__dev, struct mipi_dsi_device, dev)
+extern const struct bus_type mipi_dsi_bus_type;
+#define dev_is_mipi_dsi(dev) ((dev)->bus == &mipi_dsi_bus_type)
+
/**
* mipi_dsi_pixel_format_to_bpp - obtain the number of bits per pixel for any
* given pixel format defined by the MIPI DSI
diff --git a/include/drm/spsc_queue.h b/include/drm/spsc_queue.h
index 125f096c88cb..ee9df8cc67b7 100644
--- a/include/drm/spsc_queue.h
+++ b/include/drm/spsc_queue.h
@@ -70,9 +70,11 @@ static inline bool spsc_queue_push(struct spsc_queue *queue, struct spsc_node *n
preempt_disable();
+ atomic_inc(&queue->job_count);
+ smp_mb__after_atomic();
+
tail = (struct spsc_node **)atomic_long_xchg(&queue->tail, (long)&node->next);
WRITE_ONCE(*tail, node);
- atomic_inc(&queue->job_count);
/*
* In case of first element verify new node will be visible to the consumer
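The reorder above guarantees job_count is incremented before the node becomes reachable through the tail xchg, with smp_mb__after_atomic() enforcing that order; otherwise a consumer could observe the new node while job_count still holds the old value. A standalone C11 sketch of the same publish-after-count pattern (names and layout are illustrative, not the kernel queue):

#include <stdatomic.h>

struct node { _Atomic(struct node *) next; };

static struct node head;			/* dummy sentinel */
static _Atomic(struct node *) tail = &head;
static atomic_int job_count;

static void push(struct node *n)
{
	struct node *prev;

	atomic_store(&n->next, NULL);
	/* bump the count before publication; seq_cst operations give
	 * the ordering the kernel gets from smp_mb__after_atomic() */
	atomic_fetch_add(&job_count, 1);
	prev = atomic_exchange(&tail, n);
	atomic_store(&prev->next, n);		/* node now visible */
}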
diff --git a/include/dt-bindings/clock/sun8i-v3s-ccu.h b/include/dt-bindings/clock/sun8i-v3s-ccu.h
index 014ac6123d17..c4055629c9f9 100644
--- a/include/dt-bindings/clock/sun8i-v3s-ccu.h
+++ b/include/dt-bindings/clock/sun8i-v3s-ccu.h
@@ -96,7 +96,7 @@
#define CLK_TCON0 64
#define CLK_CSI_MISC 65
#define CLK_CSI0_MCLK 66
-#define CLK_CSI1_SCLK 67
+#define CLK_CSI_SCLK 67
#define CLK_CSI1_MCLK 68
#define CLK_VE 69
#define CLK_AC_DIG 70
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 784ebe4607a4..50b47eba7d01 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -113,7 +113,7 @@
/* KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 */
#define ARM_SMCCC_VENDOR_HYP_UID_KVM UUID_INIT(\
- 0xb66fb428, 0xc52e, 0xe911, \
+ 0x28b46fb6, 0x2ec5, 0x11e9, \
0xa9, 0xca, 0x4b, 0x56, \
0x4d, 0x00, 0x3a, 0x74)
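The old constants were the byte-swapped form of the UUID string's first three fields; UUID_INIT() takes those fields at their natural values, so 28b46fb6-2ec5-11e9-... encodes as 0x28b46fb6, 0x2ec5, 0x11e9. A runnable check of the relationship (illustrative userspace code, not part of the kernel):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t a = 0xb66fb428;		/* old, wrong constants */
	uint16_t b = 0xc52e, c = 0xe911;

	printf("%08x %04x %04x\n",
	       (unsigned)__builtin_bswap32(a),
	       (unsigned)__builtin_bswap16(b),
	       (unsigned)__builtin_bswap16(c));
	/* prints: 28b46fb6 2ec5 11e9 -- the UUID string's fields */
	return 0;
}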
diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
index 5bded24dc24f..e1634897e159 100644
--- a/include/linux/arm_ffa.h
+++ b/include/linux/arm_ffa.h
@@ -283,6 +283,7 @@ struct ffa_indirect_msg_hdr {
u32 offset;
u32 send_recv_id;
u32 size;
+ u32 res1;
uuid_t uuid;
};
diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 9b02961d65ee..45f2f278b50a 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -249,6 +249,12 @@ static inline void atm_account_tx(struct atm_vcc *vcc, struct sk_buff *skb)
ATM_SKB(skb)->atm_options = vcc->atm_options;
}
+static inline void atm_return_tx(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+ WARN_ON_ONCE(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize,
+ &sk_atm(vcc)->sk_wmem_alloc));
+}
+
static inline void atm_force_charge(struct atm_vcc *vcc,int truesize)
{
atomic_add(truesize, &sk_atm(vcc)->sk_rmem_alloc);
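atm_return_tx() above is the release counterpart of atm_account_tx(): it drops exactly the acct_truesize that accounting charged against the socket's write allocation. A hedged sketch of how a driver's TX-completion path would pair the two (the driver function is hypothetical):

static void demo_pop(struct atm_vcc *vcc, struct sk_buff *skb)
{
	atm_return_tx(vcc, skb);	/* undo the atm_account_tx() charge */
	dev_kfree_skb_any(skb);
}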
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 9c37c66ef9ca..46ffac5caab7 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -291,7 +291,7 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio,
fi->folio = page_folio(bvec->bv_page);
fi->offset = bvec->bv_offset +
- PAGE_SIZE * (bvec->bv_page - &fi->folio->page);
+ PAGE_SIZE * folio_page_idx(fi->folio, bvec->bv_page);
fi->_seg_count = bvec->bv_len;
fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count);
fi->_next = folio_next(fi->folio);
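The one-line fix above matters for large folios when the memmap is not virtually contiguous (SPARSEMEM without VMEMMAP): raw pointer arithmetic between struct page entries can then yield a bogus index, while folio_page_idx() computes it correctly. In comment form, the before/after:

	/*
	 * idx = page - &folio->page;	// assumes one flat, contiguous memmap
	 * idx = folio_page_idx(folio, page);
	 *				// correct even when the memmap is
	 *				// split into sections
	 */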
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a59880c809c7..181a0deadc9e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -269,11 +269,16 @@ static inline dev_t disk_devt(struct gendisk *disk)
return MKDEV(disk->major, disk->first_minor);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* We should strive for 1 << (PAGE_SHIFT + MAX_PAGECACHE_ORDER)
* however we constrain this to what we can validate and test.
*/
#define BLK_MAX_BLOCK_SIZE SZ_64K
+#else
+#define BLK_MAX_BLOCK_SIZE PAGE_SIZE
+#endif
+
/* blk_validate_limits() validates bsize, so drivers don't usually need to */
static inline int blk_validate_block_size(unsigned long bsize)
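With the hunk above, BLK_MAX_BLOCK_SIZE drops back to PAGE_SIZE when large folios are unavailable, since the page cache then cannot host a block spanning multiple pages. A sketch of the kind of bound the constant enforces; this mirrors, but is not claimed to be, the verbatim blk_validate_block_size() body:

static inline int validate_block_size_sketch(unsigned long bsize)
{
	if (bsize < 512 || bsize > BLK_MAX_BLOCK_SIZE ||
	    !is_power_of_2(bsize))
		return -EINVAL;
	return 0;
}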
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 0029ff880e27..b16b88bfbc3e 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -262,14 +262,12 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
struct folio **foliop, get_block_t *get_block);
int __block_write_begin(struct folio *folio, loff_t pos, unsigned len,
get_block_t *get_block);
-int block_write_end(struct file *, struct address_space *,
- loff_t, unsigned len, unsigned copied,
- struct folio *, void *);
-int generic_write_end(struct file *, struct address_space *,
+int block_write_end(loff_t pos, unsigned len, unsigned copied, struct folio *);
+int generic_write_end(const struct kiocb *, struct address_space *,
loff_t, unsigned len, unsigned copied,
struct folio *, void *);
void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to);
-int cont_write_begin(struct file *, struct address_space *, loff_t,
+int cont_write_begin(const struct kiocb *, struct address_space *, loff_t,
unsigned, struct folio **, void **,
get_block_t *, loff_t *);
int generic_cont_expand_simple(struct inode *inode, loff_t size);
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 204b22a99c4b..0a80e1f9aa20 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -57,9 +57,12 @@ static inline void bvec_set_page(struct bio_vec *bv, struct page *page,
* @offset: offset into the folio
*/
static inline void bvec_set_folio(struct bio_vec *bv, struct folio *folio,
- unsigned int len, unsigned int offset)
+ size_t len, size_t offset)
{
- bvec_set_page(bv, &folio->page, len, offset);
+ unsigned long nr = offset / PAGE_SIZE;
+
+ WARN_ON_ONCE(len > UINT_MAX);
+ bvec_set_page(bv, folio_page(folio, nr), len, offset % PAGE_SIZE);
}
/**
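bvec_set_folio() now takes size_t lengths and offsets and resolves the offset into the correct page of a large folio. A worked example, assuming PAGE_SIZE == 4096:

	/*
	 * offset == 70000:
	 *   70000 / 4096 == 17	(page index within the folio)
	 *   70000 % 4096 == 368	(offset within that page)
	 * so the call is effectively:
	 *   bvec_set_page(bv, folio_page(folio, 17), len, 368);
	 */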
diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index 7093e1d08af0..bee606bebaca 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -277,6 +277,14 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
class_##_name##_t var __cleanup(class_##_name##_destructor) = \
class_##_name##_constructor
+#define scoped_class(_name, var, args) \
+ for (CLASS(_name, var)(args); \
+ __guard_ptr(_name)(&var) || !__is_cond_ptr(_name); \
+ ({ goto _label; })) \
+ if (0) { \
+_label: \
+ break; \
+ } else
/*
* DEFINE_GUARD(name, type, lock, unlock):
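scoped_class() above is the block-scoped sibling of CLASS(): the instance gets a name, lives only for the attached statement, and its destructor runs as soon as the block exits. A hedged usage sketch using the stock mutex guard class (defined elsewhere via DEFINE_GUARD()):

static DEFINE_MUTEX(demo_lock);

static void demo(void)
{
	scoped_class(mutex, g, &demo_lock) {
		/* demo_lock is held, and 'g' names the guard,
		 * for this block only */
	}
	/* mutex_unlock() has already run here */
}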
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 76e41805b92d..96e8a66da133 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -43,7 +43,7 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
extern int dump_align(struct coredump_params *cprm, int align);
int dump_user_range(struct coredump_params *cprm, unsigned long start,
unsigned long len);
-extern void do_coredump(const kernel_siginfo_t *siginfo);
+extern void vfs_coredump(const kernel_siginfo_t *siginfo);
/*
* Logging for the coredump code, ratelimited.
@@ -63,7 +63,7 @@ extern void do_coredump(const kernel_siginfo_t *siginfo);
#define coredump_report_failure(fmt, ...) __COREDUMP_PRINTK(KERN_WARNING, fmt, ##__VA_ARGS__)
#else
-static inline void do_coredump(const kernel_siginfo_t *siginfo) {}
+static inline void vfs_coredump(const kernel_siginfo_t *siginfo) {}
#define coredump_report(...)
#define coredump_report_failure(...)
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index e6089abc28e2..6378370a952f 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -82,6 +82,7 @@ extern ssize_t cpu_show_old_microcode(struct device *dev,
struct device_attribute *attr, char *buf);
extern ssize_t cpu_show_indirect_target_selection(struct device *dev,
struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
@@ -120,6 +121,7 @@ extern void cpu_maps_update_begin(void);
extern void cpu_maps_update_done(void);
int bringup_hibernate_cpu(unsigned int sleep_cpu);
void bringup_nonboot_cpus(unsigned int max_cpus);
+int arch_cpu_rescan_dead_smt_siblings(void);
#else /* CONFIG_SMP */
#define cpuhp_tasks_frozen 0
@@ -134,6 +136,8 @@ static inline void cpu_maps_update_done(void)
static inline int add_cpu(unsigned int cpu) { return 0;}
+static inline int arch_cpu_rescan_dead_smt_siblings(void) { return 0; }
+
#endif /* CONFIG_SMP */
extern const struct bus_type cpu_subsys;
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 5658a3bfe803..a102a10f833f 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -263,6 +263,8 @@ static inline void put_cred(const struct cred *cred)
put_cred_many(cred, 1);
}
+DEFINE_FREE(put_cred, struct cred *, if (!IS_ERR_OR_NULL(_T)) put_cred(_T))
+
/**
* current_cred - Access the current task's subjective credentials
*
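The DEFINE_FREE() added above enables __free(put_cred) scope-based cleanup: the reference is dropped automatically on every exit path, and the IS_ERR_OR_NULL check keeps that safe when allocation fails. A minimal sketch (the function body is illustrative):

static int demo(void)
{
	struct cred *cred __free(put_cred) = prepare_creds();

	if (!cred)
		return -ENOMEM;
	/* use cred; no manual put_cred() on any return path */
	return 0;
}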
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index e29823c701ac..cc3e1c1a3454 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -237,7 +237,6 @@ extern void d_instantiate_new(struct dentry *, struct inode *);
extern void __d_drop(struct dentry *dentry);
extern void d_drop(struct dentry *dentry);
extern void d_delete(struct dentry *);
-extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op);
/* allocate/de-allocate */
extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
@@ -245,6 +244,9 @@ extern struct dentry * d_alloc_anon(struct super_block *);
extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
wait_queue_head_t *);
extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
+/* weird procfs mess; *NOT* exported */
+extern struct dentry * d_splice_alias_ops(struct inode *, struct dentry *,
+ const struct dentry_operations *);
extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
extern bool d_same_name(const struct dentry *dentry, const struct dentry *parent,
const struct qstr *name);
@@ -604,4 +606,6 @@ static inline struct dentry *d_next_sibling(const struct dentry *dentry)
return hlist_entry_safe(dentry->d_sib.next, struct dentry, d_sib);
}
+void set_default_d_op(struct super_block *, const struct dentry_operations *);
+
#endif /* __LINUX_DCACHE_H */
diff --git a/include/linux/execmem.h b/include/linux/execmem.h
index ca42d5e46ccc..3be35680a54f 100644
--- a/include/linux/execmem.h
+++ b/include/linux/execmem.h
@@ -54,7 +54,7 @@ enum execmem_range_flags {
EXECMEM_ROX_CACHE = (1 << 1),
};
-#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM)
+#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX
/**
* execmem_fill_trapping_insns - set memory to contain instructions that
* will trap
@@ -94,15 +94,9 @@ int execmem_make_temp_rw(void *ptr, size_t size);
* Return: 0 on success or negative error code on failure.
*/
int execmem_restore_rox(void *ptr, size_t size);
-
-/*
- * Called from mark_readonly(), where the system transitions to ROX.
- */
-void execmem_cache_make_ro(void);
#else
static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; }
static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; }
-static inline void execmem_cache_make_ro(void) { }
#endif
/**
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 25c4a5afbd44..cfb0dd1ea49c 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -230,7 +230,7 @@ struct handle_to_path_ctx {
* directory. The name should be stored in the @name (with the
* understanding that it is already pointing to a %NAME_MAX+1 sized
* buffer. get_name() should return %0 on success, a negative error code
- * or error. @get_name will be called without @parent->i_mutex held.
+ * or error. @get_name will be called without @parent->i_rwsem held.
*
* get_parent:
* @get_parent should find the parent directory for the given @child which
@@ -247,7 +247,7 @@ struct handle_to_path_ctx {
* @commit_metadata should commit metadata changes to stable storage.
*
* Locking rules:
- * get_parent is called with child->d_inode->i_mutex down
+ * get_parent is called with child->d_inode->i_rwsem down
* get_name is not (which is possibly inconsistent)
*/
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index c412ded9171e..c2ce8ba05d06 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -175,9 +175,14 @@ static inline bool lock_is_write(struct file_lock *fl)
return fl->c.flc_type == F_WRLCK;
}
+static inline void locks_wake_up_waiter(struct file_lock_core *flc)
+{
+ wake_up(&flc->flc_wait);
+}
+
static inline void locks_wake_up(struct file_lock *fl)
{
- wake_up(&fl->c.flc_wait);
+ locks_wake_up_waiter(&fl->c);
}
static inline bool locks_can_async_lock(const struct file_operations *fops)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 96c7925a6551..0463c662b74a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -399,7 +399,9 @@ struct readahead_control;
{ IOCB_WAITQ, "WAITQ" }, \
{ IOCB_NOIO, "NOIO" }, \
{ IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \
- { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }
+ { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }, \
+ { IOCB_AIO_RW, "AIO_RW" }, \
+ { IOCB_HAS_METADATA, "AIO_HAS_METADATA" }
struct kiocb {
struct file *ki_filp;
@@ -444,10 +446,10 @@ struct address_space_operations {
void (*readahead)(struct readahead_control *);
- int (*write_begin)(struct file *, struct address_space *mapping,
+ int (*write_begin)(const struct kiocb *, struct address_space *mapping,
loff_t pos, unsigned len,
struct folio **foliop, void **fsdata);
- int (*write_end)(struct file *, struct address_space *mapping,
+ int (*write_end)(const struct kiocb *, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct folio *folio, void *fsdata);
@@ -837,7 +839,7 @@ static inline void inode_fake_hash(struct inode *inode)
}
/*
- * inode->i_mutex nesting subclasses for the lock validator:
+ * inode->i_rwsem nesting subclasses for the lock validator:
*
* 0: the object of the current VFS operation
* 1: parent
@@ -989,7 +991,7 @@ static inline loff_t i_size_read(const struct inode *inode)
/*
* NOTE: unlike i_size_read(), i_size_write() does need locking around it
- * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount
+ * (normally i_rwsem), otherwise on 32bit/SMP an update of i_size_seqcount
* can be lost, resulting in subsequent i_size_read() calls spinning forever.
*/
static inline void i_size_write(struct inode *inode, loff_t i_size)
@@ -1400,6 +1402,7 @@ struct super_block {
char s_sysfs_name[UUID_STRING_LEN + 1];
unsigned int s_max_links;
+ unsigned int s_d_flags; /* default d_flags for dentries */
/*
* The next field is for VFS *only*. No filesystems have any business
@@ -1413,7 +1416,7 @@ struct super_block {
*/
const char *s_subtype;
- const struct dentry_operations *s_d_op; /* default d_op for dentries */
+ const struct dentry_operations *__s_d_op; /* default d_op for dentries */
struct shrinker *s_shrink; /* per-sb shrinker handle */
@@ -1921,7 +1924,7 @@ static inline void sb_end_intwrite(struct super_block *sb)
* freeze protection should be the outermost lock. In particular, we have:
*
* sb_start_write
- * -> i_mutex (write path, truncate, directory ops, ...)
+ * -> i_rwsem (write path, truncate, directory ops, ...)
* -> s_umount (freeze_super, thaw_super)
*/
static inline void sb_start_write(struct super_block *sb)
@@ -2004,20 +2007,20 @@ int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *,
/**
* struct renamedata - contains all information required for renaming
* @old_mnt_idmap: idmap of the old mount the inode was found from
- * @old_dir: parent of source
+ * @old_parent: parent of source
* @old_dentry: source
* @new_mnt_idmap: idmap of the new mount the inode was found from
- * @new_dir: parent of destination
+ * @new_parent: parent of destination
* @new_dentry: destination
* @delegated_inode: returns an inode needing a delegation break
* @flags: rename flags
*/
struct renamedata {
struct mnt_idmap *old_mnt_idmap;
- struct inode *old_dir;
+ struct dentry *old_parent;
struct dentry *old_dentry;
struct mnt_idmap *new_mnt_idmap;
- struct inode *new_dir;
+ struct dentry *new_parent;
struct dentry *new_dentry;
struct inode **delegated_inode;
unsigned int flags;
@@ -2274,10 +2277,12 @@ static inline bool file_has_valid_mmap_hooks(struct file *file)
return true;
}
+int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma);
+
static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
{
- if (WARN_ON_ONCE(file->f_op->mmap_prepare))
- return -EINVAL;
+ if (file->f_op->mmap_prepare)
+ return compat_vma_mmap_prepare(file, vma);
return file->f_op->mmap(file, vma);
}
@@ -2864,7 +2869,7 @@ struct file *dentry_open_nonotify(const struct path *path, int flags,
const struct cred *cred);
struct file *dentry_create(const struct path *path, int flags, umode_t mode,
const struct cred *cred);
-struct path *backing_file_user_path(struct file *f);
+struct path *backing_file_user_path(const struct file *f);
/*
* When mmapping a file on a stackable filesystem (e.g., overlayfs), the file
@@ -2876,14 +2881,14 @@ struct path *backing_file_user_path(struct file *f);
* by fstat() on that same fd.
*/
/* Get the path to display in /proc/<pid>/maps */
-static inline const struct path *file_user_path(struct file *f)
+static inline const struct path *file_user_path(const struct file *f)
{
if (unlikely(f->f_mode & FMODE_BACKING))
return backing_file_user_path(f);
return &f->f_path;
}
/* Get the inode whose inode number to display in /proc/<pid>/maps */
-static inline const struct inode *file_user_inode(struct file *f)
+static inline const struct inode *file_user_inode(const struct file *f)
{
if (unlikely(f->f_mode & FMODE_BACKING))
return d_inode(backing_file_user_path(f)->dentry);
@@ -3264,6 +3269,22 @@ static inline bool is_dot_dotdot(const char *name, size_t len)
(len == 1 || (len == 2 && name[1] == '.'));
}
+/**
+ * name_contains_dotdot - check if a file name contains ".." path components
+ * @name: the file name to check
+ *
+ * Search for ".." surrounded by either '/' or start/end of string.
+ */
+static inline bool name_contains_dotdot(const char *name)
+{
+ size_t name_len;
+
+ name_len = strlen(name);
+ return strcmp(name, "..") == 0 ||
+ strncmp(name, "../", 3) == 0 ||
+ strstr(name, "/../") != NULL ||
+ (name_len >= 3 && strcmp(name + name_len - 3, "/..") == 0);
+}
+
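For illustration, here is the same ".." matching logic as a standalone
userspace program (the test harness and sample strings are invented for
the example; the kernel helper itself is the static inline above):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Userspace copy of the name_contains_dotdot() logic above. */
static bool name_contains_dotdot(const char *name)
{
	size_t name_len = strlen(name);

	return strcmp(name, "..") == 0 ||
	       strncmp(name, "../", 3) == 0 ||
	       strstr(name, "/../") != NULL ||
	       (name_len >= 3 && strcmp(name + name_len - 3, "/..") == 0);
}

int main(void)
{
	/* "..", leading "../", embedded "/../" and trailing "/.." all
	 * match; a ".." that is merely part of a longer component does not.
	 */
	const char *names[] = { "..", "../x", "a/../b", "a/..", "..a", "a..b" };

	for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); i++)
		printf("%-6s -> %d\n", names[i], name_contains_dotdot(names[i]));
	return 0;
}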
#include <linux/err.h>
/* needed for stackable file system support */
@@ -3595,17 +3616,21 @@ extern int simple_rename(struct mnt_idmap *, struct inode *,
unsigned int);
extern void simple_recursive_removal(struct dentry *,
void (*callback)(struct dentry *));
+extern void locked_recursive_removal(struct dentry *,
+ void (*callback)(struct dentry *));
extern int noop_fsync(struct file *, loff_t, loff_t, int);
extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
extern int simple_empty(struct dentry *);
-extern int simple_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct folio **foliop, void **fsdata);
+extern int simple_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct folio **foliop, void **fsdata);
extern const struct address_space_operations ram_aops;
extern int always_delete_dentry(const struct dentry *);
extern struct inode *alloc_anon_inode(struct super_block *);
+struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name,
+ const struct inode *context_inode);
extern int simple_nosetlease(struct file *, int, struct file_lease **, void **);
-extern const struct dentry_operations simple_dentry_operations;
extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
@@ -3619,6 +3644,7 @@ extern int simple_fill_super(struct super_block *, unsigned long,
const struct tree_descr *);
extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count);
extern void simple_release_fs(struct vfsmount **mount, int *count);
+struct dentry *simple_start_creating(struct dentry *, const char *);
extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
loff_t *ppos, const void *from, size_t available);
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index a19e4bd32e4d..7773eb870039 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -200,7 +200,7 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt,
*/
#define infof(fc, fmt, ...) __logfc(fc, 'i', fmt, ## __VA_ARGS__)
#define info_plog(p, fmt, ...) __plog(p, 'i', fmt, ## __VA_ARGS__)
-#define infofc(p, fmt, ...) __plog((&(fc)->log), 'i', fmt, ## __VA_ARGS__)
+#define infofc(fc, fmt, ...) __plog((&(fc)->log), 'i', fmt, ## __VA_ARGS__)
/**
* warnf - Store supplementary warning message
diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
index 2b1f74b24070..0cc2fa283305 100644
--- a/include/linux/fs_stack.h
+++ b/include/linux/fs_stack.h
@@ -3,7 +3,7 @@
#define _LINUX_FS_STACK_H
/* This file defines generic functions used primarily by stackable
- * filesystems; none of these functions require i_mutex to be held.
+ * filesystems; none of these functions require i_rwsem to be held.
*/
#include <linux/fs.h>
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 005b040c4791..b37193653e6b 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -89,6 +89,7 @@ void futex_hash_free(struct mm_struct *mm);
static inline void futex_mm_init(struct mm_struct *mm)
{
RCU_INIT_POINTER(mm->futex_phash, NULL);
+ mm->futex_phash_new = NULL;
mutex_init(&mm->futex_hash_lock);
}
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index ce377f7fb912..996be3c2cff0 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -663,18 +663,6 @@ static inline bool ieee80211_s1g_has_cssid(__le16 fc)
}
/**
- * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon
- * @fc: frame control bytes in little-endian byteorder
- * Return: whether or not the frame is an S1G short beacon,
- * i.e. it is an S1G beacon with 'next TBTT' flag set
- */
-static inline bool ieee80211_is_s1g_short_beacon(__le16 fc)
-{
- return ieee80211_is_s1g_beacon(fc) &&
- (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT));
-}
-
-/**
* ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM
* @fc: frame control bytes in little-endian byteorder
* Return: whether or not the frame is an ATIM frame
@@ -1278,7 +1266,7 @@ struct ieee80211_ext {
u8 sa[ETH_ALEN];
__le32 timestamp;
u8 change_seq;
- u8 variable[0];
+ u8 variable[];
} __packed s1g_beacon;
} u;
} __packed __aligned(2);
@@ -1536,7 +1524,7 @@ struct ieee80211_mgmt {
u8 action_code;
u8 dialog_token;
__le16 capability;
- u8 variable[0];
+ u8 variable[];
} __packed tdls_discover_resp;
struct {
u8 action_code;
@@ -1721,35 +1709,35 @@ struct ieee80211_tdls_data {
struct {
u8 dialog_token;
__le16 capability;
- u8 variable[0];
+ u8 variable[];
} __packed setup_req;
struct {
__le16 status_code;
u8 dialog_token;
__le16 capability;
- u8 variable[0];
+ u8 variable[];
} __packed setup_resp;
struct {
__le16 status_code;
u8 dialog_token;
- u8 variable[0];
+ u8 variable[];
} __packed setup_cfm;
struct {
__le16 reason_code;
- u8 variable[0];
+ u8 variable[];
} __packed teardown;
struct {
u8 dialog_token;
- u8 variable[0];
+ u8 variable[];
} __packed discover_req;
struct {
u8 target_channel;
u8 oper_class;
- u8 variable[0];
+ u8 variable[];
} __packed chan_switch_req;
struct {
__le16 status_code;
- u8 variable[0];
+ u8 variable[];
} __packed chan_switch_resp;
} u;
} __packed;
@@ -4901,6 +4889,39 @@ static inline bool ieee80211_is_ftm(struct sk_buff *skb)
return false;
}
+/**
+ * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon
+ * @fc: frame control bytes in little-endian byteorder
+ * @variable: pointer to the beacon frame elements
+ * @variable_len: length of the frame elements
+ * Return: whether or not the frame is an S1G short beacon. As per
+ * IEEE80211-2024 11.1.3.10.1, the S1G beacon compatibility element shall
+ * always be present as the first element in beacon frames generated at a
+ * TBTT (Target Beacon Transmission Time), so any frame not containing
+ * this element must have been generated at a TSBTT (Target Short Beacon
+ * Transmission Time) that is not a TBTT. Additionally, short beacons are
+ * prohibited from containing the S1G beacon compatibility element as per
+ * IEEE80211-2024 9.3.4.3 Table 9-76, so if we have an S1G beacon with
+ * either no elements or the first element is not the beacon compatibility
+ * element, we have a short beacon.
+ */
+static inline bool ieee80211_is_s1g_short_beacon(__le16 fc, const u8 *variable,
+ size_t variable_len)
+{
+ if (!ieee80211_is_s1g_beacon(fc))
+ return false;
+
+ /*
+	 * If the frame does not contain at least one element (which is
+	 * perfectly valid for a short beacon), we have a short beacon.
+ */
+ if (variable_len < 2)
+ return true;
+
+ return variable[0] != WLAN_EID_S1G_BCN_COMPAT;
+}
+
struct element {
u8 id;
u8 datalen;
diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h
index 55cfebc658e6..8a2f652a05ff 100644
--- a/include/linux/interconnect-provider.h
+++ b/include/linux/interconnect-provider.h
@@ -119,6 +119,7 @@ int icc_std_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
struct icc_node *icc_node_create_dyn(void);
struct icc_node *icc_node_create(int id);
void icc_node_destroy(int id);
+int icc_node_set_name(struct icc_node *node, const struct icc_provider *provider, const char *name);
int icc_link_nodes(struct icc_node *src_node, struct icc_node **dst_node);
int icc_link_create(struct icc_node *node, const int dst_id);
void icc_node_add(struct icc_node *node, struct icc_provider *provider);
@@ -152,6 +153,12 @@ static inline void icc_node_destroy(int id)
{
}
+static inline int icc_node_set_name(struct icc_node *node, const struct icc_provider *provider,
+ const char *name)
+{
+ return -EOPNOTSUPP;
+}
+
static inline int icc_link_nodes(struct icc_node *src_node, struct icc_node **dst_node)
{
return -EOPNOTSUPP;
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 2922635986f5..a7efcec2e3d0 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -698,6 +698,8 @@ struct io_kiocb {
struct hlist_node hash_node;
/* For IOPOLL setup queues, with hybrid polling */
u64 iopoll_start;
+ /* for private io_kiocb freeing */
+ struct rcu_head rcu_head;
};
/* internal polling, see IORING_FEAT_FAST_POLL */
struct async_poll *apoll;
diff --git a/include/linux/irqchip/irq-msi-lib.h b/include/linux/irqchip/irq-msi-lib.h
index dd8d1d138544..224ac28e88d7 100644
--- a/include/linux/irqchip/irq-msi-lib.h
+++ b/include/linux/irqchip/irq-msi-lib.h
@@ -17,6 +17,7 @@
#define MATCH_PLATFORM_MSI BIT(DOMAIN_BUS_PLATFORM_MSI)
+struct msi_domain_info;
int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec,
enum irq_domain_bus_token bus_token);
diff --git a/include/linux/ism.h b/include/linux/ism.h
index 5428edd90982..8358b4cd7ba6 100644
--- a/include/linux/ism.h
+++ b/include/linux/ism.h
@@ -28,6 +28,7 @@ struct ism_dmb {
struct ism_dev {
spinlock_t lock; /* protects the ism device */
+ spinlock_t cmd_lock; /* serializes cmds */
struct list_head list;
struct pci_dev *pdev;
diff --git a/include/linux/key.h b/include/linux/key.h
index ba05de8579ec..81b8f05c6898 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -236,7 +236,7 @@ struct key {
#define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */
#define KEY_FLAG_KEEP 8 /* set if key should not be removed */
#define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */
-#define KEY_FLAG_FINAL_PUT 10 /* set if final put has happened on key */
+#define KEY_FLAG_USER_ALIVE 10 /* set if final put has not happened on key yet */
/* the key type and key description string
* - the desc is used to match a key against search criteria
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 93a73c076d16..fbd424b2abb1 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -28,6 +28,7 @@ extern void kmemleak_update_trace(const void *ptr) __ref;
extern void kmemleak_not_leak(const void *ptr) __ref;
extern void kmemleak_transient_leak(const void *ptr) __ref;
extern void kmemleak_ignore(const void *ptr) __ref;
+extern void kmemleak_ignore_percpu(const void __percpu *ptr) __ref;
extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref;
extern void kmemleak_no_scan(const void *ptr) __ref;
extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size,
@@ -97,6 +98,9 @@ static inline void kmemleak_not_leak(const void *ptr)
static inline void kmemleak_transient_leak(const void *ptr)
{
}
+static inline void kmemleak_ignore_percpu(const void __percpu *ptr)
+{
+}
static inline void kmemleak_ignore(const void *ptr)
{
}
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 31be45fd47a6..1e5aec839041 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1352,7 +1352,7 @@ int ata_acpi_stm(struct ata_port *ap, const struct ata_acpi_gtm *stm);
int ata_acpi_gtm(struct ata_port *ap, struct ata_acpi_gtm *stm);
unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev,
const struct ata_acpi_gtm *gtm);
-int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm);
+int ata_acpi_cbl_pata_type(struct ata_port *ap);
#else
static inline const struct ata_acpi_gtm *ata_acpi_init_gtm(struct ata_port *ap)
{
@@ -1377,10 +1377,9 @@ static inline unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev,
return 0;
}
-static inline int ata_acpi_cbl_80wire(struct ata_port *ap,
- const struct ata_acpi_gtm *gtm)
+static inline int ata_acpi_cbl_pata_type(struct ata_port *ap)
{
- return 0;
+ return ATA_CBL_PATA40;
}
#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0ef2ba0c667a..fa538feaa8d9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2568,6 +2568,11 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
return percpu_counter_read_positive(&mm->rss_stat[member]);
}
+static inline unsigned long get_mm_counter_sum(struct mm_struct *mm, int member)
+{
+ return percpu_counter_sum_positive(&mm->rss_stat[member]);
+}
+
void mm_trace_rss_stat(struct mm_struct *mm, int member);
static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
diff --git a/include/linux/module.h b/include/linux/module.h
index 92e1420fccdf..5faa1fb1f4b4 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -586,11 +586,6 @@ struct module {
atomic_t refcnt;
#endif
-#ifdef CONFIG_MITIGATION_ITS
- int its_num_pages;
- void **its_page_array;
-#endif
-
#ifdef CONFIG_CONSTRUCTORS
/* Constructor functions. */
ctor_fn_t *ctors;
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 4880f434c021..5f9c053b0897 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -35,9 +35,6 @@ enum mount_flags {
MNT_SHRINKABLE = 0x100,
MNT_WRITE_HOLD = 0x200,
- MNT_SHARED = 0x1000, /* if the vfsmount is a shared mount */
- MNT_UNBINDABLE = 0x2000, /* if the vfsmount is a unbindable mount */
-
MNT_INTERNAL = 0x4000,
MNT_LOCK_ATIME = 0x040000,
@@ -48,25 +45,15 @@ enum mount_flags {
MNT_LOCKED = 0x800000,
MNT_DOOMED = 0x1000000,
MNT_SYNC_UMOUNT = 0x2000000,
- MNT_MARKED = 0x4000000,
MNT_UMOUNT = 0x8000000,
- /*
- * MNT_SHARED_MASK is the set of flags that should be cleared when a
- * mount becomes shared. Currently, this is only the flag that says a
- * mount cannot be bind mounted, since this is how we create a mount
- * that shares events with another mount. If you add a new MNT_*
- * flag, consider how it interacts with shared mounts.
- */
- MNT_SHARED_MASK = MNT_UNBINDABLE,
MNT_USER_SETTABLE_MASK = MNT_NOSUID | MNT_NODEV | MNT_NOEXEC
| MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME
| MNT_READONLY | MNT_NOSYMFOLLOW,
MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME,
- MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL |
- MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED |
- MNT_LOCKED,
+ MNT_INTERNAL_FLAGS = MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED |
+ MNT_SYNC_UMOUNT | MNT_LOCKED
};
struct vfsmount {
@@ -98,6 +85,7 @@ int mnt_get_write_access(struct vfsmount *mnt);
void mnt_put_write_access(struct vfsmount *mnt);
extern struct vfsmount *fc_mount(struct fs_context *fc);
+extern struct vfsmount *fc_mount_longterm(struct fs_context *fc);
extern struct vfsmount *vfs_create_mount(struct fs_context *fc);
extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
int flags, const char *name,
@@ -116,10 +104,8 @@ extern int may_umount_tree(struct vfsmount *);
extern int may_umount(struct vfsmount *);
int do_mount(const char *, const char __user *,
const char *, unsigned long, void *);
-extern struct vfsmount *collect_mounts(const struct path *);
-extern void drop_collected_mounts(struct vfsmount *);
-extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
- struct vfsmount *);
+extern struct path *collect_paths(const struct path *, struct path *, unsigned);
+extern void drop_collected_paths(struct path *, struct path *);
extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num);
extern int cifs_root_data(char **dev, char **opts);
diff --git a/include/linux/mtd/nand-qpic-common.h b/include/linux/mtd/nand-qpic-common.h
index e8462deda6db..f0aa098a395f 100644
--- a/include/linux/mtd/nand-qpic-common.h
+++ b/include/linux/mtd/nand-qpic-common.h
@@ -237,6 +237,9 @@
* @last_data_desc - last DMA desc in data channel (tx/rx).
* @last_cmd_desc - last DMA desc in command channel.
* @txn_done - completion for NAND transfer.
+ * @bam_ce_nitems - the number of elements in the @bam_ce array
+ * @cmd_sgl_nitems - the number of elements in the @cmd_sgl array
+ * @data_sgl_nitems - the number of elements in the @data_sgl array
* @bam_ce_pos - the index in bam_ce which is available for next sgl
 * @bam_ce_start - the index in bam_ce which marks the start position of the ce
 *		for the current sgl. It will be used for size calculation
@@ -255,6 +258,11 @@ struct bam_transaction {
struct dma_async_tx_descriptor *last_data_desc;
struct dma_async_tx_descriptor *last_cmd_desc;
struct completion txn_done;
+
+ unsigned int bam_ce_nitems;
+ unsigned int cmd_sgl_nitems;
+ unsigned int data_sgl_nitems;
+
struct_group(bam_positions,
u32 bam_ce_pos;
u32 bam_ce_start;
diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index 5daf80df9e89..b74a539ec581 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -108,7 +108,7 @@ extern void deregister_mtd_parser(struct mtd_part_parser *parser);
deregister_mtd_parser)
int mtd_add_partition(struct mtd_info *master, const char *name,
- long long offset, long long length, struct mtd_info **part);
+ long long offset, long long length);
int mtd_del_partition(struct mtd_info *master, int partno);
uint64_t mtd_get_device_size(const struct mtd_info *mtd);
diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 811a0f356315..15eaa09da998 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -113,11 +113,12 @@
SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \
SPI_MEM_OP_MAX_FREQ(freq))
-#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len) \
+#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len, ...) \
SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \
SPI_MEM_OP_ADDR(2, addr, 2), \
SPI_MEM_OP_DUMMY(ndummy, 2), \
- SPI_MEM_OP_DATA_IN(len, buf, 2))
+ SPI_MEM_OP_DATA_IN(len, buf, 2), \
+ SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0))
#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_2S_2S_OP(addr, ndummy, buf, len) \
SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \
@@ -151,11 +152,12 @@
SPI_MEM_DTR_OP_DATA_IN(len, buf, 4), \
SPI_MEM_OP_MAX_FREQ(freq))
-#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len) \
+#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len, ...) \
SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \
SPI_MEM_OP_ADDR(2, addr, 4), \
SPI_MEM_OP_DUMMY(ndummy, 4), \
- SPI_MEM_OP_DATA_IN(len, buf, 4))
+ SPI_MEM_OP_DATA_IN(len, buf, 4), \
+ SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0))
#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_4S_4S_OP(addr, ndummy, buf, len) \
SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \
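The "__VA_ARGS__ + 0" trick in the two macros above makes the trailing
max-frequency argument optional: when a caller omits it, the empty
variadic list expands to nothing and the expression reduces to "+ 0",
i.e. no frequency cap. A minimal standalone sketch of the idiom (it
relies on the GNU C extension, used throughout the kernel, that permits
an empty variadic argument):

#include <stdio.h>

#define MAX_FREQ(...)	(__VA_ARGS__ + 0)

int main(void)
{
	printf("%d\n", MAX_FREQ());	/* prints 0   - no cap requested */
	printf("%d\n", MAX_FREQ(104));	/* prints 104 - explicit cap */
	return 0;
}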
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 065c17385e53..185bd8196503 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -265,21 +265,20 @@ struct netfs_io_request {
bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */
refcount_t ref;
unsigned long flags;
-#define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */
-#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */
-#define NETFS_RREQ_FAILED 4 /* The request failed */
-#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes (has ref) */
-#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */
-#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */
-#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */
+#define NETFS_RREQ_IN_PROGRESS 0 /* Unlocked when the request completes (has ref) */
+#define NETFS_RREQ_ALL_QUEUED 1 /* All subreqs are now queued */
+#define NETFS_RREQ_PAUSE 2 /* Pause subrequest generation */
+#define NETFS_RREQ_FAILED 3 /* The request failed */
+#define NETFS_RREQ_RETRYING 4 /* Set if we're in the retry path */
+#define NETFS_RREQ_SHORT_TRANSFER 5 /* Set if we have a short transfer */
+#define NETFS_RREQ_OFFLOAD_COLLECTION 8 /* Offload collection to workqueue */
+#define NETFS_RREQ_NO_UNLOCK_FOLIO 9 /* Don't unlock no_unlock_folio on completion */
+#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 10 /* Copy current folio to cache from read */
+#define NETFS_RREQ_UPLOAD_TO_SERVER 11 /* Need to write to the server */
#define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */
-#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */
-#define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */
-#define NETFS_RREQ_SHORT_TRANSFER 15 /* Set if we have a short transfer */
#define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark
* write to cache on read */
const struct netfs_request_ops *netfs_ops;
- void (*cleanup)(struct netfs_io_request *req);
};
/*
@@ -443,7 +442,6 @@ size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
size_t max_size, size_t max_segs);
void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq);
void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error);
-void netfs_queue_write_request(struct netfs_io_subrequest *subreq);
int netfs_start_io_read(struct inode *inode);
void netfs_end_io_read(struct inode *inode);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index e63fbfbd5b0f..ce2bcdcadb73 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -751,6 +751,33 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
fgf_t fgp_flags, gfp_t gfp);
/**
+ * write_begin_get_folio - Get folio for write_begin with flags.
+ * @iocb: The kiocb passed from write_begin (may be NULL).
+ * @mapping: The address space to search.
+ * @index: The page cache index.
+ * @len: Length of data being written.
+ *
+ * This is a helper for filesystem write_begin() implementations.
+ * It wraps __filemap_get_folio(), setting appropriate flags in
+ * the write begin context.
+ *
+ * Return: A folio or an ERR_PTR.
+ */
+static inline struct folio *write_begin_get_folio(const struct kiocb *iocb,
+ struct address_space *mapping, pgoff_t index, size_t len)
+{
+ fgf_t fgp_flags = FGP_WRITEBEGIN;
+
+ fgp_flags |= fgf_set_order(len);
+
+ if (iocb && iocb->ki_flags & IOCB_DONTCACHE)
+ fgp_flags |= FGP_DONTCACHE;
+
+ return __filemap_get_folio(mapping, index, fgp_flags,
+ mapping_gfp_mask(mapping));
+}
+
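As a usage illustration, a filesystem's ->write_begin() built on this
helper might look like the sketch below (the examplefs_* naming is
invented and error handling is trimmed to the essentials; this is not
code from the patch):

/* Hypothetical ->write_begin() using write_begin_get_folio(). */
static int examplefs_write_begin(const struct kiocb *iocb,
				 struct address_space *mapping,
				 loff_t pos, unsigned len,
				 struct folio **foliop, void **fsdata)
{
	struct folio *folio;

	folio = write_begin_get_folio(iocb, mapping, pos >> PAGE_SHIFT, len);
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	/* Locked folio; handed back to the caller for ->write_end(). */
	*foliop = folio;
	return 0;
}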
+/**
* filemap_get_folio - Find and get a folio.
* @mapping: The address_space to search.
* @index: The page index.
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 52dc7cfab0e0..ec9d96025683 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -635,8 +635,46 @@ struct perf_addr_filter_range {
unsigned long size;
};
-/**
- * enum perf_event_state - the states of an event:
+/*
+ * The normal states are:
+ *
+ * ACTIVE --.
+ * ^ |
+ * | |
+ * sched_{in,out}() |
+ * | |
+ * v |
+ * ,---> INACTIVE --+ <-.
+ * | | |
+ * | {dis,en}able()
+ * sched_in() | |
+ * | OFF <--' --+
+ * | |
+ * `---> ERROR ------'
+ *
+ * That is:
+ *
+ * sched_in: INACTIVE -> {ACTIVE,ERROR}
+ * sched_out: ACTIVE -> INACTIVE
+ * disable: {ACTIVE,INACTIVE} -> OFF
+ * enable: {OFF,ERROR} -> INACTIVE
+ *
+ * Where {OFF,ERROR} are disabled states.
+ *
+ * Then we have the {EXIT,REVOKED,DEAD} states which are various shades of
+ * defunct events:
+ *
+ * - EXIT means the task that the event was assigned to died, but child events
+ * still live, and further children can still be created. But the event
+ * itself will never be active again. It can only transition to
+ * {REVOKED,DEAD};
+ *
+ * - REVOKED means the PMU the event was associated with is gone; all
+ * functionality is stopped but the event is still alive. Can only
+ * transition to DEAD;
+ *
+ * - DEAD means the event really is dying: tearing down state and freeing bits.
+ *
*/
enum perf_event_state {
PERF_EVENT_STATE_DEAD = -5,
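Read as a transition table, the normal-path portion of the diagram
amounts to the following standalone sketch (the ev_* names and the
driver in main() are invented; EXIT/REVOKED/DEAD are one-way and
omitted):

#include <stdio.h>

enum state { S_ERROR, S_OFF, S_INACTIVE, S_ACTIVE };

/* Each helper returns the new state, or the old state unchanged if
 * the transition is not allowed from there.
 */
static enum state ev_sched_in(enum state s, int pmu_err)
{
	if (s != S_INACTIVE)
		return s;
	return pmu_err ? S_ERROR : S_ACTIVE;
}

static enum state ev_sched_out(enum state s)
{
	return s == S_ACTIVE ? S_INACTIVE : s;
}

static enum state ev_disable(enum state s)
{
	return (s == S_ACTIVE || s == S_INACTIVE) ? S_OFF : s;
}

static enum state ev_enable(enum state s)
{
	return (s == S_OFF || s == S_ERROR) ? S_INACTIVE : s;
}

int main(void)
{
	enum state s = S_INACTIVE;

	s = ev_sched_in(s, 0);	/* INACTIVE -> ACTIVE */
	s = ev_sched_out(s);	/* ACTIVE   -> INACTIVE */
	s = ev_disable(s);	/* INACTIVE -> OFF */
	s = ev_enable(s);	/* OFF      -> INACTIVE */
	printf("final state: %d (2 == S_INACTIVE)\n", s);
	return 0;
}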
diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h
index 437769e061b7..13add0c2c407 100644
--- a/include/linux/phy/phy.h
+++ b/include/linux/phy/phy.h
@@ -154,6 +154,7 @@ struct phy_attrs {
* @id: id of the phy device
* @ops: function pointers for performing phy operations
* @mutex: mutex to protect phy_ops
+ * @lockdep_key: lockdep information for this mutex
* @init_count: used to protect when the PHY is used by multiple consumers
* @power_count: used to protect when the PHY is used by multiple consumers
* @attrs: used to specify PHY specific attributes
@@ -165,6 +166,7 @@ struct phy {
int id;
const struct phy_ops *ops;
struct mutex mutex;
+ struct lock_class_key lockdep_key;
int init_count;
int power_count;
struct phy_attrs attrs;
diff --git a/arch/x86/include/asm/amd/fch.h b/include/linux/platform_data/x86/amd-fch.h
index 2cf5153edbc2..2cf5153edbc2 100644
--- a/arch/x86/include/asm/amd/fch.h
+++ b/include/linux/platform_data/x86/amd-fch.h
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 7803edaa8ff8..0cca01b5607b 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -888,15 +888,23 @@ static inline int power_supply_is_system_supplied(void) { return -ENOSYS; }
extern int power_supply_get_property(struct power_supply *psy,
enum power_supply_property psp,
union power_supply_propval *val);
+int power_supply_get_property_direct(struct power_supply *psy, enum power_supply_property psp,
+ union power_supply_propval *val);
#if IS_ENABLED(CONFIG_POWER_SUPPLY)
extern int power_supply_set_property(struct power_supply *psy,
enum power_supply_property psp,
const union power_supply_propval *val);
+int power_supply_set_property_direct(struct power_supply *psy, enum power_supply_property psp,
+ const union power_supply_propval *val);
#else
static inline int power_supply_set_property(struct power_supply *psy,
enum power_supply_property psp,
const union power_supply_propval *val)
{ return 0; }
+static inline int power_supply_set_property_direct(struct power_supply *psy,
+ enum power_supply_property psp,
+ const union power_supply_propval *val)
+{ return 0; }
#endif
extern void power_supply_external_power_changed(struct power_supply *psy);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index ea62201c74c4..de1d24f19f76 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -27,6 +27,8 @@ enum {
PROC_ENTRY_proc_read_iter = 1U << 1,
PROC_ENTRY_proc_compat_ioctl = 1U << 2,
+
+ PROC_ENTRY_FORCE_LOOKUP = 1U << 7,
};
struct proc_ops {
diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
index 0b3a36bdaa90..0f5f94137f6d 100644
--- a/include/linux/psp-sev.h
+++ b/include/linux/psp-sev.h
@@ -594,6 +594,7 @@ struct sev_data_snp_addr {
* @imi_en: launch flow is launching an IMI (Incoming Migration Image) for the
* purpose of guest-assisted migration.
* @rsvd: reserved
+ * @desired_tsc_khz: hypervisor's desired mean guest TSC frequency, in kHz
* @gosvw: guest OS-visible workarounds, as defined by hypervisor
*/
struct sev_data_snp_launch_start {
@@ -603,6 +604,7 @@ struct sev_data_snp_launch_start {
u32 ma_en:1; /* In */
u32 imi_en:1; /* In */
u32 rsvd:30;
+ u32 desired_tsc_khz; /* In */
u8 gosvw[16]; /* In */
} __packed;
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 06cc8888199e..c334f82ed385 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -19,7 +19,7 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb)
return &sb->s_dquot;
}
-/* i_mutex must being held */
+/* i_rwsem must be held */
static inline bool is_quota_modification(struct mnt_idmap *idmap,
struct inode *inode, struct iattr *ia)
{
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 9ba771f2ddea..6fb4894b8cfd 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -159,7 +159,7 @@ struct rdt_ctrl_domain {
/**
* struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource
* @hdr: common header for different domain types
- * @ci: cache info for this domain
+ * @ci_id: cache info id for this domain
* @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold
* @mbm_total: saved state for MBM total bandwidth
* @mbm_local: saved state for MBM local bandwidth
@@ -170,7 +170,7 @@ struct rdt_ctrl_domain {
*/
struct rdt_mon_domain {
struct rdt_domain_hdr hdr;
- struct cacheinfo *ci;
+ unsigned int ci_id;
unsigned long *rmid_busy_llc;
struct mbm_state *mbm_total;
struct mbm_state *mbm_local;
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 0cdbfc42f153..6f8a4965f9b9 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -99,7 +99,7 @@ static inline bool sg_is_last(struct scatterlist *sg)
* @sg: The current sg entry
*
* Description:
- * Usually the next entry will be @sg@ + 1, but if this sg element is part
+ * Usually the next entry will be @sg + 1, but if this sg element is part
* of a chained scatterlist, it could jump to the start of a new
* scatterlist array.
*
@@ -254,7 +254,7 @@ static inline void __sg_chain(struct scatterlist *chain_sg,
* @sgl: Second scatterlist
*
* Description:
- * Links @prv@ and @sgl@ together, to form a longer scatterlist.
+ * Links @prv and @sgl together, to form a longer scatterlist.
*
**/
static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4f78a64beb52..aa9c5be7a632 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -548,10 +548,6 @@ struct sched_statistics {
u64 nr_failed_migrations_running;
u64 nr_failed_migrations_hot;
u64 nr_forced_migrations;
-#ifdef CONFIG_NUMA_BALANCING
- u64 numa_task_migrated;
- u64 numa_task_swapped;
-#endif
u64 nr_wakeups;
u64 nr_wakeups_sync;
diff --git a/include/linux/security.h b/include/linux/security.h
index dba349629229..386463b5e848 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2211,7 +2211,6 @@ struct dentry *securityfs_create_symlink(const char *name,
const char *target,
const struct inode_operations *iops);
extern void securityfs_remove(struct dentry *dentry);
-extern void securityfs_recursive_remove(struct dentry *dentry);
#else /* CONFIG_SECURITYFS */
@@ -2243,6 +2242,8 @@ static inline void securityfs_remove(struct dentry *dentry)
#endif
+#define securityfs_recursive_remove securityfs_remove
+
#ifdef CONFIG_BPF_SYSCALL
union bpf_attr;
struct bpf_map;
diff --git a/include/linux/soc/amd/isp4_misc.h b/include/linux/soc/amd/isp4_misc.h
new file mode 100644
index 000000000000..6738796986a7
--- /dev/null
+++ b/include/linux/soc/amd/isp4_misc.h
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright (C) 2025 Advanced Micro Devices, Inc.
+ */
+
+#ifndef __SOC_ISP4_MISC_H
+#define __SOC_ISP4_MISC_H
+
+#define AMDISP_I2C_ADAP_NAME "AMDISP DesignWare I2C adapter"
+
+#endif
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 4789f91dae94..e9ea43234d9a 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -21,7 +21,7 @@
#include <uapi/linux/spi/spi.h>
/* Max no. of CS supported per spi device */
-#define SPI_CS_CNT_MAX 16
+#define SPI_CS_CNT_MAX 24
struct dma_chan;
struct software_node;
diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h
index 51cab2def9ec..876130091384 100644
--- a/include/linux/sprintf.h
+++ b/include/linux/sprintf.h
@@ -4,6 +4,7 @@
#include <linux/compiler_attributes.h>
#include <linux/types.h>
+#include <linux/stdarg.h>
int num_to_str(char *buf, int size, unsigned long long num, unsigned int width);
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h
index c4b0eb2b2f04..ada17b57ca44 100644
--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h
@@ -69,15 +69,17 @@ enum rpc_reject_stat {
};
enum rpc_auth_stat {
- RPC_AUTH_OK = 0,
- RPC_AUTH_BADCRED = 1,
- RPC_AUTH_REJECTEDCRED = 2,
- RPC_AUTH_BADVERF = 3,
- RPC_AUTH_REJECTEDVERF = 4,
- RPC_AUTH_TOOWEAK = 5,
+ RPC_AUTH_OK = 0, /* success */
+ RPC_AUTH_BADCRED = 1, /* bad credential (seal broken) */
+ RPC_AUTH_REJECTEDCRED = 2, /* client must begin new session */
+ RPC_AUTH_BADVERF = 3, /* bad verifier (seal broken) */
+ RPC_AUTH_REJECTEDVERF = 4, /* verifier expired or replayed */
+ RPC_AUTH_TOOWEAK = 5, /* rejected for security reasons */
+ RPC_AUTH_INVALIDRESP = 6, /* bogus response verifier */
+ RPC_AUTH_FAILED = 7, /* reason unknown */
/* RPCSEC_GSS errors */
- RPCSEC_GSS_CREDPROBLEM = 13,
- RPCSEC_GSS_CTXPROBLEM = 14
+ RPCSEC_GSS_CREDPROBLEM = 13, /* no credentials for user */
+ RPCSEC_GSS_CTXPROBLEM = 14 /* problem with context */
};
#define RPC_MAXNETNAMELEN 256
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index 3b35b6f6533a..2cb406f8ff4e 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -98,7 +98,7 @@ static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) {
}
struct rpc_clnt;
-extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *);
+extern int rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *);
extern int rpc_remove_client_dir(struct rpc_clnt *);
extern void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh);
@@ -127,9 +127,9 @@ extern void rpc_remove_cache_dir(struct dentry *);
struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags);
void rpc_destroy_pipe_data(struct rpc_pipe *pipe);
-extern struct dentry *rpc_mkpipe_dentry(struct dentry *, const char *, void *,
+extern int rpc_mkpipe_dentry(struct dentry *, const char *, void *,
struct rpc_pipe *);
-extern int rpc_unlink(struct dentry *);
+extern void rpc_unlink(struct rpc_pipe *);
extern int register_rpc_pipefs(void);
extern void unregister_rpc_pipefs(void);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 48666b83fe68..40cbe81360ed 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -245,10 +245,10 @@ struct svc_rqst {
* initialisation success.
*/
- unsigned long bc_to_initval;
- unsigned int bc_to_retries;
- void ** rq_lease_breaker; /* The v4 client breaking a lease */
+ unsigned long bc_to_initval;
+ unsigned int bc_to_retries;
unsigned int rq_status_counter; /* RPC processing counter */
+ void **rq_lease_breaker; /* The v4 client breaking a lease */
};
/* bits for rq_flags */
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 2e111153f7cd..4b92fec23a49 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -86,7 +86,6 @@ struct auth_domain {
enum svc_auth_status {
SVC_GARBAGE = 1,
- SVC_SYSERR,
SVC_VALID,
SVC_NEGATIVE,
SVC_OK,
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index a2ab813a9800..e3358c630ba1 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -119,6 +119,8 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
#define rpc_autherr_badverf cpu_to_be32(RPC_AUTH_BADVERF)
#define rpc_autherr_rejectedverf cpu_to_be32(RPC_AUTH_REJECTEDVERF)
#define rpc_autherr_tooweak cpu_to_be32(RPC_AUTH_TOOWEAK)
+#define rpc_autherr_invalidresp cpu_to_be32(RPC_AUTH_INVALIDRESP)
+#define rpc_autherr_failed cpu_to_be32(RPC_AUTH_FAILED)
#define rpcsec_gsserr_credproblem cpu_to_be32(RPCSEC_GSS_CREDPROBLEM)
#define rpcsec_gsserr_ctxproblem cpu_to_be32(RPCSEC_GSS_CTXPROBLEM)
@@ -242,8 +244,7 @@ typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf,
__be32 *p, struct rpc_rqst *rqst);
-extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
- struct page **pages, struct rpc_rqst *rqst);
+void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf);
extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
extern int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes);
extern void __xdr_commit_encode(struct xdr_stream *xdr);
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index b1c76c8f2c82..6a3f92098872 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -446,6 +446,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb);
extern void ksys_sync_helper(void);
extern void pm_report_hw_sleep_time(u64 t);
extern void pm_report_max_hw_sleep(u64 t);
+void pm_restrict_gfp_mask(void);
+void pm_restore_gfp_mask(void);
#define pm_notifier(fn, pri) { \
static struct notifier_block fn##_nb = \
@@ -492,6 +494,9 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
static inline void pm_report_hw_sleep_time(u64 t) {};
static inline void pm_report_max_hw_sleep(u64 t) {};
+static inline void pm_restrict_gfp_mask(void) {}
+static inline void pm_restore_gfp_mask(void) {}
+
static inline void ksys_sync_helper(void) {}
#define pm_notifier(fn, pri) do { (void)(fn); } while (0)
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index a3d8305e88a5..9894c104dc93 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -182,7 +182,7 @@ struct tpm_chip {
unsigned long duration[TPM_NUM_DURATIONS]; /* jiffies */
bool duration_adjusted;
- struct dentry *bios_dir[TPM_NUM_EVENT_LOG_FILES];
+ struct dentry *bios_dir;
const struct attribute_group *groups[3 + TPM_MAX_HASHES];
unsigned int groups_cnt;
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 1b2545b4363b..92c752f5446f 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -614,6 +614,7 @@ struct usb3_lpm_parameters {
* FIXME -- complete doc
* @authenticated: Crypto authentication passed
* @tunnel_mode: Connection native or tunneled over USB4
+ * @usb4_link: device link to the USB4 host interface
* @lpm_capable: device supports LPM
* @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range
* @usb2_hw_lpm_capable: device can perform USB2 hardware LPM
@@ -724,6 +725,7 @@ struct usb_device {
unsigned reset_resume:1;
unsigned port_is_suspended:1;
enum usb_link_tunnel_mode tunnel_mode;
+ struct device_link *usb4_link;
int slot_id;
struct usb2_lpm_parameters l1_params;
diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h
index f2da264d9c14..acb0ad03bdac 100644
--- a/include/linux/usb/typec_dp.h
+++ b/include/linux/usb/typec_dp.h
@@ -57,6 +57,7 @@ enum {
DP_PIN_ASSIGN_D,
DP_PIN_ASSIGN_E,
DP_PIN_ASSIGN_F, /* Not supported after v1.0b */
+ DP_PIN_ASSIGN_MAX,
};
/* DisplayPort alt mode specific commands */
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 91a3ce9a2687..9e15a088ba38 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -66,8 +66,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
NUMA_HINT_FAULTS,
NUMA_HINT_FAULTS_LOCAL,
NUMA_PAGE_MIGRATE,
- NUMA_TASK_MIGRATE,
- NUMA_TASK_SWAP,
#endif
#ifdef CONFIG_MIGRATION
PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index d56e6e135158..d40e978126e3 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -243,8 +243,8 @@ int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags);
-#ifdef CONFIG_BPF_SYSCALL
extern struct proto vsock_proto;
+#ifdef CONFIG_BPF_SYSCALL
int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void __init vsock_bpf_build_proto(void);
#else
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 82cbd54443ac..c79901f2dc2a 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -377,6 +377,8 @@ enum {
* This quirk must be set before hci_register_dev is called.
*/
HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE,
+
+ __HCI_NUM_QUIRKS,
};
/* HCI device flags */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 2b261e74e2c4..f79f59e67114 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -29,6 +29,7 @@
#include <linux/idr.h>
#include <linux/leds.h>
#include <linux/rculist.h>
+#include <linux/srcu.h>
#include <net/bluetooth/hci.h>
#include <net/bluetooth/hci_drv.h>
@@ -242,6 +243,7 @@ struct adv_info {
__u8 mesh;
__u8 instance;
__u8 handle;
+ __u8 sid;
__u32 flags;
__u16 timeout;
__u16 remaining_time;
@@ -346,6 +348,7 @@ struct adv_monitor {
struct hci_dev {
struct list_head list;
+ struct srcu_struct srcu;
struct mutex lock;
struct ida unset_handle_ida;
@@ -461,7 +464,7 @@ struct hci_dev {
unsigned int auto_accept_delay;
- unsigned long quirks;
+ DECLARE_BITMAP(quirk_flags, __HCI_NUM_QUIRKS);
atomic_t cmd_cnt;
unsigned int acl_cnt;
@@ -546,6 +549,7 @@ struct hci_dev {
struct hci_conn_hash conn_hash;
struct list_head mesh_pending;
+ struct mutex mgmt_pending_lock;
struct list_head mgmt_pending;
struct list_head reject_list;
struct list_head accept_list;
@@ -652,6 +656,10 @@ struct hci_dev {
u8 (*classify_pkt_type)(struct hci_dev *hdev, struct sk_buff *skb);
};
+#define hci_set_quirk(hdev, nr) set_bit((nr), (hdev)->quirk_flags)
+#define hci_clear_quirk(hdev, nr) clear_bit((nr), (hdev)->quirk_flags)
+#define hci_test_quirk(hdev, nr) test_bit((nr), (hdev)->quirk_flags)
+
#define HCI_PHY_HANDLE(handle) (handle & 0xff)
enum conn_reasons {
@@ -825,20 +833,20 @@ extern struct mutex hci_cb_list_lock;
#define hci_dev_test_and_clear_flag(hdev, nr) test_and_clear_bit((nr), (hdev)->dev_flags)
#define hci_dev_test_and_change_flag(hdev, nr) test_and_change_bit((nr), (hdev)->dev_flags)
-#define hci_dev_clear_volatile_flags(hdev) \
- do { \
- hci_dev_clear_flag(hdev, HCI_LE_SCAN); \
- hci_dev_clear_flag(hdev, HCI_LE_ADV); \
- hci_dev_clear_flag(hdev, HCI_LL_RPA_RESOLUTION);\
- hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); \
- hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); \
+#define hci_dev_clear_volatile_flags(hdev) \
+ do { \
+ hci_dev_clear_flag((hdev), HCI_LE_SCAN); \
+ hci_dev_clear_flag((hdev), HCI_LE_ADV); \
+ hci_dev_clear_flag((hdev), HCI_LL_RPA_RESOLUTION); \
+ hci_dev_clear_flag((hdev), HCI_PERIODIC_INQ); \
+ hci_dev_clear_flag((hdev), HCI_QUALITY_REPORT); \
} while (0)
#define hci_dev_le_state_simultaneous(hdev) \
- (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) && \
- (hdev->le_states[4] & 0x08) && /* Central */ \
- (hdev->le_states[4] & 0x40) && /* Peripheral */ \
- (hdev->le_states[3] & 0x10)) /* Simultaneous */
+ (!hci_test_quirk((hdev), HCI_QUIRK_BROKEN_LE_STATES) && \
+ ((hdev)->le_states[4] & 0x08) && /* Central */ \
+ ((hdev)->le_states[4] & 0x40) && /* Peripheral */ \
+ ((hdev)->le_states[3] & 0x10)) /* Simultaneous */
/* ----- HCI interface to upper protocols ----- */
int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr);
@@ -1346,8 +1354,7 @@ hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state)
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != BIS_LINK || bacmp(&c->dst, BDADDR_ANY) ||
- c->state != state)
+ if (c->type != BIS_LINK || c->state != state)
continue;
if (handle == c->iso_qos.bcast.big) {
@@ -1550,13 +1557,14 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
u16 timeout);
struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, struct bt_iso_qos *qos);
-struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
+struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid,
struct bt_iso_qos *qos,
__u8 base_len, __u8 *base);
struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, struct bt_iso_qos *qos);
struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
- __u8 dst_type, struct bt_iso_qos *qos,
+ __u8 dst_type, __u8 sid,
+ struct bt_iso_qos *qos,
__u8 data_len, __u8 *data);
struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, __u8 sid, struct bt_iso_qos *qos);
@@ -1831,6 +1839,7 @@ int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr,
void hci_adv_instances_clear(struct hci_dev *hdev);
struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance);
+struct adv_info *hci_find_adv_sid(struct hci_dev *hdev, u8 sid);
struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance);
struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance,
u32 flags, u16 adv_data_len, u8 *adv_data,
@@ -1838,7 +1847,7 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance,
u16 timeout, u16 duration, s8 tx_power,
u32 min_interval, u32 max_interval,
u8 mesh_handle);
-struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance,
+struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, u8 sid,
u32 flags, u8 data_len, u8 *data,
u32 min_interval, u32 max_interval);
int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance,
@@ -1926,8 +1935,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
((dev)->le_rx_def_phys & HCI_LE_SET_PHY_2M))
#define le_coded_capable(dev) (((dev)->le_features[1] & HCI_LE_PHY_CODED) && \
- !test_bit(HCI_QUIRK_BROKEN_LE_CODED, \
- &(dev)->quirks))
+ !hci_test_quirk((dev), \
+ HCI_QUIRK_BROKEN_LE_CODED))
#define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \
((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED))
@@ -1935,31 +1944,31 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
#define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY)
#define privacy_mode_capable(dev) (ll_privacy_capable(dev) && \
- (hdev->commands[39] & 0x04))
+ ((dev)->commands[39] & 0x04))
#define read_key_size_capable(dev) \
((dev)->commands[20] & 0x10 && \
- !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks))
+ !hci_test_quirk((dev), HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE))
#define read_voice_setting_capable(dev) \
((dev)->commands[9] & 0x04 && \
- !test_bit(HCI_QUIRK_BROKEN_READ_VOICE_SETTING, &(dev)->quirks))
+ !hci_test_quirk((dev), HCI_QUIRK_BROKEN_READ_VOICE_SETTING))
/* Use enhanced synchronous connection if command is supported and its quirk
* has not been set.
*/
#define enhanced_sync_conn_capable(dev) \
(((dev)->commands[29] & 0x08) && \
- !test_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &(dev)->quirks))
+ !hci_test_quirk((dev), HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN))
/* Use ext scanning if set ext scan param and ext scan enable is supported */
#define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \
((dev)->commands[37] & 0x40) && \
- !test_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &(dev)->quirks))
+ !hci_test_quirk((dev), HCI_QUIRK_BROKEN_EXT_SCAN))
/* Use ext create connection if command is supported */
#define use_ext_conn(dev) (((dev)->commands[37] & 0x80) && \
- !test_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, &(dev)->quirks))
+ !hci_test_quirk((dev), HCI_QUIRK_BROKEN_EXT_CREATE_CONN))
/* Extended advertising support */
#define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV))
@@ -1974,8 +1983,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
*/
#define use_enhanced_conn_complete(dev) ((ll_privacy_capable(dev) || \
ext_adv_capable(dev)) && \
- !test_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, \
- &(dev)->quirks))
+ !hci_test_quirk((dev), \
+ HCI_QUIRK_BROKEN_EXT_CREATE_CONN))
/* Periodic advertising support */
#define per_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_PERIODIC_ADV))
@@ -1992,7 +2001,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
#define sync_recv_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER)
#define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \
- (!test_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &(dev)->quirks)))
+ (!hci_test_quirk((dev), HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG)))
/* ----- HCI protocols ----- */
#define HCI_PROTO_DEFER 0x01
@@ -2400,7 +2409,6 @@ void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev,
u8 instance);
void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev,
u8 instance);
-void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle);
int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip);
void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle,
bdaddr_t *bdaddr, u8 addr_type);
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 72558c826aa1..5224f57f6af2 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -115,8 +115,8 @@ int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance);
int hci_enable_advertising_sync(struct hci_dev *hdev);
int hci_enable_advertising(struct hci_dev *hdev);
-int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len,
- u8 *data, u32 flags, u16 min_interval,
+int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 sid,
+ u8 data_len, u8 *data, u32 flags, u16 min_interval,
u16 max_interval, u16 sync_interval);
int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d1848dc8ec99..10248d527616 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2690,7 +2690,7 @@ struct cfg80211_scan_request {
s8 tsf_report_link_id;
/* keep last */
- struct ieee80211_channel *channels[] __counted_by(n_channels);
+ struct ieee80211_channel *channels[];
};
static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 3f02a45773e8..ca26274196b9 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -306,8 +306,19 @@ static inline bool nf_ct_is_expired(const struct nf_conn *ct)
/* use after obtaining a reference count */
static inline bool nf_ct_should_gc(const struct nf_conn *ct)
{
- return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) &&
- !nf_ct_is_dying(ct);
+ if (!nf_ct_is_confirmed(ct))
+ return false;
+
+ /* load ct->timeout after is_confirmed() test.
+ * Pairs with __nf_conntrack_confirm() which:
+ * 1. Increases ct->timeout value
+ * 2. Inserts ct into rcu hlist
+ * 3. Sets the confirmed bit
+ * 4. Unlocks the hlist lock
+ */
+ smp_acquire__after_ctrl_dep();
+
+ return nf_ct_is_expired(ct) && !nf_ct_is_dying(ct);
}
#define NF_CT_DAY (86400 * HZ)
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index d711642e78b5..c003cd194fa2 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -370,7 +370,7 @@ static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb)
static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto)
{
- if (!pskb_may_pull(skb, PPPOE_SES_HLEN))
+ if (!pskb_may_pull(skb, ETH_HLEN + PPPOE_SES_HLEN))
return false;
*inner_proto = __nf_flow_pppoe_proto(skb);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e4d8e451e935..5e49619ae49c 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1142,11 +1142,6 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set);
int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
-struct nft_hook;
-void nf_tables_chain_device_notify(const struct nft_chain *chain,
- const struct nft_hook *hook,
- const struct net_device *dev, int event);
-
enum nft_chain_types {
NFT_CHAIN_T_DEFAULT = 0,
NFT_CHAIN_T_ROUTE,
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index d7b7b6cd4aa1..8a75c73fc555 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -114,7 +114,6 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
struct netlink_ext_ack *extack);
void qdisc_put_rtab(struct qdisc_rate_table *tab);
void qdisc_put_stab(struct qdisc_size_table *tab);
-void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc);
bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
struct net_device *dev, struct netdev_queue *txq,
spinlock_t *root_lock, bool validate);
@@ -290,4 +289,28 @@ static inline bool tc_qdisc_stats_dump(struct Qdisc *sch,
return true;
}
+static inline void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
+{
+ if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
+ pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
+ txt, qdisc->ops->id, qdisc->handle >> 16);
+ qdisc->flags |= TCQ_F_WARN_NONWC;
+ }
+}
+
+static inline unsigned int qdisc_peek_len(struct Qdisc *sch)
+{
+ struct sk_buff *skb;
+ unsigned int len;
+
+ skb = sch->ops->peek(sch);
+ if (unlikely(skb == NULL)) {
+ qdisc_warn_nonwc("qdisc_peek_len", sch);
+ return 0;
+ }
+ len = qdisc_pkt_len(skb);
+
+ return len;
+}
+
#endif
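qdisc_peek_len() above is the pattern hierarchical qdiscs use to size a
child's next packet before granting it service; a zero return flags a
child that claimed backlog yet peeked nothing. A sketch of the calling
pattern (the example_* name is invented, not code from the patch):

/* Ask a supposedly backlogged child class how large its next packet
 * is. A return of 0 means the child is non-work-conserving;
 * qdisc_peek_len() has already emitted the one-time warning for it.
 */
static bool example_class_ready(struct Qdisc *child, unsigned int *lenp)
{
	unsigned int len = qdisc_peek_len(child);

	if (!len)
		return false;	/* skip this class for now */

	*lenp = len;		/* feed len into the scheduling arithmetic */
	return true;
}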
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 629368ab2787..638948be4c50 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -973,14 +973,6 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen,
*backlog = qstats.backlog;
}
-static inline void qdisc_tree_flush_backlog(struct Qdisc *sch)
-{
- __u32 qlen, backlog;
-
- qdisc_qstats_qlen_backlog(sch, &qlen, &backlog);
- qdisc_tree_reduce_backlog(sch, qlen, backlog);
-}
-
static inline void qdisc_purge_queue(struct Qdisc *sch)
{
__u32 qlen, backlog;
diff --git a/include/net/sock.h b/include/net/sock.h
index 92e7c1aae3cc..4c37015b7cf7 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -3010,8 +3010,11 @@ int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
static inline bool sk_is_readable(struct sock *sk)
{
- if (sk->sk_prot->sock_is_readable)
- return sk->sk_prot->sock_is_readable(sk);
+ const struct proto *prot = READ_ONCE(sk->sk_prot);
+
+ if (prot->sock_is_readable)
+ return prot->sock_is_readable(sk);
+
return false;
}
#endif /* _SOCK_H */
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index a21e276dbe44..f3014e4f54fc 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -441,7 +441,6 @@ int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo);
int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo);
void xfrm_flush_gc(void);
-void xfrm_state_delete_tunnel(struct xfrm_state *x);
struct xfrm_type {
struct module *owner;
@@ -474,7 +473,7 @@ struct xfrm_type_offload {
int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family);
void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
-void xfrm_set_type_offload(struct xfrm_state *x);
+void xfrm_set_type_offload(struct xfrm_state *x, bool try_load);
static inline void xfrm_unset_type_offload(struct xfrm_state *x)
{
if (!x->type_offload)
@@ -916,7 +915,7 @@ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols)
xfrm_pol_put(pols[i]);
}
-void __xfrm_state_destroy(struct xfrm_state *, bool);
+void __xfrm_state_destroy(struct xfrm_state *);
static inline void __xfrm_state_put(struct xfrm_state *x)
{
@@ -926,13 +925,7 @@ static inline void __xfrm_state_put(struct xfrm_state *x)
static inline void xfrm_state_put(struct xfrm_state *x)
{
if (refcount_dec_and_test(&x->refcnt))
- __xfrm_state_destroy(x, false);
-}
-
-static inline void xfrm_state_put_sync(struct xfrm_state *x)
-{
- if (refcount_dec_and_test(&x->refcnt))
- __xfrm_state_destroy(x, true);
+ __xfrm_state_destroy(x);
}
static inline void xfrm_state_hold(struct xfrm_state *x)
@@ -1770,7 +1763,7 @@ struct xfrmk_spdinfo {
struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
int xfrm_state_delete(struct xfrm_state *x);
-int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid);
int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
bool task_valid);
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index bebc252db865..d54fe354b390 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -144,7 +144,8 @@ FLUSH_STATES
#define EXTENT_FLAGS \
{ EXTENT_DIRTY, "DIRTY"}, \
{ EXTENT_LOCKED, "LOCKED"}, \
- { EXTENT_NEW, "NEW"}, \
+ { EXTENT_DIRTY_LOG1, "DIRTY_LOG1"}, \
+ { EXTENT_DIRTY_LOG2, "DIRTY_LOG2"}, \
{ EXTENT_DELALLOC, "DELALLOC"}, \
{ EXTENT_DEFRAG, "DEFRAG"}, \
{ EXTENT_BOUNDARY, "BOUNDARY"}, \
@@ -1095,7 +1096,7 @@ TRACE_EVENT(btrfs_cow_block,
TP_fast_assign_btrfs(root->fs_info,
__entry->root_objectid = btrfs_root_id(root);
__entry->buf_start = buf->start;
- __entry->refs = atomic_read(&buf->refs);
+ __entry->refs = refcount_read(&buf->refs);
__entry->cow_start = cow->start;
__entry->buf_level = btrfs_header_level(buf);
__entry->cow_level = btrfs_header_level(cow);
diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
index a5f4b9234f46..dad7360f42f9 100644
--- a/include/trace/events/erofs.h
+++ b/include/trace/events/erofs.h
@@ -211,24 +211,6 @@ TRACE_EVENT(erofs_map_blocks_exit,
show_mflags(__entry->mflags), __entry->ret)
);
-TRACE_EVENT(erofs_destroy_inode,
- TP_PROTO(struct inode *inode),
-
- TP_ARGS(inode),
-
- TP_STRUCT__entry(
- __field( dev_t, dev )
- __field( erofs_nid_t, nid )
- ),
-
- TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
- __entry->nid = EROFS_I(inode)->nid;
- ),
-
- TP_printk("dev = (%d,%d), nid = %llu", show_dev_nid(__entry))
-);
-
#endif /* _TRACE_EROFS_H */
/* This part must be outside protection */
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 333d2e38dd2c..64a382fbc31a 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -50,12 +50,15 @@
#define netfs_rreq_traces \
EM(netfs_rreq_trace_assess, "ASSESS ") \
- EM(netfs_rreq_trace_copy, "COPY ") \
EM(netfs_rreq_trace_collect, "COLLECT") \
EM(netfs_rreq_trace_complete, "COMPLET") \
+ EM(netfs_rreq_trace_copy, "COPY ") \
EM(netfs_rreq_trace_dirty, "DIRTY ") \
EM(netfs_rreq_trace_done, "DONE ") \
+ EM(netfs_rreq_trace_end_copy_to_cache, "END-C2C") \
EM(netfs_rreq_trace_free, "FREE ") \
+ EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \
+ EM(netfs_rreq_trace_recollect, "RECLLCT") \
EM(netfs_rreq_trace_redirty, "REDIRTY") \
EM(netfs_rreq_trace_resubmit, "RESUBMT") \
EM(netfs_rreq_trace_set_abandon, "S-ABNDN") \
@@ -63,13 +66,15 @@
EM(netfs_rreq_trace_unlock, "UNLOCK ") \
EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \
EM(netfs_rreq_trace_unmark, "UNMARK ") \
+ EM(netfs_rreq_trace_unpause, "UNPAUSE") \
EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \
- EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \
- EM(netfs_rreq_trace_wait_queue, "WAIT-Q ") \
+ EM(netfs_rreq_trace_wait_pause, "--PAUSED--") \
+ EM(netfs_rreq_trace_wait_quiesce, "WAIT-QUIESCE") \
+ EM(netfs_rreq_trace_waited_ip, "DONE-IP") \
+ EM(netfs_rreq_trace_waited_pause, "--UNPAUSED--") \
+ EM(netfs_rreq_trace_waited_quiesce, "DONE-QUIESCE") \
EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \
EM(netfs_rreq_trace_wake_queue, "WAKE-Q ") \
- EM(netfs_rreq_trace_woke_queue, "WOKE-Q ") \
- EM(netfs_rreq_trace_unpause, "UNPAUSE") \
E_(netfs_rreq_trace_write_done, "WR-DONE")
#define netfs_sreq_sources \
@@ -82,6 +87,7 @@
E_(NETFS_WRITE_TO_CACHE, "WRIT")
#define netfs_sreq_traces \
+ EM(netfs_sreq_trace_abandoned, "ABNDN") \
EM(netfs_sreq_trace_add_donations, "+DON ") \
EM(netfs_sreq_trace_added, "ADD ") \
EM(netfs_sreq_trace_cache_nowrite, "CA-NW") \
@@ -89,6 +95,7 @@
EM(netfs_sreq_trace_cache_write, "CA-WR") \
EM(netfs_sreq_trace_cancel, "CANCL") \
EM(netfs_sreq_trace_clear, "CLEAR") \
+ EM(netfs_sreq_trace_consumed, "CONSM") \
EM(netfs_sreq_trace_discard, "DSCRD") \
EM(netfs_sreq_trace_donate_to_prev, "DON-P") \
EM(netfs_sreq_trace_donate_to_next, "DON-N") \
@@ -96,7 +103,12 @@
EM(netfs_sreq_trace_fail, "FAIL ") \
EM(netfs_sreq_trace_free, "FREE ") \
EM(netfs_sreq_trace_hit_eof, "EOF ") \
- EM(netfs_sreq_trace_io_progress, "IO ") \
+ EM(netfs_sreq_trace_io_bad, "I-BAD") \
+ EM(netfs_sreq_trace_io_malformed, "I-MLF") \
+ EM(netfs_sreq_trace_io_unknown, "I-UNK") \
+ EM(netfs_sreq_trace_io_progress, "I-OK ") \
+ EM(netfs_sreq_trace_io_req_submitted, "I-RSB") \
+ EM(netfs_sreq_trace_io_retry_needed, "I-RTR") \
EM(netfs_sreq_trace_limited, "LIMIT") \
EM(netfs_sreq_trace_need_clear, "N-CLR") \
EM(netfs_sreq_trace_partial_read, "PARTR") \
@@ -142,8 +154,8 @@
#define netfs_sreq_ref_traces \
EM(netfs_sreq_trace_get_copy_to_cache, "GET COPY2C ") \
- EM(netfs_sreq_trace_get_resubmit, "GET RESUBMIT") \
- EM(netfs_sreq_trace_get_submit, "GET SUBMIT") \
+ EM(netfs_sreq_trace_get_resubmit, "GET RESUBMT") \
+ EM(netfs_sreq_trace_get_submit, "GET SUBMIT ") \
EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \
EM(netfs_sreq_trace_new, "NEW ") \
EM(netfs_sreq_trace_put_abandon, "PUT ABANDON") \
@@ -366,7 +378,7 @@ TRACE_EVENT(netfs_sreq,
__entry->slot = sreq->io_iter.folioq_slot;
),
- TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx s=%u e=%d",
+ TP_printk("R=%08x[%x] %s %s f=%03x s=%llx %zx/%zx s=%u e=%d",
__entry->rreq, __entry->index,
__print_symbolic(__entry->source, netfs_sreq_sources),
__print_symbolic(__entry->what, netfs_sreq_traces),
@@ -548,6 +560,35 @@ TRACE_EVENT(netfs_write,
__entry->start, __entry->start + __entry->len - 1)
);
+TRACE_EVENT(netfs_copy2cache,
+ TP_PROTO(const struct netfs_io_request *rreq,
+ const struct netfs_io_request *creq),
+
+ TP_ARGS(rreq, creq),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, rreq)
+ __field(unsigned int, creq)
+ __field(unsigned int, cookie)
+ __field(unsigned int, ino)
+ ),
+
+ TP_fast_assign(
+ struct netfs_inode *__ctx = netfs_inode(rreq->inode);
+ struct fscache_cookie *__cookie = netfs_i_cookie(__ctx);
+ __entry->rreq = rreq->debug_id;
+ __entry->creq = creq->debug_id;
+ __entry->cookie = __cookie ? __cookie->debug_id : 0;
+ __entry->ino = rreq->inode->i_ino;
+ ),
+
+ TP_printk("R=%08x CR=%08x c=%08x i=%x ",
+ __entry->rreq,
+ __entry->creq,
+ __entry->cookie,
+ __entry->ino)
+ );
+
TRACE_EVENT(netfs_collect,
TP_PROTO(const struct netfs_io_request *wreq),
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 378d2dfc7392..de6f6d25767c 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -322,20 +322,24 @@
EM(rxrpc_call_put_kernel, "PUT kernel ") \
EM(rxrpc_call_put_poke, "PUT poke ") \
EM(rxrpc_call_put_recvmsg, "PUT recvmsg ") \
+ EM(rxrpc_call_put_release_recvmsg_q, "PUT rls-rcmq") \
EM(rxrpc_call_put_release_sock, "PUT rls-sock") \
EM(rxrpc_call_put_release_sock_tba, "PUT rls-sk-a") \
EM(rxrpc_call_put_sendmsg, "PUT sendmsg ") \
- EM(rxrpc_call_put_unnotify, "PUT unnotify") \
EM(rxrpc_call_put_userid_exists, "PUT u-exists") \
EM(rxrpc_call_put_userid, "PUT user-id ") \
EM(rxrpc_call_see_accept, "SEE accept ") \
EM(rxrpc_call_see_activate_client, "SEE act-clnt") \
+ EM(rxrpc_call_see_already_released, "SEE alrdy-rl") \
EM(rxrpc_call_see_connect_failed, "SEE con-fail") \
EM(rxrpc_call_see_connected, "SEE connect ") \
EM(rxrpc_call_see_conn_abort, "SEE conn-abt") \
+ EM(rxrpc_call_see_discard, "SEE discard ") \
EM(rxrpc_call_see_disconnected, "SEE disconn ") \
EM(rxrpc_call_see_distribute_error, "SEE dist-err") \
EM(rxrpc_call_see_input, "SEE input ") \
+ EM(rxrpc_call_see_notify_released, "SEE nfy-rlsd") \
+ EM(rxrpc_call_see_recvmsg, "SEE recvmsg ") \
EM(rxrpc_call_see_release, "SEE release ") \
EM(rxrpc_call_see_userid_exists, "SEE u-exists") \
EM(rxrpc_call_see_waiting_call, "SEE q-conn ") \
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index aad697da1580..750ecce56930 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1691,7 +1691,6 @@ SVC_RQST_FLAG_LIST
__print_flags(flags, "|", SVC_RQST_FLAG_LIST)
TRACE_DEFINE_ENUM(SVC_GARBAGE);
-TRACE_DEFINE_ENUM(SVC_SYSERR);
TRACE_DEFINE_ENUM(SVC_VALID);
TRACE_DEFINE_ENUM(SVC_NEGATIVE);
TRACE_DEFINE_ENUM(SVC_OK);
@@ -1704,7 +1703,6 @@ TRACE_DEFINE_ENUM(SVC_COMPLETE);
#define show_svc_auth_status(status) \
__print_symbolic(status, \
{ SVC_GARBAGE, "SVC_GARBAGE" }, \
- { SVC_SYSERR, "SVC_SYSERR" }, \
{ SVC_VALID, "SVC_VALID" }, \
{ SVC_NEGATIVE, "SVC_NEGATIVE" }, \
{ SVC_OK, "SVC_OK" }, \
@@ -2123,22 +2121,35 @@ TRACE_EVENT(svc_xprt_accept,
)
);
-TRACE_EVENT(svc_wake_up,
- TP_PROTO(int pid),
+DECLARE_EVENT_CLASS(svc_pool_thread_event,
+ TP_PROTO(const struct svc_pool *pool, pid_t pid),
- TP_ARGS(pid),
+ TP_ARGS(pool, pid),
TP_STRUCT__entry(
- __field(int, pid)
+ __field(unsigned int, pool_id)
+ __field(pid_t, pid)
),
TP_fast_assign(
+ __entry->pool_id = pool->sp_id;
__entry->pid = pid;
),
- TP_printk("pid=%d", __entry->pid)
+ TP_printk("pool=%u pid=%d", __entry->pool_id, __entry->pid)
);
+#define DEFINE_SVC_POOL_THREAD_EVENT(name) \
+ DEFINE_EVENT(svc_pool_thread_event, svc_pool_thread_##name, \
+ TP_PROTO( \
+ const struct svc_pool *pool, pid_t pid \
+ ), \
+ TP_ARGS(pool, pid))
+
+DEFINE_SVC_POOL_THREAD_EVENT(wake);
+DEFINE_SVC_POOL_THREAD_EVENT(running);
+DEFINE_SVC_POOL_THREAD_EVENT(noidle);
+
TRACE_EVENT(svc_alloc_arg_err,
TP_PROTO(
unsigned int requested,
diff --git a/include/uapi/asm-generic/param.h b/include/uapi/asm-generic/param.h
index baad02ea7f93..3ed505dfea13 100644
--- a/include/uapi/asm-generic/param.h
+++ b/include/uapi/asm-generic/param.h
@@ -2,8 +2,12 @@
#ifndef _UAPI__ASM_GENERIC_PARAM_H
#define _UAPI__ASM_GENERIC_PARAM_H
+#ifndef __USER_HZ
+#define __USER_HZ 100
+#endif
+
#ifndef HZ
-#define HZ 100
+#define HZ __USER_HZ
#endif
#ifndef EXEC_PAGESIZE
diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h
index 682b406e1067..a04afef9efca 100644
--- a/include/uapi/linux/bits.h
+++ b/include/uapi/linux/bits.h
@@ -4,9 +4,9 @@
#ifndef _UAPI_LINUX_BITS_H
#define _UAPI_LINUX_BITS_H
-#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h))))
+#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
-#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h))))
+#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
#define __GENMASK_U128(h, l) \
((_BIT128((h)) << 1) - (_BIT128(l)))
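A quick worked check of the corrected macro, assuming __BITS_PER_LONG is 64:

/*
 * __GENMASK(7, 4):
 *   (~0UL << 4)              = 0xfffffffffffffff0
 *   (~0UL >> (64 - 1 - 7))   = 0x00000000000000ff
 *   ANDed together           = 0x00000000000000f0   (bits 4..7 set)
 */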
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index dd02160015b2..8e710bbb688e 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -616,8 +616,11 @@ struct btrfs_ioctl_clone_range_args {
#define BTRFS_DEFRAG_RANGE_COMPRESS 1
#define BTRFS_DEFRAG_RANGE_START_IO 2
#define BTRFS_DEFRAG_RANGE_COMPRESS_LEVEL 4
+/* Request no compression on the range (uncompress if necessary). */
+#define BTRFS_DEFRAG_RANGE_NOCOMPRESS 8
#define BTRFS_DEFRAG_RANGE_FLAGS_SUPP (BTRFS_DEFRAG_RANGE_COMPRESS | \
BTRFS_DEFRAG_RANGE_COMPRESS_LEVEL | \
+ BTRFS_DEFRAG_RANGE_NOCOMPRESS | \
BTRFS_DEFRAG_RANGE_START_IO)
struct btrfs_ioctl_defrag_range_args {
diff --git a/include/uapi/linux/coredump.h b/include/uapi/linux/coredump.h
new file mode 100644
index 000000000000..dc3789b78af0
--- /dev/null
+++ b/include/uapi/linux/coredump.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_LINUX_COREDUMP_H
+#define _UAPI_LINUX_COREDUMP_H
+
+#include <linux/types.h>
+
+/**
+ * coredump_{req,ack} flags
+ * @COREDUMP_KERNEL: kernel writes coredump
+ * @COREDUMP_USERSPACE: userspace writes coredump
+ * @COREDUMP_REJECT: don't generate coredump
+ * @COREDUMP_WAIT: wait for coredump server
+ */
+enum {
+ COREDUMP_KERNEL = (1ULL << 0),
+ COREDUMP_USERSPACE = (1ULL << 1),
+ COREDUMP_REJECT = (1ULL << 2),
+ COREDUMP_WAIT = (1ULL << 3),
+};
+
+/**
+ * struct coredump_req - message kernel sends to userspace
+ * @size: size of struct coredump_req
+ * @size_ack: known size of struct coredump_ack on this kernel
+ * @mask: supported features
+ *
+ * When a coredump happens the kernel will connect to the coredump
+ * socket and send a coredump request to the coredump server. The @size
+ * member is set to the size of struct coredump_req and provides a hint
+ * to userspace how much data can be read. Userspace may use MSG_PEEK to
+ * peek the size of struct coredump_req and then choose to consume it in
+ * one go. Userspace may also simply read a COREDUMP_REQ_SIZE_VER0
+ * sized request. If the size the kernel sends is larger, userspace
+ * simply discards any remaining data.
+ *
+ * The coredump_req->mask member is set to the currently known features.
+ * Userspace may only set coredump_ack->mask to the bits raised by the
+ * kernel in coredump_req->mask.
+ *
+ * The coredump_req->size_ack member is set by the kernel to the size of
+ * struct coredump_ack the kernel knows. Userspace may only send up to
+ * coredump_req->size_ack bytes to the kernel and must set
+ * coredump_ack->size accordingly.
+ */
+struct coredump_req {
+ __u32 size;
+ __u32 size_ack;
+ __u64 mask;
+};
+
+enum {
+ COREDUMP_REQ_SIZE_VER0 = 16U, /* size of first published struct */
+};
+
+/**
+ * struct coredump_ack - message userspace sends to kernel
+ * @size: size of the struct
+ * @spare: unused
+ * @mask: features kernel is supposed to use
+ *
+ * The @size member must be set to the size of struct coredump_ack. It
+ * may never exceed what the kernel returned in coredump_req->size_ack
+ * but it may of course be smaller (>= COREDUMP_ACK_SIZE_VER0 and <=
+ * coredump_req->size_ack).
+ *
+ * The @mask member must be set to the features the coredump server
+ * wants the kernel to use. Only bits the kernel returned in
+ * coredump_req->mask may be set.
+ */
+struct coredump_ack {
+ __u32 size;
+ __u32 spare;
+ __u64 mask;
+};
+
+enum {
+ COREDUMP_ACK_SIZE_VER0 = 16U, /* size of first published struct */
+};
+
+/**
+ * enum coredump_mark - Markers for the coredump socket
+ *
+ * The kernel will place a single byte on the coredump socket. The
+ * markers notify userspace whether the coredump ack succeeded or
+ * failed.
+ *
+ * @COREDUMP_MARK_MINSIZE: the provided coredump_ack size was too small
+ * @COREDUMP_MARK_MAXSIZE: the provided coredump_ack size was too big
+ * @COREDUMP_MARK_UNSUPPORTED: the provided coredump_ack mask was invalid
+ * @COREDUMP_MARK_CONFLICTING: the provided coredump_ack mask has conflicting options
+ * @COREDUMP_MARK_REQACK: the coredump request and ack were successful
+ * @__COREDUMP_MARK_MAX: the maximum coredump mark value
+ */
+enum coredump_mark {
+ COREDUMP_MARK_REQACK = 0U,
+ COREDUMP_MARK_MINSIZE = 1U,
+ COREDUMP_MARK_MAXSIZE = 2U,
+ COREDUMP_MARK_UNSUPPORTED = 3U,
+ COREDUMP_MARK_CONFLICTING = 4U,
+ __COREDUMP_MARK_MAX = (1U << 31),
+};
+
+#endif /* _UAPI_LINUX_COREDUMP_H */
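A minimal sketch of the handshake documented above, from the coredump server's
side; the function name and the policy choice (letting the kernel write the
dump) are made up for illustration, only the structs, flags, marks, and sizes
come from this header:

#include <linux/coredump.h>
#include <sys/socket.h>

/* fd: connected coredump socket for one crashing task */
static int handle_coredump_conn(int fd)
{
	struct coredump_req req = {};
	struct coredump_ack ack = {};
	unsigned char mark;
	char drain[512];
	ssize_t n;

	n = recv(fd, &req, sizeof(req), 0);
	if (n < (ssize_t)COREDUMP_REQ_SIZE_VER0)
		return -1;

	/* A newer kernel may send a larger struct; discard the tail. */
	while ((__u32)n < req.size) {
		ssize_t m = recv(fd, drain, sizeof(drain), 0);
		if (m <= 0)
			return -1;
		n += m;
	}

	/* Ack only what was offered: mask must be a subset of req.mask,
	 * size must stay within [COREDUMP_ACK_SIZE_VER0, req.size_ack]. */
	ack.size = COREDUMP_ACK_SIZE_VER0;
	ack.mask = req.mask & COREDUMP_KERNEL;	/* policy: kernel dumps */
	if (send(fd, &ack, ack.size, 0) != (ssize_t)ack.size)
		return -1;

	/* The kernel answers with a single-byte mark. */
	if (recv(fd, &mark, 1, 0) != 1)
		return -1;
	return mark == COREDUMP_MARK_REQACK ? 0 : -1;
}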
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 9ff72cfb2e98..09a75bdb6560 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -208,10 +208,6 @@ enum {
ETHTOOL_A_STATS_PHY_MAX = (__ETHTOOL_A_STATS_PHY_CNT - 1)
};
-/* generic netlink info */
-#define ETHTOOL_GENL_NAME "ethtool"
-#define ETHTOOL_GENL_VERSION 1
-
#define ETHTOOL_MCGRP_MONITOR_NAME "monitor"
#endif /* _UAPI_LINUX_ETHTOOL_NETLINK_H_ */
diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h
index 9a02f579de22..aa8ab5227c1e 100644
--- a/include/uapi/linux/ethtool_netlink_generated.h
+++ b/include/uapi/linux/ethtool_netlink_generated.h
@@ -6,8 +6,8 @@
#ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H
#define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H
-#define ETHTOOL_FAMILY_NAME "ethtool"
-#define ETHTOOL_FAMILY_VERSION 1
+#define ETHTOOL_GENL_NAME "ethtool"
+#define ETHTOOL_GENL_VERSION 1
enum {
ETHTOOL_UDP_TUNNEL_TYPE_VXLAN,
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d00b85cb168c..7a4c35ff03fe 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -178,6 +178,7 @@ struct kvm_xen_exit {
#define KVM_EXIT_NOTIFY 37
#define KVM_EXIT_LOONGARCH_IOCSR 38
#define KVM_EXIT_MEMORY_FAULT 39
+#define KVM_EXIT_TDX 40
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -447,6 +448,31 @@ struct kvm_run {
__u64 gpa;
__u64 size;
} memory_fault;
+ /* KVM_EXIT_TDX */
+ struct {
+ __u64 flags;
+ __u64 nr;
+ union {
+ struct {
+ __u64 ret;
+ __u64 data[5];
+ } unknown;
+ struct {
+ __u64 ret;
+ __u64 gpa;
+ __u64 size;
+ } get_quote;
+ struct {
+ __u64 ret;
+ __u64 leaf;
+ __u64 r11, r12, r13, r14;
+ } get_tdvmcall_info;
+ struct {
+ __u64 ret;
+ __u64 vector;
+ } setup_event_notify;
+ };
+ } tdx;
/* Fix the size of the union. */
char padding[256];
};
diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h
index 84fa8a21dfd0..6ac84b2f636c 100644
--- a/include/uapi/linux/mptcp_pm.h
+++ b/include/uapi/linux/mptcp_pm.h
@@ -27,14 +27,14 @@
* token, rem_id.
* @MPTCP_EVENT_SUB_ESTABLISHED: A new subflow has been established. 'error'
* should not be set. Attributes: token, family, loc_id, rem_id, saddr4 |
- * saddr6, daddr4 | daddr6, sport, dport, backup, if_idx [, error].
+ * saddr6, daddr4 | daddr6, sport, dport, backup, if-idx [, error].
* @MPTCP_EVENT_SUB_CLOSED: A subflow has been closed. An error (copy of
* sk_err) could be set if an error has been detected for this subflow.
* Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 |
- * daddr6, sport, dport, backup, if_idx [, error].
+ * daddr6, sport, dport, backup, if-idx [, error].
* @MPTCP_EVENT_SUB_PRIORITY: The priority of a subflow has changed. 'error'
* should not be set. Attributes: token, family, loc_id, rem_id, saddr4 |
- * saddr6, daddr4 | daddr6, sport, dport, backup, if_idx [, error].
+ * saddr6, daddr4 | daddr6, sport, dport, backup, if-idx [, error].
* @MPTCP_EVENT_LISTENER_CREATED: A new PM listener is created. Attributes:
* family, sport, saddr4 | saddr6.
* @MPTCP_EVENT_LISTENER_CLOSED: A PM listener is closed. Attributes: family,
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 518ba144544c..2beb30be2c5f 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -142,8 +142,6 @@ enum nf_tables_msg_types {
NFT_MSG_DESTROYOBJ,
NFT_MSG_DESTROYFLOWTABLE,
NFT_MSG_GETSETELEM_RESET,
- NFT_MSG_NEWDEV,
- NFT_MSG_DELDEV,
NFT_MSG_MAX,
};
@@ -1786,18 +1784,10 @@ enum nft_synproxy_attributes {
* enum nft_device_attributes - nf_tables device netlink attributes
*
* @NFTA_DEVICE_NAME: name of this device (NLA_STRING)
- * @NFTA_DEVICE_TABLE: table containing the flowtable or chain hooking into the device (NLA_STRING)
- * @NFTA_DEVICE_FLOWTABLE: flowtable hooking into the device (NLA_STRING)
- * @NFTA_DEVICE_CHAIN: chain hooking into the device (NLA_STRING)
- * @NFTA_DEVICE_SPEC: hook spec matching the device (NLA_STRING)
*/
enum nft_devices_attributes {
NFTA_DEVICE_UNSPEC,
NFTA_DEVICE_NAME,
- NFTA_DEVICE_TABLE,
- NFTA_DEVICE_FLOWTABLE,
- NFTA_DEVICE_CHAIN,
- NFTA_DEVICE_SPEC,
__NFTA_DEVICE_MAX
};
#define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1)
diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 50d807af2649..6cd58cd2a6f0 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h
@@ -25,8 +25,6 @@ enum nfnetlink_groups {
#define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA
NFNLGRP_NFTRACE,
#define NFNLGRP_NFTRACE NFNLGRP_NFTRACE
- NFNLGRP_NFT_DEV,
-#define NFNLGRP_NFT_DEV NFNLGRP_NFT_DEV
__NFNLGRP_MAX,
};
#define NFNLGRP_MAX (__NFNLGRP_MAX - 1)
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index 77d9d6af46da..c9751bdfd937 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -135,8 +135,28 @@
#define UBLKSRV_IO_BUF_TOTAL_SIZE (1ULL << UBLKSRV_IO_BUF_TOTAL_BITS)
/*
- * zero copy requires 4k block size, and can remap ublk driver's io
- * request into ublksrv's vm space
+ * ublk server can register data buffers for incoming I/O requests with a sparse
+ * io_uring buffer table. The request buffer can then be used as the data buffer
+ * for io_uring operations via the fixed buffer index.
+ * Note that the ublk server can never directly access the request data memory.
+ *
+ * To use this feature, the ublk server must first register a sparse buffer
+ * table on an io_uring instance.
+ * When an incoming ublk request is received, the ublk server submits a
+ * UBLK_U_IO_REGISTER_IO_BUF command to that io_uring instance. The
+ * ublksrv_io_cmd's q_id and tag specify the request whose buffer to register
+ * and addr is the index in the io_uring's buffer table to install the buffer.
+ * SQEs can now be submitted to the io_uring to read/write the request's buffer
+ * by enabling fixed buffers (e.g. using IORING_OP_{READ,WRITE}_FIXED or
+ * IORING_URING_CMD_FIXED) and passing the registered buffer index in buf_index.
+ * Once the last io_uring operation using the request's buffer has completed,
+ * the ublk server submits a UBLK_U_IO_UNREGISTER_IO_BUF command with q_id, tag,
+ * and addr again specifying the request buffer to unregister.
+ * The ublk request is completed when its buffer is unregistered from all
+ * io_uring instances and the ublk server issues UBLK_U_IO_COMMIT_AND_FETCH_REQ.
+ *
+ * Not available for UBLK_F_UNPRIVILEGED_DEV, as a ublk server can leak
+ * uninitialized kernel memory by not reading into the full request buffer.
*/
#define UBLK_F_SUPPORT_ZERO_COPY (1ULL << 0)
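A hedged liburing sketch of the flow described above; the ring/fd setup, the
helper names, buffer-table slot 0, and the assumption that the fixed-op
address is a zero offset into the registered request buffer are all
illustrative:

#include <liburing.h>
#include <linux/ublk_cmd.h>
#include <string.h>

/* Fill a ublk uring_cmd SQE; ublksrv_io_cmd fits the 16-byte inline
 * sqe->cmd area, and addr carries the buffer-table index to (un)register. */
static void prep_ublk_io_cmd(struct io_uring_sqe *sqe, int ublk_ch_fd,
			     __u32 cmd_op, __u16 q_id, __u16 tag, __u64 index)
{
	struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;

	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->fd = ublk_ch_fd;
	sqe->cmd_op = cmd_op;
	cmd->q_id = q_id;
	cmd->tag = tag;
	cmd->addr = index;
}

static int write_one_rq(struct io_uring *ring, int ublk_ch_fd, int backing_fd,
			__u16 q_id, __u16 tag, __u32 len, __u64 dev_off)
{
	struct io_uring_sqe *sqe;

	/* one-time setup elsewhere:
	 * io_uring_register_buffers_sparse(ring, 64); */

	sqe = io_uring_get_sqe(ring);
	prep_ublk_io_cmd(sqe, ublk_ch_fd, UBLK_U_IO_REGISTER_IO_BUF,
			 q_id, tag, 0);
	sqe->flags |= IOSQE_IO_LINK;		/* register -> write */

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_write_fixed(sqe, backing_fd, NULL, len, dev_off, 0);
	sqe->flags |= IOSQE_IO_LINK;		/* write -> unregister */

	sqe = io_uring_get_sqe(ring);
	prep_ublk_io_cmd(sqe, ublk_ch_fd, UBLK_U_IO_UNREGISTER_IO_BUF,
			 q_id, tag, 0);

	/* after reaping the CQEs, UBLK_U_IO_COMMIT_AND_FETCH_REQ completes
	 * the ublk request as usual */
	return io_uring_submit(ring);
}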
@@ -450,10 +470,10 @@ static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg(
__u64 sqe_addr)
{
struct ublk_auto_buf_reg reg = {
- .index = sqe_addr & 0xffff,
- .flags = (sqe_addr >> 16) & 0xff,
- .reserved0 = (sqe_addr >> 24) & 0xff,
- .reserved1 = sqe_addr >> 32,
+ .index = (__u16)sqe_addr,
+ .flags = (__u8)(sqe_addr >> 16),
+ .reserved0 = (__u8)(sqe_addr >> 24),
+ .reserved1 = (__u32)(sqe_addr >> 32),
};
return reg;
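For example, decoding sqe_addr = 0x0000000102030004 with the casts above gives:

/*
 *   .index     = 0x0004       (bits  0..15)
 *   .flags     = 0x03         (bits 16..23)
 *   .reserved0 = 0x02         (bits 24..31)
 *   .reserved1 = 0x00000001   (bits 32..63)
 */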
diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h
index ed07181d4eff..e05280e41522 100644
--- a/include/uapi/linux/vm_sockets.h
+++ b/include/uapi/linux/vm_sockets.h
@@ -17,6 +17,10 @@
#ifndef _UAPI_VM_SOCKETS_H
#define _UAPI_VM_SOCKETS_H
+#ifndef __KERNEL__
+#include <sys/socket.h> /* for struct sockaddr and sa_family_t */
+#endif
+
#include <linux/socket.h>
#include <linux/types.h>
diff --git a/init/Kconfig b/init/Kconfig
index af4c2f085455..666783eb50ab 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1716,9 +1716,13 @@ config FUTEX_PI
depends on FUTEX && RT_MUTEXES
default y
+#
+# marked broken for performance reasons; this gives us one more cycle to sort things out.
+#
config FUTEX_PRIVATE_HASH
bool
depends on FUTEX && !BASE_SMALL && MMU
+ depends on BROKEN
default y
config FUTEX_MPOL
diff --git a/init/initramfs.c b/init/initramfs.c
index 72bad44a1d41..097673b97784 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/async.h>
+#include <linux/export.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/types.h>
diff --git a/init/main.c b/init/main.c
index ed576c7f475d..225a58279acd 100644
--- a/init/main.c
+++ b/init/main.c
@@ -13,6 +13,7 @@
#define DEBUG /* Enable initcall_debug */
#include <linux/types.h>
+#include <linux/export.h>
#include <linux/extable.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index e9355276ab5d..9798d6fb4ec7 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -141,18 +141,26 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
if (ctx->flags & IORING_SETUP_SQPOLL) {
struct io_sq_data *sq = ctx->sq_data;
+ struct task_struct *tsk;
+ rcu_read_lock();
+ tsk = rcu_dereference(sq->thread);
/*
* sq->thread might be NULL if we raced with the sqpoll
* thread termination.
*/
- if (sq->thread) {
+ if (tsk) {
+ get_task_struct(tsk);
+ rcu_read_unlock();
+ getrusage(tsk, RUSAGE_SELF, &sq_usage);
+ put_task_struct(tsk);
sq_pid = sq->task_pid;
sq_cpu = sq->sq_cpu;
- getrusage(sq->thread, RUSAGE_SELF, &sq_usage);
sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000
+ sq_usage.ru_stime.tv_usec);
sq_work_time = sq->work_time;
+ } else {
+ rcu_read_unlock();
}
}
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index cd1fcb115739..be91edf34f01 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -1259,8 +1259,10 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
atomic_set(&wq->worker_refs, 1);
init_completion(&wq->worker_done);
ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
- if (ret)
+ if (ret) {
+ put_task_struct(wq->task);
goto err;
+ }
return wq;
err:
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index cf759c172083..5111ec040c53 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1523,6 +1523,9 @@ static __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
}
}
mutex_unlock(&ctx->uring_lock);
+
+ if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
+ io_move_task_work_from_local(ctx);
}
static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events)
@@ -2906,7 +2909,7 @@ static __cold void io_ring_exit_work(struct work_struct *work)
struct task_struct *tsk;
io_sq_thread_park(sqd);
- tsk = sqd->thread;
+ tsk = sqpoll_task_locked(sqd);
if (tsk && tsk->io_uring && tsk->io_uring->io_wq)
io_wq_cancel_cb(tsk->io_uring->io_wq,
io_cancel_ctx_cb, ctx, true);
@@ -3142,7 +3145,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
s64 inflight;
DEFINE_WAIT(wait);
- WARN_ON_ONCE(sqd && sqd->thread != current);
+ WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current);
if (!current->io_uring)
return;
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index d59c12277d58..66c1ca73f55e 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -98,8 +98,6 @@ struct llist_node *io_handle_tw_list(struct llist_node *node, unsigned int *coun
struct llist_node *tctx_task_work_run(struct io_uring_task *tctx, unsigned int max_entries, unsigned int *count);
void tctx_task_work(struct callback_head *cb);
__cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
-int io_uring_alloc_task_context(struct task_struct *task,
- struct io_ring_ctx *ctx);
int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
int start, int end);
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 2ea65f3cef72..f2d2cc319faa 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -270,8 +270,12 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
/* truncate end piece, if needed, for non partial buffers */
if (len > arg->max_len) {
len = arg->max_len;
- if (!(bl->flags & IOBL_INC))
+ if (!(bl->flags & IOBL_INC)) {
+ arg->partial_map = 1;
+ if (iov != arg->iovs)
+ break;
buf->len = len;
+ }
}
iov->iov_base = u64_to_user_ptr(buf->addr);
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index 5d83c7adc739..723d0361898e 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -58,7 +58,8 @@ struct buf_sel_arg {
size_t max_len;
unsigned short nr_iovs;
unsigned short mode;
- unsigned buf_group;
+ unsigned short buf_group;
+ unsigned short partial_map;
};
void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 71400d6cefc8..4c2578f2efcb 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -82,7 +82,7 @@ static void io_msg_tw_complete(struct io_kiocb *req, io_tw_token_t tw)
spin_unlock(&ctx->msg_lock);
}
if (req)
- kmem_cache_free(req_cachep, req);
+ kfree_rcu(req, rcu_head);
percpu_ref_put(&ctx->refs);
}
@@ -90,7 +90,7 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
int res, u32 cflags, u64 user_data)
{
if (!READ_ONCE(ctx->submitter_task)) {
- kmem_cache_free(req_cachep, req);
+ kfree_rcu(req, rcu_head);
return -EOWNERDEAD;
}
req->opcode = IORING_OP_NOP;
diff --git a/io_uring/net.c b/io_uring/net.c
index e16633fd6630..bec8c6ed0a93 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -75,12 +75,17 @@ struct io_sr_msg {
u16 flags;
/* initialised and used only by !msg send variants */
u16 buf_group;
- bool retry;
+ unsigned short retry_flags;
void __user *msg_control;
/* used only for send zerocopy */
struct io_kiocb *notif;
};
+enum sr_retry_flags {
+ IO_SR_MSG_RETRY = 1,
+ IO_SR_MSG_PARTIAL_MAP = 2,
+};
+
/*
* Number of times we'll try and do receives if there's more data. If we
* exceed this limit, then add us to the back of the queue and retry from
@@ -187,7 +192,7 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
req->flags &= ~REQ_F_BL_EMPTY;
sr->done_io = 0;
- sr->retry = false;
+ sr->retry_flags = 0;
sr->len = 0; /* get from the provided buffer */
}
@@ -397,7 +402,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
sr->done_io = 0;
- sr->retry = false;
+ sr->retry_flags = 0;
sr->len = READ_ONCE(sqe->len);
sr->flags = READ_ONCE(sqe->ioprio);
if (sr->flags & ~SENDMSG_FLAGS)
@@ -751,7 +756,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
sr->done_io = 0;
- sr->retry = false;
+ sr->retry_flags = 0;
if (unlikely(sqe->file_index || sqe->addr2))
return -EINVAL;
@@ -821,9 +826,9 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
if (sr->flags & IORING_RECVSEND_BUNDLE) {
size_t this_ret = *ret - sr->done_io;
- cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, this_ret),
+ cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
issue_flags);
- if (sr->retry)
+ if (sr->retry_flags & IO_SR_MSG_RETRY)
cflags = req->cqe.flags | (cflags & CQE_F_MASK);
/* bundle with no more immediate buffers, we're done */
if (req->flags & REQ_F_BL_EMPTY)
@@ -832,12 +837,12 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
* If more is available AND it was a full transfer, retry and
* append to this one
*/
- if (!sr->retry && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
+ if (!sr->retry_flags && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
!iov_iter_count(&kmsg->msg.msg_iter)) {
req->cqe.flags = cflags & ~CQE_F_MASK;
sr->len = kmsg->msg.msg_inq;
sr->done_io += this_ret;
- sr->retry = true;
+ sr->retry_flags |= IO_SR_MSG_RETRY;
return false;
}
} else {
@@ -1077,6 +1082,14 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
if (unlikely(ret < 0))
return ret;
+ if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
+ kmsg->vec.nr = ret;
+ kmsg->vec.iovec = arg.iovs;
+ req->flags |= REQ_F_NEED_CLEANUP;
+ }
+ if (arg.partial_map)
+ sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP;
+
/* special case 1 vec, can be a fast path */
if (ret == 1) {
sr->buf = arg.iovs[0].iov_base;
@@ -1085,11 +1098,6 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
}
iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
arg.out_len);
- if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
- kmsg->vec.nr = ret;
- kmsg->vec.iovec = arg.iovs;
- req->flags |= REQ_F_NEED_CLEANUP;
- }
} else {
void __user *buf;
@@ -1275,7 +1283,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
int ret;
zc->done_io = 0;
- zc->retry = false;
+ zc->retry_flags = 0;
if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
return -EINVAL;
@@ -1730,9 +1738,11 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags)
int ret;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
- if (unlikely(req->flags & REQ_F_FAIL)) {
- ret = -ECONNRESET;
- goto out;
+ if (connect->in_progress) {
+ struct poll_table_struct pt = { ._key = EPOLLERR };
+
+ if (vfs_poll(req->file, &pt) & EPOLLERR)
+ goto get_sock_err;
}
file_flags = force_nonblock ? O_NONBLOCK : 0;
@@ -1757,8 +1767,10 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags)
* which means the previous result is good. For both of these,
* grab the sock_error() and use that for the completion.
*/
- if (ret == -EBADFD || ret == -EISCONN)
+ if (ret == -EBADFD || ret == -EISCONN) {
+get_sock_err:
ret = sock_error(sock_from_file(req->file)->sk);
+ }
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 6e0882b051f9..6de6229207a8 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -216,6 +216,7 @@ const struct io_issue_def io_issue_defs[] = {
},
[IORING_OP_FALLOCATE] = {
.needs_file = 1,
+ .hash_reg_file = 1,
.prep = io_fallocate_prep,
.issue = io_fallocate,
},
diff --git a/io_uring/openclose.c b/io_uring/openclose.c
index 83e36ad4e31b..d70700e5cef8 100644
--- a/io_uring/openclose.c
+++ b/io_uring/openclose.c
@@ -416,8 +416,6 @@ int io_pipe(struct io_kiocb *req, unsigned int issue_flags)
ret = create_pipe_files(files, p->flags);
if (ret)
return ret;
- files[0]->f_mode |= FMODE_NOWAIT;
- files[1]->f_mode |= FMODE_NOWAIT;
if (!!p->file_slot)
ret = io_pipe_fixed(req, files, issue_flags);
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 0526062e2f81..20e9b46a4adf 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -273,8 +273,6 @@ static int io_poll_check_events(struct io_kiocb *req, io_tw_token_t tw)
return IOU_POLL_REISSUE;
}
}
- if (unlikely(req->cqe.res & EPOLLERR))
- req_set_fail(req);
if (req->apoll_events & EPOLLONESHOT)
return IOU_POLL_DONE;
diff --git a/io_uring/register.c b/io_uring/register.c
index cc23a4c205cd..a59589249fce 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -273,6 +273,8 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
if (ctx->flags & IORING_SETUP_SQPOLL) {
sqd = ctx->sq_data;
if (sqd) {
+ struct task_struct *tsk;
+
/*
* Observe the correct sqd->lock -> ctx->uring_lock
* ordering. Fine to drop uring_lock here, we hold
@@ -282,8 +284,9 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
mutex_unlock(&ctx->uring_lock);
mutex_lock(&sqd->lock);
mutex_lock(&ctx->uring_lock);
- if (sqd->thread)
- tctx = sqd->thread->io_uring;
+ tsk = sqpoll_task_locked(sqd);
+ if (tsk)
+ tctx = tsk->io_uring;
}
} else {
tctx = current->io_uring;
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index c592ceace97d..f2b31fb68992 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -112,8 +112,11 @@ static void io_release_ubuf(void *priv)
struct io_mapped_ubuf *imu = priv;
unsigned int i;
- for (i = 0; i < imu->nr_bvecs; i++)
- unpin_user_page(imu->bvec[i].bv_page);
+ for (i = 0; i < imu->nr_bvecs; i++) {
+ struct folio *folio = page_folio(imu->bvec[i].bv_page);
+
+ unpin_user_folio(folio, 1);
+ }
}
static struct io_mapped_ubuf *io_alloc_imu(struct io_ring_ctx *ctx,
@@ -731,6 +734,7 @@ bool io_check_coalesce_buffer(struct page **page_array, int nr_pages,
data->nr_pages_mid = folio_nr_pages(folio);
data->folio_shift = folio_shift(folio);
+ data->first_folio_page_idx = folio_page_idx(folio, page_array[0]);
/*
* Check if pages are contiguous inside a folio, and all folios have
@@ -809,10 +813,8 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
imu->nr_bvecs = nr_pages;
ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage);
- if (ret) {
- unpin_user_pages(pages, nr_pages);
+ if (ret)
goto done;
- }
size = iov->iov_len;
/* store original address for later verification */
@@ -826,7 +828,11 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
if (coalesced)
imu->folio_shift = data.folio_shift;
refcount_set(&imu->refs, 1);
- off = (unsigned long) iov->iov_base & ((1UL << imu->folio_shift) - 1);
+
+ off = (unsigned long)iov->iov_base & ~PAGE_MASK;
+ if (coalesced)
+ off += data.first_folio_page_idx << PAGE_SHIFT;
+
node->buf = imu;
ret = 0;
@@ -842,6 +848,10 @@ done:
if (ret) {
if (imu)
io_free_imu(ctx, imu);
+ if (pages) {
+ for (i = 0; i < nr_pages; i++)
+ unpin_user_folio(page_folio(pages[i]), 1);
+ }
io_cache_free(&ctx->node_cache, node);
node = ERR_PTR(ret);
}
@@ -1177,6 +1187,8 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
return -EINVAL;
if (check_add_overflow(arg->nr, arg->dst_off, &nbufs))
return -EOVERFLOW;
+ if (nbufs > IORING_MAX_REG_BUFFERS)
+ return -EINVAL;
ret = io_rsrc_data_alloc(&data, max(nbufs, ctx->buf_table.nr));
if (ret)
@@ -1327,7 +1339,6 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter,
{
unsigned long folio_size = 1 << imu->folio_shift;
unsigned long folio_mask = folio_size - 1;
- u64 folio_addr = imu->ubuf & ~folio_mask;
struct bio_vec *res_bvec = vec->bvec;
size_t total_len = 0;
unsigned bvec_idx = 0;
@@ -1349,8 +1360,13 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter,
if (unlikely(check_add_overflow(total_len, iov_len, &total_len)))
return -EOVERFLOW;
- /* by using folio address it also accounts for bvec offset */
- offset = buf_addr - folio_addr;
+ offset = buf_addr - imu->ubuf;
+ /*
+ * Only the first bvec can have non zero bv_offset, account it
+ * here and work with full folios below.
+ */
+ offset += imu->bvec[0].bv_offset;
+
src_bvec = imu->bvec + (offset >> imu->folio_shift);
offset &= folio_mask;
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 0d2138f16322..25e7e998dcfd 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -49,6 +49,7 @@ struct io_imu_folio_data {
unsigned int nr_pages_mid;
unsigned int folio_shift;
unsigned int nr_folios;
+ unsigned long first_folio_page_idx;
};
bool io_rsrc_cache_init(struct io_ring_ctx *ctx);
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 03c699493b5a..a3f11349ce06 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -16,6 +16,7 @@
#include <uapi/linux/io_uring.h>
#include "io_uring.h"
+#include "tctx.h"
#include "napi.h"
#include "sqpoll.h"
@@ -30,7 +31,7 @@ enum {
void io_sq_thread_unpark(struct io_sq_data *sqd)
__releases(&sqd->lock)
{
- WARN_ON_ONCE(sqd->thread == current);
+ WARN_ON_ONCE(sqpoll_task_locked(sqd) == current);
/*
* Do the dance but not conditional clear_bit() because it'd race with
@@ -46,24 +47,32 @@ void io_sq_thread_unpark(struct io_sq_data *sqd)
void io_sq_thread_park(struct io_sq_data *sqd)
__acquires(&sqd->lock)
{
- WARN_ON_ONCE(data_race(sqd->thread) == current);
+ struct task_struct *tsk;
atomic_inc(&sqd->park_pending);
set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
mutex_lock(&sqd->lock);
- if (sqd->thread)
- wake_up_process(sqd->thread);
+
+ tsk = sqpoll_task_locked(sqd);
+ if (tsk) {
+ WARN_ON_ONCE(tsk == current);
+ wake_up_process(tsk);
+ }
}
void io_sq_thread_stop(struct io_sq_data *sqd)
{
- WARN_ON_ONCE(sqd->thread == current);
+ struct task_struct *tsk;
+
WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state));
set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
mutex_lock(&sqd->lock);
- if (sqd->thread)
- wake_up_process(sqd->thread);
+ tsk = sqpoll_task_locked(sqd);
+ if (tsk) {
+ WARN_ON_ONCE(tsk == current);
+ wake_up_process(tsk);
+ }
mutex_unlock(&sqd->lock);
wait_for_completion(&sqd->exited);
}
@@ -270,7 +279,8 @@ static int io_sq_thread(void *data)
/* offload context creation failed, just exit */
if (!current->io_uring) {
mutex_lock(&sqd->lock);
- sqd->thread = NULL;
+ rcu_assign_pointer(sqd->thread, NULL);
+ put_task_struct(current);
mutex_unlock(&sqd->lock);
goto err_out;
}
@@ -379,7 +389,8 @@ static int io_sq_thread(void *data)
io_sq_tw(&retry_list, UINT_MAX);
io_uring_cancel_generic(true, sqd);
- sqd->thread = NULL;
+ rcu_assign_pointer(sqd->thread, NULL);
+ put_task_struct(current);
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags);
io_run_task_work();
@@ -409,7 +420,6 @@ void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
__cold int io_sq_offload_create(struct io_ring_ctx *ctx,
struct io_uring_params *p)
{
- struct task_struct *task_to_put = NULL;
int ret;
/* Retain compatibility with failing for an invalid attach attempt */
@@ -484,8 +494,11 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx,
goto err_sqpoll;
}
- sqd->thread = tsk;
- task_to_put = get_task_struct(tsk);
+ mutex_lock(&sqd->lock);
+ rcu_assign_pointer(sqd->thread, tsk);
+ mutex_unlock(&sqd->lock);
+
+ get_task_struct(tsk);
ret = io_uring_alloc_task_context(tsk, ctx);
wake_up_new_task(tsk);
if (ret)
@@ -495,16 +508,11 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx,
ret = -EINVAL;
goto err;
}
-
- if (task_to_put)
- put_task_struct(task_to_put);
return 0;
err_sqpoll:
complete(&ctx->sq_data->exited);
err:
io_sq_thread_finish(ctx);
- if (task_to_put)
- put_task_struct(task_to_put);
return ret;
}
@@ -515,10 +523,13 @@ __cold int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx,
int ret = -EINVAL;
if (sqd) {
+ struct task_struct *tsk;
+
io_sq_thread_park(sqd);
/* Don't set affinity for a dying thread */
- if (sqd->thread)
- ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask);
+ tsk = sqpoll_task_locked(sqd);
+ if (tsk)
+ ret = io_wq_cpu_affinity(tsk->io_uring, mask);
io_sq_thread_unpark(sqd);
}
diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h
index 4171666b1cf4..b83dcdec9765 100644
--- a/io_uring/sqpoll.h
+++ b/io_uring/sqpoll.h
@@ -8,7 +8,7 @@ struct io_sq_data {
/* ctx's that are using this sqd */
struct list_head ctx_list;
- struct task_struct *thread;
+ struct task_struct __rcu *thread;
struct wait_queue_head wait;
unsigned sq_thread_idle;
@@ -29,3 +29,9 @@ void io_sq_thread_unpark(struct io_sq_data *sqd);
void io_put_sq_data(struct io_sq_data *sqd);
void io_sqpoll_wait_sq(struct io_ring_ctx *ctx);
int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask);
+
+static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd)
+{
+ return rcu_dereference_protected(sqd->thread,
+ lockdep_is_held(&sqd->lock));
+}
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 797247a34cb7..4a7011c799f0 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -76,6 +76,8 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
int dmabuf_fd = area_reg->dmabuf_fd;
int i, ret;
+ if (off)
+ return -EINVAL;
if (WARN_ON_ONCE(!ifq->dev))
return -EFAULT;
if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
@@ -106,8 +108,10 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
for_each_sgtable_dma_sg(mem->sgt, sg, i)
total_size += sg_dma_len(sg);
- if (total_size < off + len)
- return -EINVAL;
+ if (total_size != len) {
+ ret = -EINVAL;
+ goto err;
+ }
mem->dmabuf_offset = off;
mem->size = len;
@@ -861,10 +865,7 @@ static int io_pp_zc_init(struct page_pool *pp)
static void io_pp_zc_destroy(struct page_pool *pp)
{
struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp);
- struct io_zcrx_area *area = ifq->area;
- if (WARN_ON_ONCE(area->free_count != area->nia.num_niovs))
- return;
percpu_ref_put(&ifq->ctx->refs);
}
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 82ed2d3c9846..093551fe66a7 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -411,6 +411,7 @@ static int mqueue_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = MQUEUE_MAGIC;
sb->s_op = &mqueue_super_ops;
+ sb->s_d_flags = DCACHE_DONTCACHE;
inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL);
if (IS_ERR(inode))
@@ -482,7 +483,7 @@ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
put_user_ns(fc->user_ns);
fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);
- mnt = fc_mount(fc);
+ mnt = fc_mount_longterm(fc);
put_fs_context(fc);
return mnt;
}
diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec
index e64ce21f9a80..2ee603a98813 100644
--- a/kernel/Kconfig.kexec
+++ b/kernel/Kconfig.kexec
@@ -134,6 +134,7 @@ config CRASH_DM_CRYPT
depends on KEXEC_FILE
depends on CRASH_DUMP
depends on DM_CRYPT
+ depends on KEYS
help
With this option enabled, user space can interact with
/sys/kernel/config/crash_dm_crypt_keys to make the dm crypt keys
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index f2f38903b2fe..b0eae2a3c895 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -668,12 +668,6 @@ int audit_remove_tree_rule(struct audit_krule *rule)
return 0;
}
-static int compare_root(struct vfsmount *mnt, void *arg)
-{
- return inode_to_key(d_backing_inode(mnt->mnt_root)) ==
- (unsigned long)arg;
-}
-
void audit_trim_trees(void)
{
struct list_head cursor;
@@ -683,8 +677,9 @@ void audit_trim_trees(void)
while (cursor.next != &tree_list) {
struct audit_tree *tree;
struct path path;
- struct vfsmount *root_mnt;
struct audit_node *node;
+ struct path *paths;
+ struct path array[16];
int err;
tree = container_of(cursor.next, struct audit_tree, list);
@@ -696,9 +691,9 @@ void audit_trim_trees(void)
if (err)
goto skip_it;
- root_mnt = collect_mounts(&path);
+ paths = collect_paths(&path, array, 16);
path_put(&path);
- if (IS_ERR(root_mnt))
+ if (IS_ERR(paths))
goto skip_it;
spin_lock(&hash_lock);
@@ -706,14 +701,17 @@ void audit_trim_trees(void)
struct audit_chunk *chunk = find_chunk(node);
/* this could be NULL if the watch is dying else where... */
node->index |= 1U<<31;
- if (iterate_mounts(compare_root,
- (void *)(chunk->key),
- root_mnt))
- node->index &= ~(1U<<31);
+ for (struct path *p = paths; p->dentry; p++) {
+ struct inode *inode = p->dentry->d_inode;
+ if (inode_to_key(inode) == chunk->key) {
+ node->index &= ~(1U<<31);
+ break;
+ }
+ }
}
spin_unlock(&hash_lock);
trim_marked(tree);
- drop_collected_mounts(root_mnt);
+ drop_collected_paths(paths, array);
skip_it:
put_tree(tree);
mutex_lock(&audit_filter_mutex);
@@ -742,9 +740,14 @@ void audit_put_tree(struct audit_tree *tree)
put_tree(tree);
}
-static int tag_mount(struct vfsmount *mnt, void *arg)
+static int tag_mounts(struct path *paths, struct audit_tree *tree)
{
- return tag_chunk(d_backing_inode(mnt->mnt_root), arg);
+ for (struct path *p = paths; p->dentry; p++) {
+ int err = tag_chunk(p->dentry->d_inode, tree);
+ if (err)
+ return err;
+ }
+ return 0;
}
/*
@@ -801,7 +804,8 @@ int audit_add_tree_rule(struct audit_krule *rule)
{
struct audit_tree *seed = rule->tree, *tree;
struct path path;
- struct vfsmount *mnt;
+ struct path array[16];
+ struct path *paths;
int err;
rule->tree = NULL;
@@ -828,16 +832,16 @@ int audit_add_tree_rule(struct audit_krule *rule)
err = kern_path(tree->pathname, 0, &path);
if (err)
goto Err;
- mnt = collect_mounts(&path);
+ paths = collect_paths(&path, array, 16);
path_put(&path);
- if (IS_ERR(mnt)) {
- err = PTR_ERR(mnt);
+ if (IS_ERR(paths)) {
+ err = PTR_ERR(paths);
goto Err;
}
get_tree(tree);
- err = iterate_mounts(tag_mount, tree, mnt);
- drop_collected_mounts(mnt);
+ err = tag_mounts(paths, tree);
+ drop_collected_paths(paths, array);
if (!err) {
struct audit_node *node;
@@ -872,20 +876,21 @@ int audit_tag_tree(char *old, char *new)
struct list_head cursor, barrier;
int failed = 0;
struct path path1, path2;
- struct vfsmount *tagged;
+ struct path array[16];
+ struct path *paths;
int err;
err = kern_path(new, 0, &path2);
if (err)
return err;
- tagged = collect_mounts(&path2);
+ paths = collect_paths(&path2, array, 16);
path_put(&path2);
- if (IS_ERR(tagged))
- return PTR_ERR(tagged);
+ if (IS_ERR(paths))
+ return PTR_ERR(paths);
err = kern_path(old, 0, &path1);
if (err) {
- drop_collected_mounts(tagged);
+ drop_collected_paths(paths, array);
return err;
}
@@ -914,7 +919,7 @@ int audit_tag_tree(char *old, char *new)
continue;
}
- failed = iterate_mounts(tag_mount, tree, tagged);
+ failed = tag_mounts(paths, tree);
if (failed) {
put_tree(tree);
mutex_lock(&audit_filter_mutex);
@@ -955,7 +960,7 @@ int audit_tag_tree(char *old, char *new)
list_del(&cursor);
mutex_unlock(&audit_filter_mutex);
path_put(&path1);
- drop_collected_mounts(tagged);
+ drop_collected_paths(paths, array);
return failed;
}
diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index 3dabdd137d10..2d6e1c98d8ad 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -337,12 +337,12 @@ static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru,
list) {
__bpf_lru_node_move_to_free(l, node, local_free_list(loc_l),
BPF_LRU_LOCAL_LIST_T_FREE);
- if (++nfree == LOCAL_FREE_TARGET)
+ if (++nfree == lru->target_free)
break;
}
- if (nfree < LOCAL_FREE_TARGET)
- __bpf_lru_list_shrink(lru, l, LOCAL_FREE_TARGET - nfree,
+ if (nfree < lru->target_free)
+ __bpf_lru_list_shrink(lru, l, lru->target_free - nfree,
local_free_list(loc_l),
BPF_LRU_LOCAL_LIST_T_FREE);
@@ -577,6 +577,9 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf,
list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
buf += elem_size;
}
+
+ lru->target_free = clamp((nr_elems / num_possible_cpus()) / 2,
+ 1, LOCAL_FREE_TARGET);
}
static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf,
diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
index cbd8d3720c2b..fe2661a58ea9 100644
--- a/kernel/bpf/bpf_lru_list.h
+++ b/kernel/bpf/bpf_lru_list.h
@@ -58,6 +58,7 @@ struct bpf_lru {
del_from_htab_func del_from_htab;
void *del_arg;
unsigned int hash_offset;
+ unsigned int target_free;
unsigned int nr_scans;
bool percpu;
};
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 9122c39870bf..f4885514f007 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -2134,7 +2134,7 @@ static const struct bpf_func_proto bpf_sysctl_get_name_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
};
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index b71e428ad936..ad6df48b540c 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -884,6 +884,13 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
if (fmt[i] == 'p') {
sizeof_cur_arg = sizeof(long);
+ if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
+ ispunct(fmt[i + 1])) {
+ if (tmp_buf)
+ cur_arg = raw_args[num_spec];
+ goto nocopy_fmt;
+ }
+
if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
fmt[i + 2] == 's') {
fmt_ptype = fmt[i + 1];
@@ -891,11 +898,9 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
goto fmt_str;
}
- if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
- ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
+ if (fmt[i + 1] == 'K' ||
fmt[i + 1] == 'x' || fmt[i + 1] == 's' ||
fmt[i + 1] == 'S') {
- /* just kernel pointers */
if (tmp_buf)
cur_arg = raw_args[num_spec];
i++;
diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c
index 941d0d2427e3..8e61dc555415 100644
--- a/kernel/bpf/sysfs_btf.c
+++ b/kernel/bpf/sysfs_btf.c
@@ -21,7 +21,7 @@ static int btf_sysfs_vmlinux_mmap(struct file *filp, struct kobject *kobj,
{
unsigned long pages = PAGE_ALIGN(attr->size) >> PAGE_SHIFT;
size_t vm_size = vma->vm_end - vma->vm_start;
- phys_addr_t addr = virt_to_phys(__start_BTF);
+ phys_addr_t addr = __pa_symbol(__start_BTF);
unsigned long pfn = addr >> PAGE_SHIFT;
if (attr->private != __start_BTF || !PAGE_ALIGNED(addr))
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a7d6e0c5928b..169845710c7e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7027,8 +7027,7 @@ BTF_TYPE_SAFE_TRUSTED(struct file) {
struct inode *f_inode;
};
-BTF_TYPE_SAFE_TRUSTED(struct dentry) {
- /* no negative dentry-s in places where bpf can see it */
+BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry) {
struct inode *d_inode;
};
@@ -7066,7 +7065,6 @@ static bool type_is_trusted(struct bpf_verifier_env *env,
BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
- BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
}
@@ -7076,6 +7074,7 @@ static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
const char *field_name, u32 btf_id)
{
BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry));
return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
"__safe_trusted_or_null");
diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c
index 039d1eb2f215..dd9417425d92 100644
--- a/kernel/cgroup/legacy_freezer.c
+++ b/kernel/cgroup/legacy_freezer.c
@@ -66,15 +66,9 @@ static struct freezer *parent_freezer(struct freezer *freezer)
bool cgroup_freezing(struct task_struct *task)
{
bool ret;
- unsigned int state;
rcu_read_lock();
- /* Check if the cgroup is still FREEZING, but not FROZEN. The extra
- * !FROZEN check is required, because the FREEZING bit is not cleared
- * when the state FROZEN is reached.
- */
- state = task_freezer(task)->state;
- ret = (state & CGROUP_FREEZING) && !(state & CGROUP_FROZEN);
+ ret = task_freezer(task)->state & CGROUP_FREEZING;
rcu_read_unlock();
return ret;
@@ -188,13 +182,12 @@ static void freezer_attach(struct cgroup_taskset *tset)
if (!(freezer->state & CGROUP_FREEZING)) {
__thaw_task(task);
} else {
- freeze_task(task);
-
/* clear FROZEN and propagate upwards */
while (freezer && (freezer->state & CGROUP_FROZEN)) {
freezer->state &= ~CGROUP_FROZEN;
freezer = parent_freezer(freezer);
}
+ freeze_task(task);
}
}
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 8df0dfaaca18..67af8a55185d 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -222,7 +222,10 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
if (size_cmdline != -1) {
selected_size = size_cmdline;
selected_base = base_cmdline;
- selected_limit = min_not_zero(limit_cmdline, limit);
+
+ /* Honor the user-specified DMA address limit */
+ selected_limit = limit_cmdline ?: limit;
+
if (base_cmdline + size_cmdline == limit_cmdline)
fixed = true;
} else {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f34c99f8ce8f..22fdf0c187cd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -207,6 +207,19 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
__perf_ctx_unlock(&cpuctx->ctx);
}
+typedef struct {
+ struct perf_cpu_context *cpuctx;
+ struct perf_event_context *ctx;
+} class_perf_ctx_lock_t;
+
+static inline void class_perf_ctx_lock_destructor(class_perf_ctx_lock_t *_T)
+{ perf_ctx_unlock(_T->cpuctx, _T->ctx); }
+
+static inline class_perf_ctx_lock_t
+class_perf_ctx_lock_constructor(struct perf_cpu_context *cpuctx,
+ struct perf_event_context *ctx)
+{ perf_ctx_lock(cpuctx, ctx); return (class_perf_ctx_lock_t){ cpuctx, ctx }; }
+
#define TASK_TOMBSTONE ((void *)-1L)
static bool is_kernel_event(struct perf_event *event)
@@ -938,13 +951,19 @@ static void perf_cgroup_switch(struct task_struct *task)
if (READ_ONCE(cpuctx->cgrp) == NULL)
return;
- WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
-
cgrp = perf_cgroup_from_task(task, NULL);
if (READ_ONCE(cpuctx->cgrp) == cgrp)
return;
- perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+ guard(perf_ctx_lock)(cpuctx, cpuctx->task_ctx);
+ /*
+ * Re-check, could've raced vs perf_remove_from_context().
+ */
+ if (READ_ONCE(cpuctx->cgrp) == NULL)
+ return;
+
+ WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
+
perf_ctx_disable(&cpuctx->ctx, true);
ctx_sched_out(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP);
@@ -962,7 +981,6 @@ static void perf_cgroup_switch(struct task_struct *task)
ctx_sched_in(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP);
perf_ctx_enable(&cpuctx->ctx, true);
- perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
static int perf_cgroup_ensure_storage(struct perf_event *event,
@@ -2120,18 +2138,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
if (event->group_leader == event)
del_event_from_groups(event, ctx);
- /*
- * If event was in error state, then keep it
- * that way, otherwise bogus counts will be
- * returned on read(). The only way to get out
- * of error state is by explicit re-enabling
- * of the event
- */
- if (event->state > PERF_EVENT_STATE_OFF) {
- perf_cgroup_event_disable(event, ctx);
- perf_event_set_state(event, PERF_EVENT_STATE_OFF);
- }
-
ctx->generation++;
event->pmu_ctx->nr_events--;
}
@@ -2149,8 +2155,9 @@ perf_aux_output_match(struct perf_event *event, struct perf_event *aux_event)
}
static void put_event(struct perf_event *event);
-static void event_sched_out(struct perf_event *event,
- struct perf_event_context *ctx);
+static void __event_disable(struct perf_event *event,
+ struct perf_event_context *ctx,
+ enum perf_event_state state);
static void perf_put_aux_event(struct perf_event *event)
{
@@ -2183,8 +2190,7 @@ static void perf_put_aux_event(struct perf_event *event)
* state so that we don't try to schedule it again. Note
* that perf_event_enable() will clear the ERROR status.
*/
- event_sched_out(iter, ctx);
- perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
+ __event_disable(iter, ctx, PERF_EVENT_STATE_ERROR);
}
}
@@ -2242,18 +2248,6 @@ static inline struct list_head *get_event_list(struct perf_event *event)
&event->pmu_ctx->flexible_active;
}
-/*
- * Events that have PERF_EV_CAP_SIBLING require being part of a group and
- * cannot exist on their own, schedule them out and move them into the ERROR
- * state. Also see _perf_event_enable(), it will not be able to recover
- * this ERROR state.
- */
-static inline void perf_remove_sibling_event(struct perf_event *event)
-{
- event_sched_out(event, event->ctx);
- perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
-}
-
static void perf_group_detach(struct perf_event *event)
{
struct perf_event *leader = event->group_leader;
@@ -2289,8 +2283,15 @@ static void perf_group_detach(struct perf_event *event)
*/
list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) {
+ /*
+ * Events that have PERF_EV_CAP_SIBLING require being part of
+ * a group and cannot exist on their own; schedule them out
+ * and move them into the ERROR state. Also see
+ * _perf_event_enable(); it will not be able to recover this
+ * ERROR state.
+ */
if (sibling->event_caps & PERF_EV_CAP_SIBLING)
- perf_remove_sibling_event(sibling);
+ __event_disable(sibling, ctx, PERF_EVENT_STATE_ERROR);
sibling->group_leader = sibling;
list_del_init(&sibling->sibling_list);
@@ -2493,11 +2494,14 @@ __perf_remove_from_context(struct perf_event *event,
state = PERF_EVENT_STATE_EXIT;
if (flags & DETACH_REVOKE)
state = PERF_EVENT_STATE_REVOKED;
- if (flags & DETACH_DEAD) {
- event->pending_disable = 1;
+ if (flags & DETACH_DEAD)
state = PERF_EVENT_STATE_DEAD;
- }
+
event_sched_out(event, ctx);
+
+ if (event->state > PERF_EVENT_STATE_OFF)
+ perf_cgroup_event_disable(event, ctx);
+
perf_event_set_state(event, min(event->state, state));
if (flags & DETACH_GROUP)
@@ -2562,6 +2566,15 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla
event_function_call(event, __perf_remove_from_context, (void *)flags);
}
+static void __event_disable(struct perf_event *event,
+ struct perf_event_context *ctx,
+ enum perf_event_state state)
+{
+ event_sched_out(event, ctx);
+ perf_cgroup_event_disable(event, ctx);
+ perf_event_set_state(event, state);
+}
+
/*
* Cross CPU call to disable a performance event
*/
@@ -2576,13 +2589,18 @@ static void __perf_event_disable(struct perf_event *event,
perf_pmu_disable(event->pmu_ctx->pmu);
ctx_time_update_event(ctx, event);
+ /*
+ * When disabling a group leader, the whole group becomes ineligible
+ * to run, so schedule out the full group.
+ */
if (event == event->group_leader)
group_sched_out(event, ctx);
- else
- event_sched_out(event, ctx);
- perf_event_set_state(event, PERF_EVENT_STATE_OFF);
- perf_cgroup_event_disable(event, ctx);
+ /*
+ * But only mark the leader OFF; the siblings will remain
+ * INACTIVE.
+ */
+ __event_disable(event, ctx, PERF_EVENT_STATE_OFF);
perf_pmu_enable(event->pmu_ctx->pmu);
}
@@ -2656,8 +2674,8 @@ static void perf_event_unthrottle(struct perf_event *event, bool start)
static void perf_event_throttle(struct perf_event *event)
{
- event->pmu->stop(event, 0);
event->hw.interrupts = MAX_INTERRUPTS;
+ event->pmu->stop(event, 0);
if (event == event->group_leader)
perf_log_throttle(event, 0);
}
@@ -7186,18 +7204,18 @@ void perf_event_wakeup(struct perf_event *event)
static void perf_sigtrap(struct perf_event *event)
{
/*
- * We'd expect this to only occur if the irq_work is delayed and either
- * ctx->task or current has changed in the meantime. This can be the
- * case on architectures that do not implement arch_irq_work_raise().
+ * Both perf_pending_task() and perf_pending_irq() can race with the
+ * task exiting.
*/
- if (WARN_ON_ONCE(event->ctx->task != current))
+ if (current->flags & PF_EXITING)
return;
/*
- * Both perf_pending_task() and perf_pending_irq() can race with the
- * task exiting.
+ * We'd expect this to only occur if the irq_work is delayed and either
+ * ctx->task or current has changed in the meantime. This can be the
+ * case on architectures that do not implement arch_irq_work_raise().
*/
- if (current->flags & PF_EXITING)
+ if (WARN_ON_ONCE(event->ctx->task != current))
return;
send_sig_perf((void __user *)event->pending_addr,
@@ -7233,15 +7251,15 @@ static void __perf_pending_disable(struct perf_event *event)
* CPU-A CPU-B
*
* perf_event_disable_inatomic()
- * @pending_disable = CPU-A;
+ * @pending_disable = 1;
* irq_work_queue();
*
* sched-out
- * @pending_disable = -1;
+ * @pending_disable = 0;
*
* sched-in
* perf_event_disable_inatomic()
- * @pending_disable = CPU-B;
+ * @pending_disable = 1;
* irq_work_queue(); // FAILS
*
* irq_work_run()
@@ -7439,6 +7457,10 @@ perf_sample_ustack_size(u16 stack_size, u16 header_size,
if (!regs)
return 0;
+ /* No mm, no stack, no dump. */
+ if (!current->mm)
+ return 0;
+
/*
* Check if we fit in with the requested stack size into the:
* - TASK_SIZE
@@ -8150,6 +8172,9 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
const u32 max_stack = event->attr.sample_max_stack;
struct perf_callchain_entry *callchain;
+ if (!current->mm)
+ user = false;
+
if (!kernel && !user)
return &__empty_callchain;
@@ -11091,7 +11116,7 @@ static int perf_uprobe_event_init(struct perf_event *event)
if (event->attr.type != perf_uprobe.type)
return -ENOENT;
- if (!perfmon_capable())
+ if (!capable(CAP_SYS_ADMIN))
return -EACCES;
/*
@@ -11749,7 +11774,12 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- if (is_sampling_event(event)) {
+ /*
+ * The throttle can be triggered in the hrtimer handler;
+ * in that case HRTIMER_NORESTART is used to stop the timer,
+ * rather than hrtimer_cancel(). See perf_swevent_hrtimer().
+ */
+ if (is_sampling_event(event) && (hwc->interrupts != MAX_INTERRUPTS)) {
ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
local64_set(&hwc->period_left, ktime_to_ns(remaining));
@@ -11804,7 +11834,8 @@ static void cpu_clock_event_start(struct perf_event *event, int flags)
static void cpu_clock_event_stop(struct perf_event *event, int flags)
{
perf_swevent_cancel_hrtimer(event);
- cpu_clock_event_update(event);
+ if (flags & PERF_EF_UPDATE)
+ cpu_clock_event_update(event);
}
static int cpu_clock_event_add(struct perf_event *event, int flags)
@@ -11882,7 +11913,8 @@ static void task_clock_event_start(struct perf_event *event, int flags)
static void task_clock_event_stop(struct perf_event *event, int flags)
{
perf_swevent_cancel_hrtimer(event);
- task_clock_event_update(event, event->ctx->time);
+ if (flags & PERF_EF_UPDATE)
+ task_clock_event_update(event, event->ctx->time);
}
static int task_clock_event_add(struct perf_event *event, int flags)
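The class_perf_ctx_lock_t boilerplate added near the top of events/core.c exists so that guard(perf_ctx_lock)(...) from include/linux/cleanup.h unlocks on every exit path, including the early returns this patch adds to perf_cgroup_switch(). A sketch of the pattern (example(), some_condition() and do_work() are hypothetical):

static void example(struct perf_cpu_context *cpuctx,
		    struct perf_event_context *ctx)
{
	guard(perf_ctx_lock)(cpuctx, ctx);	/* constructor locks */

	if (!some_condition(ctx))
		return;			/* destructor unlocks here */

	do_work(ctx);
}					/* ...and here */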
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index d2aef87c7e9f..aa9a759e824f 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -441,7 +441,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
* store that will be enabled on successful return
*/
if (!handle->size) { /* A, matches D */
- event->pending_disable = smp_processor_id();
+ perf_event_disable_inatomic(handle->event);
perf_output_wakeup(handle);
WRITE_ONCE(rb->aux_nest, 0);
goto err_put;
@@ -526,7 +526,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
if (wakeup) {
if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
- handle->event->pending_disable = smp_processor_id();
+ perf_event_disable_inatomic(handle->event);
perf_output_wakeup(handle);
}
diff --git a/kernel/exit.c b/kernel/exit.c
index bd743900354c..bb184a67ac73 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -940,6 +940,15 @@ void __noreturn do_exit(long code)
taskstats_exit(tsk, group_dead);
trace_sched_process_exit(tsk, group_dead);
+ /*
+ * Since sampling can touch ->mm, make sure to stop everything before we
+ * tear it down.
+ *
+ * Also flushes inherited counters to the parent - before the parent
+ * gets woken up by child-exit notifications.
+ */
+ perf_event_exit_task(tsk);
+
exit_mm();
if (group_dead)
@@ -955,14 +964,6 @@ void __noreturn do_exit(long code)
exit_task_work(tsk);
exit_thread(tsk);
- /*
- * Flush inherited counters to the parent - before the parent
- * gets woken up by child-exit notifications.
- *
- * because of cgroup mode, must be called before cgroup_exit()
- */
- perf_event_exit_task(tsk);
-
sched_autogroup_exit_task(tsk);
cgroup_exit(tsk);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 8d530d0949ff..6a96149aede9 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -201,18 +201,9 @@ static int __restore_freezer_state(struct task_struct *p, void *arg)
void __thaw_task(struct task_struct *p)
{
- unsigned long flags;
-
- spin_lock_irqsave(&freezer_lock, flags);
- if (WARN_ON_ONCE(freezing(p)))
- goto unlock;
-
- if (!frozen(p) || task_call_func(p, __restore_freezer_state, NULL))
- goto unlock;
-
- wake_up_state(p, TASK_FROZEN);
-unlock:
- spin_unlock_irqrestore(&freezer_lock, flags);
+ guard(spinlock_irqsave)(&freezer_lock);
+ if (frozen(p) && !task_call_func(p, __restore_freezer_state, NULL))
+ wake_up_state(p, TASK_FROZEN);
}
/**
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 565f9717c6ca..90d53fb0ee9e 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -583,8 +583,8 @@ int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
if (futex_get_value(&node, naddr))
return -EFAULT;
- if (node != FUTEX_NO_NODE &&
- (node >= MAX_NUMNODES || !node_possible(node)))
+ if ((node != FUTEX_NO_NODE) &&
+ ((unsigned int)node >= MAX_NUMNODES || !node_possible(node)))
return -EINVAL;
}
@@ -1629,6 +1629,16 @@ again:
mm->futex_phash_new = NULL;
if (fph) {
+ if (cur && (!cur->hash_mask || cur->immutable)) {
+ /*
+ * If two threads simultaneously request the global
+ * hash then the first one performs the switch,
+ * the second one returns here.
+ */
+ free = fph;
+ mm->futex_phash_new = new;
+ return -EBUSY;
+ }
if (cur && !new) {
/*
* If we have an existing hash, but do not yet have
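On the futex node-ID check above: casting the signed node to unsigned int folds the negative and the too-large cases into one comparison, because any negative value other than FUTEX_NO_NODE (-1 in the uapi headers) wraps to a huge unsigned number. A hedged sketch (check_node() is a hypothetical wrapper):

static int check_node(int node)
{
	/* node == -2: (unsigned int)-2 == 0xfffffffe, which is
	 * >= MAX_NUMNODES, so the single test also rejects all
	 * negative values except FUTEX_NO_NODE. */
	if ((node != FUTEX_NO_NODE) &&
	    ((unsigned int)node >= MAX_NUMNODES || !node_possible(node)))
		return -EINVAL;
	return 0;
}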
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index b0e0a7332993..2b274007e8ba 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -205,6 +205,14 @@ __irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff,
void irq_startup_managed(struct irq_desc *desc)
{
+ struct irq_data *d = irq_desc_get_irq_data(desc);
+
+ /*
+ * Clear the managed-shutdown flag so that managed startup is not
+ * repeated across multiple hotplugs, which would leave the disable
+ * depth imbalanced.
+ */
+ irqd_clr_managed_shutdown(d);
+
/*
* Only start it up when the disable depth is 1, so that a disable,
* hotunplug, hotplug sequence does not end up enabling it during
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index f07529ae4895..755346ea9819 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -210,13 +210,6 @@ static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu)
!irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity))
return;
- /*
- * Don't restore suspended interrupts here when a system comes back
- * from S3. They are reenabled via resume_device_irqs().
- */
- if (desc->istate & IRQS_SUSPENDED)
- return;
-
if (irqd_is_managed_and_shutdown(data))
irq_startup_managed(desc);
diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c
index 1a3d483548e2..ae4c9cbd1b4b 100644
--- a/kernel/irq/irq_sim.c
+++ b/kernel/irq/irq_sim.c
@@ -202,7 +202,7 @@ struct irq_domain *irq_domain_create_sim_full(struct fwnode_handle *fwnode,
void *data)
{
struct irq_sim_work_ctx *work_ctx __free(kfree) =
- kmalloc(sizeof(*work_ctx), GFP_KERNEL);
+ kzalloc(sizeof(*work_ctx), GFP_KERNEL);
if (!work_ctx)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 9c59fa480b0b..3a9a9f240dbc 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1136,6 +1136,7 @@ int kernel_kexec(void)
Resume_devices:
dpm_resume_end(PMSG_RESTORE);
Resume_console:
+ pm_restore_gfp_mask();
console_resume_all();
thaw_processes();
Restore_console:
diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index 69b953551677..5a21dbe17950 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -164,11 +164,21 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn,
}
/* almost as free_reserved_page(), just don't free the page */
-static void kho_restore_page(struct page *page)
+static void kho_restore_page(struct page *page, unsigned int order)
{
- ClearPageReserved(page);
- init_page_count(page);
- adjust_managed_page_count(page, 1);
+ unsigned int nr_pages = (1 << order);
+
+ /* Head page gets refcount of 1. */
+ set_page_count(page, 1);
+
+ /* For higher order folios, tail pages get a page count of zero. */
+ for (unsigned int i = 1; i < nr_pages; i++)
+ set_page_count(page + i, 0);
+
+ if (order > 0)
+ prep_compound_page(page, order);
+
+ adjust_managed_page_count(page, nr_pages);
}
/**
@@ -186,15 +196,10 @@ struct folio *kho_restore_folio(phys_addr_t phys)
return NULL;
order = page->private;
- if (order) {
- if (order > MAX_PAGE_ORDER)
- return NULL;
-
- prep_compound_page(page, order);
- } else {
- kho_restore_page(page);
- }
+ if (order > MAX_PAGE_ORDER)
+ return NULL;
+ kho_restore_page(page, order);
return page_folio(page);
}
EXPORT_SYMBOL_GPL(kho_restore_folio);
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 413ac6ea3702..c2c08007029d 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1573,8 +1573,14 @@ static int apply_relocations(struct module *mod, const struct load_info *info)
if (infosec >= info->hdr->e_shnum)
continue;
- /* Don't bother with non-allocated sections */
- if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC))
+ /*
+ * Don't bother with non-allocated sections.
+ * An exception is the percpu section, which has separate allocations
+ * for individual CPUs. We relocate the percpu section in the initial
+ * ELF template and subsequently copy it to the per-CPU destinations.
+ */
+ if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC) &&
+ (!infosec || infosec != info->index.pcpu))
continue;
if (info->sechdrs[i].sh_flags & SHF_RELA_LIVEPATCH)
@@ -2696,9 +2702,8 @@ static int find_module_sections(struct module *mod, struct load_info *info)
static int move_module(struct module *mod, struct load_info *info)
{
- int i;
- enum mod_mem_type t = 0;
- int ret = -ENOMEM;
+ int i, ret;
+ enum mod_mem_type t = MOD_MEM_NUM_TYPES;
bool codetag_section_found = false;
for_each_mod_mem_type(type) {
@@ -2776,7 +2781,7 @@ static int move_module(struct module *mod, struct load_info *info)
return 0;
out_err:
module_memory_restore_rox(mod);
- for (t--; t >= 0; t--)
+ while (t--)
module_memory_free(mod, t);
if (codetag_section_found)
codetag_free_module_sections(mod);
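On the move_module() unwind fix above: t is an enum, and enums may be backed by an unsigned type, in which case the old for (t--; t >= 0; t--) comparison can never become false. The while (t--) idiom terminates for either signedness, counting from the one-past-the-end initializer down to zero. A sketch (unwind() is a hypothetical wrapper):

static void unwind(struct module *mod)
{
	enum mod_mem_type t = MOD_MEM_NUM_TYPES;

	/* Visits t-1, t-2, ..., 0, then stops. */
	while (t--)
		module_memory_free(mod, t);
}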
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 519fb09de5e0..9216e3b91d3b 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -423,7 +423,6 @@ int hibernation_snapshot(int platform_mode)
}
console_suspend_all();
- pm_restrict_gfp_mask();
error = dpm_suspend(PMSG_FREEZE);
@@ -559,7 +558,6 @@ int hibernation_restore(int platform_mode)
pm_prepare_console();
console_suspend_all();
- pm_restrict_gfp_mask();
error = dpm_suspend_start(PMSG_QUIESCE);
if (!error) {
error = resume_target_kernel(platform_mode);
@@ -571,7 +569,6 @@ int hibernation_restore(int platform_mode)
BUG_ON(!error);
}
dpm_resume_end(PMSG_RECOVER);
- pm_restore_gfp_mask();
console_resume_all();
pm_restore_console();
return error;
diff --git a/kernel/power/power.h b/kernel/power/power.h
index cb1d71562002..7ccd709af93f 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -239,11 +239,6 @@ static inline void suspend_test_finish(const char *label) {}
/* kernel/power/main.c */
extern int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down);
extern int pm_notifier_call_chain(unsigned long val);
-void pm_restrict_gfp_mask(void);
-void pm_restore_gfp_mask(void);
-#else
-static inline void pm_restrict_gfp_mask(void) {}
-static inline void pm_restore_gfp_mask(void) {}
#endif
#ifdef CONFIG_HIGHMEM
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 76b141b9aac0..b4ca17c2fecf 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -384,6 +384,7 @@ static int suspend_prepare(suspend_state_t state)
return 0;
dpm_save_failed_step(SUSPEND_FREEZE);
+ filesystems_thaw();
pm_notifier_call_chain(PM_POST_SUSPEND);
Restore:
pm_restore_console();
@@ -592,8 +593,6 @@ static int enter_state(suspend_state_t state)
ksys_sync_helper();
trace_suspend_resume(TPS("sync_filesystems"), 0, false);
}
- if (filesystem_freeze_enabled)
- filesystems_freeze();
pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]);
pm_suspend_clear_flags();
@@ -606,16 +605,13 @@ static int enter_state(suspend_state_t state)
trace_suspend_resume(TPS("suspend_enter"), state, false);
pm_pr_dbg("Suspending system (%s)\n", mem_sleep_labels[state]);
- pm_restrict_gfp_mask();
error = suspend_devices_and_enter(state);
- pm_restore_gfp_mask();
Finish:
events_check_enabled = false;
pm_pr_dbg("Finishing wakeup.\n");
suspend_finish();
Unlock:
- filesystems_thaw();
mutex_unlock(&system_transition_mutex);
return error;
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e8a4b720d7d2..14d4499c6fc3 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3072,6 +3072,10 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in)
/* Misaligned rcu_head! */
WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1));
+ /* Avoid NULL dereference if callback is NULL. */
+ if (WARN_ON_ONCE(!func))
+ return;
+
if (debug_rcu_head_queue(head)) {
/*
* Probable double call_rcu(), so leak the callback.
diff --git a/kernel/resource.c b/kernel/resource.c
index 8d3e6ed0bdc1..f9bb5481501a 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1279,8 +1279,9 @@ static int __request_region_locked(struct resource *res, struct resource *parent
* become unavailable to other users. Conflicts are
* not expected. Warn to aid debugging if encountered.
*/
- if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) {
- pr_warn("Unaddressable device %s %pR conflicts with %pR",
+ if (parent == &iomem_resource &&
+ conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) {
+ pr_warn("Unaddressable device %s %pR conflicts with %pR\n",
conflict->name, conflict, res);
}
if (conflict != parent) {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index dce50fa57471..81c6df746df1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3362,10 +3362,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
#ifdef CONFIG_NUMA_BALANCING
static void __migrate_swap_task(struct task_struct *p, int cpu)
{
- __schedstat_inc(p->stats.numa_task_swapped);
- count_vm_numa_event(NUMA_TASK_SWAP);
- count_memcg_event_mm(p->mm, NUMA_TASK_SWAP);
-
if (task_on_rq_queued(p)) {
struct rq *src_rq, *dst_rq;
struct rq_flags srf, drf;
@@ -3943,6 +3939,11 @@ static inline bool ttwu_queue_cond(struct task_struct *p, int cpu)
if (!scx_allow_ttwu_queue(p))
return false;
+#ifdef CONFIG_SMP
+ if (p->sched_class == &stop_sched_class)
+ return false;
+#endif
+
/*
* Do not complicate things with the async wake_list while the CPU is
* in hotplug state.
@@ -7663,7 +7664,7 @@ const char *preempt_model_str(void)
if (IS_ENABLED(CONFIG_PREEMPT_DYNAMIC)) {
seq_buf_printf(&s, "(%s)%s",
- preempt_dynamic_mode > 0 ?
+ preempt_dynamic_mode >= 0 ?
preempt_modes[preempt_dynamic_mode] : "undef",
brace ? "}" : "");
return seq_buf_str(&s);
@@ -7934,9 +7935,8 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
return -EINVAL;
- __schedstat_inc(p->stats.numa_task_migrated);
- count_vm_numa_event(NUMA_TASK_MIGRATE);
- count_memcg_event_mm(p->mm, NUMA_TASK_MIGRATE);
+ /* TODO: This is not properly updating schedstats */
+
trace_sched_move_numa(p, curr_cpu, target_cpu);
return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg);
}
@@ -8545,7 +8545,7 @@ void __init sched_init(void)
init_cfs_bandwidth(&root_task_group.cfs_bandwidth, NULL);
#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_EXT_GROUP_SCHED
- root_task_group.scx_weight = CGROUP_WEIGHT_DFL;
+ scx_tg_init(&root_task_group);
#endif /* CONFIG_EXT_GROUP_SCHED */
#ifdef CONFIG_RT_GROUP_SCHED
root_task_group.rt_se = (struct sched_rt_entity **)ptr;
@@ -8985,7 +8985,7 @@ struct task_group *sched_create_group(struct task_group *parent)
if (!alloc_rt_sched_group(tg, parent))
goto err;
- scx_group_set_weight(tg, CGROUP_WEIGHT_DFL);
+ scx_tg_init(tg);
alloc_uclamp_sched_group(tg, parent);
return tg;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index ad45a8fea245..89019a140826 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1504,7 +1504,9 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
if (dl_entity_is_special(dl_se))
return;
- scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec);
+ scaled_delta_exec = delta_exec;
+ if (!dl_server(dl_se))
+ scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec);
dl_se->runtime -= scaled_delta_exec;
@@ -1611,7 +1613,7 @@ throttle:
*/
void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
{
- s64 delta_exec, scaled_delta_exec;
+ s64 delta_exec;
if (!rq->fair_server.dl_defer)
return;
@@ -1624,9 +1626,7 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
if (delta_exec < 0)
return;
- scaled_delta_exec = dl_scaled_delta_exec(rq, &rq->fair_server, delta_exec);
-
- rq->fair_server.runtime -= scaled_delta_exec;
+ rq->fair_server.runtime -= delta_exec;
if (rq->fair_server.runtime < 0) {
rq->fair_server.dl_defer_running = 0;
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 9d71baf08075..557246880a7e 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -1210,10 +1210,6 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
P_SCHEDSTAT(nr_failed_migrations_running);
P_SCHEDSTAT(nr_failed_migrations_hot);
P_SCHEDSTAT(nr_forced_migrations);
-#ifdef CONFIG_NUMA_BALANCING
- P_SCHEDSTAT(numa_task_migrated);
- P_SCHEDSTAT(numa_task_swapped);
-#endif
P_SCHEDSTAT(nr_wakeups);
P_SCHEDSTAT(nr_wakeups_sync);
P_SCHEDSTAT(nr_wakeups_migrate);
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 2c41c78be61e..7dd5cbcb7a06 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1272,7 +1272,8 @@ static inline struct rq *scx_locked_rq(void)
#define SCX_CALL_OP(sch, mask, op, rq, args...) \
do { \
- update_locked_rq(rq); \
+ if (rq) \
+ update_locked_rq(rq); \
if (mask) { \
scx_kf_allow(mask); \
(sch)->ops.op(args); \
@@ -1280,14 +1281,16 @@ do { \
} else { \
(sch)->ops.op(args); \
} \
- update_locked_rq(NULL); \
+ if (rq) \
+ update_locked_rq(NULL); \
} while (0)
#define SCX_CALL_OP_RET(sch, mask, op, rq, args...) \
({ \
__typeof__((sch)->ops.op(args)) __ret; \
\
- update_locked_rq(rq); \
+ if (rq) \
+ update_locked_rq(rq); \
if (mask) { \
scx_kf_allow(mask); \
__ret = (sch)->ops.op(args); \
@@ -1295,7 +1298,8 @@ do { \
} else { \
__ret = (sch)->ops.op(args); \
} \
- update_locked_rq(NULL); \
+ if (rq) \
+ update_locked_rq(NULL); \
__ret; \
})
@@ -4092,6 +4096,11 @@ bool scx_can_stop_tick(struct rq *rq)
DEFINE_STATIC_PERCPU_RWSEM(scx_cgroup_rwsem);
static bool scx_cgroup_enabled;
+void scx_tg_init(struct task_group *tg)
+{
+ tg->scx_weight = CGROUP_WEIGHT_DFL;
+}
+
int scx_tg_online(struct task_group *tg)
{
struct scx_sched *sch = scx_root;
@@ -4241,12 +4250,12 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight)
percpu_down_read(&scx_cgroup_rwsem);
- if (scx_cgroup_enabled && tg->scx_weight != weight) {
- if (SCX_HAS_OP(sch, cgroup_set_weight))
- SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL,
- tg_cgrp(tg), weight);
- tg->scx_weight = weight;
- }
+ if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_weight) &&
+ tg->scx_weight != weight)
+ SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL,
+ tg_cgrp(tg), weight);
+
+ tg->scx_weight = weight;
percpu_up_read(&scx_cgroup_rwsem);
}
diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h
index 6e5072f57771..a75835c23f15 100644
--- a/kernel/sched/ext.h
+++ b/kernel/sched/ext.h
@@ -79,6 +79,7 @@ static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify) {}
#ifdef CONFIG_CGROUP_SCHED
#ifdef CONFIG_EXT_GROUP_SCHED
+void scx_tg_init(struct task_group *tg);
int scx_tg_online(struct task_group *tg);
void scx_tg_offline(struct task_group *tg);
int scx_cgroup_can_attach(struct cgroup_taskset *tset);
@@ -88,6 +89,7 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset);
void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight);
void scx_group_set_idle(struct task_group *tg, bool idle);
#else /* CONFIG_EXT_GROUP_SCHED */
+static inline void scx_tg_init(struct task_group *tg) {}
static inline int scx_tg_online(struct task_group *tg) { return 0; }
static inline void scx_tg_offline(struct task_group *tg) {}
static inline int scx_cgroup_can_attach(struct cgroup_taskset *tset) { return 0; }
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index 6d29d3cbc670..001fb88a8481 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -903,7 +903,7 @@ s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
* selection optimizations and simply check whether the previously
* used CPU is idle and within the allowed cpumask.
*/
- if (p->nr_cpus_allowed == 1) {
+ if (p->nr_cpus_allowed == 1 || is_migration_disabled(p)) {
if (cpumask_test_cpu(prev_cpu, allowed ?: p->cpus_ptr) &&
scx_idle_test_and_clear_cpu(prev_cpu))
cpu = prev_cpu;
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index c48900b856a2..52ca8e268cfc 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -80,7 +80,7 @@ long calc_load_fold_active(struct rq *this_rq, long adjust)
long nr_active, delta = 0;
nr_active = this_rq->nr_running - adjust;
- nr_active += (int)this_rq->nr_uninterruptible;
+ nr_active += (long)this_rq->nr_uninterruptible;
if (nr_active != this_rq->calc_load_active) {
delta = nr_active - this_rq->calc_load_active;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 475bb5998295..83e3aa917142 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1149,7 +1149,7 @@ struct rq {
* one CPU and if it got migrated afterwards it may decrease
* it on another CPU. Always updated under the runqueue lock:
*/
- unsigned int nr_uninterruptible;
+ unsigned long nr_uninterruptible;
union {
struct task_struct __rcu *donor; /* Scheduler context */
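The loadavg.c and sched.h hunks above are two halves of one fix: each runqueue's nr_uninterruptible may individually wrap below zero (a task can sleep on one CPU and be woken from another), and only the sum over all runqueues is meaningful. A sketch of the hazard with the old 32-bit field, numbers invented for illustration (contribution_example() is hypothetical):

static long contribution_example(void)
{
	unsigned int old_counter = 0;

	old_counter -= 3;	/* remote wakeups: wraps to 0xfffffffd */

	/* (int) recovers -3 here, but a 32-bit field loses high
	 * bits once per-rq imbalances grow large; the unsigned
	 * long field plus a (long) cast keeps the sum exact on
	 * 64-bit. */
	return (int)old_counter;	/* -3 */
}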
diff --git a/kernel/signal.c b/kernel/signal.c
index 148082db9a55..e2c928de7d2c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3016,7 +3016,7 @@ relock:
* first and our do_group_exit call below will use
* that value and ignore the one we pass it.
*/
- do_coredump(&ksig->info);
+ vfs_coredump(&ksig->info);
}
/*
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 5d2d0562115b..3fe6b0c99f3d 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -82,18 +82,15 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done)
}
static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
- struct cpu_stop_work *work,
- struct wake_q_head *wakeq)
+ struct cpu_stop_work *work)
{
list_add_tail(&work->list, &stopper->works);
- wake_q_add(wakeq, stopper->thread);
}
/* queue @work to @stopper. if offline, @work is completed immediately */
static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
- DEFINE_WAKE_Q(wakeq);
unsigned long flags;
bool enabled;
@@ -101,12 +98,13 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
raw_spin_lock_irqsave(&stopper->lock, flags);
enabled = stopper->enabled;
if (enabled)
- __cpu_stop_queue_work(stopper, work, &wakeq);
+ __cpu_stop_queue_work(stopper, work);
else if (work->done)
cpu_stop_signal_done(work->done);
raw_spin_unlock_irqrestore(&stopper->lock, flags);
- wake_up_q(&wakeq);
+ if (enabled)
+ wake_up_process(stopper->thread);
preempt_enable();
return enabled;
@@ -264,7 +262,6 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
{
struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
- DEFINE_WAKE_Q(wakeq);
int err;
retry:
@@ -300,8 +297,8 @@ retry:
}
err = 0;
- __cpu_stop_queue_work(stopper1, work1, &wakeq);
- __cpu_stop_queue_work(stopper2, work2, &wakeq);
+ __cpu_stop_queue_work(stopper1, work1);
+ __cpu_stop_queue_work(stopper2, work2);
unlock:
raw_spin_unlock(&stopper2->lock);
@@ -316,7 +313,10 @@ unlock:
goto retry;
}
- wake_up_q(&wakeq);
+ if (!err) {
+ wake_up_process(stopper1->thread);
+ wake_up_process(stopper2->thread);
+ }
preempt_enable();
return err;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 50e8d04ab661..2e5b89d7d866 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1406,6 +1406,15 @@ void run_posix_cpu_timers(void)
lockdep_assert_irqs_disabled();
/*
+ * Ensure that release_task(tsk) can't happen while
+ * handle_posix_cpu_timers() is running. Otherwise, a concurrent
+ * posix_cpu_timer_del() may fail to lock_task_sighand(tsk) and
+ * miss timer->it.cpu.firing != 0.
+ */
+ if (tsk->exit_state)
+ return;
+
+ /*
* If the actual expiry is deferred to task work context and the
* work is already scheduled there is no point to do anything here.
*/
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index a009c91f7b05..83c65f3afcca 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1256,7 +1256,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
struct system_time_snapshot *history_begin,
struct system_device_crosststamp *xtstamp)
{
- struct system_counterval_t system_counterval;
+ struct system_counterval_t system_counterval = {};
struct timekeeper *tk = &tk_core.timekeeper;
u64 cycles, now, interval_start;
unsigned int clock_was_set_seq = 0;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 120531268abf..d01e5c910ce1 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -3136,7 +3136,10 @@ __register_event(struct trace_event_call *call, struct module *mod)
if (ret < 0)
return ret;
+ down_write(&trace_event_sem);
list_add(&call->list, &ftrace_events);
+ up_write(&trace_event_sem);
+
if (call->flags & TRACE_EVENT_FL_DYNAMIC)
atomic_set(&call->refcnt, 0);
else
@@ -3750,6 +3753,8 @@ __trace_add_event_dirs(struct trace_array *tr)
struct trace_event_call *call;
int ret;
+ lockdep_assert_held(&trace_event_sem);
+
list_for_each_entry(call, &ftrace_events, list) {
ret = __trace_add_new_event(call, tr);
if (ret < 0)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index ea8b364b6818..3885aadc434d 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1436,15 +1436,6 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir,
INIT_LIST_HEAD(&head->list);
- item = kmalloc(sizeof(*item), GFP_KERNEL);
- if (!item) {
- kfree(head);
- goto free_now;
- }
-
- item->filter = filter;
- list_add_tail(&item->list, &head->list);
-
list_for_each_entry(file, &tr->events, list) {
if (file->system != dir)
continue;
@@ -1456,6 +1447,13 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir,
event_clear_filter(file);
}
+ item = kmalloc(sizeof(*item), GFP_KERNEL);
+ if (!item)
+ goto free_now;
+
+ item->filter = filter;
+ list_add_tail(&item->list, &head->list);
+
delay_free_filter(head);
return;
free_now:
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 9234e2c39abf..14d74a7491b8 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -455,10 +455,16 @@ static int graph_trace_init(struct trace_array *tr)
return 0;
}
+static struct tracer graph_trace;
+
static int ftrace_graph_trace_args(struct trace_array *tr, int set)
{
trace_func_graph_ent_t entry;
+ /* Do nothing if the current tracer is not this tracer */
+ if (tr->current_trace != &graph_trace)
+ return 0;
+
if (set)
entry = trace_graph_entry_args;
else
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 6819b93309ce..fd259da0aa64 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -637,8 +637,8 @@ __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, u
entry = ring_buffer_event_data(event);
- memcpy(&entry->caller, fstack->calls, size);
entry->size = fstack->nr_entries;
+ memcpy(&entry->caller, fstack->calls, size);
trace_buffer_unlock_commit_nostack(buffer, event);
}
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 424751cdf31f..40830a3ecd96 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -657,7 +657,7 @@ static int parse_btf_arg(char *varname,
ret = query_btf_context(ctx);
if (ret < 0 || ctx->nr_params == 0) {
trace_probe_log_err(ctx->offset, NO_BTF_ENTRY);
- return PTR_ERR(params);
+ return -ENOENT;
}
}
params = ctx->params;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 97f37b5bae66..9f9148075828 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -7767,7 +7767,8 @@ void __init workqueue_init_early(void)
restrict_unbound_cpumask("workqueue.unbound_cpus", &wq_cmdline_cpumask);
cpumask_copy(wq_requested_unbound_cpumask, wq_unbound_cpumask);
-
+ cpumask_andnot(wq_isolated_cpumask, cpu_possible_mask,
+ housekeeping_cpumask(HK_TYPE_DOMAIN));
pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
unbound_wq_update_pwq_attrs_buf = alloc_workqueue_attrs();
diff --git a/lib/Kconfig b/lib/Kconfig
index 6c1b8f184267..37db228f70a9 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -716,6 +716,7 @@ config GENERIC_LIB_DEVMEM_IS_ALLOWED
config PLDMFW
bool
+ select CRC32
default n
config ASN1_ENCODER
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index d48b80f3f007..0142bc916f73 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -10,6 +10,7 @@
#include <linux/seq_buf.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
+#include <linux/kmemleak.h>
#define ALLOCINFO_FILE_NAME "allocinfo"
#define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag))
@@ -134,6 +135,9 @@ size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sl
struct codetag_bytes n;
unsigned int i, nr = 0;
+ if (IS_ERR_OR_NULL(alloc_tag_cttype))
+ return 0;
+
if (can_sleep)
codetag_lock_module_list(alloc_tag_cttype, true);
else if (!codetag_trylock_module_list(alloc_tag_cttype))
@@ -632,8 +636,13 @@ static int load_module(struct module *mod, struct codetag *start, struct codetag
mod->name);
return -ENOMEM;
}
- }
+ /*
+ * Avoid a kmemleak false positive. The pointer to the counters is stored
+ * in the alloc_tag section of the module and cannot be directly accessed.
+ */
+ kmemleak_ignore_percpu(tag->counters);
+ }
return 0;
}
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 3e79283b617d..b0c0f8aea269 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -35,6 +35,10 @@ obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519-generic.o
libcurve25519-generic-y := curve25519-fiat32.o
libcurve25519-generic-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o
libcurve25519-generic-y += curve25519-generic.o
+# clang versions prior to 18 may blow out the stack with KASAN
+ifeq ($(call clang-min-version, 180000),)
+KASAN_SANITIZE_curve25519-hacl64.o := n
+endif
obj-$(CONFIG_CRYPTO_LIB_CURVE25519) += libcurve25519.o
libcurve25519-y += curve25519.o
@@ -62,7 +66,7 @@ libsha256-generic-y := sha256-generic.o
obj-$(CONFIG_MPILIB) += mpi/
-obj-$(CONFIG_CRYPTO_SELFTESTS) += simd.o
+obj-$(CONFIG_CRYPTO_SELFTESTS_FULL) += simd.o
obj-$(CONFIG_CRYPTO_LIB_SM3) += libsm3.o
libsm3-y := sm3.o
diff --git a/lib/crypto/aescfb.c b/lib/crypto/aescfb.c
index 437613265e14..2f09ae92ffa0 100644
--- a/lib/crypto/aescfb.c
+++ b/lib/crypto/aescfb.c
@@ -106,11 +106,11 @@ MODULE_LICENSE("GPL");
*/
static struct {
- u8 ptext[64];
- u8 ctext[64];
+ u8 ptext[64] __nonstring;
+ u8 ctext[64] __nonstring;
- u8 key[AES_MAX_KEY_SIZE];
- u8 iv[AES_BLOCK_SIZE];
+ u8 key[AES_MAX_KEY_SIZE] __nonstring;
+ u8 iv[AES_BLOCK_SIZE] __nonstring;
int klen;
int len;
diff --git a/lib/crypto/aesgcm.c b/lib/crypto/aesgcm.c
index 277824d6b4af..faa4dee9bb1b 100644
--- a/lib/crypto/aesgcm.c
+++ b/lib/crypto/aesgcm.c
@@ -205,19 +205,19 @@ MODULE_LICENSE("GPL");
* Test code below. Vectors taken from crypto/testmgr.h
*/
-static const u8 __initconst ctext0[16] =
+static const u8 __initconst ctext0[16] __nonstring =
"\x58\xe2\xfc\xce\xfa\x7e\x30\x61"
"\x36\x7f\x1d\x57\xa4\xe7\x45\x5a";
static const u8 __initconst ptext1[16];
-static const u8 __initconst ctext1[32] =
+static const u8 __initconst ctext1[32] __nonstring =
"\x03\x88\xda\xce\x60\xb6\xa3\x92"
"\xf3\x28\xc2\xb9\x71\xb2\xfe\x78"
"\xab\x6e\x47\xd4\x2c\xec\x13\xbd"
"\xf5\x3a\x67\xb2\x12\x57\xbd\xdf";
-static const u8 __initconst ptext2[64] =
+static const u8 __initconst ptext2[64] __nonstring =
"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
@@ -227,7 +227,7 @@ static const u8 __initconst ptext2[64] =
"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
"\xba\x63\x7b\x39\x1a\xaf\xd2\x55";
-static const u8 __initconst ctext2[80] =
+static const u8 __initconst ctext2[80] __nonstring =
"\x42\x83\x1e\xc2\x21\x77\x74\x24"
"\x4b\x72\x21\xb7\x84\xd0\xd4\x9c"
"\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0"
@@ -239,7 +239,7 @@ static const u8 __initconst ctext2[80] =
"\x4d\x5c\x2a\xf3\x27\xcd\x64\xa6"
"\x2c\xf3\x5a\xbd\x2b\xa6\xfa\xb4";
-static const u8 __initconst ptext3[60] =
+static const u8 __initconst ptext3[60] __nonstring =
"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
@@ -249,7 +249,7 @@ static const u8 __initconst ptext3[60] =
"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
"\xba\x63\x7b\x39";
-static const u8 __initconst ctext3[76] =
+static const u8 __initconst ctext3[76] __nonstring =
"\x42\x83\x1e\xc2\x21\x77\x74\x24"
"\x4b\x72\x21\xb7\x84\xd0\xd4\x9c"
"\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0"
@@ -261,17 +261,17 @@ static const u8 __initconst ctext3[76] =
"\x5b\xc9\x4f\xbc\x32\x21\xa5\xdb"
"\x94\xfa\xe9\x5a\xe7\x12\x1a\x47";
-static const u8 __initconst ctext4[16] =
+static const u8 __initconst ctext4[16] __nonstring =
"\xcd\x33\xb2\x8a\xc7\x73\xf7\x4b"
"\xa0\x0e\xd1\xf3\x12\x57\x24\x35";
-static const u8 __initconst ctext5[32] =
+static const u8 __initconst ctext5[32] __nonstring =
"\x98\xe7\x24\x7c\x07\xf0\xfe\x41"
"\x1c\x26\x7e\x43\x84\xb0\xf6\x00"
"\x2f\xf5\x8d\x80\x03\x39\x27\xab"
"\x8e\xf4\xd4\x58\x75\x14\xf0\xfb";
-static const u8 __initconst ptext6[64] =
+static const u8 __initconst ptext6[64] __nonstring =
"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
@@ -281,7 +281,7 @@ static const u8 __initconst ptext6[64] =
"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
"\xba\x63\x7b\x39\x1a\xaf\xd2\x55";
-static const u8 __initconst ctext6[80] =
+static const u8 __initconst ctext6[80] __nonstring =
"\x39\x80\xca\x0b\x3c\x00\xe8\x41"
"\xeb\x06\xfa\xc4\x87\x2a\x27\x57"
"\x85\x9e\x1c\xea\xa6\xef\xd9\x84"
@@ -293,17 +293,17 @@ static const u8 __initconst ctext6[80] =
"\x99\x24\xa7\xc8\x58\x73\x36\xbf"
"\xb1\x18\x02\x4d\xb8\x67\x4a\x14";
-static const u8 __initconst ctext7[16] =
+static const u8 __initconst ctext7[16] __nonstring =
"\x53\x0f\x8a\xfb\xc7\x45\x36\xb9"
"\xa9\x63\xb4\xf1\xc4\xcb\x73\x8b";
-static const u8 __initconst ctext8[32] =
+static const u8 __initconst ctext8[32] __nonstring =
"\xce\xa7\x40\x3d\x4d\x60\x6b\x6e"
"\x07\x4e\xc5\xd3\xba\xf3\x9d\x18"
"\xd0\xd1\xc8\xa7\x99\x99\x6b\xf0"
"\x26\x5b\x98\xb5\xd4\x8a\xb9\x19";
-static const u8 __initconst ptext9[64] =
+static const u8 __initconst ptext9[64] __nonstring =
"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
@@ -313,7 +313,7 @@ static const u8 __initconst ptext9[64] =
"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
"\xba\x63\x7b\x39\x1a\xaf\xd2\x55";
-static const u8 __initconst ctext9[80] =
+static const u8 __initconst ctext9[80] __nonstring =
"\x52\x2d\xc1\xf0\x99\x56\x7d\x07"
"\xf4\x7f\x37\xa3\x2a\x84\x42\x7d"
"\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9"
@@ -325,7 +325,7 @@ static const u8 __initconst ctext9[80] =
"\xb0\x94\xda\xc5\xd9\x34\x71\xbd"
"\xec\x1a\x50\x22\x70\xe3\xcc\x6c";
-static const u8 __initconst ptext10[60] =
+static const u8 __initconst ptext10[60] __nonstring =
"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
@@ -335,7 +335,7 @@ static const u8 __initconst ptext10[60] =
"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
"\xba\x63\x7b\x39";
-static const u8 __initconst ctext10[76] =
+static const u8 __initconst ctext10[76] __nonstring =
"\x52\x2d\xc1\xf0\x99\x56\x7d\x07"
"\xf4\x7f\x37\xa3\x2a\x84\x42\x7d"
"\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9"
@@ -347,7 +347,7 @@ static const u8 __initconst ctext10[76] =
"\x76\xfc\x6e\xce\x0f\x4e\x17\x68"
"\xcd\xdf\x88\x53\xbb\x2d\x55\x1b";
-static const u8 __initconst ptext11[60] =
+static const u8 __initconst ptext11[60] __nonstring =
"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
@@ -357,7 +357,7 @@ static const u8 __initconst ptext11[60] =
"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
"\xba\x63\x7b\x39";
-static const u8 __initconst ctext11[76] =
+static const u8 __initconst ctext11[76] __nonstring =
"\x39\x80\xca\x0b\x3c\x00\xe8\x41"
"\xeb\x06\xfa\xc4\x87\x2a\x27\x57"
"\x85\x9e\x1c\xea\xa6\xef\xd9\x84"
@@ -369,7 +369,7 @@ static const u8 __initconst ctext11[76] =
"\x25\x19\x49\x8e\x80\xf1\x47\x8f"
"\x37\xba\x55\xbd\x6d\x27\x61\x8c";
-static const u8 __initconst ptext12[719] =
+static const u8 __initconst ptext12[719] __nonstring =
"\x42\xc1\xcc\x08\x48\x6f\x41\x3f"
"\x2f\x11\x66\x8b\x2a\x16\xf0\xe0"
"\x58\x83\xf0\xc3\x70\x14\xc0\x5b"
@@ -461,7 +461,7 @@ static const u8 __initconst ptext12[719] =
"\x59\xfa\xfa\xaa\x44\x04\x01\xa7"
"\xa4\x78\xdb\x74\x3d\x8b\xb5";
-static const u8 __initconst ctext12[735] =
+static const u8 __initconst ctext12[735] __nonstring =
"\x84\x0b\xdb\xd5\xb7\xa8\xfe\x20"
"\xbb\xb1\x12\x7f\x41\xea\xb3\xc0"
"\xa2\xb4\x37\x19\x11\x58\xb6\x0b"
@@ -559,9 +559,9 @@ static struct {
const u8 *ptext;
const u8 *ctext;
- u8 key[AES_MAX_KEY_SIZE];
- u8 iv[GCM_AES_IV_SIZE];
- u8 assoc[20];
+ u8 key[AES_MAX_KEY_SIZE] __nonstring;
+ u8 iv[GCM_AES_IV_SIZE] __nonstring;
+ u8 assoc[20] __nonstring;
int klen;
int clen;
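The __nonstring annotations sprinkled through lib/crypto address GCC 15's -Wunterminated-string-initialization: these test vectors are raw byte arrays initialized from string literals that exactly fill the array, leaving no room for the terminating NUL, which is intentional. A minimal sketch:

/* 16 data bytes in a 16-byte array: the literal's implicit NUL
 * is dropped, deliberately; __nonstring records that these are
 * raw bytes, not a C string, so the compiler stays quiet. */
static const u8 key[16] __nonstring =
	"\x00\x01\x02\x03\x04\x05\x06\x07"
	"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f";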
diff --git a/lib/group_cpus.c b/lib/group_cpus.c
index ee272c4cefcc..18d43a406114 100644
--- a/lib/group_cpus.c
+++ b/lib/group_cpus.c
@@ -352,6 +352,9 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps)
int ret = -ENOMEM;
struct cpumask *masks = NULL;
+ if (numgrps == 0)
+ return NULL;
+
if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
return NULL;
@@ -426,8 +429,12 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps)
#else /* CONFIG_SMP */
struct cpumask *group_cpus_evenly(unsigned int numgrps)
{
- struct cpumask *masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL);
+ struct cpumask *masks;
+ if (numgrps == 0)
+ return NULL;
+
+ masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL);
if (!masks)
return NULL;
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index affe979bd14d..ef66be963798 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -5319,6 +5319,7 @@ static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
struct maple_enode *start;
if (mte_is_leaf(enode)) {
+ mte_set_node_dead(enode);
node->type = mte_node_type(enode);
goto free_leaf;
}
@@ -5527,8 +5528,9 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
mas->store_type = mas_wr_store_type(&wr_mas);
request = mas_prealloc_calc(&wr_mas, entry);
if (!request)
- return ret;
+ goto set_flag;
+ mas->mas_flags &= ~MA_STATE_PREALLOC;
mas_node_count_gfp(mas, request, gfp);
if (mas_is_err(mas)) {
mas_set_alloc_req(mas, 0);
@@ -5538,6 +5540,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
return ret;
}
+set_flag:
mas->mas_flags |= MA_STATE_PREALLOC;
return ret;
}
diff --git a/lib/raid6/rvv.c b/lib/raid6/rvv.c
index f0887344b274..7d82efa5b14f 100644
--- a/lib/raid6/rvv.c
+++ b/lib/raid6/rvv.c
@@ -26,9 +26,9 @@ static int rvv_has_vector(void)
static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
u8 **dptr = (u8 **)ptrs;
- unsigned long d;
- int z, z0;
u8 *p, *q;
+ unsigned long vl, d;
+ int z, z0;
z0 = disks - 3; /* Highest data disk */
p = dptr[z0 + 1]; /* XOR parity */
@@ -36,8 +36,9 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **
asm volatile (".option push\n"
".option arch,+v\n"
- "vsetvli t0, x0, e8, m1, ta, ma\n"
+ "vsetvli %0, x0, e8, m1, ta, ma\n"
".option pop\n"
+ : "=&r" (vl)
);
/* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */
@@ -99,7 +100,7 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop,
{
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
- unsigned long d;
+ unsigned long vl, d;
int z, z0;
z0 = stop; /* P/Q right side optimization */
@@ -108,8 +109,9 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop,
asm volatile (".option push\n"
".option arch,+v\n"
- "vsetvli t0, x0, e8, m1, ta, ma\n"
+ "vsetvli %0, x0, e8, m1, ta, ma\n"
".option pop\n"
+ : "=&r" (vl)
);
/* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */
@@ -195,9 +197,9 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop,
static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
u8 **dptr = (u8 **)ptrs;
- unsigned long d;
- int z, z0;
u8 *p, *q;
+ unsigned long vl, d;
+ int z, z0;
z0 = disks - 3; /* Highest data disk */
p = dptr[z0 + 1]; /* XOR parity */
@@ -205,8 +207,9 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void **
asm volatile (".option push\n"
".option arch,+v\n"
- "vsetvli t0, x0, e8, m1, ta, ma\n"
+ "vsetvli %0, x0, e8, m1, ta, ma\n"
".option pop\n"
+ : "=&r" (vl)
);
/*
@@ -287,7 +290,7 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop,
{
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
- unsigned long d;
+ unsigned long vl, d;
int z, z0;
z0 = stop; /* P/Q right side optimization */
@@ -296,8 +299,9 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop,
asm volatile (".option push\n"
".option arch,+v\n"
- "vsetvli t0, x0, e8, m1, ta, ma\n"
+ "vsetvli %0, x0, e8, m1, ta, ma\n"
".option pop\n"
+ : "=&r" (vl)
);
/*
@@ -413,9 +417,9 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop,
static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
u8 **dptr = (u8 **)ptrs;
- unsigned long d;
- int z, z0;
u8 *p, *q;
+ unsigned long vl, d;
+ int z, z0;
z0 = disks - 3; /* Highest data disk */
p = dptr[z0 + 1]; /* XOR parity */
@@ -423,8 +427,9 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void **
asm volatile (".option push\n"
".option arch,+v\n"
- "vsetvli t0, x0, e8, m1, ta, ma\n"
+ "vsetvli %0, x0, e8, m1, ta, ma\n"
".option pop\n"
+ : "=&r" (vl)
);
/*
@@ -539,7 +544,7 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop,
{
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
- unsigned long d;
+ unsigned long vl, d;
int z, z0;
z0 = stop; /* P/Q right side optimization */
@@ -548,8 +553,9 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop,
asm volatile (".option push\n"
".option arch,+v\n"
- "vsetvli t0, x0, e8, m1, ta, ma\n"
+ "vsetvli %0, x0, e8, m1, ta, ma\n"
".option pop\n"
+ : "=&r" (vl)
);
/*
@@ -721,9 +727,9 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop,
static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
u8 **dptr = (u8 **)ptrs;
- unsigned long d;
- int z, z0;
u8 *p, *q;
+ unsigned long vl, d;
+ int z, z0;
z0 = disks - 3; /* Highest data disk */
p = dptr[z0 + 1]; /* XOR parity */
@@ -731,8 +737,9 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void **
asm volatile (".option push\n"
".option arch,+v\n"
- "vsetvli t0, x0, e8, m1, ta, ma\n"
+ "vsetvli %0, x0, e8, m1, ta, ma\n"
".option pop\n"
+ : "=&r" (vl)
);
/*
@@ -915,7 +922,7 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop,
{
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
- unsigned long d;
+ unsigned long vl, d;
int z, z0;
z0 = stop; /* P/Q right side optimization */
@@ -924,8 +931,9 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop,
asm volatile (".option push\n"
".option arch,+v\n"
- "vsetvli t0, x0, e8, m1, ta, ma\n"
+ "vsetvli %0, x0, e8, m1, ta, ma\n"
".option pop\n"
+ : "=&r" (vl)
);
/*
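The raid6 rvv.c changes all apply one pattern: the original asm wrote vsetvli's result to t0, a register the compiler was never told about and could therefore clobber behind its back. Binding the result to an output operand lets the compiler allocate and track the register itself. A sketch (set_vl() is hypothetical; the in-tree code additionally wraps the asm in .option push/pop):

static unsigned long set_vl(void)
{
	unsigned long vl;

	/* "=&r" makes vl an early-clobbered output, so GCC picks
	 * the destination register instead of a hard-coded t0. */
	asm volatile ("vsetvli %0, x0, e8, m1, ta, ma"
		      : "=&r" (vl));
	return vl;
}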
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 7582dfab7fe3..4af1c8b0775a 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -73,9 +73,9 @@ EXPORT_SYMBOL(sg_nents_for_len);
* Should only be used casually, it (currently) scans the entire list
* to get the last entry.
*
- * Note that the @sgl@ pointer passed in need not be the first one,
- * the important bit is that @nents@ denotes the number of entries that
- * exist from @sgl@.
+ * Note that the @sgl pointer passed in need not be the first one,
+ * the important bit is that @nents denotes the number of entries that
+ * exist from @sgl.
*
**/
struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
@@ -345,7 +345,7 @@ EXPORT_SYMBOL(__sg_alloc_table);
* @gfp_mask: GFP allocation mask
*
* Description:
- * Allocate and initialize an sg table. If @nents@ is larger than
+ * Allocate and initialize an sg table. If @nents is larger than
* SG_MAX_SINGLE_ALLOC a chained sg table will be setup.
*
**/
diff --git a/lib/test_objagg.c b/lib/test_objagg.c
index d34df4306b87..222b39fc2629 100644
--- a/lib/test_objagg.c
+++ b/lib/test_objagg.c
@@ -899,8 +899,10 @@ static int check_expect_hints_stats(struct objagg_hints *objagg_hints,
int err;
stats = objagg_hints_stats_get(objagg_hints);
- if (IS_ERR(stats))
+ if (IS_ERR(stats)) {
+ *errmsg = "objagg_hints_stats_get() failed.";
return PTR_ERR(stats);
+ }
err = __check_expect_stats(stats, expect_stats, errmsg);
objagg_stats_put(stats);
return err;
diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig
index 551745df011b..c93d0c56b963 100644
--- a/mm/damon/Kconfig
+++ b/mm/damon/Kconfig
@@ -4,7 +4,6 @@ menu "Data Access Monitoring"
config DAMON
bool "DAMON: Data Access Monitoring Framework"
- default y
help
This builds a framework that allows kernel subsystems to monitor
access frequency of each memory region. The information can be useful
diff --git a/mm/damon/core.c b/mm/damon/core.c
index b217e0120e09..339116ea30e3 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -754,6 +754,19 @@ static struct damos_quota_goal *damos_nth_quota_goal(
return NULL;
}
+static void damos_commit_quota_goal_union(
+ struct damos_quota_goal *dst, struct damos_quota_goal *src)
+{
+ switch (dst->metric) {
+ case DAMOS_QUOTA_NODE_MEM_USED_BP:
+ case DAMOS_QUOTA_NODE_MEM_FREE_BP:
+ dst->nid = src->nid;
+ break;
+ default:
+ break;
+ }
+}
+
static void damos_commit_quota_goal(
struct damos_quota_goal *dst, struct damos_quota_goal *src)
{
@@ -762,6 +775,7 @@ static void damos_commit_quota_goal(
if (dst->metric == DAMOS_QUOTA_USER_INPUT)
dst->current_value = src->current_value;
/* keep last_psi_total as is, since it will be updated in next cycle */
+ damos_commit_quota_goal_union(dst, src);
}
/**
@@ -795,6 +809,7 @@ int damos_commit_quota_goals(struct damos_quota *dst, struct damos_quota *src)
src_goal->metric, src_goal->target_value);
if (!new_goal)
return -ENOMEM;
+ damos_commit_quota_goal_union(new_goal, src_goal);
damos_add_quota_goal(dst, new_goal);
}
return 0;
@@ -1449,6 +1464,7 @@ static unsigned long damon_get_intervals_score(struct damon_ctx *c)
}
}
target_access_events = max_access_events * goal_bp / 10000;
+ target_access_events = target_access_events ? : 1;
return access_events * 10000 / target_access_events;
}
@@ -2355,9 +2371,8 @@ static void kdamond_usleep(unsigned long usecs)
*
* If there is a &struct damon_call_control request that registered via
* &damon_call() on @ctx, do or cancel the invocation of the function depending
- * on @cancel. @cancel is set when the kdamond is deactivated by DAMOS
- * watermarks, or the kdamond is already out of the main loop and therefore
- * will be terminated.
+ * on @cancel. @cancel is set when the kdamond is already out of the main loop
+ * and therefore will be terminated.
*/
static void kdamond_call(struct damon_ctx *ctx, bool cancel)
{
@@ -2405,7 +2420,7 @@ static int kdamond_wait_activation(struct damon_ctx *ctx)
if (ctx->callback.after_wmarks_check &&
ctx->callback.after_wmarks_check(ctx))
break;
- kdamond_call(ctx, true);
+ kdamond_call(ctx, false);
damos_walk_cancel(ctx);
}
return -EBUSY;
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index 0f6c9e1fec0b..30ae7518ffbf 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -472,6 +472,7 @@ static ssize_t memcg_path_store(struct kobject *kobj,
return -ENOMEM;
strscpy(path, buf, count + 1);
+ kfree(filter->memcg_path);
filter->memcg_path = path;
return count;
}
diff --git a/mm/execmem.c b/mm/execmem.c
index 9720ac2dfa41..2b683e7d864d 100644
--- a/mm/execmem.c
+++ b/mm/execmem.c
@@ -254,34 +254,6 @@ out_unlock:
return ptr;
}
-static bool execmem_cache_rox = false;
-
-void execmem_cache_make_ro(void)
-{
- struct maple_tree *free_areas = &execmem_cache.free_areas;
- struct maple_tree *busy_areas = &execmem_cache.busy_areas;
- MA_STATE(mas_free, free_areas, 0, ULONG_MAX);
- MA_STATE(mas_busy, busy_areas, 0, ULONG_MAX);
- struct mutex *mutex = &execmem_cache.mutex;
- void *area;
-
- execmem_cache_rox = true;
-
- mutex_lock(mutex);
-
- mas_for_each(&mas_free, area, ULONG_MAX) {
- unsigned long pages = mas_range_len(&mas_free) >> PAGE_SHIFT;
- set_memory_ro(mas_free.index, pages);
- }
-
- mas_for_each(&mas_busy, area, ULONG_MAX) {
- unsigned long pages = mas_range_len(&mas_busy) >> PAGE_SHIFT;
- set_memory_ro(mas_busy.index, pages);
- }
-
- mutex_unlock(mutex);
-}
-
static int execmem_cache_populate(struct execmem_range *range, size_t size)
{
unsigned long vm_flags = VM_ALLOW_HUGE_VMAP;
@@ -302,15 +274,9 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size)
/* fill memory with instructions that will trap */
execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true);
- if (execmem_cache_rox) {
- err = set_memory_rox((unsigned long)p, vm->nr_pages);
- if (err)
- goto err_free_mem;
- } else {
- err = set_memory_x((unsigned long)p, vm->nr_pages);
- if (err)
- goto err_free_mem;
- }
+ err = set_memory_rox((unsigned long)p, vm->nr_pages);
+ if (err)
+ goto err_free_mem;
err = execmem_cache_add(p, alloc_size);
if (err)
diff --git a/mm/filemap.c b/mm/filemap.c
index bada249b9fb7..ba089d75fc86 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -4109,7 +4109,7 @@ retry:
break;
}
- status = a_ops->write_begin(file, mapping, pos, bytes,
+ status = a_ops->write_begin(iocb, mapping, pos, bytes,
&folio, &fsdata);
if (unlikely(status < 0))
break;
@@ -4130,7 +4130,7 @@ retry:
copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
flush_dcache_folio(folio);
- status = a_ops->write_end(file, mapping, pos, bytes, copied,
+ status = a_ops->write_end(iocb, mapping, pos, bytes, copied,
folio, fsdata);
if (unlikely(status != copied)) {
iov_iter_revert(i, copied - max(status, 0L));
diff --git a/mm/gup.c b/mm/gup.c
index e065a49842a8..3c39cbbeebef 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2303,13 +2303,13 @@ static void pofs_unpin(struct pages_or_folios *pofs)
/*
* Returns the number of collected folios. Return value is always >= 0.
*/
-static void collect_longterm_unpinnable_folios(
+static unsigned long collect_longterm_unpinnable_folios(
struct list_head *movable_folio_list,
struct pages_or_folios *pofs)
{
+ unsigned long i, collected = 0;
struct folio *prev_folio = NULL;
bool drain_allow = true;
- unsigned long i;
for (i = 0; i < pofs->nr_entries; i++) {
struct folio *folio = pofs_get_folio(pofs, i);
@@ -2321,6 +2321,8 @@ static void collect_longterm_unpinnable_folios(
if (folio_is_longterm_pinnable(folio))
continue;
+ collected++;
+
if (folio_is_device_coherent(folio))
continue;
@@ -2342,6 +2344,8 @@ static void collect_longterm_unpinnable_folios(
NR_ISOLATED_ANON + folio_is_file_lru(folio),
folio_nr_pages(folio));
}
+
+ return collected;
}
/*
@@ -2418,9 +2422,11 @@ static long
check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs)
{
LIST_HEAD(movable_folio_list);
+ unsigned long collected;
- collect_longterm_unpinnable_folios(&movable_folio_list, pofs);
- if (list_empty(&movable_folio_list))
+ collected = collect_longterm_unpinnable_folios(&movable_folio_list,
+ pofs);
+ if (!collected)
return 0;
return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8746ed2fec13..a0d285d20992 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2340,12 +2340,15 @@ struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
struct folio *folio;
spin_lock_irq(&hugetlb_lock);
+ if (!h->resv_huge_pages) {
+ spin_unlock_irq(&hugetlb_lock);
+ return NULL;
+ }
+
folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask, preferred_nid,
nmask);
- if (folio) {
- VM_BUG_ON(!h->resv_huge_pages);
+ if (folio)
h->resv_huge_pages--;
- }
spin_unlock_irq(&hugetlb_lock);
return folio;
@@ -2787,20 +2790,24 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
/*
* alloc_and_dissolve_hugetlb_folio - Allocate a new folio and dissolve
* the old one
- * @h: struct hstate old page belongs to
* @old_folio: Old folio to dissolve
* @list: List to isolate the page in case we need to
* Returns 0 on success, otherwise negated error.
*/
-static int alloc_and_dissolve_hugetlb_folio(struct hstate *h,
- struct folio *old_folio, struct list_head *list)
+static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio,
+ struct list_head *list)
{
- gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
+ gfp_t gfp_mask;
+ struct hstate *h;
int nid = folio_nid(old_folio);
struct folio *new_folio = NULL;
int ret = 0;
retry:
+ /*
+ * The old_folio might have been dissolved from under our feet, so make sure
+ * to carefully check the state under the lock.
+ */
spin_lock_irq(&hugetlb_lock);
if (!folio_test_hugetlb(old_folio)) {
/*
@@ -2829,8 +2836,10 @@ retry:
cond_resched();
goto retry;
} else {
+ h = folio_hstate(old_folio);
if (!new_folio) {
spin_unlock_irq(&hugetlb_lock);
+ gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid,
NULL, NULL);
if (!new_folio)
@@ -2874,35 +2883,24 @@ free_new:
int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list)
{
- struct hstate *h;
int ret = -EBUSY;
- /*
- * The page might have been dissolved from under our feet, so make sure
- * to carefully check the state under the lock.
- * Return success when racing as if we dissolved the page ourselves.
- */
- spin_lock_irq(&hugetlb_lock);
- if (folio_test_hugetlb(folio)) {
- h = folio_hstate(folio);
- } else {
- spin_unlock_irq(&hugetlb_lock);
+ /* Avoid disrupting the normal path by needlessly holding hugetlb_lock */
+ if (!folio_test_hugetlb(folio))
return 0;
- }
- spin_unlock_irq(&hugetlb_lock);
/*
* Fence off gigantic pages as there is a cyclic dependency between
* alloc_contig_range and them. Return -ENOMEM as this has the effect
* of bailing out right away without further retrying.
*/
- if (hstate_is_gigantic(h))
+ if (folio_order(folio) > MAX_PAGE_ORDER)
return -ENOMEM;
if (folio_ref_count(folio) && folio_isolate_hugetlb(folio, list))
ret = 0;
else if (!folio_ref_count(folio))
- ret = alloc_and_dissolve_hugetlb_folio(h, folio, list);
+ ret = alloc_and_dissolve_hugetlb_folio(folio, list);
return ret;
}
@@ -2916,7 +2914,6 @@ int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list)
*/
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
{
- struct hstate *h;
struct folio *folio;
int ret = 0;
@@ -2925,23 +2922,9 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
while (start_pfn < end_pfn) {
folio = pfn_folio(start_pfn);
- /*
- * The folio might have been dissolved from under our feet, so make sure
- * to carefully check the state under the lock.
- */
- spin_lock_irq(&hugetlb_lock);
- if (folio_test_hugetlb(folio)) {
- h = folio_hstate(folio);
- } else {
- spin_unlock_irq(&hugetlb_lock);
- start_pfn++;
- continue;
- }
- spin_unlock_irq(&hugetlb_lock);
-
- if (!folio_ref_count(folio)) {
- ret = alloc_and_dissolve_hugetlb_folio(h, folio,
- &isolate_list);
+ /* Avoid disrupting the normal path by needlessly holding hugetlb_lock */
+ if (folio_test_hugetlb(folio) && !folio_ref_count(folio)) {
+ ret = alloc_and_dissolve_hugetlb_folio(folio, &isolate_list);
if (ret)
break;
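
Both isolate_or_dissolve_huge_folio() and replace_free_hugepage_folios() now use an unlocked folio_test_hugetlb() check purely as a fast path and leave the authoritative re-check to alloc_and_dissolve_hugetlb_folio(), which takes hugetlb_lock itself. A userspace sketch of that racy-hint-plus-locked-recheck shape, with hypothetical names:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool is_huge = true;	/* protected by lock; unlocked reads are hints */

static int dissolve(void)
{
	if (!is_huge)		/* cheap, possibly stale: skip the lock */
		return 0;

	pthread_mutex_lock(&lock);
	if (!is_huge) {		/* authoritative re-check under the lock */
		pthread_mutex_unlock(&lock);
		return 0;	/* raced: treat as already dissolved */
	}
	is_huge = false;	/* the actual dissolve work goes here */
	pthread_mutex_unlock(&lock);
	return 1;
}

int main(void)
{
	return dissolve() ? 0 : 1;
}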
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 8357e1a33699..62c01b4527eb 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -370,36 +370,6 @@ static inline bool init_task_stack_addr(const void *addr)
sizeof(init_thread_union.stack));
}
-/*
- * This function is invoked with report_lock (a raw_spinlock) held. A
- * PREEMPT_RT kernel cannot call find_vm_area() as it will acquire a sleeping
- * rt_spinlock.
- *
- * For !RT kernel, the PROVE_RAW_LOCK_NESTING config option will print a
- * lockdep warning for this raw_spinlock -> spinlock dependency. This config
- * option is enabled by default to ensure better test coverage to expose this
- * kind of RT kernel problem. This lockdep splat, however, can be suppressed
- * by using DEFINE_WAIT_OVERRIDE_MAP() if it serves a useful purpose and the
- * invalid PREEMPT_RT case has been taken care of.
- */
-static inline struct vm_struct *kasan_find_vm_area(void *addr)
-{
- static DEFINE_WAIT_OVERRIDE_MAP(vmalloc_map, LD_WAIT_SLEEP);
- struct vm_struct *va;
-
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- return NULL;
-
- /*
- * Suppress lockdep warning and fetch vmalloc area of the
- * offending address.
- */
- lock_map_acquire_try(&vmalloc_map);
- va = find_vm_area(addr);
- lock_map_release(&vmalloc_map);
- return va;
-}
-
static void print_address_description(void *addr, u8 tag,
struct kasan_report_info *info)
{
@@ -429,19 +399,10 @@ static void print_address_description(void *addr, u8 tag,
}
if (is_vmalloc_addr(addr)) {
- struct vm_struct *va = kasan_find_vm_area(addr);
-
- if (va) {
- pr_err("The buggy address belongs to the virtual mapping at\n"
- " [%px, %px) created by:\n"
- " %pS\n",
- va->addr, va->addr + va->size, va->caller);
- pr_err("\n");
-
- page = vmalloc_to_page(addr);
- } else {
- pr_err("The buggy address %px belongs to a vmalloc virtual mapping\n", addr);
- }
+ pr_err("The buggy address belongs to a");
+ if (!vmalloc_dump_obj(addr))
+ pr_cont(" vmalloc virtual mapping\n");
+ page = vmalloc_to_page(addr);
}
if (page) {
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index da9cee34ee1b..8d588e685311 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1247,6 +1247,20 @@ void __ref kmemleak_transient_leak(const void *ptr)
EXPORT_SYMBOL(kmemleak_transient_leak);
/**
+ * kmemleak_ignore_percpu - similar to kmemleak_ignore but taking a percpu
+ * address argument
+ * @ptr: percpu address of the object
+ */
+void __ref kmemleak_ignore_percpu(const void __percpu *ptr)
+{
+ pr_debug("%s(0x%px)\n", __func__, ptr);
+
+ if (kmemleak_enabled && ptr && !IS_ERR_PCPU(ptr))
+ make_black_object((unsigned long)ptr, OBJECT_PERCPU);
+}
+EXPORT_SYMBOL_GPL(kmemleak_ignore_percpu);
+
+/**
* kmemleak_ignore - ignore an allocated object
* @ptr: pointer to beginning of the object
*
diff --git a/mm/ksm.c b/mm/ksm.c
index 8583fb91ef13..a9d3e719e089 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -3669,10 +3669,10 @@ static ssize_t advisor_mode_show(struct kobject *kobj,
{
const char *output;
- if (ksm_advisor == KSM_ADVISOR_NONE)
- output = "[none] scan-time";
- else if (ksm_advisor == KSM_ADVISOR_SCAN_TIME)
+ if (ksm_advisor == KSM_ADVISOR_SCAN_TIME)
output = "none [scan-time]";
+ else
+ output = "[none] scan-time";
return sysfs_emit(buf, "%s\n", output);
}
diff --git a/mm/madvise.c b/mm/madvise.c
index 5f7a66a1617e..1d44a35ae85c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -508,6 +508,7 @@ restart:
pte_offset_map_lock(mm, pmd, addr, &ptl);
if (!start_pte)
break;
+ flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
if (!err)
nr = 0;
@@ -741,6 +742,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
start_pte = pte;
if (!start_pte)
break;
+ flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
if (!err)
nr = 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 902da8a9c643..70fdeda1120b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -474,8 +474,6 @@ static const unsigned int memcg_vm_event_stat[] = {
NUMA_PAGE_MIGRATE,
NUMA_PTE_UPDATES,
NUMA_HINT_FAULTS,
- NUMA_TASK_MIGRATE,
- NUMA_TASK_SWAP,
#endif
};
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index b91a33fb6c69..225dddff091d 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1561,6 +1561,10 @@ static int get_hwpoison_page(struct page *p, unsigned long flags)
return ret;
}
+/*
+ * The caller must guarantee the folio isn't a large folio, except for
+ * hugetlb: try_to_unmap() can't handle large folios.
+ */
int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill)
{
enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON;
diff --git a/mm/memory.c b/mm/memory.c
index 8eba595056fe..b0cda5aab398 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4315,26 +4315,6 @@ static struct folio *__alloc_swap_folio(struct vm_fault *vmf)
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
-{
- struct swap_info_struct *si = swp_swap_info(entry);
- pgoff_t offset = swp_offset(entry);
- int i;
-
- /*
- * While allocating a large folio and doing swap_read_folio, which is
- * the case the being faulted pte doesn't have swapcache. We need to
- * ensure all PTEs have no cache as well, otherwise, we might go to
- * swap devices while the content is in swapcache.
- */
- for (i = 0; i < max_nr; i++) {
- if ((si->swap_map[offset + i] & SWAP_HAS_CACHE))
- return i;
- }
-
- return i;
-}
-
/*
* Check if the PTEs within a range are contiguous swap entries
* and have consistent swapcache, zeromap.
diff --git a/mm/migrate.c b/mm/migrate.c
index 8cf0f9c9599d..2c88f3b33833 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2399,6 +2399,7 @@ set_status:
static int get_compat_pages_array(const void __user *chunk_pages[],
const void __user * __user *pages,
+ unsigned long chunk_offset,
unsigned long chunk_nr)
{
compat_uptr_t __user *pages32 = (compat_uptr_t __user *)pages;
@@ -2406,7 +2407,7 @@ static int get_compat_pages_array(const void __user *chunk_pages[],
int i;
for (i = 0; i < chunk_nr; i++) {
- if (get_user(p, pages32 + i))
+ if (get_user(p, pages32 + chunk_offset + i))
return -EFAULT;
chunk_pages[i] = compat_ptr(p);
}
@@ -2425,27 +2426,28 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
#define DO_PAGES_STAT_CHUNK_NR 16UL
const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
int chunk_status[DO_PAGES_STAT_CHUNK_NR];
+ unsigned long chunk_offset = 0;
while (nr_pages) {
unsigned long chunk_nr = min(nr_pages, DO_PAGES_STAT_CHUNK_NR);
if (in_compat_syscall()) {
if (get_compat_pages_array(chunk_pages, pages,
- chunk_nr))
+ chunk_offset, chunk_nr))
break;
} else {
- if (copy_from_user(chunk_pages, pages,
+ if (copy_from_user(chunk_pages, pages + chunk_offset,
chunk_nr * sizeof(*chunk_pages)))
break;
}
do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
- if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
+ if (copy_to_user(status + chunk_offset, chunk_status,
+ chunk_nr * sizeof(*status)))
break;
- pages += chunk_nr;
- status += chunk_nr;
+ chunk_offset += chunk_nr;
nr_pages -= chunk_nr;
}
return nr_pages ? -EFAULT : 0;
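
The do_pages_stat() change replaces two separately advanced user pointers with a single chunk_offset used for the input and output arrays alike, which is what lets the compat path index its 32-bit pointer array correctly. A userspace sketch of chunked processing driven by one offset (names hypothetical):

#include <stdio.h>

#define CHUNK 16UL

static void process_chunk(const int *in, int *out, unsigned long n)
{
	for (unsigned long i = 0; i < n; i++)
		out[i] = in[i] * 2;	/* stand-in for do_pages_stat_array() */
}

int main(void)
{
	int pages[40], status[40];
	unsigned long nr = 40, off = 0;

	for (int i = 0; i < 40; i++)
		pages[i] = i;

	while (nr) {
		unsigned long n = nr < CHUNK ? nr : CHUNK;

		/* one offset indexes input and output consistently */
		process_chunk(pages + off, status + off, n);
		off += n;
		nr -= n;
	}
	printf("status[39] = %d\n", status[39]);
	return 0;
}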
diff --git a/mm/rmap.c b/mm/rmap.c
index fb63d9256f09..1320b88fab74 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1845,23 +1845,32 @@ void folio_remove_rmap_pud(struct folio *folio, struct page *page,
#endif
}
-/* We support batch unmapping of PTEs for lazyfree large folios */
-static inline bool can_batch_unmap_folio_ptes(unsigned long addr,
- struct folio *folio, pte_t *ptep)
+static inline unsigned int folio_unmap_pte_batch(struct folio *folio,
+ struct page_vma_mapped_walk *pvmw,
+ enum ttu_flags flags, pte_t pte)
{
const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
- int max_nr = folio_nr_pages(folio);
- pte_t pte = ptep_get(ptep);
+ unsigned long end_addr, addr = pvmw->address;
+ struct vm_area_struct *vma = pvmw->vma;
+ unsigned int max_nr;
+
+ if (flags & TTU_HWPOISON)
+ return 1;
+ if (!folio_test_large(folio))
+ return 1;
+ /* We may only batch within a single VMA and a single page table. */
+ end_addr = pmd_addr_end(addr, vma->vm_end);
+ max_nr = (end_addr - addr) >> PAGE_SHIFT;
+
+ /* We only support lazyfree batching for now ... */
if (!folio_test_anon(folio) || folio_test_swapbacked(folio))
- return false;
+ return 1;
if (pte_unused(pte))
- return false;
- if (pte_pfn(pte) != folio_pfn(folio))
- return false;
+ return 1;
- return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL,
- NULL, NULL) == max_nr;
+ return folio_pte_batch(folio, addr, pvmw->pte, pte, max_nr, fpb_flags,
+ NULL, NULL, NULL);
}
/*
@@ -2024,9 +2033,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
if (pte_dirty(pteval))
folio_mark_dirty(folio);
} else if (likely(pte_present(pteval))) {
- if (folio_test_large(folio) && !(flags & TTU_HWPOISON) &&
- can_batch_unmap_folio_ptes(address, folio, pvmw.pte))
- nr_pages = folio_nr_pages(folio);
+ nr_pages = folio_unmap_pte_batch(folio, &pvmw, flags, pteval);
end_addr = address + nr_pages * PAGE_SIZE;
flush_cache_range(vma, address, end_addr);
@@ -2206,13 +2213,16 @@ discard:
hugetlb_remove_rmap(folio);
} else {
folio_remove_rmap_ptes(folio, subpage, nr_pages, vma);
- folio_ref_sub(folio, nr_pages - 1);
}
if (vma->vm_flags & VM_LOCKED)
mlock_drain_local();
- folio_put(folio);
- /* We have already batched the entire folio */
- if (nr_pages > 1)
+ folio_put_refs(folio, nr_pages);
+
+ /*
+ * If we are sure that we batched the entire folio and cleared
+ * all PTEs, we can just optimize and stop right here.
+ */
+ if (nr_pages == folio_nr_pages(folio))
goto walk_done;
continue;
walk_abort:
diff --git a/mm/secretmem.c b/mm/secretmem.c
index 589b26c2d553..b7c5592d6711 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -195,20 +195,13 @@ static struct file *secretmem_file_create(unsigned long flags)
struct file *file;
struct inode *inode;
const char *anon_name = "[secretmem]";
- int err;
- inode = alloc_anon_inode(secretmem_mnt->mnt_sb);
+ inode = anon_inode_make_secure_inode(secretmem_mnt->mnt_sb, anon_name, NULL);
if (IS_ERR(inode))
return ERR_CAST(inode);
- err = security_inode_init_security_anon(inode, &QSTR(anon_name), NULL);
- if (err) {
- file = ERR_PTR(err);
- goto err_free_inode;
- }
-
file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
- O_RDWR, &secretmem_fops);
+ O_RDWR | O_LARGEFILE, &secretmem_fops);
if (IS_ERR(file))
goto err_free_inode;
@@ -222,6 +215,8 @@ static struct file *secretmem_file_create(unsigned long flags)
inode->i_mode |= S_IFREG;
inode->i_size = 0;
+ atomic_inc(&secretmem_users);
+
return file;
err_free_inode:
@@ -255,9 +250,6 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
goto err_put_fd;
}
- file->f_flags |= O_LARGEFILE;
-
- atomic_inc(&secretmem_users);
fd_install(fd, file);
return fd;
@@ -268,7 +260,15 @@ err_put_fd:
static int secretmem_init_fs_context(struct fs_context *fc)
{
- return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM;
+ struct pseudo_fs_context *ctx;
+
+ ctx = init_pseudo(fc, SECRETMEM_MAGIC);
+ if (!ctx)
+ return -ENOMEM;
+
+ fc->s_iflags |= SB_I_NOEXEC;
+ fc->s_iflags |= SB_I_NODEV;
+ return 0;
}
static struct file_system_type secretmem_fs = {
@@ -286,9 +286,6 @@ static int __init secretmem_init(void)
if (IS_ERR(secretmem_mnt))
return PTR_ERR(secretmem_mnt);
- /* prevent secretmem mappings from ever getting PROT_EXEC */
- secretmem_mnt->mnt_flags |= MNT_NOEXEC;
-
return 0;
}
fs_initcall(secretmem_init);
diff --git a/mm/shmem.c b/mm/shmem.c
index 0c5fb4ffa03a..457355cda4a9 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2259,6 +2259,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
folio = swap_cache_get_folio(swap, NULL, 0);
order = xa_get_order(&mapping->i_pages, index);
if (!folio) {
+ int nr_pages = 1 << order;
bool fallback_order0 = false;
/* Or update major stats only when swapin succeeds?? */
@@ -2272,9 +2273,12 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
* If uffd is active for the vma, we need per-page fault
* fidelity to maintain the uffd semantics, then fallback
* to swapin order-0 folio, as well as for zswap case.
+ * Any existing sub folio in the swap cache also blocks
+ * mTHP swapin.
*/
if (order > 0 && ((vma && unlikely(userfaultfd_armed(vma))) ||
- !zswap_never_enabled()))
+ !zswap_never_enabled() ||
+ non_swapcache_batch(swap, nr_pages) != nr_pages))
fallback_order0 = true;
/* Skip swapcache for synchronous device. */
@@ -3266,9 +3270,9 @@ static const struct inode_operations shmem_symlink_inode_operations;
static const struct inode_operations shmem_short_symlink_operations;
static int
-shmem_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct folio **foliop, void **fsdata)
+shmem_write_begin(const struct kiocb *iocb, struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct folio **foliop, void **fsdata)
{
struct inode *inode = mapping->host;
struct shmem_inode_info *info = SHMEM_I(inode);
@@ -3300,9 +3304,9 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
}
static int
-shmem_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct folio *folio, void *fsdata)
+shmem_write_end(const struct kiocb *iocb, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
@@ -4980,7 +4984,6 @@ static void shmem_put_super(struct super_block *sb)
static const struct dentry_operations shmem_ci_dentry_ops = {
.d_hash = generic_ci_d_hash,
.d_compare = generic_ci_d_compare,
- .d_delete = always_delete_dentry,
};
#endif
@@ -5028,7 +5031,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
if (ctx->encoding) {
sb->s_encoding = ctx->encoding;
- sb->s_d_op = &shmem_ci_dentry_ops;
+ set_default_d_op(sb, &shmem_ci_dentry_ops);
if (ctx->strict_encoding)
sb->s_encoding_flags = SB_ENC_STRICT_MODE_FL;
}
@@ -5037,6 +5040,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
#else
sb->s_flags |= SB_NOUSER;
#endif /* CONFIG_TMPFS */
+ sb->s_d_flags |= DCACHE_DONTCACHE;
sbinfo->max_blocks = ctx->blocks;
sbinfo->max_inodes = ctx->inodes;
sbinfo->free_ispace = sbinfo->max_inodes * BOGO_INODE_SIZE;
diff --git a/mm/swap.h b/mm/swap.h
index 2269eb9df0af..9096082a915e 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -106,6 +106,25 @@ static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
return find_next_bit(sis->zeromap, end, start) - start;
}
+static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
+{
+ struct swap_info_struct *si = swp_swap_info(entry);
+ pgoff_t offset = swp_offset(entry);
+ int i;
+
+ /*
+ * While allocating a large folio and doing mTHP swapin, we need to
+ * ensure that none of the entries are cached; otherwise, the mTHP
+ * folio would conflict with the folio already in the swap cache.
+ */
+ for (i = 0; i < max_nr; i++) {
+ if ((si->swap_map[offset + i] & SWAP_HAS_CACHE))
+ return i;
+ }
+
+ return i;
+}
+
#else /* CONFIG_SWAP */
struct swap_iocb;
static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
@@ -199,6 +218,10 @@ static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
return 0;
}
+static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
+{
+ return 0;
+}
#endif /* CONFIG_SWAP */
/**
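
non_swapcache_batch() above returns the length of the leading run of swap entries that have no swap-cache backing; shmem only proceeds with a large swapin when the whole range qualifies. A userspace sketch of the counting idiom (the flag value and names are hypothetical):

#include <stdio.h>

#define HAS_CACHE 0x40	/* stand-in for SWAP_HAS_CACHE */

/* Length of the leading run of entries without HAS_CACHE set. */
static int non_cached_batch(const unsigned char *map, int off, int max_nr)
{
	int i;

	for (i = 0; i < max_nr; i++) {
		if (map[off + i] & HAS_CACHE)
			return i;
	}
	return i;	/* the whole range is uncached */
}

int main(void)
{
	unsigned char map[8] = { 0, 0, 0, HAS_CACHE, 0, 0, 0, 0 };
	int nr = 4;

	/* caller pattern: proceed only when the whole range qualifies */
	if (non_cached_batch(map, 0, nr) != nr)
		puts("fall back to order-0 swapin");
	return 0;
}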
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index bc473ad21202..8253978ee0fb 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1084,8 +1084,18 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma,
pte_t orig_dst_pte, pte_t orig_src_pte,
pmd_t *dst_pmd, pmd_t dst_pmdval,
spinlock_t *dst_ptl, spinlock_t *src_ptl,
- struct folio *src_folio)
+ struct folio *src_folio,
+ struct swap_info_struct *si, swp_entry_t entry)
{
+ /*
+ * Check if the folio still belongs to the target swap entry after
+ * acquiring the lock. A folio can be freed from the swap cache while
+ * it is not locked.
+ */
+ if (src_folio && unlikely(!folio_test_swapcache(src_folio) ||
+ entry.val != src_folio->swap.val))
+ return -EAGAIN;
+
double_pt_lock(dst_ptl, src_ptl);
if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte,
@@ -1102,6 +1112,25 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma,
if (src_folio) {
folio_move_anon_rmap(src_folio, dst_vma);
src_folio->index = linear_page_index(dst_vma, dst_addr);
+ } else {
+ /*
+ * Check if the swap entry is cached after acquiring the src_pte
+ * lock. Otherwise, we might miss a newly loaded swap cache folio.
+ *
+ * Check swap_map directly to minimize overhead; READ_ONCE is sufficient.
+ * We are trying to catch newly added swap cache entries. The only possible
+ * case is a folio being swapped in and out again, staying in the swap cache
+ * and reusing the same entry, before the PTE check above. The PTL is
+ * acquired and released twice, each time after updating the swap_map's
+ * flag, so holding the PTL here ensures we see the updated value. False
+ * positives are possible, e.g. SWP_SYNCHRONOUS_IO swapin may set the flag
+ * without touching the cache, or during the tiny synchronization window
+ * between swap cache and swap_map, but they are gone very quickly; the
+ * worst result is retry jitter.
+ */
+ if (READ_ONCE(si->swap_map[swp_offset(entry)]) & SWAP_HAS_CACHE) {
+ double_pt_unlock(dst_ptl, src_ptl);
+ return -EAGAIN;
+ }
}
orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte);
@@ -1412,7 +1441,7 @@ retry:
}
err = move_swap_pte(mm, dst_vma, dst_addr, src_addr, dst_pte, src_pte,
orig_dst_pte, orig_src_pte, dst_pmd, dst_pmdval,
- dst_ptl, src_ptl, src_folio);
+ dst_ptl, src_ptl, src_folio, si, entry);
}
out:
diff --git a/mm/util.c b/mm/util.c
index 448117da071f..0b270c43d7d1 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1131,3 +1131,43 @@ void flush_dcache_folio(struct folio *folio)
}
EXPORT_SYMBOL(flush_dcache_folio);
#endif
+
+/**
+ * compat_vma_mmap_prepare() - Apply the file's .mmap_prepare() hook to an
+ * existing VMA
+ * @file: The file which possesses an f_op->mmap_prepare() hook
+ * @vma: The VMA to apply the .mmap_prepare() hook to.
+ *
+ * Ordinarily, .mmap_prepare() is invoked directly upon mmap(). However, certain
+ * 'wrapper' file systems invoke a nested mmap hook of an underlying file.
+ *
+ * Until all filesystems are converted to use .mmap_prepare(), we must be
+ * conservative and continue to invoke these 'wrapper' filesystems using the
+ * deprecated .mmap() hook.
+ *
+ * However, we have a problem if the underlying file system possesses an
+ * .mmap_prepare() hook, as we are in a different context when we invoke the
+ * .mmap() hook, already having a VMA to deal with.
+ *
+ * compat_vma_mmap_prepare() is a compatibility function that takes VMA state,
+ * establishes a struct vm_area_desc descriptor, passes it to the underlying
+ * .mmap_prepare() hook, and applies any changes performed by it.
+ *
+ * Once the conversion of filesystems is complete this function will no longer
+ * be required and will be removed.
+ *
+ * Returns: 0 on success, or an error code on failure.
+ */
+int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma)
+{
+ struct vm_area_desc desc;
+ int err;
+
+ err = file->f_op->mmap_prepare(vma_to_desc(vma, &desc));
+ if (err)
+ return err;
+ set_vma_from_desc(vma, &desc);
+
+ return 0;
+}
+EXPORT_SYMBOL(compat_vma_mmap_prepare);
diff --git a/mm/vma.c b/mm/vma.c
index 726b2a31ce59..fef67a66a095 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -967,26 +967,9 @@ static __must_check struct vm_area_struct *vma_merge_existing_range(
err = dup_anon_vma(next, middle, &anon_dup);
}
- if (err)
+ if (err || commit_merge(vmg))
goto abort;
- err = commit_merge(vmg);
- if (err) {
- VM_WARN_ON(err != -ENOMEM);
-
- if (anon_dup)
- unlink_anon_vmas(anon_dup);
-
- /*
- * We've cleaned up any cloned anon_vma's, no VMAs have been
- * modified, no harm no foul if the user requests that we not
- * report this and just give up, leaving the VMAs unmerged.
- */
- if (!vmg->give_up_on_oom)
- vmg->state = VMA_MERGE_ERROR_NOMEM;
- return NULL;
- }
-
khugepaged_enter_vma(vmg->target, vmg->flags);
vmg->state = VMA_MERGE_SUCCESS;
return vmg->target;
@@ -995,6 +978,9 @@ abort:
vma_iter_set(vmg->vmi, start);
vma_iter_load(vmg->vmi);
+ if (anon_dup)
+ unlink_anon_vmas(anon_dup);
+
/*
* This means we have failed to clone anon_vma's correctly, but no
* actual changes to VMAs have occurred, so no harm no foul - if the
@@ -3127,7 +3113,6 @@ int __vm_munmap(unsigned long start, size_t len, bool unlock)
return ret;
}
-
/* Insert vm structure into process list sorted by address
* and into the inode's i_mmap tree. If vm_file is non-NULL
* then i_mmap_rwsem is taken here.
diff --git a/mm/vma.h b/mm/vma.h
index 0db066e7a45d..f47112a352db 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -222,6 +222,53 @@ static inline int vma_iter_store_gfp(struct vma_iterator *vmi,
return 0;
}
+
+/*
+ * Temporary helper functions for file systems which wrap an invocation of
+ * f_op->mmap() but which might have an underlying file system which implements
+ * f_op->mmap_prepare().
+ */
+
+static inline struct vm_area_desc *vma_to_desc(struct vm_area_struct *vma,
+ struct vm_area_desc *desc)
+{
+ desc->mm = vma->vm_mm;
+ desc->start = vma->vm_start;
+ desc->end = vma->vm_end;
+
+ desc->pgoff = vma->vm_pgoff;
+ desc->file = vma->vm_file;
+ desc->vm_flags = vma->vm_flags;
+ desc->page_prot = vma->vm_page_prot;
+
+ desc->vm_ops = NULL;
+ desc->private_data = NULL;
+
+ return desc;
+}
+
+static inline void set_vma_from_desc(struct vm_area_struct *vma,
+ struct vm_area_desc *desc)
+{
+ /*
+ * Since we're invoking .mmap_prepare() despite having a partially
+ * established VMA, we must take care to handle setting fields
+ * correctly.
+ */
+
+ /* Mutable fields. Populated with initial state. */
+ vma->vm_pgoff = desc->pgoff;
+ if (vma->vm_file != desc->file)
+ vma_set_file(vma, desc->file);
+ if (vma->vm_flags != desc->vm_flags)
+ vm_flags_set(vma, desc->vm_flags);
+ vma->vm_page_prot = desc->page_prot;
+
+ /* User-defined fields. */
+ vma->vm_ops = desc->vm_ops;
+ vma->vm_private_data = desc->private_data;
+}
+
int
do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
struct mm_struct *mm, unsigned long start,
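
compat_vma_mmap_prepare() and the vma.h helpers above follow a snapshot/hook/write-back shape: copy the VMA into a stack descriptor, run the hook against the descriptor, and apply the mutable fields back only if the hook succeeded. A compact userspace sketch of that shape (all types and the hook are hypothetical):

#include <stdio.h>

struct vma  { unsigned long start, end, flags; void *private_data; };
struct desc { unsigned long start, end, flags; void *private_data; };

static struct desc *vma_to_desc(const struct vma *v, struct desc *d)
{
	d->start = v->start;
	d->end = v->end;
	d->flags = v->flags;
	d->private_data = NULL;	/* user-defined field starts clean */
	return d;
}

static void set_vma_from_desc(struct vma *v, const struct desc *d)
{
	v->flags = d->flags;	/* mutable fields written back */
	v->private_data = d->private_data;
}

static int hook(struct desc *d)	/* stands in for f_op->mmap_prepare() */
{
	d->flags |= 0x1;
	return 0;
}

static int compat_mmap_prepare(struct vma *v)
{
	struct desc d;
	int err = hook(vma_to_desc(v, &d));

	if (err)
		return err;	/* the VMA is untouched on failure */
	set_vma_from_desc(v, &d);
	return 0;
}

int main(void)
{
	struct vma v = { 0x1000, 0x2000, 0, 0 };

	compat_mmap_prepare(&v);
	printf("flags=%lx\n", v.flags);
	return 0;
}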
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ab986dd09b6a..6dbcdceecae1 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -514,6 +514,7 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr,
pgtbl_mod_mask *mask)
{
+ int err = 0;
pte_t *pte;
/*
@@ -530,12 +531,18 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
do {
struct page *page = pages[*nr];
- if (WARN_ON(!pte_none(ptep_get(pte))))
- return -EBUSY;
- if (WARN_ON(!page))
- return -ENOMEM;
- if (WARN_ON(!pfn_valid(page_to_pfn(page))))
- return -EINVAL;
+ if (WARN_ON(!pte_none(ptep_get(pte)))) {
+ err = -EBUSY;
+ break;
+ }
+ if (WARN_ON(!page)) {
+ err = -ENOMEM;
+ break;
+ }
+ if (WARN_ON(!pfn_valid(page_to_pfn(page)))) {
+ err = -EINVAL;
+ break;
+ }
set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
(*nr)++;
@@ -543,7 +550,8 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
arch_leave_lazy_mmu_mode();
*mask |= PGTBL_PTE_MODIFIED;
- return 0;
+
+ return err;
}
static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr,
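
The vmap_pages_pte_range() hunk converts early returns into breaks so the loop always falls through to arch_leave_lazy_mmu_mode() and the *mask update before the error is propagated. A userspace sketch of the break-to-single-exit idiom (the enter/leave pair is hypothetical):

#include <stdio.h>

static void enter_mode(void) { puts("enter"); }
static void leave_mode(void) { puts("leave"); }	/* must always run */

static int fill(const int *vals, int n)
{
	int err = 0;

	enter_mode();
	for (int i = 0; i < n; i++) {
		if (vals[i] < 0) {
			err = -1;	/* was: return, skipping leave_mode() */
			break;
		}
		/* the set_pte_at() equivalent would go here */
	}
	leave_mode();	/* paired teardown runs on success and on error */
	return err;
}

int main(void)
{
	int v[] = { 1, 2, -3, 4 };

	return fill(v, 4) ? 1 : 0;
}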
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f8dfd2864bbf..424412680cfc 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1138,6 +1138,14 @@ retry:
goto keep;
if (folio_contain_hwpoisoned_page(folio)) {
+ /*
+ * unmap_poisoned_folio() can't handle large
+ * folio, just skip it. memory_failure() will
+ * handle it if the UCE is triggered again.
+ */
+ if (folio_test_large(folio))
+ goto keep_locked;
+
unmap_poisoned_folio(folio, folio_pfn(folio), false);
folio_unlock(folio);
folio_put(folio);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 429ae5339bfe..a78d70ddeacd 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1346,8 +1346,6 @@ const char * const vmstat_text[] = {
"numa_hint_faults",
"numa_hint_faults_local",
"numa_pages_migrated",
- "numa_task_migrated",
- "numa_task_swapped",
#endif
#ifdef CONFIG_MIGRATION
"pgmigrate_success",
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 999b513c7fdf..f3e2215f95eb 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1043,6 +1043,9 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
if (!zspage)
return NULL;
+ if (!IS_ENABLED(CONFIG_COMPACTION))
+ gfp &= ~__GFP_MOVABLE;
+
zspage->magic = ZSPAGE_MAGIC;
zspage->pool = pool;
zspage->class = class->index;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 06908e37c3d9..9a6df8c1daf9 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -357,6 +357,35 @@ static int __vlan_device_event(struct net_device *dev, unsigned long event)
return err;
}
+static void vlan_vid0_add(struct net_device *dev)
+{
+ struct vlan_info *vlan_info;
+ int err;
+
+ if (!(dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ return;
+
+ pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name);
+
+ err = vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
+ if (err)
+ return;
+
+ vlan_info = rtnl_dereference(dev->vlan_info);
+ vlan_info->auto_vid0 = true;
+}
+
+static void vlan_vid0_del(struct net_device *dev)
+{
+ struct vlan_info *vlan_info = rtnl_dereference(dev->vlan_info);
+
+ if (!vlan_info || !vlan_info->auto_vid0)
+ return;
+
+ vlan_info->auto_vid0 = false;
+ vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
+}
+
static int vlan_device_event(struct notifier_block *unused, unsigned long event,
void *ptr)
{
@@ -378,15 +407,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
return notifier_from_errno(err);
}
- if ((event == NETDEV_UP) &&
- (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
- pr_info("adding VLAN 0 to HW filter on device %s\n",
- dev->name);
- vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
- }
- if (event == NETDEV_DOWN &&
- (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
- vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
+ if (event == NETDEV_UP)
+ vlan_vid0_add(dev);
+ else if (event == NETDEV_DOWN)
+ vlan_vid0_del(dev);
vlan_info = rtnl_dereference(dev->vlan_info);
if (!vlan_info)
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 5eaf38875554..c7ffe591d593 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -33,6 +33,7 @@ struct vlan_info {
struct vlan_group grp;
struct list_head vid_list;
unsigned int nr_vids;
+ bool auto_vid0;
struct rcu_head rcu;
};
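
The VLAN hunks record in vlan_info->auto_vid0 whether the kernel itself added VID 0 on NETDEV_UP, so NETDEV_DOWN removes it only if it was auto-added, even when NETIF_F_HW_VLAN_CTAG_FILTER flips in between. A userspace sketch of the remember-what-you-added idiom (names hypothetical):

#include <stdbool.h>
#include <stdio.h>

struct dev {
	bool hw_filter;	/* can change between up() and down() */
	bool auto_vid0;	/* did we add VID 0 ourselves? */
};

static void up(struct dev *d)
{
	if (!d->hw_filter)
		return;
	puts("add VID 0");
	d->auto_vid0 = true;
}

static void down(struct dev *d)
{
	/* key off what we did, not off the current feature flag */
	if (!d->auto_vid0)
		return;
	d->auto_vid0 = false;
	puts("del VID 0");
}

int main(void)
{
	struct dev d = { .hw_filter = true };

	up(&d);
	d.hw_filter = false;	/* feature flips while the device is up */
	down(&d);		/* VID 0 is still removed exactly once */
	return 0;
}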
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 9c787e2e4b17..4744e3fd4544 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -35,6 +35,7 @@
#include <linux/seq_file.h>
#include <linux/export.h>
#include <linux/etherdevice.h>
+#include <linux/refcount.h>
int sysctl_aarp_expiry_time = AARP_EXPIRY_TIME;
int sysctl_aarp_tick_time = AARP_TICK_TIME;
@@ -44,6 +45,7 @@ int sysctl_aarp_resolve_time = AARP_RESOLVE_TIME;
/* Lists of aarp entries */
/**
* struct aarp_entry - AARP entry
+ * @refcnt: Reference count
* @last_sent: Last time we xmitted the aarp request
* @packet_queue: Queue of frames wait for resolution
* @status: Used for proxy AARP
@@ -55,6 +57,7 @@ int sysctl_aarp_resolve_time = AARP_RESOLVE_TIME;
* @next: Next entry in chain
*/
struct aarp_entry {
+ refcount_t refcnt;
/* These first two are only used for unresolved entries */
unsigned long last_sent;
struct sk_buff_head packet_queue;
@@ -79,6 +82,17 @@ static DEFINE_RWLOCK(aarp_lock);
/* Used to walk the list and purge/kick entries. */
static struct timer_list aarp_timer;
+static inline void aarp_entry_get(struct aarp_entry *a)
+{
+ refcount_inc(&a->refcnt);
+}
+
+static inline void aarp_entry_put(struct aarp_entry *a)
+{
+ if (refcount_dec_and_test(&a->refcnt))
+ kfree(a);
+}
+
/*
* Delete an aarp queue
*
@@ -87,7 +101,7 @@ static struct timer_list aarp_timer;
static void __aarp_expire(struct aarp_entry *a)
{
skb_queue_purge(&a->packet_queue);
- kfree(a);
+ aarp_entry_put(a);
}
/*
@@ -380,9 +394,11 @@ static void aarp_purge(void)
static struct aarp_entry *aarp_alloc(void)
{
struct aarp_entry *a = kmalloc(sizeof(*a), GFP_ATOMIC);
+ if (!a)
+ return NULL;
- if (a)
- skb_queue_head_init(&a->packet_queue);
+ refcount_set(&a->refcnt, 1);
+ skb_queue_head_init(&a->packet_queue);
return a;
}
@@ -477,6 +493,7 @@ int aarp_proxy_probe_network(struct atalk_iface *atif, struct atalk_addr *sa)
entry->dev = atif->dev;
write_lock_bh(&aarp_lock);
+ aarp_entry_get(entry);
hash = sa->s_node % (AARP_HASH_SIZE - 1);
entry->next = proxies[hash];
@@ -502,6 +519,7 @@ int aarp_proxy_probe_network(struct atalk_iface *atif, struct atalk_addr *sa)
retval = 1;
}
+ aarp_entry_put(entry);
write_unlock_bh(&aarp_lock);
out:
return retval;
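
The aarp_entry changes add a reference count so an entry linked into the proxies[] hash cannot be freed while aarp_proxy_probe_network() is still using it. A userspace analogue of the refcount get/put pattern built on C11 atomics (the struct is hypothetical):

#include <stdatomic.h>
#include <stdlib.h>

struct entry {
	atomic_int refcnt;
	int data;
};

static struct entry *entry_alloc(void)
{
	struct entry *e = malloc(sizeof(*e));

	if (!e)
		return NULL;
	atomic_init(&e->refcnt, 1);	/* creator's reference */
	return e;
}

static void entry_get(struct entry *e)
{
	atomic_fetch_add(&e->refcnt, 1);
}

static void entry_put(struct entry *e)
{
	/* free only when the last reference is dropped */
	if (atomic_fetch_sub(&e->refcnt, 1) == 1)
		free(e);
}

int main(void)
{
	struct entry *e = entry_alloc();

	entry_get(e);	/* second user, e.g. the hash table */
	entry_put(e);	/* expiry drops one reference */
	entry_put(e);	/* last put frees */
	return 0;
}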
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 73ea7e67f05a..30242fe10341 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -576,6 +576,7 @@ static int atrtr_create(struct rtentry *r, struct net_device *devhint)
/* Fill in the routing entry */
rt->target = ta->sat_addr;
+ dev_put(rt->dev); /* Release old device */
dev_hold(devhint);
rt->dev = devhint;
rt->flags = r->rt_flags;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 61b5b700817d..f7a5565e794e 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -45,7 +45,8 @@
#include <net/atmclip.h>
static struct net_device *clip_devs;
-static struct atm_vcc *atmarpd;
+static struct atm_vcc __rcu *atmarpd;
+static DEFINE_MUTEX(atmarpd_lock);
static struct timer_list idle_timer;
static const struct neigh_ops clip_neigh_ops;
@@ -53,24 +54,35 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip)
{
struct sock *sk;
struct atmarp_ctrl *ctrl;
+ struct atm_vcc *vcc;
struct sk_buff *skb;
+ int err = 0;
pr_debug("(%d)\n", type);
- if (!atmarpd)
- return -EUNATCH;
+
+ rcu_read_lock();
+ vcc = rcu_dereference(atmarpd);
+ if (!vcc) {
+ err = -EUNATCH;
+ goto unlock;
+ }
skb = alloc_skb(sizeof(struct atmarp_ctrl), GFP_ATOMIC);
- if (!skb)
- return -ENOMEM;
+ if (!skb) {
+ err = -ENOMEM;
+ goto unlock;
+ }
ctrl = skb_put(skb, sizeof(struct atmarp_ctrl));
ctrl->type = type;
ctrl->itf_num = itf;
ctrl->ip = ip;
- atm_force_charge(atmarpd, skb->truesize);
+ atm_force_charge(vcc, skb->truesize);
- sk = sk_atm(atmarpd);
+ sk = sk_atm(vcc);
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk);
- return 0;
+unlock:
+ rcu_read_unlock();
+ return err;
}
static void link_vcc(struct clip_vcc *clip_vcc, struct atmarp_entry *entry)
@@ -193,12 +205,6 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
pr_debug("\n");
- if (!clip_devs) {
- atm_return(vcc, skb->truesize);
- kfree_skb(skb);
- return;
- }
-
if (!skb) {
pr_debug("removing VCC %p\n", clip_vcc);
if (clip_vcc->entry)
@@ -208,6 +214,11 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
return;
}
atm_return(vcc, skb->truesize);
+ if (!clip_devs) {
+ kfree_skb(skb);
+ return;
+ }
+
skb->dev = clip_vcc->entry ? clip_vcc->entry->neigh->dev : clip_devs;
/* clip_vcc->entry == NULL if we don't have an IP address yet */
if (!skb->dev) {
@@ -418,6 +429,8 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout)
if (!vcc->push)
return -EBADFD;
+ if (vcc->user_back)
+ return -EINVAL;
clip_vcc = kmalloc(sizeof(struct clip_vcc), GFP_KERNEL);
if (!clip_vcc)
return -ENOMEM;
@@ -608,17 +621,27 @@ static void atmarpd_close(struct atm_vcc *vcc)
{
pr_debug("\n");
- rtnl_lock();
- atmarpd = NULL;
+ mutex_lock(&atmarpd_lock);
+ RCU_INIT_POINTER(atmarpd, NULL);
+ mutex_unlock(&atmarpd_lock);
+
+ synchronize_rcu();
skb_queue_purge(&sk_atm(vcc)->sk_receive_queue);
- rtnl_unlock();
pr_debug("(done)\n");
module_put(THIS_MODULE);
}
+static int atmarpd_send(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+ atm_return_tx(vcc, skb);
+ dev_kfree_skb_any(skb);
+ return 0;
+}
+
static const struct atmdev_ops atmarpd_dev_ops = {
- .close = atmarpd_close
+ .close = atmarpd_close,
+ .send = atmarpd_send
};
@@ -632,15 +655,18 @@ static struct atm_dev atmarpd_dev = {
static int atm_init_atmarp(struct atm_vcc *vcc)
{
- rtnl_lock();
+ if (vcc->push == clip_push)
+ return -EINVAL;
+
+ mutex_lock(&atmarpd_lock);
if (atmarpd) {
- rtnl_unlock();
+ mutex_unlock(&atmarpd_lock);
return -EADDRINUSE;
}
mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ);
- atmarpd = vcc;
+ rcu_assign_pointer(atmarpd, vcc);
set_bit(ATM_VF_META, &vcc->flags);
set_bit(ATM_VF_READY, &vcc->flags);
/* allow replies and avoid getting closed if signaling dies */
@@ -649,13 +675,14 @@ static int atm_init_atmarp(struct atm_vcc *vcc)
vcc->push = NULL;
vcc->pop = NULL; /* crash */
vcc->push_oam = NULL; /* crash */
- rtnl_unlock();
+ mutex_unlock(&atmarpd_lock);
return 0;
}
static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct atm_vcc *vcc = ATM_SD(sock);
+ struct sock *sk = sock->sk;
int err = 0;
switch (cmd) {
@@ -676,14 +703,18 @@ static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
err = clip_create(arg);
break;
case ATMARPD_CTRL:
+ lock_sock(sk);
err = atm_init_atmarp(vcc);
if (!err) {
sock->state = SS_CONNECTED;
__module_get(THIS_MODULE);
}
+ release_sock(sk);
break;
case ATMARP_MKIP:
+ lock_sock(sk);
err = clip_mkip(vcc, arg);
+ release_sock(sk);
break;
case ATMARP_SETENTRY:
err = clip_setentry(vcc, (__force __be32)arg);
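
The clip.c conversion turns the bare atmarpd pointer into an RCU-managed one: writers publish or clear it under atmarpd_lock with rcu_assign_pointer()/RCU_INIT_POINTER(), readers sample it inside rcu_read_lock()/rcu_dereference(), and the close path waits in synchronize_rcu() before purging the queue. A loose userspace analogue using C11 release/acquire atomics; it models only the publish/read half, since plain atomics have no grace period (names hypothetical):

#include <stdatomic.h>
#include <stdio.h>

static _Atomic(const char *) daemon_name;
static const char ready[] = "atmarpd";

static void publish(void)
{
	/* release ordering: initialization is visible before the pointer */
	atomic_store_explicit(&daemon_name, ready, memory_order_release);
}

static void retract(void)
{
	/* real RCU would follow this with synchronize_rcu() */
	atomic_store_explicit(&daemon_name, NULL, memory_order_relaxed);
}

static int send_ctrl(void)
{
	/* acquire load plays the role of rcu_dereference() */
	const char *d = atomic_load_explicit(&daemon_name,
					     memory_order_acquire);

	if (!d)
		return -1;	/* -EUNATCH in the kernel code */
	printf("queue to %s\n", d);
	return 0;
}

int main(void)
{
	send_ctrl();	/* fails: nothing published yet */
	publish();
	send_ctrl();
	retract();
	return 0;
}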
diff --git a/net/atm/common.c b/net/atm/common.c
index 9b75699992ff..d7f7976ea13a 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -635,6 +635,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
skb->dev = NULL; /* for paths shared with net_device interfaces */
if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) {
+ atm_return_tx(vcc, skb);
kfree_skb(skb);
error = -EFAULT;
goto out;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index acef984f3367..afb8d3eb2185 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -124,6 +124,7 @@ static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
/* Device structures */
static struct net_device *dev_lec[MAX_LEC_ITF];
+static DEFINE_MUTEX(lec_mutex);
#if IS_ENABLED(CONFIG_BRIDGE)
static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
@@ -685,6 +686,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg)
int bytes_left;
struct atmlec_ioc ioc_data;
+ lockdep_assert_held(&lec_mutex);
/* Lecd must be up in this case */
bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc));
if (bytes_left != 0)
@@ -710,6 +712,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg)
static int lec_mcast_attach(struct atm_vcc *vcc, int arg)
{
+ lockdep_assert_held(&lec_mutex);
if (arg < 0 || arg >= MAX_LEC_ITF)
return -EINVAL;
arg = array_index_nospec(arg, MAX_LEC_ITF);
@@ -725,6 +728,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg)
int i;
struct lec_priv *priv;
+ lockdep_assert_held(&lec_mutex);
if (arg < 0)
arg = 0;
if (arg >= MAX_LEC_ITF)
@@ -742,6 +746,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg)
snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i);
if (register_netdev(dev_lec[i])) {
free_netdev(dev_lec[i]);
+ dev_lec[i] = NULL;
return -EINVAL;
}
@@ -904,7 +909,6 @@ static void *lec_itf_walk(struct lec_state *state, loff_t *l)
v = (dev && netdev_priv(dev)) ?
lec_priv_walk(state, l, netdev_priv(dev)) : NULL;
if (!v && dev) {
- dev_put(dev);
/* Partial state reset for the next time we get called */
dev = NULL;
}
@@ -928,6 +932,7 @@ static void *lec_seq_start(struct seq_file *seq, loff_t *pos)
{
struct lec_state *state = seq->private;
+ mutex_lock(&lec_mutex);
state->itf = 0;
state->dev = NULL;
state->locked = NULL;
@@ -945,8 +950,9 @@ static void lec_seq_stop(struct seq_file *seq, void *v)
if (state->dev) {
spin_unlock_irqrestore(&state->locked->lec_arp_lock,
state->flags);
- dev_put(state->dev);
+ state->dev = NULL;
}
+ mutex_unlock(&lec_mutex);
}
static void *lec_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -1003,6 +1009,7 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return -ENOIOCTLCMD;
}
+ mutex_lock(&lec_mutex);
switch (cmd) {
case ATMLEC_CTRL:
err = lecd_attach(vcc, (int)arg);
@@ -1017,6 +1024,7 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
break;
}
+ mutex_unlock(&lec_mutex);
return err;
}
diff --git a/net/atm/raw.c b/net/atm/raw.c
index 2b5f78a7ec3e..1e6511ec842c 100644
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -36,7 +36,7 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb)
pr_debug("(%d) %d -= %d\n",
vcc->vci, sk_wmem_alloc_get(sk), ATM_SKB(skb)->acct_truesize);
- WARN_ON(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, &sk->sk_wmem_alloc));
+ atm_return_tx(vcc, skb);
dev_kfree_skb_any(skb);
sk->sk_write_space(sk);
}
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 995d29e7fb13..b19d851e1f44 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -146,11 +146,10 @@ void atm_dev_deregister(struct atm_dev *dev)
*/
mutex_lock(&atm_dev_mutex);
list_del(&dev->dev_list);
- mutex_unlock(&atm_dev_mutex);
-
atm_dev_release_vccs(dev);
atm_unregister_sysfs(dev);
atm_proc_dev_deregister(dev);
+ mutex_unlock(&atm_dev_mutex);
atm_dev_put(dev);
}
diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c
index 1bc51e2b05a3..3f72111ba651 100644
--- a/net/bluetooth/eir.c
+++ b/net/bluetooth/eir.c
@@ -242,7 +242,7 @@ u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
return ad_len;
}
-u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
+u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr, u8 size)
{
struct adv_info *adv = NULL;
u8 ad_len = 0, flags = 0;
@@ -286,7 +286,7 @@ u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
/* If flags would still be empty, then there is no need to
* include the "Flags" AD field".
*/
- if (flags) {
+ if (flags && (ad_len + eir_precalc_len(1) <= size)) {
ptr[0] = 0x02;
ptr[1] = EIR_FLAGS;
ptr[2] = flags;
@@ -316,7 +316,8 @@ skip_flags:
}
/* Provide Tx Power only if we can provide a valid value for it */
- if (adv_tx_power != HCI_TX_POWER_INVALID) {
+ if (adv_tx_power != HCI_TX_POWER_INVALID &&
+ (ad_len + eir_precalc_len(1) <= size)) {
ptr[0] = 0x02;
ptr[1] = EIR_TX_POWER;
ptr[2] = (u8)adv_tx_power;
@@ -366,17 +367,19 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr)
void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len)
{
- while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, len))) {
+ size_t dlen;
+
+ while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, &dlen))) {
u16 value = get_unaligned_le16(eir);
if (uuid == value) {
if (len)
- *len -= 2;
+ *len = dlen - 2;
return &eir[2];
}
- eir += *len;
- eir_len -= *len;
+ eir += dlen;
+ eir_len -= dlen;
}
return NULL;
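
The eir_get_service_data() fix keeps each field's length in a local dlen rather than writing through the caller's len pointer every iteration, so the cursor advances correctly and *len is set only on a match. A sketch of iterating length-prefixed records with a local length (the record format here is hypothetical):

#include <stddef.h>
#include <stdio.h>

/* records: [len][tag][payload...]; len counts tag plus payload */
static const unsigned char *find_tag(const unsigned char *p, size_t total,
				     unsigned char tag, size_t *out_len)
{
	while (total >= 2) {
		size_t dlen = p[0];	/* local; never clobbers *out_len */

		if (dlen < 1 || dlen + 1 > total)
			return NULL;	/* malformed record */
		if (p[1] == tag) {
			if (out_len)
				*out_len = dlen - 1;	/* payload only */
			return p + 2;
		}
		p += dlen + 1;
		total -= dlen + 1;
	}
	return NULL;
}

int main(void)
{
	const unsigned char eir[] = { 3, 0x16, 'h', 'i', 2, 0x09, 'X' };
	size_t len = 0;
	const unsigned char *d = find_tag(eir, sizeof(eir), 0x09, &len);

	if (d)
		printf("found %zu byte(s): %c\n", len, d[0]);
	return 0;
}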
diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h
index 5c89a05e8b29..9372db83f912 100644
--- a/net/bluetooth/eir.h
+++ b/net/bluetooth/eir.h
@@ -9,7 +9,7 @@
void eir_create(struct hci_dev *hdev, u8 *data);
-u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr);
+u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr, u8 size);
u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr);
u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 99efeed6a766..4f379184df5b 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1501,8 +1501,8 @@ static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos)
/* This function requires the caller holds hdev->lock */
static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst,
- struct bt_iso_qos *qos, __u8 base_len,
- __u8 *base)
+ __u8 sid, struct bt_iso_qos *qos,
+ __u8 base_len, __u8 *base)
{
struct hci_conn *conn;
int err;
@@ -1543,6 +1543,7 @@ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst,
return conn;
conn->state = BT_CONNECT;
+ conn->sid = sid;
hci_conn_hold(conn);
return conn;
@@ -2062,7 +2063,8 @@ static int create_big_sync(struct hci_dev *hdev, void *data)
if (qos->bcast.bis)
sync_interval = interval * 4;
- err = hci_start_per_adv_sync(hdev, qos->bcast.bis, conn->le_per_adv_data_len,
+ err = hci_start_per_adv_sync(hdev, qos->bcast.bis, conn->sid,
+ conn->le_per_adv_data_len,
conn->le_per_adv_data, flags, interval,
interval, sync_interval);
if (err)
@@ -2134,7 +2136,7 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err)
}
}
-struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
+struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid,
struct bt_iso_qos *qos,
__u8 base_len, __u8 *base)
{
@@ -2156,7 +2158,7 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
base, base_len);
/* We need hci_conn object using the BDADDR_ANY as dst */
- conn = hci_add_bis(hdev, dst, qos, base_len, eir);
+ conn = hci_add_bis(hdev, dst, sid, qos, base_len, eir);
if (IS_ERR(conn))
return conn;
@@ -2207,20 +2209,35 @@ static void bis_mark_per_adv(struct hci_conn *conn, void *data)
}
struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
- __u8 dst_type, struct bt_iso_qos *qos,
+ __u8 dst_type, __u8 sid,
+ struct bt_iso_qos *qos,
__u8 base_len, __u8 *base)
{
struct hci_conn *conn;
int err;
struct iso_list_data data;
- conn = hci_bind_bis(hdev, dst, qos, base_len, base);
+ conn = hci_bind_bis(hdev, dst, sid, qos, base_len, base);
if (IS_ERR(conn))
return conn;
if (conn->state == BT_CONNECTED)
return conn;
+ /* If the SID still needs to be allocated, search for the first
+ * available one.
+ */
+ if (conn->sid == HCI_SID_INVALID) {
+ u8 sid;
+
+ for (sid = 0; sid <= 0x0f; sid++) {
+ if (!hci_find_adv_sid(hdev, sid)) {
+ conn->sid = sid;
+ break;
+ }
+ }
+ }
+
data.big = qos->bcast.big;
data.bis = qos->bcast.bis;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 3b49828160b7..441cb1700f99 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -64,7 +64,7 @@ static DEFINE_IDA(hci_index_ida);
/* Get HCI device by index.
* Device is held on return. */
-struct hci_dev *hci_dev_get(int index)
+static struct hci_dev *__hci_dev_get(int index, int *srcu_index)
{
struct hci_dev *hdev = NULL, *d;
@@ -77,6 +77,8 @@ struct hci_dev *hci_dev_get(int index)
list_for_each_entry(d, &hci_dev_list, list) {
if (d->id == index) {
hdev = hci_dev_hold(d);
+ if (srcu_index)
+ *srcu_index = srcu_read_lock(&d->srcu);
break;
}
}
@@ -84,6 +86,22 @@ struct hci_dev *hci_dev_get(int index)
return hdev;
}
+struct hci_dev *hci_dev_get(int index)
+{
+ return __hci_dev_get(index, NULL);
+}
+
+static struct hci_dev *hci_dev_get_srcu(int index, int *srcu_index)
+{
+ return __hci_dev_get(index, srcu_index);
+}
+
+static void hci_dev_put_srcu(struct hci_dev *hdev, int srcu_index)
+{
+ srcu_read_unlock(&hdev->srcu, srcu_index);
+ hci_dev_put(hdev);
+}
+
/* ---- Inquiry support ---- */
bool hci_discovery_active(struct hci_dev *hdev)
@@ -568,9 +586,9 @@ static int hci_dev_do_reset(struct hci_dev *hdev)
int hci_dev_reset(__u16 dev)
{
struct hci_dev *hdev;
- int err;
+ int err, srcu_index;
- hdev = hci_dev_get(dev);
+ hdev = hci_dev_get_srcu(dev, &srcu_index);
if (!hdev)
return -ENODEV;
@@ -592,7 +610,7 @@ int hci_dev_reset(__u16 dev)
err = hci_dev_do_reset(hdev);
done:
- hci_dev_put(hdev);
+ hci_dev_put_srcu(hdev, srcu_index);
return err;
}
@@ -1585,6 +1603,19 @@ struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance)
}
/* This function requires the caller holds hdev->lock */
+struct adv_info *hci_find_adv_sid(struct hci_dev *hdev, u8 sid)
+{
+ struct adv_info *adv;
+
+ list_for_each_entry(adv, &hdev->adv_instances, list) {
+ if (adv->sid == sid)
+ return adv;
+ }
+
+ return NULL;
+}
+
+/* This function requires the caller holds hdev->lock */
struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance)
{
struct adv_info *cur_instance;
@@ -1736,7 +1767,7 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance,
}
/* This function requires the caller holds hdev->lock */
-struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance,
+struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, u8 sid,
u32 flags, u8 data_len, u8 *data,
u32 min_interval, u32 max_interval)
{
@@ -1748,6 +1779,7 @@ struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance,
if (IS_ERR(adv))
return adv;
+ adv->sid = sid;
adv->periodic = true;
adv->per_adv_data_len = data_len;
@@ -1877,10 +1909,8 @@ void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor)
if (monitor->handle)
idr_remove(&hdev->adv_monitors_idr, monitor->handle);
- if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) {
+ if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED)
hdev->adv_monitors_cnt--;
- mgmt_adv_monitor_removed(hdev, monitor->handle);
- }
kfree(monitor);
}
@@ -2421,6 +2451,11 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
if (!hdev)
return NULL;
+ if (init_srcu_struct(&hdev->srcu)) {
+ kfree(hdev);
+ return NULL;
+ }
+
hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1);
hdev->esco_type = (ESCO_HV1);
hdev->link_mode = (HCI_LM_ACCEPT);
@@ -2487,6 +2522,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
mutex_init(&hdev->lock);
mutex_init(&hdev->req_lock);
+ mutex_init(&hdev->mgmt_pending_lock);
ida_init(&hdev->unset_handle_ida);
@@ -2618,7 +2654,7 @@ int hci_register_dev(struct hci_dev *hdev)
/* Devices that are marked for raw-only usage are unconfigured
* and should not be included in normal operation.
*/
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE))
hci_dev_set_flag(hdev, HCI_UNCONFIGURED);
/* Mark Remote Wakeup connection flag as supported if driver has wakeup
@@ -2665,6 +2701,9 @@ void hci_unregister_dev(struct hci_dev *hdev)
list_del(&hdev->list);
write_unlock(&hci_dev_list_lock);
+ synchronize_srcu(&hdev->srcu);
+ cleanup_srcu_struct(&hdev->srcu);
+
disable_work_sync(&hdev->rx_work);
disable_work_sync(&hdev->cmd_work);
disable_work_sync(&hdev->tx_work);
@@ -2745,7 +2784,7 @@ int hci_register_suspend_notifier(struct hci_dev *hdev)
int ret = 0;
if (!hdev->suspend_notifier.notifier_call &&
- !test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) {
+ !hci_test_quirk(hdev, HCI_QUIRK_NO_SUSPEND_NOTIFIER)) {
hdev->suspend_notifier.notifier_call = hci_suspend_notifier;
ret = register_pm_notifier(&hdev->suspend_notifier);
}
@@ -3417,23 +3456,18 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
bt_dev_err(hdev, "link tx timeout");
- rcu_read_lock();
+ hci_dev_lock(hdev);
/* Kill stalled connections */
- list_for_each_entry_rcu(c, &h->list, list) {
+ list_for_each_entry(c, &h->list, list) {
if (c->type == type && c->sent) {
bt_dev_err(hdev, "killing stalled connection %pMR",
&c->dst);
- /* hci_disconnect might sleep, so, we have to release
- * the RCU read lock before calling it.
- */
- rcu_read_unlock();
hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);
- rcu_read_lock();
}
}
- rcu_read_unlock();
+ hci_dev_unlock(hdev);
}
static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type,
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index f625074d1f00..99e2e9fc70e8 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -38,7 +38,7 @@ static ssize_t __name ## _read(struct file *file, \
struct hci_dev *hdev = file->private_data; \
char buf[3]; \
\
- buf[0] = test_bit(__quirk, &hdev->quirks) ? 'Y' : 'N'; \
+ buf[0] = test_bit(__quirk, hdev->quirk_flags) ? 'Y' : 'N'; \
buf[1] = '\n'; \
buf[2] = '\0'; \
return simple_read_from_buffer(user_buf, count, ppos, buf, 2); \
@@ -59,10 +59,10 @@ static ssize_t __name ## _write(struct file *file, \
if (err) \
return err; \
\
- if (enable == test_bit(__quirk, &hdev->quirks)) \
+ if (enable == test_bit(__quirk, hdev->quirk_flags)) \
return -EALREADY; \
\
- change_bit(__quirk, &hdev->quirks); \
+ change_bit(__quirk, hdev->quirk_flags); \
\
return count; \
} \
@@ -1356,7 +1356,7 @@ static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf,
* for the vendor callback. Instead just store the desired value and
* the setting will be programmed when the controller gets powered on.
*/
- if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) &&
+ if (hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_DIAG) &&
(!test_bit(HCI_RUNNING, &hdev->flags) ||
hci_dev_test_flag(hdev, HCI_USER_CHANNEL)))
goto done;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 66052d6aaa1d..cf4b30ac9e0e 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -908,8 +908,8 @@ static u8 hci_cc_read_local_ext_features(struct hci_dev *hdev, void *data,
return rp->status;
if (hdev->max_page < rp->max_page) {
- if (test_bit(HCI_QUIRK_BROKEN_LOCAL_EXT_FEATURES_PAGE_2,
- &hdev->quirks))
+ if (hci_test_quirk(hdev,
+ HCI_QUIRK_BROKEN_LOCAL_EXT_FEATURES_PAGE_2))
bt_dev_warn(hdev, "broken local ext features page 2");
else
hdev->max_page = rp->max_page;
@@ -936,7 +936,7 @@ static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data,
hdev->acl_pkts = __le16_to_cpu(rp->acl_max_pkt);
hdev->sco_pkts = __le16_to_cpu(rp->sco_max_pkt);
- if (test_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks)) {
+ if (hci_test_quirk(hdev, HCI_QUIRK_FIXUP_BUFFER_SIZE)) {
hdev->sco_mtu = 64;
hdev->sco_pkts = 8;
}
@@ -2150,40 +2150,6 @@ static u8 hci_cc_set_adv_param(struct hci_dev *hdev, void *data,
return rp->status;
}
-static u8 hci_cc_set_ext_adv_param(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_rp_le_set_ext_adv_params *rp = data;
- struct hci_cp_le_set_ext_adv_params *cp;
- struct adv_info *adv_instance;
-
- bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
-
- if (rp->status)
- return rp->status;
-
- cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS);
- if (!cp)
- return rp->status;
-
- hci_dev_lock(hdev);
- hdev->adv_addr_type = cp->own_addr_type;
- if (!cp->handle) {
- /* Store in hdev for instance 0 */
- hdev->adv_tx_power = rp->tx_power;
- } else {
- adv_instance = hci_find_adv_instance(hdev, cp->handle);
- if (adv_instance)
- adv_instance->tx_power = rp->tx_power;
- }
- /* Update adv data as tx power is known now */
- hci_update_adv_data(hdev, cp->handle);
-
- hci_dev_unlock(hdev);
-
- return rp->status;
-}
-
static u8 hci_cc_read_rssi(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -3005,7 +2971,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, void *data,
* state to indicate completion.
*/
if (!hci_dev_test_flag(hdev, HCI_LE_SCAN) ||
- !test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks))
+ !hci_test_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY))
hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
goto unlock;
}
@@ -3024,7 +2990,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, void *data,
* state to indicate completion.
*/
if (!hci_dev_test_flag(hdev, HCI_LE_SCAN) ||
- !test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks))
+ !hci_test_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY))
hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
}
@@ -3648,8 +3614,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data,
/* We skip the WRITE_AUTH_PAYLOAD_TIMEOUT for ATS2851 based controllers
* to avoid unexpected SMP command errors when pairing.
*/
- if (test_bit(HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT,
- &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT))
goto notify;
/* Set the default Authenticated Payload Timeout after
@@ -4164,8 +4129,6 @@ static const struct hci_cc {
HCI_CC(HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS,
hci_cc_le_read_num_adv_sets,
sizeof(struct hci_rp_le_read_num_supported_adv_sets)),
- HCI_CC(HCI_OP_LE_SET_EXT_ADV_PARAMS, hci_cc_set_ext_adv_param,
- sizeof(struct hci_rp_le_set_ext_adv_params)),
HCI_CC_STATUS(HCI_OP_LE_SET_EXT_ADV_ENABLE,
hci_cc_le_set_ext_adv_enable),
HCI_CC_STATUS(HCI_OP_LE_SET_ADV_SET_RAND_ADDR,
@@ -5950,7 +5913,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
* while we have an existing one in peripheral role.
*/
if (hdev->conn_hash.le_num_peripheral > 0 &&
- (test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) ||
+ (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_LE_STATES) ||
!(hdev->le_states[3] & 0x10)))
return NULL;
@@ -6346,8 +6309,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data,
evt_type = __le16_to_cpu(info->type) & LE_EXT_ADV_EVT_TYPE_MASK;
legacy_evt_type = ext_evt_type_to_legacy(hdev, evt_type);
- if (test_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY,
- &hdev->quirks)) {
+ if (hci_test_quirk(hdev,
+ HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY)) {
info->primary_phy &= 0x1f;
info->secondary_phy &= 0x1f;
}
@@ -7002,7 +6965,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
bis->iso_qos.bcast.in.sdu = le16_to_cpu(ev->max_pdu);
if (!ev->status) {
+ bis->state = BT_CONNECTED;
set_bit(HCI_CONN_BIG_SYNC, &bis->flags);
+ hci_debugfs_create_conn(bis);
+ hci_conn_add_sysfs(bis);
hci_iso_setup_path(bis);
}
}
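Dropping hci_cc_set_ext_adv_param from the completion table means the Set Extended Advertising Parameters reply is no longer handled asynchronously here; hci_sync.c consumes it inline (see below). For reference, the table being edited is an opcode-to-handler dispatch; a simplified sketch of that shape, with illustrative names and status codes:

#include <linux/types.h>

struct cc_entry_sketch {
	u16 op;				/* command opcode */
	u8 (*func)(void *hdev, void *data, size_t len);
	size_t rp_len;			/* expected reply size */
};

static u8 dispatch_cc(const struct cc_entry_sketch *tbl, size_t n, u16 op,
		      void *hdev, void *data, size_t len)
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (tbl[i].op != op)
			continue;
		if (len < tbl[i].rp_len)
			return 0xff;	/* malformed reply, illustrative */
		return tbl[i].func(hdev, data, len);
	}
	return 0x01;			/* unknown command, illustrative */
}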
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 62d1ff951ebe..7938c004071c 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -393,7 +393,7 @@ static void le_scan_disable(struct work_struct *work)
if (hdev->discovery.type != DISCOV_TYPE_INTERLEAVED)
goto _return;
- if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) {
+ if (hci_test_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY)) {
if (!test_bit(HCI_INQUIRY, &hdev->flags) &&
hdev->discovery.state != DISCOVERY_RESOLVING)
goto discov_stopped;
@@ -1205,9 +1205,126 @@ static int hci_set_adv_set_random_addr_sync(struct hci_dev *hdev, u8 instance,
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
+static int
+hci_set_ext_adv_params_sync(struct hci_dev *hdev, struct adv_info *adv,
+ const struct hci_cp_le_set_ext_adv_params *cp,
+ struct hci_rp_le_set_ext_adv_params *rp)
+{
+ struct sk_buff *skb;
+
+ skb = __hci_cmd_sync(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(*cp),
+ cp, HCI_CMD_TIMEOUT);
+
+ /* If the command returns only a status event, skb is set to ERR_PTR(-ENODATA) */
+ if (skb == ERR_PTR(-ENODATA))
+ return 0;
+
+ if (IS_ERR(skb)) {
+ bt_dev_err(hdev, "Opcode 0x%4.4x failed: %ld",
+ HCI_OP_LE_SET_EXT_ADV_PARAMS, PTR_ERR(skb));
+ return PTR_ERR(skb);
+ }
+
+ if (skb->len != sizeof(*rp)) {
+ bt_dev_err(hdev, "Invalid response length for 0x%4.4x: %u",
+ HCI_OP_LE_SET_EXT_ADV_PARAMS, skb->len);
+ kfree_skb(skb);
+ return -EIO;
+ }
+
+ memcpy(rp, skb->data, sizeof(*rp));
+ kfree_skb(skb);
+
+ if (!rp->status) {
+ hdev->adv_addr_type = cp->own_addr_type;
+ if (!cp->handle) {
+ /* Store in hdev for instance 0 */
+ hdev->adv_tx_power = rp->tx_power;
+ } else if (adv) {
+ adv->tx_power = rp->tx_power;
+ }
+ }
+
+ return rp->status;
+}
+
+static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance)
+{
+ DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length,
+ HCI_MAX_EXT_AD_LENGTH);
+ u8 len;
+ struct adv_info *adv = NULL;
+ int err;
+
+ if (instance) {
+ adv = hci_find_adv_instance(hdev, instance);
+ if (!adv || !adv->adv_data_changed)
+ return 0;
+ }
+
+ len = eir_create_adv_data(hdev, instance, pdu->data,
+ HCI_MAX_EXT_AD_LENGTH);
+
+ pdu->length = len;
+ pdu->handle = adv ? adv->handle : instance;
+ pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
+ pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG;
+
+ err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA,
+ struct_size(pdu, data, len), pdu,
+ HCI_CMD_TIMEOUT);
+ if (err)
+ return err;
+
+ /* Update data if the command succeeds */
+ if (adv) {
+ adv->adv_data_changed = false;
+ } else {
+ memcpy(hdev->adv_data, pdu->data, len);
+ hdev->adv_data_len = len;
+ }
+
+ return 0;
+}
+
+static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance)
+{
+ struct hci_cp_le_set_adv_data cp;
+ u8 len;
+
+ memset(&cp, 0, sizeof(cp));
+
+ len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data));
+
+ /* There's nothing to do if the data hasn't changed */
+ if (hdev->adv_data_len == len &&
+ memcmp(cp.data, hdev->adv_data, len) == 0)
+ return 0;
+
+ memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
+ hdev->adv_data_len = len;
+
+ cp.length = len;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+}
+
+int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance)
+{
+ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
+ return 0;
+
+ if (ext_adv_capable(hdev))
+ return hci_set_ext_adv_data_sync(hdev, instance);
+
+ return hci_set_adv_data_sync(hdev, instance);
+}
+
int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
{
struct hci_cp_le_set_ext_adv_params cp;
+ struct hci_rp_le_set_ext_adv_params rp;
bool connectable;
u32 flags;
bdaddr_t random_addr;
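The new hci_set_ext_adv_params_sync() above establishes the synchronous reply-handling pattern: a status-only completion surfaces as ERR_PTR(-ENODATA) and counts as success, and anything else is length-checked and copied out before the skb is freed. A condensed sketch of that flow (surrounding types simplified; __hci_cmd_sync() is used as in the diff):

static int sync_cmd_copy_reply(struct hci_dev *hdev, u16 op,
			       const void *param, u32 plen,
			       void *rp, size_t rp_len)
{
	struct sk_buff *skb;

	skb = __hci_cmd_sync(hdev, op, plen, param, HCI_CMD_TIMEOUT);

	/* A completion carrying only a status event is reported
	 * as -ENODATA and means there is no payload to copy.
	 */
	if (skb == ERR_PTR(-ENODATA))
		return 0;
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	if (skb->len != rp_len) {
		kfree_skb(skb);
		return -EIO;		/* truncated or oversized reply */
	}

	memcpy(rp, skb->data, rp_len);
	kfree_skb(skb);
	return 0;
}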
@@ -1228,7 +1345,7 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
* Command Disallowed error, so we must first disable the
* instance if it is active.
*/
- if (adv && !adv->pending) {
+ if (adv) {
err = hci_disable_ext_adv_instance_sync(hdev, instance);
if (err)
return err;
@@ -1261,10 +1378,12 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
hci_cpu_to_le24(adv->min_interval, cp.min_interval);
hci_cpu_to_le24(adv->max_interval, cp.max_interval);
cp.tx_power = adv->tx_power;
+ cp.sid = adv->sid;
} else {
hci_cpu_to_le24(hdev->le_adv_min_interval, cp.min_interval);
hci_cpu_to_le24(hdev->le_adv_max_interval, cp.max_interval);
cp.tx_power = HCI_ADV_TX_POWER_NO_PREFERENCE;
+ cp.sid = 0x00;
}
secondary_adv = (flags & MGMT_ADV_FLAG_SEC_MASK);
@@ -1314,8 +1433,12 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
cp.secondary_phy = HCI_ADV_PHY_1M;
}
- err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+ err = hci_set_ext_adv_params_sync(hdev, adv, &cp, &rp);
+ if (err)
+ return err;
+
+ /* Update adv data now that the tx power is known */
+ err = hci_set_ext_adv_data_sync(hdev, cp.handle);
if (err)
return err;
@@ -1559,7 +1682,8 @@ static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance)
static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv)
{
u8 bid[3];
- u8 ad[4 + 3];
+ u8 ad[HCI_MAX_EXT_AD_LENGTH];
+ u8 len;
/* Skip if NULL adv as instance 0x00 is used for general purpose
* advertising, so it cannot be used for the likes of Broadcast Announcement
@@ -1585,14 +1709,16 @@ static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv)
/* Generate Broadcast ID */
get_random_bytes(bid, sizeof(bid));
- eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid));
- hci_set_adv_instance_data(hdev, adv->instance, sizeof(ad), ad, 0, NULL);
+ len = eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid));
+ memcpy(ad + len, adv->adv_data, adv->adv_data_len);
+ hci_set_adv_instance_data(hdev, adv->instance, len + adv->adv_data_len,
+ ad, 0, NULL);
return hci_update_adv_data_sync(hdev, adv->instance);
}
-int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len,
- u8 *data, u32 flags, u16 min_interval,
+int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 sid,
+ u8 data_len, u8 *data, u32 flags, u16 min_interval,
u16 max_interval, u16 sync_interval)
{
struct adv_info *adv = NULL;
@@ -1603,9 +1729,28 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len,
if (instance) {
adv = hci_find_adv_instance(hdev, instance);
- /* Create an instance if that could not be found */
- if (!adv) {
- adv = hci_add_per_instance(hdev, instance, flags,
+ if (adv) {
+ if (sid != HCI_SID_INVALID && adv->sid != sid) {
+ /* If the SIDs don't match, attempt to find
+ * the instance by SID.
+ */
+ adv = hci_find_adv_sid(hdev, sid);
+ if (!adv) {
+ bt_dev_err(hdev,
+ "Unable to find adv_info");
+ return -EINVAL;
+ }
+ }
+
+ /* Turn it into periodic advertising */
+ adv->periodic = true;
+ adv->per_adv_data_len = data_len;
+ if (data)
+ memcpy(adv->per_adv_data, data, data_len);
+ adv->flags = flags;
+ } else {
+ /* Create an instance if one could not be found */
+ adv = hci_add_per_instance(hdev, instance, sid, flags,
data_len, data,
sync_interval,
sync_interval);
@@ -1798,78 +1943,6 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason)
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
-static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance)
-{
- DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length,
- HCI_MAX_EXT_AD_LENGTH);
- u8 len;
- struct adv_info *adv = NULL;
- int err;
-
- if (instance) {
- adv = hci_find_adv_instance(hdev, instance);
- if (!adv || !adv->adv_data_changed)
- return 0;
- }
-
- len = eir_create_adv_data(hdev, instance, pdu->data);
-
- pdu->length = len;
- pdu->handle = adv ? adv->handle : instance;
- pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
- pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG;
-
- err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA,
- struct_size(pdu, data, len), pdu,
- HCI_CMD_TIMEOUT);
- if (err)
- return err;
-
- /* Update data if the command succeed */
- if (adv) {
- adv->adv_data_changed = false;
- } else {
- memcpy(hdev->adv_data, pdu->data, len);
- hdev->adv_data_len = len;
- }
-
- return 0;
-}
-
-static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance)
-{
- struct hci_cp_le_set_adv_data cp;
- u8 len;
-
- memset(&cp, 0, sizeof(cp));
-
- len = eir_create_adv_data(hdev, instance, cp.data);
-
- /* There's nothing to do if the data hasn't changed */
- if (hdev->adv_data_len == len &&
- memcmp(cp.data, hdev->adv_data, len) == 0)
- return 0;
-
- memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
- hdev->adv_data_len = len;
-
- cp.length = len;
-
- return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
-}
-
-int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance)
-{
- if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
- return 0;
-
- if (ext_adv_capable(hdev))
- return hci_set_ext_adv_data_sync(hdev, instance);
-
- return hci_set_adv_data_sync(hdev, instance);
-}
-
int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance,
bool force)
{
@@ -1945,13 +2018,10 @@ static int hci_clear_adv_sets_sync(struct hci_dev *hdev, struct sock *sk)
static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force)
{
struct adv_info *adv, *n;
- int err = 0;
if (ext_adv_capable(hdev))
/* Remove all existing sets */
- err = hci_clear_adv_sets_sync(hdev, sk);
- if (ext_adv_capable(hdev))
- return err;
+ return hci_clear_adv_sets_sync(hdev, sk);
/* This is safe as long as no command is sent while the lock is
* held.
@@ -1979,13 +2049,11 @@ static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force)
static int hci_remove_adv_sync(struct hci_dev *hdev, u8 instance,
struct sock *sk)
{
- int err = 0;
+ int err;
/* If we use extended advertising, instance has to be removed first. */
if (ext_adv_capable(hdev))
- err = hci_remove_ext_adv_instance_sync(hdev, instance, sk);
- if (ext_adv_capable(hdev))
- return err;
+ return hci_remove_ext_adv_instance_sync(hdev, instance, sk);
/* This is safe as long as no command is sent while the lock is
* held.
@@ -2084,16 +2152,13 @@ int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type)
int hci_disable_advertising_sync(struct hci_dev *hdev)
{
u8 enable = 0x00;
- int err = 0;
/* If controller is not advertising we are done. */
if (!hci_dev_test_flag(hdev, HCI_LE_ADV))
return 0;
if (ext_adv_capable(hdev))
- err = hci_disable_ext_adv_instance_sync(hdev, 0x00);
- if (ext_adv_capable(hdev))
- return err;
+ return hci_disable_ext_adv_instance_sync(hdev, 0x00);
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE,
sizeof(enable), &enable, HCI_CMD_TIMEOUT);
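This hunk and the two before it apply the same cleanup: when ext_adv_capable() fully decides the path, return the extended-API result directly instead of stashing it in err and re-testing the same predicate. A before/after sketch with hypothetical helpers:

#include <linux/types.h>

static int do_ext(void);	/* hypothetical */
static int do_legacy(void);	/* hypothetical */

/* before: err accumulation plus a redundant second test */
static int f_old(bool capable)
{
	int err = 0;

	if (capable)
		err = do_ext();
	if (capable)
		return err;

	return do_legacy();
}

/* after: a direct early return, identical behavior */
static int f_new(bool capable)
{
	if (capable)
		return do_ext();

	return do_legacy();
}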
@@ -2456,6 +2521,10 @@ static int hci_pause_advertising_sync(struct hci_dev *hdev)
int err;
int old_state;
+ /* If controller is not advertising we are done. */
+ if (!hci_dev_test_flag(hdev, HCI_LE_ADV))
+ return 0;
+
/* If already been paused there is nothing to do. */
if (hdev->advertising_paused)
return 0;
@@ -3518,7 +3587,7 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev)
if (ret < 0 || !bacmp(&ba, BDADDR_ANY))
return;
- if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_BDADDR_PROPERTY_BROKEN))
baswap(&hdev->public_addr, &ba);
else
bacpy(&hdev->public_addr, &ba);
@@ -3593,7 +3662,7 @@ static int hci_init0_sync(struct hci_dev *hdev)
bt_dev_dbg(hdev, "");
/* Reset */
- if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) {
+ if (!hci_test_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE)) {
err = hci_reset_sync(hdev);
if (err)
return err;
@@ -3606,7 +3675,7 @@ static int hci_unconf_init_sync(struct hci_dev *hdev)
{
int err;
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE))
return 0;
err = hci_init0_sync(hdev);
@@ -3649,7 +3718,7 @@ static int hci_read_local_cmds_sync(struct hci_dev *hdev)
* supported commands.
*/
if (hdev->hci_ver > BLUETOOTH_VER_1_1 &&
- !test_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks))
+ !hci_test_quirk(hdev, HCI_QUIRK_BROKEN_LOCAL_COMMANDS))
return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_COMMANDS,
0, NULL, HCI_CMD_TIMEOUT);
@@ -3663,7 +3732,7 @@ static int hci_init1_sync(struct hci_dev *hdev)
bt_dev_dbg(hdev, "");
/* Reset */
- if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) {
+ if (!hci_test_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE)) {
err = hci_reset_sync(hdev);
if (err)
return err;
@@ -3726,7 +3795,7 @@ static int hci_set_event_filter_sync(struct hci_dev *hdev, u8 flt_type,
if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
return 0;
- if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL))
return 0;
memset(&cp, 0, sizeof(cp));
@@ -3753,7 +3822,7 @@ static int hci_clear_event_filter_sync(struct hci_dev *hdev)
* a hci_set_event_filter_sync() call succeeds, but we do
* the check both for parity and as a future reminder.
*/
- if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL))
return 0;
return hci_set_event_filter_sync(hdev, HCI_FLT_CLEAR_ALL, 0x00,
@@ -3777,7 +3846,7 @@ static int hci_write_sync_flowctl_sync(struct hci_dev *hdev)
/* Check if the controller supports SCO and HCI_OP_WRITE_SYNC_FLOWCTL */
if (!lmp_sco_capable(hdev) || !(hdev->commands[10] & BIT(4)) ||
- !test_bit(HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED, &hdev->quirks))
+ !hci_test_quirk(hdev, HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED))
return 0;
memset(&cp, 0, sizeof(cp));
@@ -3852,7 +3921,7 @@ static int hci_write_inquiry_mode_sync(struct hci_dev *hdev)
u8 mode;
if (!lmp_inq_rssi_capable(hdev) &&
- !test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks))
+ !hci_test_quirk(hdev, HCI_QUIRK_FIXUP_INQUIRY_MODE))
return 0;
/* If Extended Inquiry Result events are supported, then
@@ -4042,7 +4111,7 @@ static int hci_set_event_mask_sync(struct hci_dev *hdev)
}
if (lmp_inq_rssi_capable(hdev) ||
- test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks))
+ hci_test_quirk(hdev, HCI_QUIRK_FIXUP_INQUIRY_MODE))
events[4] |= 0x02; /* Inquiry Result with RSSI */
if (lmp_ext_feat_capable(hdev))
@@ -4094,7 +4163,7 @@ static int hci_read_stored_link_key_sync(struct hci_dev *hdev)
struct hci_cp_read_stored_link_key cp;
if (!(hdev->commands[6] & 0x20) ||
- test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks))
+ hci_test_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY))
return 0;
memset(&cp, 0, sizeof(cp));
@@ -4143,7 +4212,7 @@ static int hci_read_def_err_data_reporting_sync(struct hci_dev *hdev)
{
if (!(hdev->commands[18] & 0x04) ||
!(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) ||
- test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks))
+ hci_test_quirk(hdev, HCI_QUIRK_BROKEN_ERR_DATA_REPORTING))
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_READ_DEF_ERR_DATA_REPORTING,
@@ -4157,7 +4226,7 @@ static int hci_read_page_scan_type_sync(struct hci_dev *hdev)
* this command in the bit mask of supported commands.
*/
if (!(hdev->commands[13] & 0x01) ||
- test_bit(HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE, &hdev->quirks))
+ hci_test_quirk(hdev, HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE))
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_READ_PAGE_SCAN_TYPE,
@@ -4352,7 +4421,7 @@ static int hci_le_read_adv_tx_power_sync(struct hci_dev *hdev)
static int hci_le_read_tx_power_sync(struct hci_dev *hdev)
{
if (!(hdev->commands[38] & 0x80) ||
- test_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks))
+ hci_test_quirk(hdev, HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER))
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_TRANSMIT_POWER,
@@ -4395,7 +4464,7 @@ static int hci_le_set_rpa_timeout_sync(struct hci_dev *hdev)
__le16 timeout = cpu_to_le16(hdev->rpa_timeout);
if (!(hdev->commands[35] & 0x04) ||
- test_bit(HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT, &hdev->quirks))
+ hci_test_quirk(hdev, HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT))
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_RPA_TIMEOUT,
@@ -4540,7 +4609,7 @@ static int hci_delete_stored_link_key_sync(struct hci_dev *hdev)
* just disable this command.
*/
if (!(hdev->commands[6] & 0x80) ||
- test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks))
+ hci_test_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY))
return 0;
memset(&cp, 0, sizeof(cp));
@@ -4666,7 +4735,7 @@ static int hci_set_err_data_report_sync(struct hci_dev *hdev)
if (!(hdev->commands[18] & 0x08) ||
!(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) ||
- test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks))
+ hci_test_quirk(hdev, HCI_QUIRK_BROKEN_ERR_DATA_REPORTING))
return 0;
if (enabled == hdev->err_data_reporting)
@@ -4879,7 +4948,7 @@ static int hci_dev_setup_sync(struct hci_dev *hdev)
size_t i;
if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
- !test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks))
+ !hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP))
return 0;
bt_dev_dbg(hdev, "");
@@ -4890,7 +4959,7 @@ static int hci_dev_setup_sync(struct hci_dev *hdev)
ret = hdev->setup(hdev);
for (i = 0; i < ARRAY_SIZE(hci_broken_table); i++) {
- if (test_bit(hci_broken_table[i].quirk, &hdev->quirks))
+ if (hci_test_quirk(hdev, hci_broken_table[i].quirk))
bt_dev_warn(hdev, "%s", hci_broken_table[i].desc);
}
@@ -4898,10 +4967,10 @@ static int hci_dev_setup_sync(struct hci_dev *hdev)
* BD_ADDR invalid before creating the HCI device or in
* its setup callback.
*/
- invalid_bdaddr = test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) ||
- test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+ invalid_bdaddr = hci_test_quirk(hdev, HCI_QUIRK_INVALID_BDADDR) ||
+ hci_test_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY);
if (!ret) {
- if (test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks) &&
+ if (hci_test_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY) &&
!bacmp(&hdev->public_addr, BDADDR_ANY))
hci_dev_get_bd_addr_from_property(hdev);
@@ -4923,7 +4992,7 @@ static int hci_dev_setup_sync(struct hci_dev *hdev)
* In case any of them is set, the controller has to
* start up as unconfigured.
*/
- if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) ||
+ if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG) ||
invalid_bdaddr)
hci_dev_set_flag(hdev, HCI_UNCONFIGURED);
@@ -4983,7 +5052,7 @@ static int hci_dev_init_sync(struct hci_dev *hdev)
* then they need to be reprogrammed after the init procedure
* completed.
*/
- if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) &&
+ if (hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_DIAG) &&
!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) && hdev->set_diag)
ret = hdev->set_diag(hdev, true);
@@ -5240,7 +5309,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
/* Reset device */
skb_queue_purge(&hdev->cmd_q);
atomic_set(&hdev->cmd_cnt, 1);
- if (test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks) &&
+ if (hci_test_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE) &&
!auto_off && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
set_bit(HCI_INIT, &hdev->flags);
hci_reset_sync(hdev);
@@ -5424,7 +5493,7 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
{
struct hci_cp_disconnect cp;
- if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) {
+ if (conn->type == BIS_LINK) {
/* This is a BIS connection, hci_conn_del will
* do the necessary cleanup.
*/
@@ -5890,7 +5959,7 @@ static int hci_active_scan_sync(struct hci_dev *hdev, uint16_t interval)
own_addr_type = ADDR_LE_DEV_PUBLIC;
if (hci_is_adv_monitoring(hdev) ||
- (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) &&
+ (hci_test_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER) &&
hdev->discovery.result_filtering)) {
/* Duplicate filter should be disabled when some advertisement
* monitor is activated, otherwise AdvMon can only receive one
@@ -5953,8 +6022,7 @@ int hci_start_discovery_sync(struct hci_dev *hdev)
* and LE scanning are done sequentially with separate
* timeouts.
*/
- if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY,
- &hdev->quirks)) {
+ if (hci_test_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY)) {
timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT);
/* During simultaneous discovery, we double LE scan
* interval. We must leave some time for the controller
@@ -6031,7 +6099,7 @@ static int hci_update_event_filter_sync(struct hci_dev *hdev)
/* Some fake CSR controllers lock up after setting this type of
* filter, so avoid sending the request altogether.
*/
- if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL))
return 0;
/* Always clear event filter when starting */
@@ -6252,6 +6320,7 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev,
struct hci_conn *conn)
{
struct hci_cp_le_set_ext_adv_params cp;
+ struct hci_rp_le_set_ext_adv_params rp;
int err;
bdaddr_t random_addr;
u8 own_addr_type;
@@ -6293,8 +6362,12 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev,
if (err)
return err;
- err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+ err = hci_set_ext_adv_params_sync(hdev, NULL, &cp, &rp);
+ if (err)
+ return err;
+
+ /* Update adv data now that the tx power is known */
+ err = hci_set_ext_adv_data_sync(hdev, cp.handle);
if (err)
return err;
@@ -6741,8 +6814,8 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
return 0;
}
- /* No privacy so use a public address. */
- *own_addr_type = ADDR_LE_DEV_PUBLIC;
+ /* No privacy, use the current address */
+ hci_copy_identity_address(hdev, rand_addr, own_addr_type);
return 0;
}
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 6e2c752aaa8f..3c2c98eecc62 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -336,7 +336,7 @@ static int iso_connect_bis(struct sock *sk)
struct hci_dev *hdev;
int err;
- BT_DBG("%pMR", &iso_pi(sk)->src);
+ BT_DBG("%pMR (SID 0x%2.2x)", &iso_pi(sk)->src, iso_pi(sk)->bc_sid);
hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src,
iso_pi(sk)->src_type);
@@ -365,7 +365,7 @@ static int iso_connect_bis(struct sock *sk)
/* Just bind if DEFER_SETUP has been set */
if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
- hcon = hci_bind_bis(hdev, &iso_pi(sk)->dst,
+ hcon = hci_bind_bis(hdev, &iso_pi(sk)->dst, iso_pi(sk)->bc_sid,
&iso_pi(sk)->qos, iso_pi(sk)->base_len,
iso_pi(sk)->base);
if (IS_ERR(hcon)) {
@@ -375,12 +375,16 @@ static int iso_connect_bis(struct sock *sk)
} else {
hcon = hci_connect_bis(hdev, &iso_pi(sk)->dst,
le_addr_type(iso_pi(sk)->dst_type),
- &iso_pi(sk)->qos, iso_pi(sk)->base_len,
- iso_pi(sk)->base);
+ iso_pi(sk)->bc_sid, &iso_pi(sk)->qos,
+ iso_pi(sk)->base_len, iso_pi(sk)->base);
if (IS_ERR(hcon)) {
err = PTR_ERR(hcon);
goto unlock;
}
+
+ /* Update SID if it was not set */
+ if (iso_pi(sk)->bc_sid == HCI_SID_INVALID)
+ iso_pi(sk)->bc_sid = hcon->sid;
}
conn = iso_conn_add(hcon);
@@ -1337,10 +1341,13 @@ static int iso_sock_getname(struct socket *sock, struct sockaddr *addr,
addr->sa_family = AF_BLUETOOTH;
if (peer) {
+ struct hci_conn *hcon = iso_pi(sk)->conn ?
+ iso_pi(sk)->conn->hcon : NULL;
+
bacpy(&sa->iso_bdaddr, &iso_pi(sk)->dst);
sa->iso_bdaddr_type = iso_pi(sk)->dst_type;
- if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
+ if (hcon && hcon->type == BIS_LINK) {
sa->iso_bc->bc_sid = iso_pi(sk)->bc_sid;
sa->iso_bc->bc_num_bis = iso_pi(sk)->bc_num_bis;
memcpy(sa->iso_bc->bc_bis, iso_pi(sk)->bc_bis,
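iso_connect_bis() now threads the socket's bc_sid into hci_bind_bis()/hci_connect_bis() and back-fills it once the controller has chosen one. The sentinel pattern in isolation; the HCI_SID_INVALID value is inferred from the diff's usage, so treat it as an assumption:

#include <linux/types.h>

#define HCI_SID_INVALID_SKETCH 0xff	/* assumed sentinel value */

/* Userspace may leave the broadcast SID unset; once the controller
 * assigns one, copy it back so getname() reports the real value.
 */
static void backfill_sid(u8 *sock_sid, u8 hcon_sid)
{
	if (*sock_sid == HCI_SID_INVALID_SKETCH)
		*sock_sid = hcon_sid;
}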
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index a5bde5db58ef..805c752ac0a9 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3415,7 +3415,7 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC };
struct l2cap_conf_efs efs;
u8 remote_efs = 0;
- u16 mtu = L2CAP_DEFAULT_MTU;
+ u16 mtu = 0;
u16 result = L2CAP_CONF_SUCCESS;
u16 size;
@@ -3520,6 +3520,29 @@ done:
/* Configure output options and let the other side know
* which ones we don't like. */
+ /* If the MTU is not provided in the configure request, try adjusting
+ * it to the current output MTU if one has been set
+ *
+ * Bluetooth Core 6.1, Vol 3, Part A, Section 4.5
+ *
+ * Each configuration parameter value (if any is present) in an
+ * L2CAP_CONFIGURATION_RSP packet reflects an ‘adjustment’ to a
+ * configuration parameter value that has been sent (or, in case
+ * of default values, implied) in the corresponding
+ * L2CAP_CONFIGURATION_REQ packet.
+ */
+ if (!mtu) {
+ /* Only adjust for ERTM channels as for older modes the
+ * remote stack may not be able to detect that the
+ * adjustment causing it to silently drop packets.
+ */
+ if (chan->mode == L2CAP_MODE_ERTM &&
+ chan->omtu && chan->omtu != L2CAP_DEFAULT_MTU)
+ mtu = chan->omtu;
+ else
+ mtu = L2CAP_DEFAULT_MTU;
+ }
+
if (mtu < L2CAP_DEFAULT_MIN_MTU)
result = L2CAP_CONF_UNACCEPT;
else {
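The quoted Core 6.1 rule treats every value in a configuration response as an adjustment of what the request carried or implied, so an omitted MTU may be answered with the channel's current output MTU, but only for ERTM, where the peer will notice. The decision in isolation, as a sketch using the usual L2CAP defaults:

#include <linux/types.h>

#define SKETCH_L2CAP_DEFAULT_MTU	672
#define SKETCH_L2CAP_DEFAULT_MIN_MTU	48

enum sketch_mode { SKETCH_MODE_BASIC, SKETCH_MODE_ERTM };

static u16 pick_rsp_mtu(u16 req_mtu, enum sketch_mode mode, u16 omtu)
{
	if (req_mtu)
		return req_mtu;		/* request supplied a value */

	/* Older modes may not detect the adjustment and would silently
	 * drop packets, so only ERTM channels get the current omtu.
	 */
	if (mode == SKETCH_MODE_ERTM && omtu &&
	    omtu != SKETCH_L2CAP_DEFAULT_MTU)
		return omtu;

	return SKETCH_L2CAP_DEFAULT_MTU;
}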
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 5aa55fa69594..82d943c4cb50 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1703,6 +1703,9 @@ static void l2cap_sock_resume_cb(struct l2cap_chan *chan)
{
struct sock *sk = chan->data;
+ if (!sk)
+ return;
+
if (test_and_clear_bit(FLAG_PENDING_SECURITY, &chan->flags)) {
sk->sk_state = BT_CONNECTED;
chan->state = BT_CONNECTED;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 14a9462fced5..63dba0503653 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -464,7 +464,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
/* Devices marked as raw-only are neither configured
* nor unconfigured controllers.
*/
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
+ if (hci_test_quirk(d, HCI_QUIRK_RAW_DEVICE))
continue;
if (!hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
@@ -522,7 +522,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
/* Devices marked as raw-only are neither configured
* nor unconfigured controllers.
*/
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
+ if (hci_test_quirk(d, HCI_QUIRK_RAW_DEVICE))
continue;
if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
@@ -576,7 +576,7 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
/* Devices marked as raw-only are neither configured
* nor unconfigured controllers.
*/
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
+ if (hci_test_quirk(d, HCI_QUIRK_RAW_DEVICE))
continue;
if (hci_dev_test_flag(d, HCI_UNCONFIGURED))
@@ -612,12 +612,12 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
static bool is_configured(struct hci_dev *hdev)
{
- if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) &&
+ if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG) &&
!hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED))
return false;
- if ((test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) ||
- test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) &&
+ if ((hci_test_quirk(hdev, HCI_QUIRK_INVALID_BDADDR) ||
+ hci_test_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY)) &&
!bacmp(&hdev->public_addr, BDADDR_ANY))
return false;
@@ -628,12 +628,12 @@ static __le32 get_missing_options(struct hci_dev *hdev)
{
u32 options = 0;
- if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) &&
+ if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG) &&
!hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED))
options |= MGMT_OPTION_EXTERNAL_CONFIG;
- if ((test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) ||
- test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) &&
+ if ((hci_test_quirk(hdev, HCI_QUIRK_INVALID_BDADDR) ||
+ hci_test_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY)) &&
!bacmp(&hdev->public_addr, BDADDR_ANY))
options |= MGMT_OPTION_PUBLIC_ADDRESS;
@@ -669,7 +669,7 @@ static int read_config_info(struct sock *sk, struct hci_dev *hdev,
memset(&rp, 0, sizeof(rp));
rp.manufacturer = cpu_to_le16(hdev->manufacturer);
- if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG))
options |= MGMT_OPTION_EXTERNAL_CONFIG;
if (hdev->set_bdaddr)
@@ -828,8 +828,7 @@ static u32 get_supported_settings(struct hci_dev *hdev)
if (lmp_sc_capable(hdev))
settings |= MGMT_SETTING_SECURE_CONN;
- if (test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
- &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED))
settings |= MGMT_SETTING_WIDEBAND_SPEECH;
}
@@ -841,8 +840,7 @@ static u32 get_supported_settings(struct hci_dev *hdev)
settings |= MGMT_SETTING_ADVERTISING;
}
- if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) ||
- hdev->set_bdaddr)
+ if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG) || hdev->set_bdaddr)
settings |= MGMT_SETTING_CONFIGURATION;
if (cis_central_capable(hdev))
@@ -1080,7 +1078,8 @@ static int mesh_send_done_sync(struct hci_dev *hdev, void *data)
struct mgmt_mesh_tx *mesh_tx;
hci_dev_clear_flag(hdev, HCI_MESH_SENDING);
- hci_disable_advertising_sync(hdev);
+ if (list_empty(&hdev->adv_instances))
+ hci_disable_advertising_sync(hdev);
mesh_tx = mgmt_mesh_next(hdev, NULL);
if (mesh_tx)
@@ -1447,22 +1446,17 @@ static void settings_rsp(struct mgmt_pending_cmd *cmd, void *data)
send_settings_rsp(cmd->sk, cmd->opcode, match->hdev);
- list_del(&cmd->list);
-
if (match->sk == NULL) {
match->sk = cmd->sk;
sock_hold(match->sk);
}
-
- mgmt_pending_free(cmd);
}
static void cmd_status_rsp(struct mgmt_pending_cmd *cmd, void *data)
{
u8 *status = data;
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, *status);
- mgmt_pending_remove(cmd);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, *status);
}
static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data)
@@ -1476,8 +1470,6 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data)
if (cmd->cmd_complete) {
cmd->cmd_complete(cmd, match->mgmt_status);
- mgmt_pending_remove(cmd);
-
return;
}
@@ -1486,13 +1478,13 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data)
static int generic_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status)
{
- return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status,
+ return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status,
cmd->param, cmd->param_len);
}
static int addr_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status)
{
- return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status,
+ return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status,
cmd->param, sizeof(struct mgmt_addr_info));
}
@@ -1532,7 +1524,7 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data,
if (err) {
u8 mgmt_err = mgmt_status(err);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err);
hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
goto done;
}
@@ -1707,7 +1699,7 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data,
if (err) {
u8 mgmt_err = mgmt_status(err);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err);
goto done;
}
@@ -1943,8 +1935,8 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err)
new_settings(hdev, NULL);
}
- mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, cmd_status_rsp,
- &mgmt_err);
+ mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true,
+ cmd_status_rsp, &mgmt_err);
return;
}
@@ -1954,7 +1946,7 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err)
changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED);
}
- mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true, settings_rsp, &match);
if (changed)
new_settings(hdev, match.sk);
@@ -2074,12 +2066,12 @@ static void set_le_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
if (status) {
- mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp,
- &status);
+ mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, cmd_status_rsp,
+ &status);
return;
}
- mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, settings_rsp, &match);
new_settings(hdev, match.sk);
@@ -2138,7 +2130,7 @@ static void set_mesh_complete(struct hci_dev *hdev, void *data, int err)
struct sock *sk = cmd->sk;
if (status) {
- mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev,
+ mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, true,
cmd_status_rsp, &status);
return;
}
@@ -2160,6 +2152,9 @@ static int set_mesh_sync(struct hci_dev *hdev, void *data)
else
hci_dev_clear_flag(hdev, HCI_MESH);
+ hdev->le_scan_interval = __le16_to_cpu(cp->period);
+ hdev->le_scan_window = __le16_to_cpu(cp->window);
+
len -= sizeof(*cp);
/* If filters don't fit, forward all adv pkts */
@@ -2174,6 +2169,7 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
{
struct mgmt_cp_set_mesh *cp = data;
struct mgmt_pending_cmd *cmd;
+ __u16 period, window;
int err = 0;
bt_dev_dbg(hdev, "sock %p", sk);
@@ -2187,6 +2183,23 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
MGMT_STATUS_INVALID_PARAMS);
+ /* Keep allowed ranges in sync with set_scan_params() */
+ period = __le16_to_cpu(cp->period);
+
+ if (period < 0x0004 || period > 0x4000)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ window = __le16_to_cpu(cp->window);
+
+ if (window < 0x0004 || window > 0x4000)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ if (window > period)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
+ MGMT_STATUS_INVALID_PARAMS);
+
hci_dev_lock(hdev);
cmd = mgmt_pending_add(sk, MGMT_OP_SET_MESH_RECEIVER, hdev, data, len);
@@ -2638,7 +2651,7 @@ static void mgmt_class_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err), hdev->dev_class, 3);
mgmt_pending_free(cmd);
@@ -3427,7 +3440,7 @@ static int pairing_complete(struct mgmt_pending_cmd *cmd, u8 status)
bacpy(&rp.addr.bdaddr, &conn->dst);
rp.addr.type = link_to_bdaddr(conn->type, conn->dst_type);
- err = mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE,
+ err = mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_PAIR_DEVICE,
status, &rp, sizeof(rp));
/* So we don't get further callbacks for this connection */
@@ -4292,7 +4305,7 @@ static int set_wideband_speech(struct sock *sk, struct hci_dev *hdev,
bt_dev_dbg(hdev, "sock %p", sk);
- if (!test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks))
+ if (!hci_test_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED))
return mgmt_cmd_status(sk, hdev->id,
MGMT_OP_SET_WIDEBAND_SPEECH,
MGMT_STATUS_NOT_SUPPORTED);
@@ -5108,24 +5121,14 @@ static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev,
mgmt_event(MGMT_EV_ADV_MONITOR_ADDED, hdev, &ev, sizeof(ev), sk);
}
-void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle)
+static void mgmt_adv_monitor_removed(struct sock *sk, struct hci_dev *hdev,
+ __le16 handle)
{
struct mgmt_ev_adv_monitor_removed ev;
- struct mgmt_pending_cmd *cmd;
- struct sock *sk_skip = NULL;
- struct mgmt_cp_remove_adv_monitor *cp;
-
- cmd = pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev);
- if (cmd) {
- cp = cmd->param;
- if (cp->monitor_handle)
- sk_skip = cmd->sk;
- }
-
- ev.monitor_handle = cpu_to_le16(handle);
+ ev.monitor_handle = handle;
- mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk_skip);
+ mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk);
}
static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev,
@@ -5196,7 +5199,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev,
hci_update_passive_scan(hdev);
}
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(status), &rp, sizeof(rp));
mgmt_pending_remove(cmd);
@@ -5227,8 +5230,7 @@ static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev,
if (pending_find(MGMT_OP_SET_LE, hdev) ||
pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) ||
- pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev) ||
- pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) {
+ pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) {
status = MGMT_STATUS_BUSY;
goto unlock;
}
@@ -5398,8 +5400,7 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev,
struct mgmt_pending_cmd *cmd = data;
struct mgmt_cp_remove_adv_monitor *cp;
- if (status == -ECANCELED ||
- cmd != pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev))
+ if (status == -ECANCELED)
return;
hci_dev_lock(hdev);
@@ -5408,12 +5409,14 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev,
rp.monitor_handle = cp->monitor_handle;
- if (!status)
+ if (!status) {
+ mgmt_adv_monitor_removed(cmd->sk, hdev, cp->monitor_handle);
hci_update_passive_scan(hdev);
+ }
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(status), &rp, sizeof(rp));
- mgmt_pending_remove(cmd);
+ mgmt_pending_free(cmd);
hci_dev_unlock(hdev);
bt_dev_dbg(hdev, "remove monitor %d complete, status %d",
@@ -5423,10 +5426,6 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev,
static int mgmt_remove_adv_monitor_sync(struct hci_dev *hdev, void *data)
{
struct mgmt_pending_cmd *cmd = data;
-
- if (cmd != pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev))
- return -ECANCELED;
-
struct mgmt_cp_remove_adv_monitor *cp = cmd->param;
u16 handle = __le16_to_cpu(cp->monitor_handle);
@@ -5445,14 +5444,13 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev,
hci_dev_lock(hdev);
if (pending_find(MGMT_OP_SET_LE, hdev) ||
- pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev) ||
pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) ||
pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) {
status = MGMT_STATUS_BUSY;
goto unlock;
}
- cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len);
+ cmd = mgmt_pending_new(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len);
if (!cmd) {
status = MGMT_STATUS_NO_RESOURCES;
goto unlock;
@@ -5462,7 +5460,7 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev,
mgmt_remove_adv_monitor_complete);
if (err) {
- mgmt_pending_remove(cmd);
+ mgmt_pending_free(cmd);
if (err == -ENOMEM)
status = MGMT_STATUS_NO_RESOURCES;
@@ -5792,7 +5790,7 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err)
cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev))
return;
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err),
cmd->param, 1);
mgmt_pending_remove(cmd);
@@ -6013,7 +6011,7 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err),
cmd->param, 1);
mgmt_pending_remove(cmd);
@@ -6238,7 +6236,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err)
u8 status = mgmt_status(err);
if (status) {
- mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev,
+ mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true,
cmd_status_rsp, &status);
return;
}
@@ -6248,7 +6246,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err)
else
hci_dev_clear_flag(hdev, HCI_ADVERTISING);
- mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, settings_rsp,
+ mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true, settings_rsp,
&match);
new_settings(hdev, match.sk);
@@ -6454,6 +6452,7 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev,
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS,
MGMT_STATUS_NOT_SUPPORTED);
+ /* Keep allowed ranges in sync with set_mesh() */
interval = __le16_to_cpu(cp->interval);
if (interval < 0x0004 || interval > 0x4000)
@@ -6592,7 +6591,7 @@ static void set_bredr_complete(struct hci_dev *hdev, void *data, int err)
*/
hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err);
} else {
send_settings_rsp(cmd->sk, MGMT_OP_SET_BREDR, hdev);
new_settings(hdev, cmd->sk);
@@ -6729,7 +6728,7 @@ static void set_secure_conn_complete(struct hci_dev *hdev, void *data, int err)
if (err) {
u8 mgmt_err = mgmt_status(err);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err);
goto done;
}
@@ -7176,7 +7175,7 @@ static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err)
rp.max_tx_power = HCI_TX_POWER_INVALID;
}
- mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_GET_CONN_INFO, status,
&rp, sizeof(rp));
mgmt_pending_free(cmd);
@@ -7336,7 +7335,7 @@ static void get_clock_info_complete(struct hci_dev *hdev, void *data, int err)
}
complete:
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, &rp,
sizeof(rp));
mgmt_pending_free(cmd);
@@ -7934,7 +7933,7 @@ static int set_external_config(struct sock *sk, struct hci_dev *hdev,
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG,
MGMT_STATUS_INVALID_PARAMS);
- if (!test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks))
+ if (!hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG))
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG,
MGMT_STATUS_NOT_SUPPORTED);
@@ -8586,10 +8585,10 @@ static void add_advertising_complete(struct hci_dev *hdev, void *data, int err)
rp.instance = cp->instance;
if (err)
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err));
else
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err), &rp, sizeof(rp));
add_adv_complete(hdev, cmd->sk, cp->instance, err);
@@ -8777,10 +8776,10 @@ static void add_ext_adv_params_complete(struct hci_dev *hdev, void *data,
hci_remove_adv_instance(hdev, cp->instance);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err));
} else {
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err), &rp, sizeof(rp));
}
@@ -8927,10 +8926,10 @@ static void add_ext_adv_data_complete(struct hci_dev *hdev, void *data, int err)
rp.instance = cp->instance;
if (err)
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err));
else
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err), &rp, sizeof(rp));
mgmt_pending_free(cmd);
@@ -9089,10 +9088,10 @@ static void remove_advertising_complete(struct hci_dev *hdev, void *data,
rp.instance = cp->instance;
if (err)
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err));
else
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
mgmt_pending_free(cmd);
@@ -9337,7 +9336,7 @@ void mgmt_index_added(struct hci_dev *hdev)
{
struct mgmt_ev_ext_index ev;
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE))
return;
if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
@@ -9361,10 +9360,10 @@ void mgmt_index_removed(struct hci_dev *hdev)
struct mgmt_ev_ext_index ev;
struct cmd_lookup match = { NULL, hdev, MGMT_STATUS_INVALID_INDEX };
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE))
return;
- mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match);
+ mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match);
if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0,
@@ -9402,7 +9401,8 @@ void mgmt_power_on(struct hci_dev *hdev, int err)
hci_update_passive_scan(hdev);
}
- mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp,
+ &match);
new_settings(hdev, match.sk);
@@ -9417,7 +9417,8 @@ void __mgmt_power_off(struct hci_dev *hdev)
struct cmd_lookup match = { NULL, hdev };
u8 zero_cod[] = { 0, 0, 0 };
- mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp,
+ &match);
/* If the power off is because of hdev unregistration, let
* us use the appropriate INVALID_INDEX status. Otherwise use
@@ -9431,7 +9432,7 @@ void __mgmt_power_off(struct hci_dev *hdev)
else
match.mgmt_status = MGMT_STATUS_NOT_POWERED;
- mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match);
+ mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match);
if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) {
mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
@@ -9672,7 +9673,6 @@ static void unpair_device_rsp(struct mgmt_pending_cmd *cmd, void *data)
device_unpaired(hdev, &cp->addr.bdaddr, cp->addr.type, cmd->sk);
cmd->cmd_complete(cmd, 0);
- mgmt_pending_remove(cmd);
}
bool mgmt_powering_down(struct hci_dev *hdev)
@@ -9728,8 +9728,8 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr,
struct mgmt_cp_disconnect *cp;
struct mgmt_pending_cmd *cmd;
- mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, unpair_device_rsp,
- hdev);
+ mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, true,
+ unpair_device_rsp, hdev);
cmd = pending_find(MGMT_OP_DISCONNECT, hdev);
if (!cmd)
@@ -9922,7 +9922,7 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
if (status) {
u8 mgmt_err = mgmt_status(status);
- mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev,
+ mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true,
cmd_status_rsp, &mgmt_err);
return;
}
@@ -9932,8 +9932,8 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
else
changed = hci_dev_test_and_clear_flag(hdev, HCI_LINK_SECURITY);
- mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, settings_rsp,
- &match);
+ mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true,
+ settings_rsp, &match);
if (changed)
new_settings(hdev, match.sk);
@@ -9957,9 +9957,12 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
{
struct cmd_lookup match = { NULL, hdev, mgmt_status(status) };
- mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match);
- mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match);
- mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, false, sk_lookup,
+ &match);
+ mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, false, sk_lookup,
+ &match);
+ mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, false, sk_lookup,
+ &match);
if (!status) {
mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class,
@@ -10084,7 +10087,7 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir,
if (hdev->discovery.rssi != HCI_RSSI_INVALID &&
(rssi == HCI_RSSI_INVALID ||
(rssi < hdev->discovery.rssi &&
- !test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks))))
+ !hci_test_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER))))
return false;
if (hdev->discovery.uuid_count != 0) {
@@ -10102,7 +10105,7 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir,
/* If duplicate filtering does not report RSSI changes, then restart
* scanning to ensure updated results with updated RSSI values.
*/
- if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)) {
+ if (hci_test_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER)) {
/* Validate RSSI value against the RSSI threshold once more. */
if (hdev->discovery.rssi != HCI_RSSI_INVALID &&
rssi < hdev->discovery.rssi)
diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c
index 3713ff490c65..a88a07da3947 100644
--- a/net/bluetooth/mgmt_util.c
+++ b/net/bluetooth/mgmt_util.c
@@ -217,30 +217,47 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status,
struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode,
struct hci_dev *hdev)
{
- struct mgmt_pending_cmd *cmd;
+ struct mgmt_pending_cmd *cmd, *tmp;
+
+ mutex_lock(&hdev->mgmt_pending_lock);
- list_for_each_entry(cmd, &hdev->mgmt_pending, list) {
+ list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) {
if (hci_sock_get_channel(cmd->sk) != channel)
continue;
- if (cmd->opcode == opcode)
+
+ if (cmd->opcode == opcode) {
+ mutex_unlock(&hdev->mgmt_pending_lock);
return cmd;
+ }
}
+ mutex_unlock(&hdev->mgmt_pending_lock);
+
return NULL;
}
-void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev,
+void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove,
void (*cb)(struct mgmt_pending_cmd *cmd, void *data),
void *data)
{
struct mgmt_pending_cmd *cmd, *tmp;
+ mutex_lock(&hdev->mgmt_pending_lock);
+
list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) {
if (opcode > 0 && cmd->opcode != opcode)
continue;
+ if (remove)
+ list_del(&cmd->list);
+
cb(cmd, data);
+
+ if (remove)
+ mgmt_pending_free(cmd);
}
+
+ mutex_unlock(&hdev->mgmt_pending_lock);
}
struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode,
@@ -254,7 +271,7 @@ struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode,
return NULL;
cmd->opcode = opcode;
- cmd->index = hdev->id;
+ cmd->hdev = hdev;
cmd->param = kmemdup(data, len, GFP_KERNEL);
if (!cmd->param) {
@@ -280,7 +297,9 @@ struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode,
if (!cmd)
return NULL;
+ mutex_lock(&hdev->mgmt_pending_lock);
list_add_tail(&cmd->list, &hdev->mgmt_pending);
+ mutex_unlock(&hdev->mgmt_pending_lock);
return cmd;
}
@@ -294,7 +313,10 @@ void mgmt_pending_free(struct mgmt_pending_cmd *cmd)
void mgmt_pending_remove(struct mgmt_pending_cmd *cmd)
{
+ mutex_lock(&cmd->hdev->mgmt_pending_lock);
list_del(&cmd->list);
+ mutex_unlock(&cmd->hdev->mgmt_pending_lock);
+
mgmt_pending_free(cmd);
}
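The mgmt_util.c changes are the heart of the series' lifetime fix: the pending list is now walked under mgmt_pending_lock, and the new remove flag detaches each entry before its callback runs, so callbacks no longer do their own list_del()/free (see the settings_rsp and cmd_status_rsp hunks above). The pattern reduced to a sketch, with a kfree() standing in for mgmt_pending_free():

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/types.h>

struct pending_sketch {
	struct list_head list;
	u16 opcode;
};

static void foreach_pending(struct mutex *lock, struct list_head *head,
			    u16 opcode, bool remove,
			    void (*cb)(struct pending_sketch *, void *),
			    void *data)
{
	struct pending_sketch *p, *tmp;

	mutex_lock(lock);
	list_for_each_entry_safe(p, tmp, head, list) {
		if (opcode > 0 && p->opcode != opcode)
			continue;

		/* Detach first: the callback sees a stable entry and
		 * never touches list linkage itself.
		 */
		if (remove)
			list_del(&p->list);

		cb(p, data);

		if (remove)
			kfree(p);	/* stand-in for mgmt_pending_free() */
	}
	mutex_unlock(lock);
}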
diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h
index f2ba994ab1d8..024e51dd6937 100644
--- a/net/bluetooth/mgmt_util.h
+++ b/net/bluetooth/mgmt_util.h
@@ -33,7 +33,7 @@ struct mgmt_mesh_tx {
struct mgmt_pending_cmd {
struct list_head list;
u16 opcode;
- int index;
+ struct hci_dev *hdev;
void *param;
size_t param_len;
struct sock *sk;
@@ -54,7 +54,7 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status,
struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode,
struct hci_dev *hdev);
-void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev,
+void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove,
void (*cb)(struct mgmt_pending_cmd *cmd, void *data),
void *data);
struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode,
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index 5a8ccc491b14..c560d8467669 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -989,7 +989,7 @@ static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb)
handle_data = msft_find_handle_data(hdev, ev->monitor_handle, false);
- if (!test_bit(HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER, &hdev->quirks)) {
+ if (!hci_test_quirk(hdev, HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER)) {
if (!handle_data)
return;
mgmt_handle = handle_data->mgmt_handle;
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 47f359f24d1f..8115d42fc15b 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -1379,7 +1379,7 @@ static void smp_timeout(struct work_struct *work)
bt_dev_dbg(conn->hcon->hdev, "conn %p", conn);
- hci_disconnect(conn->hcon, HCI_ERROR_REMOTE_USER_TERM);
+ hci_disconnect(conn->hcon, HCI_ERROR_AUTH_FAILURE);
}
static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
@@ -2977,8 +2977,25 @@ static int smp_sig_channel(struct l2cap_chan *chan, struct sk_buff *skb)
if (code > SMP_CMD_MAX)
goto drop;
- if (smp && !test_and_clear_bit(code, &smp->allow_cmd))
+ if (smp && !test_and_clear_bit(code, &smp->allow_cmd)) {
+ /* If there is a context and the command is not allowed, consider
+ * it a failure so the session is cleaned up properly.
+ */
+ switch (code) {
+ case SMP_CMD_IDENT_INFO:
+ case SMP_CMD_IDENT_ADDR_INFO:
+ case SMP_CMD_SIGN_INFO:
+ /* 3.6.1. Key distribution and generation
+ *
+ * A device may reject a distributed key by sending the
+ * Pairing Failed command with the reason set to
+ * "Key Rejected".
+ */
+ smp_failure(conn, SMP_KEY_REJECTED);
+ break;
+ }
goto drop;
+ }
/* If we don't have a context the only allowed commands are
* pairing request and security request.
diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h
index 87a59ec2c9f0..c5da53dfab04 100644
--- a/net/bluetooth/smp.h
+++ b/net/bluetooth/smp.h
@@ -138,6 +138,7 @@ struct smp_cmd_keypress_notify {
#define SMP_NUMERIC_COMP_FAILED 0x0c
#define SMP_BREDR_PAIRING_IN_PROGRESS 0x0d
#define SMP_CROSS_TRANSP_NOT_ALLOWED 0x0e
+#define SMP_KEY_REJECTED 0x0f
#define SMP_MIN_ENC_KEY_SIZE 7
#define SMP_MAX_ENC_KEY_SIZE 16
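SMP_KEY_REJECTED (0x0f) is the reason code the smp.c hunk above sends when a key-distribution PDU arrives that the state machine had not allowed, instead of dropping it silently. On the wire it is just a one-byte Pairing Failed payload; a sketch, with the command opcode as commonly defined (treat it as an assumption):

#include <linux/types.h>

#define SKETCH_SMP_CMD_PAIRING_FAIL	0x05	/* assumed opcode value */
#define SKETCH_SMP_KEY_REJECTED		0x0f

struct smp_cmd_pairing_fail_sketch {
	u8 reason;
} __packed;

/* "Key Rejected" per Core spec 3.6.1: a device may refuse a
 * distributed key by failing the pairing with this reason.
 */
static void fill_key_rejected(struct smp_cmd_pairing_fail_sketch *pdu)
{
	pdu->reason = SKETCH_SMP_KEY_REJECTED;
}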
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 0224ef3dfec0..1377f31b719c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -2015,10 +2015,19 @@ void br_multicast_port_ctx_init(struct net_bridge_port *port,
void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx)
{
+ struct net_bridge *br = pmctx->port->br;
+ bool del = false;
+
#if IS_ENABLED(CONFIG_IPV6)
timer_delete_sync(&pmctx->ip6_mc_router_timer);
#endif
timer_delete_sync(&pmctx->ip4_mc_router_timer);
+
+ spin_lock_bh(&br->multicast_lock);
+ del |= br_ip6_multicast_rport_del(pmctx);
+ del |= br_ip4_multicast_rport_del(pmctx);
+ br_multicast_rport_del_notify(pmctx, del);
+ spin_unlock_bh(&br->multicast_lock);
}
int br_multicast_add_port(struct net_bridge_port *port)
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 95d7355a0407..9a910cf0256e 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -17,6 +17,9 @@ static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p,
if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
return false;
+ if (br_multicast_igmp_type(skb))
+ return false;
+
return (p->flags & BR_TX_FWD_OFFLOAD) &&
(p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom);
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 327ca73f9cd7..7a72f766aacf 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3233,6 +3233,13 @@ static const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto)
+{
+ skb->protocol = htons(proto);
+ if (skb_valid_dst(skb))
+ skb_dst_drop(skb);
+}
+
static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
{
/* Caller already did skb_cow() with len as headroom,
@@ -3329,7 +3336,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
}
}
- skb->protocol = htons(ETH_P_IPV6);
+ bpf_skb_change_protocol(skb, ETH_P_IPV6);
skb_clear_hash(skb);
return 0;
@@ -3359,7 +3366,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
}
}
- skb->protocol = htons(ETH_P_IP);
+ bpf_skb_change_protocol(skb, ETH_P_IP);
skb_clear_hash(skb);
return 0;
@@ -3550,10 +3557,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
/* Match skb->protocol to new outer l3 protocol */
if (skb->protocol == htons(ETH_P_IP) &&
flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
- skb->protocol = htons(ETH_P_IPV6);
+ bpf_skb_change_protocol(skb, ETH_P_IPV6);
else if (skb->protocol == htons(ETH_P_IPV6) &&
flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
- skb->protocol = htons(ETH_P_IP);
+ bpf_skb_change_protocol(skb, ETH_P_IP);
}
if (skb_is_gso(skb)) {
@@ -3606,10 +3613,10 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
/* Match skb->protocol to new outer l3 protocol */
if (skb->protocol == htons(ETH_P_IP) &&
flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
- skb->protocol = htons(ETH_P_IPV6);
+ bpf_skb_change_protocol(skb, ETH_P_IPV6);
else if (skb->protocol == htons(ETH_P_IPV6) &&
flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
- skb->protocol = htons(ETH_P_IP);
+ bpf_skb_change_protocol(skb, ETH_P_IP);
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
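
A sketch of the invariant the new bpf_skb_change_protocol() helper enforces, with hypothetical structures (struct pkt, pkt_set_proto): a cached route was looked up for the old L3 protocol, so it is stale once the protocol flips and must be dropped along with the change.

    #include <stdint.h>
    #include <stdlib.h>

    struct route;
    struct pkt {
        uint16_t proto;
        struct route *cached_route;
    };

    static void pkt_set_proto(struct pkt *p, uint16_t proto)
    {
        p->proto = proto;
        if (p->cached_route) {
            free(p->cached_route); /* keyed by the old protocol: stale */
            p->cached_route = NULL;
        }
    }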
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 4ddb7490df4b..6ad84d4a2b46 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -432,6 +432,7 @@ int netpoll_send_udp(struct netpoll *np, const char *msg, int len)
udph->dest = htons(np->remote_port);
udph->len = htons(udp_len);
+ udph->check = 0;
if (np->ipv6) {
udph->check = csum_ipv6_magic(&np->local_ip.in6,
&np->remote_ip.in6,
@@ -460,7 +461,6 @@ int netpoll_send_udp(struct netpoll *np, const char *msg, int len)
skb_reset_mac_header(skb);
skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
} else {
- udph->check = 0;
udph->check = csum_tcpudp_magic(np->local_ip.ip,
np->remote_ip.ip,
udp_len, IPPROTO_UDP,
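
For context, a self-contained sketch of the ones'-complement arithmetic behind csum_tcpudp_magic()/csum_ipv6_magic(), treating bytes big-endian. The check field must be zero while the datagram is summed, which is why the hunk zeroes udph->check before either branch, and a computed checksum of 0 is transmitted as 0xffff (CSUM_MANGLED_0 in the kernel) because 0 means "no checksum" for UDP over IPv4.

    #include <stdint.h>
    #include <stddef.h>

    /* RFC 1071 16-bit ones'-complement sum over a buffer, folded. */
    static uint16_t csum16(const uint8_t *data, size_t len, uint32_t sum)
    {
        while (len > 1) {
            sum += (uint32_t)data[0] << 8 | data[1];
            data += 2;
            len -= 2;
        }
        if (len)
            sum += (uint32_t)data[0] << 8;
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
    }

    /* UDP maps a computed 0 to 0xffff; phdr_sum is the pseudo-header sum. */
    static uint16_t udp_csum(const uint8_t *seg, size_t len, uint32_t phdr_sum)
    {
        uint16_t c = csum16(seg, len, phdr_sum);

        return c ? c : 0xffff;
    }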
diff --git a/net/core/selftests.c b/net/core/selftests.c
index 35f807ea9952..406faf8e5f3f 100644
--- a/net/core/selftests.c
+++ b/net/core/selftests.c
@@ -160,8 +160,9 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev,
skb->csum = 0;
skb->ip_summed = CHECKSUM_PARTIAL;
if (attr->tcp) {
- thdr->check = ~tcp_v4_check(skb->len, ihdr->saddr,
- ihdr->daddr, 0);
+ int l4len = skb->len - skb_transport_offset(skb);
+
+ thdr->check = ~tcp_v4_check(l4len, ihdr->saddr, ihdr->daddr, 0);
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
} else {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 85fc82f72d26..d6420b74ea9c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -6261,9 +6261,6 @@ int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len)
if (!pskb_may_pull(skb, write_len))
return -ENOMEM;
- if (!skb_frags_readable(skb))
- return -EFAULT;
-
if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
return 0;
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 39ec920f5de7..71c828d0bf31 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1083,7 +1083,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
ethtool_get_flow_spec_ring(info.fs.ring_cookie))
return -EINVAL;
- if (!xa_load(&dev->ethtool->rss_ctx, info.rss_context))
+ if (info.rss_context &&
+ !xa_load(&dev->ethtool->rss_ctx, info.rss_context))
return -EINVAL;
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 30a5e9460d00..5a49eb99e5c4 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -319,8 +319,8 @@ static int ip_rcv_finish_core(struct net *net,
const struct sk_buff *hint)
{
const struct iphdr *iph = ip_hdr(skb);
- int err, drop_reason;
struct rtable *rt;
+ int drop_reason;
if (ip_can_use_hint(skb, iph, hint)) {
drop_reason = ip_route_use_hint(skb, iph->daddr, iph->saddr,
@@ -345,9 +345,10 @@ static int ip_rcv_finish_core(struct net *net,
break;
case IPPROTO_UDP:
if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
- err = udp_v4_early_demux(skb);
- if (unlikely(err))
+ drop_reason = udp_v4_early_demux(skb);
+ if (unlikely(drop_reason))
goto drop_error;
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 5a4fb2539b08..9a45aed508d1 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -54,6 +54,7 @@ static int ipcomp4_err(struct sk_buff *skb, u32 info)
}
/* We always hold one tunnel user reference to indicate a tunnel */
+static struct lock_class_key xfrm_state_lock_key;
static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
{
struct net *net = xs_net(x);
@@ -62,6 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
t = xfrm_state_alloc(net);
if (!t)
goto out;
+ lockdep_set_class(&t->lock, &xfrm_state_lock_key);
t->id.proto = IPPROTO_IPIP;
t->id.spi = x->props.saddr.a4;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f64f8276a73c..461a9ab540af 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1176,7 +1176,7 @@ restart:
goto do_error;
while (msg_data_left(msg)) {
- ssize_t copy = 0;
+ int copy = 0;
skb = tcp_write_queue_tail(sk);
if (skb)
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 9b83d639b5ac..5107121c5e37 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -3,6 +3,7 @@
#include <linux/tcp.h>
#include <linux/rcupdate.h>
#include <net/tcp.h>
+#include <net/busy_poll.h>
void tcp_fastopen_init_key_once(struct net *net)
{
@@ -279,6 +280,8 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
refcount_set(&req->rsk_refcnt, 2);
+ sk_mark_napi_id_set(child, skb);
+
/* Now finish processing the fastopen child socket. */
tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, skb);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8ec92dec321a..68bc79eb9019 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2479,20 +2479,33 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
{
const struct sock *sk = (const struct sock *)tp;
- if (tp->retrans_stamp &&
- tcp_tsopt_ecr_before(tp, tp->retrans_stamp))
- return true; /* got echoed TS before first retransmission */
-
- /* Check if nothing was retransmitted (retrans_stamp==0), which may
- * happen in fast recovery due to TSQ. But we ignore zero retrans_stamp
- * in TCP_SYN_SENT, since when we set FLAG_SYN_ACKED we also clear
- * retrans_stamp even if we had retransmitted the SYN.
+ /* Received an echoed timestamp before the first retransmission? */
+ if (tp->retrans_stamp)
+ return tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
+
+ /* We set tp->retrans_stamp upon the first retransmission of a loss
+ * recovery episode, so normally if tp->retrans_stamp is 0 then no
+ * retransmission has happened yet (likely due to TSQ, which can cause
+ * fast retransmits to be delayed). So if snd_una advanced while
+ * tp->retrans_stamp is 0, then apparently a packet was merely delayed,
+ * not lost. But there are exceptions where we retransmit but then
+ * clear tp->retrans_stamp, so we check for those exceptions.
*/
- if (!tp->retrans_stamp && /* no record of a retransmit/SYN? */
- sk->sk_state != TCP_SYN_SENT) /* not the FLAG_SYN_ACKED case? */
- return true; /* nothing was retransmitted */
- return false;
+ /* (1) For non-SACK connections, tcp_is_non_sack_preventing_reopen()
+ * clears tp->retrans_stamp when snd_una == high_seq.
+ */
+ if (!tcp_is_sack(tp) && !before(tp->snd_una, tp->high_seq))
+ return false;
+
+ /* (2) In TCP_SYN_SENT tcp_clean_rtx_queue() clears tp->retrans_stamp
+ * when FLAG_SYN_ACKED is set, even if the SYN was
+ * retransmitted.
+ */
+ if (sk->sk_state == TCP_SYN_SENT)
+ return false;
+
+ return true; /* tp->retrans_stamp is zero; no retransmit yet */
}
/* Undo procedures. */
@@ -5168,7 +5181,9 @@ end:
skb_condense(skb);
skb_set_owner_r(skb, sk);
}
- tcp_rcvbuf_grow(sk);
+ /* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */
+ if (sk->sk_socket)
+ tcp_rcvbuf_grow(sk);
}
static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index d293087b426d..be5c2294610e 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -359,6 +359,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
flush |= skb->ip_summed != p->ip_summed;
flush |= skb->csum_level != p->csum_level;
flush |= NAPI_GRO_CB(p)->count >= 64;
+ skb_set_network_header(skb, skb_gro_receive_network_offset(skb));
if (flush || skb_gro_receive_list(p, skb))
mss = 1;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 85b5aa82d7d7..e0a6bfa95118 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -767,6 +767,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
}
+ skb_set_network_header(skb, skb_gro_receive_network_offset(skb));
ret = skb_gro_receive_list(p, skb);
} else {
skb_gro_postpull_rcsum(skb, uh,
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 0d31a8c108d4..f28cfd88eaf5 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -202,6 +202,9 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0)
goto out;
+ /* set the transport header to ESP */
+ skb_set_transport_header(skb, offset);
+
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ba2ec7c870cc..870a0bd6c2ba 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3525,11 +3525,9 @@ static void addrconf_gre_config(struct net_device *dev)
ASSERT_RTNL();
- idev = ipv6_find_idev(dev);
- if (IS_ERR(idev)) {
- pr_debug("%s: add_dev failed\n", __func__);
+ idev = addrconf_add_dev(dev);
+ if (IS_ERR(idev))
return;
- }
/* Generate the IPv6 link-local address using addrconf_addr_gen(),
* unless we have an IPv4 GRE device not bound to an IP address and
@@ -3543,9 +3541,6 @@ static void addrconf_gre_config(struct net_device *dev)
}
add_v4_addrs(idev);
-
- if (dev->flags & IFF_POINTOPOINT)
- addrconf_add_mroute(dev);
}
#endif
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 62618a058b8f..a247bb93908b 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -1207,6 +1207,10 @@ static int calipso_req_setattr(struct request_sock *req,
struct ipv6_opt_hdr *old, *new;
struct sock *sk = sk_to_full_sk(req_to_sk(req));
+ /* sk is NULL for SYN+ACK w/ SYN Cookie */
+ if (!sk)
+ return -ENOMEM;
+
if (req_inet->ipv6_opt && req_inet->ipv6_opt->hopopt)
old = req_inet->ipv6_opt->hopopt;
else
@@ -1247,6 +1251,10 @@ static void calipso_req_delattr(struct request_sock *req)
struct ipv6_txoptions *txopts;
struct sock *sk = sk_to_full_sk(req_to_sk(req));
+ /* sk is NULL for SYN+ACK w/ SYN Cookie */
+ if (!sk)
+ return;
+
if (!req_inet->ipv6_opt || !req_inet->ipv6_opt->hopopt)
return;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 72d4858dec18..8607569de34f 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -71,6 +71,7 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
}
+static struct lock_class_key xfrm_state_lock_key;
static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
{
struct net *net = xs_net(x);
@@ -79,6 +80,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
t = xfrm_state_alloc(net);
if (!t)
goto out;
+ lockdep_set_class(&t->lock, &xfrm_state_lock_key);
t->id.proto = IPPROTO_IPV6;
t->id.spi = xfrm6_tunnel_alloc_spi(net, (xfrm_address_t *)&x->props.saddr);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 65831b4fee1f..616bf4c0c8fd 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -807,8 +807,8 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
} else {
im->mca_crcount = idev->mc_qrv;
}
- in6_dev_put(pmc->idev);
ip6_mc_clear_src(pmc);
+ in6_dev_put(pmc->idev);
kfree_rcu(pmc, rcu);
}
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0143262094b0..79c8f1acf8a3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3737,6 +3737,53 @@ void fib6_nh_release_dsts(struct fib6_nh *fib6_nh)
}
}
+static int fib6_config_validate(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ /* RTF_PCPU is an internal flag; can not be set by userspace */
+ if (cfg->fc_flags & RTF_PCPU) {
+ NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
+ goto errout;
+ }
+
+ /* RTF_CACHE is an internal flag; can not be set by userspace */
+ if (cfg->fc_flags & RTF_CACHE) {
+ NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
+ goto errout;
+ }
+
+ if (cfg->fc_type > RTN_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid route type");
+ goto errout;
+ }
+
+ if (cfg->fc_dst_len > 128) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix length");
+ goto errout;
+ }
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (cfg->fc_src_len > 128) {
+ NL_SET_ERR_MSG(extack, "Invalid source address length");
+ goto errout;
+ }
+
+ if (cfg->fc_nh_id && cfg->fc_src_len) {
+ NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
+ goto errout;
+ }
+#else
+ if (cfg->fc_src_len) {
+ NL_SET_ERR_MSG(extack,
+ "Specifying source address requires IPV6_SUBTREES to be enabled");
+ goto errout;
+ }
+#endif
+ return 0;
+errout:
+ return -EINVAL;
+}
+
static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
gfp_t gfp_flags,
struct netlink_ext_ack *extack)
@@ -3886,6 +3933,10 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
struct fib6_info *rt;
int err;
+ err = fib6_config_validate(cfg, extack);
+ if (err)
+ return err;
+
rt = ip6_route_info_create(cfg, gfp_flags, extack);
if (IS_ERR(rt))
return PTR_ERR(rt);
@@ -4479,53 +4530,6 @@ void rt6_purge_dflt_routers(struct net *net)
rcu_read_unlock();
}
-static int fib6_config_validate(struct fib6_config *cfg,
- struct netlink_ext_ack *extack)
-{
- /* RTF_PCPU is an internal flag; can not be set by userspace */
- if (cfg->fc_flags & RTF_PCPU) {
- NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
- goto errout;
- }
-
- /* RTF_CACHE is an internal flag; can not be set by userspace */
- if (cfg->fc_flags & RTF_CACHE) {
- NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
- goto errout;
- }
-
- if (cfg->fc_type > RTN_MAX) {
- NL_SET_ERR_MSG(extack, "Invalid route type");
- goto errout;
- }
-
- if (cfg->fc_dst_len > 128) {
- NL_SET_ERR_MSG(extack, "Invalid prefix length");
- goto errout;
- }
-
-#ifdef CONFIG_IPV6_SUBTREES
- if (cfg->fc_src_len > 128) {
- NL_SET_ERR_MSG(extack, "Invalid source address length");
- goto errout;
- }
-
- if (cfg->fc_nh_id && cfg->fc_src_len) {
- NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
- goto errout;
- }
-#else
- if (cfg->fc_src_len) {
- NL_SET_ERR_MSG(extack,
- "Specifying source address requires IPV6_SUBTREES to be enabled");
- goto errout;
- }
-#endif
- return 0;
-errout:
- return -EINVAL;
-}
-
static void rtmsg_to_fib6_config(struct net *net,
struct in6_rtmsg *rtmsg,
struct fib6_config *cfg)
@@ -4563,10 +4567,6 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
switch (cmd) {
case SIOCADDRT:
- err = fib6_config_validate(&cfg, NULL);
- if (err)
- break;
-
/* Only do the default setting of fc_metric in route adding */
if (cfg.fc_metric == 0)
cfg.fc_metric = IP6_RT_PRIO_USER;
@@ -5402,6 +5402,10 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
int nhn = 0;
int err;
+ err = fib6_config_validate(cfg, extack);
+ if (err)
+ return err;
+
replace = (cfg->fc_nlinfo.nlh &&
(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
@@ -5636,10 +5640,6 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
- err = fib6_config_validate(&cfg, extack);
- if (err)
- return err;
-
if (cfg.fc_metric == 0)
cfg.fc_metric = IP6_RT_PRIO_USER;
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index 7c05ac846646..eccfa4203e96 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -129,13 +129,13 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
struct dst_entry *cache_dst)
{
struct ipv6_rpl_sr_hdr *isrh, *csrh;
- const struct ipv6hdr *oldhdr;
+ struct ipv6hdr oldhdr;
struct ipv6hdr *hdr;
unsigned char *buf;
size_t hdrlen;
int err;
- oldhdr = ipv6_hdr(skb);
+ memcpy(&oldhdr, ipv6_hdr(skb), sizeof(oldhdr));
buf = kcalloc(struct_size(srh, segments.addr, srh->segments_left), 2, GFP_ATOMIC);
if (!buf)
@@ -147,7 +147,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
memcpy(isrh, srh, sizeof(*isrh));
memcpy(isrh->rpl_segaddr, &srh->rpl_segaddr[1],
(srh->segments_left - 1) * 16);
- isrh->rpl_segaddr[srh->segments_left - 1] = oldhdr->daddr;
+ isrh->rpl_segaddr[srh->segments_left - 1] = oldhdr.daddr;
ipv6_rpl_srh_compress(csrh, isrh, &srh->rpl_segaddr[0],
isrh->segments_left - 1);
@@ -169,7 +169,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
skb_mac_header_rebuild(skb);
hdr = ipv6_hdr(skb);
- memmove(hdr, oldhdr, sizeof(*hdr));
+ memmove(hdr, &oldhdr, sizeof(*hdr));
isrh = (void *)hdr + sizeof(*hdr);
memcpy(isrh, csrh, hdrlen);
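
The bug class fixed above, in a self-contained sketch with hypothetical names (push_room, struct hdr): a pointer into a buffer (here, oldhdr into the skb head) dangles once the buffer may be reallocated, so the fix copies the header out by value before any operation that can move the data.

    #include <stdlib.h>
    #include <string.h>

    struct hdr { unsigned char daddr[16]; };

    /* Grow *buf by "extra" bytes of headroom while preserving the leading
     * header; assumes len >= sizeof(struct hdr). */
    static int push_room(unsigned char **buf, size_t len, size_t extra)
    {
        struct hdr old;
        unsigned char *n;

        memcpy(&old, *buf, sizeof(old)); /* copy out before realloc */
        n = realloc(*buf, len + extra);  /* any pointer into *buf dangles now */
        if (!n)
            return -1;
        memmove(n + extra, n, len);      /* shift payload to make headroom */
        memcpy(n, &old, sizeof(old));    /* rebuild from the copy, never a stale pointer */
        *buf = n;
        return 0;
    }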
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 841c81abaaf4..9005fc156a20 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -202,6 +202,9 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0)
goto out;
+ /* set the transport header to ESP */
+ skb_set_transport_header(skb, offset);
+
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index bf140ef781c1..5120a763da0d 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -334,8 +334,8 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
unsigned int i;
+ xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
xfrm_flush_gc();
- xfrm_state_flush(net, 0, false, true);
for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
diff --git a/net/key/af_key.c b/net/key/af_key.c
index efc2a91f4c48..b5d761700776 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1766,7 +1766,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m
if (proto == 0)
return -EINVAL;
- err = xfrm_state_flush(net, proto, true, false);
+ err = xfrm_state_flush(net, proto, true);
err2 = unicast_flush_resp(sk, hdr);
if (err || err2) {
if (err == -ESRCH) /* empty table - go quietly */
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d9d88f2f2831..954795b0fe48 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1959,6 +1959,20 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
ieee80211_sta_init_nss(link_sta);
if (params->opmode_notif_used) {
+ enum nl80211_chan_width width = link->conf->chanreq.oper.width;
+
+ switch (width) {
+ case NL80211_CHAN_WIDTH_20:
+ case NL80211_CHAN_WIDTH_40:
+ case NL80211_CHAN_WIDTH_80:
+ case NL80211_CHAN_WIDTH_160:
+ case NL80211_CHAN_WIDTH_80P80:
+ case NL80211_CHAN_WIDTH_320: /* not VHT, allowed for HE/EHT */
+ break;
+ default:
+ return -EINVAL;
+ }
+
/* returned value is only needed for rc update, but the
* rc isn't initialized here yet, so ignore it
*/
diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h
index 5b81998cb0c9..ef7c1a68d88d 100644
--- a/net/mac80211/debug.h
+++ b/net/mac80211/debug.h
@@ -1,10 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Portions
- * Copyright (C) 2022 - 2024 Intel Corporation
+ * Copyright (C) 2022 - 2025 Intel Corporation
*/
#ifndef __MAC80211_DEBUG_H
#define __MAC80211_DEBUG_H
+#include <linux/once_lite.h>
#include <net/cfg80211.h>
#ifdef CONFIG_MAC80211_OCB_DEBUG
@@ -152,6 +153,8 @@ do { \
else \
_sdata_err((link)->sdata, fmt, ##__VA_ARGS__); \
} while (0)
+#define link_err_once(link, fmt, ...) \
+ DO_ONCE_LITE(link_err, link, fmt, ##__VA_ARGS__)
#define link_id_info(sdata, link_id, fmt, ...) \
do { \
if (ieee80211_vif_is_mld(&sdata->vif)) \
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7c27f3cd841c..c01634fdba78 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1150,6 +1150,8 @@ static void ieee80211_sdata_init(struct ieee80211_local *local,
{
sdata->local = local;
+ INIT_LIST_HEAD(&sdata->key_list);
+
/*
* Initialize the default link, so we can use link_id 0 for non-MLD,
* and that continues to work for non-MLD-aware drivers that use just
@@ -2210,8 +2212,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ieee80211_init_frag_cache(&sdata->frags);
- INIT_LIST_HEAD(&sdata->key_list);
-
wiphy_delayed_work_init(&sdata->dec_tailroom_needed_wk,
ieee80211_delayed_tailroom_dec);
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
index d40c2bd3b50b..4f7b7d0f64f2 100644
--- a/net/mac80211/link.c
+++ b/net/mac80211/link.c
@@ -93,9 +93,6 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
if (link_id < 0)
link_id = 0;
- rcu_assign_pointer(sdata->vif.link_conf[link_id], link_conf);
- rcu_assign_pointer(sdata->link[link_id], link);
-
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
struct ieee80211_sub_if_data *ap_bss;
struct ieee80211_bss_conf *ap_bss_conf;
@@ -145,6 +142,9 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
ieee80211_link_debugfs_add(link);
}
+
+ rcu_assign_pointer(sdata->vif.link_conf[link_id], link_conf);
+ rcu_assign_pointer(sdata->link[link_id], link);
}
void ieee80211_link_stop(struct ieee80211_link_data *link)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 2d46d4af60d7..0ed68182f79b 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3934,6 +3934,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
lockdep_assert_wiphy(local->hw.wiphy);
+ if (frame_buf)
+ memset(frame_buf, 0, IEEE80211_DEAUTH_FRAME_LEN);
+
if (WARN_ON(!ap_sta))
return;
@@ -7195,6 +7198,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
struct ieee80211_bss_conf *bss_conf = link->conf;
struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg;
struct ieee80211_mgmt *mgmt = (void *) hdr;
+ struct ieee80211_ext *ext = NULL;
size_t baselen;
struct ieee802_11_elems *elems;
struct ieee80211_local *local = sdata->local;
@@ -7220,7 +7224,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
/* Process beacon from the current BSS */
bssid = ieee80211_get_bssid(hdr, len, sdata->vif.type);
if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
- struct ieee80211_ext *ext = (void *) mgmt;
+ ext = (void *)mgmt;
variable = ext->u.s1g_beacon.variable +
ieee80211_s1g_optional_len(ext->frame_control);
}
@@ -7407,7 +7411,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
}
if ((ncrc == link->u.mgd.beacon_crc && link->u.mgd.beacon_crc_valid) ||
- ieee80211_is_s1g_short_beacon(mgmt->frame_control))
+ (ext && ieee80211_is_s1g_short_beacon(ext->frame_control,
+ parse_params.start,
+ parse_params.len)))
goto free;
link->u.mgd.beacon_crc = ncrc;
link->u.mgd.beacon_crc_valid = true;
@@ -10699,8 +10705,8 @@ static void ieee80211_ml_epcs(struct ieee80211_sub_if_data *sdata,
*/
for_each_mle_subelement(sub, (const u8 *)elems->ml_epcs,
elems->ml_epcs_len) {
+ struct ieee802_11_elems *link_elems __free(kfree) = NULL;
struct ieee80211_link_data *link;
- struct ieee802_11_elems *link_elems __free(kfree);
u8 *pos = (void *)sub->data;
u16 control;
ssize_t len;
diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c
index 96584b39215e..c5e0f7f46004 100644
--- a/net/mac80211/parse.c
+++ b/net/mac80211/parse.c
@@ -758,7 +758,6 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
{
const struct element *elem, *sub;
size_t profile_len = 0;
- bool found = false;
if (!bss || !bss->transmitted_bss)
return profile_len;
@@ -809,15 +808,14 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
index[2],
new_bssid);
if (ether_addr_equal(new_bssid, bss->bssid)) {
- found = true;
elems->bssid_index_len = index[1];
elems->bssid_index = (void *)&index[2];
- break;
+ return profile_len;
}
}
}
- return found ? profile_len : 0;
+ return 0;
}
static void
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 09beb65d6108..e73431549ce7 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4432,6 +4432,10 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
if (!multicast &&
!ether_addr_equal(sdata->dev->dev_addr, hdr->addr1))
return false;
+ /* reject invalid/our STA address */
+ if (!is_valid_ether_addr(hdr->addr2) ||
+ ether_addr_equal(sdata->dev->dev_addr, hdr->addr2))
+ return false;
if (!rx->sta) {
int rate_idx;
if (status->encoding != RX_ENC_LEGACY)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index d8d4f3d7d7f2..d58b80813bdd 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2018-2024 Intel Corporation
+ * Copyright (C) 2018-2025 Intel Corporation
*
* Transmit and frame generation functions.
*/
@@ -5016,12 +5016,25 @@ static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata,
}
}
-static u8 __ieee80211_beacon_update_cntdwn(struct beacon_data *beacon)
+static u8 __ieee80211_beacon_update_cntdwn(struct ieee80211_link_data *link,
+ struct beacon_data *beacon)
{
- beacon->cntdwn_current_counter--;
+ if (beacon->cntdwn_current_counter == 1) {
+ /*
+ * Channel switch handling is done by a worker thread while
+ * beacons get pulled from hardware timers. It's therefore
+ * possible that software threads are slow enough to not be
+ * able to complete CSA handling in a single beacon interval,
+ * in which case we get here. There isn't much to do about
+ * it, other than letting the user know that the AP isn't
+ * behaving correctly.
+ */
+ link_err_once(link,
+ "beacon TX faster than countdown (channel/color switch) completion\n");
+ return 0;
+ }
- /* the counter should never reach 0 */
- WARN_ON_ONCE(!beacon->cntdwn_current_counter);
+ beacon->cntdwn_current_counter--;
return beacon->cntdwn_current_counter;
}
@@ -5052,7 +5065,7 @@ u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif, unsigned int link_i
if (!beacon)
goto unlock;
- count = __ieee80211_beacon_update_cntdwn(beacon);
+ count = __ieee80211_beacon_update_cntdwn(link, beacon);
unlock:
rcu_read_unlock();
@@ -5450,7 +5463,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
if (beacon->cntdwn_counter_offsets[0]) {
if (!is_template)
- __ieee80211_beacon_update_cntdwn(beacon);
+ __ieee80211_beacon_update_cntdwn(link, beacon);
ieee80211_set_beacon_cntdwn(sdata, beacon, link);
}
@@ -5482,7 +5495,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
* for now we leave it consistent with overall
* mac80211's behavior.
*/
- __ieee80211_beacon_update_cntdwn(beacon);
+ __ieee80211_beacon_update_cntdwn(link, beacon);
ieee80211_set_beacon_cntdwn(sdata, beacon, link);
}
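
link_err_once() used above builds on DO_ONCE_LITE(), so the stuck-countdown warning fires at most once per call site rather than on every beacon. Roughly, and as a userspace approximation only (the kernel macro in include/linux/once_lite.h is more elaborate):

    #include <stdio.h>
    #include <stdbool.h>

    /* Run func(...) at most once for this particular call site. */
    #define DO_ONCE_LITE_SKETCH(func, ...)      \
        do {                                    \
            static bool done;                   \
            if (!done) {                        \
                done = true;                    \
                func(__VA_ARGS__);              \
            }                                   \
        } while (0)

    static void report(const char *msg)
    {
        fprintf(stderr, "%s\n", msg);
    }

    static void on_beacon(void)
    {
        /* logged for the first stuck countdown only */
        DO_ONCE_LITE_SKETCH(report, "countdown did not complete");
    }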
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 27d414efa3fd..e66da651678a 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2144,11 +2144,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0);
wake_up:
-
- if (local->virt_monitors > 0 &&
- local->virt_monitors == local->open_count)
- ieee80211_add_virtual_monitor(local);
-
/*
* Clear the WLAN_STA_BLOCK_BA flag so new aggregation
* sessions can be established after a resume.
@@ -2202,6 +2197,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
}
}
+ if (local->virt_monitors > 0 &&
+ local->virt_monitors == local->open_count)
+ ieee80211_add_virtual_monitor(local);
+
if (!suspended)
return 0;
@@ -3884,7 +3883,7 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local,
{
u64 tsf = drv_get_tsf(local, sdata);
u64 dtim_count = 0;
- u16 beacon_int = sdata->vif.bss_conf.beacon_int * 1024;
+ u32 beacon_int = sdata->vif.bss_conf.beacon_int * 1024;
u8 dtim_period = sdata->vif.bss_conf.dtim_period;
struct ps_data *ps;
u8 bcns_from_dtim;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index d536c97144e9..47d7dfd9ad09 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -81,8 +81,8 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
if (index < net->mpls.platform_labels) {
struct mpls_route __rcu **platform_label =
- rcu_dereference(net->mpls.platform_label);
- rt = rcu_dereference(platform_label[index]);
+ rcu_dereference_rtnl(net->mpls.platform_label);
+ rt = rcu_dereference_rtnl(platform_label[index]);
}
return rt;
}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 421ced031289..1f898888b223 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -978,8 +978,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (subflow->mp_join)
goto reset;
subflow->mp_capable = 0;
+ if (!mptcp_try_fallback(ssk))
+ goto reset;
pr_fallback(msk);
- mptcp_do_fallback(ssk);
return false;
}
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index feb01747d7d8..420d416e2603 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -765,8 +765,14 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
pr_debug("fail_seq=%llu\n", fail_seq);
- if (!READ_ONCE(msk->allow_infinite_fallback))
+ /* After accepting the fail, we can't create any other subflows */
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_infinite_fallback) {
+ spin_unlock_bh(&msk->fallback_lock);
return;
+ }
+ msk->allow_subflows = false;
+ spin_unlock_bh(&msk->fallback_lock);
if (!subflow->fail_tout) {
pr_debug("send MP_FAIL response and infinite map\n");
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index edf14c2c2062..6a817a13b154 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -560,10 +560,9 @@ static bool mptcp_check_data_fin(struct sock *sk)
static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk)
{
- if (READ_ONCE(msk->allow_infinite_fallback)) {
+ if (mptcp_try_fallback(ssk)) {
MPTCP_INC_STATS(sock_net(ssk),
MPTCP_MIB_DSSCORRUPTIONFALLBACK);
- mptcp_do_fallback(ssk);
} else {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET);
mptcp_subflow_reset(ssk);
@@ -792,7 +791,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk)
{
mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq);
- WRITE_ONCE(msk->allow_infinite_fallback, false);
+ msk->allow_infinite_fallback = false;
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
}
@@ -803,6 +802,14 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
if (sk->sk_state != TCP_ESTABLISHED)
return false;
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_subflows) {
+ spin_unlock_bh(&msk->fallback_lock);
+ return false;
+ }
+ mptcp_subflow_joined(msk, ssk);
+ spin_unlock_bh(&msk->fallback_lock);
+
/* attach to msk socket only after we are sure we will deal with it
* at close time
*/
@@ -811,7 +818,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
mptcp_sockopt_sync_locked(msk, ssk);
- mptcp_subflow_joined(msk, ssk);
mptcp_stop_tout_timer(sk);
__mptcp_propagate_sndbuf(sk, ssk);
return true;
@@ -1136,10 +1142,14 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk,
mpext->infinite_map = 1;
mpext->data_len = 0;
+ if (!mptcp_try_fallback(ssk)) {
+ mptcp_subflow_reset(ssk);
+ return;
+ }
+
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX);
mptcp_subflow_ctx(ssk)->send_infinite_map = 0;
pr_fallback(msk);
- mptcp_do_fallback(ssk);
}
#define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1))
@@ -2543,9 +2553,9 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
static void __mptcp_retrans(struct sock *sk)
{
+ struct mptcp_sendmsg_info info = { .data_lock_held = true, };
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_subflow_context *subflow;
- struct mptcp_sendmsg_info info = {};
struct mptcp_data_frag *dfrag;
struct sock *ssk;
int ret, err;
@@ -2590,6 +2600,18 @@ static void __mptcp_retrans(struct sock *sk)
info.sent = 0;
info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len :
dfrag->already_sent;
+
+ /* make the whole retrans decision, xmit and fallback
+ * disabling atomic
+ */
+ spin_lock_bh(&msk->fallback_lock);
+ if (__mptcp_check_fallback(msk)) {
+ spin_unlock_bh(&msk->fallback_lock);
+ release_sock(ssk);
+ return;
+ }
+
while (info.sent < info.limit) {
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
if (ret <= 0)
@@ -2603,8 +2625,9 @@ static void __mptcp_retrans(struct sock *sk)
len = max(copied, len);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
- WRITE_ONCE(msk->allow_infinite_fallback, false);
+ msk->allow_infinite_fallback = false;
}
+ spin_unlock_bh(&msk->fallback_lock);
release_sock(ssk);
}
@@ -2730,7 +2753,8 @@ static void __mptcp_init_sock(struct sock *sk)
WRITE_ONCE(msk->first, NULL);
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
- WRITE_ONCE(msk->allow_infinite_fallback, true);
+ msk->allow_infinite_fallback = true;
+ msk->allow_subflows = true;
msk->recovery = false;
msk->subflow_id = 1;
msk->last_data_sent = tcp_jiffies32;
@@ -2738,6 +2762,7 @@ static void __mptcp_init_sock(struct sock *sk)
msk->last_ack_recv = tcp_jiffies32;
mptcp_pm_data_init(msk);
+ spin_lock_init(&msk->fallback_lock);
/* re-use the csk retrans timer for MPTCP-level retrans */
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
@@ -3117,7 +3142,16 @@ static int mptcp_disconnect(struct sock *sk, int flags)
* subflow
*/
mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
+
+ /* The first subflow is already in TCP_CLOSE status, the following
+ * can't overlap with a fallback anymore
+ */
+ spin_lock_bh(&msk->fallback_lock);
+ msk->allow_subflows = true;
+ msk->allow_infinite_fallback = true;
WRITE_ONCE(msk->flags, 0);
+ spin_unlock_bh(&msk->fallback_lock);
+
msk->cb_flags = 0;
msk->recovery = false;
WRITE_ONCE(msk->can_ack, false);
@@ -3524,7 +3558,13 @@ bool mptcp_finish_join(struct sock *ssk)
/* active subflow, already present inside the conn_list */
if (!list_empty(&subflow->node)) {
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_subflows) {
+ spin_unlock_bh(&msk->fallback_lock);
+ return false;
+ }
mptcp_subflow_joined(msk, ssk);
+ spin_unlock_bh(&msk->fallback_lock);
mptcp_propagate_sndbuf(parent, ssk);
return true;
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3dd11dd3ba16..6ec245fd2778 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -346,10 +346,16 @@ struct mptcp_sock {
u64 rtt_us; /* last maximum rtt of subflows */
} rcvq_space;
u8 scaling_ratio;
+ bool allow_subflows;
u32 subflow_id;
u32 setsockopt_seq;
char ca_name[TCP_CA_NAME_MAX];
+
+ spinlock_t fallback_lock; /* protects fallback,
+ * allow_infinite_fallback and
+ * allow_subflows
+ */
};
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
@@ -1216,15 +1222,22 @@ static inline bool mptcp_check_fallback(const struct sock *sk)
return __mptcp_check_fallback(msk);
}
-static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
+static inline bool __mptcp_try_fallback(struct mptcp_sock *msk)
{
if (__mptcp_check_fallback(msk)) {
pr_debug("TCP fallback already done (msk=%p)\n", msk);
- return;
+ return true;
}
- if (WARN_ON_ONCE(!READ_ONCE(msk->allow_infinite_fallback)))
- return;
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_infinite_fallback) {
+ spin_unlock_bh(&msk->fallback_lock);
+ return false;
+ }
+
+ msk->allow_subflows = false;
set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
+ spin_unlock_bh(&msk->fallback_lock);
+ return true;
}
static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
@@ -1236,14 +1249,15 @@ static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
TCPF_SYN_RECV | TCPF_LISTEN));
}
-static inline void mptcp_do_fallback(struct sock *ssk)
+static inline bool mptcp_try_fallback(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = subflow->conn;
struct mptcp_sock *msk;
msk = mptcp_sk(sk);
- __mptcp_do_fallback(msk);
+ if (!__mptcp_try_fallback(msk))
+ return false;
if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) {
gfp_t saved_allocation = ssk->sk_allocation;
@@ -1255,6 +1269,7 @@ static inline void mptcp_do_fallback(struct sock *ssk)
tcp_shutdown(ssk, SEND_SHUTDOWN);
ssk->sk_allocation = saved_allocation;
}
+ return true;
}
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a)
@@ -1264,7 +1279,7 @@ static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
{
pr_fallback(msk);
subflow->request_mptcp = 0;
- __mptcp_do_fallback(msk);
+ WARN_ON_ONCE(!__mptcp_try_fallback(msk));
}
static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
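
A userspace sketch of the state machine the new fallback_lock serializes, with hypothetical names (struct conn, try_fallback, try_join): the lock makes the "may we still fall back / may we still add subflows" test and the corresponding state flip one atomic step, so a racing subflow join and a fallback can never both succeed.

    #include <pthread.h>
    #include <stdbool.h>

    struct conn {
        pthread_mutex_t lock;
        bool fallback_done;
        bool allow_infinite_fallback;
        bool allow_subflows;
    };

    static bool try_fallback(struct conn *c)
    {
        bool ok;

        pthread_mutex_lock(&c->lock);
        if (c->fallback_done) {            /* already fell back: fine */
            ok = true;
        } else if (!c->allow_infinite_fallback) {
            ok = false;                    /* too late, caller must reset */
        } else {
            c->allow_subflows = false;     /* no new subflows after fallback */
            c->fallback_done = true;
            ok = true;
        }
        pthread_mutex_unlock(&c->lock);
        return ok;
    }

    static bool try_join(struct conn *c)
    {
        bool ok;

        pthread_mutex_lock(&c->lock);
        ok = c->allow_subflows;
        if (ok)                            /* a join forbids later fallback */
            c->allow_infinite_fallback = false;
        pthread_mutex_unlock(&c->lock);
        return ok;
    }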
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 15613d691bfe..1802bc5435a1 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -544,9 +544,11 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
mptcp_get_options(skb, &mp_opt);
if (subflow->request_mptcp) {
if (!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYNACK)) {
+ if (!mptcp_try_fallback(sk))
+ goto do_reset;
+
MPTCP_INC_STATS(sock_net(sk),
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
- mptcp_do_fallback(sk);
pr_fallback(msk);
goto fallback;
}
@@ -1300,20 +1302,29 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss
mptcp_schedule_work(sk);
}
-static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
+static bool mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
unsigned long fail_tout;
+ /* we are really failing, prevent any later subflow join */
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_infinite_fallback) {
+ spin_unlock_bh(&msk->fallback_lock);
+ return false;
+ }
+ msk->allow_subflows = false;
+ spin_unlock_bh(&msk->fallback_lock);
+
/* graceful failure can happen only on the MPC subflow */
if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first)))
- return;
+ return false;
/* since the close timeout take precedence on the fail one,
* no need to start the latter when the first is already set
*/
if (sock_flag((struct sock *)msk, SOCK_DEAD))
- return;
+ return true;
/* we don't need extreme accuracy here, use a zero fail_tout as special
* value meaning no fail timeout at all;
@@ -1325,6 +1336,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
tcp_send_ack(ssk);
mptcp_reset_tout_timer(msk, subflow->fail_tout);
+ return true;
}
static bool subflow_check_data_avail(struct sock *ssk)
@@ -1385,17 +1397,16 @@ fallback:
(subflow->mp_join || subflow->valid_csum_seen)) {
subflow->send_mp_fail = 1;
- if (!READ_ONCE(msk->allow_infinite_fallback)) {
+ if (!mptcp_subflow_fail(msk, ssk)) {
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
goto reset;
}
- mptcp_subflow_fail(msk, ssk);
WRITE_ONCE(subflow->data_avail, true);
return true;
}
- if (!READ_ONCE(msk->allow_infinite_fallback)) {
+ if (!mptcp_try_fallback(ssk)) {
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
*/
@@ -1413,8 +1424,6 @@ reset:
WRITE_ONCE(subflow->data_avail, false);
return false;
}
-
- mptcp_do_fallback(ssk);
}
skb = skb_peek(&ssk->sk_receive_queue);
@@ -1679,7 +1688,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local,
/* discard the subflow socket */
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
- WRITE_ONCE(msk->allow_infinite_fallback, false);
mptcp_stop_tout_timer(sk);
return 0;
@@ -1851,7 +1859,7 @@ static void subflow_state_change(struct sock *sk)
msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
- mptcp_do_fallback(sk);
+ WARN_ON_ONCE(!mptcp_try_fallback(sk));
pr_fallback(msk);
subflow->conn_finished = 1;
mptcp_propagate_state(parent, sk, subflow, NULL);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 201d3c4ec623..e51f0b441109 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1124,6 +1124,12 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
&nf_conntrack_hash[repl_idx]);
+ /* confirmed bit must be set after hlist add, not before:
+ * loser_ct can still be visible to other CPUs due to
+ * SLAB_TYPESAFE_BY_RCU.
+ */
+ smp_mb__before_atomic();
+ set_bit(IPS_CONFIRMED_BIT, &loser_ct->status);
NF_CT_STAT_INC(net, clash_resolve);
return NF_ACCEPT;
@@ -1260,8 +1266,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
* user context, else we insert an already 'dead' hash, blocking
* further use of that particular connection -JM.
*/
- ct->status |= IPS_CONFIRMED;
-
if (unlikely(nf_ct_is_dying(ct))) {
NF_CT_STAT_INC(net, insert_failed);
goto dying;
@@ -1293,7 +1297,7 @@ chaintoolong:
}
}
- /* Timer relative to confirmation time, not original
+ /* Timeout is relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
weird delay cases. */
ct->timeout += nfct_time_stamp;
@@ -1301,11 +1305,21 @@ chaintoolong:
__nf_conntrack_insert_prepare(ct);
/* Since the lookup is lockless, hash insertion must be done after
- * starting the timer and setting the CONFIRMED bit. The RCU barriers
- * guarantee that no other CPU can find the conntrack before the above
- * stores are visible.
+ * setting ct->timeout. The RCU barriers guarantee that no other CPU
+ * can find the conntrack before the above stores are visible.
*/
__nf_conntrack_hash_insert(ct, hash, reply_hash);
+
+ /* IPS_CONFIRMED unset means 'ct not (yet) in hash', conntrack lookups
+ * skip entries that lack this bit. This happens when a CPU is looking
+ * at a stale entry that is being recycled due to SLAB_TYPESAFE_BY_RCU
+ * or when another CPU encounters this entry right after the insertion
+ * but before the set-confirm-bit below. This bit must not be set until
+ * after __nf_conntrack_hash_insert().
+ */
+ smp_mb__before_atomic();
+ set_bit(IPS_CONFIRMED_BIT, &ct->status);
+
nf_conntrack_double_unlock(hash, reply_hash);
local_bh_enable();
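
The ordering requirement described in the comments above, as a small C11-atomics sketch (the patch expresses the same publish ordering with smp_mb__before_atomic() followed by set_bit(); the names here are illustrative): initialize and insert the object first, then set the confirmed flag with release semantics; lookups test the flag with acquire semantics and skip unconfirmed entries.

    #include <stdatomic.h>
    #include <stdbool.h>

    struct entry {
        int key;
        atomic_bool confirmed;
    };

    static void publish(struct entry *e, int key)
    {
        e->key = key;                      /* plain initialization */
        /* ... insert e into the hash table here ... */
        atomic_store_explicit(&e->confirmed, true, memory_order_release);
    }

    static bool lookup_ok(struct entry *e, int key)
    {
        if (!atomic_load_explicit(&e->confirmed, memory_order_acquire))
            return false;                  /* not (yet) published, skip */
        return e->key == key;
    }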
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 24c71ecb2179..a7240736f98e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -9686,64 +9686,6 @@ struct nf_hook_ops *nft_hook_find_ops_rcu(const struct nft_hook *hook,
}
EXPORT_SYMBOL_GPL(nft_hook_find_ops_rcu);
-static void
-nf_tables_device_notify(const struct nft_table *table, int attr,
- const char *name, const struct nft_hook *hook,
- const struct net_device *dev, int event)
-{
- struct net *net = dev_net(dev);
- struct nlmsghdr *nlh;
- struct sk_buff *skb;
- u16 flags = 0;
-
- if (!nfnetlink_has_listeners(net, NFNLGRP_NFT_DEV))
- return;
-
- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!skb)
- goto err;
-
- event = event == NETDEV_REGISTER ? NFT_MSG_NEWDEV : NFT_MSG_DELDEV;
- event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nfnl_msg_put(skb, 0, 0, event, flags, table->family,
- NFNETLINK_V0, nft_base_seq(net));
- if (!nlh)
- goto err;
-
- if (nla_put_string(skb, NFTA_DEVICE_TABLE, table->name) ||
- nla_put_string(skb, attr, name) ||
- nla_put(skb, NFTA_DEVICE_SPEC, hook->ifnamelen, hook->ifname) ||
- nla_put_string(skb, NFTA_DEVICE_NAME, dev->name))
- goto err;
-
- nlmsg_end(skb, nlh);
- nfnetlink_send(skb, net, 0, NFNLGRP_NFT_DEV,
- nlmsg_report(nlh), GFP_KERNEL);
- return;
-err:
- if (skb)
- kfree_skb(skb);
- nfnetlink_set_err(net, 0, NFNLGRP_NFT_DEV, -ENOBUFS);
-}
-
-void
-nf_tables_chain_device_notify(const struct nft_chain *chain,
- const struct nft_hook *hook,
- const struct net_device *dev, int event)
-{
- nf_tables_device_notify(chain->table, NFTA_DEVICE_CHAIN,
- chain->name, hook, dev, event);
-}
-
-static void
-nf_tables_flowtable_device_notify(const struct nft_flowtable *ft,
- const struct nft_hook *hook,
- const struct net_device *dev, int event)
-{
- nf_tables_device_notify(ft->table, NFTA_DEVICE_FLOWTABLE,
- ft->name, hook, dev, event);
-}
-
static int nft_flowtable_event(unsigned long event, struct net_device *dev,
struct nft_flowtable *flowtable, bool changename)
{
@@ -9791,7 +9733,6 @@ static int nft_flowtable_event(unsigned long event, struct net_device *dev,
list_add_tail_rcu(&ops->list, &hook->ops_list);
break;
}
- nf_tables_flowtable_device_notify(flowtable, hook, dev, event);
break;
}
return 0;
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index ae3fe87195ab..a88abae5a9de 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -127,6 +127,9 @@ static int nf_trace_fill_ct_info(struct sk_buff *nlskb,
if (nla_put_be32(nlskb, NFTA_TRACE_CT_ID, (__force __be32)id))
return -1;
+ /* Kernel implementation detail, withhold this from userspace for now */
+ status &= ~IPS_NAT_CLASH;
+
if (status && nla_put_be32(nlskb, NFTA_TRACE_CT_STATUS, htonl(status)))
return -1;
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index ac77fc21632d..e598a2a252b0 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -86,7 +86,6 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = {
[NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES,
[NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT,
[NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES,
- [NFNLGRP_NFT_DEV] = NFNL_SUBSYS_NFTABLES,
};
static struct nfnl_net *nfnl_pernet(struct net *net)
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 846d48ba8965..b16185e9a6dd 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -363,8 +363,6 @@ static int nft_netdev_event(unsigned long event, struct net_device *dev,
list_add_tail_rcu(&ops->list, &hook->ops_list);
break;
}
- nf_tables_chain_device_notify(&basechain->chain,
- hook, dev, event);
break;
}
return 0;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e8972a857e51..6332a0e06596 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -387,7 +387,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
WARN_ON(skb->sk != NULL);
skb->sk = sk;
skb->destructor = netlink_skb_destructor;
- atomic_add(skb->truesize, &sk->sk_rmem_alloc);
sk_mem_charge(sk, skb->truesize);
}
@@ -1212,41 +1211,48 @@ struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
long *timeo, struct sock *ssk)
{
+ DECLARE_WAITQUEUE(wait, current);
struct netlink_sock *nlk;
+ unsigned int rmem;
nlk = nlk_sk(sk);
+ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
- DECLARE_WAITQUEUE(wait, current);
- if (!*timeo) {
- if (!ssk || netlink_is_kernel(ssk))
- netlink_overrun(sk);
- sock_put(sk);
- kfree_skb(skb);
- return -EAGAIN;
- }
-
- __set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&nlk->wait, &wait);
+ if ((rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) &&
+ !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
+ netlink_skb_set_owner_r(skb, sk);
+ return 0;
+ }
- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
- !sock_flag(sk, SOCK_DEAD))
- *timeo = schedule_timeout(*timeo);
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&nlk->wait, &wait);
+ if (!*timeo) {
+ if (!ssk || netlink_is_kernel(ssk))
+ netlink_overrun(sk);
sock_put(sk);
+ kfree_skb(skb);
+ return -EAGAIN;
+ }
- if (signal_pending(current)) {
- kfree_skb(skb);
- return sock_intr_errno(*timeo);
- }
- return 1;
+ __set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&nlk->wait, &wait);
+ rmem = atomic_read(&sk->sk_rmem_alloc);
+
+ if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) ||
+ test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
+ !sock_flag(sk, SOCK_DEAD))
+ *timeo = schedule_timeout(*timeo);
+
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&nlk->wait, &wait);
+ sock_put(sk);
+
+ if (signal_pending(current)) {
+ kfree_skb(skb);
+ return sock_intr_errno(*timeo);
}
- netlink_skb_set_owner_r(skb, sk);
- return 0;
+
+ return 1;
}
static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
@@ -1307,6 +1313,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
ret = -ECONNREFUSED;
if (nlk->netlink_rcv != NULL) {
ret = skb->len;
+ atomic_add(skb->truesize, &sk->sk_rmem_alloc);
netlink_skb_set_owner_r(skb, sk);
NETLINK_CB(skb).sk = ssk;
netlink_deliver_tap_kernel(sk, ssk, skb);
@@ -1383,13 +1390,19 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check);
static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
struct netlink_sock *nlk = nlk_sk(sk);
+ unsigned int rmem, rcvbuf;
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
+ rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+
+ if ((rmem == skb->truesize || rmem <= rcvbuf) &&
!test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
netlink_skb_set_owner_r(skb, sk);
__netlink_sendskb(sk, skb);
- return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
+ return rmem > (rcvbuf >> 1);
}
+
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
return -1;
}
@@ -2245,6 +2258,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
struct netlink_ext_ack extack = {};
struct netlink_callback *cb;
struct sk_buff *skb = NULL;
+ unsigned int rmem, rcvbuf;
size_t max_recvmsg_len;
struct module *module;
int err = -ENOBUFS;
@@ -2258,9 +2272,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
goto errout_skb;
}
- if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
- goto errout_skb;
-
/* NLMSG_GOODSIZE is small to avoid high order allocations being
* required, but it makes sense to _attempt_ a 32KiB allocation
* to reduce number of system calls on dump operations, if user
@@ -2283,6 +2294,13 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
if (!skb)
goto errout_skb;
+ rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
+ if (rmem != skb->truesize && rmem >= rcvbuf) {
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+ goto errout_skb;
+ }
+
/* Trim skb to allocated size. User is expected to provide buffer as
* large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at
* netlink_recvmsg())). dump will pack as many smaller messages as
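
A sketch of the accounting scheme these netlink hunks converge on, using C11 atomics in place of atomic_add_return() (all names illustrative): charge the receive memory optimistically in one atomic add, inspect the returned total, and roll the charge back on failure. The total == size test keeps accepting the first buffer even when it alone exceeds the limit, matching the rmem == skb->truesize checks above.

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_uint rmem;

    static bool charge(unsigned int size, unsigned int limit)
    {
        unsigned int total = atomic_fetch_add(&rmem, size) + size;

        if (total == size || total <= limit)
            return true;

        atomic_fetch_sub(&rmem, size); /* over limit: roll the charge back */
        return false;
    }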
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index ed1508a9e093..aab107727f18 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -119,22 +119,22 @@ static int nci_uart_set_driver(struct tty_struct *tty, unsigned int driver)
memcpy(nu, nci_uart_drivers[driver], sizeof(struct nci_uart));
nu->tty = tty;
- tty->disc_data = nu;
skb_queue_head_init(&nu->tx_q);
INIT_WORK(&nu->write_work, nci_uart_write_work);
spin_lock_init(&nu->rx_lock);
ret = nu->ops.open(nu);
if (ret) {
- tty->disc_data = NULL;
kfree(nu);
+ return ret;
} else if (!try_module_get(nu->owner)) {
nu->ops.close(nu);
- tty->disc_data = NULL;
kfree(nu);
return -ENOENT;
}
- return ret;
+ tty->disc_data = nu;
+
+ return 0;
}
/* ------ LDISC part ------ */
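
The init-ordering fix above in miniature, with hypothetical names (attach, struct ctx, struct drv): the externally visible handle (tty->disc_data in the patch, ctx->priv here) is assigned only once every failure path has been cleared, so no reader can observe a half-constructed object.

    #include <stdlib.h>

    struct drv { int (*open)(struct drv *); };
    struct ctx { struct drv *priv; };

    static int attach(struct ctx *c, struct drv *d)
    {
        int ret = d->open(d);

        if (ret) {
            free(d);
            return ret;  /* c->priv never pointed at the dead object */
        }
        c->priv = d;     /* publish only on full success */
        return 0;
    }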
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index e7269a3eec79..3add108340bf 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -39,16 +39,14 @@
#include "flow_netlink.h"
#include "openvswitch_trace.h"
-DEFINE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage) = {
- .bh_lock = INIT_LOCAL_LOCK(bh_lock),
-};
+struct ovs_pcpu_storage __percpu *ovs_pcpu_storage;
/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
* space. Return NULL if out of key spaces.
*/
static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
{
- struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage);
+ struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
struct action_flow_keys *keys = &ovs_pcpu->flow_keys;
int level = ovs_pcpu->exec_level;
struct sw_flow_key *key = NULL;
@@ -94,7 +92,7 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
const struct nlattr *actions,
const int actions_len)
{
- struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos);
+ struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
struct deferred_action *da;
da = action_fifo_put(fifo);
@@ -755,7 +753,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
static int ovs_vport_output(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage.frag_data);
+ struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
struct vport *vport = data->vport;
if (skb_cow_head(skb, data->l2_len) < 0) {
@@ -807,7 +805,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
unsigned int hlen = skb_network_offset(skb);
struct ovs_frag_data *data;
- data = this_cpu_ptr(&ovs_pcpu_storage.frag_data);
+ data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
data->dst = skb->_skb_refdst;
data->vport = vport;
data->cb = *OVS_CB(skb);
@@ -1566,16 +1564,15 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
clone = clone_flow_key ? clone_key(key) : key;
if (clone) {
int err = 0;
-
if (actions) { /* Sample action */
if (clone_flow_key)
- __this_cpu_inc(ovs_pcpu_storage.exec_level);
+ __this_cpu_inc(ovs_pcpu_storage->exec_level);
err = do_execute_actions(dp, skb, clone,
actions, len);
if (clone_flow_key)
- __this_cpu_dec(ovs_pcpu_storage.exec_level);
+ __this_cpu_dec(ovs_pcpu_storage->exec_level);
} else { /* Recirc action */
clone->recirc_id = recirc_id;
ovs_dp_process_packet(skb, clone);
@@ -1611,7 +1608,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
static void process_deferred_actions(struct datapath *dp)
{
- struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos);
+ struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
/* Do not touch the FIFO in case there is no deferred actions. */
if (action_fifo_is_empty(fifo))
@@ -1642,7 +1639,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
{
int err, level;
- level = __this_cpu_inc_return(ovs_pcpu_storage.exec_level);
+ level = __this_cpu_inc_return(ovs_pcpu_storage->exec_level);
if (unlikely(level > OVS_RECURSION_LIMIT)) {
net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
ovs_dp_name(dp));
@@ -1659,6 +1656,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
process_deferred_actions(dp);
out:
- __this_cpu_dec(ovs_pcpu_storage.exec_level);
+ __this_cpu_dec(ovs_pcpu_storage->exec_level);
return err;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 6a304ae2d959..b990dc83504f 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -244,7 +244,7 @@ void ovs_dp_detach_port(struct vport *p)
/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
- struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage);
+ struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
const struct vport *p = OVS_CB(skb)->input_vport;
struct datapath *dp = p->dp;
struct sw_flow *flow;
@@ -299,7 +299,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
* avoided.
*/
if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) {
- local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
ovs_pcpu->owner = current;
ovs_pcpu_locked = true;
}
@@ -310,7 +310,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
ovs_dp_name(dp), error);
if (ovs_pcpu_locked) {
ovs_pcpu->owner = NULL;
- local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
}
stats_counter = &stats->n_hit;
@@ -689,13 +689,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
sf_acts = rcu_dereference(flow->sf_acts);
local_bh_disable();
- local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
if (IS_ENABLED(CONFIG_PREEMPT_RT))
- this_cpu_write(ovs_pcpu_storage.owner, current);
+ this_cpu_write(ovs_pcpu_storage->owner, current);
err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
if (IS_ENABLED(CONFIG_PREEMPT_RT))
- this_cpu_write(ovs_pcpu_storage.owner, NULL);
- local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ this_cpu_write(ovs_pcpu_storage->owner, NULL);
+ local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
local_bh_enable();
rcu_read_unlock();
@@ -2744,6 +2744,28 @@ static struct drop_reason_list drop_reason_list_ovs = {
.n_reasons = ARRAY_SIZE(ovs_drop_reasons),
};
+static int __init ovs_alloc_percpu_storage(void)
+{
+ unsigned int cpu;
+
+ ovs_pcpu_storage = alloc_percpu(*ovs_pcpu_storage);
+ if (!ovs_pcpu_storage)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ struct ovs_pcpu_storage *ovs_pcpu;
+
+ ovs_pcpu = per_cpu_ptr(ovs_pcpu_storage, cpu);
+ local_lock_init(&ovs_pcpu->bh_lock);
+ }
+ return 0;
+}
+
+static void ovs_free_percpu_storage(void)
+{
+ free_percpu(ovs_pcpu_storage);
+}
+
static int __init dp_init(void)
{
int err;
@@ -2753,6 +2775,10 @@ static int __init dp_init(void)
pr_info("Open vSwitch switching datapath\n");
+ err = ovs_alloc_percpu_storage();
+ if (err)
+ goto error;
+
err = ovs_internal_dev_rtnl_link_register();
if (err)
goto error;
@@ -2799,6 +2825,7 @@ error_flow_exit:
error_unreg_rtnl_link:
ovs_internal_dev_rtnl_link_unregister();
error:
+ ovs_free_percpu_storage();
return err;
}
@@ -2813,6 +2840,7 @@ static void dp_cleanup(void)
ovs_vport_exit();
ovs_flow_exit();
ovs_internal_dev_rtnl_link_unregister();
+ ovs_free_percpu_storage();
}
module_init(dp_init);
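
The mechanical change running through actions.c and datapath.c follows from the storage class: a static DEFINE_PER_CPU variable is accessed by address (this_cpu_ptr(&var)), while a dynamically allocated __percpu pointer is passed by value (this_cpu_ptr(ptr)), and the per-CPU memory now exists only while the module is loaded. A hedged sketch of the two access forms (hypothetical variable names; callers must be pinned to a CPU, e.g. running in BH context, as the datapath is):

/* Static flavour: the symbol itself is per-CPU, so take its address. */
DEFINE_PER_CPU(struct ovs_pcpu_storage, static_storage);

/* Dynamic flavour: the pointer already carries the __percpu annotation. */
static struct ovs_pcpu_storage __percpu *dyn_storage;

static void touch_both(void)
{
	struct ovs_pcpu_storage *a = this_cpu_ptr(&static_storage);
	struct ovs_pcpu_storage *b = this_cpu_ptr(dyn_storage);

	a->exec_level++;
	b->exec_level++;
}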
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 1b5348b0f559..cfeb817a1889 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -220,7 +220,8 @@ struct ovs_pcpu_storage {
struct task_struct *owner;
local_lock_t bh_lock;
};
-DECLARE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage);
+
+extern struct ovs_pcpu_storage __percpu *ovs_pcpu_storage;
/**
* enum ovs_pkt_hash_types - hash info to include with a packet
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3d43f3eae759..be608f07441f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2785,7 +2785,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
int len_sum = 0;
int status = TP_STATUS_AVAILABLE;
int hlen, tlen, copylen = 0;
- long timeo = 0;
+ long timeo;
mutex_lock(&po->pg_vec_lock);
@@ -2839,22 +2839,28 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz)
size_max = dev->mtu + reserve + VLAN_HLEN;
+ timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
reinit_completion(&po->skb_completion);
do {
ph = packet_current_frame(po, &po->tx_ring,
TP_STATUS_SEND_REQUEST);
if (unlikely(ph == NULL)) {
- if (need_wait && skb) {
- timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
+ /* Note: packet_read_pending() might be slow if we
+ * have to call it, since it reads a per-CPU variable.
+ * On the fast path we never call it; only when ph is
+ * NULL do we need to check the pending_refcnt.
+ */
+ if (need_wait && packet_read_pending(&po->tx_ring)) {
timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
if (timeo <= 0) {
err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
goto out_put;
}
- }
- /* check for additional frames */
- continue;
+ /* check for additional frames */
+ continue;
+ } else
+ break;
}
skb = NULL;
@@ -2943,14 +2949,7 @@ tpacket_error:
}
packet_increment_head(&po->tx_ring);
len_sum += tp_len;
- } while (likely((ph != NULL) ||
- /* Note: packet_read_pending() might be slow if we have
- * to call it as it's per_cpu variable, but in fast-path
- * we already short-circuit the loop with the first
- * condition, and luckily don't have to go that path
- * anyway.
- */
- (need_wait && packet_read_pending(&po->tx_ring))));
+ } while (1);
err = len_sum;
goto out_put;
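
Hoisting sock_sndtimeo() above the loop and reassigning timeo from the return value of wait_for_completion_interruptible_timeout() gives the whole transmit pass a single cumulative budget: the helper returns the jiffies left on completion, 0 on timeout, and -ERESTARTSYS on a signal. The idiom in isolation (try_send_one() and work_pending() are hypothetical stand-ins):

static int send_all(struct completion *done, long budget)
{
	while (work_pending()) {
		if (try_send_one() == 0)
			continue;	/* fast path: no sleeping needed */
		/* Whatever remains of the budget carries into the next wait. */
		budget = wait_for_completion_interruptible_timeout(done, budget);
		if (budget == 0)
			return -ETIMEDOUT;
		if (budget < 0)
			return budget;	/* -ERESTARTSYS */
	}
	return 0;
}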
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 53a858478e22..62527e1ebb88 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -826,6 +826,7 @@ static struct sock *pep_sock_accept(struct sock *sk,
}
/* Check for duplicate pipe handle */
+ pn_skb_get_dst_sockaddr(skb, &dst);
newsk = pep_find_pipe(&pn->hlist, &dst, pipe_handle);
if (unlikely(newsk)) {
__sock_put(newsk);
@@ -850,7 +851,6 @@ static struct sock *pep_sock_accept(struct sock *sk,
newsk->sk_destruct = pipe_destruct;
newpn = pep_sk(newsk);
- pn_skb_get_dst_sockaddr(skb, &dst);
pn_skb_get_src_sockaddr(skb, &src);
newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst);
newpn->pn_sk.dobject = pn_sockaddr_get_object(&src);
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 2dd6bd3a3011..b72bf8a08d48 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -497,22 +497,15 @@ void rose_rt_device_down(struct net_device *dev)
t = rose_node;
rose_node = rose_node->next;
- for (i = 0; i < t->count; i++) {
+ for (i = t->count - 1; i >= 0; i--) {
if (t->neighbour[i] != s)
continue;
t->count--;
- switch (i) {
- case 0:
- t->neighbour[0] = t->neighbour[1];
- fallthrough;
- case 1:
- t->neighbour[1] = t->neighbour[2];
- break;
- case 2:
- break;
- }
+ memmove(&t->neighbour[i], &t->neighbour[i + 1],
+ sizeof(t->neighbour[0]) *
+ (t->count - i));
}
if (t->count <= 0)
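
The open-coded switch is replaced by the standard remove-one-element idiom; scanning downward matters because each removal shifts the tail left, and a descending index never moves a not-yet-inspected element past the cursor. Userspace equivalent:

#include <string.h>

/* Remove a[i] from an n-element array, preserving order. */
static void remove_at(int *a, int *n, int i)
{
	(*n)--;
	memmove(&a[i], &a[i + 1], sizeof(a[0]) * (*n - i));
}

/* Delete every match; the descending scan means a removal never
 * shifts an uninspected element past the cursor.
 */
static void remove_all(int *a, int *n, int victim)
{
	for (int i = *n - 1; i >= 0; i--)
		if (a[i] == victim)
			remove_at(a, n, i);
}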
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 5bd3922c310d..5b7342d43486 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -44,6 +44,7 @@ enum rxrpc_skb_mark {
RXRPC_SKB_MARK_SERVICE_CONN_SECURED, /* Service connection response has been verified */
RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */
RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */
+ RXRPC_SKB_MARK_REJECT_CONN_ABORT, /* Reject with connection ABORT (code in skb->priority) */
};
/*
@@ -361,12 +362,15 @@ struct rxrpc_local {
struct list_head new_client_calls; /* Newly created client calls need connection */
spinlock_t client_call_lock; /* Lock for ->new_client_calls */
struct sockaddr_rxrpc srx; /* local address */
- /* Provide a kvec table sufficiently large to manage either a DATA
- * packet with a maximum set of jumbo subpackets or a PING ACK padded
- * out to 64K with zeropages for PMTUD.
- */
- struct kvec kvec[1 + RXRPC_MAX_NR_JUMBO > 3 + 16 ?
- 1 + RXRPC_MAX_NR_JUMBO : 3 + 16];
+ union {
+ /* Provide a kvec table sufficiently large to manage either a
+ * DATA packet with a maximum set of jumbo subpackets or a PING
+ * ACK padded out to 64K with zeropages for PMTUD.
+ */
+ struct kvec kvec[1 + RXRPC_MAX_NR_JUMBO > 3 + 16 ?
+ 1 + RXRPC_MAX_NR_JUMBO : 3 + 16];
+ struct bio_vec bvec[3 + 16];
+ };
};
/*
@@ -1250,6 +1254,8 @@ int rxrpc_encap_rcv(struct sock *, struct sk_buff *);
void rxrpc_error_report(struct sock *);
bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
s32 abort_code, int err);
+bool rxrpc_direct_conn_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
+ s32 abort_code, int err);
int rxrpc_io_thread(void *data);
void rxrpc_post_response(struct rxrpc_connection *conn, struct sk_buff *skb);
static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local)
@@ -1380,6 +1386,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *,
const struct sockaddr_rxrpc *);
struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
struct sockaddr_rxrpc *srx, gfp_t gfp);
+void rxrpc_assess_MTU_size(struct rxrpc_local *local, struct rxrpc_peer *peer);
struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t,
enum rxrpc_peer_trace);
void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer);
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index a4b363b47cca..00982a030744 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -149,6 +149,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
id_in_use:
write_unlock(&rx->call_lock);
+ rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EBADSLT);
rxrpc_cleanup_call(call);
_leave(" = -EBADSLT");
return -EBADSLT;
@@ -218,6 +219,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
tail = b->call_backlog_tail;
while (CIRC_CNT(head, tail, size) > 0) {
struct rxrpc_call *call = b->call_backlog[tail];
+ rxrpc_see_call(call, rxrpc_call_see_discard);
rcu_assign_pointer(call->socket, rx);
if (rx->app_ops &&
rx->app_ops->discard_new_call) {
@@ -254,6 +256,9 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
unsigned short call_tail, conn_tail, peer_tail;
unsigned short call_count, conn_count;
+ if (!b)
+ return NULL;
+
/* #calls >= #conns >= #peers must hold true. */
call_head = smp_load_acquire(&b->call_backlog_head);
call_tail = b->call_backlog_tail;
@@ -369,8 +374,8 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local,
spin_lock(&rx->incoming_lock);
if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED ||
rx->sk.sk_state == RXRPC_CLOSE) {
- rxrpc_direct_abort(skb, rxrpc_abort_shut_down,
- RX_INVALID_OPERATION, -ESHUTDOWN);
+ rxrpc_direct_conn_abort(skb, rxrpc_abort_shut_down,
+ RX_INVALID_OPERATION, -ESHUTDOWN);
goto no_call;
}
@@ -402,6 +407,7 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local,
spin_unlock(&rx->incoming_lock);
read_unlock_irq(&local->services_lock);
+ rxrpc_assess_MTU_size(local, call->peer);
if (hlist_unhashed(&call->error_link)) {
spin_lock_irq(&call->peer->lock);
@@ -416,12 +422,12 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local,
unsupported_service:
read_unlock_irq(&local->services_lock);
- return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered,
- RX_INVALID_OPERATION, -EOPNOTSUPP);
+ return rxrpc_direct_conn_abort(skb, rxrpc_abort_service_not_offered,
+ RX_INVALID_OPERATION, -EOPNOTSUPP);
unsupported_security:
read_unlock_irq(&local->services_lock);
- return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered,
- RX_INVALID_OPERATION, -EKEYREJECTED);
+ return rxrpc_direct_conn_abort(skb, rxrpc_abort_service_not_offered,
+ RX_INVALID_OPERATION, -EKEYREJECTED);
no_call:
spin_unlock(&rx->incoming_lock);
read_unlock_irq(&local->services_lock);
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 15067ff7b1f2..918f41d97a2f 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -561,7 +561,7 @@ static void rxrpc_cleanup_rx_buffers(struct rxrpc_call *call)
void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
{
struct rxrpc_connection *conn = call->conn;
- bool put = false, putu = false;
+ bool putu = false;
_enter("{%d,%d}", call->debug_id, refcount_read(&call->ref));
@@ -573,23 +573,13 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
rxrpc_put_call_slot(call);
- /* Make sure we don't get any more notifications */
+ /* Note that at this point the call may still be on the socket receive
+ * queue, or may have been re-added to it. recvmsg() must discard
+ * released calls; the CALL_RELEASED flag should prevent further
+ * notifications.
+ */
spin_lock_irq(&rx->recvmsg_lock);
-
- if (!list_empty(&call->recvmsg_link)) {
- _debug("unlinking once-pending call %p { e=%lx f=%lx }",
- call, call->events, call->flags);
- list_del(&call->recvmsg_link);
- put = true;
- }
-
- /* list_empty() must return false in rxrpc_notify_socket() */
- call->recvmsg_link.next = NULL;
- call->recvmsg_link.prev = NULL;
-
spin_unlock_irq(&rx->recvmsg_lock);
- if (put)
- rxrpc_put_call(call, rxrpc_call_put_unnotify);
write_lock(&rx->call_lock);
@@ -638,6 +628,12 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
rxrpc_put_call(call, rxrpc_call_put_release_sock);
}
+ while ((call = list_first_entry_or_null(&rx->recvmsg_q,
+ struct rxrpc_call, recvmsg_link))) {
+ list_del_init(&call->recvmsg_link);
+ rxrpc_put_call(call, rxrpc_call_put_release_recvmsg_q);
+ }
+
_leave("");
}
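
The added loop in rxrpc_release_calls_on_socket() is the usual pop-and-release drain: list_first_entry_or_null() yields NULL once the queue is empty, and list_del_init() leaves the link self-pointing so later list_empty() checks remain true. The shape of the idiom, with hypothetical types:

struct item {
	struct list_head link;
};

static void drain(struct list_head *q)
{
	struct item *it;

	while ((it = list_first_entry_or_null(q, struct item, link))) {
		list_del_init(&it->link);	/* keeps list_empty() honest */
		kfree(it);			/* or drop the final reference */
	}
}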
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 27b650d30f4d..e939ecf417c4 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -97,6 +97,20 @@ bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
return false;
}
+/*
+ * Directly produce a connection abort from a packet.
+ */
+bool rxrpc_direct_conn_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
+ s32 abort_code, int err)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_abort(0, why, sp->hdr.cid, 0, sp->hdr.seq, abort_code, err);
+ skb->mark = RXRPC_SKB_MARK_REJECT_CONN_ABORT;
+ skb->priority = abort_code;
+ return false;
+}
+
static bool rxrpc_bad_message(struct sk_buff *skb, enum rxrpc_abort_reason why)
{
return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EBADMSG);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 0af19bcdc80a..8b5903b6e481 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -814,6 +814,9 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
__be32 code;
int ret, ioc;
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
+ return; /* Never abort an abort. */
+
rxrpc_see_skb(skb, rxrpc_skb_see_reject);
iov[0].iov_base = &whdr;
@@ -826,7 +829,13 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
msg.msg_controllen = 0;
msg.msg_flags = 0;
- memset(&whdr, 0, sizeof(whdr));
+ whdr = (struct rxrpc_wire_header) {
+ .epoch = htonl(sp->hdr.epoch),
+ .cid = htonl(sp->hdr.cid),
+ .callNumber = htonl(sp->hdr.callNumber),
+ .serviceId = htons(sp->hdr.serviceId),
+ .flags = ~sp->hdr.flags & RXRPC_CLIENT_INITIATED,
+ };
switch (skb->mark) {
case RXRPC_SKB_MARK_REJECT_BUSY:
@@ -834,6 +843,9 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
size = sizeof(whdr);
ioc = 1;
break;
+ case RXRPC_SKB_MARK_REJECT_CONN_ABORT:
+ whdr.callNumber = 0;
+ fallthrough;
case RXRPC_SKB_MARK_REJECT_ABORT:
whdr.type = RXRPC_PACKET_TYPE_ABORT;
code = htonl(skb->priority);
@@ -847,14 +859,6 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
msg.msg_namelen = srx.transport_len;
- whdr.epoch = htonl(sp->hdr.epoch);
- whdr.cid = htonl(sp->hdr.cid);
- whdr.callNumber = htonl(sp->hdr.callNumber);
- whdr.serviceId = htons(sp->hdr.serviceId);
- whdr.flags = sp->hdr.flags;
- whdr.flags ^= RXRPC_CLIENT_INITIATED;
- whdr.flags &= RXRPC_CLIENT_INITIATED;
-
iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size);
ret = do_udp_sendmsg(local->socket, &msg, size);
if (ret < 0)
@@ -924,7 +928,7 @@ void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response
{
struct rxrpc_skb_priv *sp = rxrpc_skb(response);
struct scatterlist sg[16];
- struct bio_vec bvec[16];
+ struct bio_vec *bvec = conn->local->bvec;
struct msghdr msg;
size_t len = sp->resp.len;
__be32 wserial;
@@ -938,6 +942,9 @@ void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response
if (ret < 0)
goto fail;
nr_sg = ret;
+ ret = -EIO;
+ if (WARN_ON_ONCE(nr_sg > ARRAY_SIZE(conn->local->bvec)))
+ goto fail;
for (int i = 0; i < nr_sg; i++)
bvec_set_page(&bvec[i], sg_page(&sg[i]), sg[i].length, sg[i].offset);
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index e2f35e6c04d6..366431b0736c 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -149,8 +149,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
* assess the MTU size for the network interface through which this peer is
* reached
*/
-static void rxrpc_assess_MTU_size(struct rxrpc_local *local,
- struct rxrpc_peer *peer)
+void rxrpc_assess_MTU_size(struct rxrpc_local *local, struct rxrpc_peer *peer)
{
struct net *net = local->net;
struct dst_entry *dst;
@@ -277,8 +276,6 @@ static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer,
peer->hdrsize += sizeof(struct rxrpc_wire_header);
peer->max_data = peer->if_mtu - peer->hdrsize;
-
- rxrpc_assess_MTU_size(local, peer);
}
/*
@@ -297,6 +294,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local,
if (peer) {
memcpy(&peer->srx, srx, sizeof(*srx));
rxrpc_init_peer(local, peer, hash_key);
+ rxrpc_assess_MTU_size(local, peer);
}
_leave(" = %p", peer);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 86a27fb55a1c..7fa7e77f6bb9 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -29,6 +29,10 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
if (!list_empty(&call->recvmsg_link))
return;
+ if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
+ rxrpc_see_call(call, rxrpc_call_see_notify_released);
+ return;
+ }
rcu_read_lock();
@@ -447,6 +451,16 @@ try_again:
goto try_again;
}
+ rxrpc_see_call(call, rxrpc_call_see_recvmsg);
+ if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
+ rxrpc_see_call(call, rxrpc_call_see_already_released);
+ list_del_init(&call->recvmsg_link);
+ spin_unlock_irq(&rx->recvmsg_lock);
+ release_sock(&rx->sk);
+ trace_rxrpc_recvmsg(call->debug_id, rxrpc_recvmsg_unqueue, 0);
+ rxrpc_put_call(call, rxrpc_call_put_recvmsg);
+ goto try_again;
+ }
if (!(flags & MSG_PEEK))
list_del_init(&call->recvmsg_link);
else
@@ -470,8 +484,13 @@ try_again:
release_sock(&rx->sk);
- if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
- BUG();
+ if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
+ rxrpc_see_call(call, rxrpc_call_see_already_released);
+ mutex_unlock(&call->user_mutex);
+ if (!(flags & MSG_PEEK))
+ rxrpc_put_call(call, rxrpc_call_put_recvmsg);
+ goto try_again;
+ }
ret = rxrpc_recvmsg_user_id(call, msg, flags);
if (ret < 0)
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index 078d91a6b77f..2bfbf2b2bb37 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -140,15 +140,15 @@ const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *rx,
sec = rxrpc_security_lookup(sp->hdr.securityIndex);
if (!sec) {
- rxrpc_direct_abort(skb, rxrpc_abort_unsupported_security,
- RX_INVALID_OPERATION, -EKEYREJECTED);
+ rxrpc_direct_conn_abort(skb, rxrpc_abort_unsupported_security,
+ RX_INVALID_OPERATION, -EKEYREJECTED);
return NULL;
}
if (sp->hdr.securityIndex != RXRPC_SECURITY_NONE &&
!rx->securities) {
- rxrpc_direct_abort(skb, rxrpc_abort_no_service_key,
- sec->no_key_abort, -EKEYREJECTED);
+ rxrpc_direct_conn_abort(skb, rxrpc_abort_no_service_key,
+ sec->no_key_abort, -EKEYREJECTED);
return NULL;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c5e3673aadbe..d7c767b861a4 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -336,17 +336,22 @@ out:
return q;
}
-static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
+static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid,
+ struct netlink_ext_ack *extack)
{
unsigned long cl;
const struct Qdisc_class_ops *cops = p->ops->cl_ops;
- if (cops == NULL)
- return NULL;
+ if (cops == NULL) {
+ NL_SET_ERR_MSG(extack, "Parent qdisc is not classful");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
cl = cops->find(p, classid);
- if (cl == 0)
- return NULL;
+ if (cl == 0) {
+ NL_SET_ERR_MSG(extack, "Specified class not found");
+ return ERR_PTR(-ENOENT);
+ }
return cops->leaf(p, cl);
}
@@ -596,16 +601,6 @@ out:
qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
-void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
-{
- if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
- pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
- txt, qdisc->ops->id, qdisc->handle >> 16);
- qdisc->flags |= TCQ_F_WARN_NONWC;
- }
-}
-EXPORT_SYMBOL(qdisc_warn_nonwc);
-
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
@@ -780,15 +775,12 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
- bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
bool notify;
int drops;
- if (n == 0 && len == 0)
- return;
drops = max_t(int, n, 0);
rcu_read_lock();
while ((parentid = sch->parent)) {
@@ -797,17 +789,8 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
if (sch->flags & TCQ_F_NOPARENT)
break;
- /* Notify parent qdisc only if child qdisc becomes empty.
- *
- * If child was empty even before update then backlog
- * counter is screwed and we skip notification because
- * parent class is already passive.
- *
- * If the original child was offloaded then it is allowed
- * to be seem as empty, so the parent is notified anyway.
- */
- notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
- !qdisc_is_offloaded);
+ /* Notify parent qdisc only if child qdisc becomes empty. */
+ notify = !sch->q.qlen;
/* TODO: perform the search on a per txq basis */
sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
@@ -816,6 +799,9 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
}
cops = sch->ops->cl_ops;
if (notify && cops->qlen_notify) {
+ /* Note that qlen_notify must be idempotent as it may get called
+ * multiple times.
+ */
cl = cops->find(sch, parentid);
cops->qlen_notify(sch, cl);
}
@@ -1499,7 +1485,7 @@ static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
return -ENOENT;
}
- q = qdisc_leaf(p, clid);
+ q = qdisc_leaf(p, clid, extack);
} else if (dev_ingress_queue(dev)) {
q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
}
@@ -1510,6 +1496,8 @@ static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
return -ENOENT;
}
+ if (IS_ERR(q))
+ return PTR_ERR(q);
if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
NL_SET_ERR_MSG(extack, "Invalid handle");
@@ -1611,7 +1599,9 @@ static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
return -ENOENT;
}
- q = qdisc_leaf(p, clid);
+ q = qdisc_leaf(p, clid, extack);
+ if (IS_ERR(q))
+ return PTR_ERR(q);
} else if (dev_ingress_queue_create(dev)) {
q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
}
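
qdisc_leaf() now distinguishes "parent is not classful" from "class not found" by encoding an errno in the returned pointer, so the call sites above must check IS_ERR() before treating the result as a qdisc. The convention in miniature (find_qdisc() is a hypothetical lookup):

static struct Qdisc *lookup(int id)
{
	if (id < 0)
		return ERR_PTR(-EINVAL);	/* hard error, reason attached */
	return find_qdisc(id);			/* may be NULL: simply absent */
}

static int use(int id)
{
	struct Qdisc *q = lookup(id);

	if (IS_ERR(q))
		return PTR_ERR(q);		/* propagate the encoded errno */
	if (!q)
		return -ENOENT;
	return 0;
}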
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 2c069f0181c6..037f764822b9 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -661,7 +661,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
for (i = q->nbands; i < oldbands; i++) {
if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
list_del_init(&q->classes[i].alist);
- qdisc_tree_flush_backlog(q->classes[i].qdisc);
+ qdisc_purge_queue(q->classes[i].qdisc);
}
WRITE_ONCE(q->nstrict, nstrict);
memcpy(q->prio2band, priomap, sizeof(priomap));
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 5a7745170e84..d8fd35da32a7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -835,22 +835,6 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
}
}
-static unsigned int
-qdisc_peek_len(struct Qdisc *sch)
-{
- struct sk_buff *skb;
- unsigned int len;
-
- skb = sch->ops->peek(sch);
- if (unlikely(skb == NULL)) {
- qdisc_warn_nonwc("qdisc_peek_len", sch);
- return 0;
- }
- len = qdisc_pkt_len(skb);
-
- return len;
-}
-
static void
hfsc_adjust_levels(struct hfsc_class *cl)
{
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 14bf71f57057..c968ea763774 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -821,7 +821,9 @@ static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
u32 *pid;
} stk[TC_HTB_MAXDEPTH], *sp = stk;
- BUG_ON(!hprio->row.rb_node);
+ if (unlikely(!hprio->row.rb_node))
+ return NULL;
+
sp->root = hprio->row.rb_node;
sp->pptr = &hprio->ptr;
sp->pid = &hprio->last_ptr_id;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index cc30f7a32f1a..9e2b9a490db2 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -211,7 +211,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
for (i = q->bands; i < oldbands; i++)
- qdisc_tree_flush_backlog(q->queues[i]);
+ qdisc_purge_queue(q->queues[i]);
for (i = oldbands; i < q->bands; i++) {
q->queues[i] = queues[i];
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index bf1282cb22eb..2255355e51d3 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -412,7 +412,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
bool existing = false;
struct nlattr *tb[TCA_QFQ_MAX + 1];
struct qfq_aggregate *new_agg = NULL;
- u32 weight, lmax, inv_w;
+ u32 weight, lmax, inv_w, old_weight, old_lmax;
int err;
int delta_w;
@@ -443,12 +443,16 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
inv_w = ONE_FP / weight;
weight = ONE_FP / inv_w;
- if (cl != NULL &&
- lmax == cl->agg->lmax &&
- weight == cl->agg->class_weight)
- return 0; /* nothing to change */
+ if (cl != NULL) {
+ sch_tree_lock(sch);
+ old_weight = cl->agg->class_weight;
+ old_lmax = cl->agg->lmax;
+ sch_tree_unlock(sch);
+ if (lmax == old_lmax && weight == old_weight)
+ return 0; /* nothing to change */
+ }
- delta_w = weight - (cl ? cl->agg->class_weight : 0);
+ delta_w = weight - (cl ? old_weight : 0);
if (q->wsum + delta_w > QFQ_MAX_WSUM) {
NL_SET_ERR_MSG_FMT_MOD(extack,
@@ -532,9 +536,6 @@ destroy_class:
static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
{
- struct qfq_sched *q = qdisc_priv(sch);
-
- qfq_rm_from_agg(q, cl);
gen_kill_estimator(&cl->rate_est);
qdisc_put(cl->qdisc);
kfree(cl);
@@ -555,6 +556,7 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg,
qdisc_purge_queue(cl->qdisc);
qdisc_class_hash_remove(&q->clhash, &cl->common);
+ qfq_rm_from_agg(q, cl);
sch_tree_unlock(sch);
@@ -625,6 +627,7 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
{
struct qfq_class *cl = (struct qfq_class *)arg;
struct nlattr *nest;
+ u32 class_weight, lmax;
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_handle = cl->common.classid;
@@ -633,8 +636,13 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
- nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
+
+ sch_tree_lock(sch);
+ class_weight = cl->agg->class_weight;
+ lmax = cl->agg->lmax;
+ sch_tree_unlock(sch);
+ if (nla_put_u32(skb, TCA_QFQ_WEIGHT, class_weight) ||
+ nla_put_u32(skb, TCA_QFQ_LMAX, lmax))
goto nla_put_failure;
return nla_nest_end(skb, nest);
@@ -651,8 +659,10 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
memset(&xstats, 0, sizeof(xstats));
+ sch_tree_lock(sch);
xstats.weight = cl->agg->class_weight;
xstats.lmax = cl->agg->lmax;
+ sch_tree_unlock(sch);
if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
@@ -989,7 +999,7 @@ static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */
list_del_init(&cl->alist);
- else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
+ else if (cl->deficit < qdisc_peek_len(cl->qdisc)) {
cl->deficit += agg->lmax;
list_move_tail(&cl->alist, &agg->active);
}
@@ -1491,6 +1501,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch)
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
common.hnode) {
+ qfq_rm_from_agg(q, cl);
qfq_destroy_class(sch, cl);
}
}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 339d70b4a4c5..479c42d11083 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -285,7 +285,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb,
q->userbits = userbits;
q->limit = ctl->limit;
if (child) {
- qdisc_tree_flush_backlog(q->qdisc);
+ qdisc_purge_queue(q->qdisc);
old_child = q->qdisc;
q->qdisc = child;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index a8081492d671..96eb2f122973 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -310,7 +310,10 @@ drop:
/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
x = q->tail->next;
slot = &q->slots[x];
- q->tail->next = slot->next;
+ if (slot->next == x)
+ q->tail = NULL; /* no more active slots */
+ else
+ q->tail->next = slot->next;
q->ht[slot->hash] = SFQ_EMPTY_SLOT;
goto drop;
}
@@ -653,6 +656,14 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
return -EINVAL;
}
+
+ if (ctl->perturb_period < 0 ||
+ ctl->perturb_period > INT_MAX / HZ) {
+ NL_SET_ERR_MSG_MOD(extack, "invalid perturb period");
+ return -EINVAL;
+ }
+ perturb_period = ctl->perturb_period * HZ;
+
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
return -EINVAL;
@@ -669,14 +680,12 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
headdrop = q->headdrop;
maxdepth = q->maxdepth;
maxflows = q->maxflows;
- perturb_period = q->perturb_period;
quantum = q->quantum;
flags = q->flags;
/* update and validate configuration */
if (ctl->quantum)
quantum = ctl->quantum;
- perturb_period = ctl->perturb_period * HZ;
if (ctl->flows)
maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
if (ctl->divisor) {
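
The new bound rejects any ctl->perturb_period whose multiplication by HZ would overflow a signed int; comparing against INT_MAX / HZ first is the standard way to validate without performing the overflowing multiply. A userspace illustration:

#include <limits.h>

/* Convert seconds to ticks, refusing values that would overflow int. */
static int secs_to_ticks(int secs, int hz, int *out)
{
	if (secs < 0 || secs > INT_MAX / hz)
		return -1;
	*out = secs * hz;	/* now provably in range */
	return 0;
}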
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 14021b812329..2b14c81a87e5 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1328,13 +1328,15 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
stab = rtnl_dereference(q->root->stab);
- oper = rtnl_dereference(q->oper_sched);
+ rcu_read_lock();
+ oper = rcu_dereference(q->oper_sched);
if (oper)
taprio_update_queue_max_sdu(q, oper, stab);
- admin = rtnl_dereference(q->admin_sched);
+ admin = rcu_dereference(q->admin_sched);
if (admin)
taprio_update_queue_max_sdu(q, admin, stab);
+ rcu_read_unlock();
break;
}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index dc26b22d53c7..4c977f049670 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -452,7 +452,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
if (child) {
- qdisc_tree_flush_backlog(q->qdisc);
+ qdisc_purge_queue(q->qdisc);
old = q->qdisc;
q->qdisc = child;
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 3760131f1484..1882bab8e00e 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -30,6 +30,10 @@
#include <linux/splice.h>
#include <net/sock.h>
+#include <net/inet_common.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#endif
#include <net/tcp.h>
#include <net/smc.h>
#include <asm/ioctls.h>
@@ -360,6 +364,16 @@ static void smc_destruct(struct sock *sk)
return;
if (!sock_flag(sk, SOCK_DEAD))
return;
+ switch (sk->sk_family) {
+ case AF_INET:
+ inet_sock_destruct(sk);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ inet6_sock_destruct(sk);
+ break;
+#endif
+ }
}
static struct lock_class_key smc_key;
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 78ae10d06ed2..2c9084963739 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -283,10 +283,10 @@ struct smc_connection {
};
struct smc_sock { /* smc sock container */
- struct sock sk;
-#if IS_ENABLED(CONFIG_IPV6)
- struct ipv6_pinfo *pinet6;
-#endif
+ union {
+ struct sock sk;
+ struct inet_sock icsk_inet;
+ };
struct socket *clcsock; /* internal tcp socket */
void (*clcsk_state_change)(struct sock *sk);
/* original stat_change fct. */
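
Folding struct sock into an anonymous union with struct inet_sock lets the same storage be viewed generically (sk) or as an inet socket (icsk_inet) whose own first member is that identical struct sock; both views start at offset zero, which is what allows smc_destruct() above to hand the sock to inet_sock_destruct(). The layout trick in isolation, with hypothetical types:

#include <assert.h>
#include <stddef.h>

struct base {
	int id;
};

struct derived {
	struct base b;		/* must be the first member */
	int extra;
};

struct wrapper {
	union {
		struct base b;		/* generic view */
		struct derived d;	/* specialised view; d.b aliases b */
	};
};

static_assert(offsetof(struct wrapper, b) == offsetof(struct wrapper, d.b),
	      "both views share offset zero");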
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 0fa244f16876..5c095cb8cb20 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -887,25 +887,16 @@ static void gss_pipe_dentry_destroy(struct dentry *dir,
struct rpc_pipe_dir_object *pdo)
{
struct gss_pipe *gss_pipe = pdo->pdo_data;
- struct rpc_pipe *pipe = gss_pipe->pipe;
- if (pipe->dentry != NULL) {
- rpc_unlink(pipe->dentry);
- pipe->dentry = NULL;
- }
+ rpc_unlink(gss_pipe->pipe);
}
static int gss_pipe_dentry_create(struct dentry *dir,
struct rpc_pipe_dir_object *pdo)
{
struct gss_pipe *p = pdo->pdo_data;
- struct dentry *dentry;
- dentry = rpc_mkpipe_dentry(dir, p->name, p->clnt, p->pipe);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
- p->pipe->dentry = dentry;
- return 0;
+ return rpc_mkpipe_dentry(dir, p->name, p->clnt, p->pipe);
}
static const struct rpc_pipe_dir_object_ops gss_pipe_dir_object_ops = {
@@ -1724,7 +1715,7 @@ gss_validate(struct rpc_task *task, struct xdr_stream *xdr)
maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[0], seq, p, len);
/* RFC 2203 5.3.3.1 - compute the checksum of each sequence number in the cache */
while (unlikely(maj_stat == GSS_S_BAD_SIG && i < task->tk_rqstp->rq_seqno_count))
- maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[i], seq, p, len);
+ maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[i++], seq, p, len);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat)
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 73a90ad873fb..e82212f6b562 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1628,7 +1628,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
int ret;
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
- rqstp->rq_auth_stat = rpc_autherr_badcred;
+ rqstp->rq_auth_stat = rpc_autherr_failed;
if (!svcdata)
svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL);
if (!svcdata)
@@ -1638,6 +1638,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
svcdata->rsci = NULL;
gc = &svcdata->clcred;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
if (!svcauth_gss_decode_credbody(&rqstp->rq_arg_stream, gc, &rpcstart))
goto auth_err;
if (gc->gc_v != RPC_GSS_VERSION)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 21426c3049d3..8ca354ecfd02 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -112,47 +112,46 @@ static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
}
}
-static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
+static int rpc_setup_pipedir_sb(struct super_block *sb,
struct rpc_clnt *clnt)
{
static uint32_t clntid;
const char *dir_name = clnt->cl_program->pipe_dir_name;
char name[15];
- struct dentry *dir, *dentry;
+ struct dentry *dir;
+ int err;
dir = rpc_d_lookup_sb(sb, dir_name);
if (dir == NULL) {
pr_info("RPC: pipefs directory doesn't exist: %s\n", dir_name);
- return dir;
+ return -ENOENT;
}
for (;;) {
snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++);
name[sizeof(name) - 1] = '\0';
- dentry = rpc_create_client_dir(dir, name, clnt);
- if (!IS_ERR(dentry))
+ err = rpc_create_client_dir(dir, name, clnt);
+ if (!err)
break;
- if (dentry == ERR_PTR(-EEXIST))
+ if (err == -EEXIST)
continue;
printk(KERN_INFO "RPC: Couldn't create pipefs entry"
- " %s/%s, error %ld\n",
- dir_name, name, PTR_ERR(dentry));
+ " %s/%s, error %d\n",
+ dir_name, name, err);
break;
}
dput(dir);
- return dentry;
+ return err;
}
static int
rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt)
{
- struct dentry *dentry;
-
clnt->pipefs_sb = pipefs_sb;
if (clnt->cl_program->pipe_dir_name != NULL) {
- dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+ int err = rpc_setup_pipedir_sb(pipefs_sb, clnt);
+ if (err && err != -ENOENT)
+ return err;
}
return 0;
}
@@ -180,16 +179,9 @@ static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event,
struct super_block *sb)
{
- struct dentry *dentry;
-
switch (event) {
case RPC_PIPEFS_MOUNT:
- dentry = rpc_setup_pipedir_sb(sb, clnt);
- if (!dentry)
- return -ENOENT;
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
- break;
+ return rpc_setup_pipedir_sb(sb, clnt);
case RPC_PIPEFS_UMOUNT:
__rpc_clnt_remove_pipedir(clnt);
break;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 98f78cd55905..0bd1df2ebb47 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -168,8 +168,9 @@ rpc_inode_setowner(struct inode *inode, void *private)
}
static void
-rpc_close_pipes(struct inode *inode)
+rpc_close_pipes(struct dentry *dentry)
{
+ struct inode *inode = dentry->d_inode;
struct rpc_pipe *pipe = RPC_I(inode)->pipe;
int need_release;
LIST_HEAD(free_list);
@@ -484,60 +485,6 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
return inode;
}
-static int __rpc_create_common(struct inode *dir, struct dentry *dentry,
- umode_t mode,
- const struct file_operations *i_fop,
- void *private)
-{
- struct inode *inode;
-
- d_drop(dentry);
- inode = rpc_get_inode(dir->i_sb, mode);
- if (!inode)
- goto out_err;
- inode->i_ino = iunique(dir->i_sb, 100);
- if (i_fop)
- inode->i_fop = i_fop;
- if (private)
- rpc_inode_setowner(inode, private);
- d_add(dentry, inode);
- return 0;
-out_err:
- printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %pd\n",
- __FILE__, __func__, dentry);
- dput(dentry);
- return -ENOMEM;
-}
-
-static int __rpc_create(struct inode *dir, struct dentry *dentry,
- umode_t mode,
- const struct file_operations *i_fop,
- void *private)
-{
- int err;
-
- err = __rpc_create_common(dir, dentry, S_IFREG | mode, i_fop, private);
- if (err)
- return err;
- fsnotify_create(dir, dentry);
- return 0;
-}
-
-static int __rpc_mkdir(struct inode *dir, struct dentry *dentry,
- umode_t mode,
- const struct file_operations *i_fop,
- void *private)
-{
- int err;
-
- err = __rpc_create_common(dir, dentry, S_IFDIR | mode, i_fop, private);
- if (err)
- return err;
- inc_nlink(dir);
- fsnotify_mkdir(dir, dentry);
- return 0;
-}
-
static void
init_pipe(struct rpc_pipe *pipe)
{
@@ -574,119 +521,60 @@ struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags)
}
EXPORT_SYMBOL_GPL(rpc_mkpipe_data);
-static int __rpc_mkpipe_dentry(struct inode *dir, struct dentry *dentry,
- umode_t mode,
- const struct file_operations *i_fop,
- void *private,
- struct rpc_pipe *pipe)
+static int rpc_new_file(struct dentry *parent,
+ const char *name,
+ umode_t mode,
+ const struct file_operations *i_fop,
+ void *private)
{
- struct rpc_inode *rpci;
- int err;
+ struct dentry *dentry = simple_start_creating(parent, name);
+ struct inode *dir = parent->d_inode;
+ struct inode *inode;
- err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private);
- if (err)
- return err;
- rpci = RPC_I(d_inode(dentry));
- rpci->private = private;
- rpci->pipe = pipe;
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ inode = rpc_get_inode(dir->i_sb, S_IFREG | mode);
+ if (unlikely(!inode)) {
+ dput(dentry);
+ inode_unlock(dir);
+ return -ENOMEM;
+ }
+ inode->i_ino = iunique(dir->i_sb, 100);
+ if (i_fop)
+ inode->i_fop = i_fop;
+ rpc_inode_setowner(inode, private);
+ d_instantiate(dentry, inode);
fsnotify_create(dir, dentry);
+ inode_unlock(dir);
return 0;
}
-static int __rpc_rmdir(struct inode *dir, struct dentry *dentry)
-{
- int ret;
-
- dget(dentry);
- ret = simple_rmdir(dir, dentry);
- d_drop(dentry);
- if (!ret)
- fsnotify_rmdir(dir, dentry);
- dput(dentry);
- return ret;
-}
-
-static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
-{
- int ret;
-
- dget(dentry);
- ret = simple_unlink(dir, dentry);
- d_drop(dentry);
- if (!ret)
- fsnotify_unlink(dir, dentry);
- dput(dentry);
- return ret;
-}
-
-static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry)
+static struct dentry *rpc_new_dir(struct dentry *parent,
+ const char *name,
+ umode_t mode)
{
- struct inode *inode = d_inode(dentry);
-
- rpc_close_pipes(inode);
- return __rpc_unlink(dir, dentry);
-}
+ struct dentry *dentry = simple_start_creating(parent, name);
+ struct inode *dir = parent->d_inode;
+ struct inode *inode;
-static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent,
- const char *name)
-{
- struct qstr q = QSTR(name);
- struct dentry *dentry = try_lookup_noperm(&q, parent);
- if (!dentry) {
- dentry = d_alloc(parent, &q);
- if (!dentry)
- return ERR_PTR(-ENOMEM);
- }
- if (d_really_is_negative(dentry))
+ if (IS_ERR(dentry))
return dentry;
- dput(dentry);
- return ERR_PTR(-EEXIST);
-}
-/*
- * FIXME: This probably has races.
- */
-static void __rpc_depopulate(struct dentry *parent,
- const struct rpc_filelist *files,
- int start, int eof)
-{
- struct inode *dir = d_inode(parent);
- struct dentry *dentry;
- struct qstr name;
- int i;
-
- for (i = start; i < eof; i++) {
- name.name = files[i].name;
- name.len = strlen(files[i].name);
- dentry = try_lookup_noperm(&name, parent);
-
- if (dentry == NULL)
- continue;
- if (d_really_is_negative(dentry))
- goto next;
- switch (d_inode(dentry)->i_mode & S_IFMT) {
- default:
- BUG();
- case S_IFREG:
- __rpc_unlink(dir, dentry);
- break;
- case S_IFDIR:
- __rpc_rmdir(dir, dentry);
- }
-next:
+ inode = rpc_get_inode(dir->i_sb, S_IFDIR | mode);
+ if (unlikely(!inode)) {
dput(dentry);
+ inode_unlock(dir);
+ return ERR_PTR(-ENOMEM);
}
-}
-static void rpc_depopulate(struct dentry *parent,
- const struct rpc_filelist *files,
- int start, int eof)
-{
- struct inode *dir = d_inode(parent);
-
- inode_lock_nested(dir, I_MUTEX_CHILD);
- __rpc_depopulate(parent, files, start, eof);
+ inode->i_ino = iunique(dir->i_sb, 100);
+ inc_nlink(dir);
+ d_instantiate(dentry, inode);
+ fsnotify_mkdir(dir, dentry);
inode_unlock(dir);
+
+ return dentry;
}
static int rpc_populate(struct dentry *parent,
@@ -694,92 +582,39 @@ static int rpc_populate(struct dentry *parent,
int start, int eof,
void *private)
{
- struct inode *dir = d_inode(parent);
struct dentry *dentry;
int i, err;
- inode_lock(dir);
for (i = start; i < eof; i++) {
- dentry = __rpc_lookup_create_exclusive(parent, files[i].name);
- err = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- goto out_bad;
switch (files[i].mode & S_IFMT) {
default:
BUG();
case S_IFREG:
- err = __rpc_create(dir, dentry,
+ err = rpc_new_file(parent,
+ files[i].name,
files[i].mode,
files[i].i_fop,
private);
+ if (err)
+ goto out_bad;
break;
case S_IFDIR:
- err = __rpc_mkdir(dir, dentry,
- files[i].mode,
- NULL,
- private);
+ dentry = rpc_new_dir(parent,
+ files[i].name,
+ files[i].mode);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ goto out_bad;
+ }
}
- if (err != 0)
- goto out_bad;
}
- inode_unlock(dir);
return 0;
out_bad:
- __rpc_depopulate(parent, files, start, eof);
- inode_unlock(dir);
printk(KERN_WARNING "%s: %s failed to populate directory %pd\n",
__FILE__, __func__, parent);
return err;
}
-static struct dentry *rpc_mkdir_populate(struct dentry *parent,
- const char *name, umode_t mode, void *private,
- int (*populate)(struct dentry *, void *), void *args_populate)
-{
- struct dentry *dentry;
- struct inode *dir = d_inode(parent);
- int error;
-
- inode_lock_nested(dir, I_MUTEX_PARENT);
- dentry = __rpc_lookup_create_exclusive(parent, name);
- if (IS_ERR(dentry))
- goto out;
- error = __rpc_mkdir(dir, dentry, mode, NULL, private);
- if (error != 0)
- goto out_err;
- if (populate != NULL) {
- error = populate(dentry, args_populate);
- if (error)
- goto err_rmdir;
- }
-out:
- inode_unlock(dir);
- return dentry;
-err_rmdir:
- __rpc_rmdir(dir, dentry);
-out_err:
- dentry = ERR_PTR(error);
- goto out;
-}
-
-static int rpc_rmdir_depopulate(struct dentry *dentry,
- void (*depopulate)(struct dentry *))
-{
- struct dentry *parent;
- struct inode *dir;
- int error;
-
- parent = dget_parent(dentry);
- dir = d_inode(parent);
- inode_lock_nested(dir, I_MUTEX_PARENT);
- if (depopulate != NULL)
- depopulate(dentry);
- error = __rpc_rmdir(dir, dentry);
- inode_unlock(dir);
- dput(parent);
- return error;
-}
-
/**
* rpc_mkpipe_dentry - make an rpc_pipefs file for kernel<->userspace
* communication
@@ -799,11 +634,13 @@ static int rpc_rmdir_depopulate(struct dentry *dentry,
* The @private argument passed here will be available to all these methods
* from the file pointer, via RPC_I(file_inode(file))->private.
*/
-struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
+int rpc_mkpipe_dentry(struct dentry *parent, const char *name,
void *private, struct rpc_pipe *pipe)
{
- struct dentry *dentry;
struct inode *dir = d_inode(parent);
+ struct dentry *dentry;
+ struct inode *inode;
+ struct rpc_inode *rpci;
umode_t umode = S_IFIFO | 0600;
int err;
@@ -812,48 +649,53 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
if (pipe->ops->downcall == NULL)
umode &= ~0222;
- inode_lock_nested(dir, I_MUTEX_PARENT);
- dentry = __rpc_lookup_create_exclusive(parent, name);
- if (IS_ERR(dentry))
- goto out;
- err = __rpc_mkpipe_dentry(dir, dentry, umode, &rpc_pipe_fops,
- private, pipe);
- if (err)
- goto out_err;
-out:
+ dentry = simple_start_creating(parent, name);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ goto failed;
+ }
+
+ inode = rpc_get_inode(dir->i_sb, umode);
+ if (unlikely(!inode)) {
+ dput(dentry);
+ inode_unlock(dir);
+ err = -ENOMEM;
+ goto failed;
+ }
+ inode->i_ino = iunique(dir->i_sb, 100);
+ inode->i_fop = &rpc_pipe_fops;
+ rpci = RPC_I(inode);
+ rpci->private = private;
+ rpci->pipe = pipe;
+ rpc_inode_setowner(inode, private);
+ d_instantiate(dentry, inode);
+ pipe->dentry = dentry;
+ fsnotify_create(dir, dentry);
inode_unlock(dir);
- return dentry;
-out_err:
- dentry = ERR_PTR(err);
- printk(KERN_WARNING "%s: %s() failed to create pipe %pd/%s (errno = %d)\n",
- __FILE__, __func__, parent, name,
- err);
- goto out;
+ return 0;
+
+failed:
+ pr_warn("%s() failed to create pipe %pd/%s (errno = %d)\n",
+ __func__, parent, name, err);
+ return err;
}
EXPORT_SYMBOL_GPL(rpc_mkpipe_dentry);
/**
* rpc_unlink - remove a pipe
- * @dentry: dentry for the pipe, as returned from rpc_mkpipe
+ * @pipe: the pipe to be removed
*
* After this call, lookups will no longer find the pipe, and any
* attempts to read or write using preexisting opens of the pipe will
* return -EPIPE.
*/
-int
-rpc_unlink(struct dentry *dentry)
+void
+rpc_unlink(struct rpc_pipe *pipe)
{
- struct dentry *parent;
- struct inode *dir;
- int error = 0;
-
- parent = dget_parent(dentry);
- dir = d_inode(parent);
- inode_lock_nested(dir, I_MUTEX_PARENT);
- error = __rpc_rmpipe(dir, dentry);
- inode_unlock(dir);
- dput(parent);
- return error;
+ if (pipe->dentry) {
+ simple_recursive_removal(pipe->dentry, rpc_close_pipes);
+ pipe->dentry = NULL;
+ }
}
EXPORT_SYMBOL_GPL(rpc_unlink);
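
rpc_unlink() now leans on simple_recursive_removal(), the libfs helper that unhooks a whole dentry subtree; the optional callback runs for each dentry being removed, which is how rpc_close_pipes() gets to fail outstanding opens with -EPIPE before the pipe vanishes. Usage shape (assuming those libfs semantics):

static void teardown_one(struct dentry *dentry)
{
	/* Per-object cleanup during removal; rpc_close_pipes()
	 * plays this role in the hunk above.
	 */
}

static void remove_tree(struct dentry *root)
{
	simple_recursive_removal(root, teardown_one);	/* callback may be NULL */
}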
@@ -1010,31 +852,6 @@ rpc_destroy_pipe_dir_objects(struct rpc_pipe_dir_head *pdh)
pdo->pdo_ops->destroy(dir, pdo);
}
-enum {
- RPCAUTH_info,
- RPCAUTH_EOF
-};
-
-static const struct rpc_filelist authfiles[] = {
- [RPCAUTH_info] = {
- .name = "info",
- .i_fop = &rpc_info_operations,
- .mode = S_IFREG | 0400,
- },
-};
-
-static int rpc_clntdir_populate(struct dentry *dentry, void *private)
-{
- return rpc_populate(dentry,
- authfiles, RPCAUTH_info, RPCAUTH_EOF,
- private);
-}
-
-static void rpc_clntdir_depopulate(struct dentry *dentry)
-{
- rpc_depopulate(dentry, authfiles, RPCAUTH_info, RPCAUTH_EOF);
-}
-
/**
* rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs
* @dentry: the parent of new directory
@@ -1046,19 +863,27 @@ static void rpc_clntdir_depopulate(struct dentry *dentry)
* information about the client, together with any "pipes" that may
* later be created using rpc_mkpipe().
*/
-struct dentry *rpc_create_client_dir(struct dentry *dentry,
- const char *name,
- struct rpc_clnt *rpc_client)
+int rpc_create_client_dir(struct dentry *dentry,
+ const char *name,
+ struct rpc_clnt *rpc_client)
{
struct dentry *ret;
+ int err;
- ret = rpc_mkdir_populate(dentry, name, 0555, NULL,
- rpc_clntdir_populate, rpc_client);
- if (!IS_ERR(ret)) {
- rpc_client->cl_pipedir_objects.pdh_dentry = ret;
- rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
+ ret = rpc_new_dir(dentry, name, 0555);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+ err = rpc_new_file(ret, "info", S_IFREG | 0400,
+ &rpc_info_operations, rpc_client);
+ if (err) {
+ pr_warn("%s failed to populate directory %pd\n",
+ __func__, ret);
+ simple_recursive_removal(ret, NULL);
+ return err;
}
- return ret;
+ rpc_client->cl_pipedir_objects.pdh_dentry = ret;
+ rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
+ return 0;
}
/**
@@ -1073,7 +898,8 @@ int rpc_remove_client_dir(struct rpc_clnt *rpc_client)
return 0;
rpc_destroy_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
rpc_client->cl_pipedir_objects.pdh_dentry = NULL;
- return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate);
+ simple_recursive_removal(dentry, NULL);
+ return 0;
}
static const struct rpc_filelist cache_pipefs_files[3] = {
@@ -1094,28 +920,25 @@ static const struct rpc_filelist cache_pipefs_files[3] = {
},
};
-static int rpc_cachedir_populate(struct dentry *dentry, void *private)
-{
- return rpc_populate(dentry,
- cache_pipefs_files, 0, 3,
- private);
-}
-
-static void rpc_cachedir_depopulate(struct dentry *dentry)
-{
- rpc_depopulate(dentry, cache_pipefs_files, 0, 3);
-}
-
struct dentry *rpc_create_cache_dir(struct dentry *parent, const char *name,
umode_t umode, struct cache_detail *cd)
{
- return rpc_mkdir_populate(parent, name, umode, NULL,
- rpc_cachedir_populate, cd);
+ struct dentry *dentry;
+
+ dentry = rpc_new_dir(parent, name, umode);
+ if (!IS_ERR(dentry)) {
+ int error = rpc_populate(dentry, cache_pipefs_files, 0, 3, cd);
+ if (error) {
+ simple_recursive_removal(dentry, NULL);
+ return ERR_PTR(error);
+ }
+ }
+ return dentry;
}
void rpc_remove_cache_dir(struct dentry *dentry)
{
- rpc_rmdir_depopulate(dentry, rpc_cachedir_depopulate);
+ simple_recursive_removal(dentry, NULL);
}
/*
@@ -1141,7 +964,6 @@ enum {
RPCAUTH_nfsd4_cb,
RPCAUTH_cache,
RPCAUTH_nfsd,
- RPCAUTH_gssd,
RPCAUTH_RootEOF
};
@@ -1178,10 +1000,6 @@ static const struct rpc_filelist files[] = {
.name = "nfsd",
.mode = S_IFDIR | 0555,
},
- [RPCAUTH_gssd] = {
- .name = "gssd",
- .mode = S_IFDIR | 0555,
- },
};
/*
@@ -1241,13 +1059,6 @@ void rpc_put_sb_net(const struct net *net)
}
EXPORT_SYMBOL_GPL(rpc_put_sb_net);
-static const struct rpc_filelist gssd_dummy_clnt_dir[] = {
- [0] = {
- .name = "clntXX",
- .mode = S_IFDIR | 0555,
- },
-};
-
static ssize_t
dummy_downcall(struct file *filp, const char __user *src, size_t len)
{
@@ -1276,14 +1087,6 @@ rpc_dummy_info_show(struct seq_file *m, void *v)
}
DEFINE_SHOW_ATTRIBUTE(rpc_dummy_info);
-static const struct rpc_filelist gssd_dummy_info_file[] = {
- [0] = {
- .name = "info",
- .i_fop = &rpc_dummy_info_fops,
- .mode = S_IFREG | 0400,
- },
-};
-
/**
* rpc_gssd_dummy_populate - create a dummy gssd pipe
* @root: root of the rpc_pipefs filesystem
@@ -1292,69 +1095,32 @@ static const struct rpc_filelist gssd_dummy_info_file[] = {
* Create a dummy set of directories and a pipe that gssd can hold open to
* indicate that it is up and running.
*/
-static struct dentry *
+static int
rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data)
{
- int ret = 0;
- struct dentry *gssd_dentry;
- struct dentry *clnt_dentry = NULL;
- struct dentry *pipe_dentry = NULL;
-
- /* We should never get this far if "gssd" doesn't exist */
- gssd_dentry = try_lookup_noperm(&QSTR(files[RPCAUTH_gssd].name), root);
- if (!gssd_dentry)
- return ERR_PTR(-ENOENT);
-
- ret = rpc_populate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1, NULL);
- if (ret) {
- pipe_dentry = ERR_PTR(ret);
- goto out;
- }
+ struct dentry *gssd_dentry, *clnt_dentry;
+ int err;
- clnt_dentry = try_lookup_noperm(&QSTR(gssd_dummy_clnt_dir[0].name),
- gssd_dentry);
- if (!clnt_dentry) {
- __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
- pipe_dentry = ERR_PTR(-ENOENT);
- goto out;
- }
+ gssd_dentry = rpc_new_dir(root, "gssd", 0555);
+ if (IS_ERR(gssd_dentry))
+ return -ENOENT;
- ret = rpc_populate(clnt_dentry, gssd_dummy_info_file, 0, 1, NULL);
- if (ret) {
- __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
- pipe_dentry = ERR_PTR(ret);
- goto out;
- }
+ clnt_dentry = rpc_new_dir(gssd_dentry, "clntXX", 0555);
+ if (IS_ERR(clnt_dentry))
+ return -ENOENT;
- pipe_dentry = rpc_mkpipe_dentry(clnt_dentry, "gssd", NULL, pipe_data);
- if (IS_ERR(pipe_dentry)) {
- __rpc_depopulate(clnt_dentry, gssd_dummy_info_file, 0, 1);
- __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
- }
-out:
- dput(clnt_dentry);
- dput(gssd_dentry);
- return pipe_dentry;
-}
-
-static void
-rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry)
-{
- struct dentry *clnt_dir = pipe_dentry->d_parent;
- struct dentry *gssd_dir = clnt_dir->d_parent;
-
- dget(pipe_dentry);
- __rpc_rmpipe(d_inode(clnt_dir), pipe_dentry);
- __rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1);
- __rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1);
- dput(pipe_dentry);
+ err = rpc_new_file(clnt_dentry, "info", 0400,
+ &rpc_dummy_info_fops, NULL);
+ if (!err)
+ err = rpc_mkpipe_dentry(clnt_dentry, "gssd", NULL, pipe_data);
+ return err;
}
static int
rpc_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *inode;
- struct dentry *root, *gssd_dentry;
+ struct dentry *root;
struct net *net = sb->s_fs_info;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
int err;
@@ -1363,7 +1129,7 @@ rpc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = RPCAUTH_GSSMAGIC;
sb->s_op = &s_ops;
- sb->s_d_op = &simple_dentry_operations;
+ sb->s_d_flags = DCACHE_DONTCACHE;
sb->s_time_gran = 1;
inode = rpc_get_inode(sb, S_IFDIR | 0555);
@@ -1373,11 +1139,9 @@ rpc_fill_super(struct super_block *sb, struct fs_context *fc)
if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL))
return -ENOMEM;
- gssd_dentry = rpc_gssd_dummy_populate(root, sn->gssd_dummy);
- if (IS_ERR(gssd_dentry)) {
- __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF);
- return PTR_ERR(gssd_dentry);
- }
+ err = rpc_gssd_dummy_populate(root, sn->gssd_dummy);
+ if (err)
+ return err;
dprintk("RPC: sending pipefs MOUNT notification for net %x%s\n",
net->ns.inum, NET_NAME(net));
@@ -1386,18 +1150,6 @@ rpc_fill_super(struct super_block *sb, struct fs_context *fc)
err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
RPC_PIPEFS_MOUNT,
sb);
- if (err)
- goto err_depopulate;
- mutex_unlock(&sn->pipefs_sb_lock);
- return 0;
-
-err_depopulate:
- rpc_gssd_dummy_depopulate(gssd_dentry);
- blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
- RPC_PIPEFS_UMOUNT,
- sb);
- sn->pipefs_sb = NULL;
- __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF);
mutex_unlock(&sn->pipefs_sb_lock);
return err;
}
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 1b2b84feeec6..4e92e2a50168 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -27,97 +27,60 @@
struct xdr_skb_reader {
struct sk_buff *skb;
unsigned int offset;
+ bool need_checksum;
size_t count;
__wsum csum;
};
-typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to,
- size_t len);
-
/**
* xdr_skb_read_bits - copy some data bits from skb to internal buffer
* @desc: sk_buff copy helper
* @to: copy destination
* @len: number of bytes to copy
*
- * Possibly called several times to iterate over an sk_buff and copy
- * data out of it.
+ * Possibly called several times to iterate over an sk_buff and copy data
+ * out of it.
*/
static size_t
xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
{
- if (len > desc->count)
- len = desc->count;
- if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len)))
- return 0;
- desc->count -= len;
- desc->offset += len;
- return len;
-}
+ len = min(len, desc->count);
+
+ if (desc->need_checksum) {
+ __wsum csum;
+
+ csum = skb_copy_and_csum_bits(desc->skb, desc->offset, to, len);
+ desc->csum = csum_block_add(desc->csum, csum, desc->offset);
+ } else {
+ if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len)))
+ return 0;
+ }
-/**
- * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
- * @desc: sk_buff copy helper
- * @to: copy destination
- * @len: number of bytes to copy
- *
- * Same as skb_read_bits, but calculate a checksum at the same time.
- */
-static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to, size_t len)
-{
- unsigned int pos;
- __wsum csum2;
-
- if (len > desc->count)
- len = desc->count;
- pos = desc->offset;
- csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len);
- desc->csum = csum_block_add(desc->csum, csum2, pos);
desc->count -= len;
desc->offset += len;
return len;
}
-/**
- * xdr_partial_copy_from_skb - copy data out of an skb
- * @xdr: target XDR buffer
- * @base: starting offset
- * @desc: sk_buff copy helper
- * @copy_actor: virtual method for copying data
- *
- */
static ssize_t
-xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
+xdr_partial_copy_from_skb(struct xdr_buf *xdr, struct xdr_skb_reader *desc)
{
- struct page **ppage = xdr->pages;
- unsigned int len, pglen = xdr->page_len;
- ssize_t copied = 0;
- size_t ret;
-
- len = xdr->head[0].iov_len;
- if (base < len) {
- len -= base;
- ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
- copied += ret;
- if (ret != len || !desc->count)
- goto out;
- base = 0;
- } else
- base -= len;
-
- if (unlikely(pglen == 0))
- goto copy_tail;
- if (unlikely(base >= pglen)) {
- base -= pglen;
- goto copy_tail;
- }
- if (base || xdr->page_base) {
- pglen -= base;
- base += xdr->page_base;
- ppage += base >> PAGE_SHIFT;
- base &= ~PAGE_MASK;
- }
- do {
+ struct page **ppage = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+ unsigned int poff = xdr->page_base & ~PAGE_MASK;
+ unsigned int pglen = xdr->page_len;
+ ssize_t copied = 0;
+ size_t ret;
+
+ if (xdr->head[0].iov_len == 0)
+ return 0;
+
+ ret = xdr_skb_read_bits(desc, xdr->head[0].iov_base,
+ xdr->head[0].iov_len);
+ if (ret != xdr->head[0].iov_len || !desc->count)
+ return ret;
+ copied += ret;
+
+ while (pglen) {
+ unsigned int len = min(PAGE_SIZE - poff, pglen);
char *kaddr;
/* ACL likes to be lazy in allocating pages - ACLs
@@ -126,36 +89,29 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb
*ppage = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
if (unlikely(*ppage == NULL)) {
if (copied == 0)
- copied = -ENOMEM;
- goto out;
+ return -ENOMEM;
+ return copied;
}
}
- len = PAGE_SIZE;
kaddr = kmap_atomic(*ppage);
- if (base) {
- len -= base;
- if (pglen < len)
- len = pglen;
- ret = copy_actor(desc, kaddr + base, len);
- base = 0;
- } else {
- if (pglen < len)
- len = pglen;
- ret = copy_actor(desc, kaddr, len);
- }
+ ret = xdr_skb_read_bits(desc, kaddr + poff, len);
flush_dcache_page(*ppage);
kunmap_atomic(kaddr);
+
copied += ret;
if (ret != len || !desc->count)
- goto out;
+ return copied;
ppage++;
- } while ((pglen -= len) != 0);
-copy_tail:
- len = xdr->tail[0].iov_len;
- if (base < len)
- copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
-out:
+ pglen -= len;
+ poff = 0;
+ }
+
+ if (xdr->tail[0].iov_len) {
+ copied += xdr_skb_read_bits(desc, xdr->tail[0].iov_base,
+ xdr->tail[0].iov_len);
+ }
+
return copied;
}
@@ -169,17 +125,22 @@ out:
*/
int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
{
- struct xdr_skb_reader desc;
-
- desc.skb = skb;
- desc.offset = 0;
- desc.count = skb->len - desc.offset;
+ struct xdr_skb_reader desc = {
+ .skb = skb,
+ .count = skb->len,
+ };
- if (skb_csum_unnecessary(skb))
- goto no_checksum;
+ if (skb_csum_unnecessary(skb)) {
+ if (xdr_partial_copy_from_skb(xdr, &desc) < 0)
+ return -1;
+ if (desc.count)
+ return -1;
+ return 0;
+ }
+ desc.need_checksum = true;
desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
- if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_and_csum_bits) < 0)
+ if (xdr_partial_copy_from_skb(xdr, &desc) < 0)
return -1;
if (desc.offset != skb->len) {
__wsum csum2;
@@ -194,14 +155,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
!skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev, skb);
return 0;
-no_checksum:
- if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
- return -1;
- if (desc.count)
- return -1;
- return 0;
}
-EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr);
static inline int xprt_sendmsg(struct socket *sock, struct msghdr *msg,
size_t seek)
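/*
 * Sketch (toy types, not the kernel's __wsum arithmetic): the
 * socklib.c hunks above fold two near-identical copy callbacks into a
 * single xdr_skb_read_bits() driven by a need_checksum flag. The same
 * shape in miniature:
 */
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

struct reader {
	const unsigned char *src;
	size_t offset, count;
	bool need_checksum;
	unsigned int csum;		/* toy additive checksum */
};

static size_t read_bits(struct reader *r, unsigned char *to, size_t len)
{
	if (len > r->count)
		len = r->count;
	memcpy(to, r->src + r->offset, len);
	if (r->need_checksum)		/* checksum folded into the copy */
		for (size_t i = 0; i < len; i++)
			r->csum += to[i];
	r->offset += len;
	r->count -= len;
	return len;
}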
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 939b6239df8a..b1fab3a69544 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -638,8 +638,6 @@ EXPORT_SYMBOL_GPL(svc_destroy);
static bool
svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node)
{
- unsigned long ret;
-
rqstp->rq_maxpages = svc_serv_maxpages(serv);
/* rq_pages' last entry is NULL for historical reasons. */
@@ -649,9 +647,7 @@ svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node)
if (!rqstp->rq_pages)
return false;
- ret = alloc_pages_bulk_node(GFP_KERNEL, node, rqstp->rq_maxpages,
- rqstp->rq_pages);
- return ret == rqstp->rq_maxpages;
+ return true;
}
/*
@@ -755,14 +751,16 @@ void svc_pool_wake_idle_thread(struct svc_pool *pool)
WRITE_ONCE(rqstp->rq_qtime, ktime_get());
if (!task_is_running(rqstp->rq_task)) {
wake_up_process(rqstp->rq_task);
- trace_svc_wake_up(rqstp->rq_task->pid);
+ trace_svc_pool_thread_wake(pool, rqstp->rq_task->pid);
percpu_counter_inc(&pool->sp_threads_woken);
+ } else {
+ trace_svc_pool_thread_running(pool, rqstp->rq_task->pid);
}
rcu_read_unlock();
return;
}
rcu_read_unlock();
-
+ trace_svc_pool_thread_noidle(pool, 0);
}
EXPORT_SYMBOL_GPL(svc_pool_wake_idle_thread);
@@ -1336,6 +1334,9 @@ svc_process_common(struct svc_rqst *rqstp)
int pr, rc;
__be32 *p;
+ /* Reset the accept_stat for the RPC */
+ rqstp->rq_accept_statp = NULL;
+
/* Will be turned off only when NFSv4 Sessions are used */
set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
clear_bit(RQ_DROPME, &rqstp->rq_flags);
@@ -1375,9 +1376,8 @@ svc_process_common(struct svc_rqst *rqstp)
case SVC_OK:
break;
case SVC_GARBAGE:
- goto err_garbage_args;
- case SVC_SYSERR:
- goto err_system_err;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
+ goto err_bad_auth;
case SVC_DENIED:
goto err_bad_auth;
case SVC_CLOSE:
@@ -1388,7 +1388,8 @@ svc_process_common(struct svc_rqst *rqstp)
goto sendit;
default:
pr_warn_once("Unexpected svc_auth_status (%d)\n", auth_res);
- goto err_system_err;
+ rqstp->rq_auth_stat = rpc_autherr_failed;
+ goto err_bad_auth;
}
if (progp == NULL)
@@ -1515,20 +1516,6 @@ err_bad_proc:
serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_proc_unavail;
goto sendit;
-
-err_garbage_args:
- svc_printk(rqstp, "failed to decode RPC header\n");
-
- if (serv->sv_stats)
- serv->sv_stats->rpcbadfmt++;
- *rqstp->rq_accept_statp = rpc_garbage_args;
- goto sendit;
-
-err_system_err:
- if (serv->sv_stats)
- serv->sv_stats->rpcbadfmt++;
- *rqstp->rq_accept_statp = rpc_system_err;
- goto sendit;
}
/*
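/*
 * Note in code form: with the svc.c hunk above, a header decode
 * failure (SVC_GARBAGE) is no longer answered with an accepted reply
 * carrying GARBAGE_ARGS; it is rejected as an authentication error
 * (rpc_autherr_badcred). The two RPC reply shapes from RFC 5531:
 */
enum reply_stat  { MSG_ACCEPTED = 0, MSG_DENIED = 1 };
enum accept_stat { SUCCESS = 0, GARBAGE_ARGS = 4, SYSTEM_ERR = 5 };
enum reject_stat { RPC_MISMATCH = 0, AUTH_ERROR = 1 };

struct reply {
	enum reply_stat stat;
	union {
		enum accept_stat accepted;	/* MSG_ACCEPTED arm */
		enum reject_stat rejected;	/* MSG_DENIED arm */
	} u;
};

/* old: { MSG_ACCEPTED, { .accepted = GARBAGE_ARGS } } */
/* new: { MSG_DENIED,   { .rejected = AUTH_ERROR } }   */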
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index e1c85123b445..46c156b121db 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1197,7 +1197,7 @@ err_noclose:
* that the pages backing @xdr are unchanging.
*/
static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
- rpc_fraghdr marker, unsigned int *sentp)
+ rpc_fraghdr marker, int *sentp)
{
struct msghdr msg = {
.msg_flags = MSG_SPLICE_PAGES,
@@ -1247,8 +1247,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
struct xdr_buf *xdr = &rqstp->rq_res;
rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
(u32)xdr->len);
- unsigned int sent;
- int err;
+ int sent, err;
svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
rqstp->rq_xprt_ctxt = NULL;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 2ea00e354ba6..1346fdf33835 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -993,21 +993,18 @@ EXPORT_SYMBOL_GPL(xdr_init_encode);
* xdr_init_encode_pages - Initialize an xdr_stream for encoding into pages
* @xdr: pointer to xdr_stream struct
* @buf: pointer to XDR buffer into which to encode data
- * @pages: list of pages to decode into
- * @rqst: pointer to controlling rpc_rqst, for debugging
*
*/
-void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
- struct page **pages, struct rpc_rqst *rqst)
+void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf)
{
xdr_reset_scratch_buffer(xdr);
xdr->buf = buf;
- xdr->page_ptr = pages;
+ xdr->page_ptr = buf->pages;
xdr->iov = NULL;
- xdr->p = page_address(*pages);
+ xdr->p = page_address(*xdr->page_ptr);
xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE);
- xdr->rqst = rqst;
+ xdr->rqst = NULL;
}
EXPORT_SYMBOL_GPL(xdr_init_encode_pages);
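/*
 * Sketch (hypothetical names): the xdr.c change above removes
 * arguments that were always derivable from `buf` (the page array) or
 * constant at every call site (rqst == NULL). The refactor shape:
 */
struct buf { char **pages; unsigned int buflen; };
struct stream { struct buf *buf; char **page_ptr; };

/* before: stream_init(s, b, b->pages) -- a redundant argument */
static void stream_init(struct stream *s, struct buf *b)
{
	s->buf = b;
	s->page_ptr = b->pages;		/* derived internally instead */
}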
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 8ee0c07d00e9..ffe577bf6b51 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -704,8 +704,10 @@ static void tipc_topsrv_stop(struct net *net)
for (id = 0; srv->idr_in_use; id++) {
con = idr_find(&srv->conn_idr, id);
if (con) {
+ conn_get(con);
spin_unlock_bh(&srv->idr_lock);
tipc_conn_close(con);
+ conn_put(con);
spin_lock_bh(&srv->idr_lock);
}
}
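/*
 * Sketch (C11 atomics, hypothetical names) of the pattern the
 * topsrv.c hunk above applies: pin the object with a reference before
 * dropping the lock that keeps it alive, use it, then unpin.
 */
#include <stdatomic.h>
#include <stdlib.h>

struct conn { atomic_int refcnt; };

static void conn_get(struct conn *c)
{
	atomic_fetch_add(&c->refcnt, 1);
}

static void conn_put(struct conn *c)
{
	if (atomic_fetch_sub(&c->refcnt, 1) == 1)
		free(c);		/* last reference dropped */
}

/*
 * lookup under lock, then:
 *	conn_get(con);		pin before unlocking
 *	unlock();
 *	conn_close(con);	object cannot vanish meanwhile
 *	conn_put(con);
 *	lock();
 */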
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 108a4cc2e001..258d6aa4f21a 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -489,7 +489,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
rtnl_lock();
b = tipc_bearer_find(net, bname);
- if (!b) {
+ if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) {
rtnl_unlock();
return -EINVAL;
}
@@ -500,7 +500,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
rtnl_lock();
b = rtnl_dereference(tn->bearer_list[bid]);
- if (!b) {
+ if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) {
rtnl_unlock();
return -EINVAL;
}
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index 65b0da6fdf6a..095cf31bae0b 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -512,9 +512,8 @@ static int tls_strp_read_sock(struct tls_strparser *strp)
if (inq < strp->stm.full_len)
return tls_strp_read_copy(strp, true);
+ tls_strp_load_anchor_with_queue(strp, inq);
if (!strp->stm.full_len) {
- tls_strp_load_anchor_with_queue(strp, inq);
-
sz = tls_rx_msg_size(strp, strp->anchor);
if (sz < 0) {
tls_strp_abort_strp(strp, sz);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2e2e9997a68e..52b155123985 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -660,6 +660,11 @@ static void unix_sock_destructor(struct sock *sk)
#endif
}
+static unsigned int unix_skb_len(const struct sk_buff *skb)
+{
+ return skb->len - UNIXCB(skb).consumed;
+}
+
static void unix_release_sock(struct sock *sk, int embrion)
{
struct unix_sock *u = unix_sk(sk);
@@ -694,10 +699,16 @@ static void unix_release_sock(struct sock *sk, int embrion)
if (skpair != NULL) {
if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
+ struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (skb && !unix_skb_len(skb))
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+#endif
unix_state_lock(skpair);
/* No more writes */
WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
- if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion)
+ if (skb || embrion)
WRITE_ONCE(skpair->sk_err, ECONNRESET);
unix_state_unlock(skpair);
skpair->sk_state_change(skpair);
@@ -1971,7 +1982,8 @@ static void unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
if (UNIXCB(skb).pid)
return;
- if (unix_may_passcred(sk) || unix_may_passcred(other)) {
+ if (unix_may_passcred(sk) || unix_may_passcred(other) ||
+ !other->sk_socket) {
UNIXCB(skb).pid = get_pid(task_tgid(current));
current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
}
@@ -2660,11 +2672,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
return timeo;
}
-static unsigned int unix_skb_len(const struct sk_buff *skb)
-{
- return skb->len - UNIXCB(skb).consumed;
-}
-
struct unix_stream_read_state {
int (*recv_actor)(struct sk_buff *, int, int,
struct unix_stream_read_state *);
@@ -2679,11 +2686,11 @@ struct unix_stream_read_state {
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
static int unix_stream_recv_urg(struct unix_stream_read_state *state)
{
+ struct sk_buff *oob_skb, *read_skb = NULL;
struct socket *sock = state->socket;
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
int chunk = 1;
- struct sk_buff *oob_skb;
mutex_lock(&u->iolock);
unix_state_lock(sk);
@@ -2698,9 +2705,16 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
oob_skb = u->oob_skb;
- if (!(state->flags & MSG_PEEK))
+ if (!(state->flags & MSG_PEEK)) {
WRITE_ONCE(u->oob_skb, NULL);
+ if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue &&
+ !unix_skb_len(oob_skb->prev)) {
+ read_skb = oob_skb->prev;
+ __skb_unlink(read_skb, &sk->sk_receive_queue);
+ }
+ }
+
spin_unlock(&sk->sk_receive_queue.lock);
unix_state_unlock(sk);
@@ -2711,6 +2725,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
mutex_unlock(&u->iolock);
+ consume_skb(read_skb);
+
if (chunk < 0)
return -EFAULT;
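/*
 * Sketch (simplified fields): the af_unix hunks above treat a queued
 * skb whose payload is fully consumed (len == consumed) as "no unread
 * data", so a drained OOB skb left on the queue no longer makes the
 * close path report ECONNRESET to the peer.
 */
#include <stdbool.h>

struct skb { unsigned int len, consumed; struct skb *next; };

static unsigned int skb_left(const struct skb *s)
{
	return s->len - s->consumed;
}

static bool queue_has_unread(const struct skb *head)
{
	/* skip a fully consumed first skb (e.g. a read OOB byte) */
	if (head && !skb_left(head))
		head = head->next;
	return head != NULL;
}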
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 2e7a3034e965..1053662725f8 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -407,6 +407,8 @@ EXPORT_SYMBOL_GPL(vsock_enqueue_accept);
static bool vsock_use_local_transport(unsigned int remote_cid)
{
+ lockdep_assert_held(&vsock_register_mutex);
+
if (!transport_local)
return false;
@@ -464,6 +466,8 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
remote_flags = vsk->remote_addr.svm_flags;
+ mutex_lock(&vsock_register_mutex);
+
switch (sk->sk_type) {
case SOCK_DGRAM:
new_transport = transport_dgram;
@@ -479,12 +483,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
new_transport = transport_h2g;
break;
default:
- return -ESOCKTNOSUPPORT;
+ ret = -ESOCKTNOSUPPORT;
+ goto err;
}
if (vsk->transport) {
- if (vsk->transport == new_transport)
- return 0;
+ if (vsk->transport == new_transport) {
+ ret = 0;
+ goto err;
+ }
/* transport->release() must be called with sock lock acquired.
* This path can only be taken during vsock_connect(), where we
@@ -508,8 +515,16 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
/* We increase the module refcnt to prevent the transport unloading
* while there are open sockets assigned to it.
*/
- if (!new_transport || !try_module_get(new_transport->module))
- return -ENODEV;
+ if (!new_transport || !try_module_get(new_transport->module)) {
+ ret = -ENODEV;
+ goto err;
+ }
+
+ /* It's safe to release the mutex after a successful try_module_get().
+ * Whichever transport `new_transport` points at, it won't go away until
+ * the last module_put() below or in vsock_deassign_transport().
+ */
+ mutex_unlock(&vsock_register_mutex);
if (sk->sk_type == SOCK_SEQPACKET) {
if (!new_transport->seqpacket_allow ||
@@ -528,12 +543,31 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
vsk->transport = new_transport;
return 0;
+err:
+ mutex_unlock(&vsock_register_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(vsock_assign_transport);
+/*
+ * Provide safe access to static transport_{h2g,g2h,dgram,local} callbacks.
+ * Otherwise we may race with module removal. Do not use on `vsk->transport`.
+ */
+static u32 vsock_registered_transport_cid(const struct vsock_transport **transport)
+{
+ u32 cid = VMADDR_CID_ANY;
+
+ mutex_lock(&vsock_register_mutex);
+ if (*transport)
+ cid = (*transport)->get_local_cid();
+ mutex_unlock(&vsock_register_mutex);
+
+ return cid;
+}
+
bool vsock_find_cid(unsigned int cid)
{
- if (transport_g2h && cid == transport_g2h->get_local_cid())
+ if (cid == vsock_registered_transport_cid(&transport_g2h))
return true;
if (transport_h2g && cid == VMADDR_CID_HOST)
@@ -2536,18 +2570,19 @@ static long vsock_dev_do_ioctl(struct file *filp,
unsigned int cmd, void __user *ptr)
{
u32 __user *p = ptr;
- u32 cid = VMADDR_CID_ANY;
int retval = 0;
+ u32 cid;
switch (cmd) {
case IOCTL_VM_SOCKETS_GET_LOCAL_CID:
/* To be compatible with the VMCI behavior, we prioritize the
 * guest CID instead of well-known host CID (VMADDR_CID_HOST).
*/
- if (transport_g2h)
- cid = transport_g2h->get_local_cid();
- else if (transport_h2g)
- cid = transport_h2g->get_local_cid();
+ cid = vsock_registered_transport_cid(&transport_g2h);
+ if (cid == VMADDR_CID_ANY)
+ cid = vsock_registered_transport_cid(&transport_h2g);
+ if (cid == VMADDR_CID_ANY)
+ cid = vsock_registered_transport_cid(&transport_local);
if (put_user(cid, p) != 0)
retval = -EFAULT;
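/*
 * Sketch (a POSIX mutex standing in for vsock_register_mutex, a plain
 * counter for try_module_get()): the af_vsock.c changes above read the
 * registration pointers only under the mutex and pin the backing
 * module before dropping it.
 */
#include <pthread.h>
#include <stdatomic.h>

struct transport { atomic_int module_refs; };

static pthread_mutex_t register_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct transport *transport_g2h;	/* set by (un)registration */

static struct transport *transport_pin(void)
{
	struct transport *t;

	pthread_mutex_lock(&register_mutex);
	t = transport_g2h;
	if (t)
		atomic_fetch_add(&t->module_refs, 1);	/* pinned */
	pthread_mutex_unlock(&register_mutex);

	return t;	/* usable until the matching unpin */
}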
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index b370070194fa..7eccd6708d66 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -119,6 +119,8 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt,
u16 proto,
struct vmci_handle handle)
{
+ memset(pkt, 0, sizeof(*pkt));
+
/* We register the stream control handler as an any cid handle so we
* must always send from a source address of VMADDR_CID_ANY
*/
@@ -131,8 +133,6 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt,
pkt->type = type;
pkt->src_port = src->svm_port;
pkt->dst_port = dst->svm_port;
- memset(&pkt->proto, 0, sizeof(pkt->proto));
- memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2));
switch (pkt->type) {
case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
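/*
 * Sketch: the vmci_transport.c change above zeroes the whole packet up
 * front instead of memset()ing individual members. One memset also
 * covers padding bytes and any field a later branch forgets, which
 * matters for structures that are copied to the wire.
 */
#include <stdint.h>
#include <string.h>

struct pkt {
	uint8_t  type;		/* padding bytes follow this field */
	uint32_t src_port;
	uint32_t _reserved;
};

static void pkt_init(struct pkt *p, uint8_t type, uint32_t port)
{
	memset(p, 0, sizeof(*p));	/* padding + reserved zeroed */
	p->type = type;
	p->src_port = port;
}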
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index fd5f79266471..50202d170f3a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -229,6 +229,7 @@ static int validate_beacon_head(const struct nlattr *attr,
unsigned int len = nla_len(attr);
const struct element *elem;
const struct ieee80211_mgmt *mgmt = (void *)data;
+ const struct ieee80211_ext *ext;
unsigned int fixedlen, hdrlen;
bool s1g_bcn;
@@ -237,8 +238,10 @@ static int validate_beacon_head(const struct nlattr *attr,
s1g_bcn = ieee80211_is_s1g_beacon(mgmt->frame_control);
if (s1g_bcn) {
- fixedlen = offsetof(struct ieee80211_ext,
- u.s1g_beacon.variable);
+ ext = (struct ieee80211_ext *)mgmt;
+ fixedlen =
+ offsetof(struct ieee80211_ext, u.s1g_beacon.variable) +
+ ieee80211_s1g_optional_len(ext->frame_control);
hdrlen = offsetof(struct ieee80211_ext, u.s1g_beacon);
} else {
fixedlen = offsetof(struct ieee80211_mgmt,
@@ -1583,7 +1586,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
return result;
error:
- kfree(result);
+ kfree_sensitive(result);
return ERR_PTR(err);
}
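/*
 * Sketch: the nl80211.c error path above frees connect keys with
 * kfree_sensitive(), which wipes the buffer before handing it back to
 * the allocator. A userspace analogue of the idea (the kernel uses
 * memzero_explicit(), which unlike a plain memset() cannot be
 * optimized away as a dead store):
 */
#include <stdlib.h>
#include <string.h>

static void free_sensitive(void *p, size_t len)
{
	if (!p)
		return;
	memset(p, 0, len);	/* see the caveat above on dead stores */
	free(p);
}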
diff --git a/net/wireless/util.c b/net/wireless/util.c
index ed868c0f7ca8..1ad5a6bdfd75 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -820,6 +820,52 @@ bool ieee80211_is_valid_amsdu(struct sk_buff *skb, u8 mesh_hdr)
}
EXPORT_SYMBOL(ieee80211_is_valid_amsdu);
+
+/*
+ * Detects if an MSDU frame was maliciously converted into an A-MSDU
+ * frame by an adversary. This is done by parsing the received frame
+ * as if it were a regular MSDU, even though the A-MSDU flag is set.
+ *
+ * For non-mesh interfaces, detection involves checking whether the
+ * payload, when interpreted as an MSDU, begins with a valid RFC1042
+ * header. This is done by comparing the A-MSDU subheader's destination
+ * address to the start of the RFC1042 header.
+ *
+ * For mesh interfaces, the MSDU includes a 6-byte Mesh Control field
+ * and an optional variable-length Mesh Address Extension field before
+ * the RFC1042 header. The position of the RFC1042 header must therefore
+ * be calculated based on the mesh header length.
+ *
+ * Since this function intentionally parses an A-MSDU frame as an MSDU,
+ * it only assumes that the A-MSDU subframe header is present, and
+ * beyond this it performs its own bounds checks under the assumption
+ * that the frame is instead parsed as a non-aggregated MSDU.
+ */
+static bool
+is_amsdu_aggregation_attack(struct ethhdr *eth, struct sk_buff *skb,
+ enum nl80211_iftype iftype)
+{
+ int offset;
+
+ /* Non-mesh case can be directly compared */
+ if (iftype != NL80211_IFTYPE_MESH_POINT)
+ return ether_addr_equal(eth->h_dest, rfc1042_header);
+
+ offset = __ieee80211_get_mesh_hdrlen(eth->h_dest[0]);
+ if (offset == 6) {
+ /* Mesh case with empty address extension field */
+ return ether_addr_equal(eth->h_source, rfc1042_header);
+ } else if (offset + ETH_ALEN <= skb->len) {
+ /* Mesh case with non-empty address extension field */
+ u8 temp[ETH_ALEN];
+
+ skb_copy_bits(skb, offset, temp, ETH_ALEN);
+ return ether_addr_equal(temp, rfc1042_header);
+ }
+
+ return false;
+}
+
void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
const u8 *addr, enum nl80211_iftype iftype,
const unsigned int extra_headroom,
@@ -861,8 +907,10 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
/* the last MSDU has no padding */
if (subframe_len > remaining)
goto purge;
- /* mitigate A-MSDU aggregation injection attacks */
- if (ether_addr_equal(hdr.eth.h_dest, rfc1042_header))
+ /* mitigate A-MSDU aggregation injection attacks; only the first
+ * subframe (offset == 0) needs to be checked.
+ */
+ if (offset == 0 && is_amsdu_aggregation_attack(&hdr.eth, skb, iftype))
goto purge;
offset += sizeof(struct ethhdr);
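/*
 * Sketch of the comparison is_amsdu_aggregation_attack() performs
 * above: parsed as a plain MSDU, a legitimate frame starts with the
 * RFC 1042 LLC/SNAP header, so finding those six bytes where an
 * A-MSDU destination address should sit is the giveaway.
 */
#include <stdbool.h>
#include <string.h>

static const unsigned char rfc1042_header[6] = {
	0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00
};

static bool looks_like_plain_msdu(const unsigned char *payload,
				  size_t len, size_t offset)
{
	if (offset + sizeof(rfc1042_header) > len)
		return false;
	return !memcmp(payload + offset, rfc1042_header,
		       sizeof(rfc1042_header));
}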
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 81fd486b5e56..d2819baea414 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -305,7 +305,6 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
return -EINVAL;
}
- xfrm_set_type_offload(x);
if (!x->type_offload) {
NL_SET_ERR_MSG(extack, "Type doesn't support offload");
dev_put(dev);
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index cb1e12740c87..330a05286a56 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -875,7 +875,7 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
return -EINVAL;
}
- if (p.collect_md) {
+ if (p.collect_md || xi->p.collect_md) {
NL_SET_ERR_MSG(extack, "collect_md can't be changed");
return -EINVAL;
}
@@ -886,11 +886,6 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
} else {
if (xi->dev != dev)
return -EEXIST;
- if (xi->p.collect_md) {
- NL_SET_ERR_MSG(extack,
- "device can't be changed to collect_md");
- return -EINVAL;
- }
}
return xfrmi_update(xi, &p);
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 907c3ccb440d..43fdc6ed8dd1 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -97,7 +97,7 @@ static int ipcomp_input_done2(struct sk_buff *skb, int err)
struct ip_comp_hdr *ipch = ip_comp_hdr(skb);
const int plen = skb->len;
- skb_reset_transport_header(skb);
+ skb->transport_header = skb->network_header + sizeof(*ipch);
return ipcomp_post_acomp(skb, err, 0) ?:
skb->len < (plen + sizeof(ip_comp_hdr)) ? -EINVAL :
@@ -313,7 +313,6 @@ void ipcomp_destroy(struct xfrm_state *x)
struct ipcomp_data *ipcd = x->data;
if (!ipcd)
return;
- xfrm_state_delete_tunnel(x);
ipcomp_free_data(ipcd);
kfree(ipcd);
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 77cc418ad69e..97ff756191ba 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -424,11 +424,10 @@ void xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
}
EXPORT_SYMBOL(xfrm_unregister_type_offload);
-void xfrm_set_type_offload(struct xfrm_state *x)
+void xfrm_set_type_offload(struct xfrm_state *x, bool try_load)
{
const struct xfrm_type_offload *type = NULL;
struct xfrm_state_afinfo *afinfo;
- bool try_load = true;
retry:
afinfo = xfrm_state_get_afinfo(x->props.family);
@@ -593,7 +592,7 @@ void xfrm_state_free(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_free);
-static void ___xfrm_state_destroy(struct xfrm_state *x)
+static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
if (x->mode_cbs && x->mode_cbs->destroy_state)
x->mode_cbs->destroy_state(x);
@@ -607,6 +606,7 @@ static void ___xfrm_state_destroy(struct xfrm_state *x)
kfree(x->coaddr);
kfree(x->replay_esn);
kfree(x->preplay_esn);
+ xfrm_unset_type_offload(x);
if (x->type) {
x->type->destructor(x);
xfrm_put_type(x->type);
@@ -631,7 +631,7 @@ static void xfrm_state_gc_task(struct work_struct *work)
synchronize_rcu();
hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
- ___xfrm_state_destroy(x);
+ xfrm_state_gc_destroy(x);
}
static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
@@ -780,8 +780,6 @@ void xfrm_dev_state_free(struct xfrm_state *x)
struct xfrm_dev_offload *xso = &x->xso;
struct net_device *dev = READ_ONCE(xso->dev);
- xfrm_unset_type_offload(x);
-
if (dev && dev->xfrmdev_ops) {
spin_lock_bh(&xfrm_state_dev_gc_lock);
if (!hlist_unhashed(&x->dev_gclist))
@@ -797,22 +795,18 @@ void xfrm_dev_state_free(struct xfrm_state *x)
}
#endif
-void __xfrm_state_destroy(struct xfrm_state *x, bool sync)
+void __xfrm_state_destroy(struct xfrm_state *x)
{
WARN_ON(x->km.state != XFRM_STATE_DEAD);
- if (sync) {
- synchronize_rcu();
- ___xfrm_state_destroy(x);
- } else {
- spin_lock_bh(&xfrm_state_gc_lock);
- hlist_add_head(&x->gclist, &xfrm_state_gc_list);
- spin_unlock_bh(&xfrm_state_gc_lock);
- schedule_work(&xfrm_state_gc_work);
- }
+ spin_lock_bh(&xfrm_state_gc_lock);
+ hlist_add_head(&x->gclist, &xfrm_state_gc_list);
+ spin_unlock_bh(&xfrm_state_gc_lock);
+ schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
+static void xfrm_state_delete_tunnel(struct xfrm_state *x);
int __xfrm_state_delete(struct xfrm_state *x)
{
struct net *net = xs_net(x);
@@ -840,6 +834,8 @@ int __xfrm_state_delete(struct xfrm_state *x)
xfrm_dev_state_delete(x);
+ xfrm_state_delete_tunnel(x);
+
/* All xfrm_state objects are created by xfrm_state_alloc.
* The xfrm_state_alloc call gives a reference, and that
* is what we are dropping here.
@@ -921,7 +917,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool
}
#endif
-int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync)
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
{
int i, err = 0, cnt = 0;
@@ -943,10 +939,7 @@ restart:
err = xfrm_state_delete(x);
xfrm_audit_state_delete(x, err ? 0 : 1,
task_valid);
- if (sync)
- xfrm_state_put_sync(x);
- else
- xfrm_state_put(x);
+ xfrm_state_put(x);
if (!err)
cnt++;
@@ -1307,14 +1300,8 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
const struct flowi *fl, unsigned short family,
struct xfrm_state **best, int *acq_in_progress,
- int *error)
+ int *error, unsigned int pcpu_id)
{
- /* We need the cpu id just as a lookup key,
- * we don't require it to be stable.
- */
- unsigned int pcpu_id = get_cpu();
- put_cpu();
-
/* Resolution logic:
* 1. There is a valid state with matching selector. Done.
* 2. Valid state with inappropriate selector. Skip.
@@ -1381,14 +1368,15 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
/* We need the cpu id just as a lookup key,
* we don't require it to be stable.
*/
- pcpu_id = get_cpu();
- put_cpu();
+ pcpu_id = raw_smp_processor_id();
to_put = NULL;
sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
rcu_read_lock();
+ xfrm_hash_ptrs_get(net, &state_ptrs);
+
hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid &&
@@ -1400,7 +1388,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
xfrm_state_look_at(pol, x, fl, encap_family,
- &best, &acquire_in_progress, &error);
+ &best, &acquire_in_progress, &error, pcpu_id);
}
if (best)
@@ -1417,7 +1405,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
xfrm_state_look_at(pol, x, fl, family,
- &best, &acquire_in_progress, &error);
+ &best, &acquire_in_progress, &error, pcpu_id);
}
cached:
@@ -1429,8 +1417,6 @@ cached:
else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */
WARN_ON(1);
- xfrm_hash_ptrs_get(net, &state_ptrs);
-
h = __xfrm_dst_hash(daddr, saddr, tmpl->reqid, encap_family, state_ptrs.hmask);
hlist_for_each_entry_rcu(x, state_ptrs.bydst + h, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
@@ -1460,7 +1446,7 @@ cached:
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
xfrm_state_look_at(pol, x, fl, family,
- &best, &acquire_in_progress, &error);
+ &best, &acquire_in_progress, &error, pcpu_id);
}
if (best || acquire_in_progress)
goto found;
@@ -1495,7 +1481,7 @@ cached:
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
xfrm_state_look_at(pol, x, fl, family,
- &best, &acquire_in_progress, &error);
+ &best, &acquire_in_progress, &error, pcpu_id);
}
found:
@@ -3077,20 +3063,17 @@ void xfrm_flush_gc(void)
}
EXPORT_SYMBOL(xfrm_flush_gc);
-/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
-void xfrm_state_delete_tunnel(struct xfrm_state *x)
+static void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
if (x->tunnel) {
struct xfrm_state *t = x->tunnel;
- if (atomic_read(&t->tunnel_users) == 2)
+ if (atomic_dec_return(&t->tunnel_users) == 1)
xfrm_state_delete(t);
- atomic_dec(&t->tunnel_users);
- xfrm_state_put_sync(t);
+ xfrm_state_put(t);
x->tunnel = NULL;
}
}
-EXPORT_SYMBOL(xfrm_state_delete_tunnel);
u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
@@ -3295,8 +3278,8 @@ void xfrm_state_fini(struct net *net)
unsigned int sz;
flush_work(&net->xfrm.state_hash_work);
+ xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
flush_work(&xfrm_state_gc_work);
- xfrm_state_flush(net, 0, false, true);
WARN_ON(!list_empty(&net->xfrm.state_all));
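/*
 * Sketch (C11 atomics) of the refcount fix in
 * xfrm_state_delete_tunnel() above: the old read-then-decrement pair
 * leaves a window where two CPUs can both observe the same count; a
 * single atomic decrement-and-test does not.
 */
#include <stdatomic.h>

static atomic_int tunnel_users;

/* old (racy):
 *	if (atomic_load(&tunnel_users) == 2)
 *		delete();
 *	atomic_fetch_sub(&tunnel_users, 1);
 */
static void put_tunnel_user(void (*delete)(void))
{
	/* fetch_sub returns the old value: old == 2 means new == 1 */
	if (atomic_fetch_sub(&tunnel_users, 1) == 2)
		delete();
}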
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 59f258daf830..684239018bec 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -977,6 +977,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
/* override default values from above */
xfrm_update_ae_params(x, attrs, 0);
+ xfrm_set_type_offload(x, attrs[XFRMA_OFFLOAD_DEV]);
/* configure the hardware if offload is requested */
if (attrs[XFRMA_OFFLOAD_DEV]) {
err = xfrm_dev_state_add(net, x,
@@ -2634,7 +2635,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
struct xfrm_usersa_flush *p = nlmsg_data(nlh);
int err;
- err = xfrm_state_flush(net, p->proto, true, false);
+ err = xfrm_state_flush(net, p->proto, true);
if (err) {
if (err == -ESRCH) /* empty table */
return 0;
diff --git a/rust/Makefile b/rust/Makefile
index 27dec7904c3a..115b63b7d1e3 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -194,6 +194,7 @@ quiet_cmd_rustdoc_test = RUSTDOC T $<
RUST_MODFILE=test.rs \
OBJTREE=$(abspath $(objtree)) \
$(RUSTDOC) --test $(rust_common_flags) \
+ -Zcrate-attr='feature(used_with_arg)' \
@$(objtree)/include/generated/rustc_cfg \
$(rustc_target_flags) $(rustdoc_test_target_flags) \
$(rustdoc_test_quiet) \
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index bc494745f67b..8cbb660e2ec2 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -39,6 +39,7 @@
#include <linux/blk_types.h>
#include <linux/blkdev.h>
#include <linux/clk.h>
+#include <linux/completion.h>
#include <linux/configfs.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
diff --git a/rust/helpers/completion.c b/rust/helpers/completion.c
new file mode 100644
index 000000000000..b2443262a2ae
--- /dev/null
+++ b/rust/helpers/completion.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/completion.h>
+
+void rust_helper_init_completion(struct completion *x)
+{
+ init_completion(x);
+}
diff --git a/rust/helpers/cpu.c b/rust/helpers/cpu.c
new file mode 100644
index 000000000000..824e0adb19d4
--- /dev/null
+++ b/rust/helpers/cpu.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/smp.h>
+
+unsigned int rust_helper_raw_smp_processor_id(void)
+{
+ return raw_smp_processor_id();
+}
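/*
 * Note in code form: helper files like the two above exist because
 * bindgen cannot bind `static inline` C functions or function-like
 * macros; an out-of-line wrapper gives the Rust side a real linkable
 * symbol. Userspace analogue:
 */
static inline int add_one(int x)	/* inline-only, no symbol */
{
	return x + 1;
}

int ffi_add_one(int x)			/* exported alias for FFI use */
{
	return add_one(x);
}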
diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c
index 0f1b5d115985..b15b3cddad4e 100644
--- a/rust/helpers/helpers.c
+++ b/rust/helpers/helpers.c
@@ -13,6 +13,8 @@
#include "build_assert.c"
#include "build_bug.c"
#include "clk.c"
+#include "completion.c"
+#include "cpu.c"
#include "cpufreq.c"
#include "cpumask.c"
#include "cred.c"
diff --git a/rust/kernel/cpu.rs b/rust/kernel/cpu.rs
index 10c5c3b25873..b75403b0eb56 100644
--- a/rust/kernel/cpu.rs
+++ b/rust/kernel/cpu.rs
@@ -6,6 +6,127 @@
use crate::{bindings, device::Device, error::Result, prelude::ENODEV};
+/// Returns the maximum number of possible CPUs in the current system configuration.
+#[inline]
+pub fn nr_cpu_ids() -> u32 {
+ #[cfg(any(NR_CPUS_1, CONFIG_FORCE_NR_CPUS))]
+ {
+ bindings::NR_CPUS
+ }
+
+ #[cfg(not(any(NR_CPUS_1, CONFIG_FORCE_NR_CPUS)))]
+ // SAFETY: `nr_cpu_ids` is a valid global provided by the kernel.
+ unsafe {
+ bindings::nr_cpu_ids
+ }
+}
+
+/// The CPU ID.
+///
+/// Represents a CPU identifier as a wrapper around an [`u32`].
+///
+/// # Invariants
+///
+/// The CPU ID lies within the range `[0, nr_cpu_ids())`.
+///
+/// # Examples
+///
+/// ```
+/// use kernel::cpu::CpuId;
+///
+/// let cpu = 0;
+///
+/// // SAFETY: 0 is always a valid CPU number.
+/// let id = unsafe { CpuId::from_u32_unchecked(cpu) };
+///
+/// assert_eq!(id.as_u32(), cpu);
+/// assert!(CpuId::from_i32(0).is_some());
+/// assert!(CpuId::from_i32(-1).is_none());
+/// ```
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub struct CpuId(u32);
+
+impl CpuId {
+ /// Creates a new [`CpuId`] from the given `id` without checking bounds.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure that `id` is a valid CPU ID (i.e., `0 <= id < nr_cpu_ids()`).
+ #[inline]
+ pub unsafe fn from_i32_unchecked(id: i32) -> Self {
+ debug_assert!(id >= 0);
+ debug_assert!((id as u32) < nr_cpu_ids());
+
+ // INVARIANT: The function safety guarantees `id` is a valid CPU id.
+ Self(id as u32)
+ }
+
+ /// Creates a new [`CpuId`] from the given `id`, checking that it is valid.
+ pub fn from_i32(id: i32) -> Option<Self> {
+ if id < 0 || id as u32 >= nr_cpu_ids() {
+ None
+ } else {
+ // INVARIANT: `id` has just been checked as a valid CPU ID.
+ Some(Self(id as u32))
+ }
+ }
+
+ /// Creates a new [`CpuId`] from the given `id` without checking bounds.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure that `id` is a valid CPU ID (i.e., `0 <= id < nr_cpu_ids()`).
+ #[inline]
+ pub unsafe fn from_u32_unchecked(id: u32) -> Self {
+ debug_assert!(id < nr_cpu_ids());
+
+ // Ensure the `id` also fits in an [`i32`], as [`CpuId`] values are convertible to `i32`.
+ debug_assert!(id <= i32::MAX as u32);
+
+ // INVARIANT: The function safety guarantees `id` is a valid CPU id.
+ Self(id)
+ }
+
+ /// Creates a new [`CpuId`] from the given `id`, checking that it is valid.
+ pub fn from_u32(id: u32) -> Option<Self> {
+ if id >= nr_cpu_ids() {
+ None
+ } else {
+ // INVARIANT: `id` has just been checked as a valid CPU ID.
+ Some(Self(id))
+ }
+ }
+
+ /// Returns the CPU number.
+ #[inline]
+ pub fn as_u32(&self) -> u32 {
+ self.0
+ }
+
+ /// Returns the ID of the CPU the code is currently running on.
+ ///
+ /// The returned value is considered unstable because it may change
+ /// unexpectedly due to preemption or CPU migration. It should only be
+ /// used when the context ensures that the task remains on the same CPU,
+ /// or when a stale (yet valid) CPU ID is acceptable to the caller.
+ pub fn current() -> Self {
+ // SAFETY: raw_smp_processor_id() always returns a valid CPU ID.
+ unsafe { Self::from_u32_unchecked(bindings::raw_smp_processor_id()) }
+ }
+}
+
+impl From<CpuId> for u32 {
+ fn from(id: CpuId) -> Self {
+ id.as_u32()
+ }
+}
+
+impl From<CpuId> for i32 {
+ fn from(id: CpuId) -> Self {
+ id.as_u32() as i32
+ }
+}
+
/// Creates a new instance of CPU's device.
///
/// # Safety
@@ -17,9 +138,9 @@ use crate::{bindings, device::Device, error::Result, prelude::ENODEV};
/// Callers must ensure that the CPU device is not used after it has been unregistered.
/// This can be achieved, for example, by registering a CPU hotplug notifier and removing
/// any references to the CPU device within the notifier's callback.
-pub unsafe fn from_cpu(cpu: u32) -> Result<&'static Device> {
+pub unsafe fn from_cpu(cpu: CpuId) -> Result<&'static Device> {
// SAFETY: It is safe to call `get_cpu_device()` for any CPU.
- let ptr = unsafe { bindings::get_cpu_device(cpu) };
+ let ptr = unsafe { bindings::get_cpu_device(u32::from(cpu)) };
if ptr.is_null() {
return Err(ENODEV);
}
diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs
index b0a9c6182aec..11b03e9d7e89 100644
--- a/rust/kernel/cpufreq.rs
+++ b/rust/kernel/cpufreq.rs
@@ -10,6 +10,7 @@
use crate::{
clk::Hertz,
+ cpu::CpuId,
cpumask,
device::{Bound, Device},
devres::Devres,
@@ -465,8 +466,9 @@ impl Policy {
/// Returns the primary CPU for the [`Policy`].
#[inline]
- pub fn cpu(&self) -> u32 {
- self.as_ref().cpu
+ pub fn cpu(&self) -> CpuId {
+ // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number.
+ unsafe { CpuId::from_u32_unchecked(self.as_ref().cpu) }
}
/// Returns the minimum frequency for the [`Policy`].
@@ -525,7 +527,7 @@ impl Policy {
#[inline]
pub fn generic_get(&self) -> Result<u32> {
// SAFETY: By the type invariant, the pointer stored in `self` is valid.
- Ok(unsafe { bindings::cpufreq_generic_get(self.cpu()) })
+ Ok(unsafe { bindings::cpufreq_generic_get(u32::from(self.cpu())) })
}
/// Provides a wrapper to the register with energy model using the OPP core.
@@ -678,9 +680,9 @@ impl Policy {
struct PolicyCpu<'a>(&'a mut Policy);
impl<'a> PolicyCpu<'a> {
- fn from_cpu(cpu: u32) -> Result<Self> {
+ fn from_cpu(cpu: CpuId) -> Result<Self> {
// SAFETY: It is safe to call `cpufreq_cpu_get` for any valid CPU.
- let ptr = from_err_ptr(unsafe { bindings::cpufreq_cpu_get(cpu) })?;
+ let ptr = from_err_ptr(unsafe { bindings::cpufreq_cpu_get(u32::from(cpu)) })?;
Ok(Self(
// SAFETY: The `ptr` is guaranteed to be valid and remains valid for the lifetime of
@@ -1055,8 +1057,11 @@ impl<T: Driver> Registration<T> {
impl<T: Driver> Registration<T> {
/// Driver's `init` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn init_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int {
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn init_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int {
from_result(|| {
// SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the
// lifetime of `policy`.
@@ -1070,8 +1075,11 @@ impl<T: Driver> Registration<T> {
/// Driver's `exit` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn exit_callback(ptr: *mut bindings::cpufreq_policy) {
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn exit_callback(ptr: *mut bindings::cpufreq_policy) {
// SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the
// lifetime of `policy`.
let policy = unsafe { Policy::from_raw_mut(ptr) };
@@ -1082,8 +1090,11 @@ impl<T: Driver> Registration<T> {
/// Driver's `online` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn online_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int {
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn online_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int {
from_result(|| {
// SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the
// lifetime of `policy`.
@@ -1094,8 +1105,13 @@ impl<T: Driver> Registration<T> {
/// Driver's `offline` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn offline_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int {
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn offline_callback(
+ ptr: *mut bindings::cpufreq_policy,
+ ) -> kernel::ffi::c_int {
from_result(|| {
// SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the
// lifetime of `policy`.
@@ -1106,8 +1122,13 @@ impl<T: Driver> Registration<T> {
/// Driver's `suspend` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn suspend_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int {
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn suspend_callback(
+ ptr: *mut bindings::cpufreq_policy,
+ ) -> kernel::ffi::c_int {
from_result(|| {
// SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the
// lifetime of `policy`.
@@ -1118,8 +1139,11 @@ impl<T: Driver> Registration<T> {
/// Driver's `resume` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn resume_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int {
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn resume_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int {
from_result(|| {
// SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the
// lifetime of `policy`.
@@ -1130,8 +1154,11 @@ impl<T: Driver> Registration<T> {
/// Driver's `ready` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn ready_callback(ptr: *mut bindings::cpufreq_policy) {
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn ready_callback(ptr: *mut bindings::cpufreq_policy) {
// SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the
// lifetime of `policy`.
let policy = unsafe { Policy::from_raw_mut(ptr) };
@@ -1140,8 +1167,13 @@ impl<T: Driver> Registration<T> {
/// Driver's `verify` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn verify_callback(ptr: *mut bindings::cpufreq_policy_data) -> kernel::ffi::c_int {
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn verify_callback(
+ ptr: *mut bindings::cpufreq_policy_data,
+ ) -> kernel::ffi::c_int {
from_result(|| {
// SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the
// lifetime of `policy`.
@@ -1152,8 +1184,13 @@ impl<T: Driver> Registration<T> {
/// Driver's `setpolicy` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn setpolicy_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int {
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn setpolicy_callback(
+ ptr: *mut bindings::cpufreq_policy,
+ ) -> kernel::ffi::c_int {
from_result(|| {
// SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the
// lifetime of `policy`.
@@ -1164,8 +1201,11 @@ impl<T: Driver> Registration<T> {
/// Driver's `target` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn target_callback(
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn target_callback(
ptr: *mut bindings::cpufreq_policy,
target_freq: u32,
relation: u32,
@@ -1180,8 +1220,11 @@ impl<T: Driver> Registration<T> {
/// Driver's `target_index` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn target_index_callback(
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn target_index_callback(
ptr: *mut bindings::cpufreq_policy,
index: u32,
) -> kernel::ffi::c_int {
@@ -1200,8 +1243,11 @@ impl<T: Driver> Registration<T> {
/// Driver's `fast_switch` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn fast_switch_callback(
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn fast_switch_callback(
ptr: *mut bindings::cpufreq_policy,
target_freq: u32,
) -> kernel::ffi::c_uint {
@@ -1212,21 +1258,31 @@ impl<T: Driver> Registration<T> {
}
/// Driver's `adjust_perf` callback.
- extern "C" fn adjust_perf_callback(
+ ///
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ unsafe extern "C" fn adjust_perf_callback(
cpu: u32,
min_perf: usize,
target_perf: usize,
capacity: usize,
) {
- if let Ok(mut policy) = PolicyCpu::from_cpu(cpu) {
+ // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number.
+ let cpu_id = unsafe { CpuId::from_u32_unchecked(cpu) };
+
+ if let Ok(mut policy) = PolicyCpu::from_cpu(cpu_id) {
T::adjust_perf(&mut policy, min_perf, target_perf, capacity);
}
}
/// Driver's `get_intermediate` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn get_intermediate_callback(
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn get_intermediate_callback(
ptr: *mut bindings::cpufreq_policy,
index: u32,
) -> kernel::ffi::c_uint {
@@ -1243,8 +1299,11 @@ impl<T: Driver> Registration<T> {
/// Driver's `target_intermediate` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn target_intermediate_callback(
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn target_intermediate_callback(
ptr: *mut bindings::cpufreq_policy,
index: u32,
) -> kernel::ffi::c_int {
@@ -1262,12 +1321,24 @@ impl<T: Driver> Registration<T> {
}
/// Driver's `get` callback.
- extern "C" fn get_callback(cpu: u32) -> kernel::ffi::c_uint {
- PolicyCpu::from_cpu(cpu).map_or(0, |mut policy| T::get(&mut policy).map_or(0, |f| f))
+ ///
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ unsafe extern "C" fn get_callback(cpu: u32) -> kernel::ffi::c_uint {
+ // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number.
+ let cpu_id = unsafe { CpuId::from_u32_unchecked(cpu) };
+
+ PolicyCpu::from_cpu(cpu_id).map_or(0, |mut policy| T::get(&mut policy).map_or(0, |f| f))
}
/// Driver's `update_limit` callback.
- extern "C" fn update_limits_callback(ptr: *mut bindings::cpufreq_policy) {
+ ///
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn update_limits_callback(ptr: *mut bindings::cpufreq_policy) {
// SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the
// lifetime of `policy`.
let policy = unsafe { Policy::from_raw_mut(ptr) };
@@ -1276,10 +1347,16 @@ impl<T: Driver> Registration<T> {
/// Driver's `bios_limit` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn bios_limit_callback(cpu: i32, limit: *mut u32) -> kernel::ffi::c_int {
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn bios_limit_callback(cpu: i32, limit: *mut u32) -> kernel::ffi::c_int {
+ // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number.
+ let cpu_id = unsafe { CpuId::from_i32_unchecked(cpu) };
+
from_result(|| {
- let mut policy = PolicyCpu::from_cpu(cpu as u32)?;
+ let mut policy = PolicyCpu::from_cpu(cpu_id)?;
// SAFETY: `limit` is guaranteed by the C code to be valid.
T::bios_limit(&mut policy, &mut (unsafe { *limit })).map(|()| 0)
@@ -1288,8 +1365,11 @@ impl<T: Driver> Registration<T> {
/// Driver's `set_boost` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn set_boost_callback(
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn set_boost_callback(
ptr: *mut bindings::cpufreq_policy,
state: i32,
) -> kernel::ffi::c_int {
@@ -1303,8 +1383,11 @@ impl<T: Driver> Registration<T> {
/// Driver's `register_em` callback.
///
- /// SAFETY: Called from C. Inputs must be valid pointers.
- extern "C" fn register_em_callback(ptr: *mut bindings::cpufreq_policy) {
+ /// # Safety
+ ///
+ /// - This function may only be called from the cpufreq C infrastructure.
+ /// - The pointer arguments must be valid pointers.
+ unsafe extern "C" fn register_em_callback(ptr: *mut bindings::cpufreq_policy) {
// SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the
// lifetime of `policy`.
let policy = unsafe { Policy::from_raw_mut(ptr) };
diff --git a/rust/kernel/cpumask.rs b/rust/kernel/cpumask.rs
index c90bfac9346a..19c607709b5f 100644
--- a/rust/kernel/cpumask.rs
+++ b/rust/kernel/cpumask.rs
@@ -6,6 +6,7 @@
use crate::{
alloc::{AllocError, Flags},
+ cpu::CpuId,
prelude::*,
types::Opaque,
};
@@ -35,9 +36,10 @@ use core::ops::{Deref, DerefMut};
///
/// ```
/// use kernel::bindings;
+/// use kernel::cpu::CpuId;
/// use kernel::cpumask::Cpumask;
///
-/// fn set_clear_cpu(ptr: *mut bindings::cpumask, set_cpu: u32, clear_cpu: i32) {
+/// fn set_clear_cpu(ptr: *mut bindings::cpumask, set_cpu: CpuId, clear_cpu: CpuId) {
/// // SAFETY: The `ptr` is valid for writing and remains valid for the lifetime of the
/// // returned reference.
/// let mask = unsafe { Cpumask::as_mut_ref(ptr) };
@@ -90,9 +92,9 @@ impl Cpumask {
/// This mismatches kernel naming convention and corresponds to the C
/// function `__cpumask_set_cpu()`.
#[inline]
- pub fn set(&mut self, cpu: u32) {
+ pub fn set(&mut self, cpu: CpuId) {
// SAFETY: By the type invariant, `self.as_raw` is a valid argument to `__cpumask_set_cpu`.
- unsafe { bindings::__cpumask_set_cpu(cpu, self.as_raw()) };
+ unsafe { bindings::__cpumask_set_cpu(u32::from(cpu), self.as_raw()) };
}
/// Clear `cpu` in the cpumask.
@@ -101,19 +103,19 @@ impl Cpumask {
/// This mismatches kernel naming convention and corresponds to the C
/// function `__cpumask_clear_cpu()`.
#[inline]
- pub fn clear(&mut self, cpu: i32) {
+ pub fn clear(&mut self, cpu: CpuId) {
// SAFETY: By the type invariant, `self.as_raw` is a valid argument to
// `__cpumask_clear_cpu`.
- unsafe { bindings::__cpumask_clear_cpu(cpu, self.as_raw()) };
+ unsafe { bindings::__cpumask_clear_cpu(i32::from(cpu), self.as_raw()) };
}
/// Test `cpu` in the cpumask.
///
/// Equivalent to the kernel's `cpumask_test_cpu` API.
#[inline]
- pub fn test(&self, cpu: i32) -> bool {
+ pub fn test(&self, cpu: CpuId) -> bool {
// SAFETY: By the type invariant, `self.as_raw` is a valid argument to `cpumask_test_cpu`.
- unsafe { bindings::cpumask_test_cpu(cpu, self.as_raw()) }
+ unsafe { bindings::cpumask_test_cpu(i32::from(cpu), self.as_raw()) }
}
/// Set all CPUs in the cpumask.
@@ -178,21 +180,40 @@ impl Cpumask {
/// The following example demonstrates how to create and update a [`CpumaskVar`].
///
/// ```
+/// use kernel::cpu::CpuId;
/// use kernel::cpumask::CpumaskVar;
///
/// let mut mask = CpumaskVar::new_zero(GFP_KERNEL).unwrap();
///
/// assert!(mask.empty());
-/// mask.set(2);
-/// assert!(mask.test(2));
-/// mask.set(3);
-/// assert!(mask.test(3));
-/// assert_eq!(mask.weight(), 2);
+/// let mut count = 0;
+///
+/// let cpu2 = CpuId::from_u32(2);
+/// if let Some(cpu) = cpu2 {
+/// mask.set(cpu);
+/// assert!(mask.test(cpu));
+/// count += 1;
+/// }
+///
+/// let cpu3 = CpuId::from_u32(3);
+/// if let Some(cpu) = cpu3 {
+/// mask.set(cpu);
+/// assert!(mask.test(cpu));
+/// count += 1;
+/// }
+///
+/// assert_eq!(mask.weight(), count);
///
/// let mask2 = CpumaskVar::try_clone(&mask).unwrap();
-/// assert!(mask2.test(2));
-/// assert!(mask2.test(3));
-/// assert_eq!(mask2.weight(), 2);
+///
+/// if let Some(cpu) = cpu2 {
+/// assert!(mask2.test(cpu));
+/// }
+///
+/// if let Some(cpu) = cpu3 {
+/// assert!(mask2.test(cpu));
+/// }
+/// assert_eq!(mask2.weight(), count);
/// ```
pub struct CpumaskVar {
#[cfg(CONFIG_CPUMASK_OFFSTACK)]
diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs
index 0f79a2ec9474..57502534d985 100644
--- a/rust/kernel/devres.rs
+++ b/rust/kernel/devres.rs
@@ -12,26 +12,28 @@ use crate::{
error::{Error, Result},
ffi::c_void,
prelude::*,
- revocable::Revocable,
- sync::Arc,
+ revocable::{Revocable, RevocableGuard},
+ sync::{rcu, Arc, Completion},
types::ARef,
};
-use core::ops::Deref;
-
#[pin_data]
struct DevresInner<T> {
dev: ARef<Device>,
callback: unsafe extern "C" fn(*mut c_void),
#[pin]
data: Revocable<T>,
+ #[pin]
+ revoke: Completion,
}
/// This abstraction is meant to be used by subsystems to containerize [`Device`] bound resources to
/// manage their lifetime.
///
/// [`Device`] bound resources should be freed when either the resource goes out of scope or the
-/// [`Device`] is unbound respectively, depending on what happens first.
+/// [`Device`] is unbound, whichever happens first. In any case, it is always
+/// guaranteed that revoking the device resource is completed before the corresponding [`Device`]
+/// is unbound.
///
/// To achieve that [`Devres`] registers a devres callback on creation, which is called once the
/// [`Device`] is unbound, revoking access to the encapsulated resource (see also [`Revocable`]).
@@ -102,6 +104,7 @@ impl<T> DevresInner<T> {
dev: dev.into(),
callback: Self::devres_callback,
data <- Revocable::new(data),
+ revoke <- Completion::new(),
}),
flags,
)?;
@@ -130,26 +133,28 @@ impl<T> DevresInner<T> {
self as _
}
- fn remove_action(this: &Arc<Self>) {
+ fn remove_action(this: &Arc<Self>) -> bool {
// SAFETY:
// - `self.inner.dev` is a valid `Device`,
// - the `action` and `data` pointers are the exact same ones as given to devm_add_action()
// previously,
// - `self` is always valid, even if the action has been released already.
- let ret = unsafe {
+ let success = unsafe {
bindings::devm_remove_action_nowarn(
this.dev.as_raw(),
Some(this.callback),
this.as_ptr() as _,
)
- };
+ } == 0;
- if ret == 0 {
+ if success {
// SAFETY: We leaked an `Arc` reference to devm_add_action() in `DevresInner::new`; if
// devm_remove_action_nowarn() was successful we can (and have to) claim back ownership
// of this reference.
let _ = unsafe { Arc::from_raw(this.as_ptr()) };
}
+
+ success
}
#[allow(clippy::missing_safety_doc)]
@@ -161,7 +166,12 @@ impl<T> DevresInner<T> {
// `DevresInner::new`.
let inner = unsafe { Arc::from_raw(ptr) };
- inner.data.revoke();
+ if !inner.data.revoke() {
+ // If `revoke()` returns false, it means that `Devres::drop` already started revoking
+ // `inner.data` for us. Hence we have to wait until `Devres::drop()` signals that it
+ // completed revoking `inner.data`.
+ inner.revoke.wait_for_completion();
+ }
}
}
@@ -218,20 +228,36 @@ impl<T> Devres<T> {
// SAFETY: `dev` being the same device as the device this `Devres` has been created for
// proves that `self.0.data` hasn't been revoked and is guaranteed to not be revoked as
// long as `dev` lives; `dev` lives at least as long as `self`.
- Ok(unsafe { self.deref().access() })
+ Ok(unsafe { self.0.data.access() })
}
-}
-impl<T> Deref for Devres<T> {
- type Target = Revocable<T>;
+ /// [`Devres`] accessor for [`Revocable::try_access`].
+ pub fn try_access(&self) -> Option<RevocableGuard<'_, T>> {
+ self.0.data.try_access()
+ }
+
+ /// [`Devres`] accessor for [`Revocable::try_access_with`].
+ pub fn try_access_with<R, F: FnOnce(&T) -> R>(&self, f: F) -> Option<R> {
+ self.0.data.try_access_with(f)
+ }
- fn deref(&self) -> &Self::Target {
- &self.0.data
+ /// [`Devres`] accessor for [`Revocable::try_access_with_guard`].
+ pub fn try_access_with_guard<'a>(&'a self, guard: &'a rcu::Guard) -> Option<&'a T> {
+ self.0.data.try_access_with_guard(guard)
}
}
impl<T> Drop for Devres<T> {
fn drop(&mut self) {
- DevresInner::remove_action(&self.0);
+ // SAFETY: When `drop` runs, it is guaranteed that nobody is accessing the revocable data
+ // anymore, hence it is safe not to wait for the grace period to finish.
+ if unsafe { self.0.data.revoke_nosync() } {
+ // We revoked `self.0.data` before the devres action did, hence try to remove it.
+ if !DevresInner::remove_action(&self.0) {
+ // We could not remove the devres action, which means that it now runs concurrently,
+ // hence signal that `self.0.data` has been revoked successfully.
+ self.0.revoke.complete_all();
+ }
+ }
}
}
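
The ordering above is subtle: both `Devres::drop` and the devres callback may race to revoke the data, and whichever loses must not return before the winner has finished dropping it. A minimal userspace sketch of the same protocol, using std atomics and a condvar in place of the kernel's `Revocable` and `Completion` (all names hypothetical; it only mirrors the control flow above):

    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::{Arc, Condvar, Mutex};

    struct Inner {
        available: AtomicBool,        // stands in for Revocable::is_available
        done: (Mutex<bool>, Condvar), // stands in for Completion
    }

    impl Inner {
        // Returns true only for the caller that actually performed the revoke.
        fn revoke(&self) -> bool {
            self.available.swap(false, Ordering::Relaxed)
        }
    }

    // The devres callback: if drop() won the race, wait for it to finish.
    fn devres_callback(inner: &Arc<Inner>) {
        if !inner.revoke() {
            let (lock, cvar) = &inner.done;
            let mut finished = lock.lock().unwrap();
            while !*finished {
                finished = cvar.wait(finished).unwrap();
            }
        }
    }

    // Devres::drop: if we revoked first but could not remove the devres
    // action, the callback now runs concurrently -- signal completion.
    fn devres_drop(inner: &Arc<Inner>, remove_action_succeeded: bool) {
        if inner.revoke() && !remove_action_succeeded {
            let (lock, cvar) = &inner.done;
            *lock.lock().unwrap() = true;
            cvar.notify_all();
        }
    }
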
diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs
index 624d7a4c83ea..14c1aa402951 100644
--- a/rust/kernel/drm/device.rs
+++ b/rust/kernel/drm/device.rs
@@ -66,7 +66,7 @@ impl<T: drm::Driver> Device<T> {
open: Some(drm::File::<T::File>::open_callback),
postclose: Some(drm::File::<T::File>::postclose_callback),
unload: None,
- release: None,
+ release: Some(Self::release),
master_set: None,
master_drop: None,
debugfs_init: None,
@@ -162,6 +162,16 @@ impl<T: drm::Driver> Device<T> {
// SAFETY: `ptr` is valid by the safety requirements of this function.
unsafe { &*ptr.cast() }
}
+
+ extern "C" fn release(ptr: *mut bindings::drm_device) {
+ // SAFETY: `ptr` is a valid pointer to a `struct drm_device` and embedded in `Self`.
+ let this = unsafe { Self::from_drm_device(ptr) };
+
+ // SAFETY:
+ // - When `release` runs it is guaranteed that there is no further access to `this`.
+ // - `this` is valid for dropping.
+ unsafe { core::ptr::drop_in_place(this) };
+ }
}
impl<T: drm::Driver> Deref for Device<T> {
diff --git a/rust/kernel/drm/driver.rs b/rust/kernel/drm/driver.rs
index acb638086131..af93d46d03d3 100644
--- a/rust/kernel/drm/driver.rs
+++ b/rust/kernel/drm/driver.rs
@@ -10,7 +10,6 @@ use crate::{
drm,
error::{to_result, Result},
prelude::*,
- str::CStr,
types::ARef,
};
use macros::vtable;
diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs
index 2494c96e105f..4fe621f35716 100644
--- a/rust/kernel/firmware.rs
+++ b/rust/kernel/firmware.rs
@@ -202,7 +202,7 @@ macro_rules! module_firmware {
};
#[link_section = ".modinfo"]
- #[used]
+ #[used(compiler)]
static __MODULE_FIRMWARE: [u8; $($builder)*::create(__MODULE_FIRMWARE_PREFIX)
.build_length()] = $($builder)*::create(__MODULE_FIRMWARE_PREFIX).build();
};
diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs
index 8d228c237954..21ef202ab0db 100644
--- a/rust/kernel/init.rs
+++ b/rust/kernel/init.rs
@@ -231,14 +231,14 @@ macro_rules! try_init {
($(&$this:ident in)? $t:ident $(::<$($generics:ty),* $(,)?>)? {
$($fields:tt)*
}) => {
- ::pin_init::try_init!($(&$this in)? $t $(::<$($generics),* $(,)?>)? {
+ ::pin_init::try_init!($(&$this in)? $t $(::<$($generics),*>)? {
$($fields)*
}? $crate::error::Error)
};
($(&$this:ident in)? $t:ident $(::<$($generics:ty),* $(,)?>)? {
$($fields:tt)*
}? $err:ty) => {
- ::pin_init::try_init!($(&$this in)? $t $(::<$($generics),* $(,)?>)? {
+ ::pin_init::try_init!($(&$this in)? $t $(::<$($generics),*>)? {
$($fields)*
}? $err)
};
@@ -291,14 +291,14 @@ macro_rules! try_pin_init {
($(&$this:ident in)? $t:ident $(::<$($generics:ty),* $(,)?>)? {
$($fields:tt)*
}) => {
- ::pin_init::try_pin_init!($(&$this in)? $t $(::<$($generics),* $(,)?>)? {
+ ::pin_init::try_pin_init!($(&$this in)? $t $(::<$($generics),*>)? {
$($fields)*
}? $crate::error::Error)
};
($(&$this:ident in)? $t:ident $(::<$($generics:ty),* $(,)?>)? {
$($fields:tt)*
}? $err:ty) => {
- ::pin_init::try_pin_init!($(&$this in)? $t $(::<$($generics),* $(,)?>)? {
+ ::pin_init::try_pin_init!($(&$this in)? $t $(::<$($generics),*>)? {
$($fields)*
}? $err)
};
diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs
index 4b8cdcb21e77..b9e65905e121 100644
--- a/rust/kernel/kunit.rs
+++ b/rust/kernel/kunit.rs
@@ -302,7 +302,7 @@ macro_rules! kunit_unsafe_test_suite {
is_init: false,
};
- #[used]
+ #[used(compiler)]
#[allow(unused_unsafe)]
#[cfg_attr(not(target_os = "macos"), link_section = ".kunit_test_suites")]
static mut KUNIT_TEST_SUITE_ENTRY: *const ::kernel::bindings::kunit_suite =
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 6b4774b2b1c3..e13d6ed88fa6 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -34,6 +34,9 @@
// Expected to become stable.
#![feature(arbitrary_self_types)]
//
+// To be determined.
+#![feature(used_with_arg)]
+//
// `feature(derive_coerce_pointee)` is expected to become stable. Before Rust
// 1.84.0, it did not exist, so enable the predecessor features.
#![cfg_attr(CONFIG_RUSTC_HAS_COERCE_POINTEE, feature(derive_coerce_pointee))]
diff --git a/rust/kernel/revocable.rs b/rust/kernel/revocable.rs
index db4aa46bb121..06a3cdfce344 100644
--- a/rust/kernel/revocable.rs
+++ b/rust/kernel/revocable.rs
@@ -154,8 +154,10 @@ impl<T> Revocable<T> {
/// # Safety
///
/// Callers must ensure that there are no more concurrent users of the revocable object.
- unsafe fn revoke_internal<const SYNC: bool>(&self) {
- if self.is_available.swap(false, Ordering::Relaxed) {
+ unsafe fn revoke_internal<const SYNC: bool>(&self) -> bool {
+ let revoke = self.is_available.swap(false, Ordering::Relaxed);
+
+ if revoke {
if SYNC {
// SAFETY: Just an FFI call, there are no further requirements.
unsafe { bindings::synchronize_rcu() };
@@ -165,6 +167,8 @@ impl<T> Revocable<T> {
// `compare_exchange` above that takes `is_available` from `true` to `false`.
unsafe { drop_in_place(self.data.get()) };
}
+
+ revoke
}
/// Revokes access to and drops the wrapped object.
@@ -172,10 +176,13 @@ impl<T> Revocable<T> {
/// Access to the object is revoked immediately to new callers of [`Revocable::try_access`],
/// expecting that there are no concurrent users of the object.
///
+ /// Returns `true` if `&self` has been revoked with this call, `false` if it was revoked
+ /// already.
+ ///
/// # Safety
///
/// Callers must ensure that there are no more concurrent users of the revocable object.
- pub unsafe fn revoke_nosync(&self) {
+ pub unsafe fn revoke_nosync(&self) -> bool {
// SAFETY: By the safety requirement of this function, the caller ensures that nobody is
// accessing the data anymore and hence we don't have to wait for the grace period to
// finish.
@@ -189,7 +196,10 @@ impl<T> Revocable<T> {
/// If there are concurrent users of the object (i.e., ones that called
/// [`Revocable::try_access`] beforehand and still haven't dropped the returned guard), this
/// function waits for the concurrent access to complete before dropping the wrapped object.
- pub fn revoke(&self) {
+ ///
+ /// Returns `true` if `&self` has been revoked with this call, `false` if it was revoked
+ /// already.
+ pub fn revoke(&self) -> bool {
// SAFETY: By passing `true` we ask `revoke_internal` to wait for the grace period to
// finish.
unsafe { self.revoke_internal::<true>() }
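
The boolean return value is what makes the Devres race above resolvable: an atomic swap guarantees that exactly one of two concurrent revokers observes `true`. A toy std-only demonstration of that contract (not kernel code):

    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::Arc;
    use std::thread;

    fn main() {
        let available = Arc::new(AtomicBool::new(true));
        let handles: Vec<_> = (0..2)
            .map(|_| {
                let a = Arc::clone(&available);
                thread::spawn(move || a.swap(false, Ordering::Relaxed))
            })
            .collect();
        let wins: Vec<bool> = handles.into_iter().map(|h| h.join().unwrap()).collect();
        // Exactly one thread performed the revoke.
        assert_eq!(wins.iter().filter(|w| **w).count(), 1);
    }
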
diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs
index 36a719015583..c23a12639924 100644
--- a/rust/kernel/sync.rs
+++ b/rust/kernel/sync.rs
@@ -10,6 +10,7 @@ use crate::types::Opaque;
use pin_init;
mod arc;
+pub mod completion;
mod condvar;
pub mod lock;
mod locked_by;
@@ -17,6 +18,7 @@ pub mod poll;
pub mod rcu;
pub use arc::{Arc, ArcBorrow, UniqueArc};
+pub use completion::Completion;
pub use condvar::{new_condvar, CondVar, CondVarTimeoutResult};
pub use lock::global::{global_lock, GlobalGuard, GlobalLock, GlobalLockBackend, GlobalLockedBy};
pub use lock::mutex::{new_mutex, Mutex, MutexGuard};
diff --git a/rust/kernel/sync/completion.rs b/rust/kernel/sync/completion.rs
new file mode 100644
index 000000000000..c50012a940a3
--- /dev/null
+++ b/rust/kernel/sync/completion.rs
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Completion support.
+//!
+//! Reference: <https://docs.kernel.org/scheduler/completion.html>
+//!
+//! C header: [`include/linux/completion.h`](srctree/include/linux/completion.h)
+
+use crate::{bindings, prelude::*, types::Opaque};
+
+/// Synchronization primitive to signal when a certain task has been completed.
+///
+/// The [`Completion`] synchronization primitive signals when a certain task has been completed by
+/// waking up other tasks that have been queued up to wait for the [`Completion`] to be completed.
+///
+/// # Examples
+///
+/// ```
+/// use kernel::sync::{Arc, Completion};
+/// use kernel::workqueue::{self, impl_has_work, new_work, Work, WorkItem};
+///
+/// #[pin_data]
+/// struct MyTask {
+/// #[pin]
+/// work: Work<MyTask>,
+/// #[pin]
+/// done: Completion,
+/// }
+///
+/// impl_has_work! {
+/// impl HasWork<Self> for MyTask { self.work }
+/// }
+///
+/// impl MyTask {
+/// fn new() -> Result<Arc<Self>> {
+/// let this = Arc::pin_init(pin_init!(MyTask {
+/// work <- new_work!("MyTask::work"),
+/// done <- Completion::new(),
+/// }), GFP_KERNEL)?;
+///
+/// let _ = workqueue::system().enqueue(this.clone());
+///
+/// Ok(this)
+/// }
+///
+/// fn wait_for_completion(&self) {
+/// self.done.wait_for_completion();
+///
+/// pr_info!("Completion: task complete\n");
+/// }
+/// }
+///
+/// impl WorkItem for MyTask {
+/// type Pointer = Arc<MyTask>;
+///
+/// fn run(this: Arc<MyTask>) {
+/// // process this task
+/// this.done.complete_all();
+/// }
+/// }
+///
+/// let task = MyTask::new()?;
+/// task.wait_for_completion();
+/// # Ok::<(), Error>(())
+/// ```
+#[pin_data]
+pub struct Completion {
+ #[pin]
+ inner: Opaque<bindings::completion>,
+}
+
+// SAFETY: `Completion` is safe to be sent to any task.
+unsafe impl Send for Completion {}
+
+// SAFETY: `Completion` is safe to be accessed concurrently.
+unsafe impl Sync for Completion {}
+
+impl Completion {
+ /// Create an initializer for a new [`Completion`].
+ pub fn new() -> impl PinInit<Self> {
+ pin_init!(Self {
+ inner <- Opaque::ffi_init(|slot: *mut bindings::completion| {
+ // SAFETY: `slot` is a valid pointer to an uninitialized `struct completion`.
+ unsafe { bindings::init_completion(slot) };
+ }),
+ })
+ }
+
+ fn as_raw(&self) -> *mut bindings::completion {
+ self.inner.get()
+ }
+
+ /// Signal all tasks waiting on this completion.
+ ///
+ /// This method wakes up all tasks waiting on this completion; after this operation the
+ /// completion is permanently done, i.e. signals all current and future waiters.
+ pub fn complete_all(&self) {
+ // SAFETY: `self.as_raw()` is a pointer to a valid `struct completion`.
+ unsafe { bindings::complete_all(self.as_raw()) };
+ }
+
+ /// Wait for completion of a task.
+ ///
+ /// This method waits for the completion of a task; it is not interruptible and there is no
+ /// timeout.
+ ///
+ /// See also [`Completion::complete_all`].
+ pub fn wait_for_completion(&self) {
+ // SAFETY: `self.as_raw()` is a pointer to a valid `struct completion`.
+ unsafe { bindings::wait_for_completion(self.as_raw()) };
+ }
+}
diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs
index 9df3dcd2fa39..36e1290cd079 100644
--- a/rust/kernel/time/hrtimer.rs
+++ b/rust/kernel/time/hrtimer.rs
@@ -517,7 +517,7 @@ macro_rules! impl_has_hr_timer {
) -> *mut Self {
// SAFETY: As per the safety requirement of this function, `ptr`
// is pointing inside a `$timer_type`.
- unsafe { ::kernel::container_of!(ptr, $timer_type, $field).cast_mut() }
+ unsafe { ::kernel::container_of!(ptr, $timer_type, $field) }
}
}
}
diff --git a/rust/macros/module.rs b/rust/macros/module.rs
index 2ddd2eeb2852..75efc6eeeafc 100644
--- a/rust/macros/module.rs
+++ b/rust/macros/module.rs
@@ -57,7 +57,7 @@ impl<'a> ModInfoBuilder<'a> {
{cfg}
#[doc(hidden)]
#[cfg_attr(not(target_os = \"macos\"), link_section = \".modinfo\")]
- #[used]
+ #[used(compiler)]
pub static __{module}_{counter}: [u8; {length}] = *{string};
",
cfg = if builtin {
@@ -249,7 +249,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
// key or a new section. For the moment, keep it simple.
#[cfg(MODULE)]
#[doc(hidden)]
- #[used]
+ #[used(compiler)]
static __IS_RUST_MODULE: () = ();
static mut __MOD: ::core::mem::MaybeUninit<{type_}> =
@@ -273,7 +273,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
#[cfg(MODULE)]
#[doc(hidden)]
- #[used]
+ #[used(compiler)]
#[link_section = \".init.data\"]
static __UNIQUE_ID___addressable_init_module: unsafe extern \"C\" fn() -> i32 = init_module;
@@ -293,7 +293,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
#[cfg(MODULE)]
#[doc(hidden)]
- #[used]
+ #[used(compiler)]
#[link_section = \".exit.data\"]
static __UNIQUE_ID___addressable_cleanup_module: extern \"C\" fn() = cleanup_module;
@@ -303,7 +303,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
#[cfg(not(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS))]
#[doc(hidden)]
#[link_section = \"{initcall_section}\"]
- #[used]
+ #[used(compiler)]
pub static __{ident}_initcall: extern \"C\" fn() ->
::kernel::ffi::c_int = __{ident}_init;
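
For context: plain `#[used]` only asks the compiler to keep the static in the emitted object file. The unstable `used_with_arg` feature splits this into `#[used(compiler)]` and `#[used(linker)]`; the exact retention semantics are still being settled upstream (hence the "To be determined" marker above), but roughly, `#[used(compiler)]` matches today's `#[used]` while `#[used(linker)]` would additionally ask the linker to retain the symbol. A minimal nightly-only sketch of the form the series switches to (illustrative, not module code):

    #![feature(used_with_arg)]

    #[used(compiler)] // survive "unused static" elimination by rustc
    #[link_section = ".modinfo"]
    static MODINFO: [u8; 12] = *b"key=value\0\0\0";

    fn main() {}
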
diff --git a/samples/damon/mtier.c b/samples/damon/mtier.c
index 36d2cd933f5a..c94254b77fc9 100644
--- a/samples/damon/mtier.c
+++ b/samples/damon/mtier.c
@@ -164,8 +164,12 @@ static int damon_sample_mtier_enable_store(
if (enable == enabled)
return 0;
- if (enable)
- return damon_sample_mtier_start();
+ if (enable) {
+ err = damon_sample_mtier_start();
+ if (err)
+ enable = false;
+ return err;
+ }
damon_sample_mtier_stop();
return 0;
}
diff --git a/samples/damon/prcl.c b/samples/damon/prcl.c
index 056b1b21a0fe..5597e6a08ab2 100644
--- a/samples/damon/prcl.c
+++ b/samples/damon/prcl.c
@@ -122,8 +122,12 @@ static int damon_sample_prcl_enable_store(
if (enable == enabled)
return 0;
- if (enable)
- return damon_sample_prcl_start();
+ if (enable) {
+ err = damon_sample_prcl_start();
+ if (err)
+ enable = false;
+ return err;
+ }
damon_sample_prcl_stop();
return 0;
}
diff --git a/samples/damon/wsse.c b/samples/damon/wsse.c
index 11be25803274..e20238a249e7 100644
--- a/samples/damon/wsse.c
+++ b/samples/damon/wsse.c
@@ -102,8 +102,12 @@ static int damon_sample_wsse_enable_store(
if (enable == enabled)
return 0;
- if (enable)
- return damon_sample_wsse_start();
+ if (enable) {
+ err = damon_sample_wsse_start();
+ if (err)
+ enable = false;
+ return err;
+ }
damon_sample_wsse_stop();
return 0;
}
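
All three samples get the same shape of fix: if starting DAMON fails, the writable `enable` parameter is rolled back so sysfs does not advertise a state that was never reached. A generic sketch of the resulting store handler (all `example_*` names are hypothetical):

    #include <linux/kernel.h>
    #include <linux/module.h>

    static bool enable;
    static bool enabled;

    static int example_start(void);
    static void example_stop(void);

    static int example_enable_store(const char *val, const struct kernel_param *kp)
    {
    	bool is_enabled = enabled;
    	int err;

    	err = kstrtobool(val, &enable);
    	if (err)
    		return err;

    	if (enable == is_enabled)
    		return 0;

    	if (enable) {
    		err = example_start();
    		if (err)
    			enable = false;	/* undo the visible parameter flip */
    		return err;
    	}

    	example_stop();
    	return 0;
    }
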
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index a6461ea411f7..ba71b27aa363 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -312,10 +312,11 @@ $(obj)/%.lst: $(obj)/%.c FORCE
# - Stable since Rust 1.82.0: `feature(asm_const)`, `feature(raw_ref_op)`.
# - Stable since Rust 1.87.0: `feature(asm_goto)`.
# - Expected to become stable: `feature(arbitrary_self_types)`.
+# - To be determined: `feature(used_with_arg)`.
#
# Please see https://github.com/Rust-for-Linux/linux/issues/2 for details on
# the unstable features in use.
-rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,lint_reasons,raw_ref_op
+rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,lint_reasons,raw_ref_op,used_with_arg
# `--out-dir` is required to avoid temporaries being created by `rustc` in the
# current working directory, which may be not accessible in the out-of-tree
diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in
index fd6bd69c5096..f795302ddfa8 100644
--- a/scripts/gdb/linux/constants.py.in
+++ b/scripts/gdb/linux/constants.py.in
@@ -20,6 +20,7 @@
#include <linux/of_fdt.h>
#include <linux/page_ext.h>
#include <linux/radix-tree.h>
+#include <linux/maple_tree.h>
#include <linux/slab.h>
#include <linux/threads.h>
#include <linux/vmalloc.h>
@@ -93,6 +94,12 @@ LX_GDBPARSED(RADIX_TREE_MAP_SIZE)
LX_GDBPARSED(RADIX_TREE_MAP_SHIFT)
LX_GDBPARSED(RADIX_TREE_MAP_MASK)
+/* linux/maple_tree.h */
+LX_VALUE(MAPLE_NODE_SLOTS)
+LX_VALUE(MAPLE_RANGE64_SLOTS)
+LX_VALUE(MAPLE_ARANGE64_SLOTS)
+LX_GDBPARSED(MAPLE_NODE_MASK)
+
/* linux/vmalloc.h */
LX_VALUE(VM_IOREMAP)
LX_VALUE(VM_ALLOC)
diff --git a/scripts/gdb/linux/interrupts.py b/scripts/gdb/linux/interrupts.py
index 616a5f26377a..f4f715a8f0e3 100644
--- a/scripts/gdb/linux/interrupts.py
+++ b/scripts/gdb/linux/interrupts.py
@@ -7,7 +7,7 @@ import gdb
from linux import constants
from linux import cpus
from linux import utils
-from linux import radixtree
+from linux import mapletree
irq_desc_type = utils.CachedType("struct irq_desc")
@@ -23,12 +23,12 @@ def irqd_is_level(desc):
def show_irq_desc(prec, irq):
text = ""
- desc = radixtree.lookup(gdb.parse_and_eval("&irq_desc_tree"), irq)
+ desc = mapletree.mtree_load(gdb.parse_and_eval("&sparse_irqs"), irq)
if desc is None:
return text
- desc = desc.cast(irq_desc_type.get_type())
- if desc is None:
+ desc = desc.cast(irq_desc_type.get_type().pointer())
+ if desc == 0:
return text
if irq_settings_is_hidden(desc):
@@ -110,7 +110,7 @@ def x86_show_mce(prec, var, pfx, desc):
pvar = gdb.parse_and_eval(var)
text = "%*s: " % (prec, pfx)
for cpu in cpus.each_online_cpu():
- text += "%10u " % (cpus.per_cpu(pvar, cpu))
+ text += "%10u " % (cpus.per_cpu(pvar, cpu).dereference())
text += " %s\n" % (desc)
return text
@@ -142,7 +142,7 @@ def x86_show_interupts(prec):
if constants.LX_CONFIG_X86_MCE:
text += x86_show_mce(prec, "&mce_exception_count", "MCE", "Machine check exceptions")
- text == x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls")
+ text += x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls")
text += show_irq_err_count(prec)
@@ -221,8 +221,8 @@ class LxInterruptList(gdb.Command):
gdb.write("CPU%-8d" % cpu)
gdb.write("\n")
- if utils.gdb_eval_or_none("&irq_desc_tree") is None:
- return
+ if utils.gdb_eval_or_none("&sparse_irqs") is None:
+ raise gdb.GdbError("Unable to find the sparse IRQ tree, is CONFIG_SPARSE_IRQ enabled?")
for irq in range(nr_irqs):
gdb.write(show_irq_desc(prec, irq))
diff --git a/scripts/gdb/linux/mapletree.py b/scripts/gdb/linux/mapletree.py
new file mode 100644
index 000000000000..d52d51c0a03f
--- /dev/null
+++ b/scripts/gdb/linux/mapletree.py
@@ -0,0 +1,252 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Maple tree helpers
+#
+# Copyright (c) 2025 Broadcom
+#
+# Authors:
+# Florian Fainelli <florian.fainelli@broadcom.com>
+
+import gdb
+
+from linux import utils
+from linux import constants
+from linux import xarray
+
+maple_tree_root_type = utils.CachedType("struct maple_tree")
+maple_node_type = utils.CachedType("struct maple_node")
+maple_enode_type = utils.CachedType("void")
+
+maple_dense = 0
+maple_leaf_64 = 1
+maple_range_64 = 2
+maple_arange_64 = 3
+
+class Mas(object):
+ ma_active = 0
+ ma_start = 1
+ ma_root = 2
+ ma_none = 3
+ ma_pause = 4
+ ma_overflow = 5
+ ma_underflow = 6
+ ma_error = 7
+
+ def __init__(self, mt, first, end):
+ if mt.type == maple_tree_root_type.get_type().pointer():
+ self.tree = mt.dereference()
+ elif mt.type != maple_tree_root_type.get_type():
+ raise gdb.GdbError("must be {} not {}"
+ .format(maple_tree_root_type.get_type().pointer(), mt.type))
+        else:
+            self.tree = mt
+ self.index = first
+ self.last = end
+ self.node = None
+ self.status = self.ma_start
+ self.min = 0
+ self.max = -1
+
+ def is_start(self):
+ # mas_is_start()
+ return self.status == self.ma_start
+
+ def is_ptr(self):
+ # mas_is_ptr()
+ return self.status == self.ma_root
+
+ def is_none(self):
+ # mas_is_none()
+ return self.status == self.ma_none
+
+ def root(self):
+ # mas_root()
+ return self.tree['ma_root'].cast(maple_enode_type.get_type().pointer())
+
+ def start(self):
+ # mas_start()
+ if self.is_start() is False:
+ return None
+
+ self.min = 0
+ self.max = ~0
+
+ while True:
+ self.depth = 0
+ root = self.root()
+ if xarray.xa_is_node(root):
+ self.depth = 0
+ self.status = self.ma_active
+ self.node = mte_safe_root(root)
+ self.offset = 0
+ if mte_dead_node(self.node) is True:
+ continue
+
+ return None
+
+ self.node = None
+ # Empty tree
+ if root is None:
+ self.status = self.ma_none
+ self.offset = constants.LX_MAPLE_NODE_SLOTS
+ return None
+
+ # Single entry tree
+ self.status = self.ma_root
+ self.offset = constants.LX_MAPLE_NODE_SLOTS
+
+ if self.index != 0:
+ return None
+
+ return root
+
+ return None
+
+ def reset(self):
+ # mas_reset()
+ self.status = self.ma_start
+ self.node = None
+
+def mte_safe_root(node):
+ if node.type != maple_enode_type.get_type().pointer():
+ raise gdb.GdbError("{} must be {} not {}"
+ .format(mte_safe_root.__name__, maple_enode_type.get_type().pointer(), node.type))
+ ulong_type = utils.get_ulong_type()
+ indirect_ptr = node.cast(ulong_type) & ~0x2
+ val = indirect_ptr.cast(maple_enode_type.get_type().pointer())
+ return val
+
+def mte_node_type(entry):
+ ulong_type = utils.get_ulong_type()
+ val = None
+ if entry.type == maple_enode_type.get_type().pointer():
+ val = entry.cast(ulong_type)
+ elif entry.type == ulong_type:
+ val = entry
+ else:
+ raise gdb.GdbError("{} must be {} not {}"
+ .format(mte_node_type.__name__, maple_enode_type.get_type().pointer(), entry.type))
+ return (val >> 0x3) & 0xf
+
+def ma_dead_node(node):
+ if node.type != maple_node_type.get_type().pointer():
+ raise gdb.GdbError("{} must be {} not {}"
+ .format(ma_dead_node.__name__, maple_node_type.get_type().pointer(), node.type))
+ ulong_type = utils.get_ulong_type()
+    parent = node['parent']
+    indirect_ptr = parent.cast(ulong_type) & ~constants.LX_MAPLE_NODE_MASK
+ return indirect_ptr == node
+
+def mte_to_node(enode):
+ ulong_type = utils.get_ulong_type()
+ if enode.type == maple_enode_type.get_type().pointer():
+ indirect_ptr = enode.cast(ulong_type)
+ elif enode.type == ulong_type:
+ indirect_ptr = enode
+ else:
+ raise gdb.GdbError("{} must be {} not {}"
+ .format(mte_to_node.__name__, maple_enode_type.get_type().pointer(), enode.type))
+ indirect_ptr = indirect_ptr & ~constants.LX_MAPLE_NODE_MASK
+ return indirect_ptr.cast(maple_node_type.get_type().pointer())
+
+def mte_dead_node(enode):
+ if enode.type != maple_enode_type.get_type().pointer():
+ raise gdb.GdbError("{} must be {} not {}"
+ .format(mte_dead_node.__name__, maple_enode_type.get_type().pointer(), enode.type))
+ node = mte_to_node(enode)
+ return ma_dead_node(node)
+
+def ma_is_leaf(tp):
+    return tp < maple_range_64
+
+def mt_pivots(t):
+ if t == maple_dense:
+ return 0
+ elif t == maple_leaf_64 or t == maple_range_64:
+ return constants.LX_MAPLE_RANGE64_SLOTS - 1
+ elif t == maple_arange_64:
+ return constants.LX_MAPLE_ARANGE64_SLOTS - 1
+
+def ma_pivots(node, t):
+ if node.type != maple_node_type.get_type().pointer():
+ raise gdb.GdbError("{}: must be {} not {}"
+ .format(ma_pivots.__name__, maple_node_type.get_type().pointer(), node.type))
+ if t == maple_arange_64:
+ return node['ma64']['pivot']
+ elif t == maple_leaf_64 or t == maple_range_64:
+ return node['mr64']['pivot']
+ else:
+ return None
+
+def ma_slots(node, tp):
+ if node.type != maple_node_type.get_type().pointer():
+ raise gdb.GdbError("{}: must be {} not {}"
+ .format(ma_slots.__name__, maple_node_type.get_type().pointer(), node.type))
+ if tp == maple_arange_64:
+ return node['ma64']['slot']
+ elif tp == maple_range_64 or tp == maple_leaf_64:
+ return node['mr64']['slot']
+ elif tp == maple_dense:
+ return node['slot']
+ else:
+ return None
+
+def mt_slot(mt, slots, offset):
+ ulong_type = utils.get_ulong_type()
+ return slots[offset].cast(ulong_type)
+
+def mtree_lookup_walk(mas):
+ n = mas.node
+
+ while True:
+ node = mte_to_node(n)
+ tp = mte_node_type(n)
+ pivots = ma_pivots(node, tp)
+ end = mt_pivots(tp)
+ offset = 0
+ while True:
+ if pivots[offset] >= mas.index:
+ break
+ if offset >= end:
+ break
+ offset += 1
+
+ slots = ma_slots(node, tp)
+ n = mt_slot(mas.tree, slots, offset)
+ if ma_dead_node(node) is True:
+ mas.reset()
+ return None
+
+ if ma_is_leaf(tp) is True:
+ break
+
+ return n
+
+def mtree_load(mt, index):
+ # MT_STATE(...)
+ mas = Mas(mt, index, index)
+ entry = None
+
+ while True:
+ entry = mas.start()
+ if mas.is_none():
+ return None
+
+ if mas.is_ptr():
+ if index != 0:
+ entry = None
+ return entry
+
+ entry = mtree_lookup_walk(mas)
+ if entry is None and mas.is_start():
+ continue
+ else:
+ break
+
+ if xarray.xa_is_zero(entry):
+ return None
+
+ return entry
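
A quick way to exercise the new helper from the (gdb) prompt, mirroring the interrupts.py conversion above; assumes the usual scripts/gdb setup where the `linux` package is importable:

    import gdb
    from linux import mapletree, utils

    # Look up the irq_desc for IRQ 16 in the kernel's sparse_irqs maple tree.
    irq_desc_type = utils.CachedType("struct irq_desc")
    entry = mapletree.mtree_load(gdb.parse_and_eval("&sparse_irqs"), 16)
    if entry is not None:
        desc = entry.cast(irq_desc_type.get_type().pointer())
        gdb.write("irq_desc for IRQ 16 at {}\n".format(desc))
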
diff --git a/scripts/gdb/linux/vfs.py b/scripts/gdb/linux/vfs.py
index c77b9ce75f6d..9e921b645a68 100644
--- a/scripts/gdb/linux/vfs.py
+++ b/scripts/gdb/linux/vfs.py
@@ -22,7 +22,7 @@ def dentry_name(d):
if parent == d or parent == 0:
return ""
p = dentry_name(d['d_parent']) + "/"
- return p + d['d_iname'].string()
+ return p + d['d_name']['name'].string()
class DentryName(gdb.Function):
"""Return string of the full path of a dentry.
diff --git a/scripts/gdb/linux/xarray.py b/scripts/gdb/linux/xarray.py
new file mode 100644
index 000000000000..f4477b5def75
--- /dev/null
+++ b/scripts/gdb/linux/xarray.py
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Xarray helpers
+#
+# Copyright (c) 2025 Broadcom
+#
+# Authors:
+# Florian Fainelli <florian.fainelli@broadcom.com>
+
+import gdb
+
+from linux import utils
+from linux import constants
+
+def xa_is_internal(entry):
+ ulong_type = utils.get_ulong_type()
+ return ((entry.cast(ulong_type) & 3) == 2)
+
+def xa_mk_internal(v):
+ return ((v << 2) | 2)
+
+def xa_is_zero(entry):
+ ulong_type = utils.get_ulong_type()
+ return entry.cast(ulong_type) == xa_mk_internal(257)
+
+def xa_is_node(entry):
+ ulong_type = utils.get_ulong_type()
+ return xa_is_internal(entry) and (entry.cast(ulong_type) > 4096)
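
These helpers decode the xarray tagged-pointer scheme: an internal entry has its low two bits set to 0b10, and XA_ZERO_ENTRY is the internal encoding of the value 257. The arithmetic can be checked with plain integers, no gdb values needed:

    assert xa_mk_internal(257) == (257 << 2) | 2    # == 1030
    assert (xa_mk_internal(257) & 3) == 2           # low bits 0b10 => internal
    assert xa_mk_internal(4097) > 4096              # large internal => a node entry
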
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 7dd03ffe0c5c..d9c06d2cb1df 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -216,24 +216,14 @@ int cache_get(struct cache *cache, unsigned long key);
void cache_init(struct cache *cache);
void cache_free(struct cache *cache);
-static inline void __cache_mark_expanded(struct cache *cache, uintptr_t addr)
-{
- cache_set(cache, addr, 1);
-}
-
-static inline bool __cache_was_expanded(struct cache *cache, uintptr_t addr)
-{
- return cache_get(cache, addr) == 1;
-}
-
static inline void cache_mark_expanded(struct cache *cache, void *addr)
{
- __cache_mark_expanded(cache, (uintptr_t)addr);
+ cache_set(cache, (unsigned long)addr, 1);
}
static inline bool cache_was_expanded(struct cache *cache, void *addr)
{
- return __cache_was_expanded(cache, (uintptr_t)addr);
+ return cache_get(cache, (unsigned long)addr) == 1;
}
/*
diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c
index 39ce1770e463..7bd459ea6c59 100644
--- a/scripts/gendwarfksyms/types.c
+++ b/scripts/gendwarfksyms/types.c
@@ -333,37 +333,11 @@ static void calculate_version(struct version *version,
cache_free(&expansion_cache);
}
-static void __type_expand(struct die *cache, struct type_expansion *type,
- bool recursive);
-
-static void type_expand_child(struct die *cache, struct type_expansion *type,
- bool recursive)
-{
- struct type_expansion child;
- char *name;
-
- name = get_type_name(cache);
- if (!name) {
- __type_expand(cache, type, recursive);
- return;
- }
-
- if (recursive && !__cache_was_expanded(&expansion_cache, cache->addr)) {
- __cache_mark_expanded(&expansion_cache, cache->addr);
- type_expansion_init(&child);
- __type_expand(cache, &child, true);
- type_map_add(name, &child);
- type_expansion_free(&child);
- }
-
- type_expansion_append(type, name, name);
-}
-
-static void __type_expand(struct die *cache, struct type_expansion *type,
- bool recursive)
+static void __type_expand(struct die *cache, struct type_expansion *type)
{
struct die_fragment *df;
struct die *child;
+ char *name;
list_for_each_entry(df, &cache->fragments, list) {
switch (df->type) {
@@ -379,7 +353,12 @@ static void __type_expand(struct die *cache, struct type_expansion *type,
error("unknown child: %" PRIxPTR,
df->data.addr);
- type_expand_child(child, type, recursive);
+ name = get_type_name(child);
+ if (name)
+ type_expansion_append(type, name, name);
+ else
+ __type_expand(child, type);
+
break;
case FRAGMENT_LINEBREAK:
/*
@@ -397,12 +376,17 @@ static void __type_expand(struct die *cache, struct type_expansion *type,
}
}
-static void type_expand(struct die *cache, struct type_expansion *type,
- bool recursive)
+static void type_expand(const char *name, struct die *cache,
+ struct type_expansion *type)
{
+ const char *override;
+
type_expansion_init(type);
- __type_expand(cache, type, recursive);
- cache_free(&expansion_cache);
+
+ if (stable && kabi_get_type_string(name, &override))
+ type_parse(name, override, type);
+ else
+ __type_expand(cache, type);
}
static void type_parse(const char *name, const char *str,
@@ -416,8 +400,6 @@ static void type_parse(const char *name, const char *str,
if (!*str)
error("empty type string override for '%s'", name);
- type_expansion_init(type);
-
for (pos = 0; str[pos]; ++pos) {
bool empty;
char marker = ' ';
@@ -478,7 +460,6 @@ static void type_parse(const char *name, const char *str,
static void expand_type(struct die *cache, void *arg)
{
struct type_expansion type;
- const char *override;
char *name;
if (cache->mapped)
@@ -504,11 +485,7 @@ static void expand_type(struct die *cache, void *arg)
debug("%s", name);
- if (stable && kabi_get_type_string(name, &override))
- type_parse(name, override, &type);
- else
- type_expand(cache, &type, true);
-
+ type_expand(name, cache, &type);
type_map_add(name, &type);
type_expansion_free(&type);
free(name);
@@ -518,7 +495,6 @@ static void expand_symbol(struct symbol *sym, void *arg)
{
struct type_expansion type;
struct version version;
- const char *override;
struct die *cache;
/*
@@ -532,10 +508,7 @@ static void expand_symbol(struct symbol *sym, void *arg)
if (__die_map_get(sym->die_addr, DIE_SYMBOL, &cache))
return; /* We'll warn about missing CRCs later. */
- if (stable && kabi_get_type_string(sym->name, &override))
- type_parse(sym->name, override, &type);
- else
- type_expand(cache, &type, false);
+ type_expand(sym->name, cache, &type);
/* If the symbol already has a version, don't calculate it again. */
if (sym->state != SYMBOL_PROCESSED) {
diff --git a/scripts/misc-check b/scripts/misc-check
index a74450e799d1..84f08da17b2c 100755
--- a/scripts/misc-check
+++ b/scripts/misc-check
@@ -62,6 +62,15 @@ check_unnecessary_include_linux_export_h () {
xargs -r printf "%s: warning: EXPORT_SYMBOL() is not used, but #include <linux/export.h> is present\n" >&2
}
-check_tracked_ignored_files
-check_missing_include_linux_export_h
-check_unnecessary_include_linux_export_h
+case "${KBUILD_EXTRA_WARN}" in
+*1*)
+ check_tracked_ignored_files
+ ;;
+esac
+
+case "${KBUILD_EXTRA_WARN}" in
+*2*)
+ check_missing_include_linux_export_h
+ check_unnecessary_include_linux_export_h
+ ;;
+esac
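
Since `KBUILD_EXTRA_WARN` is what `make W=...` sets, the extra source checks now piggyback on the existing warning levels:

    make W=1    # warns about tracked files that .gitignore would ignore
    make W=2    # warns about missing/unnecessary <linux/export.h> includes
    make W=12   # enables both groups of checks
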
diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index d52a5b14dad4..f494217112c9 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -604,7 +604,7 @@ int aa_file_perm(const char *op, const struct cred *subj_cred,
rcu_read_unlock();
/* TODO: label cross check */
- if (file->f_path.mnt && path_mediated_fs(file->f_path.dentry))
+ if (path_mediated_fs(file->f_path.dentry))
error = __file_path_perm(op, subj_cred, label, flabel, file,
request, denied, in_atomic);
diff --git a/security/inode.c b/security/inode.c
index 3913501621fa..43382ef8896e 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -112,18 +112,20 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode,
struct dentry *dentry;
struct inode *dir, *inode;
int error;
+ bool pinned = false;
if (!(mode & S_IFMT))
mode = (mode & S_IALLUGO) | S_IFREG;
pr_debug("securityfs: creating file '%s'\n",name);
- error = simple_pin_fs(&fs_type, &mount, &mount_count);
- if (error)
- return ERR_PTR(error);
-
- if (!parent)
+ if (!parent) {
+ error = simple_pin_fs(&fs_type, &mount, &mount_count);
+ if (error)
+ return ERR_PTR(error);
+ pinned = true;
parent = mount->mnt_root;
+ }
dir = d_inode(parent);
@@ -159,7 +161,6 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode,
inode->i_fop = fops;
}
d_instantiate(dentry, inode);
- dget(dentry);
inode_unlock(dir);
return dentry;
@@ -168,7 +169,8 @@ out1:
dentry = ERR_PTR(error);
out:
inode_unlock(dir);
- simple_release_fs(&mount, &mount_count);
+ if (pinned)
+ simple_release_fs(&mount, &mount_count);
return dentry;
}
@@ -279,6 +281,12 @@ struct dentry *securityfs_create_symlink(const char *name,
}
EXPORT_SYMBOL_GPL(securityfs_create_symlink);
+static void remove_one(struct dentry *victim)
+{
+ if (victim->d_parent == victim->d_sb->s_root)
+ simple_release_fs(&mount, &mount_count);
+}
+
/**
* securityfs_remove - removes a file or directory from the securityfs filesystem
*
@@ -291,43 +299,11 @@ EXPORT_SYMBOL_GPL(securityfs_create_symlink);
* This function is required to be called in order for the file to be
* removed. No automatic cleanup of files will happen when a module is
* removed; you are responsible here.
- */
-void securityfs_remove(struct dentry *dentry)
-{
- struct inode *dir;
-
- if (IS_ERR_OR_NULL(dentry))
- return;
-
- dir = d_inode(dentry->d_parent);
- inode_lock(dir);
- if (simple_positive(dentry)) {
- if (d_is_dir(dentry))
- simple_rmdir(dir, dentry);
- else
- simple_unlink(dir, dentry);
- dput(dentry);
- }
- inode_unlock(dir);
- simple_release_fs(&mount, &mount_count);
-}
-EXPORT_SYMBOL_GPL(securityfs_remove);
-
-static void remove_one(struct dentry *victim)
-{
- simple_release_fs(&mount, &mount_count);
-}
-
-/**
- * securityfs_recursive_remove - recursively removes a file or directory
- *
- * @dentry: a pointer to a the dentry of the file or directory to be removed.
*
- * This function recursively removes a file or directory in securityfs that was
- * previously created with a call to another securityfs function (like
- * securityfs_create_file() or variants thereof.)
+ * AV: when applied to a directory it will take all children out; no need to call
+ * it for descendants if the ancestor is getting killed.
*/
-void securityfs_recursive_remove(struct dentry *dentry)
+void securityfs_remove(struct dentry *dentry)
{
if (IS_ERR_OR_NULL(dentry))
return;
@@ -336,7 +312,7 @@ void securityfs_recursive_remove(struct dentry *dentry)
simple_recursive_removal(dentry, remove_one);
simple_release_fs(&mount, &mount_count);
}
-EXPORT_SYMBOL_GPL(securityfs_recursive_remove);
+EXPORT_SYMBOL_GPL(securityfs_remove);
#ifdef CONFIG_SECURITY
static struct dentry *lsm_dentry;
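
With securityfs_remove() now recursive, callers only need to keep the dentry of the subtree root; children go away with it, which is what lets the users below drop their per-file dentry bookkeeping. A usage sketch under the new semantics (`example_*` names and fops are hypothetical):

    static const struct file_operations example_fops;
    static struct dentry *example_dir;

    static int __init example_init(void)
    {
    	struct dentry *dentry;

    	example_dir = securityfs_create_dir("example", NULL);
    	if (IS_ERR(example_dir))
    		return PTR_ERR(example_dir);

    	dentry = securityfs_create_file("state", 0600, example_dir,
    					NULL, &example_fops);
    	if (IS_ERR(dentry)) {
    		securityfs_remove(example_dir);
    		return PTR_ERR(dentry);
    	}

    	return 0;
    }

    static void __exit example_exit(void)
    {
    	/* Recursively removes "state" as well; no per-file dentry needed. */
    	securityfs_remove(example_dir);
    }
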
diff --git a/security/integrity/evm/evm_secfs.c b/security/integrity/evm/evm_secfs.c
index 9b907c2fee60..b0d2aad27850 100644
--- a/security/integrity/evm/evm_secfs.c
+++ b/security/integrity/evm/evm_secfs.c
@@ -17,7 +17,6 @@
#include "evm.h"
static struct dentry *evm_dir;
-static struct dentry *evm_init_tpm;
static struct dentry *evm_symlink;
#ifdef CONFIG_EVM_ADD_XATTRS
@@ -286,7 +285,7 @@ static int evm_init_xattrs(void)
{
evm_xattrs = securityfs_create_file("evm_xattrs", 0660, evm_dir, NULL,
&evm_xattr_ops);
- if (!evm_xattrs || IS_ERR(evm_xattrs))
+ if (IS_ERR(evm_xattrs))
return -EFAULT;
return 0;
@@ -301,21 +300,22 @@ static int evm_init_xattrs(void)
int __init evm_init_secfs(void)
{
int error = 0;
+ struct dentry *dentry;
evm_dir = securityfs_create_dir("evm", integrity_dir);
- if (!evm_dir || IS_ERR(evm_dir))
+ if (IS_ERR(evm_dir))
return -EFAULT;
- evm_init_tpm = securityfs_create_file("evm", 0660,
- evm_dir, NULL, &evm_key_ops);
- if (!evm_init_tpm || IS_ERR(evm_init_tpm)) {
+ dentry = securityfs_create_file("evm", 0660,
+ evm_dir, NULL, &evm_key_ops);
+ if (IS_ERR(dentry)) {
error = -EFAULT;
goto out;
}
evm_symlink = securityfs_create_symlink("evm", NULL,
"integrity/evm/evm", NULL);
- if (!evm_symlink || IS_ERR(evm_symlink)) {
+ if (IS_ERR(evm_symlink)) {
error = -EFAULT;
goto out;
}
@@ -328,7 +328,6 @@ int __init evm_init_secfs(void)
return 0;
out:
securityfs_remove(evm_symlink);
- securityfs_remove(evm_init_tpm);
securityfs_remove(evm_dir);
return error;
}
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index e4a79a9b2d58..87045b09f120 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -116,28 +116,6 @@ void ima_putc(struct seq_file *m, void *data, int datalen)
seq_putc(m, *(char *)data++);
}
-static struct dentry **ascii_securityfs_measurement_lists __ro_after_init;
-static struct dentry **binary_securityfs_measurement_lists __ro_after_init;
-static int securityfs_measurement_list_count __ro_after_init;
-
-static void lookup_template_data_hash_algo(int *algo_idx, enum hash_algo *algo,
- struct seq_file *m,
- struct dentry **lists)
-{
- struct dentry *dentry;
- int i;
-
- dentry = file_dentry(m->file);
-
- for (i = 0; i < securityfs_measurement_list_count; i++) {
- if (dentry == lists[i]) {
- *algo_idx = i;
- *algo = ima_algo_array[i].algo;
- break;
- }
- }
-}
-
/* print format:
* 32bit-le=pcr#
* char[n]=template digest
@@ -160,9 +138,10 @@ int ima_measurements_show(struct seq_file *m, void *v)
algo_idx = ima_sha1_idx;
algo = HASH_ALGO_SHA1;
- if (m->file != NULL)
- lookup_template_data_hash_algo(&algo_idx, &algo, m,
- binary_securityfs_measurement_lists);
+ if (m->file != NULL) {
+ algo_idx = (unsigned long)file_inode(m->file)->i_private;
+ algo = ima_algo_array[algo_idx].algo;
+ }
/* get entry */
e = qe->entry;
@@ -256,9 +235,10 @@ static int ima_ascii_measurements_show(struct seq_file *m, void *v)
algo_idx = ima_sha1_idx;
algo = HASH_ALGO_SHA1;
- if (m->file != NULL)
- lookup_template_data_hash_algo(&algo_idx, &algo, m,
- ascii_securityfs_measurement_lists);
+ if (m->file != NULL) {
+ algo_idx = (unsigned long)file_inode(m->file)->i_private;
+ algo = ima_algo_array[algo_idx].algo;
+ }
/* get entry */
e = qe->entry;
@@ -396,11 +376,6 @@ out:
static struct dentry *ima_dir;
static struct dentry *ima_symlink;
-static struct dentry *binary_runtime_measurements;
-static struct dentry *ascii_runtime_measurements;
-static struct dentry *runtime_measurements_count;
-static struct dentry *violations;
-static struct dentry *ima_policy;
enum ima_fs_flags {
IMA_FS_BUSY,
@@ -417,64 +392,33 @@ static const struct seq_operations ima_policy_seqops = {
};
#endif
-static void __init remove_securityfs_measurement_lists(struct dentry **lists)
-{
- int i;
-
- if (lists) {
- for (i = 0; i < securityfs_measurement_list_count; i++)
- securityfs_remove(lists[i]);
-
- kfree(lists);
- }
-}
-
static int __init create_securityfs_measurement_lists(void)
{
- char file_name[NAME_MAX + 1];
- struct dentry *dentry;
- u16 algo;
- int i;
-
- securityfs_measurement_list_count = NR_BANKS(ima_tpm_chip);
+ int count = NR_BANKS(ima_tpm_chip);
if (ima_sha1_idx >= NR_BANKS(ima_tpm_chip))
- securityfs_measurement_list_count++;
+ count++;
- ascii_securityfs_measurement_lists =
- kcalloc(securityfs_measurement_list_count, sizeof(struct dentry *),
- GFP_KERNEL);
- if (!ascii_securityfs_measurement_lists)
- return -ENOMEM;
-
- binary_securityfs_measurement_lists =
- kcalloc(securityfs_measurement_list_count, sizeof(struct dentry *),
- GFP_KERNEL);
- if (!binary_securityfs_measurement_lists)
- return -ENOMEM;
-
- for (i = 0; i < securityfs_measurement_list_count; i++) {
- algo = ima_algo_array[i].algo;
+ for (int i = 0; i < count; i++) {
+ u16 algo = ima_algo_array[i].algo;
+ char file_name[NAME_MAX + 1];
+ struct dentry *dentry;
sprintf(file_name, "ascii_runtime_measurements_%s",
hash_algo_name[algo]);
dentry = securityfs_create_file(file_name, S_IRUSR | S_IRGRP,
- ima_dir, NULL,
+ ima_dir, (void *)(uintptr_t)i,
&ima_ascii_measurements_ops);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- ascii_securityfs_measurement_lists[i] = dentry;
-
sprintf(file_name, "binary_runtime_measurements_%s",
hash_algo_name[algo]);
dentry = securityfs_create_file(file_name, S_IRUSR | S_IRGRP,
- ima_dir, NULL,
+ ima_dir, (void *)(uintptr_t)i,
&ima_measurements_ops);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
-
- binary_securityfs_measurement_lists[i] = dentry;
}
return 0;
@@ -533,8 +477,7 @@ static int ima_release_policy(struct inode *inode, struct file *file)
ima_update_policy();
#if !defined(CONFIG_IMA_WRITE_POLICY) && !defined(CONFIG_IMA_READ_POLICY)
- securityfs_remove(ima_policy);
- ima_policy = NULL;
+ securityfs_remove(file->f_path.dentry);
#elif defined(CONFIG_IMA_WRITE_POLICY)
clear_bit(IMA_FS_BUSY, &ima_fs_flags);
#elif defined(CONFIG_IMA_READ_POLICY)
@@ -553,11 +496,9 @@ static const struct file_operations ima_measure_policy_ops = {
int __init ima_fs_init(void)
{
+ struct dentry *dentry;
int ret;
- ascii_securityfs_measurement_lists = NULL;
- binary_securityfs_measurement_lists = NULL;
-
ima_dir = securityfs_create_dir("ima", integrity_dir);
if (IS_ERR(ima_dir))
return PTR_ERR(ima_dir);
@@ -573,57 +514,45 @@ int __init ima_fs_init(void)
if (ret != 0)
goto out;
- binary_runtime_measurements =
- securityfs_create_symlink("binary_runtime_measurements", ima_dir,
+ dentry = securityfs_create_symlink("binary_runtime_measurements", ima_dir,
"binary_runtime_measurements_sha1", NULL);
- if (IS_ERR(binary_runtime_measurements)) {
- ret = PTR_ERR(binary_runtime_measurements);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
goto out;
}
- ascii_runtime_measurements =
- securityfs_create_symlink("ascii_runtime_measurements", ima_dir,
+ dentry = securityfs_create_symlink("ascii_runtime_measurements", ima_dir,
"ascii_runtime_measurements_sha1", NULL);
- if (IS_ERR(ascii_runtime_measurements)) {
- ret = PTR_ERR(ascii_runtime_measurements);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
goto out;
}
- runtime_measurements_count =
- securityfs_create_file("runtime_measurements_count",
+ dentry = securityfs_create_file("runtime_measurements_count",
S_IRUSR | S_IRGRP, ima_dir, NULL,
&ima_measurements_count_ops);
- if (IS_ERR(runtime_measurements_count)) {
- ret = PTR_ERR(runtime_measurements_count);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
goto out;
}
- violations =
- securityfs_create_file("violations", S_IRUSR | S_IRGRP,
+ dentry = securityfs_create_file("violations", S_IRUSR | S_IRGRP,
ima_dir, NULL, &ima_htable_violations_ops);
- if (IS_ERR(violations)) {
- ret = PTR_ERR(violations);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
goto out;
}
- ima_policy = securityfs_create_file("policy", POLICY_FILE_FLAGS,
+ dentry = securityfs_create_file("policy", POLICY_FILE_FLAGS,
ima_dir, NULL,
&ima_measure_policy_ops);
- if (IS_ERR(ima_policy)) {
- ret = PTR_ERR(ima_policy);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
goto out;
}
return 0;
out:
- securityfs_remove(ima_policy);
- securityfs_remove(violations);
- securityfs_remove(runtime_measurements_count);
- securityfs_remove(ascii_runtime_measurements);
- securityfs_remove(binary_runtime_measurements);
- remove_securityfs_measurement_lists(ascii_securityfs_measurement_lists);
- remove_securityfs_measurement_lists(binary_securityfs_measurement_lists);
- securityfs_measurement_list_count = 0;
securityfs_remove(ima_symlink);
securityfs_remove(ima_dir);
diff --git a/security/ipe/fs.c b/security/ipe/fs.c
index f40e50bfd2e7..0bb9468b8026 100644
--- a/security/ipe/fs.c
+++ b/security/ipe/fs.c
@@ -12,11 +12,8 @@
#include "policy.h"
#include "audit.h"
-static struct dentry *np __ro_after_init;
static struct dentry *root __ro_after_init;
struct dentry *policy_root __ro_after_init;
-static struct dentry *audit_node __ro_after_init;
-static struct dentry *enforce_node __ro_after_init;
/**
* setaudit() - Write handler for the securityfs node, "ipe/success_audit"
@@ -200,27 +197,26 @@ static int __init ipe_init_securityfs(void)
{
int rc = 0;
struct ipe_policy *ap;
+ struct dentry *dentry;
if (!ipe_enabled)
return -EOPNOTSUPP;
root = securityfs_create_dir("ipe", NULL);
- if (IS_ERR(root)) {
- rc = PTR_ERR(root);
- goto err;
- }
+ if (IS_ERR(root))
+ return PTR_ERR(root);
- audit_node = securityfs_create_file("success_audit", 0600, root,
+ dentry = securityfs_create_file("success_audit", 0600, root,
NULL, &audit_fops);
- if (IS_ERR(audit_node)) {
- rc = PTR_ERR(audit_node);
+ if (IS_ERR(dentry)) {
+ rc = PTR_ERR(dentry);
goto err;
}
- enforce_node = securityfs_create_file("enforce", 0600, root, NULL,
+ dentry = securityfs_create_file("enforce", 0600, root, NULL,
&enforce_fops);
- if (IS_ERR(enforce_node)) {
- rc = PTR_ERR(enforce_node);
+ if (IS_ERR(dentry)) {
+ rc = PTR_ERR(dentry);
goto err;
}
@@ -237,18 +233,14 @@ static int __init ipe_init_securityfs(void)
goto err;
}
- np = securityfs_create_file("new_policy", 0200, root, NULL, &np_fops);
- if (IS_ERR(np)) {
- rc = PTR_ERR(np);
+ dentry = securityfs_create_file("new_policy", 0200, root, NULL, &np_fops);
+ if (IS_ERR(dentry)) {
+ rc = PTR_ERR(dentry);
goto err;
}
return 0;
err:
- securityfs_remove(np);
- securityfs_remove(policy_root);
- securityfs_remove(enforce_node);
- securityfs_remove(audit_node);
securityfs_remove(root);
return rc;
}
diff --git a/security/ipe/policy_fs.c b/security/ipe/policy_fs.c
index db26032ccbe1..9d92d8a14b13 100644
--- a/security/ipe/policy_fs.c
+++ b/security/ipe/policy_fs.c
@@ -438,7 +438,7 @@ static const struct ipefs_file policy_subdir[] = {
*/
void ipe_del_policyfs_node(struct ipe_policy *p)
{
- securityfs_recursive_remove(p->policyfs);
+ securityfs_remove(p->policyfs);
p->policyfs = NULL;
}
@@ -485,6 +485,6 @@ int ipe_new_policyfs_node(struct ipe_policy *p)
return 0;
err:
- securityfs_recursive_remove(policyfs);
+ securityfs_remove(policyfs);
return rc;
}
diff --git a/security/keys/gc.c b/security/keys/gc.c
index f27223ea4578..748e83818a76 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -218,8 +218,8 @@ continue_scanning:
key = rb_entry(cursor, struct key, serial_node);
cursor = rb_next(cursor);
- if (test_bit(KEY_FLAG_FINAL_PUT, &key->flags)) {
- smp_mb(); /* Clobber key->user after FINAL_PUT seen. */
+ if (!test_bit_acquire(KEY_FLAG_USER_ALIVE, &key->flags)) {
+ /* Clobber key->user after final put seen. */
goto found_unreferenced_key;
}
diff --git a/security/keys/key.c b/security/keys/key.c
index 7198cd2ac3a3..3bbdde778631 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -298,6 +298,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
key->restrict_link = restrict_link;
key->last_used_at = ktime_get_real_seconds();
+ key->flags |= 1 << KEY_FLAG_USER_ALIVE;
if (!(flags & KEY_ALLOC_NOT_IN_QUOTA))
key->flags |= 1 << KEY_FLAG_IN_QUOTA;
if (flags & KEY_ALLOC_BUILT_IN)
@@ -658,8 +659,8 @@ void key_put(struct key *key)
key->user->qnbytes -= key->quotalen;
spin_unlock_irqrestore(&key->user->lock, flags);
}
- smp_mb(); /* key->user before FINAL_PUT set. */
- set_bit(KEY_FLAG_FINAL_PUT, &key->flags);
+ /* Mark key as safe for GC after key->user done. */
+ clear_bit_unlock(KEY_FLAG_USER_ALIVE, &key->flags);
schedule_work(&key_gc_work);
}
}
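
The rework replaces explicit smp_mb() barriers around a plain set_bit()/test_bit() with a single release/acquire pair on the flag bit itself: clear_bit_unlock() is a release store, test_bit_acquire() the matching acquire load. The general shape of that pattern, reduced to an illustrative fragment (not the actual keys code):

    #include <linux/bitops.h>

    static unsigned long flags = BIT(0);	/* "alive" bit, set at alloc time */
    static int user_state;

    static void last_put(void)
    {
    	user_state = 0;			/* tear down, e.g. key->user */
    	clear_bit_unlock(0, &flags);	/* RELEASE: publish the teardown */
    }

    static void gc_scan(void)
    {
    	if (!test_bit_acquire(0, &flags)) {
    		/* ACQUIRE pairs with clear_bit_unlock(): the teardown
    		 * above is guaranteed to be visible here. */
    	}
    }
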
diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
index 33eafb71e4f3..0116e9f93ffe 100644
--- a/security/landlock/syscalls.c
+++ b/security/landlock/syscalls.c
@@ -303,7 +303,6 @@ static int get_path_from_fd(const s32 fd, struct path *const path)
if ((fd_file(f)->f_op == &ruleset_fops) ||
(fd_file(f)->f_path.mnt->mnt_flags & MNT_INTERNAL) ||
(fd_file(f)->f_path.dentry->d_sb->s_flags & SB_NOUSER) ||
- d_is_negative(fd_file(f)->f_path.dentry) ||
IS_PRIVATE(d_backing_inode(fd_file(f)->f_path.dentry)))
return -EBADFD;
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 7becf3808818..d185754c2786 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -1909,11 +1909,17 @@ retry:
goto out_unlock;
}
/* Obtain the sid for the context. */
- rc = sidtab_context_to_sid(sidtab, &newcontext, out_sid);
- if (rc == -ESTALE) {
- rcu_read_unlock();
- context_destroy(&newcontext);
- goto retry;
+ if (context_equal(scontext, &newcontext))
+ *out_sid = ssid;
+ else if (context_equal(tcontext, &newcontext))
+ *out_sid = tsid;
+ else {
+ rc = sidtab_context_to_sid(sidtab, &newcontext, out_sid);
+ if (rc == -ESTALE) {
+ rcu_read_unlock();
+ context_destroy(&newcontext);
+ goto retry;
+ }
}
out_unlock:
rcu_read_unlock();
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index 90ec4ef1b082..61d56b0c2be1 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -94,7 +94,7 @@ static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp,
ctx->ctx_doi = XFRM_SC_DOI_LSM;
ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
- ctx->ctx_len = str_len;
+ ctx->ctx_len = str_len + 1;
memcpy(ctx->ctx_str, &uctx[1], str_len);
ctx->ctx_str[str_len] = '\0';
rc = security_context_to_sid(ctx->ctx_str, str_len,
diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c
index 840bb9cfe789..a66f258cafaa 100644
--- a/sound/core/compress_offload.c
+++ b/sound/core/compress_offload.c
@@ -1269,62 +1269,62 @@ static long snd_compr_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
stream = &data->stream;
guard(mutex)(&stream->device->lock);
- switch (_IOC_NR(cmd)) {
- case _IOC_NR(SNDRV_COMPRESS_IOCTL_VERSION):
+ switch (cmd) {
+ case SNDRV_COMPRESS_IOCTL_VERSION:
return put_user(SNDRV_COMPRESS_VERSION,
(int __user *)arg) ? -EFAULT : 0;
- case _IOC_NR(SNDRV_COMPRESS_GET_CAPS):
+ case SNDRV_COMPRESS_GET_CAPS:
return snd_compr_get_caps(stream, arg);
#ifndef COMPR_CODEC_CAPS_OVERFLOW
- case _IOC_NR(SNDRV_COMPRESS_GET_CODEC_CAPS):
+ case SNDRV_COMPRESS_GET_CODEC_CAPS:
return snd_compr_get_codec_caps(stream, arg);
#endif
- case _IOC_NR(SNDRV_COMPRESS_SET_PARAMS):
+ case SNDRV_COMPRESS_SET_PARAMS:
return snd_compr_set_params(stream, arg);
- case _IOC_NR(SNDRV_COMPRESS_GET_PARAMS):
+ case SNDRV_COMPRESS_GET_PARAMS:
return snd_compr_get_params(stream, arg);
- case _IOC_NR(SNDRV_COMPRESS_SET_METADATA):
+ case SNDRV_COMPRESS_SET_METADATA:
return snd_compr_set_metadata(stream, arg);
- case _IOC_NR(SNDRV_COMPRESS_GET_METADATA):
+ case SNDRV_COMPRESS_GET_METADATA:
return snd_compr_get_metadata(stream, arg);
}
if (stream->direction == SND_COMPRESS_ACCEL) {
#if IS_ENABLED(CONFIG_SND_COMPRESS_ACCEL)
- switch (_IOC_NR(cmd)) {
- case _IOC_NR(SNDRV_COMPRESS_TASK_CREATE):
+ switch (cmd) {
+ case SNDRV_COMPRESS_TASK_CREATE:
return snd_compr_task_create(stream, arg);
- case _IOC_NR(SNDRV_COMPRESS_TASK_FREE):
+ case SNDRV_COMPRESS_TASK_FREE:
return snd_compr_task_seq(stream, arg, snd_compr_task_free_one);
- case _IOC_NR(SNDRV_COMPRESS_TASK_START):
+ case SNDRV_COMPRESS_TASK_START:
return snd_compr_task_start_ioctl(stream, arg);
- case _IOC_NR(SNDRV_COMPRESS_TASK_STOP):
+ case SNDRV_COMPRESS_TASK_STOP:
return snd_compr_task_seq(stream, arg, snd_compr_task_stop_one);
- case _IOC_NR(SNDRV_COMPRESS_TASK_STATUS):
+ case SNDRV_COMPRESS_TASK_STATUS:
return snd_compr_task_status_ioctl(stream, arg);
}
#endif
return -ENOTTY;
}
- switch (_IOC_NR(cmd)) {
- case _IOC_NR(SNDRV_COMPRESS_TSTAMP):
+ switch (cmd) {
+ case SNDRV_COMPRESS_TSTAMP:
return snd_compr_tstamp(stream, arg);
- case _IOC_NR(SNDRV_COMPRESS_AVAIL):
+ case SNDRV_COMPRESS_AVAIL:
return snd_compr_ioctl_avail(stream, arg);
- case _IOC_NR(SNDRV_COMPRESS_PAUSE):
+ case SNDRV_COMPRESS_PAUSE:
return snd_compr_pause(stream);
- case _IOC_NR(SNDRV_COMPRESS_RESUME):
+ case SNDRV_COMPRESS_RESUME:
return snd_compr_resume(stream);
- case _IOC_NR(SNDRV_COMPRESS_START):
+ case SNDRV_COMPRESS_START:
return snd_compr_start(stream);
- case _IOC_NR(SNDRV_COMPRESS_STOP):
+ case SNDRV_COMPRESS_STOP:
return snd_compr_stop(stream);
- case _IOC_NR(SNDRV_COMPRESS_DRAIN):
+ case SNDRV_COMPRESS_DRAIN:
return snd_compr_drain(stream);
- case _IOC_NR(SNDRV_COMPRESS_PARTIAL_DRAIN):
+ case SNDRV_COMPRESS_PARTIAL_DRAIN:
return snd_compr_partial_drain(stream);
- case _IOC_NR(SNDRV_COMPRESS_NEXT_TRACK):
+ case SNDRV_COMPRESS_NEXT_TRACK:
return snd_compr_next_track(stream);
}
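
Matching on the full `cmd` instead of `_IOC_NR(cmd)` matters because `_IOC_NR()` keeps only the low 8-bit command number and discards the direction and size fields, so unrelated ioctls can collide. An illustration with two hypothetical commands sharing a command number:

    #include <linux/ioctl.h>

    #define EX_IOCTL_GET _IOR('C', 0x10, int)   /* read,  4-byte argument */
    #define EX_IOCTL_SET _IOW('C', 0x10, long)  /* write, 8-byte argument */

    /* _IOC_NR(EX_IOCTL_GET) == _IOC_NR(EX_IOCTL_SET) == 0x10, yet
     * EX_IOCTL_GET != EX_IOCTL_SET -- so switch (cmd) keeps them distinct
     * where switch (_IOC_NR(cmd)) would fold them into one case. */
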
diff --git a/sound/isa/ad1816a/ad1816a.c b/sound/isa/ad1816a/ad1816a.c
index 99006dc4777e..5c9e2d41d900 100644
--- a/sound/isa/ad1816a/ad1816a.c
+++ b/sound/isa/ad1816a/ad1816a.c
@@ -98,7 +98,7 @@ static int snd_card_ad1816a_pnp(int dev, struct pnp_card_link *card,
pdev = pnp_request_card_device(card, id->devs[1].id, NULL);
if (pdev == NULL) {
mpu_port[dev] = -1;
- dev_warn(&pdev->dev, "MPU401 device busy, skipping.\n");
+ pr_warn("MPU401 device busy, skipping.\n");
return 0;
}
diff --git a/sound/isa/sb/sb16_main.c b/sound/isa/sb/sb16_main.c
index 74db11525003..5a083eecaa6b 100644
--- a/sound/isa/sb/sb16_main.c
+++ b/sound/isa/sb/sb16_main.c
@@ -703,6 +703,9 @@ static int snd_sb16_dma_control_put(struct snd_kcontrol *kcontrol, struct snd_ct
unsigned char nval, oval;
int change;
+ if (chip->mode & (SB_MODE_PLAYBACK | SB_MODE_CAPTURE))
+ return -EBUSY;
+
nval = ucontrol->value.enumerated.item[0];
if (nval > 2)
return -EINVAL;
@@ -711,6 +714,10 @@ static int snd_sb16_dma_control_put(struct snd_kcontrol *kcontrol, struct snd_ct
change = nval != oval;
snd_sb16_set_dma_mode(chip, nval);
spin_unlock_irqrestore(&chip->reg_lock, flags);
+ if (change) {
+ snd_dma_disable(chip->dma8);
+ snd_dma_disable(chip->dma16);
+ }
return change;
}
diff --git a/sound/pci/ctxfi/xfi.c b/sound/pci/ctxfi/xfi.c
index 713d36ea40cb..d8dd84d41c87 100644
--- a/sound/pci/ctxfi/xfi.c
+++ b/sound/pci/ctxfi/xfi.c
@@ -98,8 +98,8 @@ ct_card_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
if (err < 0)
goto error;
- strcpy(card->driver, "SB-XFi");
- strcpy(card->shortname, "Creative X-Fi");
+ strscpy(card->driver, "SB-XFi");
+ strscpy(card->shortname, "Creative X-Fi");
snprintf(card->longname, sizeof(card->longname), "%s %s %s",
card->shortname, atc->chip_name, atc->model_name);
diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c
index 3f2fd32f4ad9..886c53184fec 100644
--- a/sound/pci/hda/cs35l56_hda.c
+++ b/sound/pci/hda/cs35l56_hda.c
@@ -873,6 +873,52 @@ static int cs35l56_hda_system_resume(struct device *dev)
return 0;
}
+static int cs35l56_hda_fixup_yoga9(struct cs35l56_hda *cs35l56, int *bus_addr)
+{
+ /* The cirrus,dev-index property has the wrong values */
+ switch (*bus_addr) {
+ case 0x30:
+ cs35l56->index = 1;
+ return 0;
+ case 0x31:
+ cs35l56->index = 0;
+ return 0;
+ default:
+ /* There is a pseudo-address for broadcast to both amps - ignore it */
+ dev_dbg(cs35l56->base.dev, "Ignoring I2C address %#x\n", *bus_addr);
+ return 0;
+ }
+}
+
+static const struct {
+ const char *sub;
+ int (*fixup_fn)(struct cs35l56_hda *cs35l56, int *bus_addr);
+} cs35l56_hda_fixups[] = {
+ {
+ .sub = "17AA390B", /* Lenovo Yoga Book 9i GenX */
+ .fixup_fn = cs35l56_hda_fixup_yoga9,
+ },
+};
+
+static int cs35l56_hda_apply_platform_fixups(struct cs35l56_hda *cs35l56, const char *sub,
+ int *bus_addr)
+{
+ int i;
+
+ if (IS_ERR(sub))
+ return 0;
+
+ for (i = 0; i < ARRAY_SIZE(cs35l56_hda_fixups); i++) {
+ if (strcasecmp(cs35l56_hda_fixups[i].sub, sub) == 0) {
+ dev_dbg(cs35l56->base.dev, "Applying fixup for %s\n",
+ cs35l56_hda_fixups[i].sub);
+ return (cs35l56_hda_fixups[i].fixup_fn)(cs35l56, bus_addr);
+ }
+ }
+
+ return 0;
+}
+
static int cs35l56_hda_read_acpi(struct cs35l56_hda *cs35l56, int hid, int id)
{
u32 values[HDA_MAX_COMPONENTS];
@@ -897,39 +943,47 @@ static int cs35l56_hda_read_acpi(struct cs35l56_hda *cs35l56, int hid, int id)
ACPI_COMPANION_SET(cs35l56->base.dev, adev);
}
- property = "cirrus,dev-index";
- ret = device_property_count_u32(cs35l56->base.dev, property);
- if (ret <= 0)
- goto err;
-
- if (ret > ARRAY_SIZE(values)) {
- ret = -EINVAL;
- goto err;
- }
- nval = ret;
+ /* Initialize things that could be overwritten by a fixup */
+ cs35l56->index = -1;
- ret = device_property_read_u32_array(cs35l56->base.dev, property, values, nval);
+ sub = acpi_get_subsystem_id(ACPI_HANDLE(cs35l56->base.dev));
+ ret = cs35l56_hda_apply_platform_fixups(cs35l56, sub, &id);
if (ret)
- goto err;
+ return ret;
- cs35l56->index = -1;
- for (i = 0; i < nval; i++) {
- if (values[i] == id) {
- cs35l56->index = i;
- break;
- }
- }
- /*
- * It's not an error for the ID to be missing: for I2C there can be
- * an alias address that is not a real device. So reject silently.
- */
if (cs35l56->index == -1) {
- dev_dbg(cs35l56->base.dev, "No index found in %s\n", property);
- ret = -ENODEV;
- goto err;
- }
+ property = "cirrus,dev-index";
+ ret = device_property_count_u32(cs35l56->base.dev, property);
+ if (ret <= 0)
+ goto err;
- sub = acpi_get_subsystem_id(ACPI_HANDLE(cs35l56->base.dev));
+ if (ret > ARRAY_SIZE(values)) {
+ ret = -EINVAL;
+ goto err;
+ }
+ nval = ret;
+
+ ret = device_property_read_u32_array(cs35l56->base.dev, property, values, nval);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < nval; i++) {
+ if (values[i] == id) {
+ cs35l56->index = i;
+ break;
+ }
+ }
+
+ /*
+ * It's not an error for the ID to be missing: for I2C there can be
+ * an alias address that is not a real device. So reject silently.
+ */
+ if (cs35l56->index == -1) {
+ dev_dbg(cs35l56->base.dev, "No index found in %s\n", property);
+ ret = -ENODEV;
+ goto err;
+ }
+ }
if (IS_ERR(sub)) {
dev_info(cs35l56->base.dev,
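For context on the cs35l56_hda rework above: the driver now looks up the ACPI subsystem ID in a small fixup table and lets a per-machine handler correct the amp index before falling back to the cirrus,dev-index property. A sketch of the same table-driven pattern, with hypothetical names (the real table is cs35l56_hda_fixups):

	#include <stddef.h>
	#include <strings.h>

	struct machine_fixup {
		const char *sub;            /* ACPI subsystem ID to match */
		int (*apply)(void *ctx);    /* per-machine correction */
	};

	static int apply_machine_fixup(const struct machine_fixup *tbl, size_t n,
				       const char *sub, void *ctx)
	{
		size_t i;

		for (i = 0; i < n; i++)
			if (strcasecmp(tbl[i].sub, sub) == 0) /* IDs compare case-insensitively */
				return tbl[i].apply(ctx);

		return 0; /* no matching fixup is not an error */
	}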
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index e5210ed48ddf..439cf1bda6e6 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2283,6 +2283,8 @@ static const struct snd_pci_quirk power_save_denylist[] = {
SND_PCI_QUIRK(0x1734, 0x1232, "KONTRON SinglePC", 0),
/* Dell ALC3271 */
SND_PCI_QUIRK(0x1028, 0x0962, "Dell ALC3271", 0),
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=220210 */
+ SND_PCI_QUIRK(0x17aa, 0x5079, "Lenovo Thinkpad E15", 0),
{}
};
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 08308231b4ed..9a7793eb16e9 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -4551,7 +4551,9 @@ HDA_CODEC_ENTRY(0x10de002e, "Tegra186 HDMI/DP1", patch_tegra_hdmi),
HDA_CODEC_ENTRY(0x10de002f, "Tegra194 HDMI/DP2", patch_tegra_hdmi),
HDA_CODEC_ENTRY(0x10de0030, "Tegra194 HDMI/DP3", patch_tegra_hdmi),
HDA_CODEC_ENTRY(0x10de0031, "Tegra234 HDMI/DP", patch_tegra234_hdmi),
+HDA_CODEC_ENTRY(0x10de0033, "SoC 33 HDMI/DP", patch_tegra234_hdmi),
HDA_CODEC_ENTRY(0x10de0034, "Tegra264 HDMI/DP", patch_tegra234_hdmi),
+HDA_CODEC_ENTRY(0x10de0035, "SoC 35 HDMI/DP", patch_tegra234_hdmi),
HDA_CODEC_ENTRY(0x10de0040, "GPU 40 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de0041, "GPU 41 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de0042, "GPU 42 HDMI/DP", patch_nvhdmi),
@@ -4590,15 +4592,32 @@ HDA_CODEC_ENTRY(0x10de0097, "GPU 97 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de0098, "GPU 98 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de0099, "GPU 99 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de009a, "GPU 9a HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de009b, "GPU 9b HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de009c, "GPU 9c HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de009d, "GPU 9d HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de009e, "GPU 9e HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de009f, "GPU 9f HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de00a0, "GPU a0 HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00a1, "GPU a1 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de00a3, "GPU a3 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de00a4, "GPU a4 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de00a5, "GPU a5 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de00a6, "GPU a6 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de00a7, "GPU a7 HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00a8, "GPU a8 HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00a9, "GPU a9 HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00aa, "GPU aa HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00ab, "GPU ab HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00ad, "GPU ad HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00ae, "GPU ae HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00af, "GPU af HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00b0, "GPU b0 HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00b1, "GPU b1 HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00c0, "GPU c0 HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00c1, "GPU c1 HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00c3, "GPU c3 HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00c4, "GPU c4 HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00c5, "GPU c5 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI", patch_nvhdmi_2ch),
HDA_CODEC_ENTRY(0x10de8067, "MCP67/68 HDMI", patch_nvhdmi_2ch),
HDA_CODEC_ENTRY(0x67663d82, "Arise 82 HDMI/DP", patch_gf_hdmi),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index cd0d7ba7320e..2627e2f49316 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2656,6 +2656,7 @@ static const struct hda_quirk alc882_fixup_tbl[] = {
SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
SND_PCI_QUIRK(0x1558, 0x3702, "Clevo X370SN[VW]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x50d3, "Clevo PC50[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x5802, "Clevo X58[05]WN[RST]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
@@ -4752,7 +4753,7 @@ static void alc245_fixup_hp_mute_led_v1_coefbit(struct hda_codec *codec,
if (action == HDA_FIXUP_ACT_PRE_PROBE) {
spec->mute_led_polarity = 0;
spec->mute_led_coef.idx = 0x0b;
- spec->mute_led_coef.mask = 1 << 3;
+ spec->mute_led_coef.mask = 3 << 2;
spec->mute_led_coef.on = 1 << 3;
spec->mute_led_coef.off = 0;
snd_hda_gen_add_mute_led_cdev(codec, coef_mute_led_set);
@@ -6609,6 +6610,7 @@ static void alc294_fixup_bass_speaker_15(struct hda_codec *codec,
if (action == HDA_FIXUP_ACT_PRE_PROBE) {
static const hda_nid_t conn[] = { 0x02, 0x03 };
snd_hda_override_conn_list(codec, 0x15, ARRAY_SIZE(conn), conn);
+ snd_hda_gen_add_micmute_led_cdev(codec, NULL);
}
}
@@ -7495,6 +7497,9 @@ static void alc287_fixup_yoga9_14iap7_bass_spk_pin(struct hda_codec *codec,
};
struct alc_spec *spec = codec->spec;
+ /* Support Audio mute LED and Mic mute LED on keyboard */
+ hda_fixup_ideapad_acpi(codec, fix, action);
+
switch (action) {
case HDA_FIXUP_ACT_PRE_PROBE:
snd_hda_apply_pincfgs(codec, pincfgs);
@@ -8030,6 +8035,9 @@ enum {
ALC294_FIXUP_ASUS_CS35L41_SPI_2,
ALC274_FIXUP_HP_AIO_BIND_DACS,
ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2,
+ ALC285_FIXUP_ASUS_GA605K_HEADSET_MIC,
+ ALC285_FIXUP_ASUS_GA605K_I2C_SPEAKER2_TO_DAC1,
+ ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC,
};
/* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -10414,6 +10422,26 @@ static const struct hda_fixup alc269_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc274_fixup_hp_aio_bind_dacs,
},
+ [ALC285_FIXUP_ASUS_GA605K_HEADSET_MIC] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x03a11050 },
+ { 0x1b, 0x03a11c30 },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC285_FIXUP_ASUS_GA605K_I2C_SPEAKER2_TO_DAC1
+ },
+ [ALC285_FIXUP_ASUS_GA605K_I2C_SPEAKER2_TO_DAC1] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_speaker2_to_dac1,
+ },
+ [ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc269_fixup_limit_int_mic_boost,
+ .chained = true,
+ .chain_id = ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE,
+ },
};
static const struct hda_quirk alc269_fixup_tbl[] = {
@@ -10509,6 +10537,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x0871, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC),
SND_PCI_QUIRK(0x1028, 0x0872, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC),
SND_PCI_QUIRK(0x1028, 0x0873, "Dell Precision 3930", ALC255_FIXUP_DUMMY_LINEOUT_VERB),
+ SND_PCI_QUIRK(0x1028, 0x0879, "Dell Latitude 5420 Rugged", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x08ad, "Dell WYSE AIO", ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x08ae, "Dell WYSE NB", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x0935, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
@@ -10665,6 +10694,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x87cc, "HP Pavilion 15-eg0xxx", ALC287_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x87d3, "HP Laptop 15-gw0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x87df, "HP ProBook 430 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
@@ -10713,6 +10743,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x897d, "HP mt440 Mobile Thin Client U74", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4),
+ SND_PCI_QUIRK(0x103c, 0x898a, "HP Pavilion 15-eg100", ALC287_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8991, "HP EliteBook 845 G9", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
@@ -10742,6 +10773,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8a2e, "HP Envy 16", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8a30, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8a31, "HP Envy 15", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8a4f, "HP Victus 15-fa0xxx (MB 8A4F)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT),
SND_PCI_QUIRK(0x103c, 0x8a6e, "HP EDNA 360", ALC287_FIXUP_CS35L41_I2C_4),
SND_PCI_QUIRK(0x103c, 0x8a74, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8a78, "HP Dev One", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
@@ -10787,6 +10819,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8b97, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8bb3, "HP Slim OMEN", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8bb4, "HP Slim OMEN", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8bbe, "HP Victus 16-r0xxx (MB 8BBE)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT),
+ SND_PCI_QUIRK(0x103c, 0x8bc8, "HP Victus 15-fa1xxx", ALC245_FIXUP_HP_MUTE_LED_COEFBIT),
SND_PCI_QUIRK(0x103c, 0x8bcd, "HP Omen 16-xd0xxx", ALC245_FIXUP_HP_MUTE_LED_V1_COEFBIT),
SND_PCI_QUIRK(0x103c, 0x8bdd, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8bde, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2),
@@ -10840,6 +10874,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8c91, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8c9c, "HP Victus 16-s1xxx (MB 8C9C)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT),
SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
@@ -10852,6 +10887,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALC285_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8d07, "HP Victus 15-fb2xxx (MB 8D07)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT),
SND_PCI_QUIRK(0x103c, 0x8d18, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS),
SND_PCI_QUIRK(0x103c, 0x8d84, "HP EliteBook X G1i", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8d85, "HP EliteBook 14 G12", ALC285_FIXUP_HP_GPIO_LED),
@@ -10881,7 +10917,9 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8def, "HP EliteBook 660 G12", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8df0, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8df1, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8dfb, "HP EliteBook 6 G1a 14", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8dfc, "HP EliteBook 645 G12", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8dfd, "HP EliteBook 6 G1a 16", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8dfe, "HP EliteBook 665 G12", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8e11, "HP Trekker", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e12, "HP Trekker", ALC287_FIXUP_CS35L41_I2C_2),
@@ -10904,6 +10942,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x1043, 0x1032, "ASUS VivoBook X513EA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1043, 0x1034, "ASUS GU605C", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1),
SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
SND_PCI_QUIRK(0x1043, 0x1054, "ASUS G614FH/FM/FP", ALC287_FIXUP_CS35L41_I2C_2),
@@ -10932,6 +10972,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1043, 0x1314, "ASUS GA605K", ALC285_FIXUP_ASUS_GA605K_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK),
SND_PCI_QUIRK(0x1043, 0x1433, "ASUS GX650PY/PZ/PV/PU/PYV/PZV/PIV/PVV", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC),
@@ -10971,6 +11012,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
SND_PCI_QUIRK(0x1043, 0x1a63, "ASUS UX3405MA", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x1043, 0x1a8e, "ASUS G712LWS", ALC294_FIXUP_LENOVO_MIC_LOCATION),
SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B),
SND_PCI_QUIRK(0x1043, 0x1b13, "ASUS U41SV/GA403U", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC),
@@ -10997,6 +11039,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1df3, "ASUS UM5606WA", ALC294_FIXUP_BASS_SPEAKER_15),
SND_PCI_QUIRK(0x1043, 0x1264, "ASUS UM5606KA", ALC294_FIXUP_BASS_SPEAKER_15),
SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1043, 0x1e10, "ASUS VivoBook X507UAR", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502),
SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1e1f, "ASUS Vivobook 15 X1504VAP", ALC2XX_FIXUP_HEADSET_MIC),
@@ -11005,6 +11048,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1),
SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1),
SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401),
+ SND_PCI_QUIRK(0x1043, 0x1e93, "ASUS ExpertBook B9403CVAR", ALC294_FIXUP_ASUS_HPE),
SND_PCI_QUIRK(0x1043, 0x1eb3, "ASUS Ally RCLA72", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x1043, 0x1ed3, "ASUS HN7306W", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2),
@@ -11106,6 +11150,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1558, 0x14a1, "Clevo L141MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x2624, "Clevo L240TU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x28c1, "Clevo V370VND", ALC2XX_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1558, 0x35a1, "Clevo V3[56]0EN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x35b1, "Clevo V3[57]0WN[MNP]Q", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x4018, "Clevo NV40M[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x4019, "Clevo NV40MZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x4020, "Clevo NV40MB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -11133,6 +11179,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1558, 0x51b1, "Clevo NS50AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x51b3, "Clevo NS70AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x5630, "Clevo NP50RNJS", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x5700, "Clevo X560WN[RST]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x70f2, "Clevo NH79EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -11172,6 +11219,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1558, 0xa650, "Clevo NP[567]0SN[CD]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0xa671, "Clevo NP70SN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0xa741, "Clevo V54x_6x_TNE", ALC245_FIXUP_CLEVO_NOISY_MIC),
+ SND_PCI_QUIRK(0x1558, 0xa743, "Clevo V54x_6x_TU", ALC245_FIXUP_CLEVO_NOISY_MIC),
SND_PCI_QUIRK(0x1558, 0xa763, "Clevo V54x_6x_TU", ALC245_FIXUP_CLEVO_NOISY_MIC),
SND_PCI_QUIRK(0x1558, 0xb018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0xb019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -11384,6 +11432,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x2782, 0x0214, "VAIO VJFE-CL", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x2782, 0x0228, "Infinix ZERO BOOK 13", ALC269VB_FIXUP_INFINIX_ZERO_BOOK_13),
SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO),
+ SND_PCI_QUIRK(0x2782, 0x1407, "Positivo P15X", ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC),
+ SND_PCI_QUIRK(0x2782, 0x1409, "Positivo K116J", ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC),
SND_PCI_QUIRK(0x2782, 0x1701, "Infinix Y4 Max", ALC269VC_FIXUP_INFINIX_Y4_MAX),
SND_PCI_QUIRK(0x2782, 0x1705, "MEDION E15433", ALC269VC_FIXUP_INFINIX_Y4_MAX),
SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME),
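One subtle fix above deserves a note: alc245_fixup_hp_mute_led_v1_coefbit now uses mask = 3 << 2 with on = 1 << 3, so the read-modify-write clears bit 2 while setting bit 3; the old mask = 1 << 3 could never touch the neighbouring bit. The arithmetic, as a generic sketch (not the HDA coef API):

	/* Masked update: only bits set in mask can change. */
	static unsigned int masked_update(unsigned int reg,
					  unsigned int mask, unsigned int val)
	{
		return (reg & ~mask) | (val & mask);
	}

	/* mask = 3 << 2 spans bits 3:2, so writing val = 1 << 3 sets bit 3
	 * and clears bit 2 in one step; mask = 1 << 3 left bit 2 untouched. */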
diff --git a/sound/pci/hda/tas2781_hda.c b/sound/pci/hda/tas2781_hda.c
index 5f1d4b3e9688..34217ce9f28e 100644
--- a/sound/pci/hda/tas2781_hda.c
+++ b/sound/pci/hda/tas2781_hda.c
@@ -44,7 +44,7 @@ static void tas2781_apply_calib(struct tasdevice_priv *p)
TASDEVICE_REG(0, 0x13, 0x70),
TASDEVICE_REG(0, 0x18, 0x7c),
};
- unsigned int crc, oft;
+ unsigned int crc, oft, node_num;
unsigned char *buf;
int i, j, k, l;
@@ -80,8 +80,9 @@ static void tas2781_apply_calib(struct tasdevice_priv *p)
dev_err(p->dev, "%s: CRC error\n", __func__);
return;
}
+ node_num = tmp_val[1];
- for (j = 0, k = 0; j < tmp_val[1]; j++) {
+ for (j = 0, k = 0; j < node_num; j++) {
oft = j * 6 + 3;
if (tmp_val[oft] == TASDEV_UEFI_CALI_REG_ADDR_FLG) {
for (i = 0; i < TASDEV_CALIB_N; i++) {
@@ -99,8 +100,9 @@ static void tas2781_apply_calib(struct tasdevice_priv *p)
}
data[l] = k;
+ oft++;
for (i = 0; i < TASDEV_CALIB_N * 4; i++)
- data[l + i] = data[4 * oft + i];
+ data[l + i + 1] = data[4 * oft + i];
k++;
}
}
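The tas2781 change above shifts the calibration copy so the payload no longer clobbers the index byte stored at data[l], advancing the source offset in step. The general shape of preserving a header byte during a copy, as an illustrative sketch:

	#include <stddef.h>

	/* Write a 1-byte header, then append the payload after it. */
	static void pack_entry(unsigned char *dst, unsigned char idx,
			       const unsigned char *src, size_t len)
	{
		size_t i;

		dst[0] = idx;                /* header must survive the copy */
		for (i = 0; i < len; i++)
			dst[1 + i] = src[i]; /* payload starts one byte later */
	}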
diff --git a/sound/soc/amd/ps/acp63.h b/sound/soc/amd/ps/acp63.h
index 85feae45c44c..d7c994e26e4d 100644
--- a/sound/soc/amd/ps/acp63.h
+++ b/sound/soc/amd/ps/acp63.h
@@ -334,6 +334,8 @@ struct acp_hw_ops {
* @addr: pci ioremap address
* @reg_range: ACP register range
* @acp_rev: ACP PCI revision id
+ * @acp_sw_pad_keeper_en: store acp SoundWire pad keeper enable register value
+ * @acp_pad_pulldown_ctrl: store acp pad pulldown control register value
* @acp63_sdw0_dma_intr_stat: DMA interrupt status array for ACP6.3 platform SoundWire
* manager-SW0 instance
* @acp63_sdw1_dma_intr_stat: DMA interrupt status array for ACP6.3 platform SoundWire
@@ -367,6 +369,8 @@ struct acp63_dev_data {
u32 addr;
u32 reg_range;
u32 acp_rev;
+ u32 acp_sw_pad_keeper_en;
+ u32 acp_pad_pulldown_ctrl;
u16 acp63_sdw0_dma_intr_stat[ACP63_SDW0_DMA_MAX_STREAMS];
u16 acp63_sdw1_dma_intr_stat[ACP63_SDW1_DMA_MAX_STREAMS];
u16 acp70_sdw0_dma_intr_stat[ACP70_SDW0_DMA_MAX_STREAMS];
diff --git a/sound/soc/amd/ps/ps-common.c b/sound/soc/amd/ps/ps-common.c
index 1c89fb5fe1da..7b4966b75dc6 100644
--- a/sound/soc/amd/ps/ps-common.c
+++ b/sound/soc/amd/ps/ps-common.c
@@ -160,6 +160,8 @@ static int __maybe_unused snd_acp63_suspend(struct device *dev)
adata = dev_get_drvdata(dev);
if (adata->is_sdw_dev) {
+ adata->acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN);
+ adata->acp_pad_pulldown_ctrl = readl(adata->acp63_base + ACP_PAD_PULLDOWN_CTRL);
adata->sdw_en_stat = check_acp_sdw_enable_status(adata);
if (adata->sdw_en_stat) {
writel(1, adata->acp63_base + ACP_ZSC_DSP_CTRL);
@@ -197,6 +199,7 @@ static int __maybe_unused snd_acp63_runtime_resume(struct device *dev)
static int __maybe_unused snd_acp63_resume(struct device *dev)
{
struct acp63_dev_data *adata;
+ u32 acp_sw_pad_keeper_en;
int ret;
adata = dev_get_drvdata(dev);
@@ -209,6 +212,12 @@ static int __maybe_unused snd_acp63_resume(struct device *dev)
if (ret)
dev_err(dev, "ACP init failed\n");
+ acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN);
+ dev_dbg(dev, "ACP_SW0_PAD_KEEPER_EN:0x%x\n", acp_sw_pad_keeper_en);
+ if (!acp_sw_pad_keeper_en) {
+ writel(adata->acp_sw_pad_keeper_en, adata->acp63_base + ACP_SW0_PAD_KEEPER_EN);
+ writel(adata->acp_pad_pulldown_ctrl, adata->acp63_base + ACP_PAD_PULLDOWN_CTRL);
+ }
return ret;
}
@@ -408,6 +417,8 @@ static int __maybe_unused snd_acp70_suspend(struct device *dev)
adata = dev_get_drvdata(dev);
if (adata->is_sdw_dev) {
+ adata->acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN);
+ adata->acp_pad_pulldown_ctrl = readl(adata->acp63_base + ACP_PAD_PULLDOWN_CTRL);
adata->sdw_en_stat = check_acp_sdw_enable_status(adata);
if (adata->sdw_en_stat) {
writel(1, adata->acp63_base + ACP_ZSC_DSP_CTRL);
@@ -445,6 +456,7 @@ static int __maybe_unused snd_acp70_runtime_resume(struct device *dev)
static int __maybe_unused snd_acp70_resume(struct device *dev)
{
struct acp63_dev_data *adata;
+ u32 acp_sw_pad_keeper_en;
int ret;
adata = dev_get_drvdata(dev);
@@ -459,6 +471,12 @@ static int __maybe_unused snd_acp70_resume(struct device *dev)
if (ret)
dev_err(dev, "ACP init failed\n");
+ acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN);
+ dev_dbg(dev, "ACP_SW0_PAD_KEEPER_EN:0x%x\n", acp_sw_pad_keeper_en);
+ if (!acp_sw_pad_keeper_en) {
+ writel(adata->acp_sw_pad_keeper_en, adata->acp63_base + ACP_SW0_PAD_KEEPER_EN);
+ writel(adata->acp_pad_pulldown_ctrl, adata->acp63_base + ACP_PAD_PULLDOWN_CTRL);
+ }
return ret;
}
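Both the ACP6.3 and ACP7.0 paths above now snapshot the pad-keeper and pull-down registers at suspend and rewrite them at resume only when the hardware comes back with the enable register cleared. A condensed sketch of that save/conditionally-restore pattern (register names from the diff; the helper functions are a hypothetical refactor):

	static void pads_save(struct acp63_dev_data *adata, void __iomem *base)
	{
		adata->acp_sw_pad_keeper_en = readl(base + ACP_SW0_PAD_KEEPER_EN);
		adata->acp_pad_pulldown_ctrl = readl(base + ACP_PAD_PULLDOWN_CTRL);
	}

	static void pads_restore(struct acp63_dev_data *adata, void __iomem *base)
	{
		/* Restore only if a power loss wiped the programmed values. */
		if (!readl(base + ACP_SW0_PAD_KEEPER_EN)) {
			writel(adata->acp_sw_pad_keeper_en,
			       base + ACP_SW0_PAD_KEEPER_EN);
			writel(adata->acp_pad_pulldown_ctrl,
			       base + ACP_PAD_PULLDOWN_CTRL);
		}
	}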
diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index 7e62445e02c1..f210a253da9f 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -356,6 +356,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
{
.driver_data = &acp6x_card,
.matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "RB"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Nitro ANV15-41"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
DMI_MATCH(DMI_PRODUCT_NAME, "83J2"),
}
@@ -363,6 +370,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
{
.driver_data = &acp6x_card,
.matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "83J3"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
DMI_MATCH(DMI_PRODUCT_NAME, "UM5302TA"),
}
@@ -406,6 +420,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
.driver_data = &acp6x_card,
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "M6501RM"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
DMI_MATCH(DMI_PRODUCT_NAME, "E1404FA"),
}
},
@@ -454,6 +475,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
{
.driver_data = &acp6x_card,
.matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 17 D7VF"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "Alienware"),
DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m17 R5 AMD"),
}
@@ -518,6 +546,20 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
.driver_data = &acp6x_card,
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Victus by HP Gaming Laptop 15-fb1xxx"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Victus by HP Gaming Laptop 15-fb2xxx"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
DMI_MATCH(DMI_BOARD_NAME, "8A42"),
}
},
@@ -560,6 +602,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
.driver_data = &acp6x_card,
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+ DMI_MATCH(DMI_BOARD_NAME, "8A81"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
DMI_MATCH(DMI_BOARD_NAME, "8B27"),
}
},
diff --git a/sound/soc/apple/Kconfig b/sound/soc/apple/Kconfig
index 793f7782e0d7..e9c777cdb6e3 100644
--- a/sound/soc/apple/Kconfig
+++ b/sound/soc/apple/Kconfig
@@ -2,7 +2,6 @@ config SND_SOC_APPLE_MCA
tristate "Apple Silicon MCA driver"
depends on ARCH_APPLE || COMPILE_TEST
select SND_DMAENGINE_PCM
- default ARCH_APPLE
help
This option enables an ASoC platform driver for MCA peripherals found
on Apple Silicon SoCs.
diff --git a/sound/soc/codecs/cs35l56-sdw.c b/sound/soc/codecs/cs35l56-sdw.c
index 13f602f51bf3..fa9693af3722 100644
--- a/sound/soc/codecs/cs35l56-sdw.c
+++ b/sound/soc/codecs/cs35l56-sdw.c
@@ -238,16 +238,15 @@ static const struct regmap_bus cs35l56_regmap_bus_sdw = {
.val_format_endian_default = REGMAP_ENDIAN_BIG,
};
-static int cs35l56_sdw_set_cal_index(struct cs35l56_private *cs35l56)
+static int cs35l56_sdw_get_unique_id(struct cs35l56_private *cs35l56)
{
int ret;
- /* SoundWire UniqueId is used to index the calibration array */
ret = sdw_read_no_pm(cs35l56->sdw_peripheral, SDW_SCP_DEVID_0);
if (ret < 0)
return ret;
- cs35l56->base.cal_index = ret & 0xf;
+ cs35l56->sdw_unique_id = ret & 0xf;
return 0;
}
@@ -259,11 +258,13 @@ static void cs35l56_sdw_init(struct sdw_slave *peripheral)
pm_runtime_get_noresume(cs35l56->base.dev);
- if (cs35l56->base.cal_index < 0) {
- ret = cs35l56_sdw_set_cal_index(cs35l56);
- if (ret < 0)
- goto out;
- }
+ ret = cs35l56_sdw_get_unique_id(cs35l56);
+ if (ret)
+ goto out;
+
+ /* SoundWire UniqueId is used to index the calibration array */
+ if (cs35l56->base.cal_index < 0)
+ cs35l56->base.cal_index = cs35l56->sdw_unique_id;
ret = cs35l56_init(cs35l56);
if (ret < 0) {
@@ -587,6 +588,7 @@ static int cs35l56_sdw_probe(struct sdw_slave *peripheral, const struct sdw_devi
cs35l56->base.dev = dev;
cs35l56->sdw_peripheral = peripheral;
+ cs35l56->sdw_link_num = peripheral->bus->link_id;
INIT_WORK(&cs35l56->sdw_irq_work, cs35l56_sdw_irq_work);
dev_set_drvdata(dev, cs35l56);
diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c
index d0831d609584..ba653f6ccfae 100644
--- a/sound/soc/codecs/cs35l56-shared.c
+++ b/sound/soc/codecs/cs35l56-shared.c
@@ -980,7 +980,7 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base)
break;
default:
dev_err(cs35l56_base->dev, "Unknown device %x\n", devid);
- return ret;
+ return -ENODEV;
}
cs35l56_base->type = devid & 0xFF;
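The one-line cs35l56-shared fix above is worth spelling out: in the unknown-device branch, ret still holds 0 from the preceding successful register read, so return ret would report success. Returning an explicit -ENODEV makes the failure visible. The anti-pattern in miniature (register and ID names illustrative):

	ret = regmap_read(regmap, DEVID_REG, &devid); /* 0 on success */
	if (ret)
		return ret;

	switch (devid) {
	case KNOWN_ID:
		break;
	default:
		/* 'ret' is 0 here; returning it would swallow the error. */
		return -ENODEV;
	}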
diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c
index c78e4746e428..1b42586794ad 100644
--- a/sound/soc/codecs/cs35l56.c
+++ b/sound/soc/codecs/cs35l56.c
@@ -706,17 +706,41 @@ static int cs35l56_write_cal(struct cs35l56_private *cs35l56)
return ret;
}
-static void cs35l56_reinit_patch(struct cs35l56_private *cs35l56)
+static int cs35l56_dsp_download_and_power_up(struct cs35l56_private *cs35l56,
+ bool load_firmware)
{
int ret;
- /* Use wm_adsp to load and apply the firmware patch and coefficient files */
- ret = wm_adsp_power_up(&cs35l56->dsp, true);
+ /*
+ * Abort the first load if it didn't find the suffixed bins and
+ * we have an alternate fallback suffix.
+ */
+ cs35l56->dsp.bin_mandatory = (load_firmware && cs35l56->fallback_fw_suffix);
+
+ ret = wm_adsp_power_up(&cs35l56->dsp, load_firmware);
+ if ((ret == -ENOENT) && cs35l56->dsp.bin_mandatory) {
+ cs35l56->dsp.fwf_suffix = cs35l56->fallback_fw_suffix;
+ cs35l56->fallback_fw_suffix = NULL;
+ cs35l56->dsp.bin_mandatory = false;
+ ret = wm_adsp_power_up(&cs35l56->dsp, load_firmware);
+ }
+
if (ret) {
- dev_dbg(cs35l56->base.dev, "%s: wm_adsp_power_up ret %d\n", __func__, ret);
- return;
+ dev_dbg(cs35l56->base.dev, "wm_adsp_power_up ret %d\n", ret);
+ return ret;
}
+ return 0;
+}
+
+static void cs35l56_reinit_patch(struct cs35l56_private *cs35l56)
+{
+ int ret;
+
+ ret = cs35l56_dsp_download_and_power_up(cs35l56, true);
+ if (ret)
+ return;
+
cs35l56_write_cal(cs35l56);
/* Always REINIT after applying patch or coefficients */
@@ -750,11 +774,9 @@ static void cs35l56_patch(struct cs35l56_private *cs35l56, bool firmware_missing
* but only if firmware is missing. If firmware is already patched just
* power-up wm_adsp without downloading firmware.
*/
- ret = wm_adsp_power_up(&cs35l56->dsp, !!firmware_missing);
- if (ret) {
- dev_dbg(cs35l56->base.dev, "%s: wm_adsp_power_up ret %d\n", __func__, ret);
+ ret = cs35l56_dsp_download_and_power_up(cs35l56, firmware_missing);
+ if (ret)
goto err;
- }
mutex_lock(&cs35l56->base.irq_lock);
@@ -853,6 +875,34 @@ err:
pm_runtime_put_autosuspend(cs35l56->base.dev);
}
+static int cs35l56_set_fw_suffix(struct cs35l56_private *cs35l56)
+{
+ if (cs35l56->dsp.fwf_suffix)
+ return 0;
+
+ if (!cs35l56->sdw_peripheral)
+ return 0;
+
+ cs35l56->dsp.fwf_suffix = devm_kasprintf(cs35l56->base.dev, GFP_KERNEL,
+ "l%uu%u",
+ cs35l56->sdw_link_num,
+ cs35l56->sdw_unique_id);
+ if (!cs35l56->dsp.fwf_suffix)
+ return -ENOMEM;
+
+ /*
+ * There are published firmware files for L56 B0 silicon using
+ * the ALSA prefix as the filename suffix. Default to trying these
+ * first, with the new name as an alternate.
+ */
+ if ((cs35l56->base.type == 0x56) && (cs35l56->base.rev == 0xb0)) {
+ cs35l56->fallback_fw_suffix = cs35l56->dsp.fwf_suffix;
+ cs35l56->dsp.fwf_suffix = cs35l56->component->name_prefix;
+ }
+
+ return 0;
+}
+
static int cs35l56_component_probe(struct snd_soc_component *component)
{
struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component);
@@ -892,6 +942,10 @@ static int cs35l56_component_probe(struct snd_soc_component *component)
return -ENOMEM;
cs35l56->component = component;
+ ret = cs35l56_set_fw_suffix(cs35l56);
+ if (ret)
+ return ret;
+
wm_adsp2_component_probe(&cs35l56->dsp, component);
debugfs_create_bool("init_done", 0444, debugfs_root, &cs35l56->base.init_done);
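The new cs35l56_set_fw_suffix() above derives a per-amp firmware suffix such as "l0u3" from the SoundWire link number and unique ID, keeping the old ALSA-prefix name as a fallback for published L56 B0 firmware. The formatting step uses devm_kasprintf(), whose allocation is freed automatically with the device; a minimal sketch (names follow the diff):

	const char *suffix;

	suffix = devm_kasprintf(cs35l56->base.dev, GFP_KERNEL, "l%uu%u",
				cs35l56->sdw_link_num,
				cs35l56->sdw_unique_id);
	if (!suffix)
		return -ENOMEM;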
diff --git a/sound/soc/codecs/cs35l56.h b/sound/soc/codecs/cs35l56.h
index 200f695efca3..bd77a57249d7 100644
--- a/sound/soc/codecs/cs35l56.h
+++ b/sound/soc/codecs/cs35l56.h
@@ -38,6 +38,7 @@ struct cs35l56_private {
struct snd_soc_component *component;
struct regulator_bulk_data supplies[CS35L56_NUM_BULK_SUPPLIES];
struct sdw_slave *sdw_peripheral;
+ const char *fallback_fw_suffix;
struct work_struct sdw_irq_work;
bool sdw_irq_no_unmask;
bool soft_resetting;
@@ -52,6 +53,8 @@ struct cs35l56_private {
bool tdm_mode;
bool sysclk_set;
u8 old_sdw_clock_scale;
+ u8 sdw_link_num;
+ u8 sdw_unique_id;
};
extern const struct dev_pm_ops cs35l56_pm_ops_i2c_spi;
diff --git a/sound/soc/codecs/cs48l32.c b/sound/soc/codecs/cs48l32.c
index 90a795230d27..9bdd48aab42a 100644
--- a/sound/soc/codecs/cs48l32.c
+++ b/sound/soc/codecs/cs48l32.c
@@ -2162,6 +2162,10 @@ static int cs48l32_hw_params(struct snd_pcm_substream *substream,
n_slots_multiple = 1;
sclk_target = snd_soc_tdm_params_to_bclk(params, slotw, n_slots, n_slots_multiple);
+ if (sclk_target < 0) {
+ cs48l32_asp_err(dai, "Invalid parameters\n");
+ return sclk_target;
+ }
for (i = 0; i < ARRAY_SIZE(cs48l32_sclk_rates); i++) {
if ((cs48l32_sclk_rates[i].freq >= sclk_target) &&
diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c
index 066d92b54312..78c4e68f6002 100644
--- a/sound/soc/codecs/es8326.c
+++ b/sound/soc/codecs/es8326.c
@@ -1079,8 +1079,7 @@ static void es8326_init(struct snd_soc_component *component)
regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00);
regmap_write(es8326->regmap, ES8326_INTOUT_IO,
es8326->interrupt_clk);
- regmap_write(es8326->regmap, ES8326_SDINOUT1_IO,
- (ES8326_IO_DMIC_CLK << ES8326_SDINOUT1_SHIFT));
+ regmap_write(es8326->regmap, ES8326_SDINOUT1_IO, ES8326_IO_INPUT);
regmap_write(es8326->regmap, ES8326_SDINOUT23_IO, ES8326_IO_INPUT);
regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x00);
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 08df87238eee..29a403526cd9 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -82,6 +82,7 @@ static const struct reg_sequence rt5650_init_list[] = {
{0xf6, 0x0100},
{RT5645_PWR_ANLG1, 0x02},
{RT5645_IL_CMD3, 0x6728},
+ {RT5645_PR_BASE + 0x3a, 0x0000},
};
static const struct reg_default rt5645_reg[] = {
diff --git a/sound/soc/codecs/rt5660.c b/sound/soc/codecs/rt5660.c
index 82b92e83be4c..44c3a3b92f98 100644
--- a/sound/soc/codecs/rt5660.c
+++ b/sound/soc/codecs/rt5660.c
@@ -1315,14 +1315,17 @@ static int rt5660_i2c_probe(struct i2c_client *i2c)
regmap_update_bits(rt5660->regmap, RT5660_GPIO_CTRL1,
RT5660_GP1_PIN_MASK, RT5660_GP1_PIN_DMIC1_SCL);
- if (rt5660->pdata.dmic1_data_pin == RT5660_DMIC1_DATA_GPIO2)
+ if (rt5660->pdata.dmic1_data_pin == RT5660_DMIC1_DATA_GPIO2) {
regmap_update_bits(rt5660->regmap, RT5660_DMIC_CTRL1,
RT5660_SEL_DMIC_DATA_MASK,
RT5660_SEL_DMIC_DATA_GPIO2);
- else if (rt5660->pdata.dmic1_data_pin == RT5660_DMIC1_DATA_IN1P)
+ regmap_update_bits(rt5660->regmap, RT5660_GPIO_CTRL1,
+ RT5660_GP2_PIN_MASK, RT5660_GP2_PIN_DMIC1_SDA);
+ } else if (rt5660->pdata.dmic1_data_pin == RT5660_DMIC1_DATA_IN1P) {
regmap_update_bits(rt5660->regmap, RT5660_DMIC_CTRL1,
RT5660_SEL_DMIC_DATA_MASK,
RT5660_SEL_DMIC_DATA_IN1P);
+ }
}
return devm_snd_soc_register_component(&i2c->dev,
diff --git a/sound/soc/codecs/rt721-sdca.c b/sound/soc/codecs/rt721-sdca.c
index 1c9f32e405cf..ba080957e933 100644
--- a/sound/soc/codecs/rt721-sdca.c
+++ b/sound/soc/codecs/rt721-sdca.c
@@ -430,6 +430,7 @@ static int rt721_sdca_set_gain_get(struct snd_kcontrol *kcontrol,
unsigned int read_l, read_r, ctl_l = 0, ctl_r = 0;
unsigned int adc_vol_flag = 0;
const unsigned int interval_offset = 0xc0;
+ const unsigned int tendA = 0x200;
const unsigned int tendB = 0xa00;
if (strstr(ucontrol->id.name, "FU1E Capture Volume") ||
@@ -439,9 +440,16 @@ static int rt721_sdca_set_gain_get(struct snd_kcontrol *kcontrol,
regmap_read(rt721->mbq_regmap, mc->reg, &read_l);
regmap_read(rt721->mbq_regmap, mc->rreg, &read_r);
- if (mc->shift == 8) /* boost gain */
+ if (mc->shift == 8) {
+ /* boost gain */
ctl_l = read_l / tendB;
- else {
+ } else if (mc->shift == 1) {
+ /* FU33 boost gain */
+ if (read_l == 0x8000 || read_l == 0xfe00)
+ ctl_l = 0;
+ else
+ ctl_l = read_l / tendA + 1;
+ } else {
if (adc_vol_flag)
ctl_l = mc->max - (((0x1e00 - read_l) & 0xffff) / interval_offset);
else
@@ -449,9 +457,16 @@ static int rt721_sdca_set_gain_get(struct snd_kcontrol *kcontrol,
}
if (read_l != read_r) {
- if (mc->shift == 8) /* boost gain */
+ if (mc->shift == 8) {
+ /* boost gain */
ctl_r = read_r / tendB;
- else { /* ADC/DAC gain */
+ } else if (mc->shift == 1) {
+ /* FU33 boost gain */
+ if (read_r == 0x8000 || read_r == 0xfe00)
+ ctl_r = 0;
+ else
+ ctl_r = read_r / tendA + 1;
+ } else { /* ADC/DAC gain */
if (adc_vol_flag)
ctl_r = mc->max - (((0x1e00 - read_r) & 0xffff) / interval_offset);
else
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 3c580faab3b7..8a1d5cc75d6c 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -783,16 +783,19 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp,
char **coeff_filename)
{
const char *system_name = dsp->system_name;
- const char *asoc_component_prefix = dsp->component->name_prefix;
+ const char *suffix = dsp->component->name_prefix;
int ret = 0;
- if (system_name && asoc_component_prefix) {
+ if (dsp->fwf_suffix)
+ suffix = dsp->fwf_suffix;
+
+ if (system_name && suffix) {
if (!wm_adsp_request_firmware_file(dsp, wmfw_firmware, wmfw_filename,
cirrus_dir, system_name,
- asoc_component_prefix, "wmfw")) {
+ suffix, "wmfw")) {
wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
cirrus_dir, system_name,
- asoc_component_prefix, "bin");
+ suffix, "bin");
return 0;
}
}
@@ -801,10 +804,10 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp,
if (!wm_adsp_request_firmware_file(dsp, wmfw_firmware, wmfw_filename,
cirrus_dir, system_name,
NULL, "wmfw")) {
- if (asoc_component_prefix)
+ if (suffix)
wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
cirrus_dir, system_name,
- asoc_component_prefix, "bin");
+ suffix, "bin");
if (!*coeff_firmware)
wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
@@ -816,10 +819,10 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp,
/* Check system-specific bin without wmfw before falling back to generic */
if (dsp->wmfw_optional && system_name) {
- if (asoc_component_prefix)
+ if (suffix)
wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
cirrus_dir, system_name,
- asoc_component_prefix, "bin");
+ suffix, "bin");
if (!*coeff_firmware)
wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
@@ -850,7 +853,7 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp,
adsp_err(dsp, "Failed to request firmware <%s>%s-%s-%s<-%s<%s>>.wmfw\n",
cirrus_dir, dsp->part,
dsp->fwf_name ? dsp->fwf_name : dsp->cs_dsp.name,
- wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix);
+ wm_adsp_fw[dsp->fw].file, system_name, suffix);
return -ENOENT;
}
@@ -997,11 +1000,17 @@ int wm_adsp_power_up(struct wm_adsp *dsp, bool load_firmware)
return ret;
}
+ if (dsp->bin_mandatory && !coeff_firmware) {
+ ret = -ENOENT;
+ goto err;
+ }
+
ret = cs_dsp_power_up(&dsp->cs_dsp,
wmfw_firmware, wmfw_filename,
coeff_firmware, coeff_filename,
wm_adsp_fw_text[dsp->fw]);
+err:
wm_adsp_release_firmware_files(dsp,
wmfw_firmware, wmfw_filename,
coeff_firmware, coeff_filename);
diff --git a/sound/soc/codecs/wm_adsp.h b/sound/soc/codecs/wm_adsp.h
index edc5b02ae765..25210d404bf1 100644
--- a/sound/soc/codecs/wm_adsp.h
+++ b/sound/soc/codecs/wm_adsp.h
@@ -29,12 +29,14 @@ struct wm_adsp {
const char *part;
const char *fwf_name;
const char *system_name;
+ const char *fwf_suffix;
struct snd_soc_component *component;
unsigned int sys_config_size;
int fw;
bool wmfw_optional;
+ bool bin_mandatory;
struct work_struct boot_work;
int (*control_add)(struct wm_adsp *dsp, struct cs_dsp_coeff_ctl *cs_ctl);
diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c
index 677529916dc0..745532ccbdba 100644
--- a/sound/soc/fsl/fsl_asrc.c
+++ b/sound/soc/fsl/fsl_asrc.c
@@ -517,7 +517,8 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair, bool use_ideal_rate)
regmap_update_bits(asrc->regmap, REG_ASRCTR,
ASRCTR_ATSi_MASK(index), ASRCTR_ATS(index));
regmap_update_bits(asrc->regmap, REG_ASRCTR,
- ASRCTR_USRi_MASK(index), 0);
+ ASRCTR_IDRi_MASK(index) | ASRCTR_USRi_MASK(index),
+ ASRCTR_USR(index));
/* Set the input and output clock sources */
regmap_update_bits(asrc->regmap, REG_ASRCSR,
diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index af1a168d35e3..50af6b725670 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -803,13 +803,15 @@ static void fsl_sai_config_disable(struct fsl_sai *sai, int dir)
* anymore. Add software reset to fix this issue.
* This is a hardware bug, and will be fixed in the
* next SAI version.
+ *
+ * In consumer mode, this can happen even after a
+ * single open/close, especially if both tx and rx
+ * are running concurrently.
*/
- if (!sai->is_consumer_mode[tx]) {
- /* Software Reset */
- regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR);
- /* Clear SR bit to finish the reset */
- regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), 0);
- }
+ /* Software Reset */
+ regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR);
+ /* Clear SR bit to finish the reset */
+ regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), 0);
}
static int fsl_sai_trigger(struct snd_pcm_substream *substream, int cmd,
diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c
index ccf90428126d..0efe490024b0 100644
--- a/sound/soc/intel/avs/pcm.c
+++ b/sound/soc/intel/avs/pcm.c
@@ -1570,11 +1570,13 @@ static void avs_component_hda_unregister_dais(struct snd_soc_component *componen
{
struct snd_soc_acpi_mach *mach;
struct snd_soc_dai *dai, *save;
+ struct avs_mach_pdata *pdata;
struct hda_codec *codec;
char name[32];
mach = dev_get_platdata(component->card->dev);
- codec = mach->pdata;
+ pdata = mach->pdata;
+ codec = pdata->codec;
snprintf(name, sizeof(name), "%s-cpu", dev_name(&codec->core.dev));
for_each_component_dais_safe(component, dai, save) {
diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig
index 2df7afa2f469..c23fdb6aad4c 100644
--- a/sound/soc/intel/boards/Kconfig
+++ b/sound/soc/intel/boards/Kconfig
@@ -11,7 +11,7 @@ menuconfig SND_SOC_INTEL_MACH
kernel: saying N will just cause the configurator to skip all
the questions about Intel ASoC machine drivers.
-if SND_SOC_INTEL_MACH
+if SND_SOC_INTEL_MACH && (SND_SOC_SOF_INTEL_COMMON || !SND_SOC_SOF_INTEL_COMMON)
config SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES
bool "Use more user friendly long card names"
@@ -42,6 +42,7 @@ config SND_SOC_INTEL_SOF_NUVOTON_COMMON
tristate
config SND_SOC_INTEL_SOF_BOARD_HELPERS
+ select SND_SOC_ACPI_INTEL_MATCH
tristate
if SND_SOC_INTEL_CATPT
diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c
index 81a914bd7ec2..504887505e68 100644
--- a/sound/soc/intel/boards/sof_sdw.c
+++ b/sound/soc/intel/boards/sof_sdw.c
@@ -783,6 +783,9 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = {
static const struct snd_pci_quirk sof_sdw_ssid_quirk_table[] = {
SND_PCI_QUIRK(0x1043, 0x1e13, "ASUS Zenbook S14", SOC_SDW_CODEC_MIC),
SND_PCI_QUIRK(0x1043, 0x1f43, "ASUS Zenbook S16", SOC_SDW_CODEC_MIC),
+ SND_PCI_QUIRK(0x17aa, 0x2347, "Lenovo P16", SOC_SDW_CODEC_MIC),
+ SND_PCI_QUIRK(0x17aa, 0x2348, "Lenovo P16", SOC_SDW_CODEC_MIC),
+ SND_PCI_QUIRK(0x17aa, 0x2349, "Lenovo P1", SOC_SDW_CODEC_MIC),
{}
};
diff --git a/sound/soc/intel/common/soc-acpi-intel-arl-match.c b/sound/soc/intel/common/soc-acpi-intel-arl-match.c
index 73e581e93755..6bf7a6250ddc 100644
--- a/sound/soc/intel/common/soc-acpi-intel-arl-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-arl-match.c
@@ -238,6 +238,15 @@ static const struct snd_soc_acpi_adr_device rt722_0_single_adr[] = {
}
};
+static const struct snd_soc_acpi_adr_device rt1316_3_single_adr[] = {
+ {
+ .adr = 0x000330025D131601ull,
+ .num_endpoints = 1,
+ .endpoints = &single_endpoint,
+ .name_prefix = "rt1316-1"
+ }
+};
+
static const struct snd_soc_acpi_adr_device rt1320_2_single_adr[] = {
{
.adr = 0x000230025D132001ull,
@@ -368,6 +377,20 @@ static const struct snd_soc_acpi_link_adr arl_sdca_rvp[] = {
{}
};
+static const struct snd_soc_acpi_link_adr arl_rt711_l0_rt1316_l3[] = {
+ {
+ .mask = BIT(0),
+ .num_adr = ARRAY_SIZE(rt711_sdca_0_adr),
+ .adr_d = rt711_sdca_0_adr,
+ },
+ {
+ .mask = BIT(3),
+ .num_adr = ARRAY_SIZE(rt1316_3_single_adr),
+ .adr_d = rt1316_3_single_adr,
+ },
+ {}
+};
+
static const struct snd_soc_acpi_link_adr arl_rt722_l0_rt1320_l2[] = {
{
.mask = BIT(0),
@@ -468,6 +491,13 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_sdw_machines[] = {
.get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
+ .link_mask = BIT(2) | BIT(3),
+ .links = arl_cs42l43_l2_cs35l56_l3,
+ .drv_name = "sof_sdw",
+ .sof_tplg_filename = "sof-arl-cs42l43-l2-cs35l56-l3.tplg",
+ .get_function_tplg_files = sof_sdw_get_tplg_files,
+ },
+ {
.link_mask = BIT(2),
.links = arl_cs42l43_l2,
.drv_name = "sof_sdw",
@@ -475,11 +505,10 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_sdw_machines[] = {
.get_function_tplg_files = sof_sdw_get_tplg_files,
},
{
- .link_mask = BIT(2) | BIT(3),
- .links = arl_cs42l43_l2_cs35l56_l3,
+ .link_mask = BIT(0) | BIT(3),
+ .links = arl_rt711_l0_rt1316_l3,
.drv_name = "sof_sdw",
- .sof_tplg_filename = "sof-arl-cs42l43-l2-cs35l56-l3.tplg",
- .get_function_tplg_files = sof_sdw_get_tplg_files,
+ .sof_tplg_filename = "sof-arl-rt711-l0-rt1316-l3.tplg",
},
{
.link_mask = 0x1, /* link0 required */
diff --git a/sound/soc/intel/common/sof-function-topology-lib.c b/sound/soc/intel/common/sof-function-topology-lib.c
index 90fe7aa3df1c..3cc81dcf047e 100644
--- a/sound/soc/intel/common/sof-function-topology-lib.c
+++ b/sound/soc/intel/common/sof-function-topology-lib.c
@@ -73,7 +73,8 @@ int sof_sdw_get_tplg_files(struct snd_soc_card *card, const struct snd_soc_acpi_
break;
default:
dev_warn(card->dev,
- "only -2ch and -4ch are supported for dmic\n");
+ "unsupported number of dmics: %d\n",
+ mach_params.dmic_num);
continue;
}
tplg_dev = TPLG_DEVICE_INTEL_PCH_DMIC;
diff --git a/sound/soc/loongson/loongson_i2s.c b/sound/soc/loongson/loongson_i2s.c
index e8852a30f213..e336656e13eb 100644
--- a/sound/soc/loongson/loongson_i2s.c
+++ b/sound/soc/loongson/loongson_i2s.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/delay.h>
+#include <linux/export.h>
#include <linux/pm_runtime.h>
#include <linux/dma-mapping.h>
#include <sound/soc.h>
diff --git a/sound/soc/mediatek/common/mtk-soundcard-driver.c b/sound/soc/mediatek/common/mtk-soundcard-driver.c
index 713a368f79cf..95a083939f3e 100644
--- a/sound/soc/mediatek/common/mtk-soundcard-driver.c
+++ b/sound/soc/mediatek/common/mtk-soundcard-driver.c
@@ -262,9 +262,13 @@ int mtk_soundcard_common_probe(struct platform_device *pdev)
soc_card_data->accdet = accdet_comp;
else
dev_err(&pdev->dev, "No sound component found from mediatek,accdet property\n");
+
+ put_device(&accdet_pdev->dev);
} else {
dev_err(&pdev->dev, "No device found from mediatek,accdet property\n");
}
+
+ of_node_put(accdet_node);
}
platform_node = of_parse_phandle(pdev->dev.of_node, "mediatek,platform", 0);
diff --git a/sound/soc/mediatek/mt8365/mt8365-dai-i2s.c b/sound/soc/mediatek/mt8365/mt8365-dai-i2s.c
index cae51756cead..cb9beb172ed5 100644
--- a/sound/soc/mediatek/mt8365/mt8365-dai-i2s.c
+++ b/sound/soc/mediatek/mt8365/mt8365-dai-i2s.c
@@ -812,11 +812,10 @@ static const struct snd_soc_dapm_route mtk_dai_i2s_routes[] = {
static int mt8365_dai_i2s_set_priv(struct mtk_base_afe *afe)
{
int i, ret;
- struct mt8365_afe_private *afe_priv = afe->platform_priv;
for (i = 0; i < DAI_I2S_NUM; i++) {
ret = mt8365_dai_set_priv(afe, mt8365_i2s_priv[i].id,
- sizeof(*afe_priv),
+ sizeof(mt8365_i2s_priv[i]),
&mt8365_i2s_priv[i]);
if (ret)
return ret;
diff --git a/sound/soc/qcom/Kconfig b/sound/soc/qcom/Kconfig
index e86b4a03dd61..3d9ba13ee1e5 100644
--- a/sound/soc/qcom/Kconfig
+++ b/sound/soc/qcom/Kconfig
@@ -186,6 +186,7 @@ config SND_SOC_SM8250
tristate "SoC Machine driver for SM8250 boards"
depends on QCOM_APR && SOUNDWIRE
depends on COMMON_CLK
+ depends on SND_SOC_QCOM_OFFLOAD_UTILS || !SND_SOC_QCOM_OFFLOAD_UTILS
select SND_SOC_QDSP6
select SND_SOC_QCOM_COMMON
select SND_SOC_QCOM_SDW
diff --git a/sound/soc/sdca/sdca_functions.c b/sound/soc/sdca/sdca_functions.c
index 64ac26443890..de213a69e0da 100644
--- a/sound/soc/sdca/sdca_functions.c
+++ b/sound/soc/sdca/sdca_functions.c
@@ -211,7 +211,7 @@ static int find_sdca_init_table(struct device *dev,
} else if (num_init_writes % sizeof(*raw) != 0) {
dev_err(dev, "%pfwP: init table size invalid\n", function_node);
return -EINVAL;
- } else if (num_init_writes > SDCA_MAX_INIT_COUNT) {
+ } else if ((num_init_writes / sizeof(*raw)) > SDCA_MAX_INIT_COUNT) {
dev_err(dev, "%pfwP: maximum init table size exceeded\n", function_node);
return -EINVAL;
}
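The sdca_functions fix above is a units bug: num_init_writes counts bytes, while SDCA_MAX_INIT_COUNT limits entries, so the two must be brought to the same unit before comparing. A worked sketch (sizes illustrative):

	/* Convert bytes to whole entries before applying an entry limit. */
	size_t nelems = num_bytes / sizeof(*raw);

	if (num_bytes % sizeof(*raw) != 0)
		return -EINVAL;  /* not a whole number of entries */
	if (nelems > SDCA_MAX_INIT_COUNT)
		return -EINVAL;  /* limit is in entries, not bytes */

With sizeof(*raw) == 8 and a hypothetical 64-entry limit, a legitimate 80-byte table is only 10 entries; comparing 80 bytes directly against the entry limit would have rejected it.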
diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c
index b7060b746356..d75e7292240b 100644
--- a/sound/soc/sdw_utils/soc_sdw_utils.c
+++ b/sound/soc/sdw_utils/soc_sdw_utils.c
@@ -1205,6 +1205,8 @@ static int is_sdca_endpoint_present(struct device *dev,
int i;
dlc = kzalloc(sizeof(*dlc), GFP_KERNEL);
+ if (!dlc)
+ return -ENOMEM;
adr_end = &adr_dev->endpoints[end_index];
dai_info = &codec_info->dais[adr_end->num];
diff --git a/sound/soc/sof/imx/imx8.c b/sound/soc/sof/imx/imx8.c
index a40a8047873e..b73dd91bd529 100644
--- a/sound/soc/sof/imx/imx8.c
+++ b/sound/soc/sof/imx/imx8.c
@@ -40,6 +40,19 @@ struct imx8m_chip_data {
struct reset_control *run_stall;
};
+static int imx8_shutdown(struct snd_sof_dev *sdev)
+{
+ /*
+ * Force the DSP to stall. After the firmware image is loaded,
+ * the stall will be removed during run() by a matching
+ * imx_sc_pm_cpu_start() call.
+ */
+ imx_sc_pm_cpu_start(get_chip_pdata(sdev), IMX_SC_R_DSP, false,
+ RESET_VECTOR_VADDR);
+
+ return 0;
+}
+
/*
* DSP control.
*/
@@ -281,11 +294,13 @@ static int imx8_ops_init(struct snd_sof_dev *sdev)
static const struct imx_chip_ops imx8_chip_ops = {
.probe = imx8_probe,
.core_kick = imx8_run,
+ .core_shutdown = imx8_shutdown,
};
static const struct imx_chip_ops imx8x_chip_ops = {
.probe = imx8_probe,
.core_kick = imx8x_run,
+ .core_shutdown = imx8_shutdown,
};
static const struct imx_chip_ops imx8m_chip_ops = {
diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c
index bdfe388da198..3b47191ea7a5 100644
--- a/sound/soc/sof/intel/hda.c
+++ b/sound/soc/sof/intel/hda.c
@@ -1257,11 +1257,11 @@ static int check_tplg_quirk_mask(struct snd_soc_acpi_mach *mach)
return 0;
}
-static char *remove_file_ext(const char *tplg_filename)
+static char *remove_file_ext(struct device *dev, const char *tplg_filename)
{
char *filename, *tmp;
- filename = kstrdup(tplg_filename, GFP_KERNEL);
+ filename = devm_kstrdup(dev, tplg_filename, GFP_KERNEL);
if (!filename)
return NULL;
@@ -1345,7 +1345,7 @@ struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev)
*/
if (!sof_pdata->tplg_filename) {
/* remove file extension if it exists */
- tplg_filename = remove_file_ext(mach->sof_tplg_filename);
+ tplg_filename = remove_file_ext(sdev->dev, mach->sof_tplg_filename);
if (!tplg_filename)
return NULL;
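remove_file_ext() above now duplicates the topology filename with devm_kstrdup(), tying the string's lifetime to the device instead of relying on callers to free it. The managed-vs-manual contrast, as a sketch:

	/* Manual: every caller must eventually kfree(name). */
	char *name = kstrdup(tplg, GFP_KERNEL);

	/* Managed: released automatically when 'dev' is unbound. */
	char *dname = devm_kstrdup(dev, tplg, GFP_KERNEL);
	if (!dname)
		return NULL;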
diff --git a/sound/soc/sof/intel/ptl.c b/sound/soc/sof/intel/ptl.c
index 875d18193b05..1bc1f54c470d 100644
--- a/sound/soc/sof/intel/ptl.c
+++ b/sound/soc/sof/intel/ptl.c
@@ -117,6 +117,7 @@ const struct sof_intel_dsp_desc ptl_chip_info = {
.read_sdw_lcount = hda_sdw_check_lcount_ext,
.check_sdw_irq = lnl_dsp_check_sdw_irq,
.check_sdw_wakeen_irq = lnl_sdw_check_wakeen_irq,
+ .sdw_process_wakeen = hda_sdw_process_wakeen_common,
.check_ipc_irq = mtl_dsp_check_ipc_irq,
.check_mic_privacy_irq = sof_ptl_check_mic_privacy_irq,
.process_mic_privacy = sof_ptl_process_mic_privacy,
diff --git a/sound/usb/format.c b/sound/usb/format.c
index 8cd54f7bf33a..0ee532acbb60 100644
--- a/sound/usb/format.c
+++ b/sound/usb/format.c
@@ -310,16 +310,14 @@ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip,
struct audioformat *fp,
unsigned int rate)
{
- struct usb_interface *iface;
struct usb_host_interface *alts;
unsigned char *fmt;
unsigned int max_rate;
- iface = usb_ifnum_to_if(chip->dev, fp->iface);
- if (!iface)
+ alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting);
+ if (!alts)
return true;
- alts = &iface->altsetting[fp->altset_idx];
fmt = snd_usb_find_csint_desc(alts->extra, alts->extralen,
NULL, UAC_FORMAT_TYPE);
if (!fmt)
@@ -328,20 +326,20 @@ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip,
if (fmt[0] == 10) { /* bLength */
max_rate = combine_quad(&fmt[6]);
- /* Validate max rate */
- if (max_rate != 48000 &&
- max_rate != 96000 &&
- max_rate != 192000 &&
- max_rate != 384000) {
-
+ switch (max_rate) {
+ case 48000:
+ return (rate == 44100 || rate == 48000);
+ case 96000:
+ return (rate == 88200 || rate == 96000);
+ case 192000:
+ return (rate == 176400 || rate == 192000);
+ default:
usb_audio_info(chip,
"%u:%d : unexpected max rate: %u\n",
fp->iface, fp->altsetting, max_rate);
return true;
}
-
- return rate <= max_rate;
}
return true;
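The reworked Focusrite check above encodes that each advertised maximum admits exactly two rates, one from the 44.1 kHz family and one from the 48 kHz family; the old "rate <= max_rate" test wrongly accepted rates like 64 kHz on a 96 kHz-capable interface. The pairing as a standalone predicate (restating the diff's switch):

	static bool rate_valid_for_max(unsigned int rate, unsigned int max_rate)
	{
		switch (max_rate) {
		case 48000:  return rate == 44100  || rate == 48000;
		case 96000:  return rate == 88200  || rate == 96000;
		case 192000: return rate == 176400 || rate == 192000;
		default:     return true; /* unknown maximum: don't reject */
		}
	}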
diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c
index 0e9b5431a47f..faac7df1fbcf 100644
--- a/sound/usb/mixer_maps.c
+++ b/sound/usb/mixer_maps.c
@@ -383,6 +383,13 @@ static const struct usbmix_name_map ms_usb_link_map[] = {
{ 0 } /* terminator */
};
+/* KTMicro USB */
+static struct usbmix_name_map s31b2_0022_map[] = {
+ { 23, "Speaker Playback" },
+ { 18, "Headphone Playback" },
+ { 0 }
+};
+
/* ASUS ROG Zenith II with Realtek ALC1220-VB */
static const struct usbmix_name_map asus_zenith_ii_map[] = {
{ 19, NULL, 12 }, /* FU, Input Gain Pad - broken response, disabled */
@@ -692,6 +699,11 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
.id = USB_ID(0x045e, 0x083c),
.map = ms_usb_link_map,
},
+ {
+ /* KTMicro USB */
+ .id = USB_ID(0X31b2, 0x0022),
+ .map = s31b2_0022_map,
+ },
{ 0 } /* terminator */
};
diff --git a/sound/usb/qcom/qc_audio_offload.c b/sound/usb/qcom/qc_audio_offload.c
index 5bc27c82e0af..a25c5a531690 100644
--- a/sound/usb/qcom/qc_audio_offload.c
+++ b/sound/usb/qcom/qc_audio_offload.c
@@ -759,7 +759,7 @@ static void qmi_stop_session(void)
subs = find_substream(pcm_card_num, info->pcm_dev_num,
info->direction);
if (!subs || !chip || atomic_read(&chip->shutdown)) {
- dev_err(&subs->dev->dev,
+ dev_err(&uadev[idx].udev->dev,
"no sub for c#%u dev#%u dir%u\n",
info->pcm_card_num,
info->pcm_dev_num,
@@ -825,8 +825,8 @@ static int uaudio_sideband_notifier(struct usb_interface *intf,
}
}
- mutex_unlock(&qdev_mutex);
mutex_unlock(&chip->mutex);
+ mutex_unlock(&qdev_mutex);
return 0;
}
@@ -1360,20 +1360,21 @@ static int prepare_qmi_response(struct snd_usb_substream *subs,
if (!uadev[card_num].ctrl_intf) {
dev_err(&subs->dev->dev, "audio ctrl intf info not cached\n");
- ret = -ENODEV;
- goto err;
+ return -ENODEV;
}
ret = uaudio_populate_uac_desc(subs, resp);
if (ret < 0)
- goto err;
+ return ret;
resp->slot_id = subs->dev->slot_id;
resp->slot_id_valid = 1;
data = snd_soc_usb_find_priv_data(uaudio_qdev->auxdev->dev.parent);
- if (!data)
- goto err;
+ if (!data) {
+ dev_err(&subs->dev->dev, "No private data found\n");
+ return -ENODEV;
+ }
uaudio_qdev->data = data;
@@ -1382,7 +1383,7 @@ static int prepare_qmi_response(struct snd_usb_substream *subs,
&resp->xhci_mem_info.tr_data,
&resp->std_as_data_ep_desc);
if (ret < 0)
- goto err;
+ return ret;
resp->std_as_data_ep_desc_valid = 1;
@@ -1500,7 +1501,6 @@ drop_data_ep:
xhci_sideband_remove_endpoint(uadev[card_num].sb,
usb_pipe_endpoint(subs->dev, subs->data_endpoint->pipe));
-err:
return ret;
}
@@ -1865,8 +1865,8 @@ static void qc_usb_audio_offload_disconnect(struct snd_usb_audio *chip)
/* Device has already been cleaned up, or never populated */
if (!dev->chip) {
- mutex_unlock(&qdev_mutex);
mutex_unlock(&chip->mutex);
+ mutex_unlock(&qdev_mutex);
return;
}
@@ -1921,8 +1921,8 @@ static void qc_usb_audio_offload_suspend(struct usb_interface *intf,
uaudio_send_disconnect_ind(chip);
- mutex_unlock(&qdev_mutex);
mutex_unlock(&chip->mutex);
+ mutex_unlock(&qdev_mutex);
}
static struct snd_usb_platform_ops offload_ops = {
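Several qc_audio_offload paths above take qdev_mutex before chip->mutex but then dropped them in acquisition order; the fix unwinds in strict reverse, the usual convention that keeps lock/unlock nesting symmetric. Schematically (mutex names from the diff, critical section elided):

	mutex_lock(&qdev_mutex);     /* outer lock first */
	mutex_lock(&chip->mutex);    /* then inner */

	/* ... critical section ... */

	mutex_unlock(&chip->mutex);  /* release inner first */
	mutex_unlock(&qdev_mutex);   /* outer last */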
diff --git a/sound/usb/stream.c b/sound/usb/stream.c
index c1ea8844a46f..aa91d63749f2 100644
--- a/sound/usb/stream.c
+++ b/sound/usb/stream.c
@@ -987,6 +987,8 @@ snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip,
* and request Cluster Descriptor
*/
wLength = le16_to_cpu(hc_header.wLength);
+ if (wLength < sizeof(cluster))
+ return NULL;
cluster = kzalloc(wLength, GFP_KERNEL);
if (!cluster)
return ERR_PTR(-ENOMEM);
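The stream.c hunk above validates the device-supplied wLength before allocating and parsing: a descriptor shorter than the header it claims to carry would otherwise lead to an undersized buffer being read past its end. The generic defensive shape (struct name hypothetical):

	u16 len = le16_to_cpu(hdr.wLength);

	if (len < sizeof(struct expected_header))
		return NULL;              /* device-reported length too short */

	buf = kzalloc(len, GFP_KERNEL);
	if (!buf)
		return ERR_PTR(-ENOMEM);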
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index af9d9acaf997..ed5f3892674c 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -431,10 +431,11 @@ enum {
/* Device Control API on vcpu fd */
#define KVM_ARM_VCPU_PMU_V3_CTRL 0
-#define KVM_ARM_VCPU_PMU_V3_IRQ 0
-#define KVM_ARM_VCPU_PMU_V3_INIT 1
-#define KVM_ARM_VCPU_PMU_V3_FILTER 2
-#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3
+#define KVM_ARM_VCPU_PMU_V3_IRQ 0
+#define KVM_ARM_VCPU_PMU_V3_INIT 1
+#define KVM_ARM_VCPU_PMU_V3_FILTER 2
+#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3
+#define KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS 4
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
diff --git a/tools/arch/loongarch/include/asm/orc_types.h b/tools/arch/loongarch/include/asm/orc_types.h
index caf1f71a1057..d5fa98d1d177 100644
--- a/tools/arch/loongarch/include/asm/orc_types.h
+++ b/tools/arch/loongarch/include/asm/orc_types.h
@@ -34,7 +34,7 @@
#define ORC_TYPE_REGS 3
#define ORC_TYPE_REGS_PARTIAL 4
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* This struct is more or less a vastly simplified version of the DWARF Call
* Frame Information standard. It contains only the necessary parts of DWARF
@@ -53,6 +53,6 @@ struct orc_entry {
unsigned int type:3;
unsigned int signal:1;
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ORC_TYPES_H */
diff --git a/tools/arch/x86/include/asm/amd/ibs.h b/tools/arch/x86/include/asm/amd/ibs.h
index 300b6e0765b2..cbce54fec7b9 100644
--- a/tools/arch/x86/include/asm/amd/ibs.h
+++ b/tools/arch/x86/include/asm/amd/ibs.h
@@ -1,4 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_AMD_IBS_H
+#define _ASM_X86_AMD_IBS_H
+
/*
* From PPR Vol 1 for AMD Family 19h Model 01h B1
* 55898 Rev 0.35 - Feb 5, 2021
@@ -151,3 +154,5 @@ struct perf_ibs_data {
};
u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};
+
+#endif /* _ASM_X86_AMD_IBS_H */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index e02be2962a01..ee176236c2be 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -336,7 +336,7 @@
#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */
-#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/
+#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection */
#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */
#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */
@@ -379,6 +379,7 @@
#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */
#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */
#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */
+#define X86_FEATURE_BUS_LOCK_THRESHOLD (15*32+29) /* Bus lock threshold */
#define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
@@ -447,6 +448,7 @@
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */
#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */
#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */
+#define X86_FEATURE_ALLOWED_SEV_FEATURES (19*32+27) /* Allowed SEV Features */
#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */
@@ -458,6 +460,7 @@
#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */
#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */
+#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */
#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */
@@ -482,7 +485,8 @@
#define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */
#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */
#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */
-#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+ 9) /* Use thunk for indirect branches in lower half of cacheline */
+#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */
+#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */
/*
* BUG word(s)
@@ -535,6 +539,8 @@
#define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */
#define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */
#define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */
-#define X86_BUG_ITS X86_BUG( 1*32+ 6) /* "its" CPU is affected by Indirect Target Selection */
-#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 7) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
+#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */
+#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */
+#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
+
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index e7d2f460fcc6..5cfb5d74dd5f 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -533,7 +533,7 @@
#define MSR_HWP_CAPABILITIES 0x00000771
#define MSR_HWP_REQUEST_PKG 0x00000772
#define MSR_HWP_INTERRUPT 0x00000773
-#define MSR_HWP_REQUEST 0x00000774
+#define MSR_HWP_REQUEST 0x00000774
#define MSR_HWP_STATUS 0x00000777
/* CPUID.6.EAX */
@@ -550,16 +550,16 @@
#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)
/* IA32_HWP_REQUEST */
-#define HWP_MIN_PERF(x) (x & 0xff)
-#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
+#define HWP_MIN_PERF(x) (x & 0xff)
+#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16)
-#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
+#define HWP_ENERGY_PERF_PREFERENCE(x) (((u64)x & 0xff) << 24)
#define HWP_EPP_PERFORMANCE 0x00
#define HWP_EPP_BALANCE_PERFORMANCE 0x80
#define HWP_EPP_BALANCE_POWERSAVE 0xC0
#define HWP_EPP_POWERSAVE 0xFF
-#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32)
-#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42)
+#define HWP_ACTIVITY_WINDOW(x) ((u64)(x & 0xff3) << 32)
+#define HWP_PACKAGE_CONTROL(x) ((u64)(x & 0x1) << 42)
/* IA32_HWP_STATUS */
#define HWP_GUARANTEED_CHANGE(x) (x & 0x1)
@@ -602,7 +602,11 @@
/* V6 PMON MSR range */
#define MSR_IA32_PMC_V6_GP0_CTR 0x1900
#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901
+#define MSR_IA32_PMC_V6_GP0_CFG_B 0x1902
+#define MSR_IA32_PMC_V6_GP0_CFG_C 0x1903
#define MSR_IA32_PMC_V6_FX0_CTR 0x1980
+#define MSR_IA32_PMC_V6_FX0_CFG_B 0x1982
+#define MSR_IA32_PMC_V6_FX0_CFG_C 0x1983
#define MSR_IA32_PMC_V6_STEP 4
/* KeyID partitioning between MKTME and TDX */
@@ -624,6 +628,7 @@
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD_PPIN_CTL 0xc00102f0
#define MSR_AMD_PPIN 0xc00102f1
+#define MSR_AMD64_CPUID_FN_7 0xc0011002
#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index b663d916f162..6f3499507c5e 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -441,6 +441,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6)
#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7)
#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8)
+#define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1
@@ -931,4 +932,74 @@ struct kvm_hyperv_eventfd {
#define KVM_X86_SNP_VM 4
#define KVM_X86_TDX_VM 5
+/* Trust Domain eXtension sub-ioctl() commands. */
+enum kvm_tdx_cmd_id {
+ KVM_TDX_CAPABILITIES = 0,
+ KVM_TDX_INIT_VM,
+ KVM_TDX_INIT_VCPU,
+ KVM_TDX_INIT_MEM_REGION,
+ KVM_TDX_FINALIZE_VM,
+ KVM_TDX_GET_CPUID,
+
+ KVM_TDX_CMD_NR_MAX,
+};
+
+struct kvm_tdx_cmd {
+ /* enum kvm_tdx_cmd_id */
+ __u32 id;
+ /* flags for the sub-command. If the sub-command doesn't use this, set it to zero. */
+ __u32 flags;
+ /*
+ * Data for each sub-command: an immediate value or a pointer to the
+ * actual data in process virtual address space. If the sub-command
+ * doesn't use it, set it to zero.
+ */
+ __u64 data;
+ /*
+ * Auxiliary error code. The sub-command may return TDX SEAMCALL
+ * status code in addition to -Exxx.
+ */
+ __u64 hw_error;
+};
+
+struct kvm_tdx_capabilities {
+ __u64 supported_attrs;
+ __u64 supported_xfam;
+ __u64 reserved[254];
+
+ /* Configurable CPUID bits for userspace */
+ struct kvm_cpuid2 cpuid;
+};
+
+struct kvm_tdx_init_vm {
+ __u64 attributes;
+ __u64 xfam;
+ __u64 mrconfigid[6]; /* sha384 digest */
+ __u64 mrowner[6]; /* sha384 digest */
+ __u64 mrownerconfig[6]; /* sha384 digest */
+
+ /* The total space for TD_PARAMS before the CPUIDs is 256 bytes */
+ __u64 reserved[12];
+
+ /*
+ * Call KVM_TDX_INIT_VM before vcpu creation, thus before
+ * KVM_SET_CPUID2.
+ * This configuration supersedes KVM_SET_CPUID2 for vCPUs because the
+ * TDX module directly virtualizes those CPUIDs without the VMM. The
+ * userspace VMM, e.g. QEMU, should make KVM_SET_CPUID2 consistent with
+ * these values; otherwise KVM may have a wrong idea of the guest's
+ * CPUIDs and may wrongly emulate CPUIDs or MSRs that the TDX module
+ * doesn't virtualize.
+ */
+ struct kvm_cpuid2 cpuid;
+};
+
+#define KVM_TDX_MEASURE_MEMORY_REGION _BITULL(0)
+
+struct kvm_tdx_init_mem_region {
+ __u64 source_addr;
+ __u64 gpa;
+ __u64 nr_pages;
+};
+
#endif /* _ASM_X86_KVM_H */
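For context, the TDX sub-commands above are multiplexed through a single command structure. A hedged sketch of querying capabilities: the struct layout comes from this header, while the carrier ioctl (KVM_MEMORY_ENCRYPT_OP on the VM fd) and the pre-sized caps buffer are assumptions based on how other confidential-VM types are wired up:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Hedged sketch: the caller allocates caps with room for
     * caps->cpuid.nent entries and sets nent before the call. */
    static int tdx_get_caps(int vm_fd, struct kvm_tdx_capabilities *caps)
    {
            struct kvm_tdx_cmd cmd;

            memset(&cmd, 0, sizeof(cmd));
            cmd.id = KVM_TDX_CAPABILITIES;
            cmd.flags = 0;  /* this sub-command takes no flags */
            cmd.data = (__u64)(unsigned long)caps;

            if (ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd) < 0)
                    return -1;  /* cmd.hw_error may carry a SEAMCALL status */
            return 0;
    }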
diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h
index ec1321248dac..9c640a521a67 100644
--- a/tools/arch/x86/include/uapi/asm/svm.h
+++ b/tools/arch/x86/include/uapi/asm/svm.h
@@ -95,6 +95,7 @@
#define SVM_EXIT_CR14_WRITE_TRAP 0x09e
#define SVM_EXIT_CR15_WRITE_TRAP 0x09f
#define SVM_EXIT_INVPCID 0x0a2
+#define SVM_EXIT_BUS_LOCK 0x0a5
#define SVM_EXIT_IDLE_HLT 0x0a6
#define SVM_EXIT_NPF 0x400
#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
@@ -225,6 +226,7 @@
{ SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \
{ SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \
{ SVM_EXIT_INVPCID, "invpcid" }, \
+ { SVM_EXIT_BUS_LOCK, "buslock" }, \
{ SVM_EXIT_IDLE_HLT, "idle-halt" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index a5faf6d88f1b..f0f4a4cf84a7 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -34,6 +34,7 @@
#define EXIT_REASON_TRIPLE_FAULT 2
#define EXIT_REASON_INIT_SIGNAL 3
#define EXIT_REASON_SIPI_SIGNAL 4
+#define EXIT_REASON_OTHER_SMI 6
#define EXIT_REASON_INTERRUPT_WINDOW 7
#define EXIT_REASON_NMI_WINDOW 8
@@ -92,6 +93,7 @@
#define EXIT_REASON_TPAUSE 68
#define EXIT_REASON_BUS_LOCK 74
#define EXIT_REASON_NOTIFY 75
+#define EXIT_REASON_TDCALL 77
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -155,7 +157,8 @@
{ EXIT_REASON_UMWAIT, "UMWAIT" }, \
{ EXIT_REASON_TPAUSE, "TPAUSE" }, \
{ EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \
- { EXIT_REASON_NOTIFY, "NOTIFY" }
+ { EXIT_REASON_NOTIFY, "NOTIFY" }, \
+ { EXIT_REASON_TDCALL, "TDCALL" }
#define VMX_EXIT_REASON_FLAGS \
{ VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 59cf6f9065aa..ccc3d923fc1e 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -40,6 +40,7 @@ SYM_FUNC_END(__memcpy)
EXPORT_SYMBOL(__memcpy)
SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy)
+SYM_PIC_ALIAS(memcpy)
EXPORT_SYMBOL(memcpy)
SYM_FUNC_START_LOCAL(memcpy_orig)
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
index d66b710d628f..fb5a03cf5ab7 100644
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -42,6 +42,7 @@ SYM_FUNC_END(__memset)
EXPORT_SYMBOL(__memset)
SYM_FUNC_ALIAS_MEMFUNC(memset, __memset)
+SYM_PIC_ALIAS(memset)
EXPORT_SYMBOL(memset)
SYM_FUNC_START_LOCAL(memset_orig)
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index afbddea3a39c..ce1b556dfa90 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -17,7 +17,7 @@ endif
# Overrides for the prepare step libraries.
HOST_OVERRIDES := AR="$(HOSTAR)" CC="$(HOSTCC)" LD="$(HOSTLD)" ARCH="$(HOSTARCH)" \
- CROSS_COMPILE="" EXTRA_CFLAGS="$(HOSTCFLAGS)"
+ CROSS_COMPILE="" CLANG_CROSS_FLAGS="" EXTRA_CFLAGS="$(HOSTCFLAGS)"
RM ?= rm
HOSTCC ?= gcc
diff --git a/tools/hv/hv_fcopy_uio_daemon.c b/tools/hv/hv_fcopy_uio_daemon.c
index 0198321d14a2..92e8307b2a46 100644
--- a/tools/hv/hv_fcopy_uio_daemon.c
+++ b/tools/hv/hv_fcopy_uio_daemon.c
@@ -35,7 +35,10 @@
#define WIN8_SRV_MINOR 1
#define WIN8_SRV_VERSION (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR)
-#define FCOPY_UIO "/sys/bus/vmbus/devices/eb765408-105f-49b6-b4aa-c123b64d17d4/uio"
+#define FCOPY_DEVICE_PATH(subdir) \
+ "/sys/bus/vmbus/devices/eb765408-105f-49b6-b4aa-c123b64d17d4/" #subdir
+#define FCOPY_UIO_PATH FCOPY_DEVICE_PATH(uio)
+#define FCOPY_CHANNELS_PATH FCOPY_DEVICE_PATH(channels)
#define FCOPY_VER_COUNT 1
static const int fcopy_versions[] = {
@@ -47,9 +50,62 @@ static const int fw_versions[] = {
UTIL_FW_VERSION
};
-#define HV_RING_SIZE 0x4000 /* 16KB ring buffer size */
+static uint32_t get_ring_buffer_size(void)
+{
+ char ring_path[PATH_MAX];
+ DIR *dir;
+ struct dirent *entry;
+ struct stat st;
+ uint32_t ring_size = 0;
+ int retry_count = 0;
+
+ /* Find the channel directory */
+ dir = opendir(FCOPY_CHANNELS_PATH);
+ if (!dir) {
+ usleep(100 * 1000); /* Avoid race with kernel, wait 100ms and retry once */
+ dir = opendir(FCOPY_CHANNELS_PATH);
+ if (!dir) {
+ syslog(LOG_ERR, "Failed to open channels directory: %s", strerror(errno));
+ return 0;
+ }
+ }
+
+retry_once:
+ while ((entry = readdir(dir)) != NULL) {
+ if (entry->d_type == DT_DIR && strcmp(entry->d_name, ".") != 0 &&
+ strcmp(entry->d_name, "..") != 0) {
+ snprintf(ring_path, sizeof(ring_path), "%s/%s/ring",
+ FCOPY_CHANNELS_PATH, entry->d_name);
+
+ if (stat(ring_path, &st) == 0) {
+ /*
+ * stat returns size of Tx, Rx rings combined,
+ * so take half of it for individual ring size.
+ */
+ ring_size = (uint32_t)st.st_size / 2;
+ syslog(LOG_INFO, "Ring buffer size from %s: %u bytes",
+ ring_path, ring_size);
+ break;
+ }
+ }
+ }
+
+ if (!ring_size && retry_count == 0) {
+ retry_count = 1;
+ rewinddir(dir);
+ usleep(100 * 1000); /* Wait 100ms and retry once */
+ goto retry_once;
+ }
+
+ closedir(dir);
-static unsigned char desc[HV_RING_SIZE];
+ if (!ring_size)
+ syslog(LOG_ERR, "Could not determine ring size");
+
+ return ring_size;
+}
+
+static unsigned char *desc;
static int target_fd;
static char target_fname[PATH_MAX];
@@ -62,8 +118,11 @@ static int hv_fcopy_create_file(char *file_name, char *path_name, __u32 flags)
filesize = 0;
p = path_name;
- snprintf(target_fname, sizeof(target_fname), "%s/%s",
- path_name, file_name);
+ if (snprintf(target_fname, sizeof(target_fname), "%s/%s",
+ path_name, file_name) >= sizeof(target_fname)) {
+ syslog(LOG_ERR, "target file name is too long: %s/%s", path_name, file_name);
+ goto done;
+ }
/*
* Check to see if the path is already in place; if not,
@@ -270,7 +329,7 @@ static void wcstoutf8(char *dest, const __u16 *src, size_t dest_size)
{
size_t len = 0;
- while (len < dest_size) {
+ while (len < dest_size && *src) {
if (src[len] < 0x80)
dest[len++] = (char)(*src++);
else
@@ -282,27 +341,15 @@ static void wcstoutf8(char *dest, const __u16 *src, size_t dest_size)
static int hv_fcopy_start(struct hv_start_fcopy *smsg_in)
{
- setlocale(LC_ALL, "en_US.utf8");
- size_t file_size, path_size;
- char *file_name, *path_name;
- char *in_file_name = (char *)smsg_in->file_name;
- char *in_path_name = (char *)smsg_in->path_name;
-
- file_size = wcstombs(NULL, (const wchar_t *restrict)in_file_name, 0) + 1;
- path_size = wcstombs(NULL, (const wchar_t *restrict)in_path_name, 0) + 1;
-
- file_name = (char *)malloc(file_size * sizeof(char));
- path_name = (char *)malloc(path_size * sizeof(char));
-
- if (!file_name || !path_name) {
- free(file_name);
- free(path_name);
- syslog(LOG_ERR, "Can't allocate memory for file name and/or path name");
- return HV_E_FAIL;
- }
+ /*
+ * file_name and path_name should have the same length as the
+ * corresponding members of hv_start_fcopy.
+ */
+ char file_name[W_MAX_PATH], path_name[W_MAX_PATH];
- wcstoutf8(file_name, (__u16 *)in_file_name, file_size);
- wcstoutf8(path_name, (__u16 *)in_path_name, path_size);
+ setlocale(LC_ALL, "en_US.utf8");
+ wcstoutf8(file_name, smsg_in->file_name, W_MAX_PATH - 1);
+ wcstoutf8(path_name, smsg_in->path_name, W_MAX_PATH - 1);
return hv_fcopy_create_file(file_name, path_name, smsg_in->copy_flags);
}
@@ -406,7 +453,7 @@ int main(int argc, char *argv[])
int daemonize = 1, long_index = 0, opt, ret = -EINVAL;
struct vmbus_br txbr, rxbr;
void *ring;
- uint32_t len = HV_RING_SIZE;
+ uint32_t ring_size, len;
char uio_name[NAME_MAX] = {0};
char uio_dev_path[PATH_MAX] = {0};
@@ -437,7 +484,20 @@ int main(int argc, char *argv[])
openlog("HV_UIO_FCOPY", 0, LOG_USER);
syslog(LOG_INFO, "starting; pid is:%d", getpid());
- fcopy_get_first_folder(FCOPY_UIO, uio_name);
+ ring_size = get_ring_buffer_size();
+ if (!ring_size) {
+ ret = -ENODEV;
+ goto exit;
+ }
+
+ desc = malloc(ring_size * sizeof(unsigned char));
+ if (!desc) {
+ syslog(LOG_ERR, "malloc failed for desc buffer");
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ fcopy_get_first_folder(FCOPY_UIO_PATH, uio_name);
snprintf(uio_dev_path, sizeof(uio_dev_path), "/dev/%s", uio_name);
fcopy_fd = open(uio_dev_path, O_RDWR);
@@ -445,17 +505,17 @@ int main(int argc, char *argv[])
syslog(LOG_ERR, "open %s failed; error: %d %s",
uio_dev_path, errno, strerror(errno));
ret = fcopy_fd;
- goto exit;
+ goto free_desc;
}
- ring = vmbus_uio_map(&fcopy_fd, HV_RING_SIZE);
+ ring = vmbus_uio_map(&fcopy_fd, ring_size);
if (!ring) {
ret = errno;
syslog(LOG_ERR, "mmap ringbuffer failed; error: %d %s", ret, strerror(ret));
goto close;
}
- vmbus_br_setup(&txbr, ring, HV_RING_SIZE);
- vmbus_br_setup(&rxbr, (char *)ring + HV_RING_SIZE, HV_RING_SIZE);
+ vmbus_br_setup(&txbr, ring, ring_size);
+ vmbus_br_setup(&rxbr, (char *)ring + ring_size, ring_size);
rxbr.vbr->imask = 0;
@@ -472,7 +532,7 @@ int main(int argc, char *argv[])
goto close;
}
- len = HV_RING_SIZE;
+ len = ring_size;
ret = rte_vmbus_chan_recv_raw(&rxbr, desc, &len);
if (unlikely(ret <= 0)) {
/* This indicates a failure to communicate (or worse) */
@@ -492,6 +552,8 @@ int main(int argc, char *argv[])
}
close:
close(fcopy_fd);
+free_desc:
+ free(desc);
exit:
return ret;
}
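The ring-size discovery above hinges on one sysfs detail called out in the code: stat() on a channel's ring file reports the Tx and Rx rings combined, so each direction gets half of st_size. That computation in isolation (the device GUID is the one hard-coded above; the channel id is illustrative):

    #include <stdio.h>
    #include <stdint.h>
    #include <sys/stat.h>

    #define CHANNELS_PATH \
            "/sys/bus/vmbus/devices/eb765408-105f-49b6-b4aa-c123b64d17d4/channels"

    static uint32_t ring_size_from(const char *ring_path)
    {
            struct stat st;

            if (stat(ring_path, &st) != 0)
                    return 0;
            /* st_size covers both rings; one direction is half of it */
            return (uint32_t)(st.st_size / 2);
    }

    int main(void)
    {
            /* channel id "17" is illustrative; the daemon scans the dir */
            uint32_t sz = ring_size_from(CHANNELS_PATH "/17/ring");

            printf("ring size: %u bytes\n", sz);
            return 0;
    }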
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
index 14fd0ca9a6cd..7ad056219115 100644
--- a/tools/include/linux/bits.h
+++ b/tools/include/linux/bits.h
@@ -12,6 +12,7 @@
#define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG))
#define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG)
#define BITS_PER_BYTE 8
+#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
/*
* Create a contiguous bitmask starting at bit position @l and ending at
@@ -19,16 +20,68 @@
* GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
*/
#if !defined(__ASSEMBLY__)
+
+/*
+ * Missing asm support
+ *
+ * GENMASK_U*() and BIT_U*() depend on BITS_PER_TYPE() which relies on sizeof(),
+ * something not available in asm. In any case, fixed-width integers are a C
+ * concept; assembly code can rely on the long and long long versions instead.
+ */
+
#include <linux/build_bug.h>
#include <linux/compiler.h>
+#include <linux/overflow.h>
+
#define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h)))
-#else
+
+/*
+ * Generate a mask for the specified type @t. Additional checks are made to
+ * guarantee the value returned fits in that type, relying on the
+ * -Wshift-count-overflow compiler check to detect incompatible arguments.
+ * For example, all these create build errors or warnings:
+ *
+ * - GENMASK(15, 20): wrong argument order
+ * - GENMASK(72, 15): doesn't fit unsigned long
+ * - GENMASK_U32(33, 15): doesn't fit in a u32
+ */
+#define GENMASK_TYPE(t, h, l) \
+ ((t)(GENMASK_INPUT_CHECK(h, l) + \
+ (type_max(t) << (l) & \
+ type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h)))))
+
+#define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l)
+#define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l)
+#define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l)
+#define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l)
+
+/*
+ * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The
+ * following examples generate compiler warnings due to -Wshift-count-overflow:
+ *
+ * - BIT_U8(8)
+ * - BIT_U32(-1)
+ * - BIT_U32(40)
+ */
+#define BIT_INPUT_CHECK(type, nr) \
+ BUILD_BUG_ON_ZERO(const_true((nr) >= BITS_PER_TYPE(type)))
+
+#define BIT_TYPE(type, nr) ((type)(BIT_INPUT_CHECK(type, nr) + BIT_ULL(nr)))
+
+#define BIT_U8(nr) BIT_TYPE(u8, nr)
+#define BIT_U16(nr) BIT_TYPE(u16, nr)
+#define BIT_U32(nr) BIT_TYPE(u32, nr)
+#define BIT_U64(nr) BIT_TYPE(u64, nr)
+
+#else /* defined(__ASSEMBLY__) */
+
/*
* BUILD_BUG_ON_ZERO is not available in header files included from asm files;
* disable the input check in that case.
*/
#define GENMASK_INPUT_CHECK(h, l) 0
-#endif
+
+#endif /* !defined(__ASSEMBLY__) */
#define GENMASK(h, l) \
(GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
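A few concrete values for the fixed-type helpers added above, assuming the header's u8/u32/u64 typedefs are in scope; the commented-out line would trip the -Wshift-count-overflow check the comments describe:

    /* sketch: values follow directly from GENMASK_TYPE()/BIT_TYPE() */
    u8  low_nibble = GENMASK_U8(3, 0);    /* 0x0f */
    u32 field_mask = GENMASK_U32(23, 16); /* 0x00ff0000 */
    u64 top_bit    = BIT_U64(63);         /* 1ULL << 63 */

    /* u32 bad = GENMASK_U32(33, 15); */  /* build error: won't fit in a u32 */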
diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h
index b4898ff085de..ab2aa97bd8ce 100644
--- a/tools/include/linux/build_bug.h
+++ b/tools/include/linux/build_bug.h
@@ -4,17 +4,17 @@
#include <linux/compiler.h>
-#ifdef __CHECKER__
-#define BUILD_BUG_ON_ZERO(e) (0)
-#else /* __CHECKER__ */
/*
* Force a compilation error if condition is true, but also produce a
* result (of value 0 and type int), so the expression can be used
* e.g. in a structure initializer (or wherever else comma expressions
* aren't permitted).
+ *
+ * Take an error message as an optional second argument. If omitted,
+ * default to the stringification of the tested expression.
*/
-#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
-#endif /* __CHECKER__ */
+#define BUILD_BUG_ON_ZERO(e, ...) \
+ __BUILD_BUG_ON_ZERO_MSG(e, ##__VA_ARGS__, #e " is true")
/* Force a compilation error if a constant expression is not a power of 2 */
#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \
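With the optional-message form, a call site can attach its own diagnostic while one-argument callers keep the stringified default. A small sketch of both spellings (the struct and field names are illustrative, not from the kernel):

    /* one-arg form: a failure would report "sizeof(int) != 4 is true" */
    #define ASSERT_INT_SIZE  BUILD_BUG_ON_ZERO(sizeof(int) != 4)

    /* two-arg form: a failure reports the custom message instead */
    #define ASSERT_LONG_SIZE \
            BUILD_BUG_ON_ZERO(sizeof(long) < sizeof(int), "long narrower than int")

    struct demo {
            int field;
    };

    /* usable in an initializer because the macros yield an int 0 */
    static struct demo d = { .field = ASSERT_INT_SIZE + ASSERT_LONG_SIZE };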
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index d627e66a04a6..33411ca0cc90 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -244,6 +244,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
__asm__ ("" : "=r" (var) : "0" (var))
#endif
+#ifndef __BUILD_BUG_ON_ZERO_MSG
+#if defined(__clang__)
+#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)(sizeof(struct { int:(-!!(e)); })))
+#else
+#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)sizeof(struct {_Static_assert(!(e), msg);}))
+#endif
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _TOOLS_LINUX_COMPILER_H */
diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h
index 5a37ccbec54f..f61a01dd7eb7 100644
--- a/tools/include/linux/kallsyms.h
+++ b/tools/include/linux/kallsyms.h
@@ -18,6 +18,7 @@ static inline const char *kallsyms_lookup(unsigned long addr,
return NULL;
}
+#ifdef HAVE_BACKTRACE_SUPPORT
#include <execinfo.h>
#include <stdlib.h>
static inline void print_ip_sym(const char *loglvl, unsigned long ip)
@@ -30,5 +31,8 @@ static inline void print_ip_sym(const char *loglvl, unsigned long ip)
free(name);
}
+#else
+static inline void print_ip_sym(const char *loglvl, unsigned long ip) {}
+#endif
#endif
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 7fba37b94401..e63a71d3c607 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -905,13 +905,17 @@ struct drm_syncobj_destroy {
};
#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0)
+#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE (1 << 1)
#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0)
+#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE (1 << 1)
struct drm_syncobj_handle {
__u32 handle;
__u32 flags;
__s32 fd;
__u32 pad;
+
+ __u64 point;
};
struct drm_syncobj_transfer {
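The new _TIMELINE flags pair with the new point member. A hedged sketch of exporting one timeline point as a sync file, assuming the long-standing DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD carrier ioctl and an illustrative point value:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <drm/drm.h>

    static int export_timeline_point(int drm_fd, __u32 handle)
    {
            struct drm_syncobj_handle args;

            memset(&args, 0, sizeof(args));
            args.handle = handle;
            args.flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE |
                         DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE;
            args.point = 42;        /* illustrative timeline point */

            if (ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args) < 0)
                    return -1;
            return args.fd;         /* sync_file fd for that point */
    }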
diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h
index 682b406e1067..a04afef9efca 100644
--- a/tools/include/uapi/linux/bits.h
+++ b/tools/include/uapi/linux/bits.h
@@ -4,9 +4,9 @@
#ifndef _UAPI_LINUX_BITS_H
#define _UAPI_LINUX_BITS_H
-#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h))))
+#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
-#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h))))
+#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
#define __GENMASK_U128(h, l) \
((_BIT128((h)) << 1) - (_BIT128(l)))
diff --git a/tools/include/uapi/linux/coredump.h b/tools/include/uapi/linux/coredump.h
new file mode 100644
index 000000000000..dc3789b78af0
--- /dev/null
+++ b/tools/include/uapi/linux/coredump.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_LINUX_COREDUMP_H
+#define _UAPI_LINUX_COREDUMP_H
+
+#include <linux/types.h>
+
+/**
+ * coredump_{req,ack} flags
+ * @COREDUMP_KERNEL: kernel writes coredump
+ * @COREDUMP_USERSPACE: userspace writes coredump
+ * @COREDUMP_REJECT: don't generate coredump
+ * @COREDUMP_WAIT: wait for coredump server
+ */
+enum {
+ COREDUMP_KERNEL = (1ULL << 0),
+ COREDUMP_USERSPACE = (1ULL << 1),
+ COREDUMP_REJECT = (1ULL << 2),
+ COREDUMP_WAIT = (1ULL << 3),
+};
+
+/**
+ * struct coredump_req - message kernel sends to userspace
+ * @size: size of struct coredump_req
+ * @size_ack: known size of struct coredump_ack on this kernel
+ * @mask: supported features
+ *
+ * When a coredump happens the kernel will connect to the coredump
+ * socket and send a coredump request to the coredump server. The @size
+ * member is set to the size of struct coredump_req and provides a hint
+ * to userspace how much data can be read. Userspace may use MSG_PEEK to
+ * peek the size of struct coredump_req and then choose to consume it in
+ * one go. Userspace may also simply read a COREDUMP_REQ_SIZE_VER0
+ * request. If the size the kernel sends is larger, userspace simply
+ * discards any remaining data.
+ *
+ * The coredump_req->mask member is set to the currently known features.
+ * Userspace may only set coredump_ack->mask to the bits raised by the
+ * kernel in coredump_req->mask.
+ *
+ * The coredump_req->size_ack member is set by the kernel to the size of
+ * struct coredump_ack the kernel knows. Userspace may only send up to
+ * coredump_req->size_ack bytes to the kernel and must set
+ * coredump_ack->size accordingly.
+ */
+struct coredump_req {
+ __u32 size;
+ __u32 size_ack;
+ __u64 mask;
+};
+
+enum {
+ COREDUMP_REQ_SIZE_VER0 = 16U, /* size of first published struct */
+};
+
+/**
+ * struct coredump_ack - message userspace sends to kernel
+ * @size: size of the struct
+ * @spare: unused
+ * @mask: features kernel is supposed to use
+ *
+ * The @size member must be set to the size of struct coredump_ack. It
+ * may never exceed what the kernel returned in coredump_req->size_ack
+ * but it may of course be smaller (>= COREDUMP_ACK_SIZE_VER0 and <=
+ * coredump_req->size_ack).
+ *
+ * The @mask member must be set to the features the coredump server
+ * wants the kernel to use. Only bits the kernel returned in
+ * coredump_req->mask may be set.
+ */
+struct coredump_ack {
+ __u32 size;
+ __u32 spare;
+ __u64 mask;
+};
+
+enum {
+ COREDUMP_ACK_SIZE_VER0 = 16U, /* size of first published struct */
+};
+
+/**
+ * enum coredump_mark - Markers for the coredump socket
+ *
+ * The kernel will place a single byte on the coredump socket. The
+ * markers notify userspace whether the coredump ack succeeded or
+ * failed.
+ *
+ * @COREDUMP_MARK_MINSIZE: the provided coredump_ack size was too small
+ * @COREDUMP_MARK_MAXSIZE: the provided coredump_ack size was too big
+ * @COREDUMP_MARK_UNSUPPORTED: the provided coredump_ack mask was invalid
+ * @COREDUMP_MARK_CONFLICTING: the provided coredump_ack mask has conflicting options
+ * @COREDUMP_MARK_REQACK: the coredump request and ack were successful
+ * @__COREDUMP_MARK_MAX: the maximum coredump mark value
+ */
+enum coredump_mark {
+ COREDUMP_MARK_REQACK = 0U,
+ COREDUMP_MARK_MINSIZE = 1U,
+ COREDUMP_MARK_MAXSIZE = 2U,
+ COREDUMP_MARK_UNSUPPORTED = 3U,
+ COREDUMP_MARK_CONFLICTING = 4U,
+ __COREDUMP_MARK_MAX = (1U << 31),
+};
+
+#endif /* _UAPI_LINUX_COREDUMP_H */
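Putting the handshake together: a coredump server reads a coredump_req, answers with a coredump_ack sized within req.size_ack and masked to the bits the kernel offered, then waits for the kernel's one-byte mark. A minimal sketch over an already-accepted socket, with error handling trimmed:

    #include <string.h>
    #include <unistd.h>
    #include <linux/coredump.h>

    static int handle_coredump(int sk)
    {
            struct coredump_req req;
            struct coredump_ack ack;
            __u8 mark;

            /* read only the struct this program knows about */
            if (read(sk, &req, sizeof(req)) < (ssize_t)COREDUMP_REQ_SIZE_VER0)
                    return -1;

            memset(&ack, 0, sizeof(ack));
            ack.size = COREDUMP_ACK_SIZE_VER0;      /* <= req.size_ack */
            ack.mask = req.mask & COREDUMP_KERNEL;  /* only offered bits */

            if (write(sk, &ack, ack.size) != (ssize_t)ack.size)
                    return -1;

            /* the kernel replies with a single coredump_mark byte */
            if (read(sk, &mark, 1) != 1 || mark != COREDUMP_MARK_REQACK)
                    return -1;
            return 0;
    }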
diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h
index 7a8f4c290187..3aff99f2696a 100644
--- a/tools/include/uapi/linux/fscrypt.h
+++ b/tools/include/uapi/linux/fscrypt.h
@@ -119,7 +119,7 @@ struct fscrypt_key_specifier {
*/
struct fscrypt_provisioning_key_payload {
__u32 type;
- __u32 __reserved;
+ __u32 flags;
__u8 raw[];
};
@@ -128,7 +128,9 @@ struct fscrypt_add_key_arg {
struct fscrypt_key_specifier key_spec;
__u32 raw_size;
__u32 key_id;
- __u32 __reserved[8];
+#define FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001
+ __u32 flags;
+ __u32 __reserved[7];
__u8 raw[];
};
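The repurposed flags field flows through the existing key-add ioctl. A hedged sketch of handing a hardware-wrapped blob to the filesystem; FS_IOC_ADD_ENCRYPTION_KEY is the established ioctl, while the blob contents and size limits are the filesystem's business:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/fscrypt.h>

    static int add_hw_wrapped_key(int dir_fd, const void *blob, __u32 blob_len)
    {
            /* flexible raw[] member, so allocate the argument inline */
            char buf[sizeof(struct fscrypt_add_key_arg) + 128] = {0};
            struct fscrypt_add_key_arg *arg = (void *)buf;

            if (blob_len > 128)
                    return -1;
            arg->key_spec.type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER;
            arg->raw_size = blob_len;
            arg->flags = FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED;
            memcpy(arg->raw, blob, blob_len);

            return ioctl(dir_fd, FS_IOC_ADD_ENCRYPTION_KEY, arg);
    }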
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index b6ae8ad8934b..d00b85cb168c 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -375,6 +375,7 @@ struct kvm_run {
#define KVM_SYSTEM_EVENT_WAKEUP 4
#define KVM_SYSTEM_EVENT_SUSPEND 5
#define KVM_SYSTEM_EVENT_SEV_TERM 6
+#define KVM_SYSTEM_EVENT_TDX_FATAL 7
__u32 type;
__u32 ndata;
union {
@@ -930,6 +931,9 @@ struct kvm_enable_cap {
#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
#define KVM_CAP_X86_GUEST_MODE 238
#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239
+#define KVM_CAP_ARM_EL2 240
+#define KVM_CAP_ARM_EL2_E2H0 241
+#define KVM_CAP_RISCV_MP_STATE_RESET 242
struct kvm_irq_routing_irqchip {
__u32 irqchip;
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index f78ee3670dd5..1686861aae20 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -182,8 +182,12 @@ struct statx {
/* File offset alignment for direct I/O reads */
__u32 stx_dio_read_offset_align;
- /* 0xb8 */
- __u64 __spare3[9]; /* Spare space for future expansion */
+ /* Optimised max atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max_opt;
+ __u32 __spare2[1];
+
+ /* 0xc0 */
+ __u64 __spare3[8]; /* Spare space for future expansion */
/* 0x100 */
};
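Userspace reads the new field through statx(2). A hedged sketch; STATX_WRITE_ATOMIC as the request-mask bit is an assumption carried over from the related atomic-write statx support, since this hunk only adds the result field:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>

    int main(int argc, char **argv)
    {
            struct statx stx;

            if (statx(AT_FDCWD, argc > 1 ? argv[1] : ".", 0,
                      STATX_WRITE_ATOMIC, &stx) != 0)
                    return 1;

            printf("optimised atomic write unit max: %u bytes\n",
                   stx.stx_atomic_write_unit_max_opt);
            return 0;
    }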
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index f1d495dc66bb..37682908cb0f 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1384,12 +1384,12 @@ static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf)
fd = open(path, O_RDONLY);
if (fd < 0)
- return libbpf_err_ptr(-errno);
+ return ERR_PTR(-errno);
if (fstat(fd, &st) < 0) {
err = -errno;
close(fd);
- return libbpf_err_ptr(err);
+ return ERR_PTR(err);
}
data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
@@ -1397,7 +1397,7 @@ static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf)
close(fd);
if (data == MAP_FAILED)
- return libbpf_err_ptr(err);
+ return ERR_PTR(err);
btf = btf_new(data, st.st_size, base_btf, true);
if (IS_ERR(btf))
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 460c3e57fadb..0381f209920a 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -226,6 +226,9 @@ static void btf_dump_free_names(struct hashmap *map)
size_t bkt;
struct hashmap_entry *cur;
+ if (!map)
+ return;
+
hashmap__for_each_entry(map, cur, bkt)
free((void *)cur->pkey);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e9c641a2fb20..d41ee26b9443 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -597,7 +597,7 @@ struct extern_desc {
int sym_idx;
int btf_id;
int sec_btf_id;
- const char *name;
+ char *name;
char *essent_name;
bool is_set;
bool is_weak;
@@ -735,7 +735,7 @@ struct bpf_object {
struct usdt_manager *usdt_man;
- struct bpf_map *arena_map;
+ int arena_map_idx;
void *arena_data;
size_t arena_data_sz;
@@ -1517,6 +1517,7 @@ static struct bpf_object *bpf_object__new(const char *path,
obj->efile.obj_buf_sz = obj_buf_sz;
obj->efile.btf_maps_shndx = -1;
obj->kconfig_map_idx = -1;
+ obj->arena_map_idx = -1;
obj->kern_version = get_kernel_version();
obj->state = OBJ_OPEN;
@@ -2964,7 +2965,7 @@ static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map,
const long page_sz = sysconf(_SC_PAGE_SIZE);
size_t mmap_sz;
- mmap_sz = bpf_map_mmap_sz(obj->arena_map);
+ mmap_sz = bpf_map_mmap_sz(map);
if (roundup(data_sz, page_sz) > mmap_sz) {
pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n",
sec_name, mmap_sz, data_sz);
@@ -3038,12 +3039,12 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
if (map->def.type != BPF_MAP_TYPE_ARENA)
continue;
- if (obj->arena_map) {
+ if (obj->arena_map_idx >= 0) {
pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n",
- map->name, obj->arena_map->name);
+ map->name, obj->maps[obj->arena_map_idx].name);
return -EINVAL;
}
- obj->arena_map = map;
+ obj->arena_map_idx = i;
if (obj->efile.arena_data) {
err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx,
@@ -3053,7 +3054,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
return err;
}
}
- if (obj->efile.arena_data && !obj->arena_map) {
+ if (obj->efile.arena_data && obj->arena_map_idx < 0) {
pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n",
ARENA_SEC);
return -ENOENT;
@@ -4259,7 +4260,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
return ext->btf_id;
}
t = btf__type_by_id(obj->btf, ext->btf_id);
- ext->name = btf__name_by_offset(obj->btf, t->name_off);
+ ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off));
+ if (!ext->name)
+ return -ENOMEM;
ext->sym_idx = i;
ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
@@ -4581,8 +4584,13 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
if (shdr_idx == obj->efile.arena_data_shndx) {
reloc_desc->type = RELO_DATA;
reloc_desc->insn_idx = insn_idx;
- reloc_desc->map_idx = obj->arena_map - obj->maps;
+ reloc_desc->map_idx = obj->arena_map_idx;
reloc_desc->sym_off = sym->st_value;
+
+ map = &obj->maps[obj->arena_map_idx];
+ pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n",
+ prog->name, obj->arena_map_idx, map->name, map->sec_idx,
+ map->sec_offset, insn_idx);
return 0;
}
@@ -9138,8 +9146,10 @@ void bpf_object__close(struct bpf_object *obj)
zfree(&obj->btf_custom_path);
zfree(&obj->kconfig);
- for (i = 0; i < obj->nr_extern; i++)
+ for (i = 0; i < obj->nr_extern; i++) {
+ zfree(&obj->externs[i].name);
zfree(&obj->externs[i].essent_name);
+ }
zfree(&obj->externs);
obj->nr_extern = 0;
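Caching an index instead of a struct bpf_map pointer matters because obj->maps can be reallocated as parsing discovers more maps: a stored pointer may dangle after the realloc, while an index stays valid. The hazard in isolation, as generic C rather than libbpf internals:

    #include <stdlib.h>

    struct map { int id; };

    int main(void)
    {
            size_t cap = 1, cached_idx = 0;       /* what arena_map_idx is now */
            struct map *maps = malloc(cap * sizeof(*maps));
            struct map *cached_ptr, *grown;

            if (!maps)
                    return 1;
            cached_ptr = &maps[0];                /* what arena_map used to be */

            /* growing the array may move it, invalidating cached_ptr */
            grown = realloc(maps, ++cap * sizeof(*maps));
            if (!grown)
                    return 1;
            maps = grown;

            maps[cached_idx].id = 1;  /* always safe */
            /* cached_ptr->id = 1; */ /* may be a use-after-free now */

            free(maps);
            return 0;
    }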
diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py
index 55b59f6c79b8..61deb5923067 100644
--- a/tools/net/ynl/pyynl/lib/ynl.py
+++ b/tools/net/ynl/pyynl/lib/ynl.py
@@ -231,14 +231,7 @@ class NlMsg:
self.extack['unknown'].append(extack)
if attr_space:
- # We don't have the ability to parse nests yet, so only do global
- if 'miss-type' in self.extack and 'miss-nest' not in self.extack:
- miss_type = self.extack['miss-type']
- if miss_type in attr_space.attrs_by_val:
- spec = attr_space.attrs_by_val[miss_type]
- self.extack['miss-type'] = spec['name']
- if 'doc' in spec:
- self.extack['miss-type-doc'] = spec['doc']
+ self.annotate_extack(attr_space)
def _decode_policy(self, raw):
policy = {}
@@ -264,6 +257,18 @@ class NlMsg:
policy['mask'] = attr.as_scalar('u64')
return policy
+ def annotate_extack(self, attr_space):
+ """ Make extack more human friendly with attribute information """
+
+ # We don't have the ability to parse nests yet, so only do global
+ if 'miss-type' in self.extack and 'miss-nest' not in self.extack:
+ miss_type = self.extack['miss-type']
+ if miss_type in attr_space.attrs_by_val:
+ spec = attr_space.attrs_by_val[miss_type]
+ self.extack['miss-type'] = spec['name']
+ if 'doc' in spec:
+ self.extack['miss-type-doc'] = spec['doc']
+
def cmd(self):
return self.nl_type
@@ -277,12 +282,12 @@ class NlMsg:
class NlMsgs:
- def __init__(self, data, attr_space=None):
+ def __init__(self, data):
self.msgs = []
offset = 0
while offset < len(data):
- msg = NlMsg(data, offset, attr_space=attr_space)
+ msg = NlMsg(data, offset)
offset += msg.nl_len
self.msgs.append(msg)
@@ -1034,12 +1039,13 @@ class YnlFamily(SpecFamily):
op_rsp = []
while not done:
reply = self.sock.recv(self._recv_size)
- nms = NlMsgs(reply, attr_space=op.attr_set)
+ nms = NlMsgs(reply)
self._recv_dbg_print(reply, nms)
for nl_msg in nms:
if nl_msg.nl_seq in reqs_by_seq:
(op, vals, req_msg, req_flags) = reqs_by_seq[nl_msg.nl_seq]
if nl_msg.extack:
+ nl_msg.annotate_extack(op.attr_set)
self._decode_extack(req_msg, op, nl_msg.extack, vals)
else:
op = None
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index f23bdda737aa..67d76f3a1dce 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -224,6 +224,7 @@ static bool is_rust_noreturn(const struct symbol *func)
str_ends_with(func->name, "_4core9panicking14panic_explicit") ||
str_ends_with(func->name, "_4core9panicking14panic_nounwind") ||
str_ends_with(func->name, "_4core9panicking18panic_bounds_check") ||
+ str_ends_with(func->name, "_4core9panicking18panic_nounwind_fmt") ||
str_ends_with(func->name, "_4core9panicking19assert_failed_inner") ||
str_ends_with(func->name, "_4core9panicking30panic_null_pointer_dereference") ||
str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") ||
@@ -2318,6 +2319,7 @@ static int read_annotate(struct objtool_file *file,
for_each_reloc(sec->rsec, reloc) {
type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4);
+ type = bswap_if_needed(file->elf, type);
offset = reloc->sym->offset + reloc_addend(reloc);
insn = find_insn(file, reloc->sym->sec, offset);
diff --git a/tools/perf/Documentation/perf-amd-ibs.txt b/tools/perf/Documentation/perf-amd-ibs.txt
index 55f80beae037..548549935760 100644
--- a/tools/perf/Documentation/perf-amd-ibs.txt
+++ b/tools/perf/Documentation/perf-amd-ibs.txt
@@ -171,23 +171,48 @@ Below is a simple example of the perf mem tool.
# perf mem report
A normal perf mem report output will provide detailed memory access profile.
-However, it can also be aggregated based on output fields. For example:
-
- # perf mem report -F mem,sample,snoop
- Samples: 3M of event 'ibs_op//', Event count (approx.): 23524876
- Memory access Samples Snoop
- N/A 1903343 N/A
- L1 hit 1056754 N/A
- L2 hit 75231 N/A
- L3 hit 9496 HitM
- L3 hit 2270 N/A
- RAM hit 8710 N/A
- Remote node, same socket RAM hit 3241 N/A
- Remote core, same node Any cache hit 1572 HitM
- Remote core, same node Any cache hit 514 N/A
- Remote node, same socket Any cache hit 1216 HitM
- Remote node, same socket Any cache hit 350 N/A
- Uncached hit 18 N/A
+The new output fields show related access information together. For example:
+
+ # perf mem report -F overhead,cache,snoop,comm
+ ...
+ # Samples: 92K of event 'ibs_op//'
+ # Total weight : 531104
+ #
+ # ---------- Cache ----------- --- Snoop ----
+ # Overhead L1 L2 L1-buf Other HitM Other Command
+ # ........ ............................ .............. ..........
+ #
+ 76.07% 5.8% 35.7% 0.0% 34.6% 23.3% 52.8% cc1
+ 5.79% 0.2% 0.0% 0.0% 5.6% 0.1% 5.7% make
+ 5.78% 0.1% 4.4% 0.0% 1.2% 0.5% 5.3% gcc
+ 5.33% 0.3% 3.9% 0.0% 1.1% 0.2% 5.2% as
+ 5.00% 0.1% 3.8% 0.0% 1.0% 0.3% 4.7% sh
+ 1.56% 0.1% 0.1% 0.0% 1.4% 0.6% 0.9% ld
+ 0.28% 0.1% 0.0% 0.0% 0.2% 0.1% 0.2% pkg-config
+ 0.09% 0.0% 0.0% 0.0% 0.1% 0.0% 0.1% git
+ 0.03% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% rm
+ ...
+
+The report can also be aggregated on various kinds of memory access
+information using sort keys. For example:
+
+ # perf mem report -s mem,snoop
+ ...
+ # Samples: 92K of event 'ibs_op//'
+ # Total weight : 531104
+ # Sort order : mem,snoop
+ #
+ # Overhead Samples Memory access Snoop
+ # ........ ............ ....................................... ............
+ #
+ 47.99% 1509 L2 hit N/A
+ 25.08% 338 core, same node Any cache hit HitM
+ 10.24% 54374 N/A N/A
+ 6.77% 35938 L1 hit N/A
+ 6.39% 101 core, same node Any cache hit N/A
+ 3.50% 69 RAM hit N/A
+ 0.03% 158 LFB/MAB hit N/A
+ 0.00% 2 Uncached hit N/A
Please refer to their man page for more detail.
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 965e73d37772..4d164836d094 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -119,6 +119,22 @@ REPORT OPTIONS
And the default sort keys are changed to local_weight, mem, sym, dso,
symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, local_ins_lat.
+-F::
+--fields=::
+ Specify output field - multiple keys can be specified in CSV format.
+ Please see linkperf:perf-report[1] for details.
+
+ In addition to the default fields, 'perf mem report' will provide the
+ following fields to break down sample periods.
+
+ - op: operation in the sample instruction (load, store, prefetch, ...)
+ - cache: location in CPU cache (L1, L2, ...) where the sample hit
+ - mem: location in memory or other places the sample hit
+ - dtlb: location in Data TLB (L1, L2) where the sample hit
+ - snoop: snoop result for the sampled data access
+
+ Please take a look at the OUTPUT FIELD SELECTION section for caveats.
+
-T::
--type-profile::
Show data-type profile result instead of code symbols. This requires
@@ -156,6 +172,40 @@ but one sample with weight 180 and the other with weight 20:
90% [k] memcpy
10% [.] strcmp
+OUTPUT FIELD SELECTION
+----------------------
+"perf mem report" adds a number of new output fields specific to data source
+information in the sample. Some of them have the same name as the existing
+sort keys ("mem" and "snoop"), but unlike other fields and sort keys, they
+behave differently depending on whether they are used with -F/--fields or
+-s/--sort.
+
+Using those two as output fields aggregates all samples together and shows a
+breakdown.
+
+ $ perf mem report -F mem,snoop
+ ...
+ # ------ Memory ------- --- Snoop ----
+ # RAM Uncach Other HitM Other
+ # ..................... ..............
+ #
+ 3.5% 0.0% 96.5% 25.1% 74.9%
+
+But using the same name for sort keys will aggregate samples for each type
+separately.
+
+ $ perf mem report -s mem,snoop
+ # Overhead Samples Memory access Snoop
+ # ........ ............ ....................................... ............
+ #
+ 47.99% 1509 L2 hit N/A
+ 25.08% 338 core, same node Any cache hit HitM
+ 10.24% 54374 N/A N/A
+ 6.77% 35938 L1 hit N/A
+ 6.39% 101 core, same node Any cache hit N/A
+ 3.50% 69 RAM hit N/A
+ 0.03% 158 LFB/MAB hit N/A
+ 0.00% 2 Uncached hit N/A
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-arm-spe[1]
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index fdf133c9520f..d2d6d7f3ea33 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -18,7 +18,6 @@
#include <stdlib.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
-#include <linux/prctl.h>
#include <linux/zalloc.h>
#include <sys/time.h>
#include <sys/mman.h>
diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c
index 26382e4d8d4c..4c4fee107e59 100644
--- a/tools/perf/bench/futex.c
+++ b/tools/perf/bench/futex.c
@@ -2,11 +2,18 @@
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
-#include <linux/prctl.h>
#include <sys/prctl.h>
#include "futex.h"
+#ifndef PR_FUTEX_HASH
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define FH_FLAG_IMMUTABLE (1ULL << 0)
+# define PR_FUTEX_HASH_GET_SLOTS 2
+# define PR_FUTEX_HASH_GET_IMMUTABLE 3
+#endif // PR_FUTEX_HASH
+
void futex_set_nbuckets_param(struct bench_futex_parameters *params)
{
unsigned long flags;
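With the fallback definitions above, the bench builds against older uapi headers and can still issue the new prctl at run time. A short sketch of querying the current bucket count, using only the constants defined here:

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_FUTEX_HASH
    #define PR_FUTEX_HASH                   78
    # define PR_FUTEX_HASH_GET_SLOTS        2
    #endif

    int main(void)
    {
            /* returns the futex hash-slot count, or -1 with errno set
             * on kernels without PR_FUTEX_HASH support */
            int slots = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS, 0, 0, 0);

            if (slots < 0)
                    perror("prctl(PR_FUTEX_HASH)");
            else
                    printf("futex hash slots: %d\n", slots);
            return 0;
    }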
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index e9fab20e9330..8085e4d1d8af 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -186,7 +186,7 @@ done
# diff with extra ignore lines
check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include <linux/cfi_types.h>"'
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
-check arch/x86/include/asm/amd/ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"'
+check arch/x86/include/asm/amd/ibs.h '-I "^#include .*/msr-index.h"'
check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"'
check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"'
check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
diff --git a/tools/perf/tests/shell/stat+event_uniquifying.sh b/tools/perf/tests/shell/stat+event_uniquifying.sh
index 5ec35c52b7d9..bf54bd6c3e2e 100755
--- a/tools/perf/tests/shell/stat+event_uniquifying.sh
+++ b/tools/perf/tests/shell/stat+event_uniquifying.sh
@@ -9,7 +9,8 @@ perf_tool=perf
err=0
test_event_uniquifying() {
- # We use `clockticks` to verify the uniquify behavior.
+ # We use `clockticks` in `uncore_imc` to verify the uniquify behavior.
+ pmu="uncore_imc"
event="clockticks"
# If the `-A` option is added, the event should be uniquified.
@@ -43,11 +44,18 @@ test_event_uniquifying() {
echo "stat event uniquifying test"
uniquified_event_array=()
+ # Skip if the machine does not have an `uncore_imc` device.
+ if ! ${perf_tool} list pmu | grep -q ${pmu}; then
+ echo "Target does not support PMU ${pmu} [Skipped]"
+ err=2
+ return
+ fi
+
# Check how many uniquified events.
while IFS= read -r line; do
uniquified_event=$(echo "$line" | awk '{print $1}')
uniquified_event_array+=("${uniquified_event}")
- done < <(${perf_tool} list -v ${event} | grep "\[Kernel PMU event\]")
+ done < <(${perf_tool} list -v ${event} | grep ${pmu})
perf_command="${perf_tool} stat -e $event -A -o ${stat_output} -- true"
$perf_command
diff --git a/tools/perf/tests/tests-scripts.c b/tools/perf/tests/tests-scripts.c
index 1d5759d08141..3a2a8438f9af 100644
--- a/tools/perf/tests/tests-scripts.c
+++ b/tools/perf/tests/tests-scripts.c
@@ -260,6 +260,7 @@ static void append_scripts_in_dir(int dir_fd,
continue; /* Skip scripts that have a separate driver. */
fd = openat(dir_fd, ent->d_name, O_PATH);
append_scripts_in_dir(fd, result, result_sz);
+ close(fd);
}
for (i = 0; i < n_dirs; i++) /* Clean up */
zfree(&entlist[i]);
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index c3322eb3d686..3b262487ec06 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -168,7 +168,7 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr
return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg);
}
-static inline size_t msg_data_left(struct msghdr *msg)
+static inline size_t msg_data_left(const struct msghdr *msg)
{
return iov_iter_count(&msg->msg_iter);
}
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h
index e762e1af650c..0098b0ce8ccb 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fs.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h
@@ -361,6 +361,7 @@ typedef int __bitwise __kernel_rwf_t;
#define PAGE_IS_PFNZERO (1 << 5)
#define PAGE_IS_HUGE (1 << 6)
#define PAGE_IS_SOFT_DIRTY (1 << 7)
+#define PAGE_IS_GUARD (1 << 8)
/*
* struct page_region - Page region with flags
diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
index 15c18ef4eb11..43dec6eed559 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
@@ -364,4 +364,11 @@ struct prctl_mm_map {
# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
+/* FUTEX hash management */
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define FH_FLAG_IMMUTABLE (1ULL << 0)
+# define PR_FUTEX_HASH_GET_SLOTS 2
+# define PR_FUTEX_HASH_GET_IMMUTABLE 3
+
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h
index f78ee3670dd5..1686861aae20 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/stat.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h
@@ -182,8 +182,12 @@ struct statx {
/* File offset alignment for direct I/O reads */
__u32 stx_dio_read_offset_align;
- /* 0xb8 */
- __u64 __spare3[9]; /* Spare space for future expansion */
+ /* Optimised max atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max_opt;
+ __u32 __spare2[1];
+
+ /* 0xc0 */
+ __u64 __spare3[8]; /* Spare space for future expansion */
/* 0x100 */
};
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
index 178b00205fe6..89979ca23c3f 100644
--- a/tools/perf/util/include/linux/linkage.h
+++ b/tools/perf/util/include/linux/linkage.h
@@ -132,4 +132,8 @@
SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#endif
+#ifndef SYM_PIC_ALIAS
+#define SYM_PIC_ALIAS(sym) SYM_ALIAS(__pi_ ## sym, sym, SYM_T_FUNC, SYM_L_GLOBAL)
+#endif
+
#endif /* PERF_LINUX_LINKAGE_H_ */
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index a786cbfb0ff5..83aaf7cda635 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -268,6 +268,7 @@ bool is_event_supported(u8 type, u64 config)
ret = evsel__open(evsel, NULL, tmap) >= 0;
}
+ evsel__close(evsel);
evsel__delete(evsel);
}
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index be8dfac14076..c43db1c41205 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -73,6 +73,7 @@ sbindir ?= /usr/sbin
mandir ?= /usr/man
libdir ?= /usr/lib
libexecdir ?= /usr/libexec
+unitdir ?= /usr/lib/systemd/system
includedir ?= /usr/include
localedir ?= /usr/share/locale
docdir ?= /usr/share/doc/packages/cpupower
@@ -309,9 +310,9 @@ install-tools: $(OUTPUT)cpupower
$(INSTALL_DATA) cpupower-service.conf '$(DESTDIR)${confdir}'
$(INSTALL) -d $(DESTDIR)${libexecdir}
$(INSTALL_PROGRAM) cpupower.sh '$(DESTDIR)${libexecdir}/cpupower'
- $(INSTALL) -d $(DESTDIR)${libdir}/systemd/system
- sed 's|___CDIR___|${confdir}|; s|___LDIR___|${libexecdir}|' cpupower.service.in > '$(DESTDIR)${libdir}/systemd/system/cpupower.service'
- $(SETPERM_DATA) '$(DESTDIR)${libdir}/systemd/system/cpupower.service'
+ $(INSTALL) -d $(DESTDIR)${unitdir}
+ sed 's|___CDIR___|${confdir}|; s|___LDIR___|${libexecdir}|' cpupower.service.in > '$(DESTDIR)${unitdir}/cpupower.service'
+ $(SETPERM_DATA) '$(DESTDIR)${unitdir}/cpupower.service'
install-man:
$(INSTALL_DATA) -D man/cpupower.1 $(DESTDIR)${mandir}/man1/cpupower.1
@@ -348,7 +349,7 @@ uninstall:
- rm -f $(DESTDIR)${bindir}/utils/cpupower
- rm -f $(DESTDIR)${confdir}cpupower-service.conf
- rm -f $(DESTDIR)${libexecdir}/cpupower
- - rm -f $(DESTDIR)${libdir}/systemd/system/cpupower.service
+ - rm -f $(DESTDIR)${unitdir}/cpupower.service
- rm -f $(DESTDIR)${mandir}/man1/cpupower.1
- rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1
- rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-info.1
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index e2a2c46c008b..3d8378972d26 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -21,7 +21,6 @@ test_lirc_mode2_user
flow_dissector_load
test_tcpnotify_user
test_libbpf
-test_sysctl
xdping
test_cpp
*.d
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index cf5ed3bee573..910d8d6402ef 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -73,7 +73,7 @@ endif
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \
test_sockmap \
- test_tcpnotify_user test_sysctl \
+ test_tcpnotify_user \
test_progs-no_alu32
TEST_INST_SUBDIRS := no_alu32
@@ -220,7 +220,7 @@ ifeq ($(VMLINUX_BTF),)
$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
endif
-# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
+# Define simple and short `make test_progs`, `make test_maps`, etc targets
# to build individual tests.
# NOTE: Semicolon at the end is critical to override lib.mk's default static
# rule for binaries.
@@ -329,7 +329,6 @@ NETWORK_HELPERS := $(OUTPUT)/network_helpers.o
$(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS)
$(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS)
-$(OUTPUT)/test_sysctl: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_tag: $(TESTING_HELPERS)
$(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS)
$(OUTPUT)/xdping: $(TESTING_HELPERS)
diff --git a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c
index 8100509e561b..0ffa01d54ce2 100644
--- a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c
@@ -149,3 +149,70 @@ close_prog:
fentry_recursive_target__destroy(target_skel);
fentry_recursive__destroy(tracing_skel);
}
+
+static void *fentry_target_test_run(void *arg)
+{
+ for (;;) {
+ int prog_fd = __atomic_load_n((int *)arg, __ATOMIC_SEQ_CST);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err;
+
+ if (prog_fd == -1)
+ break;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "fentry_target test_run"))
+ break;
+ }
+
+ return NULL;
+}
+
+void test_fentry_attach_stress(void)
+{
+ struct fentry_recursive_target *target_skel = NULL;
+ struct fentry_recursive *tracing_skel = NULL;
+ struct bpf_program *prog;
+ int err, i, tgt_prog_fd;
+ pthread_t thread;
+
+ target_skel = fentry_recursive_target__open_and_load();
+ if (!ASSERT_OK_PTR(target_skel,
+ "fentry_recursive_target__open_and_load"))
+ goto close_prog;
+ tgt_prog_fd = bpf_program__fd(target_skel->progs.fentry_target);
+ err = pthread_create(&thread, NULL,
+ fentry_target_test_run, &tgt_prog_fd);
+ if (!ASSERT_OK(err, "bpf_program__set_attach_target"))
+ goto close_prog;
+
+ for (i = 0; i < 1000; i++) {
+ tracing_skel = fentry_recursive__open();
+ if (!ASSERT_OK_PTR(tracing_skel, "fentry_recursive__open"))
+ goto stop_thread;
+
+ prog = tracing_skel->progs.recursive_attach;
+ err = bpf_program__set_attach_target(prog, tgt_prog_fd,
+ "fentry_target");
+ if (!ASSERT_OK(err, "bpf_program__set_attach_target"))
+ goto stop_thread;
+
+ err = fentry_recursive__load(tracing_skel);
+ if (!ASSERT_OK(err, "fentry_recursive__load"))
+ goto stop_thread;
+
+ err = fentry_recursive__attach(tracing_skel);
+ if (!ASSERT_OK(err, "fentry_recursive__attach"))
+ goto stop_thread;
+
+ fentry_recursive__destroy(tracing_skel);
+ tracing_skel = NULL;
+ }
+
+stop_thread:
+ __atomic_store_n(&tgt_prog_fd, -1, __ATOMIC_SEQ_CST);
+ err = pthread_join(thread, NULL);
+ ASSERT_OK(err, "pthread_join");
+close_prog:
+ fentry_recursive__destroy(tracing_skel);
+ fentry_recursive_target__destroy(target_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
index 4be6fdb78c6a..594441acb707 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -116,6 +116,8 @@ static void test_snprintf_negative(void)
ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7");
ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
+ ASSERT_ERR(load_single_snprintf("%p%"), "invalid specifier 8");
+ ASSERT_ERR(load_single_snprintf("%s%"), "invalid specifier 9");
}
void test_snprintf(void)
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c
index bcdbd27f22f0..273dd41ca09e 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c
@@ -1,22 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <fcntl.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <linux/filter.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include <bpf/bpf_endian.h>
-#include "bpf_util.h"
+#include "test_progs.h"
#include "cgroup_helpers.h"
-#include "testing_helpers.h"
#define CG_PATH "/foo"
#define MAX_INSNS 512
@@ -1608,26 +1594,19 @@ static int run_tests(int cgfd)
return fails ? -1 : 0;
}
-int main(int argc, char **argv)
+void test_sysctl(void)
{
- int cgfd = -1;
- int err = 0;
+ int cgfd;
cgfd = cgroup_setup_and_join(CG_PATH);
- if (cgfd < 0)
- goto err;
+ if (!ASSERT_OK_FD(cgfd, "create_cgroup"))
+ goto out;
- /* Use libbpf 1.0 API mode */
- libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+ if (!ASSERT_OK(run_tests(cgfd), "run_tests"))
+ goto out;
- if (run_tests(cgfd))
- goto err;
-
- goto out;
-err:
- err = -1;
out:
close(cgfd);
cleanup_cgroup_environment();
- return err;
+ return;
}
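+
+/* For future migrations, the minimal prog_tests shape used above is
+ * (sketch only; test_progs discovers functions with the test_ prefix):
+ *
+ * void test_foo(void)
+ * {
+ * int cgfd = cgroup_setup_and_join("/foo");
+ *
+ * if (!ASSERT_OK_FD(cgfd, "create_cgroup"))
+ * goto out;
+ * ... run checks ...
+ * out:
+ * close(cgfd);
+ * cleanup_cgroup_environment();
+ * }
+ */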
diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
index a3f220ba7025..ee65bad0436d 100644
--- a/tools/testing/selftests/bpf/progs/test_global_map_resize.c
+++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
@@ -32,6 +32,16 @@ int my_int_last SEC(".data.array_not_last");
int percpu_arr[1] SEC(".data.percpu_arr");
+/* At least one extern is included, to ensure that a specific regression
+ * is tested: resizing used to cause a use-after-free bug once the type
+ * information was invalidated by the resize operation.
+ *
+ * There isn't a particularly good API to test for this specific condition,
+ * but having externs for the resizing tests will cover this path.
+ */
+extern int LINUX_KERNEL_VERSION __kconfig;
+long version_sink;
+
SEC("tp/syscalls/sys_enter_getpid")
int bss_array_sum(void *ctx)
{
@@ -44,6 +54,9 @@ int bss_array_sum(void *ctx)
for (size_t i = 0; i < bss_array_len; ++i)
sum += array[i];
+ /* see above; ensure this is not optimized out */
+ version_sink = LINUX_KERNEL_VERSION;
+
return 0;
}
@@ -59,6 +72,9 @@ int data_array_sum(void *ctx)
for (size_t i = 0; i < data_array_len; ++i)
sum += my_array[i];
+ /* see above; ensure this is not optimized out */
+ version_sink = LINUX_KERNEL_VERSION;
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
index a7c0a553aa50..3e2d76ee8050 100644
--- a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
+++ b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2024 Google LLC. */
#include <vmlinux.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -82,4 +83,21 @@ int BPF_PROG(path_d_path_from_file_argument, struct file *file)
return 0;
}
+SEC("lsm.s/inode_rename")
+__success
+int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ struct inode *inode = new_dentry->d_inode;
+ ino_t ino;
+
+ if (!inode)
+ return 0;
+ ino = inode->i_ino;
+ if (ino == 0)
+ return -EACCES;
+ return 0;
+}
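+
+/* The rejecting twin of this program in verifier_vfs_reject.c (later in
+ * this patch) is identical except that it omits the NULL check on
+ * new_dentry->d_inode; the verifier then flags the i_ino dereference as
+ * "invalid mem access 'trusted_ptr_or_null_'".
+ */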
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c
index d6d3f4fcb24c..4b392c6c8fc4 100644
--- a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c
+++ b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2024 Google LLC. */
#include <vmlinux.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <linux/limits.h>
@@ -158,4 +159,18 @@ int BPF_PROG(path_d_path_kfunc_non_lsm, struct path *path, struct file *f)
return 0;
}
+SEC("lsm.s/inode_rename")
+__failure __msg("invalid mem access 'trusted_ptr_or_null_'")
+int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ struct inode *inode = new_dentry->d_inode;
+ ino_t ino;
+
+ ino = inode->i_ino;
+ if (ino == 0)
+ return -EACCES;
+ return 0;
+}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index fda7589c5023..0921939532c6 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -138,6 +138,18 @@ static int sched_next_online(int pid, int *next_to_try)
return ret;
}
+/* Derive target_free from map_size, same as bpf_common_lru_populate */
+static unsigned int __tgt_size(unsigned int map_size)
+{
+ return (map_size / nr_cpus) / 2;
+}
+
+/* Inverse of how bpf_common_lru_populate derives target_free from map_size. */
+static unsigned int __map_size(unsigned int tgt_free)
+{
+ return tgt_free * nr_cpus * 2;
+}
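+
+/* Worked example (hypothetical, assuming nr_cpus == 4):
+ * __tgt_size(128) == (128 / 4) / 2 == 16
+ * __map_size(16) == 16 * 4 * 2 == 128
+ * i.e. __map_size(__tgt_size(n)) == n whenever n is a multiple of
+ * 2 * nr_cpus.
+ */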
+
/* Size of the LRU map is 2
* Add key=1 (+1 key)
* Add key=2 (+1 key)
@@ -231,11 +243,11 @@ static void test_lru_sanity0(int map_type, int map_flags)
printf("Pass\n");
}
-/* Size of the LRU map is 1.5*tgt_free
- * Insert 1 to tgt_free (+tgt_free keys)
- * Lookup 1 to tgt_free/2
- * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys)
- * => 1+tgt_free/2 to LOCALFREE_TARGET will be removed by LRU
+/* Verify that unreferenced elements are recycled before referenced ones.
+ * Insert elements.
+ * Reference a subset of these.
+ * Insert more, enough to trigger recycling.
+ * Verify that the unreferenced ones are recycled.
*/
static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -257,7 +269,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
batch_size = tgt_free / 2;
assert(batch_size * 2 == tgt_free);
- map_size = tgt_free + batch_size;
+ map_size = __map_size(tgt_free) + batch_size;
lru_map_fd = create_map(map_type, map_flags, map_size);
assert(lru_map_fd != -1);
@@ -266,13 +278,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to tgt_free (+tgt_free keys) */
- end_key = 1 + tgt_free;
+ /* Insert map_size - batch_size keys */
+ end_key = 1 + __map_size(tgt_free);
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Lookup 1 to tgt_free/2 */
+ /* Lookup 1 to batch_size */
end_key = 1 + batch_size;
for (key = 1; key < end_key; key++) {
assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
@@ -280,12 +292,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
BPF_NOEXIST));
}
- /* Insert 1+tgt_free to 2*tgt_free
- * => 1+tgt_free/2 to LOCALFREE_TARGET will be
+ /* Insert another map_size - batch_size keys
+ * Map will contain keys 1 to batch_size plus these latest ones,
+ * i.e., the previous keys 1+batch_size to map_size - batch_size will have been
* removed by LRU
*/
- key = 1 + tgt_free;
- end_key = key + tgt_free;
+ key = 1 + __map_size(tgt_free);
+ end_key = key + __map_size(tgt_free);
for (; key < end_key; key++) {
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -301,17 +314,8 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
printf("Pass\n");
}
-/* Size of the LRU map 1.5 * tgt_free
- * Insert 1 to tgt_free (+tgt_free keys)
- * Update 1 to tgt_free/2
- * => The original 1 to tgt_free/2 will be removed due to
- * the LRU shrink process
- * Re-insert 1 to tgt_free/2 again and do a lookup immeidately
- * Insert 1+tgt_free to tgt_free*3/2
- * Insert 1+tgt_free*3/2 to tgt_free*5/2
- * => Key 1+tgt_free to tgt_free*3/2
- * will be removed from LRU because it has never
- * been lookup and ref bit is not set
+/* Verify that insertions exceeding map size will recycle the oldest.
+ * Verify that unreferenced elements are recycled before referenced.
*/
static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -334,7 +338,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
batch_size = tgt_free / 2;
assert(batch_size * 2 == tgt_free);
- map_size = tgt_free + batch_size;
+ map_size = __map_size(tgt_free) + batch_size;
lru_map_fd = create_map(map_type, map_flags, map_size);
assert(lru_map_fd != -1);
@@ -343,8 +347,8 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to tgt_free (+tgt_free keys) */
- end_key = 1 + tgt_free;
+ /* Insert map_size - batch_size keys */
+ end_key = 1 + __map_size(tgt_free);
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -357,8 +361,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
* shrink the inactive list to get tgt_free
* number of free nodes.
*
- * Hence, the oldest key 1 to tgt_free/2
- * are removed from the LRU list.
+ * Hence, the oldest key is removed from the LRU list.
*/
key = 1;
if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
@@ -370,8 +373,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
BPF_EXIST));
}
- /* Re-insert 1 to tgt_free/2 again and do a lookup
- * immeidately.
+ /* Re-insert 1 to batch_size again and do a lookup immediately.
*/
end_key = 1 + batch_size;
value[0] = 4321;
@@ -387,17 +389,18 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1+tgt_free to tgt_free*3/2 */
- end_key = 1 + tgt_free + batch_size;
- for (key = 1 + tgt_free; key < end_key; key++)
+ /* Insert batch_size new elements */
+ key = 1 + __map_size(tgt_free);
+ end_key = key + batch_size;
+ for (; key < end_key; key++)
/* These newly added but not referenced keys will be
* gone during the next LRU shrink.
*/
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Insert 1+tgt_free*3/2 to tgt_free*5/2 */
- end_key = key + tgt_free;
+ /* Insert map_size - batch_size elements */
+ end_key += __map_size(tgt_free);
for (; key < end_key; key++) {
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -413,12 +416,12 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
printf("Pass\n");
}
-/* Size of the LRU map is 2*tgt_free
- * It is to test the active/inactive list rotation
- * Insert 1 to 2*tgt_free (+2*tgt_free keys)
- * Lookup key 1 to tgt_free*3/2
- * Add 1+2*tgt_free to tgt_free*5/2 (+tgt_free/2 keys)
- * => key 1+tgt_free*3/2 to 2*tgt_free are removed from LRU
+/* Test the active/inactive list rotation
+ *
+ * Fill the whole map, deplete the free list.
+ * Reference all except the last lru->target_free elements.
+ * Insert lru->target_free new elements. This triggers one shrink.
+ * Verify that the non-referenced elements are replaced.
*/
static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -437,8 +440,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
assert(sched_next_online(0, &next_cpu) != -1);
- batch_size = tgt_free / 2;
- assert(batch_size * 2 == tgt_free);
+ batch_size = __tgt_size(tgt_free);
map_size = tgt_free * 2;
lru_map_fd = create_map(map_type, map_flags, map_size);
@@ -449,23 +451,21 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to 2*tgt_free (+2*tgt_free keys) */
- end_key = 1 + (2 * tgt_free);
+ /* Fill the map */
+ end_key = 1 + map_size;
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Lookup key 1 to tgt_free*3/2 */
- end_key = tgt_free + batch_size;
+ /* Reference all but the last batch_size */
+ end_key = 1 + map_size - batch_size;
for (key = 1; key < end_key; key++) {
assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
assert(!bpf_map_update_elem(expected_map_fd, &key, value,
BPF_NOEXIST));
}
- /* Add 1+2*tgt_free to tgt_free*5/2
- * (+tgt_free/2 keys)
- */
+ /* Insert batch_size new elements: these replace the non-referenced ones */
key = 2 * tgt_free + 1;
end_key = key + batch_size;
for (; key < end_key; key++) {
@@ -500,7 +500,8 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
lru_map_fd = create_map(map_type, map_flags,
3 * tgt_free * nr_cpus);
else
- lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free);
+ lru_map_fd = create_map(map_type, map_flags,
+ 3 * __map_size(tgt_free));
assert(lru_map_fd != -1);
expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0,
diff --git a/tools/testing/selftests/coredump/Makefile b/tools/testing/selftests/coredump/Makefile
index ed210037b29d..77b3665c73c7 100644
--- a/tools/testing/selftests/coredump/Makefile
+++ b/tools/testing/selftests/coredump/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS = $(KHDR_INCLUDES)
+CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
TEST_GEN_PROGS := stackdump_test
TEST_FILES := stackdump
diff --git a/tools/testing/selftests/coredump/config b/tools/testing/selftests/coredump/config
new file mode 100644
index 000000000000..a05ef112b4f9
--- /dev/null
+++ b/tools/testing/selftests/coredump/config
@@ -0,0 +1,3 @@
+CONFIG_COREDUMP=y
+CONFIG_NET=y
+CONFIG_UNIX=y
diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c
index 9984413be9f0..5a5a7a5f7e1d 100644
--- a/tools/testing/selftests/coredump/stackdump_test.c
+++ b/tools/testing/selftests/coredump/stackdump_test.c
@@ -1,12 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libgen.h>
+#include <limits.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
#include <linux/limits.h>
#include <pthread.h>
#include <string.h>
#include <sys/mount.h>
+#include <poll.h>
+#include <sys/epoll.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/socket.h>
@@ -14,16 +20,23 @@
#include <unistd.h>
#include "../kselftest_harness.h"
+#include "../filesystems/wrappers.h"
#include "../pidfd/pidfd.h"
#define STACKDUMP_FILE "stack_values"
#define STACKDUMP_SCRIPT "stackdump"
#define NUM_THREAD_SPAWN 128
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
static void *do_nothing(void *)
{
while (1)
pause();
+
+ return NULL;
}
static void crashing_child(void)
@@ -42,16 +55,32 @@ FIXTURE(coredump)
{
char original_core_pattern[256];
pid_t pid_coredump_server;
+ int fd_tmpfs_detached;
};
+static int create_detached_tmpfs(void)
+{
+ int fd_context, fd_tmpfs;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ if (fd_context < 0)
+ return -1;
+
+ if (sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0)
+ return -1;
+
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ close(fd_context);
+ return fd_tmpfs;
+}
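+/* Files created relative to this fd never show up in the host mount
+ * table; the tests below use it as (see open_coredump_tmpfile()):
+ *
+ * fd = openat(fd_tmpfs_detached, ".", O_TMPFILE | O_RDWR | O_EXCL, 0600);
+ */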
+
FIXTURE_SETUP(coredump)
{
- char buf[PATH_MAX];
FILE *file;
- char *dir;
int ret;
self->pid_coredump_server = -ESRCH;
+ self->fd_tmpfs_detached = -1;
file = fopen("/proc/sys/kernel/core_pattern", "r");
ASSERT_NE(NULL, file);
@@ -60,6 +89,8 @@ FIXTURE_SETUP(coredump)
ASSERT_LT(ret, sizeof(self->original_core_pattern));
self->original_core_pattern[ret] = '\0';
+ self->fd_tmpfs_detached = create_detached_tmpfs();
+ ASSERT_GE(self->fd_tmpfs_detached, 0);
ret = fclose(file);
ASSERT_EQ(0, ret);
@@ -98,6 +129,15 @@ FIXTURE_TEARDOWN(coredump)
goto fail;
}
+ if (self->fd_tmpfs_detached >= 0) {
+ ret = close(self->fd_tmpfs_detached);
+ if (ret < 0) {
+ reason = "Unable to close detached tmpfs";
+ goto fail;
+ }
+ self->fd_tmpfs_detached = -1;
+ }
+
return;
fail:
/* This should never happen */
@@ -106,11 +146,10 @@ fail:
TEST_F_TIMEOUT(coredump, stackdump, 120)
{
- struct sigaction action = {};
unsigned long long stack;
char *test_dir, *line;
size_t line_length;
- char buf[PATH_MAX];
+ char buf[PAGE_SIZE];
int ret, i, status;
FILE *file;
pid_t pid;
@@ -169,153 +208,166 @@ TEST_F_TIMEOUT(coredump, stackdump, 120)
fclose(file);
}
+static int create_and_listen_unix_socket(const char *path)
+{
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ };
+ size_t addr_len;
+ int fd, ret;
+
+ assert(strlen(path) < sizeof(addr.sun_path) - 1);
+ strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
+ addr_len = offsetof(struct sockaddr_un, sun_path) + strlen(path) + 1;
+
+ fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ goto out;
+
+ ret = bind(fd, (const struct sockaddr *)&addr, addr_len);
+ if (ret < 0)
+ goto out;
+
+ ret = listen(fd, 128);
+ if (ret < 0)
+ goto out;
+
+ return fd;
+
+out:
+ if (fd >= 0)
+ close(fd);
+ return -1;
+}
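+
+/* The matching client side (used by the userspace-client test below) is
+ * a plain connect() against the same path:
+ *
+ * struct sockaddr_un addr = {
+ * .sun_family = AF_UNIX,
+ * .sun_path = "/tmp/coredump.socket",
+ * };
+ * fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ * connect(fd, (const struct sockaddr *)&addr,
+ * offsetof(struct sockaddr_un, sun_path) + sizeof("/tmp/coredump.socket"));
+ */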
+
+static bool set_core_pattern(const char *pattern)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open("/proc/sys/kernel/core_pattern", O_WRONLY | O_CLOEXEC);
+ if (fd < 0)
+ return false;
+
+ ret = write(fd, pattern, strlen(pattern));
+ close(fd);
+ if (ret < 0)
+ return false;
+
+ fprintf(stderr, "Set core_pattern to '%s' | %zu == %zu\n", pattern, ret, strlen(pattern));
+ return ret == strlen(pattern);
+}
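+
+/* Two core_pattern prefixes are exercised below: "@/path" streams the
+ * dump straight to the listening socket, while "@@/path" additionally
+ * runs the request/ack negotiation used by the socket_request_* tests.
+ */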
+
+static int get_peer_pidfd(int fd)
+{
+ int fd_peer_pidfd;
+ socklen_t fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
+ int ret = getsockopt(fd, SOL_SOCKET, SO_PEERPIDFD, &fd_peer_pidfd,
+ &fd_peer_pidfd_len);
+ if (ret < 0) {
+ fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n");
+ return -1;
+ }
+ return fd_peer_pidfd;
+}
+
+static bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info)
+{
+ memset(info, 0, sizeof(*info));
+ info->mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ return ioctl(fd_peer_pidfd, PIDFD_GET_INFO, info) == 0;
+}
+
+static void
+wait_and_check_coredump_server(pid_t pid_coredump_server,
+ struct __test_metadata *const _metadata,
+ FIXTURE_DATA(coredump) *self)
+{
+ int status;
+ waitpid(pid_coredump_server, &status, 0);
+ self->pid_coredump_server = -ESRCH;
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+}
+
TEST_F(coredump, socket)
{
- int fd, pidfd, ret, status;
- FILE *file;
+ int pidfd, ret, status;
pid_t pid, pid_coredump_server;
struct stat st;
- char core_file[PATH_MAX];
struct pidfd_info info = {};
int ipc_sockets[2];
char c;
- const struct sockaddr_un coredump_sk = {
- .sun_family = AF_UNIX,
- .sun_path = "/tmp/coredump.socket",
- };
- size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
- sizeof("/tmp/coredump.socket");
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
ASSERT_EQ(ret, 0);
- file = fopen("/proc/sys/kernel/core_pattern", "w");
- ASSERT_NE(file, NULL);
-
- ret = fprintf(file, "@/tmp/coredump.socket");
- ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
- ASSERT_EQ(fclose(file), 0);
-
pid_coredump_server = fork();
ASSERT_GE(pid_coredump_server, 0);
if (pid_coredump_server == 0) {
- int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file;
- socklen_t fd_peer_pidfd_len;
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
close(ipc_sockets[0]);
- fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
if (fd_server < 0)
- _exit(EXIT_FAILURE);
-
- ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
- if (ret < 0) {
- fprintf(stderr, "Failed to bind coredump socket\n");
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
-
- ret = listen(fd_server, 1);
- if (ret < 0) {
- fprintf(stderr, "Failed to listen on coredump socket\n");
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
+ goto out;
- if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
close(ipc_sockets[1]);
fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0) {
- fprintf(stderr, "Failed to accept coredump socket connection\n");
- close(fd_server);
- _exit(EXIT_FAILURE);
- }
+ if (fd_coredump < 0)
+ goto out;
- fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
- ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD,
- &fd_peer_pidfd, &fd_peer_pidfd_len);
- if (ret < 0) {
- fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n");
- close(fd_coredump);
- close(fd_server);
- _exit(EXIT_FAILURE);
- }
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
- memset(&info, 0, sizeof(info));
- info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
- ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info);
- if (ret < 0) {
- fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
- if (!(info.mask & PIDFD_INFO_COREDUMP)) {
- fprintf(stderr, "Missing coredump information from coredumping task\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
- if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
- fprintf(stderr, "Received connection from non-coredumping task\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
fd_core_file = creat("/tmp/coredump.file", 0644);
- if (fd_core_file < 0) {
- fprintf(stderr, "Failed to create coredump file\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (fd_core_file < 0)
+ goto out;
for (;;) {
char buffer[4096];
ssize_t bytes_read, bytes_write;
bytes_read = read(fd_coredump, buffer, sizeof(buffer));
- if (bytes_read < 0) {
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- close(fd_core_file);
- _exit(EXIT_FAILURE);
- }
+ if (bytes_read < 0)
+ goto out;
if (bytes_read == 0)
break;
bytes_write = write(fd_core_file, buffer, bytes_read);
- if (bytes_read != bytes_write) {
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- close(fd_core_file);
- _exit(EXIT_FAILURE);
- }
+ if (bytes_read != bytes_write)
+ goto out;
}
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- close(fd_core_file);
- _exit(EXIT_SUCCESS);
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
}
self->pid_coredump_server = pid_coredump_server;
@@ -335,48 +387,27 @@ TEST_F(coredump, socket)
ASSERT_TRUE(WIFSIGNALED(status));
ASSERT_TRUE(WCOREDUMP(status));
- info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
- ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
- waitpid(pid_coredump_server, &status, 0);
- self->pid_coredump_server = -ESRCH;
- ASSERT_TRUE(WIFEXITED(status));
- ASSERT_EQ(WEXITSTATUS(status), 0);
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
ASSERT_GT(st.st_size, 0);
- /*
- * We should somehow validate the produced core file.
- * For now just allow for visual inspection
- */
system("file /tmp/coredump.file");
}
TEST_F(coredump, socket_detect_userspace_client)
{
- int fd, pidfd, ret, status;
- FILE *file;
+ int pidfd, ret, status;
pid_t pid, pid_coredump_server;
struct stat st;
- char core_file[PATH_MAX];
struct pidfd_info info = {};
int ipc_sockets[2];
char c;
- const struct sockaddr_un coredump_sk = {
- .sun_family = AF_UNIX,
- .sun_path = "/tmp/coredump.socket",
- };
- size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
- sizeof("/tmp/coredump.socket");
-
- file = fopen("/proc/sys/kernel/core_pattern", "w");
- ASSERT_NE(file, NULL);
- ret = fprintf(file, "@/tmp/coredump.socket");
- ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
- ASSERT_EQ(fclose(file), 0);
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
ASSERT_EQ(ret, 0);
@@ -384,88 +415,49 @@ TEST_F(coredump, socket_detect_userspace_client)
pid_coredump_server = fork();
ASSERT_GE(pid_coredump_server, 0);
if (pid_coredump_server == 0) {
- int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file;
- socklen_t fd_peer_pidfd_len;
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
close(ipc_sockets[0]);
- fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
if (fd_server < 0)
- _exit(EXIT_FAILURE);
+ goto out;
- ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
- if (ret < 0) {
- fprintf(stderr, "Failed to bind coredump socket\n");
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
-
- ret = listen(fd_server, 1);
- if (ret < 0) {
- fprintf(stderr, "Failed to listen on coredump socket\n");
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
-
- if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
- close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
- }
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
close(ipc_sockets[1]);
fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
- if (fd_coredump < 0) {
- fprintf(stderr, "Failed to accept coredump socket connection\n");
- close(fd_server);
- _exit(EXIT_FAILURE);
- }
+ if (fd_coredump < 0)
+ goto out;
- fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
- ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD,
- &fd_peer_pidfd, &fd_peer_pidfd_len);
- if (ret < 0) {
- fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n");
- close(fd_coredump);
- close(fd_server);
- _exit(EXIT_FAILURE);
- }
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
- memset(&info, 0, sizeof(info));
- info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
- ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info);
- if (ret < 0) {
- fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
- if (!(info.mask & PIDFD_INFO_COREDUMP)) {
- fprintf(stderr, "Missing coredump information from coredumping task\n");
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (info.coredump_mask & PIDFD_COREDUMPED)
+ goto out;
- if (info.coredump_mask & PIDFD_COREDUMPED) {
- fprintf(stderr, "Received unexpected connection from coredumping task\n");
+ if (read(fd_coredump, &c, 1) < 1)
+ goto out;
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
close(fd_coredump);
+ if (fd_server >= 0)
close(fd_server);
- close(fd_peer_pidfd);
- _exit(EXIT_FAILURE);
- }
-
- close(fd_coredump);
- close(fd_server);
- close(fd_peer_pidfd);
- close(fd_core_file);
- _exit(EXIT_SUCCESS);
+ _exit(exit_code);
}
self->pid_coredump_server = pid_coredump_server;
@@ -478,17 +470,22 @@ TEST_F(coredump, socket_detect_userspace_client)
if (pid == 0) {
int fd_socket;
ssize_t ret;
+ const struct sockaddr_un coredump_sk = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/tmp/coredump.socket",
+ };
+ size_t coredump_sk_len =
+ offsetof(struct sockaddr_un, sun_path) +
+ sizeof("/tmp/coredump.socket");
fd_socket = socket(AF_UNIX, SOCK_STREAM, 0);
if (fd_socket < 0)
_exit(EXIT_FAILURE);
-
ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
if (ret < 0)
_exit(EXIT_FAILURE);
- (void *)write(fd_socket, &(char){ 0 }, 1);
+ (void)write(fd_socket, &(char){ 0 }, 1);
close(fd_socket);
_exit(EXIT_SUCCESS);
}
@@ -500,15 +497,11 @@ TEST_F(coredump, socket_detect_userspace_client)
ASSERT_TRUE(WIFEXITED(status));
ASSERT_EQ(WEXITSTATUS(status), 0);
- info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
- ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0);
- waitpid(pid_coredump_server, &status, 0);
- self->pid_coredump_server = -ESRCH;
- ASSERT_TRUE(WIFEXITED(status));
- ASSERT_EQ(WEXITSTATUS(status), 0);
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
ASSERT_NE(stat("/tmp/coredump.file", &st), 0);
ASSERT_EQ(errno, ENOENT);
@@ -516,17 +509,10 @@ TEST_F(coredump, socket_detect_userspace_client)
TEST_F(coredump, socket_enoent)
{
- int pidfd, ret, status;
- FILE *file;
+ int pidfd, status;
pid_t pid;
- char core_file[PATH_MAX];
-
- file = fopen("/proc/sys/kernel/core_pattern", "w");
- ASSERT_NE(file, NULL);
- ret = fprintf(file, "@/tmp/coredump.socket");
- ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
- ASSERT_EQ(fclose(file), 0);
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
pid = fork();
ASSERT_GE(pid, 0);
@@ -544,7 +530,6 @@ TEST_F(coredump, socket_enoent)
TEST_F(coredump, socket_no_listener)
{
int pidfd, ret, status;
- FILE *file;
pid_t pid, pid_coredump_server;
int ipc_sockets[2];
char c;
@@ -555,45 +540,518 @@ TEST_F(coredump, socket_no_listener)
size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
sizeof("/tmp/coredump.socket");
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
ASSERT_EQ(ret, 0);
- file = fopen("/proc/sys/kernel/core_pattern", "w");
- ASSERT_NE(file, NULL);
-
- ret = fprintf(file, "@/tmp/coredump.socket");
- ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
- ASSERT_EQ(fclose(file), 0);
-
pid_coredump_server = fork();
ASSERT_GE(pid_coredump_server, 0);
if (pid_coredump_server == 0) {
- int fd_server;
- socklen_t fd_peer_pidfd_len;
+ int fd_server = -1;
+ int exit_code = EXIT_FAILURE;
close(ipc_sockets[0]);
fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
if (fd_server < 0)
- _exit(EXIT_FAILURE);
+ goto out;
ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
- if (ret < 0) {
- fprintf(stderr, "Failed to bind coredump socket\n");
+ if (ret < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_server >= 0)
close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
+ close(ipc_sockets[1]);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+static ssize_t recv_marker(int fd)
+{
+ enum coredump_mark mark = COREDUMP_MARK_REQACK;
+ ssize_t ret;
+
+ ret = recv(fd, &mark, sizeof(mark), MSG_WAITALL);
+ if (ret != sizeof(mark))
+ return -1;
+
+ switch (mark) {
+ case COREDUMP_MARK_REQACK:
+ fprintf(stderr, "Received marker: ReqAck\n");
+ return COREDUMP_MARK_REQACK;
+ case COREDUMP_MARK_MINSIZE:
+ fprintf(stderr, "Received marker: MinSize\n");
+ return COREDUMP_MARK_MINSIZE;
+ case COREDUMP_MARK_MAXSIZE:
+ fprintf(stderr, "Received marker: MaxSize\n");
+ return COREDUMP_MARK_MAXSIZE;
+ case COREDUMP_MARK_UNSUPPORTED:
+ fprintf(stderr, "Received marker: Unsupported\n");
+ return COREDUMP_MARK_UNSUPPORTED;
+ case COREDUMP_MARK_CONFLICTING:
+ fprintf(stderr, "Received marker: Conflicting\n");
+ return COREDUMP_MARK_CONFLICTING;
+ default:
+ fprintf(stderr, "Received unknown marker: %u\n", mark);
+ break;
+ }
+ return -1;
+}
+
+static bool read_marker(int fd, enum coredump_mark mark)
+{
+ ssize_t ret;
+
+ ret = recv_marker(fd);
+ if (ret < 0)
+ return false;
+ return ret == mark;
+}
+
+static bool read_coredump_req(int fd, struct coredump_req *req)
+{
+ ssize_t ret;
+ size_t field_size, user_size, ack_size, kernel_size, remaining_size;
+
+ memset(req, 0, sizeof(*req));
+ field_size = sizeof(req->size);
+
+ /* Peek the size of the coredump request. */
+ ret = recv(fd, req, field_size, MSG_PEEK | MSG_WAITALL);
+ if (ret != field_size)
+ return false;
+ kernel_size = req->size;
+
+ if (kernel_size < COREDUMP_ACK_SIZE_VER0)
+ return false;
+ if (kernel_size >= PAGE_SIZE)
+ return false;
+
+ /* Use the minimum of user and kernel size to read the full request. */
+ user_size = sizeof(struct coredump_req);
+ ack_size = user_size < kernel_size ? user_size : kernel_size;
+ ret = recv(fd, req, ack_size, MSG_WAITALL);
+ if (ret != ack_size)
+ return false;
+
+ fprintf(stderr, "Read coredump request with size %u and mask 0x%llx\n",
+ req->size, (unsigned long long)req->mask);
+
+ if (kernel_size > user_size)
+ remaining_size = kernel_size - user_size;
+ else
+ remaining_size = 0;
+
+ if (PAGE_SIZE <= remaining_size)
+ return false;
+
+ /*
+ * Discard any additional data if the kernel's request was larger than
+ * what we knew about or cared about.
+ */
+ if (remaining_size) {
+ char buffer[PAGE_SIZE];
+
+ ret = recv(fd, buffer, remaining_size, MSG_WAITALL);
+ if (ret != remaining_size)
+ return false;
+ fprintf(stderr, "Discarded %zu bytes of data after coredump request\n", remaining_size);
+ }
+
+ return true;
+}
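+
+/* Worked example of the negotiation above (hypothetical sizes): if the
+ * kernel announces req->size == 48 but this binary was built with a
+ * 32-byte struct coredump_req, we read the 32 bytes we understand and
+ * discard the trailing 16; a request smaller than COREDUMP_ACK_SIZE_VER0,
+ * or one of PAGE_SIZE or more, is rejected outright.
+ */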
+
+static bool send_coredump_ack(int fd, const struct coredump_req *req,
+ __u64 mask, size_t size_ack)
+{
+ ssize_t ret;
+ /*
+ * Wrap struct coredump_ack in a larger struct so we can
+ * simulate sending to much data to the kernel.
+ */
+ struct large_ack_for_size_testing {
+ struct coredump_ack ack;
+ char buffer[PAGE_SIZE];
+ } large_ack = {};
+
+ if (!size_ack)
+ size_ack = sizeof(struct coredump_ack) < req->size_ack ?
+ sizeof(struct coredump_ack) :
+ req->size_ack;
+ large_ack.ack.mask = mask;
+ large_ack.ack.size = size_ack;
+ ret = send(fd, &large_ack, size_ack, MSG_NOSIGNAL);
+ if (ret != size_ack)
+ return false;
+
+ fprintf(stderr, "Sent coredump ack with size %zu and mask 0x%llx\n",
+ size_ack, (unsigned long long)mask);
+ return true;
+}
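+
+/* Passing size_ack == 0 sends the size negotiated from the request; the
+ * socket_request_invalid_size_{small,large} tests below override it to
+ * provoke the COREDUMP_MARK_MINSIZE and COREDUMP_MARK_MAXSIZE replies.
+ */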
+
+static bool check_coredump_req(const struct coredump_req *req, size_t min_size,
+ __u64 required_mask)
+{
+ if (req->size < min_size)
+ return false;
+ if ((req->mask & required_mask) != required_mask)
+ return false;
+ if (req->mask & ~required_mask)
+ return false;
+ return true;
+}
+
+TEST_F(coredump, socket_request_kernel)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct stat st;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_core_file = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ fd_core_file = creat("/tmp/coredump.file", 0644);
+ if (fd_core_file < 0)
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_WAIT, 0))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
+ goto out;
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0)
+ goto out;
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write)
+ goto out;
}
- if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
close(fd_server);
- close(ipc_sockets[1]);
- _exit(EXIT_FAILURE);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+
+ ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
+ ASSERT_GT(st.st_size, 0);
+ system("file /tmp/coredump.file");
+}
+
+TEST_F(coredump, socket_request_userspace)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_USERSPACE | COREDUMP_WAIT, 0))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
+ goto out;
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read > 0)
+ goto out;
+
+ if (bytes_read < 0)
+ goto out;
+
+ if (bytes_read == 0)
+ break;
}
- close(fd_server);
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_reject)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
close(ipc_sockets[1]);
- _exit(EXIT_SUCCESS);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT, 0))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
+ goto out;
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read > 0)
+ goto out;
+
+ if (bytes_read < 0)
+ goto out;
+
+ if (bytes_read == 0)
+ break;
+ }
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
}
self->pid_coredump_server = pid_coredump_server;
@@ -613,10 +1071,760 @@ TEST_F(coredump, socket_no_listener)
ASSERT_TRUE(WIFSIGNALED(status));
ASSERT_FALSE(WCOREDUMP(status));
- waitpid(pid_coredump_server, &status, 0);
- self->pid_coredump_server = -ESRCH;
- ASSERT_TRUE(WIFEXITED(status));
- ASSERT_EQ(WEXITSTATUS(status), 0);
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_flag_combination)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_REJECT | COREDUMP_WAIT, 0))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_CONFLICTING))
+ goto out;
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_unknown_flag)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req, (1ULL << 63), 0))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_UNSUPPORTED))
+ goto out;
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_size_small)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT,
+ COREDUMP_ACK_SIZE_VER0 / 2))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_MINSIZE))
+ goto out;
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_size_large)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0)
+ goto out;
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP))
+ goto out;
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT,
+ COREDUMP_ACK_SIZE_VER0 + PAGE_SIZE))
+ goto out;
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_MAXSIZE))
+ goto out;
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+static int open_coredump_tmpfile(int fd_tmpfs_detached)
+{
+ return openat(fd_tmpfs_detached, ".", O_TMPFILE | O_RDWR | O_EXCL, 0600);
+}
+
+#define NUM_CRASHING_COREDUMPS 5
+
+TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps, 500)
+{
+ int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
+ pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
+ struct coredump_req req = {};
+
+ close(ipc_sockets[0]);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "Failed to create and listen on unix socket\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "Failed to notify parent via ipc socket\n");
+ goto out;
+ }
+ close(ipc_sockets[1]);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "get_peer_pidfd failed for fd %d: %m\n", fd_coredump);
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "get_pidfd_info failed for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "pidfd info missing PIDFD_INFO_COREDUMP for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "pidfd info missing PIDFD_COREDUMPED for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "read_coredump_req failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "check_coredump_req failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "send_coredump_ack failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "read_marker failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "%m - open_coredump_tmpfile failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "read failed for fd %d: %m\n", fd_coredump);
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ fprintf(stderr, "write failed for fd %d: %m\n", fd_core_file);
+ goto out;
+ }
+ }
+
+ close(fd_core_file);
+ close(fd_peer_pidfd);
+ close(fd_coredump);
+ fd_peer_pidfd = -1;
+ fd_coredump = -1;
+ }
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ pid[i] = fork();
+ ASSERT_GE(pid[i], 0);
+ if (pid[i] == 0)
+ crashing_child();
+ pidfd[i] = sys_pidfd_open(pid[i], 0);
+ ASSERT_GE(pidfd[i], 0);
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ waitpid(pid[i], &status[i], 0);
+ ASSERT_TRUE(WIFSIGNALED(status[i]));
+ ASSERT_TRUE(WCOREDUMP(status[i]));
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+ }
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+#define MAX_EVENTS 128
+
+static void process_coredump_worker(int fd_coredump, int fd_peer_pidfd, int fd_core_file)
+{
+ int epfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ epfd = epoll_create1(0);
+ if (epfd < 0)
+ goto out;
+
+ struct epoll_event ev;
+ ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET;
+ ev.data.fd = fd_coredump;
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd_coredump, &ev) < 0)
+ goto out;
+
+ for (;;) {
+ struct epoll_event events[1];
+ int n = epoll_wait(epfd, events, 1, -1);
+ if (n < 0)
+ break;
+
+ if (events[0].events & (EPOLLIN | EPOLLRDHUP)) {
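+			/*
+			 * Edge-triggered (EPOLLET): drain the socket until
+			 * EAGAIN or EOF, or we may never be woken up again.
+			 */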
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ break;
+ goto out;
+ }
+ if (bytes_read == 0)
+ goto done;
+ ssize_t bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_write != bytes_read)
+ goto out;
+ }
+ }
+ }
+
+done:
+ exit_code = EXIT_SUCCESS;
+out:
+ if (epfd >= 0)
+ close(epfd);
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ _exit(exit_code);
+}
+
+TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps_epoll_workers, 500)
+{
+ int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
+ pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server, worker_pids[NUM_CRASHING_COREDUMPS];
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, exit_code = EXIT_FAILURE, n_conns = 0;
+ close(ipc_sockets[0]);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0)
+ goto out;
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ goto out;
+ close(ipc_sockets[1]);
+
+ while (n_conns < NUM_CRASHING_COREDUMPS) {
+ int fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ struct coredump_req req = {};
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ continue;
+ goto out;
+ }
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0)
+ goto out;
+ if (!get_pidfd_info(fd_peer_pidfd, &info))
+ goto out;
+ if (!(info.mask & PIDFD_INFO_COREDUMP) || !(info.coredump_mask & PIDFD_COREDUMPED))
+ goto out;
+ if (!read_coredump_req(fd_coredump, &req))
+ goto out;
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT))
+ goto out;
+ if (!send_coredump_ack(fd_coredump, &req, COREDUMP_KERNEL | COREDUMP_WAIT, 0))
+ goto out;
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
+ goto out;
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0)
+ goto out;
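+			/* Hand the accepted connection off to a dedicated worker. */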
+ pid_t worker = fork();
+ if (worker == 0) {
+ close(fd_server);
+ process_coredump_worker(fd_coredump, fd_peer_pidfd, fd_core_file);
+ }
+ worker_pids[n_conns] = worker;
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ n_conns++;
+ }
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_server >= 0)
+ close(fd_server);
+
+ // Reap all worker processes
+ for (int i = 0; i < n_conns; i++) {
+ int wstatus;
+ if (waitpid(worker_pids[i], &wstatus, 0) < 0) {
+ fprintf(stderr, "Failed to wait for worker %d: %m\n", worker_pids[i]);
+ } else if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != EXIT_SUCCESS) {
+ fprintf(stderr, "Worker %d exited with error code %d\n", worker_pids[i], WEXITSTATUS(wstatus));
+ exit_code = EXIT_FAILURE;
+ }
+ }
+
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ pid[i] = fork();
+ ASSERT_GE(pid[i], 0);
+ if (pid[i] == 0)
+ crashing_child();
+ pidfd[i] = sys_pidfd_open(pid[i], 0);
+ ASSERT_GE(pidfd[i], 0);
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ ASSERT_GE(waitpid(pid[i], &status[i], 0), 0);
+ ASSERT_TRUE(WIFSIGNALED(status[i]));
+ ASSERT_TRUE(WCOREDUMP(status[i]));
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+ }
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_invalid_paths)
+{
+ ASSERT_FALSE(set_core_pattern("@ /tmp/coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@/tmp/../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@/tmp/coredump.socket/.."));
+ ASSERT_FALSE(set_core_pattern("@.."));
+
+ ASSERT_FALSE(set_core_pattern("@@ /tmp/coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@/tmp/../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@/tmp/coredump.socket/.."));
+ ASSERT_FALSE(set_core_pattern("@@.."));
+
+ ASSERT_FALSE(set_core_pattern("@@@/tmp/coredump.socket"));
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
index ca60ae325c22..7bb552f8b182 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -747,6 +747,62 @@ def test_rss_ntuple_addition(cfg):
'noise' : (0,) })
+def test_rss_default_context_rule(cfg):
+ """
+ Allocate a port, direct this port to context 0, then create a new RSS
+ context and steer all TCP traffic to it (context 1). Verify that:
+ * Traffic to the specific port continues to use the queues of the
+ main context (queues 0/1).
+ * Traffic to any other TCP port is redirected to the new context
+ (queues 2/3).
+ """
+
+ require_ntuple(cfg)
+
+ queue_cnt = len(_get_rx_cnts(cfg))
+ if queue_cnt < 4:
+ try:
+ ksft_pr(f"Increasing queue count {queue_cnt} -> 4")
+ ethtool(f"-L {cfg.ifname} combined 4")
+ defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}")
+ except Exception as exc:
+ raise KsftSkipEx("Not enough queues for the test") from exc
+
+ # Use queues 0 and 1 for the main context
+ ethtool(f"-X {cfg.ifname} equal 2")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ # Create a new RSS context that uses queues 2 and 3
+ ctx_id = ethtool_create(cfg, "-X", "context new start 2 equal 2")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ # Generic low-priority rule: redirect all TCP traffic to the new context.
+ # Give it an explicit higher location number (lower priority).
+ flow_generic = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} context {ctx_id} loc 1"
+ ethtool(f"-N {cfg.ifname} {flow_generic}")
+ defer(ethtool, f"-N {cfg.ifname} delete 1")
+
+ # Specific high-priority rule for a random port that should stay on context 0.
+ # Assign loc 0 so it is evaluated before the generic rule.
+ port_main = rand_port()
+ flow_main = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_main} context 0 loc 0"
+ ethtool(f"-N {cfg.ifname} {flow_main}")
+ defer(ethtool, f"-N {cfg.ifname} delete 0")
+
+ _ntuple_rule_check(cfg, 1, ctx_id)
+
+ # Verify that traffic matching the specific rule still goes to queues 0/1
+ _send_traffic_check(cfg, port_main, "context 0",
+ { 'target': (0, 1),
+ 'empty' : (2, 3) })
+
+ # And that traffic for any other port is steered to the new context
+ port_other = rand_port()
+ _send_traffic_check(cfg, port_other, f"context {ctx_id}",
+ { 'target': (2, 3),
+ 'noise' : (0, 1) })
+
+
def main() -> None:
with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
cfg.context_cnt = None
@@ -760,7 +816,8 @@ def main() -> None:
test_rss_context_overlap, test_rss_context_overlap2,
test_rss_context_out_of_order, test_rss_context4_create_with_cfg,
test_flow_add_context_missing,
- test_delete_rss_context_busy, test_rss_ntuple_addition],
+ test_delete_rss_context_busy, test_rss_ntuple_addition,
+ test_rss_default_context_rule],
args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
index f439c434ba36..648ff50bc1c3 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -38,7 +38,7 @@ def test_rss_input_xfrm(cfg, ipver):
raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
input_xfrm = cfg.ethnl.rss_get(
- {'header': {'dev-name': cfg.ifname}}).get('input_xfrm')
+ {'header': {'dev-name': cfg.ifname}}).get('input-xfrm')
# Check for symmetric xor/or-xor
if not input_xfrm or (input_xfrm != 1 and input_xfrm != 2):
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
index d9c10613ae67..44151b7b1a24 100644
--- a/tools/testing/selftests/drivers/net/lib/py/load.py
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
+import re
import time
from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen
@@ -10,12 +11,11 @@ class GenerateTraffic:
self.env = env
- if port is None:
- port = rand_port()
- self._iperf_server = cmd(f"iperf3 -s -1 -p {port}", background=True)
- wait_port_listen(port)
+ self.port = rand_port() if port is None else port
+ self._iperf_server = cmd(f"iperf3 -s -1 -p {self.port}", background=True)
+ wait_port_listen(self.port)
time.sleep(0.1)
- self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400",
+ self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {self.port} -t 86400",
background=True, host=env.remote)
# Wait for traffic to ramp up
@@ -56,3 +56,16 @@ class GenerateTraffic:
ksft_pr(">> Server:")
ksft_pr(self._iperf_server.stdout)
ksft_pr(self._iperf_server.stderr)
+ self._wait_client_stopped()
+
+ def _wait_client_stopped(self, sleep=0.005, timeout=5):
+ end = time.monotonic() + timeout
+
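+ # Ports in /proc/net/tcp are hex; socket state 06 is TIME_WAIT, so
+ # match any entry for our port still in a non-TIME_WAIT state.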
+ live_port_pattern = re.compile(fr":{self.port:04X} 0[^6] ")
+
+ while time.monotonic() < end:
+ data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout
+ if not live_port_pattern.search(data):
+ return
+ time.sleep(sleep)
+ raise Exception(f"Waiting for client to stop timed out after {timeout}s")
diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
index 1bb46ec435d4..7f32b5600925 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/peer.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
@@ -1,7 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0-only
-source ../../../net/lib.sh
+lib_dir=$(dirname $0)/../../../net
+source $lib_dir/lib.sh
NSIM_DEV_1_ID=$((256 + RANDOM % 256))
NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index 7b24ae89594a..776ad658f75e 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -11,3 +11,4 @@ futex_wait_timeout
futex_wait_uninitialized_heap
futex_wait_wouldblock
futex_waitv
+futex_numa
diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
index 20a9d3ecf743..a9ecfb2d3932 100644
--- a/tools/testing/selftests/futex/functional/futex_numa_mpol.c
+++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
@@ -144,7 +144,7 @@ int main(int argc, char *argv[])
struct futex32_numa *futex_numa;
int mem_size, i;
void *futex_ptr;
- char c;
+ int c;
while ((c = getopt(argc, argv, "chv:")) != -1) {
switch (c) {
@@ -210,6 +210,10 @@ int main(int argc, char *argv[])
ret = mbind(futex_ptr, mem_size, MPOL_BIND, &nodemask,
sizeof(nodemask) * 8, 0);
if (ret == 0) {
+ ret = numa_set_mempolicy_home_node(futex_ptr, mem_size, i, 0);
+ if (ret != 0)
+ ksft_exit_fail_msg("Failed to set home node: %m, %d\n", errno);
+
ksft_print_msg("Node %d test\n", i);
futex_numa->futex = 0;
futex_numa->numa = FUTEX_NO_NODE;
@@ -220,8 +224,8 @@ int main(int argc, char *argv[])
if (0)
test_futex_mpol(futex_numa, 0);
if (futex_numa->numa != i) {
- ksft_test_result_fail("Returned NUMA node is %d expected %d\n",
- futex_numa->numa, i);
+ ksft_exit_fail_msg("Returned NUMA node is %d expected %d\n",
+ futex_numa->numa, i);
}
}
}
diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c
index 2dca18fefedc..24a92dc94eb8 100644
--- a/tools/testing/selftests/futex/functional/futex_priv_hash.c
+++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c
@@ -130,7 +130,7 @@ int main(int argc, char *argv[])
pthread_mutexattr_t mutex_attr_pi;
int use_global_hash = 0;
int ret;
- char c;
+ int c;
while ((c = getopt(argc, argv, "cghv:")) != -1) {
switch (c) {
diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h
index ea79662405bc..1f625b39948a 100644
--- a/tools/testing/selftests/futex/include/futex2test.h
+++ b/tools/testing/selftests/futex/include/futex2test.h
@@ -4,6 +4,7 @@
*
* Copyright 2021 Collabora Ltd.
*/
+#include <linux/time_types.h>
#include <stdint.h>
#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
@@ -65,7 +66,12 @@ struct futex32_numa {
static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters,
unsigned long flags, struct timespec *timo, clockid_t clockid)
{
- return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid);
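+	/* The kernel expects a struct __kernel_timespec, which may differ
+	 * from userspace's struct timespec on 32-bit targets. */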
+ struct __kernel_timespec ts = {
+ .tv_sec = timo->tv_sec,
+ .tv_nsec = timo->tv_nsec,
+ };
+
+ return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, &ts, clockid);
}
/*
diff --git a/tools/testing/selftests/hid/tests/test_mouse.py b/tools/testing/selftests/hid/tests/test_mouse.py
index 66daf7e5975c..eb4e15a0e53b 100644
--- a/tools/testing/selftests/hid/tests/test_mouse.py
+++ b/tools/testing/selftests/hid/tests/test_mouse.py
@@ -439,6 +439,68 @@ class BadResolutionMultiplierMouse(ResolutionMultiplierMouse):
return 32 # EPIPE
+class BadReportDescriptorMouse(BaseMouse):
+ """
+ This "device" was one autogenerated by syzbot. There are a lot of issues in
+ it, and the most problematic is that it declares features that have no
+ size.
+
+ This leads to report->size being set to 0 and can mess up with usbhid
+ internals. Fortunately, uhid merely passes the incoming buffer, without
+ touching it so a buffer of size 0 will be translated to [] without
+ triggering a kernel oops.
+
+ Because the report descriptor is wrong, no input are created, and we need
+ to tweak a little bit the parameters to make it look correct.
+ """
+
+ # fmt: off
+ report_descriptor = [
+ 0x96, 0x01, 0x00, # Report Count (1) 0
+ 0x06, 0x01, 0x00, # Usage Page (Generic Desktop) 3
+ # 0x03, 0x00, 0x00, 0x00, 0x00, # Ignored by the kernel somehow
+ 0x2a, 0x90, 0xa0, # Usage Maximum (41104) 6
+ 0x27, 0x00, 0x00, 0x00, 0x00, # Logical Maximum (0) 9
+ 0xb3, 0x81, 0x3e, 0x25, 0x03, # Feature (Cnst,Arr,Abs,Vol) 14
+ 0x1b, 0xdd, 0xe8, 0x40, 0x50, # Usage Minimum (1346431197) 19
+ 0x3b, 0x5d, 0x8c, 0x3d, 0xda, # Designator Index 24
+ ]
+ # fmt: on
+
+ def __init__(
+ self, rdesc=report_descriptor, name=None, input_info=(3, 0x045E, 0x07DA)
+ ):
+ super().__init__(rdesc, name, input_info)
+ self.high_resolution_report_called = False
+
+ def get_evdev(self, application=None):
+ assert self._input_nodes is None
+ return (
+ "Ok" # should be a list or None, but both would fail, so abusing the system
+ )
+
+ def next_sync_events(self, application=None):
+ # there are no evdev nodes, so no events
+ return []
+
+ def is_ready(self):
+ # we wait for the SET_REPORT command to come
+ return self.high_resolution_report_called
+
+ def set_report(self, req, rnum, rtype, data):
+ if rtype != self.UHID_FEATURE_REPORT:
+ raise InvalidHIDCommunication(f"Unexpected report type: {rtype}")
+ if rnum != 0x0:
+ raise InvalidHIDCommunication(f"Unexpected report number: {rnum}")
+
+ if len(data) != 1:
+ raise InvalidHIDCommunication(f"Unexpected data: {data}, expected '[0]'")
+
+ self.high_resolution_report_called = True
+
+ return 0
+
+
class ResolutionMultiplierHWheelMouse(TwoWheelMouse):
# fmt: off
report_descriptor = [
@@ -975,3 +1037,11 @@ class TestMiMouse(TestWheelMouse):
# assert below print out the real error
pass
assert remaining == []
+
+
+class TestBadReportDescriptorMouse(base.BaseTestCase.TestUhid):
+ def create_device(self):
+ return BadReportDescriptorMouse()
+
+ def assertName(self, uhdev):
+ pass
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index 1a8e85afe9aa..1926ef6b40ab 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -54,6 +54,8 @@ static __attribute__((constructor)) void setup_sizes(void)
mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
&mfd);
+ assert(mfd_buffer != MAP_FAILED);
+ assert(mfd > 0);
}
FIXTURE(iommufd)
@@ -1746,13 +1748,15 @@ TEST_F(iommufd_mock_domain, all_aligns)
unsigned int end;
uint8_t *buf;
int prot = PROT_READ | PROT_WRITE;
- int mfd;
+ int mfd = -1;
if (variant->file)
buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd);
else
buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0);
ASSERT_NE(MAP_FAILED, buf);
+ if (variant->file)
+ ASSERT_GT(mfd, 0);
check_refs(buf, buf_size, 0);
/*
@@ -1798,13 +1802,15 @@ TEST_F(iommufd_mock_domain, all_aligns_copy)
unsigned int end;
uint8_t *buf;
int prot = PROT_READ | PROT_WRITE;
- int mfd;
+ int mfd = -1;
if (variant->file)
buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd);
else
buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0);
ASSERT_NE(MAP_FAILED, buf);
+ if (variant->file)
+ ASSERT_GT(mfd, 0);
check_refs(buf, buf_size, 0);
/*
@@ -2008,6 +2014,7 @@ FIXTURE_VARIANT(iommufd_dirty_tracking)
FIXTURE_SETUP(iommufd_dirty_tracking)
{
+ size_t mmap_buffer_size;
unsigned long size;
int mmap_flags;
void *vrc;
@@ -2022,22 +2029,33 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
self->fd = open("/dev/iommu", O_RDWR);
ASSERT_NE(-1, self->fd);
- rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, variant->buffer_size);
- if (rc || !self->buffer) {
- SKIP(return, "Skipping buffer_size=%lu due to errno=%d",
- variant->buffer_size, rc);
- }
-
mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED;
+ mmap_buffer_size = variant->buffer_size;
if (variant->hugepages) {
/*
* MAP_POPULATE will cause the kernel to fail mmap if THPs are
* not available.
*/
mmap_flags |= MAP_HUGETLB | MAP_POPULATE;
+
+ /*
+ * Allocation must be aligned to the HUGEPAGE_SIZE, because the
+ * following mmap() will automatically align the length to be a
+ * multiple of the underlying huge page size. Failing to do the
+ * same at this allocation will result in a memory overwrite by
+ * the mmap().
+ */
+ if (mmap_buffer_size < HUGEPAGE_SIZE)
+ mmap_buffer_size = HUGEPAGE_SIZE;
+ }
+
+ rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, mmap_buffer_size);
+ if (rc || !self->buffer) {
+ SKIP(return, "Skipping buffer_size=%lu due to errno=%d",
+ mmap_buffer_size, rc);
}
assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0);
- vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE,
+ vrc = mmap(self->buffer, mmap_buffer_size, PROT_READ | PROT_WRITE,
mmap_flags, -1, 0);
assert(vrc == self->buffer);
@@ -2066,8 +2084,8 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
FIXTURE_TEARDOWN(iommufd_dirty_tracking)
{
- munmap(self->buffer, variant->buffer_size);
- munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE));
+ free(self->buffer);
+ free(self->bitmap);
teardown_iommufd(self->fd, _metadata);
}
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index 72f6636e5d90..6e967b58acfd 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -60,13 +60,18 @@ static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p)
{
int mfd_flags = (flags & MAP_HUGETLB) ? MFD_HUGETLB : 0;
int mfd = memfd_create("buffer", mfd_flags);
+ void *buf = MAP_FAILED;
if (mfd <= 0)
return MAP_FAILED;
if (ftruncate(mfd, length))
- return MAP_FAILED;
+ goto out;
*mfd_p = mfd;
- return mmap(0, length, prot, flags, mfd, 0);
+ buf = mmap(0, length, prot, flags, mfd, 0);
+out:
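+	/* Avoid leaking the memfd if ftruncate() or mmap() failed. */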
+ if (buf == MAP_FAILED)
+ close(mfd);
+ return buf;
}
/*
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
index a36a7e2db434..4e71740a098b 100644
--- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
+++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
@@ -22,7 +22,8 @@
#include "gic.h"
#include "vgic.h"
-static const uint64_t CVAL_MAX = ~0ULL;
+/* Depends on counter width. */
+static uint64_t CVAL_MAX;
/* tval is a signed 32-bit int. */
static const int32_t TVAL_MAX = INT32_MAX;
static const int32_t TVAL_MIN = INT32_MIN;
@@ -30,8 +31,8 @@ static const int32_t TVAL_MIN = INT32_MIN;
/* After how much time we say there is no IRQ. */
static const uint32_t TIMEOUT_NO_IRQ_US = 50000;
-/* A nice counter value to use as the starting one for most tests. */
-static const uint64_t DEF_CNT = (CVAL_MAX / 2);
+/* Counter value to use as the starting one for most tests. Set to CVAL_MAX/2. */
+static uint64_t DEF_CNT;
/* Number of runs. */
static const uint32_t NR_TEST_ITERS_DEF = 5;
@@ -191,8 +192,8 @@ static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
{
atomic_set(&shared_data.handled, 0);
atomic_set(&shared_data.spurious, 0);
- timer_set_ctl(timer, ctl);
timer_set_tval(timer, tval_cycles);
+ timer_set_ctl(timer, ctl);
}
static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
@@ -732,12 +733,6 @@ static void test_move_counters_ahead_of_timers(enum arch_timer timer)
test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1,
wm);
}
-
- for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
- sleep_method_t sm = sleep_method[i];
-
- test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm);
- }
}
/*
@@ -849,17 +844,17 @@ static void guest_code(enum arch_timer timer)
GUEST_DONE();
}
+static cpu_set_t default_cpuset;
+
static uint32_t next_pcpu(void)
{
uint32_t max = get_nprocs();
uint32_t cur = sched_getcpu();
uint32_t next = cur;
- cpu_set_t cpuset;
+ cpu_set_t cpuset = default_cpuset;
TEST_ASSERT(max > 1, "Need at least two physical cpus");
- sched_getaffinity(0, sizeof(cpuset), &cpuset);
-
do {
next = (next + 1) % CPU_SETSIZE;
} while (!CPU_ISSET(next, &cpuset));
@@ -959,6 +954,8 @@ static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
}
+static int gic_fd;
+
static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
enum arch_timer timer)
{
@@ -973,8 +970,18 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
vcpu_args_set(*vcpu, 1, timer);
test_init_timer_irq(*vm, *vcpu);
- vgic_v3_setup(*vm, 1, 64);
+ gic_fd = vgic_v3_setup(*vm, 1, 64);
+ __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
+
sync_global_to_guest(*vm, test_args);
+ sync_global_to_guest(*vm, CVAL_MAX);
+ sync_global_to_guest(*vm, DEF_CNT);
+}
+
+static void test_vm_cleanup(struct kvm_vm *vm)
+{
+ close(gic_fd);
+ kvm_vm_free(vm);
}
static void test_print_help(char *name)
@@ -986,7 +993,7 @@ static void test_print_help(char *name)
pr_info("\t-b: Test both physical and virtual timers (default: true)\n");
pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n",
LONG_WAIT_TEST_MS);
- pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n",
+ pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n",
WAIT_TEST_MS);
pr_info("\t-p: Test physical timer (default: true)\n");
pr_info("\t-v: Test virtual timer (default: true)\n");
@@ -1035,6 +1042,17 @@ static bool parse_args(int argc, char *argv[])
return false;
}
+static void set_counter_defaults(void)
+{
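+	/*
+	 * Derive a usable counter width from the architectural requirement
+	 * that the counter not roll over in less than 40 years at its
+	 * current frequency, clamped to the 56-64 bit range.
+	 */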
+ const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600;
+ uint64_t freq = read_sysreg(CNTFRQ_EL0);
+ uint64_t width = ilog2(MIN_ROLLOVER_SECS * freq);
+
+ width = clamp(width, 56, 64);
+ CVAL_MAX = GENMASK_ULL(width - 1, 0);
+ DEF_CNT = CVAL_MAX / 2;
+}
+
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
@@ -1046,16 +1064,19 @@ int main(int argc, char *argv[])
if (!parse_args(argc, argv))
exit(KSFT_SKIP);
+ sched_getaffinity(0, sizeof(default_cpuset), &default_cpuset);
+ set_counter_defaults();
+
if (test_args.test_virtual) {
test_vm_create(&vm, &vcpu, VIRTUAL);
test_run(vm, vcpu);
- kvm_vm_free(vm);
+ test_vm_cleanup(vm);
}
if (test_args.test_physical) {
test_vm_create(&vm, &vcpu, PHYSICAL);
test_run(vm, vcpu);
- kvm_vm_free(vm);
+ test_vm_cleanup(vm);
}
return 0;
diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
index 390ae2d87493..0eb371c62ab8 100644
--- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
+++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
@@ -74,6 +74,7 @@ int main(int argc, char *argv[])
int testcase;
char test[80];
+ TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
ksft_print_header();
diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config
index a28baa536332..deba93379c80 100644
--- a/tools/testing/selftests/mm/config
+++ b/tools/testing/selftests/mm/config
@@ -8,3 +8,6 @@ CONFIG_GUP_TEST=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_ANON_VMA_NAME=y
+CONFIG_FTRACE=y
+CONFIG_PROFILING=y
+CONFIG_UPROBES=y
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
index 8a97ac5176a4..29047d2e0c49 100644
--- a/tools/testing/selftests/mm/gup_longterm.c
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -298,8 +298,11 @@ static void run_with_memfd(test_fn fn, const char *desc)
log_test_start("%s ... with memfd", desc);
fd = memfd_create("test", 0);
- if (fd < 0)
+ if (fd < 0) {
ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
+ log_test_result(KSFT_SKIP);
+ return;
+ }
fn(fd, pagesize);
close(fd);
@@ -366,6 +369,8 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
fd = memfd_create("test", flags);
if (fd < 0) {
ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
+ log_test_result(KSFT_SKIP);
+ return;
}
fn(fd, hugetlbsize);
diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c
index bbae66fc5038..cc26480098ae 100644
--- a/tools/testing/selftests/mm/merge.c
+++ b/tools/testing/selftests/mm/merge.c
@@ -470,7 +470,9 @@ TEST_F(merge, handle_uprobe_upon_merged_vma)
ASSERT_GE(fd, 0);
ASSERT_EQ(ftruncate(fd, page_size), 0);
- ASSERT_EQ(read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type), 0);
+ if (read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type) != 0) {
+ SKIP(goto out, "Failed to read uprobe sysfs file, skipping");
+ }
memset(&attr, 0, attr_sz);
attr.size = attr_sz;
@@ -491,6 +493,7 @@ TEST_F(merge, handle_uprobe_upon_merged_vma)
ASSERT_NE(mremap(ptr2, page_size, page_size,
MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED);
+out:
close(fd);
remove(probe_file);
}
diff --git a/tools/testing/selftests/mm/settings b/tools/testing/selftests/mm/settings
index a953c96aa16e..e2206265f67c 100644
--- a/tools/testing/selftests/mm/settings
+++ b/tools/testing/selftests/mm/settings
@@ -1 +1 @@
-timeout=180
+timeout=900
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index aa7400ed0e99..f0d9c035641d 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -31,6 +31,7 @@ uint64_t pmd_pagesize;
#define INPUT_MAX 80
#define PID_FMT "%d,0x%lx,0x%lx,%d"
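+/* Same as PID_FMT, with a trailing offset argument for the split target. */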
+#define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d"
#define PATH_FMT "%s,0x%lx,0x%lx,%d"
#define PFN_MASK ((1UL<<55)-1)
@@ -483,7 +484,7 @@ void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc,
write_debugfs(PID_FMT, getpid(), (uint64_t)addr,
(uint64_t)addr + fd_size, order);
else
- write_debugfs(PID_FMT, getpid(), (uint64_t)addr,
+ write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)addr,
(uint64_t)addr + fd_size, order, offset);
for (i = 0; i < fd_size; i++)
diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c
index b380e102b22f..169dbd692bf5 100644
--- a/tools/testing/selftests/mm/virtual_address_range.c
+++ b/tools/testing/selftests/mm/virtual_address_range.c
@@ -77,8 +77,11 @@ static void validate_addr(char *ptr, int high_addr)
{
unsigned long addr = (unsigned long) ptr;
- if (high_addr && addr < HIGH_ADDR_MARK)
- ksft_exit_fail_msg("Bad address %lx\n", addr);
+ if (high_addr) {
+ if (addr < HIGH_ADDR_MARK)
+ ksft_exit_fail_msg("Bad address %lx\n", addr);
+ return;
+ }
if (addr > HIGH_ADDR_MARK)
ksft_exit_fail_msg("Bad address %lx\n", addr);
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 532bb732bc6d..c6dd2a335cf4 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -50,6 +50,7 @@ tap
tcp_fastopen_backup_key
tcp_inq
tcp_mmap
+tfo
timestamping
tls
toeplitz
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index ea84b88bcb30..332f387615d7 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -27,6 +27,7 @@ TEST_PROGS += amt.sh
TEST_PROGS += unicast_extensions.sh
TEST_PROGS += udpgro_fwd.sh
TEST_PROGS += udpgro_frglist.sh
+TEST_PROGS += nat6to4.sh
TEST_PROGS += veth.sh
TEST_PROGS += ioam6.sh
TEST_PROGS += gro.sh
@@ -109,6 +110,8 @@ TEST_GEN_PROGS += proc_net_pktgen
TEST_PROGS += lwt_dst_cache_ref_loop.sh
TEST_PROGS += skf_net_off.sh
TEST_GEN_FILES += skf_net_off
+TEST_GEN_FILES += tfo
+TEST_PROGS += tfo_passive.sh
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller netlink-dumps
diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c
index 3ed3882a93b8..b5f474969917 100644
--- a/tools/testing/selftests/net/af_unix/msg_oob.c
+++ b/tools/testing/selftests/net/af_unix/msg_oob.c
@@ -210,7 +210,7 @@ static void __sendpair(struct __test_metadata *_metadata,
static void __recvpair(struct __test_metadata *_metadata,
FIXTURE_DATA(msg_oob) *self,
const char *expected_buf, int expected_len,
- int buf_len, int flags)
+ int buf_len, int flags, bool is_sender)
{
int i, ret[2], recv_errno[2], expected_errno = 0;
char recv_buf[2][BUF_SZ] = {};
@@ -221,7 +221,9 @@ static void __recvpair(struct __test_metadata *_metadata,
errno = 0;
for (i = 0; i < 2; i++) {
- ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags);
+ int index = is_sender ? i * 2 : i * 2 + 1;
+
+ ret[i] = recv(self->fd[index], recv_buf[i], buf_len, flags);
recv_errno[i] = errno;
}
@@ -308,6 +310,20 @@ static void __siocatmarkpair(struct __test_metadata *_metadata,
ASSERT_EQ(answ[0], answ[1]);
}
+static void __resetpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ const FIXTURE_VARIANT(msg_oob) *variant,
+ bool reset)
+{
+ int i;
+
+ for (i = 0; i < 2; i++)
+ close(self->fd[i * 2 + 1]);
+
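+	/* Reading from the sender side sees -ECONNRESET if the receiver
+	 * was closed with data still queued, and plain EOF otherwise.
+	 */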
+ __recvpair(_metadata, self, "", reset ? -ECONNRESET : 0, 1,
+ variant->peek ? MSG_PEEK : 0, true);
+}
+
#define sendpair(buf, len, flags) \
__sendpair(_metadata, self, buf, len, flags)
@@ -316,9 +332,10 @@ static void __siocatmarkpair(struct __test_metadata *_metadata,
if (variant->peek) \
__recvpair(_metadata, self, \
expected_buf, expected_len, \
- buf_len, (flags) | MSG_PEEK); \
+ buf_len, (flags) | MSG_PEEK, false); \
__recvpair(_metadata, self, \
- expected_buf, expected_len, buf_len, flags); \
+ expected_buf, expected_len, \
+ buf_len, flags, false); \
} while (0)
#define epollpair(oob_remaining) \
@@ -330,6 +347,9 @@ static void __siocatmarkpair(struct __test_metadata *_metadata,
#define setinlinepair() \
__setinlinepair(_metadata, self)
+#define resetpair(reset) \
+ __resetpair(_metadata, self, variant, reset)
+
#define tcp_incompliant \
for (self->tcp_compliant = false; \
self->tcp_compliant == false; \
@@ -344,6 +364,21 @@ TEST_F(msg_oob, non_oob)
recvpair("", -EINVAL, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(true);
+}
+
+TEST_F(msg_oob, non_oob_no_reset)
+{
+ sendpair("x", 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, oob)
@@ -355,6 +390,19 @@ TEST_F(msg_oob, oob)
recvpair("x", 1, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(true);
+
+ tcp_incompliant {
+ resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */
+ }
+}
+
+TEST_F(msg_oob, oob_reset)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ resetpair(true);
}
TEST_F(msg_oob, oob_drop)
@@ -370,6 +418,8 @@ TEST_F(msg_oob, oob_drop)
recvpair("", -EINVAL, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, oob_ahead)
@@ -385,6 +435,10 @@ TEST_F(msg_oob, oob_ahead)
recvpair("hell", 4, 4, 0);
epollpair(false);
siocatmarkpair(true);
+
+ tcp_incompliant {
+ resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */
+ }
}
TEST_F(msg_oob, oob_break)
@@ -403,6 +457,8 @@ TEST_F(msg_oob, oob_break)
recvpair("", -EAGAIN, 1, 0);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, oob_ahead_break)
@@ -426,6 +482,8 @@ TEST_F(msg_oob, oob_ahead_break)
recvpair("world", 5, 5, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, oob_break_drop)
@@ -449,6 +507,8 @@ TEST_F(msg_oob, oob_break_drop)
recvpair("", -EINVAL, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, ex_oob_break)
@@ -476,6 +536,8 @@ TEST_F(msg_oob, ex_oob_break)
recvpair("ld", 2, 2, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, ex_oob_drop)
@@ -498,6 +560,8 @@ TEST_F(msg_oob, ex_oob_drop)
epollpair(false);
siocatmarkpair(true);
}
+
+ resetpair(false);
}
TEST_F(msg_oob, ex_oob_drop_2)
@@ -523,6 +587,8 @@ TEST_F(msg_oob, ex_oob_drop_2)
epollpair(false);
siocatmarkpair(true);
}
+
+ resetpair(false);
}
TEST_F(msg_oob, ex_oob_oob)
@@ -546,6 +612,54 @@ TEST_F(msg_oob, ex_oob_oob)
recvpair("", -EINVAL, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
+}
+
+TEST_F(msg_oob, ex_oob_ex_oob)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ tcp_incompliant {
+ resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */
+ }
+}
+
+TEST_F(msg_oob, ex_oob_ex_oob_oob)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ sendpair("z", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
}
TEST_F(msg_oob, ex_oob_ahead_break)
@@ -576,6 +690,10 @@ TEST_F(msg_oob, ex_oob_ahead_break)
recvpair("d", 1, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(true);
+
+ tcp_incompliant {
+ resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */
+ }
}
TEST_F(msg_oob, ex_oob_siocatmark)
@@ -595,6 +713,8 @@ TEST_F(msg_oob, ex_oob_siocatmark)
recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
epollpair(true);
siocatmarkpair(false);
+
+ resetpair(true);
}
TEST_F(msg_oob, inline_oob)
@@ -612,6 +732,8 @@ TEST_F(msg_oob, inline_oob)
recvpair("x", 1, 1, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_oob_break)
@@ -633,6 +755,8 @@ TEST_F(msg_oob, inline_oob_break)
recvpair("o", 1, 1, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_oob_ahead_break)
@@ -661,6 +785,8 @@ TEST_F(msg_oob, inline_oob_ahead_break)
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_ex_oob_break)
@@ -686,6 +812,8 @@ TEST_F(msg_oob, inline_ex_oob_break)
recvpair("rld", 3, 3, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_ex_oob_no_drop)
@@ -707,6 +835,8 @@ TEST_F(msg_oob, inline_ex_oob_no_drop)
recvpair("y", 1, 1, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_ex_oob_drop)
@@ -731,6 +861,8 @@ TEST_F(msg_oob, inline_ex_oob_drop)
epollpair(false);
siocatmarkpair(false);
}
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_ex_oob_siocatmark)
@@ -752,6 +884,8 @@ TEST_F(msg_oob, inline_ex_oob_siocatmark)
recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
epollpair(true);
siocatmarkpair(false);
+
+ resetpair(true);
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh
index 5b34f6e1f831..48eb999a3120 100755
--- a/tools/testing/selftests/net/gre_ipv6_lladdr.sh
+++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh
@@ -24,7 +24,10 @@ setup_basenet()
ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad
}
-# Check if network device has an IPv6 link-local address assigned.
+# Check the IPv6 configuration of a network device.
+#
+# We currently check the generation of the link-local IPv6 address and the
+# creation of the ff00::/8 multicast route.
#
# Parameters:
#
@@ -35,7 +38,7 @@ setup_basenet()
# a link-local address)
# * $4: The user visible name for the scenario being tested
#
-check_ipv6_ll_addr()
+check_ipv6_device_config()
{
local DEV="$1"
local EXTRA_MATCH="$2"
@@ -45,7 +48,11 @@ check_ipv6_ll_addr()
RET=0
set +e
ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}"
- check_err_fail "${XRET}" $? ""
+ check_err_fail "${XRET}" $? "IPv6 link-local address generation"
+
+ ip -netns "${NS0}" -6 route show table local type multicast ff00::/8 proto kernel | grep -q "${DEV}"
+ check_err_fail 0 $? "IPv6 multicast route creation"
+
log_test "${MSG}"
set -e
}
@@ -102,20 +109,20 @@ test_gre_device()
;;
esac
- # Check that IPv6 link-local address is generated when device goes up
+ # Check the IPv6 device configuration when it goes up
ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
ip -netns "${NS0}" link set dev gretest up
- check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}"
+ check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}"
# Now disable link-local address generation
ip -netns "${NS0}" link set dev gretest down
ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1
ip -netns "${NS0}" link set dev gretest up
- # Check that link-local address generation works when re-enabled while
- # the device is already up
+ # Check the IPv6 device configuration when link-local address
+ # generation is re-enabled while the device is already up
ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
- check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}"
+ check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}"
ip -netns "${NS0}" link del dev gretest
}
@@ -126,7 +133,7 @@ test_gre4()
local MODE
for GRE_TYPE in "gre" "gretap"; do
- printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n"
+ printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n"
for MODE in "eui64" "none" "stable-privacy" "random"; do
test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}"
@@ -142,7 +149,7 @@ test_gre6()
local MODE
for GRE_TYPE in "ip6gre" "ip6gretap"; do
- printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n"
+ printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n"
for MODE in "eui64" "none" "stable-privacy" "random"; do
test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}"
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 006fdadcc4b9..86a216e9aca8 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -312,7 +312,7 @@ log_test_result()
local test_name=$1; shift
local opt_str=$1; shift
local result=$1; shift
- local retmsg=$1; shift
+ local retmsg=$1
printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result"
if [[ $retmsg ]]; then
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index e47788bfa671..4c7e51336ab2 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -4,7 +4,8 @@ top_srcdir = ../../../../..
CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
-TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
+TEST_PROGS := mptcp_connect.sh mptcp_connect_mmap.sh mptcp_connect_sendfile.sh \
+ mptcp_connect_checksum.sh pm_netlink.sh mptcp_join.sh diag.sh \
simult_flows.sh mptcp_sockopt.sh userspace_pm.sh
TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh
new file mode 100755
index 000000000000..ce93ec2f107f
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -C "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh
new file mode 100755
index 000000000000..5dd30f9394af
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -m mmap "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh
new file mode 100755
index 000000000000..1d16fb1cc9bb
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -m sendfile "${@}"
diff --git a/tools/testing/selftests/net/nat6to4.sh b/tools/testing/selftests/net/nat6to4.sh
new file mode 100755
index 000000000000..0ee859b622a4
--- /dev/null
+++ b/tools/testing/selftests/net/nat6to4.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
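+# Attach the nat6to4 BPF program to loopback ingress and push one large
+# UDP multicast datagram through it.
+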
+NS="ns-peer-$(mktemp -u XXXXXX)"
+
+ip netns add "${NS}"
+ip -netns "${NS}" link set lo up
+ip -netns "${NS}" route add default via 127.0.0.2 dev lo
+
+tc -n "${NS}" qdisc add dev lo ingress
+tc -n "${NS}" filter add dev lo ingress prio 4 protocol ip \
+ bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action
+
+ip netns exec "${NS}" \
+ bash -c 'echo 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abc | socat - UDP4-DATAGRAM:224.1.0.1:6666,ip-multicast-loop=1'
diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore
index 64c4f8d9aa6c..5d2be9a00627 100644
--- a/tools/testing/selftests/net/netfilter/.gitignore
+++ b/tools/testing/selftests/net/netfilter/.gitignore
@@ -5,3 +5,4 @@ conntrack_dump_flush
conntrack_reverse_clash
sctp_collision
nf_queue
+udpclash
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
index e9b2f553588d..a98ed892f55f 100644
--- a/tools/testing/selftests/net/netfilter/Makefile
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -15,6 +15,7 @@ TEST_PROGS += conntrack_tcp_unreplied.sh
TEST_PROGS += conntrack_resize.sh
TEST_PROGS += conntrack_sctp_collision.sh
TEST_PROGS += conntrack_vrf.sh
+TEST_PROGS += conntrack_clash.sh
TEST_PROGS += conntrack_reverse_clash.sh
TEST_PROGS += ipvs.sh
TEST_PROGS += nf_conntrack_packetdrill.sh
@@ -44,6 +45,7 @@ TEST_GEN_FILES += connect_close nf_queue
TEST_GEN_FILES += conntrack_dump_flush
TEST_GEN_FILES += conntrack_reverse_clash
TEST_GEN_FILES += sctp_collision
+TEST_GEN_FILES += udpclash
include ../../lib.mk
@@ -52,6 +54,7 @@ $(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS)
$(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS)
$(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS)
+$(OUTPUT)/udpclash: LDLIBS += -lpthread
TEST_FILES := lib.sh
TEST_FILES += packetdrill
diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
new file mode 100755
index 000000000000..606a43a60f73
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
@@ -0,0 +1,174 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+clash_resolution_active=0
+dport=22111
+ret=0
+
+cleanup()
+{
+ # netns cleanup also zaps any remaining socat echo server.
+ cleanup_all_ns
+}
+
+checktool "nft --version" "run test without nft"
+checktool "conntrack --version" "run test without conntrack"
+checktool "socat -h" "run test without socat"
+
+trap cleanup EXIT
+
+setup_ns nsclient1 nsclient2 nsrouter
+
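+# DNAT test traffic to one of three UDP echo servers, chosen at random
+# per packet, to exercise clash resolution in the NAT case.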
+ip netns exec "$nsrouter" nft -f -<<EOF
+table ip t {
+ chain lb {
+ meta l4proto udp dnat to numgen random mod 3 map { 0 : 10.0.2.1 . 9000, 1 : 10.0.2.1 . 9001, 2 : 10.0.2.1 . 9002 }
+ }
+
+ chain prerouting {
+ type nat hook prerouting priority dstnat
+
+ udp dport $dport counter jump lb
+ }
+
+ chain output {
+ type nat hook output priority dstnat
+
+ udp dport $dport counter jump lb
+ }
+}
+EOF
+
+load_simple_ruleset()
+{
+ip netns exec "$1" nft -f -<<EOF
+table ip t {
+ chain forward {
+ type filter hook forward priority 0
+
+ ct state new counter
+ }
+}
+EOF
+}
+
+spawn_servers()
+{
+ local ns="$1"
+ local ports="9000 9001 9002"
+
+ for port in $ports; do
+ ip netns exec "$ns" socat UDP-RECVFROM:$port,fork PIPE 2>/dev/null &
+ done
+
+ for port in $ports; do
+ wait_local_port_listen "$ns" $port udp
+ done
+}
+
+add_addr()
+{
+ local ns="$1"
+ local dev="$2"
+ local i="$3"
+ local j="$4"
+
+ ip -net "$ns" link set "$dev" up
+ ip -net "$ns" addr add "10.0.$i.$j/24" dev "$dev"
+}
+
+ping_test()
+{
+ local ns="$1"
+ local daddr="$2"
+
+ if ! ip netns exec "$ns" ping -q -c 1 $daddr > /dev/null;then
+ echo "FAIL: ping from $ns to $daddr"
+ exit 1
+ fi
+}
+
+run_one_clash_test()
+{
+ local ns="$1"
+ local ctns="$2"
+ local daddr="$3"
+ local dport="$4"
+ local entries
+ local cre
+
+ if ! ip netns exec "$ns" ./udpclash $daddr $dport;then
+ echo "INFO: did not receive expected number of replies for $daddr:$dport"
+ ip netns exec "$ctns" conntrack -S
+ # don't fail: check if clash resolution triggered after all.
+ fi
+
+ entries=$(ip netns exec "$ctns" conntrack -S | wc -l)
+ cre=$(ip netns exec "$ctns" conntrack -S | grep "clash_resolve=0" | wc -l)
+
+ if [ "$cre" -ne "$entries" ];then
+ clash_resolution_active=1
+ return 0
+ fi
+
+ # not a failure: clash resolution logic did not trigger.
+ # With the right timing, transmissions completed sequentially and
+ # no parallel insertion occurred.
+ return $ksft_skip
+}
+
+run_clash_test()
+{
+ local ns="$1"
+ local ctns="$2"
+ local daddr="$3"
+ local dport="$4"
+ local softerr=0
+
+ for i in $(seq 1 10);do
+ run_one_clash_test "$ns" "$ctns" "$daddr" "$dport"
+ local rv=$?
+ if [ $rv -eq 0 ];then
+ echo "PASS: clash resolution test for $daddr:$dport on attempt $i"
+ return 0
+ elif [ $rv -eq $ksft_skip ]; then
+ softerr=1
+ fi
+ done
+
+ [ $softerr -eq 1 ] && echo "SKIP: clash resolution for $daddr:$dport did not trigger"
+}
+
+ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter"
+ip link add veth0 netns "$nsclient2" type veth peer name veth1 netns "$nsrouter"
+add_addr "$nsclient1" veth0 1 1
+add_addr "$nsclient2" veth0 2 1
+add_addr "$nsrouter" veth0 1 99
+add_addr "$nsrouter" veth1 2 99
+
+ip -net "$nsclient1" route add default via 10.0.1.99
+ip -net "$nsclient2" route add default via 10.0.2.99
+ip netns exec "$nsrouter" sysctl -q net.ipv4.ip_forward=1
+
+ping_test "$nsclient1" 10.0.1.99
+ping_test "$nsclient1" 10.0.2.1
+ping_test "$nsclient2" 10.0.1.1
+
+spawn_servers "$nsclient2"
+
+# exercise clash resolution with nat:
+# nsrouter is supposed to dnat to 10.0.2.1:900{0,1,2}.
+run_clash_test "$nsclient1" "$nsrouter" 10.0.1.99 "$dport"
+
+# exercise clash resolution without nat.
+load_simple_ruleset "$nsclient2"
+run_clash_test "$nsclient2" "$nsclient2" 127.0.0.1 9001
+
+if [ $clash_resolution_active -eq 0 ];then
+ [ "$ret" -eq 0 ] && ret=$ksft_skip
+ echo "SKIP: Clash resolution did not trigger"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
index 9e033e80219e..788cd56ea4a0 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_resize.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
@@ -12,6 +12,9 @@ tmpfile=""
tmpfile_proc=""
tmpfile_uniq=""
ret=0
+have_socat=0
+
+socat -h > /dev/null && have_socat=1
insert_count=2000
[ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400
@@ -123,7 +126,7 @@ ctflush() {
done
}
-ctflood()
+ct_pingflood()
{
local ns="$1"
local duration="$2"
@@ -152,6 +155,44 @@ ctflood()
wait
}
+ct_udpflood()
+{
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ [ $have_socat -ne "1" ] && return
+
+ while [ $now -lt $end ]; do
+ip netns exec "$ns" bash<<"EOF"
+ for i in $(seq 1 100);do
+ dport=$(((RANDOM%65536)+1))
+
+ echo bar | socat -u STDIN UDP:"127.0.0.1:$dport" &
+ done > /dev/null 2>&1
+ wait
+EOF
+ now=$(date +%s)
+ done
+}
+
+ct_udpclash()
+{
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ [ -x udpclash ] || return
+
+ while [ $now -lt $end ]; do
+ ip netns exec "$ns" ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1
+
+ now=$(date +%s)
+ done
+}
+
# dump to /dev/null. We don't want dumps to cause infinite loops
# or use-after-free even when conntrack table is altered while dumps
# are in progress.
@@ -169,6 +210,48 @@ ct_nulldump()
wait
}
+ct_nulldump_loop()
+{
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ while [ $now -lt $end ]; do
+ ct_nulldump "$ns"
+ sleep $((RANDOM%2))
+ now=$(date +%s)
+ done
+}
+
+change_timeouts()
+{
+ local ns="$1"
+ local r1=$((RANDOM%2))
+ local r2=$((RANDOM%2))
+
+ [ "$r1" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=$((RANDOM%5))
+ [ "$r2" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=$((RANDOM%5))
+}
+
+ct_change_timeouts_loop()
+{
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ while [ $now -lt $end ]; do
+ change_timeouts "$ns"
+ sleep $((RANDOM%2))
+ now=$(date +%s)
+ done
+
+ # restore defaults
+ ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=30
+ ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=30
+}
+
check_taint()
{
local tainted_then="$1"
@@ -198,10 +281,14 @@ insert_flood()
r=$((RANDOM%$insert_count))
- ctflood "$n" "$timeout" "floodresize" &
+ ct_pingflood "$n" "$timeout" "floodresize" &
+ ct_udpflood "$n" "$timeout" &
+ ct_udpclash "$n" "$timeout" &
+
insert_ctnetlink "$n" "$r" &
ctflush "$n" "$timeout" &
- ct_nulldump "$n" &
+ ct_nulldump_loop "$n" "$timeout" &
+ ct_change_timeouts_loop "$n" "$timeout" &
wait
}
@@ -306,7 +393,7 @@ test_dump_all()
ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600
- ctflood "$nsclient1" $timeout "dumpall" &
+ ct_pingflood "$nsclient1" $timeout "dumpall" &
insert_ctnetlink "$nsclient2" $insert_count
wait
@@ -368,7 +455,7 @@ test_conntrack_disable()
ct_flush_once "$nsclient1"
ct_flush_once "$nsclient2"
- ctflood "$nsclient1" "$timeout" "conntrack disable"
+ ct_pingflood "$nsclient1" "$timeout" "conntrack disable"
ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1
# Disabled, should not have picked up any connection.
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index cd12b8b5ac0e..20e76b395c85 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -1311,6 +1311,9 @@ maybe_send_match() {
# - remove some elements, check that packets don't match anymore
test_correctness_main() {
range_size=1
+
+ send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
+
for i in $(seq "${start}" $((start + count))); do
local elem=""
diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c
new file mode 100644
index 000000000000..85c7b906ad08
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/udpclash.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Usage: ./udpclash <IP> <PORT>
+ *
+ * Emit THREAD_COUNT UDP packets sharing the same saddr:daddr pair.
+ *
+ * This mimics DNS resolver libraries that emit A and AAAA requests
+ * in parallel.
+ *
+ * This exercises conntrack clash resolution logic added and later
+ * refined in
+ *
+ * 71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race")
+ * ed07d9a021df ("netfilter: nf_conntrack: resolve clash for matching conntracks")
+ * 6a757c07e51f ("netfilter: conntrack: allow insertion of clashing entries")
+ */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <pthread.h>
+
+#define THREAD_COUNT 128
+
+struct thread_args {
+ const struct sockaddr_in *si_remote;
+ int sockfd;
+};
+
+static volatile int wait = 1;
+
+static void *thread_main(void *varg)
+{
+ const struct sockaddr_in *si_remote;
+ const struct thread_args *args = varg;
+ static const char msg[] = "foo";
+
+ si_remote = args->si_remote;
+
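+	/* Spin until the main thread releases all workers at once, so the
+	 * sendto() calls race as closely as possible. */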
+ while (wait == 1)
+ ;
+
+ if (sendto(args->sockfd, msg, strlen(msg), MSG_NOSIGNAL,
+ (struct sockaddr *)si_remote, sizeof(*si_remote)) < 0)
+ exit(111);
+
+ return varg;
+}
+
+static int run_test(int fd, const struct sockaddr_in *si_remote)
+{
+ struct thread_args thread_args = {
+ .si_remote = si_remote,
+ .sockfd = fd,
+ };
+ pthread_t *tid = calloc(THREAD_COUNT, sizeof(pthread_t));
+ unsigned int repl_count = 0, timeout = 0;
+ int i;
+
+ if (!tid) {
+ perror("calloc");
+ return 1;
+ }
+
+ for (i = 0; i < THREAD_COUNT; i++) {
+ int err = pthread_create(&tid[i], NULL, &thread_main, &thread_args);
+
+ if (err != 0) {
+ perror("pthread_create");
+ exit(1);
+ }
+ }
+
+ wait = 0;
+
+ for (i = 0; i < THREAD_COUNT; i++)
+ pthread_join(tid[i], NULL);
+
+ while (repl_count < THREAD_COUNT) {
+ struct sockaddr_in si_repl;
+ socklen_t si_repl_len = sizeof(si_repl);
+ char repl[512];
+ ssize_t ret;
+
+ ret = recvfrom(fd, repl, sizeof(repl), MSG_NOSIGNAL,
+ (struct sockaddr *) &si_repl, &si_repl_len);
+ if (ret < 0) {
+ if (timeout++ > 5000) {
+ fputs("timed out while waiting for reply from thread\n", stderr);
+ break;
+ }
+
+ /* give reply time to pass through the stack */
+ usleep(1000);
+ continue;
+ }
+
+ if (si_repl_len != sizeof(*si_remote)) {
+ fprintf(stderr, "warning: reply has unexpected repl_len %d vs %d\n",
+ (int)si_repl_len, (int)sizeof(si_repl));
+ } else if (si_remote->sin_addr.s_addr != si_repl.sin_addr.s_addr ||
+ si_remote->sin_port != si_repl.sin_port) {
+ char a[64], b[64];
+
+ inet_ntop(AF_INET, &si_remote->sin_addr, a, sizeof(a));
+ inet_ntop(AF_INET, &si_repl.sin_addr, b, sizeof(b));
+
+ fprintf(stderr, "reply from wrong source: want %s:%d got %s:%d\n",
+ a, ntohs(si_remote->sin_port), b, ntohs(si_repl.sin_port));
+ }
+
+ repl_count++;
+ }
+
+ printf("got %d of %d replies\n", repl_count, THREAD_COUNT);
+
+ free(tid);
+
+ return repl_count == THREAD_COUNT ? 0 : 1;
+}
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_in si_local = {
+ .sin_family = AF_INET,
+ };
+ struct sockaddr_in si_remote = {
+ .sin_family = AF_INET,
+ };
+ int fd, ret;
+
+ if (argc < 3) {
+		fputs("Usage: udpclash <daddr> <dport>\n", stderr);
+ return 1;
+ }
+
+ si_remote.sin_port = htons(atoi(argv[2]));
+ si_remote.sin_addr.s_addr = inet_addr(argv[1]);
+
+ fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, IPPROTO_UDP);
+ if (fd < 0) {
+ perror("socket");
+ return 1;
+ }
+
+ if (bind(fd, (struct sockaddr *)&si_local, sizeof(si_local)) < 0) {
+ perror("bind");
+ return 1;
+ }
+
+ ret = run_test(fd, &si_remote);
+
+ close(fd);
+
+ return ret;
+}
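
A note on the start gate in udpclash.c above: all senders spin on a single flag that main() clears, which is why the flag is declared volatile. As a hedged alternative sketch (illustrative names, not part of the selftest), the same release-all-at-once behavior can be had from a pthread barrier:

/* Hedged sketch (illustrative names, not part of the selftest): release
 * all senders at once with a pthread barrier instead of a spun-on flag. */
#include <pthread.h>
#include <stdio.h>

#define NTHREADS 128

static pthread_barrier_t start_gate;

static void *sender(void *arg)
{
	(void)arg;
	/* every thread blocks here until the last one arrives */
	pthread_barrier_wait(&start_gate);
	/* ... sendto() on the shared socket, as in thread_main() above ... */
	return NULL;
}

int main(void)
{
	pthread_t tid[NTHREADS];
	int i;

	pthread_barrier_init(&start_gate, NULL, NTHREADS);

	for (i = 0; i < NTHREADS; i++)
		if (pthread_create(&tid[i], NULL, sender, NULL))
			return 1;

	for (i = 0; i < NTHREADS; i++)
		pthread_join(tid[i], NULL);

	pthread_barrier_destroy(&start_gate);
	return 0;
}

Build with -pthread; the barrier guarantees no sender runs ahead before all its peers are ready, which is what maximizes the insertion race the test wants to provoke.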
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt
new file mode 100644
index 000000000000..09aabc775e80
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"`
+
+// Test that a not-yet-accepted socket does not change
+// its initial sk_rcvbuf (tcp_rmem[1]) when receiving out-of-order (ooo) packets.
+
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 < . 2001:41001(39000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001>
+ +0 < . 41001:101001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001>
+ +0 < . 1:1001(1000) ack 1 win 257
+ +0 > . 1:1(0) ack 1001 <nop,nop,sack 2001:101001>
+ +0 < . 1001:2001(1000) ack 1 win 257
+ +0 > . 1:1(0) ack 101001
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 %{ assert SK_MEMINFO_RCVBUF == 131072, SK_MEMINFO_RCVBUF }%
+
+ +0 close(4) = 0
+ +0 close(3) = 0
+
+// Test that ooo packets for accepted sockets do increase sk_rcvbuf
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < . 2001:41001(39000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001>
+ +0 < . 41001:101001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001>
+
+ +0 %{ assert SK_MEMINFO_RCVBUF > 131072, SK_MEMINFO_RCVBUF }%
+
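
The packetdrill asserts above inspect SK_MEMINFO_RCVBUF. For reference, the same receive-buffer value is visible from plain userspace via getsockopt(SO_RCVBUF); a minimal sketch, assuming the default sysctls set in the preamble above:

/* Hedged sketch: sample sk_rcvbuf from userspace with SO_RCVBUF. */
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int rcvbuf;
	socklen_t len = sizeof(rcvbuf);

	if (fd < 0 || getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, &len) < 0) {
		perror("SO_RCVBUF");
		return 1;
	}

	/* freshly created: tcp_rmem[1] (131072 with the sysctl above);
	 * autotuning may grow it once the socket is accepted and used */
	printf("sk_rcvbuf: %d\n", rcvbuf);
	return 0;
}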
diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c
new file mode 100644
index 000000000000..eb3cac5e583c
--- /dev/null
+++ b/tools/testing/selftests/net/tfo.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <errno.h>
+
+static int cfg_server;
+static int cfg_client;
+static int cfg_port = 8000;
+static struct sockaddr_in6 cfg_addr;
+static char *cfg_outfile;
+
+static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
+{
+ int ret;
+
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = htons(port);
+
+ ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr);
+ if (ret != 1) {
+ /* fallback to plain IPv4 */
+ ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]);
+ if (ret != 1)
+ return -1;
+
+ /* add ::ffff prefix */
+ sin6->sin6_addr.s6_addr32[0] = 0;
+ sin6->sin6_addr.s6_addr32[1] = 0;
+ sin6->sin6_addr.s6_addr16[4] = 0;
+ sin6->sin6_addr.s6_addr16[5] = 0xffff;
+ }
+
+ return 0;
+}
+
+static void run_server(void)
+{
+ unsigned long qlen = 32;
+ int fd, opt, connfd;
+ socklen_t len;
+ char buf[64];
+ FILE *outfile;
+
+ outfile = fopen(cfg_outfile, "w");
+ if (!outfile)
+ error(1, errno, "fopen() outfile");
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket()");
+
+ opt = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0)
+ error(1, errno, "setsockopt(SO_REUSEADDR)");
+
+ if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0)
+ error(1, errno, "setsockopt(TCP_FASTOPEN)");
+
+ if (bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)) < 0)
+ error(1, errno, "bind()");
+
+ if (listen(fd, 5) < 0)
+ error(1, errno, "listen()");
+
+ len = sizeof(cfg_addr);
+ connfd = accept(fd, (struct sockaddr *)&cfg_addr, &len);
+ if (connfd < 0)
+ error(1, errno, "accept()");
+
+ len = sizeof(opt);
+ if (getsockopt(connfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &opt, &len) < 0)
+ error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)");
+
+ read(connfd, buf, 64);
+ fprintf(outfile, "%d\n", opt);
+
+ fclose(outfile);
+ close(connfd);
+ close(fd);
+}
+
+static void run_client(void)
+{
+ int fd;
+ char *msg = "Hello, world!";
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket()");
+
+ sendto(fd, msg, strlen(msg), MSG_FASTOPEN, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr));
+
+ close(fd);
+}
+
+static void usage(const char *filepath)
+{
+	error(1, 0, "Usage: %s (-s|-c) -h<server_ip> -p<port> -o<outfile>", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ struct sockaddr_in6 *addr6 = (void *) &cfg_addr;
+ char *addr = NULL;
+ int ret;
+ int c;
+
+ if (argc <= 1)
+ usage(argv[0]);
+
+ while ((c = getopt(argc, argv, "sch:p:o:")) != -1) {
+ switch (c) {
+ case 's':
+ if (cfg_client)
+ error(1, 0, "Pass one of -s or -c");
+ cfg_server = 1;
+ break;
+ case 'c':
+ if (cfg_server)
+ error(1, 0, "Pass one of -s or -c");
+ cfg_client = 1;
+ break;
+ case 'h':
+ addr = optarg;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 'o':
+ cfg_outfile = strdup(optarg);
+ if (!cfg_outfile)
+				error(1, errno, "strdup() outfile");
+ break;
+ }
+ }
+
+ if (cfg_server && addr)
+ error(1, 0, "Server cannot have -h specified");
+
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ addr6->sin6_addr = in6addr_any;
+ if (addr) {
+ ret = parse_address(addr, cfg_port, addr6);
+ if (ret)
+ error(1, 0, "Client address parse error: %s", addr);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ if (cfg_server)
+ run_server();
+ else if (cfg_client)
+ run_client();
+
+ return 0;
+}
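
parse_address() in tfo.c above falls back to an IPv4-mapped IPv6 address (::ffff:a.b.c.d) so that one AF_INET6 socket serves both families. A small self-contained check of that mapping using the standard IN6_IS_ADDR_V4MAPPED macro; the address below is illustrative:

/* Hedged sketch: verify an IPv4-mapped IPv6 address. */
#include <stdio.h>
#include <arpa/inet.h>
#include <netinet/in.h>

int main(void)
{
	struct in6_addr a6;

	/* illustrative mapped address, equivalent to IPv4 192.0.2.1 */
	if (inet_pton(AF_INET6, "::ffff:192.0.2.1", &a6) != 1)
		return 1;

	printf("v4-mapped: %d\n", IN6_IS_ADDR_V4MAPPED(&a6));
	return 0;
}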
diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh
new file mode 100755
index 000000000000..80bf11fdc046
--- /dev/null
+++ b/tools/testing/selftests/net/tfo_passive.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+source lib.sh
+
+NSIM_SV_ID=$((256 + RANDOM % 256))
+NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID
+NSIM_CL_ID=$((512 + RANDOM % 256))
+NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
+NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
+
+SERVER_IP=192.168.1.1
+CLIENT_IP=192.168.1.2
+SERVER_PORT=48675
+
+setup_ns()
+{
+ set -e
+ ip netns add nssv
+ ip netns add nscl
+
+ NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_SV_SYS/net -exec basename {} \;)
+ NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_CL_SYS/net -exec basename {} \;)
+
+ ip link set $NSIM_SV_NAME netns nssv
+ ip link set $NSIM_CL_NAME netns nscl
+
+ ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME
+ ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME
+
+ ip netns exec nssv ip link set dev $NSIM_SV_NAME up
+ ip netns exec nscl ip link set dev $NSIM_CL_NAME up
+
+	# Enable passive TFO (519 = 0x207: 0x1 client, 0x2 server,
+	# 0x4 client w/o cookie, 0x200 server w/o cookie)
+ ip netns exec nssv sysctl -w net.ipv4.tcp_fastopen=519 > /dev/null
+
+ set +e
+}
+
+cleanup_ns()
+{
+ ip netns del nscl
+ ip netns del nssv
+}
+
+###
+### Code start
+###
+
+modprobe netdevsim
+
+# linking
+
+echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW
+echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+
+setup_ns
+
+NSIM_SV_FD=$((256 + RANDOM % 256))
+exec {NSIM_SV_FD}</var/run/netns/nssv
+NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex)
+
+NSIM_CL_FD=$((256 + RANDOM % 256))
+exec {NSIM_CL_FD}</var/run/netns/nscl
+NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex)
+
+echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \
+ $NSIM_DEV_SYS_LINK
+
+if [ $? -ne 0 ]; then
+	echo "linking the server and client netdevsim devices should succeed"
+ cleanup_ns
+ exit 1
+fi
+
+out_file=$(mktemp)
+
+timeout -k 1s 30s ip netns exec nssv ./tfo \
+ -s \
+ -p ${SERVER_PORT} \
+	-o ${out_file} &
+
+wait_local_port_listen nssv ${SERVER_PORT} tcp
+
+ip netns exec nscl ./tfo -c -h ${SERVER_IP} -p ${SERVER_PORT}
+
+wait
+
+res=$(cat $out_file)
+rm $out_file
+
+if [ $res -eq 0 ]; then
+ echo "got invalid NAPI ID from passive TFO socket"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK
+
+echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL
+
+cleanup_ns
+
+modprobe -r netdevsim
+
+exit 0
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index 1dc337c709f8..b17e032a6d75 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -48,7 +48,7 @@ run_one() {
cfg_veth
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} &
local PID1=$!
wait_local_port_listen ${PEER_NS} 8000 udp
@@ -95,7 +95,7 @@ run_one_nat() {
# will land on the 'plain' one
ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 &
local PID1=$!
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${family} -b ${addr2%/*} ${rx_args} &
local PID2=$!
wait_local_port_listen "${PEER_NS}" 8000 udp
@@ -117,9 +117,9 @@ run_one_2sock() {
cfg_veth
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} -p 12345 &
local PID1=$!
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 100 ${rx_args} &
local PID2=$!
wait_local_port_listen "${PEER_NS}" 12345 udp
diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh
index 7bc804ffaf7c..0fb56baf28e4 100755
--- a/tools/testing/selftests/net/vlan_hw_filter.sh
+++ b/tools/testing/selftests/net/vlan_hw_filter.sh
@@ -3,27 +3,101 @@
readonly NETNS="ns-$(mktemp -u XXXXXX)"
+ALL_TESTS="
+ test_vlan_filter_check
+ test_vlan0_del_crash_01
+ test_vlan0_del_crash_02
+ test_vlan0_del_crash_03
+ test_vid0_memleak
+"
+
ret=0
+setup() {
+ ip netns add ${NETNS}
+}
+
cleanup() {
- ip netns del $NETNS
+ ip netns del $NETNS 2>/dev/null
}
trap cleanup EXIT
fail() {
- echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
- ret=1
+	echo "ERROR: ${1:-unexpected return code} (ret: $?)" >&2
+ ret=1
+}
+
+tests_run()
+{
+ local current_test
+ for current_test in ${TESTS:-$ALL_TESTS}; do
+ $current_test
+ done
+}
+
+test_vlan_filter_check() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2
+ ip netns exec ${NETNS} ip link set bond_slave_1 master bond0
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0
+ ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0
+ ip netns exec ${NETNS} ip link set bond_slave_1 nomaster
+ ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function"
+ cleanup
}
-ip netns add ${NETNS}
-ip netns exec ${NETNS} ip link add bond0 type bond mode 0
-ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2
-ip netns exec ${NETNS} ip link set bond_slave_1 master bond0
-ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
-ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0
-ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0
-ip netns exec ${NETNS} ip link set bond_slave_1 nomaster
-ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function"
+# Enable the vlan_filter feature of the real device while vlan0 exists at runtime
+test_vlan0_del_crash_01() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ifconfig bond0 up
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on
+ ip netns exec ${NETNS} ifconfig bond0 down
+ ip netns exec ${NETNS} ifconfig bond0 up
+ ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function"
+ cleanup
+}
+
+# Enable the vlan_filter feature, then add vlan0 to the real device at runtime
+test_vlan0_del_crash_02() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ifconfig bond0 up
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on
+ ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q
+ ip netns exec ${NETNS} ifconfig bond0 down
+ ip netns exec ${NETNS} ifconfig bond0 up
+ ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function"
+ cleanup
+}
+
+# Enable the vlan_filter feature of the real device at runtime and
+# check for a kernel BUG() during vlan unregister
+test_vlan0_del_crash_03() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ifconfig bond0 up
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on
+ ip netns exec ${NETNS} ifconfig bond0 down
+ ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function"
+ cleanup
+}
+
+test_vid0_memleak() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 up type bond mode 0
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ip link del dev bond0 || fail "Please check vlan HW filter function"
+ cleanup
+}
+tests_run
exit $ret
diff --git a/tools/testing/selftests/sched_ext/exit.c b/tools/testing/selftests/sched_ext/exit.c
index 9451782689de..ee25824b1cbe 100644
--- a/tools/testing/selftests/sched_ext/exit.c
+++ b/tools/testing/selftests/sched_ext/exit.c
@@ -22,6 +22,14 @@ static enum scx_test_status run(void *ctx)
struct bpf_link *link;
char buf[16];
+ /*
+ * On single-CPU systems, ops.select_cpu() is never
+ * invoked, so skip this test to avoid getting stuck
+ * indefinitely.
+ */
+ if (tc == EXIT_SELECT_CPU && libbpf_num_possible_cpus() == 1)
+ continue;
+
skel = exit__open();
SCX_ENUM_INIT(skel);
skel->rodata->exit_point = tc;
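
The single-CPU guard above relies on libbpf_num_possible_cpus(). A minimal standalone probe of that helper (link with -lbpf); purely illustrative, not part of the selftest:

/* Hedged sketch: query the possible-CPU count the guard above checks. */
#include <stdio.h>
#include <bpf/libbpf.h>

int main(void)
{
	int ncpus = libbpf_num_possible_cpus();

	if (ncpus < 0) {
		fprintf(stderr, "libbpf_num_possible_cpus: %d\n", ncpus);
		return 1;
	}

	printf("possible CPUs: %d\n", ncpus);
	return 0;
}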
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index 9aa44d8176d9..c6db7fa94f55 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -128,6 +128,32 @@
]
},
{
+ "id": "5456",
+ "name": "Test htb_dequeue_tree with deactivation and row emptying",
+ "category": [
+ "qdisc",
+ "htb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: htb default 1",
+            "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 64bit",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2: netem",
+ "$TC qdisc add dev $DUMMY parent 2:1 handle 3: blackhole"
+ ],
+ "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.11.11",
+ "expExitCode": "1",
+ "verifyCmd": "$TC -j qdisc show dev $DUMMY",
+ "matchJSON": [],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root"
+ ]
+ },
+ {
"id": "c024",
"name": "Test TBF with SKBPRIO - catch qlen corner cases",
"category": [
@@ -635,5 +661,108 @@
"$TC qdisc del dev $DUMMY handle 1:0 root",
"$IP addr del 10.10.10.10/24 dev $DUMMY || true"
]
+ },
+ {
+ "id": "d74b",
+ "name": "Test use-after-free with DRR/NETEM/BLACKHOLE chain",
+ "category": [
+ "qdisc",
+ "hfsc",
+ "drr",
+ "netem",
+ "blackhole"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin",
+ "scapyPlugin"
+ ]
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: drr",
+ "$TC filter add dev $DUMMY parent 1: basic classid 1:1",
+ "$TC class add dev $DUMMY parent 1: classid 1:1 drr",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2: hfsc def 1",
+ "$TC class add dev $DUMMY parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0",
+ "$TC qdisc add dev $DUMMY parent 2:1 handle 3: netem",
+ "$TC qdisc add dev $DUMMY parent 3:1 handle 4: blackhole",
+ "ping -c1 -W0.01 -I $DUMMY 10.10.11.11 || true",
+ "$TC class del dev $DUMMY classid 1:1"
+ ],
+ "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.11.11",
+ "expExitCode": "1",
+ "verifyCmd": "$TC -j class ls dev $DUMMY classid 1:1",
+ "matchJSON": [],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root handle 1: drr"
+ ]
+ },
+ {
+ "id": "be28",
+ "name": "Try to add fq_codel qdisc as a child of an hhf qdisc",
+ "category": [
+ "qdisc",
+ "fq_codel",
+ "hhf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle a: hhf"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a: handle b: fq_codel",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:",
+ "matchJSON": [],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root"
+ ]
+ },
+ {
+ "id": "fcb5",
+ "name": "Try to add pie qdisc as a child of a drr qdisc",
+ "category": [
+ "qdisc",
+ "pie",
+ "drr"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle a: drr"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a: handle b: pie",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:",
+ "matchJSON": [],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root"
+ ]
+ },
+ {
+ "id": "7801",
+        "name": "Try to add sfq qdisc as a child of a nonexistent hfsc class",
+ "category": [
+ "qdisc",
+ "sfq",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle a: hfsc"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a:fff2 sfq limit 4",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:",
+ "matchJSON": [],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root"
+ ]
}
]
diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh
index 6eef282d569f..3ed4c9b2d8c0 100755
--- a/tools/testing/selftests/ublk/test_stress_03.sh
+++ b/tools/testing/selftests/ublk/test_stress_03.sh
@@ -32,22 +32,23 @@ _create_backfile 2 128M
ublk_io_and_remove 8G -t null -q 4 -z &
ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
if _have_feature "AUTO_BUF_REG"; then
ublk_io_and_remove 8G -t null -q 4 --auto_zc &
ublk_io_and_remove 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" &
ublk_io_and_remove 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback &
+ wait
fi
-wait
if _have_feature "PER_IO_DAEMON"; then
ublk_io_and_remove 8G -t null -q 4 --auto_zc --nthreads 8 --per_io_tasks &
ublk_io_and_remove 256M -t loop -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" &
ublk_io_and_remove 256M -t stripe -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback --nthreads 8 --per_io_tasks &
+ wait
fi
-wait
_cleanup_test "stress"
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index f703fcfe9f7c..83148875a12c 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -12,7 +12,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl ioperm \
- test_vsyscall mov_ss_trap \
+ test_vsyscall mov_ss_trap sigtrap_loop \
syscall_arg_fault fsgsbase_restore sigaltstack
TARGETS_C_BOTHBITS += nx_stack
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
diff --git a/tools/testing/selftests/x86/sigtrap_loop.c b/tools/testing/selftests/x86/sigtrap_loop.c
new file mode 100644
index 000000000000..9d065479e89f
--- /dev/null
+++ b/tools/testing/selftests/x86/sigtrap_loop.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 Intel Corporation
+ */
+#define _GNU_SOURCE
+
+#include <err.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ucontext.h>
+
+#ifdef __x86_64__
+# define REG_IP REG_RIP
+#else
+# define REG_IP REG_EIP
+#endif
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+
+ return;
+}
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+ static unsigned int loop_count_on_same_ip;
+ static unsigned long last_trap_ip;
+
+ if (last_trap_ip == ctx->uc_mcontext.gregs[REG_IP]) {
+ printf("\tTrapped at %016lx\n", last_trap_ip);
+
+ /*
+ * If the same IP is hit more than 10 times in a row, it is
+ * _considered_ an infinite loop.
+ */
+ if (++loop_count_on_same_ip > 10) {
+ printf("[FAIL]\tDetected SIGTRAP infinite loop\n");
+ exit(1);
+ }
+
+ return;
+ }
+
+ loop_count_on_same_ip = 0;
+ last_trap_ip = ctx->uc_mcontext.gregs[REG_IP];
+ printf("\tTrapped at %016lx\n", last_trap_ip);
+}
+
+int main(int argc, char *argv[])
+{
+ sethandler(SIGTRAP, sigtrap, 0);
+
+ /*
+	 * Set the Trap Flag (TF) to single-step the test code, so that a
+	 * SIGTRAP signal is raised after each instruction until the TF is
+	 * cleared.
+ *
+ * Because the arithmetic flags are not significant here, the TF is
+ * set by pushing 0x302 onto the stack and then popping it into the
+ * flags register.
+ *
+ * Four instructions in the following asm code are executed with the
+ * TF set, thus the SIGTRAP handler is expected to run four times.
+ */
+ printf("[RUN]\tSIGTRAP infinite loop detection\n");
+ asm volatile(
+#ifdef __x86_64__
+ /*
+ * Avoid clobbering the redzone
+ *
+ * Equivalent to "sub $128, %rsp", however -128 can be encoded
+	 * in a single-byte immediate while 128 takes four bytes.
+ */
+ "add $-128, %rsp\n\t"
+#endif
+ "push $0x302\n\t"
+ "popf\n\t"
+ "nop\n\t"
+ "nop\n\t"
+ "push $0x202\n\t"
+ "popf\n\t"
+#ifdef __x86_64__
+ "sub $-128, %rsp\n\t"
+#endif
+ );
+
+ printf("[OK]\tNo SIGTRAP infinite loop detected\n");
+ return 0;
+}
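
For reference, the immediates popped into EFLAGS above decode as follows; the macro names mirror the kernel's processor-flags definitions but are defined locally here:

/* Standard x86 EFLAGS bits; names mirror the kernel's but are local. */
#include <stdio.h>

#define X86_EFLAGS_FIXED 0x002UL	/* bit 1: reserved, always set */
#define X86_EFLAGS_TF	 0x100UL	/* bit 8: trap flag (single-step) */
#define X86_EFLAGS_IF	 0x200UL	/* bit 9: interrupt enable flag */

int main(void)
{
	/* 0x302 = FIXED | TF | IF: starts single-stepping */
	/* 0x202 = FIXED | IF: stops it */
	printf("set TF: %#lx, clear TF: %#lx\n",
	       X86_EFLAGS_FIXED | X86_EFLAGS_TF | X86_EFLAGS_IF,
	       X86_EFLAGS_FIXED | X86_EFLAGS_IF);
	return 0;
}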
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 4505b1c31be1..14718ca23a05 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -159,6 +159,14 @@ typedef __bitwise unsigned int vm_fault_t;
#define ASSERT_EXCLUSIVE_WRITER(x)
+/**
+ * swap - swap values of @a and @b
+ * @a: first value
+ * @b: second value
+ */
+#define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
struct kref {
refcount_t refcount;
};
@@ -1468,4 +1476,12 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
(void)vma;
}
+static inline void vma_set_file(struct vm_area_struct *vma, struct file *file)
+{
+ /* Changing an anonymous vma with this is illegal */
+ get_file(file);
+ swap(vma->vm_file, file);
+ fput(file);
+}
+
#endif /* __MM_VMA_INTERNAL_H */
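
vma_set_file() above is the get/swap/put idiom: take a reference on the new object before the exchange so the reference on the old object is only dropped afterwards. A self-contained sketch of the same idiom with an illustrative refcounted type (not the kernel's struct file); compiles with GCC/Clang defaults since it uses typeof:

/* Hedged sketch of the get/swap/put pointer-replacement idiom. */
#include <stdio.h>

struct obj {
	int refcount;
};

#define swap(a, b) \
	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)

static void obj_get(struct obj *o) { o->refcount++; }
static void obj_put(struct obj *o) { o->refcount--; }

static void set_obj(struct obj **slot, struct obj *new)
{
	obj_get(new);		/* the new holder's reference */
	swap(*slot, new);	/* 'new' now points at the old object */
	obj_put(new);		/* drop the reference on the old object */
}

int main(void)
{
	struct obj a = { .refcount = 1 }, b = { .refcount = 1 };
	struct obj *slot = &a;

	set_obj(&slot, &b);
	printf("a.ref=%d b.ref=%d slot==&b:%d\n",
	       a.refcount, b.refcount, slot == &b);
	return 0;
}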
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index eec82775c5bf..222f0e894a0c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2572,6 +2572,8 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
r = xa_reserve(&kvm->mem_attr_array, i, GFP_KERNEL_ACCOUNT);
if (r)
goto out_unlock;
+
+ cond_resched();
}
kvm_handle_gfn_range(kvm, &pre_set_range);
@@ -2580,6 +2582,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
r = xa_err(xa_store(&kvm->mem_attr_array, i, entry,
GFP_KERNEL_ACCOUNT));
KVM_BUG_ON(r, kvm);
+ cond_resched();
}
kvm_handle_gfn_range(kvm, &post_set_range);
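
The cond_resched() calls added above keep these potentially huge gfn loops from hogging a CPU on non-preemptible kernels. As a rough userspace analogue (cond_resched() itself is kernel-only), a long loop can yield periodically; a hedged sketch:

/* Hedged userspace analogue: yield inside a long loop so other runnable
 * tasks get CPU time, much as cond_resched() does in kernel context. */
#include <sched.h>
#include <stdio.h>

int main(void)
{
	unsigned long i, work = 0;

	for (i = 0; i < 100000000UL; i++) {
		work += i;			/* stand-in for per-entry work */
		if ((i & 0xffff) == 0)
			sched_yield();		/* let other tasks run */
	}

	printf("%lu\n", work);
	return 0;
}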