1098 files changed, 50590 insertions, 28712 deletions
diff --git a/Documentation/admin-guide/bug-hunting.rst b/Documentation/admin-guide/bug-hunting.rst index ce6f4e8ca487..30858757c9f2 100644 --- a/Documentation/admin-guide/bug-hunting.rst +++ b/Documentation/admin-guide/bug-hunting.rst @@ -196,7 +196,7 @@ will see the assembler code for the routine shown, but if your kernel has debug symbols the C code will also be available. (Debug symbols can be enabled in the kernel hacking menu of the menu configuration.) For example:: - $ objdump -r -S -l --disassemble net/dccp/ipv4.o + $ objdump -r -S -l --disassemble net/ipv4/tcp.o .. note:: diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml index 7fe0352dff0f..7b6a2fde8175 100644 --- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml +++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml @@ -23,6 +23,7 @@ properties: - allwinner,sun20i-d1-emac - allwinner,sun50i-h6-emac - allwinner,sun50i-h616-emac0 + - allwinner,sun55i-a523-emac0 - const: allwinner,sun50i-a64-emac reg: diff --git a/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml b/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml index 660e2ca42daf..a3db6d594c8c 100644 --- a/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml +++ b/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/brcm,asp-v2.0.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Broadcom ASP 2.0 Ethernet controller +title: Broadcom ASP Ethernet controller maintainers: - Justin Chen <justin.chen@broadcom.com> @@ -17,16 +17,16 @@ properties: oneOf: - items: - enum: + - brcm,bcm74110-asp + - const: brcm,asp-v3.0 + - items: + - enum: - brcm,bcm74165b0-asp - const: brcm,asp-v2.2 - items: - enum: - brcm,bcm74165-asp - const: brcm,asp-v2.1 - - items: - - enum: - - brcm,bcm72165-asp - - const: brcm,asp-v2.0 "#address-cells": const: 1 @@ -39,11 +39,9 @@ properties: ranges: true interrupts: - minItems: 1 items: - description: RX/TX interrupt - - description: Port 0 Wake-on-LAN - - description: Port 1 Wake-on-LAN + - description: Wake-on-LAN interrupt clocks: maxItems: 1 @@ -106,16 +104,17 @@ examples: #include <dt-bindings/interrupt-controller/arm-gic.h> ethernet@9c00000 { - compatible = "brcm,bcm72165-asp", "brcm,asp-v2.0"; + compatible = "brcm,bcm74165-asp", "brcm,asp-v2.1"; reg = <0x9c00000 0x1fff14>; - interrupts = <GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>; + interrupts-extended = <&intc GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>, + <&aon_pm_l2_intc 14>; ranges = <0x0 0x9c00000 0x1fff14>; clocks = <&scmi 14>; #address-cells = <1>; #size-cells = <1>; mdio@c614 { - compatible = "brcm,asp-v2.0-mdio"; + compatible = "brcm,asp-v2.1-mdio"; reg = <0xc614 0x8>; reg-names = "mdio"; #address-cells = <1>; @@ -127,7 +126,7 @@ examples: }; mdio@ce14 { - compatible = "brcm,asp-v2.0-mdio"; + compatible = "brcm,asp-v2.1-mdio"; reg = <0xce14 0x8>; reg-names = "mdio"; #address-cells = <1>; diff --git a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml index 63bee5b542f5..43516dd357b8 100644 --- a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml +++ b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml @@ -22,9 +22,9 @@ properties: - brcm,genet-mdio-v3 - brcm,genet-mdio-v4 - brcm,genet-mdio-v5 - - brcm,asp-v2.0-mdio - brcm,asp-v2.1-mdio - brcm,asp-v2.2-mdio + - brcm,asp-v3.0-mdio - brcm,unimac-mdio 
- brcm,bcm6846-mdio diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index a2d4c626f659..7cbf11bbe99c 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -16,30 +16,6 @@ properties: label: description: Human readable label on a port of a box. - local-mac-address: - description: - Specifies the MAC address that was assigned to the network device. - $ref: /schemas/types.yaml#/definitions/uint8-array - minItems: 6 - maxItems: 6 - - mac-address: - description: - Specifies the MAC address that was last used by the boot - program; should be used in cases where the MAC address assigned - to the device by the boot program is different from the - local-mac-address property. - $ref: /schemas/types.yaml#/definitions/uint8-array - minItems: 6 - maxItems: 6 - - max-frame-size: - $ref: /schemas/types.yaml#/definitions/uint32 - description: - Maximum transfer unit (IEEE defined MTU), rather than the - maximum frame size (there\'s contradiction in the Devicetree - Specification). - max-speed: $ref: /schemas/types.yaml#/definitions/uint32 description: @@ -195,7 +171,7 @@ properties: description: Link speed. $ref: /schemas/types.yaml#/definitions/uint32 - enum: [10, 100, 1000, 2500, 10000] + enum: [10, 100, 1000, 2500, 5000, 10000] full-duplex: $ref: /schemas/types.yaml#/definitions/flag @@ -260,6 +236,7 @@ dependencies: pcs-handle-names: [pcs-handle] allOf: + - $ref: /schemas/net/network-class.yaml# - if: properties: phy-mode: diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml index 824bbe4333b7..71e2cd32580f 100644 --- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml @@ -238,6 +238,16 @@ properties: peak-to-peak specified in ANSI X3.263. When omitted, the PHYs default will be left as is. + mac-termination-ohms: + maximum: 200 + description: + The xMII signals need series termination on the driver side to match both + the output driver impedance and the line characteristic impedance, to + prevent reflections and EMI problems. Select a resistance value which is + supported by the builtin resistors of the PHY, otherwise the resistors may + have to be placed on board. When omitted, the PHYs default will be left as + is. + leds: type: object diff --git a/Documentation/devicetree/bindings/net/network-class.yaml b/Documentation/devicetree/bindings/net/network-class.yaml new file mode 100644 index 000000000000..06461fb92eb8 --- /dev/null +++ b/Documentation/devicetree/bindings/net/network-class.yaml @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/network-class.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Network Class Common Properties + +maintainers: + - Devicetree Specification Mailing List <devicetree-spec@vger.kernel.org> + +properties: + address-bits: + description: + Specifies number of address bits required to address the device + described by this node, e.g. size of the MAC address. + default: 48 + const: 48 + + local-mac-address: + description: + Specifies MAC address that was assigned to the network device described by + the node containing this property. 
+ $ref: /schemas/types.yaml#/definitions/uint8-array + minItems: 6 + maxItems: 6 + + mac-address: + description: + Specifies the MAC address that was last used by the boot program. This + property should be used in cases where the MAC address assigned to the + device by the boot program is different from the + local-mac-address property. This property shall be used only if the value + differs from local-mac-address property value. + $ref: /schemas/types.yaml#/definitions/uint8-array + minItems: 6 + maxItems: 6 + + max-frame-size: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Maximum transfer unit (IEEE defined MTU), rather than the + maximum frame size (there\'s contradiction in the Devicetree + Specification). + +additionalProperties: true diff --git a/Documentation/devicetree/bindings/net/renesas,r9a09g057-gbeth.yaml b/Documentation/devicetree/bindings/net/renesas,r9a09g057-gbeth.yaml new file mode 100644 index 000000000000..c498a9999289 --- /dev/null +++ b/Documentation/devicetree/bindings/net/renesas,r9a09g057-gbeth.yaml @@ -0,0 +1,203 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/renesas,r9a09g057-gbeth.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: GBETH glue layer for Renesas RZ/V2H(P) (and similar SoCs) + +maintainers: + - Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> + +select: + properties: + compatible: + contains: + enum: + - renesas,r9a09g056-gbeth + - renesas,r9a09g057-gbeth + - renesas,rzv2h-gbeth + required: + - compatible + +properties: + compatible: + items: + - enum: + - renesas,r9a09g056-gbeth # RZ/V2N + - renesas,r9a09g057-gbeth # RZ/V2H(P) + - const: renesas,rzv2h-gbeth + - const: snps,dwmac-5.20 + + reg: + maxItems: 1 + + clocks: + items: + - description: CSR clock + - description: AXI system clock + - description: PTP clock + - description: TX clock + - description: RX clock + - description: TX clock phase-shifted by 180 degrees + - description: RX clock phase-shifted by 180 degrees + + clock-names: + items: + - const: stmmaceth + - const: pclk + - const: ptp_ref + - const: tx + - const: rx + - const: tx-180 + - const: rx-180 + + interrupts: + minItems: 11 + + interrupt-names: + items: + - const: macirq + - const: eth_wake_irq + - const: eth_lpi + - const: rx-queue-0 + - const: rx-queue-1 + - const: rx-queue-2 + - const: rx-queue-3 + - const: tx-queue-0 + - const: tx-queue-1 + - const: tx-queue-2 + - const: tx-queue-3 + + resets: + items: + - description: AXI power-on system reset + +required: + - compatible + - reg + - clocks + - clock-names + - interrupts + - interrupt-names + - resets + +allOf: + - $ref: snps,dwmac.yaml# + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/clock/renesas-cpg-mssr.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + + ethernet@15c30000 { + compatible = "renesas,r9a09g057-gbeth", "renesas,rzv2h-gbeth", "snps,dwmac-5.20"; + reg = <0x15c30000 0x10000>; + clocks = <&cpg CPG_MOD 0xbd>, <&cpg CPG_MOD 0xbc>, + <&ptp_clock>, <&cpg CPG_MOD 0xb8>, + <&cpg CPG_MOD 0xb9>, <&cpg CPG_MOD 0xba>, + <&cpg CPG_MOD 0xbb>; + clock-names = "stmmaceth", "pclk", "ptp_ref", + "tx", "rx", "tx-180", "rx-180"; + resets = <&cpg 0xb0>; + interrupts = <GIC_SPI 765 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 767 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 766 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 772 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 773 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 774 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 745 IRQ_TYPE_LEVEL_HIGH>, + 
<GIC_SPI 768 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 769 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 770 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 771 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_wake_irq", "eth_lpi", + "rx-queue-0", "rx-queue-1", "rx-queue-2", + "rx-queue-3", "tx-queue-0", "tx-queue-1", + "tx-queue-2", "tx-queue-3"; + phy-mode = "rgmii-id"; + snps,multicast-filter-bins = <256>; + snps,perfect-filter-entries = <128>; + rx-fifo-depth = <8192>; + tx-fifo-depth = <8192>; + snps,fixed-burst; + snps,force_thresh_dma_mode; + snps,axi-config = <&stmmac_axi_setup>; + snps,mtl-rx-config = <&mtl_rx_setup>; + snps,mtl-tx-config = <&mtl_tx_setup>; + snps,txpbl = <32>; + snps,rxpbl = <32>; + phy-handle = <&phy0>; + + stmmac_axi_setup: stmmac-axi-config { + snps,lpi_en; + snps,wr_osr_lmt = <0xf>; + snps,rd_osr_lmt = <0xf>; + snps,blen = <16 8 4 0 0 0 0>; + }; + + mtl_rx_setup: rx-queues-config { + snps,rx-queues-to-use = <4>; + snps,rx-sched-sp; + + queue0 { + snps,dcb-algorithm; + snps,priority = <0x1>; + snps,map-to-dma-channel = <0>; + }; + + queue1 { + snps,dcb-algorithm; + snps,priority = <0x2>; + snps,map-to-dma-channel = <1>; + }; + + queue2 { + snps,dcb-algorithm; + snps,priority = <0x4>; + snps,map-to-dma-channel = <2>; + }; + + queue3 { + snps,dcb-algorithm; + snps,priority = <0x8>; + snps,map-to-dma-channel = <3>; + }; + }; + + mtl_tx_setup: tx-queues-config { + snps,tx-queues-to-use = <4>; + + queue0 { + snps,dcb-algorithm; + snps,priority = <0x1>; + }; + + queue1 { + snps,dcb-algorithm; + snps,priority = <0x2>; + }; + + queue2 { + snps,dcb-algorithm; + snps,priority = <0x4>; + }; + + queue3 { + snps,dcb-algorithm; + snps,priority = <0x1>; + }; + }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,dwmac-mdio"; + + phy0: ethernet-phy@0 { + reg = <0>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 78b3030dc56d..90b79283e228 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -75,6 +75,7 @@ properties: - qcom,sm8150-ethqos - renesas,r9a06g032-gmac - renesas,rzn1-gmac + - renesas,rzv2h-gbeth - rockchip,px30-gmac - rockchip,rk3128-gmac - rockchip,rk3228-gmac @@ -114,19 +115,25 @@ properties: interrupts: minItems: 1 - items: - - description: Combined signal for various interrupt events - - description: The interrupt to manage the remote wake-up packet detection - - description: The interrupt that occurs when Rx exits the LPI state - - description: The interrupt that occurs when HW safety error triggered + maxItems: 11 interrupt-names: minItems: 1 + maxItems: 11 items: - - const: macirq - - enum: [eth_wake_irq, eth_lpi, sfty] - - enum: [eth_wake_irq, eth_lpi, sfty] - - enum: [eth_wake_irq, eth_lpi, sfty] + oneOf: + - description: Combined signal for various interrupt events + const: macirq + - description: The interrupt to manage the remote wake-up packet detection + const: eth_wake_irq + - description: The interrupt that occurs when Rx exits the LPI state + const: eth_lpi + - description: The interrupt that occurs when HW safety error triggered + const: sfty + - description: Per channel receive completion interrupt + pattern: '^rx-queue-[0-3]$' + - description: Per channel transmit completion interrupt + pattern: '^tx-queue-[0-3]$' clocks: minItems: 1 @@ -703,7 +710,7 @@ examples: }; }; - mdio0 { + mdio { #address-cells = <1>; #size-cells = <0>; compatible = "snps,dwmac-mdio"; diff --git 
a/Documentation/devicetree/bindings/net/ti,dp83822.yaml b/Documentation/devicetree/bindings/net/ti,dp83822.yaml index 50c24248df26..28a0bddb9af9 100644 --- a/Documentation/devicetree/bindings/net/ti,dp83822.yaml +++ b/Documentation/devicetree/bindings/net/ti,dp83822.yaml @@ -122,6 +122,9 @@ properties: - free-running - recovered + mac-termination-ohms: + enum: [43, 44, 46, 48, 50, 53, 55, 58, 61, 65, 69, 73, 78, 84, 91, 99] + required: - reg @@ -137,6 +140,7 @@ examples: rx-internal-delay-ps = <1>; tx-internal-delay-ps = <1>; ti,gpio2-clk-out = "xi"; + mac-termination-ohms = <43>; }; }; diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml index b11894fbaec4..7b3d948f187d 100644 --- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml +++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml @@ -143,6 +143,8 @@ properties: label: description: label associated with this port + fixed-link: true + ti,mac-only: $ref: /schemas/types.yaml#/definitions/flag description: diff --git a/Documentation/devicetree/bindings/net/vertexcom-mse102x.yaml b/Documentation/devicetree/bindings/net/vertexcom-mse102x.yaml index 4158673f723c..8359de7ad272 100644 --- a/Documentation/devicetree/bindings/net/vertexcom-mse102x.yaml +++ b/Documentation/devicetree/bindings/net/vertexcom-mse102x.yaml @@ -63,7 +63,7 @@ examples: compatible = "vertexcom,mse1021"; reg = <0>; interrupt-parent = <&gpio>; - interrupts = <23 IRQ_TYPE_EDGE_RISING>; + interrupts = <23 IRQ_TYPE_LEVEL_HIGH>; spi-cpha; spi-cpol; spi-max-frequency = <7142857>; diff --git a/Documentation/devicetree/bindings/net/via,vt8500-rhine.yaml b/Documentation/devicetree/bindings/net/via,vt8500-rhine.yaml new file mode 100644 index 000000000000..e663d5a2f014 --- /dev/null +++ b/Documentation/devicetree/bindings/net/via,vt8500-rhine.yaml @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/via,vt8500-rhine.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: VIA Rhine 10/100 Network Controller + +description: + VIA's Ethernet controller integrated into VIA VT8500, + WonderMedia WM8950 and related SoCs + +maintainers: + - Alexey Charkov <alchark@gmail.com> + +allOf: + - $ref: ethernet-controller.yaml# + +properties: + compatible: + const: via,vt8500-rhine + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + +required: + - reg + - interrupts + +unevaluatedProperties: false + +examples: + - | + ethernet@d8004000 { + compatible = "via,vt8500-rhine"; + reg = <0xd8004000 0x100>; + interrupts = <10>; + }; diff --git a/Documentation/devicetree/bindings/net/via-rhine.txt b/Documentation/devicetree/bindings/net/via-rhine.txt deleted file mode 100644 index 334eca2bf937..000000000000 --- a/Documentation/devicetree/bindings/net/via-rhine.txt +++ /dev/null @@ -1,17 +0,0 @@ -* VIA Rhine 10/100 Network Controller - -Required properties: -- compatible : Should be "via,vt8500-rhine" for integrated - Rhine controllers found in VIA VT8500, WonderMedia WM8950 - and similar. These are listed as 1106:3106 rev. 
0x84 on the - virtual PCI bus under vendor-provided kernels -- reg : Address and length of the io space -- interrupts : Should contain the controller interrupt line - -Examples: - -ethernet@d8004000 { - compatible = "via,vt8500-rhine"; - reg = <0xd8004000 0x100>; - interrupts = <10>; -}; diff --git a/Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml b/Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml index a3607d55ef36..7c8100e59a6c 100644 --- a/Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml +++ b/Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml @@ -16,7 +16,7 @@ description: binding. allOf: - - $ref: ieee80211.yaml# + - $ref: /schemas/net/wireless/wireless-controller.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ipq5332-wifi.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ipq5332-wifi.yaml new file mode 100644 index 000000000000..363a0ecb6ad9 --- /dev/null +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ipq5332-wifi.yaml @@ -0,0 +1,315 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/wireless/qcom,ipq5332-wifi.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Technologies ath12k wireless devices (AHB) + +maintainers: + - Jeff Johnson <jjohnson@kernel.org> + +description: + Qualcomm Technologies IEEE 802.11be AHB devices. + +properties: + compatible: + enum: + - qcom,ipq5332-wifi + + reg: + maxItems: 1 + + clocks: + items: + - description: XO clock used for copy engine + + clock-names: + items: + - const: xo + + interrupts: + items: + - description: Fatal interrupt + - description: Ready interrupt + - description: Spawn acknowledge interrupt + - description: Stop acknowledge interrupt + - description: misc-pulse1 interrupt events + - description: misc-latch interrupt events + - description: sw exception interrupt events + - description: interrupt event for ring CE0 + - description: interrupt event for ring CE1 + - description: interrupt event for ring CE2 + - description: interrupt event for ring CE3 + - description: interrupt event for ring CE4 + - description: interrupt event for ring CE5 + - description: interrupt event for ring CE6 + - description: interrupt event for ring CE7 + - description: interrupt event for ring CE8 + - description: interrupt event for ring CE9 + - description: interrupt event for ring CE10 + - description: interrupt event for ring CE11 + - description: interrupt event for ring host2wbm-desc-feed + - description: interrupt event for ring host2reo-re-injection + - description: interrupt event for ring host2reo-command + - description: interrupt event for ring host2rxdma-monitor-ring1 + - description: interrupt event for ring reo2ost-exception + - description: interrupt event for ring wbm2host-rx-release + - description: interrupt event for ring reo2host-status + - description: interrupt event for ring reo2host-destination-ring4 + - description: interrupt event for ring reo2host-destination-ring3 + - description: interrupt event for ring reo2host-destination-ring2 + - description: interrupt event for ring reo2host-destination-ring1 + - description: interrupt event for ring rxdma2host-monitor-destination-mac3 + - description: interrupt event for ring rxdma2host-monitor-destination-mac2 + - description: interrupt event for ring 
rxdma2host-monitor-destination-mac1 + - description: interrupt event for ring host2rxdma-host-buf-ring-mac3 + - description: interrupt event for ring host2rxdma-host-buf-ring-mac2 + - description: interrupt event for ring host2rxdma-host-buf-ring-mac1 + - description: interrupt event for ring host2tcl-input-ring4 + - description: interrupt event for ring host2tcl-input-ring3 + - description: interrupt event for ring host2tcl-input-ring2 + - description: interrupt event for ring host2tcl-input-ring1 + - description: interrupt event for ring wbm2host-tx-completions-ring4 + - description: interrupt event for ring wbm2host-tx-completions-ring3 + - description: interrupt event for ring wbm2host-tx-completions-ring2 + - description: interrupt event for ring wbm2host-tx-completions-ring1 + - description: interrupt event for ring host2tx-monitor-ring1 + - description: interrupt event for ring txmon2host-monitor-destination-mac3 + - description: interrupt event for ring txmon2host-monitor-destination-mac2 + - description: interrupt event for ring txmon2host-monitor-destination-mac1 + - description: interrupt event for umac-reset + + interrupt-names: + items: + - const: fatal + - const: ready + - const: spawn + - const: stop-ack + - const: misc-pulse1 + - const: misc-latch + - const: sw-exception + - const: ce0 + - const: ce1 + - const: ce2 + - const: ce3 + - const: ce4 + - const: ce5 + - const: ce6 + - const: ce7 + - const: ce8 + - const: ce9 + - const: ce10 + - const: ce11 + - const: host2wbm-desc-feed + - const: host2reo-re-injection + - const: host2reo-command + - const: host2rxdma-monitor-ring1 + - const: reo2ost-exception + - const: wbm2host-rx-release + - const: reo2host-status + - const: reo2host-destination-ring4 + - const: reo2host-destination-ring3 + - const: reo2host-destination-ring2 + - const: reo2host-destination-ring1 + - const: rxdma2host-monitor-destination-mac3 + - const: rxdma2host-monitor-destination-mac2 + - const: rxdma2host-monitor-destination-mac1 + - const: host2rxdma-host-buf-ring-mac3 + - const: host2rxdma-host-buf-ring-mac2 + - const: host2rxdma-host-buf-ring-mac1 + - const: host2tcl-input-ring4 + - const: host2tcl-input-ring3 + - const: host2tcl-input-ring2 + - const: host2tcl-input-ring1 + - const: wbm2host-tx-completions-ring4 + - const: wbm2host-tx-completions-ring3 + - const: wbm2host-tx-completions-ring2 + - const: wbm2host-tx-completions-ring1 + - const: host2tx-monitor-ring1 + - const: txmon2host-monitor-destination-mac3 + - const: txmon2host-monitor-destination-mac2 + - const: txmon2host-monitor-destination-mac1 + - const: umac-reset + + memory-region: + description: + Memory regions used by the ath12k firmware. + items: + - description: Q6 memory region + - description: m3 dump memory region + - description: Q6 caldata memory region + - description: Multi Link Operation (MLO) Global memory region + + memory-region-names: + items: + - const: q6-region + - const: m3-dump + - const: q6-caldb + - const: mlo-global-mem + + qcom,calibration-variant: + $ref: /schemas/types.yaml#/definitions/string + description: + String to uniquely identify variant of the calibration data for designs + with colliding bus and device ids + + qcom,rproc: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Phandle to the Qualcomm Hexagon DSP(q6 remote processor), which is utilized + for offloading WiFi processing tasks, this q6 remote processor operates in + conjunction with WiFi. 
+ + qcom,smem-states: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: States used by the AP to signal the remote processor + items: + - description: Shutdown WCSS pd + - description: Stop WCSS pd + - description: Spawn WCSS pd + + qcom,smem-state-names: + description: + Names of the states used by the AP to signal the remote processor + items: + - const: shutdown + - const: stop + - const: spawn + +required: + - compatible + - reg + - clocks + - clock-names + - interrupts + - interrupt-names + - memory-region + - memory-region-names + - qcom,rproc + - qcom,smem-states + - qcom,smem-state-names + +additionalProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/clock/qcom,ipq5332-gcc.h> + + wifi0: wifi@c000000 { + compatible = "qcom,ipq5332-wifi"; + reg = <0x0c000000 0x1000000>; + clocks = <&gcc GCC_XO_CLK>; + clock-names = "xo"; + interrupts-extended = <&wcss_smp2p_in 8 IRQ_TYPE_NONE>, + <&wcss_smp2p_in 9 IRQ_TYPE_NONE>, + <&wcss_smp2p_in 12 IRQ_TYPE_NONE>, + <&wcss_smp2p_in 11 IRQ_TYPE_NONE>, + <&intc GIC_SPI 559 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 560 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 561 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 422 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 423 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 424 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 425 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 426 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 427 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 428 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 429 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 430 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 431 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 432 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 433 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 491 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 495 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 493 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 544 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 457 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 466 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 497 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 454 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 453 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 452 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 451 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 488 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 488 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 484 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 554 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 554 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 549 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 507 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 500 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 499 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 498 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 450 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 449 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 448 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 447 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 543 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 486 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 486 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 482 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 419 IRQ_TYPE_EDGE_RISING>; + interrupt-names = "fatal", + "ready", + "spawn", + "stop-ack", + "misc-pulse1", + "misc-latch", + "sw-exception", + "ce0", + "ce1", + "ce2", + "ce3", + "ce4", + "ce5", + "ce6", + "ce7", + "ce8", + "ce9", + "ce10", + "ce11", + "host2wbm-desc-feed", + "host2reo-re-injection", + "host2reo-command", + "host2rxdma-monitor-ring1", + "reo2ost-exception", + "wbm2host-rx-release", + "reo2host-status", + "reo2host-destination-ring4", + "reo2host-destination-ring3", + 
"reo2host-destination-ring2", + "reo2host-destination-ring1", + "rxdma2host-monitor-destination-mac3", + "rxdma2host-monitor-destination-mac2", + "rxdma2host-monitor-destination-mac1", + "host2rxdma-host-buf-ring-mac3", + "host2rxdma-host-buf-ring-mac2", + "host2rxdma-host-buf-ring-mac1", + "host2tcl-input-ring4", + "host2tcl-input-ring3", + "host2tcl-input-ring2", + "host2tcl-input-ring1", + "wbm2host-tx-completions-ring4", + "wbm2host-tx-completions-ring3", + "wbm2host-tx-completions-ring2", + "wbm2host-tx-completions-ring1", + "host2tx-monitor-ring1", + "txmon2host-monitor-destination-mac3", + "txmon2host-monitor-destination-mac2", + "txmon2host-monitor-destination-mac1", + "umac-reset"; + + memory-region = <&q6_region>, <&m3_dump>, <&q6_caldb>, <&mlo_mem>; + memory-region-names = "q6-region", "m3-dump", "q6-caldb", "mlo-global-mem"; + qcom,calibration-variant = "RDP441_1"; + qcom,rproc = <&q6v5_wcss>; + qcom,smem-states = <&wcss_smp2p_out 8>, + <&wcss_smp2p_out 9>, + <&wcss_smp2p_out 10>; + qcom,smem-state-names = "shutdown", + "stop", + "spawn"; + }; diff --git a/Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml b/Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml index 84e5659e50ef..6c0888ae4c4e 100644 --- a/Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml +++ b/Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml @@ -71,15 +71,12 @@ properties: "Platform Data Set" in Silabs jargon). Default depends of "compatible" string. For "silabs,wf200", the default is 'wf200.pds'. - local-mac-address: true - - mac-address: true - required: - compatible - reg allOf: + - $ref: /schemas/net/wireless/wireless-controller.yaml# - $ref: /schemas/spi/spi-peripheral-props.yaml# unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/net/wireless/wireless-controller.yaml b/Documentation/devicetree/bindings/net/wireless/wireless-controller.yaml new file mode 100644 index 000000000000..7379f6c1aa05 --- /dev/null +++ b/Documentation/devicetree/bindings/net/wireless/wireless-controller.yaml @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/wireless/wireless-controller.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Wireless Controller Common Properties + +maintainers: + - Lorenzo Bianconi <lorenzo@kernel.org> + +properties: + $nodename: + pattern: "^wifi(@.*)?$" + +allOf: + - $ref: ieee80211.yaml# + - $ref: /schemas/net/network-class.yaml# + +additionalProperties: true + +... + diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.yaml index fd6db0ca98eb..4fcae6bedfff 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.yaml @@ -54,7 +54,7 @@ properties: - compatible wifi: - additionalProperties: false + unevaluatedProperties: false type: object properties: compatible: @@ -88,6 +88,9 @@ properties: - qcom,smem-states - qcom,smem-state-names + allOf: + - $ref: /schemas/net/wireless/wireless-controller.yaml# + required: - compatible - qcom,mmio diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index 96fa1f1522ed..5a234e9b5fa2 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -148,6 +148,9 @@ properties: attr-max-name: description: The explicit name for last member of attribute enum. 
type: string + header: + description: For C-compatible languages, header which already defines this attribute set. + type: string # End genetlink-c attributes: description: List of attributes in the space. diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index a8c5b521937d..4cbfe666e6f5 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -193,6 +193,9 @@ properties: attr-max-name: description: The explicit name for last member of attribute enum. type: string + header: + description: For C-compatible languages, header which already defines this attribute set. + type: string # End genetlink-c attributes: description: List of attributes in the space. diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml index 1b0772c8e333..e34bf23897fa 100644 --- a/Documentation/netlink/netlink-raw.yaml +++ b/Documentation/netlink/netlink-raw.yaml @@ -207,6 +207,9 @@ properties: attr-max-name: description: The explicit name for last member of attribute enum. type: string + header: + description: For C-compatible languages, header which already defines this attribute set. + type: string # End genetlink-c attributes: description: List of attributes in the space. diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index bd9726269b4f..05fee1b7fe19 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -202,6 +202,28 @@ definitions: name: exception - name: control + - + type: enum + name: var-attr-type + entries: + - + name: u8 + value: 1 + - + name: u16 + - + name: u32 + - + name: u64 + - + name: string + - + name: flag + - + name: nul_string + value: 10 + - + name: binary attribute-sets: - @@ -498,6 +520,7 @@ attribute-sets: - name: param-type type: u8 + enum: var-attr-type # TODO: fill in the attributes in between @@ -592,6 +615,7 @@ attribute-sets: - name: fmsg-obj-value-type type: u8 + enum: var-attr-type # TODO: fill in the attributes in between diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index f5e0750ab71d..c0ef6d0d7786 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -743,6 +743,18 @@ operations: - defer-hard-irqs - gro-flush-timeout - irq-suspend-timeout + - + name: bind-tx + doc: Bind dmabuf to netdev for TX + attribute-set: dmabuf + do: + request: + attributes: + - ifindex + - fd + reply: + attributes: + - id kernel-family: headers: [ "net/netdev_netlink.h"] diff --git a/Documentation/netlink/specs/nl80211.yaml b/Documentation/netlink/specs/nl80211.yaml index 1ec49c3562cd..3611b11a7d8f 100644 --- a/Documentation/netlink/specs/nl80211.yaml +++ b/Documentation/netlink/specs/nl80211.yaml @@ -204,71 +204,6 @@ definitions: - sched-scan-random-mac-addr - no-random-mac-addr - - name: ieee80211-mcs-info - type: struct - members: - - - name: rx-mask - type: binary - len: 10 - - - name: rx-highest - type: u16 - byte-order: little-endian - - - name: tx-params - type: u8 - - - name: reserved - type: binary - len: 3 - - - name: ieee80211-vht-mcs-info - type: struct - members: - - - name: rx-mcs-map - type: u16 - byte-order: little-endian - - - name: rx-highest - type: u16 - byte-order: little-endian - - - name: tx-mcs-map - type: u16 - byte-order: little-endian - - - name: tx-highest - type: u16 - byte-order: little-endian - - - name: ieee80211-ht-cap - type: struct - 
members: - - - name: cap-info - type: u16 - byte-order: little-endian - - - name: ampdu-params-info - type: u8 - - - name: mcs - type: binary - struct: ieee80211-mcs-info - - - name: extended-ht-cap-info - type: u16 - byte-order: little-endian - - - name: tx-bf-cap-info - type: u32 - byte-order: little-endian - - - name: antenna-selection-info - type: u8 - - name: channel-type type: enum entries: @@ -761,7 +696,6 @@ attribute-sets: - name: ht-capability-mask type: binary - struct: ieee80211-ht-cap - name: noack-map type: u16 @@ -1382,7 +1316,6 @@ attribute-sets: - name: ht-mcs-set type: binary - struct: ieee80211-mcs-info - name: ht-capa type: u16 @@ -1395,7 +1328,6 @@ attribute-sets: - name: vht-mcs-set type: binary - struct: ieee80211-vht-mcs-info - name: vht-capa type: u32 diff --git a/Documentation/netlink/specs/ovpn.yaml b/Documentation/netlink/specs/ovpn.yaml new file mode 100644 index 000000000000..096c51f0c69a --- /dev/null +++ b/Documentation/netlink/specs/ovpn.yaml @@ -0,0 +1,367 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +# +# Author: Antonio Quartulli <antonio@openvpn.net> +# +# Copyright (c) 2024-2025, OpenVPN Inc. +# + +name: ovpn + +protocol: genetlink + +doc: Netlink protocol to control OpenVPN network devices + +definitions: + - + type: const + name: nonce-tail-size + value: 8 + - + type: enum + name: cipher-alg + entries: [ none, aes-gcm, chacha20-poly1305 ] + - + type: enum + name: del-peer-reason + entries: + - teardown + - userspace + - expired + - transport-error + - transport-disconnect + - + type: enum + name: key-slot + entries: [ primary, secondary ] + +attribute-sets: + - + name: peer + attributes: + - + name: id + type: u32 + doc: >- + The unique ID of the peer in the device context. 
To be used to identify + peers during operations for a specific device + checks: + max: 0xFFFFFF + - + name: remote-ipv4 + type: u32 + doc: The remote IPv4 address of the peer + byte-order: big-endian + display-hint: ipv4 + - + name: remote-ipv6 + type: binary + doc: The remote IPv6 address of the peer + display-hint: ipv6 + checks: + exact-len: 16 + - + name: remote-ipv6-scope-id + type: u32 + doc: The scope id of the remote IPv6 address of the peer (RFC2553) + - + name: remote-port + type: u16 + doc: The remote port of the peer + byte-order: big-endian + checks: + min: 1 + - + name: socket + type: u32 + doc: The socket to be used to communicate with the peer + - + name: socket-netnsid + type: s32 + doc: The ID of the netns the socket assigned to this peer lives in + - + name: vpn-ipv4 + type: u32 + doc: The IPv4 address assigned to the peer by the server + byte-order: big-endian + display-hint: ipv4 + - + name: vpn-ipv6 + type: binary + doc: The IPv6 address assigned to the peer by the server + display-hint: ipv6 + checks: + exact-len: 16 + - + name: local-ipv4 + type: u32 + doc: The local IPv4 to be used to send packets to the peer (UDP only) + byte-order: big-endian + display-hint: ipv4 + - + name: local-ipv6 + type: binary + doc: The local IPv6 to be used to send packets to the peer (UDP only) + display-hint: ipv6 + checks: + exact-len: 16 + - + name: local-port + type: u16 + doc: The local port to be used to send packets to the peer (UDP only) + byte-order: big-endian + checks: + min: 1 + - + name: keepalive-interval + type: u32 + doc: >- + The number of seconds after which a keep alive message is sent to the + peer + - + name: keepalive-timeout + type: u32 + doc: >- + The number of seconds from the last activity after which the peer is + assumed dead + - + name: del-reason + type: u32 + doc: The reason why a peer was deleted + enum: del-peer-reason + - + name: vpn-rx-bytes + type: uint + doc: Number of bytes received over the tunnel + - + name: vpn-tx-bytes + type: uint + doc: Number of bytes transmitted over the tunnel + - + name: vpn-rx-packets + type: uint + doc: Number of packets received over the tunnel + - + name: vpn-tx-packets + type: uint + doc: Number of packets transmitted over the tunnel + - + name: link-rx-bytes + type: uint + doc: Number of bytes received at the transport level + - + name: link-tx-bytes + type: uint + doc: Number of bytes transmitted at the transport level + - + name: link-rx-packets + type: uint + doc: Number of packets received at the transport level + - + name: link-tx-packets + type: uint + doc: Number of packets transmitted at the transport level + - + name: keyconf + attributes: + - + name: peer-id + type: u32 + doc: >- + The unique ID of the peer in the device context. To be used to + identify peers during key operations + checks: + max: 0xFFFFFF + - + name: slot + type: u32 + doc: The slot where the key should be stored + enum: key-slot + - + name: key-id + doc: >- + The unique ID of the key in the peer context. 
Used to fetch the + correct key upon decryption + type: u32 + checks: + max: 7 + - + name: cipher-alg + type: u32 + doc: The cipher to be used when communicating with the peer + enum: cipher-alg + - + name: encrypt-dir + type: nest + doc: Key material for encrypt direction + nested-attributes: keydir + - + name: decrypt-dir + type: nest + doc: Key material for decrypt direction + nested-attributes: keydir + - + name: keydir + attributes: + - + name: cipher-key + type: binary + doc: The actual key to be used by the cipher + checks: + max-len: 256 + - + name: nonce-tail + type: binary + doc: >- + Random nonce to be concatenated to the packet ID, in order to + obtain the actual cipher IV + checks: + exact-len: nonce-tail-size + - + name: ovpn + attributes: + - + name: ifindex + type: u32 + doc: Index of the ovpn interface to operate on + - + name: peer + type: nest + doc: >- + The peer object containing the attributed of interest for the specific + operation + nested-attributes: peer + - + name: keyconf + type: nest + doc: Peer specific cipher configuration + nested-attributes: keyconf + +operations: + list: + - + name: peer-new + attribute-set: ovpn + flags: [ admin-perm ] + doc: Add a remote peer + do: + pre: ovpn-nl-pre-doit + post: ovpn-nl-post-doit + request: + attributes: + - ifindex + - peer + - + name: peer-set + attribute-set: ovpn + flags: [ admin-perm ] + doc: modify a remote peer + do: + pre: ovpn-nl-pre-doit + post: ovpn-nl-post-doit + request: + attributes: + - ifindex + - peer + - + name: peer-get + attribute-set: ovpn + flags: [ admin-perm ] + doc: Retrieve data about existing remote peers (or a specific one) + do: + pre: ovpn-nl-pre-doit + post: ovpn-nl-post-doit + request: + attributes: + - ifindex + - peer + reply: + attributes: + - peer + dump: + request: + attributes: + - ifindex + reply: + attributes: + - peer + - + name: peer-del + attribute-set: ovpn + flags: [ admin-perm ] + doc: Delete existing remote peer + do: + pre: ovpn-nl-pre-doit + post: ovpn-nl-post-doit + request: + attributes: + - ifindex + - peer + - + name: peer-del-ntf + doc: Notification about a peer being deleted + notify: peer-get + mcgrp: peers + + - + name: key-new + attribute-set: ovpn + flags: [ admin-perm ] + doc: Add a cipher key for a specific peer + do: + pre: ovpn-nl-pre-doit + post: ovpn-nl-post-doit + request: + attributes: + - ifindex + - keyconf + - + name: key-get + attribute-set: ovpn + flags: [ admin-perm ] + doc: Retrieve non-sensitive data about peer key and cipher + do: + pre: ovpn-nl-pre-doit + post: ovpn-nl-post-doit + request: + attributes: + - ifindex + - keyconf + reply: + attributes: + - keyconf + - + name: key-swap + attribute-set: ovpn + flags: [ admin-perm ] + doc: Swap primary and secondary session keys for a specific peer + do: + pre: ovpn-nl-pre-doit + post: ovpn-nl-post-doit + request: + attributes: + - ifindex + - keyconf + - + name: key-swap-ntf + notify: key-get + doc: >- + Notification about key having exhausted its IV space and requiring + renegotiation + mcgrp: peers + - + name: key-del + attribute-set: ovpn + flags: [ admin-perm ] + doc: Delete cipher key for a specific peer + do: + pre: ovpn-nl-pre-doit + post: ovpn-nl-post-doit + request: + attributes: + - ifindex + - keyconf + +mcast-groups: + list: + - + name: peers diff --git a/Documentation/netlink/specs/ovs_datapath.yaml b/Documentation/netlink/specs/ovs_datapath.yaml index edc8c95ca6f5..df6a8f94975e 100644 --- a/Documentation/netlink/specs/ovs_datapath.yaml +++ 
b/Documentation/netlink/specs/ovs_datapath.yaml @@ -35,8 +35,7 @@ definitions: name: dispatch-upcall-per-cpu doc: Allow per-cpu dispatch of upcalls - - name: datapath-stats - enum-name: ovs-dp-stats + name: ovs-dp-stats type: struct members: - @@ -52,8 +51,7 @@ definitions: name: n-flows type: u64 - - name: megaflow-stats - enum-name: ovs-dp-megaflow-stats + name: ovs-dp-megaflow-stats type: struct members: - @@ -88,11 +86,11 @@ attribute-sets: - name: stats type: binary - struct: datapath-stats + struct: ovs-dp-stats - name: megaflow-stats type: binary - struct: megaflow-stats + struct: ovs-dp-megaflow-stats - name: user-features type: u32 diff --git a/Documentation/netlink/specs/ovs_vport.yaml b/Documentation/netlink/specs/ovs_vport.yaml index b538bb99ee9b..306da6bb842d 100644 --- a/Documentation/netlink/specs/ovs_vport.yaml +++ b/Documentation/netlink/specs/ovs_vport.yaml @@ -23,9 +23,8 @@ definitions: name-prefix: ovs-vport-type- entries: [ unspec, netdev, internal, gre, vxlan, geneve ] - - name: vport-stats + name: ovs-vport-stats type: struct - enum-name: ovs-vport-stats members: - name: rx-packets @@ -106,7 +105,7 @@ attribute-sets: - name: stats type: binary - struct: vport-stats + struct: ovs-vport-stats - name: pad type: unused diff --git a/Documentation/netlink/specs/rt_addr.yaml b/Documentation/netlink/specs/rt-addr.yaml index df6b23f06a22..4f86aa1075da 100644 --- a/Documentation/netlink/specs/rt_addr.yaml +++ b/Documentation/netlink/specs/rt-addr.yaml @@ -2,6 +2,7 @@ name: rt-addr protocol: netlink-raw +uapi-header: linux/rtnetlink.h protonum: 0 doc: @@ -49,6 +50,8 @@ definitions: - name: ifa-flags type: flags + name-prefix: ifa-f- + enum-name: entries: - name: secondary @@ -124,6 +127,7 @@ attribute-sets: operations: fixed-header: ifaddrmsg enum-model: directional + name-prefix: rtm- list: - name: newaddr @@ -133,11 +137,6 @@ operations: request: value: 20 attributes: &ifaddr-all - - ifa-family - - ifa-flags - - ifa-prefixlen - - ifa-scope - - ifa-index - address - label - local @@ -150,11 +149,6 @@ operations: request: value: 21 attributes: - - ifa-family - - ifa-flags - - ifa-prefixlen - - ifa-scope - - ifa-index - address - local - @@ -164,8 +158,7 @@ operations: dump: request: value: 22 - attributes: - - ifa-index + attributes: [] reply: value: 20 attributes: *ifaddr-all @@ -177,9 +170,7 @@ operations: do: request: value: 58 - attributes: - - ifa-family - - ifa-index + attributes: [] reply: value: 58 attributes: &mcaddr-attrs @@ -188,8 +179,7 @@ operations: dump: request: value: 58 - attributes: - - ifa-family + attributes: [] reply: value: 58 attributes: *mcaddr-attrs diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt-link.yaml index 6b9d5ee87d93..7f91f474ff25 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt-link.yaml @@ -2,6 +2,7 @@ name: rt-link protocol: netlink-raw +uapi-header: linux/rtnetlink.h protonum: 0 doc: @@ -11,6 +12,9 @@ definitions: - name: ifinfo-flags type: flags + header: linux/if.h + enum-name: net-device-flags + name-prefix: iff- entries: - name: up @@ -53,6 +57,7 @@ definitions: - name: vlan-protocols type: enum + enum-name: entries: - name: 8021q @@ -299,419 +304,294 @@ definitions: type: u8 - name: ipv4-devconf - type: struct - members: + enum-name: + type: enum + entries: - name: forwarding - type: u32 - name: mc-forwarding - type: u32 - name: proxy-arp - type: u32 - name: accept-redirects - type: u32 - name: secure-redirects - type: u32 - name: send-redirects - type: 
u32 - name: shared-media - type: u32 - name: rp-filter - type: u32 - name: accept-source-route - type: u32 - name: bootp-relay - type: u32 - name: log-martians - type: u32 - name: tag - type: u32 - name: arpfilter - type: u32 - name: medium-id - type: u32 - name: noxfrm - type: u32 - name: nopolicy - type: u32 - name: force-igmp-version - type: u32 - name: arp-announce - type: u32 - name: arp-ignore - type: u32 - name: promote-secondaries - type: u32 - name: arp-accept - type: u32 - name: arp-notify - type: u32 - name: accept-local - type: u32 - name: src-vmark - type: u32 - name: proxy-arp-pvlan - type: u32 - name: route-localnet - type: u32 - name: igmpv2-unsolicited-report-interval - type: u32 - name: igmpv3-unsolicited-report-interval - type: u32 - name: ignore-routes-with-linkdown - type: u32 - name: drop-unicast-in-l2-multicast - type: u32 - name: drop-gratuitous-arp - type: u32 - name: bc-forwarding - type: u32 - name: arp-evict-nocarrier - type: u32 - name: ipv6-devconf - type: struct - members: + enum-name: + type: enum + entries: - name: forwarding - type: u32 - name: hoplimit - type: u32 - name: mtu6 - type: u32 - name: accept-ra - type: u32 - name: accept-redirects - type: u32 - name: autoconf - type: u32 - name: dad-transmits - type: u32 - name: rtr-solicits - type: u32 - name: rtr-solicit-interval - type: u32 - name: rtr-solicit-delay - type: u32 - name: use-tempaddr - type: u32 - name: temp-valid-lft - type: u32 - name: temp-prefered-lft - type: u32 - name: regen-max-retry - type: u32 - name: max-desync-factor - type: u32 - name: max-addresses - type: u32 - name: force-mld-version - type: u32 - name: accept-ra-defrtr - type: u32 - name: accept-ra-pinfo - type: u32 - name: accept-ra-rtr-pref - type: u32 - name: rtr-probe-interval - type: u32 - name: accept-ra-rt-info-max-plen - type: u32 - name: proxy-ndp - type: u32 - name: optimistic-dad - type: u32 - name: accept-source-route - type: u32 - name: mc-forwarding - type: u32 - name: disable-ipv6 - type: u32 - name: accept-dad - type: u32 - name: force-tllao - type: u32 - name: ndisc-notify - type: u32 - name: mldv1-unsolicited-report-interval - type: u32 - name: mldv2-unsolicited-report-interval - type: u32 - name: suppress-frag-ndisc - type: u32 - name: accept-ra-from-local - type: u32 - name: use-optimistic - type: u32 - name: accept-ra-mtu - type: u32 - name: stable-secret - type: u32 - name: use-oif-addrs-only - type: u32 - name: accept-ra-min-hop-limit - type: u32 - name: ignore-routes-with-linkdown - type: u32 - name: drop-unicast-in-l2-multicast - type: u32 - name: drop-unsolicited-na - type: u32 - name: keep-addr-on-down - type: u32 - name: rtr-solicit-max-interval - type: u32 - name: seg6-enabled - type: u32 - name: seg6-require-hmac - type: u32 - name: enhanced-dad - type: u32 - name: addr-gen-mode - type: u8 - name: disable-policy - type: u32 - name: accept-ra-rt-info-min-plen - type: u32 - name: ndisc-tclass - type: u32 - name: rpl-seg-enabled - type: u32 - name: ra-defrtr-metric - type: u32 - name: ioam6-enabled - type: u32 - name: ioam6-id - type: u32 - name: ioam6-id-wide - type: u32 - name: ndisc-evict-nocarrier - type: u32 - name: accept-untracked-na - type: u32 - name: ifla-icmp6-stats - type: struct - members: + enum-name: + type: enum + entries: + - + name: num - name: inmsgs - type: u64 - name: inerrors - type: u64 - name: outmsgs - type: u64 - name: outerrors - type: u64 - name: csumerrors - type: u64 - name: ratelimithost - type: u64 - name: ifla-inet6-stats - type: struct - members: + enum-name: + type: 
enum + entries: + - + name: num - name: inpkts - type: u64 - name: inoctets - type: u64 - name: indelivers - type: u64 - name: outforwdatagrams - type: u64 - name: outpkts - type: u64 - name: outoctets - type: u64 - name: inhdrerrors - type: u64 - name: intoobigerrors - type: u64 - name: innoroutes - type: u64 - name: inaddrerrors - type: u64 - name: inunknownprotos - type: u64 - name: intruncatedpkts - type: u64 - name: indiscards - type: u64 - name: outdiscards - type: u64 - name: outnoroutes - type: u64 - name: reasmtimeout - type: u64 - name: reasmreqds - type: u64 - name: reasmoks - type: u64 - name: reasmfails - type: u64 - name: fragoks - type: u64 - name: fragfails - type: u64 - name: fragcreates - type: u64 - name: inmcastpkts - type: u64 - name: outmcastpkts - type: u64 - name: inbcastpkts - type: u64 - name: outbcastpkts - type: u64 - name: inmcastoctets - type: u64 - name: outmcastoctets - type: u64 - name: inbcastoctets - type: u64 - name: outbcastoctets - type: u64 - name: csumerrors - type: u64 - name: noectpkts - type: u64 - name: ect1-pkts - type: u64 - name: ect0-pkts - type: u64 - name: cepkts - type: u64 - name: reasm-overlaps - type: u64 - name: br-boolopt-multi type: struct members: @@ -754,6 +634,7 @@ definitions: - name: vlan-flags type: flags + enum-name: entries: - reorder-hdr - gvrp @@ -840,6 +721,7 @@ definitions: - name: ifla-vf-link-state-enum type: enum + enum-name: entries: - auto - enable @@ -906,6 +788,7 @@ definitions: - name: rtext-filter type: flags + enum-name: entries: - vf - brvlan @@ -918,6 +801,7 @@ definitions: - name: netkit-policy type: enum + enum-name: entries: - name: forward @@ -928,6 +812,7 @@ definitions: - name: netkit-mode type: enum + enum-name: netkit-mode entries: - name: l2 - name: l3 @@ -935,9 +820,16 @@ definitions: - name: netkit-scrub type: enum + enum-name: entries: - name: none - name: default + - + name: ovpn-mode + type: enum + entries: + - p2p + - mp attribute-sets: - @@ -1171,24 +1063,27 @@ attribute-sets: multi-attr: true - name: af-spec-attrs + name-prefix: af- + attr-max-name: af-max attributes: - - name: "inet" + name: inet type: nest value: 2 nested-attributes: ifla-attrs - - name: "inet6" + name: inet6 type: nest value: 10 nested-attributes: ifla6-attrs - - name: "mctp" + name: mctp type: nest value: 45 nested-attributes: mctp-attrs - name: vfinfo-list-attrs + name-prefix: ifla-vf- attributes: - name: info @@ -1197,6 +1092,7 @@ attribute-sets: multi-attr: true - name: vfinfo-attrs + name-prefix: ifla-vf- attributes: - name: mac @@ -1251,6 +1147,7 @@ attribute-sets: type: binary - name: vf-stats-attrs + name-prefix: ifla-vf-stats- attributes: - name: rx-packets @@ -1282,6 +1179,8 @@ attribute-sets: type: u64 - name: vf-vlan-attrs + name-prefix: ifla-vf-vlan- + attr-max-name: ifla-vf-vlan-info-max attributes: - name: info @@ -1290,12 +1189,15 @@ attribute-sets: multi-attr: true - name: vf-ports-attrs + name-prefix: ifla- attributes: [] - name: port-self-attrs + name-prefix: ifla- attributes: [] - name: linkinfo-attrs + name-prefix: ifla-info- attributes: - name: kind @@ -1420,6 +1322,8 @@ attribute-sets: type: indexed-array sub-type: binary display-hint: ipv6 + checks: + exact-len: 16 - name: coupled-control type: u8 @@ -1849,6 +1753,7 @@ attribute-sets: - name: linkinfo-vti-attrs name-prefix: ifla-vti- + header: linux/if_tunnel.h attributes: - name: link @@ -2101,7 +2006,7 @@ attribute-sets: byte-order: big-endian - name: ifla-vlan-qos - name-prefix: ifla-vlan-qos + name-prefix: ifla-vlan-qos- attributes: - name: 
mapping @@ -2117,6 +2022,7 @@ attribute-sets: type: u32 - name: xdp-attrs + name-prefix: ifla-xdp- attributes: - name: fd @@ -2144,13 +2050,16 @@ attribute-sets: type: s32 - name: ifla-attrs + name-prefix: ifla-inet- attributes: - name: conf type: binary - struct: ipv4-devconf + sub-type: u32 + doc: u32 indexed by ipv4-devconf - 1 on output, on input it's a nest - name: ifla6-attrs + name-prefix: ifla-inet6- attributes: - name: flags @@ -2158,11 +2067,12 @@ attribute-sets: - name: conf type: binary - struct: ipv6-devconf + sub-type: u32 + doc: u32 indexed by ipv6-devconf - 1 on output, on input it's a nest - name: stats type: binary - struct: ifla-inet6-stats + sub-type: u64 - name: mcast type: binary @@ -2173,7 +2083,7 @@ attribute-sets: - name: icmp6stats type: binary - struct: ifla-icmp6-stats + sub-type: u64 - name: token type: binary @@ -2216,6 +2126,7 @@ attribute-sets: type: binary - name: link-offload-xstats + name-prefix: ifla-offload-xstats- attributes: - name: cpu-hit @@ -2230,6 +2141,7 @@ attribute-sets: type: binary - name: hw-s-info-one + name-prefix: ifla-offload-xstats-hw-s-info- attributes: - name: request @@ -2239,6 +2151,8 @@ attribute-sets: type: u8 - name: link-dpll-pin-attrs + name-prefix: dpll-a- + header: linux/dpll.h attributes: - name: id @@ -2279,6 +2193,13 @@ attribute-sets: - name: tailroom type: u16 + - + name: linkinfo-ovpn-attrs + attributes: + - + name: mode + type: u8 + enum: ovpn-mode sub-messages: - @@ -2329,6 +2250,9 @@ sub-messages: - value: netkit attribute-set: linkinfo-netkit-attrs + - + value: ovpn + attribute-set: linkinfo-ovpn-attrs - name: linkinfo-member-data-msg formats: @@ -2341,6 +2265,7 @@ sub-messages: operations: enum-model: directional + name-prefix: rtm- list: - name: newlink @@ -2351,7 +2276,6 @@ operations: request: value: 16 attributes: &link-new-attrs - - ifi-index - ifname - net-ns-pid - net-ns-fd @@ -2367,7 +2291,6 @@ operations: - txqlen - operstate - linkmode - - group - gso-max-size - gso-max-segs - gro-max-size @@ -2375,6 +2298,12 @@ operations: - gro-ipv4-max-size - af-spec - + name: newlink-ntf + doc: Notify that a link has been created + value: 16 + notify: getlink + fixed-header: ifinfomsg + - name: dellink doc: Delete an existing link. 
attribute-set: link-attrs @@ -2383,7 +2312,6 @@ operations: request: value: 17 attributes: - - ifi-index - ifname - name: getlink @@ -2394,7 +2322,6 @@ operations: request: value: 18 attributes: - - ifi-index - ifname - alt-ifname - ext-mask @@ -2402,11 +2329,6 @@ operations: reply: value: 16 attributes: &link-all-attrs - - ifi-family - - ifi-type - - ifi-index - - ifi-flags - - ifi-change - address - broadcast - ifname @@ -2452,7 +2374,6 @@ operations: - xdp - event - new-netnsid - - if-netnsid - target-netnsid - carrier-up-count - carrier-down-count @@ -2499,14 +2420,9 @@ operations: do: request: value: 94 - attributes: - - ifindex reply: value: 92 attributes: &link-stats-attrs - - family - - ifindex - - filter-mask - link-64 - link-xstats - link-xstats-slave diff --git a/Documentation/netlink/specs/rt_neigh.yaml b/Documentation/netlink/specs/rt-neigh.yaml index a843caa72259..e9cba164e3d1 100644 --- a/Documentation/netlink/specs/rt_neigh.yaml +++ b/Documentation/netlink/specs/rt-neigh.yaml @@ -2,6 +2,7 @@ name: rt-neigh protocol: netlink-raw +uapi-header: linux/rtnetlink.h protonum: 0 doc: @@ -48,6 +49,7 @@ definitions: - name: nud-state type: flags + enum-name: entries: - incomplete - reachable @@ -60,6 +62,7 @@ definitions: - name: ntf-flags type: flags + enum-name: entries: - use - self @@ -72,12 +75,14 @@ definitions: - name: ntf-ext-flags type: flags + enum-name: entries: - managed - locked - name: rtm-type type: enum + enum-name: entries: - unspec - unicast @@ -179,6 +184,7 @@ definitions: attribute-sets: - name: neighbour-attrs + name-prefix: nda- attributes: - name: unspec @@ -241,6 +247,7 @@ attribute-sets: type: u8 - name: ndt-attrs + name-prefix: ndta- attributes: - name: name @@ -274,6 +281,7 @@ attribute-sets: type: pad - name: ndtpa-attrs + name-prefix: ndtpa- attributes: - name: ifindex @@ -335,6 +343,7 @@ attribute-sets: operations: enum-model: directional + name-prefix: rtm- list: - name: newneigh @@ -372,7 +381,7 @@ operations: name: delneigh-ntf doc: Notify a neighbour deletion value: 29 - notify: delneigh + notify: getneigh fixed-header: ndmsg - name: getneigh @@ -393,6 +402,7 @@ operations: - ifindex - master reply: + value: 28 attributes: *neighbour-all - name: newneigh-ntf diff --git a/Documentation/netlink/specs/rt_route.yaml b/Documentation/netlink/specs/rt-route.yaml index 292469c7d4b9..800f3a823d47 100644 --- a/Documentation/netlink/specs/rt_route.yaml +++ b/Documentation/netlink/specs/rt-route.yaml @@ -2,6 +2,7 @@ name: rt-route protocol: netlink-raw +uapi-header: linux/rtnetlink.h protonum: 0 doc: @@ -11,6 +12,7 @@ definitions: - name: rtm-type name-prefix: rtn- + enum-name: type: enum entries: - unspec @@ -245,21 +247,19 @@ attribute-sets: operations: enum-model: directional + fixed-header: rtmsg + name-prefix: rtm- list: - name: getroute doc: Dump route information. 
attribute-set: route-attrs - fixed-header: rtmsg do: request: value: 26 attributes: - - rtm-family - src - - rtm-src-len - dst - - rtm-dst-len - iif - oif - ip-proto @@ -271,15 +271,6 @@ operations: reply: value: 24 attributes: &all-route-attrs - - rtm-family - - rtm-dst-len - - rtm-src-len - - rtm-tos - - rtm-table - - rtm-protocol - - rtm-scope - - rtm-type - - rtm-flags - dst - src - iif @@ -311,8 +302,7 @@ operations: dump: request: value: 26 - attributes: - - rtm-family + attributes: [] reply: value: 24 attributes: *all-route-attrs @@ -320,7 +310,6 @@ operations: name: newroute doc: Create a new route attribute-set: route-attrs - fixed-header: rtmsg do: request: value: 24 @@ -329,7 +318,6 @@ operations: name: delroute doc: Delete an existing route attribute-set: route-attrs - fixed-header: rtmsg do: request: value: 25 diff --git a/Documentation/netlink/specs/rt_rule.yaml b/Documentation/netlink/specs/rt-rule.yaml index de0938d36541..003707ca4a3e 100644 --- a/Documentation/netlink/specs/rt_rule.yaml +++ b/Documentation/netlink/specs/rt-rule.yaml @@ -2,6 +2,7 @@ name: rt-rule protocol: netlink-raw +uapi-header: linux/fib_rules.h protonum: 0 doc: @@ -56,6 +57,7 @@ definitions: - name: fr-act type: enum + enum-name: entries: - unspec - to-tbl @@ -90,6 +92,7 @@ definitions: attribute-sets: - name: fib-rule-attrs + name-prefix: fra- attributes: - name: dst @@ -198,6 +201,7 @@ attribute-sets: operations: enum-model: directional fixed-header: fib-rule-hdr + name-prefix: rtm- list: - name: newrule @@ -234,7 +238,7 @@ operations: name: newrule-ntf doc: Notify a rule creation value: 32 - notify: newrule + notify: getrule - name: delrule doc: Remove an existing FIB rule @@ -247,7 +251,7 @@ operations: name: delrule-ntf doc: Notify a rule deletion value: 33 - notify: delrule + notify: getrule - name: getrule doc: Dump all FIB rules diff --git a/Documentation/networking/dccp.rst b/Documentation/networking/dccp.rst deleted file mode 100644 index 91e5c33ba3ff..000000000000 --- a/Documentation/networking/dccp.rst +++ /dev/null @@ -1,219 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -============= -DCCP protocol -============= - - -.. Contents - - Introduction - - Missing features - - Socket options - - Sysctl variables - - IOCTLs - - Other tunables - - Notes - - -Introduction -============ -Datagram Congestion Control Protocol (DCCP) is an unreliable, connection -oriented protocol designed to solve issues present in UDP and TCP, particularly -for real-time and multimedia (streaming) traffic. -It divides into a base protocol (RFC 4340) and pluggable congestion control -modules called CCIDs. Like pluggable TCP congestion control, at least one CCID -needs to be enabled in order for the protocol to function properly. In the Linux -implementation, this is the TCP-like CCID2 (RFC 4341). Additional CCIDs, such as -the TCP-friendly CCID3 (RFC 4342), are optional. -For a brief introduction to CCIDs and suggestions for choosing a CCID to match -given applications, see section 10 of RFC 4340. - -It has a base protocol and pluggable congestion control IDs (CCIDs). - -DCCP is a Proposed Standard (RFC 2026), and the homepage for DCCP as a protocol -is at http://www.ietf.org/html.charters/dccp-charter.html - - -Missing features -================ -The Linux DCCP implementation does not currently support all the features that are -specified in RFCs 4340...42. 
- -The known bugs are at: - - http://www.linuxfoundation.org/collaborate/workgroups/networking/todo#DCCP - -For more up-to-date versions of the DCCP implementation, please consider using -the experimental DCCP test tree; instructions for checking this out are on: -http://www.linuxfoundation.org/collaborate/workgroups/networking/dccp_testing#Experimental_DCCP_source_tree - - -Socket options -============== -DCCP_SOCKOPT_QPOLICY_ID sets the dequeuing policy for outgoing packets. It takes -a policy ID as argument and can only be set before the connection (i.e. changes -during an established connection are not supported). Currently, two policies are -defined: the "simple" policy (DCCPQ_POLICY_SIMPLE), which does nothing special, -and a priority-based variant (DCCPQ_POLICY_PRIO). The latter allows to pass an -u32 priority value as ancillary data to sendmsg(), where higher numbers indicate -a higher packet priority (similar to SO_PRIORITY). This ancillary data needs to -be formatted using a cmsg(3) message header filled in as follows:: - - cmsg->cmsg_level = SOL_DCCP; - cmsg->cmsg_type = DCCP_SCM_PRIORITY; - cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); /* or CMSG_LEN(4) */ - -DCCP_SOCKOPT_QPOLICY_TXQLEN sets the maximum length of the output queue. A zero -value is always interpreted as unbounded queue length. If different from zero, -the interpretation of this parameter depends on the current dequeuing policy -(see above): the "simple" policy will enforce a fixed queue size by returning -EAGAIN, whereas the "prio" policy enforces a fixed queue length by dropping the -lowest-priority packet first. The default value for this parameter is -initialised from /proc/sys/net/dccp/default/tx_qlen. - -DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of -service codes (RFC 4340, sec. 8.1.2); if this socket option is not set, -the socket will fall back to 0 (which means that no meaningful service code -is present). On active sockets this is set before connect(); specifying more -than one code has no effect (all subsequent service codes are ignored). The -case is different for passive sockets, where multiple service codes (up to 32) -can be set before calling bind(). - -DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet -size (application payload size) in bytes, see RFC 4340, section 14. - -DCCP_SOCKOPT_AVAILABLE_CCIDS is also read-only and returns the list of CCIDs -supported by the endpoint. The option value is an array of type uint8_t whose -size is passed as option length. The minimum array size is 4 elements, the -value returned in the optlen argument always reflects the true number of -built-in CCIDs. - -DCCP_SOCKOPT_CCID is write-only and sets both the TX and RX CCIDs at the same -time, combining the operation of the next two socket options. This option is -preferable over the latter two, since often applications will use the same -type of CCID for both directions; and mixed use of CCIDs is not currently well -understood. This socket option takes as argument at least one uint8_t value, or -an array of uint8_t values, which must match available CCIDS (see above). CCIDs -must be registered on the socket before calling connect() or listen(). - -DCCP_SOCKOPT_TX_CCID is read/write. It returns the current CCID (if set) or sets -the preference list for the TX CCID, using the same format as DCCP_SOCKOPT_CCID. -Please note that the getsockopt argument type here is ``int``, not uint8_t. 
- -DCCP_SOCKOPT_RX_CCID is analogous to DCCP_SOCKOPT_TX_CCID, but for the RX CCID. - -DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold -timewait state when closing the connection (RFC 4340, 8.3). The usual case is -that the closing server sends a CloseReq, whereupon the client holds timewait -state. When this boolean socket option is on, the server sends a Close instead -and will enter TIMEWAIT. This option must be set after accept() returns. - -DCCP_SOCKOPT_SEND_CSCOV and DCCP_SOCKOPT_RECV_CSCOV are used for setting the -partial checksum coverage (RFC 4340, sec. 9.2). The default is that checksums -always cover the entire packet and that only fully covered application data is -accepted by the receiver. Hence, when using this feature on the sender, it must -be enabled at the receiver, too with suitable choice of CsCov. - -DCCP_SOCKOPT_SEND_CSCOV sets the sender checksum coverage. Values in the - range 0..15 are acceptable. The default setting is 0 (full coverage), - values between 1..15 indicate partial coverage. - -DCCP_SOCKOPT_RECV_CSCOV is for the receiver and has a different meaning: it - sets a threshold, where again values 0..15 are acceptable. The default - of 0 means that all packets with a partial coverage will be discarded. - Values in the range 1..15 indicate that packets with minimally such a - coverage value are also acceptable. The higher the number, the more - restrictive this setting (see [RFC 4340, sec. 9.2.1]). Partial coverage - settings are inherited to the child socket after accept(). - -The following two options apply to CCID 3 exclusively and are getsockopt()-only. -In either case, a TFRC info struct (defined in <linux/tfrc.h>) is returned. - -DCCP_SOCKOPT_CCID_RX_INFO - Returns a ``struct tfrc_rx_info`` in optval; the buffer for optval and - optlen must be set to at least sizeof(struct tfrc_rx_info). - -DCCP_SOCKOPT_CCID_TX_INFO - Returns a ``struct tfrc_tx_info`` in optval; the buffer for optval and - optlen must be set to at least sizeof(struct tfrc_tx_info). - -On unidirectional connections it is useful to close the unused half-connection -via shutdown (SHUT_WR or SHUT_RD): this will reduce per-packet processing costs. - - -Sysctl variables -================ -Several DCCP default parameters can be managed by the following sysctls -(sysctl net.dccp.default or /proc/sys/net/dccp/default): - -request_retries - The number of active connection initiation retries (the number of - Requests minus one) before timing out. In addition, it also governs - the behaviour of the other, passive side: this variable also sets - the number of times DCCP repeats sending a Response when the initial - handshake does not progress from RESPOND to OPEN (i.e. when no Ack - is received after the initial Request). This value should be greater - than 0, suggested is less than 10. Analogue of tcp_syn_retries. - -retries1 - How often a DCCP Response is retransmitted until the listening DCCP - side considers its connecting peer dead. Analogue of tcp_retries1. - -retries2 - The number of times a general DCCP packet is retransmitted. This has - importance for retransmitted acknowledgments and feature negotiation, - data packets are never retransmitted. Analogue of tcp_retries2. - -tx_ccid = 2 - Default CCID for the sender-receiver half-connection. Depending on the - choice of CCID, the Send Ack Vector feature is enabled automatically. - -rx_ccid = 2 - Default CCID for the receiver-sender half-connection; see tx_ccid. - -seq_window = 100 - The initial sequence window (sec. 
7.5.2) of the sender. This influences - the local ackno validity and the remote seqno validity windows (7.5.1). - Values in the range Wmin = 32 (RFC 4340, 7.5.2) up to 2^32-1 can be set. - -tx_qlen = 5 - The size of the transmit buffer in packets. A value of 0 corresponds - to an unbounded transmit buffer. - -sync_ratelimit = 125 ms - The timeout between subsequent DCCP-Sync packets sent in response to - sequence-invalid packets on the same socket (RFC 4340, 7.5.4). The unit - of this parameter is milliseconds; a value of 0 disables rate-limiting. - - -IOCTLS -====== -FIONREAD - Works as in udp(7): returns in the ``int`` argument pointer the size of - the next pending datagram in bytes, or 0 when no datagram is pending. - -SIOCOUTQ - Returns the number of unsent data bytes in the socket send queue as ``int`` - into the buffer specified by the argument pointer. - -Other tunables -============== -Per-route rto_min support - CCID-2 supports the RTAX_RTO_MIN per-route setting for the minimum value - of the RTO timer. This setting can be modified via the 'rto_min' option - of iproute2; for example:: - - > ip route change 10.0.0.0/24 rto_min 250j dev wlan0 - > ip route add 10.0.0.254/32 rto_min 800j dev wlan0 - > ip route show dev wlan0 - - CCID-3 also supports the rto_min setting: it is used to define the lower - bound for the expiry of the nofeedback timer. This can be useful on LANs - with very low RTTs (e.g., loopback, Gbit ethernet). - - -Notes -===== -DCCP does not travel through NAT successfully at present on many boxes. This is -because the checksum covers the pseudo-header as per TCP and UDP. Linux NAT -support for DCCP has been added. diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst index 05d822b904b4..f9ed93c1da35 100644 --- a/Documentation/networking/device_drivers/ethernet/index.rst +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -55,6 +55,7 @@ Contents: ti/cpsw_switchdev ti/am65_nuss_cpsw_switchdev ti/tlan + ti/icssg_prueth wangxun/txgbe wangxun/ngbe diff --git a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst index 04e0595bb0a7..f8592dec8851 100644 --- a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst +++ b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst @@ -28,9 +28,60 @@ devlink dev info provides version information for all three components. In addition to the version the hg commit hash of the build is included as a separate entry. +Upgrading Firmware +------------------ + +fbnic supports updating firmware using signed PLDM images with devlink dev +flash. PLDM images are written into the flash. Flashing does not interrupt +the operation of the device. + +On host boot the latest UEFI driver is always used, no explicit activation +is required. Firmware activation is required to run new control firmware. cmrt +firmware can only be activated by power cycling the NIC. 
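As a rough sketch of the flow described above, assuming a NIC at an
illustrative PCI address and an image file name that is not taken from the
driver, a flash update could look like::

    # write the signed PLDM image into flash; the device keeps operating
    $ devlink dev flash pci/0000:01:00.0 file fbnic_fw.pldm

Activation of the new control firmware (or a power cycle for cmrt firmware)
is still required afterwards, as noted above.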
+
 Statistics
 ----------
 
+TX MAC Interface
+~~~~~~~~~~~~~~~~
+
+ - ``ptp_illegal_req``: packets sent to the NIC with PTP request bit set but routed to BMC/FW
+ - ``ptp_good_ts``: packets successfully routed to MAC with PTP request bit set
+ - ``ptp_bad_ts``: packets destined for MAC with PTP request bit set but aborted because of some error (e.g., DMA read error)
+
+TX Extension (TEI) Interface (TTI)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ - ``tti_cm_drop``: control messages dropped at the TX Extension (TEI) Interface because of credit starvation
+ - ``tti_frame_drop``: packets dropped at the TX Extension (TEI) Interface because of credit starvation
+ - ``tti_tbi_drop``: packets dropped at the TX BMC Interface (TBI) because of credit starvation
+
+RXB (RX Buffer) Enqueue
+~~~~~~~~~~~~~~~~~~~~~~~
+
+ - ``rxb_integrity_err[i]``: frames enqueued with integrity errors (e.g., multi-bit ECC errors) on RXB input i
+ - ``rxb_mac_err[i]``: frames enqueued with MAC end-of-frame errors (e.g., bad FCS) on RXB input i
+ - ``rxb_parser_err[i]``: frames experienced RPC parser errors
+ - ``rxb_frm_err[i]``: frames experienced signaling errors (e.g., missing end-of-packet/start-of-packet) on RXB input i
+ - ``rxb_drbo[i]_frames``: frames received at RXB input i
+ - ``rxb_drbo[i]_bytes``: bytes received at RXB input i
+
+RXB (RX Buffer) FIFO
+~~~~~~~~~~~~~~~~~~~~
+
+ - ``rxb_fifo[i]_drop``: transitions into the drop state on RXB pool i
+ - ``rxb_fifo[i]_dropped_frames``: frames dropped on RXB pool i
+ - ``rxb_fifo[i]_ecn``: transitions into the ECN mark state on RXB pool i
+ - ``rxb_fifo[i]_level``: current occupancy of RXB pool i
+
+RXB (RX Buffer) Dequeue
+~~~~~~~~~~~~~~~~~~~~~~~
+
+ - ``rxb_intf[i]_frames``: frames sent to the output i
+ - ``rxb_intf[i]_bytes``: bytes sent to the output i
+ - ``rxb_pbuf[i]_frames``: frames sent to output i from the perspective of internal packet buffer
+ - ``rxb_pbuf[i]_bytes``: bytes sent to output i from the perspective of internal packet buffer
+
 RPC (Rx parser)
 ~~~~~~~~~~~~~~~
@@ -44,6 +95,15 @@ RPC (Rx parser)
   - ``rpc_out_of_hdr_err``: frames where header was larger than parsable region
   - ``ovr_size_err``: oversized frames
 
+Hardware Queues
+~~~~~~~~~~~~~~~
+
+1. RX DMA Engine:
+
+ - ``rde_[i]_pkt_err``: packets with MAC EOP, RPC parser, RXB truncation, or RDE frame truncation errors. These errors are flagged in the packet metadata because of cut-through support but the actual drop happens once PCIe/RDE is reached.
+ - ``rde_[i]_pkt_cq_drop``: packets dropped because RCQ is full
+ - ``rde_[i]_pkt_bdq_drop``: packets dropped because HPQ or PPQ ran out of host buffer
+
 PCIe
 ~~~~
diff --git a/Documentation/networking/device_drivers/ethernet/ti/icssg_prueth.rst b/Documentation/networking/device_drivers/ethernet/ti/icssg_prueth.rst
new file mode 100644
index 000000000000..da21ddf431bb
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/ti/icssg_prueth.rst
@@ -0,0 +1,56 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================================
+Texas Instruments ICSSG PRUETH ethernet driver
+==============================================
+
+:Version: 1.0
+
+ICSSG Firmware
+==============
+
+Every ICSSG core has two Programmable Real-Time Units (PRUs), two auxiliary
+Real-Time Transfer Units (RTUs), and two Transmit Real-Time Transfer Units
+(TX_PRUs). Each one of these runs its own firmware. The firmwares combined
+are referred to as the ICSSG Firmware.
+
+Firmware Statistics
+===================
+
+The ICSSG firmware maintains certain statistics which are dumped by the driver
+via ``ethtool -S <interface>``.
+
+These statistics are as follows:
+
+ - ``FW_RTU_PKT_DROP``: Diagnostic error counter which increments when RTU drops a locally injected packet due to port being disabled or rule violation.
+ - ``FW_Q0_OVERFLOW``: TX overflow counter for queue0
+ - ``FW_Q1_OVERFLOW``: TX overflow counter for queue1
+ - ``FW_Q2_OVERFLOW``: TX overflow counter for queue2
+ - ``FW_Q3_OVERFLOW``: TX overflow counter for queue3
+ - ``FW_Q4_OVERFLOW``: TX overflow counter for queue4
+ - ``FW_Q5_OVERFLOW``: TX overflow counter for queue5
+ - ``FW_Q6_OVERFLOW``: TX overflow counter for queue6
+ - ``FW_Q7_OVERFLOW``: TX overflow counter for queue7
+ - ``FW_DROPPED_PKT``: This counter is incremented when a packet is dropped at PRU because of rule violation.
+ - ``FW_RX_ERROR``: Incremented if there was a CRC error or Min/Max frame error at PRU
+ - ``FW_RX_DS_INVALID``: Incremented when RTU detects Data Status invalid condition
+ - ``FW_TX_DROPPED_PACKET``: Counter for packets dropped via TX Port
+ - ``FW_TX_TS_DROPPED_PACKET``: Counter for packets with TS flag dropped via TX Port
+ - ``FW_INF_PORT_DISABLED``: Incremented when RX frame is dropped due to port being disabled
+ - ``FW_INF_SAV``: Incremented when RX frame is dropped due to Source Address violation
+ - ``FW_INF_SA_DL``: Incremented when RX frame is dropped due to Source Address being in the denylist
+ - ``FW_INF_PORT_BLOCKED``: Incremented when RX frame is dropped due to port being blocked and frame being a special frame
+ - ``FW_INF_DROP_TAGGED``: Incremented when RX frame is dropped for being tagged
+ - ``FW_INF_DROP_PRIOTAGGED``: Incremented when RX frame is dropped for being priority tagged
+ - ``FW_INF_DROP_NOTAG``: Incremented when RX frame is dropped for being untagged
+ - ``FW_INF_DROP_NOTMEMBER``: Incremented when RX frame is dropped for port not being member of VLAN
+ - ``FW_RX_EOF_SHORT_FRMERR``: Incremented if End Of Frame (EOF) task is scheduled without seeing RX_B1
+ - ``FW_RX_B0_DROP_EARLY_EOF``: Incremented when frame is dropped due to Early EOF
+ - ``FW_TX_JUMBO_FRM_CUTOFF``: Incremented when frame is cut off to prevent packet size > 2000 Bytes
+ - ``FW_RX_EXP_FRAG_Q_DROP``: Incremented when express frame is received in the same queue as the previous fragment
+ - ``FW_RX_FIFO_OVERRUN``: RX fifo overrun counter
+ - ``FW_CUT_THR_PKT``: Incremented when a packet is forwarded using Cut-Through forwarding method
+ - ``FW_HOST_RX_PKT_CNT``: Number of valid packets sent by Rx PRU to Host on PSI
+ - ``FW_HOST_TX_PKT_CNT``: Number of valid packets copied by RTU0 to Tx queues
+ - ``FW_HOST_EGRESS_Q_PRE_OVERFLOW``: Host Egress Q (Pre-emptible) Overflow Counter
+ - ``FW_HOST_EGRESS_Q_EXP_OVERFLOW``: Host Egress Q (Express) Overflow Counter
diff --git a/Documentation/networking/devlink/devlink-info.rst b/Documentation/networking/devlink/devlink-info.rst
index 23073bc219d8..dd6adc4d0559 100644
--- a/Documentation/networking/devlink/devlink-info.rst
+++ b/Documentation/networking/devlink/devlink-info.rst
@@ -86,6 +86,10 @@ In case software/firmware components are loaded from the disk (e.g.
 ``/lib/firmware``) only the running version should be reported via
 the kernel API.
 
+Please note that any security versions reported via devlink are purely
+informational. Devlink does not use a secure channel to communicate with
+the device.
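For example, the reported versions, including any security revisions, can be
read back with the stock devlink CLI; the device address here is
illustrative::

    $ devlink dev info pci/0000:01:00.0

Per the note above, any ``.srev`` values in that output should be treated as
informational rather than attested.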
+
 Generic Versions
 ================
diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst
index 2c14dfe69b3a..5885e21e2212 100644
--- a/Documentation/networking/devlink/devlink-trap.rst
+++ b/Documentation/networking/devlink/devlink-trap.rst
@@ -451,7 +451,7 @@ be added to the following table:
    * - ``udp_parsing``
      - ``drop``
      - Traps packets dropped due to an error in the UDP header parsing.
-       This packet trap could include checksum errorrs, an improper UDP
+       This packet trap could include checksum errors, an improper UDP
        length detected (smaller than 8 bytes) or detection of header
        truncation.
    * - ``tcp_parsing``
diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst
index 948c8c44e233..8319f43b5933 100644
--- a/Documentation/networking/devlink/index.rst
+++ b/Documentation/networking/devlink/index.rst
@@ -84,6 +84,7 @@ parameters, info versions, and other features it supports.
    i40e
    ionic
    ice
+   ixgbe
    mlx4
    mlx5
    mlxsw
diff --git a/Documentation/networking/devlink/ixgbe.rst b/Documentation/networking/devlink/ixgbe.rst
new file mode 100644
index 000000000000..c27d1436c70e
--- /dev/null
+++ b/Documentation/networking/devlink/ixgbe.rst
@@ -0,0 +1,171 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+ixgbe devlink support
+=====================
+
+This document describes the devlink features implemented by the ``ixgbe``
+device driver.
+
+Info versions
+=============
+
+Any versions dealing with security presented by ``devlink-info`` are purely
+informational. Devlink does not use a secure channel to communicate with
+the device.
+
+The ``ixgbe`` driver reports the following versions:
+
+.. list-table:: devlink info versions implemented
+    :widths: 5 5 5 90
+
+    * - Name
+      - Type
+      - Example
+      - Description
+    * - ``board.id``
+      - fixed
+      - H49289-000
+      - The Product Board Assembly (PBA) identifier of the board.
+    * - ``fw.undi``
+      - running
+      - 1.1937.0
+      - Version of the Option ROM containing the UEFI driver. The version is
+        reported in ``major.minor.patch`` format. The major version is
+        incremented whenever a major breaking change occurs, or when the
+        minor version would overflow. The minor version is incremented for
+        non-breaking changes and reset to 1 when the major version is
+        incremented. The patch version is normally 0 but is incremented when
+        a fix is delivered as a patch against an older base Option ROM.
+    * - ``fw.undi.srev``
+      - running
+      - 4
+      - Number indicating the security revision of the Option ROM.
+    * - ``fw.bundle_id``
+      - running
+      - 0x80000d0d
+      - Unique identifier of the firmware image file that was loaded onto
+        the device. Also referred to as the EETRACK identifier of the NVM.
+    * - ``fw.mgmt.api``
+      - running
+      - 1.5.1
+      - 3-digit version number (major.minor.patch) of the API exported over
+        the AdminQ by the management firmware. Used by the driver to
+        identify what commands are supported. Historical versions of the
+        kernel only displayed a 2-digit version number (major.minor).
+    * - ``fw.mgmt.build``
+      - running
+      - 0x305d955f
+      - Unique identifier of the source for the management firmware.
+    * - ``fw.mgmt.srev``
+      - running
+      - 3
+      - Number indicating the security revision of the firmware.
+    * - ``fw.psid.api``
+      - running
+      - 0.80
+      - Version defining the format of the flash contents.
+    * - ``fw.netlist``
+      - running
+      - 1.1.2000-6.7.0
+      - The version of the netlist module.
This module defines the device's
+        Ethernet capabilities and default settings, and is used by the
+        management firmware as part of managing link and device
+        connectivity.
+    * - ``fw.netlist.build``
+      - running
+      - 0xee16ced7
+      - The first 4 bytes of the hash of the netlist module contents.
+
+Flash Update
+============
+
+The ``ixgbe`` driver implements support for flash update using the
+``devlink-flash`` interface. It supports updating the device flash using a
+combined flash image that contains the ``fw.mgmt``, ``fw.undi``, and
+``fw.netlist`` components.
+
+.. list-table:: List of supported overwrite modes
+   :widths: 5 95
+
+   * - Bits
+     - Behavior
+   * - ``DEVLINK_FLASH_OVERWRITE_SETTINGS``
+     - Do not preserve settings stored in the flash components being
+       updated. This includes overwriting the port configuration that
+       determines the number of physical functions the device will
+       initialize with.
+   * - ``DEVLINK_FLASH_OVERWRITE_SETTINGS`` and ``DEVLINK_FLASH_OVERWRITE_IDENTIFIERS``
+     - Do not preserve either settings or identifiers. Overwrite everything
+       in the flash with the contents from the provided image, without
+       performing any preservation. This includes overwriting device
+       identifying fields such as the MAC address, Vital Product Data (VPD)
+       area, and device serial number. It is expected that this combination
+       be used with an image customized for the specific device.
+
+Reload
+======
+
+The ``ixgbe`` driver supports activating new firmware after a flash update
+using ``DEVLINK_CMD_RELOAD`` with the ``DEVLINK_RELOAD_ACTION_FW_ACTIVATE``
+action.
+
+.. code:: shell
+
+    $ devlink dev reload pci/0000:01:00.0 action fw_activate
+
+The new firmware is activated by issuing a device specific Embedded
+Management Processor reset which requests the device to reset and reload the
+EMP firmware image.
+
+The driver does not currently support reloading the driver via
+``DEVLINK_RELOAD_ACTION_DRIVER_REINIT``.
+
+Regions
+=======
+
+The ``ixgbe`` driver implements the following regions for accessing internal
+device data.
+
+.. list-table:: regions implemented
+   :widths: 15 85
+
+   * - Name
+     - Description
+   * - ``nvm-flash``
+     - The contents of the entire flash chip, sometimes referred to as
+       the device's Non Volatile Memory.
+   * - ``shadow-ram``
+     - The contents of the Shadow RAM, which is loaded from the beginning
+       of the flash. Although the contents are primarily from the flash,
+       this area also contains data generated during device boot which is
+       not stored in flash.
+   * - ``device-caps``
+     - The contents of the device firmware's capabilities buffer. Useful to
+       determine the current state and configuration of the device.
+
+Both the ``nvm-flash`` and ``shadow-ram`` regions can be accessed without a
+snapshot. The ``device-caps`` region requires a snapshot as the contents are
+sent by firmware and can't be split into separate reads.
+
+Users can request an immediate capture of a snapshot for all three regions
+via the ``DEVLINK_CMD_REGION_NEW`` command.
+
+..
code:: shell + + $ devlink region show + pci/0000:01:00.0/nvm-flash: size 10485760 snapshot [] max 1 + pci/0000:01:00.0/device-caps: size 4096 snapshot [] max 10 + + $ devlink region new pci/0000:01:00.0/nvm-flash snapshot 1 + + $ devlink region dump pci/0000:01:00.0/nvm-flash snapshot 1 + 0000000000000000 0014 95dc 0014 9514 0035 1670 0034 db30 + 0000000000000010 0000 0000 ffff ff04 0029 8c00 0028 8cc8 + 0000000000000020 0016 0bb8 0016 1720 0000 0000 c00f 3ffc + 0000000000000030 bada cce5 bada cce5 bada cce5 bada cce5 + + $ devlink region read pci/0000:01:00.0/nvm-flash snapshot 1 address 0 length 16 + 0000000000000000 0014 95dc 0014 9514 0035 1670 0034 db30 + + $ devlink region delete pci/0000:01:00.0/device-caps snapshot 1 diff --git a/Documentation/networking/devmem.rst b/Documentation/networking/devmem.rst index eb678ca45496..a6cd7236bfbd 100644 --- a/Documentation/networking/devmem.rst +++ b/Documentation/networking/devmem.rst @@ -62,15 +62,15 @@ More Info https://lore.kernel.org/netdev/20240831004313.3713467-1-almasrymina@google.com/ -Interface -========= +RX Interface +============ Example ------- -tools/testing/selftests/net/ncdevmem.c:do_server shows an example of setting up -the RX path of this API. +./tools/testing/selftests/drivers/net/hw/ncdevmem:do_server shows an example of +setting up the RX path of this API. NIC Setup @@ -235,6 +235,148 @@ can be less than the tokens provided by the user in case of: (a) an internal kernel leak bug. (b) the user passed more than 1024 frags. +TX Interface +============ + + +Example +------- + +./tools/testing/selftests/drivers/net/hw/ncdevmem:do_client shows an example of +setting up the TX path of this API. + + +NIC Setup +--------- + +The user must bind a TX dmabuf to a given NIC using the netlink API:: + + struct netdev_bind_tx_req *req = NULL; + struct netdev_bind_tx_rsp *rsp = NULL; + struct ynl_error yerr; + + *ys = ynl_sock_create(&ynl_netdev_family, &yerr); + + req = netdev_bind_tx_req_alloc(); + netdev_bind_tx_req_set_ifindex(req, ifindex); + netdev_bind_tx_req_set_fd(req, dmabuf_fd); + + rsp = netdev_bind_tx(*ys, req); + + tx_dmabuf_id = rsp->id; + + +The netlink API returns a dmabuf_id: a unique ID that refers to this dmabuf +that has been bound. + +The user can unbind the dmabuf from the netdevice by closing the netlink socket +that established the binding. We do this so that the binding is automatically +unbound even if the userspace process crashes. + +Note that any reasonably well-behaved dmabuf from any exporter should work with +devmem TCP, even if the dmabuf is not actually backed by devmem. An example of +this is udmabuf, which wraps user memory (non-devmem) in a dmabuf. + +Socket Setup +------------ + +The user application must use MSG_ZEROCOPY flag when sending devmem TCP. Devmem +cannot be copied by the kernel, so the semantics of the devmem TX are similar +to the semantics of MSG_ZEROCOPY:: + + setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt)); + +It is also recommended that the user binds the TX socket to the same interface +the dma-buf has been bound to via SO_BINDTODEVICE:: + + setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname, strlen(ifname) + 1); + + +Sending data +------------ + +Devmem data is sent using the SCM_DEVMEM_DMABUF cmsg. 
+
+The user should create a msghdr where:
+
+* iov_base is set to the offset into the dmabuf to start sending from
+* iov_len is set to the number of bytes to be sent from the dmabuf
+
+The user passes the dma-buf id to send from via the dmabuf_tx_cmsg.dmabuf_id.
+
+The example below sends 1024 bytes from offset 100 into the dmabuf, and 2048
+from offset 2000 into the dmabuf. The dmabuf to send from is tx_dmabuf_id::
+
+  char ctrl_data[CMSG_SPACE(sizeof(struct dmabuf_tx_cmsg))];
+  struct dmabuf_tx_cmsg ddmabuf;
+  struct msghdr msg = {};
+  struct cmsghdr *cmsg;
+  struct iovec iov[2];
+
+  iov[0].iov_base = (void*)100;
+  iov[0].iov_len = 1024;
+  iov[1].iov_base = (void*)2000;
+  iov[1].iov_len = 2048;
+
+  msg.msg_iov = iov;
+  msg.msg_iovlen = 2;
+
+  msg.msg_control = ctrl_data;
+  msg.msg_controllen = sizeof(ctrl_data);
+
+  cmsg = CMSG_FIRSTHDR(&msg);
+  cmsg->cmsg_level = SOL_SOCKET;
+  cmsg->cmsg_type = SCM_DEVMEM_DMABUF;
+  cmsg->cmsg_len = CMSG_LEN(sizeof(struct dmabuf_tx_cmsg));
+
+  ddmabuf.dmabuf_id = tx_dmabuf_id;
+
+  *((struct dmabuf_tx_cmsg *)CMSG_DATA(cmsg)) = ddmabuf;
+
+  sendmsg(socket_fd, &msg, MSG_ZEROCOPY);
+
+
+Reusing TX dmabufs
+------------------
+
+Similar to MSG_ZEROCOPY with regular memory, the user should not modify the
+contents of the dma-buf while a send operation is in progress. This is because
+the kernel does not keep a copy of the dmabuf contents. Instead, the kernel
+will pin and send data from the buffer available to the userspace.
+
+Just as in MSG_ZEROCOPY, the kernel notifies the userspace of send completions
+using MSG_ERRQUEUE::
+
+  int64_t tstop = gettimeofday_ms() + waittime_ms;
+  char control[CMSG_SPACE(100)] = {};
+  struct sock_extended_err *serr;
+  struct msghdr msg = {};
+  struct cmsghdr *cm;
+  int retries = 10;
+  __u32 hi, lo;
+  int ret;
+
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  while (gettimeofday_ms() < tstop) {
+    if (!do_poll(fd)) continue;
+
+    ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+
+    for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+      serr = (void *)CMSG_DATA(cm);
+
+      hi = serr->ee_data;
+      lo = serr->ee_info;
+
+      fprintf(stdout, "tx complete [%d,%d]\n", lo, hi);
+    }
+  }
+
+After the associated sendmsg has been completed, the dmabuf can be reused by
+the userspace.
+
+
 Implementation & Caveats
 ========================
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index c64133d309bf..ac90b82f3ce9 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -48,7 +48,6 @@ Contents:
    ax25
    bonding
    cdc_mbim
-   dccp
    dctcp
    devmem
    dns_resolver
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 5c63ab928b97..b43222ee57cf 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -37,8 +37,8 @@ ip_no_pmtu_disc - INTEGER
 	Mode 3 is a hardened pmtu discover mode. The kernel will only
 	accept fragmentation-needed errors if the underlying protocol
 	can verify them besides a plain socket lookup. Current
-	protocols for which pmtu events will be honored are TCP, SCTP
-	and DCCP as they verify e.g. the sequence number or the
+	protocols for which pmtu events will be honored are TCP and
+	SCTP as they verify e.g. the sequence number or the
 	association. This mode should not be enabled globally but is
 	only intended to secure e.g.
name servers in namespaces where TCP path mtu must still work but path MTU information of other diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 6327e689e8a8..c69cc89c958e 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -10,6 +10,7 @@ Type Name fastpath_tx_acce =================================== =========================== =================== =================== =================================================================================== unsigned_long:32 priv_flags read_mostly __dev_queue_xmit(tx) unsigned_long:1 lltx read_mostly HARD_TX_LOCK,HARD_TX_TRYLOCK,HARD_TX_UNLOCK(tx) +unsigned long:1 netmem_tx:1; read_mostly char name[16] struct netdev_name_node* name_node struct dev_ifalias* ifalias @@ -131,7 +132,7 @@ struct ref_tracker_dir refcnt_tracker struct list_head link_watch_list enum:8 reg_state bool dismantle -enum:16 rtnl_link_state +bool rtnl_link_initilizing bool needs_free_netdev void*priv_destructor struct net_device struct netpoll_info* npinfo read_mostly napi_poll/napi_poll_lock diff --git a/Documentation/networking/net_cachelines/snmp.rst b/Documentation/networking/net_cachelines/snmp.rst index bc96efc92cf5..bd44b3eebbef 100644 --- a/Documentation/networking/net_cachelines/snmp.rst +++ b/Documentation/networking/net_cachelines/snmp.rst @@ -37,6 +37,8 @@ unsigned_long LINUX_MIB_TIMEWAITKILLED unsigned_long LINUX_MIB_PAWSACTIVEREJECTED unsigned_long LINUX_MIB_PAWSESTABREJECTED unsigned_long LINUX_MIB_TSECR_REJECTED +unsigned_long LINUX_MIB_PAWS_OLD_ACK +unsigned_long LINUX_MIB_PAWS_TW_REJECTED unsigned_long LINUX_MIB_DELAYEDACKLOST unsigned_long LINUX_MIB_LISTENOVERFLOWS unsigned_long LINUX_MIB_LISTENDROPS diff --git a/Documentation/networking/netdev-features.rst b/Documentation/networking/netdev-features.rst index 5014f7cc1398..02bd7536fc0c 100644 --- a/Documentation/networking/netdev-features.rst +++ b/Documentation/networking/netdev-features.rst @@ -188,3 +188,8 @@ Redundancy) frames from one port to another in hardware. This should be set for devices which duplicate outgoing HSR (High-availability Seamless Redundancy) or PRP (Parallel Redundancy Protocol) tags automatically frames in hardware. + +* netmem-tx + +This should be set for devices which support netmem TX. See +Documentation/networking/netmem.rst diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index eab601ab2db0..7ebb6c36482d 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -8,7 +8,7 @@ Network Devices, the Kernel, and You! Introduction ============ The following is a random collection of documentation regarding -network devices. +network devices. It is intended for driver developers. struct net_device lifetime rules ================================ @@ -314,13 +314,8 @@ napi->poll: softirq will be called with interrupts disabled by netconsole. -struct netdev_queue_mgmt_ops synchronization rules -================================================== - -All queue management ndo callbacks are holding netdev instance lock. - -RTNL and netdev instance lock -============================= +netdev instance lock +==================== Historically, all networking control operations were protected by a single global lock known as ``rtnl_lock``. 
There is an ongoing effort to replace this @@ -328,10 +323,13 @@ global lock with separate locks for each network namespace. Additionally, properties of individual netdev are increasingly protected by per-netdev locks. For device drivers that implement shaping or queue management APIs, all control -operations will be performed under the netdev instance lock. Currently, this -instance lock is acquired within the context of ``rtnl_lock``. The drivers -can also explicitly request instance lock to be acquired via -``request_ops_lock``. In the future, there will be an option for individual +operations will be performed under the netdev instance lock. +Drivers can also explicitly request instance lock to be held during ops +by setting ``request_ops_lock`` to true. Code comments and docs refer +to drivers which have ops called under the instance lock as "ops locked". +See also the documentation of the ``lock`` member of struct net_device. + +In the future, there will be an option for individual drivers to opt out of using ``rtnl_lock`` and instead perform their control operations directly under the netdev instance lock. @@ -344,18 +342,59 @@ functions handle acquiring the instance lock themselves, while the ``netif_xxx`` functions assume that the driver has already acquired the instance lock. +struct net_device_ops +--------------------- + +``ndos`` are called without holding the instance lock for most drivers. + +"Ops locked" drivers will have most of the ``ndos`` invoked under +the instance lock. + +struct ethtool_ops +------------------ + +Similarly to ``ndos`` the instance lock is only held for select drivers. +For "ops locked" drivers all ethtool ops without exceptions should +be called under the instance lock. + +struct netdev_stat_ops +---------------------- + +"qstat" ops are invoked under the instance lock for "ops locked" drivers, +and under rtnl_lock for all other drivers. + +struct net_shaper_ops +--------------------- + +All net shaper callbacks are invoked while holding the netdev instance +lock. ``rtnl_lock`` may or may not be held. + +Note that supporting net shapers automatically enables "ops locking". + +struct netdev_queue_mgmt_ops +---------------------------- + +All queue management callbacks are invoked while holding the netdev instance +lock. ``rtnl_lock`` may or may not be held. + +Note that supporting struct netdev_queue_mgmt_ops automatically enables +"ops locking". + Notifiers and netdev instance lock -================================== +---------------------------------- For device drivers that implement shaping or queue management APIs, some of the notifiers (``enum netdev_cmd``) are running under the netdev instance lock. +The following netdev notifiers are always run under the instance lock: +* ``NETDEV_XDP_FEAT_CHANGE`` + For devices with locked ops, currently only the following notifiers are running under the lock: +* ``NETDEV_CHANGE`` * ``NETDEV_REGISTER`` * ``NETDEV_UP`` -* ``NETDEV_CHANGE`` The following notifiers are running without the lock: * ``NETDEV_UNREGISTER`` diff --git a/Documentation/networking/netmem.rst b/Documentation/networking/netmem.rst index 7de21ddb5412..b63aded46337 100644 --- a/Documentation/networking/netmem.rst +++ b/Documentation/networking/netmem.rst @@ -19,8 +19,8 @@ Benefits of Netmem : * Simplified Development: Drivers interact with a consistent API, regardless of the underlying memory implementation. -Driver Requirements -=================== +Driver RX Requirements +====================== 1. The driver must support page_pool. 
@@ -77,3 +77,22 @@ Driver Requirements
    that purpose, but be mindful that some netmem types might have longer
    circulation times, such as when userspace holds a reference in zerocopy
    scenarios.
+
+Driver TX Requirements
+======================
+
+1. The driver must not pass the netmem dma_addr to any of the dma-mapping APIs
+   directly. This is because netmem dma_addrs may come from a source like
+   dma-buf that is not compatible with the dma-mapping APIs.
+
+   Helpers like netmem_dma_unmap_page_attrs() & netmem_dma_unmap_addr_set()
+   should be used in lieu of dma_unmap_page[_attrs](), dma_unmap_addr_set().
+   The netmem variants will handle netmem dma_addrs correctly regardless of the
+   source, delegating to the dma-mapping APIs when appropriate.
+
+   Not all dma-mapping APIs have netmem equivalents at the moment. If your
+   driver relies on a missing netmem API, feel free to add and propose to
+   netdev@, or reach out to the maintainers and/or almasrymina@google.com for
+   help adding the netmem API.
+
+2. The driver should declare support by setting ``netdev->netmem_tx = true``
diff --git a/Documentation/networking/rxrpc.rst b/Documentation/networking/rxrpc.rst
index e807e18ba32a..d63e3e27dd06 100644
--- a/Documentation/networking/rxrpc.rst
+++ b/Documentation/networking/rxrpc.rst
@@ -1062,30 +1062,6 @@ The kernel interface functions are as follows:
      first function to change.  Note that this must be called in TASK_RUNNING
      state.
 
- (#) Get remote client epoch::
-
-	u32 rxrpc_kernel_get_epoch(struct socket *sock,
-				   struct rxrpc_call *call)
-
-     This allows the epoch that's contained in packets of an incoming client
-     call to be queried.  This value is returned.  The function always
-     successful if the call is still in progress.  It shouldn't be called once
-     the call has expired.  Note that calling this on a local client call only
-     returns the local epoch.
-
-     This value can be used to determine if the remote client has been
-     restarted as it shouldn't change otherwise.
-
- (#) Set the maximum lifespan on a call::
-
-	void rxrpc_kernel_set_max_life(struct socket *sock,
-				       struct rxrpc_call *call,
-				       unsigned long hard_timeout)
-
-     This sets the maximum lifespan on a call to hard_timeout (which is in
-     jiffies).  In the event of the timeout occurring, the call will be
-     aborted and -ETIME or -ETIMEDOUT will be returned.
-
 (#) Apply the RXRPC_MIN_SECURITY_LEVEL sockopt to a socket from within in the
     kernel::
@@ -1172,3 +1148,18 @@ adjusted through sysctls in /proc/net/rxrpc/:
      header plus exactly 1412 bytes of data.  The terminal packet must contain
      a four byte header plus any amount of data.  In any event, a jumbo packet
      may not exceed rxrpc_rx_mtu in size.
+
+
+API Function Reference
+======================
+
+.. kernel-doc:: net/rxrpc/af_rxrpc.c
+.. kernel-doc:: net/rxrpc/call_object.c
+.. kernel-doc:: net/rxrpc/key.c
+.. kernel-doc:: net/rxrpc/oob.c
+.. kernel-doc:: net/rxrpc/peer_object.c
+.. kernel-doc:: net/rxrpc/recvmsg.c
+.. kernel-doc:: net/rxrpc/rxgk.c
+.. kernel-doc:: net/rxrpc/rxkad.c
+.. kernel-doc:: net/rxrpc/sendmsg.c
+..
kernel-doc:: net/rxrpc/server_key.c diff --git a/Documentation/networking/tproxy.rst b/Documentation/networking/tproxy.rst index 7f7c1ff6f159..75e4990cc3db 100644 --- a/Documentation/networking/tproxy.rst +++ b/Documentation/networking/tproxy.rst @@ -69,9 +69,9 @@ add rules like this to the iptables ruleset above:: # iptables -t mangle -A PREROUTING -p tcp --dport 80 -j TPROXY \ --tproxy-mark 0x1/0x1 --on-port 50080 -Or the following rule to nft: +Or the following rule to nft:: -# nft add rule filter divert tcp dport 80 tproxy to :50080 meta mark set 1 accept + # nft add rule filter divert tcp dport 80 tproxy to :50080 meta mark set 1 accept Note that for this to work you'll have to modify the proxy to enable (SOL_IP, IP_TRANSPARENT) for the listening socket. diff --git a/Documentation/translations/zh_CN/admin-guide/bug-hunting.rst b/Documentation/translations/zh_CN/admin-guide/bug-hunting.rst index c3f6a83294dc..4b3432753eb9 100644 --- a/Documentation/translations/zh_CN/admin-guide/bug-hunting.rst +++ b/Documentation/translations/zh_CN/admin-guide/bug-hunting.rst @@ -188,7 +188,7 @@ objdump 编行。如果没有调试符号,您将看到所示例程的汇编程序代码,但是如果内核有调试 符号,C代码也将可见(调试符号可以在内核配置菜单的hacking项中启用)。例如:: - $ objdump -r -S -l --disassemble net/dccp/ipv4.o + $ objdump -r -S -l --disassemble net/ipv4/tcp.o .. note:: diff --git a/Documentation/translations/zh_TW/admin-guide/bug-hunting.rst b/Documentation/translations/zh_TW/admin-guide/bug-hunting.rst index b25ecc44d735..80ea5677ee52 100644 --- a/Documentation/translations/zh_TW/admin-guide/bug-hunting.rst +++ b/Documentation/translations/zh_TW/admin-guide/bug-hunting.rst @@ -191,7 +191,7 @@ objdump 編行。如果沒有調試符號,您將看到所示例程的彙編程序代碼,但是如果內核有調試 符號,C代碼也將可見(調試符號可以在內核配置菜單的hacking項中啓用)。例如:: - $ objdump -r -S -l --disassemble net/dccp/ipv4.o + $ objdump -r -S -l --disassemble net/ipv4/tcp.o .. note:: diff --git a/Documentation/userspace-api/netlink/netlink-raw.rst b/Documentation/userspace-api/netlink/netlink-raw.rst index 1990eea772d0..31fc91020eb3 100644 --- a/Documentation/userspace-api/netlink/netlink-raw.rst +++ b/Documentation/userspace-api/netlink/netlink-raw.rst @@ -62,7 +62,7 @@ Sub-messages ------------ Several raw netlink families such as -:doc:`rt_link<../../networking/netlink_spec/rt_link>` and +:doc:`rt-link<../../networking/netlink_spec/rt-link>` and :doc:`tc<../../networking/netlink_spec/tc>` use attribute nesting as an abstraction to carry module specific information. 
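As a sketch of how these renamed specs are consumed, the in-tree YNL CLI can
drive a raw family directly from its YAML; the operation shown is
illustrative::

    $ ./tools/net/ynl/cli.py \
          --spec Documentation/netlink/specs/rt-link.yaml \
          --dump getlink

Sub-message formats such as the ``linkinfo`` payloads are resolved from the
selector attribute described in the spec.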
diff --git a/MAINTAINERS b/MAINTAINERS index 3563492e4eba..84e99e991f53 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6555,15 +6555,6 @@ S: Maintained F: Documentation/scsi/dc395x.rst F: drivers/scsi/dc395x.* -DCCP PROTOCOL -L: dccp@vger.kernel.org -S: Orphan -W: http://www.linuxfoundation.org/collaborate/workgroups/networking/dccp -F: include/linux/dccp.h -F: include/linux/tfrc.h -F: include/uapi/linux/dccp.h -F: net/dccp/ - DEBUGOBJECTS: M: Thomas Gleixner <tglx@linutronix.de> L: linux-kernel@vger.kernel.org @@ -9387,6 +9378,7 @@ F: Documentation/devicetree/bindings/net/nxp,netc-blk-ctrl.yaml F: drivers/net/ethernet/freescale/enetc/ F: include/linux/fsl/enetc_mdio.h F: include/linux/fsl/netc_global.h +F: include/linux/fsl/ntmp.h FREESCALE eTSEC ETHERNET DRIVER (GIANFAR) M: Claudiu Manoil <claudiu.manoil@nxp.com> @@ -11909,7 +11901,7 @@ F: Documentation/networking/device_drivers/ethernet/intel/ F: drivers/net/ethernet/intel/ F: drivers/net/ethernet/intel/*/ F: include/linux/avf/virtchnl.h -F: include/linux/net/intel/iidc.h +F: include/linux/net/intel/*/ INTEL ETHERNET PROTOCOL DRIVER FOR RDMA M: Mustafa Ismail <mustafa.ismail@intel.com> @@ -18262,6 +18254,17 @@ F: arch/openrisc/ F: drivers/irqchip/irq-ompic.c F: drivers/irqchip/irq-or1k-* +OPENVPN DATA CHANNEL OFFLOAD +M: Antonio Quartulli <antonio@openvpn.net> +L: openvpn-devel@lists.sourceforge.net (subscribers-only) +L: netdev@vger.kernel.org +S: Supported +T: git https://github.com/OpenVPN/linux-kernel-ovpn.git +F: Documentation/netlink/specs/ovpn.yaml +F: drivers/net/ovpn/ +F: include/uapi/linux/ovpn.h +F: tools/testing/selftests/net/ovpn/ + OPENVSWITCH M: Aaron Conole <aconole@redhat.com> M: Eelco Chaudron <echaudro@redhat.com> @@ -20756,6 +20759,14 @@ S: Maintained F: Documentation/devicetree/bindings/usb/renesas,rzn1-usbf.yaml F: drivers/usb/gadget/udc/renesas_usbf.c +RENESAS RZ/V2H(P) DWMAC GBETH GLUE LAYER DRIVER +M: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> +L: netdev@vger.kernel.org +L: linux-renesas-soc@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/net/renesas,r9a09g057-gbeth.yaml +F: drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c + RENESAS RZ/V2M I2C DRIVER M: Fabrizio Castro <fabrizio.castro.jz@renesas.com> L: linux-i2c@vger.kernel.org diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 31ecb8b7b9f1..85bd696b0e90 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -267,8 +267,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_IP_DCCP=m -# CONFIG_IP_DCCP_CCID3 is not set CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 1f57514624d5..93003ceb51bb 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -263,8 +263,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_IP_DCCP=m -# CONFIG_IP_DCCP_CCID3 is not set CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 02db7a48e57e..fa6ed1bb01e3 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -270,8 +270,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_IP_DCCP=m -# CONFIG_IP_DCCP_CCID3 is not 
set CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index f0e673cb17eb..80ca001d5eab 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -260,8 +260,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_IP_DCCP=m -# CONFIG_IP_DCCP_CCID3 is not set CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index e8ca5a50b86d..5e35e4b1fe16 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -262,8 +262,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_IP_DCCP=m -# CONFIG_IP_DCCP_CCID3 is not set CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index b3a270441bb1..11bc3f3c7576 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -261,8 +261,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_IP_DCCP=m -# CONFIG_IP_DCCP_CCID3 is not set CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index d215dba006ce..343f702210df 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -281,8 +281,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_IP_DCCP=m -# CONFIG_IP_DCCP_CCID3 is not set CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index a888ed93ff82..6104a5a24594 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -259,8 +259,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_IP_DCCP=m -# CONFIG_IP_DCCP_CCID3 is not set CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index b481782375f6..a99dd6c7c9c8 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -260,8 +260,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_IP_DCCP=m -# CONFIG_IP_DCCP_CCID3 is not set CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 6eba743d8eb5..0d87c119d4c4 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -261,8 +261,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_IP_DCCP=m -# CONFIG_IP_DCCP_CCID3 is not set CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 9bdbb418ffa8..841f9615b6ab 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -256,8 +256,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_IP_DCCP=m -# CONFIG_IP_DCCP_CCID3 is not set 
CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index e1cf20fa5343..12c5dc92ea4c 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -257,8 +257,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_IP_DCCP=m -# CONFIG_IP_DCCP_CCID3 is not set CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_RDS=m CONFIG_RDS_TCP=m diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index 8f7c36868204..97d2cd997285 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -81,7 +81,6 @@ CONFIG_IP_VS_SH=m CONFIG_IP_VS_SED=m CONFIG_IP_VS_NQ=m CONFIG_IP_VS_FTP=m -CONFIG_IP_DCCP=m CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig index 12f3eed8a946..df55fe6df8ba 100644 --- a/arch/mips/configs/gpr_defconfig +++ b/arch/mips/configs/gpr_defconfig @@ -84,7 +84,6 @@ CONFIG_BRIDGE_EBT_MARK_T=m CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m -CONFIG_IP_DCCP=m CONFIG_IP_SCTP=m CONFIG_TIPC=m CONFIG_ATM=y diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index 06b7a0b97eca..487bd84043f7 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -130,7 +130,6 @@ CONFIG_BRIDGE_EBT_MARK_T=m CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m -CONFIG_IP_DCCP=m CONFIG_IP_SCTP=m CONFIG_TIPC=m CONFIG_ATM=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 1bc3466bc909..ae45f70b29f0 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -87,7 +87,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_IP_DCCP=m CONFIG_BT=m CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index a91a766b71a4..5e0f2cd7e62b 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -225,7 +225,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_IP_DCCP=m CONFIG_TIPC=m CONFIG_ATM=m CONFIG_ATM_CLIP=m diff --git a/crypto/krb5/rfc3961_simplified.c b/crypto/krb5/rfc3961_simplified.c index 79180d28baa9..e49cbdec7c40 100644 --- a/crypto/krb5/rfc3961_simplified.c +++ b/crypto/krb5/rfc3961_simplified.c @@ -89,6 +89,7 @@ int crypto_shash_update_sg(struct shash_desc *desc, struct scatterlist *sg, sg_miter_start(&miter, sg, sg_nents(sg), SG_MITER_FROM_SG | SG_MITER_LOCAL); + sg_miter_skip(&miter, offset); for (i = 0; i < len; i += n) { sg_miter_next(&miter); n = min(miter.length, len - i); diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c index 5f90bac6bb09..f021e27644e0 100644 --- a/drivers/bcma/driver_gpio.c +++ b/drivers/bcma/driver_gpio.c @@ -26,12 +26,14 @@ static int bcma_gpio_get_value(struct gpio_chip *chip, unsigned gpio) return !!bcma_chipco_gpio_in(cc, 1 << gpio); } -static void bcma_gpio_set_value(struct gpio_chip *chip, unsigned gpio, - int value) +static int bcma_gpio_set_value(struct gpio_chip *chip, unsigned int gpio, + int value) { struct bcma_drv_cc *cc = gpiochip_get_data(chip); bcma_chipco_gpio_out(cc, 1 << gpio, value ? 
1 << gpio : 0); + + return 0; } static int bcma_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) @@ -184,7 +186,7 @@ int bcma_gpio_init(struct bcma_drv_cc *cc) chip->request = bcma_gpio_request; chip->free = bcma_gpio_free; chip->get = bcma_gpio_get_value; - chip->set = bcma_gpio_set_value; + chip->set_rv = bcma_gpio_set_value; chip->direction_input = bcma_gpio_direction_input; chip->direction_output = bcma_gpio_direction_output; chip->parent = bus->dev; diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 1ee8969595d3..abb532bc8ce4 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -1,10 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2015 - 2021 Intel Corporation */ #include "main.h" -#include "../../../net/ethernet/intel/ice/ice.h" MODULE_ALIAS("i40iw"); -MODULE_AUTHOR("Intel Corporation, <e1000-rdma@lists.sourceforge.net>"); MODULE_DESCRIPTION("Intel(R) Ethernet Protocol Driver for RDMA"); MODULE_LICENSE("Dual BSD/GPL"); @@ -61,7 +59,7 @@ static void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev) } static void irdma_fill_qos_info(struct irdma_l2params *l2params, - struct iidc_qos_params *qos_info) + struct iidc_rdma_qos_params *qos_info) { int i; @@ -85,12 +83,13 @@ static void irdma_fill_qos_info(struct irdma_l2params *l2params, } } -static void irdma_iidc_event_handler(struct ice_pf *pf, struct iidc_event *event) +static void irdma_iidc_event_handler(struct iidc_rdma_core_dev_info *cdev_info, + struct iidc_rdma_event *event) { - struct irdma_device *iwdev = dev_get_drvdata(&pf->adev->dev); + struct irdma_device *iwdev = dev_get_drvdata(&cdev_info->adev->dev); struct irdma_l2params l2params = {}; - if (*event->type & BIT(IIDC_EVENT_AFTER_MTU_CHANGE)) { + if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_MTU_CHANGE)) { ibdev_dbg(&iwdev->ibdev, "CLNT: new MTU = %d\n", iwdev->netdev->mtu); if (iwdev->vsi.mtu != iwdev->netdev->mtu) { l2params.mtu = iwdev->netdev->mtu; @@ -98,25 +97,26 @@ static void irdma_iidc_event_handler(struct ice_pf *pf, struct iidc_event *event irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev); irdma_change_l2params(&iwdev->vsi, &l2params); } - } else if (*event->type & BIT(IIDC_EVENT_BEFORE_TC_CHANGE)) { + } else if (*event->type & BIT(IIDC_RDMA_EVENT_BEFORE_TC_CHANGE)) { if (iwdev->vsi.tc_change_pending) return; irdma_prep_tc_change(iwdev); - } else if (*event->type & BIT(IIDC_EVENT_AFTER_TC_CHANGE)) { - struct iidc_qos_params qos_info = {}; + } else if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_TC_CHANGE)) { + struct iidc_rdma_priv_dev_info *iidc_priv = cdev_info->iidc_priv; if (!iwdev->vsi.tc_change_pending) return; l2params.tc_changed = true; ibdev_dbg(&iwdev->ibdev, "CLNT: TC Change\n"); - ice_get_qos_params(pf, &qos_info); - irdma_fill_qos_info(&l2params, &qos_info); + + irdma_fill_qos_info(&l2params, &iidc_priv->qos_info); if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) - iwdev->dcb_vlan_mode = qos_info.num_tc > 1 && !l2params.dscp_mode; + iwdev->dcb_vlan_mode = + l2params.num_tc > 1 && !l2params.dscp_mode; irdma_change_l2params(&iwdev->vsi, &l2params); - } else if (*event->type & BIT(IIDC_EVENT_CRIT_ERR)) { + } else if (*event->type & BIT(IIDC_RDMA_EVENT_CRIT_ERR)) { ibdev_warn(&iwdev->ibdev, "ICE OICR event notification: oicr = 0x%08x\n", event->reg); if (event->reg & IRDMAPFINT_OICR_PE_CRITERR_M) { @@ -151,10 +151,8 @@ static void irdma_iidc_event_handler(struct ice_pf *pf, struct iidc_event *event */ static void 
irdma_request_reset(struct irdma_pci_f *rf) { - struct ice_pf *pf = rf->cdev; - ibdev_warn(&rf->iwdev->ibdev, "Requesting a reset\n"); - ice_rdma_request_reset(pf, IIDC_PFR); + ice_rdma_request_reset(rf->cdev, IIDC_FUNC_RESET); } /** @@ -166,14 +164,15 @@ static int irdma_lan_register_qset(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node) { struct irdma_device *iwdev = vsi->back_vsi; - struct ice_pf *pf = iwdev->rf->cdev; + struct iidc_rdma_core_dev_info *cdev_info; struct iidc_rdma_qset_params qset = {}; int ret; + cdev_info = iwdev->rf->cdev; qset.qs_handle = tc_node->qs_handle; qset.tc = tc_node->traffic_class; qset.vport_id = vsi->vsi_idx; - ret = ice_add_rdma_qset(pf, &qset); + ret = ice_add_rdma_qset(cdev_info, &qset); if (ret) { ibdev_dbg(&iwdev->ibdev, "WS: LAN alloc_res for rdma qset failed.\n"); return ret; @@ -194,19 +193,20 @@ static void irdma_lan_unregister_qset(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node) { struct irdma_device *iwdev = vsi->back_vsi; - struct ice_pf *pf = iwdev->rf->cdev; + struct iidc_rdma_core_dev_info *cdev_info; struct iidc_rdma_qset_params qset = {}; + cdev_info = iwdev->rf->cdev; qset.qs_handle = tc_node->qs_handle; qset.tc = tc_node->traffic_class; qset.vport_id = vsi->vsi_idx; qset.teid = tc_node->l2_sched_node_id; - if (ice_del_rdma_qset(pf, &qset)) + if (ice_del_rdma_qset(cdev_info, &qset)) ibdev_dbg(&iwdev->ibdev, "WS: LAN free_res for rdma qset failed.\n"); } -static int irdma_init_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf) +static int irdma_init_interrupts(struct irdma_pci_f *rf, struct iidc_rdma_core_dev_info *cdev) { int i; @@ -217,12 +217,12 @@ static int irdma_init_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf) return -ENOMEM; for (i = 0; i < rf->msix_count; i++) - if (ice_alloc_rdma_qvector(pf, &rf->msix_entries[i])) + if (ice_alloc_rdma_qvector(cdev, &rf->msix_entries[i])) break; if (i < IRDMA_MIN_MSIX) { for (; i > 0; i--) - ice_free_rdma_qvector(pf, &rf->msix_entries[i]); + ice_free_rdma_qvector(cdev, &rf->msix_entries[i]); kfree(rf->msix_entries); return -ENOMEM; @@ -233,54 +233,63 @@ static int irdma_init_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf) return 0; } -static void irdma_deinit_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf) +static void irdma_deinit_interrupts(struct irdma_pci_f *rf, struct iidc_rdma_core_dev_info *cdev) { int i; for (i = 0; i < rf->msix_count; i++) - ice_free_rdma_qvector(pf, &rf->msix_entries[i]); + ice_free_rdma_qvector(cdev, &rf->msix_entries[i]); kfree(rf->msix_entries); } static void irdma_remove(struct auxiliary_device *aux_dev) { - struct iidc_auxiliary_dev *iidc_adev = container_of(aux_dev, - struct iidc_auxiliary_dev, - adev); - struct ice_pf *pf = iidc_adev->pf; struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev); + struct iidc_rdma_core_auxiliary_dev *iidc_adev; + struct iidc_rdma_core_dev_info *cdev_info; + + iidc_adev = container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev); + cdev_info = iidc_adev->cdev_info; + ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, false); irdma_ib_unregister_device(iwdev); - ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, false); - irdma_deinit_interrupts(iwdev->rf, pf); + irdma_deinit_interrupts(iwdev->rf, cdev_info); - pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn)); + pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(cdev_info->pdev->devfn)); } -static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf, - struct ice_vsi 
*vsi) +static void irdma_fill_device_info(struct irdma_device *iwdev, + struct iidc_rdma_core_dev_info *cdev_info) { + struct iidc_rdma_priv_dev_info *iidc_priv = cdev_info->iidc_priv; struct irdma_pci_f *rf = iwdev->rf; - rf->cdev = pf; + rf->sc_dev.hw = &rf->hw; + rf->iwdev = iwdev; + rf->cdev = cdev_info; + rf->hw.hw_addr = iidc_priv->hw_addr; + rf->pcidev = cdev_info->pdev; + rf->hw.device = &rf->pcidev->dev; + rf->pf_id = iidc_priv->pf_id; rf->gen_ops.register_qset = irdma_lan_register_qset; rf->gen_ops.unregister_qset = irdma_lan_unregister_qset; - rf->hw.hw_addr = pf->hw.hw_addr; - rf->pcidev = pf->pdev; - rf->pf_id = pf->hw.pf_id; - rf->default_vsi.vsi_idx = vsi->vsi_num; - rf->protocol_used = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? - IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY; + + rf->default_vsi.vsi_idx = iidc_priv->vport_id; + rf->protocol_used = + cdev_info->rdma_protocol == IIDC_RDMA_PROTOCOL_ROCEV2 ? + IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY; rf->rdma_ver = IRDMA_GEN_2; rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT; rf->rst_to = IRDMA_RST_TIMEOUT_HZ; rf->gen_ops.request_reset = irdma_request_reset; rf->limits_sel = 7; rf->iwdev = iwdev; + mutex_init(&iwdev->ah_tbl_lock); - iwdev->netdev = vsi->netdev; - iwdev->vsi_num = vsi->vsi_num; + + iwdev->netdev = iidc_priv->netdev; + iwdev->vsi_num = iidc_priv->vport_id; iwdev->init_state = INITIAL_STATE; iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT; iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT; @@ -292,19 +301,18 @@ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_device_id *id) { - struct iidc_auxiliary_dev *iidc_adev = container_of(aux_dev, - struct iidc_auxiliary_dev, - adev); - struct ice_pf *pf = iidc_adev->pf; - struct ice_vsi *vsi = ice_get_main_vsi(pf); - struct iidc_qos_params qos_info = {}; + struct iidc_rdma_core_auxiliary_dev *iidc_adev; + struct iidc_rdma_core_dev_info *cdev_info; + struct iidc_rdma_priv_dev_info *iidc_priv; + struct irdma_l2params l2params = {}; struct irdma_device *iwdev; struct irdma_pci_f *rf; - struct irdma_l2params l2params = {}; int err; - if (!vsi) - return -EIO; + iidc_adev = container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev); + cdev_info = iidc_adev->cdev_info; + iidc_priv = cdev_info->iidc_priv; + iwdev = ib_alloc_device(irdma_device, ibdev); if (!iwdev) return -ENOMEM; @@ -314,10 +322,10 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ return -ENOMEM; } - irdma_fill_device_info(iwdev, pf, vsi); + irdma_fill_device_info(iwdev, cdev_info); rf = iwdev->rf; - err = irdma_init_interrupts(rf, pf); + err = irdma_init_interrupts(rf, cdev_info); if (err) goto err_init_interrupts; @@ -326,8 +334,7 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ goto err_ctrl_init; l2params.mtu = iwdev->netdev->mtu; - ice_get_qos_params(pf, &qos_info); - irdma_fill_qos_info(&l2params, &qos_info); + irdma_fill_qos_info(&l2params, &iidc_priv->qos_info); if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode; @@ -339,7 +346,7 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ if (err) goto err_ibreg; - ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, true); + ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, true); ibdev_dbg(&iwdev->ibdev, "INIT: Gen2 PF[%d] device probe success\n", 
PCI_FUNC(rf->pcidev->devfn)); auxiliary_set_drvdata(aux_dev, iwdev); @@ -351,7 +358,7 @@ err_ibreg: err_rt_init: irdma_ctrl_deinit_hw(rf); err_ctrl_init: - irdma_deinit_interrupts(rf, pf); + irdma_deinit_interrupts(rf, cdev_info); err_init_interrupts: kfree(iwdev->rf); ib_dealloc_device(&iwdev->ibdev); @@ -367,7 +374,7 @@ static const struct auxiliary_device_id irdma_auxiliary_id_table[] = { MODULE_DEVICE_TABLE(auxiliary, irdma_auxiliary_id_table); -static struct iidc_auxiliary_drv irdma_auxiliary_drv = { +static struct iidc_rdma_core_auxiliary_drv irdma_auxiliary_drv = { .adrv = { .id_table = irdma_auxiliary_id_table, .probe = irdma_probe, diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index bb0b6494ccb2..674acc952168 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -29,7 +29,8 @@ #include <linux/io-64-nonatomic-lo-hi.h> #endif #include <linux/auxiliary_bus.h> -#include <linux/net/intel/iidc.h> +#include <linux/net/intel/iidc_rdma.h> +#include <linux/net/intel/iidc_rdma_ice.h> #include <rdma/ib_smi.h> #include <rdma/ib_verbs.h> #include <rdma/ib_pack.h> diff --git a/drivers/infiniband/hw/irdma/osdep.h b/drivers/infiniband/hw/irdma/osdep.h index 4b4f78288d12..3f73ceacccb6 100644 --- a/drivers/infiniband/hw/irdma/osdep.h +++ b/drivers/infiniband/hw/irdma/osdep.h @@ -5,8 +5,8 @@ #include <linux/pci.h> #include <linux/bitfield.h> -#include <linux/net/intel/iidc.h> #include <rdma/ib_verbs.h> +#include <net/dscp.h> #define STATS_TIMER_DELAY 60000 diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 59b34afa867b..527c6da2c1ac 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -567,7 +567,7 @@ struct irdma_sc_vsi { u8 qos_rel_bw; u8 qos_prio_type; u8 stats_idx; - u8 dscp_map[IIDC_MAX_DSCP_MAPPING]; + u8 dscp_map[DSCP_MAX]; struct irdma_qos qos[IRDMA_MAX_USER_PRIORITY]; u64 hw_stats_regs[IRDMA_HW_STAT_INDEX_MAX_GEN_1]; bool dscp_mode:1; @@ -695,7 +695,7 @@ struct irdma_l2params { u16 qs_handle_list[IRDMA_MAX_USER_PRIORITY]; u16 mtu; u8 up2tc[IRDMA_MAX_USER_PRIORITY]; - u8 dscp_map[IIDC_MAX_DSCP_MAPPING]; + u8 dscp_map[DSCP_MAX]; u8 num_tc; u8 vsi_rel_bw; u8 vsi_prio_type; diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 271520510b5f..b29628d46be9 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -115,6 +115,21 @@ config WIREGUARD_DEBUG Say N here unless you know what you're doing. +config OVPN + tristate "OpenVPN data channel offload" + depends on NET && INET + depends on IPV6 || !IPV6 + select DST_CACHE + select NET_UDP_TUNNEL + select CRYPTO + select CRYPTO_AES + select CRYPTO_GCM + select CRYPTO_CHACHA20POLY1305 + select STREAM_PARSER + help + This module enhances the performance of the OpenVPN userspace software + by offloading the data channel processing to kernelspace. 
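+
+# Note (a sketch of the idiom, not a statement from the patch author):
+# "depends on IPV6 || !IPV6" above is the usual Kconfig pattern for tying a
+# tristate to IPV6 without requiring it. OVPN stays selectable when IPV6 is
+# disabled, but is limited to =m when IPV6=m, since built-in code cannot
+# call into a module.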
+ config EQUALIZER tristate "EQL (serial line load balancing) support" help diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 75333251a01a..73bc63ecd65f 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_IPVLAN) += ipvlan/ obj-$(CONFIG_IPVTAP) += ipvlan/ obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_WIREGUARD) += wireguard/ +obj-$(CONFIG_OVPN) += ovpn/ obj-$(CONFIG_EQUALIZER) += eql.o obj-$(CONFIG_IFB) += ifb.o obj-$(CONFIG_MACSEC) += macsec.o diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index d1473c5f8eef..a9dffdcac805 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -777,27 +777,19 @@ static __net_init int bareudp_init_net(struct net *net) return 0; } -static void bareudp_destroy_tunnels(struct net *net, struct list_head *head) +static void __net_exit bareudp_exit_rtnl_net(struct net *net, + struct list_head *dev_kill_list) { struct bareudp_net *bn = net_generic(net, bareudp_net_id); struct bareudp_dev *bareudp, *next; list_for_each_entry_safe(bareudp, next, &bn->bareudp_list, next) - unregister_netdevice_queue(bareudp->dev, head); -} - -static void __net_exit bareudp_exit_batch_rtnl(struct list_head *net_list, - struct list_head *dev_kill_list) -{ - struct net *net; - - list_for_each_entry(net, net_list, exit_list) - bareudp_destroy_tunnels(net, dev_kill_list); + bareudp_dellink(bareudp->dev, dev_kill_list); } static struct pernet_operations bareudp_net_ops = { .init = bareudp_init_net, - .exit_batch_rtnl = bareudp_exit_batch_rtnl, + .exit_rtnl = bareudp_exit_rtnl_net, .id = &bareudp_net_id, .size = sizeof(struct bareudp_net), }; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8ea183da8d53..d05226484c64 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2118,15 +2118,26 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, * set the master's mac address to that of the first slave */ memcpy(ss.__data, bond_dev->dev_addr, bond_dev->addr_len); - ss.ss_family = slave_dev->type; - res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, - extack); - if (res) { - slave_err(bond_dev, slave_dev, "Error %d calling set_mac_address\n", res); - goto err_restore_mtu; - } + } else if (bond->params.fail_over_mac == BOND_FOM_FOLLOW && + BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP && + memcmp(slave_dev->dev_addr, bond_dev->dev_addr, bond_dev->addr_len) == 0) { + /* Set slave to random address to avoid duplicate mac + * address in later fail over. 
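+	 * (Background, per Documentation/networking/bonding.rst: with
+	 * fail_over_mac=follow the bond's MAC sits on the currently active
+	 * slave and addresses are swapped between slaves on failover, so a
+	 * newly enslaved device that already matches the bond's MAC could
+	 * later collide with another slave.)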
+ */ + eth_random_addr(ss.__data); + } else { + goto skip_mac_set; + } + + ss.ss_family = slave_dev->type; + res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, extack); + if (res) { + slave_err(bond_dev, slave_dev, "Error %d calling set_mac_address\n", res); + goto err_restore_mtu; } +skip_mac_set: + /* set no_addrconf flag before open to prevent IPv6 addrconf */ slave_dev->priv_flags |= IFF_NO_ADDRCONF; @@ -6563,7 +6574,7 @@ static int __net_init bond_net_init(struct net *net) /* According to commit 69b0216ac255 ("bonding: fix bonding_masters * race condition in bond unloading") we need to remove sysfs files - * before we remove our devices (done later in bond_net_exit_batch_rtnl()) + * before we remove our devices (done later in bond_net_exit_rtnl()) */ static void __net_exit bond_net_pre_exit(struct net *net) { @@ -6572,25 +6583,20 @@ static void __net_exit bond_net_pre_exit(struct net *net) bond_destroy_sysfs(bn); } -static void __net_exit bond_net_exit_batch_rtnl(struct list_head *net_list, - struct list_head *dev_kill_list) +static void __net_exit bond_net_exit_rtnl(struct net *net, + struct list_head *dev_kill_list) { - struct bond_net *bn; - struct net *net; + struct bond_net *bn = net_generic(net, bond_net_id); + struct bonding *bond, *tmp_bond; /* Kill off any bonds created after unregistering bond rtnl ops */ - list_for_each_entry(net, net_list, exit_list) { - struct bonding *bond, *tmp_bond; - - bn = net_generic(net, bond_net_id); - list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list) - unregister_netdevice_queue(bond->dev, dev_kill_list); - } + list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list) + unregister_netdevice_queue(bond->dev, dev_kill_list); } /* According to commit 23fa5c2caae0 ("bonding: destroy proc directory * only after all bonds are gone") bond_destroy_proc_dir() is called - * after bond_net_exit_batch_rtnl() has completed. + * after bond_net_exit_rtnl() has completed. 
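+ * (Callback ordering sketch, inferred from these pernet_operations:
+ * ->pre_exit runs first, then ->exit_rtnl for each net inside a single
+ * rtnl_lock section, and ->exit_batch last, which is why the proc
+ * directory teardown below is safe.)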
*/ static void __net_exit bond_net_exit_batch(struct list_head *net_list) { @@ -6606,7 +6612,7 @@ static void __net_exit bond_net_exit_batch(struct list_head *net_list) static struct pernet_operations bond_net_ops = { .init = bond_net_init, .pre_exit = bond_net_pre_exit, - .exit_batch_rtnl = bond_net_exit_batch_rtnl, + .exit_rtnl = bond_net_exit_rtnl, .exit_batch = bond_net_exit_batch, .id = &bond_net_id, .size = sizeof(struct bond_net), diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 7216eb8f9493..132683ed3abe 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -21,6 +21,7 @@ #include <linux/export.h> #include <linux/gpio.h> #include <linux/kernel.h> +#include <linux/math.h> #include <linux/module.h> #include <linux/platform_data/b53.h> #include <linux/phy.h> @@ -1202,6 +1203,10 @@ static int b53_setup(struct dsa_switch *ds) */ ds->untag_vlan_aware_bridge_pvid = true; + /* Ageing time is set in seconds */ + ds->ageing_time_min = 1 * 1000; + ds->ageing_time_max = AGE_TIME_MAX * 1000; + ret = b53_reset_switch(dev); if (ret) { dev_err(ds->dev, "failed to reset switch\n"); @@ -2406,6 +2411,28 @@ static int b53_get_max_mtu(struct dsa_switch *ds, int port) return B53_MAX_MTU; } +int b53_set_ageing_time(struct dsa_switch *ds, unsigned int msecs) +{ + struct b53_device *dev = ds->priv; + u32 atc; + int reg; + + if (is63xx(dev)) + reg = B53_AGING_TIME_CONTROL_63XX; + else + reg = B53_AGING_TIME_CONTROL; + + atc = DIV_ROUND_CLOSEST(msecs, 1000); + + if (!is5325(dev) && !is5365(dev)) + atc |= AGE_CHANGE; + + b53_write32(dev, B53_MGMT_PAGE, reg, atc); + + return 0; +} +EXPORT_SYMBOL_GPL(b53_set_ageing_time); + static const struct phylink_mac_ops b53_phylink_mac_ops = { .mac_select_pcs = b53_phylink_mac_select_pcs, .mac_config = b53_phylink_mac_config, @@ -2429,6 +2456,7 @@ static const struct dsa_switch_ops b53_switch_ops = { .port_disable = b53_disable_port, .support_eee = b53_support_eee, .set_mac_eee = b53_set_mac_eee, + .set_ageing_time = b53_set_ageing_time, .port_bridge_join = b53_br_join, .port_bridge_leave = b53_br_leave, .port_pre_bridge_flags = b53_br_flags_pre, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 2cf3e6a81e37..a5ef7071ba07 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -343,6 +343,7 @@ void b53_get_strings(struct dsa_switch *ds, int port, u32 stringset, void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data); int b53_get_sset_count(struct dsa_switch *ds, int port, int sset); void b53_get_ethtool_phy_stats(struct dsa_switch *ds, int port, uint64_t *data); +int b53_set_ageing_time(struct dsa_switch *ds, unsigned int msecs); int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, bool *tx_fwd_offload, struct netlink_ext_ack *extack); void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge); diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 5f7a0e5c5709..1fbc5a204bc7 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -220,6 +220,13 @@ #define BRCM_HDR_P5_EN BIT(1) /* Enable tagging on port 5 */ #define BRCM_HDR_P7_EN BIT(2) /* Enable tagging on port 7 */ +/* Aging Time control register (32 bit) */ +#define B53_AGING_TIME_CONTROL 0x06 +#define B53_AGING_TIME_CONTROL_63XX 0x08 +#define AGE_CHANGE BIT(20) +#define AGE_TIME_MASK 0x7ffff +#define AGE_TIME_MAX 1048575 + /* Mirror capture control register (16 bit) */ 
#define B53_MIR_CAP_CTL 0x10 #define CAP_PORT_MASK 0xf diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 454a8c7fd7ee..960685596093 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1235,6 +1235,7 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .port_disable = bcm_sf2_port_disable, .support_eee = b53_support_eee, .set_mac_eee = b53_set_mac_eee, + .set_ageing_time = b53_set_ageing_time, .port_bridge_join = b53_br_join, .port_bridge_leave = b53_br_leave, .port_pre_bridge_flags = b53_br_flags_pre, diff --git a/drivers/net/dsa/hirschmann/hellcreek.h b/drivers/net/dsa/hirschmann/hellcreek.h index 9c2ed2ba79da..bebf0d3ff330 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.h +++ b/drivers/net/dsa/hirschmann/hellcreek.h @@ -244,7 +244,7 @@ struct hellcreek_port_hwtstamp { struct sk_buff *tx_skb; /* Current timestamp configuration */ - struct hwtstamp_config tstamp_config; + struct kernel_hwtstamp_config tstamp_config; }; struct hellcreek_port { diff --git a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c index ca2500aba96f..99941ff1ebf9 100644 --- a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c +++ b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c @@ -40,7 +40,7 @@ int hellcreek_get_ts_info(struct dsa_switch *ds, int port, * the user requested what is actually available or not */ static int hellcreek_set_hwtstamp_config(struct hellcreek *hellcreek, int port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { struct hellcreek_port_hwtstamp *ps = &hellcreek->ports[port].port_hwtstamp; @@ -110,41 +110,35 @@ static int hellcreek_set_hwtstamp_config(struct hellcreek *hellcreek, int port, } int hellcreek_port_hwtstamp_set(struct dsa_switch *ds, int port, - struct ifreq *ifr) + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct hellcreek *hellcreek = ds->priv; struct hellcreek_port_hwtstamp *ps; - struct hwtstamp_config config; int err; ps = &hellcreek->ports[port].port_hwtstamp; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - err = hellcreek_set_hwtstamp_config(hellcreek, port, &config); + err = hellcreek_set_hwtstamp_config(hellcreek, port, config); if (err) return err; /* Save the chosen configuration to be returned later */ - memcpy(&ps->tstamp_config, &config, sizeof(config)); + ps->tstamp_config = *config; - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; + return 0; } int hellcreek_port_hwtstamp_get(struct dsa_switch *ds, int port, - struct ifreq *ifr) + struct kernel_hwtstamp_config *config) { struct hellcreek *hellcreek = ds->priv; struct hellcreek_port_hwtstamp *ps; - struct hwtstamp_config *config; ps = &hellcreek->ports[port].port_hwtstamp; - config = &ps->tstamp_config; + *config = ps->tstamp_config; - return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ? 
- -EFAULT : 0; + return 0; } /* Returns a pointer to the PTP header if the caller should time stamp, or NULL diff --git a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h index 7d88da2134f2..388821c4aa10 100644 --- a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h +++ b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h @@ -38,9 +38,10 @@ #define TX_TSTAMP_TIMEOUT msecs_to_jiffies(40) int hellcreek_port_hwtstamp_set(struct dsa_switch *ds, int port, - struct ifreq *ifr); + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); int hellcreek_port_hwtstamp_get(struct dsa_switch *ds, int port, - struct ifreq *ifr); + struct kernel_hwtstamp_config *config); bool hellcreek_port_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *clone, unsigned int type); diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index f95a9aac56ee..f492fa9f6dd4 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -4078,6 +4078,89 @@ static int ksz_ets_band_to_queue(struct tc_ets_qopt_offload_replace_params *p, return p->bands - 1 - band; } +/** + * ksz88x3_tc_ets_add - Configure ETS (Enhanced Transmission Selection) + * for a port on KSZ88x3 switch + * @dev: Pointer to the KSZ switch device structure + * @port: Port number to configure + * @p: Pointer to offload replace parameters describing ETS bands and mapping + * + * The KSZ88x3 supports two scheduling modes: Strict Priority and + * Weighted Fair Queuing (WFQ). Both modes have fixed behavior: + * - No configurable queue-to-priority mapping + * - No weight adjustment in WFQ mode + * + * This function configures the switch to use strict priority mode by + * clearing the WFQ enable bit for all queues associated with ETS bands. + * If strict priority is not explicitly requested, the switch will default + * to WFQ mode. + * + * Return: 0 on success, or a negative error code on failure + */ +static int ksz88x3_tc_ets_add(struct ksz_device *dev, int port, + struct tc_ets_qopt_offload_replace_params *p) +{ + int ret, band; + + /* Only strict priority mode is supported for now. + * WFQ is implicitly enabled when strict mode is disabled. + */ + for (band = 0; band < p->bands; band++) { + int queue = ksz_ets_band_to_queue(p, band); + u8 reg; + + /* Calculate TXQ Split Control register address for this + * port/queue + */ + reg = KSZ8873_TXQ_SPLIT_CTRL_REG(port, queue); + + /* Clear WFQ enable bit to select strict priority scheduling */ + ret = ksz_rmw8(dev, reg, KSZ8873_TXQ_WFQ_ENABLE, 0); + if (ret) + return ret; + } + + return 0; +} + +/** + * ksz88x3_tc_ets_del - Reset ETS (Enhanced Transmission Selection) config + * for a port on KSZ88x3 switch + * @dev: Pointer to the KSZ switch device structure + * @port: Port number to reset + * + * The KSZ88x3 supports only fixed scheduling modes: Strict Priority or + * Weighted Fair Queuing (WFQ), with no reconfiguration of weights or + * queue mapping. This function resets the port's scheduling mode to + * the default, which is WFQ, by enabling the WFQ bit for all queues.
+ * + * Return: 0 on success, or a negative error code on failure + */ +static int ksz88x3_tc_ets_del(struct ksz_device *dev, int port) +{ + int ret, queue; + + /* Iterate over all transmit queues for this port */ + for (queue = 0; queue < dev->info->num_tx_queues; queue++) { + u8 reg; + + /* Calculate TXQ Split Control register address for this + * port/queue + */ + reg = KSZ8873_TXQ_SPLIT_CTRL_REG(port, queue); + + /* Set WFQ enable bit to revert back to default scheduling + * mode + */ + ret = ksz_rmw8(dev, reg, KSZ8873_TXQ_WFQ_ENABLE, + KSZ8873_TXQ_WFQ_ENABLE); + if (ret) + return ret; + } + + return 0; +} + static int ksz_queue_set_strict(struct ksz_device *dev, int port, int queue) { int ret; @@ -4159,6 +4242,7 @@ static int ksz_tc_ets_del(struct ksz_device *dev, int port) for (queue = 0; queue < dev->info->num_tx_queues; queue++) { ret = ksz_queue_set_wrr(dev, port, queue, KSZ9477_DEFAULT_WRR_WEIGHT); + if (ret) return ret; } @@ -4211,7 +4295,7 @@ static int ksz_tc_setup_qdisc_ets(struct dsa_switch *ds, int port, struct ksz_device *dev = ds->priv; int ret; - if (is_ksz8(dev)) + if (is_ksz8(dev) && !ksz_is_ksz88x3(dev)) return -EOPNOTSUPP; if (qopt->parent != TC_H_ROOT) { @@ -4225,9 +4309,16 @@ static int ksz_tc_setup_qdisc_ets(struct dsa_switch *ds, int port, if (ret) return ret; - return ksz_tc_ets_add(dev, port, &qopt->replace_params); + if (ksz_is_ksz88x3(dev)) + return ksz88x3_tc_ets_add(dev, port, + &qopt->replace_params); + else + return ksz_tc_ets_add(dev, port, &qopt->replace_params); case TC_ETS_DESTROY: - return ksz_tc_ets_del(dev, port); + if (ksz_is_ksz88x3(dev)) + return ksz88x3_tc_ets_del(dev, port); + else + return ksz_tc_ets_del(dev, port); case TC_ETS_STATS: case TC_ETS_GRAFT: return -EOPNOTSUPP; diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index af17a9c030d4..a034017568cd 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -142,7 +142,7 @@ struct ksz_port { struct ksz_irq pirq; u8 num; #if IS_ENABLED(CONFIG_NET_DSA_MICROCHIP_KSZ_PTP) - struct hwtstamp_config tstamp_config; + struct kernel_hwtstamp_config tstamp_config; bool hwts_tx_en; bool hwts_rx_en; struct ksz_irq ptpirq; @@ -836,6 +836,25 @@ static inline bool is_lan937x_tx_phy(struct ksz_device *dev, int port) #define SW_HI_SPEED_DRIVE_STRENGTH_S 4 #define SW_LO_SPEED_DRIVE_STRENGTH_S 0 +/* TXQ Split Control Register for per-port, per-queue configuration. + * Register 0xAF is TXQ Split for Q3 on Port 1. + * Register offset formula: 0xAF + (port * 4) + (3 - queue) + * where: port = 0..2, queue = 0..3 + */ +#define KSZ8873_TXQ_SPLIT_CTRL_REG(port, queue) \ + (0xAF + ((port) * 4) + (3 - (queue))) + +/* Bit 7 selects between: + * 0 = Strict priority mode (highest-priority queue first) + * 1 = Weighted Fair Queuing (WFQ) mode: + * Queue weights: Q3:Q2:Q1:Q0 = 8:4:2:1 + * If any queues are empty, weight is redistributed. + * + * Note: This is referred to as "Weighted Fair Queuing" (WFQ) in KSZ8863/8873 + * documentation, and as "Weighted Round Robin" (WRR) in KSZ9477 family docs. 
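+ *
+ * Worked example of KSZ8873_TXQ_SPLIT_CTRL_REG() above (a sketch derived
+ * from the stated formula): (port 0, queue 3) maps to the 0xAF anchor
+ * register, and (port 2, queue 0) maps to 0xAF + 8 + 3 = 0xBA. With the
+ * fixed 8:4:2:1 weights, a fully backlogged port in WFQ mode serves
+ * roughly 8 of every 15 frames from Q3.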
+ */ +#define KSZ8873_TXQ_WFQ_ENABLE BIT(7) + #define KSZ9477_REG_PORT_OUT_RATE_0 0x0420 #define KSZ9477_OUT_RATE_NO_LIMIT 0 diff --git a/drivers/net/dsa/microchip/ksz_ptp.c b/drivers/net/dsa/microchip/ksz_ptp.c index 22fb9ef4645c..bc54a96ba646 100644 --- a/drivers/net/dsa/microchip/ksz_ptp.c +++ b/drivers/net/dsa/microchip/ksz_ptp.c @@ -319,22 +319,21 @@ int ksz_get_ts_info(struct dsa_switch *ds, int port, struct kernel_ethtool_ts_in return 0; } -int ksz_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr) +int ksz_hwtstamp_get(struct dsa_switch *ds, int port, + struct kernel_hwtstamp_config *config) { struct ksz_device *dev = ds->priv; - struct hwtstamp_config *config; struct ksz_port *prt; prt = &dev->ports[port]; - config = &prt->tstamp_config; + *config = prt->tstamp_config; - return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ? - -EFAULT : 0; + return 0; } static int ksz_set_hwtstamp_config(struct ksz_device *dev, struct ksz_port *prt, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { int ret; @@ -404,26 +403,21 @@ static int ksz_set_hwtstamp_config(struct ksz_device *dev, return ksz_ptp_enable_mode(dev); } -int ksz_hwtstamp_set(struct dsa_switch *ds, int port, struct ifreq *ifr) +int ksz_hwtstamp_set(struct dsa_switch *ds, int port, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct ksz_device *dev = ds->priv; - struct hwtstamp_config config; struct ksz_port *prt; int ret; prt = &dev->ports[port]; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - ret = ksz_set_hwtstamp_config(dev, prt, &config); + ret = ksz_set_hwtstamp_config(dev, prt, config); if (ret) return ret; - memcpy(&prt->tstamp_config, &config, sizeof(config)); - - if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) - return -EFAULT; + prt->tstamp_config = *config; return 0; } diff --git a/drivers/net/dsa/microchip/ksz_ptp.h b/drivers/net/dsa/microchip/ksz_ptp.h index 2f1783c0d723..3086e519b1b6 100644 --- a/drivers/net/dsa/microchip/ksz_ptp.h +++ b/drivers/net/dsa/microchip/ksz_ptp.h @@ -39,8 +39,11 @@ void ksz_ptp_clock_unregister(struct dsa_switch *ds); int ksz_get_ts_info(struct dsa_switch *ds, int port, struct kernel_ethtool_ts_info *ts); -int ksz_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr); -int ksz_hwtstamp_set(struct dsa_switch *ds, int port, struct ifreq *ifr); +int ksz_hwtstamp_get(struct dsa_switch *ds, int port, + struct kernel_hwtstamp_config *config); +int ksz_hwtstamp_set(struct dsa_switch *ds, int port, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void ksz_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb); void ksz_port_deferred_xmit(struct kthread_work *work); bool ksz_port_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb, diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index c5d6628d7980..7361380ffb5f 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -32,47 +32,15 @@ static struct mt753x_pcs *pcs_to_mt753x_pcs(struct phylink_pcs *pcs) /* String, offset, and register size in bytes if different from 4 bytes */ static const struct mt7530_mib_desc mt7530_mib[] = { - MIB_DESC(1, 0x00, "TxDrop"), - MIB_DESC(1, 0x04, "TxCrcErr"), - MIB_DESC(1, 0x08, "TxUnicast"), - MIB_DESC(1, 0x0c, "TxMulticast"), - MIB_DESC(1, 0x10, "TxBroadcast"), - MIB_DESC(1, 0x14, "TxCollision"), - MIB_DESC(1, 0x18, "TxSingleCollision"), - MIB_DESC(1, 0x1c, "TxMultipleCollision"), - MIB_DESC(1, 0x20, 
"TxDeferred"), - MIB_DESC(1, 0x24, "TxLateCollision"), - MIB_DESC(1, 0x28, "TxExcessiveCollistion"), - MIB_DESC(1, 0x2c, "TxPause"), - MIB_DESC(1, 0x30, "TxPktSz64"), - MIB_DESC(1, 0x34, "TxPktSz65To127"), - MIB_DESC(1, 0x38, "TxPktSz128To255"), - MIB_DESC(1, 0x3c, "TxPktSz256To511"), - MIB_DESC(1, 0x40, "TxPktSz512To1023"), - MIB_DESC(1, 0x44, "Tx1024ToMax"), - MIB_DESC(2, 0x48, "TxBytes"), - MIB_DESC(1, 0x60, "RxDrop"), - MIB_DESC(1, 0x64, "RxFiltering"), - MIB_DESC(1, 0x68, "RxUnicast"), - MIB_DESC(1, 0x6c, "RxMulticast"), - MIB_DESC(1, 0x70, "RxBroadcast"), - MIB_DESC(1, 0x74, "RxAlignErr"), - MIB_DESC(1, 0x78, "RxCrcErr"), - MIB_DESC(1, 0x7c, "RxUnderSizeErr"), - MIB_DESC(1, 0x80, "RxFragErr"), - MIB_DESC(1, 0x84, "RxOverSzErr"), - MIB_DESC(1, 0x88, "RxJabberErr"), - MIB_DESC(1, 0x8c, "RxPause"), - MIB_DESC(1, 0x90, "RxPktSz64"), - MIB_DESC(1, 0x94, "RxPktSz65To127"), - MIB_DESC(1, 0x98, "RxPktSz128To255"), - MIB_DESC(1, 0x9c, "RxPktSz256To511"), - MIB_DESC(1, 0xa0, "RxPktSz512To1023"), - MIB_DESC(1, 0xa4, "RxPktSz1024ToMax"), - MIB_DESC(2, 0xa8, "RxBytes"), - MIB_DESC(1, 0xb0, "RxCtrlDrop"), - MIB_DESC(1, 0xb4, "RxIngressDrop"), - MIB_DESC(1, 0xb8, "RxArlDrop"), + MIB_DESC(1, MT7530_PORT_MIB_TX_DROP, "TxDrop"), + MIB_DESC(1, MT7530_PORT_MIB_TX_CRC_ERR, "TxCrcErr"), + MIB_DESC(1, MT7530_PORT_MIB_TX_COLLISION, "TxCollision"), + MIB_DESC(1, MT7530_PORT_MIB_RX_DROP, "RxDrop"), + MIB_DESC(1, MT7530_PORT_MIB_RX_FILTERING, "RxFiltering"), + MIB_DESC(1, MT7530_PORT_MIB_RX_CRC_ERR, "RxCrcErr"), + MIB_DESC(1, MT7530_PORT_MIB_RX_CTRL_DROP, "RxCtrlDrop"), + MIB_DESC(1, MT7530_PORT_MIB_RX_INGRESS_DROP, "RxIngressDrop"), + MIB_DESC(1, MT7530_PORT_MIB_RX_ARL_DROP, "RxArlDrop"), }; static void @@ -790,23 +758,33 @@ mt7530_get_strings(struct dsa_switch *ds, int port, u32 stringset, } static void +mt7530_read_port_stats(struct mt7530_priv *priv, int port, + u32 offset, u8 size, uint64_t *data) +{ + u32 val, reg = MT7530_PORT_MIB_COUNTER(port) + offset; + + val = mt7530_read(priv, reg); + *data = val; + + if (size == 2) { + val = mt7530_read(priv, reg + 4); + *data |= (u64)val << 32; + } +} + +static void mt7530_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data) { struct mt7530_priv *priv = ds->priv; const struct mt7530_mib_desc *mib; - u32 reg, i; - u64 hi; + int i; for (i = 0; i < ARRAY_SIZE(mt7530_mib); i++) { mib = &mt7530_mib[i]; - reg = MT7530_PORT_MIB_COUNTER(port) + mib->offset; - data[i] = mt7530_read(priv, reg); - if (mib->size == 2) { - hi = mt7530_read(priv, reg + 4); - data[i] |= hi << 32; - } + mt7530_read_port_stats(priv, port, mib->offset, mib->size, + data + i); } } @@ -819,6 +797,172 @@ mt7530_get_sset_count(struct dsa_switch *ds, int port, int sset) return ARRAY_SIZE(mt7530_mib); } +static void mt7530_get_eth_mac_stats(struct dsa_switch *ds, int port, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct mt7530_priv *priv = ds->priv; + + /* MIB counter doesn't provide a FramesTransmittedOK but instead + * provide stats for Unicast, Broadcast and Multicast frames separately. 
+ * To simulate a global frame counter, read Unicast first and add Multicast + * and Broadcast to it afterwards + */ + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_UNICAST, 1, + &mac_stats->FramesTransmittedOK); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_SINGLE_COLLISION, 1, + &mac_stats->SingleCollisionFrames); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_MULTIPLE_COLLISION, 1, + &mac_stats->MultipleCollisionFrames); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_UNICAST, 1, + &mac_stats->FramesReceivedOK); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_BYTES, 2, + &mac_stats->OctetsTransmittedOK); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_ALIGN_ERR, 1, + &mac_stats->AlignmentErrors); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_DEFERRED, 1, + &mac_stats->FramesWithDeferredXmissions); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_LATE_COLLISION, 1, + &mac_stats->LateCollisions); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_EXCESSIVE_COLLISION, 1, + &mac_stats->FramesAbortedDueToXSColls); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_BYTES, 2, + &mac_stats->OctetsReceivedOK); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_MULTICAST, 1, + &mac_stats->MulticastFramesXmittedOK); + mac_stats->FramesTransmittedOK += mac_stats->MulticastFramesXmittedOK; + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_BROADCAST, 1, + &mac_stats->BroadcastFramesXmittedOK); + mac_stats->FramesTransmittedOK += mac_stats->BroadcastFramesXmittedOK; + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_MULTICAST, 1, + &mac_stats->MulticastFramesReceivedOK); + mac_stats->FramesReceivedOK += mac_stats->MulticastFramesReceivedOK; + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_BROADCAST, 1, + &mac_stats->BroadcastFramesReceivedOK); + mac_stats->FramesReceivedOK += mac_stats->BroadcastFramesReceivedOK; +} + +static const struct ethtool_rmon_hist_range mt7530_rmon_ranges[] = { + { 0, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, MT7530_MAX_MTU }, + {} +}; + +static void mt7530_get_rmon_stats(struct dsa_switch *ds, int port, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct mt7530_priv *priv = ds->priv; + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_UNDER_SIZE_ERR, 1, + &rmon_stats->undersize_pkts); + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_OVER_SZ_ERR, 1, + &rmon_stats->oversize_pkts); + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_FRAG_ERR, 1, + &rmon_stats->fragments); + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_JABBER_ERR, 1, + &rmon_stats->jabbers); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_PKT_SZ_64, 1, + &rmon_stats->hist[0]); + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_PKT_SZ_65_TO_127, 1, + &rmon_stats->hist[1]); + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_PKT_SZ_128_TO_255, 1, + &rmon_stats->hist[2]); + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_PKT_SZ_256_TO_511, 1, + &rmon_stats->hist[3]); + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_PKT_SZ_512_TO_1023, 1, + &rmon_stats->hist[4]); + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_PKT_SZ_1024_TO_MAX, 1, + &rmon_stats->hist[5]); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_PKT_SZ_64, 1, + &rmon_stats->hist_tx[0]); + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_PKT_SZ_65_TO_127, 1,
&rmon_stats->hist_tx[1]); + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_PKT_SZ_128_TO_255, 1, + &rmon_stats->hist_tx[2]); + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_PKT_SZ_256_TO_511, 1, + &rmon_stats->hist_tx[3]); + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_PKT_SZ_512_TO_1023, 1, + &rmon_stats->hist_tx[4]); + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_PKT_SZ_1024_TO_MAX, 1, + &rmon_stats->hist_tx[5]); + + *ranges = mt7530_rmon_ranges; +} + +static void mt7530_get_stats64(struct dsa_switch *ds, int port, + struct rtnl_link_stats64 *storage) +{ + struct mt7530_priv *priv = ds->priv; + uint64_t data; + + /* The MIB counters don't provide a FramesTransmittedOK counter but instead + * provide stats for Unicast, Broadcast and Multicast frames separately. + * To simulate a global frame counter, read Unicast first and add Multicast + * and Broadcast to it afterwards + */ + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_UNICAST, 1, + &storage->rx_packets); + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_MULTICAST, 1, + &storage->multicast); + storage->rx_packets += storage->multicast; + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_BROADCAST, 1, + &data); + storage->rx_packets += data; + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_UNICAST, 1, + &storage->tx_packets); + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_MULTICAST, 1, + &data); + storage->tx_packets += data; + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_BROADCAST, 1, + &data); + storage->tx_packets += data; + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_BYTES, 2, + &storage->rx_bytes); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_BYTES, 2, + &storage->tx_bytes); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_DROP, 1, + &storage->rx_dropped); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_DROP, 1, + &storage->tx_dropped); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_CRC_ERR, 1, + &storage->rx_crc_errors); +} + +static void mt7530_get_eth_ctrl_stats(struct dsa_switch *ds, int port, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct mt7530_priv *priv = ds->priv; + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_PAUSE, 1, + &ctrl_stats->MACControlFramesTransmitted); + + mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_PAUSE, 1, + &ctrl_stats->MACControlFramesReceived); +} + static int mt7530_set_ageing_time(struct dsa_switch *ds, unsigned int msecs) { @@ -3105,6 +3249,10 @@ const struct dsa_switch_ops mt7530_switch_ops = { .get_strings = mt7530_get_strings, .get_ethtool_stats = mt7530_get_ethtool_stats, .get_sset_count = mt7530_get_sset_count, + .get_eth_mac_stats = mt7530_get_eth_mac_stats, + .get_rmon_stats = mt7530_get_rmon_stats, + .get_eth_ctrl_stats = mt7530_get_eth_ctrl_stats, + .get_stats64 = mt7530_get_stats64, .set_ageing_time = mt7530_set_ageing_time, .port_enable = mt7530_port_enable, .port_disable = mt7530_port_disable, diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index c3ea403d7acf..d4b838a055ad 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -423,6 +423,48 @@ enum mt7530_vlan_port_acc_frm { /* Register for MIB */ #define MT7530_PORT_MIB_COUNTER(x) (0x4000 + (x) * 0x100) +/* Each define is an offset of MT7530_PORT_MIB_COUNTER */ +#define MT7530_PORT_MIB_TX_DROP 0x00 +#define MT7530_PORT_MIB_TX_CRC_ERR 0x04 +#define MT7530_PORT_MIB_TX_UNICAST 0x08 +#define MT7530_PORT_MIB_TX_MULTICAST 0x0c +#define MT7530_PORT_MIB_TX_BROADCAST 0x10
+#define MT7530_PORT_MIB_TX_COLLISION 0x14 +#define MT7530_PORT_MIB_TX_SINGLE_COLLISION 0x18 +#define MT7530_PORT_MIB_TX_MULTIPLE_COLLISION 0x1c +#define MT7530_PORT_MIB_TX_DEFERRED 0x20 +#define MT7530_PORT_MIB_TX_LATE_COLLISION 0x24 +#define MT7530_PORT_MIB_TX_EXCESSIVE_COLLISION 0x28 +#define MT7530_PORT_MIB_TX_PAUSE 0x2c +#define MT7530_PORT_MIB_TX_PKT_SZ_64 0x30 +#define MT7530_PORT_MIB_TX_PKT_SZ_65_TO_127 0x34 +#define MT7530_PORT_MIB_TX_PKT_SZ_128_TO_255 0x38 +#define MT7530_PORT_MIB_TX_PKT_SZ_256_TO_511 0x3c +#define MT7530_PORT_MIB_TX_PKT_SZ_512_TO_1023 0x40 +#define MT7530_PORT_MIB_TX_PKT_SZ_1024_TO_MAX 0x44 +#define MT7530_PORT_MIB_TX_BYTES 0x48 /* 64-bit counter */ +#define MT7530_PORT_MIB_RX_DROP 0x60 +#define MT7530_PORT_MIB_RX_FILTERING 0x64 +#define MT7530_PORT_MIB_RX_UNICAST 0x68 +#define MT7530_PORT_MIB_RX_MULTICAST 0x6c +#define MT7530_PORT_MIB_RX_BROADCAST 0x70 +#define MT7530_PORT_MIB_RX_ALIGN_ERR 0x74 +#define MT7530_PORT_MIB_RX_CRC_ERR 0x78 +#define MT7530_PORT_MIB_RX_UNDER_SIZE_ERR 0x7c +#define MT7530_PORT_MIB_RX_FRAG_ERR 0x80 +#define MT7530_PORT_MIB_RX_OVER_SZ_ERR 0x84 +#define MT7530_PORT_MIB_RX_JABBER_ERR 0x88 +#define MT7530_PORT_MIB_RX_PAUSE 0x8c +#define MT7530_PORT_MIB_RX_PKT_SZ_64 0x90 +#define MT7530_PORT_MIB_RX_PKT_SZ_65_TO_127 0x94 +#define MT7530_PORT_MIB_RX_PKT_SZ_128_TO_255 0x98 +#define MT7530_PORT_MIB_RX_PKT_SZ_256_TO_511 0x9c +#define MT7530_PORT_MIB_RX_PKT_SZ_512_TO_1023 0xa0 +#define MT7530_PORT_MIB_RX_PKT_SZ_1024_TO_MAX 0xa4 +#define MT7530_PORT_MIB_RX_BYTES 0xa8 /* 64-bit counter */ +#define MT7530_PORT_MIB_RX_CTRL_DROP 0xb0 +#define MT7530_PORT_MIB_RX_INGRESS_DROP 0xb4 +#define MT7530_PORT_MIB_RX_ARL_DROP 0xb8 #define MT7530_MIB_CCR 0x4fe0 #define CCR_MIB_ENABLE BIT(31) #define CCR_RX_OCT_CNT_GOOD BIT(7) diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 86bf113c9bfa..7d00482f53a3 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -241,7 +241,7 @@ struct mv88e6xxx_port_hwtstamp { u16 tx_seq_id; /* Current timestamp configuration */ - struct hwtstamp_config tstamp_config; + struct kernel_hwtstamp_config tstamp_config; }; enum mv88e6xxx_policy_mapping { diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c index 49e6e1355142..f663799b0b3b 100644 --- a/drivers/net/dsa/mv88e6xxx/hwtstamp.c +++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c @@ -89,7 +89,7 @@ int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port, } static int mv88e6xxx_set_hwtstamp_config(struct mv88e6xxx_chip *chip, int port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { const struct mv88e6xxx_ptp_ops *ptp_ops = chip->info->ops->ptp_ops; struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port]; @@ -169,42 +169,38 @@ static int mv88e6xxx_set_hwtstamp_config(struct mv88e6xxx_chip *chip, int port, } int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port, - struct ifreq *ifr) + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port]; - struct hwtstamp_config config; int err; if (!chip->info->ptp_support) return -EOPNOTSUPP; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - err = mv88e6xxx_set_hwtstamp_config(chip, port, &config); + err = mv88e6xxx_set_hwtstamp_config(chip, port, config); if (err) return err; /* Save the chosen configuration to be returned later.
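 * (kernel_hwtstamp_config is the in-kernel counterpart of the UAPI
 * struct hwtstamp_config; with this conversion the copy_from_user()/
 * copy_to_user() calls dropped below are performed once in generic
 * code rather than in each driver, matching the hellcreek, ksz and
 * sja1105 conversions elsewhere in this series.)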
*/ - memcpy(&ps->tstamp_config, &config, sizeof(config)); + ps->tstamp_config = *config; - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; + return 0; } int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port, - struct ifreq *ifr) + struct kernel_hwtstamp_config *config) { struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port]; - struct hwtstamp_config *config = &ps->tstamp_config; if (!chip->info->ptp_support) return -EOPNOTSUPP; - return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ? - -EFAULT : 0; + *config = ps->tstamp_config; + + return 0; } /* Returns a pointer to the PTP header if the caller should time stamp, diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.h b/drivers/net/dsa/mv88e6xxx/hwtstamp.h index 85acc758e3eb..22e4acc957f0 100644 --- a/drivers/net/dsa/mv88e6xxx/hwtstamp.h +++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.h @@ -111,9 +111,10 @@ #ifdef CONFIG_NET_DSA_MV88E6XXX_PTP int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port, - struct ifreq *ifr); + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack); int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port, - struct ifreq *ifr); + struct kernel_hwtstamp_config *cfg); bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *clone, unsigned int type); @@ -132,14 +133,17 @@ int mv88e6165_global_disable(struct mv88e6xxx_chip *chip); #else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */ -static inline int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, - int port, struct ifreq *ifr) +static inline int +mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } -static inline int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, - int port, struct ifreq *ifr) +static inline int +mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port, + struct kernel_hwtstamp_config *config) { return -EOPNOTSUPP; } diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c index aed4a4b07f34..1d3b2c94c53e 100644 --- a/drivers/net/dsa/mv88e6xxx/ptp.c +++ b/drivers/net/dsa/mv88e6xxx/ptp.c @@ -332,13 +332,6 @@ static int mv88e6352_ptp_enable_extts(struct mv88e6xxx_chip *chip, int pin; int err; - /* Reject requests with unsupported flags */ - if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_FALLING_EDGE | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - /* Reject requests to enable time stamping on both edges. 
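 * (The open-coded flag validation removed above is superseded by
 * ptp_clock_info.supported_extts_flags, set in mv88e6xxx_ptp_setup()
 * further down with PTP_RISING_EDGE, PTP_FALLING_EDGE and
 * PTP_STRICT_FLAGS, so the PTP core can reject unsupported flags
 * before this callback runs.)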
*/ if ((rq->extts.flags & PTP_STRICT_FLAGS) && (rq->extts.flags & PTP_ENABLE_FEATURE) && @@ -566,6 +559,10 @@ int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip) chip->ptp_clock_info.verify = ptp_ops->ptp_verify; chip->ptp_clock_info.do_aux_work = mv88e6xxx_hwtstamp_work; + chip->ptp_clock_info.supported_extts_flags = PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS; + if (ptp_ops->set_ptp_cpu_port) { struct dsa_port *dp; int upstream = 0; diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 0a4e682a55ef..2dd4e56e1cf1 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1774,22 +1774,25 @@ static void felix_teardown(struct dsa_switch *ds) } static int felix_hwtstamp_get(struct dsa_switch *ds, int port, - struct ifreq *ifr) + struct kernel_hwtstamp_config *config) { struct ocelot *ocelot = ds->priv; - return ocelot_hwstamp_get(ocelot, port, ifr); + ocelot_hwstamp_get(ocelot, port, config); + + return 0; } static int felix_hwtstamp_set(struct dsa_switch *ds, int port, - struct ifreq *ifr) + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct ocelot *ocelot = ds->priv; struct felix *felix = ocelot_to_felix(ocelot); bool using_tag_8021q; int err; - err = ocelot_hwstamp_set(ocelot, port, ifr); + err = ocelot_hwstamp_set(ocelot, port, config, extack); if (err) return err; diff --git a/drivers/net/dsa/rzn1_a5psw.c b/drivers/net/dsa/rzn1_a5psw.c index 31ea8130a495..df7466d4fe8f 100644 --- a/drivers/net/dsa/rzn1_a5psw.c +++ b/drivers/net/dsa/rzn1_a5psw.c @@ -337,8 +337,9 @@ static void a5psw_port_rx_block_set(struct a5psw *a5psw, int port, bool block) static void a5psw_flooding_set_resolution(struct a5psw *a5psw, int port, bool set) { - u8 offsets[] = {A5PSW_UCAST_DEF_MASK, A5PSW_BCAST_DEF_MASK, - A5PSW_MCAST_DEF_MASK}; + static const u8 offsets[] = { + A5PSW_UCAST_DEF_MASK, A5PSW_BCAST_DEF_MASK, A5PSW_MCAST_DEF_MASK + }; int i; for (i = 0; i < ARRAY_SIZE(offsets); i++) diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c index 198e787e8560..fefe46e2a5e6 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.c +++ b/drivers/net/dsa/sja1105/sja1105_ptp.c @@ -58,19 +58,17 @@ enum sja1105_ptp_clk_mode { #define ptp_data_to_sja1105(d) \ container_of((d), struct sja1105_private, ptp_data) -int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, struct ifreq *ifr) +int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct sja1105_private *priv = ds->priv; unsigned long hwts_tx_en, hwts_rx_en; - struct hwtstamp_config config; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; hwts_tx_en = priv->hwts_tx_en; hwts_rx_en = priv->hwts_rx_en; - switch (config.tx_type) { + switch (config->tx_type) { case HWTSTAMP_TX_OFF: hwts_tx_en &= ~BIT(port); break; @@ -81,7 +79,7 @@ int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, struct ifreq *ifr) return -ERANGE; } - switch (config.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: hwts_rx_en &= ~BIT(port); break; @@ -92,32 +90,28 @@ int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, struct ifreq *ifr) return -ERANGE; } - if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) - return -EFAULT; - priv->hwts_tx_en = hwts_tx_en; priv->hwts_rx_en = hwts_rx_en; return 0; } -int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr) +int sja1105_hwtstamp_get(struct dsa_switch 
*ds, int port, + struct kernel_hwtstamp_config *config) { struct sja1105_private *priv = ds->priv; - struct hwtstamp_config config; - config.flags = 0; + config->flags = 0; if (priv->hwts_tx_en & BIT(port)) - config.tx_type = HWTSTAMP_TX_ON; + config->tx_type = HWTSTAMP_TX_ON; else - config.tx_type = HWTSTAMP_TX_OFF; + config->tx_type = HWTSTAMP_TX_OFF; if (priv->hwts_rx_en & BIT(port)) - config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + config->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; else - config.rx_filter = HWTSTAMP_FILTER_NONE; + config->rx_filter = HWTSTAMP_FILTER_NONE; - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; + return 0; } int sja1105_get_ts_info(struct dsa_switch *ds, int port, @@ -737,10 +731,6 @@ static int sja1105_per_out_enable(struct sja1105_private *priv, if (perout->index != 0) return -EOPNOTSUPP; - /* Reject requests with unsupported flags */ - if (perout->flags) - return -EOPNOTSUPP; - mutex_lock(&ptp_data->lock); rc = sja1105_change_ptp_clk_pin_func(priv, PTP_PF_PEROUT); @@ -820,13 +810,6 @@ static int sja1105_extts_enable(struct sja1105_private *priv, if (extts->index != 0) return -EOPNOTSUPP; - /* Reject requests with unsupported flags */ - if (extts->flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_FALLING_EDGE | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - /* We can only enable time stamping on both edges, sadly. */ if ((extts->flags & PTP_STRICT_FLAGS) && (extts->flags & PTP_ENABLE_FEATURE) && @@ -912,6 +895,9 @@ int sja1105_ptp_clock_register(struct dsa_switch *ds) .n_pins = 1, .n_ext_ts = 1, .n_per_out = 1, + .supported_extts_flags = PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS, }; /* Only used on SJA1105 */ diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h index 8add2bd5f728..325e3777ea07 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.h +++ b/drivers/net/dsa/sja1105/sja1105_ptp.h @@ -112,9 +112,12 @@ bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port, void sja1105_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb); -int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr); +int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, + struct kernel_hwtstamp_config *config); -int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, struct ifreq *ifr); +int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); int __sja1105_ptp_gettimex(struct dsa_switch *ds, u64 *ns, struct ptp_system_timestamp *sts); diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index d748dc6de923..16c7896f931f 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -34,37 +34,40 @@ u32 airoha_rmw(void __iomem *base, u32 offset, u32 mask, u32 val) return val; } -static void airoha_qdma_set_irqmask(struct airoha_qdma *qdma, int index, - u32 clear, u32 set) +static void airoha_qdma_set_irqmask(struct airoha_irq_bank *irq_bank, + int index, u32 clear, u32 set) { + struct airoha_qdma *qdma = irq_bank->qdma; + int bank = irq_bank - &qdma->irq_banks[0]; unsigned long flags; - if (WARN_ON_ONCE(index >= ARRAY_SIZE(qdma->irqmask))) + if (WARN_ON_ONCE(index >= ARRAY_SIZE(irq_bank->irqmask))) return; - spin_lock_irqsave(&qdma->irq_lock, flags); + spin_lock_irqsave(&irq_bank->irq_lock, flags); - qdma->irqmask[index] &= ~clear; - qdma->irqmask[index] |= set; - airoha_qdma_wr(qdma, 
REG_INT_ENABLE(index), qdma->irqmask[index]); + irq_bank->irqmask[index] &= ~clear; + irq_bank->irqmask[index] |= set; + airoha_qdma_wr(qdma, REG_INT_ENABLE(bank, index), + irq_bank->irqmask[index]); /* Read irq_enable register in order to guarantee the update above * completes in the spinlock critical section. */ - airoha_qdma_rr(qdma, REG_INT_ENABLE(index)); + airoha_qdma_rr(qdma, REG_INT_ENABLE(bank, index)); - spin_unlock_irqrestore(&qdma->irq_lock, flags); + spin_unlock_irqrestore(&irq_bank->irq_lock, flags); } -static void airoha_qdma_irq_enable(struct airoha_qdma *qdma, int index, - u32 mask) +static void airoha_qdma_irq_enable(struct airoha_irq_bank *irq_bank, + int index, u32 mask) { - airoha_qdma_set_irqmask(qdma, index, 0, mask); + airoha_qdma_set_irqmask(irq_bank, index, 0, mask); } -static void airoha_qdma_irq_disable(struct airoha_qdma *qdma, int index, - u32 mask) +static void airoha_qdma_irq_disable(struct airoha_irq_bank *irq_bank, + int index, u32 mask) { - airoha_qdma_set_irqmask(qdma, index, mask, 0); + airoha_qdma_set_irqmask(irq_bank, index, mask, 0); } static bool airhoa_is_lan_gdm_port(struct airoha_gdm_port *port) @@ -527,6 +530,25 @@ static int airoha_fe_init(struct airoha_eth *eth) /* disable IFC by default */ airoha_fe_clear(eth, REG_FE_CSR_IFC_CFG, FE_IFC_EN_MASK); + airoha_fe_wr(eth, REG_PPE_DFT_CPORT0(0), + FIELD_PREP(DFT_CPORT_MASK(7), FE_PSE_PORT_CDM1) | + FIELD_PREP(DFT_CPORT_MASK(6), FE_PSE_PORT_CDM1) | + FIELD_PREP(DFT_CPORT_MASK(5), FE_PSE_PORT_CDM1) | + FIELD_PREP(DFT_CPORT_MASK(4), FE_PSE_PORT_CDM1) | + FIELD_PREP(DFT_CPORT_MASK(3), FE_PSE_PORT_CDM1) | + FIELD_PREP(DFT_CPORT_MASK(2), FE_PSE_PORT_CDM1) | + FIELD_PREP(DFT_CPORT_MASK(1), FE_PSE_PORT_CDM1) | + FIELD_PREP(DFT_CPORT_MASK(0), FE_PSE_PORT_CDM1)); + airoha_fe_wr(eth, REG_PPE_DFT_CPORT0(1), + FIELD_PREP(DFT_CPORT_MASK(7), FE_PSE_PORT_CDM2) | + FIELD_PREP(DFT_CPORT_MASK(6), FE_PSE_PORT_CDM2) | + FIELD_PREP(DFT_CPORT_MASK(5), FE_PSE_PORT_CDM2) | + FIELD_PREP(DFT_CPORT_MASK(4), FE_PSE_PORT_CDM2) | + FIELD_PREP(DFT_CPORT_MASK(3), FE_PSE_PORT_CDM2) | + FIELD_PREP(DFT_CPORT_MASK(2), FE_PSE_PORT_CDM2) | + FIELD_PREP(DFT_CPORT_MASK(1), FE_PSE_PORT_CDM2) | + FIELD_PREP(DFT_CPORT_MASK(0), FE_PSE_PORT_CDM2)); + /* enable 1:N vlan action, init vlan table */ airoha_fe_set(eth, REG_MC_VLAN_EN, MC_VLAN_EN_MASK); @@ -694,7 +716,7 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) reason = FIELD_GET(AIROHA_RXD4_PPE_CPU_REASON, msg1); if (reason == PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED) - airoha_ppe_check_skb(eth->ppe, hash); + airoha_ppe_check_skb(eth->ppe, q->skb, hash); done++; napi_gro_receive(&q->napi, q->skb); @@ -720,9 +742,20 @@ static int airoha_qdma_rx_napi_poll(struct napi_struct *napi, int budget) done += cur; } while (cur && done < budget); - if (done < budget && napi_complete(napi)) - airoha_qdma_irq_enable(q->qdma, QDMA_INT_REG_IDX1, - RX_DONE_INT_MASK); + if (done < budget && napi_complete(napi)) { + struct airoha_qdma *qdma = q->qdma; + int i, qid = q - &qdma->q_rx[0]; + int intr_reg = qid < RX_DONE_HIGH_OFFSET ? 
QDMA_INT_REG_IDX1 + : QDMA_INT_REG_IDX2; + + for (i = 0; i < ARRAY_SIZE(qdma->irq_banks); i++) { + if (!(BIT(qid) & RX_IRQ_BANK_PIN_MASK(i))) + continue; + + airoha_qdma_irq_enable(&qdma->irq_banks[i], intr_reg, + BIT(qid % RX_DONE_HIGH_OFFSET)); + } + } return done; } @@ -925,7 +958,7 @@ unlock: } if (done < budget && napi_complete(napi)) - airoha_qdma_irq_enable(qdma, QDMA_INT_REG_IDX0, + airoha_qdma_irq_enable(&qdma->irq_banks[0], QDMA_INT_REG_IDX0, TX_DONE_INT_MASK(id)); return done; @@ -1155,14 +1188,24 @@ static int airoha_qdma_hw_init(struct airoha_qdma *qdma) { int i; - /* clear pending irqs */ - for (i = 0; i < ARRAY_SIZE(qdma->irqmask); i++) + for (i = 0; i < ARRAY_SIZE(qdma->irq_banks); i++) { + /* clear pending irqs */ airoha_qdma_wr(qdma, REG_INT_STATUS(i), 0xffffffff); - - /* setup irqs */ - airoha_qdma_irq_enable(qdma, QDMA_INT_REG_IDX0, INT_IDX0_MASK); - airoha_qdma_irq_enable(qdma, QDMA_INT_REG_IDX1, INT_IDX1_MASK); - airoha_qdma_irq_enable(qdma, QDMA_INT_REG_IDX4, INT_IDX4_MASK); + /* setup rx irqs */ + airoha_qdma_irq_enable(&qdma->irq_banks[i], QDMA_INT_REG_IDX0, + INT_RX0_MASK(RX_IRQ_BANK_PIN_MASK(i))); + airoha_qdma_irq_enable(&qdma->irq_banks[i], QDMA_INT_REG_IDX1, + INT_RX1_MASK(RX_IRQ_BANK_PIN_MASK(i))); + airoha_qdma_irq_enable(&qdma->irq_banks[i], QDMA_INT_REG_IDX2, + INT_RX2_MASK(RX_IRQ_BANK_PIN_MASK(i))); + airoha_qdma_irq_enable(&qdma->irq_banks[i], QDMA_INT_REG_IDX3, + INT_RX3_MASK(RX_IRQ_BANK_PIN_MASK(i))); + } + /* setup tx irqs */ + airoha_qdma_irq_enable(&qdma->irq_banks[0], QDMA_INT_REG_IDX0, + TX_COHERENT_LOW_INT_MASK | INT_TX_MASK); + airoha_qdma_irq_enable(&qdma->irq_banks[0], QDMA_INT_REG_IDX4, + TX_COHERENT_HIGH_INT_MASK); /* setup irq binding */ for (i = 0; i < ARRAY_SIZE(qdma->q_tx); i++) { @@ -1207,30 +1250,39 @@ static int airoha_qdma_hw_init(struct airoha_qdma *qdma) static irqreturn_t airoha_irq_handler(int irq, void *dev_instance) { - struct airoha_qdma *qdma = dev_instance; - u32 intr[ARRAY_SIZE(qdma->irqmask)]; + struct airoha_irq_bank *irq_bank = dev_instance; + struct airoha_qdma *qdma = irq_bank->qdma; + u32 rx_intr_mask = 0, rx_intr1, rx_intr2; + u32 intr[ARRAY_SIZE(irq_bank->irqmask)]; int i; - for (i = 0; i < ARRAY_SIZE(qdma->irqmask); i++) { + for (i = 0; i < ARRAY_SIZE(intr); i++) { intr[i] = airoha_qdma_rr(qdma, REG_INT_STATUS(i)); - intr[i] &= qdma->irqmask[i]; + intr[i] &= irq_bank->irqmask[i]; airoha_qdma_wr(qdma, REG_INT_STATUS(i), intr[i]); } if (!test_bit(DEV_STATE_INITIALIZED, &qdma->eth->state)) return IRQ_NONE; - if (intr[1] & RX_DONE_INT_MASK) { - airoha_qdma_irq_disable(qdma, QDMA_INT_REG_IDX1, - RX_DONE_INT_MASK); + rx_intr1 = intr[1] & RX_DONE_LOW_INT_MASK; + if (rx_intr1) { + airoha_qdma_irq_disable(irq_bank, QDMA_INT_REG_IDX1, rx_intr1); + rx_intr_mask |= rx_intr1; + } - for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) { - if (!qdma->q_rx[i].ndesc) - continue; + rx_intr2 = intr[2] & RX_DONE_HIGH_INT_MASK; + if (rx_intr2) { + airoha_qdma_irq_disable(irq_bank, QDMA_INT_REG_IDX2, rx_intr2); + rx_intr_mask |= (rx_intr2 << 16); + } - if (intr[1] & BIT(i)) - napi_schedule(&qdma->q_rx[i].napi); - } + for (i = 0; rx_intr_mask && i < ARRAY_SIZE(qdma->q_rx); i++) { + if (!qdma->q_rx[i].ndesc) + continue; + + if (rx_intr_mask & BIT(i)) + napi_schedule(&qdma->q_rx[i].napi); } if (intr[0] & INT_TX_MASK) { @@ -1238,7 +1290,7 @@ static irqreturn_t airoha_irq_handler(int irq, void *dev_instance) if (!(intr[0] & TX_DONE_INT_MASK(i))) continue; - airoha_qdma_irq_disable(qdma, QDMA_INT_REG_IDX0, + airoha_qdma_irq_disable(irq_bank, 
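/* [Editorial aside, worked example.] RX-done status is split across two
 * registers -- queues 0..15 in status word 1, queues 16..31 in the low half
 * of status word 2 -- and the handler above folds them back into a single
 * bit-per-queue mask. With RX_DONE_HIGH_OFFSET equal to 16 (fls of a
 * 16-bit-wide mask), the recombination reduces to:
 */
static u32 example_rx_done_to_queue_mask(u32 intr1, u32 intr2)
{
	/* e.g. queue 21 pending: bit 5 of word 2 becomes bit 21 here */
	return (intr1 & 0xffff) | ((intr2 & 0xffff) << 16);
}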
QDMA_INT_REG_IDX0, TX_DONE_INT_MASK(i)); napi_schedule(&qdma->q_tx_irq[i].napi); } @@ -1247,6 +1299,39 @@ static irqreturn_t airoha_irq_handler(int irq, void *dev_instance) return IRQ_HANDLED; } +static int airoha_qdma_init_irq_banks(struct platform_device *pdev, + struct airoha_qdma *qdma) +{ + struct airoha_eth *eth = qdma->eth; + int i, id = qdma - ð->qdma[0]; + + for (i = 0; i < ARRAY_SIZE(qdma->irq_banks); i++) { + struct airoha_irq_bank *irq_bank = &qdma->irq_banks[i]; + int err, irq_index = 4 * id + i; + const char *name; + + spin_lock_init(&irq_bank->irq_lock); + irq_bank->qdma = qdma; + + irq_bank->irq = platform_get_irq(pdev, irq_index); + if (irq_bank->irq < 0) + return irq_bank->irq; + + name = devm_kasprintf(eth->dev, GFP_KERNEL, + KBUILD_MODNAME ".%d", irq_index); + if (!name) + return -ENOMEM; + + err = devm_request_irq(eth->dev, irq_bank->irq, + airoha_irq_handler, IRQF_SHARED, name, + irq_bank); + if (err) + return err; + } + + return 0; +} + static int airoha_qdma_init(struct platform_device *pdev, struct airoha_eth *eth, struct airoha_qdma *qdma) @@ -1254,9 +1339,7 @@ static int airoha_qdma_init(struct platform_device *pdev, int err, id = qdma - ð->qdma[0]; const char *res; - spin_lock_init(&qdma->irq_lock); qdma->eth = eth; - res = devm_kasprintf(eth->dev, GFP_KERNEL, "qdma%d", id); if (!res) return -ENOMEM; @@ -1266,12 +1349,7 @@ static int airoha_qdma_init(struct platform_device *pdev, return dev_err_probe(eth->dev, PTR_ERR(qdma->regs), "failed to iomap qdma%d regs\n", id); - qdma->irq = platform_get_irq(pdev, 4 * id); - if (qdma->irq < 0) - return qdma->irq; - - err = devm_request_irq(eth->dev, qdma->irq, airoha_irq_handler, - IRQF_SHARED, KBUILD_MODNAME, qdma); + err = airoha_qdma_init_irq_banks(pdev, qdma); if (err) return err; @@ -1631,7 +1709,6 @@ static void airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) if (port->id == 3) { /* FIXME: handle XSI_PCE1_PORT */ - airoha_fe_wr(eth, REG_PPE_DFT_CPORT0(0), 0x5500); airoha_fe_rmw(eth, REG_FE_WAN_PORT, WAN1_EN_MASK | WAN1_MASK | WAN0_MASK, FIELD_PREP(WAN0_MASK, HSGMII_LAN_PCIE0_SRCPORT)); @@ -2109,6 +2186,125 @@ static int airoha_tc_setup_qdisc_ets(struct airoha_gdm_port *port, } } +static int airoha_qdma_get_rl_param(struct airoha_qdma *qdma, int queue_id, + u32 addr, enum trtcm_param_type param, + u32 *val_low, u32 *val_high) +{ + u32 idx = QDMA_METER_IDX(queue_id), group = QDMA_METER_GROUP(queue_id); + u32 val, config = FIELD_PREP(RATE_LIMIT_PARAM_TYPE_MASK, param) | + FIELD_PREP(RATE_LIMIT_METER_GROUP_MASK, group) | + FIELD_PREP(RATE_LIMIT_PARAM_INDEX_MASK, idx); + + airoha_qdma_wr(qdma, REG_TRTCM_CFG_PARAM(addr), config); + if (read_poll_timeout(airoha_qdma_rr, val, + val & RATE_LIMIT_PARAM_RW_DONE_MASK, + USEC_PER_MSEC, 10 * USEC_PER_MSEC, true, qdma, + REG_TRTCM_CFG_PARAM(addr))) + return -ETIMEDOUT; + + *val_low = airoha_qdma_rr(qdma, REG_TRTCM_DATA_LOW(addr)); + if (val_high) + *val_high = airoha_qdma_rr(qdma, REG_TRTCM_DATA_HIGH(addr)); + + return 0; +} + +static int airoha_qdma_set_rl_param(struct airoha_qdma *qdma, int queue_id, + u32 addr, enum trtcm_param_type param, + u32 val) +{ + u32 idx = QDMA_METER_IDX(queue_id), group = QDMA_METER_GROUP(queue_id); + u32 config = RATE_LIMIT_PARAM_RW_MASK | + FIELD_PREP(RATE_LIMIT_PARAM_TYPE_MASK, param) | + FIELD_PREP(RATE_LIMIT_METER_GROUP_MASK, group) | + FIELD_PREP(RATE_LIMIT_PARAM_INDEX_MASK, idx); + + airoha_qdma_wr(qdma, REG_TRTCM_DATA_LOW(addr), val); + airoha_qdma_wr(qdma, REG_TRTCM_CFG_PARAM(addr), config); + + return 
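/* [Editorial aside, sketch under assumptions.] The bank-init helper above
 * follows a reusable pattern for multi-vector platform devices: fetch each
 * vector with platform_get_irq(), give it a devm-allocated unique name, and
 * pass the per-bank state as dev_id so a shared handler can tell its slice
 * apart. Condensed with a hypothetical struct example_bank:
 */
struct example_bank {
	int irq;
	/* ...per-bank mask state, lock, back-pointer... */
};

static int example_request_vectors(struct platform_device *pdev,
				   struct example_bank *banks, int nbanks,
				   irq_handler_t handler)
{
	int i, err;

	for (i = 0; i < nbanks; i++) {
		const char *name;
		int irq = platform_get_irq(pdev, i);

		if (irq < 0)
			return irq;

		name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s.%d",
				      dev_name(&pdev->dev), i);
		if (!name)
			return -ENOMEM;

		banks[i].irq = irq;
		/* dev_id points at the bank, not the whole device */
		err = devm_request_irq(&pdev->dev, irq, handler, IRQF_SHARED,
				       name, &banks[i]);
		if (err)
			return err;
	}

	return 0;
}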
read_poll_timeout(airoha_qdma_rr, val, + val & RATE_LIMIT_PARAM_RW_DONE_MASK, + USEC_PER_MSEC, 10 * USEC_PER_MSEC, true, + qdma, REG_TRTCM_CFG_PARAM(addr)); +} + +static int airoha_qdma_set_rl_config(struct airoha_qdma *qdma, int queue_id, + u32 addr, bool enable, u32 enable_mask) +{ + u32 val; + int err; + + err = airoha_qdma_get_rl_param(qdma, queue_id, addr, TRTCM_MISC_MODE, + &val, NULL); + if (err) + return err; + + val = enable ? val | enable_mask : val & ~enable_mask; + + return airoha_qdma_set_rl_param(qdma, queue_id, addr, TRTCM_MISC_MODE, + val); +} + +static int airoha_qdma_set_rl_token_bucket(struct airoha_qdma *qdma, + int queue_id, u32 rate_val, + u32 bucket_size) +{ + u32 val, config, tick, unit, rate, rate_frac; + int err; + + err = airoha_qdma_get_rl_param(qdma, queue_id, REG_INGRESS_TRTCM_CFG, + TRTCM_MISC_MODE, &config, NULL); + if (err) + return err; + + val = airoha_qdma_rr(qdma, REG_INGRESS_TRTCM_CFG); + tick = FIELD_GET(INGRESS_FAST_TICK_MASK, val); + if (config & TRTCM_TICK_SEL) + tick *= FIELD_GET(INGRESS_SLOW_TICK_RATIO_MASK, val); + if (!tick) + return -EINVAL; + + unit = (config & TRTCM_PKT_MODE) ? 1000000 / tick : 8000 / tick; + if (!unit) + return -EINVAL; + + rate = rate_val / unit; + rate_frac = rate_val % unit; + rate_frac = FIELD_PREP(TRTCM_TOKEN_RATE_MASK, rate_frac) / unit; + rate = FIELD_PREP(TRTCM_TOKEN_RATE_MASK, rate) | + FIELD_PREP(TRTCM_TOKEN_RATE_FRACTION_MASK, rate_frac); + + err = airoha_qdma_set_rl_param(qdma, queue_id, REG_INGRESS_TRTCM_CFG, + TRTCM_TOKEN_RATE_MODE, rate); + if (err) + return err; + + val = bucket_size; + if (!(config & TRTCM_PKT_MODE)) + val = max_t(u32, val, MIN_TOKEN_SIZE); + val = min_t(u32, __fls(val), MAX_TOKEN_SIZE_OFFSET); + + return airoha_qdma_set_rl_param(qdma, queue_id, REG_INGRESS_TRTCM_CFG, + TRTCM_BUCKETSIZE_SHIFT_MODE, val); +} + +static int airoha_qdma_init_rl_config(struct airoha_qdma *qdma, int queue_id, + bool enable, enum trtcm_unit_type unit) +{ + bool tick_sel = queue_id == 0 || queue_id == 2 || queue_id == 8; + enum trtcm_param mode = TRTCM_METER_MODE; + int err; + + mode |= unit == TRTCM_PACKET_UNIT ? 
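/* [Editorial aside, worked example with assumed values.] The token-rate
 * math above splits the requested rate into whole and fractional tokens per
 * hardware tick. In byte mode with, say, tick = 16, the unit is
 * 8000 / 16 = 500; a request of rate_val = 125300 then yields
 * rate = 125300 / 500 = 250 whole tokens per tick, and the remainder
 * 125300 % 500 = 300 is rescaled into TRTCM_TOKEN_RATE_FRACTION so the
 * meter accumulates sub-token credit across ticks. The bucket size is not
 * stored directly either: in byte mode it is floored to MIN_TOKEN_SIZE,
 * then reduced to a power-of-two shift via __fls() and capped at
 * MAX_TOKEN_SIZE_OFFSET.
 */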
TRTCM_PKT_MODE : 0; + err = airoha_qdma_set_rl_config(qdma, queue_id, REG_INGRESS_TRTCM_CFG, + enable, mode); + if (err) + return err; + + return airoha_qdma_set_rl_config(qdma, queue_id, REG_INGRESS_TRTCM_CFG, + tick_sel, TRTCM_TICK_SEL); +} + static int airoha_qdma_get_trtcm_param(struct airoha_qdma *qdma, int channel, u32 addr, enum trtcm_param_type param, enum trtcm_mode_type mode, @@ -2273,10 +2469,142 @@ static int airoha_tc_htb_alloc_leaf_queue(struct airoha_gdm_port *port, return 0; } +static int airoha_qdma_set_rx_meter(struct airoha_gdm_port *port, + u32 rate, u32 bucket_size, + enum trtcm_unit_type unit_type) +{ + struct airoha_qdma *qdma = port->qdma; + int i; + + for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) { + int err; + + if (!qdma->q_rx[i].ndesc) + continue; + + err = airoha_qdma_init_rl_config(qdma, i, !!rate, unit_type); + if (err) + return err; + + err = airoha_qdma_set_rl_token_bucket(qdma, i, rate, + bucket_size); + if (err) + return err; + } + + return 0; +} + +static int airoha_tc_matchall_act_validate(struct tc_cls_matchall_offload *f) +{ + const struct flow_action *actions = &f->rule->action; + const struct flow_action_entry *act; + + if (!flow_action_has_entries(actions)) { + NL_SET_ERR_MSG_MOD(f->common.extack, + "filter run with no actions"); + return -EINVAL; + } + + if (!flow_offload_has_one_action(actions)) { + NL_SET_ERR_MSG_MOD(f->common.extack, + "only once action per filter is supported"); + return -EOPNOTSUPP; + } + + act = &actions->entries[0]; + if (act->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(f->common.extack, "unsupported action"); + return -EOPNOTSUPP; + } + + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(f->common.extack, + "invalid exceed action id"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(f->common.extack, + "invalid notexceed action id"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(actions, act)) { + NL_SET_ERR_MSG_MOD(f->common.extack, + "action accept must be last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || act->police.avrate || + act->police.overhead || act->police.mtu) { + NL_SET_ERR_MSG_MOD(f->common.extack, + "peakrate/avrate/overhead/mtu unsupported"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int airoha_dev_tc_matchall(struct net_device *dev, + struct tc_cls_matchall_offload *f) +{ + enum trtcm_unit_type unit_type = TRTCM_BYTE_UNIT; + struct airoha_gdm_port *port = netdev_priv(dev); + u32 rate = 0, bucket_size = 0; + + switch (f->command) { + case TC_CLSMATCHALL_REPLACE: { + const struct flow_action_entry *act; + int err; + + err = airoha_tc_matchall_act_validate(f); + if (err) + return err; + + act = &f->rule->action.entries[0]; + if (act->police.rate_pkt_ps) { + rate = act->police.rate_pkt_ps; + bucket_size = act->police.burst_pkt; + unit_type = TRTCM_PACKET_UNIT; + } else { + rate = div_u64(act->police.rate_bytes_ps, 1000); + rate = rate << 3; /* Kbps */ + bucket_size = act->police.burst; + } + fallthrough; + } + case TC_CLSMATCHALL_DESTROY: + return airoha_qdma_set_rx_meter(port, rate, bucket_size, + unit_type); + default: + return -EOPNOTSUPP; + } +} + +static int airoha_dev_setup_tc_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct net_device *dev = cb_priv; + + if (!tc_can_offload(dev)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return 
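/* [Editorial aside.] Note the control flow in the matchall handler above:
 * the REPLACE arm validates the police action, computes rate/bucket_size,
 * then deliberately falls through into the DESTROY arm, so both commands
 * funnel into the same airoha_qdma_set_rx_meter() call -- DESTROY simply
 * arrives with the initial rate = 0, which disables the meter. The
 * byte-mode conversion is rate_bytes_ps / 1000 followed by << 3, i.e.
 * kilobytes per second times eight bits, giving Kbit/s.
 */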
airoha_ppe_setup_tc_block_cb(dev, type_data); + case TC_SETUP_CLSMATCHALL: + return airoha_dev_tc_matchall(dev, type_data); + default: + return -EOPNOTSUPP; + } +} + static int airoha_dev_setup_tc_block(struct airoha_gdm_port *port, struct flow_block_offload *f) { - flow_setup_cb_t *cb = airoha_ppe_setup_tc_block_cb; + flow_setup_cb_t *cb = airoha_dev_setup_tc_block_cb; static LIST_HEAD(block_cb_list); struct flow_block_cb *block_cb; @@ -2515,7 +2843,7 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, dev->features |= dev->hw_features; dev->vlan_features = dev->hw_features; dev->dev.of_node = np; - dev->irq = qdma->irq; + dev->irq = qdma->irq_banks[0].irq; SET_NETDEV_DEV(dev, eth->dev); /* reserve hw queues for HTB offloading */ diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index ec8908f904c6..53f39083a8b0 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -17,6 +17,7 @@ #define AIROHA_MAX_NUM_GDM_PORTS 4 #define AIROHA_MAX_NUM_QDMA 2 +#define AIROHA_MAX_NUM_IRQ_BANKS 4 #define AIROHA_MAX_DSA_PORTS 7 #define AIROHA_MAX_NUM_RSTS 3 #define AIROHA_MAX_NUM_XSI_RSTS 5 @@ -127,6 +128,11 @@ enum tx_sched_mode { TC_SCH_WRR2, }; +enum trtcm_unit_type { + TRTCM_BYTE_UNIT, + TRTCM_PACKET_UNIT, +}; + enum trtcm_param_type { TRTCM_MISC_MODE, /* meter_en, pps_mode, tick_sel */ TRTCM_TOKEN_RATE_MODE, @@ -422,27 +428,59 @@ struct airoha_flow_data { } pppoe; }; +enum airoha_flow_entry_type { + FLOW_TYPE_L4, + FLOW_TYPE_L2, + FLOW_TYPE_L2_SUBFLOW, +}; + struct airoha_flow_table_entry { - struct hlist_node list; + union { + struct hlist_node list; /* PPE L3 flow entry */ + struct { + struct rhash_head l2_node; /* L2 flow entry */ + struct hlist_head l2_flows; /* PPE L2 subflows list */ + }; + }; struct airoha_foe_entry data; + struct hlist_node l2_subflow_node; /* PPE L2 subflow entry */ u32 hash; + enum airoha_flow_entry_type type; + struct rhash_head node; unsigned long cookie; }; -struct airoha_qdma { - struct airoha_eth *eth; - void __iomem *regs; +/* RX queue to IRQ mapping: BIT(q) in IRQ(n) */ +#define RX_IRQ0_BANK_PIN_MASK 0x839f +#define RX_IRQ1_BANK_PIN_MASK 0x7fe00000 +#define RX_IRQ2_BANK_PIN_MASK 0x20 +#define RX_IRQ3_BANK_PIN_MASK 0x40 +#define RX_IRQ_BANK_PIN_MASK(_n) \ + (((_n) == 3) ? RX_IRQ3_BANK_PIN_MASK : \ + ((_n) == 2) ? RX_IRQ2_BANK_PIN_MASK : \ + ((_n) == 1) ? 
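/* [Editorial aside.] The entry layout above lets one
 * airoha_flow_table_entry allocation play three roles, discriminated by
 * ->type: FLOW_TYPE_L4 links into the per-hash hlist via 'list';
 * FLOW_TYPE_L2 instead uses the union's second arm, sitting in the
 * l2_flows rhashtable through 'l2_node' while anchoring its children at
 * 'l2_flows'; FLOW_TYPE_L2_SUBFLOW entries hang off that list through the
 * separate 'l2_subflow_node'. Only the mutually exclusive linkage is
 * unioned, so an entry never needs both the plain hlist linkage and the
 * rhashtable linkage at once.
 */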
RX_IRQ1_BANK_PIN_MASK : \ + RX_IRQ0_BANK_PIN_MASK) + +struct airoha_irq_bank { + struct airoha_qdma *qdma; /* protect concurrent irqmask accesses */ spinlock_t irq_lock; u32 irqmask[QDMA_INT_REG_MAX]; int irq; +}; + +struct airoha_qdma { + struct airoha_eth *eth; + void __iomem *regs; atomic_t users; + struct airoha_irq_bank irq_banks[AIROHA_MAX_NUM_IRQ_BANKS]; + struct airoha_tx_irq_queue q_tx_irq[AIROHA_NUM_TX_IRQ]; struct airoha_queue q_tx[AIROHA_NUM_TX_RING]; @@ -480,6 +518,8 @@ struct airoha_ppe { void *foe; dma_addr_t foe_dma; + struct rhashtable l2_flows; + struct hlist_head *foe_flow; u16 foe_check_time[PPE_NUM_ENTRIES]; @@ -535,9 +575,9 @@ u32 airoha_rmw(void __iomem *base, u32 offset, u32 mask, u32 val); bool airoha_is_valid_gdm_port(struct airoha_eth *eth, struct airoha_gdm_port *port); -void airoha_ppe_check_skb(struct airoha_ppe *ppe, u16 hash); -int airoha_ppe_setup_tc_block_cb(enum tc_setup_type type, void *type_data, - void *cb_priv); +void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb, + u16 hash); +int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data); int airoha_ppe_init(struct airoha_eth *eth); void airoha_ppe_deinit(struct airoha_eth *eth); struct airoha_foe_entry *airoha_ppe_foe_get_entry(struct airoha_ppe *ppe, diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index f10dab935cab..6e9787c2843b 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -24,6 +24,13 @@ static const struct rhashtable_params airoha_flow_table_params = { .automatic_shrinking = true, }; +static const struct rhashtable_params airoha_l2_flow_table_params = { + .head_offset = offsetof(struct airoha_flow_table_entry, l2_node), + .key_offset = offsetof(struct airoha_flow_table_entry, data.bridge), + .key_len = 2 * ETH_ALEN, + .automatic_shrinking = true, +}; + static bool airoha_ppe2_is_enabled(struct airoha_eth *eth) { return airoha_fe_rr(eth, REG_PPE_GLO_CFG(1)) & PPE_GLO_CFG_EN_MASK; @@ -197,6 +204,15 @@ static int airoha_get_dsa_port(struct net_device **dev) #endif } +static void airoha_ppe_foe_set_bridge_addrs(struct airoha_foe_bridge *br, + struct ethhdr *eh) +{ + br->dest_mac_hi = get_unaligned_be32(eh->h_dest); + br->dest_mac_lo = get_unaligned_be16(eh->h_dest + 4); + br->src_mac_hi = get_unaligned_be16(eh->h_source); + br->src_mac_lo = get_unaligned_be32(eh->h_source + 2); +} + static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, struct airoha_foe_entry *hwe, struct net_device *dev, int type, @@ -247,13 +263,7 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, qdata = FIELD_PREP(AIROHA_FOE_SHAPER_ID, 0x7f); if (type == PPE_PKT_TYPE_BRIDGE) { - hwe->bridge.dest_mac_hi = get_unaligned_be32(data->eth.h_dest); - hwe->bridge.dest_mac_lo = - get_unaligned_be16(data->eth.h_dest + 4); - hwe->bridge.src_mac_hi = - get_unaligned_be16(data->eth.h_source); - hwe->bridge.src_mac_lo = - get_unaligned_be32(data->eth.h_source + 2); + airoha_ppe_foe_set_bridge_addrs(&hwe->bridge, &data->eth); hwe->bridge.data = qdata; hwe->bridge.ib2 = val; l2 = &hwe->bridge.l2.common; @@ -378,6 +388,19 @@ static u32 airoha_ppe_foe_get_entry_hash(struct airoha_foe_entry *hwe) hv3 = hwe->ipv6.src_ip[1] ^ hwe->ipv6.dest_ip[1]; hv3 ^= hwe->ipv6.src_ip[0]; break; + case PPE_PKT_TYPE_BRIDGE: { + struct airoha_foe_mac_info *l2 = &hwe->bridge.l2; + + hv1 = l2->common.src_mac_hi & 0xffff; + hv1 = hv1 << 16 | l2->src_mac_lo; + + hv2 = l2->common.dest_mac_lo; + hv2 = hv2 << 
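/* [Editorial aside, sketch.] The l2_flows table keys the rhashtable on raw
 * memory: key_offset points at data.bridge and key_len is 2 * ETH_ALEN,
 * i.e. the 12 bytes of packed destination and source MACs that
 * airoha_ppe_foe_set_bridge_addrs() writes. Because insert and lookup both
 * go through the same packing helper, byte-equality of that region is
 * exactly MAC-pair equality. Generic shape of such a setup, with a
 * hypothetical struct l2_example_entry:
 */
struct l2_example_entry {
	struct rhash_head node;
	u8 mac_pair[2 * ETH_ALEN];	/* dest MAC then src MAC, packed */
};

static const struct rhashtable_params l2_example_params = {
	.head_offset = offsetof(struct l2_example_entry, node),
	.key_offset  = offsetof(struct l2_example_entry, mac_pair),
	.key_len     = 2 * ETH_ALEN,	/* 12-byte memcmp key */
	.automatic_shrinking = true,
};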
16; + hv2 = hv2 | ((l2->common.src_mac_hi & 0xffff0000) >> 16); + + hv3 = l2->common.dest_mac_hi; + break; + } case PPE_PKT_TYPE_IPV4_DSLITE: case PPE_PKT_TYPE_IPV6_6RD: default: @@ -476,10 +499,102 @@ static int airoha_ppe_foe_commit_entry(struct airoha_ppe *ppe, return 0; } -static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, u32 hash) +static void airoha_ppe_foe_remove_flow(struct airoha_ppe *ppe, + struct airoha_flow_table_entry *e) +{ + lockdep_assert_held(&ppe_lock); + + hlist_del_init(&e->list); + if (e->hash != 0xffff) { + e->data.ib1 &= ~AIROHA_FOE_IB1_BIND_STATE; + e->data.ib1 |= FIELD_PREP(AIROHA_FOE_IB1_BIND_STATE, + AIROHA_FOE_STATE_INVALID); + airoha_ppe_foe_commit_entry(ppe, &e->data, e->hash); + e->hash = 0xffff; + } + if (e->type == FLOW_TYPE_L2_SUBFLOW) { + hlist_del_init(&e->l2_subflow_node); + kfree(e); + } +} + +static void airoha_ppe_foe_remove_l2_flow(struct airoha_ppe *ppe, + struct airoha_flow_table_entry *e) +{ + struct hlist_head *head = &e->l2_flows; + struct hlist_node *n; + + lockdep_assert_held(&ppe_lock); + + rhashtable_remove_fast(&ppe->l2_flows, &e->l2_node, + airoha_l2_flow_table_params); + hlist_for_each_entry_safe(e, n, head, l2_subflow_node) + airoha_ppe_foe_remove_flow(ppe, e); +} + +static void airoha_ppe_foe_flow_remove_entry(struct airoha_ppe *ppe, + struct airoha_flow_table_entry *e) +{ + spin_lock_bh(&ppe_lock); + + if (e->type == FLOW_TYPE_L2) + airoha_ppe_foe_remove_l2_flow(ppe, e); + else + airoha_ppe_foe_remove_flow(ppe, e); + + spin_unlock_bh(&ppe_lock); +} + +static int +airoha_ppe_foe_commit_subflow_entry(struct airoha_ppe *ppe, + struct airoha_flow_table_entry *e, + u32 hash) +{ + u32 mask = AIROHA_FOE_IB1_BIND_PACKET_TYPE | AIROHA_FOE_IB1_BIND_UDP; + struct airoha_foe_entry *hwe_p, hwe; + struct airoha_flow_table_entry *f; + struct airoha_foe_mac_info *l2; + int type; + + hwe_p = airoha_ppe_foe_get_entry(ppe, hash); + if (!hwe_p) + return -EINVAL; + + f = kzalloc(sizeof(*f), GFP_ATOMIC); + if (!f) + return -ENOMEM; + + hlist_add_head(&f->l2_subflow_node, &e->l2_flows); + f->type = FLOW_TYPE_L2_SUBFLOW; + f->hash = hash; + + memcpy(&hwe, hwe_p, sizeof(*hwe_p)); + hwe.ib1 = (hwe.ib1 & mask) | (e->data.ib1 & ~mask); + l2 = &hwe.bridge.l2; + memcpy(l2, &e->data.bridge.l2, sizeof(*l2)); + + type = FIELD_GET(AIROHA_FOE_IB1_BIND_PACKET_TYPE, hwe.ib1); + if (type == PPE_PKT_TYPE_IPV4_HNAPT) + memcpy(&hwe.ipv4.new_tuple, &hwe.ipv4.orig_tuple, + sizeof(hwe.ipv4.new_tuple)); + else if (type >= PPE_PKT_TYPE_IPV6_ROUTE_3T && + l2->common.etype == ETH_P_IP) + l2->common.etype = ETH_P_IPV6; + + hwe.bridge.ib2 = e->data.bridge.ib2; + airoha_ppe_foe_commit_entry(ppe, &hwe, hash); + + return 0; +} + +static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, + struct sk_buff *skb, + u32 hash) { struct airoha_flow_table_entry *e; + struct airoha_foe_bridge br = {}; struct airoha_foe_entry *hwe; + bool commit_done = false; struct hlist_node *n; u32 index, state; @@ -495,21 +610,68 @@ static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, u32 hash) index = airoha_ppe_foe_get_entry_hash(hwe); hlist_for_each_entry_safe(e, n, &ppe->foe_flow[index], list) { - if (airoha_ppe_foe_compare_entry(e, hwe)) { - airoha_ppe_foe_commit_entry(ppe, &e->data, hash); - e->hash = hash; - break; + if (e->type == FLOW_TYPE_L2_SUBFLOW) { + state = FIELD_GET(AIROHA_FOE_IB1_BIND_STATE, hwe->ib1); + if (state != AIROHA_FOE_STATE_BIND) { + e->hash = 0xffff; + airoha_ppe_foe_remove_flow(ppe, e); + } + continue; } + + if (commit_done || 
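/* [Editorial aside, lifecycle sketch inferred from the helpers above.]
 * Removing an L2 flow first unhashes it from l2_flows, then walks its
 * subflow list; each subflow is invalidated in the FOE table (bind state
 * set to AIROHA_FOE_STATE_INVALID, software hash reset to 0xffff) and,
 * being FLOW_TYPE_L2_SUBFLOW, freed on the spot -- subflows are owned by
 * their parent and never outlive it. Conversely, commit_subflow mints a
 * subflow by copying the hardware-populated entry at @hash, splicing in
 * the parent's L2 info and ib2, and writing the merged entry back.
 */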
!airoha_ppe_foe_compare_entry(e, hwe)) { + e->hash = 0xffff; + continue; + } + + airoha_ppe_foe_commit_entry(ppe, &e->data, hash); + commit_done = true; + e->hash = hash; } + + if (commit_done) + goto unlock; + + airoha_ppe_foe_set_bridge_addrs(&br, eth_hdr(skb)); + e = rhashtable_lookup_fast(&ppe->l2_flows, &br, + airoha_l2_flow_table_params); + if (e) + airoha_ppe_foe_commit_subflow_entry(ppe, e, hash); unlock: spin_unlock_bh(&ppe_lock); } +static int +airoha_ppe_foe_l2_flow_commit_entry(struct airoha_ppe *ppe, + struct airoha_flow_table_entry *e) +{ + struct airoha_flow_table_entry *prev; + + e->type = FLOW_TYPE_L2; + prev = rhashtable_lookup_get_insert_fast(&ppe->l2_flows, &e->l2_node, + airoha_l2_flow_table_params); + if (!prev) + return 0; + + if (IS_ERR(prev)) + return PTR_ERR(prev); + + return rhashtable_replace_fast(&ppe->l2_flows, &prev->l2_node, + &e->l2_node, + airoha_l2_flow_table_params); +} + static int airoha_ppe_foe_flow_commit_entry(struct airoha_ppe *ppe, struct airoha_flow_table_entry *e) { - u32 hash = airoha_ppe_foe_get_entry_hash(&e->data); + int type = FIELD_GET(AIROHA_FOE_IB1_BIND_PACKET_TYPE, e->data.ib1); + u32 hash; + + if (type == PPE_PKT_TYPE_BRIDGE) + return airoha_ppe_foe_l2_flow_commit_entry(ppe, e); + hash = airoha_ppe_foe_get_entry_hash(&e->data); + e->type = FLOW_TYPE_L4; e->hash = 0xffff; spin_lock_bh(&ppe_lock); @@ -519,23 +681,6 @@ static int airoha_ppe_foe_flow_commit_entry(struct airoha_ppe *ppe, return 0; } -static void airoha_ppe_foe_flow_remove_entry(struct airoha_ppe *ppe, - struct airoha_flow_table_entry *e) -{ - spin_lock_bh(&ppe_lock); - - hlist_del_init(&e->list); - if (e->hash != 0xffff) { - e->data.ib1 &= ~AIROHA_FOE_IB1_BIND_STATE; - e->data.ib1 |= FIELD_PREP(AIROHA_FOE_IB1_BIND_STATE, - AIROHA_FOE_STATE_INVALID); - airoha_ppe_foe_commit_entry(ppe, &e->data, e->hash); - e->hash = 0xffff; - } - - spin_unlock_bh(&ppe_lock); -} - static int airoha_ppe_flow_offload_replace(struct airoha_gdm_port *port, struct flow_cls_offload *f) { @@ -822,18 +967,13 @@ error_npu_put: return err; } -int airoha_ppe_setup_tc_block_cb(enum tc_setup_type type, void *type_data, - void *cb_priv) +int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data) { - struct flow_cls_offload *cls = type_data; - struct net_device *dev = cb_priv; struct airoha_gdm_port *port = netdev_priv(dev); + struct flow_cls_offload *cls = type_data; struct airoha_eth *eth = port->qdma->eth; int err = 0; - if (!tc_can_offload(dev) || type != TC_SETUP_CLSFLOWER) - return -EOPNOTSUPP; - mutex_lock(&flow_offload_mutex); if (!eth->npu) @@ -846,7 +986,8 @@ int airoha_ppe_setup_tc_block_cb(enum tc_setup_type type, void *type_data, return err; } -void airoha_ppe_check_skb(struct airoha_ppe *ppe, u16 hash) +void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb, + u16 hash) { u16 now, diff; @@ -859,7 +1000,7 @@ void airoha_ppe_check_skb(struct airoha_ppe *ppe, u16 hash) return; ppe->foe_check_time[hash] = now; - airoha_ppe_foe_insert_entry(ppe, hash); + airoha_ppe_foe_insert_entry(ppe, skb, hash); } int airoha_ppe_init(struct airoha_eth *eth) @@ -890,9 +1031,20 @@ int airoha_ppe_init(struct airoha_eth *eth) if (err) return err; + err = rhashtable_init(&ppe->l2_flows, &airoha_l2_flow_table_params); + if (err) + goto error_flow_table_destroy; + err = airoha_ppe_debugfs_init(ppe); if (err) - rhashtable_destroy(ð->flow_table); + goto error_l2_flow_table_destroy; + + return 0; + +error_l2_flow_table_destroy: + rhashtable_destroy(&ppe->l2_flows); +error_flow_table_destroy: 
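/* [Editorial aside, sketch.] rhashtable_lookup_get_insert_fast() has a
 * three-way result the L2 commit path above leans on: NULL means the new
 * node went in, an ERR_PTR means a table failure, and otherwise it returns
 * the already-present entry with the same key, which can then be swapped
 * out with rhashtable_replace_fast(). Self-contained shape, with a
 * hypothetical struct kv_example:
 */
struct kv_example {
	struct rhash_head node;
	u64 key;
};

static int example_insert_or_replace(struct rhashtable *ht,
				     struct kv_example *e,
				     const struct rhashtable_params params)
{
	struct kv_example *prev;

	prev = rhashtable_lookup_get_insert_fast(ht, &e->node, params);
	if (!prev)
		return 0;		/* fresh key, inserted */
	if (IS_ERR(prev))
		return PTR_ERR(prev);	/* table error, nothing inserted */
	/* duplicate key: swap the old entry for the new one */
	return rhashtable_replace_fast(ht, &prev->node, &e->node, params);
}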
+ rhashtable_destroy(ð->flow_table); return err; } @@ -909,6 +1061,7 @@ void airoha_ppe_deinit(struct airoha_eth *eth) } rcu_read_unlock(); + rhashtable_destroy(ð->ppe->l2_flows); rhashtable_destroy(ð->flow_table); debugfs_remove(eth->ppe->debugfs_dir); } diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h index 8146cde4e8ba..d931530fc96f 100644 --- a/drivers/net/ethernet/airoha/airoha_regs.h +++ b/drivers/net/ethernet/airoha/airoha_regs.h @@ -283,6 +283,7 @@ #define PPE_HASH_SEED 0x12345678 #define REG_PPE_DFT_CPORT0(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x248) +#define DFT_CPORT_MASK(_n) GENMASK(3 + ((_n) << 2), ((_n) << 2)) #define REG_PPE_DFT_CPORT1(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x24c) @@ -422,11 +423,12 @@ ((_n) == 2) ? 0x0720 : \ ((_n) == 1) ? 0x0024 : 0x0020) -#define REG_INT_ENABLE(_n) \ - (((_n) == 4) ? 0x0750 : \ - ((_n) == 3) ? 0x0744 : \ - ((_n) == 2) ? 0x0740 : \ - ((_n) == 1) ? 0x002c : 0x0028) +#define REG_INT_ENABLE(_b, _n) \ + (((_n) == 4) ? 0x0750 + ((_b) << 5) : \ + ((_n) == 3) ? 0x0744 + ((_b) << 5) : \ + ((_n) == 2) ? 0x0740 + ((_b) << 5) : \ + ((_n) == 1) ? 0x002c + ((_b) << 3) : \ + 0x0028 + ((_b) << 3)) /* QDMA_CSR_INT_ENABLE1 */ #define RX15_COHERENT_INT_MASK BIT(31) @@ -461,6 +463,26 @@ #define IRQ0_FULL_INT_MASK BIT(1) #define IRQ0_INT_MASK BIT(0) +#define RX_COHERENT_LOW_INT_MASK \ + (RX15_COHERENT_INT_MASK | RX14_COHERENT_INT_MASK | \ + RX13_COHERENT_INT_MASK | RX12_COHERENT_INT_MASK | \ + RX11_COHERENT_INT_MASK | RX10_COHERENT_INT_MASK | \ + RX9_COHERENT_INT_MASK | RX8_COHERENT_INT_MASK | \ + RX7_COHERENT_INT_MASK | RX6_COHERENT_INT_MASK | \ + RX5_COHERENT_INT_MASK | RX4_COHERENT_INT_MASK | \ + RX3_COHERENT_INT_MASK | RX2_COHERENT_INT_MASK | \ + RX1_COHERENT_INT_MASK | RX0_COHERENT_INT_MASK) + +#define RX_COHERENT_LOW_OFFSET __ffs(RX_COHERENT_LOW_INT_MASK) +#define INT_RX0_MASK(_n) \ + (((_n) << RX_COHERENT_LOW_OFFSET) & RX_COHERENT_LOW_INT_MASK) + +#define TX_COHERENT_LOW_INT_MASK \ + (TX7_COHERENT_INT_MASK | TX6_COHERENT_INT_MASK | \ + TX5_COHERENT_INT_MASK | TX4_COHERENT_INT_MASK | \ + TX3_COHERENT_INT_MASK | TX2_COHERENT_INT_MASK | \ + TX1_COHERENT_INT_MASK | TX0_COHERENT_INT_MASK) + #define TX_DONE_INT_MASK(_n) \ ((_n) ? 
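/* [Editorial aside, worked examples.] Two of the new register macros are
 * easiest to read with numbers plugged in. DFT_CPORT_MASK(_n) selects the
 * _n-th 4-bit field of the default-CPU-port register:
 * DFT_CPORT_MASK(2) = GENMASK(11, 8) = 0x00000f00, so the eight
 * FIELD_PREP()s earlier in the patch fill all eight nibbles in a single
 * write. REG_INT_ENABLE(_b, _n) folds the IRQ bank into the address with
 * two different strides: REG_INT_ENABLE(1, 2) = 0x0740 + (1 << 5) = 0x0760
 * in the wide block, while REG_INT_ENABLE(1, 0) = 0x0028 + (1 << 3) =
 * 0x0030 in the legacy block.
 */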
IRQ1_INT_MASK | IRQ1_FULL_INT_MASK \ : IRQ0_INT_MASK | IRQ0_FULL_INT_MASK) @@ -469,17 +491,6 @@ (IRQ1_INT_MASK | IRQ1_FULL_INT_MASK | \ IRQ0_INT_MASK | IRQ0_FULL_INT_MASK) -#define INT_IDX0_MASK \ - (TX0_COHERENT_INT_MASK | TX1_COHERENT_INT_MASK | \ - TX2_COHERENT_INT_MASK | TX3_COHERENT_INT_MASK | \ - TX4_COHERENT_INT_MASK | TX5_COHERENT_INT_MASK | \ - TX6_COHERENT_INT_MASK | TX7_COHERENT_INT_MASK | \ - RX0_COHERENT_INT_MASK | RX1_COHERENT_INT_MASK | \ - RX2_COHERENT_INT_MASK | RX3_COHERENT_INT_MASK | \ - RX4_COHERENT_INT_MASK | RX7_COHERENT_INT_MASK | \ - RX8_COHERENT_INT_MASK | RX9_COHERENT_INT_MASK | \ - RX15_COHERENT_INT_MASK | INT_TX_MASK) - /* QDMA_CSR_INT_ENABLE2 */ #define RX15_NO_CPU_DSCP_INT_MASK BIT(31) #define RX14_NO_CPU_DSCP_INT_MASK BIT(30) @@ -514,19 +525,121 @@ #define RX1_DONE_INT_MASK BIT(1) #define RX0_DONE_INT_MASK BIT(0) -#define RX_DONE_INT_MASK \ - (RX0_DONE_INT_MASK | RX1_DONE_INT_MASK | \ - RX2_DONE_INT_MASK | RX3_DONE_INT_MASK | \ - RX4_DONE_INT_MASK | RX7_DONE_INT_MASK | \ - RX8_DONE_INT_MASK | RX9_DONE_INT_MASK | \ - RX15_DONE_INT_MASK) -#define INT_IDX1_MASK \ - (RX_DONE_INT_MASK | \ - RX0_NO_CPU_DSCP_INT_MASK | RX1_NO_CPU_DSCP_INT_MASK | \ - RX2_NO_CPU_DSCP_INT_MASK | RX3_NO_CPU_DSCP_INT_MASK | \ - RX4_NO_CPU_DSCP_INT_MASK | RX7_NO_CPU_DSCP_INT_MASK | \ - RX8_NO_CPU_DSCP_INT_MASK | RX9_NO_CPU_DSCP_INT_MASK | \ - RX15_NO_CPU_DSCP_INT_MASK) +#define RX_NO_CPU_DSCP_LOW_INT_MASK \ + (RX15_NO_CPU_DSCP_INT_MASK | RX14_NO_CPU_DSCP_INT_MASK | \ + RX13_NO_CPU_DSCP_INT_MASK | RX12_NO_CPU_DSCP_INT_MASK | \ + RX11_NO_CPU_DSCP_INT_MASK | RX10_NO_CPU_DSCP_INT_MASK | \ + RX9_NO_CPU_DSCP_INT_MASK | RX8_NO_CPU_DSCP_INT_MASK | \ + RX7_NO_CPU_DSCP_INT_MASK | RX6_NO_CPU_DSCP_INT_MASK | \ + RX5_NO_CPU_DSCP_INT_MASK | RX4_NO_CPU_DSCP_INT_MASK | \ + RX3_NO_CPU_DSCP_INT_MASK | RX2_NO_CPU_DSCP_INT_MASK | \ + RX1_NO_CPU_DSCP_INT_MASK | RX0_NO_CPU_DSCP_INT_MASK) + +#define RX_DONE_LOW_INT_MASK \ + (RX15_DONE_INT_MASK | RX14_DONE_INT_MASK | \ + RX13_DONE_INT_MASK | RX12_DONE_INT_MASK | \ + RX11_DONE_INT_MASK | RX10_DONE_INT_MASK | \ + RX9_DONE_INT_MASK | RX8_DONE_INT_MASK | \ + RX7_DONE_INT_MASK | RX6_DONE_INT_MASK | \ + RX5_DONE_INT_MASK | RX4_DONE_INT_MASK | \ + RX3_DONE_INT_MASK | RX2_DONE_INT_MASK | \ + RX1_DONE_INT_MASK | RX0_DONE_INT_MASK) + +#define RX_NO_CPU_DSCP_LOW_OFFSET __ffs(RX_NO_CPU_DSCP_LOW_INT_MASK) +#define INT_RX1_MASK(_n) \ + ((((_n) << RX_NO_CPU_DSCP_LOW_OFFSET) & RX_NO_CPU_DSCP_LOW_INT_MASK) | \ + (RX_DONE_LOW_INT_MASK & (_n))) + +/* QDMA_CSR_INT_ENABLE3 */ +#define RX31_NO_CPU_DSCP_INT_MASK BIT(31) +#define RX30_NO_CPU_DSCP_INT_MASK BIT(30) +#define RX29_NO_CPU_DSCP_INT_MASK BIT(29) +#define RX28_NO_CPU_DSCP_INT_MASK BIT(28) +#define RX27_NO_CPU_DSCP_INT_MASK BIT(27) +#define RX26_NO_CPU_DSCP_INT_MASK BIT(26) +#define RX25_NO_CPU_DSCP_INT_MASK BIT(25) +#define RX24_NO_CPU_DSCP_INT_MASK BIT(24) +#define RX23_NO_CPU_DSCP_INT_MASK BIT(23) +#define RX22_NO_CPU_DSCP_INT_MASK BIT(22) +#define RX21_NO_CPU_DSCP_INT_MASK BIT(21) +#define RX20_NO_CPU_DSCP_INT_MASK BIT(20) +#define RX19_NO_CPU_DSCP_INT_MASK BIT(19) +#define RX18_NO_CPU_DSCP_INT_MASK BIT(18) +#define RX17_NO_CPU_DSCP_INT_MASK BIT(17) +#define RX16_NO_CPU_DSCP_INT_MASK BIT(16) +#define RX31_DONE_INT_MASK BIT(15) +#define RX30_DONE_INT_MASK BIT(14) +#define RX29_DONE_INT_MASK BIT(13) +#define RX28_DONE_INT_MASK BIT(12) +#define RX27_DONE_INT_MASK BIT(11) +#define RX26_DONE_INT_MASK BIT(10) +#define RX25_DONE_INT_MASK BIT(9) +#define RX24_DONE_INT_MASK BIT(8) +#define RX23_DONE_INT_MASK BIT(7) +#define 
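/* [Editorial aside, worked example.] INT_RX1_MASK(_n) takes a
 * bit-per-queue pin mask for queues 0..15 and lays it out as the enable
 * word expects: the DONE enables stay in the low half unchanged, while the
 * matching NO_CPU_DSCP enables live at bits 16..31, hence the shift by
 * RX_NO_CPU_DSCP_LOW_OFFSET (__ffs of a mask whose lowest set bit is 16).
 * E.g. _n = BIT(5) enables RX5_DONE (bit 5) and RX5_NO_CPU_DSCP (bit 21).
 */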
RX22_DONE_INT_MASK BIT(6) +#define RX21_DONE_INT_MASK BIT(5) +#define RX20_DONE_INT_MASK BIT(4) +#define RX19_DONE_INT_MASK BIT(3) +#define RX18_DONE_INT_MASK BIT(2) +#define RX17_DONE_INT_MASK BIT(1) +#define RX16_DONE_INT_MASK BIT(0) + +#define RX_NO_CPU_DSCP_HIGH_INT_MASK \ + (RX31_NO_CPU_DSCP_INT_MASK | RX30_NO_CPU_DSCP_INT_MASK | \ + RX29_NO_CPU_DSCP_INT_MASK | RX28_NO_CPU_DSCP_INT_MASK | \ + RX27_NO_CPU_DSCP_INT_MASK | RX26_NO_CPU_DSCP_INT_MASK | \ + RX25_NO_CPU_DSCP_INT_MASK | RX24_NO_CPU_DSCP_INT_MASK | \ + RX23_NO_CPU_DSCP_INT_MASK | RX22_NO_CPU_DSCP_INT_MASK | \ + RX21_NO_CPU_DSCP_INT_MASK | RX20_NO_CPU_DSCP_INT_MASK | \ + RX19_NO_CPU_DSCP_INT_MASK | RX18_NO_CPU_DSCP_INT_MASK | \ + RX17_NO_CPU_DSCP_INT_MASK | RX16_NO_CPU_DSCP_INT_MASK) + +#define RX_DONE_HIGH_INT_MASK \ + (RX31_DONE_INT_MASK | RX30_DONE_INT_MASK | \ + RX29_DONE_INT_MASK | RX28_DONE_INT_MASK | \ + RX27_DONE_INT_MASK | RX26_DONE_INT_MASK | \ + RX25_DONE_INT_MASK | RX24_DONE_INT_MASK | \ + RX23_DONE_INT_MASK | RX22_DONE_INT_MASK | \ + RX21_DONE_INT_MASK | RX20_DONE_INT_MASK | \ + RX19_DONE_INT_MASK | RX18_DONE_INT_MASK | \ + RX17_DONE_INT_MASK | RX16_DONE_INT_MASK) + +#define RX_DONE_INT_MASK (RX_DONE_HIGH_INT_MASK | RX_DONE_LOW_INT_MASK) +#define RX_DONE_HIGH_OFFSET fls(RX_DONE_HIGH_INT_MASK) + +#define INT_RX2_MASK(_n) \ + ((RX_NO_CPU_DSCP_HIGH_INT_MASK & (_n)) | \ + (((_n) >> RX_DONE_HIGH_OFFSET) & RX_DONE_HIGH_INT_MASK)) + +/* QDMA_CSR_INT_ENABLE4 */ +#define RX31_COHERENT_INT_MASK BIT(31) +#define RX30_COHERENT_INT_MASK BIT(30) +#define RX29_COHERENT_INT_MASK BIT(29) +#define RX28_COHERENT_INT_MASK BIT(28) +#define RX27_COHERENT_INT_MASK BIT(27) +#define RX26_COHERENT_INT_MASK BIT(26) +#define RX25_COHERENT_INT_MASK BIT(25) +#define RX24_COHERENT_INT_MASK BIT(24) +#define RX23_COHERENT_INT_MASK BIT(23) +#define RX22_COHERENT_INT_MASK BIT(22) +#define RX21_COHERENT_INT_MASK BIT(21) +#define RX20_COHERENT_INT_MASK BIT(20) +#define RX19_COHERENT_INT_MASK BIT(19) +#define RX18_COHERENT_INT_MASK BIT(18) +#define RX17_COHERENT_INT_MASK BIT(17) +#define RX16_COHERENT_INT_MASK BIT(16) + +#define RX_COHERENT_HIGH_INT_MASK \ + (RX31_COHERENT_INT_MASK | RX30_COHERENT_INT_MASK | \ + RX29_COHERENT_INT_MASK | RX28_COHERENT_INT_MASK | \ + RX27_COHERENT_INT_MASK | RX26_COHERENT_INT_MASK | \ + RX25_COHERENT_INT_MASK | RX24_COHERENT_INT_MASK | \ + RX23_COHERENT_INT_MASK | RX22_COHERENT_INT_MASK | \ + RX21_COHERENT_INT_MASK | RX20_COHERENT_INT_MASK | \ + RX19_COHERENT_INT_MASK | RX18_COHERENT_INT_MASK | \ + RX17_COHERENT_INT_MASK | RX16_COHERENT_INT_MASK) + +#define INT_RX3_MASK(_n) (RX_COHERENT_HIGH_INT_MASK & (_n)) /* QDMA_CSR_INT_ENABLE5 */ #define TX31_COHERENT_INT_MASK BIT(31) @@ -554,19 +667,19 @@ #define TX9_COHERENT_INT_MASK BIT(9) #define TX8_COHERENT_INT_MASK BIT(8) -#define INT_IDX4_MASK \ - (TX8_COHERENT_INT_MASK | TX9_COHERENT_INT_MASK | \ - TX10_COHERENT_INT_MASK | TX11_COHERENT_INT_MASK | \ - TX12_COHERENT_INT_MASK | TX13_COHERENT_INT_MASK | \ - TX14_COHERENT_INT_MASK | TX15_COHERENT_INT_MASK | \ - TX16_COHERENT_INT_MASK | TX17_COHERENT_INT_MASK | \ - TX18_COHERENT_INT_MASK | TX19_COHERENT_INT_MASK | \ - TX20_COHERENT_INT_MASK | TX21_COHERENT_INT_MASK | \ - TX22_COHERENT_INT_MASK | TX23_COHERENT_INT_MASK | \ - TX24_COHERENT_INT_MASK | TX25_COHERENT_INT_MASK | \ - TX26_COHERENT_INT_MASK | TX27_COHERENT_INT_MASK | \ - TX28_COHERENT_INT_MASK | TX29_COHERENT_INT_MASK | \ - TX30_COHERENT_INT_MASK | TX31_COHERENT_INT_MASK) +#define TX_COHERENT_HIGH_INT_MASK \ + (TX31_COHERENT_INT_MASK | TX30_COHERENT_INT_MASK | \ + 
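/* [Editorial aside, worked example.] INT_RX2_MASK(_n) mirrors INT_RX1_MASK
 * for queues 16..31: in this enable word their NO_CPU_DSCP bits already
 * sit at 16..31, while the DONE bits occupy 0..15, so the bit-per-queue
 * mask is shifted down by RX_DONE_HIGH_OFFSET (fls of a 16-bit-wide mask,
 * i.e. 16). E.g. _n = BIT(21), queue 21, enables RX21_NO_CPU_DSCP (bit 21)
 * and RX21_DONE (bit 5). INT_RX3_MASK(_n) then needs no shift at all,
 * since the RX16..31 COHERENT enables line up with the queue bits.
 */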
TX29_COHERENT_INT_MASK | TX28_COHERENT_INT_MASK | \ + TX27_COHERENT_INT_MASK | TX26_COHERENT_INT_MASK | \ + TX25_COHERENT_INT_MASK | TX24_COHERENT_INT_MASK | \ + TX23_COHERENT_INT_MASK | TX22_COHERENT_INT_MASK | \ + TX21_COHERENT_INT_MASK | TX20_COHERENT_INT_MASK | \ + TX19_COHERENT_INT_MASK | TX18_COHERENT_INT_MASK | \ + TX17_COHERENT_INT_MASK | TX16_COHERENT_INT_MASK | \ + TX15_COHERENT_INT_MASK | TX14_COHERENT_INT_MASK | \ + TX13_COHERENT_INT_MASK | TX12_COHERENT_INT_MASK | \ + TX11_COHERENT_INT_MASK | TX10_COHERENT_INT_MASK | \ + TX9_COHERENT_INT_MASK | TX8_COHERENT_INT_MASK) #define REG_TX_IRQ_BASE(_n) ((_n) ? 0x0048 : 0x0050) @@ -691,6 +804,12 @@ #define REG_TRTCM_DATA_LOW(_n) ((_n) + 0x8) #define REG_TRTCM_DATA_HIGH(_n) ((_n) + 0xc) +#define RATE_LIMIT_PARAM_RW_MASK BIT(31) +#define RATE_LIMIT_PARAM_RW_DONE_MASK BIT(30) +#define RATE_LIMIT_PARAM_TYPE_MASK GENMASK(29, 28) +#define RATE_LIMIT_METER_GROUP_MASK GENMASK(27, 26) +#define RATE_LIMIT_PARAM_INDEX_MASK GENMASK(23, 16) + #define REG_TXWRR_MODE_CFG 0x1020 #define TWRR_WEIGHT_SCALE_MASK BIT(31) #define TWRR_WEIGHT_BASE_MASK BIT(3) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 897720fdf5d8..7d9ee9867a40 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1777,7 +1777,7 @@ static void ena_init_napi_in_range(struct ena_adapter *adapter, if (ENA_IS_XDP_INDEX(adapter, i)) napi_handler = ena_xdp_io_poll; - netif_napi_add(adapter->netdev, &napi->napi, napi_handler); + netif_napi_add_config(adapter->netdev, &napi->napi, napi_handler, i); if (!ENA_IS_XDP_INDEX(adapter, i)) napi->rx_ring = rx_ring; diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index 506f682d15c1..097bb092bdb8 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -225,7 +225,7 @@ int pdsc_adminq_post(struct pdsc *pdsc, union pds_core_adminq_comp *comp, bool fast_poll) { - unsigned long poll_interval = 1; + unsigned long poll_interval = 200; unsigned long poll_jiffies; unsigned long time_limit; unsigned long time_start; @@ -252,7 +252,7 @@ int pdsc_adminq_post(struct pdsc *pdsc, time_limit = time_start + HZ * pdsc->devcmd_timeout; do { /* Timeslice the actual wait to catch IO errors etc early */ - poll_jiffies = msecs_to_jiffies(poll_interval); + poll_jiffies = usecs_to_jiffies(poll_interval); remaining = wait_for_completion_timeout(wc, poll_jiffies); if (remaining) break; diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 9512aa4083f0..076dfe2910c7 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -402,6 +402,7 @@ err_out_uninit: static struct pdsc_viftype pdsc_viftype_defaults[] = { [PDS_DEV_TYPE_FWCTL] = { .name = PDS_DEV_TYPE_FWCTL_STR, + .enabled = true, .vif_id = PDS_DEV_TYPE_FWCTL, .dl_id = -1 }, [PDS_DEV_TYPE_VDPA] = { .name = PDS_DEV_TYPE_VDPA_STR, @@ -414,7 +415,8 @@ static int pdsc_viftypes_init(struct pdsc *pdsc) { enum pds_core_vif_types vt; - pdsc->viftype_status = kzalloc(sizeof(pdsc_viftype_defaults), + pdsc->viftype_status = kcalloc(ARRAY_SIZE(pdsc_viftype_defaults), + sizeof(*pdsc->viftype_status), GFP_KERNEL); if (!pdsc->viftype_status) return -ENOMEM; @@ -431,9 +433,6 @@ static int pdsc_viftypes_init(struct pdsc *pdsc) /* See what the Core device has for support */ vt_support = 
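/* [Editorial aside, hedged.] netif_napi_add_config() works like
 * netif_napi_add() but binds the NAPI instance to the persistent per-queue
 * config slot named by the trailing index, so user-visible per-queue NAPI
 * settings survive queue teardown and re-creation; the ena conversion
 * above is the typical one-line change:
 *
 *	netif_napi_add(dev, &q->napi, poll);
 * becomes
 *	netif_napi_add_config(dev, &q->napi, poll, q->index);
 *
 * The pds_core hunks are independent cleanups: the adminq poll interval is
 * reinterpreted in microseconds (usecs_to_jiffies(200) replacing
 * msecs_to_jiffies(1)), and viftype_status switches to the idiomatic
 * kcalloc() form, with FWCTL's .enabled = true moved from runtime code
 * into the static initializer.
 */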
!!le16_to_cpu(pdsc->dev_ident.vif_types[vt]); - if (vt == PDS_DEV_TYPE_FWCTL) - pdsc->viftype_status[vt].enabled = true; - dev_dbg(pdsc->dev, "VIF %s is %ssupported\n", pdsc->viftype_status[vt].name, vt_support ? "" : "not "); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index 3b70f6737633..e1296cbf4ff3 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -1,117 +1,8 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) /* - * AMD 10Gb Ethernet driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - * - * - * License 2: Modified BSD - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. 
+ * All rights reserved */ #ifndef __XGBE_COMMON_H__ @@ -900,6 +791,11 @@ #define PCS_V2_RV_WINDOW_SELECT 0x1064 #define PCS_V2_YC_WINDOW_DEF 0x18060 #define PCS_V2_YC_WINDOW_SELECT 0x18064 +#define PCS_V3_RN_WINDOW_DEF 0xf8078 +#define PCS_V3_RN_WINDOW_SELECT 0xf807c + +#define PCS_RN_SMN_BASE_ADDR 0x11e00000 +#define PCS_RN_PORT_ADDR_SIZE 0x100000 /* PCS register entry bit positions and sizes */ #define PCS_V2_WINDOW_DEF_OFFSET_INDEX 6 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dcb.c b/drivers/net/ethernet/amd/xgbe/xgbe-dcb.c index c68ace804e37..1474df5544fa 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dcb.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dcb.c @@ -1,117 +1,8 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) /* - * AMD 10Gb Ethernet driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. 
- * - * - * License 2: Modified BSD - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. 
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. + * All rights reserved */ #include <linux/netdevice.h> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c index b35808d3d07f..d9157c4acde9 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c @@ -1,117 +1,8 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) /* - * AMD 10Gb Ethernet driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2014 Advanced Micro Devices, Inc. - * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - * - * - * License 2: Modified BSD - * - * Copyright (c) 2014 Advanced Micro Devices, Inc. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. 
+ * All rights reserved */ #include <linux/debugfs.h> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index d41b58fad37b..7c8a19988a52 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -1,117 +1,8 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) /* - * AMD 10Gb Ethernet driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2014 Advanced Micro Devices, Inc. - * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - * - * - * License 2: Modified BSD - * - * Copyright (c) 2014 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. 
+ * All rights reserved */ #include "xgbe.h" diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index f1b0fb02b3cd..466b5f6e5578 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1,117 +1,8 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) /* - * AMD 10Gb Ethernet driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - * - * - * License 2: Modified BSD - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. 
+ * All rights reserved */ #include <linux/phy.h> @@ -120,9 +11,11 @@ #include <linux/bitrev.h> #include <linux/crc32.h> #include <linux/crc32poly.h> +#include <linux/pci.h> #include "xgbe.h" #include "xgbe-common.h" +#include "xgbe-smn.h" static inline unsigned int xgbe_get_max_frame(struct xgbe_prv_data *pdata) { @@ -1162,18 +1055,19 @@ static int xgbe_set_gpio(struct xgbe_prv_data *pdata, unsigned int gpio) return 0; } -static int xgbe_read_mmd_regs_v2(struct xgbe_prv_data *pdata, int prtad, - int mmd_reg) +static unsigned int xgbe_get_mmd_address(struct xgbe_prv_data *pdata, + int mmd_reg) { - unsigned long flags; - unsigned int mmd_address, index, offset; - int mmd_data; - - if (mmd_reg & XGBE_ADDR_C45) - mmd_address = mmd_reg & ~XGBE_ADDR_C45; - else - mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff); + return (mmd_reg & XGBE_ADDR_C45) ? + mmd_reg & ~XGBE_ADDR_C45 : + (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff); +} +static void xgbe_get_pcs_index_and_offset(struct xgbe_prv_data *pdata, + unsigned int mmd_address, + unsigned int *index, + unsigned int *offset) +{ /* The PCS registers are accessed using mmio. The underlying * management interface uses indirect addressing to access the MMD * register sets. This requires accessing of the PCS register in two @@ -1184,8 +1078,98 @@ static int xgbe_read_mmd_regs_v2(struct xgbe_prv_data *pdata, int prtad, * offset 1 bit and reading 16 bits of data. */ mmd_address <<= 1; - index = mmd_address & ~pdata->xpcs_window_mask; - offset = pdata->xpcs_window + (mmd_address & pdata->xpcs_window_mask); + *index = mmd_address & ~pdata->xpcs_window_mask; + *offset = pdata->xpcs_window + (mmd_address & pdata->xpcs_window_mask); +} + +static int xgbe_read_mmd_regs_v3(struct xgbe_prv_data *pdata, int prtad, + int mmd_reg) +{ + unsigned int mmd_address, index, offset; + u32 smn_address; + int mmd_data; + int ret; + + mmd_address = xgbe_get_mmd_address(pdata, mmd_reg); + + xgbe_get_pcs_index_and_offset(pdata, mmd_address, &index, &offset); + + smn_address = pdata->smn_base + pdata->xpcs_window_sel_reg; + ret = amd_smn_write(0, smn_address, index); + if (ret) + return ret; + + ret = amd_smn_read(0, pdata->smn_base + offset, &mmd_data); + if (ret) + return ret; + + mmd_data = (offset % 4) ? 
FIELD_GET(XGBE_GEN_HI_MASK, mmd_data) : + FIELD_GET(XGBE_GEN_LO_MASK, mmd_data); + + return mmd_data; +} + +static void xgbe_write_mmd_regs_v3(struct xgbe_prv_data *pdata, int prtad, + int mmd_reg, int mmd_data) +{ + unsigned int pci_mmd_data, hi_mask, lo_mask; + unsigned int mmd_address, index, offset; + struct pci_dev *dev; + u32 smn_address; + int ret; + + dev = pdata->pcidev; + mmd_address = xgbe_get_mmd_address(pdata, mmd_reg); + + xgbe_get_pcs_index_and_offset(pdata, mmd_address, &index, &offset); + + smn_address = pdata->smn_base + pdata->xpcs_window_sel_reg; + ret = amd_smn_write(0, smn_address, index); + if (ret) { + pci_err(dev, "Failed to write data 0x%x\n", index); + return; + } + + ret = amd_smn_read(0, pdata->smn_base + offset, &pci_mmd_data); + if (ret) { + pci_err(dev, "Failed to read data\n"); + return; + } + + if (offset % 4) { + hi_mask = FIELD_PREP(XGBE_GEN_HI_MASK, mmd_data); + lo_mask = FIELD_GET(XGBE_GEN_LO_MASK, pci_mmd_data); + } else { + hi_mask = FIELD_PREP(XGBE_GEN_HI_MASK, + FIELD_GET(XGBE_GEN_HI_MASK, pci_mmd_data)); + lo_mask = FIELD_GET(XGBE_GEN_LO_MASK, mmd_data); + } + + pci_mmd_data = hi_mask | lo_mask; + + ret = amd_smn_write(0, smn_address, index); + if (ret) { + pci_err(dev, "Failed to write data 0x%x\n", index); + return; + } + + ret = amd_smn_write(0, (pdata->smn_base + offset), pci_mmd_data); + if (ret) { + pci_err(dev, "Failed to write data 0x%x\n", pci_mmd_data); + return; + } +} + +static int xgbe_read_mmd_regs_v2(struct xgbe_prv_data *pdata, int prtad, + int mmd_reg) +{ + unsigned int mmd_address, index, offset; + unsigned long flags; + int mmd_data; + + mmd_address = xgbe_get_mmd_address(pdata, mmd_reg); + + xgbe_get_pcs_index_and_offset(pdata, mmd_address, &index, &offset); spin_lock_irqsave(&pdata->xpcs_lock, flags); XPCS32_IOWRITE(pdata, pdata->xpcs_window_sel_reg, index); @@ -1201,23 +1185,9 @@ static void xgbe_write_mmd_regs_v2(struct xgbe_prv_data *pdata, int prtad, unsigned long flags; unsigned int mmd_address, index, offset; - if (mmd_reg & XGBE_ADDR_C45) - mmd_address = mmd_reg & ~XGBE_ADDR_C45; - else - mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff); + mmd_address = xgbe_get_mmd_address(pdata, mmd_reg); - /* The PCS registers are accessed using mmio. The underlying - * management interface uses indirect addressing to access the MMD - * register sets. This requires accessing of the PCS register in two - * phases, an address phase and a data phase. - * - * The mmio interface is based on 16-bit offsets and values. All - * register offsets must therefore be adjusted by left shifting the - * offset 1 bit and writing 16 bits of data. - */ - mmd_address <<= 1; - index = mmd_address & ~pdata->xpcs_window_mask; - offset = pdata->xpcs_window + (mmd_address & pdata->xpcs_window_mask); + xgbe_get_pcs_index_and_offset(pdata, mmd_address, &index, &offset); spin_lock_irqsave(&pdata->xpcs_lock, flags); XPCS32_IOWRITE(pdata, pdata->xpcs_window_sel_reg, index); @@ -1232,10 +1202,7 @@ static int xgbe_read_mmd_regs_v1(struct xgbe_prv_data *pdata, int prtad, unsigned int mmd_address; int mmd_data; - if (mmd_reg & XGBE_ADDR_C45) - mmd_address = mmd_reg & ~XGBE_ADDR_C45; - else - mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff); + mmd_address = xgbe_get_mmd_address(pdata, mmd_reg); /* The PCS registers are accessed using mmio. 
The underlying APB3 * management interface uses indirect addressing to access the MMD @@ -1260,10 +1227,7 @@ static void xgbe_write_mmd_regs_v1(struct xgbe_prv_data *pdata, int prtad, unsigned int mmd_address; unsigned long flags; - if (mmd_reg & XGBE_ADDR_C45) - mmd_address = mmd_reg & ~XGBE_ADDR_C45; - else - mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff); + mmd_address = xgbe_get_mmd_address(pdata, mmd_reg); /* The PCS registers are accessed using mmio. The underlying APB3 * management interface uses indirect addressing to access the MMD @@ -1290,6 +1254,9 @@ static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad, case XGBE_XPCS_ACCESS_V2: default: return xgbe_read_mmd_regs_v2(pdata, prtad, mmd_reg); + + case XGBE_XPCS_ACCESS_V3: + return xgbe_read_mmd_regs_v3(pdata, prtad, mmd_reg); } } @@ -1300,6 +1267,9 @@ static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad, case XGBE_XPCS_ACCESS_V1: return xgbe_write_mmd_regs_v1(pdata, prtad, mmd_reg, mmd_data); + case XGBE_XPCS_ACCESS_V3: + return xgbe_write_mmd_regs_v3(pdata, prtad, mmd_reg, mmd_data); + case XGBE_XPCS_ACCESS_V2: default: return xgbe_write_mmd_regs_v2(pdata, prtad, mmd_reg, mmd_data); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 8e09ad8fa022..76c328721fcd 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1,117 +1,8 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) /* - * AMD 10Gb Ethernet driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. 
- * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - * - * - * License 2: Modified BSD - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. 
- * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. + * All rights reserved */ #include <linux/module.h> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 4431ab1c18b3..be0d2c7d08dc 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -1,117 +1,8 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) /* - * AMD 10Gb Ethernet driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. 
IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - * - * - * License 2: Modified BSD - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. 
IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. + * All rights reserved */ #include <linux/spinlock.h> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c index 7a833894f52a..d40011e8ddf2 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c @@ -1,117 +1,8 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) /* - * AMD 10Gb Ethernet driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2016 Advanced Micro Devices, Inc. - * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. 
IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - * - * - * License 2: Modified BSD - * - * Copyright (c) 2016 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. 
IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. + * All rights reserved */ #include <linux/module.h> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index 0e8698928e4d..4ebdd123c435 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -1,117 +1,8 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) /* - * AMD 10Gb Ethernet driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. 
IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - * - * - * License 2: Modified BSD - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. 
IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. + * All rights reserved */ #include <linux/module.h> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 07f4f3418d01..71449edbb76d 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -1,117 +1,8 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) /* - * AMD 10Gb Ethernet driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. 
IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - * - * - * License 2: Modified BSD - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. 
IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. + * All rights reserved */ #include <linux/interrupt.h> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index c636999a6a84..097ec5e4f261 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -1,123 +1,15 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) /* - * AMD 10Gb Ethernet driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2016 Advanced Micro Devices, Inc. - * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. 
IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - * - * - * License 2: Modified BSD - * - * Copyright (c) 2016 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. 
IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. + * All rights reserved */ #include <linux/module.h> #include <linux/device.h> #include <linux/pci.h> #include <linux/log2.h> +#include "xgbe-smn.h" #include "xgbe.h" #include "xgbe-common.h" @@ -207,14 +99,14 @@ out: static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - struct xgbe_prv_data *pdata; - struct device *dev = &pdev->dev; void __iomem * const *iomap_table; - struct pci_dev *rdev; + unsigned int port_addr_size, reg; + struct device *dev = &pdev->dev; + struct xgbe_prv_data *pdata; unsigned int ma_lo, ma_hi; - unsigned int reg; - int bar_mask; - int ret; + struct pci_dev *rdev; + int bar_mask, ret; + u32 address; pdata = xgbe_alloc_pdata(dev); if (IS_ERR(pdata)) { @@ -274,20 +166,31 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Set the PCS indirect addressing definition registers */ rdev = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0)); - if (rdev && - (rdev->vendor == PCI_VENDOR_ID_AMD) && (rdev->device == 0x15d0)) { - pdata->xpcs_window_def_reg = PCS_V2_RV_WINDOW_DEF; - pdata->xpcs_window_sel_reg = PCS_V2_RV_WINDOW_SELECT; - } else if (rdev && (rdev->vendor == PCI_VENDOR_ID_AMD) && - (rdev->device == 0x14b5)) { - pdata->xpcs_window_def_reg = PCS_V2_YC_WINDOW_DEF; - pdata->xpcs_window_sel_reg = PCS_V2_YC_WINDOW_SELECT; - - /* Yellow Carp devices do not need cdr workaround */ - pdata->vdata->an_cdr_workaround = 0; - - /* Yellow Carp devices do not need rrc */ - pdata->vdata->enable_rrc = 0; + if (rdev && rdev->vendor == PCI_VENDOR_ID_AMD) { + switch (rdev->device) { + case XGBE_RV_PCI_DEVICE_ID: + pdata->xpcs_window_def_reg = PCS_V2_RV_WINDOW_DEF; + pdata->xpcs_window_sel_reg = PCS_V2_RV_WINDOW_SELECT; + break; + case XGBE_YC_PCI_DEVICE_ID: + pdata->xpcs_window_def_reg = PCS_V2_YC_WINDOW_DEF; + pdata->xpcs_window_sel_reg = PCS_V2_YC_WINDOW_SELECT; + + /* Yellow Carp devices do not need cdr workaround */ + pdata->vdata->an_cdr_workaround = 0; + + /* Yellow Carp devices do not need rrc */ + pdata->vdata->enable_rrc = 0; + break; + case XGBE_RN_PCI_DEVICE_ID: + pdata->xpcs_window_def_reg = PCS_V3_RN_WINDOW_DEF; + pdata->xpcs_window_sel_reg = PCS_V3_RN_WINDOW_SELECT; + break; + default: + pdata->xpcs_window_def_reg = PCS_V2_WINDOW_DEF; + pdata->xpcs_window_sel_reg = PCS_V2_WINDOW_SELECT; + break; + } } else { pdata->xpcs_window_def_reg = PCS_V2_WINDOW_DEF; pdata->xpcs_window_sel_reg = PCS_V2_WINDOW_SELECT; @@ -295,7 +198,22 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_dev_put(rdev); /* Configure the PCS indirect addressing support */ - reg = XPCS32_IOREAD(pdata, pdata->xpcs_window_def_reg); + if (pdata->vdata->xpcs_access == XGBE_XPCS_ACCESS_V3) { + reg = XP_IOREAD(pdata, XP_PROP_0); + port_addr_size = PCS_RN_PORT_ADDR_SIZE * + XP_GET_BITS(reg, XP_PROP_0, PORT_ID); + pdata->smn_base = PCS_RN_SMN_BASE_ADDR + port_addr_size; + + address = pdata->smn_base + 
(pdata->xpcs_window_def_reg); + ret = amd_smn_read(0, address, &reg); + if (ret) { + pci_err(pdata->pcidev, "Failed to read data\n"); + goto err_pci_enable; + } + } else { + reg = XPCS32_IOREAD(pdata, pdata->xpcs_window_def_reg); + } + pdata->xpcs_window = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, OFFSET); pdata->xpcs_window <<= 6; pdata->xpcs_window_size = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, SIZE); @@ -473,6 +391,22 @@ static int __maybe_unused xgbe_pci_resume(struct device *dev) return ret; } +static struct xgbe_version_data xgbe_v3 = { + .init_function_ptrs_phy_impl = xgbe_init_function_ptrs_phy_v2, + .xpcs_access = XGBE_XPCS_ACCESS_V3, + .mmc_64bit = 1, + .tx_max_fifo_size = 65536, + .rx_max_fifo_size = 65536, + .tx_tstamp_workaround = 1, + .ecc_support = 1, + .i2c_support = 1, + .irq_reissue_support = 1, + .tx_desc_prefetch = 5, + .rx_desc_prefetch = 5, + .an_cdr_workaround = 0, + .enable_rrc = 0, +}; + static struct xgbe_version_data xgbe_v2a = { .init_function_ptrs_phy_impl = xgbe_init_function_ptrs_phy_v2, .xpcs_access = XGBE_XPCS_ACCESS_V2, @@ -510,6 +444,8 @@ static const struct pci_device_id xgbe_pci_table[] = { .driver_data = (kernel_ulong_t)&xgbe_v2a }, { PCI_VDEVICE(AMD, 0x1459), .driver_data = (kernel_ulong_t)&xgbe_v2b }, + { PCI_VDEVICE(AMD, 0x1641), + .driver_data = (kernel_ulong_t)&xgbe_v3 }, /* Last entry must be zero */ { 0, } }; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c index d16eae415f72..2e6b8ffe785c 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c @@ -1,117 +1,8 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) /* - * AMD 10Gb Ethernet driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2016 Advanced Micro Devices, Inc. - * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. 
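To make the new probe branch in the xgbe-pci.c hunk above easier to follow: on XGBE_XPCS_ACCESS_V3 parts the PCS window definition register is no longer reachable through the memory-mapped XPCS space, so the driver computes a per-port SMN address and reads it with the x86 amd_smn_read() helper. The sketch below restates that branch as a standalone function; the helper name is hypothetical and the error unwinding is trimmed, but the address math mirrors the hunk.

	/* Hypothetical helper, equivalent to the probe branch above. */
	static int xgbe_read_pcs_window_def(struct xgbe_prv_data *pdata, u32 *reg)
	{
		u32 address;

		if (pdata->vdata->xpcs_access == XGBE_XPCS_ACCESS_V3) {
			/* smn_base was derived from the port ID in XP_PROP_0 */
			address = pdata->smn_base + pdata->xpcs_window_def_reg;
			return amd_smn_read(0, address, reg);	/* node 0, as in probe */
		}

		/* V1/V2: the register lives in the memory-mapped XPCS space */
		*reg = XPCS32_IOREAD(pdata, pdata->xpcs_window_def_reg);
		return 0;
	}

Whichever path runs, the same PCS_V2_WINDOW_DEF OFFSET/SIZE fields are extracted from the returned value, which is why the code after the branch is untouched.
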
Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - * - * - * License 2: Modified BSD - * - * Copyright (c) 2016 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. 
Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. + * All rights reserved */ #include <linux/module.h> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 268399dfcf22..7a4dfa4e19c7 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -1,117 +1,8 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) /* - * AMD 10Gb Ethernet driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2016 Advanced Micro Devices, Inc. - * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. 
Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - * - * - * License 2: Modified BSD - * - * Copyright (c) 2016 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. 
Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. + * All rights reserved */ #include <linux/module.h> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c index 4365bd62942c..47d53e59ccf6 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c @@ -1,117 +1,8 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) /* - * AMD 10Gb Ethernet driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. 
Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - * - * - * License 2: Modified BSD - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. 
Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. + * All rights reserved */ #include <linux/module.h> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c index 7051bd7cf6dc..978c4dd01fa0 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c @@ -1,117 +1,8 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) /* - * AMD 10Gb Ethernet driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2014 Advanced Micro Devices, Inc. - * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. 
Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - * - * - * License 2: Modified BSD - * - * Copyright (c) 2014 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. 
Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. + * All rights reserved */ #include <linux/clk.h> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-smn.h b/drivers/net/ethernet/amd/xgbe/xgbe-smn.h new file mode 100644 index 000000000000..3fd03d39c18a --- /dev/null +++ b/drivers/net/ethernet/amd/xgbe/xgbe-smn.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) */ +/* + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. + * All rights reserved + * + * Author: Raju Rangoju <Raju.Rangoju@amd.com> + */ + +#ifndef __SMN_H__ +#define __SMN_H__ + +#ifdef CONFIG_AMD_NB + +#include <asm/amd_nb.h> + +#else + +static inline int amd_smn_write(u16 node, u32 address, u32 value) +{ + return -ENODEV; +} + +static inline int amd_smn_read(u16 node, u32 address, u32 *value) +{ + return -ENODEV; +} + +#endif +#endif diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index ed5d43c16d0e..6359bb87dc13 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -1,117 +1,8 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) /* - * AMD 10Gb Ethernet driver - * - * This file is available to you under your choice of the following two - * licenses: - * - * License 1: GPLv2 - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * - * This file is free software; you may copy, redistribute and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or (at - * your option) any later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
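One note on the new xgbe-smn.h introduced above: the CONFIG_AMD_NB guard keeps the driver buildable on configurations without the AMD northbridge helpers, since the inline stubs turn every SMN access into -ENODEV. Callers need no #ifdef of their own; a minimal caller-side sketch (illustrative only, names borrowed from the probe path):

	u32 reg;
	int ret;

	/* Resolves to the asm/amd_nb.h helper when CONFIG_AMD_NB=y, or to
	 * the -ENODEV stub otherwise, so a V3 device fails probe cleanly
	 * instead of breaking the build.
	 */
	ret = amd_smn_read(0, pdata->smn_base + pdata->xpcs_window_def_reg, &reg);
	if (ret)
		return ret;
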
- * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - * - * - * License 2: Modified BSD - * - * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Advanced Micro Devices, Inc. nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * This file incorporates work covered by the following copyright and - * permission notice: - * The Synopsys DWC ETHER XGMAC Software Driver and documentation - * (hereinafter "Software") is an unsupported proprietary work of Synopsys, - * Inc. unless otherwise expressly agreed to in writing between Synopsys - * and you. - * - * The Software IS NOT an item of Licensed Software or Licensed Product - * under any End User Software License Agreement or Agreement for Licensed - * Product with Synopsys or any supplement thereto. Permission is hereby - * granted, free of charge, to any person obtaining a copy of this software - * annotated with this license and the Software, to deal in the Software - * without restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" - * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. 
+ * All rights reserved */ #ifndef __XGBE_H__ @@ -347,6 +238,15 @@ (_src)->link_modes._sname, \ __ETHTOOL_LINK_MODE_MASK_NBITS) +/* XGBE PCI device id */ +#define XGBE_RV_PCI_DEVICE_ID 0x15d0 +#define XGBE_YC_PCI_DEVICE_ID 0x14b5 +#define XGBE_RN_PCI_DEVICE_ID 0x1630 + + /* Generic low and high masks */ +#define XGBE_GEN_HI_MASK GENMASK(31, 16) +#define XGBE_GEN_LO_MASK GENMASK(15, 0) + struct xgbe_prv_data; struct xgbe_packet_data { @@ -565,6 +465,7 @@ enum xgbe_speed { enum xgbe_xpcs_access { XGBE_XPCS_ACCESS_V1 = 0, XGBE_XPCS_ACCESS_V2, + XGBE_XPCS_ACCESS_V3, }; enum xgbe_an_mode { @@ -1060,6 +961,7 @@ struct xgbe_prv_data { struct device *dev; struct platform_device *phy_platdev; struct device *phy_dev; + unsigned int smn_base; /* Version related data */ struct xgbe_version_data *vdata; diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c index b9fdd61f1fdb..b50052c25a91 100644 --- a/drivers/net/ethernet/apple/bmac.c +++ b/drivers/net/ethernet/apple/bmac.c @@ -20,7 +20,6 @@ #include <linux/init.h> #include <linux/spinlock.h> #include <linux/crc32.h> -#include <linux/crc32poly.h> #include <linux/bitrev.h> #include <linux/ethtool.h> #include <linux/slab.h> @@ -796,59 +795,6 @@ static irqreturn_t bmac_txdma_intr(int irq, void *dev_id) } #ifndef SUNHME_MULTICAST -/* Real fast bit-reversal algorithm, 6-bit values */ -static int reverse6[64] = { - 0x0,0x20,0x10,0x30,0x8,0x28,0x18,0x38, - 0x4,0x24,0x14,0x34,0xc,0x2c,0x1c,0x3c, - 0x2,0x22,0x12,0x32,0xa,0x2a,0x1a,0x3a, - 0x6,0x26,0x16,0x36,0xe,0x2e,0x1e,0x3e, - 0x1,0x21,0x11,0x31,0x9,0x29,0x19,0x39, - 0x5,0x25,0x15,0x35,0xd,0x2d,0x1d,0x3d, - 0x3,0x23,0x13,0x33,0xb,0x2b,0x1b,0x3b, - 0x7,0x27,0x17,0x37,0xf,0x2f,0x1f,0x3f -}; - -static unsigned int -crc416(unsigned int curval, unsigned short nxtval) -{ - unsigned int counter, cur = curval, next = nxtval; - int high_crc_set, low_data_set; - - /* Swap bytes */ - next = ((next & 0x00FF) << 8) | (next >> 8); - - /* Compute bit-by-bit */ - for (counter = 0; counter < 16; ++counter) { - /* is high CRC bit set? */ - if ((cur & 0x80000000) == 0) high_crc_set = 0; - else high_crc_set = 1; - - cur = cur << 1; - - if ((next & 0x0001) == 0) low_data_set = 0; - else low_data_set = 1; - - next = next >> 1; - - /* do the XOR */ - if (high_crc_set ^ low_data_set) cur = cur ^ CRC32_POLY_BE; - } - return cur; -} - -static unsigned int -bmac_crc(unsigned short *address) -{ - unsigned int newcrc; - - XXDEBUG(("bmac_crc: addr=%#04x, %#04x, %#04x\n", *address, address[1], address[2])); - newcrc = crc416(0xffffffff, *address); /* address bits 47 - 32 */ - newcrc = crc416(newcrc, address[1]); /* address bits 31 - 16 */ - newcrc = crc416(newcrc, address[2]); /* address bits 15 - 0 */ - - return(newcrc); -} - /* * Add requested mcast addr to BMac's hash table filter. * @@ -861,8 +807,7 @@ bmac_addhash(struct bmac_data *bp, unsigned char *addr) unsigned short mask; if (!(*addr)) return; - crc = bmac_crc((unsigned short *)addr) & 0x3f; /* Big-endian alert! */ - crc = reverse6[crc]; /* Hyperfast bit-reversing algorithm */ + crc = crc32(~0, addr, ETH_ALEN) >> 26; if (bp->hash_use_count[crc]++) return; /* This bit is already set */ mask = crc % 16; mask = (unsigned char)1 << mask; @@ -876,8 +821,7 @@ bmac_removehash(struct bmac_data *bp, unsigned char *addr) unsigned char mask; /* Now, delete the address from the filter copy, as indicated */ - crc = bmac_crc((unsigned short *)addr) & 0x3f; /* Big-endian alert! 
*/ - crc = reverse6[crc]; /* Hyperfast bit-reversing algorithm */ + crc = crc32(~0, addr, ETH_ALEN) >> 26; if (bp->hash_use_count[crc] == 0) return; /* That bit wasn't in use! */ if (--bp->hash_use_count[crc]) return; /* That bit is still in use */ mask = crc % 16; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index c1d1673c5749..b565189e5913 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -123,7 +123,6 @@ static netdev_tx_t aq_ndev_start_xmit(struct sk_buff *skb, struct net_device *nd } #endif - skb_tx_timestamp(skb); return aq_nic_xmit(aq_nic, skb); } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index bf3aa46887a1..e71cd10e4e1f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -898,6 +898,8 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb) frags = aq_nic_map_skb(self, skb, ring); + skb_tx_timestamp(skb); + if (likely(frags)) { err = self->aq_hw_ops->hw_ring_tx_xmit(self->aq_hw, ring, frags); diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 1bd4313215d7..636520bb4b8c 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -123,6 +123,7 @@ config TIGON3 tristate "Broadcom Tigon3 support" depends on PCI depends on PTP_1588_CLOCK_OPTIONAL + select CRC32 select PHYLIB help This driver supports Broadcom Tigon3 based gigabit Ethernet cards. diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c index a68fab1b05f0..fd35f4b4dc50 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c @@ -141,7 +141,7 @@ void bcmasp_flush_rx_port(struct bcmasp_intf *intf) return; } - rx_ctrl_core_wl(priv, mask, priv->hw_info->rx_ctrl_flush); + rx_ctrl_core_wl(priv, mask, ASP_RX_CTRL_FLUSH); } static void bcmasp_netfilt_hw_en_wake(struct bcmasp_priv *priv, @@ -518,7 +518,7 @@ void bcmasp_netfilt_suspend(struct bcmasp_intf *intf) int ret, i; /* Write all filters to HW */ - for (i = 0; i < NUM_NET_FILTERS; i++) { + for (i = 0; i < priv->num_net_filters; i++) { /* If the filter does not match the port, skip programming. 
*/ if (!priv->net_filters[i].claimed || priv->net_filters[i].port != intf->port) @@ -551,7 +551,7 @@ int bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs, struct bcmasp_priv *priv = intf->parent; int j = 0, i; - for (i = 0; i < NUM_NET_FILTERS; i++) { + for (i = 0; i < priv->num_net_filters; i++) { if (!priv->net_filters[i].claimed || priv->net_filters[i].port != intf->port) continue; @@ -577,7 +577,7 @@ int bcmasp_netfilt_get_active(struct bcmasp_intf *intf) struct bcmasp_priv *priv = intf->parent; int cnt = 0, i; - for (i = 0; i < NUM_NET_FILTERS; i++) { + for (i = 0; i < priv->num_net_filters; i++) { if (!priv->net_filters[i].claimed || priv->net_filters[i].port != intf->port) continue; @@ -602,7 +602,7 @@ bool bcmasp_netfilt_check_dup(struct bcmasp_intf *intf, size_t fs_size = 0; int i; - for (i = 0; i < NUM_NET_FILTERS; i++) { + for (i = 0; i < priv->num_net_filters; i++) { if (!priv->net_filters[i].claimed || priv->net_filters[i].port != intf->port) continue; @@ -670,7 +670,7 @@ struct bcmasp_net_filter *bcmasp_netfilt_get_init(struct bcmasp_intf *intf, int i, open_index = -1; /* Check whether we exceed the filter table capacity */ - if (loc != RX_CLS_LOC_ANY && loc >= NUM_NET_FILTERS) + if (loc != RX_CLS_LOC_ANY && loc >= priv->num_net_filters) return ERR_PTR(-EINVAL); /* If the filter location is busy (already claimed) and we are initializing @@ -686,7 +686,7 @@ struct bcmasp_net_filter *bcmasp_netfilt_get_init(struct bcmasp_intf *intf, /* Initialize the loop index based on the desired location or from 0 */ i = loc == RX_CLS_LOC_ANY ? 0 : loc; - for ( ; i < NUM_NET_FILTERS; i++) { + for ( ; i < priv->num_net_filters; i++) { /* Found matching network filter */ if (!init && priv->net_filters[i].claimed && @@ -779,7 +779,7 @@ static void bcmasp_en_mda_filter(struct bcmasp_intf *intf, bool en, priv->mda_filters[i].en = en; priv->mda_filters[i].port = intf->port; - rx_filter_core_wl(priv, ((intf->channel + 8) | + rx_filter_core_wl(priv, ((intf->channel + priv->tx_chan_offset) | (en << ASP_RX_FILTER_MDA_CFG_EN_SHIFT) | ASP_RX_FILTER_MDA_CFG_UMC_SEL(intf->port)), ASP_RX_FILTER_MDA_CFG(i)); @@ -865,7 +865,7 @@ void bcmasp_disable_all_filters(struct bcmasp_intf *intf) res_count = bcmasp_total_res_mda_cnt(intf->parent); /* Disable all filters held by this port */ - for (i = res_count; i < NUM_MDA_FILTERS; i++) { + for (i = res_count; i < priv->num_mda_filters; i++) { if (priv->mda_filters[i].en && priv->mda_filters[i].port == intf->port) bcmasp_en_mda_filter(intf, 0, i); @@ -909,7 +909,7 @@ int bcmasp_set_en_mda_filter(struct bcmasp_intf *intf, unsigned char *addr, res_count = bcmasp_total_res_mda_cnt(intf->parent); - for (i = res_count; i < NUM_MDA_FILTERS; i++) { + for (i = res_count; i < priv->num_mda_filters; i++) { /* If filter not enabled or belongs to another port skip */ if (!priv->mda_filters[i].en || priv->mda_filters[i].port != intf->port) @@ -924,7 +924,7 @@ int bcmasp_set_en_mda_filter(struct bcmasp_intf *intf, unsigned char *addr, } /* Create new filter if possible */ - for (i = res_count; i < NUM_MDA_FILTERS; i++) { + for (i = res_count; i < priv->num_mda_filters; i++) { if (priv->mda_filters[i].en) continue; @@ -944,12 +944,12 @@ static void bcmasp_core_init_filters(struct bcmasp_priv *priv) /* Disable all filters and reset software view since the HW * can lose context while in deep sleep suspend states */ - for (i = 0; i < NUM_MDA_FILTERS; i++) { + for (i = 0; i < priv->num_mda_filters; i++) { rx_filter_core_wl(priv, 0x0, ASP_RX_FILTER_MDA_CFG(i)); 
priv->mda_filters[i].en = 0; } - for (i = 0; i < NUM_NET_FILTERS; i++) + for (i = 0; i < priv->num_net_filters; i++) rx_filter_core_wl(priv, 0x0, ASP_RX_FILTER_NET_CFG(i)); /* Top level filter enable bit should be enabled at all times, set @@ -966,18 +966,8 @@ static void bcmasp_core_init_filters(struct bcmasp_priv *priv) /* ASP core initialization */ static void bcmasp_core_init(struct bcmasp_priv *priv) { - tx_analytics_core_wl(priv, 0x0, ASP_TX_ANALYTICS_CTRL); - rx_analytics_core_wl(priv, 0x4, ASP_RX_ANALYTICS_CTRL); - - rx_edpkt_core_wl(priv, (ASP_EDPKT_HDR_SZ_128 << ASP_EDPKT_HDR_SZ_SHIFT), - ASP_EDPKT_HDR_CFG); - rx_edpkt_core_wl(priv, - (ASP_EDPKT_ENDI_BT_SWP_WD << ASP_EDPKT_ENDI_DESC_SHIFT), - ASP_EDPKT_ENDI); - rx_edpkt_core_wl(priv, 0x1b, ASP_EDPKT_BURST_BUF_PSCAL_TOUT); rx_edpkt_core_wl(priv, 0x3e8, ASP_EDPKT_BURST_BUF_WRITE_TOUT); - rx_edpkt_core_wl(priv, 0x3e8, ASP_EDPKT_BURST_BUF_READ_TOUT); rx_edpkt_core_wl(priv, ASP_EDPKT_ENABLE_EN, ASP_EDPKT_ENABLE); @@ -1020,6 +1010,18 @@ static void bcmasp_core_clock_select_one(struct bcmasp_priv *priv, bool slow) ctrl_core_wl(priv, reg, ASP_CTRL_CORE_CLOCK_SELECT); } +static void bcmasp_core_clock_select_one_ctrl2(struct bcmasp_priv *priv, bool slow) +{ + u32 reg; + + reg = ctrl2_core_rl(priv, ASP_CTRL2_CORE_CLOCK_SELECT); + if (slow) + reg &= ~ASP_CTRL2_CORE_CLOCK_SELECT_MAIN; + else + reg |= ASP_CTRL2_CORE_CLOCK_SELECT_MAIN; + ctrl2_core_wl(priv, reg, ASP_CTRL2_CORE_CLOCK_SELECT); +} + static void bcmasp_core_clock_set_ll(struct bcmasp_priv *priv, u32 clr, u32 set) { u32 reg; @@ -1108,7 +1110,7 @@ static int bcmasp_get_and_request_irq(struct bcmasp_priv *priv, int i) return irq; } -static void bcmasp_init_wol_shared(struct bcmasp_priv *priv) +static void bcmasp_init_wol(struct bcmasp_priv *priv) { struct platform_device *pdev = priv->pdev; struct device *dev = &pdev->dev; @@ -1125,7 +1127,7 @@ static void bcmasp_init_wol_shared(struct bcmasp_priv *priv) device_set_wakeup_capable(&pdev->dev, 1); } -static void bcmasp_enable_wol_shared(struct bcmasp_intf *intf, bool en) +void bcmasp_enable_wol(struct bcmasp_intf *intf, bool en) { struct bcmasp_priv *priv = intf->parent; struct device *dev = &priv->pdev->dev; @@ -1154,54 +1156,12 @@ static void bcmasp_enable_wol_shared(struct bcmasp_intf *intf, bool en) } } -static void bcmasp_wol_irq_destroy_shared(struct bcmasp_priv *priv) +static void bcmasp_wol_irq_destroy(struct bcmasp_priv *priv) { if (priv->wol_irq > 0) free_irq(priv->wol_irq, priv); } -static void bcmasp_init_wol_per_intf(struct bcmasp_priv *priv) -{ - struct platform_device *pdev = priv->pdev; - struct device *dev = &pdev->dev; - struct bcmasp_intf *intf; - int irq; - - list_for_each_entry(intf, &priv->intfs, list) { - irq = bcmasp_get_and_request_irq(priv, intf->port + 1); - if (irq < 0) { - dev_warn(dev, "Failed to init WoL irq(port %d): %d\n", - intf->port, irq); - continue; - } - - intf->wol_irq = irq; - intf->wol_irq_enabled = false; - device_set_wakeup_capable(&pdev->dev, 1); - } -} - -static void bcmasp_enable_wol_per_intf(struct bcmasp_intf *intf, bool en) -{ - struct device *dev = &intf->parent->pdev->dev; - - if (en ^ intf->wol_irq_enabled) - irq_set_irq_wake(intf->wol_irq, en); - - intf->wol_irq_enabled = en; - device_set_wakeup_enable(dev, en); -} - -static void bcmasp_wol_irq_destroy_per_intf(struct bcmasp_priv *priv) -{ - struct bcmasp_intf *intf; - - list_for_each_entry(intf, &priv->intfs, list) { - if (intf->wol_irq > 0) - free_irq(intf->wol_irq, priv); - } -} - static void bcmasp_eee_fixup(struct bcmasp_intf 
*intf, bool en) { u32 reg, phy_lpi_overwrite; @@ -1220,70 +1180,53 @@ static void bcmasp_eee_fixup(struct bcmasp_intf *intf, bool en) usleep_range(50, 100); } -static struct bcmasp_hw_info v20_hw_info = { - .rx_ctrl_flush = ASP_RX_CTRL_FLUSH, - .umac2fb = UMAC2FB_OFFSET, - .rx_ctrl_fb_out_frame_count = ASP_RX_CTRL_FB_OUT_FRAME_COUNT, - .rx_ctrl_fb_filt_out_frame_count = ASP_RX_CTRL_FB_FILT_OUT_FRAME_COUNT, - .rx_ctrl_fb_rx_fifo_depth = ASP_RX_CTRL_FB_RX_FIFO_DEPTH, -}; - -static const struct bcmasp_plat_data v20_plat_data = { - .init_wol = bcmasp_init_wol_per_intf, - .enable_wol = bcmasp_enable_wol_per_intf, - .destroy_wol = bcmasp_wol_irq_destroy_per_intf, - .core_clock_select = bcmasp_core_clock_select_one, - .hw_info = &v20_hw_info, -}; - -static struct bcmasp_hw_info v21_hw_info = { - .rx_ctrl_flush = ASP_RX_CTRL_FLUSH_2_1, - .umac2fb = UMAC2FB_OFFSET_2_1, - .rx_ctrl_fb_out_frame_count = ASP_RX_CTRL_FB_OUT_FRAME_COUNT_2_1, - .rx_ctrl_fb_filt_out_frame_count = - ASP_RX_CTRL_FB_FILT_OUT_FRAME_COUNT_2_1, - .rx_ctrl_fb_rx_fifo_depth = ASP_RX_CTRL_FB_RX_FIFO_DEPTH_2_1, -}; - static const struct bcmasp_plat_data v21_plat_data = { - .init_wol = bcmasp_init_wol_shared, - .enable_wol = bcmasp_enable_wol_shared, - .destroy_wol = bcmasp_wol_irq_destroy_shared, .core_clock_select = bcmasp_core_clock_select_one, - .hw_info = &v21_hw_info, + .num_mda_filters = 32, + .num_net_filters = 32, + .tx_chan_offset = 8, + .rx_ctrl_offset = 0x0, }; static const struct bcmasp_plat_data v22_plat_data = { - .init_wol = bcmasp_init_wol_shared, - .enable_wol = bcmasp_enable_wol_shared, - .destroy_wol = bcmasp_wol_irq_destroy_shared, .core_clock_select = bcmasp_core_clock_select_many, - .hw_info = &v21_hw_info, .eee_fixup = bcmasp_eee_fixup, + .num_mda_filters = 32, + .num_net_filters = 32, + .tx_chan_offset = 8, + .rx_ctrl_offset = 0x0, +}; + +static const struct bcmasp_plat_data v30_plat_data = { + .core_clock_select = bcmasp_core_clock_select_one_ctrl2, + .num_mda_filters = 20, + .num_net_filters = 16, + .tx_chan_offset = 0, + .rx_ctrl_offset = 0x10000, }; static void bcmasp_set_pdata(struct bcmasp_priv *priv, const struct bcmasp_plat_data *pdata) { - priv->init_wol = pdata->init_wol; - priv->enable_wol = pdata->enable_wol; - priv->destroy_wol = pdata->destroy_wol; priv->core_clock_select = pdata->core_clock_select; priv->eee_fixup = pdata->eee_fixup; - priv->hw_info = pdata->hw_info; + priv->num_mda_filters = pdata->num_mda_filters; + priv->num_net_filters = pdata->num_net_filters; + priv->tx_chan_offset = pdata->tx_chan_offset; + priv->rx_ctrl_offset = pdata->rx_ctrl_offset; } static const struct of_device_id bcmasp_of_match[] = { - { .compatible = "brcm,asp-v2.0", .data = &v20_plat_data }, { .compatible = "brcm,asp-v2.1", .data = &v21_plat_data }, { .compatible = "brcm,asp-v2.2", .data = &v22_plat_data }, + { .compatible = "brcm,asp-v3.0", .data = &v30_plat_data }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, bcmasp_of_match); static const struct of_device_id bcmasp_mdio_of_match[] = { - { .compatible = "brcm,asp-v2.2-mdio", }, { .compatible = "brcm,asp-v2.1-mdio", }, - { .compatible = "brcm,asp-v2.0-mdio", }, + { .compatible = "brcm,asp-v2.2-mdio", }, + { .compatible = "brcm,asp-v3.0-mdio", }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, bcmasp_mdio_of_match); @@ -1365,6 +1308,17 @@ static int bcmasp_probe(struct platform_device *pdev) * how many interfaces come up. 
*/ bcmasp_core_init(priv); + + priv->mda_filters = devm_kcalloc(dev, priv->num_mda_filters, + sizeof(*priv->mda_filters), GFP_KERNEL); + if (!priv->mda_filters) + return -ENOMEM; + + priv->net_filters = devm_kcalloc(dev, priv->num_net_filters, + sizeof(*priv->net_filters), GFP_KERNEL); + if (!priv->net_filters) + return -ENOMEM; + bcmasp_core_init_filters(priv); ports_node = of_find_node_by_name(dev->of_node, "ethernet-ports"); @@ -1387,7 +1341,7 @@ static int bcmasp_probe(struct platform_device *pdev) } /* Check and enable WoL */ - priv->init_wol(priv); + bcmasp_init_wol(priv); /* Drop the clock reference count now and let ndo_open()/ndo_close() * manage it for us from now on. @@ -1404,7 +1358,7 @@ static int bcmasp_probe(struct platform_device *pdev) if (ret) { netdev_err(intf->ndev, "failed to register net_device: %d\n", ret); - priv->destroy_wol(priv); + bcmasp_wol_irq_destroy(priv); bcmasp_remove_intfs(priv); goto of_put_exit; } @@ -1425,7 +1379,7 @@ static void bcmasp_remove(struct platform_device *pdev) if (!priv) return; - priv->destroy_wol(priv); + bcmasp_wol_irq_destroy(priv); bcmasp_remove_intfs(priv); } diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.h b/drivers/net/ethernet/broadcom/asp2/bcmasp.h index 8fc75bcedb70..74adfdb50e11 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.h +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.h @@ -53,22 +53,15 @@ #define ASP_RX_CTRL_FB_0_FRAME_COUNT 0x14 #define ASP_RX_CTRL_FB_1_FRAME_COUNT 0x18 #define ASP_RX_CTRL_FB_8_FRAME_COUNT 0x1c -/* asp2.1 diverges offsets here */ -/* ASP2.0 */ -#define ASP_RX_CTRL_FB_OUT_FRAME_COUNT 0x20 -#define ASP_RX_CTRL_FB_FILT_OUT_FRAME_COUNT 0x24 -#define ASP_RX_CTRL_FLUSH 0x28 -#define ASP_CTRL_UMAC0_FLUSH_MASK (BIT(0) | BIT(12)) -#define ASP_CTRL_UMAC1_FLUSH_MASK (BIT(1) | BIT(13)) -#define ASP_CTRL_SPB_FLUSH_MASK (BIT(8) | BIT(20)) -#define ASP_RX_CTRL_FB_RX_FIFO_DEPTH 0x30 -/* ASP2.1 */ -#define ASP_RX_CTRL_FB_9_FRAME_COUNT_2_1 0x20 -#define ASP_RX_CTRL_FB_10_FRAME_COUNT_2_1 0x24 -#define ASP_RX_CTRL_FB_OUT_FRAME_COUNT_2_1 0x28 -#define ASP_RX_CTRL_FB_FILT_OUT_FRAME_COUNT_2_1 0x2c -#define ASP_RX_CTRL_FLUSH_2_1 0x30 -#define ASP_RX_CTRL_FB_RX_FIFO_DEPTH_2_1 0x38 +#define ASP_RX_CTRL_FB_9_FRAME_COUNT 0x20 +#define ASP_RX_CTRL_FB_10_FRAME_COUNT 0x24 +#define ASP_RX_CTRL_FB_OUT_FRAME_COUNT 0x28 +#define ASP_RX_CTRL_FB_FILT_OUT_FRAME_COUNT 0x2c +#define ASP_RX_CTRL_FLUSH 0x30 +#define ASP_CTRL_UMAC0_FLUSH_MASK (BIT(0) | BIT(12)) +#define ASP_CTRL_UMAC1_FLUSH_MASK (BIT(1) | BIT(13)) +#define ASP_CTRL_SPB_FLUSH_MASK (BIT(8) | BIT(20)) +#define ASP_RX_CTRL_FB_RX_FIFO_DEPTH 0x38 #define ASP_RX_FILTER_OFFSET 0x80000 #define ASP_RX_FILTER_BLK_CTRL 0x0 @@ -345,9 +338,6 @@ struct bcmasp_intf { u32 wolopts; u8 sopass[SOPASS_MAX]; - /* Used if per intf wol irq */ - int wol_irq; - unsigned int wol_irq_enabled:1; }; #define NUM_NET_FILTERS 32 @@ -370,21 +360,13 @@ struct bcmasp_mda_filter { u8 mask[ETH_ALEN]; }; -struct bcmasp_hw_info { - u32 rx_ctrl_flush; - u32 umac2fb; - u32 rx_ctrl_fb_out_frame_count; - u32 rx_ctrl_fb_filt_out_frame_count; - u32 rx_ctrl_fb_rx_fifo_depth; -}; - struct bcmasp_plat_data { - void (*init_wol)(struct bcmasp_priv *priv); - void (*enable_wol)(struct bcmasp_intf *intf, bool en); - void (*destroy_wol)(struct bcmasp_priv *priv); void (*core_clock_select)(struct bcmasp_priv *priv, bool slow); void (*eee_fixup)(struct bcmasp_intf *priv, bool en); - struct bcmasp_hw_info *hw_info; + unsigned int num_mda_filters; + unsigned int num_net_filters; + unsigned int tx_chan_offset; + unsigned 
int rx_ctrl_offset; }; struct bcmasp_priv { @@ -399,18 +381,18 @@ struct bcmasp_priv { int wol_irq; unsigned long wol_irq_enabled_mask; - void (*init_wol)(struct bcmasp_priv *priv); - void (*enable_wol)(struct bcmasp_intf *intf, bool en); - void (*destroy_wol)(struct bcmasp_priv *priv); void (*core_clock_select)(struct bcmasp_priv *priv, bool slow); void (*eee_fixup)(struct bcmasp_intf *intf, bool en); + unsigned int num_mda_filters; + unsigned int num_net_filters; + unsigned int tx_chan_offset; + unsigned int rx_ctrl_offset; void __iomem *base; - struct bcmasp_hw_info *hw_info; struct list_head intfs; - struct bcmasp_mda_filter mda_filters[NUM_MDA_FILTERS]; + struct bcmasp_mda_filter *mda_filters; /* MAC destination address filters lock */ spinlock_t mda_lock; @@ -418,7 +400,7 @@ struct bcmasp_priv { /* Protects accesses to ASP_CTRL_CLOCK_CTRL */ spinlock_t clk_lock; - struct bcmasp_net_filter net_filters[NUM_NET_FILTERS]; + struct bcmasp_net_filter *net_filters; /* Network filter lock */ struct mutex net_lock; @@ -508,8 +490,8 @@ BCMASP_FP_IO_MACRO_Q(rx_edpkt_cfg); #define PKT_OFFLOAD_EPKT_IP(x) ((x) << 21) #define PKT_OFFLOAD_EPKT_TP(x) ((x) << 19) #define PKT_OFFLOAD_EPKT_LEN(x) ((x) << 16) -#define PKT_OFFLOAD_EPKT_CSUM_L3 BIT(15) -#define PKT_OFFLOAD_EPKT_CSUM_L2 BIT(14) +#define PKT_OFFLOAD_EPKT_CSUM_L4 BIT(15) +#define PKT_OFFLOAD_EPKT_CSUM_L3 BIT(14) #define PKT_OFFLOAD_EPKT_ID(x) ((x) << 12) #define PKT_OFFLOAD_EPKT_SEQ(x) ((x) << 10) #define PKT_OFFLOAD_EPKT_TS(x) ((x) << 8) @@ -541,12 +523,27 @@ BCMASP_CORE_IO_MACRO(intr2, ASP_INTR2_OFFSET); BCMASP_CORE_IO_MACRO(wakeup_intr2, ASP_WAKEUP_INTR2_OFFSET); BCMASP_CORE_IO_MACRO(tx_analytics, ASP_TX_ANALYTICS_OFFSET); BCMASP_CORE_IO_MACRO(rx_analytics, ASP_RX_ANALYTICS_OFFSET); -BCMASP_CORE_IO_MACRO(rx_ctrl, ASP_RX_CTRL_OFFSET); BCMASP_CORE_IO_MACRO(rx_filter, ASP_RX_FILTER_OFFSET); BCMASP_CORE_IO_MACRO(rx_edpkt, ASP_EDPKT_OFFSET); BCMASP_CORE_IO_MACRO(ctrl, ASP_CTRL_OFFSET); BCMASP_CORE_IO_MACRO(ctrl2, ASP_CTRL2_OFFSET); +#define BCMASP_CORE_IO_MACRO_OFFSET(name, offset) \ +static inline u32 name##_core_rl(struct bcmasp_priv *priv, \ + u32 off) \ +{ \ + u32 reg = readl_relaxed(priv->base + priv->name##_offset + \ + (offset) + off); \ + return reg; \ +} \ +static inline void name##_core_wl(struct bcmasp_priv *priv, \ + u32 val, u32 off) \ +{ \ + writel_relaxed(val, priv->base + priv->name##_offset + \ + (offset) + off); \ +} +BCMASP_CORE_IO_MACRO_OFFSET(rx_ctrl, ASP_RX_CTRL_OFFSET); + struct bcmasp_intf *bcmasp_interface_create(struct bcmasp_priv *priv, struct device_node *ndev_dn, int i); @@ -599,4 +596,5 @@ int bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs, void bcmasp_netfilt_suspend(struct bcmasp_intf *intf); +void bcmasp_enable_wol(struct bcmasp_intf *intf, bool en); #endif diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c index a537c121d3e2..4381a4cfd8c6 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c @@ -10,7 +10,6 @@ #include "bcmasp_intf_defs.h" enum bcmasp_stat_type { - BCMASP_STAT_RX_EDPKT, BCMASP_STAT_RX_CTRL, BCMASP_STAT_RX_CTRL_PER_INTF, BCMASP_STAT_SOFT, @@ -33,8 +32,6 @@ struct bcmasp_stats { .reg_offset = offset, \ } -#define STAT_BCMASP_RX_EDPKT(str, offset) \ - STAT_BCMASP_OFFSET(str, BCMASP_STAT_RX_EDPKT, offset) #define STAT_BCMASP_RX_CTRL(str, offset) \ STAT_BCMASP_OFFSET(str, BCMASP_STAT_RX_CTRL, offset) #define STAT_BCMASP_RX_CTRL_PER_INTF(str, 
offset) \ @@ -42,11 +39,6 @@ struct bcmasp_stats { /* Must match the order of struct bcmasp_mib_counters */ static const struct bcmasp_stats bcmasp_gstrings_stats[] = { - /* EDPKT counters */ - STAT_BCMASP_RX_EDPKT("RX Time Stamp", ASP_EDPKT_RX_TS_COUNTER), - STAT_BCMASP_RX_EDPKT("RX PKT Count", ASP_EDPKT_RX_PKT_CNT), - STAT_BCMASP_RX_EDPKT("RX PKT Buffered", ASP_EDPKT_HDR_EXTR_CNT), - STAT_BCMASP_RX_EDPKT("RX PKT Pushed to DRAM", ASP_EDPKT_HDR_OUT_CNT), /* ASP RX control */ STAT_BCMASP_RX_CTRL_PER_INTF("Frames From Unimac", ASP_RX_CTRL_UMAC_0_FRAME_COUNT), @@ -71,23 +63,6 @@ static const struct bcmasp_stats bcmasp_gstrings_stats[] = { #define BCMASP_STATS_LEN ARRAY_SIZE(bcmasp_gstrings_stats) -static u16 bcmasp_stat_fixup_offset(struct bcmasp_intf *intf, - const struct bcmasp_stats *s) -{ - struct bcmasp_priv *priv = intf->parent; - - if (!strcmp("Frames Out(Buffer)", s->stat_string)) - return priv->hw_info->rx_ctrl_fb_out_frame_count; - - if (!strcmp("Frames Out(Filters)", s->stat_string)) - return priv->hw_info->rx_ctrl_fb_filt_out_frame_count; - - if (!strcmp("RX Buffer FIFO Depth", s->stat_string)) - return priv->hw_info->rx_ctrl_fb_rx_fifo_depth; - - return s->reg_offset; -} - static int bcmasp_get_sset_count(struct net_device *dev, int string_set) { switch (string_set) { @@ -126,13 +101,10 @@ static void bcmasp_update_mib_counters(struct bcmasp_intf *intf) char *p; s = &bcmasp_gstrings_stats[i]; - offset = bcmasp_stat_fixup_offset(intf, s); + offset = s->reg_offset; switch (s->type) { case BCMASP_STAT_SOFT: continue; - case BCMASP_STAT_RX_EDPKT: - val = rx_edpkt_core_rl(intf->parent, offset); - break; case BCMASP_STAT_RX_CTRL: val = rx_ctrl_core_rl(intf->parent, offset); break; @@ -215,7 +187,7 @@ static int bcmasp_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) memcpy(intf->sopass, wol->sopass, sizeof(wol->sopass)); mutex_lock(&priv->wol_lock); - priv->enable_wol(intf, !!intf->wolopts); + bcmasp_enable_wol(intf, !!intf->wolopts); mutex_unlock(&priv->wol_lock); return 0; @@ -289,7 +261,7 @@ static int bcmasp_flow_get(struct bcmasp_intf *intf, struct ethtool_rxnfc *cmd) memcpy(&cmd->fs, &nfilter->fs, sizeof(nfilter->fs)); - cmd->data = NUM_NET_FILTERS; + cmd->data = intf->parent->num_net_filters; return 0; } @@ -336,7 +308,7 @@ static int bcmasp_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, break; case ETHTOOL_GRXCLSRLALL: err = bcmasp_netfilt_get_all_active(intf, rule_locs, &cmd->rule_cnt); - cmd->data = NUM_NET_FILTERS; + cmd->data = intf->parent->num_net_filters; break; default: err = -EOPNOTSUPP; diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c index 45ec1a9214a2..0d61b8580d72 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c @@ -180,14 +180,14 @@ static struct sk_buff *bcmasp_csum_offload(struct net_device *dev, case htons(ETH_P_IP): header |= PKT_OFFLOAD_HDR_SIZE_2((ip_hdrlen(skb) >> 8) & 0xf); header2 |= PKT_OFFLOAD_HDR2_SIZE_2(ip_hdrlen(skb) & 0xff); - epkt |= PKT_OFFLOAD_EPKT_IP(0) | PKT_OFFLOAD_EPKT_CSUM_L2; + epkt |= PKT_OFFLOAD_EPKT_IP(0); ip_proto = ip_hdr(skb)->protocol; header_cnt += 2; break; case htons(ETH_P_IPV6): header |= PKT_OFFLOAD_HDR_SIZE_2((IP6_HLEN >> 8) & 0xf); header2 |= PKT_OFFLOAD_HDR2_SIZE_2(IP6_HLEN & 0xff); - epkt |= PKT_OFFLOAD_EPKT_IP(1) | PKT_OFFLOAD_EPKT_CSUM_L2; + epkt |= PKT_OFFLOAD_EPKT_IP(1); ip_proto = ipv6_hdr(skb)->nexthdr; header_cnt += 2; break; @@ -198,12 +198,12 @@ static struct 
sk_buff *bcmasp_csum_offload(struct net_device *dev, switch (ip_proto) { case IPPROTO_TCP: header2 |= PKT_OFFLOAD_HDR2_SIZE_3(tcp_hdrlen(skb)); - epkt |= PKT_OFFLOAD_EPKT_TP(0) | PKT_OFFLOAD_EPKT_CSUM_L3; + epkt |= PKT_OFFLOAD_EPKT_TP(0) | PKT_OFFLOAD_EPKT_CSUM_L4; header_cnt++; break; case IPPROTO_UDP: header2 |= PKT_OFFLOAD_HDR2_SIZE_3(UDP_HLEN); - epkt |= PKT_OFFLOAD_EPKT_TP(1) | PKT_OFFLOAD_EPKT_CSUM_L3; + epkt |= PKT_OFFLOAD_EPKT_TP(1) | PKT_OFFLOAD_EPKT_CSUM_L4; header_cnt++; break; default: @@ -818,9 +818,7 @@ static void bcmasp_init_tx(struct bcmasp_intf *intf) /* Tx SPB */ tx_spb_ctrl_wl(intf, ((intf->channel + 8) << TX_SPB_CTRL_XF_BID_SHIFT), TX_SPB_CTRL_XF_CTRL2); - tx_pause_ctrl_wl(intf, (1 << (intf->channel + 8)), TX_PAUSE_MAP_VECTOR); tx_spb_top_wl(intf, 0x1e, TX_SPB_TOP_BLKOUT); - tx_spb_top_wl(intf, 0x0, TX_SPB_TOP_SPRE_BW_CTRL); tx_spb_dma_wq(intf, intf->tx_spb_dma_addr, TX_SPB_DMA_READ); tx_spb_dma_wq(intf, intf->tx_spb_dma_addr, TX_SPB_DMA_BASE); @@ -1185,7 +1183,7 @@ static void bcmasp_map_res(struct bcmasp_priv *priv, struct bcmasp_intf *intf) { /* Per port */ intf->res.umac = priv->base + UMC_OFFSET(intf); - intf->res.umac2fb = priv->base + (priv->hw_info->umac2fb + + intf->res.umac2fb = priv->base + (UMAC2FB_OFFSET + priv->rx_ctrl_offset + (intf->port * 0x4)); intf->res.rgmii = priv->base + RGMII_OFFSET(intf); @@ -1200,7 +1198,6 @@ static void bcmasp_map_res(struct bcmasp_priv *priv, struct bcmasp_intf *intf) intf->rx_edpkt_cfg = priv->base + RX_EDPKT_CFG_OFFSET(intf); } -#define MAX_IRQ_STR_LEN 64 struct bcmasp_intf *bcmasp_interface_create(struct bcmasp_priv *priv, struct device_node *ndev_dn, int i) { diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h index ad742612895f..af7418348e81 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h @@ -118,8 +118,7 @@ #define UMC_PSW_MS 0x624 #define UMC_PSW_LS 0x628 -#define UMAC2FB_OFFSET_2_1 0x9f044 -#define UMAC2FB_OFFSET 0x9f03c +#define UMAC2FB_OFFSET 0x9f044 #define UMAC2FB_CFG 0x0 #define UMAC2FB_CFG_OPUT_EN BIT(0) #define UMAC2FB_CFG_VLAN_EN BIT(1) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6afc2ab6fad2..d5495762c945 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -893,9 +893,9 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) bnapi->events &= ~BNXT_TX_CMP_EVENT; } -static bool bnxt_separate_head_pool(void) +static bool bnxt_separate_head_pool(struct bnxt_rx_ring_info *rxr) { - return PAGE_SIZE > BNXT_RX_PAGE_SIZE; + return rxr->need_head_pool || PAGE_SIZE > BNXT_RX_PAGE_SIZE; } static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, @@ -919,6 +919,20 @@ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, return page; } +static netmem_ref __bnxt_alloc_rx_netmem(struct bnxt *bp, dma_addr_t *mapping, + struct bnxt_rx_ring_info *rxr, + gfp_t gfp) +{ + netmem_ref netmem; + + netmem = page_pool_alloc_netmems(rxr->page_pool, gfp); + if (!netmem) + return 0; + + *mapping = page_pool_get_dma_addr_netmem(netmem); + return netmem; +} + static inline u8 *__bnxt_alloc_rx_frag(struct bnxt *bp, dma_addr_t *mapping, struct bnxt_rx_ring_info *rxr, gfp_t gfp) @@ -999,21 +1013,19 @@ static inline u16 bnxt_find_next_agg_idx(struct bnxt_rx_ring_info *rxr, u16 idx) return next; } -static inline int 
bnxt_alloc_rx_page(struct bnxt *bp, - struct bnxt_rx_ring_info *rxr, - u16 prod, gfp_t gfp) +static int bnxt_alloc_rx_netmem(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, + u16 prod, gfp_t gfp) { struct rx_bd *rxbd = &rxr->rx_agg_desc_ring[RX_AGG_RING(bp, prod)][RX_IDX(prod)]; struct bnxt_sw_rx_agg_bd *rx_agg_buf; - struct page *page; - dma_addr_t mapping; u16 sw_prod = rxr->rx_sw_agg_prod; unsigned int offset = 0; + dma_addr_t mapping; + netmem_ref netmem; - page = __bnxt_alloc_rx_page(bp, &mapping, rxr, &offset, gfp); - - if (!page) + netmem = __bnxt_alloc_rx_netmem(bp, &mapping, rxr, gfp); + if (!netmem) return -ENOMEM; if (unlikely(test_bit(sw_prod, rxr->rx_agg_bmap))) @@ -1023,7 +1035,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp, rx_agg_buf = &rxr->rx_agg_ring[sw_prod]; rxr->rx_sw_agg_prod = RING_RX_AGG(bp, NEXT_RX_AGG(sw_prod)); - rx_agg_buf->page = page; + rx_agg_buf->netmem = netmem; rx_agg_buf->offset = offset; rx_agg_buf->mapping = mapping; rxbd->rx_bd_haddr = cpu_to_le64(mapping); @@ -1067,11 +1079,11 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx, p5_tpa = true; for (i = 0; i < agg_bufs; i++) { - u16 cons; - struct rx_agg_cmp *agg; struct bnxt_sw_rx_agg_bd *cons_rx_buf, *prod_rx_buf; + struct rx_agg_cmp *agg; struct rx_bd *prod_bd; - struct page *page; + netmem_ref netmem; + u16 cons; if (p5_tpa) agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, start + i); @@ -1088,11 +1100,11 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx, cons_rx_buf = &rxr->rx_agg_ring[cons]; /* It is possible for sw_prod to be equal to cons, so - * set cons_rx_buf->page to NULL first. + * set cons_rx_buf->netmem to 0 first. */ - page = cons_rx_buf->page; - cons_rx_buf->page = NULL; - prod_rx_buf->page = page; + netmem = cons_rx_buf->netmem; + cons_rx_buf->netmem = 0; + prod_rx_buf->netmem = netmem; prod_rx_buf->offset = cons_rx_buf->offset; prod_rx_buf->mapping = cons_rx_buf->mapping; @@ -1218,29 +1230,35 @@ static struct sk_buff *bnxt_rx_skb(struct bnxt *bp, return skb; } -static u32 __bnxt_rx_agg_pages(struct bnxt *bp, - struct bnxt_cp_ring_info *cpr, - struct skb_shared_info *shinfo, - u16 idx, u32 agg_bufs, bool tpa, - struct xdp_buff *xdp) +static u32 __bnxt_rx_agg_netmems(struct bnxt *bp, + struct bnxt_cp_ring_info *cpr, + u16 idx, u32 agg_bufs, bool tpa, + struct sk_buff *skb, + struct xdp_buff *xdp) { struct bnxt_napi *bnapi = cpr->bnapi; - struct pci_dev *pdev = bp->pdev; - struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; - u16 prod = rxr->rx_agg_prod; + struct skb_shared_info *shinfo; + struct bnxt_rx_ring_info *rxr; u32 i, total_frag_len = 0; bool p5_tpa = false; + u16 prod; + + rxr = bnapi->rx_ring; + prod = rxr->rx_agg_prod; if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && tpa) p5_tpa = true; + if (skb) + shinfo = skb_shinfo(skb); + else + shinfo = xdp_get_shared_info_from_buff(xdp); + for (i = 0; i < agg_bufs; i++) { - skb_frag_t *frag = &shinfo->frags[i]; - u16 cons, frag_len; - struct rx_agg_cmp *agg; struct bnxt_sw_rx_agg_bd *cons_rx_buf; - struct page *page; - dma_addr_t mapping; + struct rx_agg_cmp *agg; + u16 cons, frag_len; + netmem_ref netmem; if (p5_tpa) agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, i); @@ -1251,27 +1269,41 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp, RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT; cons_rx_buf = &rxr->rx_agg_ring[cons]; - skb_frag_fill_page_desc(frag, cons_rx_buf->page, - cons_rx_buf->offset, frag_len); - shinfo->nr_frags = i + 1; + if (skb) { + skb_add_rx_frag_netmem(skb, i, cons_rx_buf->netmem, + 
cons_rx_buf->offset, + frag_len, BNXT_RX_PAGE_SIZE); + } else { + skb_frag_t *frag = &shinfo->frags[i]; + + skb_frag_fill_netmem_desc(frag, cons_rx_buf->netmem, + cons_rx_buf->offset, + frag_len); + shinfo->nr_frags = i + 1; + } __clear_bit(cons, rxr->rx_agg_bmap); - /* It is possible for bnxt_alloc_rx_page() to allocate + /* It is possible for bnxt_alloc_rx_netmem() to allocate * a sw_prod index that equals the cons index, so we * need to clear the cons entry now. */ - mapping = cons_rx_buf->mapping; - page = cons_rx_buf->page; - cons_rx_buf->page = NULL; + netmem = cons_rx_buf->netmem; + cons_rx_buf->netmem = 0; - if (xdp && page_is_pfmemalloc(page)) + if (xdp && netmem_is_pfmemalloc(netmem)) xdp_buff_set_frag_pfmemalloc(xdp); - if (bnxt_alloc_rx_page(bp, rxr, prod, GFP_ATOMIC) != 0) { + if (bnxt_alloc_rx_netmem(bp, rxr, prod, GFP_ATOMIC) != 0) { + if (skb) { + skb->len -= frag_len; + skb->data_len -= frag_len; + skb->truesize -= BNXT_RX_PAGE_SIZE; + } + --shinfo->nr_frags; - cons_rx_buf->page = page; + cons_rx_buf->netmem = netmem; - /* Update prod since possibly some pages have been + /* Update prod since possibly some netmems have been * allocated already. */ rxr->rx_agg_prod = prod; @@ -1279,8 +1311,8 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp, return 0; } - dma_sync_single_for_cpu(&pdev->dev, mapping, BNXT_RX_PAGE_SIZE, - bp->rx_dir); + page_pool_dma_sync_netmem_for_cpu(rxr->page_pool, netmem, 0, + BNXT_RX_PAGE_SIZE); total_frag_len += frag_len; prod = NEXT_RX_AGG(prod); @@ -1289,32 +1321,28 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp, return total_frag_len; } -static struct sk_buff *bnxt_rx_agg_pages_skb(struct bnxt *bp, - struct bnxt_cp_ring_info *cpr, - struct sk_buff *skb, u16 idx, - u32 agg_bufs, bool tpa) +static struct sk_buff *bnxt_rx_agg_netmems_skb(struct bnxt *bp, + struct bnxt_cp_ring_info *cpr, + struct sk_buff *skb, u16 idx, + u32 agg_bufs, bool tpa) { - struct skb_shared_info *shinfo = skb_shinfo(skb); u32 total_frag_len = 0; - total_frag_len = __bnxt_rx_agg_pages(bp, cpr, shinfo, idx, - agg_bufs, tpa, NULL); + total_frag_len = __bnxt_rx_agg_netmems(bp, cpr, idx, agg_bufs, tpa, + skb, NULL); if (!total_frag_len) { skb_mark_for_recycle(skb); dev_kfree_skb(skb); return NULL; } - skb->data_len += total_frag_len; - skb->len += total_frag_len; - skb->truesize += BNXT_RX_PAGE_SIZE * agg_bufs; return skb; } -static u32 bnxt_rx_agg_pages_xdp(struct bnxt *bp, - struct bnxt_cp_ring_info *cpr, - struct xdp_buff *xdp, u16 idx, - u32 agg_bufs, bool tpa) +static u32 bnxt_rx_agg_netmems_xdp(struct bnxt *bp, + struct bnxt_cp_ring_info *cpr, + struct xdp_buff *xdp, u16 idx, + u32 agg_bufs, bool tpa) { struct skb_shared_info *shinfo = xdp_get_shared_info_from_buff(xdp); u32 total_frag_len = 0; @@ -1322,8 +1350,8 @@ static u32 bnxt_rx_agg_pages_xdp(struct bnxt *bp, if (!xdp_buff_has_frags(xdp)) shinfo->nr_frags = 0; - total_frag_len = __bnxt_rx_agg_pages(bp, cpr, shinfo, - idx, agg_bufs, tpa, xdp); + total_frag_len = __bnxt_rx_agg_netmems(bp, cpr, idx, agg_bufs, tpa, + NULL, xdp); if (total_frag_len) { xdp_buff_set_frags_flag(xdp); shinfo->nr_frags = agg_bufs; @@ -1895,7 +1923,8 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, } if (agg_bufs) { - skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, idx, agg_bufs, true); + skb = bnxt_rx_agg_netmems_skb(bp, cpr, skb, idx, agg_bufs, + true); if (!skb) { /* Page reuse already handled by bnxt_rx_pages(). 
*/ cpr->sw_stats->rx.rx_oom_discards += 1; @@ -2176,9 +2205,10 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, if (bnxt_xdp_attached(bp, rxr)) { bnxt_xdp_buff_init(bp, rxr, cons, data_ptr, len, &xdp); if (agg_bufs) { - u32 frag_len = bnxt_rx_agg_pages_xdp(bp, cpr, &xdp, - cp_cons, agg_bufs, - false); + u32 frag_len = bnxt_rx_agg_netmems_xdp(bp, cpr, &xdp, + cp_cons, + agg_bufs, + false); if (!frag_len) goto oom_next_rx; @@ -2230,7 +2260,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, if (agg_bufs) { if (!xdp_active) { - skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, cp_cons, agg_bufs, false); + skb = bnxt_rx_agg_netmems_skb(bp, cpr, skb, cp_cons, + agg_bufs, false); if (!skb) goto oom_next_rx; } else { @@ -3449,15 +3480,15 @@ static void bnxt_free_one_rx_agg_ring(struct bnxt *bp, struct bnxt_rx_ring_info for (i = 0; i < max_idx; i++) { struct bnxt_sw_rx_agg_bd *rx_agg_buf = &rxr->rx_agg_ring[i]; - struct page *page = rx_agg_buf->page; + netmem_ref netmem = rx_agg_buf->netmem; - if (!page) + if (!netmem) continue; - rx_agg_buf->page = NULL; + rx_agg_buf->netmem = 0; __clear_bit(i, rxr->rx_agg_bmap); - page_pool_recycle_direct(rxr->page_pool, page); + page_pool_recycle_direct_netmem(rxr->page_pool, netmem); } } @@ -3750,7 +3781,7 @@ static void bnxt_free_rx_rings(struct bnxt *bp) xdp_rxq_info_unreg(&rxr->xdp_rxq); page_pool_destroy(rxr->page_pool); - if (bnxt_separate_head_pool()) + if (bnxt_separate_head_pool(rxr)) page_pool_destroy(rxr->head_pool); rxr->page_pool = rxr->head_pool = NULL; @@ -3781,15 +3812,19 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, pp.dev = &bp->pdev->dev; pp.dma_dir = bp->rx_dir; pp.max_len = PAGE_SIZE; - pp.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pp.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | + PP_FLAG_ALLOW_UNREADABLE_NETMEM; + pp.queue_idx = rxr->bnapi->index; pool = page_pool_create(&pp); if (IS_ERR(pool)) return PTR_ERR(pool); rxr->page_pool = pool; - if (bnxt_separate_head_pool()) { + rxr->need_head_pool = page_pool_is_unreadable(pool); + if (bnxt_separate_head_pool(rxr)) { pp.pool_size = max(bp->rx_ring_size, 1024); + pp.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; pool = page_pool_create(&pp); if (IS_ERR(pool)) goto err_destroy_pp; @@ -4201,6 +4236,8 @@ static void bnxt_reset_rx_ring_struct(struct bnxt *bp, rxr->page_pool->p.napi = NULL; rxr->page_pool = NULL; + rxr->head_pool->p.napi = NULL; + rxr->head_pool = NULL; memset(&rxr->xdp_rxq, 0, sizeof(struct xdp_rxq_info)); ring = &rxr->rx_ring_struct; @@ -4325,16 +4362,16 @@ static void bnxt_alloc_one_rx_ring_skb(struct bnxt *bp, rxr->rx_prod = prod; } -static void bnxt_alloc_one_rx_ring_page(struct bnxt *bp, - struct bnxt_rx_ring_info *rxr, - int ring_nr) +static void bnxt_alloc_one_rx_ring_netmem(struct bnxt *bp, + struct bnxt_rx_ring_info *rxr, + int ring_nr) { u32 prod; int i; prod = rxr->rx_agg_prod; for (i = 0; i < bp->rx_agg_ring_size; i++) { - if (bnxt_alloc_rx_page(bp, rxr, prod, GFP_KERNEL)) { + if (bnxt_alloc_rx_netmem(bp, rxr, prod, GFP_KERNEL)) { netdev_warn(bp->dev, "init'ed rx ring %d with %d/%d pages only\n", ring_nr, i, bp->rx_ring_size); break; @@ -4375,7 +4412,7 @@ static int bnxt_alloc_one_rx_ring(struct bnxt *bp, int ring_nr) if (!(bp->flags & BNXT_FLAG_AGG_RINGS)) return 0; - bnxt_alloc_one_rx_ring_page(bp, rxr, ring_nr); + bnxt_alloc_one_rx_ring_netmem(bp, rxr, ring_nr); if (rxr->rx_tpa) { rc = bnxt_alloc_one_tpa_info_data(bp, rxr); @@ -10077,7 +10114,7 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp) struct 
hwrm_ver_get_input *req; u16 fw_maj, fw_min, fw_bld, fw_rsv; u32 dev_caps_cfg, hwrm_ver; - int rc, len; + int rc, len, max_tmo_secs; rc = hwrm_req_init(bp, req, HWRM_VER_GET); if (rc) @@ -10150,9 +10187,14 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp) bp->hwrm_cmd_max_timeout = le16_to_cpu(resp->max_req_timeout) * 1000; if (!bp->hwrm_cmd_max_timeout) bp->hwrm_cmd_max_timeout = HWRM_CMD_MAX_TIMEOUT; - else if (bp->hwrm_cmd_max_timeout > HWRM_CMD_MAX_TIMEOUT) - netdev_warn(bp->dev, "Device requests max timeout of %d seconds, may trigger hung task watchdog\n", - bp->hwrm_cmd_max_timeout / 1000); + max_tmo_secs = bp->hwrm_cmd_max_timeout / 1000; +#ifdef CONFIG_DETECT_HUNG_TASK + if (bp->hwrm_cmd_max_timeout > HWRM_CMD_MAX_TIMEOUT || + max_tmo_secs > CONFIG_DEFAULT_HUNG_TASK_TIMEOUT) { + netdev_warn(bp->dev, "Device requests max timeout of %d seconds, may trigger hung task watchdog (kernel default %ds)\n", + max_tmo_secs, CONFIG_DEFAULT_HUNG_TASK_TIMEOUT); + } +#endif if (resp->hwrm_intf_maj_8b >= 1) { bp->hwrm_max_req_len = le16_to_cpu(resp->max_req_win_len); @@ -15729,6 +15771,7 @@ static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx) clone->rx_agg_prod = 0; clone->rx_sw_agg_prod = 0; clone->rx_next_cons = 0; + clone->need_head_pool = false; rc = bnxt_alloc_rx_page_pool(bp, clone, rxr->page_pool->p.nid); if (rc) @@ -15771,7 +15814,7 @@ static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx) bnxt_alloc_one_rx_ring_skb(bp, clone, idx); if (bp->flags & BNXT_FLAG_AGG_RINGS) - bnxt_alloc_one_rx_ring_page(bp, clone, idx); + bnxt_alloc_one_rx_ring_netmem(bp, clone, idx); if (bp->flags & BNXT_FLAG_TPA) bnxt_alloc_one_tpa_info_data(bp, clone); @@ -15787,7 +15830,7 @@ err_rxq_info_unreg: xdp_rxq_info_unreg(&clone->xdp_rxq); err_page_pool_destroy: page_pool_destroy(clone->page_pool); - if (bnxt_separate_head_pool()) + if (bnxt_separate_head_pool(clone)) page_pool_destroy(clone->head_pool); clone->page_pool = NULL; clone->head_pool = NULL; @@ -15806,7 +15849,7 @@ static void bnxt_queue_mem_free(struct net_device *dev, void *qmem) xdp_rxq_info_unreg(&rxr->xdp_rxq); page_pool_destroy(rxr->page_pool); - if (bnxt_separate_head_pool()) + if (bnxt_separate_head_pool(rxr)) page_pool_destroy(rxr->head_pool); rxr->page_pool = NULL; rxr->head_pool = NULL; @@ -15897,6 +15940,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) rxr->page_pool = clone->page_pool; rxr->head_pool = clone->head_pool; rxr->xdp_rxq = clone->xdp_rxq; + rxr->need_head_pool = clone->need_head_pool; bnxt_copy_rx_ring(bp, rxr, clone); @@ -15982,7 +16026,7 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) bnxt_hwrm_rx_ring_free(bp, rxr, false); bnxt_hwrm_rx_agg_ring_free(bp, rxr, false); page_pool_disable_direct_recycling(rxr->page_pool); - if (bnxt_separate_head_pool()) + if (bnxt_separate_head_pool(rxr)) page_pool_disable_direct_recycling(rxr->head_pool); if (bp->flags & BNXT_FLAG_SHARED_RINGS) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index bc8b3b7e915d..fda0d3cc6227 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -903,7 +903,7 @@ struct bnxt_sw_rx_bd { }; struct bnxt_sw_rx_agg_bd { - struct page *page; + netmem_ref netmem; unsigned int offset; dma_addr_t mapping; }; @@ -1106,6 +1106,7 @@ struct bnxt_rx_ring_info { unsigned long *rx_agg_bmap; u16 rx_agg_bmap_size; + bool need_head_pool; dma_addr_t rx_desc_mapping[MAX_RX_PAGES]; 
dma_addr_t rx_agg_desc_mapping[MAX_RX_AGG_PAGES]; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index a000d3f630bd..ce97befd3cb3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -506,9 +506,16 @@ err: start_utc, coredump.total_segs + 1, rc); kfree(coredump.data); - *dump_len += sizeof(struct bnxt_coredump_record); - if (rc == -ENOBUFS) + if (!rc) { + *dump_len += sizeof(struct bnxt_coredump_record); + /* The actual coredump length can be smaller than the FW + * reported length earlier. Use the ethtool provided length. + */ + if (buf_len) + *dump_len = buf_len; + } else if (rc == -ENOBUFS) { netdev_err(bp->dev, "Firmware returned large coredump buffer\n"); + } return rc; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h index 15ca51b5d204..fb5f5b063c3d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h @@ -58,7 +58,7 @@ void hwrm_update_token(struct bnxt *bp, u16 seq, enum bnxt_hwrm_wait_state s); #define BNXT_HWRM_MAX_REQ_LEN (bp->hwrm_max_req_len) #define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input) -#define HWRM_CMD_MAX_TIMEOUT 40000U +#define HWRM_CMD_MAX_TIMEOUT 60000U #define SHORT_HWRM_CMD_TIMEOUT 20 #define HWRM_CMD_TIMEOUT (bp->hwrm_cmd_timeout) #define HWRM_RESET_TIMEOUT ((HWRM_CMD_TIMEOUT) * 4) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index a8e930d5dbb0..238db9a1aebf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -148,7 +148,6 @@ void bnxt_unregister_dev(struct bnxt_en_dev *edev) struct net_device *dev = edev->net; struct bnxt *bp = netdev_priv(dev); struct bnxt_ulp *ulp; - int i = 0; ulp = edev->ulp_tbl; netdev_lock(dev); @@ -164,10 +163,6 @@ void bnxt_unregister_dev(struct bnxt_en_dev *edev) synchronize_rcu(); ulp->max_async_event_id = 0; ulp->async_events_bmap = NULL; - while (atomic_read(&ulp->ref_count) != 0 && i < 10) { - msleep(100); - i++; - } mutex_unlock(&edev->en_dev_lock); netdev_unlock(dev); return; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h index 7fa3b8d1ebd2..7b9dd8ebe4bc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h @@ -10,9 +10,6 @@ #ifndef BNXT_ULP_H #define BNXT_ULP_H -#define BNXT_ROCE_ULP 0 -#define BNXT_MAX_ULP 1 - #define BNXT_MIN_ROCE_CP_RINGS 2 #define BNXT_MIN_ROCE_STAT_CTXS 1 @@ -50,7 +47,6 @@ struct bnxt_ulp { unsigned long *async_events_bmap; u16 max_async_event_id; u16 msix_requested; - atomic_t ref_count; }; struct bnxt_en_dev { diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index d1f541af4e3b..ff47e96b9124 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -54,7 +54,7 @@ #include <linux/ssb/ssb_driver_gige.h> #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> -#include <linux/crc32poly.h> +#include <linux/crc32.h> #include <linux/dmi.h> #include <net/checksum.h> @@ -9809,26 +9809,7 @@ static void tg3_setup_rxbd_thresholds(struct tg3 *tp) static inline u32 calc_crc(unsigned char *buf, int len) { - u32 reg; - u32 tmp; - int j, k; - - reg = 0xffffffff; - - for (j = 0; j < len; j++) { - reg ^= buf[j]; - - for (k = 0; k < 8; k++) { - 
tmp = reg & 0x01; - - reg >>= 1; - - if (tmp) - reg ^= CRC32_POLY_LE; - } - } - - return ~reg; + return ~crc32(~0, buf, len); } static void tg3_set_multi(struct tg3 *tp, unsigned int accept_all) diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index a03eff3d4425..50eb54ecf1ba 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -1735,7 +1735,7 @@ bnad_iocpf_sem_timeout(struct timer_list *t) * Time CPU m CPU n * 0 1 = test_bit * 1 clear_bit - * 2 del_timer_sync + * 2 timer_delete_sync * 3 mod_timer */ diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 06397cc8bb36..5211759bfe47 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -1389,11 +1389,9 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry, this_cpu_inc(nic->pnicvf->drv_stats->tx_tso); } - /* Check if timestamp is requested */ - if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { - skb_tx_timestamp(skb); + /* Check if hw timestamp is requested */ + if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) return; - } /* Tx timestamping not supported along with TSO, so ignore request */ if (skb_shinfo(skb)->gso_size) @@ -1472,6 +1470,8 @@ static inline void nicvf_sq_doorbell(struct nicvf *nic, struct sk_buff *skb, netdev_tx_sent_queue(txq, skb->len); + skb_tx_timestamp(skb); + /* make sure all memory stores are done before ringing doorbell */ smp_wmb(); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 608cc6af5af1..3b7ad744b2dd 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1605,10 +1605,10 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return dev_err_probe(dev, err, "Failed to enable PCI device\n"); } - err = pci_request_regions(pdev, DRV_NAME); + err = pcim_request_all_regions(pdev, DRV_NAME); if (err) { dev_err(dev, "PCI request regions failed 0x%x\n", err); - goto err_disable_device; + goto err_zero_drv_data; } /* MAP configuration registers */ @@ -1616,7 +1616,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!bgx->reg_base) { dev_err(dev, "BGX: Cannot map CSR memory space, aborting\n"); err = -ENOMEM; - goto err_release_regions; + goto err_zero_drv_data; } set_max_bgx_per_node(pdev); @@ -1688,10 +1688,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_enable: bgx_vnic[bgx->bgx_id] = NULL; pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); -err_release_regions: - pci_release_regions(pdev); -err_disable_device: - pci_disable_device(pdev); +err_zero_drv_data: pci_set_drvdata(pdev, NULL); return err; } @@ -1710,8 +1707,6 @@ static void bgx_remove(struct pci_dev *pdev) pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); bgx_vnic[bgx->bgx_id] = NULL; - pci_release_regions(pdev); - pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c index 3b7068832f95..4a0e2d2eb60a 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c @@ -351,7 +351,7 @@ static void set_msglevel(struct net_device *dev, u32 val) adapter->msg_enable = val; } -static const char stats_strings[][ETH_GSTRING_LEN] = { +static const char 
stats_strings[][ETH_GSTRING_LEN] __nonstring_array = { "TxOctetsOK", "TxOctetsBad", "TxUnicastFramesOK", diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index f991a28a71c3..f2d533acb056 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1533,7 +1533,6 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) } else { q = &adap->sge.ethtxq[qidx + pi->first_qset]; } - skb_tx_timestamp(skb); reclaim_completed_tx(adap, &q->q, -1, true); cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F; @@ -1706,6 +1705,8 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) cpl->len = htons(skb->len); cpl->ctrl1 = cpu_to_be64(cntrl); + skb_tx_timestamp(skb); + if (immediate) { cxgb4_inline_tx_skb(skb, &q->q, sgl); dev_consume_skb_any(skb); @@ -2268,7 +2269,6 @@ static int ethofld_hard_xmit(struct net_device *dev, d = &eosw_txq->desc[eosw_txq->last_pidx]; skb = d->skb; - skb_tx_timestamp(skb); wr = (struct fw_eth_tx_eo_wr *)&eohw_txq->q.desc[eohw_txq->q.pidx]; if (unlikely(eosw_txq->state != CXGB4_EO_STATE_ACTIVE && @@ -2373,6 +2373,7 @@ write_wr_headers: eohw_txq->vlan_ins++; txq_advance(&eohw_txq->q, ndesc); + skb_tx_timestamp(skb); cxgb4_ring_tx_db(adap, &eohw_txq->q, ndesc); eosw_txq_advance_index(&eosw_txq->last_pidx, 1, eosw_txq->ndesc); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index e8e460a92e0e..4e2096e49684 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1640,6 +1640,7 @@ static int chcr_ktls_tunnel_pkt(struct chcr_ktls_info *tx_info, cxgb4_write_sgl(skb, &q->q, pos, end, 0, sgl_sdesc->addr); sgl_sdesc->skb = skb; chcr_txq_advance(&q->q, ndesc); + skb_tx_timestamp(skb); cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); return 0; } @@ -1903,7 +1904,6 @@ static int chcr_ktls_sw_fallback(struct sk_buff *skb, th = tcp_hdr(nskb); skb_offset = skb_tcp_all_headers(nskb); data_len = nskb->len - skb_offset; - skb_tx_timestamp(nskb); if (chcr_ktls_tunnel_pkt(tx_info, nskb, q)) goto out; diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 517a15904fb0..6a2004bbe87f 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -1144,6 +1144,7 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb, struct gmac_txdesc *txd; skb_frag_t *skb_frag; dma_addr_t mapping; + bool tcp = false; void *buffer; u16 mss; int ret; @@ -1151,6 +1152,13 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb, word1 = skb->len; word3 = SOF_BIT; + /* Determine if we are doing TCP */ + if (skb->protocol == htons(ETH_P_IP)) + tcp = (ip_hdr(skb)->protocol == IPPROTO_TCP); + else + /* IPv6 */ + tcp = (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP); + mss = skb_shinfo(skb)->gso_size; if (mss) { /* This means we are dealing with TCP and skb->len is the @@ -1163,8 +1171,26 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb, mss, skb->len); word1 |= TSS_MTU_ENABLE_BIT; word3 |= mss; + } else if (tcp) { + /* Even if we are not using TSO, use the hardware offloader + * for transferring the TCP frame: this hardware has partial + * TCP awareness (called TOE - TCP Offload Engine) and will + * according to the datasheet put packets belonging to the + * same TCP 
connection in the same queue for the TOE/TSO + * engine to process. The engine will deal with chopping + * up frames that exceed ETH_DATA_LEN which the + * checksumming engine cannot handle (see below) into + * manageable chunks. It flawlessly deals with quite big + * frames and frames containing custom DSA EtherTypes. + */ + mss = netdev->mtu + skb_tcp_all_headers(skb); + mss = min(mss, skb->len); + netdev_dbg(netdev, "TOE/TSO len %04x mtu %04x mss %04x\n", + skb->len, netdev->mtu, mss); + word1 |= TSS_MTU_ENABLE_BIT; + word3 |= mss; } else if (skb->len >= ETH_FRAME_LEN) { - /* Hardware offloaded checksumming isn't working on frames + /* Hardware offloaded checksumming isn't working on non-TCP frames * bigger than 1514 bytes. A hypothesis about this is that the * checksum buffer is only 1518 bytes, so when the frames get * bigger they get truncated, or the last few bytes get @@ -1181,21 +1207,16 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb, } if (skb->ip_summed == CHECKSUM_PARTIAL) { - int tcp = 0; - /* We do not switch off the checksumming on non TCP/UDP * frames: as is shown from tests, the checksumming engine * is smart enough to see that a frame is not actually TCP * or UDP and then just pass it through without any changes * to the frame. */ - if (skb->protocol == htons(ETH_P_IP)) { + if (skb->protocol == htons(ETH_P_IP)) word1 |= TSS_IP_CHKSUM_BIT; - tcp = ip_hdr(skb)->protocol == IPPROTO_TCP; - } else { /* IPv6 */ + else word1 |= TSS_IPV6_ENABLE_BIT; - tcp = ipv6_hdr(skb)->nexthdr == IPPROTO_TCP; - } word1 |= tcp ? TSS_TCP_CHKSUM_BIT : TSS_UDP_CHKSUM_BIT; } diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index c8c53121557f..bec76e7bf5dd 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1411,7 +1411,7 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* grab all resources from both PIO and MMIO regions, as we * don't want anyone else messing around with our hardware */ - if (pci_request_regions(pdev, DRV_NAME)) + if (pcim_request_all_regions(pdev, DRV_NAME)) return -ENODEV; ioaddr = pcim_iomap(pdev, TULIP_BAR, tulip_tbl[chip_idx].io_size); diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 5930cdec6f2f..e593273b2867 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -375,7 +375,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENOMEM; SET_NETDEV_DEV(dev, &pdev->dev); - if (pci_request_regions(pdev, DRV_NAME)) + if (pcim_request_all_regions(pdev, DRV_NAME)) goto err_out_netdev; ioaddr = pci_iomap(pdev, TULIP_BAR, netdev_res_size); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 51b8377edd1d..adb441b36581 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -2615,7 +2615,11 @@ err: return status; } -static char flash_cookie[2][16] = {"*** SE FLAS", "H DIRECTORY *** "}; +/* + * Since the cookie is text, add a parsing-skipped space to keep it from + * ever being matched on storage holding this source file. 
+ */ +static const char flash_cookie[32] __nonstring = "*** SE FLAS" "H DIRECTORY *** "; static bool phy_flashing_required(struct be_adapter *adapter) { diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index d70818f06be7..5e2d3ddb5d43 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -1415,7 +1415,7 @@ struct flash_section_entry { } __packed; struct flash_section_info { - u8 cookie[32]; + u8 cookie[32] __nonstring; struct flash_section_hdr fsec_hdr; struct flash_section_entry fsec_entry[32]; } __packed; diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index a2d7300925a8..bbef47c3480c 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -71,7 +71,6 @@ config FSL_XGMAC_MDIO tristate "Freescale XGMAC MDIO" select PHYLIB depends on OF - select MDIO_DEVRES select OF_MDIO help This driver supports the MDIO bus on the Fman 10G Ethernet MACs, and diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 4948b4906584..23c23cca2620 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -3089,15 +3089,25 @@ static int dpaa_xdp_xmit(struct net_device *net_dev, int n, return nxmit; } -static int dpaa_ts_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +static int dpaa_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *config) { struct dpaa_priv *priv = netdev_priv(dev); - struct hwtstamp_config config; - if (copy_from_user(&config, rq->ifr_data, sizeof(config))) - return -EFAULT; + config->tx_type = priv->tx_tstamp ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + config->rx_filter = priv->rx_tstamp ? HWTSTAMP_FILTER_ALL : + HWTSTAMP_FILTER_NONE; - switch (config.tx_type) { + return 0; +} + +static int dpaa_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + struct dpaa_priv *priv = netdev_priv(dev); + + switch (config->tx_type) { case HWTSTAMP_TX_OFF: /* Couldn't disable rx/tx timestamping separately. * Do nothing here. @@ -3112,7 +3122,7 @@ static int dpaa_ts_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return -ERANGE; } - if (config.rx_filter == HWTSTAMP_FILTER_NONE) { + if (config->rx_filter == HWTSTAMP_FILTER_NONE) { /* Couldn't disable rx/tx timestamping separately. * Do nothing here. */ @@ -3121,28 +3131,17 @@ static int dpaa_ts_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) priv->mac_dev->set_tstamp(priv->mac_dev->fman_mac, true); priv->rx_tstamp = true; /* TS is set for all frame types, not only those requested */ - config.rx_filter = HWTSTAMP_FILTER_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; } - return copy_to_user(rq->ifr_data, &config, sizeof(config)) ? 
- -EFAULT : 0; + return 0; } static int dpaa_ioctl(struct net_device *net_dev, struct ifreq *rq, int cmd) { - int ret = -EINVAL; struct dpaa_priv *priv = netdev_priv(net_dev); - if (cmd == SIOCGMIIREG) { - if (net_dev->phydev) - return phylink_mii_ioctl(priv->mac_dev->phylink, rq, - cmd); - } - - if (cmd == SIOCSHWTSTAMP) - return dpaa_ts_ioctl(net_dev, rq, cmd); - - return ret; + return phylink_mii_ioctl(priv->mac_dev->phylink, rq, cmd); } static const struct net_device_ops dpaa_ops = { @@ -3160,6 +3159,8 @@ static const struct net_device_ops dpaa_ops = { .ndo_change_mtu = dpaa_change_mtu, .ndo_bpf = dpaa_xdp, .ndo_xdp_xmit = dpaa_xdp_xmit, + .ndo_hwtstamp_get = dpaa_hwtstamp_get, + .ndo_hwtstamp_set = dpaa_hwtstamp_set, }; static int dpaa_napi_add(struct net_device *net_dev) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 29886a8ba73f..2ec2c3dab250 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -2585,40 +2585,52 @@ static int dpaa2_eth_set_features(struct net_device *net_dev, return 0; } -static int dpaa2_eth_ts_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +static int dpaa2_eth_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct dpaa2_eth_priv *priv = netdev_priv(dev); - struct hwtstamp_config config; if (!dpaa2_ptp) return -EINVAL; - if (copy_from_user(&config, rq->ifr_data, sizeof(config))) - return -EFAULT; - - switch (config.tx_type) { + switch (config->tx_type) { case HWTSTAMP_TX_OFF: case HWTSTAMP_TX_ON: case HWTSTAMP_TX_ONESTEP_SYNC: - priv->tx_tstamp_type = config.tx_type; + priv->tx_tstamp_type = config->tx_type; break; default: return -ERANGE; } - if (config.rx_filter == HWTSTAMP_FILTER_NONE) { + if (config->rx_filter == HWTSTAMP_FILTER_NONE) { priv->rx_tstamp = false; } else { priv->rx_tstamp = true; /* TS is set for all frame types, not only those requested */ - config.rx_filter = HWTSTAMP_FILTER_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; } if (priv->tx_tstamp_type == HWTSTAMP_TX_ONESTEP_SYNC) dpaa2_ptp_onestep_reg_update_method(priv); - return copy_to_user(rq->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; + return 0; +} + +static int dpaa2_eth_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *config) +{ + struct dpaa2_eth_priv *priv = netdev_priv(dev); + + if (!dpaa2_ptp) + return -EINVAL; + + config->tx_type = priv->tx_tstamp_type; + config->rx_filter = priv->rx_tstamp ? 
HWTSTAMP_FILTER_ALL : + HWTSTAMP_FILTER_NONE; + + return 0; } static int dpaa2_eth_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) @@ -2626,9 +2638,6 @@ static int dpaa2_eth_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) struct dpaa2_eth_priv *priv = netdev_priv(dev); int err; - if (cmd == SIOCSHWTSTAMP) - return dpaa2_eth_ts_ioctl(dev, rq, cmd); - mutex_lock(&priv->mac_lock); if (dpaa2_eth_is_type_phy(priv)) { @@ -3034,7 +3043,9 @@ static const struct net_device_ops dpaa2_eth_ops = { .ndo_xsk_wakeup = dpaa2_xsk_wakeup, .ndo_setup_tc = dpaa2_eth_setup_tc, .ndo_vlan_rx_add_vid = dpaa2_eth_rx_add_vid, - .ndo_vlan_rx_kill_vid = dpaa2_eth_rx_kill_vid + .ndo_vlan_rx_kill_vid = dpaa2_eth_rx_kill_vid, + .ndo_hwtstamp_get = dpaa2_eth_hwtstamp_get, + .ndo_hwtstamp_set = dpaa2_eth_hwtstamp_set, }; static void dpaa2_eth_cdan_cb(struct dpaa2_io_notification_ctx *ctx) diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig index 6c2779047dcd..e917132d3714 100644 --- a/drivers/net/ethernet/freescale/enetc/Kconfig +++ b/drivers/net/ethernet/freescale/enetc/Kconfig @@ -15,10 +15,16 @@ config NXP_ENETC_PF_COMMON If compiled as module (M), the module name is nxp-enetc-pf-common. +config NXP_NETC_LIB + tristate + help + This module provides common functionalities for both ENETC and NETC + Switch, such as NETC Table Management Protocol (NTMP) 2.0, common tc + flower and debugfs interfaces and so on. + config FSL_ENETC tristate "ENETC PF driver" depends on PCI_MSI - select MDIO_DEVRES select FSL_ENETC_CORE select FSL_ENETC_IERB select FSL_ENETC_MDIO @@ -36,10 +42,10 @@ config FSL_ENETC config NXP_ENETC4 tristate "ENETC4 PF driver" depends on PCI_MSI - select MDIO_DEVRES select FSL_ENETC_CORE select FSL_ENETC_MDIO select NXP_ENETC_PF_COMMON + select NXP_NETC_LIB select PHYLINK select DIMLIB help @@ -73,7 +79,7 @@ config FSL_ENETC_IERB config FSL_ENETC_MDIO tristate "ENETC MDIO driver" - depends on PCI && MDIO_DEVRES && MDIO_BUS + depends on PCI && PHYLIB help This driver supports NXP ENETC Central MDIO controller as a PCIe physical function (PF) device. 
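The dpaa and dpaa2 conversions above, like the enetc one further down, all move hardware timestamping configuration out of the SIOCSHWTSTAMP/SIOCGHWTSTAMP ioctl path and into the dedicated ndo_hwtstamp_get()/ndo_hwtstamp_set() callbacks, which receive an already-copied kernel-space struct kernel_hwtstamp_config rather than a raw struct ifreq. A minimal sketch of the callback pair, assuming a hypothetical "foo" driver whose tx_tstamp/rx_tstamp flags stand in for real device state:

#include <linux/net_tstamp.h>
#include <linux/netdevice.h>

/* Hypothetical private state; a real driver keeps this in its priv struct
 * and programs the hardware when the flags change.
 */
struct foo_priv {
	bool tx_tstamp;
	bool rx_tstamp;
};

static int foo_hwtstamp_get(struct net_device *dev,
			    struct kernel_hwtstamp_config *config)
{
	struct foo_priv *priv = netdev_priv(dev);

	/* Report the currently programmed state; no copy_to_user() needed. */
	config->tx_type = priv->tx_tstamp ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
	config->rx_filter = priv->rx_tstamp ? HWTSTAMP_FILTER_ALL :
					      HWTSTAMP_FILTER_NONE;
	return 0;
}

static int foo_hwtstamp_set(struct net_device *dev,
			    struct kernel_hwtstamp_config *config,
			    struct netlink_ext_ack *extack)
{
	struct foo_priv *priv = netdev_priv(dev);

	switch (config->tx_type) {
	case HWTSTAMP_TX_OFF:
		priv->tx_tstamp = false;
		break;
	case HWTSTAMP_TX_ON:
		priv->tx_tstamp = true;
		break;
	default:
		return -ERANGE;
	}

	priv->rx_tstamp = config->rx_filter != HWTSTAMP_FILTER_NONE;
	if (priv->rx_tstamp)
		/* Hardware stamps all frame types, so report what was
		 * actually programmed back to the caller.
		 */
		config->rx_filter = HWTSTAMP_FILTER_ALL;

	return 0;
}

static const struct net_device_ops foo_netdev_ops = {
	/* other callbacks elided */
	.ndo_hwtstamp_get = foo_hwtstamp_get,
	.ndo_hwtstamp_set = foo_hwtstamp_set,
};

Because the core now performs the user copies and the SIOCGHWTSTAMP fallback itself, the per-driver ioctl handlers in the hunks above and below shrink to plain phylink/MII passthroughs.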
diff --git a/drivers/net/ethernet/freescale/enetc/Makefile b/drivers/net/ethernet/freescale/enetc/Makefile index 6fd27ee4fcd1..f1c5ad45fd76 100644 --- a/drivers/net/ethernet/freescale/enetc/Makefile +++ b/drivers/net/ethernet/freescale/enetc/Makefile @@ -6,6 +6,9 @@ fsl-enetc-core-y := enetc.o enetc_cbdr.o enetc_ethtool.o obj-$(CONFIG_NXP_ENETC_PF_COMMON) += nxp-enetc-pf-common.o nxp-enetc-pf-common-y := enetc_pf_common.o +obj-$(CONFIG_NXP_NETC_LIB) += nxp-netc-lib.o +nxp-netc-lib-y := ntmp.o + obj-$(CONFIG_FSL_ENETC) += fsl-enetc.o fsl-enetc-y := enetc_pf.o fsl-enetc-$(CONFIG_PCI_IOV) += enetc_msg.o @@ -13,6 +16,7 @@ fsl-enetc-$(CONFIG_FSL_ENETC_QOS) += enetc_qos.o obj-$(CONFIG_NXP_ENETC4) += nxp-enetc4.o nxp-enetc4-y := enetc4_pf.o +nxp-enetc4-$(CONFIG_DEBUG_FS) += enetc4_debugfs.o obj-$(CONFIG_FSL_ENETC_VF) += fsl-enetc-vf.o fsl-enetc-vf-y := enetc_vf.o diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 3ee52f4b1166..dcc3fbac3481 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -36,6 +36,42 @@ static void enetc_change_preemptible_tcs(struct enetc_ndev_priv *priv, enetc_mm_commit_preemptible_tcs(priv); } +static int enetc_mac_addr_hash_idx(const u8 *addr) +{ + u64 fold = __swab64(ether_addr_to_u64(addr)) >> 16; + u64 mask = 0; + int res = 0; + int i; + + for (i = 0; i < 8; i++) + mask |= BIT_ULL(i * 6); + + for (i = 0; i < 6; i++) + res |= (hweight64(fold & (mask << i)) & 0x1) << i; + + return res; +} + +void enetc_add_mac_addr_ht_filter(struct enetc_mac_filter *filter, + const unsigned char *addr) +{ + int idx = enetc_mac_addr_hash_idx(addr); + + /* add hash table entry */ + __set_bit(idx, filter->mac_hash_table); + filter->mac_addr_cnt++; +} +EXPORT_SYMBOL_GPL(enetc_add_mac_addr_ht_filter); + +void enetc_reset_mac_addr_filter(struct enetc_mac_filter *filter) +{ + filter->mac_addr_cnt = 0; + + bitmap_zero(filter->mac_hash_table, + ENETC_MADDR_HASH_TBL_SZ); +} +EXPORT_SYMBOL_GPL(enetc_reset_mac_addr_filter); + static int enetc_num_stack_tx_queues(struct enetc_ndev_priv *priv) { int num_tx_rings = priv->num_tx_rings; @@ -2379,7 +2415,7 @@ static int enetc_setup_default_rss_table(struct enetc_si *si, int num_groups) for (i = 0; i < si->num_rss; i++) rss_table[i] = i % num_groups; - enetc_set_rss_table(si, rss_table, si->num_rss); + si->ops->set_rss_table(si, rss_table, si->num_rss); kfree(rss_table); @@ -2394,6 +2430,20 @@ static void enetc_set_lso_flags_mask(struct enetc_hw *hw) enetc_wr(hw, ENETC4_SILSOSFMR1, 0); } +static void enetc_set_rss(struct net_device *ndev, int en) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; + u32 reg; + + enetc_wr(hw, ENETC_SIRBGCR, priv->num_rx_rings); + + reg = enetc_rd(hw, ENETC_SIMR); + reg &= ~ENETC_SIMR_RSSE; + reg |= (en) ? 
ENETC_SIMR_RSSE : 0; + enetc_wr(hw, ENETC_SIMR, reg); +} + int enetc_configure_si(struct enetc_ndev_priv *priv) { struct enetc_si *si = priv->si; @@ -2410,13 +2460,13 @@ int enetc_configure_si(struct enetc_ndev_priv *priv) if (si->hw_features & ENETC_SI_F_LSO) enetc_set_lso_flags_mask(hw); - /* TODO: RSS support for i.MX95 will be supported later, and the - * is_enetc_rev1() condition will be removed - */ - if (si->num_rss && is_enetc_rev1(si)) { + if (si->num_rss) { err = enetc_setup_default_rss_table(si, priv->num_rx_rings); if (err) return err; + + if (priv->ndev->features & NETIF_F_RXHASH) + enetc_set_rss(priv->ndev, true); } return 0; @@ -3209,22 +3259,6 @@ struct net_device_stats *enetc_get_stats(struct net_device *ndev) } EXPORT_SYMBOL_GPL(enetc_get_stats); -static int enetc_set_rss(struct net_device *ndev, int en) -{ - struct enetc_ndev_priv *priv = netdev_priv(ndev); - struct enetc_hw *hw = &priv->si->hw; - u32 reg; - - enetc_wr(hw, ENETC_SIRBGCR, priv->num_rx_rings); - - reg = enetc_rd(hw, ENETC_SIMR); - reg &= ~ENETC_SIMR_RSSE; - reg |= (en) ? ENETC_SIMR_RSSE : 0; - enetc_wr(hw, ENETC_SIMR, reg); - - return 0; -} - static void enetc_enable_rxvlan(struct net_device *ndev, bool en) { struct enetc_ndev_priv *priv = netdev_priv(ndev); @@ -3262,16 +3296,17 @@ void enetc_set_features(struct net_device *ndev, netdev_features_t features) } EXPORT_SYMBOL_GPL(enetc_set_features); -static int enetc_hwtstamp_set(struct net_device *ndev, struct ifreq *ifr) +int enetc_hwtstamp_set(struct net_device *ndev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct enetc_ndev_priv *priv = netdev_priv(ndev); int err, new_offloads = priv->active_offloads; - struct hwtstamp_config config; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; + if (!IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK)) + return -EOPNOTSUPP; - switch (config.tx_type) { + switch (config->tx_type) { case HWTSTAMP_TX_OFF: new_offloads &= ~ENETC_F_TX_TSTAMP_MASK; break; @@ -3290,13 +3325,13 @@ static int enetc_hwtstamp_set(struct net_device *ndev, struct ifreq *ifr) return -ERANGE; } - switch (config.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: new_offloads &= ~ENETC_F_RX_TSTAMP; break; default: new_offloads |= ENETC_F_RX_TSTAMP; - config.rx_filter = HWTSTAMP_FILTER_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; } if ((new_offloads ^ priv->active_offloads) & ENETC_F_RX_TSTAMP) { @@ -3309,42 +3344,36 @@ static int enetc_hwtstamp_set(struct net_device *ndev, struct ifreq *ifr) priv->active_offloads = new_offloads; - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; + return 0; } +EXPORT_SYMBOL_GPL(enetc_hwtstamp_set); -static int enetc_hwtstamp_get(struct net_device *ndev, struct ifreq *ifr) +int enetc_hwtstamp_get(struct net_device *ndev, + struct kernel_hwtstamp_config *config) { struct enetc_ndev_priv *priv = netdev_priv(ndev); - struct hwtstamp_config config; - config.flags = 0; + if (!IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK)) + return -EOPNOTSUPP; if (priv->active_offloads & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) - config.tx_type = HWTSTAMP_TX_ONESTEP_SYNC; + config->tx_type = HWTSTAMP_TX_ONESTEP_SYNC; else if (priv->active_offloads & ENETC_F_TX_TSTAMP) - config.tx_type = HWTSTAMP_TX_ON; + config->tx_type = HWTSTAMP_TX_ON; else - config.tx_type = HWTSTAMP_TX_OFF; + config->tx_type = HWTSTAMP_TX_OFF; - config.rx_filter = (priv->active_offloads & ENETC_F_RX_TSTAMP) ? 
- HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE; + config->rx_filter = (priv->active_offloads & ENETC_F_RX_TSTAMP) ? + HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE; - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; + return 0; } +EXPORT_SYMBOL_GPL(enetc_hwtstamp_get); int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) { struct enetc_ndev_priv *priv = netdev_priv(ndev); - if (IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK)) { - if (cmd == SIOCSHWTSTAMP) - return enetc_hwtstamp_set(ndev, rq); - if (cmd == SIOCGHWTSTAMP) - return enetc_hwtstamp_get(ndev, rq); - } - if (!priv->phylink) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 4ad4eb5c5a74..872d2cbd088b 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -8,6 +8,7 @@ #include <linux/dma-mapping.h> #include <linux/skbuff.h> #include <linux/ethtool.h> +#include <linux/fsl/ntmp.h> #include <linux/if_vlan.h> #include <linux/phylink.h> #include <linux/dim.h> @@ -22,6 +23,18 @@ #define ENETC_CBD_DATA_MEM_ALIGN 64 +#define ENETC_MADDR_HASH_TBL_SZ 64 + +enum enetc_mac_addr_type {UC, MC, MADDR_TYPE}; + +struct enetc_mac_filter { + union { + char mac_addr[ETH_ALEN]; + DECLARE_BITMAP(mac_hash_table, ENETC_MADDR_HASH_TBL_SZ); + }; + int mac_addr_cnt; +}; + struct enetc_tx_swbd { union { struct sk_buff *skb; @@ -266,6 +279,19 @@ struct enetc_platform_info { const struct enetc_drvdata *data; }; +struct enetc_si; + +/* + * This structure defines some common hooks for the ENETC PSI and VSI. + * In addition, since the VSI only uses struct enetc_si as its private + * driver data, this structure also defines some hooks specific to the + * VSI. VSI-specific hooks are named in the 'vf_*()' format. + */ +struct enetc_si_ops { + int (*get_rss_table)(struct enetc_si *si, u32 *table, int count); + int (*set_rss_table)(struct enetc_si *si, const u32 *table, int count); +}; + /* PCI IEP device data */ struct enetc_si { struct pci_dev *pdev; struct enetc_hw hw; struct net_device *ndev; /* back ref. 
*/ - struct enetc_cbdr cbd_ring; + union { + struct enetc_cbdr cbd_ring; /* Only ENETC 1.0 */ + struct ntmp_user ntmp_user; /* ENETC 4.1 and later */ + }; int num_rx_rings; /* how many rings are available in the SI */ int num_tx_rings; @@ -284,6 +313,11 @@ struct enetc_si { u16 revision; int hw_features; const struct enetc_drvdata *drvdata; + const struct enetc_si_ops *ops; + + struct workqueue_struct *workqueue; + struct work_struct rx_mode_task; + struct dentry *debugfs_root; }; #define ENETC_SI_ALIGN 32 @@ -466,6 +500,9 @@ int enetc_alloc_si_resources(struct enetc_ndev_priv *priv); void enetc_free_si_resources(struct enetc_ndev_priv *priv); int enetc_configure_si(struct enetc_ndev_priv *priv); int enetc_get_driver_data(struct enetc_si *si); +void enetc_add_mac_addr_ht_filter(struct enetc_mac_filter *filter, + const unsigned char *addr); +void enetc_reset_mac_addr_filter(struct enetc_mac_filter *filter); int enetc_open(struct net_device *ndev); int enetc_close(struct net_device *ndev); @@ -481,6 +518,12 @@ int enetc_setup_bpf(struct net_device *ndev, struct netdev_bpf *bpf); int enetc_xdp_xmit(struct net_device *ndev, int num_frames, struct xdp_frame **frames, u32 flags); +int enetc_hwtstamp_get(struct net_device *ndev, + struct kernel_hwtstamp_config *config); +int enetc_hwtstamp_set(struct net_device *ndev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); + /* ethtool */ extern const struct ethtool_ops enetc_pf_ethtool_ops; extern const struct ethtool_ops enetc4_pf_ethtool_ops; @@ -493,15 +536,19 @@ void enetc_mm_commit_preemptible_tcs(struct enetc_ndev_priv *priv); int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw, int bd_count, struct enetc_cbdr *cbdr); void enetc_teardown_cbdr(struct enetc_cbdr *cbdr); +int enetc4_setup_cbdr(struct enetc_si *si); +void enetc4_teardown_cbdr(struct enetc_si *si); int enetc_set_mac_flt_entry(struct enetc_si *si, int index, char *mac_addr, int si_map); int enetc_clear_mac_flt_entry(struct enetc_si *si, int index); int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse, int index); -void enetc_set_rss_key(struct enetc_hw *hw, const u8 *bytes); +void enetc_set_rss_key(struct enetc_si *si, const u8 *bytes); int enetc_get_rss_table(struct enetc_si *si, u32 *table, int count); int enetc_set_rss_table(struct enetc_si *si, const u32 *table, int count); int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd); +int enetc4_get_rss_table(struct enetc_si *si, u32 *table, int count); +int enetc4_set_rss_table(struct enetc_si *si, const u32 *table, int count); static inline void *enetc_cbd_alloc_data_mem(struct enetc_si *si, struct enetc_cbd *cbd, diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_debugfs.c b/drivers/net/ethernet/freescale/enetc/enetc4_debugfs.c new file mode 100644 index 000000000000..1b1591dce73d --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc4_debugfs.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright 2025 NXP */ + +#include <linux/device.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/string_choices.h> + +#include "enetc_pf.h" +#include "enetc4_debugfs.h" + +static void enetc_show_si_mac_hash_filter(struct seq_file *s, int i) +{ + struct enetc_si *si = s->private; + struct enetc_hw *hw = &si->hw; + u32 hash_h, hash_l; + + hash_l = enetc_port_rd(hw, ENETC4_PSIUMHFR0(i)); + hash_h = enetc_port_rd(hw, ENETC4_PSIUMHFR1(i)); + seq_printf(s, "SI %d unicast MAC hash filter: 0x%08x%08x\n", + i, hash_h, hash_l); + + hash_l = 
enetc_port_rd(hw, ENETC4_PSIMMHFR0(i)); + hash_h = enetc_port_rd(hw, ENETC4_PSIMMHFR1(i)); + seq_printf(s, "SI %d multicast MAC hash filter: 0x%08x%08x\n", + i, hash_h, hash_l); +} + +static int enetc_mac_filter_show(struct seq_file *s, void *data) +{ + struct enetc_si *si = s->private; + struct enetc_hw *hw = &si->hw; + struct maft_entry_data maft; + struct enetc_pf *pf; + int i, err, num_si; + u32 val; + + pf = enetc_si_priv(si); + num_si = pf->caps.num_vsi + 1; + + val = enetc_port_rd(hw, ENETC4_PSIPMMR); + for (i = 0; i < num_si; i++) { + seq_printf(s, "SI %d Unicast Promiscuous mode: %s\n", i, + str_enabled_disabled(PSIPMMR_SI_MAC_UP(i) & val)); + seq_printf(s, "SI %d Multicast Promiscuous mode: %s\n", i, + str_enabled_disabled(PSIPMMR_SI_MAC_MP(i) & val)); + } + + /* MAC hash filter table */ + for (i = 0; i < num_si; i++) + enetc_show_si_mac_hash_filter(s, i); + + if (!pf->num_mfe) + return 0; + + /* MAC address filter table */ + seq_puts(s, "MAC address filter table\n"); + for (i = 0; i < pf->num_mfe; i++) { + memset(&maft, 0, sizeof(maft)); + err = ntmp_maft_query_entry(&si->ntmp_user, i, &maft); + if (err) + return err; + + seq_printf(s, "Entry %d, MAC: %pM, SI bitmap: 0x%04x\n", i, + maft.keye.mac_addr, le16_to_cpu(maft.cfge.si_bitmap)); + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(enetc_mac_filter); + +void enetc_create_debugfs(struct enetc_si *si) +{ + struct net_device *ndev = si->ndev; + struct dentry *root; + + root = debugfs_create_dir(netdev_name(ndev), NULL); + if (IS_ERR(root)) + return; + + si->debugfs_root = root; + + debugfs_create_file("mac_filter", 0444, root, si, &enetc_mac_filter_fops); +} + +void enetc_remove_debugfs(struct enetc_si *si) +{ + debugfs_remove(si->debugfs_root); + si->debugfs_root = NULL; +} diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_debugfs.h b/drivers/net/ethernet/freescale/enetc/enetc4_debugfs.h new file mode 100644 index 000000000000..96caca35f79d --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc4_debugfs.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* Copyright 2025 NXP */ + +#ifndef __ENETC4_DEBUGFS_H +#define __ENETC4_DEBUGFS_H + +#if IS_ENABLED(CONFIG_DEBUG_FS) +void enetc_create_debugfs(struct enetc_si *si); +void enetc_remove_debugfs(struct enetc_si *si); +#else +static inline void enetc_create_debugfs(struct enetc_si *si) +{ +} + +static inline void enetc_remove_debugfs(struct enetc_si *si) +{ +} +#endif + +#endif diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h index 695cb07c74bc..aa25b445d301 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h @@ -99,6 +99,18 @@ #define ENETC4_PSICFGR2(a) ((a) * 0x80 + 0x2018) #define PSICFGR2_NUM_MSIX GENMASK(5, 0) +/* Port station interface a unicast MAC hash filter register 0/1 */ +#define ENETC4_PSIUMHFR0(a) ((a) * 0x80 + 0x2050) +#define ENETC4_PSIUMHFR1(a) ((a) * 0x80 + 0x2054) + +/* Port station interface a multicast MAC hash filter register 0/1 */ +#define ENETC4_PSIMMHFR0(a) ((a) * 0x80 + 0x2058) +#define ENETC4_PSIMMHFR1(a) ((a) * 0x80 + 0x205c) + +/* Port station interface a VLAN hash filter register 0/1 */ +#define ENETC4_PSIVHFR0(a) ((a) * 0x80 + 0x2060) +#define ENETC4_PSIVHFR1(a) ((a) * 0x80 + 0x2064) + #define ENETC4_PMCAPR 0x4004 #define PMCAPR_HD BIT(8) #define PMCAPR_FP GENMASK(10, 9) diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c 
index 73ac8c6afb3a..c16378eb50bc 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c @@ -8,9 +8,19 @@ #include <linux/unaligned.h> #include "enetc_pf_common.h" +#include "enetc4_debugfs.h" #define ENETC_SI_MAX_RING_NUM 8 +#define ENETC_MAC_FILTER_TYPE_UC BIT(0) +#define ENETC_MAC_FILTER_TYPE_MC BIT(1) +#define ENETC_MAC_FILTER_TYPE_ALL (ENETC_MAC_FILTER_TYPE_UC | \ + ENETC_MAC_FILTER_TYPE_MC) + +struct enetc_mac_addr { + u8 addr[ETH_ALEN]; +}; + static void enetc4_get_port_caps(struct enetc_pf *pf) { struct enetc_hw *hw = &pf->si->hw; @@ -26,6 +36,9 @@ static void enetc4_get_port_caps(struct enetc_pf *pf) val = enetc_port_rd(hw, ENETC4_PMCAPR); pf->caps.half_duplex = (val & PMCAPR_HD) ? 1 : 0; + + val = enetc_port_rd(hw, ENETC4_PSIMAFCAPR); + pf->caps.mac_filter_num = val & PSIMAFCAPR_NUM_MAC_AFTE; } static void enetc4_pf_set_si_primary_mac(struct enetc_hw *hw, int si, @@ -56,6 +69,200 @@ static void enetc4_pf_get_si_primary_mac(struct enetc_hw *hw, int si, put_unaligned_le16(lower, addr + 4); } +static void enetc4_pf_set_si_mac_promisc(struct enetc_hw *hw, int si, + bool uc_promisc, bool mc_promisc) +{ + u32 val = enetc_port_rd(hw, ENETC4_PSIPMMR); + + if (uc_promisc) + val |= PSIPMMR_SI_MAC_UP(si); + else + val &= ~PSIPMMR_SI_MAC_UP(si); + + if (mc_promisc) + val |= PSIPMMR_SI_MAC_MP(si); + else + val &= ~PSIPMMR_SI_MAC_MP(si); + + enetc_port_wr(hw, ENETC4_PSIPMMR, val); +} + +static void enetc4_pf_set_si_uc_hash_filter(struct enetc_hw *hw, int si, + u64 hash) +{ + enetc_port_wr(hw, ENETC4_PSIUMHFR0(si), lower_32_bits(hash)); + enetc_port_wr(hw, ENETC4_PSIUMHFR1(si), upper_32_bits(hash)); +} + +static void enetc4_pf_set_si_mc_hash_filter(struct enetc_hw *hw, int si, + u64 hash) +{ + enetc_port_wr(hw, ENETC4_PSIMMHFR0(si), lower_32_bits(hash)); + enetc_port_wr(hw, ENETC4_PSIMMHFR1(si), upper_32_bits(hash)); +} + +static void enetc4_pf_set_loopback(struct net_device *ndev, bool en) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_si *si = priv->si; + u32 val; + + val = enetc_port_mac_rd(si, ENETC4_PM_CMD_CFG(0)); + val = u32_replace_bits(val, en ? 1 : 0, PM_CMD_CFG_LOOP_EN); + /* Default to select MAC level loopback mode if loopback is enabled. */ + val = u32_replace_bits(val, en ? 
LPBCK_MODE_MAC_LEVEL : 0, + PM_CMD_CFG_LPBK_MODE); + + enetc_port_mac_wr(si, ENETC4_PM_CMD_CFG(0), val); +} + +static void enetc4_pf_clear_maft_entries(struct enetc_pf *pf) +{ + int i; + + for (i = 0; i < pf->num_mfe; i++) + ntmp_maft_delete_entry(&pf->si->ntmp_user, i); + + pf->num_mfe = 0; +} + +static int enetc4_pf_add_maft_entries(struct enetc_pf *pf, + struct enetc_mac_addr *mac, + int mac_cnt) +{ + struct maft_entry_data maft = {}; + u16 si_bit = BIT(0); + int i, err; + + maft.cfge.si_bitmap = cpu_to_le16(si_bit); + for (i = 0; i < mac_cnt; i++) { + ether_addr_copy(maft.keye.mac_addr, mac[i].addr); + err = ntmp_maft_add_entry(&pf->si->ntmp_user, i, &maft); + if (unlikely(err)) { + pf->num_mfe = i; + goto clear_maft_entries; + } + } + + pf->num_mfe = mac_cnt; + + return 0; + +clear_maft_entries: + enetc4_pf_clear_maft_entries(pf); + + return err; +} + +static int enetc4_pf_set_uc_exact_filter(struct enetc_pf *pf) +{ + int max_num_mfe = pf->caps.mac_filter_num; + struct enetc_mac_filter mac_filter = {}; + struct net_device *ndev = pf->si->ndev; + struct enetc_hw *hw = &pf->si->hw; + struct enetc_mac_addr *mac_tbl; + struct netdev_hw_addr *ha; + int i = 0, err; + int mac_cnt; + + netif_addr_lock_bh(ndev); + + mac_cnt = netdev_uc_count(ndev); + if (!mac_cnt) { + netif_addr_unlock_bh(ndev); + /* clear both MAC hash and exact filters */ + enetc4_pf_set_si_uc_hash_filter(hw, 0, 0); + enetc4_pf_clear_maft_entries(pf); + + return 0; + } + + if (mac_cnt > max_num_mfe) { + err = -ENOSPC; + goto unlock_netif_addr; + } + + mac_tbl = kcalloc(mac_cnt, sizeof(*mac_tbl), GFP_ATOMIC); + if (!mac_tbl) { + err = -ENOMEM; + goto unlock_netif_addr; + } + + netdev_for_each_uc_addr(ha, ndev) { + enetc_add_mac_addr_ht_filter(&mac_filter, ha->addr); + ether_addr_copy(mac_tbl[i++].addr, ha->addr); + } + + netif_addr_unlock_bh(ndev); + + /* Set temporary unicast hash filters in case of Rx loss when + * updating MAC address filter table + */ + enetc4_pf_set_si_uc_hash_filter(hw, 0, *mac_filter.mac_hash_table); + enetc4_pf_clear_maft_entries(pf); + + if (!enetc4_pf_add_maft_entries(pf, mac_tbl, i)) + enetc4_pf_set_si_uc_hash_filter(hw, 0, 0); + + kfree(mac_tbl); + + return 0; + +unlock_netif_addr: + netif_addr_unlock_bh(ndev); + + return err; +} + +static void enetc4_pf_set_mac_hash_filter(struct enetc_pf *pf, int type) +{ + struct net_device *ndev = pf->si->ndev; + struct enetc_mac_filter *mac_filter; + struct enetc_hw *hw = &pf->si->hw; + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(ndev); + if (type & ENETC_MAC_FILTER_TYPE_UC) { + mac_filter = &pf->mac_filter[UC]; + enetc_reset_mac_addr_filter(mac_filter); + netdev_for_each_uc_addr(ha, ndev) + enetc_add_mac_addr_ht_filter(mac_filter, ha->addr); + + enetc4_pf_set_si_uc_hash_filter(hw, 0, + *mac_filter->mac_hash_table); + } + + if (type & ENETC_MAC_FILTER_TYPE_MC) { + mac_filter = &pf->mac_filter[MC]; + enetc_reset_mac_addr_filter(mac_filter); + netdev_for_each_mc_addr(ha, ndev) + enetc_add_mac_addr_ht_filter(mac_filter, ha->addr); + + enetc4_pf_set_si_mc_hash_filter(hw, 0, + *mac_filter->mac_hash_table); + } + netif_addr_unlock_bh(ndev); +} + +static void enetc4_pf_set_mac_filter(struct enetc_pf *pf, int type) +{ + /* Currently, the MAC address filter table (MAFT) only has 4 entries, + * and multiple multicast addresses for filtering will be configured + * in the default network configuration, so MAFT is only suitable for + * unicast filtering. If the number of unicast addresses exceeds the + * table capacity, the MAC hash filter will be used. 
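+ *
+ * For reference, a minimal sketch of the hash bucket computation the
+ * fallback relies on (mirroring enetc_mac_addr_hash_idx(), which this
+ * series moves out of enetc_pf.c into the common enetc code):
+ *
+ *   u64 fold = __swab64(ether_addr_to_u64(addr)) >> 16;
+ *   u64 mask = 0;
+ *   int idx = 0, i;
+ *
+ *   for (i = 0; i < 8; i++)
+ *           mask |= BIT_ULL(i * 6);
+ *   for (i = 0; i < 6; i++)
+ *           idx |= (hweight64(fold & (mask << i)) & 0x1) << i;
+ *
+ * The 6-bit result selects one bit of the 64-bit table programmed via
+ * PSIUMHFR0/1 (unicast) or PSIMMHFR0/1 (multicast).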
+ */ + if (type & ENETC_MAC_FILTER_TYPE_UC && enetc4_pf_set_uc_exact_filter(pf)) { + /* Fall back to the MAC hash filter */ + enetc4_pf_set_mac_hash_filter(pf, ENETC_MAC_FILTER_TYPE_UC); + /* Clear the old MAC exact filter */ + enetc4_pf_clear_maft_entries(pf); + } + + if (type & ENETC_MAC_FILTER_TYPE_MC) + enetc4_pf_set_mac_hash_filter(pf, ENETC_MAC_FILTER_TYPE_MC); +} + static const struct enetc_pf_ops enetc4_pf_ops = { .set_si_primary_mac = enetc4_pf_set_si_primary_mac, .get_si_primary_mac = enetc4_pf_get_si_primary_mac, @@ -226,24 +433,6 @@ static void enetc4_set_trx_frame_size(struct enetc_pf *pf) enetc4_pf_reset_tc_msdu(&si->hw); } -static void enetc4_set_rss_key(struct enetc_hw *hw, const u8 *bytes) -{ - int i; - - for (i = 0; i < ENETC_RSSHASH_KEY_SIZE / 4; i++) - enetc_port_wr(hw, ENETC4_PRSSKR(i), ((u32 *)bytes)[i]); -} - -static void enetc4_set_default_rss_key(struct enetc_pf *pf) -{ - u8 hash_key[ENETC_RSSHASH_KEY_SIZE] = {0}; - struct enetc_hw *hw = &pf->si->hw; - - /* set up hash key */ - get_random_bytes(hash_key, ENETC_RSSHASH_KEY_SIZE); - enetc4_set_rss_key(hw, hash_key); -} - static void enetc4_enable_trx(struct enetc_pf *pf) { struct enetc_hw *hw = &pf->si->hw; @@ -256,10 +445,25 @@ static void enetc4_configure_port(struct enetc_pf *pf) { enetc4_configure_port_si(pf); enetc4_set_trx_frame_size(pf); - enetc4_set_default_rss_key(pf); + enetc_set_default_rss_key(pf); enetc4_enable_trx(pf); } +static int enetc4_init_ntmp_user(struct enetc_si *si) +{ + struct ntmp_user *user = &si->ntmp_user; + + /* For ENETC 4.1, all table versions are 0 */ + memset(&user->tbl, 0, sizeof(user->tbl)); + + return enetc4_setup_cbdr(si); +} + +static void enetc4_free_ntmp_user(struct enetc_si *si) +{ + enetc4_teardown_cbdr(si); +} + static int enetc4_pf_init(struct enetc_pf *pf) { struct device *dev = &pf->si->pdev->dev; @@ -272,17 +476,99 @@ static int enetc4_pf_init(struct enetc_pf *pf) return err; } + err = enetc4_init_ntmp_user(pf->si); + if (err) { + dev_err(dev, "Failed to init CBDR\n"); + return err; + } + enetc4_configure_port(pf); return 0; } +static void enetc4_pf_free(struct enetc_pf *pf) +{ + enetc4_free_ntmp_user(pf->si); +} + +static void enetc4_psi_do_set_rx_mode(struct work_struct *work) +{ + struct enetc_si *si = container_of(work, struct enetc_si, rx_mode_task); + struct enetc_pf *pf = enetc_si_priv(si); + struct net_device *ndev = si->ndev; + struct enetc_hw *hw = &si->hw; + bool uc_promisc = false; + bool mc_promisc = false; + int type = 0; + + rtnl_lock(); + + if (ndev->flags & IFF_PROMISC) { + uc_promisc = true; + mc_promisc = true; + } else if (ndev->flags & IFF_ALLMULTI) { + mc_promisc = true; + type = ENETC_MAC_FILTER_TYPE_UC; + } else { + type = ENETC_MAC_FILTER_TYPE_ALL; + } + + enetc4_pf_set_si_mac_promisc(hw, 0, uc_promisc, mc_promisc); + + if (uc_promisc) { + enetc4_pf_set_si_uc_hash_filter(hw, 0, 0); + enetc4_pf_clear_maft_entries(pf); + } + + if (mc_promisc) + enetc4_pf_set_si_mc_hash_filter(hw, 0, 0); + + /* Set new MAC filter */ + enetc4_pf_set_mac_filter(pf, type); + + rtnl_unlock(); +} + +static void enetc4_pf_set_rx_mode(struct net_device *ndev) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_si *si = priv->si; + + queue_work(si->workqueue, &si->rx_mode_task); +} + +static int enetc4_pf_set_features(struct net_device *ndev, + netdev_features_t features) +{ + netdev_features_t changed = ndev->features ^ features; + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; + + if (changed & 
NETIF_F_HW_VLAN_CTAG_FILTER) { + bool promisc_en = !(features & NETIF_F_HW_VLAN_CTAG_FILTER); + + enetc4_pf_set_si_vlan_promisc(hw, 0, promisc_en); + } + + if (changed & NETIF_F_LOOPBACK) + enetc4_pf_set_loopback(ndev, !!(features & NETIF_F_LOOPBACK)); + + enetc_set_features(ndev, features); + + return 0; +} + static const struct net_device_ops enetc4_ndev_ops = { .ndo_open = enetc_open, .ndo_stop = enetc_close, .ndo_start_xmit = enetc_xmit, .ndo_get_stats = enetc_get_stats, .ndo_set_mac_address = enetc_pf_set_mac_addr, + .ndo_set_rx_mode = enetc4_pf_set_rx_mode, + .ndo_set_features = enetc4_pf_set_features, + .ndo_vlan_rx_add_vid = enetc_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = enetc_vlan_rx_del_vid, }; static struct phylink_pcs * @@ -617,6 +903,19 @@ static void enetc4_link_deinit(struct enetc_ndev_priv *priv) enetc_mdiobus_destroy(pf); } +static int enetc4_psi_wq_task_init(struct enetc_si *si) +{ + char wq_name[24]; + + INIT_WORK(&si->rx_mode_task, enetc4_psi_do_set_rx_mode); + snprintf(wq_name, sizeof(wq_name), "enetc-%s", pci_name(si->pdev)); + si->workqueue = create_singlethread_workqueue(wq_name); + if (!si->workqueue) + return -ENOMEM; + + return 0; +} + static int enetc4_pf_netdev_create(struct enetc_si *si) { struct device *dev = &si->pdev->dev; @@ -657,6 +956,12 @@ static int enetc4_pf_netdev_create(struct enetc_si *si) if (err) goto err_link_init; + err = enetc4_psi_wq_task_init(si); + if (err) { + dev_err(dev, "Failed to init workqueue\n"); + goto err_wq_init; + } + err = register_netdev(ndev); if (err) { dev_err(dev, "Failed to register netdev\n"); @@ -667,6 +972,7 @@ static int enetc4_pf_netdev_create(struct enetc_si *si) err_reg_netdev: enetc4_link_deinit(priv); +err_wq_init: err_link_init: enetc_free_msix(priv); err_alloc_msix: @@ -683,11 +989,18 @@ static void enetc4_pf_netdev_destroy(struct enetc_si *si) struct net_device *ndev = si->ndev; unregister_netdev(ndev); + cancel_work(&si->rx_mode_task); + destroy_workqueue(si->workqueue); enetc4_link_deinit(priv); enetc_free_msix(priv); free_netdev(ndev); } +static const struct enetc_si_ops enetc4_psi_ops = { + .get_rss_table = enetc4_get_rss_table, + .set_rss_table = enetc4_set_rss_table, +}; + static int enetc4_pf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -712,6 +1025,7 @@ static int enetc4_pf_probe(struct pci_dev *pdev, "Couldn't map PF only space\n"); si->revision = enetc_get_ip_revision(&si->hw); + si->ops = &enetc4_psi_ops; err = enetc_get_driver_data(si); if (err) return dev_err_probe(dev, err, @@ -728,14 +1042,28 @@ static int enetc4_pf_probe(struct pci_dev *pdev, enetc_get_si_caps(si); - return enetc4_pf_netdev_create(si); + err = enetc4_pf_netdev_create(si); + if (err) + goto err_netdev_create; + + enetc_create_debugfs(si); + + return 0; + +err_netdev_create: + enetc4_pf_free(pf); + + return err; } static void enetc4_pf_remove(struct pci_dev *pdev) { struct enetc_si *si = pci_get_drvdata(pdev); + struct enetc_pf *pf = enetc_si_priv(si); + enetc_remove_debugfs(si); enetc4_pf_netdev_destroy(si); + enetc4_pf_free(pf); } static const struct pci_device_id enetc4_pf_id_table[] = { diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c index 20bfdf7fb4b4..3d5f31879d5c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c @@ -60,6 +60,44 @@ void enetc_teardown_cbdr(struct enetc_cbdr *cbdr) } EXPORT_SYMBOL_GPL(enetc_teardown_cbdr); +int enetc4_setup_cbdr(struct enetc_si *si) +{ 
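+ /* Note: a single command BD ring per SI is assumed here; the only
+  * NTMP user wired up so far (i.MX95 ENETC) has exactly one, see the
+  * comment in netc_xmit_ntmp_cmd().
+  */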
+ struct ntmp_user *user = &si->ntmp_user; + struct device *dev = &si->pdev->dev; + struct enetc_hw *hw = &si->hw; + struct netc_cbdr_regs regs; + + user->cbdr_num = 1; + user->dev = dev; + user->ring = devm_kcalloc(dev, user->cbdr_num, + sizeof(struct netc_cbdr), GFP_KERNEL); + if (!user->ring) + return -ENOMEM; + + /* set CBDR cache attributes */ + enetc_wr(hw, ENETC_SICAR2, + ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT); + + regs.pir = hw->reg + ENETC_SICBDRPIR; + regs.cir = hw->reg + ENETC_SICBDRCIR; + regs.mr = hw->reg + ENETC_SICBDRMR; + regs.bar0 = hw->reg + ENETC_SICBDRBAR0; + regs.bar1 = hw->reg + ENETC_SICBDRBAR1; + regs.lenr = hw->reg + ENETC_SICBDRLENR; + + return ntmp_init_cbdr(user->ring, dev, ®s); +} +EXPORT_SYMBOL_GPL(enetc4_setup_cbdr); + +void enetc4_teardown_cbdr(struct enetc_si *si) +{ + struct ntmp_user *user = &si->ntmp_user; + + ntmp_free_cbdr(user->ring); + user->dev = NULL; +} +EXPORT_SYMBOL_GPL(enetc4_teardown_cbdr); + static void enetc_clean_cbdr(struct enetc_cbdr *ring) { struct enetc_cbd *dest_cbd; @@ -256,3 +294,15 @@ int enetc_set_rss_table(struct enetc_si *si, const u32 *table, int count) return enetc_cmd_rss_table(si, (u32 *)table, count, false); } EXPORT_SYMBOL_GPL(enetc_set_rss_table); + +int enetc4_get_rss_table(struct enetc_si *si, u32 *table, int count) +{ + return ntmp_rsst_query_entry(&si->ntmp_user, table, count); +} +EXPORT_SYMBOL_GPL(enetc4_get_rss_table); + +int enetc4_set_rss_table(struct enetc_si *si, const u32 *table, int count) +{ + return ntmp_rsst_update_entry(&si->ntmp_user, table, count); +} +EXPORT_SYMBOL_GPL(enetc4_set_rss_table); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index ece3ae28ba82..d38cd36be4a6 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -141,7 +141,7 @@ static const struct { static const struct { int reg; - char name[ETH_GSTRING_LEN]; + char name[ETH_GSTRING_LEN] __nonstring; } enetc_port_counters[] = { { ENETC_PM_REOCT(0), "MAC rx ethernet octets" }, { ENETC_PM_RALN(0), "MAC rx alignment errors" }, @@ -264,7 +264,7 @@ static void enetc_get_strings(struct net_device *ndev, u32 stringset, u8 *data) break; for (i = 0; i < ARRAY_SIZE(enetc_port_counters); i++) - ethtool_puts(&data, enetc_port_counters[i].name); + ethtool_cpy(&data, enetc_port_counters[i].name); break; } @@ -625,6 +625,29 @@ static int enetc_get_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *rxnfc, return 0; } +/* i.MX95 ENETC does not support RFS table, but we can use ingress port + * filter table to implement Wake-on-LAN filter or drop the matched flow, + * so the implementation will be different from enetc_get_rxnfc() and + * enetc_set_rxnfc(). Therefore, add enetc4_get_rxnfc() for ENETC v4 PF. 
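+ * For now the v4 callback only reports the number of Rx rings
+ * (ETHTOOL_GRXRINGS) and the RSS hash fields (ETHTOOL_GRXFH); flow
+ * steering rules are not exposed yet.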
+ */ +static int enetc4_get_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *rxnfc, + u32 *rule_locs) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + + switch (rxnfc->cmd) { + case ETHTOOL_GRXRINGS: + rxnfc->data = priv->num_rx_rings; + break; + case ETHTOOL_GRXFH: + return enetc_get_rsshash(rxnfc); + default: + return -EOPNOTSUPP; + } + + return 0; +} + static int enetc_set_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *rxnfc) { struct enetc_ndev_priv *priv = netdev_priv(ndev); @@ -677,36 +700,53 @@ static u32 enetc_get_rxfh_indir_size(struct net_device *ndev) return priv->si->num_rss; } +static int enetc_get_rss_key_base(struct enetc_si *si) +{ + if (is_enetc_rev1(si)) + return ENETC_PRSSK(0); + + return ENETC4_PRSSKR(0); +} + +static void enetc_get_rss_key(struct enetc_si *si, const u8 *key) +{ + int base = enetc_get_rss_key_base(si); + struct enetc_hw *hw = &si->hw; + int i; + + for (i = 0; i < ENETC_RSSHASH_KEY_SIZE / 4; i++) + ((u32 *)key)[i] = enetc_port_rd(hw, base + i * 4); +} + static int enetc_get_rxfh(struct net_device *ndev, struct ethtool_rxfh_param *rxfh) { struct enetc_ndev_priv *priv = netdev_priv(ndev); - struct enetc_hw *hw = &priv->si->hw; - int err = 0, i; + struct enetc_si *si = priv->si; + int err = 0; /* return hash function */ rxfh->hfunc = ETH_RSS_HASH_TOP; /* return hash key */ - if (rxfh->key && hw->port) - for (i = 0; i < ENETC_RSSHASH_KEY_SIZE / 4; i++) - ((u32 *)rxfh->key)[i] = enetc_port_rd(hw, - ENETC_PRSSK(i)); + if (rxfh->key && enetc_si_is_pf(si)) + enetc_get_rss_key(si, rxfh->key); /* return RSS table */ if (rxfh->indir) - err = enetc_get_rss_table(priv->si, rxfh->indir, - priv->si->num_rss); + err = si->ops->get_rss_table(si, rxfh->indir, si->num_rss); return err; } -void enetc_set_rss_key(struct enetc_hw *hw, const u8 *bytes) +void enetc_set_rss_key(struct enetc_si *si, const u8 *bytes) { + int base = enetc_get_rss_key_base(si); + struct enetc_hw *hw = &si->hw; int i; for (i = 0; i < ENETC_RSSHASH_KEY_SIZE / 4; i++) - enetc_port_wr(hw, ENETC_PRSSK(i), ((u32 *)bytes)[i]); + enetc_port_wr(hw, base + i * 4, ((u32 *)bytes)[i]); } EXPORT_SYMBOL_GPL(enetc_set_rss_key); @@ -715,17 +755,16 @@ static int enetc_set_rxfh(struct net_device *ndev, struct netlink_ext_ack *extack) { struct enetc_ndev_priv *priv = netdev_priv(ndev); - struct enetc_hw *hw = &priv->si->hw; + struct enetc_si *si = priv->si; int err = 0; /* set hash key, if PF */ - if (rxfh->key && hw->port) - enetc_set_rss_key(hw, rxfh->key); + if (rxfh->key && enetc_si_is_pf(si)) + enetc_set_rss_key(si, rxfh->key); /* set RSS table */ if (rxfh->indir) - err = enetc_set_rss_table(priv->si, rxfh->indir, - priv->si->num_rss); + err = si->ops->set_rss_table(si, rxfh->indir, si->num_rss); return err; } @@ -1240,6 +1279,11 @@ const struct ethtool_ops enetc4_pf_ethtool_ops = { .set_wol = enetc_set_wol, .get_pauseparam = enetc_get_pauseparam, .set_pauseparam = enetc_set_pauseparam, + .get_rxnfc = enetc4_get_rxnfc, + .get_rxfh_key_size = enetc_get_rxfh_key_size, + .get_rxfh_indir_size = enetc_get_rxfh_indir_size, + .get_rxfh = enetc_get_rxfh, + .set_rxfh = enetc_set_rxfh, }; void enetc_set_ethtool_ops(struct net_device *ndev) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 203862ec1114..f63a29e2e031 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -72,30 +72,6 @@ static void enetc_set_isol_vlan(struct enetc_hw *hw, int si, u16 vlan, u8 qos) 
enetc_port_wr(hw, ENETC_PSIVLANR(si), val); } -static int enetc_mac_addr_hash_idx(const u8 *addr) -{ - u64 fold = __swab64(ether_addr_to_u64(addr)) >> 16; - u64 mask = 0; - int res = 0; - int i; - - for (i = 0; i < 8; i++) - mask |= BIT_ULL(i * 6); - - for (i = 0; i < 6; i++) - res |= (hweight64(fold & (mask << i)) & 0x1) << i; - - return res; -} - -static void enetc_reset_mac_addr_filter(struct enetc_mac_filter *filter) -{ - filter->mac_addr_cnt = 0; - - bitmap_zero(filter->mac_hash_table, - ENETC_MADDR_HASH_TBL_SZ); -} - static void enetc_add_mac_addr_em_filter(struct enetc_mac_filter *filter, const unsigned char *addr) { @@ -104,16 +80,6 @@ static void enetc_add_mac_addr_em_filter(struct enetc_mac_filter *filter, filter->mac_addr_cnt++; } -static void enetc_add_mac_addr_ht_filter(struct enetc_mac_filter *filter, - const unsigned char *addr) -{ - int idx = enetc_mac_addr_hash_idx(addr); - - /* add hash table entry */ - __set_bit(idx, filter->mac_hash_table); - filter->mac_addr_cnt++; -} - static void enetc_clear_mac_ht_flt(struct enetc_si *si, int si_idx, int type) { bool err = si->errata & ENETC_ERR_UCMCSWP; @@ -250,67 +216,6 @@ static void enetc_pf_set_rx_mode(struct net_device *ndev) enetc_port_wr(hw, ENETC_PSIPMR, psipmr); } -static void enetc_set_vlan_ht_filter(struct enetc_hw *hw, int si_idx, - unsigned long hash) -{ - enetc_port_wr(hw, ENETC_PSIVHFR0(si_idx), lower_32_bits(hash)); - enetc_port_wr(hw, ENETC_PSIVHFR1(si_idx), upper_32_bits(hash)); -} - -static int enetc_vid_hash_idx(unsigned int vid) -{ - int res = 0; - int i; - - for (i = 0; i < 6; i++) - res |= (hweight8(vid & (BIT(i) | BIT(i + 6))) & 0x1) << i; - - return res; -} - -static void enetc_sync_vlan_ht_filter(struct enetc_pf *pf, bool rehash) -{ - int i; - - if (rehash) { - bitmap_zero(pf->vlan_ht_filter, ENETC_VLAN_HT_SIZE); - - for_each_set_bit(i, pf->active_vlans, VLAN_N_VID) { - int hidx = enetc_vid_hash_idx(i); - - __set_bit(hidx, pf->vlan_ht_filter); - } - } - - enetc_set_vlan_ht_filter(&pf->si->hw, 0, *pf->vlan_ht_filter); -} - -static int enetc_vlan_rx_add_vid(struct net_device *ndev, __be16 prot, u16 vid) -{ - struct enetc_ndev_priv *priv = netdev_priv(ndev); - struct enetc_pf *pf = enetc_si_priv(priv->si); - int idx; - - __set_bit(vid, pf->active_vlans); - - idx = enetc_vid_hash_idx(vid); - if (!__test_and_set_bit(idx, pf->vlan_ht_filter)) - enetc_sync_vlan_ht_filter(pf, false); - - return 0; -} - -static int enetc_vlan_rx_del_vid(struct net_device *ndev, __be16 prot, u16 vid) -{ - struct enetc_ndev_priv *priv = netdev_priv(ndev); - struct enetc_pf *pf = enetc_si_priv(priv->si); - - __clear_bit(vid, pf->active_vlans); - enetc_sync_vlan_ht_filter(pf, true); - - return 0; -} - static void enetc_set_loopback(struct net_device *ndev, bool en) { struct enetc_ndev_priv *priv = netdev_priv(ndev); @@ -549,7 +454,6 @@ static void enetc_mac_enable(struct enetc_si *si, bool en) static void enetc_configure_port(struct enetc_pf *pf) { - u8 hash_key[ENETC_RSSHASH_KEY_SIZE]; struct enetc_hw *hw = &pf->si->hw; enetc_configure_port_mac(pf->si); @@ -557,8 +461,7 @@ static void enetc_configure_port(struct enetc_pf *pf) enetc_port_si_configure(pf->si); /* set up hash key */ - get_random_bytes(hash_key, ENETC_RSSHASH_KEY_SIZE); - enetc_set_rss_key(hw, hash_key); + enetc_set_default_rss_key(pf); /* split up RFS entries */ enetc_port_assign_rfs_entries(pf->si); @@ -728,6 +631,8 @@ static const struct net_device_ops enetc_ndev_ops = { .ndo_setup_tc = enetc_pf_setup_tc, .ndo_bpf = enetc_setup_bpf, .ndo_xdp_xmit = enetc_xdp_xmit, + 
.ndo_hwtstamp_get = enetc_hwtstamp_get, + .ndo_hwtstamp_set = enetc_hwtstamp_set, }; static struct phylink_pcs * @@ -939,6 +844,11 @@ static int enetc_pf_register_with_ierb(struct pci_dev *pdev) return enetc_ierb_register_pf(ierb_pdev, pdev); } +static const struct enetc_si_ops enetc_psi_ops = { + .get_rss_table = enetc_get_rss_table, + .set_rss_table = enetc_set_rss_table, +}; + static struct enetc_si *enetc_psi_create(struct pci_dev *pdev) { struct enetc_si *si; @@ -958,6 +868,7 @@ static struct enetc_si *enetc_psi_create(struct pci_dev *pdev) } si->revision = enetc_get_ip_revision(&si->hw); + si->ops = &enetc_psi_ops; err = enetc_get_driver_data(si); if (err) { dev_err(&pdev->dev, "Could not get PF driver data\n"); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.h b/drivers/net/ethernet/freescale/enetc/enetc_pf.h index a26a12863855..ae407e9e9ee7 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.h @@ -5,19 +5,8 @@ #include <linux/phylink.h> #define ENETC_PF_NUM_RINGS 8 - -enum enetc_mac_addr_type {UC, MC, MADDR_TYPE}; #define ENETC_MAX_NUM_MAC_FLT ((ENETC_MAX_NUM_VFS + 1) * MADDR_TYPE) -#define ENETC_MADDR_HASH_TBL_SZ 64 -struct enetc_mac_filter { - union { - char mac_addr[ETH_ALEN]; - DECLARE_BITMAP(mac_hash_table, ENETC_MADDR_HASH_TBL_SZ); - }; - int mac_addr_cnt; -}; - #define ENETC_VLAN_HT_SIZE 64 enum enetc_vf_flags { @@ -34,6 +23,7 @@ struct enetc_port_caps { int num_msix; int num_rx_bdr; int num_tx_bdr; + int mac_filter_num; }; struct enetc_pf; @@ -71,6 +61,8 @@ struct enetc_pf { struct enetc_port_caps caps; const struct enetc_pf_ops *ops; + + int num_mfe; /* number of mac address filter table entries */ }; #define phylink_to_enetc_pf(config) \ diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c index 3fd9b0727875..edf14a95cab7 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c @@ -128,14 +128,14 @@ void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, if (si->hw_features & ENETC_SI_F_LSO) priv->active_offloads |= ENETC_F_LSO; - /* TODO: currently, i.MX95 ENETC driver does not support advanced features */ - if (!is_enetc_rev1(si)) { - ndev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_LOOPBACK); - goto end; + if (si->num_rss) { + ndev->hw_features |= NETIF_F_RXHASH; + ndev->features |= NETIF_F_RXHASH; } - if (si->num_rss) - ndev->hw_features |= NETIF_F_RXHASH; + /* TODO: currently, i.MX95 ENETC driver does not support advanced features */ + if (!is_enetc_rev1(si)) + goto end; ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG | @@ -341,5 +341,86 @@ void enetc_phylink_destroy(struct enetc_ndev_priv *priv) } EXPORT_SYMBOL_GPL(enetc_phylink_destroy); +void enetc_set_default_rss_key(struct enetc_pf *pf) +{ + u8 hash_key[ENETC_RSSHASH_KEY_SIZE] = {0}; + + /* set up hash key */ + get_random_bytes(hash_key, ENETC_RSSHASH_KEY_SIZE); + enetc_set_rss_key(pf->si, hash_key); +} +EXPORT_SYMBOL_GPL(enetc_set_default_rss_key); + +static int enetc_vid_hash_idx(unsigned int vid) +{ + int res = 0; + int i; + + for (i = 0; i < 6; i++) + res |= (hweight8(vid & (BIT(i) | BIT(i + 6))) & 0x1) << i; + + return res; +} + +static void enetc_refresh_vlan_ht_filter(struct enetc_pf *pf) +{ + int i; + + bitmap_zero(pf->vlan_ht_filter, ENETC_VLAN_HT_SIZE); + for_each_set_bit(i, pf->active_vlans, 
VLAN_N_VID) { + int hidx = enetc_vid_hash_idx(i); + + __set_bit(hidx, pf->vlan_ht_filter); + } +} + +static void enetc_set_si_vlan_ht_filter(struct enetc_si *si, + int si_id, u64 hash) +{ + struct enetc_hw *hw = &si->hw; + int high_reg_off, low_reg_off; + + if (is_enetc_rev1(si)) { + low_reg_off = ENETC_PSIVHFR0(si_id); + high_reg_off = ENETC_PSIVHFR1(si_id); + } else { + low_reg_off = ENETC4_PSIVHFR0(si_id); + high_reg_off = ENETC4_PSIVHFR1(si_id); + } + + enetc_port_wr(hw, low_reg_off, lower_32_bits(hash)); + enetc_port_wr(hw, high_reg_off, upper_32_bits(hash)); +} + +int enetc_vlan_rx_add_vid(struct net_device *ndev, __be16 prot, u16 vid) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_pf *pf = enetc_si_priv(priv->si); + int idx; + + __set_bit(vid, pf->active_vlans); + + idx = enetc_vid_hash_idx(vid); + if (!__test_and_set_bit(idx, pf->vlan_ht_filter)) + enetc_set_si_vlan_ht_filter(pf->si, 0, *pf->vlan_ht_filter); + + return 0; +} +EXPORT_SYMBOL_GPL(enetc_vlan_rx_add_vid); + +int enetc_vlan_rx_del_vid(struct net_device *ndev, __be16 prot, u16 vid) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_pf *pf = enetc_si_priv(priv->si); + + if (__test_and_clear_bit(vid, pf->active_vlans)) { + enetc_refresh_vlan_ht_filter(pf); + enetc_set_si_vlan_ht_filter(pf->si, 0, *pf->vlan_ht_filter); + } + + return 0; +} +EXPORT_SYMBOL_GPL(enetc_vlan_rx_del_vid); + MODULE_DESCRIPTION("NXP ENETC PF common functionality driver"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.h b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.h index 48f55ee743ad..96d4840a3107 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.h @@ -12,6 +12,9 @@ void enetc_mdiobus_destroy(struct enetc_pf *pf); int enetc_phylink_create(struct enetc_ndev_priv *priv, struct device_node *node, const struct phylink_mac_ops *ops); void enetc_phylink_destroy(struct enetc_ndev_priv *priv); +void enetc_set_default_rss_key(struct enetc_pf *pf); +int enetc_vlan_rx_add_vid(struct net_device *ndev, __be16 prot, u16 vid); +int enetc_vlan_rx_del_vid(struct net_device *ndev, __be16 prot, u16 vid); static inline u16 enetc_get_ip_revision(struct enetc_hw *hw) { diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index 3768752b6008..6c4b374bcb0e 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -121,6 +121,8 @@ static const struct net_device_ops enetc_ndev_ops = { .ndo_set_features = enetc_vf_set_features, .ndo_eth_ioctl = enetc_ioctl, .ndo_setup_tc = enetc_vf_setup_tc, + .ndo_hwtstamp_get = enetc_hwtstamp_get, + .ndo_hwtstamp_set = enetc_hwtstamp_set, }; static void enetc_vf_netdev_setup(struct enetc_si *si, struct net_device *ndev, @@ -155,13 +157,20 @@ static void enetc_vf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->vlan_features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6; - if (si->num_rss) + if (si->num_rss) { ndev->hw_features |= NETIF_F_RXHASH; + ndev->features |= NETIF_F_RXHASH; + } /* pick up primary MAC address from SI */ enetc_load_primary_mac_addr(&si->hw, ndev); } +static const struct enetc_si_ops enetc_vsi_ops = { + .get_rss_table = enetc_get_rss_table, + .set_rss_table = enetc_set_rss_table, +}; + static int enetc_vf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -176,6 +185,7 @@ static int 
enetc_vf_probe(struct pci_dev *pdev, si = pci_get_drvdata(pdev); si->revision = ENETC_REV_1_0; + si->ops = &enetc_vsi_ops; err = enetc_get_driver_data(si); if (err) { dev_err_probe(&pdev->dev, err, diff --git a/drivers/net/ethernet/freescale/enetc/ntmp.c b/drivers/net/ethernet/freescale/enetc/ntmp.c new file mode 100644 index 000000000000..ba32c1bbd9e1 --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/ntmp.c @@ -0,0 +1,462 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* + * NETC NTMP (NETC Table Management Protocol) 2.0 Library + * Copyright 2025 NXP + */ + +#include <linux/dma-mapping.h> +#include <linux/fsl/netc_global.h> +#include <linux/iopoll.h> + +#include "ntmp_private.h" + +#define NETC_CBDR_TIMEOUT 1000 /* us */ +#define NETC_CBDR_DELAY_US 10 +#define NETC_CBDR_MR_EN BIT(31) + +#define NTMP_BASE_ADDR_ALIGN 128 +#define NTMP_DATA_ADDR_ALIGN 32 + +/* Define NTMP Table ID */ +#define NTMP_MAFT_ID 1 +#define NTMP_RSST_ID 3 + +/* Generic Update Actions for most tables */ +#define NTMP_GEN_UA_CFGEU BIT(0) +#define NTMP_GEN_UA_STSEU BIT(1) + +#define NTMP_ENTRY_ID_SIZE 4 +#define RSST_ENTRY_NUM 64 +#define RSST_STSE_DATA_SIZE(n) ((n) * 8) +#define RSST_CFGE_DATA_SIZE(n) (n) + +int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev, + const struct netc_cbdr_regs *regs) +{ + int cbd_num = NETC_CBDR_BD_NUM; + size_t size; + + size = cbd_num * sizeof(union netc_cbd) + NTMP_BASE_ADDR_ALIGN; + cbdr->addr_base = dma_alloc_coherent(dev, size, &cbdr->dma_base, + GFP_KERNEL); + if (!cbdr->addr_base) + return -ENOMEM; + + cbdr->dma_size = size; + cbdr->bd_num = cbd_num; + cbdr->regs = *regs; + cbdr->dev = dev; + + /* The base address of the Control BD Ring must be 128 bytes aligned */ + cbdr->dma_base_align = ALIGN(cbdr->dma_base, NTMP_BASE_ADDR_ALIGN); + cbdr->addr_base_align = PTR_ALIGN(cbdr->addr_base, + NTMP_BASE_ADDR_ALIGN); + + cbdr->next_to_clean = 0; + cbdr->next_to_use = 0; + spin_lock_init(&cbdr->ring_lock); + + /* Step 1: Configure the base address of the Control BD Ring */ + netc_write(cbdr->regs.bar0, lower_32_bits(cbdr->dma_base_align)); + netc_write(cbdr->regs.bar1, upper_32_bits(cbdr->dma_base_align)); + + /* Step 2: Configure the producer index register */ + netc_write(cbdr->regs.pir, cbdr->next_to_clean); + + /* Step 3: Configure the consumer index register */ + netc_write(cbdr->regs.cir, cbdr->next_to_use); + + /* Step4: Configure the number of BDs of the Control BD Ring */ + netc_write(cbdr->regs.lenr, cbdr->bd_num); + + /* Step 5: Enable the Control BD Ring */ + netc_write(cbdr->regs.mr, NETC_CBDR_MR_EN); + + return 0; +} +EXPORT_SYMBOL_GPL(ntmp_init_cbdr); + +void ntmp_free_cbdr(struct netc_cbdr *cbdr) +{ + /* Disable the Control BD Ring */ + netc_write(cbdr->regs.mr, 0); + dma_free_coherent(cbdr->dev, cbdr->dma_size, cbdr->addr_base, + cbdr->dma_base); + memset(cbdr, 0, sizeof(*cbdr)); +} +EXPORT_SYMBOL_GPL(ntmp_free_cbdr); + +static int ntmp_get_free_cbd_num(struct netc_cbdr *cbdr) +{ + return (cbdr->next_to_clean - cbdr->next_to_use - 1 + + cbdr->bd_num) % cbdr->bd_num; +} + +static union netc_cbd *ntmp_get_cbd(struct netc_cbdr *cbdr, int index) +{ + return &((union netc_cbd *)(cbdr->addr_base_align))[index]; +} + +static void ntmp_clean_cbdr(struct netc_cbdr *cbdr) +{ + union netc_cbd *cbd; + int i; + + i = cbdr->next_to_clean; + while (netc_read(cbdr->regs.cir) != i) { + cbd = ntmp_get_cbd(cbdr, i); + memset(cbd, 0, sizeof(*cbd)); + i = (i + 1) % cbdr->bd_num; + } + + cbdr->next_to_clean = i; +} + +static int netc_xmit_ntmp_cmd(struct 
ntmp_user *user, union netc_cbd *cbd) +{ + union netc_cbd *cur_cbd; + struct netc_cbdr *cbdr; + int i, err; + u16 status; + u32 val; + + /* Currently only i.MX95 ENETC is supported, and it only has one + * command BD ring + */ + cbdr = &user->ring[0]; + + spin_lock_bh(&cbdr->ring_lock); + + if (unlikely(!ntmp_get_free_cbd_num(cbdr))) + ntmp_clean_cbdr(cbdr); + + i = cbdr->next_to_use; + cur_cbd = ntmp_get_cbd(cbdr, i); + *cur_cbd = *cbd; + dma_wmb(); + + /* Update producer index of both software and hardware */ + i = (i + 1) % cbdr->bd_num; + cbdr->next_to_use = i; + netc_write(cbdr->regs.pir, i); + + err = read_poll_timeout_atomic(netc_read, val, val == i, + NETC_CBDR_DELAY_US, NETC_CBDR_TIMEOUT, + true, cbdr->regs.cir); + if (unlikely(err)) + goto cbdr_unlock; + + dma_rmb(); + /* Get the writeback command BD, because the caller may need + * to check some other fields of the response header. + */ + *cbd = *cur_cbd; + + /* Check the writeback error status */ + status = le16_to_cpu(cbd->resp_hdr.error_rr) & NTMP_RESP_ERROR; + if (unlikely(status)) { + err = -EIO; + dev_err(user->dev, "Command BD error: 0x%04x\n", status); + } + + ntmp_clean_cbdr(cbdr); + dma_wmb(); + +cbdr_unlock: + spin_unlock_bh(&cbdr->ring_lock); + + return err; +} + +static int ntmp_alloc_data_mem(struct ntmp_dma_buf *data, void **buf_align) +{ + void *buf; + + buf = dma_alloc_coherent(data->dev, data->size + NTMP_DATA_ADDR_ALIGN, + &data->dma, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + data->buf = buf; + *buf_align = PTR_ALIGN(buf, NTMP_DATA_ADDR_ALIGN); + + return 0; +} + +static void ntmp_free_data_mem(struct ntmp_dma_buf *data) +{ + dma_free_coherent(data->dev, data->size + NTMP_DATA_ADDR_ALIGN, + data->buf, data->dma); +} + +static void ntmp_fill_request_hdr(union netc_cbd *cbd, dma_addr_t dma, + int len, int table_id, int cmd, + int access_method) +{ + dma_addr_t dma_align; + + memset(cbd, 0, sizeof(*cbd)); + dma_align = ALIGN(dma, NTMP_DATA_ADDR_ALIGN); + cbd->req_hdr.addr = cpu_to_le64(dma_align); + cbd->req_hdr.len = cpu_to_le32(len); + cbd->req_hdr.cmd = cmd; + cbd->req_hdr.access_method = FIELD_PREP(NTMP_ACCESS_METHOD, + access_method); + cbd->req_hdr.table_id = table_id; + cbd->req_hdr.ver_cci_rr = FIELD_PREP(NTMP_HDR_VERSION, + NTMP_HDR_VER2); + /* For NTMP version 2.0 or later version */ + cbd->req_hdr.npf = cpu_to_le32(NTMP_NPF); +} + +static void ntmp_fill_crd(struct ntmp_cmn_req_data *crd, u8 tblv, + u8 qa, u16 ua) +{ + crd->update_act = cpu_to_le16(ua); + crd->tblv_qact = NTMP_TBLV_QACT(tblv, qa); +} + +static void ntmp_fill_crd_eid(struct ntmp_req_by_eid *rbe, u8 tblv, + u8 qa, u16 ua, u32 entry_id) +{ + ntmp_fill_crd(&rbe->crd, tblv, qa, ua); + rbe->entry_id = cpu_to_le32(entry_id); +} + +static const char *ntmp_table_name(int tbl_id) +{ + switch (tbl_id) { + case NTMP_MAFT_ID: + return "MAC Address Filter Table"; + case NTMP_RSST_ID: + return "RSS Table"; + default: + return "Unknown Table"; + }; +} + +static int ntmp_delete_entry_by_id(struct ntmp_user *user, int tbl_id, + u8 tbl_ver, u32 entry_id, u32 req_len, + u32 resp_len) +{ + struct ntmp_dma_buf data = { + .dev = user->dev, + .size = max(req_len, resp_len), + }; + struct ntmp_req_by_eid *req; + union netc_cbd cbd; + int err; + + err = ntmp_alloc_data_mem(&data, (void **)&req); + if (err) + return err; + + ntmp_fill_crd_eid(req, tbl_ver, 0, 0, entry_id); + ntmp_fill_request_hdr(&cbd, data.dma, NTMP_LEN(req_len, resp_len), + tbl_id, NTMP_CMD_DELETE, NTMP_AM_ENTRY_ID); + + err = netc_xmit_ntmp_cmd(user, &cbd); + if (err) + dev_err(user->dev, 
+ "Failed to delete entry 0x%x of %s, err: %pe", + entry_id, ntmp_table_name(tbl_id), ERR_PTR(err)); + + ntmp_free_data_mem(&data); + + return err; +} + +static int ntmp_query_entry_by_id(struct ntmp_user *user, int tbl_id, + u32 len, struct ntmp_req_by_eid *req, + dma_addr_t dma, bool compare_eid) +{ + struct ntmp_cmn_resp_query *resp; + int cmd = NTMP_CMD_QUERY; + union netc_cbd cbd; + u32 entry_id; + int err; + + entry_id = le32_to_cpu(req->entry_id); + if (le16_to_cpu(req->crd.update_act)) + cmd = NTMP_CMD_QU; + + /* Request header */ + ntmp_fill_request_hdr(&cbd, dma, len, tbl_id, cmd, NTMP_AM_ENTRY_ID); + err = netc_xmit_ntmp_cmd(user, &cbd); + if (err) { + dev_err(user->dev, + "Failed to query entry 0x%x of %s, err: %pe\n", + entry_id, ntmp_table_name(tbl_id), ERR_PTR(err)); + return err; + } + + /* For a few tables, the first field of their response data is not + * entry_id, so directly return success. + */ + if (!compare_eid) + return 0; + + resp = (struct ntmp_cmn_resp_query *)req; + if (unlikely(le32_to_cpu(resp->entry_id) != entry_id)) { + dev_err(user->dev, + "%s: query EID 0x%x doesn't match response EID 0x%x\n", + ntmp_table_name(tbl_id), entry_id, le32_to_cpu(resp->entry_id)); + return -EIO; + } + + return 0; +} + +int ntmp_maft_add_entry(struct ntmp_user *user, u32 entry_id, + struct maft_entry_data *maft) +{ + struct ntmp_dma_buf data = { + .dev = user->dev, + .size = sizeof(struct maft_req_add), + }; + struct maft_req_add *req; + union netc_cbd cbd; + int err; + + err = ntmp_alloc_data_mem(&data, (void **)&req); + if (err) + return err; + + /* Set mac address filter table request data buffer */ + ntmp_fill_crd_eid(&req->rbe, user->tbl.maft_ver, 0, 0, entry_id); + req->keye = maft->keye; + req->cfge = maft->cfge; + + ntmp_fill_request_hdr(&cbd, data.dma, NTMP_LEN(data.size, 0), + NTMP_MAFT_ID, NTMP_CMD_ADD, NTMP_AM_ENTRY_ID); + err = netc_xmit_ntmp_cmd(user, &cbd); + if (err) + dev_err(user->dev, "Failed to add MAFT entry 0x%x, err: %pe\n", + entry_id, ERR_PTR(err)); + + ntmp_free_data_mem(&data); + + return err; +} +EXPORT_SYMBOL_GPL(ntmp_maft_add_entry); + +int ntmp_maft_query_entry(struct ntmp_user *user, u32 entry_id, + struct maft_entry_data *maft) +{ + struct ntmp_dma_buf data = { + .dev = user->dev, + .size = sizeof(struct maft_resp_query), + }; + struct maft_resp_query *resp; + struct ntmp_req_by_eid *req; + int err; + + err = ntmp_alloc_data_mem(&data, (void **)&req); + if (err) + return err; + + ntmp_fill_crd_eid(req, user->tbl.maft_ver, 0, 0, entry_id); + err = ntmp_query_entry_by_id(user, NTMP_MAFT_ID, + NTMP_LEN(sizeof(*req), data.size), + req, data.dma, true); + if (err) + goto end; + + resp = (struct maft_resp_query *)req; + maft->keye = resp->keye; + maft->cfge = resp->cfge; + +end: + ntmp_free_data_mem(&data); + + return err; +} +EXPORT_SYMBOL_GPL(ntmp_maft_query_entry); + +int ntmp_maft_delete_entry(struct ntmp_user *user, u32 entry_id) +{ + return ntmp_delete_entry_by_id(user, NTMP_MAFT_ID, user->tbl.maft_ver, + entry_id, NTMP_EID_REQ_LEN, 0); +} +EXPORT_SYMBOL_GPL(ntmp_maft_delete_entry); + +int ntmp_rsst_update_entry(struct ntmp_user *user, const u32 *table, + int count) +{ + struct ntmp_dma_buf data = {.dev = user->dev}; + struct rsst_req_update *req; + union netc_cbd cbd; + int err, i; + + if (count != RSST_ENTRY_NUM) + /* HW only takes in a full 64 entry table */ + return -EINVAL; + + data.size = struct_size(req, groups, count); + err = ntmp_alloc_data_mem(&data, (void **)&req); + if (err) + return err; + + /* Set the request data buffer */ + 
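+ /* The update request is the common header plus entry id 0 (RSST is a
+  * single 64-entry table) followed by one CFGE byte per RSS group;
+  * both the CFGEU and STSEU update actions are set.
+  */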
ntmp_fill_crd_eid(&req->rbe, user->tbl.rsst_ver, 0, + NTMP_GEN_UA_CFGEU | NTMP_GEN_UA_STSEU, 0); + for (i = 0; i < count; i++) + req->groups[i] = (u8)(table[i]); + + ntmp_fill_request_hdr(&cbd, data.dma, NTMP_LEN(data.size, 0), + NTMP_RSST_ID, NTMP_CMD_UPDATE, NTMP_AM_ENTRY_ID); + + err = netc_xmit_ntmp_cmd(user, &cbd); + if (err) + dev_err(user->dev, "Failed to update RSST entry, err: %pe\n", + ERR_PTR(err)); + + ntmp_free_data_mem(&data); + + return err; +} +EXPORT_SYMBOL_GPL(ntmp_rsst_update_entry); + +int ntmp_rsst_query_entry(struct ntmp_user *user, u32 *table, int count) +{ + struct ntmp_dma_buf data = {.dev = user->dev}; + struct ntmp_req_by_eid *req; + union netc_cbd cbd; + int err, i; + u8 *group; + + if (count != RSST_ENTRY_NUM) + /* HW only takes in a full 64 entry table */ + return -EINVAL; + + data.size = NTMP_ENTRY_ID_SIZE + RSST_STSE_DATA_SIZE(count) + + RSST_CFGE_DATA_SIZE(count); + err = ntmp_alloc_data_mem(&data, (void **)&req); + if (err) + return err; + + /* Set the request data buffer */ + ntmp_fill_crd_eid(req, user->tbl.rsst_ver, 0, 0, 0); + ntmp_fill_request_hdr(&cbd, data.dma, NTMP_LEN(sizeof(*req), data.size), + NTMP_RSST_ID, NTMP_CMD_QUERY, NTMP_AM_ENTRY_ID); + err = netc_xmit_ntmp_cmd(user, &cbd); + if (err) { + dev_err(user->dev, "Failed to query RSST entry, err: %pe\n", + ERR_PTR(err)); + goto end; + } + + group = (u8 *)req; + group += NTMP_ENTRY_ID_SIZE + RSST_STSE_DATA_SIZE(count); + for (i = 0; i < count; i++) + table[i] = group[i]; + +end: + ntmp_free_data_mem(&data); + + return err; +} +EXPORT_SYMBOL_GPL(ntmp_rsst_query_entry); + +MODULE_DESCRIPTION("NXP NETC Library"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/ethernet/freescale/enetc/ntmp_private.h b/drivers/net/ethernet/freescale/enetc/ntmp_private.h new file mode 100644 index 000000000000..34394e40fddd --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/ntmp_private.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* + * NTMP table request and response data buffer formats + * Copyright 2025 NXP + */ + +#ifndef __NTMP_PRIVATE_H +#define __NTMP_PRIVATE_H + +#include <linux/bitfield.h> +#include <linux/fsl/ntmp.h> + +#define NTMP_EID_REQ_LEN 8 +#define NETC_CBDR_BD_NUM 256 + +union netc_cbd { + struct { + __le64 addr; + __le32 len; +#define NTMP_RESP_LEN GENMASK(19, 0) +#define NTMP_REQ_LEN GENMASK(31, 20) +#define NTMP_LEN(req, resp) (FIELD_PREP(NTMP_REQ_LEN, (req)) | \ + ((resp) & NTMP_RESP_LEN)) + u8 cmd; +#define NTMP_CMD_DELETE BIT(0) +#define NTMP_CMD_UPDATE BIT(1) +#define NTMP_CMD_QUERY BIT(2) +#define NTMP_CMD_ADD BIT(3) +#define NTMP_CMD_QU (NTMP_CMD_QUERY | NTMP_CMD_UPDATE) + u8 access_method; +#define NTMP_ACCESS_METHOD GENMASK(7, 4) +#define NTMP_AM_ENTRY_ID 0 +#define NTMP_AM_EXACT_KEY 1 +#define NTMP_AM_SEARCH 2 +#define NTMP_AM_TERNARY_KEY 3 + u8 table_id; + u8 ver_cci_rr; +#define NTMP_HDR_VERSION GENMASK(5, 0) +#define NTMP_HDR_VER2 2 +#define NTMP_CCI BIT(6) +#define NTMP_RR BIT(7) + __le32 resv[3]; + __le32 npf; +#define NTMP_NPF BIT(15) + } req_hdr; /* NTMP Request Message Header Format */ + + struct { + __le32 resv0[3]; + __le16 num_matched; + __le16 error_rr; +#define NTMP_RESP_ERROR GENMASK(11, 0) +#define NTMP_RESP_RR BIT(15) + __le32 resv1[4]; + } resp_hdr; /* NTMP Response Message Header Format */ +}; + +struct ntmp_dma_buf { + struct device *dev; + size_t size; + void *buf; + dma_addr_t dma; +}; + +struct ntmp_cmn_req_data { + __le16 update_act; + u8 dbg_opt; + u8 tblv_qact; +#define NTMP_QUERY_ACT GENMASK(3, 0) +#define 
NTMP_TBL_VER GENMASK(7, 4) +#define NTMP_TBLV_QACT(v, a) (FIELD_PREP(NTMP_TBL_VER, (v)) | \ + ((a) & NTMP_QUERY_ACT)) +}; + +struct ntmp_cmn_resp_query { + __le32 entry_id; +}; + +/* Generic structure for request data by entry ID */ +struct ntmp_req_by_eid { + struct ntmp_cmn_req_data crd; + __le32 entry_id; +}; + +/* MAC Address Filter Table Request Data Buffer Format of Add action */ +struct maft_req_add { + struct ntmp_req_by_eid rbe; + struct maft_keye_data keye; + struct maft_cfge_data cfge; +}; + +/* MAC Address Filter Table Response Data Buffer Format of Query action */ +struct maft_resp_query { + __le32 entry_id; + struct maft_keye_data keye; + struct maft_cfge_data cfge; +}; + +/* RSS Table Request Data Buffer Format of Update action */ +struct rsst_req_update { + struct ntmp_req_by_eid rbe; + u8 groups[]; +}; + +#endif diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index deb35b38c976..bcbcad613512 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2061,15 +2061,13 @@ static void gfar_timeout(struct net_device *dev, unsigned int txqueue) schedule_work(&priv->reset_task); } -static int gfar_hwtstamp_set(struct net_device *netdev, struct ifreq *ifr) +static int gfar_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { - struct hwtstamp_config config; struct gfar_private *priv = netdev_priv(netdev); - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - switch (config.tx_type) { + switch (config->tx_type) { case HWTSTAMP_TX_OFF: priv->hwts_tx_en = 0; break; @@ -2082,7 +2080,7 @@ static int gfar_hwtstamp_set(struct net_device *netdev, struct ifreq *ifr) return -ERANGE; } - switch (config.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: if (priv->hwts_rx_en) { priv->hwts_rx_en = 0; @@ -2096,44 +2094,23 @@ static int gfar_hwtstamp_set(struct net_device *netdev, struct ifreq *ifr) priv->hwts_rx_en = 1; reset_gfar(netdev); } - config.rx_filter = HWTSTAMP_FILTER_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; break; } - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; + return 0; } -static int gfar_hwtstamp_get(struct net_device *netdev, struct ifreq *ifr) +static int gfar_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) { - struct hwtstamp_config config; struct gfar_private *priv = netdev_priv(netdev); - config.flags = 0; - config.tx_type = priv->hwts_tx_en ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - config.rx_filter = (priv->hwts_rx_en ? - HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE); - - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; -} - -static int gfar_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -{ - struct phy_device *phydev = dev->phydev; + config->tx_type = priv->hwts_tx_en ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + config->rx_filter = priv->hwts_rx_en ? 
HWTSTAMP_FILTER_ALL : + HWTSTAMP_FILTER_NONE; - if (!netif_running(dev)) - return -EINVAL; - - if (cmd == SIOCSHWTSTAMP) - return gfar_hwtstamp_set(dev, rq); - if (cmd == SIOCGHWTSTAMP) - return gfar_hwtstamp_get(dev, rq); - - if (!phydev) - return -ENODEV; - - return phy_mii_ioctl(phydev, rq, cmd); + return 0; } /* Interrupt Handler for Transmit complete */ @@ -3174,7 +3151,7 @@ static const struct net_device_ops gfar_netdev_ops = { .ndo_set_features = gfar_set_features, .ndo_set_rx_mode = gfar_set_multi, .ndo_tx_timeout = gfar_timeout, - .ndo_eth_ioctl = gfar_ioctl, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_get_stats64 = gfar_get_stats64, .ndo_change_carrier = fixed_phy_change_carrier, .ndo_set_mac_address = gfar_set_mac_addr, @@ -3182,6 +3159,8 @@ static const struct net_device_ops gfar_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = gfar_netpoll, #endif + .ndo_hwtstamp_get = gfar_hwtstamp_get, + .ndo_hwtstamp_set = gfar_hwtstamp_set, }; /* Set up the ethernet device structure, private data, diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index eae1a7595a69..3c1da0cf3f61 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -67,7 +67,7 @@ static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = { "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]" }; -static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = { +static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] __nonstring_array = { "adminq_prod_cnt", "adminq_cmd_fail", "adminq_timeouts", "adminq_describe_device_cnt", "adminq_cfg_device_resources_cnt", "adminq_register_page_list_cnt", "adminq_unregister_page_list_cnt", @@ -113,7 +113,7 @@ static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data) i); for (i = 0; i < ARRAY_SIZE(gve_gstrings_adminq_stats); i++) - ethtool_puts(&s, gve_gstrings_adminq_stats[i]); + ethtool_cpy(&s, gve_gstrings_adminq_stats[i]); break; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index c3791cf23c87..e1ffbd561fac 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1830,7 +1830,7 @@ static void gve_turndown(struct gve_priv *priv) /* Stop tx queues */ netif_tx_disable(priv->dev); - xdp_features_clear_redirect_target(priv->dev); + xdp_features_clear_redirect_target_locked(priv->dev); gve_clear_napi_enabled(priv); gve_clear_report_stats(priv); @@ -1902,7 +1902,7 @@ static void gve_turnup(struct gve_priv *priv) } if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv)) - xdp_features_set_redirect_target(priv->dev, false); + xdp_features_set_redirect_target_locked(priv->dev, false); gve_set_napi_enabled(priv); } @@ -2185,7 +2185,7 @@ static void gve_set_netdev_xdp_features(struct gve_priv *priv) xdp_features = 0; } - xdp_set_features_flag(priv->dev, xdp_features); + xdp_set_features_flag_locked(priv->dev, xdp_features); } static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) @@ -2659,6 +2659,9 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto abort_with_wq; + if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) + dev->netmem_tx = true; + err = register_netdev(dev); if (err) goto abort_with_gve_init; diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 2eba868d8037..a27f1574a733 100644 --- 
a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -660,7 +660,8 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, goto err; dma_unmap_len_set(pkt, len[pkt->num_bufs], len); - dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr); + netmem_dma_unmap_addr_set(skb_frag_netmem(frag), pkt, + dma[pkt->num_bufs], addr); ++pkt->num_bufs; gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr, @@ -1038,8 +1039,9 @@ static void gve_unmap_packet(struct device *dev, dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]), dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE); for (i = 1; i < pkt->num_bufs; i++) { - dma_unmap_page(dev, dma_unmap_addr(pkt, dma[i]), - dma_unmap_len(pkt, len[i]), DMA_TO_DEVICE); + netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(pkt, dma[i]), + dma_unmap_len(pkt, len[i]), + DMA_TO_DEVICE, 0); } pkt->num_bufs = 0; } diff --git a/drivers/net/ethernet/ibm/Kconfig b/drivers/net/ethernet/ibm/Kconfig index c0c112d95b89..4f4b23465c47 100644 --- a/drivers/net/ethernet/ibm/Kconfig +++ b/drivers/net/ethernet/ibm/Kconfig @@ -27,6 +27,19 @@ config IBMVETH To compile this driver as a module, choose M here. The module will be called ibmveth. +config IBMVETH_KUNIT_TEST + bool "KUnit test for IBM LAN Virtual Ethernet support" if !KUNIT_ALL_TESTS + depends on KUNIT + depends on KUNIT=y && IBMVETH=y + default KUNIT_ALL_TESTS + help + This builds unit tests for the IBM LAN Virtual Ethernet driver. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + source "drivers/net/ethernet/ibm/emac/Kconfig" config EHEA diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 04192190beba..24046fe16634 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -39,8 +39,6 @@ #include "ibmveth.h" static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance); -static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter, - bool reuse); static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev); static struct kobj_type ktype_veth_pool; @@ -231,7 +229,10 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, index = pool->free_map[free_index]; skb = NULL; - BUG_ON(index == IBM_VETH_INVALID_MAP); + if (WARN_ON(index == IBM_VETH_INVALID_MAP)) { + schedule_work(&adapter->work); + goto bad_index_failure; + } /* are we allocating a new buffer or recycling an old one */ if (pool->skbuff[index]) @@ -300,6 +301,7 @@ failure: DMA_FROM_DEVICE); dev_kfree_skb_any(pool->skbuff[index]); pool->skbuff[index] = NULL; +bad_index_failure: adapter->replenish_add_buff_failure++; mb(); @@ -370,20 +372,36 @@ static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter, } } -/* remove a buffer from a pool */ -static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, - u64 correlator, bool reuse) +/** + * ibmveth_remove_buffer_from_pool - remove a buffer from a pool + * @adapter: adapter instance + * @correlator: identifies pool and index + * @reuse: whether to reuse buffer + * + * Return: + * * %0 - success + * * %-EINVAL - correlator maps to pool or index out of range + * * %-EFAULT - pool and index map to null skb + */ +static int ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, + u64 correlator, bool reuse) { unsigned int pool = correlator >> 32; unsigned int index = correlator & 0xffffffffUL; unsigned int free_index; struct 
sk_buff *skb; - BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); - BUG_ON(index >= adapter->rx_buff_pool[pool].size); + if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) || + WARN_ON(index >= adapter->rx_buff_pool[pool].size)) { + schedule_work(&adapter->work); + return -EINVAL; + } skb = adapter->rx_buff_pool[pool].skbuff[index]; - BUG_ON(skb == NULL); + if (WARN_ON(!skb)) { + schedule_work(&adapter->work); + return -EFAULT; + } /* if we are going to reuse the buffer then keep the pointers around * but mark index as available. replenish will see the skb pointer and @@ -411,6 +429,8 @@ static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, mb(); atomic_dec(&(adapter->rx_buff_pool[pool].available)); + + return 0; } /* get the current buffer on the rx queue */ @@ -420,24 +440,44 @@ static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *ada unsigned int pool = correlator >> 32; unsigned int index = correlator & 0xffffffffUL; - BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); - BUG_ON(index >= adapter->rx_buff_pool[pool].size); + if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) || + WARN_ON(index >= adapter->rx_buff_pool[pool].size)) { + schedule_work(&adapter->work); + return NULL; + } return adapter->rx_buff_pool[pool].skbuff[index]; } -static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter, - bool reuse) +/** + * ibmveth_rxq_harvest_buffer - Harvest buffer from pool + * + * @adapter: pointer to adapter + * @reuse: whether to reuse buffer + * + * Context: called from ibmveth_poll + * + * Return: + * * %0 - success + * * other - non-zero return from ibmveth_remove_buffer_from_pool + */ +static int ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter, + bool reuse) { u64 cor; + int rc; cor = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator; - ibmveth_remove_buffer_from_pool(adapter, cor, reuse); + rc = ibmveth_remove_buffer_from_pool(adapter, cor, reuse); + if (unlikely(rc)) + return rc; if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) { adapter->rx_queue.index = 0; adapter->rx_queue.toggle = !adapter->rx_queue.toggle; } + + return 0; } static void ibmveth_free_tx_ltb(struct ibmveth_adapter *adapter, int idx) @@ -709,6 +749,35 @@ static int ibmveth_close(struct net_device *netdev) return 0; } +/** + * ibmveth_reset - Handle scheduled reset work + * + * @w: pointer to work_struct embedded in adapter structure + * + * Context: This routine acquires rtnl_mutex and disables its NAPI through + * ibmveth_close. It can't be called directly in a context that has + * already acquired rtnl_mutex or disabled its NAPI, or directly from + * a poll routine. 
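All of the validation above keys off a single correlator layout: the pool id lives in the upper 32 bits and the buffer index in the lower 32 bits. A minimal userspace sketch of that encoding, using a hypothetical ibmveth_make_correlator() helper that is not part of the driver API:

#include <assert.h>
#include <stdint.h>

/* Pack a pool id and buffer index the way the driver's correlator does. */
static uint64_t ibmveth_make_correlator(uint32_t pool, uint32_t index)
{
        return ((uint64_t)pool << 32) | index;
}

int main(void)
{
        uint64_t cor = ibmveth_make_correlator(2, 7);

        /* Unpack exactly as ibmveth_remove_buffer_from_pool() does. */
        assert((unsigned int)(cor >> 32) == 2);          /* pool */
        assert((unsigned int)(cor & 0xffffffffUL) == 7); /* index */
        return 0;
}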
+ * + * Return: void + */ +static void ibmveth_reset(struct work_struct *w) +{ + struct ibmveth_adapter *adapter = container_of(w, struct ibmveth_adapter, work); + struct net_device *netdev = adapter->netdev; + + netdev_dbg(netdev, "reset starting\n"); + + rtnl_lock(); + + dev_close(adapter->netdev); + dev_open(adapter->netdev, NULL); + + rtnl_unlock(); + + netdev_dbg(netdev, "reset complete\n"); +} + static int ibmveth_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd) { @@ -1324,7 +1393,8 @@ restart_poll: wmb(); /* suggested by larson1 */ adapter->rx_invalid_buffer++; netdev_dbg(netdev, "recycling invalid buffer\n"); - ibmveth_rxq_harvest_buffer(adapter, true); + if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true))) + break; } else { struct sk_buff *skb, *new_skb; int length = ibmveth_rxq_frame_length(adapter); @@ -1334,6 +1404,8 @@ restart_poll: __sum16 iph_check = 0; skb = ibmveth_rxq_get_buffer(adapter); + if (unlikely(!skb)) + break; /* if the large packet bit is set in the rx queue * descriptor, the mss will be written by PHYP eight @@ -1357,10 +1429,12 @@ restart_poll: if (rx_flush) ibmveth_flush_buffer(skb->data, length + offset); - ibmveth_rxq_harvest_buffer(adapter, true); + if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true))) + break; skb = new_skb; } else { - ibmveth_rxq_harvest_buffer(adapter, false); + if (unlikely(ibmveth_rxq_harvest_buffer(adapter, false))) + break; skb_reserve(skb, offset); } @@ -1407,7 +1481,10 @@ restart_poll: * then check once more to make sure we are done. */ lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_ENABLE); - BUG_ON(lpar_rc != H_SUCCESS); + if (WARN_ON(lpar_rc != H_SUCCESS)) { + schedule_work(&adapter->work); + goto out; + } if (ibmveth_rxq_pending_buffer(adapter) && napi_schedule(napi)) { lpar_rc = h_vio_signal(adapter->vdev->unit_address, @@ -1428,7 +1505,7 @@ static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance) if (napi_schedule_prep(&adapter->napi)) { lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); - BUG_ON(lpar_rc != H_SUCCESS); + WARN_ON(lpar_rc != H_SUCCESS); __napi_schedule(&adapter->napi); } return IRQ_HANDLED; @@ -1670,6 +1747,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->vdev = dev; adapter->netdev = netdev; + INIT_WORK(&adapter->work, ibmveth_reset); adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p); ibmveth_init_link_settings(netdev); @@ -1762,6 +1840,8 @@ static void ibmveth_remove(struct vio_dev *dev) struct ibmveth_adapter *adapter = netdev_priv(netdev); int i; + cancel_work_sync(&adapter->work); + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) kobject_put(&adapter->rx_buff_pool[i].kobj); @@ -1791,6 +1871,26 @@ static ssize_t veth_pool_show(struct kobject *kobj, return 0; } +/** + * veth_pool_store - sysfs store handler for pool attributes + * @kobj: kobject embedded in pool + * @attr: attribute being changed + * @buf: value being stored + * @count: length of @buf in bytes + * + * Stores new value in pool attribute. Verifies the range of the new value for + * size and buff_size. Verifies that at least one pool remains available to + * receive MTU-sized packets. + * + * Context: Process context. + * Takes and releases rtnl_mutex to ensure correct ordering of close + * and open calls. 
+ * Return: + * * %-EPERM - Not allowed to disable all MTU-sized buffer pools + * * %-EINVAL - New pool size or buffer size is out of range + * * count - Return count for success + * * other - Return value from a failed ibmveth_open call + */ static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { @@ -1800,28 +1900,30 @@ static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr, struct net_device *netdev = dev_get_drvdata(kobj_to_dev(kobj->parent)); struct ibmveth_adapter *adapter = netdev_priv(netdev); long value = simple_strtol(buf, NULL, 10); + bool change = false; + u32 newbuff_size; + u32 oldbuff_size; + int newactive; + int oldactive; + u32 newsize; + u32 oldsize; long rc; rtnl_lock(); + oldbuff_size = pool->buff_size; + oldactive = pool->active; + oldsize = pool->size; + + newbuff_size = oldbuff_size; + newactive = oldactive; + newsize = oldsize; + if (attr == &veth_active_attr) { - if (value && !pool->active) { - if (netif_running(netdev)) { - if (ibmveth_alloc_buffer_pool(pool)) { - netdev_err(netdev, - "unable to alloc pool\n"); - rc = -ENOMEM; - goto unlock_err; - } - pool->active = 1; - ibmveth_close(netdev); - rc = ibmveth_open(netdev); - if (rc) - goto unlock_err; - } else { - pool->active = 1; - } - } else if (!value && pool->active) { + if (value && !oldactive) { + newactive = 1; + change = true; + } else if (!value && oldactive) { int mtu = netdev->mtu + IBMVETH_BUFF_OH; int i; /* Make sure there is a buffer pool with buffers that @@ -1841,43 +1943,44 @@ static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr, goto unlock_err; } - if (netif_running(netdev)) { - ibmveth_close(netdev); - pool->active = 0; - rc = ibmveth_open(netdev); - if (rc) - goto unlock_err; - } - pool->active = 0; + newactive = 0; + change = true; } } else if (attr == &veth_num_attr) { if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) { rc = -EINVAL; goto unlock_err; - } else { - if (netif_running(netdev)) { - ibmveth_close(netdev); - pool->size = value; - rc = ibmveth_open(netdev); - if (rc) - goto unlock_err; - } else { - pool->size = value; - } + } + if (value != oldsize) { + newsize = value; + change = true; } } else if (attr == &veth_size_attr) { if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE) { rc = -EINVAL; goto unlock_err; - } else { - if (netif_running(netdev)) { - ibmveth_close(netdev); - pool->buff_size = value; - rc = ibmveth_open(netdev); - if (rc) - goto unlock_err; - } else { - pool->buff_size = value; + } + if (value != oldbuff_size) { + newbuff_size = value; + change = true; + } + } + + if (change) { + if (netif_running(netdev)) + ibmveth_close(netdev); + + pool->active = newactive; + pool->buff_size = newbuff_size; + pool->size = newsize; + + if (netif_running(netdev)) { + rc = ibmveth_open(netdev); + if (rc) { + pool->active = oldactive; + pool->buff_size = oldbuff_size; + pool->size = oldsize; + goto unlock_err; } } } @@ -1962,3 +2065,132 @@ static void __exit ibmveth_module_exit(void) module_init(ibmveth_module_init); module_exit(ibmveth_module_exit); + +#ifdef CONFIG_IBMVETH_KUNIT_TEST +#include <kunit/test.h> + +/** + * ibmveth_reset_kunit - reset routine for running in KUnit environment + * + * @w: pointer to work_struct embedded in adapter structure + * + * Context: Called in the KUnit environment. Does nothing.
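The rewritten veth_pool_store() above replaces the per-attribute close/open dance with one snapshot-and-rollback sequence: record the old values, stage the new ones, apply them in a single place, and restore the snapshot if reopening the device fails. A compact userspace sketch of that pattern, with all names illustrative rather than driver API:

#include <stdbool.h>
#include <stdio.h>

struct pool_cfg {
        bool active;
        unsigned int size;
        unsigned int buff_size;
};

static int reopen_device(void)          /* stand-in for ibmveth_open() */
{
        return 0;                       /* make nonzero to exercise rollback */
}

static int update_pool(struct pool_cfg *pool, const struct pool_cfg *req,
                       bool running)
{
        struct pool_cfg old = *pool;    /* snapshot before any change */

        *pool = *req;                   /* apply all staged values once */
        if (running && reopen_device() != 0) {
                *pool = old;            /* reopen failed: roll back */
                return -1;
        }
        return 0;
}

int main(void)
{
        struct pool_cfg pool = { true, 512, 2048 };
        struct pool_cfg req  = { true, 768, 2048 };

        printf("rc=%d size=%u\n", update_pool(&pool, &req, true), pool.size);
        return 0;
}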
+ * + * Return: void + */ +static void ibmveth_reset_kunit(struct work_struct *w) +{ + netdev_dbg(NULL, "reset_kunit starting\n"); + netdev_dbg(NULL, "reset_kunit complete\n"); +} + +/** + * ibmveth_remove_buffer_from_pool_test - unit test for some of + * ibmveth_remove_buffer_from_pool + * @test: pointer to kunit structure + * + * Tests the error returns from ibmveth_remove_buffer_from_pool. + * ibmveth_remove_buffer_from_pool also calls WARN_ON, so dmesg should be + * checked to see that these warnings happened. + * + * Return: void + */ +static void ibmveth_remove_buffer_from_pool_test(struct kunit *test) +{ + struct ibmveth_adapter *adapter = kunit_kzalloc(test, sizeof(*adapter), GFP_KERNEL); + struct ibmveth_buff_pool *pool; + u64 correlator; + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter); + + INIT_WORK(&adapter->work, ibmveth_reset_kunit); + + /* Set sane values for buffer pools */ + for (int i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) + ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i, + pool_count[i], pool_size[i], + pool_active[i]); + + pool = &adapter->rx_buff_pool[0]; + pool->skbuff = kunit_kcalloc(test, pool->size, sizeof(void *), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pool->skbuff); + + correlator = ((u64)IBMVETH_NUM_BUFF_POOLS << 32) | 0; + KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, false)); + KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, true)); + + correlator = ((u64)0 << 32) | adapter->rx_buff_pool[0].size; + KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, false)); + KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, true)); + + correlator = (u64)0 | 0; + pool->skbuff[0] = NULL; + KUNIT_EXPECT_EQ(test, -EFAULT, ibmveth_remove_buffer_from_pool(adapter, correlator, false)); + KUNIT_EXPECT_EQ(test, -EFAULT, ibmveth_remove_buffer_from_pool(adapter, correlator, true)); + + flush_work(&adapter->work); +} + +/** + * ibmveth_rxq_get_buffer_test - unit test for ibmveth_rxq_get_buffer + * @test: pointer to kunit structure + * + * Tests ibmveth_rxq_get_buffer. ibmveth_rxq_get_buffer also calls WARN_ON for + * the NULL returns, so dmesg should be checked to see that these warnings + * happened. 
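Assuming the standard KUnit tooling, a .kunitconfig along these lines should pull the new suite in; the CONFIG symbols below are taken from the Kconfig hunk above plus common networking prerequisites, so the exact set may vary by tree:

        CONFIG_KUNIT=y
        CONFIG_NETDEVICES=y
        CONFIG_ETHERNET=y
        CONFIG_NET_VENDOR_IBM=y
        CONFIG_IBMVETH=y
        CONFIG_IBMVETH_KUNIT_TEST=y

        ./tools/testing/kunit/kunit.py run --kunitconfig=.kunitconfig 'ibmveth*'

Because IBMVETH itself depends on pseries platform support, the default UML run of kunit.py will likely not resolve its dependencies; an --arch=powerpc cross build, or booting the suite on a pseries kernel, is probably required.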
+ * + * Return: void + */ +static void ibmveth_rxq_get_buffer_test(struct kunit *test) +{ + struct ibmveth_adapter *adapter = kunit_kzalloc(test, sizeof(*adapter), GFP_KERNEL); + struct sk_buff *skb = kunit_kzalloc(test, sizeof(*skb), GFP_KERNEL); + struct ibmveth_buff_pool *pool; + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); + + INIT_WORK(&adapter->work, ibmveth_reset_kunit); + + adapter->rx_queue.queue_len = 1; + adapter->rx_queue.index = 0; + adapter->rx_queue.queue_addr = kunit_kzalloc(test, sizeof(struct ibmveth_rx_q_entry), + GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter->rx_queue.queue_addr); + + /* Set sane values for buffer pools */ + for (int i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) + ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i, + pool_count[i], pool_size[i], + pool_active[i]); + + pool = &adapter->rx_buff_pool[0]; + pool->skbuff = kunit_kcalloc(test, pool->size, sizeof(void *), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pool->skbuff); + + adapter->rx_queue.queue_addr[0].correlator = (u64)IBMVETH_NUM_BUFF_POOLS << 32 | 0; + KUNIT_EXPECT_PTR_EQ(test, NULL, ibmveth_rxq_get_buffer(adapter)); + + adapter->rx_queue.queue_addr[0].correlator = (u64)0 << 32 | adapter->rx_buff_pool[0].size; + KUNIT_EXPECT_PTR_EQ(test, NULL, ibmveth_rxq_get_buffer(adapter)); + + pool->skbuff[0] = skb; + adapter->rx_queue.queue_addr[0].correlator = (u64)0 << 32 | 0; + KUNIT_EXPECT_PTR_EQ(test, skb, ibmveth_rxq_get_buffer(adapter)); + + flush_work(&adapter->work); +} + +static struct kunit_case ibmveth_test_cases[] = { + KUNIT_CASE(ibmveth_remove_buffer_from_pool_test), + KUNIT_CASE(ibmveth_rxq_get_buffer_test), + {} +}; + +static struct kunit_suite ibmveth_test_suite = { + .name = "ibmveth-kunit-test", + .test_cases = ibmveth_test_cases, +}; + +kunit_test_suite(ibmveth_test_suite); +#endif diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index 8468e2c59d7a..b0a2460ec9f9 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -134,38 +134,39 @@ struct ibmveth_rx_q { }; struct ibmveth_adapter { - struct vio_dev *vdev; - struct net_device *netdev; - struct napi_struct napi; - unsigned int mcastFilterSize; - void * buffer_list_addr; - void * filter_list_addr; - void *tx_ltb_ptr[IBMVETH_MAX_QUEUES]; - unsigned int tx_ltb_size; - dma_addr_t tx_ltb_dma[IBMVETH_MAX_QUEUES]; - dma_addr_t buffer_list_dma; - dma_addr_t filter_list_dma; - struct ibmveth_buff_pool rx_buff_pool[IBMVETH_NUM_BUFF_POOLS]; - struct ibmveth_rx_q rx_queue; - int rx_csum; - int large_send; - bool is_active_trunk; - - u64 fw_ipv6_csum_support; - u64 fw_ipv4_csum_support; - u64 fw_large_send_support; - /* adapter specific stats */ - u64 replenish_task_cycles; - u64 replenish_no_mem; - u64 replenish_add_buff_failure; - u64 replenish_add_buff_success; - u64 rx_invalid_buffer; - u64 rx_no_buffer; - u64 tx_map_failed; - u64 tx_send_failed; - u64 tx_large_packets; - u64 rx_large_packets; - /* Ethtool settings */ + struct vio_dev *vdev; + struct net_device *netdev; + struct napi_struct napi; + struct work_struct work; + unsigned int mcastFilterSize; + void *buffer_list_addr; + void *filter_list_addr; + void *tx_ltb_ptr[IBMVETH_MAX_QUEUES]; + unsigned int tx_ltb_size; + dma_addr_t tx_ltb_dma[IBMVETH_MAX_QUEUES]; + dma_addr_t buffer_list_dma; + dma_addr_t filter_list_dma; + struct ibmveth_buff_pool rx_buff_pool[IBMVETH_NUM_BUFF_POOLS]; + struct ibmveth_rx_q rx_queue; + int rx_csum; + int large_send; + bool 
is_active_trunk; + + u64 fw_ipv6_csum_support; + u64 fw_ipv4_csum_support; + u64 fw_large_send_support; + /* adapter specific stats */ + u64 replenish_task_cycles; + u64 replenish_no_mem; + u64 replenish_add_buff_failure; + u64 replenish_add_buff_success; + u64 rx_invalid_buffer; + u64 rx_no_buffer; + u64 tx_map_failed; + u64 tx_send_failed; + u64 tx_large_packets; + u64 rx_large_packets; + /* Ethtool settings */ u8 duplex; u32 speed; }; diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 1640d2f27833..5a331c1c76cb 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -147,6 +147,8 @@ config IXGBE depends on PCI depends on PTP_1588_CLOCK_OPTIONAL select MDIO + select NET_DEVLINK + select PLDMFW select PHYLIB help This driver supports Intel(R) 10GbE PCI Express family of @@ -367,6 +369,7 @@ config IGC default n depends on PCI depends on PTP_1588_CLOCK_OPTIONAL + depends on ETHTOOL_NETLINK help This driver supports Intel(R) Ethernet Controller I225-LM/I225-V family of adapters. diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index ba9c19e6994c..952898151565 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -319,7 +319,7 @@ struct e1000_adapter { u16 tx_ring_count; u16 rx_ring_count; - struct hwtstamp_config hwtstamp_config; + struct kernel_hwtstamp_config hwtstamp_config; struct delayed_work systim_overflow_work; struct sk_buff *tx_hwtstamp_skb; unsigned long tx_hwtstamp_start; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 8ebcb6a7d608..e0f492a6723f 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3574,6 +3574,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) * e1000e_config_hwtstamp - configure the hwtstamp registers and enable/disable * @adapter: board private structure * @config: timestamp configuration + * @extack: netlink extended ACK for error report * * Outgoing time stamping can be enabled and disabled. Play nice and * disable it when requested, although it shouldn't cause any overhead @@ -3587,7 +3588,8 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) * exception of "all V2 events regardless of level 2 or 4". 
**/ static int e1000e_config_hwtstamp(struct e1000_adapter *adapter, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct e1000_hw *hw = &adapter->hw; u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; @@ -3598,8 +3600,10 @@ static int e1000e_config_hwtstamp(struct e1000_adapter *adapter, bool is_l2 = false; u32 regval; - if (!(adapter->flags & FLAG_HAS_HW_TIMESTAMP)) + if (!(adapter->flags & FLAG_HAS_HW_TIMESTAMP)) { + NL_SET_ERR_MSG(extack, "No HW timestamp support"); return -EINVAL; + } switch (config->tx_type) { case HWTSTAMP_TX_OFF: @@ -3608,6 +3612,7 @@ static int e1000e_config_hwtstamp(struct e1000_adapter *adapter, case HWTSTAMP_TX_ON: break; default: + NL_SET_ERR_MSG(extack, "Unsupported TX HW timestamp type"); return -ERANGE; } @@ -3681,6 +3686,7 @@ static int e1000e_config_hwtstamp(struct e1000_adapter *adapter, config->rx_filter = HWTSTAMP_FILTER_ALL; break; default: + NL_SET_ERR_MSG(extack, "Unsupported RX HW timestamp filter"); return -ERANGE; } @@ -3693,7 +3699,8 @@ static int e1000e_config_hwtstamp(struct e1000_adapter *adapter, ew32(TSYNCTXCTL, regval); if ((er32(TSYNCTXCTL) & E1000_TSYNCTXCTL_ENABLED) != (regval & E1000_TSYNCTXCTL_ENABLED)) { - e_err("Timesync Tx Control register not set as expected\n"); + NL_SET_ERR_MSG(extack, + "Timesync Tx Control register not set as expected"); return -EAGAIN; } @@ -3706,7 +3713,8 @@ static int e1000e_config_hwtstamp(struct e1000_adapter *adapter, E1000_TSYNCRXCTL_TYPE_MASK)) != (regval & (E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK))) { - e_err("Timesync Rx Control register not set as expected\n"); + NL_SET_ERR_MSG(extack, + "Timesync Rx Control register not set as expected"); return -EAGAIN; } @@ -3901,6 +3909,7 @@ static void e1000e_systim_reset(struct e1000_adapter *adapter) { struct ptp_clock_info *info = &adapter->ptp_clock_info; struct e1000_hw *hw = &adapter->hw; + struct netlink_ext_ack extack = {}; unsigned long flags; u32 timinca; s32 ret_val; @@ -3932,7 +3941,12 @@ static void e1000e_systim_reset(struct e1000_adapter *adapter) spin_unlock_irqrestore(&adapter->systim_lock, flags); /* restore the previous hwtstamp configuration settings */ - e1000e_config_hwtstamp(adapter, &adapter->hwtstamp_config); + ret_val = e1000e_config_hwtstamp(adapter, &adapter->hwtstamp_config, + &extack); + if (ret_val) { + if (extack._msg) + e_err("%s\n", extack._msg); + } } /** @@ -6079,8 +6093,7 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) return 0; } -static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, - int cmd) +static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { struct e1000_adapter *adapter = netdev_priv(netdev); struct mii_ioctl_data *data = if_mii(ifr); @@ -6140,7 +6153,8 @@ static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, /** * e1000e_hwtstamp_set - control hardware time stamping * @netdev: network interface device structure - * @ifr: interface request + * @config: timestamp configuration + * @extack: netlink extended ACK report * * Outgoing time stamping can be enabled and disabled. Play nice and * disable it when requested, although it shouldn't cause any overhead @@ -6153,20 +6167,18 @@ static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, * specified. Matching the kind of event packet is not supported, with the * exception of "all V2 events regardless of level 2 or 4". 
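For context, a kernel-style sketch of the ndo_hwtstamp_get()/ndo_hwtstamp_set() contract this conversion targets: the core hands the driver a kernel_hwtstamp_config plus an extack, so the copy_from_user()/copy_to_user() boilerplate disappears. The foo_* names are placeholders, not e1000e code:

#include <linux/netdevice.h>
#include <linux/net_tstamp.h>
#include <linux/netlink.h>

struct foo_adapter {                    /* hypothetical private data */
        struct kernel_hwtstamp_config hwtstamp_config;
};

static int foo_hwtstamp_set(struct net_device *netdev,
                            struct kernel_hwtstamp_config *config,
                            struct netlink_ext_ack *extack)
{
        struct foo_adapter *adapter = netdev_priv(netdev);

        if (config->tx_type != HWTSTAMP_TX_OFF &&
            config->tx_type != HWTSTAMP_TX_ON) {
                NL_SET_ERR_MSG(extack, "Unsupported TX HW timestamp type");
                return -ERANGE;
        }

        /* program the hardware here, then cache the accepted config */
        adapter->hwtstamp_config = *config;
        return 0;
}

static int foo_hwtstamp_get(struct net_device *netdev,
                            struct kernel_hwtstamp_config *config)
{
        struct foo_adapter *adapter = netdev_priv(netdev);

        *config = adapter->hwtstamp_config;
        return 0;
}

static const struct net_device_ops foo_netdev_ops = {
        .ndo_hwtstamp_get = foo_hwtstamp_get,
        .ndo_hwtstamp_set = foo_hwtstamp_set,
};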
**/ -static int e1000e_hwtstamp_set(struct net_device *netdev, struct ifreq *ifr) +static int e1000e_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct e1000_adapter *adapter = netdev_priv(netdev); - struct hwtstamp_config config; int ret_val; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - ret_val = e1000e_config_hwtstamp(adapter, &config); + ret_val = e1000e_config_hwtstamp(adapter, config, extack); if (ret_val) return ret_val; - switch (config.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: case HWTSTAMP_FILTER_PTP_V2_SYNC: @@ -6178,38 +6190,23 @@ static int e1000e_hwtstamp_set(struct net_device *netdev, struct ifreq *ifr) * by hardware so notify the caller the requested packets plus * some others are time stamped. */ - config.rx_filter = HWTSTAMP_FILTER_SOME; + config->rx_filter = HWTSTAMP_FILTER_SOME; break; default: break; } - return copy_to_user(ifr->ifr_data, &config, - sizeof(config)) ? -EFAULT : 0; + return 0; } -static int e1000e_hwtstamp_get(struct net_device *netdev, struct ifreq *ifr) +static int e1000e_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *kernel_config) { struct e1000_adapter *adapter = netdev_priv(netdev); - return copy_to_user(ifr->ifr_data, &adapter->hwtstamp_config, - sizeof(adapter->hwtstamp_config)) ? -EFAULT : 0; -} + *kernel_config = adapter->hwtstamp_config; -static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) -{ - switch (cmd) { - case SIOCGMIIPHY: - case SIOCGMIIREG: - case SIOCSMIIREG: - return e1000_mii_ioctl(netdev, ifr, cmd); - case SIOCSHWTSTAMP: - return e1000e_hwtstamp_set(netdev, ifr); - case SIOCGHWTSTAMP: - return e1000e_hwtstamp_get(netdev, ifr); - default: - return -EOPNOTSUPP; - } + return 0; } static int e1000_init_phy_wakeup(struct e1000_adapter *adapter, u32 wufc) @@ -7346,9 +7343,11 @@ static const struct net_device_ops e1000e_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = e1000_netpoll, #endif - .ndo_set_features = e1000_set_features, - .ndo_fix_features = e1000_fix_features, + .ndo_set_features = e1000_set_features, + .ndo_fix_features = e1000_fix_features, .ndo_features_check = passthru_features_check, + .ndo_hwtstamp_get = e1000e_hwtstamp_get, + .ndo_hwtstamp_set = e1000e_hwtstamp_set, }; /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 370b4bddee44..b11c35e307ca 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -817,10 +817,11 @@ int i40e_pf_reset(struct i40e_hw *hw) void i40e_clear_hw(struct i40e_hw *hw) { u32 num_queues, base_queue; - u32 num_pf_int; - u32 num_vf_int; + s32 num_pf_int; + s32 num_vf_int; u32 num_vfs; - u32 i, j; + s32 i; + u32 j; u32 val; u32 eol = 0x7ff; diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index fcb199efbea5..4af60e2f37df 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -1339,8 +1339,13 @@ ice_devlink_enable_roce_get(struct devlink *devlink, u32 id, struct devlink_param_gset_ctx *ctx) { struct ice_pf *pf = devlink_priv(devlink); + struct iidc_rdma_core_dev_info *cdev; - ctx->val.vbool = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? 
true : false; + cdev = pf->cdev_info; + if (!cdev) + return -ENODEV; + + ctx->val.vbool = !!(cdev->rdma_protocol & IIDC_RDMA_PROTOCOL_ROCEV2); return 0; } @@ -1350,19 +1355,24 @@ static int ice_devlink_enable_roce_set(struct devlink *devlink, u32 id, struct netlink_ext_ack *extack) { struct ice_pf *pf = devlink_priv(devlink); + struct iidc_rdma_core_dev_info *cdev; bool roce_ena = ctx->val.vbool; int ret; + cdev = pf->cdev_info; + if (!cdev) + return -ENODEV; + if (!roce_ena) { ice_unplug_aux_dev(pf); - pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_ROCEV2; + cdev->rdma_protocol &= ~IIDC_RDMA_PROTOCOL_ROCEV2; return 0; } - pf->rdma_mode |= IIDC_RDMA_PROTOCOL_ROCEV2; + cdev->rdma_protocol |= IIDC_RDMA_PROTOCOL_ROCEV2; ret = ice_plug_aux_dev(pf); if (ret) - pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_ROCEV2; + cdev->rdma_protocol &= ~IIDC_RDMA_PROTOCOL_ROCEV2; return ret; } @@ -1373,11 +1383,16 @@ ice_devlink_enable_roce_validate(struct devlink *devlink, u32 id, struct netlink_ext_ack *extack) { struct ice_pf *pf = devlink_priv(devlink); + struct iidc_rdma_core_dev_info *cdev; + + cdev = pf->cdev_info; + if (!cdev) + return -ENODEV; if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) return -EOPNOTSUPP; - if (pf->rdma_mode & IIDC_RDMA_PROTOCOL_IWARP) { + if (cdev->rdma_protocol & IIDC_RDMA_PROTOCOL_IWARP) { NL_SET_ERR_MSG_MOD(extack, "iWARP is currently enabled. This device cannot enable iWARP and RoCEv2 simultaneously"); return -EOPNOTSUPP; } @@ -1390,8 +1405,13 @@ ice_devlink_enable_iw_get(struct devlink *devlink, u32 id, struct devlink_param_gset_ctx *ctx) { struct ice_pf *pf = devlink_priv(devlink); + struct iidc_rdma_core_dev_info *cdev; - ctx->val.vbool = pf->rdma_mode & IIDC_RDMA_PROTOCOL_IWARP; + cdev = pf->cdev_info; + if (!cdev) + return -ENODEV; + + ctx->val.vbool = !!(cdev->rdma_protocol & IIDC_RDMA_PROTOCOL_IWARP); return 0; } @@ -1401,19 +1421,24 @@ static int ice_devlink_enable_iw_set(struct devlink *devlink, u32 id, struct netlink_ext_ack *extack) { struct ice_pf *pf = devlink_priv(devlink); + struct iidc_rdma_core_dev_info *cdev; bool iw_ena = ctx->val.vbool; int ret; + cdev = pf->cdev_info; + if (!cdev) + return -ENODEV; + if (!iw_ena) { ice_unplug_aux_dev(pf); - pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_IWARP; + cdev->rdma_protocol &= ~IIDC_RDMA_PROTOCOL_IWARP; return 0; } - pf->rdma_mode |= IIDC_RDMA_PROTOCOL_IWARP; + cdev->rdma_protocol |= IIDC_RDMA_PROTOCOL_IWARP; ret = ice_plug_aux_dev(pf); if (ret) - pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_IWARP; + cdev->rdma_protocol &= ~IIDC_RDMA_PROTOCOL_IWARP; return ret; } @@ -1428,7 +1453,7 @@ ice_devlink_enable_iw_validate(struct devlink *devlink, u32 id, if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) return -EOPNOTSUPP; - if (pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2) { + if (pf->cdev_info->rdma_protocol & IIDC_RDMA_PROTOCOL_ROCEV2) { NL_SET_ERR_MSG_MOD(extack, "RoCEv2 is currently enabled. 
This device cannot enable iWARP and RoCEv2 simultaneously"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index fd083647c14a..ddd0ad68185b 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -193,8 +193,6 @@ #define ice_pf_to_dev(pf) (&((pf)->pdev->dev)) -#define ice_pf_src_tmr_owned(pf) ((pf)->hw.func_caps.ts_func_info.src_tmr_owned) - enum ice_feature { ICE_F_DSCP, ICE_F_PHY_RCLK, @@ -401,7 +399,6 @@ struct ice_vsi { u16 req_rxq; /* User requested Rx queues */ u16 num_rx_desc; u16 num_tx_desc; - u16 qset_handle[ICE_MAX_TRAFFIC_CLASS]; struct ice_tc_cfg tc_cfg; struct bpf_prog *xdp_prog; struct ice_tx_ring **xdp_rings; /* XDP ring array */ @@ -515,6 +512,7 @@ enum ice_pf_flags { ICE_FLAG_MTU_CHANGED, ICE_FLAG_GNSS, /* GNSS successfully initialized */ ICE_FLAG_DPLL, /* SyncE/PTP dplls initialized */ + ICE_FLAG_LLDP_AQ_FLTR, ICE_PF_FLAGS_NBITS /* must be last */ }; @@ -557,7 +555,6 @@ struct ice_pf { struct devlink_port devlink_port; /* OS reserved IRQ details */ - struct msix_entry *msix_entries; struct ice_irq_tracker irq_tracker; struct ice_virt_irq_tracker virt_irq_tracker; @@ -592,7 +589,6 @@ struct ice_pf { struct gnss_serial *gnss_serial; struct gnss_device *gnss_dev; u16 num_rdma_msix; /* Total MSIX vectors for RDMA driver */ - u16 rdma_base_vector; /* spinlock to protect the AdminQ wait list */ spinlock_t aq_wait_lock; @@ -625,14 +621,12 @@ struct ice_pf { struct ice_hw_port_stats stats_prev; struct ice_hw hw; u8 stat_prev_loaded:1; /* has previous stats been loaded */ - u8 rdma_mode; u16 dcbx_cap; u32 tx_timeout_count; unsigned long tx_timeout_last_recovery; u32 tx_timeout_recovery_level; char int_name[ICE_INT_NAME_STR_LEN]; char int_name_ll_ts[ICE_INT_NAME_STR_LEN]; - struct auxiliary_device *adev; int aux_idx; u32 sw_int_count; /* count of tc_flower filters specific to channel (aka where filter @@ -664,6 +658,7 @@ struct ice_pf { struct ice_dplls dplls; struct device *hwmon_dev; struct ice_health health_reporters; + struct iidc_rdma_core_dev_info *cdev_info; u8 num_quanta_prof_used; }; @@ -1045,4 +1040,62 @@ static inline void ice_clear_rdma_cap(struct ice_pf *pf) } extern const struct xdp_metadata_ops ice_xdp_md_ops; + +/** + * ice_is_dual - Check if given config is multi-NAC + * @hw: pointer to HW structure + * + * Return: true if the device is running in multi-NAC (Network + * Acceleration Complex) configuration variant, false otherwise + * (always false for non-E825 devices). + */ +static inline bool ice_is_dual(struct ice_hw *hw) +{ + return hw->mac_type == ICE_MAC_GENERIC_3K_E825 && + (hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_DUAL_M); +} + +/** + * ice_is_primary - Check if given device belongs to the primary complex + * @hw: pointer to HW structure + * + * Check if given PF/HW is running on primary complex in multi-NAC + * configuration. + * + * Return: true if the device is primary, false otherwise (always true + * for non-E825 devices). + */ +static inline bool ice_is_primary(struct ice_hw *hw) +{ + return hw->mac_type != ICE_MAC_GENERIC_3K_E825 || + !ice_is_dual(hw) || + (hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_PRIMARY_M); +} + +/** + * ice_pf_src_tmr_owned - Check if a primary timer is owned by PF + * @pf: pointer to PF structure + * + * Return: true if PF owns primary timer, false otherwise.
+ */ +static inline bool ice_pf_src_tmr_owned(struct ice_pf *pf) +{ + return pf->hw.func_caps.ts_func_info.src_tmr_owned && + ice_is_primary(&pf->hw); +} + +/** + * ice_get_primary_hw - Get pointer to primary ice_hw structure + * @pf: pointer to PF structure + * + * Return: A pointer to ice_hw structure with access to timesync + * register space. + */ +static inline struct ice_hw *ice_get_primary_hw(struct ice_pf *pf) +{ + if (!pf->adapter->ctrl_pf) + return &pf->hw; + else + return &pf->adapter->ctrl_pf->hw; +} #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 59df31c2c83f..4fedf0181c4e 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1135,6 +1135,8 @@ int ice_init_hw(struct ice_hw *hw) } } + hw->lane_num = ice_get_phy_lane_number(hw); + return 0; err_unroll_fltr_mgmt_struct: ice_cleanup_fltr_mgmt_struct(hw); @@ -3434,7 +3436,7 @@ int ice_aq_get_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port, msg.msg_addr_low = lower_16_bits(reg_offset); msg.msg_addr_high = receiver_id; msg.opcode = ice_sbq_msg_rd; - msg.dest_dev = rmn_0; + msg.dest_dev = ice_sbq_dev_phy_0; err = ice_sbq_rw_reg(hw, &msg, flag); if (err) @@ -4082,10 +4084,12 @@ int ice_get_phy_lane_number(struct ice_hw *hw) continue; if (hw->pf_id == lport) { + if (hw->mac_type == ICE_MAC_GENERIC_3K_E825 && + ice_is_dual(hw) && !ice_is_primary(hw)) + lane += ICE_PORTS_PER_QUAD; kfree(options); return lane; } - lport++; } @@ -6011,15 +6015,21 @@ bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw) /** * ice_lldp_fltr_add_remove - add or remove a LLDP Rx switch filter * @hw: pointer to HW struct - * @vsi_num: absolute HW index for VSI + * @vsi: VSI to add the filter to * @add: boolean for if adding or removing a filter + * + * Return: 0 on success, -EOPNOTSUPP if the operation cannot be performed + * with this HW or VSI, otherwise an error corresponding to + * the AQ transaction result. 
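The E825 multi-NAC helpers above reduce to a small truth table over two mode bits. A userspace sketch with mocked flag values (the real masks live in the ice headers, so the constants here are assumptions):

#include <assert.h>
#include <stdbool.h>

#define NAC_TOPO_PRIMARY_M 0x1
#define NAC_TOPO_DUAL_M    0x2

static bool is_dual(bool is_e825, unsigned int mode)
{
        return is_e825 && (mode & NAC_TOPO_DUAL_M);
}

static bool is_primary(bool is_e825, unsigned int mode)
{
        return !is_e825 || !is_dual(is_e825, mode) ||
               (mode & NAC_TOPO_PRIMARY_M);
}

int main(void)
{
        assert(is_primary(false, 0));                   /* non-E825 */
        assert(is_primary(true, 0));                    /* E825, single NAC */
        assert(is_primary(true, NAC_TOPO_DUAL_M | NAC_TOPO_PRIMARY_M));
        assert(!is_primary(true, NAC_TOPO_DUAL_M));     /* secondary NAC */
        return 0;
}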
*/ -int -ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add) +int ice_lldp_fltr_add_remove(struct ice_hw *hw, struct ice_vsi *vsi, bool add) { struct ice_aqc_lldp_filter_ctrl *cmd; struct ice_aq_desc desc; + if (vsi->type != ICE_VSI_PF || !ice_fw_supports_lldp_fltr_ctrl(hw)) + return -EOPNOTSUPP; + cmd = &desc.params.lldp_filter_ctrl; ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_filter_ctrl); @@ -6029,7 +6039,7 @@ ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add) else cmd->cmd_flags = ICE_AQC_LLDP_FILTER_ACTION_DELETE; - cmd->vsi_num = cpu_to_le16(vsi_num); + cmd->vsi_num = cpu_to_le16(vsi->vsi_num); return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); } diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 9b00aa0ddf10..64c530b39191 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -290,8 +290,7 @@ int ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size, struct ice_sq_cd *cd); bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw); -int -ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add); +int ice_lldp_fltr_add_remove(struct ice_hw *hw, struct ice_vsi *vsi, bool add); int ice_lldp_execute_pending_mib(struct ice_hw *hw); int ice_aq_read_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index 74418c445cc4..64737fc62306 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -1288,7 +1288,7 @@ ice_add_dscp_up_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg) tlv->ouisubtype = htonl(ouisubtype); /* bytes 0 - 63 - IPv4 DSCP2UP LUT */ - for (i = 0; i < ICE_DSCP_NUM_VAL; i++) { + for (i = 0; i < DSCP_MAX; i++) { /* IPv4 mapping */ buf[i] = dcbcfg->dscp_map[i]; /* IPv6 mapping */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index a7c510832824..533eb8930aa8 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -352,8 +352,8 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) struct ice_aqc_port_ets_elem buf = { 0 }; struct ice_dcbx_cfg *old_cfg, *curr_cfg; struct device *dev = ice_pf_to_dev(pf); + struct iidc_rdma_event *event; int ret = ICE_DCB_NO_HW_CHG; - struct iidc_event *event; struct ice_vsi *pf_vsi; curr_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; @@ -405,7 +405,7 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) goto free_cfg; } - set_bit(IIDC_EVENT_BEFORE_TC_CHANGE, event->type); + set_bit(IIDC_RDMA_EVENT_BEFORE_TC_CHANGE, event->type); ice_send_event_to_aux(pf, event); kfree(event); @@ -740,7 +740,9 @@ static int ice_dcb_noncontig_cfg(struct ice_pf *pf) void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked) { struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; - struct iidc_event *event; + struct iidc_rdma_priv_dev_info *privd; + struct iidc_rdma_core_dev_info *cdev; + struct iidc_rdma_event *event; u8 tc_map = 0; int v, ret; @@ -783,13 +785,17 @@ void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked) if (vsi->type == ICE_VSI_PF) ice_dcbnl_set_all(vsi); } - if (!locked) { + + cdev = pf->cdev_info; + if (cdev && !locked) { + privd = cdev->iidc_priv; + ice_setup_dcb_qos_info(pf, &privd->qos_info); /* Notify the AUX drivers 
that TC change is finished */ event = kzalloc(sizeof(*event), GFP_KERNEL); if (!event) return; - set_bit(IIDC_EVENT_AFTER_TC_CHANGE, event->type); + set_bit(IIDC_RDMA_EVENT_AFTER_TC_CHANGE, event->type); ice_send_event_to_aux(pf, event); kfree(event); } @@ -846,7 +852,7 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked) goto dcb_init_err; } - ice_cfg_sw_lldp(pf_vsi, false, true); + ice_cfg_sw_rx_lldp(pf, true); pf->dcbx_cap = ice_dcb_get_mode(port_info, true); return 0; @@ -945,6 +951,37 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring, } /** + * ice_setup_dcb_qos_info - Setup DCB QoS information + * @pf: ptr to ice_pf + * @qos_info: QoS param instance + */ +void ice_setup_dcb_qos_info(struct ice_pf *pf, struct iidc_rdma_qos_params *qos_info) +{ + struct ice_dcbx_cfg *dcbx_cfg; + unsigned int i; + u32 up2tc; + + if (!pf || !qos_info) + return; + + dcbx_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; + up2tc = rd32(&pf->hw, PRTDCB_TUP2TC); + + qos_info->num_tc = ice_dcb_get_num_tc(dcbx_cfg); + + for (i = 0; i < IIDC_MAX_USER_PRIORITY; i++) + qos_info->up2tc[i] = (up2tc >> (i * 3)) & 0x7; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + qos_info->tc_info[i].rel_bw = dcbx_cfg->etscfg.tcbwtable[i]; + + qos_info->pfc_mode = dcbx_cfg->pfc_mode; + if (qos_info->pfc_mode == IIDC_DSCP_PFC_MODE) + for (i = 0; i < DSCP_MAX; i++) + qos_info->dscp_map[i] = dcbx_cfg->dscp_map[i]; +} + +/** * ice_dcb_is_mib_change_pending - Check if MIB change is pending * @state: MIB change state */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h index 800879a88c5e..da9ba814b4e8 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h @@ -31,6 +31,9 @@ void ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first); void +ice_setup_dcb_qos_info(struct ice_pf *pf, + struct iidc_rdma_qos_params *qos_info); +void ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event); /** @@ -134,5 +137,11 @@ static inline void ice_update_dcb_stats(struct ice_pf *pf) { } static inline void ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event) { } static inline void ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, u8 dcb_tc) { } +static inline void +ice_setup_dcb_qos_info(struct ice_pf *pf, struct iidc_rdma_qos_params *qos_info) +{ + qos_info->num_tc = 1; + qos_info->tc_info[0].rel_bw = 100; +} #endif /* CONFIG_DCB */ #endif /* _ICE_DCB_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c index 6d50b90a7359..a10c1c8d8697 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c @@ -754,7 +754,7 @@ static int ice_dcbnl_setapp(struct net_device *netdev, struct dcb_app *app) if (!ice_is_feature_supported(pf, ICE_F_DSCP)) return -EOPNOTSUPP; - if (app->protocol >= ICE_DSCP_NUM_VAL) { + if (app->protocol >= DSCP_MAX) { netdev_err(netdev, "DSCP value 0x%04X out of range\n", app->protocol); return -EINVAL; @@ -931,7 +931,7 @@ static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app) /* if the last DSCP mapping just got deleted, need to switch * to L2 VLAN QoS mode */ - if (bitmap_empty(new_cfg->dscp_mapped, ICE_DSCP_NUM_VAL) && + if (bitmap_empty(new_cfg->dscp_mapped, DSCP_MAX) && new_cfg->pfc_mode == ICE_QOS_MODE_DSCP) { ret = ice_aq_set_pfc_mode(&pf->hw, ICE_AQC_PFC_VLAN_BASED_PFC, diff --git 
a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index ed21d7f55ac1..6aae03771746 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -29,6 +29,7 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) return -ENODEV; ice_remove_vsi_fltr(&pf->hw, uplink_vsi->idx); + ice_vsi_cfg_sw_lldp(uplink_vsi, true, false); netif_addr_lock_bh(netdev); __dev_uc_unsync(netdev, NULL); @@ -245,6 +246,10 @@ ice_eswitch_set_target_vsi(struct sk_buff *skb, u64 cd_cmd, dst_vsi; if (!dst) { + struct ethhdr *eth = (struct ethhdr *)skb_mac_header(skb); + + if (unlikely(eth->h_proto == htons(ETH_P_LLDP))) + return; cd_cmd = ICE_TX_CTX_DESC_SWTCH_UPLINK << ICE_TXD_CTX_QW1_CMD_S; off->cd_qw1 |= (cd_cmd | ICE_TX_DESC_DTYPE_CTX); } else { @@ -278,6 +283,7 @@ static void ice_eswitch_release_env(struct ice_pf *pf) ice_fltr_add_mac_and_broadcast(uplink_vsi, uplink_vsi->port_info->mac.perm_addr, ICE_FWD_TO_VSI); + ice_vsi_cfg_sw_lldp(uplink_vsi, true, true); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 7c2dc347e4e5..bbf9e6fd315b 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -1818,7 +1818,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) /* Remove rule to direct LLDP packets to default VSI. * The FW LLDP engine will now be consuming them. */ - ice_cfg_sw_lldp(vsi, false, false); + ice_cfg_sw_rx_lldp(vsi->back, false); /* AQ command to start FW LLDP agent will return an * error if the agent is already started @@ -3964,11 +3964,11 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch) return -EINVAL; } - if (pf->adev) { + if (pf->cdev_info && pf->cdev_info->adev) { mutex_lock(&pf->adev_mutex); - device_lock(&pf->adev->dev); + device_lock(&pf->cdev_info->adev->dev); locked = true; - if (pf->adev->dev.driver) { + if (pf->cdev_info->adev->dev.driver) { netdev_err(dev, "Cannot change channels when RDMA is active\n"); ret = -EBUSY; goto adev_unlock; @@ -3987,7 +3987,7 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch) adev_unlock: if (locked) { - device_unlock(&pf->adev->dev); + device_unlock(&pf->cdev_info->adev->dev); mutex_unlock(&pf->adev_mutex); } return ret; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 1d118171de37..aceec184e89b 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1605,7 +1605,7 @@ void ice_fdir_replay_fltrs(struct ice_pf *pf) */ int ice_fdir_create_dflt_rules(struct ice_pf *pf) { - const enum ice_fltr_ptype dflt_rules[] = { + static const enum ice_fltr_ptype dflt_rules[] = { ICE_FLTR_PTYPE_NONF_IPV4_TCP, ICE_FLTR_PTYPE_NONF_IPV4_UDP, ICE_FLTR_PTYPE_NONF_IPV6_TCP, ICE_FLTR_PTYPE_NONF_IPV6_UDP, }; diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index bab3e81cad5d..6ab53e430f91 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -9,22 +9,25 @@ static DEFINE_XARRAY_ALLOC1(ice_aux_id); /** - * ice_get_auxiliary_drv - retrieve iidc_auxiliary_drv struct - * @pf: pointer to PF struct + * ice_get_auxiliary_drv - retrieve iidc_rdma_core_auxiliary_drv struct + * @cdev: pointer to iidc_rdma_core_dev_info struct * * This function has to be called with a 
device_lock on the - * pf->adev.dev to avoid race conditions. + * cdev->adev.dev to avoid race conditions. + * + * Return: pointer to the matched auxiliary driver struct */ -static struct iidc_auxiliary_drv *ice_get_auxiliary_drv(struct ice_pf *pf) +static struct iidc_rdma_core_auxiliary_drv * +ice_get_auxiliary_drv(struct iidc_rdma_core_dev_info *cdev) { struct auxiliary_device *adev; - adev = pf->adev; + adev = cdev->adev; if (!adev || !adev->dev.driver) return NULL; - return container_of(adev->dev.driver, struct iidc_auxiliary_drv, - adrv.driver); + return container_of(adev->dev.driver, + struct iidc_rdma_core_auxiliary_drv, adrv.driver); } /** @@ -32,44 +35,54 @@ static struct iidc_auxiliary_drv *ice_get_auxiliary_drv(struct ice_pf *pf) * @pf: pointer to PF struct * @event: event struct */ -void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event) +void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_rdma_event *event) { - struct iidc_auxiliary_drv *iadrv; + struct iidc_rdma_core_auxiliary_drv *iadrv; + struct iidc_rdma_core_dev_info *cdev; if (WARN_ON_ONCE(!in_task())) return; + cdev = pf->cdev_info; + if (!cdev) + return; + mutex_lock(&pf->adev_mutex); - if (!pf->adev) + if (!cdev->adev) goto finish; - device_lock(&pf->adev->dev); - iadrv = ice_get_auxiliary_drv(pf); + device_lock(&cdev->adev->dev); + iadrv = ice_get_auxiliary_drv(cdev); if (iadrv && iadrv->event_handler) - iadrv->event_handler(pf, event); - device_unlock(&pf->adev->dev); + iadrv->event_handler(cdev, event); + device_unlock(&cdev->adev->dev); finish: mutex_unlock(&pf->adev_mutex); } /** * ice_add_rdma_qset - Add Leaf Node for RDMA Qset - * @pf: PF struct + * @cdev: pointer to iidc_rdma_core_dev_info struct * @qset: Resource to be allocated + * + * Return: Zero on success or error code encountered */ -int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset) +int ice_add_rdma_qset(struct iidc_rdma_core_dev_info *cdev, + struct iidc_rdma_qset_params *qset) { u16 max_rdmaqs[ICE_MAX_TRAFFIC_CLASS]; struct ice_vsi *vsi; struct device *dev; + struct ice_pf *pf; u32 qset_teid; u16 qs_handle; int status; int i; - if (WARN_ON(!pf || !qset)) + if (WARN_ON(!cdev || !qset)) return -EINVAL; + pf = pci_get_drvdata(cdev->pdev); dev = ice_pf_to_dev(pf); if (!ice_is_rdma_ena(pf)) @@ -100,7 +113,6 @@ int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset) dev_err(dev, "Failed VSI RDMA Qset enable\n"); return status; } - vsi->qset_handle[qset->tc] = qset->qs_handle; qset->teid = qset_teid; return 0; @@ -109,18 +121,23 @@ EXPORT_SYMBOL_GPL(ice_add_rdma_qset); /** * ice_del_rdma_qset - Delete leaf node for RDMA Qset - * @pf: PF struct + * @cdev: pointer to iidc_rdma_core_dev_info struct * @qset: Resource to be freed + * + * Return: Zero on success, error code on failure */ -int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset) +int ice_del_rdma_qset(struct iidc_rdma_core_dev_info *cdev, + struct iidc_rdma_qset_params *qset) { struct ice_vsi *vsi; + struct ice_pf *pf; u32 teid; u16 q_id; - if (WARN_ON(!pf || !qset)) + if (WARN_ON(!cdev || !qset)) return -EINVAL; + pf = pci_get_drvdata(cdev->pdev); vsi = ice_find_vsi(pf, qset->vport_id); if (!vsi) { dev_err(ice_pf_to_dev(pf), "RDMA Invalid VSI\n"); @@ -130,36 +147,36 @@ int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset) q_id = qset->qs_handle; teid = qset->teid; - vsi->qset_handle[qset->tc] = 0; - return ice_dis_vsi_rdma_qset(vsi->port_info, 1, &teid, &q_id); } 
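ice_get_auxiliary_drv() above leans on container_of() to climb from the generic device_driver member back to the wrapping iidc_rdma_core_auxiliary_drv. A self-contained userspace demo of the same pointer arithmetic, with mock types standing in for the kernel's:

#include <assert.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct device_driver { const char *name; };
struct auxiliary_driver { struct device_driver driver; };
struct rdma_auxiliary_drv {             /* mock of the iidc wrapper */
        struct auxiliary_driver adrv;
        void (*event_handler)(void);
};

int main(void)
{
        struct rdma_auxiliary_drv drv = { .adrv.driver.name = "roce" };
        struct device_driver *dd = &drv.adrv.driver;

        /* recover the wrapper from the embedded member, as the driver does */
        assert(container_of(dd, struct rdma_auxiliary_drv, adrv.driver) == &drv);
        return 0;
}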
EXPORT_SYMBOL_GPL(ice_del_rdma_qset); /** * ice_rdma_request_reset - accept request from RDMA to perform a reset - * @pf: struct for PF + * @cdev: pointer to iidc_rdma_core_dev_info struct * @reset_type: type of reset + * + * Return: Zero on success, error code on failure */ -int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type) +int ice_rdma_request_reset(struct iidc_rdma_core_dev_info *cdev, + enum iidc_rdma_reset_type reset_type) { enum ice_reset_req reset; + struct ice_pf *pf; - if (WARN_ON(!pf)) + if (WARN_ON(!cdev)) return -EINVAL; + pf = pci_get_drvdata(cdev->pdev); + switch (reset_type) { - case IIDC_PFR: + case IIDC_FUNC_RESET: reset = ICE_RESET_PFR; break; - case IIDC_CORER: + case IIDC_DEV_RESET: reset = ICE_RESET_CORER; break; - case IIDC_GLOBR: - reset = ICE_RESET_GLOBR; - break; default: - dev_err(ice_pf_to_dev(pf), "incorrect reset request\n"); return -EINVAL; } @@ -169,18 +186,23 @@ EXPORT_SYMBOL_GPL(ice_rdma_request_reset); /** * ice_rdma_update_vsi_filter - update main VSI filters for RDMA - * @pf: pointer to struct for PF + * @cdev: pointer to iidc_rdma_core_dev_info struct * @vsi_id: VSI HW idx to update filter on * @enable: bool whether to enable or disable filters + * + * Return: Zero on success, error code on failure */ -int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable) +int ice_rdma_update_vsi_filter(struct iidc_rdma_core_dev_info *cdev, + u16 vsi_id, bool enable) { struct ice_vsi *vsi; + struct ice_pf *pf; int status; - if (WARN_ON(!pf)) + if (WARN_ON(!cdev)) return -EINVAL; + pf = pci_get_drvdata(cdev->pdev); vsi = ice_find_vsi(pf, vsi_id); if (!vsi) return -EINVAL; @@ -201,37 +223,23 @@ EXPORT_SYMBOL_GPL(ice_rdma_update_vsi_filter); /** - * ice_get_qos_params - parse QoS params for RDMA consumption - * @pf: pointer to PF struct - * @qos: set of QoS values + * ice_alloc_rdma_qvector - alloc vector resources reserved for RDMA driver + * @cdev: pointer to iidc_rdma_core_dev_info struct + * @entry: MSI-X entry to be filled in + * + * Return: Zero on success, error code on failure */ -void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos) +int ice_alloc_rdma_qvector(struct iidc_rdma_core_dev_info *cdev, + struct msix_entry *entry) { - struct ice_dcbx_cfg *dcbx_cfg; - unsigned int i; - u32 up2tc; - - dcbx_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; - up2tc = rd32(&pf->hw, PRTDCB_TUP2TC); - - qos->num_tc = ice_dcb_get_num_tc(dcbx_cfg); - for (i = 0; i < IIDC_MAX_USER_PRIORITY; i++) - qos->up2tc[i] = (up2tc >> (i * 3)) & 0x7; - - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - qos->tc_info[i].rel_bw = dcbx_cfg->etscfg.tcbwtable[i]; - - qos->pfc_mode = dcbx_cfg->pfc_mode; - if (qos->pfc_mode == IIDC_DSCP_PFC_MODE) - for (i = 0; i < IIDC_MAX_DSCP_MAPPING; i++) - qos->dscp_map[i] = dcbx_cfg->dscp_map[i]; -} -EXPORT_SYMBOL_GPL(ice_get_qos_params); + struct msi_map map; + struct ice_pf *pf; -int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry) -{ - struct msi_map map = ice_alloc_irq(pf, true); + if (WARN_ON(!cdev)) + return -EINVAL; + pf = pci_get_drvdata(cdev->pdev); + map = ice_alloc_irq(pf, true); if (map.index < 0) return -ENOMEM; @@ -244,12 +252,19 @@ EXPORT_SYMBOL_GPL(ice_alloc_rdma_qvector); /** * ice_free_rdma_qvector - free vector resources reserved for RDMA driver - * @pf: board private structure to initialize + * @cdev: pointer to iidc_rdma_core_dev_info struct * @entry: MSI-X entry to be removed */ -void
ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry) +void ice_free_rdma_qvector(struct iidc_rdma_core_dev_info *cdev, + struct msix_entry *entry) { struct msi_map map; + struct ice_pf *pf; + + if (WARN_ON(!cdev || !entry)) + return; + + pf = pci_get_drvdata(cdev->pdev); map.index = entry->entry; map.virq = entry->vector; @@ -263,19 +278,23 @@ EXPORT_SYMBOL_GPL(ice_free_rdma_qvector); */ static void ice_adev_release(struct device *dev) { - struct iidc_auxiliary_dev *iadev; + struct iidc_rdma_core_auxiliary_dev *iadev; - iadev = container_of(dev, struct iidc_auxiliary_dev, adev.dev); + iadev = container_of(dev, struct iidc_rdma_core_auxiliary_dev, + adev.dev); kfree(iadev); } /** * ice_plug_aux_dev - allocate and register AUX device * @pf: pointer to pf struct + * + * Return: Zero on success, error code on failure */ int ice_plug_aux_dev(struct ice_pf *pf) { - struct iidc_auxiliary_dev *iadev; + struct iidc_rdma_core_auxiliary_dev *iadev; + struct iidc_rdma_core_dev_info *cdev; struct auxiliary_device *adev; int ret; @@ -285,17 +304,22 @@ int ice_plug_aux_dev(struct ice_pf *pf) if (!ice_is_rdma_ena(pf)) return 0; + cdev = pf->cdev_info; + if (!cdev) + return -ENODEV; + iadev = kzalloc(sizeof(*iadev), GFP_KERNEL); if (!iadev) return -ENOMEM; adev = &iadev->adev; - iadev->pf = pf; + iadev->cdev_info = cdev; adev->id = pf->aux_idx; adev->dev.release = ice_adev_release; adev->dev.parent = &pf->pdev->dev; - adev->name = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? "roce" : "iwarp"; + adev->name = cdev->rdma_protocol & IIDC_RDMA_PROTOCOL_ROCEV2 ? + "roce" : "iwarp"; ret = auxiliary_device_init(adev); if (ret) { @@ -310,7 +334,7 @@ int ice_plug_aux_dev(struct ice_pf *pf) } mutex_lock(&pf->adev_mutex); - pf->adev = adev; + cdev->adev = adev; mutex_unlock(&pf->adev_mutex); return 0; @@ -324,8 +348,8 @@ void ice_unplug_aux_dev(struct ice_pf *pf) struct auxiliary_device *adev; mutex_lock(&pf->adev_mutex); - adev = pf->adev; - pf->adev = NULL; + adev = pf->cdev_info->adev; + pf->cdev_info->adev = NULL; mutex_unlock(&pf->adev_mutex); if (adev) { @@ -340,7 +364,9 @@ void ice_unplug_aux_dev(struct ice_pf *pf) */ int ice_init_rdma(struct ice_pf *pf) { + struct iidc_rdma_priv_dev_info *privd; struct device *dev = &pf->pdev->dev; + struct iidc_rdma_core_dev_info *cdev; int ret; if (!ice_is_rdma_ena(pf)) { @@ -348,22 +374,50 @@ int ice_init_rdma(struct ice_pf *pf) return 0; } + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); + if (!cdev) + return -ENOMEM; + + pf->cdev_info = cdev; + + privd = kzalloc(sizeof(*privd), GFP_KERNEL); + if (!privd) { + ret = -ENOMEM; + goto err_privd_alloc; + } + + privd->pf_id = pf->hw.pf_id; ret = xa_alloc(&ice_aux_id, &pf->aux_idx, NULL, XA_LIMIT(1, INT_MAX), GFP_KERNEL); if (ret) { dev_err(dev, "Failed to allocate device ID for AUX driver\n"); - return -ENOMEM; + ret = -ENOMEM; + goto err_alloc_xa; } - pf->rdma_mode |= IIDC_RDMA_PROTOCOL_ROCEV2; + cdev->iidc_priv = privd; + privd->netdev = pf->vsi[0]->netdev; + + privd->hw_addr = (u8 __iomem *)pf->hw.hw_addr; + cdev->pdev = pf->pdev; + privd->vport_id = pf->vsi[0]->vsi_num; + + pf->cdev_info->rdma_protocol |= IIDC_RDMA_PROTOCOL_ROCEV2; + ice_setup_dcb_qos_info(pf, &privd->qos_info); ret = ice_plug_aux_dev(pf); if (ret) goto err_plug_aux_dev; return 0; err_plug_aux_dev: - pf->adev = NULL; + pf->cdev_info->adev = NULL; xa_erase(&ice_aux_id, pf->aux_idx); +err_alloc_xa: + kfree(privd); +err_privd_alloc: + kfree(cdev); + pf->cdev_info = NULL; + return ret; } @@ -378,4 +432,7 @@ void ice_deinit_rdma(struct ice_pf *pf) 
ice_unplug_aux_dev(pf); xa_erase(&ice_aux_id, pf->aux_idx); + kfree(pf->cdev_info->iidc_priv); + kfree(pf->cdev_info); + pf->cdev_info = NULL; } diff --git a/drivers/net/ethernet/intel/ice/ice_idc_int.h b/drivers/net/ethernet/intel/ice/ice_idc_int.h index 4b0c86757df9..17dbfcfb6a2a 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc_int.h +++ b/drivers/net/ethernet/intel/ice/ice_idc_int.h @@ -4,10 +4,11 @@ #ifndef _ICE_IDC_INT_H_ #define _ICE_IDC_INT_H_ -#include <linux/net/intel/iidc.h> +#include <linux/net/intel/iidc_rdma.h> +#include <linux/net/intel/iidc_rdma_ice.h> struct ice_pf; -void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event); +void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_rdma_event *event); #endif /* !_ICE_IDC_INT_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 0bcf9d127ac9..03bb16191237 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2065,12 +2065,15 @@ static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi) } /** - * ice_cfg_sw_lldp - Config switch rules for LLDP packet handling + * ice_vsi_cfg_sw_lldp - Config switch rules for LLDP packet handling * @vsi: the VSI being configured * @tx: bool to determine Tx or Rx rule * @create: bool to determine create or remove Rule + * + * Adding an ethtype Tx rule to the uplink VSI results in it being applied + * to the whole port, so LLDP transmission for VFs will be blocked too. */ -void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create) +void ice_vsi_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create) { int (*eth_fltr)(struct ice_vsi *v, u16 type, u16 flag, enum ice_sw_fwd_act_type act); @@ -2085,19 +2088,59 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create) status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_TX, ICE_DROP_PACKET); } else { - if (ice_fw_supports_lldp_fltr_ctrl(&pf->hw)) { - status = ice_lldp_fltr_add_remove(&pf->hw, vsi->vsi_num, - create); - } else { + if (!test_bit(ICE_FLAG_LLDP_AQ_FLTR, pf->flags)) { status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_RX, ICE_FWD_TO_VSI); + if (!status || !create) + goto report; + + dev_info(dev, + "Failed to add generic LLDP Rx filter on VSI %i error: %d, falling back to specialized AQ control\n", + vsi->vsi_num, status); } + + status = ice_lldp_fltr_add_remove(&pf->hw, vsi, create); + if (!status) + set_bit(ICE_FLAG_LLDP_AQ_FLTR, pf->flags); + } +report: if (status) - dev_dbg(dev, "Fail %s %s LLDP rule on VSI %i error: %d\n", - create ? "adding" : "removing", tx ? "TX" : "RX", - vsi->vsi_num, status); + dev_warn(dev, "Failed to %s %s LLDP rule on VSI %i error: %d\n", + create ? "add" : "remove", tx ? "Tx" : "Rx", + vsi->vsi_num, status); +} + +/** + * ice_cfg_sw_rx_lldp - Enable/disable software handling of LLDP + * @pf: the PF being configured + * @enable: enable or disable + * + * Configure switch rules to enable/disable LLDP handling by software + * across PF. 
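The ice_init_rdma() error handling above is the classic layered goto unwind: each label releases exactly what was set up before the failing step. A minimal userspace sketch of the shape, with made-up allocations rather than the driver's objects:

#include <stdlib.h>

struct rdma_ctx {
        void *cdev;
        void *privd;
};

static int init_rdma_ctx(struct rdma_ctx *c)
{
        c->cdev = calloc(1, 64);
        if (!c->cdev)
                return -1;

        c->privd = calloc(1, 64);
        if (!c->privd)
                goto err_privd_alloc;

        /* later setup steps would each get their own unwind label */
        return 0;

err_privd_alloc:
        free(c->cdev);          /* undo only what already succeeded */
        c->cdev = NULL;
        return -1;
}

int main(void)
{
        struct rdma_ctx c;

        return init_rdma_ctx(&c);
}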
+ */ +void ice_cfg_sw_rx_lldp(struct ice_pf *pf, bool enable) +{ + struct ice_vsi *vsi; + struct ice_vf *vf; + unsigned int bkt; + + vsi = ice_get_main_vsi(pf); + ice_vsi_cfg_sw_lldp(vsi, false, enable); + + if (!test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) + return; + + ice_for_each_vf(pf, bkt, vf) { + vsi = ice_get_vf_vsi(vf); + + if (WARN_ON(!vsi)) + continue; + + if (ice_vf_is_lldp_ena(vf)) + ice_vsi_cfg_sw_lldp(vsi, false, enable); + } } /** @@ -2528,7 +2571,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params) if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF) { ice_fltr_add_eth(vsi, ETH_P_PAUSE, ICE_FLTR_TX, ICE_DROP_PACKET); - ice_cfg_sw_lldp(vsi, true, true); + ice_vsi_cfg_sw_lldp(vsi, true, true); } if (!vsi->agg_node) @@ -2825,9 +2868,11 @@ int ice_vsi_release(struct ice_vsi *vsi) /* The Rx rule will only exist to remove if the LLDP FW * engine is currently stopped */ - if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF && - !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) - ice_cfg_sw_lldp(vsi, false, false); + if (!ice_is_safe_mode(pf) && + !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags) && + (vsi->type == ICE_VSI_PF || (vsi->type == ICE_VSI_VF && + ice_vf_is_lldp_ena(vsi->vf)))) + ice_vsi_cfg_sw_lldp(vsi, false, false); ice_vsi_decfg(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index b4c9cb28a016..654516c5fc3e 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -29,7 +29,8 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi); -void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create); +void ice_vsi_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create); +void ice_cfg_sw_rx_lldp(struct ice_pf *pf, bool enable); int ice_set_link(struct ice_vsi *vsi, bool ena); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d390157b59fe..20d3baf955e3 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2401,11 +2401,11 @@ static void ice_service_task(struct work_struct *work) } if (test_and_clear_bit(ICE_AUX_ERR_PENDING, pf->state)) { - struct iidc_event *event; + struct iidc_rdma_event *event; event = kzalloc(sizeof(*event), GFP_KERNEL); if (event) { - set_bit(IIDC_EVENT_CRIT_ERR, event->type); + set_bit(IIDC_RDMA_EVENT_CRIT_ERR, event->type); /* report the entire OICR value to AUX driver */ swap(event->reg, pf->oicr_err_reg); ice_send_event_to_aux(pf, event); @@ -2424,11 +2424,11 @@ static void ice_service_task(struct work_struct *work) ice_plug_aux_dev(pf); if (test_and_clear_bit(ICE_FLAG_MTU_CHANGED, pf->flags)) { - struct iidc_event *event; + struct iidc_rdma_event *event; event = kzalloc(sizeof(*event), GFP_KERNEL); if (event) { - set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type); + set_bit(IIDC_RDMA_EVENT_AFTER_MTU_CHANGE, event->type); ice_send_event_to_aux(pf, event); kfree(event); } @@ -8330,11 +8330,16 @@ void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) * @np: net device to configure * @filter_dev: device on which filter is added * @cls_flower: offload data + * @ingress: if the rule is added to an ingress block + * + * Return: 0 if the flower was successfully added or deleted, + * negative error code otherwise. 
*/ static int ice_setup_tc_cls_flower(struct ice_netdev_priv *np, struct net_device *filter_dev, - struct flow_cls_offload *cls_flower) + struct flow_cls_offload *cls_flower, + bool ingress) { struct ice_vsi *vsi = np->vsi; @@ -8343,7 +8348,7 @@ ice_setup_tc_cls_flower(struct ice_netdev_priv *np, switch (cls_flower->command) { case FLOW_CLS_REPLACE: - return ice_add_cls_flower(filter_dev, vsi, cls_flower); + return ice_add_cls_flower(filter_dev, vsi, cls_flower, ingress); case FLOW_CLS_DESTROY: return ice_del_cls_flower(vsi, cls_flower); default: @@ -8352,20 +8357,46 @@ ice_setup_tc_cls_flower(struct ice_netdev_priv *np, } /** - * ice_setup_tc_block_cb - callback handler registered for TC block + * ice_setup_tc_block_cb_ingress - callback handler for ingress TC block * @type: TC SETUP type * @type_data: TC flower offload data that contains user input * @cb_priv: netdev private data + * + * Return: 0 if the setup was successful, negative error code otherwise. */ static int -ice_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) +ice_setup_tc_block_cb_ingress(enum tc_setup_type type, void *type_data, + void *cb_priv) { struct ice_netdev_priv *np = cb_priv; switch (type) { case TC_SETUP_CLSFLOWER: return ice_setup_tc_cls_flower(np, np->vsi->netdev, - type_data); + type_data, true); + default: + return -EOPNOTSUPP; + } +} + +/** + * ice_setup_tc_block_cb_egress - callback handler for egress TC block + * @type: TC SETUP type + * @type_data: TC flower offload data that contains user input + * @cb_priv: netdev private data + * + * Return: 0 if the setup was successful, negative error code otherwise. + */ +static int +ice_setup_tc_block_cb_egress(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct ice_netdev_priv *np = cb_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return ice_setup_tc_cls_flower(np, np->vsi->netdev, + type_data, false); default: return -EOPNOTSUPP; } @@ -9310,27 +9341,45 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data) { struct ice_netdev_priv *np = netdev_priv(netdev); + enum flow_block_binder_type binder_type; + struct iidc_rdma_core_dev_info *cdev; struct ice_pf *pf = np->vsi->back; + flow_setup_cb_t *flower_handler; bool locked = false; int err; switch (type) { case TC_SETUP_BLOCK: + binder_type = + ((struct flow_block_offload *)type_data)->binder_type; + + switch (binder_type) { + case FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS: + flower_handler = ice_setup_tc_block_cb_ingress; + break; + case FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS: + flower_handler = ice_setup_tc_block_cb_egress; + break; + default: + return -EOPNOTSUPP; + } + return flow_block_cb_setup_simple(type_data, &ice_block_cb_list, - ice_setup_tc_block_cb, - np, np, true); + flower_handler, + np, np, false); case TC_SETUP_QDISC_MQPRIO: if (ice_is_eswitch_mode_switchdev(pf)) { netdev_err(netdev, "TC MQPRIO offload not supported, switchdev is enabled\n"); return -EOPNOTSUPP; } - if (pf->adev) { + cdev = pf->cdev_info; + if (cdev && cdev->adev) { mutex_lock(&pf->adev_mutex); - device_lock(&pf->adev->dev); + device_lock(&cdev->adev->dev); locked = true; - if (pf->adev->dev.driver) { + if (cdev->adev->dev.driver) { netdev_err(netdev, "Cannot change qdisc when RDMA is active\n"); err = -EBUSY; goto adev_unlock; @@ -9344,7 +9393,7 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, adev_unlock: if (locked) { - device_unlock(&pf->adev->dev); + device_unlock(&cdev->adev->dev); mutex_unlock(&pf->adev_mutex); } return err; @@ 
-9380,7 +9429,7 @@ ice_indr_setup_block_cb(enum tc_setup_type type, void *type_data, case TC_SETUP_CLSFLOWER: return ice_setup_tc_cls_flower(np, priv->netdev, (struct flow_cls_offload *) - type_data); + type_data, false); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 1fd1ae03eb90..b79a148ed0f2 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -305,6 +305,9 @@ u64 ice_ptp_read_src_clk_reg(struct ice_pf *pf, u32 hi, lo, lo2; u8 tmr_idx; + if (!ice_is_primary(hw)) + hw = ice_get_primary_hw(pf); + tmr_idx = ice_get_ptp_src_clock_index(hw); guard(spinlock)(&pf->adapter->ptp_gltsyn_time_lock); /* Read the system timestamp pre PHC read */ @@ -1624,14 +1627,6 @@ static int ice_ptp_cfg_extts(struct ice_pf *pf, struct ptp_extts_request *rq, int pin_desc_idx; u8 tmr_idx; - /* Reject requests with unsupported flags */ - - if (rq->flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_FALLING_EDGE | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; chan = rq->index; @@ -1802,9 +1797,6 @@ static int ice_ptp_cfg_perout(struct ice_pf *pf, struct ptp_perout_request *rq, struct ice_hw *hw = &pf->hw; int pin_desc_idx; - if (rq->flags & ~PTP_PEROUT_PHASE) - return -EOPNOTSUPP; - pin_desc_idx = ice_ptp_find_pin_idx(pf, PTP_PF_PEROUT, rq->index); if (pin_desc_idx < 0) return -EIO; @@ -2737,6 +2729,11 @@ static void ice_ptp_set_caps(struct ice_pf *pf) info->enable = ice_ptp_gpio_enable; info->verify = ice_verify_pin; + info->supported_extts_flags = PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS; + info->supported_perout_flags = PTP_PEROUT_PHASE; + switch (pf->hw.mac_type) { case ICE_MAC_E810: ice_ptp_set_funcs_e810(pf); @@ -2986,6 +2983,32 @@ static void ice_ptp_periodic_work(struct kthread_work *work) } /** + * ice_ptp_prepare_rebuild_sec - Prepare second NAC for PTP reset or rebuild + * @pf: Board private structure + * @rebuild: rebuild if true, prepare if false + * @reset_type: the reset type being performed + */ +static void ice_ptp_prepare_rebuild_sec(struct ice_pf *pf, bool rebuild, + enum ice_reset_req reset_type) +{ + struct list_head *entry; + + list_for_each(entry, &pf->adapter->ports.ports) { + struct ice_ptp_port *port = list_entry(entry, + struct ice_ptp_port, + list_node); + struct ice_pf *peer_pf = ptp_port_to_pf(port); + + if (!ice_is_primary(&peer_pf->hw)) { + if (rebuild) + ice_ptp_rebuild(peer_pf, reset_type); + else + ice_ptp_prepare_for_reset(peer_pf, reset_type); + } + } +} + +/** * ice_ptp_prepare_for_reset - Prepare PTP for reset * @pf: Board private structure * @reset_type: the reset type being performed @@ -2993,6 +3016,7 @@ static void ice_ptp_periodic_work(struct kthread_work *work) void ice_ptp_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) { struct ice_ptp *ptp = &pf->ptp; + struct ice_hw *hw = &pf->hw; u8 src_tmr; if (ptp->state != ICE_PTP_READY) @@ -3008,6 +3032,9 @@ void ice_ptp_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) if (reset_type == ICE_RESET_PFR) return; + if (ice_pf_src_tmr_owned(pf) && hw->mac_type == ICE_MAC_GENERIC_3K_E825) + ice_ptp_prepare_rebuild_sec(pf, false, reset_type); + ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx); /* Disable periodic outputs */ @@ -3129,13 +3156,6 @@ err: dev_err(ice_pf_to_dev(pf), "PTP reset failed %d\n", err); } -static bool ice_is_primary(struct ice_hw *hw) -{ - return hw->mac_type == 
ICE_MAC_GENERIC_3K_E825 && ice_is_dual(hw) ? - !!(hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_PRIMARY_M) : - true; -} - static int ice_ptp_setup_adapter(struct ice_pf *pf) { if (!ice_pf_src_tmr_owned(pf) || !ice_is_primary(&pf->hw)) @@ -3355,17 +3375,16 @@ void ice_ptp_init(struct ice_pf *pf) { struct ice_ptp *ptp = &pf->ptp; struct ice_hw *hw = &pf->hw; - int lane_num, err; + int err; ptp->state = ICE_PTP_INITIALIZING; - lane_num = ice_get_phy_lane_number(hw); - if (lane_num < 0) { - err = lane_num; + if (hw->lane_num < 0) { + err = hw->lane_num; goto err_exit; } + ptp->port.port_num = hw->lane_num; - ptp->port.port_num = (u8)lane_num; ice_ptp_init_hw(hw); ice_ptp_init_tx_interrupt_mode(pf); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 89bb8461284a..ccac84eb34c9 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -240,7 +240,7 @@ static int ice_read_cgu_reg_e82x(struct ice_hw *hw, u32 addr, u32 *val) { struct ice_sbq_msg_input cgu_msg = { .opcode = ice_sbq_msg_rd, - .dest_dev = cgu, + .dest_dev = ice_sbq_dev_cgu, .msg_addr_low = addr }; int err; @@ -272,7 +272,7 @@ static int ice_write_cgu_reg_e82x(struct ice_hw *hw, u32 addr, u32 val) { struct ice_sbq_msg_input cgu_msg = { .opcode = ice_sbq_msg_wr, - .dest_dev = cgu, + .dest_dev = ice_sbq_dev_cgu, .msg_addr_low = addr, .data = val }; @@ -874,8 +874,12 @@ static u32 ice_ptp_tmr_cmd_to_port_reg(struct ice_hw *hw, */ void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) { + struct ice_pf *pf = container_of(hw, struct ice_pf, hw); u32 cmd_val = ice_ptp_tmr_cmd_to_src_reg(hw, cmd); + if (!ice_is_primary(hw)) + hw = ice_get_primary_hw(pf); + wr32(hw, GLTSYN_CMD, cmd_val); } @@ -891,6 +895,9 @@ static void ice_ptp_exec_tmr_cmd(struct ice_hw *hw) { struct ice_pf *pf = container_of(hw, struct ice_pf, hw); + if (!ice_is_primary(hw)) + hw = ice_get_primary_hw(pf); + guard(spinlock)(&pf->adapter->ptp_gltsyn_time_lock); wr32(hw, GLTSYN_CMD_SYNC, SYNC_EXEC_CMD); ice_flush(hw); @@ -919,16 +926,24 @@ static void ice_ptp_cfg_sync_delay(const struct ice_hw *hw, u32 delay) * * Return: destination sideband queue PHY device. */ -static enum ice_sbq_msg_dev ice_ptp_get_dest_dev_e825(struct ice_hw *hw, - u8 port) +static enum ice_sbq_dev_id ice_ptp_get_dest_dev_e825(struct ice_hw *hw, + u8 port) { - /* On a single complex E825, PHY 0 is always destination device phy_0 + u8 curr_phy, tgt_phy; + + tgt_phy = port >= hw->ptp.ports_per_phy; + curr_phy = hw->lane_num >= hw->ptp.ports_per_phy; + /* In the driver, lanes 4..7 are in fact 0..3 on a second PHY. + * On a single complex E825C, PHY 0 is always destination device phy_0 * and PHY 1 is phy_0_peer. + * On dual complex E825C, device phy_0 points to PHY on a current + * complex and phy_0_peer to PHY on a different complex. 
*/ - if (port >= hw->ptp.ports_per_phy) - return eth56g_phy_1; + if ((!ice_is_dual(hw) && tgt_phy == 1) || + (ice_is_dual(hw) && tgt_phy != curr_phy)) + return ice_sbq_dev_phy_0_peer; else - return eth56g_phy_0; + return ice_sbq_dev_phy_0; } /** @@ -2417,6 +2432,7 @@ int ice_phy_cfg_intr_eth56g(struct ice_hw *hw, u8 port, bool ena, u8 threshold) static int ice_read_phy_and_phc_time_eth56g(struct ice_hw *hw, u8 port, u64 *phy_time, u64 *phc_time) { + struct ice_pf *pf = container_of(hw, struct ice_pf, hw); u64 tx_time, rx_time; u32 zo, lo; u8 tmr_idx; @@ -2436,8 +2452,13 @@ static int ice_read_phy_and_phc_time_eth56g(struct ice_hw *hw, u8 port, ice_ptp_exec_tmr_cmd(hw); /* Read the captured PHC time from the shadow time registers */ - zo = rd32(hw, GLTSYN_SHTIME_0(tmr_idx)); - lo = rd32(hw, GLTSYN_SHTIME_L(tmr_idx)); + if (ice_is_primary(hw)) { + zo = rd32(hw, GLTSYN_SHTIME_0(tmr_idx)); + lo = rd32(hw, GLTSYN_SHTIME_L(tmr_idx)); + } else { + zo = rd32(ice_get_primary_hw(pf), GLTSYN_SHTIME_0(tmr_idx)); + lo = rd32(ice_get_primary_hw(pf), GLTSYN_SHTIME_L(tmr_idx)); + } *phc_time = (u64)lo << 32 | zo; /* Read the captured PHY time from the PHY shadow registers */ @@ -2574,6 +2595,7 @@ int ice_stop_phy_timer_eth56g(struct ice_hw *hw, u8 port, bool soft_reset) */ int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port) { + struct ice_pf *pf = container_of(hw, struct ice_pf, hw); u32 lo, hi; u64 incval; u8 tmr_idx; @@ -2599,8 +2621,13 @@ int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port) if (err) return err; - lo = rd32(hw, GLTSYN_INCVAL_L(tmr_idx)); - hi = rd32(hw, GLTSYN_INCVAL_H(tmr_idx)); + if (ice_is_primary(hw)) { + lo = rd32(hw, GLTSYN_INCVAL_L(tmr_idx)); + hi = rd32(hw, GLTSYN_INCVAL_H(tmr_idx)); + } else { + lo = rd32(ice_get_primary_hw(pf), GLTSYN_INCVAL_L(tmr_idx)); + hi = rd32(ice_get_primary_hw(pf), GLTSYN_INCVAL_H(tmr_idx)); + } incval = (u64)hi << 32 | lo; err = ice_write_40b_ptp_reg_eth56g(hw, port, PHY_REG_TIMETUS_L, incval); @@ -2631,25 +2658,6 @@ int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port) } /** - * ice_sb_access_ena_eth56g - Enable SB devices (PHY and others) access - * @hw: pointer to HW struct - * @enable: Enable or disable access - * - * Enable sideband devices (PHY and others) access. 
- */ -static void ice_sb_access_ena_eth56g(struct ice_hw *hw, bool enable) -{ - u32 val = rd32(hw, PF_SB_REM_DEV_CTL); - - if (enable) - val |= BIT(eth56g_phy_0) | BIT(cgu) | BIT(eth56g_phy_1); - else - val &= ~(BIT(eth56g_phy_0) | BIT(cgu) | BIT(eth56g_phy_1)); - - wr32(hw, PF_SB_REM_DEV_CTL, val); -} - -/** * ice_ptp_init_phc_e825 - Perform E825 specific PHC initialization * @hw: pointer to HW struct * @@ -2659,8 +2667,6 @@ static void ice_sb_access_ena_eth56g(struct ice_hw *hw, bool enable) */ static int ice_ptp_init_phc_e825(struct ice_hw *hw) { - ice_sb_access_ena_eth56g(hw, true); - /* Initialize the Clock Generation Unit */ return ice_init_cgu_e82x(hw); } @@ -2747,8 +2753,6 @@ static void ice_ptp_init_phy_e825(struct ice_hw *hw) params->num_phys = 2; ptp->ports_per_phy = 4; ptp->num_lports = params->num_phys * ptp->ports_per_phy; - - ice_sb_access_ena_eth56g(hw, true); } /* E822 family functions @@ -2781,7 +2785,7 @@ static void ice_fill_phy_msg_e82x(struct ice_hw *hw, msg->msg_addr_high = P_Q1_H(P_4_BASE + offset, phy_port); } - msg->dest_dev = rmn_0; + msg->dest_dev = ice_sbq_dev_phy_0; } /** @@ -3104,7 +3108,7 @@ static int ice_fill_quad_msg_e82x(struct ice_hw *hw, if (quad >= ICE_GET_QUAD_NUM(hw->ptp.num_lports)) return -EINVAL; - msg->dest_dev = rmn_0; + msg->dest_dev = ice_sbq_dev_phy_0; if (!(quad % ICE_GET_QUAD_NUM(hw->ptp.ports_per_phy))) addr = Q_0_BASE + offset; @@ -4823,7 +4827,7 @@ static int ice_read_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 *val) msg.msg_addr_low = lower_16_bits(addr); msg.msg_addr_high = upper_16_bits(addr); msg.opcode = ice_sbq_msg_rd; - msg.dest_dev = rmn_0; + msg.dest_dev = ice_sbq_dev_phy_0; err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD); if (err) { @@ -4853,7 +4857,7 @@ static int ice_write_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 val) msg.msg_addr_low = lower_16_bits(addr); msg.msg_addr_high = upper_16_bits(addr); msg.opcode = ice_sbq_msg_wr; - msg.dest_dev = rmn_0; + msg.dest_dev = ice_sbq_dev_phy_0; msg.data = val; err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index e5925ccc2613..83f20fa7ace7 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -444,11 +444,6 @@ static inline u64 ice_get_base_incval(struct ice_hw *hw) } } -static inline bool ice_is_dual(struct ice_hw *hw) -{ - return !!(hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_DUAL_M); -} - #define PFTSYN_SEM_BYTES 4 #define ICE_PTP_CLOCK_INDEX_0 0x00 diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index fb7a1b9a4313..cb08746556a6 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -219,7 +219,8 @@ ice_repr_setup_tc_cls_flower(struct ice_repr *repr, { switch (flower->command) { case FLOW_CLS_REPLACE: - return ice_add_cls_flower(repr->netdev, repr->src_vsi, flower); + return ice_add_cls_flower(repr->netdev, repr->src_vsi, flower, + true); case FLOW_CLS_DESTROY: return ice_del_cls_flower(repr->src_vsi, flower); default: @@ -336,6 +337,7 @@ void ice_repr_destroy(struct ice_repr *repr) static void ice_repr_rem_vf(struct ice_repr *repr) { ice_eswitch_decfg_vsi(repr->src_vsi, repr->parent_mac); + ice_pass_vf_tx_lldp(repr->src_vsi, true); unregister_netdev(repr->netdev); ice_devlink_destroy_vf_port(repr->vf); ice_virtchnl_set_dflt_ops(repr->vf); @@ -417,6 +419,10 @@ static int ice_repr_add_vf(struct ice_repr *repr) if (err) 
goto err_netdev; + err = ice_drop_vf_tx_lldp(repr->src_vsi, true); + if (err) + goto err_drop_lldp; + err = ice_eswitch_cfg_vsi(repr->src_vsi, repr->parent_mac); if (err) goto err_cfg_vsi; @@ -429,6 +435,8 @@ static int ice_repr_add_vf(struct ice_repr *repr) return 0; err_cfg_vsi: + ice_pass_vf_tx_lldp(repr->src_vsi, true); +err_drop_lldp: unregister_netdev(repr->netdev); err_netdev: ice_devlink_destroy_vf_port(vf); diff --git a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h index 3b0054faf70c..183dd5457d6a 100644 --- a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h @@ -46,13 +46,10 @@ struct ice_sbq_evt_desc { u8 data[24]; }; -enum ice_sbq_msg_dev { - eth56g_phy_0 = 0x02, - rmn_0 = 0x02, - rmn_1 = 0x03, - rmn_2 = 0x04, - cgu = 0x06, - eth56g_phy_1 = 0x0D, +enum ice_sbq_dev_id { + ice_sbq_dev_phy_0 = 0x02, + ice_sbq_dev_cgu = 0x06, + ice_sbq_dev_phy_0_peer = 0x0D, }; enum ice_sbq_msg_opcode { diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index f1648cf103b7..0e4dc1a5cff0 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -63,6 +63,7 @@ static void ice_free_vf_res(struct ice_vf *vf) if (vf->lan_vsi_idx != ICE_NO_VSI) { ice_vf_vsi_release(vf); vf->num_mac = 0; + vf->num_mac_lldp = 0; } last_vector_idx = vf->first_vector_idx + vf->num_msix - 1; @@ -1402,6 +1403,9 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) mutex_lock(&vf->cfg_lock); + while (!trusted && vf->num_mac_lldp) + ice_vf_update_mac_lldp_num(vf, ice_get_vf_vsi(vf), false); + vf->trusted = trusted; ice_reset_vf(vf, ICE_VF_RESET_NOTIFY); dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n", diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 4a91e0aaf0a5..9d9a7edd3618 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -3146,7 +3146,7 @@ ice_add_update_vsi_list(struct ice_hw *hw, u16 vsi_handle_arr[2]; /* A rule already exists with the new VSI being added */ - if (cur_fltr->fwd_id.hw_vsi_id == new_fltr->fwd_id.hw_vsi_id) + if (cur_fltr->vsi_handle == new_fltr->vsi_handle) return -EEXIST; vsi_handle_arr[0] = cur_fltr->vsi_handle; @@ -5978,7 +5978,7 @@ ice_adv_add_update_vsi_list(struct ice_hw *hw, /* A rule already exists with the new VSI being added */ if (test_bit(vsi_handle, m_entry->vsi_list_info->vsi_map)) - return 0; + return -EEXIST; /* Update the previously created VSI list set with * the new VSI ID passed in diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index ea39b999a0d0..fb9ea7f8ef44 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -12,14 +12,11 @@ /** * ice_tc_count_lkups - determine lookup count for switch filter * @flags: TC-flower flags - * @headers: Pointer to TC flower filter header structure * @fltr: Pointer to outer TC filter structure * - * Determine lookup count based on TC flower input for switch filter. + * Return: lookup count based on TC flower input for a switch filter. 
*/ -static int -ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, - struct ice_tc_flower_fltr *fltr) +static int ice_tc_count_lkups(u32 flags, struct ice_tc_flower_fltr *fltr) { int lkups_cnt = 1; /* 0th lookup is metadata */ @@ -684,26 +681,26 @@ static int ice_tc_setup_action(struct net_device *filter_dev, fltr->action.fltr_act = action; if (ice_is_port_repr_netdev(filter_dev) && - ice_is_port_repr_netdev(target_dev)) { + ice_is_port_repr_netdev(target_dev) && + fltr->direction == ICE_ESWITCH_FLTR_EGRESS) { repr = ice_netdev_to_repr(target_dev); fltr->dest_vsi = repr->src_vsi; - fltr->direction = ICE_ESWITCH_FLTR_EGRESS; } else if (ice_is_port_repr_netdev(filter_dev) && - ice_tc_is_dev_uplink(target_dev)) { + ice_tc_is_dev_uplink(target_dev) && + fltr->direction == ICE_ESWITCH_FLTR_EGRESS) { repr = ice_netdev_to_repr(filter_dev); fltr->dest_vsi = repr->src_vsi->back->eswitch.uplink_vsi; - fltr->direction = ICE_ESWITCH_FLTR_EGRESS; } else if (ice_tc_is_dev_uplink(filter_dev) && - ice_is_port_repr_netdev(target_dev)) { + ice_is_port_repr_netdev(target_dev) && + fltr->direction == ICE_ESWITCH_FLTR_INGRESS) { repr = ice_netdev_to_repr(target_dev); fltr->dest_vsi = repr->src_vsi; - fltr->direction = ICE_ESWITCH_FLTR_INGRESS; } else { NL_SET_ERR_MSG_MOD(fltr->extack, - "Unsupported netdevice in switchdev mode"); + "The action is not supported for this netdevice"); return -EINVAL; } @@ -716,13 +713,11 @@ ice_tc_setup_drop_action(struct net_device *filter_dev, { fltr->action.fltr_act = ICE_DROP_PACKET; - if (ice_is_port_repr_netdev(filter_dev)) { - fltr->direction = ICE_ESWITCH_FLTR_EGRESS; - } else if (ice_tc_is_dev_uplink(filter_dev)) { - fltr->direction = ICE_ESWITCH_FLTR_INGRESS; - } else { + if (!ice_tc_is_dev_uplink(filter_dev) && + !(ice_is_port_repr_netdev(filter_dev) && + fltr->direction == ICE_ESWITCH_FLTR_INGRESS)) { NL_SET_ERR_MSG_MOD(fltr->extack, - "Unsupported netdevice in switchdev mode"); + "The action is not supported for this netdevice"); return -EINVAL; } @@ -767,10 +762,157 @@ static int ice_eswitch_tc_parse_action(struct net_device *filter_dev, return 0; } +static bool ice_is_fltr_lldp(struct ice_tc_flower_fltr *fltr) +{ + return fltr->outer_headers.l2_key.n_proto == htons(ETH_P_LLDP); +} + +static bool ice_is_fltr_pf_tx_lldp(struct ice_tc_flower_fltr *fltr) +{ + struct ice_vsi *vsi = fltr->src_vsi, *uplink; + + if (!ice_is_switchdev_running(vsi->back)) + return false; + + uplink = vsi->back->eswitch.uplink_vsi; + return vsi == uplink && fltr->action.fltr_act == ICE_DROP_PACKET && + ice_is_fltr_lldp(fltr) && + fltr->direction == ICE_ESWITCH_FLTR_EGRESS && + fltr->flags == ICE_TC_FLWR_FIELD_ETH_TYPE_ID; +} + +static bool ice_is_fltr_vf_tx_lldp(struct ice_tc_flower_fltr *fltr) +{ + struct ice_vsi *vsi = fltr->src_vsi, *uplink; + + uplink = vsi->back->eswitch.uplink_vsi; + return fltr->src_vsi->type == ICE_VSI_VF && ice_is_fltr_lldp(fltr) && + fltr->direction == ICE_ESWITCH_FLTR_EGRESS && + fltr->dest_vsi == uplink; +} + +static struct ice_tc_flower_fltr * +ice_find_pf_tx_lldp_fltr(struct ice_pf *pf) +{ + struct ice_tc_flower_fltr *fltr; + + hlist_for_each_entry(fltr, &pf->tc_flower_fltr_list, tc_flower_node) + if (ice_is_fltr_pf_tx_lldp(fltr)) + return fltr; + + return NULL; +} + +static bool ice_any_vf_lldp_tx_ena(struct ice_pf *pf) +{ + struct ice_vf *vf; + unsigned int bkt; + + ice_for_each_vf(pf, bkt, vf) + if (vf->lldp_tx_ena) + return true; + + return false; +} + +int ice_pass_vf_tx_lldp(struct ice_vsi *vsi, bool deinit) +{ + struct 
ice_rule_query_data remove_entry = { + .rid = vsi->vf->lldp_recipe_id, + .rule_id = vsi->vf->lldp_rule_id, + .vsi_handle = vsi->idx, + }; + struct ice_pf *pf = vsi->back; + int err; + + if (vsi->vf->lldp_tx_ena) + return 0; + + if (!deinit && !ice_find_pf_tx_lldp_fltr(vsi->back)) + return -EINVAL; + + if (!deinit && ice_any_vf_lldp_tx_ena(pf)) + return -EINVAL; + + err = ice_rem_adv_rule_by_id(&pf->hw, &remove_entry); + if (!err) + vsi->vf->lldp_tx_ena = true; + + return err; +} + +int ice_drop_vf_tx_lldp(struct ice_vsi *vsi, bool init) +{ + struct ice_rule_query_data rule_added; + struct ice_adv_rule_info rinfo = { + .priority = 7, + .src_vsi = vsi->idx, + .sw_act = { + .src = vsi->idx, + .flag = ICE_FLTR_TX, + .fltr_act = ICE_DROP_PACKET, + .vsi_handle = vsi->idx, + }, + .flags_info.act_valid = true, + }; + struct ice_adv_lkup_elem list[3]; + struct ice_pf *pf = vsi->back; + int err; + + if (!init && !vsi->vf->lldp_tx_ena) + return 0; + + memset(list, 0, sizeof(list)); + ice_rule_add_direction_metadata(&list[0]); + ice_rule_add_src_vsi_metadata(&list[1]); + list[2].type = ICE_ETYPE_OL; + list[2].h_u.ethertype.ethtype_id = htons(ETH_P_LLDP); + list[2].m_u.ethertype.ethtype_id = htons(0xFFFF); + + err = ice_add_adv_rule(&pf->hw, list, ARRAY_SIZE(list), &rinfo, + &rule_added); + if (err) { + dev_err(&pf->pdev->dev, + "Failed to add an LLDP rule to VSI 0x%X: %d\n", + vsi->idx, err); + } else { + vsi->vf->lldp_recipe_id = rule_added.rid; + vsi->vf->lldp_rule_id = rule_added.rule_id; + vsi->vf->lldp_tx_ena = false; + } + + return err; +} + +static void ice_handle_add_pf_lldp_drop_rule(struct ice_vsi *vsi) +{ + struct ice_tc_flower_fltr *fltr; + struct ice_pf *pf = vsi->back; + + hlist_for_each_entry(fltr, &pf->tc_flower_fltr_list, tc_flower_node) { + if (!ice_is_fltr_vf_tx_lldp(fltr)) + continue; + ice_pass_vf_tx_lldp(fltr->src_vsi, true); + break; + } +} + +static void ice_handle_del_pf_lldp_drop_rule(struct ice_pf *pf) +{ + int i; + + /* Make the VF LLDP fwd to uplink rule dormant */ + ice_for_each_vsi(pf, i) { + struct ice_vsi *vf_vsi = pf->vsi[i]; + + if (vf_vsi && vf_vsi->type == ICE_VSI_VF) + ice_drop_vf_tx_lldp(vf_vsi, false); + } +} + static int ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) { - struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers; struct ice_adv_rule_info rule_info = { 0 }; struct ice_rule_query_data rule_added; struct ice_hw *hw = &vsi->back->hw; @@ -785,7 +927,10 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) return -EOPNOTSUPP; } - lkups_cnt = ice_tc_count_lkups(flags, headers, fltr); + if (ice_is_fltr_vf_tx_lldp(fltr)) + return ice_pass_vf_tx_lldp(vsi, false); + + lkups_cnt = ice_tc_count_lkups(flags, fltr); list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC); if (!list) return -ENOMEM; @@ -814,6 +959,11 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) rule_info.sw_act.src = hw->pf_id; rule_info.flags_info.act = ICE_SINGLE_ACT_LB_ENABLE; } else if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS && + !fltr->dest_vsi && vsi == vsi->back->eswitch.uplink_vsi) { + /* PF to Uplink */ + rule_info.sw_act.flag |= ICE_FLTR_TX; + rule_info.sw_act.src = vsi->idx; + } else if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS && fltr->dest_vsi == vsi->back->eswitch.uplink_vsi) { /* VF to Uplink */ rule_info.sw_act.flag |= ICE_FLTR_TX; @@ -846,11 +996,17 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to 
add filter because it already exists"); ret = -EINVAL; goto exit; + } else if (ret == -ENOSPC) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter: insufficient space available."); + goto exit; } else if (ret) { NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter due to error"); goto exit; } + if (ice_is_fltr_pf_tx_lldp(fltr)) + ice_handle_add_pf_lldp_drop_rule(vsi); + /* store the output params, which are needed later for removing * advanced switch filter */ @@ -985,7 +1141,6 @@ static int ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *tc_fltr) { - struct ice_tc_flower_lyr_2_4_hdrs *headers = &tc_fltr->outer_headers; struct ice_adv_rule_info rule_info = {0}; struct ice_rule_query_data rule_added; struct ice_adv_lkup_elem *list; @@ -1021,7 +1176,7 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, return PTR_ERR(dest_vsi); } - lkups_cnt = ice_tc_count_lkups(flags, headers, tc_fltr); + lkups_cnt = ice_tc_count_lkups(flags, tc_fltr); list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC); if (!list) return -ENOMEM; @@ -1056,8 +1211,13 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, tc_fltr->action.fwd.q.hw_queue, lkups_cnt); break; case ICE_DROP_PACKET: - rule_info.sw_act.flag |= ICE_FLTR_RX; - rule_info.sw_act.src = hw->pf_id; + if (tc_fltr->direction == ICE_ESWITCH_FLTR_EGRESS) { + rule_info.sw_act.flag |= ICE_FLTR_TX; + rule_info.sw_act.src = vsi->idx; + } else { + rule_info.sw_act.flag |= ICE_FLTR_RX; + rule_info.sw_act.src = hw->pf_id; + } rule_info.priority = ICE_SWITCH_FLTR_PRIO_VSI; break; default: @@ -1071,6 +1231,10 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, "Unable to add filter because it already exists"); ret = -EINVAL; goto exit; + } else if (ret == -ENOSPC) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, + "Unable to add filter: insufficient space available."); + goto exit; } else if (ret) { NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unable to add filter due to error"); @@ -1463,11 +1627,16 @@ ice_parse_tunnel_attr(struct net_device *dev, struct flow_rule *rule, * @filter_dev: Pointer to device on which filter is being added * @f: Pointer to struct flow_cls_offload * @fltr: Pointer to filter structure + * @ingress: if the rule is added to an ingress block + * + * Return: 0 if the flower was parsed successfully, -EINVAL if the flower + * cannot be parsed, -EOPNOTSUPP if such filter cannot be configured + * for the given VSI. */ static int ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, struct flow_cls_offload *f, - struct ice_tc_flower_fltr *fltr) + struct ice_tc_flower_fltr *fltr, bool ingress) { struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers; struct flow_rule *rule = flow_cls_offload_flow_rule(f); @@ -1551,6 +1720,20 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, fltr->flags |= ICE_TC_FLWR_FIELD_ETH_TYPE_ID; } + if (!ingress) { + bool switchdev = + ice_is_eswitch_mode_switchdev(vsi->back); + + if (switchdev != (n_proto_key == ETH_P_LLDP)) { + NL_SET_ERR_MSG_FMT_MOD(fltr->extack, + "%sLLDP filtering is not supported on egress in %s mode", + switchdev ? "Non-" : "", + switchdev ?
"switchdev" : + "legacy"); + return -EOPNOTSUPP; + } + } + headers->l2_key.n_proto = cpu_to_be16(n_proto_key); headers->l2_mask.n_proto = cpu_to_be16(n_proto_mask); headers->l3_key.ip_proto = match.key->ip_proto; @@ -1726,6 +1909,14 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, return -EINVAL; } } + + /* Ingress filter on representor results in an egress filter in HW + * and vice versa + */ + ingress = ice_is_port_repr_netdev(filter_dev) ? !ingress : ingress; + fltr->direction = ingress ? ICE_ESWITCH_FLTR_INGRESS : + ICE_ESWITCH_FLTR_EGRESS; + return 0; } @@ -1939,6 +2130,12 @@ static int ice_del_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) struct ice_pf *pf = vsi->back; int err; + if (ice_is_fltr_pf_tx_lldp(fltr)) + ice_handle_del_pf_lldp_drop_rule(pf); + + if (ice_is_fltr_vf_tx_lldp(fltr)) + return ice_drop_vf_tx_lldp(vsi, false); + rule_rem.rid = fltr->rid; rule_rem.rule_id = fltr->rule_id; rule_rem.vsi_handle = fltr->dest_vsi_handle; @@ -1975,14 +2172,18 @@ static int ice_del_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) * @vsi: Pointer to VSI * @f: Pointer to flower offload structure * @__fltr: Pointer to struct ice_tc_flower_fltr + * @ingress: if the rule is added to an ingress block * * This function parses TC-flower input fields, parses action, * and adds a filter. + * + * Return: 0 if the filter was successfully added, + * negative error code otherwise. */ static int ice_add_tc_fltr(struct net_device *netdev, struct ice_vsi *vsi, struct flow_cls_offload *f, - struct ice_tc_flower_fltr **__fltr) + struct ice_tc_flower_fltr **__fltr, bool ingress) { struct ice_tc_flower_fltr *fltr; int err; @@ -1999,7 +2200,7 @@ ice_add_tc_fltr(struct net_device *netdev, struct ice_vsi *vsi, fltr->src_vsi = vsi; INIT_HLIST_NODE(&fltr->tc_flower_node); - err = ice_parse_cls_flower(netdev, vsi, f, fltr); + err = ice_parse_cls_flower(netdev, vsi, f, fltr, ingress); if (err < 0) goto err; @@ -2042,10 +2243,13 @@ ice_find_tc_flower_fltr(struct ice_pf *pf, unsigned long cookie) * @netdev: Pointer to filter device * @vsi: Pointer to VSI * @cls_flower: Pointer to flower offload structure + * @ingress: if the rule is added to an ingress block + * + * Return: 0 if the flower was successfully added, + * negative error code otherwise. 
*/ -int -ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, - struct flow_cls_offload *cls_flower) +int ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, + struct flow_cls_offload *cls_flower, bool ingress) { struct netlink_ext_ack *extack = cls_flower->common.extack; struct net_device *vsi_netdev = vsi->netdev; @@ -2080,7 +2284,7 @@ ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, } /* prep and add TC-flower filter in HW */ - err = ice_add_tc_fltr(netdev, vsi, cls_flower, &fltr); + err = ice_add_tc_fltr(netdev, vsi, cls_flower, &fltr, ingress); if (err) return err; diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h index d84f153517ec..8a3ab2f22af9 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -211,13 +211,14 @@ static inline int ice_chnl_dmac_fltr_cnt(struct ice_pf *pf) } struct ice_vsi *ice_locate_vsi_using_queue(struct ice_vsi *vsi, int queue); -int -ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, - struct flow_cls_offload *cls_flower); -int -ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower); +int ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, + struct flow_cls_offload *cls_flower, bool ingress); +int ice_del_cls_flower(struct ice_vsi *vsi, + struct flow_cls_offload *cls_flower); void ice_replay_tc_fltrs(struct ice_pf *pf); bool ice_is_tunnel_supported(struct net_device *dev); +int ice_drop_vf_tx_lldp(struct ice_vsi *vsi, bool init); +int ice_pass_vf_tx_lldp(struct ice_vsi *vsi, bool deinit); static inline bool ice_is_forward_action(enum ice_sw_fwd_act_type fltr_act) { diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 1e4f6f6ee449..0e5107fe62ad 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -2440,19 +2440,20 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) /* allow CONTROL frames egress from main VSI if FW LLDP disabled */ eth = (struct ethhdr *)skb_mac_header(skb); - if (unlikely((skb->priority == TC_PRIO_CONTROL || - eth->h_proto == htons(ETH_P_LLDP)) && - vsi->type == ICE_VSI_PF && - vsi->port_info->qos_cfg.is_sw_lldp)) - offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX | - ICE_TX_CTX_DESC_SWTCH_UPLINK << - ICE_TXD_CTX_QW1_CMD_S); - ice_tstamp(tx_ring, skb, first, &offload); if ((ice_is_switchdev_running(vsi->back) || ice_lag_is_switchdev_running(vsi->back)) && vsi->type != ICE_VSI_SF) ice_eswitch_set_target_vsi(skb, &offload); + else if (unlikely((skb->priority == TC_PRIO_CONTROL || + eth->h_proto == htons(ETH_P_LLDP)) && + vsi->type == ICE_VSI_PF && + vsi->port_info->qos_cfg.is_sw_lldp)) + offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX | + ICE_TX_CTX_DESC_SWTCH_UPLINK << + ICE_TXD_CTX_QW1_CMD_S); + + ice_tstamp(tx_ring, skb, first, &offload); if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) { struct ice_tx_ctx_desc *cdesc; diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 0aab21113cc4..3d68f465952d 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -19,6 +19,7 @@ #include "ice_vlan_mode.h" #include "ice_fwlog.h" #include <linux/wait.h> +#include <net/dscp.h> static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc) { @@ -695,7 +696,6 @@ struct ice_dcb_app_priority_table { #define ICE_MAX_USER_PRIORITY 8 #define 
ICE_DCBX_MAX_APPS 64 -#define ICE_DSCP_NUM_VAL 64 #define ICE_LLDPDU_SIZE 1500 #define ICE_TLV_STATUS_OPER 0x1 #define ICE_TLV_STATUS_SYNC 0x2 @@ -718,9 +718,9 @@ struct ice_dcbx_cfg { u8 pfc_mode; struct ice_dcb_app_priority_table app[ICE_DCBX_MAX_APPS]; /* when DSCP mapping defined by user set its bit to 1 */ - DECLARE_BITMAP(dscp_mapped, ICE_DSCP_NUM_VAL); + DECLARE_BITMAP(dscp_mapped, DSCP_MAX); /* array holding DSCP -> UP/TC values for DSCP L3 QoS mode */ - u8 dscp_map[ICE_DSCP_NUM_VAL]; + u8 dscp_map[DSCP_MAX]; u8 dcbx_mode; #define ICE_DCBX_MODE_CEE 0x1 #define ICE_DCBX_MODE_IEEE 0x2 @@ -970,6 +970,7 @@ struct ice_hw { u8 intrl_gran; struct ice_ptp_hw ptp; + s8 lane_num; /* Active package version (currently active) */ struct ice_pkg_ver active_pkg_ver; diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 815ad0bfe832..48cd533e93b7 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -226,6 +226,7 @@ static void ice_vf_clear_counters(struct ice_vf *vf) vsi->num_vlan = 0; vf->num_mac = 0; + vf->num_mac_lldp = 0; memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events)); memset(&vf->mdd_rx_events, 0, sizeof(vf->mdd_rx_events)); } @@ -1401,3 +1402,28 @@ struct ice_vsi *ice_get_vf_ctrl_vsi(struct ice_pf *pf, struct ice_vsi *vsi) rcu_read_unlock(); return ctrl_vsi; } + +/** + * ice_vf_update_mac_lldp_num - update the VF's number of LLDP addresses + * @vf: a VF to add the address to + * @vsi: the corresponding VSI + * @incr: true if an LLDP address was added, false if removed + */ +void ice_vf_update_mac_lldp_num(struct ice_vf *vf, struct ice_vsi *vsi, + bool incr) +{ + bool lldp_by_fw, was_ena, is_ena; + + if (WARN_ON(!vsi)) { + vf->num_mac_lldp = 0; + return; + } + + lldp_by_fw = test_bit(ICE_FLAG_FW_LLDP_AGENT, vsi->back->flags); + was_ena = ice_vf_is_lldp_ena(vf) && !lldp_by_fw; + vf->num_mac_lldp += incr ? 1 : -1; + is_ena = ice_vf_is_lldp_ena(vf) && !lldp_by_fw; + + if (was_ena != is_ena) + ice_vsi_cfg_sw_lldp(vsi, false, is_ena); +} diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index 799b2c1f1184..482f4285fd35 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -124,6 +124,7 @@ struct ice_vf { u8 spoofchk:1; u8 link_forced:1; u8 link_up:1; /* only valid if VF link is forced */ + u8 lldp_tx_ena:1; u32 ptp_caps; @@ -134,6 +135,7 @@ struct ice_vf { unsigned long vf_caps; /* VF's adv. capabilities */ u8 num_req_qs; /* num of queue pairs requested by VF */ u16 num_mac; + u16 num_mac_lldp; u16 num_vf_qs; /* num of queue configured per VF */ u8 vlan_strip_ena; /* Outer and Inner VLAN strip enable */ #define ICE_INNER_VLAN_STRIP_ENA BIT(0) @@ -149,6 +151,9 @@ struct ice_vf { /* devlink port data */ struct devlink_port devlink_port; + u16 lldp_recipe_id; + u16 lldp_rule_id; + u16 num_msix; /* num of MSI-X configured on this VF */ struct ice_vf_qs_bw qs_bw[ICE_MAX_RSS_QS_PER_VF]; }; @@ -180,6 +185,11 @@ static inline u16 ice_vf_get_port_vlan_tpid(struct ice_vf *vf) return vf->port_vlan_info.tpid; } +static inline bool ice_vf_is_lldp_ena(struct ice_vf *vf) +{ + return vf->num_mac_lldp && vf->trusted; +} + /* VF Hash Table access functions * * These functions provide abstraction for interacting with the VF hash table.
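/*
 * Illustrative sketch, not part of the patch: the VF LLDP accounting added
 * above combines a per-VF counter (num_mac_lldp) with the trust flag into a
 * level signal and reprograms the switch rule only on enabled/disabled
 * transitions. A minimal standalone rendering of that edge-trigger pattern;
 * all names below are hypothetical and the rule callback is assumed.
 */
#include <stdbool.h>

struct vf_lldp_state {
	unsigned int num_mac_lldp;	/* LLDP multicast addresses installed */
	bool trusted;			/* administratively trusted VF */
};

/* Level signal, mirroring ice_vf_is_lldp_ena(): LLDP Rx handling is on
 * only for a trusted VF holding at least one LLDP multicast address.
 */
static bool vf_lldp_ena(const struct vf_lldp_state *vf)
{
	return vf->num_mac_lldp && vf->trusted;
}

static void vf_update_lldp_count(struct vf_lldp_state *vf, bool incr,
				 void (*cfg_sw_lldp)(bool ena))
{
	bool was_ena = vf_lldp_ena(vf);

	vf->num_mac_lldp += incr ? 1 : -1;

	/* Act only on the edge so repeated adds/removes do not thrash rules */
	if (was_ena != vf_lldp_ena(vf))
		cfg_sw_lldp(vf_lldp_ena(vf));
}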
@@ -245,6 +255,8 @@ ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m); int ice_reset_vf(struct ice_vf *vf, u32 flags); void ice_reset_all_vfs(struct ice_pf *pf); struct ice_vsi *ice_get_vf_ctrl_vsi(struct ice_pf *pf, struct ice_vsi *vsi); +void ice_vf_update_mac_lldp_num(struct ice_vf *vf, struct ice_vsi *vsi, + bool incr); #else /* CONFIG_PCI_IOV */ static inline struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id) { diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 7c3006eb68dd..b147f6cf2615 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -2266,6 +2266,51 @@ ice_vfhw_mac_add(struct ice_vf *vf, struct virtchnl_ether_addr *vc_ether_addr) } /** + * ice_is_mc_lldp_eth_addr - check if the given MAC is a multicast LLDP address + * @mac: address to check + * + * Return: true if the address is one of the three possible LLDP multicast + * addresses, false otherwise. + */ +static bool ice_is_mc_lldp_eth_addr(const u8 *mac) +{ + const u8 lldp_mac_base[] = {0x01, 0x80, 0xc2, 0x00, 0x00}; + + if (memcmp(mac, lldp_mac_base, sizeof(lldp_mac_base))) + return false; + + return (mac[5] == 0x0e || mac[5] == 0x03 || mac[5] == 0x00); +} + +/** + * ice_vc_can_add_mac - check if the VF is allowed to add a given MAC + * @vf: a VF to add the address to + * @mac: address to check + * + * Return: true if the VF is allowed to add such MAC address, false otherwise. + */ +static bool ice_vc_can_add_mac(const struct ice_vf *vf, const u8 *mac) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + + if (is_unicast_ether_addr(mac) && + !ice_can_vf_change_mac((struct ice_vf *)vf)) { + dev_err(dev, + "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n"); + return false; + } + + if (!vf->trusted && ice_is_mc_lldp_eth_addr(mac)) { + dev_warn(dev, + "An untrusted VF %u is attempting to configure an LLDP multicast address\n", + vf->vf_id); + return false; + } + + return true; +} + +/** * ice_vc_add_mac_addr - attempt to add the MAC address passed in * @vf: pointer to the VF info * @vsi: pointer to the VF's VSI @@ -2283,10 +2328,8 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, if (ether_addr_equal(mac_addr, vf->dev_lan_addr)) return 0; - if (is_unicast_ether_addr(mac_addr) && !ice_can_vf_change_mac(vf)) { - dev_err(dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n"); + if (!ice_vc_can_add_mac(vf, mac_addr)) return -EPERM; - } ret = ice_fltr_add_mac(vsi, mac_addr, ICE_FWD_TO_VSI); if (ret == -EEXIST) { @@ -2301,6 +2344,8 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, return ret; } else { vf->num_mac++; + if (ice_is_mc_lldp_eth_addr(mac_addr)) + ice_vf_update_mac_lldp_num(vf, vsi, true); } ice_vfhw_mac_add(vf, vc_ether_addr); @@ -2395,6 +2440,8 @@ ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, ice_vfhw_mac_del(vf, vc_ether_addr); vf->num_mac--; + if (ice_is_mc_lldp_eth_addr(mac_addr)) + ice_vf_update_mac_lldp_num(vf, vsi, false); return 0; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 82f09b4030bc..6b8ce93e531e 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -144,22 +144,6 @@ static int idpf_mb_intr_req_irq(struct idpf_adapter *adapter) } /** - * 
idpf_set_mb_vec_id - Set vector index for mailbox - * @adapter: adapter structure to access the vector chunks - * - * The first vector id in the requested vector chunks from the CP is for - * the mailbox - */ -static void idpf_set_mb_vec_id(struct idpf_adapter *adapter) -{ - if (adapter->req_vec_chunks) - adapter->mb_vector.v_idx = - le16_to_cpu(adapter->caps.mailbox_vector_id); - else - adapter->mb_vector.v_idx = 0; -} - -/** * idpf_mb_intr_init - Initialize the mailbox interrupt * @adapter: adapter structure to store the mailbox vector */ @@ -349,7 +333,7 @@ int idpf_intr_req(struct idpf_adapter *adapter) goto free_irq; } - idpf_set_mb_vec_id(adapter); + adapter->mb_vector.v_idx = le16_to_cpu(adapter->caps.mailbox_vector_id); vecids = kcalloc(total_vecs, sizeof(u16), GFP_KERNEL); if (!vecids) { diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index eae1b6f474e6..2e356dd10812 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -891,7 +891,6 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q, * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor * @rx_desc: the descriptor to process * @fields: storage for extracted values - * @ptype: pointer that will store packet type * * Decode the Rx descriptor and extract relevant information including the * size and Rx packet type. @@ -901,21 +900,20 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q, */ static void idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc, - struct libeth_rqe_info *fields, u32 *ptype) + struct libeth_rqe_info *fields) { u64 qword; qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len); fields->len = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword); - *ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword); + fields->ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword); } /** * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor * @rx_desc: the descriptor to process * @fields: storage for extracted values - * @ptype: pointer that will store packet type * * Decode the Rx descriptor and extract relevant information including the * size and Rx packet type. 
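/*
 * Illustrative sketch, not part of the patch: the idpf refactor above drops
 * the separate u32 *ptype out-parameter and carries the packet type inside
 * the extracted-fields struct, so both descriptor formats fill one result.
 * Standalone rendering with placeholder types; the shifts and masks below
 * are illustrative only, not the real VIRTCHNL2 field definitions.
 */
#include <stdint.h>

struct rqe_fields {
	uint32_t len;
	uint32_t ptype;		/* previously returned via an out-parameter */
};

static void extract_base_fields(uint64_t qword, struct rqe_fields *fields)
{
	/* Placeholder bit positions standing in for the FIELD_GET() calls */
	fields->len = (uint32_t)((qword >> 38) & 0x3fff);
	fields->ptype = (uint32_t)((qword >> 30) & 0xff);
}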
@@ -925,12 +923,12 @@ idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc, */ static void idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc, - struct libeth_rqe_info *fields, u32 *ptype) + struct libeth_rqe_info *fields) { fields->len = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M, le16_to_cpu(rx_desc->flex_nic_wb.pkt_len)); - *ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M, - le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0)); + fields->ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M, + le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0)); } /** @@ -938,18 +936,17 @@ idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc, * @rx_q: Rx descriptor queue * @rx_desc: the descriptor to process * @fields: storage for extracted values - * @ptype: pointer that will store packet type * */ static void idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q, const union virtchnl2_rx_desc *rx_desc, - struct libeth_rqe_info *fields, u32 *ptype) + struct libeth_rqe_info *fields) { if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) - idpf_rx_singleq_extract_base_fields(rx_desc, fields, ptype); + idpf_rx_singleq_extract_base_fields(rx_desc, fields); else - idpf_rx_singleq_extract_flex_fields(rx_desc, fields, ptype); + idpf_rx_singleq_extract_flex_fields(rx_desc, fields); } /** @@ -972,7 +969,6 @@ static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget) struct libeth_rqe_info fields = { }; union virtchnl2_rx_desc *rx_desc; struct idpf_rx_buf *rx_buf; - u32 ptype; /* get the Rx desc from Rx queue based on 'next_to_clean' */ rx_desc = &rx_q->rx[ntc]; @@ -993,7 +989,7 @@ static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget) */ dma_rmb(); - idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields, &ptype); + idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields); rx_buf = &rx_q->rx_buf[ntc]; if (!libeth_rx_sync_for_cpu(rx_buf, fields.len)) @@ -1037,7 +1033,8 @@ skip_data: total_rx_bytes += skb->len; /* protocol */ - idpf_rx_singleq_process_skb_fields(rx_q, skb, rx_desc, ptype); + idpf_rx_singleq_process_skb_fields(rx_q, skb, rx_desc, + fields.ptype); /* send completed skb up the stack */ napi_gro_receive(rx_q->pp->p.napi, skb); diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 02f340280d20..f34ead8243e9 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -391,7 +391,8 @@ enum e1000_ring_flags_t { IGB_RING_FLAG_RX_LB_VLAN_BSWAP, IGB_RING_FLAG_TX_CTX_IDX, IGB_RING_FLAG_TX_DETECT_HANG, - IGB_RING_FLAG_TX_DISABLED + IGB_RING_FLAG_TX_DISABLED, + IGB_RING_FLAG_RX_ALLOC_FAILED, }; #define ring_uses_large_buffer(ring) \ @@ -722,6 +723,8 @@ enum igb_boards { extern char igb_driver_name[]; +void igb_set_queue_napi(struct igb_adapter *adapter, int q_idx, + struct napi_struct *napi); int igb_xmit_xdp_ring(struct igb_adapter *adapter, struct igb_ring *ring, struct xdp_frame *xdpf); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index c646c71915f0..9e9a5900e6e5 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -947,6 +947,9 @@ static int igb_request_msix(struct igb_adapter *adapter) q_vector); if (err) goto err_free; + + netif_napi_set_irq(&q_vector->napi, + adapter->msix_entries[vector].vector); } igb_configure_msix(adapter); @@ -1194,7 +1197,8 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, return -ENOMEM; 
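/*
 * Illustrative sketch, not part of the patch: the IGB_RING_FLAG_RX_ALLOC_FAILED
 * bit declared above is consumed later in the watchdog hunk, which re-arms
 * interrupts only for rings that actually failed buffer allocation. A
 * standalone mark-and-sweep rendering of that pattern with hypothetical names.
 */
#define RX_ALLOC_FAILED_BIT	0

struct rx_ring_state {
	unsigned long flags;
	unsigned int eims_value;	/* interrupt cause bit for this ring */
};

/* Hot path: on allocation failure, just record the event. */
static void mark_alloc_failed(struct rx_ring_state *r)
{
	r->flags |= 1UL << RX_ALLOC_FAILED_BIT;
}

/* Watchdog: collect causes for failed rings only, clearing each mark so a
 * single failure yields a single retry kick (an EICS-style register write).
 */
static unsigned int sweep_failed_rings(struct rx_ring_state *rings, int n)
{
	unsigned int eics = 0;
	int i;

	for (i = 0; i < n; i++)
		if (rings[i].flags & (1UL << RX_ALLOC_FAILED_BIT)) {
			eics |= rings[i].eims_value;
			rings[i].flags &= ~(1UL << RX_ALLOC_FAILED_BIT);
		}

	return eics;
}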
/* initialize NAPI */ - netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll); + netif_napi_add_config(adapter->netdev, &q_vector->napi, igb_poll, + v_idx); /* tie q_vector and adapter together */ adapter->q_vector[v_idx] = q_vector; @@ -2096,6 +2100,22 @@ static void igb_check_swap_media(struct igb_adapter *adapter) wr32(E1000_CTRL_EXT, ctrl_ext); } +void igb_set_queue_napi(struct igb_adapter *adapter, int vector, + struct napi_struct *napi) +{ + struct igb_q_vector *q_vector = adapter->q_vector[vector]; + + if (q_vector->rx.ring) + netif_queue_set_napi(adapter->netdev, + q_vector->rx.ring->queue_index, + NETDEV_QUEUE_TYPE_RX, napi); + + if (q_vector->tx.ring) + netif_queue_set_napi(adapter->netdev, + q_vector->tx.ring->queue_index, + NETDEV_QUEUE_TYPE_TX, napi); +} + /** * igb_up - Open the interface and prepare it to handle traffic * @adapter: board private structure @@ -2103,6 +2123,7 @@ static void igb_check_swap_media(struct igb_adapter *adapter) int igb_up(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + struct napi_struct *napi; int i; /* hardware has been reset, we need to reload some things */ @@ -2110,8 +2131,11 @@ int igb_up(struct igb_adapter *adapter) clear_bit(__IGB_DOWN, &adapter->state); - for (i = 0; i < adapter->num_q_vectors; i++) - napi_enable(&(adapter->q_vector[i]->napi)); + for (i = 0; i < adapter->num_q_vectors; i++) { + napi = &adapter->q_vector[i]->napi; + napi_enable(napi); + igb_set_queue_napi(adapter, i, napi); + } if (adapter->flags & IGB_FLAG_HAS_MSIX) igb_configure_msix(adapter); @@ -2181,6 +2205,7 @@ void igb_down(struct igb_adapter *adapter) for (i = 0; i < adapter->num_q_vectors; i++) { if (adapter->q_vector[i]) { napi_synchronize(&adapter->q_vector[i]->napi); + igb_set_queue_napi(adapter, i, NULL); napi_disable(&adapter->q_vector[i]->napi); } } @@ -4113,8 +4138,9 @@ static int igb_sw_init(struct igb_adapter *adapter) static int __igb_open(struct net_device *netdev, bool resuming) { struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; + struct e1000_hw *hw = &adapter->hw; + struct napi_struct *napi; int err; int i; @@ -4166,8 +4192,11 @@ static int __igb_open(struct net_device *netdev, bool resuming) /* From here on the code is the same as igb_up() */ clear_bit(__IGB_DOWN, &adapter->state); - for (i = 0; i < adapter->num_q_vectors; i++) - napi_enable(&(adapter->q_vector[i]->napi)); + for (i = 0; i < adapter->num_q_vectors; i++) { + napi = &adapter->q_vector[i]->napi; + napi_enable(napi); + igb_set_queue_napi(adapter, i, napi); + } /* Clear any pending interrupts. 
*/ rd32(E1000_TSICR); @@ -5726,11 +5755,29 @@ no_wait: if (adapter->flags & IGB_FLAG_HAS_MSIX) { u32 eics = 0; - for (i = 0; i < adapter->num_q_vectors; i++) - eics |= adapter->q_vector[i]->eims_value; - wr32(E1000_EICS, eics); + for (i = 0; i < adapter->num_q_vectors; i++) { + struct igb_q_vector *q_vector = adapter->q_vector[i]; + struct igb_ring *rx_ring; + + if (!q_vector->rx.ring) + continue; + + rx_ring = adapter->rx_ring[q_vector->rx.ring->queue_index]; + + if (test_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { + eics |= q_vector->eims_value; + clear_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); + } + } + if (eics) + wr32(E1000_EICS, eics); } else { - wr32(E1000_ICS, E1000_ICS_RXDMT0); + struct igb_ring *rx_ring = adapter->rx_ring[0]; + + if (test_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { + clear_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); + wr32(E1000_ICS, E1000_ICS_RXDMT0); + } } igb_spoof_check(adapter); @@ -9061,6 +9108,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) if (!xdp_res && !skb) { rx_ring->rx_stats.alloc_failed++; rx_buffer->pagecnt_bias++; + set_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); break; } @@ -9120,6 +9168,7 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, page = dev_alloc_pages(igb_rx_pg_order(rx_ring)); if (unlikely(!page)) { rx_ring->rx_stats.alloc_failed++; + set_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); return false; } @@ -9136,6 +9185,7 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, __free_pages(page, igb_rx_pg_order(rx_ring)); rx_ring->rx_stats.alloc_failed++; + set_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); return false; } @@ -9674,8 +9724,11 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, if (state == pci_channel_io_perm_failure) return PCI_ERS_RESULT_DISCONNECT; + rtnl_lock(); if (netif_running(netdev)) igb_down(adapter); + rtnl_unlock(); + pci_disable_device(pdev); /* Request a slot reset. 
*/ @@ -9734,16 +9787,21 @@ static void igb_io_resume(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); + rtnl_lock(); if (netif_running(netdev)) { if (!test_bit(__IGB_DOWN, &adapter->state)) { dev_dbg(&pdev->dev, "Resuming from non-fatal error, do nothing.\n"); + rtnl_unlock(); return; } + if (igb_up(adapter)) { dev_err(&pdev->dev, "igb_up failed after reset\n"); + rtnl_unlock(); return; } } + rtnl_unlock(); netif_device_attach(netdev); diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index f323e1c1989f..793c96016288 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -502,13 +502,6 @@ static int igb_ptp_feature_enable_82580(struct ptp_clock_info *ptp, switch (rq->type) { case PTP_CLK_REQ_EXTTS: - /* Reject requests with unsupported flags */ - if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_FALLING_EDGE | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - /* Both the rising and falling edge are timestamped */ if (rq->extts.flags & PTP_STRICT_FLAGS && (rq->extts.flags & PTP_ENABLE_FEATURE) && @@ -658,13 +651,6 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, switch (rq->type) { case PTP_CLK_REQ_EXTTS: - /* Reject requests with unsupported flags */ - if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_FALLING_EDGE | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - /* Reject requests failing to enable both edges. */ if ((rq->extts.flags & PTP_STRICT_FLAGS) && (rq->extts.flags & PTP_ENABLE_FEATURE) && @@ -1356,6 +1342,9 @@ void igb_ptp_init(struct igb_adapter *adapter) adapter->ptp_caps.n_per_out = IGB_N_PEROUT; adapter->ptp_caps.n_pins = IGB_N_SDP; adapter->ptp_caps.pps = 0; + adapter->ptp_caps.supported_extts_flags = PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS; adapter->ptp_caps.pin_config = adapter->sdp_config; adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580; adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; @@ -1378,6 +1367,9 @@ void igb_ptp_init(struct igb_adapter *adapter) adapter->ptp_caps.n_ext_ts = IGB_N_EXTTS; adapter->ptp_caps.n_per_out = IGB_N_PEROUT; adapter->ptp_caps.n_pins = IGB_N_SDP; + adapter->ptp_caps.supported_extts_flags = PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS; adapter->ptp_caps.pps = 1; adapter->ptp_caps.pin_config = adapter->sdp_config; adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580; diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c b/drivers/net/ethernet/intel/igb/igb_xsk.c index 157d43787fa0..5cf67ba29269 100644 --- a/drivers/net/ethernet/intel/igb/igb_xsk.c +++ b/drivers/net/ethernet/intel/igb/igb_xsk.c @@ -415,6 +415,7 @@ int igb_clean_rx_irq_zc(struct igb_q_vector *q_vector, /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->rx_stats.alloc_failed++; + set_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); break; } diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 2f265c0959c7..859a15e4ccba 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -40,6 +40,11 @@ void igc_ethtool_set_ops(struct net_device *); #define IGC_MAX_TX_TSTAMP_REGS 4 +struct igc_fpe_t { + struct ethtool_mmsv mmsv; + u32 tx_min_frag_size; +}; + enum igc_mac_filter_type { IGC_MAC_FILTER_TYPE_DST = 0, IGC_MAC_FILTER_TYPE_SRC @@ -333,6 +338,8 @@ struct igc_adapter { struct timespec64 period; } perout[IGC_N_PEROUT]; + struct 
igc_fpe_t fpe; + /* LEDs */ struct mutex led_mutex; struct igc_led_classdev *leds; @@ -387,11 +394,11 @@ extern char igc_driver_name[]; #define IGC_FLAG_RX_LEGACY BIT(16) #define IGC_FLAG_TSN_QBV_ENABLED BIT(17) #define IGC_FLAG_TSN_QAV_ENABLED BIT(18) -#define IGC_FLAG_TSN_LEGACY_ENABLED BIT(19) +#define IGC_FLAG_TSN_PREEMPT_ENABLED BIT(19) #define IGC_FLAG_TSN_ANY_ENABLED \ (IGC_FLAG_TSN_QBV_ENABLED | IGC_FLAG_TSN_QAV_ENABLED | \ - IGC_FLAG_TSN_LEGACY_ENABLED) + IGC_FLAG_TSN_PREEMPT_ENABLED) #define IGC_FLAG_RSS_FIELD_IPV4_UDP BIT(6) #define IGC_FLAG_RSS_FIELD_IPV6_UDP BIT(7) @@ -736,7 +743,10 @@ struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter, u32 location); int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule); void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule); - +void igc_disable_empty_addr_recv(struct igc_adapter *adapter); +int igc_enable_empty_addr_recv(struct igc_adapter *adapter); +struct igc_ring *igc_get_tx_ring(struct igc_adapter *adapter, int cpu); +void igc_flush_tx_descriptors(struct igc_ring *ring); void igc_ptp_init(struct igc_adapter *adapter); void igc_ptp_reset(struct igc_adapter *adapter); void igc_ptp_suspend(struct igc_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h index bf8cdfbba9ff..6320eabb72fe 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.h +++ b/drivers/net/ethernet/intel/igc/igc_base.h @@ -49,6 +49,7 @@ struct igc_adv_tx_context_desc { #define IGC_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */ #define IGC_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */ #define IGC_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ +#define IGC_ADVTXD_PAYLEN_MASK 0XFFFFC000 /* Adv desc PAYLEN mask */ #define IGC_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ #define IGC_RAR_ENTRIES 16 diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index d19325b0e6e0..7189dfc389ad 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -308,6 +308,8 @@ #define IGC_TXD_DTYP_C 0x00000000 /* Context Descriptor */ #define IGC_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ #define IGC_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ +#define IGC_TXD_POPTS_SMD_MASK 0x3000 /* Indicates whether it's SMD-V or SMD-R */ + #define IGC_TXD_CMD_EOP 0x01000000 /* End of Packet */ #define IGC_TXD_CMD_IC 0x04000000 /* Insert Checksum */ #define IGC_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */ @@ -363,6 +365,8 @@ #define IGC_SRRCTL_TIMER0SEL(timer) (((timer) & 0x3) << 17) /* Receive Descriptor bit definitions */ +#define IGC_RXD_STAT_SMD_TYPE_V 0x01 /* SMD-V Packet */ +#define IGC_RXD_STAT_SMD_TYPE_R 0x02 /* SMD-R Packet */ #define IGC_RXD_STAT_EOP 0x02 /* End of Packet */ #define IGC_RXD_STAT_IXSM 0x04 /* Ignore checksum */ #define IGC_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ @@ -372,7 +376,8 @@ #define IGC_RXDEXT_STATERR_LB 0x00040000 /* Advanced Receive Descriptor bit definitions */ -#define IGC_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */ +#define IGC_RXDADV_STAT_SMD_TYPE_MASK 0x06000 +#define IGC_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */ #define IGC_RXDEXT_STATERR_L4E 0x20000000 #define IGC_RXDEXT_STATERR_IPE 0x40000000 @@ -396,11 +401,47 @@ #define IGC_RCTL_PMCF 0x00800000 /* pass MAC control frames */ #define IGC_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ -#define I225_RXPBSIZE_DEFAULT 0x000000A2 
/* RXPBSIZE default */ -#define I225_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE default */ -#define IGC_RXPBS_CFG_TS_EN 0x80000000 /* Timestamp in Rx buffer */ - -#define IGC_TXPBSIZE_TSN 0x04145145 /* 5k bytes buffer for each queue */ +/* Mask for RX packet buffer size */ +#define IGC_RXPBSIZE_EXP_MASK GENMASK(5, 0) +#define IGC_BMC2OSPBSIZE_MASK GENMASK(11, 6) +#define IGC_RXPBSIZE_BE_MASK GENMASK(17, 12) +/* Mask for timestamp in RX buffer */ +#define IGC_RXPBS_CFG_TS_EN_MASK GENMASK(31, 31) +/* High-priority RX packet buffer size (KB). Used for Express traffic when preemption is enabled */ +#define IGC_RXPBSIZE_EXP(x) FIELD_PREP(IGC_RXPBSIZE_EXP_MASK, (x)) +/* BMC to OS packet buffer size in KB */ +#define IGC_BMC2OSPBSIZE(x) FIELD_PREP(IGC_BMC2OSPBSIZE_MASK, (x)) +/* Low-priority RX packet buffer size (KB). Used for BE traffic when preemption is enabled */ +#define IGC_RXPBSIZE_BE(x) FIELD_PREP(IGC_RXPBSIZE_BE_MASK, (x)) +/* Enable RX packet buffer for timestamp descriptor, saving 16 bytes per packet if set */ +#define IGC_RXPBS_CFG_TS_EN FIELD_PREP(IGC_RXPBS_CFG_TS_EN_MASK, 1) +/* Default value following I225/I226 SW User Manual Section 8.3.1 */ +#define IGC_RXPBSIZE_EXP_BMC_DEFAULT ( \ + IGC_RXPBSIZE_EXP(34) | IGC_BMC2OSPBSIZE(2)) +#define IGC_RXPBSIZE_EXP_BMC_BE_TSN ( \ + IGC_RXPBSIZE_EXP(15) | IGC_BMC2OSPBSIZE(2) | IGC_RXPBSIZE_BE(15)) + +/* Mask for TX packet buffer size */ +#define IGC_TXPB0SIZE_MASK GENMASK(5, 0) +#define IGC_TXPB1SIZE_MASK GENMASK(11, 6) +#define IGC_TXPB2SIZE_MASK GENMASK(17, 12) +#define IGC_TXPB3SIZE_MASK GENMASK(23, 18) +/* Mask for OS to BMC packet buffer size */ +#define IGC_OS2BMCPBSIZE_MASK GENMASK(29, 24) +/* TX Packet buffer size in KB */ +#define IGC_TXPB0SIZE(x) FIELD_PREP(IGC_TXPB0SIZE_MASK, (x)) +#define IGC_TXPB1SIZE(x) FIELD_PREP(IGC_TXPB1SIZE_MASK, (x)) +#define IGC_TXPB2SIZE(x) FIELD_PREP(IGC_TXPB2SIZE_MASK, (x)) +#define IGC_TXPB3SIZE(x) FIELD_PREP(IGC_TXPB3SIZE_MASK, (x)) +/* OS to BMC packet buffer size in KB */ +#define IGC_OS2BMCPBSIZE(x) FIELD_PREP(IGC_OS2BMCPBSIZE_MASK, (x)) +/* Default value following I225/I226 SW User Manual Section 8.3.2 */ +#define IGC_TXPBSIZE_DEFAULT ( \ + IGC_TXPB0SIZE(20) | IGC_TXPB1SIZE(0) | IGC_TXPB2SIZE(0) | \ + IGC_TXPB3SIZE(0) | IGC_OS2BMCPBSIZE(4)) +#define IGC_TXPBSIZE_TSN ( \ + IGC_TXPB0SIZE(7) | IGC_TXPB1SIZE(7) | IGC_TXPB2SIZE(7) | \ + IGC_TXPB3SIZE(7) | IGC_OS2BMCPBSIZE(4)) #define IGC_DTXMXPKTSZ_TSN 0x19 /* 1600 bytes of max TX DMA packet size */ #define IGC_DTXMXPKTSZ_DEFAULT 0x98 /* 9728-byte Jumbo frames */ @@ -539,8 +580,10 @@ /* Transmit Scheduling */ #define IGC_TQAVCTRL_TRANSMIT_MODE_TSN 0x00000001 +#define IGC_TQAVCTRL_PREEMPT_ENA 0x00000002 #define IGC_TQAVCTRL_ENHANCED_QAV 0x00000008 #define IGC_TQAVCTRL_FUTSCDDIS 0x00000080 +#define IGC_TQAVCTRL_MIN_FRAG_MASK 0x0000C000 #define IGC_TXQCTL_QUEUE_MODE_LAUNCHT 0x00000001 #define IGC_TXQCTL_STRICT_CYCLE 0x00000002 diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 817838677817..3fc1eded9605 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -8,6 +8,7 @@ #include "igc.h" #include "igc_diag.h" +#include "igc_tsn.h" /* forward declaration */ struct igc_stats { @@ -1781,6 +1782,83 @@ static int igc_ethtool_set_eee(struct net_device *netdev, return 0; } +static int igc_ethtool_get_mm(struct net_device *netdev, + struct ethtool_mm_state *cmd) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_fpe_t *fpe = 
&adapter->fpe; + + ethtool_mmsv_get_mm(&fpe->mmsv, cmd); + cmd->tx_min_frag_size = fpe->tx_min_frag_size; + cmd->rx_min_frag_size = IGC_RX_MIN_FRAG_SIZE; + + return 0; +} + +static int igc_ethtool_set_mm(struct net_device *netdev, + struct ethtool_mm_cfg *cmd, + struct netlink_ext_ack *extack) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_fpe_t *fpe = &adapter->fpe; + + fpe->tx_min_frag_size = igc_fpe_get_supported_frag_size(cmd->tx_min_frag_size); + if (fpe->tx_min_frag_size != cmd->tx_min_frag_size) + NL_SET_ERR_MSG_MOD(extack, + "tx-min-frag-size value set is unsupported. Rounded up to supported value (64, 128, 192, 256)"); + + if (fpe->mmsv.pmac_enabled != cmd->pmac_enabled) { + if (cmd->pmac_enabled) + static_branch_inc(&igc_fpe_enabled); + else + static_branch_dec(&igc_fpe_enabled); + } + + ethtool_mmsv_set_mm(&fpe->mmsv, cmd); + + return igc_tsn_offload_apply(adapter); +} + +/** + * igc_ethtool_get_frame_ass_error - Get the frame assembly error count. + * @reg_value: Register value for IGC_PRMEXCPRCNT + * Return: The count of frame assembly errors. + */ +static u64 igc_ethtool_get_frame_ass_error(u32 reg_value) +{ + /* Out of order statistics */ + u32 ooo_frame_cnt, ooo_frag_cnt; + u32 miss_frame_frag_cnt; + + ooo_frame_cnt = FIELD_GET(IGC_PRMEXCPRCNT_OOO_FRAME_CNT, reg_value); + ooo_frag_cnt = FIELD_GET(IGC_PRMEXCPRCNT_OOO_FRAG_CNT, reg_value); + miss_frame_frag_cnt = FIELD_GET(IGC_PRMEXCPRCNT_MISS_FRAME_FRAG_CNT, + reg_value); + + return ooo_frame_cnt + ooo_frag_cnt + miss_frame_frag_cnt; +} + +static u64 igc_ethtool_get_frame_smd_error(u32 reg_value) +{ + return FIELD_GET(IGC_PRMEXCPRCNT_OOO_SMDC, reg_value); +} + +static void igc_ethtool_get_mm_stats(struct net_device *dev, + struct ethtool_mm_stats *stats) +{ + struct igc_adapter *adapter = netdev_priv(dev); + struct igc_hw *hw = &adapter->hw; + u32 reg_value; + + reg_value = rd32(IGC_PRMEXCPRCNT); + + stats->MACMergeFrameAssErrorCount = igc_ethtool_get_frame_ass_error(reg_value); + stats->MACMergeFrameSmdErrorCount = igc_ethtool_get_frame_smd_error(reg_value); + stats->MACMergeFrameAssOkCount = rd32(IGC_PRMPTDRCNT); + stats->MACMergeFragCountRx = rd32(IGC_PRMEVNTRCNT); + stats->MACMergeFragCountTx = rd32(IGC_PRMEVNTTCNT); +} + static int igc_ethtool_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { @@ -2076,6 +2154,9 @@ static const struct ethtool_ops igc_ethtool_ops = { .get_link_ksettings = igc_ethtool_get_link_ksettings, .set_link_ksettings = igc_ethtool_set_link_ksettings, .self_test = igc_ethtool_diag_test, + .get_mm = igc_ethtool_get_mm, + .get_mm_stats = igc_ethtool_get_mm_stats, + .set_mm = igc_ethtool_set_mm, }; void igc_ethtool_set_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index b1669d7cf435..27575a1e1777 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2464,8 +2464,7 @@ unmap: return -ENOMEM; } -static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter, - int cpu) +struct igc_ring *igc_get_tx_ring(struct igc_adapter *adapter, int cpu) { int index = cpu; @@ -2489,7 +2488,7 @@ static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) if (unlikely(!xdpf)) return -EFAULT; - ring = igc_xdp_get_tx_ring(adapter, cpu); + ring = igc_get_tx_ring(adapter, cpu); nq = txring_txq(ring); __netif_tx_lock(nq, cpu); @@ -2549,7 +2548,7 @@ out: } /* This function assumes __netif_tx_lock is held by the 
caller. */ -static void igc_flush_tx_descriptors(struct igc_ring *ring) +void igc_flush_tx_descriptors(struct igc_ring *ring) { /* Once tail pointer is updated, hardware can fetch the descriptors * any time so we issue a write membar here to ensure all memory @@ -2566,7 +2565,7 @@ static void igc_finalize_xdp(struct igc_adapter *adapter, int status) struct igc_ring *ring; if (status & IGC_XDP_TX) { - ring = igc_xdp_get_tx_ring(adapter, cpu); + ring = igc_get_tx_ring(adapter, cpu); nq = txring_txq(ring); __netif_tx_lock(nq, cpu); @@ -2638,6 +2637,14 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) size -= IGC_TS_HDR_LEN; } + if (igc_fpe_is_pmac_enabled(adapter) && + igc_fpe_handle_mpacket(adapter, rx_desc, size, pktbuf)) { + /* Advance the ring next-to-clean */ + igc_is_non_eop(rx_ring, rx_desc); + cleaned_count++; + continue; + } + if (!skb) { xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq); xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring), @@ -3145,6 +3152,11 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD))) break; + if (igc_fpe_is_pmac_enabled(adapter) && + igc_fpe_transmitted_smd_v(tx_desc)) + ethtool_mmsv_event_handle(&adapter->fpe.mmsv, + ETHTOOL_MMSV_LD_SENT_VERIFY_MPACKET); + /* Hold the completions while there's a pending tx hardware * timestamp request from XDP Tx metadata. */ @@ -4037,6 +4049,30 @@ static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr) } /** + * igc_enable_empty_addr_recv - Enable Rx of packets with all-zeroes MAC address + * @adapter: Pointer to the igc_adapter structure. + * + * Frame preemption verification requires that packets with the all-zeroes + * MAC address are allowed to be received by the driver. This function adds the + * all-zeroes destination address to the list of acceptable addresses. + * + * Return: 0 on success, negative value otherwise. 
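Frame preemption verification exchanges mPackets (SMD-V/SMD-R), which are minimum-size frames of all zero bytes; destination-address filtering would normally drop them, which is what the helper documented above (body just below) works around by admitting the all-zeroes MAC address. A hedged sketch of how such a frame can be recognized on receive, mirroring the mem_is_zero() check this series adds in igc_fpe_handle_mpacket(); the demo_* name is illustrative:

```c
#include <linux/string.h>

#define DEMO_SMD_FRAME_SIZE	60	/* matches SMD_FRAME_SIZE in igc_tsn.h */

/* A verification mPacket carries no real addresses or payload:
 * 60 zero bytes. Anything else continues through normal RX.
 */
static bool demo_is_smd_frame(const void *pktbuf, unsigned int size)
{
	return size == DEMO_SMD_FRAME_SIZE &&
	       mem_is_zero(pktbuf, DEMO_SMD_FRAME_SIZE);
}
```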
+ */ +int igc_enable_empty_addr_recv(struct igc_adapter *adapter) +{ + u8 empty[ETH_ALEN] = {}; + + return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, empty, -1); +} + +void igc_disable_empty_addr_recv(struct igc_adapter *adapter) +{ + u8 empty[ETH_ALEN] = {}; + + igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, empty); +} + +/** * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set * @netdev: network interface device structure * @@ -5311,6 +5347,9 @@ void igc_down(struct igc_adapter *adapter) igc_disable_all_tx_rings_hw(adapter); igc_clean_all_tx_rings(adapter); igc_clean_all_rx_rings(adapter); + + if (adapter->fpe.mmsv.pmac_enabled) + ethtool_mmsv_stop(&adapter->fpe.mmsv); } void igc_reinit_locked(struct igc_adapter *adapter) @@ -5835,6 +5874,10 @@ static void igc_watchdog_task(struct work_struct *work) */ igc_tsn_adjust_txtime_offset(adapter); + if (adapter->fpe.mmsv.pmac_enabled) + ethtool_mmsv_link_state_handle(&adapter->fpe.mmsv, + true); + if (adapter->link_speed != SPEED_1000) goto no_wait; @@ -5870,6 +5913,10 @@ no_wait: netdev_info(netdev, "NIC Link is Down\n"); netif_carrier_off(netdev); + if (adapter->fpe.mmsv.pmac_enabled) + ethtool_mmsv_link_state_handle(&adapter->fpe.mmsv, + false); + /* link state has changed, schedule phy info update */ if (!test_bit(__IGC_DOWN, &adapter->state)) mod_timer(&adapter->phy_info_timer, @@ -6439,6 +6486,10 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, if (!validate_schedule(adapter, qopt)) return -EINVAL; + /* preemptible isn't supported yet */ + if (qopt->mqprio.preemptible_tcs) + return -EOPNOTSUPP; + igc_ptp_read(adapter, &now); if (igc_tsn_is_taprio_activated_by_user(adapter) && @@ -6679,13 +6730,14 @@ static int igc_tsn_enable_mqprio(struct igc_adapter *adapter, struct tc_mqprio_qopt_offload *mqprio) { struct igc_hw *hw = &adapter->hw; - int i; + int err, i; if (hw->mac.type != igc_i225) return -EOPNOTSUPP; if (!mqprio->qopt.num_tc) { adapter->strict_priority_enable = false; + netdev_reset_tc(adapter->netdev); goto apply; } @@ -6716,6 +6768,21 @@ static int igc_tsn_enable_mqprio(struct igc_adapter *adapter, igc_save_mqprio_params(adapter, mqprio->qopt.num_tc, mqprio->qopt.offset); + err = netdev_set_num_tc(adapter->netdev, adapter->num_tc); + if (err) + return err; + + for (i = 0; i < adapter->num_tc; i++) { + err = netdev_set_tc_queue(adapter->netdev, i, 1, + adapter->queue_per_tc[i]); + if (err) + return err; + } + + /* In case the card is configured with less than four queues. 
*/ + for (; i < IGC_MAX_TX_QUEUES; i++) + adapter->queue_per_tc[i] = i; + mqprio->qopt.hw = TC_MQPRIO_HW_OFFLOAD_TCS; apply: @@ -6779,7 +6846,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; - ring = igc_xdp_get_tx_ring(adapter, cpu); + ring = igc_get_tx_ring(adapter, cpu); nq = txring_txq(ring); __netif_tx_lock(nq, cpu); @@ -7125,6 +7192,9 @@ static int igc_probe(struct pci_dev *pdev, netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_XSK_ZEROCOPY; + /* enable HW vlan tag insertion/stripping by default */ + netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + /* MTU range: 68 - 9216 */ netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; @@ -7157,8 +7227,8 @@ static int igc_probe(struct pci_dev *pdev, } /* configure RXPBSIZE and TXPBSIZE */ - wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT); - wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); + wr32(IGC_RXPBS, IGC_RXPBSIZE_EXP_BMC_DEFAULT); + wr32(IGC_TXPBS, IGC_TXPBSIZE_DEFAULT); timer_setup(&adapter->watchdog_timer, igc_watchdog, 0); timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0); @@ -7190,6 +7260,8 @@ static int igc_probe(struct pci_dev *pdev, igc_tsn_clear_schedule(adapter); + igc_fpe_init(adapter); + /* reset the hardware with the new settings */ igc_reset(adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index efc7b30e4211..f4f5c28615d3 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -257,13 +257,6 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, switch (rq->type) { case PTP_CLK_REQ_EXTTS: - /* Reject requests with unsupported flags */ - if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_FALLING_EDGE | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - /* Reject requests failing to enable both edges. 
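These removals (and the matching ones in igb_ptp.c above) are part of a wider cleanup: rather than each driver open-coding the "reject unsupported flags" test, the driver now declares supported_extts_flags in its ptp_clock_info and the PTP core rejects anything outside that set before ->enable() is ever called. Roughly, and only as a sketch of the core-side idea rather than its exact code:

```c
#include <linux/ptp_clock_kernel.h>

/* Sketch: with supported_extts_flags populated, the chardev layer can
 * veto a request before the driver callback runs.
 */
static int demo_validate_extts(const struct ptp_clock_info *info,
			       const struct ptp_extts_request *req)
{
	if (req->flags & ~info->supported_extts_flags)
		return -EOPNOTSUPP;

	return 0;
}
```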
*/ if ((rq->extts.flags & PTP_STRICT_FLAGS) && (rq->extts.flags & PTP_ENABLE_FEATURE) && @@ -300,10 +293,6 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, return 0; case PTP_CLK_REQ_PEROUT: - /* Reject requests with unsupported flags */ - if (rq->perout.flags) - return -EOPNOTSUPP; - if (on) { pin = ptp_find_pin(igc->ptp_clock, PTP_PF_PEROUT, rq->perout.index); @@ -1162,6 +1151,9 @@ void igc_ptp_init(struct igc_adapter *adapter) adapter->ptp_caps.pin_config = adapter->sdp_config; adapter->ptp_caps.n_ext_ts = IGC_N_EXTTS; adapter->ptp_caps.n_per_out = IGC_N_PEROUT; + adapter->ptp_caps.supported_extts_flags = PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS; adapter->ptp_caps.n_pins = IGC_N_SDP; adapter->ptp_caps.verify = igc_ptp_verify_pin; diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index 12ddc5793651..f343c6bfc6be 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -222,6 +222,22 @@ #define IGC_FTQF(_n) (0x059E0 + (4 * (_n))) /* 5-tuple Queue Fltr */ +/* Time sync registers - preemption statistics */ +#define IGC_PRMPTDRCNT 0x04284 /* Good RX Preempted Packets */ +#define IGC_PRMEVNTTCNT 0x04298 /* TX Preemption event counter */ +#define IGC_PRMEVNTRCNT 0x0429C /* RX Preemption event counter */ + + /* Preemption Exception Counter */ + #define IGC_PRMEXCPRCNT 0x42A0 +/* Received out of order packets with SMD-C */ +#define IGC_PRMEXCPRCNT_OOO_SMDC 0x000000FF +/* Received out of order packets with SMD-C and wrong Frame CNT */ +#define IGC_PRMEXCPRCNT_OOO_FRAME_CNT 0x0000FF00 +/* Received out of order packets with SMD-C and wrong Frag CNT */ +#define IGC_PRMEXCPRCNT_OOO_FRAG_CNT 0x00FF0000 +/* Received packets with SMD-S and wrong Frag CNT and Frame CNT */ +#define IGC_PRMEXCPRCNT_MISS_FRAME_FRAG_CNT 0xFF000000 + /* Transmit Scheduling Registers */ #define IGC_TQAVCTRL 0x3570 #define IGC_TXQCTL(_n) (0x3344 + 0x4 * (_n)) diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 1e44374ca1ff..f22cc4d4f459 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -2,9 +2,143 @@ /* Copyright (c) 2019 Intel Corporation */ #include "igc.h" +#include "igc_base.h" #include "igc_hw.h" #include "igc_tsn.h" +#define MIN_MULTPLIER_TX_MIN_FRAG 0 +#define MAX_MULTPLIER_TX_MIN_FRAG 3 +/* Frag size is based on the Section 8.12.2 of the SW User Manual */ +#define TX_MIN_FRAG_SIZE 64 +#define TX_MAX_FRAG_SIZE (TX_MIN_FRAG_SIZE * \ + (MAX_MULTPLIER_TX_MIN_FRAG + 1)) + +DEFINE_STATIC_KEY_FALSE(igc_fpe_enabled); + +static int igc_fpe_init_smd_frame(struct igc_ring *ring, + struct igc_tx_buffer *buffer, + struct sk_buff *skb) +{ + dma_addr_t dma = dma_map_single(ring->dev, skb->data, skb->len, + DMA_TO_DEVICE); + + if (dma_mapping_error(ring->dev, dma)) { + netdev_err_once(ring->netdev, "Failed to map DMA for TX\n"); + return -ENOMEM; + } + + buffer->skb = skb; + buffer->protocol = 0; + buffer->bytecount = skb->len; + buffer->gso_segs = 1; + buffer->time_stamp = jiffies; + dma_unmap_len_set(buffer, len, skb->len); + dma_unmap_addr_set(buffer, dma, dma); + + return 0; +} + +static int igc_fpe_init_tx_descriptor(struct igc_ring *ring, + struct sk_buff *skb, + enum igc_txd_popts_type type) +{ + u32 cmd_type, olinfo_status = 0; + struct igc_tx_buffer *buffer; + union igc_adv_tx_desc *desc; + int err; + + if (!igc_desc_unused(ring)) + return -EBUSY; + + buffer = 
&ring->tx_buffer_info[ring->next_to_use]; + err = igc_fpe_init_smd_frame(ring, buffer, skb); + if (err) + return err; + + cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | + IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD | + buffer->bytecount; + + olinfo_status |= FIELD_PREP(IGC_ADVTXD_PAYLEN_MASK, buffer->bytecount); + + switch (type) { + case SMD_V: + case SMD_R: + olinfo_status |= FIELD_PREP(IGC_TXD_POPTS_SMD_MASK, type); + break; + } + + desc = IGC_TX_DESC(ring, ring->next_to_use); + desc->read.cmd_type_len = cpu_to_le32(cmd_type); + desc->read.olinfo_status = cpu_to_le32(olinfo_status); + desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma)); + + netdev_tx_sent_queue(txring_txq(ring), skb->len); + + buffer->next_to_watch = desc; + ring->next_to_use = (ring->next_to_use + 1) % ring->count; + + return 0; +} + +static int igc_fpe_xmit_smd_frame(struct igc_adapter *adapter, + enum igc_txd_popts_type type) +{ + int cpu = smp_processor_id(); + struct netdev_queue *nq; + struct igc_ring *ring; + struct sk_buff *skb; + int err; + + ring = igc_get_tx_ring(adapter, cpu); + nq = txring_txq(ring); + + skb = alloc_skb(SMD_FRAME_SIZE, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + skb_put_zero(skb, SMD_FRAME_SIZE); + + __netif_tx_lock(nq, cpu); + + err = igc_fpe_init_tx_descriptor(ring, skb, type); + igc_flush_tx_descriptors(ring); + + __netif_tx_unlock(nq); + + return err; +} + +static void igc_fpe_send_mpacket(struct ethtool_mmsv *mmsv, + enum ethtool_mpacket type) +{ + struct igc_fpe_t *fpe = container_of(mmsv, struct igc_fpe_t, mmsv); + struct igc_adapter *adapter; + int err; + + adapter = container_of(fpe, struct igc_adapter, fpe); + + if (type == ETHTOOL_MPACKET_VERIFY) { + err = igc_fpe_xmit_smd_frame(adapter, SMD_V); + if (err && net_ratelimit()) + netdev_err(adapter->netdev, "Error sending SMD-V\n"); + } else if (type == ETHTOOL_MPACKET_RESPONSE) { + err = igc_fpe_xmit_smd_frame(adapter, SMD_R); + if (err && net_ratelimit()) + netdev_err(adapter->netdev, "Error sending SMD-R frame\n"); + } +} + +static const struct ethtool_mmsv_ops igc_mmsv_ops = { + .send_mpacket = igc_fpe_send_mpacket, +}; + +void igc_fpe_init(struct igc_adapter *adapter) +{ + adapter->fpe.tx_min_frag_size = TX_MIN_FRAG_SIZE; + ethtool_mmsv_init(&adapter->fpe.mmsv, adapter->netdev, &igc_mmsv_ops); +} + static bool is_any_launchtime(struct igc_adapter *adapter) { int i; @@ -37,17 +171,16 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter) { unsigned int new_flags = adapter->flags & ~IGC_FLAG_TSN_ANY_ENABLED; - if (adapter->taprio_offload_enable) - new_flags |= IGC_FLAG_TSN_QBV_ENABLED; - if (is_any_launchtime(adapter)) + if (adapter->taprio_offload_enable || is_any_launchtime(adapter) || + adapter->strict_priority_enable) new_flags |= IGC_FLAG_TSN_QBV_ENABLED; if (is_cbs_enabled(adapter)) new_flags |= IGC_FLAG_TSN_QAV_ENABLED; - if (adapter->strict_priority_enable) - new_flags |= IGC_FLAG_TSN_LEGACY_ENABLED; + if (adapter->fpe.mmsv.pmac_enabled) + new_flags |= IGC_FLAG_TSN_PREEMPT_ENABLED; return new_flags; } @@ -125,6 +258,29 @@ static void igc_tsn_tx_arb(struct igc_adapter *adapter, u16 *queue_per_tc) wr32(IGC_TXARB, txarb); } +/** + * igc_tsn_set_rxpbsize - Set the receive packet buffer size + * @adapter: Pointer to the igc_adapter structure + * @rxpbs_exp_bmc_be: Value to set the receive packet buffer size, including + * express buffer, BMC buffer, and Best Effort buffer + * + * The IGC_RXPBS register value may include allocations for the Express buffer, + * BMC buffer, Best Effort buffer, 
and the timestamp descriptor buffer + * (IGC_RXPBS_CFG_TS_EN). + */ +static void igc_tsn_set_rxpbsize(struct igc_adapter *adapter, + u32 rxpbs_exp_bmc_be) +{ + struct igc_hw *hw = &adapter->hw; + u32 rxpbs = rd32(IGC_RXPBS); + + rxpbs &= ~(IGC_RXPBSIZE_EXP_MASK | IGC_BMC2OSPBSIZE_MASK | + IGC_RXPBSIZE_BE_MASK); + rxpbs |= rxpbs_exp_bmc_be; + + wr32(IGC_RXPBS, rxpbs); +} + /* Returns the TSN specific registers to their default values after * the adapter is reset. */ @@ -136,15 +292,18 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) int i; wr32(IGC_GTXOFFSET, 0); - wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); + wr32(IGC_TXPBS, IGC_TXPBSIZE_DEFAULT); wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT); + igc_tsn_set_rxpbsize(adapter, IGC_RXPBSIZE_EXP_BMC_DEFAULT); + if (igc_is_device_id_i226(hw)) igc_tsn_restore_retx_default(adapter); tqavctrl = rd32(IGC_TQAVCTRL); tqavctrl &= ~(IGC_TQAVCTRL_TRANSMIT_MODE_TSN | - IGC_TQAVCTRL_ENHANCED_QAV | IGC_TQAVCTRL_FUTSCDDIS); + IGC_TQAVCTRL_ENHANCED_QAV | IGC_TQAVCTRL_FUTSCDDIS | + IGC_TQAVCTRL_PREEMPT_ENA | IGC_TQAVCTRL_MIN_FRAG_MASK); wr32(IGC_TQAVCTRL, tqavctrl); @@ -157,16 +316,12 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) wr32(IGC_QBVCYCLET_S, 0); wr32(IGC_QBVCYCLET, NSEC_PER_SEC); - /* Reset mqprio TC configuration. */ - netdev_reset_tc(adapter->netdev); - /* Restore the default Tx arbitration: Priority 0 has the highest * priority and is assigned to queue 0 and so on and so forth. */ igc_tsn_tx_arb(adapter, queue_per_tc); adapter->flags &= ~IGC_FLAG_TSN_QBV_ENABLED; - adapter->flags &= ~IGC_FLAG_TSN_LEGACY_ENABLED; return 0; } @@ -190,53 +345,51 @@ static void igc_tsn_set_retx_qbvfullthreshold(struct igc_adapter *adapter) wr32(IGC_RETX_CTL, retxctl); } +static u8 igc_fpe_get_frag_size_mult(const struct igc_fpe_t *fpe) +{ + u8 mult = (fpe->tx_min_frag_size / TX_MIN_FRAG_SIZE) - 1; + + return clamp_t(u8, mult, MIN_MULTPLIER_TX_MIN_FRAG, + MAX_MULTPLIER_TX_MIN_FRAG); +} + +u32 igc_fpe_get_supported_frag_size(u32 frag_size) +{ + const u32 supported_sizes[] = {64, 128, 192, 256}; + + /* Find the smallest supported size that is >= frag_size */ + for (int i = 0; i < ARRAY_SIZE(supported_sizes); i++) { + if (frag_size <= supported_sizes[i]) + return supported_sizes[i]; + } + + /* Should not happen */ + return TX_MAX_FRAG_SIZE; +} + static int igc_tsn_enable_offload(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; u32 tqavctrl, baset_l, baset_h; u32 sec, nsec, cycle; ktime_t base_time, systim; + u32 frag_size_mult; int i; wr32(IGC_TSAUXC, 0); wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN); wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN); + igc_tsn_set_rxpbsize(adapter, IGC_RXPBSIZE_EXP_BMC_BE_TSN); + if (igc_is_device_id_i226(hw)) igc_tsn_set_retx_qbvfullthreshold(adapter); if (adapter->strict_priority_enable) { - int err; - - err = netdev_set_num_tc(adapter->netdev, adapter->num_tc); - if (err) - return err; - - for (i = 0; i < adapter->num_tc; i++) { - err = netdev_set_tc_queue(adapter->netdev, i, 1, - adapter->queue_per_tc[i]); - if (err) - return err; - } - - /* In case the card is configured with less than four queues. */ - for (; i < IGC_MAX_TX_QUEUES; i++) - adapter->queue_per_tc[i] = i; - /* Configure queue priorities according to the user provided * mapping. */ igc_tsn_tx_arb(adapter, adapter->queue_per_tc); - - /* Enable legacy TSN mode which will do strict priority without - * any other TSN features. 
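igc_fpe_get_supported_frag_size() and igc_fpe_get_frag_size_mult() above translate the user's tx-min-frag-size into the two-bit minimum-fragment field of TQAVCTRL: the hardware supports 64 * (mult + 1) bytes, i.e. 64/128/192/256. A compact, purely hypothetical one-step equivalent of the round-up-then-encode path:

```c
#include <linux/math.h>
#include <linux/minmax.h>
#include <linux/types.h>

/* Round up to the next supported size, then encode the 0..3
 * multiplier, e.g. 100 -> 128 -> 1. Illustrative only.
 */
static u8 demo_frag_size_to_mult(u32 frag_size)
{
	u32 rounded = clamp(roundup(frag_size, 64u), 64u, 256u);

	return rounded / 64 - 1;
}
```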
- */ - tqavctrl = rd32(IGC_TQAVCTRL); - tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN; - tqavctrl &= ~IGC_TQAVCTRL_ENHANCED_QAV; - wr32(IGC_TQAVCTRL, tqavctrl); - - return 0; } for (i = 0; i < adapter->num_tx_queues; i++) { @@ -361,10 +514,16 @@ skip_cbs: wr32(IGC_TXQCTL(i), txqctl); } - tqavctrl = rd32(IGC_TQAVCTRL) & ~IGC_TQAVCTRL_FUTSCDDIS; - + tqavctrl = rd32(IGC_TQAVCTRL) & ~(IGC_TQAVCTRL_FUTSCDDIS | + IGC_TQAVCTRL_PREEMPT_ENA | IGC_TQAVCTRL_MIN_FRAG_MASK); tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV; + if (adapter->fpe.mmsv.pmac_enabled) + tqavctrl |= IGC_TQAVCTRL_PREEMPT_ENA; + + frag_size_mult = igc_fpe_get_frag_size_mult(&adapter->fpe); + tqavctrl |= FIELD_PREP(IGC_TQAVCTRL_MIN_FRAG_MASK, frag_size_mult); + adapter->qbv_count++; cycle = adapter->cycle_time; @@ -425,6 +584,14 @@ int igc_tsn_reset(struct igc_adapter *adapter) unsigned int new_flags; int err = 0; + if (adapter->fpe.mmsv.pmac_enabled) { + err = igc_enable_empty_addr_recv(adapter); + if (err && net_ratelimit()) + netdev_err(adapter->netdev, "Error adding empty address to MAC filter\n"); + } else { + igc_disable_empty_addr_recv(adapter); + } + new_flags = igc_tsn_new_flags(adapter); if (!(new_flags & IGC_FLAG_TSN_ANY_ENABLED)) diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h b/drivers/net/ethernet/intel/igc/igc_tsn.h index 98ec845a86bf..c2a77229207b 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.h +++ b/drivers/net/ethernet/intel/igc/igc_tsn.h @@ -4,9 +4,61 @@ #ifndef _IGC_TSN_H_ #define _IGC_TSN_H_ +#define IGC_RX_MIN_FRAG_SIZE 60 +#define SMD_FRAME_SIZE 60 + +enum igc_txd_popts_type { + SMD_V = 0x01, + SMD_R = 0x02, +}; + +DECLARE_STATIC_KEY_FALSE(igc_fpe_enabled); + +void igc_fpe_init(struct igc_adapter *adapter); +u32 igc_fpe_get_supported_frag_size(u32 frag_size); int igc_tsn_offload_apply(struct igc_adapter *adapter); int igc_tsn_reset(struct igc_adapter *adapter); void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter); bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter); +static inline bool igc_fpe_is_pmac_enabled(struct igc_adapter *adapter) +{ + return static_branch_unlikely(&igc_fpe_enabled) && + adapter->fpe.mmsv.pmac_enabled; +} + +static inline bool igc_fpe_handle_mpacket(struct igc_adapter *adapter, + union igc_adv_rx_desc *rx_desc, + unsigned int size, void *pktbuf) +{ + u32 status_error = le32_to_cpu(rx_desc->wb.upper.status_error); + int smd; + + smd = FIELD_GET(IGC_RXDADV_STAT_SMD_TYPE_MASK, status_error); + if (smd != IGC_RXD_STAT_SMD_TYPE_V && smd != IGC_RXD_STAT_SMD_TYPE_R) + return false; + + if (size == SMD_FRAME_SIZE && mem_is_zero(pktbuf, SMD_FRAME_SIZE)) { + struct ethtool_mmsv *mmsv = &adapter->fpe.mmsv; + enum ethtool_mmsv_event event; + + if (smd == IGC_RXD_STAT_SMD_TYPE_V) + event = ETHTOOL_MMSV_LP_SENT_VERIFY_MPACKET; + else + event = ETHTOOL_MMSV_LP_SENT_RESPONSE_MPACKET; + + ethtool_mmsv_event_handle(mmsv, event); + } + + return true; +} + +static inline bool igc_fpe_transmitted_smd_v(union igc_adv_tx_desc *tx_desc) +{ + u32 olinfo_status = le32_to_cpu(tx_desc->read.olinfo_status); + u8 smd = FIELD_GET(IGC_TXD_POPTS_SMD_MASK, olinfo_status); + + return smd == SMD_V; +} + #endif /* _IGC_BASE_H */ diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile index b456d102655a..2e7738f41c58 100644 --- a/drivers/net/ethernet/intel/ixgbe/Makefile +++ b/drivers/net/ethernet/intel/ixgbe/Makefile @@ -4,12 +4,14 @@ # Makefile for the Intel(R) 10GbE PCI Express ethernet driver # +subdir-ccflags-y += 
-I$(src) obj-$(CONFIG_IXGBE) += ixgbe.o ixgbe-y := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \ ixgbe_82599.o ixgbe_82598.o ixgbe_phy.o ixgbe_sriov.o \ ixgbe_mbx.o ixgbe_x540.o ixgbe_x550.o ixgbe_lib.o ixgbe_ptp.o \ - ixgbe_xsk.o ixgbe_e610.o + ixgbe_xsk.o ixgbe_e610.o devlink/devlink.o ixgbe_fw_update.o \ + devlink/region.o ixgbe-$(CONFIG_IXGBE_DCB) += ixgbe_dcb.o ixgbe_dcb_82598.o \ ixgbe_dcb_82599.o ixgbe_dcb_nl.o diff --git a/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c b/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c new file mode 100644 index 000000000000..54f1b83dfe42 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c @@ -0,0 +1,557 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025, Intel Corporation. */ + +#include "ixgbe.h" +#include "devlink.h" +#include "ixgbe_fw_update.h" + +struct ixgbe_info_ctx { + char buf[128]; + struct ixgbe_orom_info pending_orom; + struct ixgbe_nvm_info pending_nvm; + struct ixgbe_netlist_info pending_netlist; + struct ixgbe_hw_dev_caps dev_caps; +}; + +enum ixgbe_devlink_version_type { + IXGBE_DL_VERSION_RUNNING, + IXGBE_DL_VERSION_STORED +}; + +static void ixgbe_info_get_dsn(struct ixgbe_adapter *adapter, + struct ixgbe_info_ctx *ctx) +{ + u8 dsn[8]; + + /* Copy the DSN into an array in Big Endian format */ + put_unaligned_be64(pci_get_dsn(adapter->pdev), dsn); + + snprintf(ctx->buf, sizeof(ctx->buf), "%8phD", dsn); +} + +static void ixgbe_info_orom_ver(struct ixgbe_adapter *adapter, + struct ixgbe_info_ctx *ctx, + enum ixgbe_devlink_version_type type) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_nvm_version nvm_ver; + + ctx->buf[0] = '\0'; + + if (hw->mac.type == ixgbe_mac_e610) { + struct ixgbe_orom_info *orom = &adapter->hw.flash.orom; + + if (type == IXGBE_DL_VERSION_STORED && + ctx->dev_caps.common_cap.nvm_update_pending_orom) + orom = &ctx->pending_orom; + + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", + orom->major, orom->build, orom->patch); + return; + } + + ixgbe_get_oem_prod_version(hw, &nvm_ver); + if (nvm_ver.oem_valid) { + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x", + nvm_ver.oem_major, nvm_ver.oem_minor, + nvm_ver.oem_release); + + return; + } + + ixgbe_get_orom_version(hw, &nvm_ver); + if (nvm_ver.or_valid) + snprintf(ctx->buf, sizeof(ctx->buf), "%d.%d.%d", + nvm_ver.or_major, nvm_ver.or_build, nvm_ver.or_patch); +} + +static void ixgbe_info_eetrack(struct ixgbe_adapter *adapter, + struct ixgbe_info_ctx *ctx, + enum ixgbe_devlink_version_type type) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_nvm_version nvm_ver; + + if (hw->mac.type == ixgbe_mac_e610) { + u32 eetrack = hw->flash.nvm.eetrack; + + if (type == IXGBE_DL_VERSION_STORED && + ctx->dev_caps.common_cap.nvm_update_pending_nvm) + eetrack = ctx->pending_nvm.eetrack; + + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", eetrack); + return; + } + + ixgbe_get_oem_prod_version(hw, &nvm_ver); + + /* No ETRACK version for OEM */ + if (nvm_ver.oem_valid) { + ctx->buf[0] = '\0'; + return; + } + + ixgbe_get_etk_id(hw, &nvm_ver); + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm_ver.etk_id); +} + +static void ixgbe_info_fw_api(struct ixgbe_adapter *adapter, + struct ixgbe_info_ctx *ctx) +{ + struct ixgbe_hw *hw = &adapter->hw; + + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", + hw->api_maj_ver, hw->api_min_ver, hw->api_patch); +} + +static void ixgbe_info_fw_build(struct ixgbe_adapter *adapter, + struct ixgbe_info_ctx *ctx) +{ + struct ixgbe_hw *hw = &adapter->hw; + + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", 
hw->fw_build); +} + +static void ixgbe_info_fw_srev(struct ixgbe_adapter *adapter, + struct ixgbe_info_ctx *ctx, + enum ixgbe_devlink_version_type type) +{ + struct ixgbe_nvm_info *nvm = &adapter->hw.flash.nvm; + + if (type == IXGBE_DL_VERSION_STORED && + ctx->dev_caps.common_cap.nvm_update_pending_nvm) + nvm = &ctx->pending_nvm; + + snprintf(ctx->buf, sizeof(ctx->buf), "%u", nvm->srev); +} + +static void ixgbe_info_orom_srev(struct ixgbe_adapter *adapter, + struct ixgbe_info_ctx *ctx, + enum ixgbe_devlink_version_type type) +{ + struct ixgbe_orom_info *orom = &adapter->hw.flash.orom; + + if (type == IXGBE_DL_VERSION_STORED && + ctx->dev_caps.common_cap.nvm_update_pending_orom) + orom = &ctx->pending_orom; + + snprintf(ctx->buf, sizeof(ctx->buf), "%u", orom->srev); +} + +static void ixgbe_info_nvm_ver(struct ixgbe_adapter *adapter, + struct ixgbe_info_ctx *ctx, + enum ixgbe_devlink_version_type type) +{ + struct ixgbe_nvm_info *nvm = &adapter->hw.flash.nvm; + + if (type == IXGBE_DL_VERSION_STORED && + ctx->dev_caps.common_cap.nvm_update_pending_nvm) + nvm = &ctx->pending_nvm; + + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", nvm->major, nvm->minor); +} + +static void ixgbe_info_netlist_ver(struct ixgbe_adapter *adapter, + struct ixgbe_info_ctx *ctx, + enum ixgbe_devlink_version_type type) +{ + struct ixgbe_netlist_info *netlist = &adapter->hw.flash.netlist; + + if (type == IXGBE_DL_VERSION_STORED && + ctx->dev_caps.common_cap.nvm_update_pending_netlist) + netlist = &ctx->pending_netlist; + + /* The netlist version fields are BCD formatted */ + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x", + netlist->major, netlist->minor, + netlist->type >> 16, netlist->type & 0xFFFF, + netlist->rev, netlist->cust_ver); +} + +static void ixgbe_info_netlist_build(struct ixgbe_adapter *adapter, + struct ixgbe_info_ctx *ctx, + enum ixgbe_devlink_version_type type) +{ + struct ixgbe_netlist_info *netlist = &adapter->hw.flash.netlist; + + if (type == IXGBE_DL_VERSION_STORED && + ctx->dev_caps.common_cap.nvm_update_pending_netlist) + netlist = &ctx->pending_netlist; + + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash); +} + +static int ixgbe_set_ctx_dev_caps(struct ixgbe_hw *hw, + struct ixgbe_info_ctx *ctx, + struct netlink_ext_ack *extack) +{ + bool *pending_orom, *pending_nvm, *pending_netlist; + int err; + + err = ixgbe_discover_dev_caps(hw, &ctx->dev_caps); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Unable to discover device capabilities"); + return err; + } + + pending_orom = &ctx->dev_caps.common_cap.nvm_update_pending_orom; + pending_nvm = &ctx->dev_caps.common_cap.nvm_update_pending_nvm; + pending_netlist = &ctx->dev_caps.common_cap.nvm_update_pending_netlist; + + if (*pending_orom) { + err = ixgbe_get_inactive_orom_ver(hw, &ctx->pending_orom); + if (err) + *pending_orom = false; + } + + if (*pending_nvm) { + err = ixgbe_get_inactive_nvm_ver(hw, &ctx->pending_nvm); + if (err) + *pending_nvm = false; + } + + if (*pending_netlist) { + err = ixgbe_get_inactive_netlist_ver(hw, &ctx->pending_netlist); + if (err) + *pending_netlist = false; + } + + return 0; +} + +static int ixgbe_devlink_info_get_e610(struct ixgbe_adapter *adapter, + struct devlink_info_req *req, + struct ixgbe_info_ctx *ctx) +{ + int err; + + ixgbe_info_fw_api(adapter, ctx); + err = devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW_MGMT_API, + ctx->buf); + if (err) + return err; + + ixgbe_info_fw_build(adapter, ctx); + err = devlink_info_version_running_put(req, "fw.mgmt.build", ctx->buf); 
+ if (err) + return err; + + ixgbe_info_fw_srev(adapter, ctx, IXGBE_DL_VERSION_RUNNING); + err = devlink_info_version_running_put(req, "fw.mgmt.srev", ctx->buf); + if (err) + return err; + + ixgbe_info_orom_srev(adapter, ctx, IXGBE_DL_VERSION_RUNNING); + err = devlink_info_version_running_put(req, "fw.undi.srev", ctx->buf); + if (err) + return err; + + ixgbe_info_nvm_ver(adapter, ctx, IXGBE_DL_VERSION_RUNNING); + err = devlink_info_version_running_put(req, "fw.psid.api", ctx->buf); + if (err) + return err; + + ixgbe_info_netlist_ver(adapter, ctx, IXGBE_DL_VERSION_RUNNING); + err = devlink_info_version_running_put(req, "fw.netlist", ctx->buf); + if (err) + return err; + + ixgbe_info_netlist_build(adapter, ctx, IXGBE_DL_VERSION_RUNNING); + return devlink_info_version_running_put(req, "fw.netlist.build", + ctx->buf); +} + +static int +ixgbe_devlink_pending_info_get_e610(struct ixgbe_adapter *adapter, + struct devlink_info_req *req, + struct ixgbe_info_ctx *ctx) +{ + int err; + + ixgbe_info_orom_ver(adapter, ctx, IXGBE_DL_VERSION_STORED); + err = devlink_info_version_stored_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, + ctx->buf); + if (err) + return err; + + ixgbe_info_eetrack(adapter, ctx, IXGBE_DL_VERSION_STORED); + err = devlink_info_version_stored_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, + ctx->buf); + if (err) + return err; + + ixgbe_info_fw_srev(adapter, ctx, IXGBE_DL_VERSION_STORED); + err = devlink_info_version_stored_put(req, "fw.mgmt.srev", ctx->buf); + if (err) + return err; + + ixgbe_info_orom_srev(adapter, ctx, IXGBE_DL_VERSION_STORED); + err = devlink_info_version_stored_put(req, "fw.undi.srev", ctx->buf); + if (err) + return err; + + ixgbe_info_nvm_ver(adapter, ctx, IXGBE_DL_VERSION_STORED); + err = devlink_info_version_stored_put(req, "fw.psid.api", ctx->buf); + if (err) + return err; + + ixgbe_info_netlist_ver(adapter, ctx, IXGBE_DL_VERSION_STORED); + err = devlink_info_version_stored_put(req, "fw.netlist", ctx->buf); + if (err) + return err; + + ixgbe_info_netlist_build(adapter, ctx, IXGBE_DL_VERSION_STORED); + return devlink_info_version_stored_put(req, "fw.netlist.build", + ctx->buf); +} + +static int ixgbe_devlink_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct ixgbe_adapter *adapter = devlink_priv(devlink); + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_info_ctx *ctx; + int err; + + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + if (hw->mac.type == ixgbe_mac_e610) + ixgbe_refresh_fw_version(adapter); + + ixgbe_info_get_dsn(adapter, ctx); + err = devlink_info_serial_number_put(req, ctx->buf); + if (err) + goto free_ctx; + + err = hw->eeprom.ops.read_pba_string(hw, ctx->buf, sizeof(ctx->buf)); + if (err) + goto free_ctx; + + err = devlink_info_version_fixed_put(req, + DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, + ctx->buf); + if (err) + goto free_ctx; + + ixgbe_info_orom_ver(adapter, ctx, IXGBE_DL_VERSION_RUNNING); + err = devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, + ctx->buf); + if (err) + goto free_ctx; + + ixgbe_info_eetrack(adapter, ctx, IXGBE_DL_VERSION_RUNNING); + err = devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, + ctx->buf); + if (err || hw->mac.type != ixgbe_mac_e610) + goto free_ctx; + + err = ixgbe_set_ctx_dev_caps(hw, ctx, extack); + if (err) + goto free_ctx; + + err = ixgbe_devlink_info_get_e610(adapter, req, ctx); + if (err) + goto free_ctx; + + err = 
ixgbe_devlink_pending_info_get_e610(adapter, req, ctx); +free_ctx: + kfree(ctx); + return err; +} + +/** + * ixgbe_devlink_reload_empr_start - Start EMP reset to activate new firmware + * @devlink: pointer to the devlink instance to reload + * @netns_change: if true, the network namespace is changing + * @action: the action to perform. Must be DEVLINK_RELOAD_ACTION_FW_ACTIVATE + * @limit: limits on what reload should do, such as not resetting + * @extack: netlink extended ACK structure + * + * Allow the user to activate new Embedded Management Processor firmware by + * issuing a device-specific EMP reset. Called in response to + * a DEVLINK_CMD_RELOAD with the DEVLINK_RELOAD_ACTION_FW_ACTIVATE. + * + * Note that teardown and rebuild of the driver state happen automatically as + * part of an interrupt and watchdog task. This is because all physical + * functions on the device must be able to reset when an EMP reset occurs from + * any source. + * + * Return: the exit code of the operation. + */ +static int ixgbe_devlink_reload_empr_start(struct devlink *devlink, + bool netns_change, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + struct netlink_ext_ack *extack) +{ + struct ixgbe_adapter *adapter = devlink_priv(devlink); + struct ixgbe_hw *hw = &adapter->hw; + u8 pending; + int err; + + if (hw->mac.type != ixgbe_mac_e610) + return -EOPNOTSUPP; + + err = ixgbe_get_pending_updates(adapter, &pending, extack); + if (err) + return err; + + /* Pending is a bitmask of which flash banks have a pending update, + * including the main NVM bank, the Option ROM bank, and the netlist + * bank. If any of these bits are set, then there is a pending update + * waiting to be activated. + */ + if (!pending) { + NL_SET_ERR_MSG_MOD(extack, "No pending firmware update"); + return -ECANCELED; + } + + if (adapter->fw_emp_reset_disabled) { + NL_SET_ERR_MSG_MOD(extack, + "EMP reset is not available. To activate firmware, a reboot or power cycle is needed"); + return -ECANCELED; + } + + err = ixgbe_aci_nvm_update_empr(hw); + if (err) + NL_SET_ERR_MSG_MOD(extack, + "Failed to trigger EMP device reset to reload firmware"); + + return err; +} + +/* Wait for 10 s with a 0.5 s tick. An EMPR takes no less than half a second. */ +#define IXGBE_DEVLINK_RELOAD_TIMEOUT_SEC 20 + +/** + * ixgbe_devlink_reload_empr_finish - finishes EMP reset + * @devlink: pointer to the devlink instance + * @action: the action to perform. + * @limit: limits on what reload should do + * @actions_performed: actions performed + * @extack: netlink extended ACK structure + * + * Wait for the new NVM to be loaded during EMP reset. + * + * Return: -ETIME when the timer is exceeded, 0 on success. + */ +static int ixgbe_devlink_reload_empr_finish(struct devlink *devlink, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + u32 *actions_performed, + struct netlink_ext_ack *extack) +{ + struct ixgbe_adapter *adapter = devlink_priv(devlink); + struct ixgbe_hw *hw = &adapter->hw; + int i = 0; + u32 fwsm; + + do { + /* Right after triggering the EMP reset, the FWSM register may + * not be cleared yet, so begin the loop with a delay to avoid + * checking a register that has not yet been updated.
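The do/while loop being opened here (completed just below) is a delay-first poll: FWSM is read only after each 0.5 s sleep, because the register is stale immediately after the reset trigger. For reference, <linux/iopoll.h> can express the same shape via read_poll_timeout() and its sleep_before_read argument; a sketch with hypothetical demo_* accessors (note the helper returns -ETIMEDOUT rather than the -ETIME used above):

```c
#include <linux/iopoll.h>
#include <linux/types.h>

struct demo_hw;					/* stand-in for struct ixgbe_hw */
u32 demo_read_fwsm(struct demo_hw *hw);		/* hypothetical register read */
#define DEMO_FW_VAL_BIT	0x1			/* illustrative bit */

static int demo_wait_fw_valid(struct demo_hw *hw)
{
	u32 fwsm;

	/* 0.5 s tick, 10 s budget, sleep once before the first read. */
	return read_poll_timeout(demo_read_fwsm, fwsm,
				 fwsm & DEMO_FW_VAL_BIT,
				 500 * USEC_PER_MSEC, 10 * USEC_PER_SEC,
				 true, hw);
}
```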
+ */ + mdelay(500); + + fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw)); + + if (i++ >= IXGBE_DEVLINK_RELOAD_TIMEOUT_SEC) + return -ETIME; + + } while (!(fwsm & IXGBE_FWSM_FW_VAL_BIT)); + + *actions_performed = BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE); + + adapter->flags2 &= ~(IXGBE_FLAG2_API_MISMATCH | + IXGBE_FLAG2_FW_ROLLBACK); + + return 0; +} + +static const struct devlink_ops ixgbe_devlink_ops = { + .info_get = ixgbe_devlink_info_get, + .supported_flash_update_params = + DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK, + .flash_update = ixgbe_flash_pldm_image, + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), + .reload_down = ixgbe_devlink_reload_empr_start, + .reload_up = ixgbe_devlink_reload_empr_finish, +}; + +/** + * ixgbe_allocate_devlink - Allocate devlink instance + * @dev: device to allocate devlink for + * + * Allocate a devlink instance for this physical function. + * + * Return: pointer to the device adapter structure on success, + * ERR_PTR(-ENOMEM) when allocation failed. + */ +struct ixgbe_adapter *ixgbe_allocate_devlink(struct device *dev) +{ + struct ixgbe_adapter *adapter; + struct devlink *devlink; + + devlink = devlink_alloc(&ixgbe_devlink_ops, sizeof(*adapter), dev); + if (!devlink) + return ERR_PTR(-ENOMEM); + + adapter = devlink_priv(devlink); + adapter->devlink = devlink; + + return adapter; +} + +/** + * ixgbe_devlink_set_switch_id - Set unique switch ID based on PCI DSN + * @adapter: pointer to the device adapter structure + * @ppid: struct with switch id information + */ +static void ixgbe_devlink_set_switch_id(struct ixgbe_adapter *adapter, + struct netdev_phys_item_id *ppid) +{ + u64 id = pci_get_dsn(adapter->pdev); + + ppid->id_len = sizeof(id); + put_unaligned_be64(id, &ppid->id); +} + +/** + * ixgbe_devlink_register_port - Register devlink port + * @adapter: pointer to the device adapter structure + * + * Create and register a devlink_port for this physical function. + * + * Return: 0 on success, error code on failure. + */ +int ixgbe_devlink_register_port(struct ixgbe_adapter *adapter) +{ + struct devlink_port *devlink_port = &adapter->devlink_port; + struct devlink *devlink = adapter->devlink; + struct device *dev = &adapter->pdev->dev; + struct devlink_port_attrs attrs = {}; + int err; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = adapter->hw.bus.func; + ixgbe_devlink_set_switch_id(adapter, &attrs.switch_id); + + devlink_port_attrs_set(devlink_port, &attrs); + + err = devl_port_register(devlink, devlink_port, 0); + if (err) { + dev_err(dev, + "devlink port registration failed, err %d\n", err); + } + + return err; +} diff --git a/drivers/net/ethernet/intel/ixgbe/devlink/devlink.h b/drivers/net/ethernet/intel/ixgbe/devlink/devlink.h new file mode 100644 index 000000000000..381558058048 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/devlink/devlink.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025, Intel Corporation. 
*/ + +#ifndef _IXGBE_DEVLINK_H_ +#define _IXGBE_DEVLINK_H_ + +struct ixgbe_adapter *ixgbe_allocate_devlink(struct device *dev); +int ixgbe_devlink_register_port(struct ixgbe_adapter *adapter); +void ixgbe_devlink_init_regions(struct ixgbe_adapter *adapter); +void ixgbe_devlink_destroy_regions(struct ixgbe_adapter *adapter); + +#endif /* _IXGBE_DEVLINK_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/devlink/region.c b/drivers/net/ethernet/intel/ixgbe/devlink/region.c new file mode 100644 index 000000000000..76f6571c3c34 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/devlink/region.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025, Intel Corporation. */ + +#include "ixgbe.h" +#include "devlink.h" + +#define IXGBE_DEVLINK_READ_BLK_SIZE (1024 * 1024) + +static const struct devlink_region_ops ixgbe_nvm_region_ops; +static const struct devlink_region_ops ixgbe_sram_region_ops; + +static int ixgbe_devlink_parse_region(struct ixgbe_hw *hw, + const struct devlink_region_ops *ops, + bool *read_shadow_ram, u32 *nvm_size) +{ + if (ops == &ixgbe_nvm_region_ops) { + *read_shadow_ram = false; + *nvm_size = hw->flash.flash_size; + } else if (ops == &ixgbe_sram_region_ops) { + *read_shadow_ram = true; + *nvm_size = hw->flash.sr_words * 2u; + } else { + return -EOPNOTSUPP; + } + + return 0; +} + +/** + * ixgbe_devlink_nvm_snapshot - Capture a snapshot of the NVM content + * @devlink: the devlink instance + * @ops: the devlink region being snapshotted + * @extack: extended ACK response structure + * @data: on exit points to snapshot data buffer + * + * This function is called in response to the DEVLINK_CMD_REGION_NEW cmd. + * + * Capture a snapshot of the whole requested NVM region. + * + * No need to worry about freeing @data; the devlink core takes care of it. + * + * Return: 0 on success, -EOPNOTSUPP for unsupported regions, -EBUSY when + * the NVM cannot be locked, -ENOMEM when memory cannot be allocated, and + * -EIO when an error occurs during reading. + */ +static int ixgbe_devlink_nvm_snapshot(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, u8 **data) +{ + struct ixgbe_adapter *adapter = devlink_priv(devlink); + struct ixgbe_hw *hw = &adapter->hw; + bool read_shadow_ram; + u8 *nvm_data, *buf; + u32 nvm_size, left; + u8 num_blks; + int err; + + err = ixgbe_devlink_parse_region(hw, ops, &read_shadow_ram, &nvm_size); + if (err) + return err; + + nvm_data = kvzalloc(nvm_size, GFP_KERNEL); + if (!nvm_data) + return -ENOMEM; + + num_blks = DIV_ROUND_UP(nvm_size, IXGBE_DEVLINK_READ_BLK_SIZE); + buf = nvm_data; + left = nvm_size; + + for (int i = 0; i < num_blks; i++) { + u32 read_sz = min_t(u32, IXGBE_DEVLINK_READ_BLK_SIZE, left); + + /* The NVM lock must be acquired on each loop iteration because + * reading the whole NVM takes longer than the maximum period + * the lock may be held, defined by IXGBE_NVM_TIMEOUT. + */
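As the comment above says, the semaphore is re-taken for every 1 MiB block so that no single hold outlives the hardware's NVM access timeout. The skeleton of that pattern, with demo_* names standing in for ixgbe_acquire_nvm()/ixgbe_read_flat_nvm()/ixgbe_release_nvm():

```c
#include <linux/minmax.h>
#include <linux/types.h>

struct demo_dev;	/* stand-in for struct ixgbe_hw */

/* Hypothetical primitives; assumed to behave like the ixgbe calls. */
int demo_lock(struct demo_dev *dev);
void demo_unlock(struct demo_dev *dev);
int demo_read(struct demo_dev *dev, u32 off, u8 *buf, u32 len);

static int demo_read_chunked(struct demo_dev *dev, u8 *buf, u32 total,
			     u32 blk)
{
	u32 off;

	for (off = 0; off < total; off += blk) {
		u32 len = min(blk, total - off);
		int err;

		err = demo_lock(dev);	/* each hold stays under the timeout */
		if (err)
			return err;

		err = demo_read(dev, off, buf + off, len);
		demo_unlock(dev);	/* drop the lock before the next block */
		if (err)
			return err;
	}

	return 0;
}
```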
+ err = ixgbe_acquire_nvm(hw, IXGBE_RES_READ); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to acquire NVM semaphore"); + kvfree(nvm_data); + return -EBUSY; + } + + err = ixgbe_read_flat_nvm(hw, i * IXGBE_DEVLINK_READ_BLK_SIZE, + &read_sz, buf, read_shadow_ram); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to read RAM content"); + ixgbe_release_nvm(hw); + kvfree(nvm_data); + return -EIO; + } + + ixgbe_release_nvm(hw); + + buf += read_sz; + left -= read_sz; + } + + *data = nvm_data; + return 0; +} + +/** + * ixgbe_devlink_devcaps_snapshot - Capture a snapshot of device capabilities + * @devlink: the devlink instance + * @ops: the devlink region being snapshotted + * @extack: extended ACK response structure + * @data: on exit points to snapshot data buffer + * + * This function is called in response to the DEVLINK_CMD_REGION_NEW for + * the device-caps devlink region. + * + * Capture a snapshot of the device capabilities reported by firmware. + * + * No need to worry about freeing @data; the devlink core takes care of it. + * + * Return: 0 on success, -ENOMEM when memory cannot be allocated, or the + * return code of the read operation. + */ +static int ixgbe_devlink_devcaps_snapshot(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, + u8 **data) +{ + struct ixgbe_adapter *adapter = devlink_priv(devlink); + struct ixgbe_aci_cmd_list_caps_elem *caps; + struct ixgbe_hw *hw = &adapter->hw; + int err; + + caps = kvzalloc(IXGBE_ACI_MAX_BUFFER_SIZE, GFP_KERNEL); + if (!caps) + return -ENOMEM; + + err = ixgbe_aci_list_caps(hw, caps, IXGBE_ACI_MAX_BUFFER_SIZE, NULL, + ixgbe_aci_opc_list_dev_caps); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to read device capabilities"); + kvfree(caps); + return err; + } + + *data = (u8 *)caps; + return 0; +} + +/** + * ixgbe_devlink_nvm_read - Read a portion of NVM flash content + * @devlink: the devlink instance + * @ops: the devlink region to snapshot + * @extack: extended ACK response structure + * @offset: the offset to start at + * @size: the amount to read + * @data: the data buffer to read into + * + * This function is called in response to DEVLINK_CMD_REGION_READ to directly + * read a section of the NVM contents. + * + * Read from either the nvm-flash region or the shadow-ram region. + * + * Return: 0 on success, -EOPNOTSUPP for unsupported regions, -EBUSY when + * the NVM cannot be locked, -ERANGE when the buffer limit is exceeded, and + * -EIO when an error occurs during reading.
+ */ +static int ixgbe_devlink_nvm_read(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, + u64 offset, u32 size, u8 *data) +{ + struct ixgbe_adapter *adapter = devlink_priv(devlink); + struct ixgbe_hw *hw = &adapter->hw; + bool read_shadow_ram; + u32 nvm_size; + int err; + + err = ixgbe_devlink_parse_region(hw, ops, &read_shadow_ram, &nvm_size); + if (err) + return err; + + if (offset + size > nvm_size) { + NL_SET_ERR_MSG_MOD(extack, "Cannot read beyond the region size"); + return -ERANGE; + } + + err = ixgbe_acquire_nvm(hw, IXGBE_RES_READ); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); + return -EBUSY; + } + + err = ixgbe_read_flat_nvm(hw, (u32)offset, &size, data, read_shadow_ram); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); + ixgbe_release_nvm(hw); + return -EIO; + } + + ixgbe_release_nvm(hw); + return 0; +} + +static const struct devlink_region_ops ixgbe_nvm_region_ops = { + .name = "nvm-flash", + .destructor = kvfree, + .snapshot = ixgbe_devlink_nvm_snapshot, + .read = ixgbe_devlink_nvm_read, +}; + +static const struct devlink_region_ops ixgbe_sram_region_ops = { + .name = "shadow-ram", + .destructor = kvfree, + .snapshot = ixgbe_devlink_nvm_snapshot, + .read = ixgbe_devlink_nvm_read, +}; + +static const struct devlink_region_ops ixgbe_devcaps_region_ops = { + .name = "device-caps", + .destructor = kvfree, + .snapshot = ixgbe_devlink_devcaps_snapshot, +}; + +/** + * ixgbe_devlink_init_regions - Initialize devlink regions + * @adapter: adapter instance + * + * Create devlink regions used to enable access to dump the contents of the + * flash memory of the device. + */ +void ixgbe_devlink_init_regions(struct ixgbe_adapter *adapter) +{ + struct devlink *devlink = adapter->devlink; + struct device *dev = &adapter->pdev->dev; + u64 nvm_size, sram_size; + + if (adapter->hw.mac.type != ixgbe_mac_e610) + return; + + nvm_size = adapter->hw.flash.flash_size; + adapter->nvm_region = devl_region_create(devlink, &ixgbe_nvm_region_ops, + 1, nvm_size); + if (IS_ERR(adapter->nvm_region)) { + dev_err(dev, + "Failed to create NVM devlink region, err %ld\n", + PTR_ERR(adapter->nvm_region)); + adapter->nvm_region = NULL; + } + + sram_size = adapter->hw.flash.sr_words * 2u; + adapter->sram_region = devl_region_create(devlink, &ixgbe_sram_region_ops, + 1, sram_size); + if (IS_ERR(adapter->sram_region)) { + dev_err(dev, + "Failed to create shadow-ram devlink region, err %ld\n", + PTR_ERR(adapter->sram_region)); + adapter->sram_region = NULL; + } + + adapter->devcaps_region = devl_region_create(devlink, + &ixgbe_devcaps_region_ops, + 10, IXGBE_ACI_MAX_BUFFER_SIZE); + if (IS_ERR(adapter->devcaps_region)) { + dev_err(dev, + "Failed to create device-caps devlink region, err %ld\n", + PTR_ERR(adapter->devcaps_region)); + adapter->devcaps_region = NULL; + } +} + +/** + * ixgbe_devlink_destroy_regions - Destroy devlink regions + * @adapter: adapter instance + * + * Remove previously created regions for this adapter instance. 
+ */ +void ixgbe_devlink_destroy_regions(struct ixgbe_adapter *adapter) +{ + if (adapter->hw.mac.type != ixgbe_mac_e610) + return; + + if (adapter->nvm_region) + devl_region_destroy(adapter->nvm_region); + + if (adapter->sram_region) + devl_region_destroy(adapter->sram_region); + + if (adapter->devcaps_region) + devl_region_destroy(adapter->devcaps_region); +} diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index e6a380d4929b..47311b134a7a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -17,6 +17,8 @@ #include <linux/net_tstamp.h> #include <linux/ptp_clock_kernel.h> +#include <net/devlink.h> + #include "ixgbe_type.h" #include "ixgbe_common.h" #include "ixgbe_dcb.h" @@ -612,6 +614,11 @@ struct ixgbe_adapter { struct bpf_prog *xdp_prog; struct pci_dev *pdev; struct mii_bus *mii_bus; + struct devlink *devlink; + struct devlink_port devlink_port; + struct devlink_region *nvm_region; + struct devlink_region *sram_region; + struct devlink_region *devcaps_region; unsigned long state; @@ -667,6 +674,8 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_PHY_FW_LOAD_FAILED BIT(20) #define IXGBE_FLAG2_NO_MEDIA BIT(21) #define IXGBE_FLAG2_MOD_POWER_UNSUPPORTED BIT(22) +#define IXGBE_FLAG2_API_MISMATCH BIT(23) +#define IXGBE_FLAG2_FW_ROLLBACK BIT(24) /* Tx fast path data */ int num_tx_queues; @@ -755,6 +764,8 @@ struct ixgbe_adapter { u32 atr_sample_rate; spinlock_t fdir_perfect_lock; + bool fw_emp_reset_disabled; + #ifdef IXGBE_FCOE struct ixgbe_fcoe fcoe; #endif /* IXGBE_FCOE */ @@ -830,6 +841,17 @@ struct ixgbe_adapter { spinlock_t vfs_lock; }; +struct ixgbe_netdevice_priv { + struct ixgbe_adapter *adapter; +}; + +static inline struct ixgbe_adapter *ixgbe_from_netdev(struct net_device *netdev) +{ + struct ixgbe_netdevice_priv *priv = netdev_priv(netdev); + + return priv->adapter; +} + static inline int ixgbe_determine_xdp_q_idx(int cpu) { if (static_key_enabled(&ixgbe_xdp_locking_key)) @@ -945,6 +967,8 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter); int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter); bool ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id, u16 subdevice_id); +void ixgbe_set_fw_version_e610(struct ixgbe_adapter *adapter); +void ixgbe_refresh_fw_version(struct ixgbe_adapter *adapter); #ifdef CONFIG_PCI_IOV void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter); #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index 4aaaea3b5f8f..444da982593f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -1169,6 +1169,7 @@ static const struct ixgbe_eeprom_operations eeprom_ops_82598 = { .calc_checksum = &ixgbe_calc_eeprom_checksum_generic, .validate_checksum = &ixgbe_validate_eeprom_checksum_generic, .update_checksum = &ixgbe_update_eeprom_checksum_generic, + .read_pba_string = &ixgbe_read_pba_string_generic, }; static const struct ixgbe_phy_operations phy_ops_82598 = { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index 964988b4d58b..d5b1b974b4a3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -2230,6 +2230,7 @@ static const struct ixgbe_eeprom_operations eeprom_ops_82599 = { .calc_checksum = &ixgbe_calc_eeprom_checksum_generic, .validate_checksum = &ixgbe_validate_eeprom_checksum_generic, 
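The ixgbe.h hunk above is the key to the long netdev_priv() -> ixgbe_from_netdev() churn that follows: the adapter is now allocated as the devlink_priv() of the devlink instance, and the netdev's private area shrinks to a single back-pointer. The probe-time wiring presumably looks like the sketch below; the exact call sites live in ixgbe_probe(), which is not shown in this hunk, and error unwinding is elided:

```c
#include <linux/err.h>
#include <linux/etherdevice.h>
#include "ixgbe.h"

static struct net_device *demo_wire_netdev(struct pci_dev *pdev,
					   unsigned int txqs)
{
	struct ixgbe_netdevice_priv *priv;
	struct ixgbe_adapter *adapter;
	struct net_device *netdev;

	adapter = ixgbe_allocate_devlink(&pdev->dev);	/* lives in devlink_priv() */
	if (IS_ERR(adapter))
		return NULL;

	netdev = alloc_etherdev_mq(sizeof(*priv), txqs);
	if (!netdev)
		return NULL;

	priv = netdev_priv(netdev);
	priv->adapter = adapter;	/* what ixgbe_from_netdev() reads back */

	return netdev;
}
```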
.update_checksum = &ixgbe_update_eeprom_checksum_generic, + .read_pba_string = &ixgbe_read_pba_string_generic, }; static const struct ixgbe_phy_operations phy_ops_82599 = { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 7beaf6ea57f9..5784d5d1896e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -332,6 +332,7 @@ int ixgbe_start_hw_generic(struct ixgbe_hw *hw) * Devices in the second generation: * 82599 * X540 + * E610 **/ int ixgbe_start_hw_gen2(struct ixgbe_hw *hw) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c index 19d6b6fa8fb3..3dd5a16a14df 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c @@ -118,14 +118,14 @@ static int ixgbe_copy_dcb_cfg(struct ixgbe_adapter *adapter, int tc_max) static u8 ixgbe_dcbnl_get_state(struct net_device *netdev) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); return !!(adapter->flags & IXGBE_FLAG_DCB_ENABLED); } static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); /* Fail command if not in CEE mode */ if (!(adapter->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) @@ -142,7 +142,7 @@ static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state) static void ixgbe_dcbnl_get_perm_hw_addr(struct net_device *netdev, u8 *perm_addr) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); int i, j; memset(perm_addr, 0xff, MAX_ADDR_LEN); @@ -167,7 +167,7 @@ static void ixgbe_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc, u8 prio, u8 bwg_id, u8 bw_pct, u8 up_map) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); if (prio != DCB_ATTR_VALUE_UNDEFINED) adapter->temp_dcb_cfg.tc_config[tc].path[0].prio_type = prio; @@ -184,7 +184,7 @@ static void ixgbe_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc, static void ixgbe_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id, u8 bw_pct) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); adapter->temp_dcb_cfg.bw_percentage[0][bwg_id] = bw_pct; } @@ -193,7 +193,7 @@ static void ixgbe_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev, int tc, u8 prio, u8 bwg_id, u8 bw_pct, u8 up_map) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); if (prio != DCB_ATTR_VALUE_UNDEFINED) adapter->temp_dcb_cfg.tc_config[tc].path[1].prio_type = prio; @@ -210,7 +210,7 @@ static void ixgbe_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev, int tc, static void ixgbe_dcbnl_set_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id, u8 bw_pct) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); adapter->temp_dcb_cfg.bw_percentage[1][bwg_id] = bw_pct; } @@ -219,7 +219,7 @@ static void ixgbe_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int tc, u8 *prio, u8 *bwg_id, u8 *bw_pct, u8 *up_map) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); *prio = 
adapter->dcb_cfg.tc_config[tc].path[0].prio_type; *bwg_id = adapter->dcb_cfg.tc_config[tc].path[0].bwg_id; @@ -230,7 +230,7 @@ static void ixgbe_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int tc, static void ixgbe_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id, u8 *bw_pct) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); *bw_pct = adapter->dcb_cfg.bw_percentage[0][bwg_id]; } @@ -239,7 +239,7 @@ static void ixgbe_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int tc, u8 *prio, u8 *bwg_id, u8 *bw_pct, u8 *up_map) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); *prio = adapter->dcb_cfg.tc_config[tc].path[1].prio_type; *bwg_id = adapter->dcb_cfg.tc_config[tc].path[1].bwg_id; @@ -250,7 +250,7 @@ static void ixgbe_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int tc, static void ixgbe_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id, u8 *bw_pct) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); *bw_pct = adapter->dcb_cfg.bw_percentage[1][bwg_id]; } @@ -258,7 +258,7 @@ static void ixgbe_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id, static void ixgbe_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority, u8 setting) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); adapter->temp_dcb_cfg.tc_config[priority].dcb_pfc = setting; if (adapter->temp_dcb_cfg.tc_config[priority].dcb_pfc != @@ -269,14 +269,14 @@ static void ixgbe_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority, static void ixgbe_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority, u8 *setting) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); *setting = adapter->dcb_cfg.tc_config[priority].dcb_pfc; } static void ixgbe_dcbnl_devreset(struct net_device *dev) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state)) usleep_range(1000, 2000); @@ -295,7 +295,7 @@ static void ixgbe_dcbnl_devreset(struct net_device *dev) static u8 ixgbe_dcbnl_set_all(struct net_device *netdev) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_dcb_config *dcb_cfg = &adapter->dcb_cfg; struct ixgbe_hw *hw = &adapter->hw; int ret = DCB_NO_HW_CHG; @@ -383,7 +383,7 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev) static u8 ixgbe_dcbnl_getcap(struct net_device *netdev, int capid, u8 *cap) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); switch (capid) { case DCB_CAP_ATTR_PG: @@ -420,7 +420,7 @@ static u8 ixgbe_dcbnl_getcap(struct net_device *netdev, int capid, u8 *cap) static int ixgbe_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { switch (tcid) { @@ -447,14 +447,14 @@ static int ixgbe_dcbnl_setnumtcs(struct net_device *netdev, int tcid, u8 num) static u8 ixgbe_dcbnl_getpfcstate(struct net_device *netdev) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = 
ixgbe_from_netdev(netdev); return adapter->dcb_cfg.pfc_mode_enable; } static void ixgbe_dcbnl_setpfcstate(struct net_device *netdev, u8 state) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); adapter->temp_dcb_cfg.pfc_mode_enable = state; } @@ -471,7 +471,7 @@ static void ixgbe_dcbnl_setpfcstate(struct net_device *netdev, u8 state) */ static int ixgbe_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct dcb_app app = { .selector = idtype, .protocol = id, @@ -486,7 +486,7 @@ static int ixgbe_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id) static int ixgbe_dcbnl_ieee_getets(struct net_device *dev, struct ieee_ets *ets) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct ieee_ets *my_ets = adapter->ixgbe_ieee_ets; ets->ets_cap = adapter->dcb_cfg.num_tcs.pg_tcs; @@ -506,7 +506,7 @@ static int ixgbe_dcbnl_ieee_getets(struct net_device *dev, static int ixgbe_dcbnl_ieee_setets(struct net_device *dev, struct ieee_ets *ets) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); int max_frame = dev->mtu + ETH_HLEN + ETH_FCS_LEN; int i, err; __u8 max_tc = 0; @@ -559,7 +559,7 @@ static int ixgbe_dcbnl_ieee_setets(struct net_device *dev, static int ixgbe_dcbnl_ieee_getpfc(struct net_device *dev, struct ieee_pfc *pfc) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct ieee_pfc *my_pfc = adapter->ixgbe_ieee_pfc; int i; @@ -584,7 +584,7 @@ static int ixgbe_dcbnl_ieee_getpfc(struct net_device *dev, static int ixgbe_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct ixgbe_hw *hw = &adapter->hw; u8 *prio_tc; int err; @@ -616,7 +616,7 @@ static int ixgbe_dcbnl_ieee_setpfc(struct net_device *dev, static int ixgbe_dcbnl_ieee_setapp(struct net_device *dev, struct dcb_app *app) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); int err; if (!(adapter->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) @@ -661,7 +661,7 @@ static int ixgbe_dcbnl_ieee_setapp(struct net_device *dev, static int ixgbe_dcbnl_ieee_delapp(struct net_device *dev, struct dcb_app *app) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); int err; if (!(adapter->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) @@ -705,13 +705,13 @@ static int ixgbe_dcbnl_ieee_delapp(struct net_device *dev, static u8 ixgbe_dcbnl_getdcbx(struct net_device *dev) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); return adapter->dcbx_cap; } static u8 ixgbe_dcbnl_setdcbx(struct net_device *dev, u8 mode) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct ieee_ets ets = {0}; struct ieee_pfc pfc = {0}; int err = 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c index 00935747c8c5..71ea25de1bac 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c @@ -344,6 +344,40 @@ void ixgbe_fill_dflt_direct_cmd_desc(struct 
ixgbe_aci_desc *desc, u16 opcode) } /** + * ixgbe_aci_get_fw_ver - Get the firmware version + * @hw: pointer to the HW struct + * + * Get the firmware version using ACI command (0x0001). + * + * Return: the exit code of the operation. + */ +static int ixgbe_aci_get_fw_ver(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_ver *resp; + struct ixgbe_aci_desc desc; + int err; + + resp = &desc.params.get_ver; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_ver); + + err = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + if (!err) { + hw->fw_branch = resp->fw_branch; + hw->fw_maj_ver = resp->fw_major; + hw->fw_min_ver = resp->fw_minor; + hw->fw_patch = resp->fw_patch; + hw->fw_build = le32_to_cpu(resp->fw_build); + hw->api_branch = resp->api_branch; + hw->api_maj_ver = resp->api_major; + hw->api_min_ver = resp->api_minor; + hw->api_patch = resp->api_patch; + } + + return err; +} + +/** * ixgbe_aci_req_res - request a common resource * @hw: pointer to the HW struct * @res: resource ID @@ -554,6 +588,20 @@ static bool ixgbe_parse_e610_caps(struct ixgbe_hw *hw, break; case IXGBE_ACI_CAPS_NVM_VER: break; + case IXGBE_ACI_CAPS_PENDING_NVM_VER: + caps->nvm_update_pending_nvm = true; + break; + case IXGBE_ACI_CAPS_PENDING_OROM_VER: + caps->nvm_update_pending_orom = true; + break; + case IXGBE_ACI_CAPS_PENDING_NET_VER: + caps->nvm_update_pending_netlist = true; + break; + case IXGBE_ACI_CAPS_NVM_MGMT: + caps->nvm_unified_update = + (number & IXGBE_NVM_MGMT_UNIFIED_UPD_SUPPORT) ? + true : false; + break; case IXGBE_ACI_CAPS_MAX_MTU: caps->max_mtu = number; break; @@ -1411,6 +1459,61 @@ int ixgbe_configure_lse(struct ixgbe_hw *hw, bool activate, u16 mask) } /** + * ixgbe_start_hw_e610 - Prepare hardware for Tx/Rx + * @hw: pointer to hardware structure + * + * Get firmware version and start the hardware using the generic + * start_hw() and ixgbe_start_hw_gen2() functions. + * + * Return: the exit code of the operation. + */ +static int ixgbe_start_hw_e610(struct ixgbe_hw *hw) +{ + int err; + + err = ixgbe_aci_get_fw_ver(hw); + if (err) + return err; + + err = ixgbe_start_hw_generic(hw); + if (err) + return err; + + ixgbe_start_hw_gen2(hw); + + return 0; +} + +/** + * ixgbe_aci_set_port_id_led - set LED value for the given port + * @hw: pointer to the HW struct + * @orig_mode: set LED original mode + * + * Set LED value for the given port (0x06E9) + * + * Return: the exit code of the operation. + */ +int ixgbe_aci_set_port_id_led(struct ixgbe_hw *hw, bool orig_mode) +{ + struct ixgbe_aci_cmd_set_port_id_led *cmd; + struct ixgbe_aci_desc desc; + + cmd = &desc.params.set_port_id_led; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_set_port_id_led); + + cmd->lport_num = (u8)hw->bus.func; + cmd->lport_num_valid = IXGBE_ACI_PORT_ID_PORT_NUM_VALID; + + if (orig_mode) + cmd->ident_mode = IXGBE_ACI_PORT_IDENT_LED_ORIG; + else + cmd->ident_mode = IXGBE_ACI_PORT_IDENT_LED_BLINK; + + return ixgbe_aci_send_cmd(hw, &desc, NULL, 0); +} + +/** * ixgbe_get_media_type_e610 - Gets media type * @hw: pointer to the HW struct * @@ -1743,6 +1846,38 @@ void ixgbe_disable_rx_e610(struct ixgbe_hw *hw) } /** + * ixgbe_fw_recovery_mode_e610 - Check FW NVM recovery mode + * @hw: pointer to hardware structure + * + * Check FW NVM recovery mode by reading the value of + * the dedicated register. + * + * Return: true if FW is in recovery mode, otherwise false. 
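+ *
+ * A minimal caller sketch (hypothetical, not part of this patch): the
+ * service task could consult this hook and back off while recovery is in
+ * progress, since only NVM update traffic is meaningful then:
+ *
+ *	if (hw->mac.ops.fw_recovery_mode && hw->mac.ops.fw_recovery_mode(hw))
+ *		return;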
+ */
+static bool ixgbe_fw_recovery_mode_e610(struct ixgbe_hw *hw)
+{
+	u32 fwsm = IXGBE_READ_REG(hw, IXGBE_GL_MNG_FWSM);
+
+	return !!(fwsm & IXGBE_GL_MNG_FWSM_RECOVERY_M);
+}
+
+/**
+ * ixgbe_fw_rollback_mode_e610 - Check FW NVM rollback mode
+ * @hw: pointer to hardware structure
+ *
+ * Check FW NVM rollback mode by reading the value of
+ * the dedicated register.
+ *
+ * Return: true if FW is in rollback mode, otherwise false.
+ */
+static bool ixgbe_fw_rollback_mode_e610(struct ixgbe_hw *hw)
+{
+	u32 fwsm = IXGBE_READ_REG(hw, IXGBE_GL_MNG_FWSM);
+
+	return !!(fwsm & IXGBE_GL_MNG_FWSM_ROLLBACK_M);
+}
+
+/**
 * ixgbe_init_phy_ops_e610 - PHY specific init
 * @hw: pointer to hardware structure
 *
@@ -2226,6 +2361,131 @@ int ixgbe_aci_read_nvm(struct ixgbe_hw *hw, u16 module_typeid, u32 offset,
 }

 /**
+ * ixgbe_aci_erase_nvm - erase NVM sector
+ * @hw: pointer to the HW struct
+ * @module_typeid: module pointer location in words from the NVM beginning
+ *
+ * Erase the NVM sector using the ACI command (0x0702).
+ *
+ * Return: the exit code of the operation.
+ */
+int ixgbe_aci_erase_nvm(struct ixgbe_hw *hw, u16 module_typeid)
+{
+	struct ixgbe_aci_cmd_nvm *cmd;
+	struct ixgbe_aci_desc desc;
+	__le16 len;
+	int err;
+
+	/* Read the module's size word from the Shadow RAM: module_typeid 0
+	 * selects the SR, and the size word sits right after the module
+	 * pointer, i.e. at byte offset 2 * module_typeid + 2 (word offset
+	 * converted to bytes). Both last_command and read_shadow_ram are
+	 * set to true.
+	 */
+	err = ixgbe_aci_read_nvm(hw, 0, 2 * module_typeid + 2, 2, &len, true,
+				 true);
+	if (err)
+		return err;
+
+	cmd = &desc.params.nvm;
+
+	ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_erase);
+
+	cmd->module_typeid = cpu_to_le16(module_typeid);
+	cmd->length = len;
+	cmd->offset_low = 0;
+	cmd->offset_high = 0;
+
+	return ixgbe_aci_send_cmd(hw, &desc, NULL, 0);
+}
+
+/**
+ * ixgbe_aci_update_nvm - update NVM
+ * @hw: pointer to the HW struct
+ * @module_typeid: module pointer location in words from the NVM beginning
+ * @offset: byte offset from the module beginning
+ * @length: length of the section to be written (in bytes from the offset)
+ * @data: command buffer (size [bytes] = length)
+ * @last_command: tells if this is the last command in a series
+ * @command_flags: command parameters
+ *
+ * Update the NVM using the ACI command (0x0703).
+ *
+ * Return: the exit code of the operation.
+ */
+int ixgbe_aci_update_nvm(struct ixgbe_hw *hw, u16 module_typeid,
+			 u32 offset, u16 length, void *data,
+			 bool last_command, u8 command_flags)
+{
+	struct ixgbe_aci_cmd_nvm *cmd;
+	struct ixgbe_aci_desc desc;
+
+	cmd = &desc.params.nvm;
+
+	/* The highest byte of the offset must be zero. */
+	if (offset & 0xFF000000)
+		return -EINVAL;
+
+	ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_write);
+
+	cmd->cmd_flags |= command_flags;
+
+	/* If this is the last command in a series, set the proper flag.
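+	 * Firmware treats a write series as one transaction; the flag tells
+	 * it that no further ixgbe_aci_update_nvm() calls follow, so it can
+	 * finalize the write (assumed semantics, matching the last_command
+	 * parameter of ixgbe_aci_read_nvm()).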
*/ + if (last_command) + cmd->cmd_flags |= IXGBE_ACI_NVM_LAST_CMD; + cmd->module_typeid = cpu_to_le16(module_typeid); + cmd->offset_low = cpu_to_le16(offset & 0xFFFF); + cmd->offset_high = FIELD_GET(IXGBE_ACI_NVM_OFFSET_HI_U_MASK, offset); + cmd->length = cpu_to_le16(length); + + desc.flags |= cpu_to_le16(IXGBE_ACI_FLAG_RD); + + return ixgbe_aci_send_cmd(hw, &desc, data, length); +} + +/** + * ixgbe_nvm_write_activate - NVM activate write + * @hw: pointer to the HW struct + * @cmd_flags: flags for write activate command + * @response_flags: response indicators from firmware + * + * Update the control word with the required banks' validity bits + * and dumps the Shadow RAM to flash using ACI command (0x0707). + * + * cmd_flags controls which banks to activate, the preservation level to use + * when activating the NVM bank, and whether an EMP reset is required for + * activation. + * + * Note that the 16bit cmd_flags value is split between two separate 1 byte + * flag values in the descriptor. + * + * On successful return of the firmware command, the response_flags variable + * is updated with the flags reported by firmware indicating certain status, + * such as whether EMP reset is enabled. + * + * Return: the exit code of the operation. + */ +int ixgbe_nvm_write_activate(struct ixgbe_hw *hw, u16 cmd_flags, + u8 *response_flags) +{ + struct ixgbe_aci_cmd_nvm *cmd; + struct ixgbe_aci_desc desc; + s32 err; + + cmd = &desc.params.nvm; + ixgbe_fill_dflt_direct_cmd_desc(&desc, + ixgbe_aci_opc_nvm_write_activate); + + cmd->cmd_flags = (u8)(cmd_flags & 0xFF); + cmd->offset_high = (u8)FIELD_GET(IXGBE_ACI_NVM_OFFSET_HI_A_MASK, + cmd_flags); + + err = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + if (!err && response_flags) + *response_flags = cmd->cmd_flags; + + return err; +} + +/** * ixgbe_nvm_validate_checksum - validate checksum * @hw: pointer to the HW struct * @@ -2267,6 +2527,955 @@ int ixgbe_nvm_validate_checksum(struct ixgbe_hw *hw) } /** + * ixgbe_discover_flash_size - Discover the available flash size + * @hw: pointer to the HW struct + * + * The device flash could be up to 16MB in size. However, it is possible that + * the actual size is smaller. Use bisection to determine the accessible size + * of flash memory. + * + * Return: the exit code of the operation. + */ +static int ixgbe_discover_flash_size(struct ixgbe_hw *hw) +{ + u32 min_size = 0, max_size = IXGBE_ACI_NVM_MAX_OFFSET + 1; + int err; + + err = ixgbe_acquire_nvm(hw, IXGBE_RES_READ); + if (err) + return err; + + while ((max_size - min_size) > 1) { + u32 offset = (max_size + min_size) / 2; + u32 len = 1; + u8 data; + + err = ixgbe_read_flat_nvm(hw, offset, &len, &data, false); + if (err == -EIO && + hw->aci.last_status == IXGBE_ACI_RC_EINVAL) { + err = 0; + max_size = offset; + } else if (!err) { + min_size = offset; + } else { + /* an unexpected error occurred */ + goto err_read_flat_nvm; + } + } + + hw->flash.flash_size = max_size; + +err_read_flat_nvm: + ixgbe_release_nvm(hw); + + return err; +} + +/** + * ixgbe_read_sr_base_address - Read the value of a Shadow RAM pointer word + * @hw: pointer to the HW structure + * @offset: the word offset of the Shadow RAM word to read + * @pointer: pointer value read from Shadow RAM + * + * Read the given Shadow RAM word, and convert it to a pointer value specified + * in bytes. This function assumes the specified offset is a valid pointer + * word. + * + * Each pointer word specifies whether it is stored in word size or 4KB + * sector size by using the highest bit. 
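+ * For example (illustrative raw values, assuming the units flag is the
+ * high bit as noted above): 0x8005 yields (0x8005 & ~0x8000) * 4096 =
+ * 20480 bytes, while 0x0005 yields 5 * 2 = 10 bytes.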
The reported pointer value will be in + * bytes, intended for flat NVM reads. + * + * Return: the exit code of the operation. + */ +static int ixgbe_read_sr_base_address(struct ixgbe_hw *hw, u16 offset, + u32 *pointer) +{ + u16 value; + int err; + + err = ixgbe_read_ee_aci_e610(hw, offset, &value); + if (err) + return err; + + /* Determine if the pointer is in 4KB or word units */ + if (value & IXGBE_SR_NVM_PTR_4KB_UNITS) + *pointer = (value & ~IXGBE_SR_NVM_PTR_4KB_UNITS) * SZ_4K; + else + *pointer = value * sizeof(u16); + + return 0; +} + +/** + * ixgbe_read_sr_area_size - Read an area size from a Shadow RAM word + * @hw: pointer to the HW structure + * @offset: the word offset of the Shadow RAM to read + * @size: size value read from the Shadow RAM + * + * Read the given Shadow RAM word, and convert it to an area size value + * specified in bytes. This function assumes the specified offset is a valid + * area size word. + * + * Each area size word is specified in 4KB sector units. This function reports + * the size in bytes, intended for flat NVM reads. + * + * Return: the exit code of the operation. + */ +static int ixgbe_read_sr_area_size(struct ixgbe_hw *hw, u16 offset, u32 *size) +{ + u16 value; + int err; + + err = ixgbe_read_ee_aci_e610(hw, offset, &value); + if (err) + return err; + + /* Area sizes are always specified in 4KB units */ + *size = value * SZ_4K; + + return 0; +} + +/** + * ixgbe_determine_active_flash_banks - Discover active bank for each module + * @hw: pointer to the HW struct + * + * Read the Shadow RAM control word and determine which banks are active for + * the NVM, OROM, and Netlist modules. Also read and calculate the associated + * pointer and size. These values are then cached into the ixgbe_flash_info + * structure for later use in order to calculate the correct offset to read + * from the active module. + * + * Return: the exit code of the operation. 
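+ *
+ * Assumed layout for each module pair (a sketch; per the bank offset
+ * helper below, the second bank is stored immediately after the first and
+ * both are "size" bytes long):
+ *
+ *	ptr              ptr + size
+ *	|   bank 1       |   bank 2       |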
+ */ +static int ixgbe_determine_active_flash_banks(struct ixgbe_hw *hw) +{ + struct ixgbe_bank_info *banks = &hw->flash.banks; + u16 ctrl_word; + int err; + + err = ixgbe_read_ee_aci_e610(hw, IXGBE_E610_SR_NVM_CTRL_WORD, + &ctrl_word); + if (err) + return err; + + if (FIELD_GET(IXGBE_SR_CTRL_WORD_1_M, ctrl_word) != + IXGBE_SR_CTRL_WORD_VALID) + return -ENODATA; + + if (!(ctrl_word & IXGBE_SR_CTRL_WORD_NVM_BANK)) + banks->nvm_bank = IXGBE_1ST_FLASH_BANK; + else + banks->nvm_bank = IXGBE_2ND_FLASH_BANK; + + if (!(ctrl_word & IXGBE_SR_CTRL_WORD_OROM_BANK)) + banks->orom_bank = IXGBE_1ST_FLASH_BANK; + else + banks->orom_bank = IXGBE_2ND_FLASH_BANK; + + if (!(ctrl_word & IXGBE_SR_CTRL_WORD_NETLIST_BANK)) + banks->netlist_bank = IXGBE_1ST_FLASH_BANK; + else + banks->netlist_bank = IXGBE_2ND_FLASH_BANK; + + err = ixgbe_read_sr_base_address(hw, IXGBE_E610_SR_1ST_NVM_BANK_PTR, + &banks->nvm_ptr); + if (err) + return err; + + err = ixgbe_read_sr_area_size(hw, IXGBE_E610_SR_NVM_BANK_SIZE, + &banks->nvm_size); + if (err) + return err; + + err = ixgbe_read_sr_base_address(hw, IXGBE_E610_SR_1ST_OROM_BANK_PTR, + &banks->orom_ptr); + if (err) + return err; + + err = ixgbe_read_sr_area_size(hw, IXGBE_E610_SR_OROM_BANK_SIZE, + &banks->orom_size); + if (err) + return err; + + err = ixgbe_read_sr_base_address(hw, IXGBE_E610_SR_NETLIST_BANK_PTR, + &banks->netlist_ptr); + if (err) + return err; + + err = ixgbe_read_sr_area_size(hw, IXGBE_E610_SR_NETLIST_BANK_SIZE, + &banks->netlist_size); + + return err; +} + +/** + * ixgbe_get_flash_bank_offset - Get offset into requested flash bank + * @hw: pointer to the HW structure + * @bank: whether to read from the active or inactive flash bank + * @module: the module to read from + * + * Based on the module, lookup the module offset from the beginning of the + * flash. + * + * Return: the flash offset. Note that a value of zero is invalid and must be + * treated as an error. + */ +static int ixgbe_get_flash_bank_offset(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + u16 module) +{ + struct ixgbe_bank_info *banks = &hw->flash.banks; + enum ixgbe_flash_bank active_bank; + bool second_bank_active; + u32 offset, size; + + switch (module) { + case IXGBE_E610_SR_1ST_NVM_BANK_PTR: + offset = banks->nvm_ptr; + size = banks->nvm_size; + active_bank = banks->nvm_bank; + break; + case IXGBE_E610_SR_1ST_OROM_BANK_PTR: + offset = banks->orom_ptr; + size = banks->orom_size; + active_bank = banks->orom_bank; + break; + case IXGBE_E610_SR_NETLIST_BANK_PTR: + offset = banks->netlist_ptr; + size = banks->netlist_size; + active_bank = banks->netlist_bank; + break; + default: + return 0; + } + + switch (active_bank) { + case IXGBE_1ST_FLASH_BANK: + second_bank_active = false; + break; + case IXGBE_2ND_FLASH_BANK: + second_bank_active = true; + break; + default: + return 0; + } + + /* The second flash bank is stored immediately following the first + * bank. Based on whether the 1st or 2nd bank is active, and whether + * we want the active or inactive bank, calculate the desired offset. + */ + switch (bank) { + case IXGBE_ACTIVE_FLASH_BANK: + return offset + (second_bank_active ? size : 0); + case IXGBE_INACTIVE_FLASH_BANK: + return offset + (second_bank_active ? 
0 : size); + } + + return 0; +} + +/** + * ixgbe_read_flash_module - Read a word from one of the main NVM modules + * @hw: pointer to the HW structure + * @bank: which bank of the module to read + * @module: the module to read + * @offset: the offset into the module in bytes + * @data: storage for the word read from the flash + * @length: bytes of data to read + * + * Read data from the specified flash module. The bank parameter indicates + * whether or not to read from the active bank or the inactive bank of that + * module. + * + * The word will be read using flat NVM access, and relies on the + * hw->flash.banks data being setup by ixgbe_determine_active_flash_banks() + * during initialization. + * + * Return: the exit code of the operation. + */ +static int ixgbe_read_flash_module(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + u16 module, u32 offset, u8 *data, u32 length) +{ + u32 start; + int err; + + start = ixgbe_get_flash_bank_offset(hw, bank, module); + if (!start) + return -EINVAL; + + err = ixgbe_acquire_nvm(hw, IXGBE_RES_READ); + if (err) + return err; + + err = ixgbe_read_flat_nvm(hw, start + offset, &length, data, false); + + ixgbe_release_nvm(hw); + + return err; +} + +/** + * ixgbe_read_nvm_module - Read from the active main NVM module + * @hw: pointer to the HW structure + * @bank: whether to read from active or inactive NVM module + * @offset: offset into the NVM module to read, in words + * @data: storage for returned word value + * + * Read the specified word from the active NVM module. This includes the CSS + * header at the start of the NVM module. + * + * Return: the exit code of the operation. + */ +static int ixgbe_read_nvm_module(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + u32 offset, u16 *data) +{ + __le16 data_local; + int err; + + err = ixgbe_read_flash_module(hw, bank, IXGBE_E610_SR_1ST_NVM_BANK_PTR, + offset * sizeof(data_local), + (u8 *)&data_local, + sizeof(data_local)); + if (!err) + *data = le16_to_cpu(data_local); + + return err; +} + +/** + * ixgbe_read_netlist_module - Read data from the netlist module area + * @hw: pointer to the HW structure + * @bank: whether to read from the active or inactive module + * @offset: offset into the netlist to read from + * @data: storage for returned word value + * + * Read a word from the specified netlist bank. + * + * Return: the exit code of the operation. + */ +static int ixgbe_read_netlist_module(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + u32 offset, u16 *data) +{ + __le16 data_local; + int err; + + err = ixgbe_read_flash_module(hw, bank, IXGBE_E610_SR_NETLIST_BANK_PTR, + offset * sizeof(data_local), + (u8 *)&data_local, sizeof(data_local)); + if (!err) + *data = le16_to_cpu(data_local); + + return err; +} + +/** + * ixgbe_read_orom_module - Read from the active Option ROM module + * @hw: pointer to the HW structure + * @bank: whether to read from active or inactive OROM module + * @offset: offset into the OROM module to read, in words + * @data: storage for returned word value + * + * Read the specified word from the active Option ROM module of the flash. + * Note that unlike the NVM module, the CSS data is stored at the end of the + * module instead of at the beginning. + * + * Return: the exit code of the operation. 
+ */ +static int ixgbe_read_orom_module(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + u32 offset, u16 *data) +{ + __le16 data_local; + int err; + + err = ixgbe_read_flash_module(hw, bank, IXGBE_E610_SR_1ST_OROM_BANK_PTR, + offset * sizeof(data_local), + (u8 *)&data_local, sizeof(data_local)); + if (!err) + *data = le16_to_cpu(data_local); + + return err; +} + +/** + * ixgbe_get_nvm_css_hdr_len - Read the CSS header length + * @hw: pointer to the HW struct + * @bank: whether to read from the active or inactive flash bank + * @hdr_len: storage for header length in words + * + * Read the CSS header length from the NVM CSS header and add the + * Authentication header size, and then convert to words. + * + * Return: the exit code of the operation. + */ +static int ixgbe_get_nvm_css_hdr_len(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + u32 *hdr_len) +{ + u16 hdr_len_l, hdr_len_h; + u32 hdr_len_dword; + int err; + + err = ixgbe_read_nvm_module(hw, bank, IXGBE_NVM_CSS_HDR_LEN_L, + &hdr_len_l); + if (err) + return err; + + err = ixgbe_read_nvm_module(hw, bank, IXGBE_NVM_CSS_HDR_LEN_H, + &hdr_len_h); + if (err) + return err; + + /* CSS header length is in DWORD, so convert to words and add + * authentication header size. + */ + hdr_len_dword = (hdr_len_h << 16) | hdr_len_l; + *hdr_len = hdr_len_dword * 2 + IXGBE_NVM_AUTH_HEADER_LEN; + + return 0; +} + +/** + * ixgbe_read_nvm_sr_copy - Read a word from the Shadow RAM copy + * @hw: pointer to the HW structure + * @bank: whether to read from the active or inactive NVM module + * @offset: offset into the Shadow RAM copy to read, in words + * @data: storage for returned word value + * + * Read the specified word from the copy of the Shadow RAM found in the + * specified NVM module. + * + * Return: the exit code of the operation. + */ +static int ixgbe_read_nvm_sr_copy(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + u32 offset, u16 *data) +{ + u32 hdr_len; + int err; + + err = ixgbe_get_nvm_css_hdr_len(hw, bank, &hdr_len); + if (err) + return err; + + hdr_len = round_up(hdr_len, IXGBE_HDR_LEN_ROUNDUP); + + return ixgbe_read_nvm_module(hw, bank, hdr_len + offset, data); +} + +/** + * ixgbe_get_nvm_srev - Read the security revision from the NVM CSS header + * @hw: pointer to the HW struct + * @bank: whether to read from the active or inactive flash bank + * @srev: storage for security revision + * + * Read the security revision out of the CSS header of the active NVM module + * bank. + * + * Return: the exit code of the operation. + */ +static int ixgbe_get_nvm_srev(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, u32 *srev) +{ + u16 srev_l, srev_h; + int err; + + err = ixgbe_read_nvm_module(hw, bank, IXGBE_NVM_CSS_SREV_L, &srev_l); + if (err) + return err; + + err = ixgbe_read_nvm_module(hw, bank, IXGBE_NVM_CSS_SREV_H, &srev_h); + if (err) + return err; + + *srev = (srev_h << 16) | srev_l; + + return 0; +} + +/** + * ixgbe_get_orom_civd_data - Get the combo version information from Option ROM + * @hw: pointer to the HW struct + * @bank: whether to read from the active or inactive flash module + * @civd: storage for the Option ROM CIVD data. + * + * Searches through the Option ROM flash contents to locate the CIVD data for + * the image. + * + * Return: the exit code of the operation. 
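+ *
+ * The data is located by scanning the Option ROM in 512-byte steps for the
+ * "$CIV" signature and validating the modulo-256 checksum, as implemented
+ * in the loop below.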
+ */ +static int +ixgbe_get_orom_civd_data(struct ixgbe_hw *hw, enum ixgbe_bank_select bank, + struct ixgbe_orom_civd_info *civd) +{ + struct ixgbe_orom_civd_info tmp; + u32 offset; + int err; + + /* The CIVD section is located in the Option ROM aligned to 512 bytes. + * The first 4 bytes must contain the ASCII characters "$CIV". + * A simple modulo 256 sum of all of the bytes of the structure must + * equal 0. + */ + for (offset = 0; (offset + SZ_512) <= hw->flash.banks.orom_size; + offset += SZ_512) { + u8 sum = 0; + u32 i; + + err = ixgbe_read_flash_module(hw, bank, + IXGBE_E610_SR_1ST_OROM_BANK_PTR, + offset, + (u8 *)&tmp, sizeof(tmp)); + if (err) + return err; + + /* Skip forward until we find a matching signature */ + if (memcmp(IXGBE_OROM_CIV_SIGNATURE, tmp.signature, + sizeof(tmp.signature))) + continue; + + /* Verify that the simple checksum is zero */ + for (i = 0; i < sizeof(tmp); i++) + sum += ((u8 *)&tmp)[i]; + + if (sum) + return -EDOM; + + *civd = tmp; + return 0; + } + + return -ENODATA; +} + +/** + * ixgbe_get_orom_srev - Read the security revision from the OROM CSS header + * @hw: pointer to the HW struct + * @bank: whether to read from active or inactive flash module + * @srev: storage for security revision + * + * Read the security revision out of the CSS header of the active OROM module + * bank. + * + * Return: the exit code of the operation. + */ +static int ixgbe_get_orom_srev(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + u32 *srev) +{ + u32 orom_size_word = hw->flash.banks.orom_size / 2; + u32 css_start, hdr_len; + u16 srev_l, srev_h; + int err; + + err = ixgbe_get_nvm_css_hdr_len(hw, bank, &hdr_len); + if (err) + return err; + + if (orom_size_word < hdr_len) + return -EINVAL; + + /* Calculate how far into the Option ROM the CSS header starts. Note + * that ixgbe_read_orom_module takes a word offset. + */ + css_start = orom_size_word - hdr_len; + err = ixgbe_read_orom_module(hw, bank, + css_start + IXGBE_NVM_CSS_SREV_L, + &srev_l); + if (err) + return err; + + err = ixgbe_read_orom_module(hw, bank, + css_start + IXGBE_NVM_CSS_SREV_H, + &srev_h); + if (err) + return err; + + *srev = srev_h << 16 | srev_l; + + return 0; +} + +/** + * ixgbe_get_orom_ver_info - Read Option ROM version information + * @hw: pointer to the HW struct + * @bank: whether to read from the active or inactive flash module + * @orom: pointer to Option ROM info structure + * + * Read Option ROM version and security revision from the Option ROM flash + * section. + * + * Return: the exit code of the operation. + */ +static int ixgbe_get_orom_ver_info(struct ixgbe_hw *hw, + enum ixgbe_bank_select bank, + struct ixgbe_orom_info *orom) +{ + struct ixgbe_orom_civd_info civd; + u32 combo_ver; + int err; + + err = ixgbe_get_orom_civd_data(hw, bank, &civd); + if (err) + return err; + + combo_ver = le32_to_cpu(civd.combo_ver); + + orom->major = (u8)FIELD_GET(IXGBE_OROM_VER_MASK, combo_ver); + orom->patch = (u8)FIELD_GET(IXGBE_OROM_VER_PATCH_MASK, combo_ver); + orom->build = (u16)FIELD_GET(IXGBE_OROM_VER_BUILD_MASK, combo_ver); + + return ixgbe_get_orom_srev(hw, bank, &orom->srev); +} + +/** + * ixgbe_get_inactive_orom_ver - Read Option ROM version from the inactive bank + * @hw: pointer to the HW structure + * @orom: storage for Option ROM version information + * + * Read the Option ROM version and security revision data for the inactive + * section of flash. Used to access version data for a pending update that has + * not yet been activated. + * + * Return: the exit code of the operation. 
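+ *
+ * One expected consumer (an assumption about the rest of this series) is
+ * the devlink dev info path, where inactive-bank versions are reported as
+ * "stored" versions alongside the running ones.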
+ */
+int ixgbe_get_inactive_orom_ver(struct ixgbe_hw *hw,
+				struct ixgbe_orom_info *orom)
+{
+	return ixgbe_get_orom_ver_info(hw, IXGBE_INACTIVE_FLASH_BANK, orom);
+}
+
+/**
+ * ixgbe_get_nvm_ver_info - Read NVM version information
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash bank
+ * @nvm: pointer to NVM info structure
+ *
+ * Read the NVM EETRACK ID and map version of the main NVM image bank, filling
+ * in the nvm info structure.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_get_nvm_ver_info(struct ixgbe_hw *hw,
+				  enum ixgbe_bank_select bank,
+				  struct ixgbe_nvm_info *nvm)
+{
+	u16 eetrack_lo, eetrack_hi, ver;
+	int err;
+
+	err = ixgbe_read_nvm_sr_copy(hw, bank,
+				     IXGBE_E610_SR_NVM_DEV_STARTER_VER, &ver);
+	if (err)
+		return err;
+
+	nvm->major = FIELD_GET(IXGBE_E610_NVM_VER_HI_MASK, ver);
+	nvm->minor = FIELD_GET(IXGBE_E610_NVM_VER_LO_MASK, ver);
+
+	err = ixgbe_read_nvm_sr_copy(hw, bank, IXGBE_E610_SR_NVM_EETRACK_LO,
+				     &eetrack_lo);
+	if (err)
+		return err;
+
+	err = ixgbe_read_nvm_sr_copy(hw, bank, IXGBE_E610_SR_NVM_EETRACK_HI,
+				     &eetrack_hi);
+	if (err)
+		return err;
+
+	nvm->eetrack = (eetrack_hi << 16) | eetrack_lo;
+
+	ixgbe_get_nvm_srev(hw, bank, &nvm->srev);
+
+	return 0;
+}
+
+/**
+ * ixgbe_get_inactive_nvm_ver - Read NVM version from the inactive bank
+ * @hw: pointer to the HW structure
+ * @nvm: storage for NVM version information
+ *
+ * Read the NVM EETRACK ID, Map version, and security revision of the
+ * inactive NVM bank. Used to access version data for a pending update that
+ * has not yet been activated.
+ *
+ * Return: the exit code of the operation.
+ */
+int ixgbe_get_inactive_nvm_ver(struct ixgbe_hw *hw, struct ixgbe_nvm_info *nvm)
+{
+	return ixgbe_get_nvm_ver_info(hw, IXGBE_INACTIVE_FLASH_BANK, nvm);
+}
+
+/**
+ * ixgbe_get_active_nvm_ver - Read NVM version from the active bank
+ * @hw: pointer to the HW structure
+ * @nvm: storage for NVM version information
+ *
+ * Read the NVM EETRACK ID, Map version, and security revision of the
+ * active NVM bank.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_get_active_nvm_ver(struct ixgbe_hw *hw,
+				    struct ixgbe_nvm_info *nvm)
+{
+	return ixgbe_get_nvm_ver_info(hw, IXGBE_ACTIVE_FLASH_BANK, nvm);
+}
+
+/**
+ * ixgbe_get_netlist_info - Read the netlist version information
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash bank
+ * @netlist: pointer to netlist version info structure
+ *
+ * Get the netlist version information from the requested bank. Read the Link
+ * Topology section to find the Netlist ID block and extract the relevant
+ * information into the netlist version structure.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_get_netlist_info(struct ixgbe_hw *hw,
+				  enum ixgbe_bank_select bank,
+				  struct ixgbe_netlist_info *netlist)
+{
+	u16 module_id, length, node_count, i;
+	u16 *id_blk;
+	int err;
+
+	err = ixgbe_read_netlist_module(hw, bank, IXGBE_NETLIST_TYPE_OFFSET,
+					&module_id);
+	if (err)
+		return err;
+
+	if (module_id != IXGBE_NETLIST_LINK_TOPO_MOD_ID)
+		return -EIO;
+
+	err = ixgbe_read_netlist_module(hw, bank, IXGBE_LINK_TOPO_MODULE_LEN,
+					&length);
+	if (err)
+		return err;
+
+	/* Sanity check that we have at least enough words to store the
+	 * netlist ID block.
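+	 * (length is the Link Topology module length in words read above;
+	 * the ID block alone needs IXGBE_NETLIST_ID_BLK_SIZE words.)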
+ */ + if (length < IXGBE_NETLIST_ID_BLK_SIZE) + return -EIO; + + err = ixgbe_read_netlist_module(hw, bank, IXGBE_LINK_TOPO_NODE_COUNT, + &node_count); + if (err) + return err; + + node_count &= IXGBE_LINK_TOPO_NODE_COUNT_M; + + id_blk = kcalloc(IXGBE_NETLIST_ID_BLK_SIZE, sizeof(*id_blk), GFP_KERNEL); + if (!id_blk) + return -ENOMEM; + + /* Read out the entire Netlist ID Block at once. */ + err = ixgbe_read_flash_module(hw, bank, IXGBE_E610_SR_NETLIST_BANK_PTR, + IXGBE_NETLIST_ID_BLK_OFFSET(node_count) * + sizeof(*id_blk), (u8 *)id_blk, + IXGBE_NETLIST_ID_BLK_SIZE * + sizeof(*id_blk)); + if (err) + goto free_id_blk; + + for (i = 0; i < IXGBE_NETLIST_ID_BLK_SIZE; i++) + id_blk[i] = le16_to_cpu(((__le16 *)id_blk)[i]); + + netlist->major = id_blk[IXGBE_NETLIST_ID_BLK_MAJOR_VER_HIGH] << 16 | + id_blk[IXGBE_NETLIST_ID_BLK_MAJOR_VER_LOW]; + netlist->minor = id_blk[IXGBE_NETLIST_ID_BLK_MINOR_VER_HIGH] << 16 | + id_blk[IXGBE_NETLIST_ID_BLK_MINOR_VER_LOW]; + netlist->type = id_blk[IXGBE_NETLIST_ID_BLK_TYPE_HIGH] << 16 | + id_blk[IXGBE_NETLIST_ID_BLK_TYPE_LOW]; + netlist->rev = id_blk[IXGBE_NETLIST_ID_BLK_REV_HIGH] << 16 | + id_blk[IXGBE_NETLIST_ID_BLK_REV_LOW]; + netlist->cust_ver = id_blk[IXGBE_NETLIST_ID_BLK_CUST_VER]; + /* Read the left most 4 bytes of SHA */ + netlist->hash = id_blk[IXGBE_NETLIST_ID_BLK_SHA_HASH_WORD(15)] << 16 | + id_blk[IXGBE_NETLIST_ID_BLK_SHA_HASH_WORD(14)]; + +free_id_blk: + kfree(id_blk); + return err; +} + +/** + * ixgbe_get_inactive_netlist_ver - Read netlist version from the inactive bank + * @hw: pointer to the HW struct + * @netlist: pointer to netlist version info structure + * + * Read the netlist version data from the inactive netlist bank. Used to + * extract version data of a pending flash update in order to display the + * version data. + * + * Return: the exit code of the operation. + */ +int ixgbe_get_inactive_netlist_ver(struct ixgbe_hw *hw, + struct ixgbe_netlist_info *netlist) +{ + return ixgbe_get_netlist_info(hw, IXGBE_INACTIVE_FLASH_BANK, netlist); +} + +/** + * ixgbe_get_flash_data - get flash data + * @hw: pointer to the HW struct + * + * Read and populate flash data such as Shadow RAM size, + * max_timeout and blank_nvm_mode + * + * Return: the exit code of the operation. + */ +int ixgbe_get_flash_data(struct ixgbe_hw *hw) +{ + struct ixgbe_flash_info *flash = &hw->flash; + u32 fla, gens_stat; + u8 sr_size; + int err; + + /* The SR size is stored regardless of the NVM programming mode + * as the blank mode may be used in the factory line. 
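+	 *
+	 * sr_size below is a power-of-two exponent in KB units; e.g. an
+	 * (illustrative) value of 5 gives BIT(5) KB = 32 KB, i.e. 16384
+	 * 16-bit words in sr_words.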
+	 */
+	gens_stat = IXGBE_READ_REG(hw, GLNVM_GENS);
+	sr_size = FIELD_GET(GLNVM_GENS_SR_SIZE_M, gens_stat);
+
+	/* Switching to words (sr_size contains power of 2) */
+	flash->sr_words = BIT(sr_size) * (SZ_1K / sizeof(u16));
+
+	/* Check if we are in the normal or blank NVM programming mode */
+	fla = IXGBE_READ_REG(hw, IXGBE_GLNVM_FLA);
+	if (fla & IXGBE_GLNVM_FLA_LOCKED_M) {
+		flash->blank_nvm_mode = false;
+	} else {
+		flash->blank_nvm_mode = true;
+		return -EIO;
+	}
+
+	err = ixgbe_discover_flash_size(hw);
+	if (err)
+		return err;
+
+	err = ixgbe_determine_active_flash_banks(hw);
+	if (err)
+		return err;
+
+	err = ixgbe_get_nvm_ver_info(hw, IXGBE_ACTIVE_FLASH_BANK,
+				     &flash->nvm);
+	if (err)
+		return err;
+
+	err = ixgbe_get_orom_ver_info(hw, IXGBE_ACTIVE_FLASH_BANK,
+				      &flash->orom);
+	if (err)
+		return err;
+
+	err = ixgbe_get_netlist_info(hw, IXGBE_ACTIVE_FLASH_BANK,
+				     &flash->netlist);
+	return err;
+}
+
+/**
+ * ixgbe_aci_nvm_update_empr - update NVM using EMPR
+ * @hw: pointer to the HW struct
+ *
+ * Force EMP reset using ACI command (0x0709). This command allows SW to
+ * request an EMPR to activate new FW.
+ *
+ * Return: the exit code of the operation.
+ */
+int ixgbe_aci_nvm_update_empr(struct ixgbe_hw *hw)
+{
+	struct ixgbe_aci_desc desc;
+
+	ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_update_empr);
+
+	return ixgbe_aci_send_cmd(hw, &desc, NULL, 0);
+}
+
+/**
+ * ixgbe_nvm_set_pkg_data - NVM set package data
+ * @hw: pointer to the HW struct
+ * @del_pkg_data_flag: if set, the pkg_data currently stored by FW is
+ *		       deleted; the buffer must then have zero length
+ * @data: pointer to buffer
+ * @length: length of the buffer
+ *
+ * Set package data using ACI command (0x070A).
+ * This command is equivalent to the reception of
+ * a PLDM FW Update GetPackageData cmd. This command should be sent
+ * as part of the NVM update as the first cmd in the flow.
+ *
+ * Return: the exit code of the operation.
+ */
+int ixgbe_nvm_set_pkg_data(struct ixgbe_hw *hw, bool del_pkg_data_flag,
+			   u8 *data, u16 length)
+{
+	struct ixgbe_aci_cmd_nvm_pkg_data *cmd;
+	struct ixgbe_aci_desc desc;
+
+	if (length != 0 && !data)
+		return -EINVAL;
+
+	cmd = &desc.params.pkg_data;
+
+	ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_pkg_data);
+	desc.flags |= cpu_to_le16(IXGBE_ACI_FLAG_RD);
+
+	if (del_pkg_data_flag)
+		cmd->cmd_flags |= IXGBE_ACI_NVM_PKG_DELETE;
+
+	return ixgbe_aci_send_cmd(hw, &desc, data, length);
+}
+
+/**
+ * ixgbe_nvm_pass_component_tbl - NVM pass component table
+ * @hw: pointer to the HW struct
+ * @data: pointer to buffer
+ * @length: length of the buffer
+ * @transfer_flag: parameter for determining stage of the update
+ * @comp_response: a pointer to the response from the 0x070B ACI
+ * @comp_response_code: a pointer to the response code from the 0x070B ACI
+ *
+ * Pass component table using ACI command (0x070B). This command is equivalent
+ * to the reception of a PLDM FW Update PassComponentTable cmd.
+ * This command should be sent once per component. It can only be sent after
+ * the Set Package Data cmd and before the actual update. FW will assume these
+ * commands are going to be sent until the TransferFlag is set to End or
+ * StartAndEnd.
+ *
+ * Return: the exit code of the operation.
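+ *
+ * Sketch of the assumed overall update flow built from these helpers
+ * (mirroring the PLDM FW Update sequence described above):
+ *
+ *	ixgbe_nvm_set_pkg_data()       - first command in the flow
+ *	ixgbe_nvm_pass_component_tbl() - once per component
+ *	ixgbe_aci_erase_nvm() + ixgbe_aci_update_nvm() - program the bank
+ *	ixgbe_nvm_write_activate()     - mark the new bank valid
+ *	ixgbe_aci_nvm_update_empr()    - EMP reset to activate new FW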
+ */ +int ixgbe_nvm_pass_component_tbl(struct ixgbe_hw *hw, u8 *data, u16 length, + u8 transfer_flag, u8 *comp_response, + u8 *comp_response_code) +{ + struct ixgbe_aci_cmd_nvm_pass_comp_tbl *cmd; + struct ixgbe_aci_desc desc; + int err; + + if (!data || !comp_response || !comp_response_code) + return -EINVAL; + + cmd = &desc.params.pass_comp_tbl; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, + ixgbe_aci_opc_nvm_pass_component_tbl); + desc.flags |= cpu_to_le16(IXGBE_ACI_FLAG_RD); + + cmd->transfer_flag = transfer_flag; + err = ixgbe_aci_send_cmd(hw, &desc, data, length); + if (!err) { + *comp_response = cmd->component_response; + *comp_response_code = cmd->component_response_code; + } + + return err; +} + +/** * ixgbe_read_sr_word_aci - Reads Shadow RAM via ACI * @hw: pointer to the HW structure * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF) @@ -2485,7 +3694,7 @@ int ixgbe_validate_eeprom_checksum_e610(struct ixgbe_hw *hw, u16 *checksum_val) if (err) return err; - err = ixgbe_read_sr_word_aci(hw, E610_SR_SW_CHECKSUM_WORD, + err = ixgbe_read_sr_word_aci(hw, IXGBE_E610_SR_SW_CHECKSUM_WORD, &tmp_checksum); ixgbe_release_nvm(hw); @@ -2580,9 +3789,129 @@ reset_hw_out: return err; } +/** + * ixgbe_get_pfa_module_tlv - Read sub module TLV from NVM PFA + * @hw: pointer to hardware structure + * @module_tlv: pointer to module TLV to return + * @module_tlv_len: pointer to module TLV length to return + * @module_type: module type requested + * + * Find the requested sub module TLV type from the Preserved Field + * Area (PFA) and returns the TLV pointer and length. The caller can + * use these to read the variable length TLV value. + * + * Return: the exit code of the operation. + */ +static int ixgbe_get_pfa_module_tlv(struct ixgbe_hw *hw, u16 *module_tlv, + u16 *module_tlv_len, u16 module_type) +{ + u16 pfa_len, pfa_ptr, pfa_end_ptr; + u16 next_tlv; + int err; + + err = ixgbe_read_ee_aci_e610(hw, IXGBE_E610_SR_PFA_PTR, &pfa_ptr); + if (err) + return err; + + err = ixgbe_read_ee_aci_e610(hw, pfa_ptr, &pfa_len); + if (err) + return err; + + /* Starting with first TLV after PFA length, iterate through the list + * of TLVs to find the requested one. + */ + next_tlv = pfa_ptr + 1; + pfa_end_ptr = pfa_ptr + pfa_len; + while (next_tlv < pfa_end_ptr) { + u16 tlv_sub_module_type, tlv_len; + + /* Read TLV type */ + err = ixgbe_read_ee_aci_e610(hw, next_tlv, + &tlv_sub_module_type); + if (err) + break; + + /* Read TLV length */ + err = ixgbe_read_ee_aci_e610(hw, next_tlv + 1, &tlv_len); + if (err) + break; + + if (tlv_sub_module_type == module_type) { + if (tlv_len) { + *module_tlv = next_tlv; + *module_tlv_len = tlv_len; + return 0; + } + return -EIO; + } + /* Check next TLV, i.e. current TLV pointer + length + 2 words + * (for current TLV's type and length). + */ + next_tlv = next_tlv + tlv_len + 2; + } + /* Module does not exist */ + return -ENODATA; +} + +/** + * ixgbe_read_pba_string_e610 - Read PBA string from NVM + * @hw: pointer to hardware structure + * @pba_num: stores the part number string from the NVM + * @pba_num_size: part number string buffer length + * + * Read the part number string from the NVM. + * + * Return: the exit code of the operation. 
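+ *
+ * Each PBA word holds two ASCII characters, high byte first (assuming
+ * IXGBE_E610_SR_PBA_BLOCK_MASK selects the upper byte, as the unpacking
+ * loop below implies); e.g. a word of 0x414B unpacks to "AK".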
+ */ +static int ixgbe_read_pba_string_e610(struct ixgbe_hw *hw, u8 *pba_num, + u32 pba_num_size) +{ + u16 pba_tlv, pba_tlv_len; + u16 pba_word, pba_size; + int err; + + *pba_num = '\0'; + + err = ixgbe_get_pfa_module_tlv(hw, &pba_tlv, &pba_tlv_len, + IXGBE_E610_SR_PBA_BLOCK_PTR); + if (err) + return err; + + /* pba_size is the next word */ + err = ixgbe_read_ee_aci_e610(hw, (pba_tlv + 2), &pba_size); + if (err) + return err; + + if (pba_tlv_len < pba_size) + return -EINVAL; + + /* Subtract one to get PBA word count (PBA Size word is included in + * total size). + */ + pba_size--; + + if (pba_num_size < (((u32)pba_size * 2) + 1)) + return -EINVAL; + + for (u16 i = 0; i < pba_size; i++) { + err = ixgbe_read_ee_aci_e610(hw, (pba_tlv + 2 + 1) + i, + &pba_word); + if (err) + return err; + + pba_num[(i * 2)] = FIELD_GET(IXGBE_E610_SR_PBA_BLOCK_MASK, + pba_word); + pba_num[(i * 2) + 1] = pba_word & 0xFF; + } + + pba_num[(pba_size * 2)] = '\0'; + + return err; +} + static const struct ixgbe_mac_operations mac_ops_e610 = { .init_hw = ixgbe_init_hw_generic, - .start_hw = ixgbe_start_hw_X540, + .start_hw = ixgbe_start_hw_e610, .clear_hw_cntrs = ixgbe_clear_hw_cntrs_generic, .enable_rx_dma = ixgbe_enable_rx_dma_generic, .get_mac_addr = ixgbe_get_mac_addr_generic, @@ -2621,8 +3950,12 @@ static const struct ixgbe_mac_operations mac_ops_e610 = { .led_off = ixgbe_led_off_generic, .init_led_link_act = ixgbe_init_led_link_act_generic, .reset_hw = ixgbe_reset_hw_e610, + .get_fw_ver = ixgbe_aci_get_fw_ver, .get_media_type = ixgbe_get_media_type_e610, .setup_link = ixgbe_setup_link_e610, + .fw_recovery_mode = ixgbe_fw_recovery_mode_e610, + .fw_rollback_mode = ixgbe_fw_rollback_mode_e610, + .get_nvm_ver = ixgbe_get_active_nvm_ver, .get_link_capabilities = ixgbe_get_link_capabilities_e610, .get_bus_info = ixgbe_get_bus_info_generic, .acquire_swfw_sync = ixgbe_acquire_swfw_sync_X540, @@ -2647,6 +3980,8 @@ static const struct ixgbe_eeprom_operations eeprom_ops_e610 = { .read = ixgbe_read_ee_aci_e610, .read_buffer = ixgbe_read_ee_aci_buffer_e610, .validate_checksum = ixgbe_validate_eeprom_checksum_e610, + .read_pba_string = ixgbe_read_pba_string_e610, + .init_params = ixgbe_init_eeprom_params_e610, }; const struct ixgbe_info ixgbe_e610_info = { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h index ba8c06b73810..bb31d65bd1c8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h @@ -36,6 +36,7 @@ int ixgbe_aci_get_link_info(struct ixgbe_hw *hw, bool ena_lse, struct ixgbe_link_status *link); int ixgbe_aci_set_event_mask(struct ixgbe_hw *hw, u8 port_num, u16 mask); int ixgbe_configure_lse(struct ixgbe_hw *hw, bool activate, u16 mask); +int ixgbe_aci_set_port_id_led(struct ixgbe_hw *hw, bool orig_mode); enum ixgbe_media_type ixgbe_get_media_type_e610(struct ixgbe_hw *hw); int ixgbe_setup_link_e610(struct ixgbe_hw *hw, ixgbe_link_speed speed, bool autoneg_wait); @@ -67,6 +68,11 @@ int ixgbe_aci_read_nvm(struct ixgbe_hw *hw, u16 module_typeid, u32 offset, u16 length, void *data, bool last_command, bool read_shadow_ram); int ixgbe_nvm_validate_checksum(struct ixgbe_hw *hw); +int ixgbe_get_inactive_orom_ver(struct ixgbe_hw *hw, + struct ixgbe_orom_info *orom); +int ixgbe_get_inactive_nvm_ver(struct ixgbe_hw *hw, struct ixgbe_nvm_info *nvm); +int ixgbe_get_inactive_netlist_ver(struct ixgbe_hw *hw, + struct ixgbe_netlist_info *netlist); int ixgbe_read_sr_word_aci(struct ixgbe_hw *hw, u16 offset, u16 *data); int 
ixgbe_read_flat_nvm(struct ixgbe_hw *hw, u32 offset, u32 *length, u8 *data, bool read_shadow_ram); @@ -77,5 +83,18 @@ int ixgbe_read_ee_aci_buffer_e610(struct ixgbe_hw *hw, u16 offset, u16 words, u16 *data); int ixgbe_validate_eeprom_checksum_e610(struct ixgbe_hw *hw, u16 *checksum_val); int ixgbe_reset_hw_e610(struct ixgbe_hw *hw); +int ixgbe_get_flash_data(struct ixgbe_hw *hw); +int ixgbe_aci_nvm_update_empr(struct ixgbe_hw *hw); +int ixgbe_nvm_set_pkg_data(struct ixgbe_hw *hw, bool del_pkg_data_flag, + u8 *data, u16 length); +int ixgbe_nvm_pass_component_tbl(struct ixgbe_hw *hw, u8 *data, u16 length, + u8 transfer_flag, u8 *comp_response, + u8 *comp_response_code); +int ixgbe_aci_erase_nvm(struct ixgbe_hw *hw, u16 module_typeid); +int ixgbe_aci_update_nvm(struct ixgbe_hw *hw, u16 module_typeid, + u32 offset, u16 length, void *data, + bool last_command, u8 command_flags); +int ixgbe_nvm_write_activate(struct ixgbe_hw *hw, u16 cmd_flags, + u8 *response_flags); #endif /* _IXGBE_E610_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index f03925c1f521..d8a919ab7027 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -213,7 +213,7 @@ static void ixgbe_set_advertising_10gtypes(struct ixgbe_hw *hw, static int ixgbe_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; ixgbe_link_speed supported_link; bool autoneg = false; @@ -458,7 +458,7 @@ static int ixgbe_get_link_ksettings(struct net_device *netdev, static int ixgbe_set_link_ksettings(struct net_device *netdev, const struct ethtool_link_ksettings *cmd) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; u32 advertised, old; int err = 0; @@ -535,7 +535,7 @@ static int ixgbe_set_link_ksettings(struct net_device *netdev, static void ixgbe_get_pause_stats(struct net_device *netdev, struct ethtool_pause_stats *stats) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw_stats *hwstats = &adapter->stats; stats->tx_pause_frames = hwstats->lxontxc + hwstats->lxofftxc; @@ -545,7 +545,7 @@ static void ixgbe_get_pause_stats(struct net_device *netdev, static void ixgbe_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; if (ixgbe_device_supports_autoneg_fc(hw) && @@ -564,10 +564,26 @@ static void ixgbe_get_pauseparam(struct net_device *netdev, } } +static void ixgbe_set_pauseparam_finalize(struct net_device *netdev, + struct ixgbe_fc_info *fc) +{ + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); + struct ixgbe_hw *hw = &adapter->hw; + + /* If the thing changed then we'll update and use new autoneg. 
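+	 * I.e. if the requested flow control settings differ from the
+	 * current hw->fc, adopt them and apply via a reinit when the
+	 * interface is running, or a full reset otherwise.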
*/ + if (memcmp(fc, &hw->fc, sizeof(*fc))) { + hw->fc = *fc; + if (netif_running(netdev)) + ixgbe_reinit_locked(adapter); + else + ixgbe_reset(adapter); + } +} + static int ixgbe_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; struct ixgbe_fc_info fc = hw->fc; @@ -592,27 +608,52 @@ static int ixgbe_set_pauseparam(struct net_device *netdev, else fc.requested_mode = ixgbe_fc_none; - /* if the thing changed then we'll update and use new autoneg */ - if (memcmp(&fc, &hw->fc, sizeof(struct ixgbe_fc_info))) { - hw->fc = fc; - if (netif_running(netdev)) - ixgbe_reinit_locked(adapter); - else - ixgbe_reset(adapter); + ixgbe_set_pauseparam_finalize(netdev, &fc); + + return 0; +} + +static int ixgbe_set_pauseparam_e610(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_fc_info fc = hw->fc; + + if (!ixgbe_device_supports_autoneg_fc(hw)) + return -EOPNOTSUPP; + + if (pause->autoneg == AUTONEG_DISABLE) { + netdev_info(netdev, + "Cannot disable autonegotiation on this device.\n"); + return -EOPNOTSUPP; } + fc.disable_fc_autoneg = false; + + if (pause->rx_pause && pause->tx_pause) + fc.requested_mode = ixgbe_fc_full; + else if (pause->rx_pause) + fc.requested_mode = ixgbe_fc_rx_pause; + else if (pause->tx_pause) + fc.requested_mode = ixgbe_fc_tx_pause; + else + fc.requested_mode = ixgbe_fc_none; + + ixgbe_set_pauseparam_finalize(netdev, &fc); + return 0; } static u32 ixgbe_get_msglevel(struct net_device *netdev) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); return adapter->msg_enable; } static void ixgbe_set_msglevel(struct net_device *netdev, u32 data) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); adapter->msg_enable = data; } @@ -627,7 +668,7 @@ static int ixgbe_get_regs_len(struct net_device *netdev) static void ixgbe_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; u32 *regs_buff = p; u8 i; @@ -994,14 +1035,14 @@ static void ixgbe_get_regs(struct net_device *netdev, static int ixgbe_get_eeprom_len(struct net_device *netdev) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); return adapter->hw.eeprom.word_size * 2; } static int ixgbe_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, u8 *bytes) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; u16 *eeprom_buff; int first_word, last_word, eeprom_len; @@ -1037,7 +1078,7 @@ static int ixgbe_get_eeprom(struct net_device *netdev, static int ixgbe_set_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, u8 *bytes) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; u16 *eeprom_buff; void *ptr; @@ -1104,10 +1145,22 @@ err: return ret_val; } +void ixgbe_refresh_fw_version(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; 
+ + ixgbe_get_flash_data(hw); + ixgbe_set_fw_version_e610(adapter); +} + static void ixgbe_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); + + /* need to refresh info for e610 in case fw reloads at runtime */ + if (adapter->hw.mac.type == ixgbe_mac_e610) + ixgbe_refresh_fw_version(adapter); strscpy(drvinfo->driver, ixgbe_driver_name, sizeof(drvinfo->driver)); @@ -1161,7 +1214,7 @@ static void ixgbe_get_ringparam(struct net_device *netdev, struct kernel_ethtool_ringparam *kernel_ring, struct netlink_ext_ack *extack) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_ring *tx_ring = adapter->tx_ring[0]; struct ixgbe_ring *rx_ring = adapter->rx_ring[0]; @@ -1176,7 +1229,7 @@ static int ixgbe_set_ringparam(struct net_device *netdev, struct kernel_ethtool_ringparam *kernel_ring, struct netlink_ext_ack *extack) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_ring *temp_ring; int i, j, err = 0; u32 new_rx_count, new_tx_count; @@ -1336,7 +1389,7 @@ static int ixgbe_get_sset_count(struct net_device *netdev, int sset) static void ixgbe_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *net_stats; unsigned int start; @@ -1710,7 +1763,7 @@ static int ixgbe_eeprom_test(struct ixgbe_adapter *adapter, u64 *data) static irqreturn_t ixgbe_test_intr(int irq, void *data) { struct net_device *netdev = (struct net_device *) data; - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); adapter->test_icr |= IXGBE_READ_REG(&adapter->hw, IXGBE_EICR); @@ -2183,7 +2236,7 @@ out: static void ixgbe_diag_test(struct net_device *netdev, struct ethtool_test *eth_test, u64 *data) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); bool if_running = netif_running(netdev); if (ixgbe_removed(adapter->hw.hw_addr)) { @@ -2306,7 +2359,7 @@ static int ixgbe_wol_exclusion(struct ixgbe_adapter *adapter, static void ixgbe_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); wol->supported = WAKE_UCAST | WAKE_MCAST | WAKE_BCAST | WAKE_MAGIC; @@ -2328,7 +2381,7 @@ static void ixgbe_get_wol(struct net_device *netdev, static int ixgbe_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); if (wol->wolopts & (WAKE_PHY | WAKE_ARP | WAKE_MAGICSECURE | WAKE_FILTER)) @@ -2353,9 +2406,53 @@ static int ixgbe_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return 0; } +static int ixgbe_set_wol_acpi(struct net_device *netdev, + struct ethtool_wolinfo *wol) +{ + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u32 grc; + + if (ixgbe_wol_exclusion(adapter, wol)) + return wol->wolopts ?
-EOPNOTSUPP : 0; + + /* disable APM wakeup */ + grc = IXGBE_READ_REG(hw, IXGBE_GRC_X550EM_a); + grc &= ~IXGBE_GRC_APME; + IXGBE_WRITE_REG(hw, IXGBE_GRC_X550EM_a, grc); + + /* erase existing filters */ + IXGBE_WRITE_REG(hw, IXGBE_WUFC, 0); + adapter->wol = 0; + + if (wol->wolopts & WAKE_UCAST) + adapter->wol |= IXGBE_WUFC_EX; + if (wol->wolopts & WAKE_MCAST) + adapter->wol |= IXGBE_WUFC_MC; + if (wol->wolopts & WAKE_BCAST) + adapter->wol |= IXGBE_WUFC_BC; + + IXGBE_WRITE_REG(hw, IXGBE_WUC, IXGBE_WUC_PME_EN); + IXGBE_WRITE_REG(hw, IXGBE_WUFC, adapter->wol); + + hw->wol_enabled = adapter->wol; + device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); + + return 0; +} + +static int ixgbe_set_wol_e610(struct net_device *netdev, + struct ethtool_wolinfo *wol) +{ + if (wol->wolopts & (WAKE_UCAST | WAKE_MCAST | WAKE_BCAST)) + return ixgbe_set_wol_acpi(netdev, wol); + else + return ixgbe_set_wol(netdev, wol); +} + static int ixgbe_nway_reset(struct net_device *netdev) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); if (netif_running(netdev)) ixgbe_reinit_locked(adapter); @@ -2366,7 +2463,7 @@ static int ixgbe_nway_reset(struct net_device *netdev) static int ixgbe_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; if (!hw->mac.ops.led_on || !hw->mac.ops.led_off) @@ -2394,12 +2491,32 @@ static int ixgbe_set_phys_id(struct net_device *netdev, return 0; } +static int ixgbe_set_phys_id_e610(struct net_device *netdev, + enum ethtool_phys_id_state state) +{ + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); + bool led_active; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + led_active = true; + break; + case ETHTOOL_ID_INACTIVE: + led_active = false; + break; + default: + return -EOPNOTSUPP; + } + + return ixgbe_aci_set_port_id_led(&adapter->hw, !led_active); +} + static int ixgbe_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); /* only valid if in constant ITR mode */ if (adapter->rx_itr_setting <= 1) @@ -2455,7 +2572,7 @@ static int ixgbe_set_coalesce(struct net_device *netdev, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_q_vector *q_vector; int i; u16 tx_itr_param, rx_itr_param, tx_itr_prev; @@ -2681,7 +2798,7 @@ static int ixgbe_rss_indir_tbl_max(struct ixgbe_adapter *adapter) static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, u32 *rule_locs) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); int ret = -EOPNOTSUPP; switch (cmd->cmd) { @@ -3069,7 +3186,7 @@ static int ixgbe_set_rss_hash_opt(struct ixgbe_adapter *adapter, static int ixgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); int ret = -EOPNOTSUPP; switch (cmd->cmd) { @@ -3096,7 +3213,7 @@ static u32 ixgbe_get_rxfh_key_size(struct net_device *netdev) static u32 ixgbe_rss_indir_size(struct net_device *netdev) 
{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); return ixgbe_rss_indir_tbl_entries(adapter); } @@ -3116,7 +3233,7 @@ static void ixgbe_get_reta(struct ixgbe_adapter *adapter, u32 *indir) static int ixgbe_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); rxfh->hfunc = ETH_RSS_HASH_TOP; @@ -3134,7 +3251,7 @@ static int ixgbe_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); int i; u32 reta_entries = ixgbe_rss_indir_tbl_entries(adapter); @@ -3176,7 +3293,7 @@ static int ixgbe_set_rxfh(struct net_device *netdev, static int ixgbe_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); /* we always support timestamping disabled */ info->rx_filters = BIT(HWTSTAMP_FILTER_NONE); @@ -3252,7 +3369,7 @@ static unsigned int ixgbe_max_channels(struct ixgbe_adapter *adapter) static void ixgbe_get_channels(struct net_device *dev, struct ethtool_channels *ch) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); /* report maximum channels */ ch->max_combined = ixgbe_max_channels(adapter); @@ -3289,7 +3406,7 @@ static void ixgbe_get_channels(struct net_device *dev, static int ixgbe_set_channels(struct net_device *dev, struct ethtool_channels *ch) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); unsigned int count = ch->combined_count; u8 max_rss_indices = ixgbe_max_rss_indices(adapter); @@ -3327,7 +3444,7 @@ static int ixgbe_set_channels(struct net_device *dev, static int ixgbe_get_module_info(struct net_device *dev, struct ethtool_modinfo *modinfo) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct ixgbe_hw *hw = &adapter->hw; u8 sff8472_rev, addr_mode; bool page_swap = false; @@ -3373,7 +3490,7 @@ static int ixgbe_get_module_eeprom(struct net_device *dev, struct ethtool_eeprom *ee, u8 *data) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct ixgbe_hw *hw = &adapter->hw; int status = -EFAULT; u8 databyte = 0xFF; @@ -3469,7 +3586,7 @@ ixgbe_get_eee_fw(struct ixgbe_adapter *adapter, struct ethtool_keee *edata) static int ixgbe_get_eee(struct net_device *netdev, struct ethtool_keee *edata) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; if (!(adapter->flags2 & IXGBE_FLAG2_EEE_CAPABLE)) @@ -3483,7 +3600,7 @@ static int ixgbe_get_eee(struct net_device *netdev, struct ethtool_keee *edata) static int ixgbe_set_eee(struct net_device *netdev, struct ethtool_keee *edata) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; struct ethtool_keee eee_data; int ret_val; @@ -3538,7 +3655,7 @@ static int ixgbe_set_eee(struct net_device *netdev, struct ethtool_keee *edata) static u32 ixgbe_get_priv_flags(struct net_device *netdev) { - struct ixgbe_adapter *adapter = 
netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); u32 priv_flags = 0; if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY) @@ -3555,7 +3672,7 @@ static u32 ixgbe_get_priv_flags(struct net_device *netdev) static int ixgbe_set_priv_flags(struct net_device *netdev, u32 priv_flags) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); unsigned int flags2 = adapter->flags2; unsigned int i; @@ -3638,7 +3755,57 @@ static const struct ethtool_ops ixgbe_ethtool_ops = { .set_link_ksettings = ixgbe_set_link_ksettings, }; +static const struct ethtool_ops ixgbe_ethtool_ops_e610 = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS, + .get_drvinfo = ixgbe_get_drvinfo, + .get_regs_len = ixgbe_get_regs_len, + .get_regs = ixgbe_get_regs, + .get_wol = ixgbe_get_wol, + .set_wol = ixgbe_set_wol_e610, + .nway_reset = ixgbe_nway_reset, + .get_link = ethtool_op_get_link, + .get_eeprom_len = ixgbe_get_eeprom_len, + .get_eeprom = ixgbe_get_eeprom, + .set_eeprom = ixgbe_set_eeprom, + .get_ringparam = ixgbe_get_ringparam, + .set_ringparam = ixgbe_set_ringparam, + .get_pause_stats = ixgbe_get_pause_stats, + .get_pauseparam = ixgbe_get_pauseparam, + .set_pauseparam = ixgbe_set_pauseparam_e610, + .get_msglevel = ixgbe_get_msglevel, + .set_msglevel = ixgbe_set_msglevel, + .self_test = ixgbe_diag_test, + .get_strings = ixgbe_get_strings, + .set_phys_id = ixgbe_set_phys_id_e610, + .get_sset_count = ixgbe_get_sset_count, + .get_ethtool_stats = ixgbe_get_ethtool_stats, + .get_coalesce = ixgbe_get_coalesce, + .set_coalesce = ixgbe_set_coalesce, + .get_rxnfc = ixgbe_get_rxnfc, + .set_rxnfc = ixgbe_set_rxnfc, + .get_rxfh_indir_size = ixgbe_rss_indir_size, + .get_rxfh_key_size = ixgbe_get_rxfh_key_size, + .get_rxfh = ixgbe_get_rxfh, + .set_rxfh = ixgbe_set_rxfh, + .get_eee = ixgbe_get_eee, + .set_eee = ixgbe_set_eee, + .get_channels = ixgbe_get_channels, + .set_channels = ixgbe_set_channels, + .get_priv_flags = ixgbe_get_priv_flags, + .set_priv_flags = ixgbe_set_priv_flags, + .get_ts_info = ixgbe_get_ts_info, + .get_module_info = ixgbe_get_module_info, + .get_module_eeprom = ixgbe_get_module_eeprom, + .get_link_ksettings = ixgbe_get_link_ksettings, + .set_link_ksettings = ixgbe_set_link_ksettings, +}; + void ixgbe_set_ethtool_ops(struct net_device *netdev) { - netdev->ethtool_ops = &ixgbe_ethtool_ops; + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); + + if (adapter->hw.mac.type == ixgbe_mac_e610) + netdev->ethtool_ops = &ixgbe_ethtool_ops_e610; + else + netdev->ethtool_ops = &ixgbe_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index 955dced844a9..7dcf6ecd157b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -56,7 +56,7 @@ int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid) if (xid >= netdev->fcoe_ddp_xid) return 0; - adapter = netdev_priv(netdev); + adapter = ixgbe_from_netdev(netdev); fcoe = &adapter->fcoe; ddp = &fcoe->ddp[xid]; if (!ddp->udl) @@ -153,7 +153,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid, if (!netdev || !sgl) return 0; - adapter = netdev_priv(netdev); + adapter = ixgbe_from_netdev(netdev); if (xid >= netdev->fcoe_ddp_xid) { e_warn(drv, "xid=0x%x out-of-range\n", xid); return 0; @@ -834,7 +834,7 @@ static void ixgbe_fcoe_ddp_disable(struct ixgbe_adapter *adapter) */ int ixgbe_fcoe_enable(struct net_device *netdev) { - struct ixgbe_adapter 
*adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_fcoe *fcoe = &adapter->fcoe; atomic_inc(&fcoe->refcnt); @@ -881,7 +881,7 @@ int ixgbe_fcoe_enable(struct net_device *netdev) */ int ixgbe_fcoe_disable(struct net_device *netdev) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); if (!atomic_dec_and_test(&adapter->fcoe.refcnt)) return -EINVAL; @@ -927,7 +927,7 @@ int ixgbe_fcoe_disable(struct net_device *netdev) int ixgbe_fcoe_get_wwn(struct net_device *netdev, u64 *wwn, int type) { u16 prefix = 0xffff; - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_mac_info *mac = &adapter->hw.mac; switch (type) { @@ -967,7 +967,7 @@ int ixgbe_fcoe_get_wwn(struct net_device *netdev, u64 *wwn, int type) int ixgbe_fcoe_get_hbainfo(struct net_device *netdev, struct netdev_fcoe_hbainfo *info) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; u64 dsn; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fw_update.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fw_update.c new file mode 100644 index 000000000000..49d3b66add7e --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fw_update.c @@ -0,0 +1,707 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Intel Corporation. */ + +#include <linux/crc32.h> +#include <linux/pldmfw.h> +#include <linux/uuid.h> + +#include "ixgbe.h" +#include "ixgbe_fw_update.h" + +struct ixgbe_fwu_priv { + struct pldmfw context; + + struct ixgbe_adapter *adapter; + struct netlink_ext_ack *extack; + + /* Track which NVM banks to activate at the end of the update */ + u8 activate_flags; + bool emp_reset_available; +}; + +/** + * ixgbe_send_package_data - Send record package data to firmware + * @context: PLDM fw update structure + * @data: pointer to the package data + * @length: length of the package data + * + * Send a copy of the package data associated with the PLDM record matching + * this device to the firmware. + * + * Note that this function sends an AdminQ command that will fail unless the + * NVM resource has been acquired. + * + * Return: zero on success, or a negative error code on failure. + */ +static int ixgbe_send_package_data(struct pldmfw *context, + const u8 *data, u16 length) +{ + struct ixgbe_fwu_priv *priv = container_of(context, + struct ixgbe_fwu_priv, + context); + struct ixgbe_adapter *adapter = priv->adapter; + struct ixgbe_hw *hw = &adapter->hw; + u8 *package_data; + int err; + + package_data = kmemdup(data, length, GFP_KERNEL); + if (!package_data) + return -ENOMEM; + + err = ixgbe_nvm_set_pkg_data(hw, false, package_data, length); + + kfree(package_data); + + return err; +} + +/** + * ixgbe_check_component_response - Report firmware response to a component + * @adapter: device private data structure + * @response: indicates whether this component can be updated + * @code: code indicating reason for response + * @extack: netlink extended ACK structure + * + * Check whether firmware indicates if this component can be updated. Report + * a suitable error message over the netlink extended ACK if the component + * cannot be updated. + * + * Return: 0 if the component can be updated, or -ECANCELED if the + * firmware indicates the component cannot be updated. 
+ */ +static int ixgbe_check_component_response(struct ixgbe_adapter *adapter, + u8 response, u8 code, + struct netlink_ext_ack *extack) +{ + struct ixgbe_hw *hw = &adapter->hw; + + switch (response) { + case IXGBE_ACI_NVM_PASS_COMP_CAN_BE_UPDATED: + /* Firmware indicated this update is good to proceed. */ + return 0; + case IXGBE_ACI_NVM_PASS_COMP_CAN_MAY_BE_UPDATEABLE: + NL_SET_ERR_MSG_MOD(extack, + "Firmware recommends not updating, as it may result in a downgrade. Continuing anyways"); + return 0; + case IXGBE_ACI_NVM_PASS_COMP_CAN_NOT_BE_UPDATED: + NL_SET_ERR_MSG_MOD(extack, "Firmware has rejected updating."); + break; + case IXGBE_ACI_NVM_PASS_COMP_PARTIAL_CHECK: + if (hw->mac.ops.fw_recovery_mode && + hw->mac.ops.fw_recovery_mode(hw)) + return 0; + break; + } + + switch (code) { + case IXGBE_ACI_NVM_PASS_COMP_STAMP_IDENTICAL_CODE: + NL_SET_ERR_MSG_MOD(extack, + "Component comparison stamp is identical to running image"); + break; + case IXGBE_ACI_NVM_PASS_COMP_STAMP_LOWER: + NL_SET_ERR_MSG_MOD(extack, + "Component comparison stamp is lower than running image"); + break; + case IXGBE_ACI_NVM_PASS_COMP_INVALID_STAMP_CODE: + NL_SET_ERR_MSG_MOD(extack, + "Component comparison stamp is invalid"); + break; + case IXGBE_ACI_NVM_PASS_COMP_CONFLICT_CODE: + NL_SET_ERR_MSG_MOD(extack, + "Component table conflict occurred"); + break; + case IXGBE_ACI_NVM_PASS_COMP_PRE_REQ_NOT_MET_CODE: + NL_SET_ERR_MSG_MOD(extack, "Component pre-requisites not met"); + break; + case IXGBE_ACI_NVM_PASS_COMP_NOT_SUPPORTED_CODE: + NL_SET_ERR_MSG_MOD(extack, "Component not supported"); + break; + case IXGBE_ACI_NVM_PASS_COMP_CANNOT_DOWNGRADE_CODE: + NL_SET_ERR_MSG_MOD(extack, "Component cannot be downgraded"); + break; + case IXGBE_ACI_NVM_PASS_COMP_INCOMPLETE_IMAGE_CODE: + NL_SET_ERR_MSG_MOD(extack, "Incomplete component image"); + break; + case IXGBE_ACI_NVM_PASS_COMP_VER_STR_IDENTICAL_CODE: + NL_SET_ERR_MSG_MOD(extack, + "Component version is identical to running image"); + break; + case IXGBE_ACI_NVM_PASS_COMP_VER_STR_LOWER_CODE: + NL_SET_ERR_MSG_MOD(extack, + "Component version is lower than the running image"); + break; + default: + NL_SET_ERR_MSG_MOD(extack, + "Received unexpected response code from firmware"); + break; + } + + return -ECANCELED; +} + +/** + * ixgbe_send_component_table - Send PLDM component table to firmware + * @context: PLDM fw update structure + * @component: the component to process + * @transfer_flag: relative transfer order of this component + * + * Read relevant data from the component and forward it to the device + * firmware. Check the response to determine if the firmware indicates that + * the update can proceed. + * + * This function sends ACI commands related to the NVM, and assumes that + * the NVM resource has been acquired. + * + * Return: 0 on success, or a negative error code on failure. 
+ */ +static int ixgbe_send_component_table(struct pldmfw *context, + struct pldmfw_component *component, + u8 transfer_flag) +{ + struct ixgbe_fwu_priv *priv = container_of(context, + struct ixgbe_fwu_priv, + context); + struct ixgbe_adapter *adapter = priv->adapter; + struct netlink_ext_ack *extack = priv->extack; + struct ixgbe_aci_cmd_nvm_comp_tbl *comp_tbl; + u8 comp_response, comp_response_code; + struct ixgbe_hw *hw = &adapter->hw; + size_t length; + int err; + + switch (component->identifier) { + case NVM_COMP_ID_OROM: + case NVM_COMP_ID_NVM: + case NVM_COMP_ID_NETLIST: + break; + default: + NL_SET_ERR_MSG_MOD(extack, + "Unable to update due to unknown firmware component"); + return -EOPNOTSUPP; + } + + length = struct_size(comp_tbl, cvs, component->version_len); + comp_tbl = kzalloc(length, GFP_KERNEL); + if (!comp_tbl) + return -ENOMEM; + + comp_tbl->comp_class = cpu_to_le16(component->classification); + comp_tbl->comp_id = cpu_to_le16(component->identifier); + comp_tbl->comp_class_idx = FWU_COMP_CLASS_IDX_NOT_USE; + comp_tbl->comp_cmp_stamp = cpu_to_le32(component->comparison_stamp); + comp_tbl->cvs_type = component->version_type; + comp_tbl->cvs_len = component->version_len; + + memcpy(comp_tbl->cvs, component->version_string, + component->version_len); + + err = ixgbe_nvm_pass_component_tbl(hw, (u8 *)comp_tbl, length, + transfer_flag, &comp_response, + &comp_response_code); + + kfree(comp_tbl); + + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to transfer component table to firmware"); + return -EIO; + } + + return ixgbe_check_component_response(adapter, + comp_response, + comp_response_code, extack); +} + +/** + * ixgbe_write_one_nvm_block - Write an NVM block and await completion response + * @adapter: the PF data structure + * @module: the module to write to + * @offset: offset in bytes + * @block_size: size of the block to write, up to 4k + * @block: pointer to block of data to write + * @last_cmd: whether this is the last command + * @extack: netlink extended ACK structure + * + * Write a block of data to a flash module, and await the completion + * response message from firmware. + * + * Note this function assumes the caller has acquired the NVM resource. + * + * On successful return, reset level indicates the device reset required to + * complete the update. + * + * 0 - IXGBE_ACI_NVM_POR_FLAG - A full power on is required + * 1 - IXGBE_ACI_NVM_PERST_FLAG - A cold PCIe reset is required + * 2 - IXGBE_ACI_NVM_EMPR_FLAG - An EMP reset is required + * + * Return: 0 on success, or a negative error code on failure. + */ +static int ixgbe_write_one_nvm_block(struct ixgbe_adapter *adapter, + u16 module, u32 offset, + u16 block_size, u8 *block, bool last_cmd, + struct netlink_ext_ack *extack) +{ + struct ixgbe_hw *hw = &adapter->hw; + + return ixgbe_aci_update_nvm(hw, module, offset, block_size, block, + last_cmd, 0); +}
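As an aside before the write loop that follows: ixgbe_write_nvm_module() walks the component image in ACI-sized chunks and flags the final transfer as last_cmd. A toy, userspace-runnable sketch of just that arithmetic (MAX_BUF is a stand-in for IXGBE_ACI_MAX_BUFFER_SIZE, whose real value is defined elsewhere in this series):

#include <stdbool.h>
#include <stdio.h>

#define MAX_BUF 4096 /* stand-in for IXGBE_ACI_MAX_BUFFER_SIZE */

int main(void)
{
	unsigned int length = 10000, offset = 0;	/* 10000-byte example image */
	bool last_cmd;

	do {
		unsigned int block_size = length - offset < MAX_BUF ?
					  length - offset : MAX_BUF;

		/* last_cmd is set exactly when this block reaches the end */
		last_cmd = !(offset + block_size < length);
		printf("write %u bytes at offset %u%s\n", block_size, offset,
		       last_cmd ? " (last_cmd)" : "");
		offset += block_size;
	} while (!last_cmd);

	return 0;
}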
+ +/** + * ixgbe_write_nvm_module - Write data to an NVM module + * @adapter: the PF driver structure + * @module: the module id to program + * @component: the name of the component being updated + * @image: buffer of image data to write to the NVM + * @length: length of the buffer + * @extack: netlink extended ACK structure + * + * Loop over the data for a given NVM module and program it in 4 Kb + * blocks. Notify devlink core of progress after each block is programmed. + * + * Note this function assumes the caller has acquired the NVM resource. + * + * Return: 0 on success, or a negative error code on failure. + */ +static int ixgbe_write_nvm_module(struct ixgbe_adapter *adapter, u16 module, + const char *component, const u8 *image, + u32 length, + struct netlink_ext_ack *extack) +{ + struct devlink *devlink = adapter->devlink; + u32 offset = 0; + bool last_cmd; + u8 *block; + int err; + + devlink_flash_update_status_notify(devlink, "Flashing", + component, 0, length); + + block = kzalloc(IXGBE_ACI_MAX_BUFFER_SIZE, GFP_KERNEL); + if (!block) + return -ENOMEM; + + do { + u32 block_size; + + block_size = min_t(u32, IXGBE_ACI_MAX_BUFFER_SIZE, + length - offset); + last_cmd = !(offset + block_size < length); + + memcpy(block, image + offset, block_size); + + err = ixgbe_write_one_nvm_block(adapter, module, offset, + block_size, block, last_cmd, + extack); + if (err) + break; + + offset += block_size; + + devlink_flash_update_status_notify(devlink, "Flashing", + component, offset, length); + } while (!last_cmd); + + if (err) + devlink_flash_update_status_notify(devlink, "Flashing failed", + component, length, length); + else + devlink_flash_update_status_notify(devlink, "Flashing done", + component, length, length); + + kfree(block); + + return err; +} + +/* Length in seconds to wait before timing out when erasing a flash module. + * Yes, erasing really can take minutes to complete. + */ +#define IXGBE_FW_ERASE_TIMEOUT 300 + +/** + * ixgbe_erase_nvm_module - Erase an NVM module and await firmware completion + * @adapter: the PF data structure + * @module: the module to erase + * @component: name of the component being updated + * @extack: netlink extended ACK structure + * + * Erase the inactive NVM bank associated with this module, and await + * a completion response message from firmware. + * + * Note this function assumes the caller has acquired the NVM resource. + * + * Return: 0 on success, or a negative error code on failure. + */ +static int ixgbe_erase_nvm_module(struct ixgbe_adapter *adapter, u16 module, + const char *component, + struct netlink_ext_ack *extack) +{ + struct devlink *devlink = adapter->devlink; + struct ixgbe_hw *hw = &adapter->hw; + int err; + + devlink_flash_update_timeout_notify(devlink, "Erasing", component, + IXGBE_FW_ERASE_TIMEOUT); + + err = ixgbe_aci_erase_nvm(hw, module); + if (err) + devlink_flash_update_status_notify(devlink, "Erasing failed", + component, 0, 0); + else + devlink_flash_update_status_notify(devlink, "Erasing done", + component, 0, 0); + + return err; +}
+ +/** + * ixgbe_switch_flash_banks - Tell firmware to switch NVM banks + * @adapter: Pointer to the PF data structure + * @activate_flags: flags used for the activation command + * @emp_reset_available: on return, indicates if EMP reset is available + * @extack: netlink extended ACK structure + * + * Notify firmware to activate the newly written flash banks, and wait for the + * firmware response. + * + * Return: 0 on success or an error code on failure. + */ +static int ixgbe_switch_flash_banks(struct ixgbe_adapter *adapter, + u8 activate_flags, + bool *emp_reset_available, + struct netlink_ext_ack *extack) +{ + struct ixgbe_hw *hw = &adapter->hw; + u8 response_flags; + int err; + + err = ixgbe_nvm_write_activate(hw, activate_flags, &response_flags); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to switch active flash banks"); + return err; + } + + if (emp_reset_available) { + if (hw->dev_caps.common_cap.reset_restrict_support) + *emp_reset_available = + response_flags & IXGBE_ACI_NVM_EMPR_ENA; + else + *emp_reset_available = true; + } + + return 0; +} + +/** + * ixgbe_flash_component - Flash a component of the NVM + * @context: PLDM fw update structure + * @component: the component table to program + * + * Program the flash contents for a given component. First, determine the + * module id. Then, erase the secondary bank for this module. Finally, write + * the contents of the component to the NVM. + * + * Note this function assumes the caller has acquired the NVM resource. + * + * Return: 0 on success, or a negative error code on failure. + */ +static int ixgbe_flash_component(struct pldmfw *context, + struct pldmfw_component *component) +{ + struct ixgbe_fwu_priv *priv = container_of(context, + struct ixgbe_fwu_priv, + context); + struct netlink_ext_ack *extack = priv->extack; + struct ixgbe_adapter *adapter = priv->adapter; + const char *name; + u16 module; + int err; + u8 flag; + + switch (component->identifier) { + case NVM_COMP_ID_OROM: + module = IXGBE_E610_SR_1ST_OROM_BANK_PTR; + flag = IXGBE_ACI_NVM_ACTIV_SEL_OROM; + name = "fw.undi"; + break; + case NVM_COMP_ID_NVM: + module = IXGBE_E610_SR_1ST_NVM_BANK_PTR; + flag = IXGBE_ACI_NVM_ACTIV_SEL_NVM; + name = "fw.mgmt"; + break; + case NVM_COMP_ID_NETLIST: + module = IXGBE_E610_SR_NETLIST_BANK_PTR; + flag = IXGBE_ACI_NVM_ACTIV_SEL_NETLIST; + name = "fw.netlist"; + break; + + default: + return -EOPNOTSUPP; + } + + /* Mark this component for activating at the end. */ + priv->activate_flags |= flag; + + err = ixgbe_erase_nvm_module(adapter, module, name, extack); + if (err) + return err; + + return ixgbe_write_nvm_module(adapter, module, name, + component->component_data, + component->component_size, extack); +}
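For orientation before the last callback is defined: pldmfw_flash_image() drives the ops registered in ixgbe_fwu_ops_e610 below roughly in this order (a summary of how this file uses the pldmfw contract, not a verbatim trace):

/*
 * match_record()         - select the PLDM record matching this PCI device
 * send_package_data()    - forward the record's package data to firmware
 * send_component_table() - firmware accepts or rejects each component
 * flash_component()      - erase the inactive bank, then write the image
 * finalize_update()      - activate the freshly written banks
 */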
+ +/** + * ixgbe_finalize_update - Perform last steps to complete device update + * @context: PLDM fw update structure + * + * Called as the last step of the update process. Complete the update by + * telling the firmware to switch active banks, and perform a reset if + * configured. + * + * Return: 0 on success, or an error code on failure. + */ +static int ixgbe_finalize_update(struct pldmfw *context) +{ + struct ixgbe_fwu_priv *priv = container_of(context, + struct ixgbe_fwu_priv, + context); + struct ixgbe_adapter *adapter = priv->adapter; + struct netlink_ext_ack *extack = priv->extack; + struct devlink *devlink = adapter->devlink; + int err; + + /* Finally, notify firmware to activate the written NVM banks */ + err = ixgbe_switch_flash_banks(adapter, priv->activate_flags, + &priv->emp_reset_available, extack); + if (err) + return err; + + adapter->fw_emp_reset_disabled = !priv->emp_reset_available; + + if (!adapter->fw_emp_reset_disabled) + devlink_flash_update_status_notify(devlink, + "Suggested is to activate new firmware by devlink reload, if it doesn't work then a power cycle is required", + NULL, 0, 0); + + return 0; +} + +static const struct pldmfw_ops ixgbe_fwu_ops_e610 = { + .match_record = &pldmfw_op_pci_match_record, + .send_package_data = &ixgbe_send_package_data, + .send_component_table = &ixgbe_send_component_table, + .flash_component = &ixgbe_flash_component, + .finalize_update = &ixgbe_finalize_update, +}; + +/** + * ixgbe_get_pending_updates - Check if the component has a pending update + * @adapter: the PF driver structure + * @pending: on return, bitmap of updates pending + * @extack: Netlink extended ACK + * + * Check if the device has any pending updates on any flash components. + * + * Return: 0 on success, or a negative error code on failure. On success, + * @pending is set to the bitmap of pending updates. + */ +int ixgbe_get_pending_updates(struct ixgbe_adapter *adapter, u8 *pending, + struct netlink_ext_ack *extack) +{ + struct ixgbe_hw_dev_caps *dev_caps; + struct ixgbe_hw *hw = &adapter->hw; + int err; + + dev_caps = kzalloc(sizeof(*dev_caps), GFP_KERNEL); + if (!dev_caps) + return -ENOMEM; + + err = ixgbe_discover_dev_caps(hw, dev_caps); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Unable to read device capabilities"); + kfree(dev_caps); + return -EIO; + } + + *pending = 0; + + if (dev_caps->common_cap.nvm_update_pending_nvm) + *pending |= IXGBE_ACI_NVM_ACTIV_SEL_NVM; + + if (dev_caps->common_cap.nvm_update_pending_orom) + *pending |= IXGBE_ACI_NVM_ACTIV_SEL_OROM; + + if (dev_caps->common_cap.nvm_update_pending_netlist) + *pending |= IXGBE_ACI_NVM_ACTIV_SEL_NETLIST; + + kfree(dev_caps); + + return 0; +}
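The cancel helper below first narrows the pending bitmap to the component being updated. A self-contained toy of that masking, with hypothetical bit values standing in for the IXGBE_ACI_NVM_ACTIV_SEL_* flags:

#include <stdio.h>
#include <string.h>

#define SEL_NVM		0x1 /* stand-ins for IXGBE_ACI_NVM_ACTIV_SEL_* */
#define SEL_OROM	0x2
#define SEL_NETLIST	0x4

int main(void)
{
	unsigned int pending = SEL_NVM | SEL_NETLIST; /* example device state */
	const char *component = "fw.netlist";

	/* Keep only the bit for the requested component, as the driver does. */
	if (!strcmp(component, "fw.mgmt"))
		pending &= SEL_NVM;
	else if (!strcmp(component, "fw.undi"))
		pending &= SEL_OROM;
	else if (!strcmp(component, "fw.netlist"))
		pending &= SEL_NETLIST;

	printf("cancel needed: %s\n", pending ? "yes" : "no");
	return 0;
}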
+ +/** + * ixgbe_cancel_pending_update - Cancel any pending update for a component + * @adapter: the PF driver structure + * @component: if not NULL, the name of the component being updated + * @extack: Netlink extended ACK structure + * + * Cancel any pending update for the specified component. If component is + * NULL, all device updates will be canceled. + * + * Return: 0 on success, or a negative error code on failure. + */ +static int ixgbe_cancel_pending_update(struct ixgbe_adapter *adapter, + const char *component, + struct netlink_ext_ack *extack) +{ + struct devlink *devlink = adapter->devlink; + struct ixgbe_hw *hw = &adapter->hw; + u8 pending; + int err; + + err = ixgbe_get_pending_updates(adapter, &pending, extack); + if (err) + return err; + + /* If the flash_update request is for a specific component, ignore all + * of the other components. + */ + if (component) { + if (strcmp(component, "fw.mgmt") == 0) + pending &= IXGBE_ACI_NVM_ACTIV_SEL_NVM; + else if (strcmp(component, "fw.undi") == 0) + pending &= IXGBE_ACI_NVM_ACTIV_SEL_OROM; + else if (strcmp(component, "fw.netlist") == 0) + pending &= IXGBE_ACI_NVM_ACTIV_SEL_NETLIST; + else + return -EINVAL; + } + + /* There is no previous pending update, so this request may continue */ + if (!pending) + return 0; + + /* In order to allow overwriting a previous pending update, notify + * firmware to cancel that update by issuing the appropriate command. + */ + devlink_flash_update_status_notify(devlink, + "Canceling previous pending update", + component, 0, 0); + + err = ixgbe_acquire_nvm(hw, IXGBE_RES_WRITE); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to acquire device flash lock"); + return -EIO; + } + + pending |= IXGBE_ACI_NVM_REVERT_LAST_ACTIV; + err = ixgbe_switch_flash_banks(adapter, pending, NULL, extack); + + ixgbe_release_nvm(hw); + + return err; +}
+ +/** + * ixgbe_flash_pldm_image - Write a PLDM-formatted firmware image to the device + * @devlink: pointer to devlink associated with the device to update + * @params: devlink flash update parameters + * @extack: netlink extended ACK structure + * + * Parse the data for a given firmware file, verifying that it is a valid + * PLDM-formatted image that matches this device. + * + * Extract the device record Package Data and Component Tables and send them + * to the firmware. Extract and write the flash data for each of the three + * main flash components, "fw.mgmt", "fw.undi", and "fw.netlist". Notify + * firmware once the data is written to the inactive banks. + * + * Return: 0 on success or a negative error code on failure. + */ +int ixgbe_flash_pldm_image(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + struct ixgbe_adapter *adapter = devlink_priv(devlink); + struct device *dev = &adapter->pdev->dev; + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_fwu_priv priv; + u8 preservation; + int err; + + if (hw->mac.type != ixgbe_mac_e610) + return -EOPNOTSUPP; + + switch (params->overwrite_mask) { + case 0: + /* preserve all settings and identifiers */ + preservation = IXGBE_ACI_NVM_PRESERVE_ALL; + break; + case DEVLINK_FLASH_OVERWRITE_SETTINGS: + /* Overwrite settings, but preserve vital information such as + * device identifiers. + */ + preservation = IXGBE_ACI_NVM_PRESERVE_SELECTED; + break; + case (DEVLINK_FLASH_OVERWRITE_SETTINGS | + DEVLINK_FLASH_OVERWRITE_IDENTIFIERS): + /* overwrite both settings and identifiers, preserve nothing */ + preservation = IXGBE_ACI_NVM_NO_PRESERVATION; + break; + default: + NL_SET_ERR_MSG_MOD(extack, + "Requested overwrite mask is not supported"); + return -EOPNOTSUPP; + } + + /* Capabilities cannot be read in recovery mode, so a missing + * nvm_unified_update bit must not be treated as an error. + */ + if (!hw->dev_caps.common_cap.nvm_unified_update && + (hw->mac.ops.fw_recovery_mode && + !hw->mac.ops.fw_recovery_mode(hw))) { + NL_SET_ERR_MSG_MOD(extack, + "Current firmware does not support unified update"); + return -EOPNOTSUPP; + } + + memset(&priv, 0, sizeof(priv)); + + priv.context.ops = &ixgbe_fwu_ops_e610; + priv.context.dev = dev; + priv.extack = extack; + priv.adapter = adapter; + priv.activate_flags = preservation; + + devlink_flash_update_status_notify(devlink, + "Preparing to flash", NULL, 0, 0); + + err = ixgbe_cancel_pending_update(adapter, NULL, extack); + if (err) + return err; + + err = ixgbe_acquire_nvm(hw, IXGBE_RES_WRITE); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to acquire device flash lock"); + return -EIO; + } + + err = pldmfw_flash_image(&priv.context, params->fw); + if (err == -ENOENT) { + NL_SET_ERR_MSG_MOD(extack, + "Firmware image has no record matching this device"); + } else if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to flash PLDM image"); + } + + ixgbe_release_nvm(hw); + + return err; +} diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fw_update.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_fw_update.h new file mode 100644 index 000000000000..abdd708c93df --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fw_update.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2025 Intel Corporation.
*/ + +#ifndef _IXGBE_FW_UPDATE_H_ +#define _IXGBE_FW_UPDATE_H_ + +int ixgbe_flash_pldm_image(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack); +int ixgbe_get_pending_updates(struct ixgbe_adapter *adapter, u8 *pending, + struct netlink_ext_ack *extack); +#endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 07ea1954a276..648a7c618cd1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -478,7 +478,7 @@ static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs, static int ixgbe_ipsec_check_mgmt_ip(struct xfrm_state *xs) { struct net_device *dev = xs->xso.real_dev; - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct ixgbe_hw *hw = &adapter->hw; u32 mfval, manc, reg; int num_filters = 4; @@ -563,7 +563,7 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs, struct netlink_ext_ack *extack) { struct net_device *dev = xs->xso.real_dev; - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct ixgbe_ipsec *ipsec = adapter->ipsec; struct ixgbe_hw *hw = &adapter->hw; int checked, match, first; @@ -757,7 +757,7 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs, static void ixgbe_ipsec_del_sa(struct xfrm_state *xs) { struct net_device *dev = xs->xso.real_dev; - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct ixgbe_ipsec *ipsec = adapter->ipsec; struct ixgbe_hw *hw = &adapter->hw; u32 zerobuf[4] = {0, 0, 0, 0}; @@ -1052,7 +1052,7 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, struct ixgbe_tx_buffer *first, struct ixgbe_ipsec_tx_data *itd) { - struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(tx_ring->netdev); struct ixgbe_ipsec *ipsec = adapter->ipsec; struct xfrm_state *xs; struct sec_path *sp; @@ -1142,7 +1142,7 @@ void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb) { - struct ixgbe_adapter *adapter = netdev_priv(rx_ring->netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(rx_ring->netdev); __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; __le16 ipsec_pkt_types = cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_AH | IXGBE_RXDADV_PKTTYPE_IPSEC_ESP); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index a2718218963e..03d31e5b131d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -49,6 +49,7 @@ #include "ixgbe_sriov.h" #include "ixgbe_model.h" #include "ixgbe_txrx_common.h" +#include "devlink/devlink.h" char ixgbe_driver_name[] = "ixgbe"; static const char ixgbe_driver_string[] = @@ -1095,7 +1096,7 @@ static void ixgbe_tx_timeout_reset(struct ixgbe_adapter *adapter) static int ixgbe_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; u32 bcnrc_val = ixgbe_link_mbps(adapter); @@ -4678,7 +4679,7 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) static int ixgbe_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct 
ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; /* add VID to filter table */ @@ -4737,7 +4738,7 @@ void ixgbe_update_pf_promisc_vlvf(struct ixgbe_adapter *adapter, u32 vid) static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; /* remove VID from filter table */ @@ -4962,7 +4963,7 @@ static void ixgbe_restore_vlan(struct ixgbe_adapter *adapter) **/ static int ixgbe_write_mc_addr_list(struct net_device *netdev) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; if (!netif_running(netdev)) @@ -5138,7 +5139,7 @@ int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); int ret; ret = ixgbe_add_mac_filter(adapter, addr, VMDQ_P(0)); @@ -5148,7 +5149,7 @@ static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr) static int ixgbe_uc_unsync(struct net_device *netdev, const unsigned char *addr) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); ixgbe_del_mac_filter(adapter, addr, VMDQ_P(0)); @@ -5166,7 +5167,7 @@ static int ixgbe_uc_unsync(struct net_device *netdev, const unsigned char *addr) **/ void ixgbe_set_rx_mode(struct net_device *netdev) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; u32 fctrl, vmolr = IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE; netdev_features_t features = netdev->features; @@ -5268,7 +5269,7 @@ static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter) static int ixgbe_udp_tunnel_sync(struct net_device *dev, unsigned int table) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct ixgbe_hw *hw = &adapter->hw; struct udp_tunnel_info ti; @@ -6600,7 +6601,7 @@ static void ixgbe_set_eee_capable(struct ixgbe_adapter *adapter) **/ static void ixgbe_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); /* Do the reset outside of interrupt context */ ixgbe_tx_timeout_reset(adapter); @@ -6849,7 +6850,7 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, adapter->tx_work_limit = IXGBE_DEFAULT_TX_WORK; /* initialize eeprom parameters */ - if (ixgbe_init_eeprom_params_generic(hw)) { + if (hw->eeprom.ops.init_params(hw)) { e_dev_err("EEPROM initialization failed\n"); return -EIO; } @@ -7165,7 +7166,7 @@ static int ixgbe_max_xdp_frame_size(struct ixgbe_adapter *adapter) **/ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); if (ixgbe_enabled_xdp_adapter(adapter)) { int new_frame_size = new_mtu + IXGBE_PKT_HDR_PAD; @@ -7212,7 +7213,7 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) **/ int ixgbe_open(struct net_device *netdev) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = 
ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; int err, queues; @@ -7316,7 +7317,7 @@ static void ixgbe_close_suspend(struct ixgbe_adapter *adapter) **/ int ixgbe_close(struct net_device *netdev) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); ixgbe_ptp_stop(adapter); @@ -8364,6 +8365,34 @@ static void ixgbe_reset_subtask(struct ixgbe_adapter *adapter) rtnl_unlock(); } +static int ixgbe_check_fw_api_mismatch(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + + if (hw->mac.type != ixgbe_mac_e610) + return 0; + + if (hw->mac.ops.get_fw_ver && hw->mac.ops.get_fw_ver(hw)) + return 0; + + if (hw->api_maj_ver > IXGBE_FW_API_VER_MAJOR) { + e_dev_err("The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n"); + + adapter->flags2 |= IXGBE_FLAG2_API_MISMATCH; + return -EOPNOTSUPP; + } else if (hw->api_maj_ver == IXGBE_FW_API_VER_MAJOR && + hw->api_min_ver > IXGBE_FW_API_VER_MINOR + IXGBE_FW_API_VER_DIFF_ALLOWED) { + e_dev_info("The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n"); + adapter->flags2 |= IXGBE_FLAG2_API_MISMATCH; + } else if (hw->api_maj_ver < IXGBE_FW_API_VER_MAJOR || + hw->api_min_ver < IXGBE_FW_API_VER_MINOR - IXGBE_FW_API_VER_DIFF_ALLOWED) { + e_dev_info("The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n"); + adapter->flags2 |= IXGBE_FLAG2_API_MISMATCH; + } + + return 0; +} + /** * ixgbe_check_fw_error - Check firmware for errors * @adapter: the adapter private structure
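ixgbe_check_fw_api_mismatch() above enforces a tolerance window around the driver's expected firmware API version: a newer major version is fatal, while minor versions are tolerated within IXGBE_FW_API_VER_DIFF_ALLOWED in either direction. A userspace toy model of the same policy (the version numbers are made up; the real IXGBE_FW_API_VER_* values are defined elsewhere in this series):

#include <stdio.h>

#define DRV_MAJ		1 /* stand-ins for IXGBE_FW_API_VER_MAJOR/MINOR */
#define DRV_MIN		7
#define DIFF_ALLOWED	2 /* stand-in for IXGBE_FW_API_VER_DIFF_ALLOWED */

static const char *classify(int maj, int min)
{
	if (maj > DRV_MAJ)
		return "fatal: NVM image too new, driver stops";
	if (maj == DRV_MAJ && min > DRV_MIN + DIFF_ALLOWED)
		return "warn: NVM image newer than expected";
	if (maj < DRV_MAJ || min < DRV_MIN - DIFF_ALLOWED)
		return "warn: NVM image older than expected";
	return "ok";
}

int main(void)
{
	printf("1.4 -> %s\n", classify(1, 4)); /* below the window */
	printf("1.8 -> %s\n", classify(1, 8)); /* inside the window */
	printf("2.0 -> %s\n", classify(2, 0)); /* newer major: fatal */
	return 0;
}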
@@ -8374,12 +8403,14 @@ static bool ixgbe_check_fw_error(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 fwsm; + int err; /* read fwsm.ext_err_ind register and log errors */ fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw)); + /* Skip if E610's FW is reloading; the warning would be misleading in that case */ if (fwsm & IXGBE_FWSM_EXT_ERR_IND_MASK || - !(fwsm & IXGBE_FWSM_FW_VAL_BIT)) + (!(fwsm & IXGBE_FWSM_FW_VAL_BIT) && hw->mac.type != ixgbe_mac_e610)) e_dev_warn("Warning firmware error detected FWSM: 0x%08X\n", fwsm); @@ -8387,10 +8418,53 @@ e_dev_err("Firmware recovery mode detected. Limiting functionality. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); return true; } + if (!(adapter->flags2 & IXGBE_FLAG2_API_MISMATCH)) { + err = ixgbe_check_fw_api_mismatch(adapter); + if (err) + return true; + } + + /* Return here if FW rollback mode has already been detected */ + if (adapter->flags2 & IXGBE_FLAG2_FW_ROLLBACK) + return false; + + if (hw->mac.ops.fw_rollback_mode && hw->mac.ops.fw_rollback_mode(hw)) { + struct ixgbe_nvm_info *nvm_info = &adapter->hw.flash.nvm; + char ver_buff[64] = ""; + + if (hw->mac.ops.get_fw_ver && hw->mac.ops.get_fw_ver(hw)) + goto no_version; + + if (hw->mac.ops.get_nvm_ver && + hw->mac.ops.get_nvm_ver(hw, nvm_info)) + goto no_version; + + snprintf(ver_buff, sizeof(ver_buff), + "Current version is NVM:%x.%x.%x, FW:%d.%d. ", + nvm_info->major, nvm_info->minor, nvm_info->eetrack, + hw->fw_maj_ver, hw->fw_min_ver); +no_version: + e_dev_warn("Firmware rollback mode detected. %sDevice may exhibit limited functionality. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware rollback mode.", + ver_buff); + + adapter->flags2 |= IXGBE_FLAG2_FW_ROLLBACK; + } return false; } +static void ixgbe_recovery_service_task(struct work_struct *work) +{ + struct ixgbe_adapter *adapter = container_of(work, + struct ixgbe_adapter, + service_task); + + ixgbe_handle_fw_event(adapter); + ixgbe_service_event_complete(adapter); + + mod_timer(&adapter->service_timer, jiffies + msecs_to_jiffies(100)); +} + /** * ixgbe_service_task - manages and runs subtasks * @work: pointer to work_struct containing our data @@ -8410,8 +8484,13 @@ static void ixgbe_service_task(struct work_struct *work) return; } if (ixgbe_check_fw_error(adapter)) { - if (!test_bit(__IXGBE_DOWN, &adapter->state)) + if (!test_bit(__IXGBE_DOWN, &adapter->state)) { + if (adapter->mii_bus) { + mdiobus_unregister(adapter->mii_bus); + adapter->mii_bus = NULL; + } unregister_netdev(adapter->netdev); + } ixgbe_service_event_complete(adapter); return; } @@ -9001,7 +9080,7 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb, switch (vlan_get_protocol(skb)) { case htons(ETH_P_FCOE): case htons(ETH_P_FIP): - adapter = netdev_priv(dev); + adapter = ixgbe_from_netdev(dev); if (!sb_dev && (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)) break; @@ -9260,7 +9339,7 @@ static netdev_tx_t __ixgbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev, struct ixgbe_ring *ring) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_ring *tx_ring; /* @@ -9292,7 +9371,7 @@ static netdev_tx_t ixgbe_xmit_frame(struct sk_buff *skb, **/ static int ixgbe_set_mac(struct net_device *netdev, void *p) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; struct sockaddr *addr = p; @@ -9310,7 +9389,7 @@ static int ixgbe_set_mac(struct net_device *netdev, void *p) static int ixgbe_mdio_read(struct net_device *netdev, int prtad, int devad, u16 addr) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; u16 value; int rc; @@ -9336,7 +9415,7 @@ ixgbe_mdio_read(struct net_device *netdev, int prtad, int devad, u16 addr) static int ixgbe_mdio_write(struct net_device *netdev, int prtad, int devad, u16 addr, u16 value) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; if (adapter->mii_bus) { @@ -9356,7 +9435,7 @@ static int ixgbe_mdio_write(struct net_device *netdev, int prtad, int devad, static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *req, int cmd) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); switch (cmd) { case SIOCSHWTSTAMP: @@ -9382,7 +9461,7 @@ static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *req, int cmd) static int ixgbe_add_sanmac_netdev(struct net_device *dev) { int err = 0; - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct ixgbe_hw *hw = &adapter->hw; if (is_valid_ether_addr(hw->mac.san_addr)) { @@ -9406,7 +9485,7 @@ static int ixgbe_add_sanmac_netdev(struct net_device *dev) static int ixgbe_del_sanmac_netdev(struct net_device *dev) { int err = 0; - struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct ixgbe_mac_info *mac = &adapter->hw.mac; if (is_valid_ether_addr(mac->san_addr)) { @@ -9437,7 +9516,7 @@ static void ixgbe_get_ring_stats64(struct rtnl_link_stats64 *stats, static void ixgbe_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); int i; rcu_read_lock(); @@ -9480,7 +9559,7 @@ static void ixgbe_get_stats64(struct net_device *netdev, static int ixgbe_ndo_get_vf_stats(struct net_device *netdev, int vf, struct ifla_vf_stats *vf_stats) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); if (vf < 0 || vf >= adapter->num_vfs) return -EINVAL; @@ -9597,7 +9676,7 @@ static int ixgbe_reassign_macvlan_pool(struct net_device *vdev, static void ixgbe_defrag_macvlan_pools(struct net_device *dev) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct netdev_nested_priv priv = { .data = (void *)adapter, }; @@ -9618,7 +9697,7 @@ static void ixgbe_defrag_macvlan_pools(struct net_device *dev) */ int ixgbe_setup_tc(struct net_device *dev, u8 tc) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct ixgbe_hw *hw = &adapter->hw; /* Hardware supports up to 8 traffic classes */ @@ -10176,7 +10255,7 @@ static LIST_HEAD(ixgbe_block_cb_list); static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); switch (type) { case TC_SETUP_BLOCK: @@ -10204,7 +10283,7 @@ void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter) #endif void ixgbe_do_reset(struct net_device *netdev) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); if (netif_running(netdev)) ixgbe_reinit_locked(adapter); @@ -10215,7 +10294,7 @@ void ixgbe_do_reset(struct net_device *netdev) static netdev_features_t ixgbe_fix_features(struct net_device *netdev, netdev_features_t features) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); /* If Rx checksum is disabled, then RSC/LRO should also be disabled */ if (!(features & NETIF_F_RXCSUM)) @@ -10252,7 +10331,7 @@ static void ixgbe_reset_l2fw_offload(struct ixgbe_adapter *adapter) static int ixgbe_set_features(struct net_device *netdev, netdev_features_t features) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); netdev_features_t changed = netdev->features ^ features; bool need_reset = false; @@ -10328,7 +10407,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], { /* guarantee we can provide a unique filter for the unicast address */ if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); u16 pool = VMDQ_P(0); if (netdev_uc_count(dev) >= ixgbe_available_rars(adapter, pool)) @@ -10416,7 +10495,7 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags, struct netlink_ext_ack *extack) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = 
ixgbe_from_netdev(dev); struct nlattr *attr, *br_spec; int rem; @@ -10444,7 +10523,7 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask, int nlflags) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) return 0; @@ -10456,7 +10535,7 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) { - struct ixgbe_adapter *adapter = netdev_priv(pdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(pdev); struct ixgbe_fwd_adapter *accel; int tcs = adapter->hw_tcs ? : 1; int pool, err; @@ -10553,7 +10632,7 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) static void ixgbe_fwd_del(struct net_device *pdev, void *priv) { struct ixgbe_fwd_adapter *accel = priv; - struct ixgbe_adapter *adapter = netdev_priv(pdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(pdev); unsigned int rxbase = accel->rx_base_queue; unsigned int i; @@ -10631,7 +10710,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) { int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct bpf_prog *old_prog; bool need_reset; int num_queues; @@ -10703,7 +10782,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); switch (xdp->command) { case XDP_SETUP_PROG: @@ -10738,7 +10817,7 @@ void ixgbe_xdp_ring_update_tail_locked(struct ixgbe_ring *ring) static int ixgbe_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct ixgbe_ring *ring; int nxmit = 0; int i; @@ -11146,7 +11225,7 @@ bool ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id, * format to display. The FW version is taken from the EEPROM/NVM. * */ -static void ixgbe_set_fw_version_e610(struct ixgbe_adapter *adapter) +void ixgbe_set_fw_version_e610(struct ixgbe_adapter *adapter) { struct ixgbe_orom_info *orom = &adapter->hw.flash.orom; struct ixgbe_nvm_info *nvm = &adapter->hw.flash.nvm; @@ -11197,6 +11276,66 @@ static void ixgbe_set_fw_version(struct ixgbe_adapter *adapter) }
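The ixgbe_from_netdev() helper used throughout this patch replaces the old direct netdev_priv() casts, since the adapter no longer lives inside the netdev allocation (see the alloc_etherdev_mq() change in the probe hunk below, which now sizes the private area for a small wrapper). Its definition is not shown in these hunks; a sketch of what it plausibly looks like, assuming the wrapper layout implied by netdev_priv_wrapper->adapter:

/* Sketch only; the real definitions live in ixgbe.h elsewhere in this series. */
struct ixgbe_netdevice_priv {
	struct ixgbe_adapter *adapter;
};

static inline struct ixgbe_adapter *ixgbe_from_netdev(struct net_device *netdev)
{
	struct ixgbe_netdevice_priv *priv = netdev_priv(netdev);

	return priv->adapter;
}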
+ */ +static int ixgbe_recovery_probe(struct ixgbe_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; + struct ixgbe_hw *hw = &adapter->hw; + bool disable_dev; + int err = -EIO; + + if (hw->mac.type != ixgbe_mac_e610) + goto clean_up_probe; + + ixgbe_get_hw_control(adapter); + mutex_init(&hw->aci.lock); + err = ixgbe_get_flash_data(&adapter->hw); + if (err) + goto shutdown_aci; + + timer_setup(&adapter->service_timer, ixgbe_service_timer, 0); + INIT_WORK(&adapter->service_task, ixgbe_recovery_service_task); + set_bit(__IXGBE_SERVICE_INITED, &adapter->state); + clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state); + + if (hw->mac.ops.get_bus_info) + hw->mac.ops.get_bus_info(hw); + + pci_set_drvdata(pdev, adapter); + /* We are creating devlink interface so NIC can be managed, + * e.g. new NVM image loaded + */ + devl_lock(adapter->devlink); + ixgbe_devlink_register_port(adapter); + SET_NETDEV_DEVLINK_PORT(adapter->netdev, + &adapter->devlink_port); + ixgbe_devlink_init_regions(adapter); + devl_register(adapter->devlink); + devl_unlock(adapter->devlink); + + return 0; +shutdown_aci: + mutex_destroy(&adapter->hw.aci.lock); + ixgbe_release_hw_control(adapter); + devlink_free(adapter->devlink); +clean_up_probe: + disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state); + free_netdev(netdev); + pci_release_mem_regions(pdev); + if (disable_dev) + pci_disable_device(pdev); + return err; +} + +/** * ixgbe_probe - Device Initialization Routine * @pdev: PCI device information struct * @ent: entry in ixgbe_pci_tbl @@ -11210,6 +11349,7 @@ static void ixgbe_set_fw_version(struct ixgbe_adapter *adapter) static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *netdev; + struct ixgbe_netdevice_priv *netdev_priv_wrapper; struct ixgbe_adapter *adapter = NULL; struct ixgbe_hw *hw; const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data]; @@ -11263,7 +11403,13 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) indices = IXGBE_MAX_RSS_INDICES_X550; } - netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices); + adapter = ixgbe_allocate_devlink(&pdev->dev); + if (IS_ERR(adapter)) { + err = PTR_ERR(adapter); + goto err_devlink; + } + + netdev = alloc_etherdev_mq(sizeof(*netdev_priv_wrapper), indices); if (!netdev) { err = -ENOMEM; goto err_alloc_etherdev; @@ -11271,7 +11417,8 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) SET_NETDEV_DEV(netdev, &pdev->dev); - adapter = netdev_priv(netdev); + netdev_priv_wrapper = netdev_priv(netdev); + netdev_priv_wrapper->adapter = adapter; adapter->netdev = netdev; adapter->pdev = pdev; @@ -11287,11 +11434,6 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_ioremap; } - netdev->netdev_ops = &ixgbe_netdev_ops; - ixgbe_set_ethtool_ops(netdev); - netdev->watchdog_timeo = 5 * HZ; - strscpy(netdev->name, pci_name(pdev), sizeof(netdev->name)); - /* Setup hw api */ hw->mac.ops = *ii->mac_ops; hw->mac.type = ii->mac; @@ -11321,15 +11463,31 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->phy.mdio.mdio_read = ixgbe_mdio_read; hw->phy.mdio.mdio_write = ixgbe_mdio_write; + netdev->netdev_ops = &ixgbe_netdev_ops; + ixgbe_set_ethtool_ops(netdev); + netdev->watchdog_timeo = 5 * HZ; + strscpy(netdev->name, pci_name(pdev), sizeof(netdev->name)); + /* setup the private structure */ err = ixgbe_sw_init(adapter, ii); if (err) goto 
err_sw_init; + /* Make sure the SWFW semaphore is in a valid state */ + if (hw->mac.ops.init_swfw_sync) + hw->mac.ops.init_swfw_sync(hw); + + if (ixgbe_check_fw_error(adapter)) + return ixgbe_recovery_probe(adapter); + if (adapter->hw.mac.type == ixgbe_mac_e610) { err = ixgbe_get_caps(&adapter->hw); if (err) dev_err(&pdev->dev, "ixgbe_get_caps failed %d\n", err); + + err = ixgbe_get_flash_data(&adapter->hw); + if (err) + goto err_sw_init; } if (adapter->hw.mac.type == ixgbe_mac_82599EB) @@ -11348,10 +11506,6 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) break; } - /* Make sure the SWFW semaphore is in a valid state */ - if (hw->mac.ops.init_swfw_sync) - hw->mac.ops.init_swfw_sync(hw); - /* Make it possible the adapter to be woken up via WOL */ switch (adapter->hw.mac.type) { case ixgbe_mac_82599EB: @@ -11504,11 +11658,6 @@ skip_sriov: if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) netdev->features |= NETIF_F_LRO; - if (ixgbe_check_fw_error(adapter)) { - err = -EIO; - goto err_sw_init; - } - /* make sure the EEPROM is good */ if (hw->eeprom.ops.validate_checksum(hw, NULL) < 0) { e_dev_err("The EEPROM Checksum Is Not Valid\n"); @@ -11591,7 +11740,7 @@ skip_sriov: if (expected_gts > 0) ixgbe_check_minimum_link(adapter, expected_gts); - err = ixgbe_read_pba_string_generic(hw, part_str, sizeof(part_str)); + err = hw->eeprom.ops.read_pba_string(hw, part_str, sizeof(part_str)); if (err) strscpy(part_str, "Unknown", sizeof(part_str)); if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present) @@ -11617,6 +11766,11 @@ skip_sriov: } strcpy(netdev->name, "eth%d"); pci_set_drvdata(pdev, adapter); + + devl_lock(adapter->devlink); + ixgbe_devlink_register_port(adapter); + SET_NETDEV_DEVLINK_PORT(adapter->netdev, &adapter->devlink_port); + err = register_netdev(netdev); if (err) goto err_register; @@ -11671,11 +11825,16 @@ skip_sriov: if (err) goto err_netdev; + ixgbe_devlink_init_regions(adapter); + devl_register(adapter->devlink); + devl_unlock(adapter->devlink); return 0; err_netdev: unregister_netdev(netdev); err_register: + devl_port_unregister(&adapter->devlink_port); + devl_unlock(adapter->devlink); ixgbe_release_hw_control(adapter); ixgbe_clear_interrupt_scheme(adapter); if (hw->mac.type == ixgbe_mac_e610) @@ -11692,7 +11851,9 @@ err_ioremap: disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state); free_netdev(netdev); err_alloc_etherdev: + devlink_free(adapter->devlink); pci_release_mem_regions(pdev); +err_devlink: err_pci_reg: err_dma: if (!adapter || disable_dev) @@ -11721,6 +11882,9 @@ static void ixgbe_remove(struct pci_dev *pdev) return; netdev = adapter->netdev; + devl_lock(adapter->devlink); + devl_unregister(adapter->devlink); + ixgbe_devlink_destroy_regions(adapter); ixgbe_dbg_adapter_exit(adapter); set_bit(__IXGBE_REMOVING, &adapter->state); @@ -11756,6 +11920,10 @@ static void ixgbe_remove(struct pci_dev *pdev) if (netdev->reg_state == NETREG_REGISTERED) unregister_netdev(netdev); + devl_port_unregister(&adapter->devlink_port); + devl_unlock(adapter->devlink); + devlink_free(adapter->devlink); + ixgbe_stop_ipsec_offload(adapter); ixgbe_clear_interrupt_scheme(adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 0a03a8bb5f88..2d54828bdfbb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -167,7 +167,7 @@ int ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, u16 reg, u16 val, bool 
lock) { u32 swfw_mask = hw->phy.phy_semaphore_mask; - int max_retry = 1; + int max_retry = 3; int retry = 0; u8 reg_high; u8 csum; @@ -2285,7 +2285,7 @@ static int ixgbe_write_i2c_byte_generic_int(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, u8 data, bool lock) { u32 swfw_mask = hw->phy.phy_semaphore_mask; - u32 max_retry = 1; + u32 max_retry = 3; u32 retry = 0; int status; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index ccdce80edd14..0dbbd2befd4d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1418,7 +1418,7 @@ void ixgbe_set_all_vfs(struct ixgbe_adapter *adapter) int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); int retval; if (vf >= adapter->num_vfs) @@ -1526,7 +1526,7 @@ int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, __be16 vlan_proto) { int err = 0; - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); if ((vf >= adapter->num_vfs) || (vlan > 4095) || (qos > 7)) return -EINVAL; @@ -1644,7 +1644,7 @@ void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter) int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate, int max_tx_rate) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); int link_speed; /* verify VF is active */ @@ -1679,7 +1679,7 @@ int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate, int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; if (vf >= adapter->num_vfs) @@ -1757,7 +1757,7 @@ void ixgbe_set_vf_link_state(struct ixgbe_adapter *adapter, int vf, int state) **/ int ixgbe_ndo_set_vf_link_state(struct net_device *netdev, int vf, int state) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); int ret = 0; if (vf < 0 || vf >= adapter->num_vfs) { @@ -1794,7 +1794,7 @@ int ixgbe_ndo_set_vf_link_state(struct net_device *netdev, int vf, int state) int ixgbe_ndo_set_vf_rss_query_en(struct net_device *netdev, int vf, bool setting) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); /* This operation is currently supported only for 82599 and x540 * devices. 
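All of the ndo callbacks in these hunks now obtain the adapter via ixgbe_from_netdev() instead of treating netdev_priv() as the adapter itself. Going by the probe changes above — alloc_etherdev_mq() now sizes the private area for a small wrapper and sets netdev_priv_wrapper->adapter to the devlink-allocated adapter — the helper is presumably a trivial accessor along these lines (a sketch; the actual definition lives in a header not shown in this diff):

/* Minimal sketch, assuming the layout implied by ixgbe_probe(): the
 * net_device private area holds only a pointer to the adapter, whose
 * allocation is now owned by devlink.
 */
struct ixgbe_netdevice_priv {
	struct ixgbe_adapter *adapter;
};

static inline struct ixgbe_adapter *ixgbe_from_netdev(struct net_device *netdev)
{
	struct ixgbe_netdevice_priv *priv = netdev_priv(netdev);

	return priv->adapter;
}

Keeping only a pointer in netdev_priv() decouples the adapter's lifetime from the net_device, which is what lets the new ixgbe_recovery_probe() path register devlink and a devlink port without a fully initialized netdev.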
@@ -1813,7 +1813,7 @@ int ixgbe_ndo_set_vf_rss_query_en(struct net_device *netdev, int vf, int ixgbe_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); if (vf >= adapter->num_vfs) return -EINVAL; @@ -1836,7 +1836,7 @@ int ixgbe_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting) int ixgbe_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi) { - struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); if (vf >= adapter->num_vfs) return -EINVAL; ivi->vf = vf; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 5fdf32d79d82..892fa6c1f879 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -3446,6 +3446,8 @@ struct ixgbe_eeprom_operations { int (*validate_checksum)(struct ixgbe_hw *, u16 *); int (*update_checksum)(struct ixgbe_hw *); int (*calc_checksum)(struct ixgbe_hw *); + int (*read_pba_string)(struct ixgbe_hw *hw, u8 *pba_num, + u32 pba_num_size); }; struct ixgbe_mac_operations { @@ -3454,6 +3456,7 @@ struct ixgbe_mac_operations { int (*start_hw)(struct ixgbe_hw *); int (*clear_hw_cntrs)(struct ixgbe_hw *); enum ixgbe_media_type (*get_media_type)(struct ixgbe_hw *); + int (*get_fw_ver)(struct ixgbe_hw *hw); int (*get_mac_addr)(struct ixgbe_hw *, u8 *); int (*get_san_mac_addr)(struct ixgbe_hw *, u8 *); int (*get_device_caps)(struct ixgbe_hw *, u16 *); @@ -3522,6 +3525,8 @@ struct ixgbe_mac_operations { int (*get_thermal_sensor_data)(struct ixgbe_hw *); int (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw); bool (*fw_recovery_mode)(struct ixgbe_hw *hw); + bool (*fw_rollback_mode)(struct ixgbe_hw *hw); + int (*get_nvm_ver)(struct ixgbe_hw *hw, struct ixgbe_nvm_info *nvm); void (*disable_rx)(struct ixgbe_hw *hw); void (*enable_rx)(struct ixgbe_hw *hw); void (*set_source_address_pruning)(struct ixgbe_hw *, bool, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h index 617e07878e4f..09df67f03cf4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h @@ -10,11 +10,75 @@ #define IXGBE_MAX_VSI 768 /* Checksum and Shadow RAM pointers */ -#define E610_SR_SW_CHECKSUM_WORD 0x3F +#define IXGBE_E610_SR_NVM_CTRL_WORD 0x00 +#define IXGBE_E610_SR_PBA_BLOCK_PTR 0x16 +#define IXGBE_E610_SR_PBA_BLOCK_MASK GENMASK(15, 8) +#define IXGBE_E610_SR_NVM_DEV_STARTER_VER 0x18 +#define IXGBE_E610_SR_NVM_EETRACK_LO 0x2D +#define IXGBE_E610_SR_NVM_EETRACK_HI 0x2E +#define IXGBE_E610_NVM_VER_LO_MASK GENMASK(7, 0) +#define IXGBE_E610_NVM_VER_HI_MASK GENMASK(15, 12) +#define IXGBE_E610_SR_SW_CHECKSUM_WORD 0x3F +#define IXGBE_E610_SR_PFA_PTR 0x40 +#define IXGBE_E610_SR_1ST_NVM_BANK_PTR 0x42 +#define IXGBE_E610_SR_NVM_BANK_SIZE 0x43 +#define IXGBE_E610_SR_1ST_OROM_BANK_PTR 0x44 +#define IXGBE_E610_SR_OROM_BANK_SIZE 0x45 +#define IXGBE_E610_SR_NETLIST_BANK_PTR 0x46 +#define IXGBE_E610_SR_NETLIST_BANK_SIZE 0x47 + +/* The OROM version topology */ +#define IXGBE_OROM_VER_PATCH_MASK GENMASK_ULL(7, 0) +#define IXGBE_OROM_VER_BUILD_MASK GENMASK_ULL(23, 8) +#define IXGBE_OROM_VER_MASK GENMASK_ULL(31, 24) + +/* CSS Header words */ +#define IXGBE_NVM_CSS_HDR_LEN_L 0x02 +#define IXGBE_NVM_CSS_HDR_LEN_H 0x03 +#define IXGBE_NVM_CSS_SREV_L 0x14 +#define 
IXGBE_NVM_CSS_SREV_H 0x15 + +#define IXGBE_HDR_LEN_ROUNDUP 32 + +/* Length of Authentication header section in words */ +#define IXGBE_NVM_AUTH_HEADER_LEN 0x08 /* Shadow RAM related */ #define IXGBE_SR_WORDS_IN_1KB 512 +/* The Netlist ID Block is located after all of the Link Topology nodes. */ +#define IXGBE_NETLIST_ID_BLK_SIZE 0x30 +#define IXGBE_NETLIST_ID_BLK_OFFSET(n) IXGBE_NETLIST_LINK_TOPO_OFFSET(0x0004 + 2 * (n)) + +/* netlist ID block field offsets (word offsets) */ +#define IXGBE_NETLIST_ID_BLK_MAJOR_VER_LOW 0x02 +#define IXGBE_NETLIST_ID_BLK_MAJOR_VER_HIGH 0x03 +#define IXGBE_NETLIST_ID_BLK_MINOR_VER_LOW 0x04 +#define IXGBE_NETLIST_ID_BLK_MINOR_VER_HIGH 0x05 +#define IXGBE_NETLIST_ID_BLK_TYPE_LOW 0x06 +#define IXGBE_NETLIST_ID_BLK_TYPE_HIGH 0x07 +#define IXGBE_NETLIST_ID_BLK_REV_LOW 0x08 +#define IXGBE_NETLIST_ID_BLK_REV_HIGH 0x09 +#define IXGBE_NETLIST_ID_BLK_SHA_HASH_WORD(n) (0x0A + (n)) +#define IXGBE_NETLIST_ID_BLK_CUST_VER 0x2F + +/* The Link Topology Netlist section is stored as a series of words. It is + * stored in the NVM as a TLV, with the first two words containing the type + * and length. + */ +#define IXGBE_NETLIST_LINK_TOPO_MOD_ID 0x011B +#define IXGBE_NETLIST_TYPE_OFFSET 0x0000 +#define IXGBE_NETLIST_LEN_OFFSET 0x0001 + +/* The Link Topology section follows the TLV header. When reading the netlist + * using ixgbe_read_netlist_module, we need to account for the 2-word TLV + * header. + */ +#define IXGBE_NETLIST_LINK_TOPO_OFFSET(n) ((n) + 2) +#define IXGBE_LINK_TOPO_MODULE_LEN IXGBE_NETLIST_LINK_TOPO_OFFSET(0x0000) +#define IXGBE_LINK_TOPO_NODE_COUNT IXGBE_NETLIST_LINK_TOPO_OFFSET(0x0001) +#define IXGBE_LINK_TOPO_NODE_COUNT_M GENMASK_ULL(9, 0) + /* Firmware Status Register (GL_FWSTS) */ #define GL_FWSTS 0x00083048 /* Reset Source: POR */ #define GL_FWSTS_EP_PF0 BIT(24) @@ -24,11 +88,23 @@ #define GLNVM_GENS 0x000B6100 /* Reset Source: POR */ #define GLNVM_GENS_SR_SIZE_M GENMASK(7, 5) +#define IXGBE_GL_MNG_FWSM 0x000B6134 /* Reset Source: POR */ +#define IXGBE_GL_MNG_FWSM_RECOVERY_M BIT(1) +#define IXGBE_GL_MNG_FWSM_ROLLBACK_M BIT(2) + /* Flash Access Register */ #define IXGBE_GLNVM_FLA 0x000B6108 /* Reset Source: POR */ #define IXGBE_GLNVM_FLA_LOCKED_S 6 #define IXGBE_GLNVM_FLA_LOCKED_M BIT(6) +/* Auxiliary field, mask and shift definition for Shadow RAM and NVM Flash */ +#define IXGBE_SR_CTRL_WORD_1_M GENMASK(7, 6) +#define IXGBE_SR_CTRL_WORD_VALID BIT(0) +#define IXGBE_SR_CTRL_WORD_OROM_BANK BIT(3) +#define IXGBE_SR_CTRL_WORD_NETLIST_BANK BIT(4) +#define IXGBE_SR_CTRL_WORD_NVM_BANK BIT(5) +#define IXGBE_SR_NVM_PTR_4KB_UNITS BIT(15) + /* Admin Command Interface (ACI) registers */ #define IXGBE_PF_HIDA(_i) (0x00085000 + ((_i) * 4)) #define IXGBE_PF_HIDA_2(_i) (0x00085020 + ((_i) * 4)) @@ -40,6 +116,10 @@ #define IXGBE_PF_HICR_SV BIT(2) #define IXGBE_PF_HICR_EV BIT(3) +#define IXGBE_FW_API_VER_MAJOR 0x01 +#define IXGBE_FW_API_VER_MINOR 0x07 +#define IXGBE_FW_API_VER_DIFF_ALLOWED 0x02 + #define IXGBE_ACI_DESC_SIZE 32 #define IXGBE_ACI_DESC_SIZE_IN_DWORDS (IXGBE_ACI_DESC_SIZE / BYTES_PER_DWORD) @@ -143,6 +223,7 @@ enum ixgbe_aci_opc { ixgbe_aci_opc_write_mdio = 0x06E5, ixgbe_aci_opc_set_gpio_by_func = 0x06E6, ixgbe_aci_opc_get_gpio_by_func = 0x06E7, + ixgbe_aci_opc_set_port_id_led = 0x06E9, ixgbe_aci_opc_set_gpio = 0x06EC, ixgbe_aci_opc_get_gpio = 0x06ED, ixgbe_aci_opc_sff_eeprom = 0x06EE, @@ -728,6 +809,18 @@ struct ixgbe_aci_cmd_get_link_topo_pin { u8 rsvd[7]; }; +/* Set Port Identification LED (direct, 0x06E9) */ +struct ixgbe_aci_cmd_set_port_id_led { + u8 
lport_num; + u8 lport_num_valid; + u8 ident_mode; + u8 rsvd[13]; +}; + +#define IXGBE_ACI_PORT_ID_PORT_NUM_VALID BIT(0) +#define IXGBE_ACI_PORT_IDENT_LED_ORIG 0 +#define IXGBE_ACI_PORT_IDENT_LED_BLINK BIT(0) + /* Read/Write SFF EEPROM command (indirect 0x06EE) */ struct ixgbe_aci_cmd_sff_eeprom { u8 lport_num; @@ -761,6 +854,8 @@ struct ixgbe_aci_cmd_nvm { #define IXGBE_ACI_NVM_MAX_OFFSET 0xFFFFFF __le16 offset_low; u8 offset_high; /* For Write Activate offset_high is used as flags2 */ +#define IXGBE_ACI_NVM_OFFSET_HI_A_MASK GENMASK(15, 8) +#define IXGBE_ACI_NVM_OFFSET_HI_U_MASK GENMASK(23, 16) u8 cmd_flags; #define IXGBE_ACI_NVM_LAST_CMD BIT(0) #define IXGBE_ACI_NVM_PCIR_REQ BIT(0) /* Used by NVM Write reply */ @@ -776,6 +871,9 @@ struct ixgbe_aci_cmd_nvm { #define IXGBE_ACI_NVM_PERST_FLAG 1 #define IXGBE_ACI_NVM_EMPR_FLAG 2 #define IXGBE_ACI_NVM_EMPR_ENA BIT(0) /* Write Activate reply only */ +#define IXGBE_ACI_NVM_NO_PRESERVATION 0x0 +#define IXGBE_ACI_NVM_PRESERVE_SELECTED 0x6 + /* For Write Activate, several flags are sent as part of a separate * flags2 field using a separate byte. For simplicity of the software * interface, we pass the flags as a 16 bit value so these flags are @@ -805,6 +903,63 @@ struct ixgbe_aci_cmd_nvm_checksum { u8 rsvd2[12]; }; +/* Used for NVM Set Package Data command - 0x070A */ +struct ixgbe_aci_cmd_nvm_pkg_data { + u8 reserved[3]; + u8 cmd_flags; +#define IXGBE_ACI_NVM_PKG_DELETE BIT(0) /* used for command call */ + + u32 reserved1; + __le32 addr_high; + __le32 addr_low; +}; + +/* Used for Pass Component Table command - 0x070B */ +struct ixgbe_aci_cmd_nvm_pass_comp_tbl { + u8 component_response; /* Response only */ +#define IXGBE_ACI_NVM_PASS_COMP_CAN_BE_UPDATED 0x0 +#define IXGBE_ACI_NVM_PASS_COMP_CAN_MAY_BE_UPDATEABLE 0x1 +#define IXGBE_ACI_NVM_PASS_COMP_CAN_NOT_BE_UPDATED 0x2 +#define IXGBE_ACI_NVM_PASS_COMP_PARTIAL_CHECK 0x3 + u8 component_response_code; /* Response only */ +#define IXGBE_ACI_NVM_PASS_COMP_CAN_BE_UPDATED_CODE 0x0 +#define IXGBE_ACI_NVM_PASS_COMP_STAMP_IDENTICAL_CODE 0x1 +#define IXGBE_ACI_NVM_PASS_COMP_STAMP_LOWER 0x2 +#define IXGBE_ACI_NVM_PASS_COMP_INVALID_STAMP_CODE 0x3 +#define IXGBE_ACI_NVM_PASS_COMP_CONFLICT_CODE 0x4 +#define IXGBE_ACI_NVM_PASS_COMP_PRE_REQ_NOT_MET_CODE 0x5 +#define IXGBE_ACI_NVM_PASS_COMP_NOT_SUPPORTED_CODE 0x6 +#define IXGBE_ACI_NVM_PASS_COMP_CANNOT_DOWNGRADE_CODE 0x7 +#define IXGBE_ACI_NVM_PASS_COMP_INCOMPLETE_IMAGE_CODE 0x8 +#define IXGBE_ACI_NVM_PASS_COMP_VER_STR_IDENTICAL_CODE 0xA +#define IXGBE_ACI_NVM_PASS_COMP_VER_STR_LOWER_CODE 0xB + u8 reserved; + u8 transfer_flag; + __le32 reserved1; + __le32 addr_high; + __le32 addr_low; +}; + +struct ixgbe_aci_cmd_nvm_comp_tbl { + __le16 comp_class; +#define NVM_COMP_CLASS_ALL_FW 0x000A + + __le16 comp_id; +#define NVM_COMP_ID_OROM 0x5 +#define NVM_COMP_ID_NVM 0x6 +#define NVM_COMP_ID_NETLIST 0x8 + + u8 comp_class_idx; +#define FWU_COMP_CLASS_IDX_NOT_USE 0x0 + + __le32 comp_cmp_stamp; + u8 cvs_type; +#define NVM_CVS_TYPE_ASCII 0x1 + + u8 cvs_len; + u8 cvs[]; /* Component Version String */ +} __packed; + /** * struct ixgbe_aci_desc - Admin Command (AC) descriptor * @flags: IXGBE_ACI_FLAG_* flags @@ -843,11 +998,14 @@ struct ixgbe_aci_desc { struct ixgbe_aci_cmd_restart_an restart_an; struct ixgbe_aci_cmd_get_link_status get_link_status; struct ixgbe_aci_cmd_set_event_mask set_event_mask; + struct ixgbe_aci_cmd_set_port_id_led set_port_id_led; struct ixgbe_aci_cmd_get_link_topo get_link_topo; struct ixgbe_aci_cmd_get_link_topo_pin get_link_topo_pin; struct 
ixgbe_aci_cmd_sff_eeprom read_write_sff_param; struct ixgbe_aci_cmd_nvm nvm; struct ixgbe_aci_cmd_nvm_checksum nvm_checksum; + struct ixgbe_aci_cmd_nvm_pkg_data pkg_data; + struct ixgbe_aci_cmd_nvm_pass_comp_tbl pass_comp_tbl; } params; }; @@ -984,6 +1142,16 @@ struct ixgbe_hw_caps { #define IXGBE_EXT_TOPO_DEV_IMG_PROG_EN BIT(1) } __packed; +#define IXGBE_OROM_CIV_SIGNATURE "$CIV" + +struct ixgbe_orom_civd_info { + u8 signature[4]; /* Must match ASCII '$CIV' characters */ + u8 checksum; /* Simple modulo 256 sum of all structure bytes must equal 0 */ + __le32 combo_ver; /* Combo Image Version number */ + u8 combo_name_len; /* Length of the unicode combo image version string, max of 32 */ + __le16 combo_name[32]; /* Unicode string representing the Combo Image version */ +}; + /* Function specific capabilities */ struct ixgbe_hw_func_caps { u32 num_allocd_vfs; /* Number of allocated VFs */ @@ -1015,6 +1183,11 @@ struct ixgbe_aci_info { enum ixgbe_aci_err last_status; /* last status of sent admin command */ }; +enum ixgbe_bank_select { + IXGBE_ACTIVE_FLASH_BANK, + IXGBE_INACTIVE_FLASH_BANK, +}; + /* Option ROM version information */ struct ixgbe_orom_info { u8 major; /* Major version of OROM */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index 1fc821fb351a..f1ab95aa8c83 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -894,6 +894,7 @@ static const struct ixgbe_eeprom_operations eeprom_ops_X540 = { .calc_checksum = &ixgbe_calc_eeprom_checksum_X540, .validate_checksum = &ixgbe_validate_eeprom_checksum_X540, .update_checksum = &ixgbe_update_eeprom_checksum_X540, + .read_pba_string = &ixgbe_read_pba_string_generic, }; static const struct ixgbe_phy_operations phy_ops_X540 = { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 277ceaf8a793..1d2acdb64f45 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -3959,6 +3959,7 @@ static const struct ixgbe_mac_operations mac_ops_x550em_a_fw = { .validate_checksum = &ixgbe_validate_eeprom_checksum_X550, \ .update_checksum = &ixgbe_update_eeprom_checksum_X550, \ .calc_checksum = &ixgbe_calc_eeprom_checksum_X550, \ + .read_pba_string = &ixgbe_read_pba_string_generic, \ static const struct ixgbe_eeprom_operations eeprom_ops_X550 = { X550_COMMON_EEP diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 3e3b471e53f0..ac58964b2f08 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -508,7 +508,7 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) { - struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct ixgbe_ring *ring; if (test_bit(__IXGBE_DOWN, &adapter->state)) diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig index 837295fecd17..50f7c59e8a04 100644 --- a/drivers/net/ethernet/marvell/Kconfig +++ b/drivers/net/ethernet/marvell/Kconfig @@ -34,7 +34,6 @@ config MV643XX_ETH config MVMDIO tristate "Marvell MDIO interface support" depends on HAS_IOMEM - select MDIO_DEVRES select PHYLIB help This driver supports the MDIO interface found in the network diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c 
b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 416a926a8281..a7872d14a49d 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5173,38 +5173,40 @@ mvpp2_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_dropped = dev->stats.tx_dropped; } -static int mvpp2_set_ts_config(struct mvpp2_port *port, struct ifreq *ifr) +static int mvpp2_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { - struct hwtstamp_config config; + struct mvpp2_port *port = netdev_priv(dev); void __iomem *ptp; u32 gcr, int_mask; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; + if (!port->hwtstamp) + return -EOPNOTSUPP; - if (config.tx_type != HWTSTAMP_TX_OFF && - config.tx_type != HWTSTAMP_TX_ON) + if (config->tx_type != HWTSTAMP_TX_OFF && + config->tx_type != HWTSTAMP_TX_ON) return -ERANGE; ptp = port->priv->iface_base + MVPP22_PTP_BASE(port->gop_id); int_mask = gcr = 0; - if (config.tx_type != HWTSTAMP_TX_OFF) { + if (config->tx_type != HWTSTAMP_TX_OFF) { gcr |= MVPP22_PTP_GCR_TSU_ENABLE | MVPP22_PTP_GCR_TX_RESET; int_mask |= MVPP22_PTP_INT_MASK_QUEUE1 | MVPP22_PTP_INT_MASK_QUEUE0; } /* It seems we must also release the TX reset when enabling the TSU */ - if (config.rx_filter != HWTSTAMP_FILTER_NONE) + if (config->rx_filter != HWTSTAMP_FILTER_NONE) gcr |= MVPP22_PTP_GCR_TSU_ENABLE | MVPP22_PTP_GCR_RX_RESET | MVPP22_PTP_GCR_TX_RESET; if (gcr & MVPP22_PTP_GCR_TSU_ENABLE) mvpp22_tai_start(port->priv->tai); - if (config.rx_filter != HWTSTAMP_FILTER_NONE) { - config.rx_filter = HWTSTAMP_FILTER_ALL; + if (config->rx_filter != HWTSTAMP_FILTER_NONE) { + config->rx_filter = HWTSTAMP_FILTER_ALL; mvpp2_modify(ptp + MVPP22_PTP_GCR, MVPP22_PTP_GCR_RX_RESET | MVPP22_PTP_GCR_TX_RESET | @@ -5225,26 +5227,22 @@ static int mvpp2_set_ts_config(struct mvpp2_port *port, struct ifreq *ifr) if (!(gcr & MVPP22_PTP_GCR_TSU_ENABLE)) mvpp22_tai_stop(port->priv->tai); - port->tx_hwtstamp_type = config.tx_type; - - if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) - return -EFAULT; + port->tx_hwtstamp_type = config->tx_type; return 0; } -static int mvpp2_get_ts_config(struct mvpp2_port *port, struct ifreq *ifr) +static int mvpp2_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *config) { - struct hwtstamp_config config; - - memset(&config, 0, sizeof(config)); + struct mvpp2_port *port = netdev_priv(dev); - config.tx_type = port->tx_hwtstamp_type; - config.rx_filter = port->rx_hwtstamp ? - HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE; + if (!port->hwtstamp) + return -EOPNOTSUPP; - if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) - return -EFAULT; + config->tx_type = port->tx_hwtstamp_type; + config->rx_filter = port->rx_hwtstamp ? 
HWTSTAMP_FILTER_ALL : + HWTSTAMP_FILTER_NONE; return 0; } @@ -5274,18 +5272,6 @@ static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct mvpp2_port *port = netdev_priv(dev); - switch (cmd) { - case SIOCSHWTSTAMP: - if (port->hwtstamp) - return mvpp2_set_ts_config(port, ifr); - break; - - case SIOCGHWTSTAMP: - if (port->hwtstamp) - return mvpp2_get_ts_config(port, ifr); - break; - } - if (!port->phylink) return -ENOTSUPP; @@ -5799,6 +5785,8 @@ static const struct net_device_ops mvpp2_netdev_ops = { .ndo_set_features = mvpp2_set_features, .ndo_bpf = mvpp2_xdp, .ndo_xdp_xmit = mvpp2_xdp_xmit, + .ndo_hwtstamp_get = mvpp2_hwtstamp_get, + .ndo_hwtstamp_set = mvpp2_hwtstamp_set, }; static const struct ethtool_ops mvpp2_eth_tool_ops = { diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c index ccb69bc5c952..420c3f4cf741 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c @@ -18,8 +18,6 @@ #include "octep_vf_config.h" #include "octep_vf_main.h" -struct workqueue_struct *octep_vf_wq; - /* Supported Devices */ static const struct pci_device_id octep_vf_pci_id_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN93_VF)}, diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h index 1a352f41f823..b9f13506f462 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h @@ -320,8 +320,6 @@ static inline u16 OCTEP_VF_MINOR_REV(struct octep_vf_device *oct) #define octep_vf_read_csr64(octep_vf_dev, reg_off) \ readq((octep_vf_dev)->mmio.hw_addr + (reg_off)) -extern struct workqueue_struct *octep_vf_wq; - int octep_vf_device_setup(struct octep_vf_device *oct); int octep_vf_setup_iqs(struct octep_vf_device *oct); void octep_vf_free_iqs(struct octep_vf_device *oct); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c index 1e5aa5397504..7d21905deed8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c @@ -188,14 +188,13 @@ int otx2_mbox_wait_for_rsp(struct otx2_mbox *mbox, int devid) { unsigned long timeout = jiffies + msecs_to_jiffies(MBOX_RSP_TIMEOUT); struct otx2_mbox_dev *mdev = &mbox->dev[devid]; - struct device *sender = &mbox->pdev->dev; while (!time_after(jiffies, timeout)) { if (mdev->num_msgs == mdev->msgs_acked) return 0; usleep_range(800, 1000); } - dev_dbg(sender, "timed out while waiting for rsp\n"); + trace_otx2_msg_wait_rsp(mbox->pdev); return -EIO; } EXPORT_SYMBOL(otx2_mbox_wait_for_rsp); @@ -219,6 +218,7 @@ static void otx2_mbox_msg_send_data(struct otx2_mbox *mbox, int devid, u64 data) struct otx2_mbox_dev *mdev = &mbox->dev[devid]; struct mbox_hdr *tx_hdr, *rx_hdr; void *hw_mbase = mdev->hwbase; + struct mbox_msghdr *msg; u64 intr_val; tx_hdr = hw_mbase + mbox->tx_start; @@ -251,7 +251,10 @@ static void otx2_mbox_msg_send_data(struct otx2_mbox *mbox, int devid, u64 data) tx_hdr->num_msgs = mdev->num_msgs; rx_hdr->num_msgs = 0; - trace_otx2_msg_send(mbox->pdev, tx_hdr->num_msgs, tx_hdr->msg_size); + msg = (struct mbox_msghdr *)(hw_mbase + mbox->tx_start + msgs_offset); + + trace_otx2_msg_send(mbox->pdev, tx_hdr->num_msgs, tx_hdr->msg_size, + msg->id, msg->pcifunc); spin_unlock(&mdev->mbox_lock); @@ -445,6 +448,14 @@ const 
char *otx2_mbox_id2name(u16 id) #define M(_name, _id, _1, _2, _3) case _id: return # _name; MBOX_MESSAGES #undef M + +#define M(_name, _id, _1, _2, _3) case _id: return # _name; + MBOX_UP_CGX_MESSAGES +#undef M + +#define M(_name, _id, _1, _2, _3) case _id: return # _name; + MBOX_UP_CPT_MESSAGES +#undef M default: return "INVALID ID"; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 6575c422635b..511eb5b2a2d4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2173,7 +2173,7 @@ static int rvu_process_mbox_msg(struct otx2_mbox *mbox, int devid, if (rsp && err) \ rsp->hdr.rc = err; \ \ - trace_otx2_msg_process(mbox->pdev, _id, err); \ + trace_otx2_msg_process(mbox->pdev, _id, err, req->pcifunc); \ return rsp ? err : -ENOMEM; \ } MBOX_MESSAGES diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 60f085b00a8c..147d7f5c1fcc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -969,8 +969,6 @@ void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf, bool enable); void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan); -void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, int nixlf, - bool enable); void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan); void rvu_npc_enable_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 992fa0b82e8d..38db06b1784c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -34,7 +34,7 @@ static struct _req_type __maybe_unused \ return NULL; \ req->hdr.sig = OTX2_MBOX_REQ_SIG; \ req->hdr.id = _id; \ - trace_otx2_msg_alloc(rvu->pdev, _id, sizeof(*req)); \ + trace_otx2_msg_alloc(rvu->pdev, _id, sizeof(*req), 0); \ return req; \ } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 821fe242f821..6296a3cdabbb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -820,24 +820,6 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); } -void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, int nixlf, - bool enable) -{ - struct npc_mcam *mcam = &rvu->hw->mcam; - int blkaddr, index; - - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); - if (blkaddr < 0) - return; - - /* Get 'pcifunc' of PF device */ - pcifunc = pcifunc & ~RVU_PFVF_FUNC_MASK; - - index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, - NIXLF_BCAST_ENTRY); - npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); -} - void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c index 775fd4c35794..5f69380ccc82 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c @@ -11,3 +11,4 @@ EXPORT_TRACEPOINT_SYMBOL(otx2_msg_alloc); EXPORT_TRACEPOINT_SYMBOL(otx2_msg_interrupt); EXPORT_TRACEPOINT_SYMBOL(otx2_msg_process); 
+EXPORT_TRACEPOINT_SYMBOL(otx2_msg_status); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h index 5704520f9b02..db02b4d6ed4b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h @@ -18,33 +18,42 @@ #include "mbox.h" TRACE_EVENT(otx2_msg_alloc, - TP_PROTO(const struct pci_dev *pdev, u16 id, u64 size), - TP_ARGS(pdev, id, size), + TP_PROTO(const struct pci_dev *pdev, u16 id, u64 size, u16 pcifunc), + TP_ARGS(pdev, id, size, pcifunc), TP_STRUCT__entry(__string(dev, pci_name(pdev)) __field(u16, id) __field(u64, size) + __field(u16, pcifunc) ), TP_fast_assign(__assign_str(dev); __entry->id = id; __entry->size = size; + __entry->pcifunc = pcifunc; ), - TP_printk("[%s] msg:(%s) size:%lld\n", __get_str(dev), - otx2_mbox_id2name(__entry->id), __entry->size) + TP_printk("[%s] msg:(%s) size:%lld pcifunc:0x%x\n", __get_str(dev), + otx2_mbox_id2name(__entry->id), __entry->size, + __entry->pcifunc) ); TRACE_EVENT(otx2_msg_send, - TP_PROTO(const struct pci_dev *pdev, u16 num_msgs, u64 msg_size), - TP_ARGS(pdev, num_msgs, msg_size), + TP_PROTO(const struct pci_dev *pdev, u16 num_msgs, u64 msg_size, + u16 id, u16 pcifunc), + TP_ARGS(pdev, num_msgs, msg_size, id, pcifunc), TP_STRUCT__entry(__string(dev, pci_name(pdev)) __field(u16, num_msgs) __field(u64, msg_size) + __field(u16, id) + __field(u16, pcifunc) ), TP_fast_assign(__assign_str(dev); __entry->num_msgs = num_msgs; __entry->msg_size = msg_size; + __entry->id = id; + __entry->pcifunc = pcifunc; ), - TP_printk("[%s] sent %d msg(s) of size:%lld\n", __get_str(dev), - __entry->num_msgs, __entry->msg_size) + TP_printk("[%s] sent %d msg(s) of size:%lld msg:(%s) pcifunc:0x%x\n", + __get_str(dev), __entry->num_msgs, __entry->msg_size, + otx2_mbox_id2name(__entry->id), __entry->pcifunc) ); TRACE_EVENT(otx2_msg_check, @@ -81,18 +90,47 @@ TRACE_EVENT(otx2_msg_interrupt, ); TRACE_EVENT(otx2_msg_process, - TP_PROTO(const struct pci_dev *pdev, u16 id, int err), - TP_ARGS(pdev, id, err), + TP_PROTO(const struct pci_dev *pdev, u16 id, int err, u16 pcifunc), + TP_ARGS(pdev, id, err, pcifunc), TP_STRUCT__entry(__string(dev, pci_name(pdev)) __field(u16, id) __field(int, err) + __field(u16, pcifunc) ), TP_fast_assign(__assign_str(dev); __entry->id = id; __entry->err = err; + __entry->pcifunc = pcifunc; + ), + TP_printk("[%s] msg:(%s) error:%d pcifunc:0x%x\n", __get_str(dev), + otx2_mbox_id2name(__entry->id), + __entry->err, __entry->pcifunc) +); + +TRACE_EVENT(otx2_msg_wait_rsp, + TP_PROTO(const struct pci_dev *pdev), + TP_ARGS(pdev), + TP_STRUCT__entry(__string(dev, pci_name(pdev)) + ), + TP_fast_assign(__assign_str(dev) + ), + TP_printk("[%s] timed out while waiting for response\n", + __get_str(dev)) +); + +TRACE_EVENT(otx2_msg_status, + TP_PROTO(const struct pci_dev *pdev, const char *msg, u16 num_msgs), + TP_ARGS(pdev, msg, num_msgs), + TP_STRUCT__entry(__string(dev, pci_name(pdev)) + __string(str, msg) + __field(u16, num_msgs) + ), + TP_fast_assign(__assign_str(dev); + __assign_str(str); + __entry->num_msgs = num_msgs; ), - TP_printk("[%s] msg:(%s) error:%d\n", __get_str(dev), - otx2_mbox_id2name(__entry->id), __entry->err) + TP_printk("[%s] %s num_msgs:%d\n", __get_str(dev), + __get_str(str), __entry->num_msgs) ); #endif /* __RVU_TRACE_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c index c3b6e0f60a79..7f6a435ac680 100644 --- 
a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c @@ -357,9 +357,12 @@ int cn10k_free_matchall_ipolicer(struct otx2_nic *pfvf) mutex_lock(&pfvf->mbox.lock); /* Remove RQ's policer mapping */ - for (qidx = 0; qidx < hw->rx_queues; qidx++) - cn10k_map_unmap_rq_policer(pfvf, qidx, - hw->matchall_ipolicer, false); + for (qidx = 0; qidx < hw->rx_queues; qidx++) { + rc = cn10k_map_unmap_rq_policer(pfvf, qidx, hw->matchall_ipolicer, false); + if (rc) + dev_warn(pfvf->dev, "Failed to unmap RQ %d's policer (error %d).", + qidx, rc); + } rc = cn10k_free_leaf_profile(pfvf, hw->matchall_ipolicer); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index d6b4b74e4002..6a38f91bbf5e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -872,6 +872,7 @@ static struct _req_type __maybe_unused \ *otx2_mbox_alloc_msg_ ## _fn_name(struct mbox *mbox) \ { \ struct _req_type *req; \ + u16 pcifunc = mbox->pfvf->pcifunc; \ \ req = (struct _req_type *)otx2_mbox_alloc_msg_rsp( \ &mbox->mbox, 0, sizeof(struct _req_type), \ @@ -880,7 +881,8 @@ static struct _req_type __maybe_unused \ return NULL; \ req->hdr.sig = OTX2_MBOX_REQ_SIG; \ req->hdr.id = _id; \ - trace_otx2_msg_alloc(mbox->mbox.pdev, _id, sizeof(*req)); \ + req->hdr.pcifunc = pcifunc; \ + trace_otx2_msg_alloc(mbox->mbox.pdev, _id, sizeof(*req), pcifunc); \ return req; \ } @@ -1108,6 +1110,8 @@ int otx2_enable_rxvlan(struct otx2_nic *pf, bool enable); int otx2_install_rxvlan_offload_flow(struct otx2_nic *pfvf); bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, struct xdp_frame *xdpf, u64 iova, int len, u16 qidx, u16 flags); +void otx2_xdp_sqe_add_sg(struct otx2_snd_queue *sq, struct xdp_frame *xdpf, + u64 dma_addr, int len, int *offset, u16 flags); u16 otx2_get_max_mtu(struct otx2_nic *pfvf); int otx2_handle_ntuple_tc_features(struct net_device *netdev, netdev_features_t features); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index cfed9ec5b157..d79b4b30176d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -465,6 +465,9 @@ static void otx2_pfvf_mbox_handler(struct work_struct *work) offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); + trace_otx2_msg_status(pf->pdev, "PF-VF down queue handler(forwarding)", + vf_mbox->num_msgs); + for (id = 0; id < vf_mbox->num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + mbox->rx_start + offset); @@ -473,7 +476,7 @@ static void otx2_pfvf_mbox_handler(struct work_struct *work) goto inval_msg; /* Set VF's number in each of the msg */ - msg->pcifunc &= RVU_PFVF_FUNC_MASK; + msg->pcifunc &= ~RVU_PFVF_FUNC_MASK; msg->pcifunc |= (vf_idx + 1) & RVU_PFVF_FUNC_MASK; offset = msg->next_msgoff; } @@ -503,6 +506,9 @@ static void otx2_pfvf_mbox_up_handler(struct work_struct *work) offset = mbox->rx_start + ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); + trace_otx2_msg_status(pf->pdev, "PF-VF up queue handler(response)", + vf_mbox->up_num_msgs); + for (id = 0; id < vf_mbox->up_num_msgs; id++) { msg = mdev->mbase + offset; @@ -819,6 +825,9 @@ static void otx2_pfaf_mbox_handler(struct work_struct *work) offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); pf = af_mbox->pfvf; + trace_otx2_msg_status(pf->pdev, "PF-AF down queue 
handler(response)", + num_msgs); + for (id = 0; id < num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + offset); otx2_process_pfaf_mbox_msg(pf, msg); @@ -974,6 +983,9 @@ static void otx2_pfaf_mbox_up_handler(struct work_struct *work) offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); + trace_otx2_msg_status(pf->pdev, "PF-AF up queue handler(notification)", + num_msgs); + for (id = 0; id < num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + offset); @@ -1023,6 +1035,9 @@ static irqreturn_t otx2_pfaf_mbox_intr_handler(int irq, void *pf_irq) trace_otx2_msg_interrupt(pf->pdev, "UP message from AF to PF", BIT_ULL(0)); + + trace_otx2_msg_status(pf->pdev, "PF-AF up work queued(interrupt)", + hdr->num_msgs); } if (mbox_data & MBOX_DOWN_MSG) { @@ -1039,6 +1054,9 @@ static irqreturn_t otx2_pfaf_mbox_intr_handler(int irq, void *pf_irq) trace_otx2_msg_interrupt(pf->pdev, "DOWN reply from AF to PF", BIT_ULL(0)); + + trace_otx2_msg_status(pf->pdev, "PF-AF down work queued(interrupt)", + hdr->num_msgs); } return IRQ_HANDLED; @@ -3048,7 +3066,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) return err; } - err = pci_request_regions(pdev, DRV_NAME); + err = pcim_request_all_regions(pdev, DRV_NAME); if (err) { dev_err(dev, "PCI request regions failed 0x%x\n", err); return err; @@ -3057,7 +3075,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); if (err) { dev_err(dev, "DMA mask config failed, abort\n"); - goto err_release_regions; + return err; } pci_set_master(pdev); @@ -3067,10 +3085,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) qos_txqs = min_t(int, qcount, OTX2_QOS_MAX_LEAF_NODES); netdev = alloc_etherdev_mqs(sizeof(*pf), qcount + qos_txqs, qcount); - if (!netdev) { - err = -ENOMEM; - goto err_release_regions; - } + if (!netdev) + return -ENOMEM; pci_set_drvdata(pdev, netdev); SET_NETDEV_DEV(netdev, &pdev->dev); @@ -3246,8 +3262,6 @@ err_detach_rsrc: err_free_netdev: pci_set_drvdata(pdev, NULL); free_netdev(netdev); -err_release_regions: - pci_release_regions(pdev); return err; } @@ -3289,6 +3303,7 @@ static void otx2_vf_link_event_task(struct work_struct *work) req = (struct cgx_link_info_msg *)msghdr; req->hdr.id = MBOX_MSG_CGX_LINK_EVENT; req->hdr.sig = OTX2_MBOX_REQ_SIG; + req->hdr.pcifunc = pf->pcifunc; memcpy(&req->link_info, &pf->linfo, sizeof(req->link_info)); otx2_mbox_wait_for_zero(&pf->mbox_pfvf[0].mbox_up, vf_idx); @@ -3447,8 +3462,6 @@ static void otx2_remove(struct pci_dev *pdev) pci_free_irq_vectors(pf->pdev); pci_set_drvdata(pdev, NULL); free_netdev(netdev); - - pci_release_regions(pdev); } static struct pci_driver otx2_pf_driver = { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 0a6bb346ba45..9593627b35a3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -1410,9 +1410,8 @@ void otx2_free_pending_sqe(struct otx2_nic *pfvf) } } -static void otx2_xdp_sqe_add_sg(struct otx2_snd_queue *sq, - struct xdp_frame *xdpf, - u64 dma_addr, int len, int *offset, u16 flags) +void otx2_xdp_sqe_add_sg(struct otx2_snd_queue *sq, struct xdp_frame *xdpf, + u64 dma_addr, int len, int *offset, u16 flags) { struct nix_sqe_sg_s *sg = NULL; u64 *iova = NULL; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c 
b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 7ef3ba477d49..ba4ae6d9c569 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -136,7 +136,7 @@ static int otx2vf_process_mbox_msg_up(struct otx2_nic *vf, rsp->hdr.id = MBOX_MSG_CGX_LINK_EVENT; rsp->hdr.sig = OTX2_MBOX_RSP_SIG; - rsp->hdr.pcifunc = 0; + rsp->hdr.pcifunc = req->pcifunc; rsp->hdr.rc = 0; err = otx2_mbox_up_handler_cgx_link_event( vf, (struct cgx_link_info_msg *)req, rsp); @@ -548,7 +548,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) return err; } - err = pci_request_regions(pdev, DRV_NAME); + err = pcim_request_all_regions(pdev, DRV_NAME); if (err) { dev_err(dev, "PCI request regions failed 0x%x\n", err); return err; @@ -557,7 +557,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); if (err) { dev_err(dev, "DMA mask config failed, abort\n"); - goto err_release_regions; + return err; } pci_set_master(pdev); @@ -565,10 +565,8 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) qcount = num_online_cpus(); qos_txqs = min_t(int, qcount, OTX2_QOS_MAX_LEAF_NODES); netdev = alloc_etherdev_mqs(sizeof(*vf), qcount + qos_txqs, qcount); - if (!netdev) { - err = -ENOMEM; - goto err_release_regions; - } + if (!netdev) + return -ENOMEM; pci_set_drvdata(pdev, netdev); SET_NETDEV_DEV(netdev, &pdev->dev); @@ -765,8 +763,6 @@ err_free_irq_vectors: err_free_netdev: pci_set_drvdata(pdev, NULL); free_netdev(netdev); -err_release_regions: - pci_release_regions(pdev); return err; } @@ -815,8 +811,6 @@ static void otx2vf_remove(struct pci_dev *pdev) pci_free_irq_vectors(vf->pdev); pci_set_drvdata(pdev, NULL); free_netdev(netdev); - - pci_release_regions(pdev); } static struct pci_driver otx2vf_driver = { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c index ce10caea8511..b328aae23d73 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c @@ -11,6 +11,7 @@ #include <net/xdp.h> #include "otx2_common.h" +#include "otx2_struct.h" #include "otx2_xsk.h" int otx2_xsk_pool_alloc_buf(struct otx2_nic *pfvf, struct otx2_pool *pool, @@ -196,11 +197,39 @@ void otx2_attach_xsk_buff(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, int sq->xsk_pool = xsk_get_pool_from_qid(pfvf->netdev, qidx); } +static void otx2_xsk_sq_append_pkt(struct otx2_nic *pfvf, u64 iova, int len, + u16 qidx) +{ + struct nix_sqe_hdr_s *sqe_hdr; + struct otx2_snd_queue *sq; + int offset; + + sq = &pfvf->qset.sq[qidx]; + memset(sq->sqe_base + 8, 0, sq->sqe_size - 8); + + sqe_hdr = (struct nix_sqe_hdr_s *)(sq->sqe_base); + + if (!sqe_hdr->total) { + sqe_hdr->aura = sq->aura_id; + sqe_hdr->df = 1; + sqe_hdr->sq = qidx; + sqe_hdr->pnc = 1; + } + sqe_hdr->total = len; + sqe_hdr->sqe_id = sq->head; + + offset = sizeof(*sqe_hdr); + + otx2_xdp_sqe_add_sg(sq, NULL, iova, len, &offset, OTX2_AF_XDP_FRAME); + sqe_hdr->sizem1 = (offset / 16) - 1; + pfvf->hw_ops->sqe_flush(pfvf, sq, offset, qidx); +} + void otx2_zc_napi_handler(struct otx2_nic *pfvf, struct xsk_buff_pool *pool, int queue, int budget) { struct xdp_desc *xdp_desc = pool->tx_descs; - int err, i, work_done = 0, batch; + int i, batch; budget = min(budget, otx2_read_free_sqe(pfvf, queue)); batch = xsk_tx_peek_release_desc_batch(pool, budget); @@ -211,15 +240,6 @@ 
void otx2_zc_napi_handler(struct otx2_nic *pfvf, struct xsk_buff_pool *pool, dma_addr_t dma_addr; dma_addr = xsk_buff_raw_get_dma(pool, xdp_desc[i].addr); - err = otx2_xdp_sq_append_pkt(pfvf, NULL, dma_addr, xdp_desc[i].len, - queue, OTX2_AF_XDP_FRAME); - if (!err) { - netdev_err(pfvf->netdev, "AF_XDP: Unable to transfer packet err%d\n", err); - break; - } - work_done++; + otx2_xsk_sq_append_pkt(pfvf, dma_addr, xdp_desc[i].len, queue); } - - if (work_done) - xsk_tx_release(pool); } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c index 7153a71dfc86..2cd3da3b6843 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c @@ -765,7 +765,7 @@ static int rvu_rep_probe(struct pci_dev *pdev, const struct pci_device_id *id) return err; } - err = pci_request_regions(pdev, DRV_NAME); + err = pcim_request_all_regions(pdev, DRV_NAME); if (err) { dev_err(dev, "PCI request regions failed 0x%x\n", err); return err; @@ -774,7 +774,7 @@ static int rvu_rep_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); if (err) { dev_err(dev, "DMA mask config failed, abort\n"); - goto err_release_regions; + goto err_set_drv_data; } pci_set_master(pdev); @@ -782,7 +782,7 @@ static int rvu_rep_probe(struct pci_dev *pdev, const struct pci_device_id *id) priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) { err = -ENOMEM; - goto err_release_regions; + goto err_set_drv_data; } pci_set_drvdata(pdev, priv); @@ -799,7 +799,7 @@ static int rvu_rep_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = otx2_init_rsrc(pdev, priv); if (err) - goto err_release_regions; + goto err_set_drv_data; priv->iommu_domain = iommu_get_domain_for_dev(dev); @@ -822,9 +822,8 @@ err_detach_rsrc: otx2_disable_mbox_intr(priv); otx2_pfaf_mbox_destroy(priv); pci_free_irq_vectors(pdev); -err_release_regions: +err_set_drv_data: pci_set_drvdata(pdev, NULL); - pci_release_regions(pdev); return err; } @@ -844,7 +843,6 @@ static void rvu_rep_remove(struct pci_dev *pdev) otx2_pfaf_mbox_destroy(priv); pci_free_irq_vectors(priv->pdev); pci_set_drvdata(pdev, NULL); - pci_release_regions(pdev); } static struct pci_driver rvu_rep_driver = { diff --git a/drivers/net/ethernet/marvell/prestera/prestera_counter.c b/drivers/net/ethernet/marvell/prestera/prestera_counter.c index 4cd53a2dae46..634f4543c1d7 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_counter.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_counter.c @@ -336,8 +336,7 @@ prestera_counter_block_get_by_idx(struct prestera_counter *counter, u32 idx) static void prestera_counter_stats_work(struct work_struct *work) { - struct delayed_work *dl_work = - container_of(work, struct delayed_work, work); + struct delayed_work *dl_work = to_delayed_work(work); struct prestera_counter *counter = container_of(dl_work, struct prestera_counter, stats_dw); struct prestera_counter_block *block; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c index 35857dc19542..c45d108b2f6d 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c @@ -845,9 +845,9 @@ static int prestera_pci_probe(struct pci_dev *pdev, goto err_pci_enable_device; } - err = pci_request_regions(pdev, driver_name); + err = pcim_request_all_regions(pdev, driver_name); if (err) { - 
dev_err(&pdev->dev, "pci_request_regions failed\n"); + dev_err(&pdev->dev, "pcim_request_all_regions failed\n"); goto err_pci_request_regions; } @@ -938,7 +938,6 @@ err_pci_dev_alloc: err_pp_ioremap: err_mem_ioremap: err_dma_mask: - pci_release_regions(pdev); err_pci_request_regions: err_pci_enable_device: return err; @@ -953,7 +952,6 @@ static void prestera_pci_remove(struct pci_dev *pdev) pci_free_irq_vectors(pdev); destroy_workqueue(fw->wq); prestera_fw_uninit(fw); - pci_release_regions(pdev); } static const struct pci_device_id prestera_pci_devices[] = { diff --git a/drivers/net/ethernet/mediatek/mtk_eth_path.c b/drivers/net/ethernet/mediatek/mtk_eth_path.c index 7c27a19c4d8f..b4c01e2878f6 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_path.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_path.c @@ -14,7 +14,7 @@ struct mtk_eth_muxc { const char *name; - int cap_bit; + u64 cap_bit; int (*set_path)(struct mtk_eth *eth, u64 path); }; @@ -31,6 +31,8 @@ static const char *mtk_eth_path_name(u64 path) return "gmac2_rgmii"; case MTK_ETH_PATH_GMAC2_SGMII: return "gmac2_sgmii"; + case MTK_ETH_PATH_GMAC2_2P5GPHY: + return "gmac2_2p5gphy"; case MTK_ETH_PATH_GMAC2_GEPHY: return "gmac2_gephy"; case MTK_ETH_PATH_GDM1_ESW: @@ -127,6 +129,29 @@ static int set_mux_u3_gmac2_to_qphy(struct mtk_eth *eth, u64 path) return 0; } +static int set_mux_gmac2_to_2p5gphy(struct mtk_eth *eth, u64 path) +{ + int ret; + + if (path == MTK_ETH_PATH_GMAC2_2P5GPHY) { + ret = regmap_clear_bits(eth->ethsys, ETHSYS_SYSCFG0, + SYSCFG0_SGMII_GMAC2_V2); + if (ret) + return ret; + + /* Setup mux to 2p5g PHY */ + ret = regmap_clear_bits(eth->infra, TOP_MISC_NETSYS_PCS_MUX, + MUX_G2_USXGMII_SEL); + if (ret) + return ret; + + dev_dbg(eth->dev, "path %s in %s updated\n", + mtk_eth_path_name(path), __func__); + } + + return 0; +} + static int set_mux_gmac1_gmac2_to_sgmii_rgmii(struct mtk_eth *eth, u64 path) { unsigned int val = 0; @@ -210,6 +235,10 @@ static const struct mtk_eth_muxc mtk_eth_muxc[] = { .cap_bit = MTK_ETH_MUX_U3_GMAC2_TO_QPHY, .set_path = set_mux_u3_gmac2_to_qphy, }, { + .name = "mux_gmac2_to_2p5gphy", + .cap_bit = MTK_ETH_MUX_GMAC2_TO_2P5GPHY, + .set_path = set_mux_gmac2_to_2p5gphy, + }, { .name = "mux_gmac1_gmac2_to_sgmii_rgmii", .cap_bit = MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII, .set_path = set_mux_gmac1_gmac2_to_sgmii_rgmii, @@ -260,6 +289,20 @@ int mtk_gmac_sgmii_path_setup(struct mtk_eth *eth, int mac_id) return mtk_eth_mux_setup(eth, path); } +int mtk_gmac_2p5gphy_path_setup(struct mtk_eth *eth, int mac_id) +{ + u64 path = 0; + + if (mac_id == MTK_GMAC2_ID) + path = MTK_ETH_PATH_GMAC2_2P5GPHY; + + if (!path) + return -EINVAL; + + /* Setup proper MUXes along the path */ + return mtk_eth_mux_setup(eth, path); +} + int mtk_gmac_gephy_path_setup(struct mtk_eth *eth, int mac_id) { u64 path = 0; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 6c92072b4c28..b38e4f2de674 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -503,7 +503,7 @@ static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, static void mtk_setup_bridge_switch(struct mtk_eth *eth) { /* Force Port1 XGMAC Link Up */ - mtk_m32(eth, 0, MTK_XGMAC_FORCE_LINK(MTK_GMAC1_ID), + mtk_m32(eth, 0, MTK_XGMAC_FORCE_MODE(MTK_GMAC1_ID), MTK_XGMAC_STS(MTK_GMAC1_ID)); /* Adjust GSW bridge IPG to 11 */ @@ -532,6 +532,26 @@ static struct phylink_pcs *mtk_mac_select_pcs(struct phylink_config *config, return NULL; } +static int mtk_mac_prepare(struct 
phylink_config *config, unsigned int mode, + phy_interface_t iface) +{ + struct mtk_mac *mac = container_of(config, struct mtk_mac, + phylink_config); + struct mtk_eth *eth = mac->hw; + + if (mtk_interface_mode_is_xgmii(eth, iface) && + mac->id != MTK_GMAC1_ID) { + mtk_m32(mac->hw, XMAC_MCR_TRX_DISABLE, + XMAC_MCR_TRX_DISABLE, MTK_XMAC_MCR(mac->id)); + + mtk_m32(mac->hw, MTK_XGMAC_FORCE_MODE(mac->id) | + MTK_XGMAC_FORCE_LINK(mac->id), + MTK_XGMAC_FORCE_MODE(mac->id), MTK_XGMAC_STS(mac->id)); + } + + return 0; +} + static void mtk_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) { @@ -573,6 +593,12 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, } break; case PHY_INTERFACE_MODE_INTERNAL: + if (mac->id == MTK_GMAC2_ID && + MTK_HAS_CAPS(eth->soc->caps, MTK_2P5GPHY)) { + err = mtk_gmac_2p5gphy_path_setup(eth, mac->id); + if (err) + goto init_err; + } break; default: goto err_phy; @@ -644,12 +670,12 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, } /* Setup gmac */ - if (mtk_is_netsys_v3_or_greater(eth) && - mac->interface == PHY_INTERFACE_MODE_INTERNAL) { + if (mtk_interface_mode_is_xgmii(eth, state->interface)) { mtk_w32(mac->hw, MTK_GDMA_XGDM_SEL, MTK_GDMA_EG_CTRL(mac->id)); mtk_w32(mac->hw, MAC_MCR_FORCE_LINK_DOWN, MTK_MAC_MCR(mac->id)); - mtk_setup_bridge_switch(eth); + if (mac->id == MTK_GMAC1_ID) + mtk_setup_bridge_switch(eth); } return; @@ -696,10 +722,19 @@ static void mtk_mac_link_down(struct phylink_config *config, unsigned int mode, { struct mtk_mac *mac = container_of(config, struct mtk_mac, phylink_config); - u32 mcr = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); - mcr &= ~(MAC_MCR_TX_EN | MAC_MCR_RX_EN | MAC_MCR_FORCE_LINK); - mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); + if (!mtk_interface_mode_is_xgmii(mac->hw, interface)) { + /* GMAC modes */ + mtk_m32(mac->hw, + MAC_MCR_TX_EN | MAC_MCR_RX_EN | MAC_MCR_FORCE_LINK, 0, + MTK_MAC_MCR(mac->id)); + } else if (mac->id != MTK_GMAC1_ID) { + /* XGMAC except for built-in switch */ + mtk_m32(mac->hw, XMAC_MCR_TRX_DISABLE, XMAC_MCR_TRX_DISABLE, + MTK_XMAC_MCR(mac->id)); + mtk_m32(mac->hw, MTK_XGMAC_FORCE_LINK(mac->id), 0, + MTK_XGMAC_STS(mac->id)); + } } static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx, @@ -771,13 +806,12 @@ static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx, mtk_w32(eth, val, soc->reg_map->qdma.qtx_sch + ofs); } -static void mtk_mac_link_up(struct phylink_config *config, - struct phy_device *phy, - unsigned int mode, phy_interface_t interface, - int speed, int duplex, bool tx_pause, bool rx_pause) +static void mtk_gdm_mac_link_up(struct mtk_mac *mac, + struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, bool tx_pause, + bool rx_pause) { - struct mtk_mac *mac = container_of(config, struct mtk_mac, - phylink_config); u32 mcr; mcr = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); @@ -811,6 +845,56 @@ static void mtk_mac_link_up(struct phylink_config *config, mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); } +static void mtk_xgdm_mac_link_up(struct mtk_mac *mac, + struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, bool tx_pause, + bool rx_pause) +{ + u32 mcr; + + if (mac->id == MTK_GMAC1_ID) + return; + + /* Eliminate the interference (before link-up) caused by PHY noise */ + mtk_m32(mac->hw, XMAC_LOGIC_RST, 0, MTK_XMAC_LOGIC_RST(mac->id)); + mdelay(20); + mtk_m32(mac->hw, XMAC_GLB_CNTCLR,
XMAC_GLB_CNTCLR, + MTK_XMAC_CNT_CTRL(mac->id)); + + mtk_m32(mac->hw, MTK_XGMAC_FORCE_LINK(mac->id), + MTK_XGMAC_FORCE_LINK(mac->id), MTK_XGMAC_STS(mac->id)); + + mcr = mtk_r32(mac->hw, MTK_XMAC_MCR(mac->id)); + mcr &= ~(XMAC_MCR_FORCE_TX_FC | XMAC_MCR_FORCE_RX_FC | + XMAC_MCR_TRX_DISABLE); + /* Configure pause modes - + * phylink will avoid these for half duplex + */ + if (tx_pause) + mcr |= XMAC_MCR_FORCE_TX_FC; + if (rx_pause) + mcr |= XMAC_MCR_FORCE_RX_FC; + + mtk_w32(mac->hw, mcr, MTK_XMAC_MCR(mac->id)); +} + +static void mtk_mac_link_up(struct phylink_config *config, + struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, bool tx_pause, bool rx_pause) +{ + struct mtk_mac *mac = container_of(config, struct mtk_mac, + phylink_config); + + if (mtk_interface_mode_is_xgmii(mac->hw, interface)) + mtk_xgdm_mac_link_up(mac, phy, mode, interface, speed, duplex, + tx_pause, rx_pause); + else + mtk_gdm_mac_link_up(mac, phy, mode, interface, speed, duplex, + tx_pause, rx_pause); +} + static void mtk_mac_disable_tx_lpi(struct phylink_config *config) { struct mtk_mac *mac = container_of(config, struct mtk_mac, @@ -828,6 +912,9 @@ static int mtk_mac_enable_tx_lpi(struct phylink_config *config, u32 timer, struct mtk_eth *eth = mac->hw; u32 val; + if (mtk_interface_mode_is_xgmii(eth, mac->interface)) + return -EOPNOTSUPP; + /* Tx idle timer in ms */ timer = DIV_ROUND_UP(timer, 1000); @@ -858,6 +945,7 @@ static int mtk_mac_enable_tx_lpi(struct phylink_config *config, u32 timer, } static const struct phylink_mac_ops mtk_phylink_ops = { + .mac_prepare = mtk_mac_prepare, .mac_select_pcs = mtk_mac_select_pcs, .mac_config = mtk_mac_config, .mac_finish = mtk_mac_finish, @@ -4768,6 +4856,11 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) mac->phylink = phylink; + if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_2P5GPHY) && + id == MTK_GMAC2_ID) + __set_bit(PHY_INTERFACE_MODE_INTERNAL, + mac->phylink_config.supported_interfaces); + SET_NETDEV_DEV(eth->netdev[id], eth->dev); eth->netdev[id]->watchdog_timeo = 5 * HZ; eth->netdev[id]->netdev_ops = &mtk_netdev_ops; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 88ef2e9c50fc..2d4b9964d3db 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -431,7 +431,8 @@ /* XMAC status registers */ #define MTK_XGMAC_STS(x) (((x) == MTK_GMAC3_ID) ? 0x1001C : 0x1000C) -#define MTK_XGMAC_FORCE_LINK(x) (((x) == MTK_GMAC2_ID) ? BIT(31) : BIT(15)) +#define MTK_XGMAC_FORCE_MODE(x) (((x) == MTK_GMAC2_ID) ? BIT(31) : BIT(15)) +#define MTK_XGMAC_FORCE_LINK(x) (((x) == MTK_GMAC2_ID) ? 
BIT(27) : BIT(11)) #define MTK_USXGMII_PCS_LINK BIT(8) #define MTK_XGMAC_RX_FC BIT(5) #define MTK_XGMAC_TX_FC BIT(4) @@ -524,6 +525,21 @@ #define INTF_MODE_RGMII_1000 (TRGMII_MODE | TRGMII_CENTRAL_ALIGNED) #define INTF_MODE_RGMII_10_100 0 +/* XFI Mac control registers */ +#define MTK_XMAC_BASE(x) (0x12000 + (((x) - 1) * 0x1000)) +#define MTK_XMAC_MCR(x) (MTK_XMAC_BASE(x)) +#define XMAC_MCR_TRX_DISABLE 0xf +#define XMAC_MCR_FORCE_TX_FC BIT(5) +#define XMAC_MCR_FORCE_RX_FC BIT(4) + +/* XFI Mac logic reset registers */ +#define MTK_XMAC_LOGIC_RST(x) (MTK_XMAC_BASE(x) + 0x10) +#define XMAC_LOGIC_RST BIT(0) + +/* XFI Mac count global control */ +#define MTK_XMAC_CNT_CTRL(x) (MTK_XMAC_BASE(x) + 0x100) +#define XMAC_GLB_CNTCLR BIT(0) + /* GPIO port control registers for GMAC 2*/ #define GPIO_OD33_CTRL8 0x4c0 #define GPIO_BIAS_CTRL 0xed0 @@ -587,6 +603,10 @@ #define GEPHY_MAC_SEL BIT(1) /* Top misc registers */ +#define TOP_MISC_NETSYS_PCS_MUX 0x0 +#define NETSYS_PCS_MUX_MASK GENMASK(1, 0) +#define MUX_G2_USXGMII_SEL BIT(1) + #define USB_PHY_SWITCH_REG 0x218 #define QPHY_SEL_MASK GENMASK(1, 0) #define SGMII_QPHY_SEL 0x2 @@ -951,6 +971,7 @@ enum mkt_eth_capabilities { MTK_RGMII_BIT = 0, MTK_TRGMII_BIT, MTK_SGMII_BIT, + MTK_2P5GPHY_BIT, MTK_ESW_BIT, MTK_GEPHY_BIT, MTK_MUX_BIT, @@ -971,6 +992,7 @@ enum mkt_eth_capabilities { MTK_ETH_MUX_GDM1_TO_GMAC1_ESW_BIT, MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY_BIT, MTK_ETH_MUX_U3_GMAC2_TO_QPHY_BIT, + MTK_ETH_MUX_GMAC2_TO_2P5GPHY_BIT, MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII_BIT, MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII_BIT, @@ -980,6 +1002,7 @@ enum mkt_eth_capabilities { MTK_ETH_PATH_GMAC1_SGMII_BIT, MTK_ETH_PATH_GMAC2_RGMII_BIT, MTK_ETH_PATH_GMAC2_SGMII_BIT, + MTK_ETH_PATH_GMAC2_2P5GPHY_BIT, MTK_ETH_PATH_GMAC2_GEPHY_BIT, MTK_ETH_PATH_GDM1_ESW_BIT, }; @@ -988,6 +1011,7 @@ enum mkt_eth_capabilities { #define MTK_RGMII BIT_ULL(MTK_RGMII_BIT) #define MTK_TRGMII BIT_ULL(MTK_TRGMII_BIT) #define MTK_SGMII BIT_ULL(MTK_SGMII_BIT) +#define MTK_2P5GPHY BIT_ULL(MTK_2P5GPHY_BIT) #define MTK_ESW BIT_ULL(MTK_ESW_BIT) #define MTK_GEPHY BIT_ULL(MTK_GEPHY_BIT) #define MTK_MUX BIT_ULL(MTK_MUX_BIT) @@ -1010,6 +1034,8 @@ enum mkt_eth_capabilities { BIT_ULL(MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY_BIT) #define MTK_ETH_MUX_U3_GMAC2_TO_QPHY \ BIT_ULL(MTK_ETH_MUX_U3_GMAC2_TO_QPHY_BIT) +#define MTK_ETH_MUX_GMAC2_TO_2P5GPHY \ + BIT_ULL(MTK_ETH_MUX_GMAC2_TO_2P5GPHY_BIT) #define MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII \ BIT_ULL(MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII_BIT) #define MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII \ @@ -1021,6 +1047,7 @@ enum mkt_eth_capabilities { #define MTK_ETH_PATH_GMAC1_SGMII BIT_ULL(MTK_ETH_PATH_GMAC1_SGMII_BIT) #define MTK_ETH_PATH_GMAC2_RGMII BIT_ULL(MTK_ETH_PATH_GMAC2_RGMII_BIT) #define MTK_ETH_PATH_GMAC2_SGMII BIT_ULL(MTK_ETH_PATH_GMAC2_SGMII_BIT) +#define MTK_ETH_PATH_GMAC2_2P5GPHY BIT_ULL(MTK_ETH_PATH_GMAC2_2P5GPHY_BIT) #define MTK_ETH_PATH_GMAC2_GEPHY BIT_ULL(MTK_ETH_PATH_GMAC2_GEPHY_BIT) #define MTK_ETH_PATH_GDM1_ESW BIT_ULL(MTK_ETH_PATH_GDM1_ESW_BIT) @@ -1030,6 +1057,7 @@ enum mkt_eth_capabilities { #define MTK_GMAC2_RGMII (MTK_ETH_PATH_GMAC2_RGMII | MTK_RGMII) #define MTK_GMAC2_SGMII (MTK_ETH_PATH_GMAC2_SGMII | MTK_SGMII) #define MTK_GMAC2_GEPHY (MTK_ETH_PATH_GMAC2_GEPHY | MTK_GEPHY) +#define MTK_GMAC2_2P5GPHY (MTK_ETH_PATH_GMAC2_2P5GPHY | MTK_2P5GPHY) #define MTK_GDM1_ESW (MTK_ETH_PATH_GDM1_ESW | MTK_ESW) /* MUXes present on SoCs */ @@ -1049,6 +1077,10 @@ enum mkt_eth_capabilities { (MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII | MTK_MUX | \ MTK_SHARED_SGMII) +/* 2: GMAC2 -> 2P5GPHY */ 
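+/* Illustrative sketch (editor's assumption, not part of this change): + * MTK_MUX_GMAC2_TO_2P5GPHY bundles the mux selector with the MTK_MUX and + * MTK_INFRA capability bits, so callers only need the per-path helper, as + * mtk_mac_config() does for the internal PHY: + * + *	if (mac->id == MTK_GMAC2_ID && MTK_HAS_CAPS(eth->soc->caps, MTK_2P5GPHY)) + *		err = mtk_gmac_2p5gphy_path_setup(eth, mac->id); + */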
+#define MTK_MUX_GMAC2_TO_2P5GPHY \ + (MTK_ETH_MUX_GMAC2_TO_2P5GPHY | MTK_MUX | MTK_INFRA) + /* 0: GMACx -> GEPHY, 1: GMACx -> SGMII where x is 1 or 2 */ #define MTK_MUX_GMAC12_TO_GEPHY_SGMII \ (MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII | MTK_MUX) @@ -1084,8 +1116,9 @@ enum mkt_eth_capabilities { MTK_MUX_GMAC12_TO_GEPHY_SGMII | MTK_QDMA | \ MTK_RSTCTRL_PPE1 | MTK_SRAM) -#define MT7988_CAPS (MTK_36BIT_DMA | MTK_GDM1_ESW | MTK_QDMA | \ - MTK_RSTCTRL_PPE1 | MTK_RSTCTRL_PPE2 | MTK_SRAM) +#define MT7988_CAPS (MTK_36BIT_DMA | MTK_GDM1_ESW | MTK_GMAC2_2P5GPHY | \ + MTK_MUX_GMAC2_TO_2P5GPHY | MTK_QDMA | MTK_RSTCTRL_PPE1 | \ + MTK_RSTCTRL_PPE2 | MTK_SRAM) struct mtk_tx_dma_desc_info { dma_addr_t addr; @@ -1437,6 +1470,23 @@ static inline u32 mtk_get_ib2_multicast_mask(struct mtk_eth *eth) return MTK_FOE_IB2_MULTICAST; } +static inline bool mtk_interface_mode_is_xgmii(struct mtk_eth *eth, + phy_interface_t interface) +{ + if (!mtk_is_netsys_v3_or_greater(eth)) + return false; + + switch (interface) { + case PHY_INTERFACE_MODE_INTERNAL: + case PHY_INTERFACE_MODE_USXGMII: + case PHY_INTERFACE_MODE_10GBASER: + case PHY_INTERFACE_MODE_5GBASER: + return true; + default: + return false; + } +} + /* read the hardware status register */ void mtk_stats_update_mac(struct mtk_mac *mac); @@ -1445,6 +1495,7 @@ u32 mtk_r32(struct mtk_eth *eth, unsigned reg); u32 mtk_m32(struct mtk_eth *eth, u32 mask, u32 set, unsigned int reg); int mtk_gmac_sgmii_path_setup(struct mtk_eth *eth, int mac_id); +int mtk_gmac_2p5gphy_path_setup(struct mtk_eth *eth, int mac_id); int mtk_gmac_gephy_path_setup(struct mtk_eth *eth, int mac_id); int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index cd17a3f4faf8..a68cd3f0304c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1897,6 +1897,7 @@ static int mlx4_en_get_ts_info(struct net_device *dev, if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { info->so_timestamping |= SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index d7444782bfdd..698a5d1f0d7e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -106,7 +106,7 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) buddy->max_order = max_order; spin_lock_init(&buddy->lock); - buddy->bits = kcalloc(buddy->max_order + 1, sizeof(long *), + buddy->bits = kcalloc(buddy->max_order + 1, sizeof(*buddy->bits), GFP_KERNEL); buddy->num_free = kcalloc(buddy->max_order + 1, sizeof(*buddy->num_free), GFP_KERNEL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 568bbe5f83f5..d292e6a9e22c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -154,7 +154,8 @@ mlx5_core-$(CONFIG_MLX5_HW_STEERING) += steering/hws/cmd.o \ steering/hws/vport.o \ steering/hws/bwc_complex.o \ steering/hws/fs_hws_pools.o \ - steering/hws/fs_hws.o + steering/hws/fs_hws.o \ + steering/hws/action_ste_pool.o # # SF device diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index e53dbdc0a7a1..b1aeea7c4a91 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -927,8 +927,7 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force static void cb_timeout_handler(struct work_struct *work) { - struct delayed_work *dwork = container_of(work, struct delayed_work, - work); + struct delayed_work *dwork = to_delayed_work(work); struct mlx5_cmd_work_ent *ent = container_of(dwork, struct mlx5_cmd_work_ent, cb_timeout_work); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index f803e1c93590..5ce1b463b7a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -707,8 +707,8 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); page = xdpi.page.page; - /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) - * as we know this is a page_pool page. + /* No need to check page_pool_page_is_pp() as we + * know this is a page_pool page. */ page_pool_recycle_direct(page->pp, page); } while (++n < num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index fdf9e9bb99ac..e399d7a3d6cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1689,6 +1689,7 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, return 0; info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 1c121b435016..19664fa7f217 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -2424,8 +2424,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp) } if (priv->rx_ptp_opened) { for (i = 0; i < NUM_PTP_RQ_STATS; i++) - ethtool_sprintf(data, ptp_rq_stats_desc[i].format, - MLX5E_PTP_CHANNEL_IX); + ethtool_puts(data, ptp_rq_stats_desc[i].format); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 8de6fcbd3a03..def5dea1463d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -54,7 +54,7 @@ #define MLX5E_DECLARE_PTP_TX_STAT(type, fld) "ptp_tx%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld) -#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq0_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 4fd853d19e31..55a8629f0792 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -337,10 +337,11 @@ static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_at }; } -static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb) +static void mlx5e_tx_skb_update_ts_flags(struct sk_buff *skb) { if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + 
skb_tx_timestamp(skb); } static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq) @@ -392,7 +393,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt); - mlx5e_tx_skb_update_hwts_flags(skb); + mlx5e_tx_skb_update_ts_flags(skb); sq->pc += wi->num_wqebbs; @@ -625,7 +626,7 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE); mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb); mlx5e_tx_mpwqe_add_dseg(sq, &txd); - mlx5e_tx_skb_update_hwts_flags(skb); + mlx5e_tx_skb_update_ts_flags(skb); if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) { /* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 65a94e46edcf..cec18efadc73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -643,13 +643,6 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, int pin = -1; int err = 0; - /* Reject requests with unsupported flags */ - if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_FALLING_EDGE | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - /* Reject requests to enable time stamping on both edges. */ if ((rq->extts.flags & PTP_STRICT_FLAGS) && (rq->extts.flags & PTP_ENABLE_FEATURE) && @@ -820,12 +813,6 @@ static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clo return 0; } -static bool mlx5_perout_verify_flags(struct mlx5_core_dev *mdev, unsigned int flags) -{ - return ((!mlx5_npps_real_time_supported(mdev) && flags) || - (mlx5_npps_real_time_supported(mdev) && flags & ~PTP_PEROUT_DUTY_CYCLE)); -} - static int mlx5_perout_configure(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on) @@ -861,12 +848,6 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, goto unlock; } - /* Reject requests with unsupported flags */ - if (mlx5_perout_verify_flags(mdev, rq->perout.flags)) { - err = -EOPNOTSUPP; - goto unlock; - } - if (on) { pin_mode = MLX5_PIN_MODE_OUT; pattern = MLX5_OUT_PATTERN_PERIODIC; @@ -1034,6 +1015,13 @@ static void mlx5_init_pin_config(struct mlx5_core_dev *mdev) clock->ptp_info.verify = mlx5_ptp_verify; clock->ptp_info.pps = 1; + clock->ptp_info.supported_extts_flags = PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS; + + if (mlx5_npps_real_time_supported(mdev)) + clock->ptp_info.supported_perout_flags = PTP_PEROUT_DUTY_CYCLE; + for (i = 0; i < clock->ptp_info.n_pins; i++) { snprintf(clock->ptp_info.pin_config[i].name, sizeof(clock->ptp_info.pin_config[i].name), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 2c5f850c31f6..40024cfa3099 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -148,7 +148,7 @@ out: * Free the IRQ and other resources such as rmap from the system. * BUT doesn't free or remove reference from mlx5. * This function is very important for the shutdown flow, where we need to - * cleanup system resoruces but keep mlx5 objects alive, + * cleanup system resources but keep mlx5 objects alive, * see mlx5_irq_table_free_irqs(). 
*/ static void mlx5_system_free_irq(struct mlx5_irq *irq) @@ -588,7 +588,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool) struct mlx5_irq *irq; unsigned long index; - /* There are cases in which we are destrying the irq_table before + /* There are cases in which we are destroying the irq_table before * freeing all the IRQs, fast teardown for example. Hence, free the irqs * which might not have been freed. */ @@ -617,7 +617,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec, if (!mlx5_sf_max_functions(dev)) return 0; if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) { - mlx5_core_dbg(dev, "Not enught IRQs for SFs. SF may run at lower performance\n"); + mlx5_core_dbg(dev, "Not enough IRQs for SFs. SF may run at lower performance\n"); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c index b5332c54d4fb..bef4d25c1a2a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c @@ -238,6 +238,7 @@ hws_action_fixup_stc_attr(struct mlx5hws_context *ctx, enum mlx5hws_table_type table_type, bool is_mirror) { + struct mlx5hws_pool *pool; bool use_fixup = false; u32 fw_tbl_type; u32 base_id; @@ -253,13 +254,11 @@ hws_action_fixup_stc_attr(struct mlx5hws_context *ctx, use_fixup = true; break; } + pool = stc_attr->ste_table.ste_pool; if (!is_mirror) - base_id = mlx5hws_pool_chunk_get_base_id(stc_attr->ste_table.ste_pool, - &stc_attr->ste_table.ste); + base_id = mlx5hws_pool_get_base_id(pool); else - base_id = - mlx5hws_pool_chunk_get_base_mirror_id(stc_attr->ste_table.ste_pool, - &stc_attr->ste_table.ste); + base_id = mlx5hws_pool_get_base_mirror_id(pool); *fixup_stc_attr = *stc_attr; fixup_stc_attr->ste_table.ste_obj_id = base_id; @@ -337,7 +336,7 @@ __must_hold(&ctx->ctrl_lock) if (!mlx5hws_context_cap_dynamic_reparse(ctx)) stc_attr->reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; - obj_0_id = mlx5hws_pool_chunk_get_base_id(stc_pool, stc); + obj_0_id = mlx5hws_pool_get_base_id(stc_pool); /* According to table/action limitation change the stc_attr */ use_fixup = hws_action_fixup_stc_attr(ctx, stc_attr, &fixup_stc_attr, table_type, false); @@ -353,7 +352,7 @@ __must_hold(&ctx->ctrl_lock) if (table_type == MLX5HWS_TABLE_TYPE_FDB) { u32 obj_1_id; - obj_1_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, stc); + obj_1_id = mlx5hws_pool_get_base_mirror_id(stc_pool); use_fixup = hws_action_fixup_stc_attr(ctx, stc_attr, &fixup_stc_attr, @@ -393,11 +392,11 @@ __must_hold(&ctx->ctrl_lock) stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_DROP; stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; stc_attr.stc_offset = stc->offset; - obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, stc); + obj_id = mlx5hws_pool_get_base_id(stc_pool); mlx5hws_cmd_stc_modify(ctx->mdev, obj_id, &stc_attr); if (table_type == MLX5HWS_TABLE_TYPE_FDB) { - obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, stc); + obj_id = mlx5hws_pool_get_base_mirror_id(stc_pool); mlx5hws_cmd_stc_modify(ctx->mdev, obj_id, &stc_attr); } @@ -1575,17 +1574,15 @@ hws_action_create_dest_match_range_definer(struct mlx5hws_context *ctx) return definer; } -static struct mlx5hws_matcher_action_ste * +static struct mlx5hws_range_action_table * hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, struct mlx5hws_definer *definer, u32 miss_ft_id) { struct mlx5hws_cmd_rtc_create_attr rtc_attr = {0}; - struct mlx5hws_action_default_stc 
*default_stc; - struct mlx5hws_matcher_action_ste *table_ste; + struct mlx5hws_range_action_table *table_ste; struct mlx5hws_pool_attr pool_attr = {0}; struct mlx5hws_pool *ste_pool, *stc_pool; - struct mlx5hws_pool_chunk *ste; u32 *rtc_0_id, *rtc_1_id; u32 obj_id; int ret; @@ -1604,7 +1601,6 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, pool_attr.table_type = MLX5HWS_TABLE_TYPE_FDB; pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE; - pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL; pool_attr.alloc_log_sz = 1; table_ste->pool = mlx5hws_pool_create(ctx, &pool_attr); if (!table_ste->pool) { @@ -1616,8 +1612,6 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, rtc_0_id = &table_ste->rtc_0_id; rtc_1_id = &table_ste->rtc_1_id; ste_pool = table_ste->pool; - ste = &table_ste->ste; - ste->order = 1; rtc_attr.log_size = 0; rtc_attr.log_depth = 0; @@ -1629,18 +1623,16 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, rtc_attr.fw_gen_wqe = true; rtc_attr.is_scnd_range = true; - obj_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste); + obj_id = mlx5hws_pool_get_base_id(ste_pool); rtc_attr.pd = ctx->pd_num; rtc_attr.ste_base = obj_id; - rtc_attr.ste_offset = ste->offset; rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx); rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(MLX5HWS_TABLE_TYPE_FDB, false); /* STC is a single resource (obj_id), use any STC for the ID */ stc_pool = ctx->stc_pool; - default_stc = ctx->common_res.default_stc; - obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, &default_stc->default_hit); + obj_id = mlx5hws_pool_get_base_id(stc_pool); rtc_attr.stc_base = obj_id; ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_0_id); @@ -1650,11 +1642,11 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, } /* Create mirror RTC */ - obj_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste); + obj_id = mlx5hws_pool_get_base_mirror_id(ste_pool); rtc_attr.ste_base = obj_id; rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(MLX5HWS_TABLE_TYPE_FDB, true); - obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, &default_stc->default_hit); + obj_id = mlx5hws_pool_get_base_mirror_id(stc_pool); rtc_attr.stc_base = obj_id; ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_1_id); @@ -1677,9 +1669,9 @@ free_ste: return NULL; } -static void -hws_action_destroy_dest_match_range_table(struct mlx5hws_context *ctx, - struct mlx5hws_matcher_action_ste *table_ste) +static void hws_action_destroy_dest_match_range_table( + struct mlx5hws_context *ctx, + struct mlx5hws_range_action_table *table_ste) { mutex_lock(&ctx->ctrl_lock); @@ -1691,12 +1683,11 @@ hws_action_destroy_dest_match_range_table(struct mlx5hws_context *ctx, mutex_unlock(&ctx->ctrl_lock); } -static int -hws_action_create_dest_match_range_fill_table(struct mlx5hws_context *ctx, - struct mlx5hws_matcher_action_ste *table_ste, - struct mlx5hws_action *hit_ft_action, - struct mlx5hws_definer *range_definer, - u32 min, u32 max) +static int hws_action_create_dest_match_range_fill_table( + struct mlx5hws_context *ctx, + struct mlx5hws_range_action_table *table_ste, + struct mlx5hws_action *hit_ft_action, + struct mlx5hws_definer *range_definer, u32 min, u32 max) { struct mlx5hws_wqe_gta_data_seg_ste match_wqe_data = {0}; struct mlx5hws_wqe_gta_data_seg_ste range_wqe_data = {0}; @@ -1792,7 +1783,7 @@ mlx5hws_action_create_dest_match_range(struct mlx5hws_context *ctx, u32 min, u32 max, u32 flags) { struct 
mlx5hws_cmd_stc_modify_attr stc_attr = {0}; - struct mlx5hws_matcher_action_ste *table_ste; + struct mlx5hws_range_action_table *table_ste; struct mlx5hws_action *hit_ft_action; struct mlx5hws_definer *definer; struct mlx5hws_action *action; @@ -1837,7 +1828,6 @@ mlx5hws_action_create_dest_match_range(struct mlx5hws_context *ctx, stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE; stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; - stc_attr.ste_table.ste = table_ste->ste; stc_attr.ste_table.ste_pool = table_ste->pool; stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h index 64b76075f7f8..25fa0d4c9221 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h @@ -118,6 +118,12 @@ struct mlx5hws_action_template { u8 only_term; }; +struct mlx5hws_range_action_table { + struct mlx5hws_pool *pool; + u32 rtc_0_id; + u32 rtc_1_id; +}; + struct mlx5hws_action { u8 type; u8 flags; @@ -186,7 +192,7 @@ struct mlx5hws_action { size_t size; } remove_header; struct { - struct mlx5hws_matcher_action_ste *table_ste; + struct mlx5hws_range_action_table *table_ste; struct mlx5hws_action *hit_ft_action; struct mlx5hws_definer *definer; } range; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c new file mode 100644 index 000000000000..5766a9c82f96 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c @@ -0,0 +1,467 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */ + +#include "internal.h" + +static const char * +hws_pool_opt_to_str(enum mlx5hws_pool_optimize opt) +{ + switch (opt) { + case MLX5HWS_POOL_OPTIMIZE_NONE: + return "rx-and-tx"; + case MLX5HWS_POOL_OPTIMIZE_ORIG: + return "rx-only"; + case MLX5HWS_POOL_OPTIMIZE_MIRROR: + return "tx-only"; + default: + return "unknown"; + } +} + +static int +hws_action_ste_table_create_pool(struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_table *action_tbl, + enum mlx5hws_pool_optimize opt, size_t log_sz) +{ + struct mlx5hws_pool_attr pool_attr = { 0 }; + + pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE; + pool_attr.table_type = MLX5HWS_TABLE_TYPE_FDB; + pool_attr.flags = MLX5HWS_POOL_FLAG_BUDDY; + pool_attr.opt_type = opt; + pool_attr.alloc_log_sz = log_sz; + + action_tbl->pool = mlx5hws_pool_create(ctx, &pool_attr); + if (!action_tbl->pool) { + mlx5hws_err(ctx, "Failed to allocate STE pool\n"); + return -EINVAL; + } + + return 0; +} + +static int hws_action_ste_table_create_single_rtc( + struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_table *action_tbl, + enum mlx5hws_pool_optimize opt, size_t log_sz, bool tx) +{ + struct mlx5hws_cmd_rtc_create_attr rtc_attr = { 0 }; + u32 *rtc_id; + + rtc_attr.log_depth = 0; + rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET; + /* Action STEs use the default always hit definer. 
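Entries are written by offset (MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET above) rather than located by hash, and any packet that jumps to this table is meant to hit, so the trivial definer exists only to satisfy the RTC interface.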
*/ + rtc_attr.match_definer_0 = ctx->caps->trivial_match_definer; + rtc_attr.is_frst_jumbo = false; + rtc_attr.miss_ft_id = 0; + rtc_attr.pd = ctx->pd_num; + rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx); + + if (tx) { + rtc_attr.table_type = FS_FT_FDB_TX; + rtc_attr.ste_base = + mlx5hws_pool_get_base_mirror_id(action_tbl->pool); + rtc_attr.stc_base = + mlx5hws_pool_get_base_mirror_id(ctx->stc_pool); + rtc_attr.log_size = + opt == MLX5HWS_POOL_OPTIMIZE_ORIG ? 0 : log_sz; + rtc_id = &action_tbl->rtc_1_id; + } else { + rtc_attr.table_type = FS_FT_FDB_RX; + rtc_attr.ste_base = mlx5hws_pool_get_base_id(action_tbl->pool); + rtc_attr.stc_base = mlx5hws_pool_get_base_id(ctx->stc_pool); + rtc_attr.log_size = + opt == MLX5HWS_POOL_OPTIMIZE_MIRROR ? 0 : log_sz; + rtc_id = &action_tbl->rtc_0_id; + } + + return mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_id); +} + +static int +hws_action_ste_table_create_rtcs(struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_table *action_tbl, + enum mlx5hws_pool_optimize opt, size_t log_sz) +{ + int err; + + err = hws_action_ste_table_create_single_rtc(ctx, action_tbl, opt, + log_sz, false); + if (err) + return err; + + err = hws_action_ste_table_create_single_rtc(ctx, action_tbl, opt, + log_sz, true); + if (err) { + mlx5hws_cmd_rtc_destroy(ctx->mdev, action_tbl->rtc_0_id); + return err; + } + + return 0; +} + +static void +hws_action_ste_table_destroy_rtcs(struct mlx5hws_action_ste_table *action_tbl) +{ + mlx5hws_cmd_rtc_destroy(action_tbl->pool->ctx->mdev, + action_tbl->rtc_1_id); + mlx5hws_cmd_rtc_destroy(action_tbl->pool->ctx->mdev, + action_tbl->rtc_0_id); +} + +static int +hws_action_ste_table_create_stc(struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_table *action_tbl) +{ + struct mlx5hws_cmd_stc_modify_attr stc_attr = { 0 }; + + stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; + stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE; + stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; + stc_attr.ste_table.ste_pool = action_tbl->pool; + stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer; + + return mlx5hws_action_alloc_single_stc(ctx, &stc_attr, + MLX5HWS_TABLE_TYPE_FDB, + &action_tbl->stc); +} + +static struct mlx5hws_action_ste_table * +hws_action_ste_table_alloc(struct mlx5hws_action_ste_pool_element *parent_elem) +{ + enum mlx5hws_pool_optimize opt = parent_elem->opt; + struct mlx5hws_context *ctx = parent_elem->ctx; + struct mlx5hws_action_ste_table *action_tbl; + size_t log_sz; + int err; + + log_sz = min(parent_elem->log_sz ? 
+ parent_elem->log_sz + + MLX5HWS_ACTION_STE_TABLE_STEP_LOG_SZ : + MLX5HWS_ACTION_STE_TABLE_INIT_LOG_SZ, + MLX5HWS_ACTION_STE_TABLE_MAX_LOG_SZ); + + action_tbl = kzalloc(sizeof(*action_tbl), GFP_KERNEL); + if (!action_tbl) + return ERR_PTR(-ENOMEM); + + err = hws_action_ste_table_create_pool(ctx, action_tbl, opt, log_sz); + if (err) + goto free_tbl; + + err = hws_action_ste_table_create_rtcs(ctx, action_tbl, opt, log_sz); + if (err) + goto destroy_pool; + + err = hws_action_ste_table_create_stc(ctx, action_tbl); + if (err) + goto destroy_rtcs; + + action_tbl->parent_elem = parent_elem; + INIT_LIST_HEAD(&action_tbl->list_node); + action_tbl->last_used = jiffies; + list_add(&action_tbl->list_node, &parent_elem->available); + parent_elem->log_sz = log_sz; + + mlx5hws_dbg(ctx, + "Allocated %s action STE table log_sz %zu; STEs (%d, %d); RTCs (%d, %d); STC %d\n", + hws_pool_opt_to_str(opt), log_sz, + mlx5hws_pool_get_base_id(action_tbl->pool), + mlx5hws_pool_get_base_mirror_id(action_tbl->pool), + action_tbl->rtc_0_id, action_tbl->rtc_1_id, + action_tbl->stc.offset); + + return action_tbl; + +destroy_rtcs: + hws_action_ste_table_destroy_rtcs(action_tbl); +destroy_pool: + mlx5hws_pool_destroy(action_tbl->pool); +free_tbl: + kfree(action_tbl); + + return ERR_PTR(err); +} + +static void +hws_action_ste_table_destroy(struct mlx5hws_action_ste_table *action_tbl) +{ + struct mlx5hws_context *ctx = action_tbl->parent_elem->ctx; + + mlx5hws_dbg(ctx, + "Destroying %s action STE table: STEs (%d, %d); RTCs (%d, %d); STC %d\n", + hws_pool_opt_to_str(action_tbl->parent_elem->opt), + mlx5hws_pool_get_base_id(action_tbl->pool), + mlx5hws_pool_get_base_mirror_id(action_tbl->pool), + action_tbl->rtc_0_id, action_tbl->rtc_1_id, + action_tbl->stc.offset); + + mlx5hws_action_free_single_stc(ctx, MLX5HWS_TABLE_TYPE_FDB, + &action_tbl->stc); + hws_action_ste_table_destroy_rtcs(action_tbl); + mlx5hws_pool_destroy(action_tbl->pool); + + list_del(&action_tbl->list_node); + kfree(action_tbl); +} + +static int +hws_action_ste_pool_element_init(struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_pool_element *elem, + enum mlx5hws_pool_optimize opt) +{ + elem->ctx = ctx; + elem->opt = opt; + INIT_LIST_HEAD(&elem->available); + INIT_LIST_HEAD(&elem->full); + + return 0; +} + +static void hws_action_ste_pool_element_destroy( + struct mlx5hws_action_ste_pool_element *elem) +{ + struct mlx5hws_action_ste_table *action_tbl, *p; + + /* This should be empty, but attempt to free its elements anyway. */ + list_for_each_entry_safe(action_tbl, p, &elem->full, list_node) + hws_action_ste_table_destroy(action_tbl); + + list_for_each_entry_safe(action_tbl, p, &elem->available, list_node) + hws_action_ste_table_destroy(action_tbl); +} + +static int hws_action_ste_pool_init(struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_pool *pool) +{ + enum mlx5hws_pool_optimize opt; + int err; + + mutex_init(&pool->lock); + + /* Rules which are added for both RX and TX must use the same action STE + * indices for both. If we were to use a single table, then RX-only and + * TX-only rules would waste the unused entries. Thus, we use separate + * table sets for the three cases. 
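+	 * + * For example, a TX-only rule (skip_rx) is served from the + * MLX5HWS_POOL_OPTIMIZE_MIRROR element, so it consumes no RX-side + * entries; see hws_action_ste_choose_elem() below.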
+ */ + for (opt = MLX5HWS_POOL_OPTIMIZE_NONE; opt < MLX5HWS_POOL_OPTIMIZE_MAX; + opt++) { + err = hws_action_ste_pool_element_init(ctx, &pool->elems[opt], + opt); + if (err) + goto destroy_elems; + pool->elems[opt].parent_pool = pool; + } + + return 0; + +destroy_elems: + while (opt-- > MLX5HWS_POOL_OPTIMIZE_NONE) + hws_action_ste_pool_element_destroy(&pool->elems[opt]); + + return err; +} + +static void hws_action_ste_pool_destroy(struct mlx5hws_action_ste_pool *pool) +{ + int opt; + + for (opt = MLX5HWS_POOL_OPTIMIZE_MAX - 1; + opt >= MLX5HWS_POOL_OPTIMIZE_NONE; opt--) + hws_action_ste_pool_element_destroy(&pool->elems[opt]); +} + +static void hws_action_ste_pool_element_collect_stale( + struct mlx5hws_action_ste_pool_element *elem, struct list_head *cleanup) +{ + struct mlx5hws_action_ste_table *action_tbl, *p; + unsigned long expire_time, now; + + expire_time = secs_to_jiffies(MLX5HWS_ACTION_STE_POOL_EXPIRE_SECONDS); + now = jiffies; + + list_for_each_entry_safe(action_tbl, p, &elem->available, list_node) { + if (mlx5hws_pool_full(action_tbl->pool) && + time_before(action_tbl->last_used + expire_time, now)) + list_move(&action_tbl->list_node, cleanup); + } +} + +static void hws_action_ste_table_cleanup_list(struct list_head *cleanup) +{ + struct mlx5hws_action_ste_table *action_tbl, *p; + + list_for_each_entry_safe(action_tbl, p, cleanup, list_node) + hws_action_ste_table_destroy(action_tbl); +} + +static void hws_action_ste_pool_cleanup(struct work_struct *work) +{ + enum mlx5hws_pool_optimize opt; + struct mlx5hws_context *ctx; + LIST_HEAD(cleanup); + int i; + + ctx = container_of(work, struct mlx5hws_context, + action_ste_cleanup.work); + + for (i = 0; i < ctx->queues; i++) { + struct mlx5hws_action_ste_pool *p = &ctx->action_ste_pool[i]; + + mutex_lock(&p->lock); + for (opt = MLX5HWS_POOL_OPTIMIZE_NONE; + opt < MLX5HWS_POOL_OPTIMIZE_MAX; opt++) + hws_action_ste_pool_element_collect_stale( + &p->elems[opt], &cleanup); + mutex_unlock(&p->lock); + } + + hws_action_ste_table_cleanup_list(&cleanup); + + schedule_delayed_work(&ctx->action_ste_cleanup, + secs_to_jiffies( + MLX5HWS_ACTION_STE_POOL_CLEANUP_SECONDS)); +} + +int mlx5hws_action_ste_pool_init(struct mlx5hws_context *ctx) +{ + struct mlx5hws_action_ste_pool *pool; + size_t queues = ctx->queues; + int i, err; + + pool = kcalloc(queues, sizeof(*pool), GFP_KERNEL); + if (!pool) + return -ENOMEM; + + for (i = 0; i < queues; i++) { + err = hws_action_ste_pool_init(ctx, &pool[i]); + if (err) + goto free_pool; + } + + ctx->action_ste_pool = pool; + + INIT_DELAYED_WORK(&ctx->action_ste_cleanup, + hws_action_ste_pool_cleanup); + schedule_delayed_work( + &ctx->action_ste_cleanup, + secs_to_jiffies(MLX5HWS_ACTION_STE_POOL_CLEANUP_SECONDS)); + + return 0; + +free_pool: + while (i--) + hws_action_ste_pool_destroy(&pool[i]); + kfree(pool); + + return err; +} + +void mlx5hws_action_ste_pool_uninit(struct mlx5hws_context *ctx) +{ + size_t queues = ctx->queues; + int i; + + cancel_delayed_work_sync(&ctx->action_ste_cleanup); + + for (i = 0; i < queues; i++) + hws_action_ste_pool_destroy(&ctx->action_ste_pool[i]); + + kfree(ctx->action_ste_pool); +} + +static struct mlx5hws_action_ste_pool_element * +hws_action_ste_choose_elem(struct mlx5hws_action_ste_pool *pool, + bool skip_rx, bool skip_tx) +{ + if (skip_rx) + return &pool->elems[MLX5HWS_POOL_OPTIMIZE_MIRROR]; + + if (skip_tx) + return &pool->elems[MLX5HWS_POOL_OPTIMIZE_ORIG]; + + return &pool->elems[MLX5HWS_POOL_OPTIMIZE_NONE]; +} + +static int +hws_action_ste_table_chunk_alloc(struct 
mlx5hws_action_ste_table *action_tbl, + struct mlx5hws_action_ste_chunk *chunk) +{ + int err; + + err = mlx5hws_pool_chunk_alloc(action_tbl->pool, &chunk->ste); + if (err) + return err; + + chunk->action_tbl = action_tbl; + action_tbl->last_used = jiffies; + + return 0; +} + +int mlx5hws_action_ste_chunk_alloc(struct mlx5hws_action_ste_pool *pool, + bool skip_rx, bool skip_tx, + struct mlx5hws_action_ste_chunk *chunk) +{ + struct mlx5hws_action_ste_pool_element *elem; + struct mlx5hws_action_ste_table *action_tbl; + bool found; + int err; + + if (skip_rx && skip_tx) + return -EINVAL; + + mutex_lock(&pool->lock); + + elem = hws_action_ste_choose_elem(pool, skip_rx, skip_tx); + + mlx5hws_dbg(elem->ctx, + "Allocating action STEs skip_rx %d skip_tx %d order %d\n", + skip_rx, skip_tx, chunk->ste.order); + + found = false; + list_for_each_entry(action_tbl, &elem->available, list_node) { + if (!hws_action_ste_table_chunk_alloc(action_tbl, chunk)) { + found = true; + break; + } + } + + if (!found) { + action_tbl = hws_action_ste_table_alloc(elem); + if (IS_ERR(action_tbl)) { + err = PTR_ERR(action_tbl); + goto out; + } + + err = hws_action_ste_table_chunk_alloc(action_tbl, chunk); + if (err) + goto out; + } + + if (mlx5hws_pool_empty(action_tbl->pool)) + list_move(&action_tbl->list_node, &elem->full); + + err = 0; + +out: + mutex_unlock(&pool->lock); + + return err; +} + +void mlx5hws_action_ste_chunk_free(struct mlx5hws_action_ste_chunk *chunk) +{ + struct mutex *lock = &chunk->action_tbl->parent_elem->parent_pool->lock; + + mlx5hws_dbg(chunk->action_tbl->pool->ctx, + "Freeing action STEs offset %d order %d\n", + chunk->ste.offset, chunk->ste.order); + + mutex_lock(lock); + mlx5hws_pool_chunk_free(chunk->action_tbl->pool, &chunk->ste); + chunk->action_tbl->last_used = jiffies; + list_move(&chunk->action_tbl->list_node, + &chunk->action_tbl->parent_elem->available); + mutex_unlock(lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h new file mode 100644 index 000000000000..a8ba97359e31 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */ + +#ifndef ACTION_STE_POOL_H_ +#define ACTION_STE_POOL_H_ + +#define MLX5HWS_ACTION_STE_TABLE_INIT_LOG_SZ 10 +#define MLX5HWS_ACTION_STE_TABLE_STEP_LOG_SZ 1 +#define MLX5HWS_ACTION_STE_TABLE_MAX_LOG_SZ 20 + +#define MLX5HWS_ACTION_STE_POOL_CLEANUP_SECONDS 300 +#define MLX5HWS_ACTION_STE_POOL_EXPIRE_SECONDS 300 + +struct mlx5hws_action_ste_pool_element; + +struct mlx5hws_action_ste_table { + struct mlx5hws_action_ste_pool_element *parent_elem; + /* Wraps the RTC and STE range for this given action. */ + struct mlx5hws_pool *pool; + /* Match STEs use this STC to jump to this pool's RTC. */ + struct mlx5hws_pool_chunk stc; + u32 rtc_0_id; + u32 rtc_1_id; + struct list_head list_node; + unsigned long last_used; +}; + +struct mlx5hws_action_ste_pool_element { + struct mlx5hws_context *ctx; + struct mlx5hws_action_ste_pool *parent_pool; + size_t log_sz; /* Size of the largest table so far. */ + enum mlx5hws_pool_optimize opt; + struct list_head available; + struct list_head full; +}; + +/* Central repository of action STEs. The context contains one of these pools + * per queue. + */ +struct mlx5hws_action_ste_pool { + /* Protects the entire pool. 
We have one pool per queue and only one + * operation can be active per rule at a given time. Thus this lock + * protects solely against concurrent garbage collection and we expect + * very little contention. + */ + struct mutex lock; + struct mlx5hws_action_ste_pool_element elems[MLX5HWS_POOL_OPTIMIZE_MAX]; +}; + +/* A chunk of STEs and the table it was allocated from. Used by rules. */ +struct mlx5hws_action_ste_chunk { + struct mlx5hws_action_ste_table *action_tbl; + struct mlx5hws_pool_chunk ste; +}; + +int mlx5hws_action_ste_pool_init(struct mlx5hws_context *ctx); + +void mlx5hws_action_ste_pool_uninit(struct mlx5hws_context *ctx); + +/* Callers are expected to fill chunk->ste.order. On success, this function + * populates chunk->action_tbl and chunk->ste.offset. + */ +int mlx5hws_action_ste_chunk_alloc(struct mlx5hws_action_ste_pool *pool, + bool skip_rx, bool skip_tx, + struct mlx5hws_action_ste_chunk *chunk); + +void mlx5hws_action_ste_chunk_free(struct mlx5hws_action_ste_chunk *chunk); + +#endif /* ACTION_STE_POOL_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index 19dce1ba512d..9e057f808ea5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -46,10 +46,14 @@ static void hws_bwc_unlock_all_queues(struct mlx5hws_context *ctx) } } -static void hws_bwc_matcher_init_attr(struct mlx5hws_matcher_attr *attr, +static void hws_bwc_matcher_init_attr(struct mlx5hws_bwc_matcher *bwc_matcher, u32 priority, - u8 size_log) + u8 size_log, + struct mlx5hws_matcher_attr *attr) { + struct mlx5hws_bwc_matcher *first_matcher = + bwc_matcher->complex_first_bwc_matcher; + memset(attr, 0, sizeof(*attr)); attr->priority = priority; @@ -61,6 +65,9 @@ static void hws_bwc_matcher_init_attr(struct mlx5hws_matcher_attr *attr, attr->rule.num_log = size_log; attr->resizable = true; attr->max_num_of_at_attach = MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM; + + attr->isolated_matcher_end_ft_id = + first_matcher ? first_matcher->matcher->end_ft_id : 0; } int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher, @@ -83,20 +90,27 @@ int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher, for (i = 0; i < bwc_queues; i++) INIT_LIST_HEAD(&bwc_matcher->rules[i]); - hws_bwc_matcher_init_attr(&attr, + hws_bwc_matcher_init_attr(bwc_matcher, priority, - MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG); + MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG, + &attr); bwc_matcher->priority = priority; bwc_matcher->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG; + bwc_matcher->size_of_at_array = MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM; + bwc_matcher->at = kcalloc(bwc_matcher->size_of_at_array, + sizeof(*bwc_matcher->at), GFP_KERNEL); + if (!bwc_matcher->at) + goto free_bwc_matcher_rules; + /* create dummy action template */ bwc_matcher->at[0] = mlx5hws_action_template_create(action_types ? 
action_types : init_action_types); if (!bwc_matcher->at[0]) { mlx5hws_err(table->ctx, "BWC matcher: failed creating action template\n"); - goto free_bwc_matcher_rules; + goto free_bwc_matcher_at_array; } bwc_matcher->num_of_at = 1; @@ -126,6 +140,8 @@ free_mt: mlx5hws_match_template_destroy(bwc_matcher->mt); free_at: mlx5hws_action_template_destroy(bwc_matcher->at[0]); +free_bwc_matcher_at_array: + kfree(bwc_matcher->at); free_bwc_matcher_rules: kfree(bwc_matcher->rules); err: @@ -153,6 +169,7 @@ mlx5hws_bwc_matcher_create(struct mlx5hws_table *table, return NULL; atomic_set(&bwc_matcher->num_of_rules, 0); + atomic_set(&bwc_matcher->rehash_required, false); /* Check if the required match params can be all matched * in single STE, otherwise complex matcher is needed. @@ -192,6 +209,7 @@ int mlx5hws_bwc_matcher_destroy_simple(struct mlx5hws_bwc_matcher *bwc_matcher) for (i = 0; i < bwc_matcher->num_of_at; i++) mlx5hws_action_template_destroy(bwc_matcher->at[i]); + kfree(bwc_matcher->at); mlx5hws_match_template_destroy(bwc_matcher->mt); kfree(bwc_matcher->rules); @@ -208,16 +226,19 @@ int mlx5hws_bwc_matcher_destroy(struct mlx5hws_bwc_matcher *bwc_matcher) "BWC matcher destroy: matcher still has %d rules\n", num_of_rules); - mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); + if (bwc_matcher->complex) + mlx5hws_bwc_matcher_destroy_complex(bwc_matcher); + else + mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); kfree(bwc_matcher); return 0; } -static int hws_bwc_queue_poll(struct mlx5hws_context *ctx, - u16 queue_id, - u32 *pending_rules, - bool drain) +int mlx5hws_bwc_queue_poll(struct mlx5hws_context *ctx, + u16 queue_id, + u32 *pending_rules, + bool drain) { unsigned long timeout = jiffies + secs_to_jiffies(MLX5HWS_BWC_POLLING_TIMEOUT); @@ -320,16 +341,12 @@ static void hws_bwc_rule_list_add(struct mlx5hws_bwc_rule *bwc_rule, u16 idx) { struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; - atomic_inc(&bwc_matcher->num_of_rules); bwc_rule->bwc_queue_idx = idx; list_add(&bwc_rule->list_node, &bwc_matcher->rules[idx]); } static void hws_bwc_rule_list_remove(struct mlx5hws_bwc_rule *bwc_rule) { - struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; - - atomic_dec(&bwc_matcher->num_of_rules); list_del_init(&bwc_rule->list_node); } @@ -352,7 +369,8 @@ hws_bwc_rule_destroy_hws_sync(struct mlx5hws_bwc_rule *bwc_rule, if (unlikely(ret)) return ret; - ret = hws_bwc_queue_poll(ctx, rule_attr->queue_id, &expected_completions, true); + ret = mlx5hws_bwc_queue_poll(ctx, rule_attr->queue_id, + &expected_completions, true); if (unlikely(ret)) return ret; @@ -382,6 +400,7 @@ int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule) mutex_lock(queue_lock); ret = hws_bwc_rule_destroy_hws_sync(bwc_rule, &attr); + atomic_dec(&bwc_matcher->num_of_rules); hws_bwc_rule_list_remove(bwc_rule); mutex_unlock(queue_lock); @@ -391,9 +410,13 @@ int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule) int mlx5hws_bwc_rule_destroy(struct mlx5hws_bwc_rule *bwc_rule) { - int ret; + bool is_complex = !!bwc_rule->bwc_matcher->complex; + int ret = 0; - ret = mlx5hws_bwc_rule_destroy_simple(bwc_rule); + if (is_complex) + ret = mlx5hws_bwc_rule_destroy_complex(bwc_rule); + else + ret = mlx5hws_bwc_rule_destroy_simple(bwc_rule); mlx5hws_bwc_rule_free(bwc_rule); return ret; @@ -433,9 +456,8 @@ hws_bwc_rule_create_sync(struct mlx5hws_bwc_rule *bwc_rule, if (unlikely(ret)) return ret; - ret = hws_bwc_queue_poll(ctx, rule_attr->queue_id, &expected_completions, true); - - return ret; + 
return mlx5hws_bwc_queue_poll(ctx, rule_attr->queue_id, + &expected_completions, true); } static int @@ -456,7 +478,8 @@ hws_bwc_rule_update_sync(struct mlx5hws_bwc_rule *bwc_rule, if (unlikely(ret)) return ret; - ret = hws_bwc_queue_poll(ctx, rule_attr->queue_id, &expected_completions, true); + ret = mlx5hws_bwc_queue_poll(ctx, rule_attr->queue_id, + &expected_completions, true); if (unlikely(ret)) mlx5hws_err(ctx, "Failed updating BWC rule (%d)\n", ret); @@ -469,21 +492,9 @@ hws_bwc_matcher_size_maxed_out(struct mlx5hws_bwc_matcher *bwc_matcher) struct mlx5hws_cmd_query_caps *caps = bwc_matcher->matcher->tbl->ctx->caps; /* check the match RTC size */ - if ((bwc_matcher->size_log + - MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH + - MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP) > - (caps->ste_alloc_log_max - 1)) - return true; - - /* check the action RTC size */ - if ((bwc_matcher->size_log + - MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP + - ilog2(roundup_pow_of_two(bwc_matcher->matcher->action_ste.max_stes)) + - MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT) > - (caps->ste_alloc_log_max - 1)) - return true; - - return false; + return (bwc_matcher->size_log + MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH + + MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP) > + (caps->ste_alloc_log_max - 1); } static bool @@ -520,6 +531,23 @@ hws_bwc_matcher_extend_at(struct mlx5hws_bwc_matcher *bwc_matcher, struct mlx5hws_rule_action rule_actions[]) { enum mlx5hws_action_type action_types[MLX5HWS_BWC_MAX_ACTS]; + void *p; + + if (unlikely(bwc_matcher->num_of_at >= bwc_matcher->size_of_at_array)) { + if (bwc_matcher->size_of_at_array >= MLX5HWS_MATCHER_MAX_AT) + return -ENOMEM; + bwc_matcher->size_of_at_array *= 2; + p = krealloc(bwc_matcher->at, + bwc_matcher->size_of_at_array * + sizeof(*bwc_matcher->at), + __GFP_ZERO | GFP_KERNEL); + if (!p) { + bwc_matcher->size_of_at_array /= 2; + return -ENOMEM; + } + + bwc_matcher->at = p; + } hws_bwc_rule_actions_to_action_types(rule_actions, action_types); @@ -582,100 +610,79 @@ hws_bwc_matcher_find_at(struct mlx5hws_bwc_matcher *bwc_matcher, static int hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) { + bool move_error = false, poll_error = false, drain_error = false; struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_matcher *matcher = bwc_matcher->matcher; u16 bwc_queues = mlx5hws_bwc_queues(ctx); - struct mlx5hws_bwc_rule **bwc_rules; struct mlx5hws_rule_attr rule_attr; - u32 *pending_rules; - int i, j, ret = 0; - bool all_done; - u16 burst_th; + struct mlx5hws_bwc_rule *bwc_rule; + struct mlx5hws_send_engine *queue; + struct list_head *rules_list; + u32 pending_rules; + int i, ret = 0; mlx5hws_bwc_rule_fill_attr(bwc_matcher, 0, 0, &rule_attr); - pending_rules = kcalloc(bwc_queues, sizeof(*pending_rules), GFP_KERNEL); - if (!pending_rules) - return -ENOMEM; - - bwc_rules = kcalloc(bwc_queues, sizeof(*bwc_rules), GFP_KERNEL); - if (!bwc_rules) { - ret = -ENOMEM; - goto free_pending_rules; - } - for (i = 0; i < bwc_queues; i++) { if (list_empty(&bwc_matcher->rules[i])) - bwc_rules[i] = NULL; - else - bwc_rules[i] = list_first_entry(&bwc_matcher->rules[i], - struct mlx5hws_bwc_rule, - list_node); - } - - do { - all_done = true; + continue; - for (i = 0; i < bwc_queues; i++) { - rule_attr.queue_id = mlx5hws_bwc_get_queue_id(ctx, i); - burst_th = hws_bwc_get_burst_th(ctx, rule_attr.queue_id); + pending_rules = 0; + rule_attr.queue_id = mlx5hws_bwc_get_queue_id(ctx, i); + rules_list = &bwc_matcher->rules[i]; - for (j = 0; j < burst_th && bwc_rules[i]; j++) { - 
rule_attr.burst = !!((j + 1) % burst_th); - ret = mlx5hws_matcher_resize_rule_move(bwc_matcher->matcher, - bwc_rules[i]->rule, - &rule_attr); - if (unlikely(ret)) { - mlx5hws_err(ctx, - "Moving BWC rule failed during rehash (%d)\n", - ret); - goto free_bwc_rules; - } - - all_done = false; - pending_rules[i]++; - bwc_rules[i] = list_is_last(&bwc_rules[i]->list_node, - &bwc_matcher->rules[i]) ? - NULL : list_next_entry(bwc_rules[i], list_node); + list_for_each_entry(bwc_rule, rules_list, list_node) { + ret = mlx5hws_matcher_resize_rule_move(matcher, + bwc_rule->rule, + &rule_attr); + if (unlikely(ret && !move_error)) { + mlx5hws_err(ctx, + "Moving BWC rule: move failed (%d), attempting to move rest of the rules\n", + ret); + move_error = true; + } - ret = hws_bwc_queue_poll(ctx, rule_attr.queue_id, - &pending_rules[i], false); - if (unlikely(ret)) { - mlx5hws_err(ctx, - "Moving BWC rule failed during rehash (%d)\n", - ret); - goto free_bwc_rules; - } + pending_rules++; + ret = mlx5hws_bwc_queue_poll(ctx, + rule_attr.queue_id, + &pending_rules, + false); + if (unlikely(ret && !poll_error)) { + mlx5hws_err(ctx, + "Moving BWC rule: poll failed (%d), attempting to move rest of the rules\n", + ret); + poll_error = true; } } - } while (!all_done); - /* drain all the bwc queues */ - for (i = 0; i < bwc_queues; i++) { - if (pending_rules[i]) { - u16 queue_id = mlx5hws_bwc_get_queue_id(ctx, i); - - mlx5hws_send_engine_flush_queue(&ctx->send_queue[queue_id]); - ret = hws_bwc_queue_poll(ctx, queue_id, - &pending_rules[i], true); - if (unlikely(ret)) { + if (pending_rules) { + queue = &ctx->send_queue[rule_attr.queue_id]; + mlx5hws_send_engine_flush_queue(queue); + ret = mlx5hws_bwc_queue_poll(ctx, + rule_attr.queue_id, + &pending_rules, + true); + if (unlikely(ret && !drain_error)) { mlx5hws_err(ctx, - "Moving BWC rule failed during rehash (%d)\n", ret); - goto free_bwc_rules; + "Moving BWC rule: drain failed (%d), attempting to move rest of the rules\n", + ret); + drain_error = true; } } } -free_bwc_rules: - kfree(bwc_rules); -free_pending_rules: - kfree(pending_rules); + if (move_error || poll_error || drain_error) + ret = -EINVAL; return ret; } static int hws_bwc_matcher_move_all(struct mlx5hws_bwc_matcher *bwc_matcher) { - return hws_bwc_matcher_move_all_simple(bwc_matcher); + if (!bwc_matcher->complex) + return hws_bwc_matcher_move_all_simple(bwc_matcher); + + return mlx5hws_bwc_matcher_move_all_complex(bwc_matcher); } static int hws_bwc_matcher_move(struct mlx5hws_bwc_matcher *bwc_matcher) @@ -686,9 +693,10 @@ static int hws_bwc_matcher_move(struct mlx5hws_bwc_matcher *bwc_matcher) struct mlx5hws_matcher *new_matcher; int ret; - hws_bwc_matcher_init_attr(&matcher_attr, + hws_bwc_matcher_init_attr(bwc_matcher, bwc_matcher->priority, - bwc_matcher->size_log); + bwc_matcher->size_log, + &matcher_attr); old_matcher = bwc_matcher->matcher; new_matcher = mlx5hws_matcher_create(old_matcher->tbl, @@ -708,15 +716,18 @@ static int hws_bwc_matcher_move(struct mlx5hws_bwc_matcher *bwc_matcher) } ret = hws_bwc_matcher_move_all(bwc_matcher); - if (ret) { - mlx5hws_err(ctx, "Rehash error: moving rules failed\n"); - return -ENOMEM; - } + if (ret) + mlx5hws_err(ctx, "Rehash error: moving rules failed, attempting to remove the old matcher\n"); + + /* Error during rehash can't be rolled back. + * The best option here is to allow the rehash to complete and remove + * the old matcher - can't leave the matcher in the 'in_resize' state. 
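+	 * Swapping in the new matcher regardless keeps the data path in a + * consistent state; in effect, only the rules whose move failed are + * dropped with the old matcher, and the caller still sees the error.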
+ */ bwc_matcher->matcher = new_matcher; mlx5hws_matcher_destroy(old_matcher); - return 0; + return ret; } static int @@ -733,9 +744,9 @@ hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher) /* It is possible that other rule has already performed rehash. * Need to check again if we really need rehash. - * If the reason for rehash was size, but not any more - skip rehash. */ - if (!hws_bwc_matcher_rehash_size_needed(bwc_matcher, + if (!atomic_read(&bwc_matcher->rehash_required) && + !hws_bwc_matcher_rehash_size_needed(bwc_matcher, atomic_read(&bwc_matcher->num_of_rules))) return 0; @@ -746,6 +757,8 @@ hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher) * - destroy the old matcher */ + atomic_set(&bwc_matcher->rehash_required, false); + ret = hws_bwc_matcher_extend_size(bwc_matcher); if (ret) return ret; @@ -753,17 +766,51 @@ hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher) return hws_bwc_matcher_move(bwc_matcher); } -static int -hws_bwc_matcher_rehash_at(struct mlx5hws_bwc_matcher *bwc_matcher) +static int hws_bwc_rule_get_at_idx(struct mlx5hws_bwc_rule *bwc_rule, + struct mlx5hws_rule_action rule_actions[], + u16 bwc_queue_idx) { - /* Rehash by action template doesn't require any additional checking. - * The bwc_matcher already contains the new action template. - * Just do the usual rehash: - * - create new matcher - * - move all the rules to the new matcher - * - destroy the old matcher - */ - return hws_bwc_matcher_move(bwc_matcher); + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mutex *queue_lock; /* Protect the queue */ + int at_idx, ret; + + /* check if rehash needed due to missing action template */ + at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions); + if (likely(at_idx >= 0)) + return at_idx; + + /* we need to extend BWC matcher action templates array */ + queue_lock = hws_bwc_get_queue_lock(ctx, bwc_queue_idx); + mutex_unlock(queue_lock); + hws_bwc_lock_all_queues(ctx); + + /* check again - perhaps other thread already did extend_at */ + at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions); + if (at_idx >= 0) + goto out; + + ret = hws_bwc_matcher_extend_at(bwc_matcher, rule_actions); + if (unlikely(ret)) { + mlx5hws_err(ctx, "BWC rule: failed extending AT (%d)", ret); + at_idx = -EINVAL; + goto out; + } + + /* action templates array was extended, we need the last idx */ + at_idx = bwc_matcher->num_of_at - 1; + ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher, + bwc_matcher->at[at_idx]); + if (unlikely(ret)) { + mlx5hws_err(ctx, "BWC rule: failed attaching new AT (%d)", ret); + at_idx = -EINVAL; + goto out; + } + +out: + hws_bwc_unlock_all_queues(ctx); + mutex_lock(queue_lock); + return at_idx; } int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, @@ -786,50 +833,16 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, mutex_lock(queue_lock); - /* check if rehash needed due to missing action template */ - at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions); + at_idx = hws_bwc_rule_get_at_idx(bwc_rule, rule_actions, bwc_queue_idx); if (unlikely(at_idx < 0)) { - /* we need to extend BWC matcher action templates array */ mutex_unlock(queue_lock); - hws_bwc_lock_all_queues(ctx); - - ret = hws_bwc_matcher_extend_at(bwc_matcher, rule_actions); - if (unlikely(ret)) { - hws_bwc_unlock_all_queues(ctx); - return ret; - } - - /* action templates array was extended, we need the last 
idx */ - at_idx = bwc_matcher->num_of_at - 1; - - ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher, - bwc_matcher->at[at_idx]); - if (unlikely(ret)) { - /* Action template attach failed, possibly due to - * requiring more action STEs. - * Need to attempt creating new matcher with all - * the action templates, including the new one. - */ - ret = hws_bwc_matcher_rehash_at(bwc_matcher); - if (unlikely(ret)) { - mlx5hws_action_template_destroy(bwc_matcher->at[at_idx]); - bwc_matcher->at[at_idx] = NULL; - bwc_matcher->num_of_at--; - - hws_bwc_unlock_all_queues(ctx); - - mlx5hws_err(ctx, - "BWC rule insertion: rehash AT failed (%d)\n", ret); - return ret; - } - } - - hws_bwc_unlock_all_queues(ctx); - mutex_lock(queue_lock); + mlx5hws_err(ctx, "BWC rule create: failed getting AT (%d)", + ret); + return -EINVAL; } /* check if number of rules require rehash */ - num_of_rules = atomic_read(&bwc_matcher->num_of_rules); + num_of_rules = atomic_inc_return(&bwc_matcher->num_of_rules); if (unlikely(hws_bwc_matcher_rehash_size_needed(bwc_matcher, num_of_rules))) { mutex_unlock(queue_lock); @@ -843,6 +856,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, bwc_matcher->size_log - MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP, bwc_matcher->size_log, ret); + atomic_dec(&bwc_matcher->num_of_rules); return ret; } @@ -867,6 +881,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, * Try rehash by size and insert rule again - last chance. */ + atomic_set(&bwc_matcher->rehash_required, true); mutex_unlock(queue_lock); hws_bwc_lock_all_queues(ctx); @@ -875,6 +890,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, if (ret) { mlx5hws_err(ctx, "BWC rule insertion: rehash failed (%d)\n", ret); + atomic_dec(&bwc_matcher->num_of_rules); return ret; } @@ -890,6 +906,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, if (unlikely(ret)) { mutex_unlock(queue_lock); mlx5hws_err(ctx, "BWC rule insertion failed (%d)\n", ret); + atomic_dec(&bwc_matcher->num_of_rules); return ret; } @@ -921,11 +938,18 @@ mlx5hws_bwc_rule_create(struct mlx5hws_bwc_matcher *bwc_matcher, bwc_queue_idx = hws_bwc_gen_queue_idx(ctx); - ret = mlx5hws_bwc_rule_create_simple(bwc_rule, - params->match_buf, - rule_actions, - flow_source, - bwc_queue_idx); + if (bwc_matcher->complex) + ret = mlx5hws_bwc_rule_create_complex(bwc_rule, + params, + flow_source, + rule_actions, + bwc_queue_idx); + else + ret = mlx5hws_bwc_rule_create_simple(bwc_rule, + params->match_buf, + rule_actions, + flow_source, + bwc_queue_idx); if (unlikely(ret)) { mlx5hws_bwc_rule_free(bwc_rule); return NULL; @@ -952,52 +976,11 @@ hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule, mutex_lock(queue_lock); - /* check if rehash needed due to missing action template */ - at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions); + at_idx = hws_bwc_rule_get_at_idx(bwc_rule, rule_actions, idx); if (unlikely(at_idx < 0)) { - /* we need to extend BWC matcher action templates array */ mutex_unlock(queue_lock); - hws_bwc_lock_all_queues(ctx); - - /* check again - perhaps other thread already did extend_at */ - at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions); - if (likely(at_idx < 0)) { - ret = hws_bwc_matcher_extend_at(bwc_matcher, rule_actions); - if (unlikely(ret)) { - hws_bwc_unlock_all_queues(ctx); - mlx5hws_err(ctx, "BWC rule update: failed extending AT (%d)", ret); - return -EINVAL; - } - - /* action templates array was extended, we need the last idx */ - at_idx = 
bwc_matcher->num_of_at - 1; - - ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher, - bwc_matcher->at[at_idx]); - if (unlikely(ret)) { - /* Action template attach failed, possibly due to - * requiring more action STEs. - * Need to attempt creating new matcher with all - * the action templates, including the new one. - */ - ret = hws_bwc_matcher_rehash_at(bwc_matcher); - if (unlikely(ret)) { - mlx5hws_action_template_destroy(bwc_matcher->at[at_idx]); - bwc_matcher->at[at_idx] = NULL; - bwc_matcher->num_of_at--; - - hws_bwc_unlock_all_queues(ctx); - - mlx5hws_err(ctx, - "BWC rule update: rehash AT failed (%d)\n", - ret); - return ret; - } - } - } - - hws_bwc_unlock_all_queues(ctx); - mutex_lock(queue_lock); + mlx5hws_err(ctx, "BWC rule update: failed getting AT\n"); + return -EINVAL; } ret = hws_bwc_rule_update_sync(bwc_rule, @@ -1023,5 +1006,10 @@ int mlx5hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule, return -EINVAL; } - return hws_bwc_rule_action_update(bwc_rule, rule_actions); + /* For complex rule, the update should happen on the second matcher */ + if (bwc_rule->isolated_bwc_rule) + return hws_bwc_rule_action_update(bwc_rule->isolated_bwc_rule, + rule_actions); + else + return hws_bwc_rule_action_update(bwc_rule, rule_actions); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h index 47f7ed141553..d21fc247a510 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h @@ -10,9 +10,7 @@ #define MLX5HWS_BWC_MATCHER_REHASH_BURST_TH 32 /* Max number of AT attach operations for the same matcher. - * When the limit is reached, next attempt to attach new AT - * will result in creation of a new matcher and moving all - * the rules to this matcher. + * When the limit is reached, a larger buffer is allocated for the ATs. 
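+ * As a rough sketch of what that growth can look like (illustrative
+ * only - the snippet below is an assumption, not code from this patch;
+ * bwc_matcher->at is the mlx5hws_action_template pointer array and
+ * size_of_at_array its current capacity):
+ *
+ *	new_size = bwc_matcher->size_of_at_array * 2;
+ *	new_at = krealloc(bwc_matcher->at, new_size * sizeof(*new_at),
+ *			  GFP_KERNEL);
+ *	if (!new_at)
+ *		return -ENOMEM;
+ *	bwc_matcher->at = new_at;
+ *	bwc_matcher->size_of_at_array = new_size;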
*/ #define MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM 8 @@ -20,20 +18,27 @@ #define MLX5HWS_BWC_POLLING_TIMEOUT 60 +struct mlx5hws_bwc_matcher_complex_data; struct mlx5hws_bwc_matcher { struct mlx5hws_matcher *matcher; struct mlx5hws_match_template *mt; - struct mlx5hws_action_template *at[MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM]; - u32 priority; + struct mlx5hws_action_template **at; + struct mlx5hws_bwc_matcher_complex_data *complex; + struct mlx5hws_bwc_matcher *complex_first_bwc_matcher; u8 num_of_at; + u8 size_of_at_array; u8 size_log; + u32 priority; atomic_t num_of_rules; + atomic_t rehash_required; struct list_head *rules; }; struct mlx5hws_bwc_rule { struct mlx5hws_bwc_matcher *bwc_matcher; struct mlx5hws_rule *rule; + struct mlx5hws_bwc_rule *isolated_bwc_rule; + struct mlx5hws_bwc_complex_rule_hash_node *complex_hash_node; u16 bwc_queue_idx; struct list_head list_node; }; @@ -65,6 +70,11 @@ void mlx5hws_bwc_rule_fill_attr(struct mlx5hws_bwc_matcher *bwc_matcher, u32 flow_source, struct mlx5hws_rule_attr *rule_attr); +int mlx5hws_bwc_queue_poll(struct mlx5hws_context *ctx, + u16 queue_id, + u32 *pending_rules, + bool drain); + static inline u16 mlx5hws_bwc_queues(struct mlx5hws_context *ctx) { /* Besides the control queue, half of the queues are diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c index 9fb059a6511f..5d30c5b094fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c @@ -3,6 +3,22 @@ #include "internal.h" +#define HWS_CLEAR_MATCH_PARAM(mask, field) \ + MLX5_SET(fte_match_param, (mask)->match_buf, field, 0) + +#define HWS_SZ_MATCH_PARAM (MLX5_ST_SZ_DW_MATCH_PARAM * 4) + +static const struct rhashtable_params hws_refcount_hash = { + .key_len = sizeof_field(struct mlx5hws_bwc_complex_rule_hash_node, + match_buf), + .key_offset = offsetof(struct mlx5hws_bwc_complex_rule_hash_node, + match_buf), + .head_offset = offsetof(struct mlx5hws_bwc_complex_rule_hash_node, + hash_node), + .automatic_shrinking = true, + .min_size = 1, +}; + bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, u8 match_criteria_enable, struct mlx5hws_match_parameters *mask) @@ -48,20 +64,1078 @@ bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, return is_complex; } +static void +hws_bwc_matcher_complex_params_clear_fld(struct mlx5hws_context *ctx, + enum mlx5hws_definer_fname fname, + struct mlx5hws_match_parameters *mask) +{ + struct mlx5hws_cmd_query_caps *caps = ctx->caps; + + switch (fname) { + case MLX5HWS_DEFINER_FNAME_ETH_TYPE_O: + case MLX5HWS_DEFINER_FNAME_ETH_TYPE_I: + case MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_O: + case MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_I: + case MLX5HWS_DEFINER_FNAME_IP_VERSION_O: + case MLX5HWS_DEFINER_FNAME_IP_VERSION_I: + /* Because of the strict requirements for IP address matching + * that require ethtype/ip_version matching as well, don't clear + * these fields - have them in both parts of the complex matcher + */ + break; + case MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.smac_47_16); + break; + case MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.smac_47_16); + break; + case MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.smac_15_0); + break; + case MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.smac_15_0); 
+ break; + case MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.dmac_47_16); + break; + case MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.dmac_47_16); + break; + case MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.dmac_15_0); + break; + case MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.dmac_15_0); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_TYPE_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.cvlan_tag); + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.svlan_tag); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_TYPE_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.cvlan_tag); + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.svlan_tag); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_FIRST_PRIO_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.first_prio); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_FIRST_PRIO_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.first_prio); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_CFI_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.first_cfi); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_CFI_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.first_cfi); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_ID_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.first_vid); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_ID_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.first_vid); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_TYPE_O: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.outer_second_cvlan_tag); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.outer_second_svlan_tag); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_TYPE_I: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.inner_second_cvlan_tag); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.inner_second_svlan_tag); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_PRIO_O: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.outer_second_prio); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_PRIO_I: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.inner_second_prio); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_CFI_O: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.outer_second_cfi); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_CFI_I: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.inner_second_cfi); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_ID_O: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.outer_second_vid); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_ID_I: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.inner_second_vid); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_IHL_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ipv4_ihl); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_IHL_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ipv4_ihl); + break; + case MLX5HWS_DEFINER_FNAME_IP_DSCP_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ip_dscp); + break; + case MLX5HWS_DEFINER_FNAME_IP_DSCP_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ip_dscp); + break; + case MLX5HWS_DEFINER_FNAME_IP_ECN_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ip_ecn); + break; + case MLX5HWS_DEFINER_FNAME_IP_ECN_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ip_ecn); + break; + case MLX5HWS_DEFINER_FNAME_IP_TTL_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ttl_hoplimit); + break; + case MLX5HWS_DEFINER_FNAME_IP_TTL_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ttl_hoplimit); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_DST_O: + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + 
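/* HWS_CLEAR_MATCH_PARAM(mask, field) - used by every case in this
+	 * switch - expands to MLX5_SET(fte_match_param, (mask)->match_buf,
+	 * field, 0), i.e. each case simply zeroes the masked field so that
+	 * only one of the two sub-masks keeps it. For example (illustrative
+	 * only, not part of this patch), moving the outer IPv4 source
+	 * address out of sub-mask m2 while keeping it in m1 would be:
+	 *
+	 *	HWS_CLEAR_MATCH_PARAM(m2,
+	 *		outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0);
+	 */
+	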
case MLX5HWS_DEFINER_FNAME_IPV4_SRC_O: + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_DST_I: + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_SRC_I: + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IP_FRAG_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.frag); + break; + case MLX5HWS_DEFINER_FNAME_IP_FRAG_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.frag); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_FLOW_LABEL_O: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.outer_ipv6_flow_label); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_FLOW_LABEL_I: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.inner_ipv6_flow_label); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_O: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_O: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_O: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_O: + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_95_64); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_63_32); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_O: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_O: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_O: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_O: + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_95_64); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_63_32); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_I: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_I: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_I: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_I: + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_95_64); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_63_32); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_I: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_I: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_I: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_I: + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_95_64); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_63_32); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IP_PROTOCOL_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ip_protocol); + break; + case MLX5HWS_DEFINER_FNAME_IP_PROTOCOL_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ip_protocol); + break; + case MLX5HWS_DEFINER_FNAME_L4_SPORT_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.tcp_sport); + 
HWS_CLEAR_MATCH_PARAM(mask, outer_headers.udp_sport);
+		break;
+	case MLX5HWS_DEFINER_FNAME_L4_SPORT_I:
+		HWS_CLEAR_MATCH_PARAM(mask, inner_headers.tcp_sport);
+		HWS_CLEAR_MATCH_PARAM(mask, inner_headers.udp_sport);
+		break;
+	case MLX5HWS_DEFINER_FNAME_L4_DPORT_O:
+		HWS_CLEAR_MATCH_PARAM(mask, outer_headers.tcp_dport);
+		HWS_CLEAR_MATCH_PARAM(mask, outer_headers.udp_dport);
+		break;
+	case MLX5HWS_DEFINER_FNAME_L4_DPORT_I:
+		HWS_CLEAR_MATCH_PARAM(mask, inner_headers.tcp_dport);
+		HWS_CLEAR_MATCH_PARAM(mask, inner_headers.udp_dport);
+		break;
+	case MLX5HWS_DEFINER_FNAME_TCP_FLAGS_O:
+		HWS_CLEAR_MATCH_PARAM(mask, outer_headers.tcp_flags);
+		break;
+	case MLX5HWS_DEFINER_FNAME_TCP_ACK_NUM:
+	case MLX5HWS_DEFINER_FNAME_TCP_SEQ_NUM:
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_3.outer_tcp_seq_num);
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_3.outer_tcp_ack_num);
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_3.inner_tcp_seq_num);
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_3.inner_tcp_ack_num);
+		break;
+	case MLX5HWS_DEFINER_FNAME_GTP_TEID:
+		HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_teid);
+		break;
+	case MLX5HWS_DEFINER_FNAME_GTP_MSG_TYPE:
+		HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_msg_type);
+		HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_msg_flags);
+		break;
+	case MLX5HWS_DEFINER_FNAME_GTPU_FIRST_EXT_DW0:
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_3.gtpu_first_ext_dw_0);
+		HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_dw_0);
+		break;
+	case MLX5HWS_DEFINER_FNAME_GTPU_DW2:
+		HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_dw_2);
+		break;
+	case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_0:
+	case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_1:
+	case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_2:
+	case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_3:
+	case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_4:
+	case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_5:
+	case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_6:
+	case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_7:
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_2.outer_first_mpls_over_gre);
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_2.outer_first_mpls_over_udp);
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_3.geneve_tlv_option_0_data);
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_4.prog_sample_field_id_0);
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_4.prog_sample_field_value_0);
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_4.prog_sample_field_value_1);
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_4.prog_sample_field_id_2);
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_4.prog_sample_field_value_2);
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_4.prog_sample_field_id_3);
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_4.prog_sample_field_value_3);
+		break;
+	case MLX5HWS_DEFINER_FNAME_VXLAN_VNI:
+		HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.vxlan_vni);
+		break;
+	case MLX5HWS_DEFINER_FNAME_VXLAN_GPE_FLAGS:
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_3.outer_vxlan_gpe_flags);
+		break;
+	case MLX5HWS_DEFINER_FNAME_VXLAN_GPE_RSVD0:
+		break;
+	case MLX5HWS_DEFINER_FNAME_VXLAN_GPE_PROTO:
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_3.outer_vxlan_gpe_next_protocol);
+		break;
+	case MLX5HWS_DEFINER_FNAME_VXLAN_GPE_VNI:
+		HWS_CLEAR_MATCH_PARAM(mask,
+				      misc_parameters_3.outer_vxlan_gpe_vni);
+		break;
+	case MLX5HWS_DEFINER_FNAME_GENEVE_OPT_LEN:
+		HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.geneve_opt_len);
+		break;
+	case MLX5HWS_DEFINER_FNAME_GENEVE_OAM:
+		HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.geneve_oam);
+		break;
+	case 
MLX5HWS_DEFINER_FNAME_GENEVE_PROTO: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.geneve_protocol_type); + break; + case MLX5HWS_DEFINER_FNAME_GENEVE_VNI: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.geneve_vni); + break; + case MLX5HWS_DEFINER_FNAME_SOURCE_QP: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.source_sqn); + break; + case MLX5HWS_DEFINER_FNAME_SOURCE_GVMI: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.source_port); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.source_eswitch_owner_vhca_id); + break; + case MLX5HWS_DEFINER_FNAME_REG_0: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_0); + break; + case MLX5HWS_DEFINER_FNAME_REG_1: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_1); + break; + case MLX5HWS_DEFINER_FNAME_REG_2: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_2); + break; + case MLX5HWS_DEFINER_FNAME_REG_3: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_3); + break; + case MLX5HWS_DEFINER_FNAME_REG_4: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_4); + break; + case MLX5HWS_DEFINER_FNAME_REG_5: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_5); + break; + case MLX5HWS_DEFINER_FNAME_REG_7: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_7); + break; + case MLX5HWS_DEFINER_FNAME_REG_A: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_a); + break; + case MLX5HWS_DEFINER_FNAME_GRE_C: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_c_present); + break; + case MLX5HWS_DEFINER_FNAME_GRE_K: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_k_present); + break; + case MLX5HWS_DEFINER_FNAME_GRE_S: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_s_present); + break; + case MLX5HWS_DEFINER_FNAME_GRE_PROTOCOL: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_protocol); + break; + case MLX5HWS_DEFINER_FNAME_GRE_OPT_KEY: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_key.key); + break; + case MLX5HWS_DEFINER_FNAME_ICMP_DW1: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmp_header_data); + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmp_type); + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmp_code); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.icmpv6_header_data); + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmpv6_type); + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmpv6_code); + break; + case MLX5HWS_DEFINER_FNAME_MPLS0_O: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.outer_first_mpls); + break; + case MLX5HWS_DEFINER_FNAME_MPLS0_I: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.inner_first_mpls); + break; + case MLX5HWS_DEFINER_FNAME_TNL_HDR_0: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_5.tunnel_header_0); + break; + case MLX5HWS_DEFINER_FNAME_TNL_HDR_1: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_5.tunnel_header_1); + break; + case MLX5HWS_DEFINER_FNAME_TNL_HDR_2: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_5.tunnel_header_2); + break; + case MLX5HWS_DEFINER_FNAME_TNL_HDR_3: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_5.tunnel_header_3); + break; + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER0_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER1_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER2_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER3_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER4_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER5_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER6_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER7_OK: + /* assuming this is flex parser for geneve option */ + if ((fname == 
MLX5HWS_DEFINER_FNAME_FLEX_PARSER0_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 0) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER1_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 1) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER2_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 2) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER3_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 3) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER4_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 4) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER5_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 5) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER6_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 6) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER7_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 7)) { + mlx5hws_err(ctx, + "Complex params: unsupported field %s (%d), flex parser ID for geneve is %d\n", + mlx5hws_definer_fname_to_str(fname), fname, + caps->flex_parser_id_geneve_tlv_option_0); + break; + } + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.geneve_tlv_option_0_exist); + break; + case MLX5HWS_DEFINER_FNAME_REG_6: + default: + mlx5hws_err(ctx, "Complex params: unsupported field %s (%d)\n", + mlx5hws_definer_fname_to_str(fname), fname); + break; + } +} + +static bool +hws_bwc_matcher_complex_params_comb_is_valid(struct mlx5hws_definer_fc *fc, + int fc_sz, + u32 combination_num) +{ + bool m1[MLX5HWS_DEFINER_FNAME_MAX] = {0}; + bool m2[MLX5HWS_DEFINER_FNAME_MAX] = {0}; + bool is_first_matcher; + int i; + + for (i = 0; i < fc_sz; i++) { + is_first_matcher = !(combination_num & BIT(i)); + if (is_first_matcher) + m1[fc[i].fname] = true; + else + m2[fc[i].fname] = true; + } + + /* Not all the fields can be split into separate matchers. + * Some should be together on the same matcher. + * For example, IPv6 parts - the whole IPv6 address should be on the + * same matcher in order for us to deduce if it's IPv6 or IPv4 address. + */ + if (m1[MLX5HWS_DEFINER_FNAME_IP_FRAG_O] && + (m2[MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_O] || + m2[MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_O] || + m2[MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_O] || + m2[MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_O])) + return false; + + if (m2[MLX5HWS_DEFINER_FNAME_IP_FRAG_O] && + (m1[MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_O] || + m1[MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_O] || + m1[MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_O] || + m1[MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_O])) + return false; + + if (m1[MLX5HWS_DEFINER_FNAME_IP_FRAG_I] && + (m2[MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_I] || + m2[MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_I] || + m2[MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_I] || + m2[MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_I])) + return false; + + if (m2[MLX5HWS_DEFINER_FNAME_IP_FRAG_I] && + (m1[MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_I] || + m1[MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_I] || + m1[MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_I] || + m1[MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_I])) + return false; + + /* Don't split outer IPv6 dest address. */ + if ((m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_O]) && + (m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_O])) + return false; + + /* Don't split outer IPv6 source address. 
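+	 * (For reference, how a candidate split is encoded: bit i of
+	 * combination_num assigns field fc[i] to one of the sub-masks -
+	 * 0 leaves it on the first matcher (m1), 1 moves it to the
+	 * second (m2). E.g. with fc_sz == 3, combination_num == 0b101
+	 * puts fc[0] and fc[2] on the second matcher and fc[1] on the
+	 * first; the m1[]/m2[] arrays checked here are built from
+	 * exactly that decoding.)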
*/ + if ((m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_O]) && + (m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_O])) + return false; + + /* Don't split inner IPv6 dest address. */ + if ((m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_I]) && + (m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_I])) + return false; + + /* Don't split inner IPv6 source address. */ + if ((m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_I]) && + (m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_I])) + return false; + + /* Don't split GRE parameters. */ + if ((m1[MLX5HWS_DEFINER_FNAME_GRE_C] || + m1[MLX5HWS_DEFINER_FNAME_GRE_K] || + m1[MLX5HWS_DEFINER_FNAME_GRE_S] || + m1[MLX5HWS_DEFINER_FNAME_GRE_PROTOCOL]) && + (m2[MLX5HWS_DEFINER_FNAME_GRE_C] || + m2[MLX5HWS_DEFINER_FNAME_GRE_K] || + m2[MLX5HWS_DEFINER_FNAME_GRE_S] || + m2[MLX5HWS_DEFINER_FNAME_GRE_PROTOCOL])) + return false; + + /* Don't split TCP ack/seq numbers. */ + if ((m1[MLX5HWS_DEFINER_FNAME_TCP_ACK_NUM] || + m1[MLX5HWS_DEFINER_FNAME_TCP_SEQ_NUM]) && + (m2[MLX5HWS_DEFINER_FNAME_TCP_ACK_NUM] || + m2[MLX5HWS_DEFINER_FNAME_TCP_SEQ_NUM])) + return false; + + /* Don't split flex parser. */ + if ((m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_0] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_1] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_2] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_3] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_4] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_5] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_6] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_7]) && + (m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_0] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_1] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_2] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_3] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_4] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_5] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_6] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_7])) + return false; + + return true; +} + +static void +hws_bwc_matcher_complex_params_comb_create(struct mlx5hws_context *ctx, + struct mlx5hws_match_parameters *m, + struct mlx5hws_match_parameters *m1, + struct mlx5hws_match_parameters *m2, + struct mlx5hws_definer_fc *fc, + int fc_sz, + u32 combination_num) +{ + bool is_first_matcher; + int i; + + memcpy(m1->match_buf, m->match_buf, m->match_sz); + memcpy(m2->match_buf, m->match_buf, m->match_sz); + + for (i = 0; i < fc_sz; i++) { + is_first_matcher = !(combination_num & BIT(i)); + hws_bwc_matcher_complex_params_clear_fld(ctx, + fc[i].fname, + is_first_matcher ? 
+ m2 : m1); + } + + MLX5_SET(fte_match_param, m2->match_buf, + misc_parameters_2.metadata_reg_c_6, -1); +} + +static void +hws_bwc_matcher_complex_params_destroy(struct mlx5hws_match_parameters *mask_1, + struct mlx5hws_match_parameters *mask_2) +{ + kfree(mask_1->match_buf); + kfree(mask_2->match_buf); +} + +static int +hws_bwc_matcher_complex_params_create(struct mlx5hws_context *ctx, + u8 match_criteria, + struct mlx5hws_match_parameters *mask, + struct mlx5hws_match_parameters *mask_1, + struct mlx5hws_match_parameters *mask_2) +{ + struct mlx5hws_definer_fc *fc; + u32 num_of_combinations; + int fc_sz = 0; + int res = 0; + u32 i; + + if (MLX5_GET(fte_match_param, mask->match_buf, + misc_parameters_2.metadata_reg_c_6)) { + mlx5hws_err(ctx, "Complex matcher: REG_C_6 matching is reserved\n"); + res = -EINVAL; + goto out; + } + + mask_1->match_buf = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), + GFP_KERNEL); + mask_2->match_buf = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), + GFP_KERNEL); + if (!mask_1->match_buf || !mask_2->match_buf) { + mlx5hws_err(ctx, "Complex matcher: failed to allocate match_param\n"); + res = -ENOMEM; + goto free_params; + } + + mask_1->match_sz = mask->match_sz; + mask_2->match_sz = mask->match_sz; + + fc = mlx5hws_definer_conv_match_params_to_compressed_fc(ctx, + match_criteria, + mask->match_buf, + &fc_sz); + if (!fc) { + res = -ENOMEM; + goto free_params; + } + + if (fc_sz >= sizeof(num_of_combinations) * BITS_PER_BYTE) { + mlx5hws_err(ctx, + "Complex matcher: too many match parameters (%d)\n", + fc_sz); + res = -EINVAL; + goto free_fc; + } + + /* We have list of all the match fields from the match parameter. + * Now try all the possibilities of splitting them into two match + * buffers and look for the supported combination. + */ + num_of_combinations = 1 << fc_sz; + + /* Start from combination at index 1 - we know that 0 is unsupported */ + for (i = 1; i < num_of_combinations; i++) { + if (!hws_bwc_matcher_complex_params_comb_is_valid(fc, fc_sz, i)) + continue; + + hws_bwc_matcher_complex_params_comb_create(ctx, + mask, mask_1, mask_2, + fc, fc_sz, i); + /* We now have two separate sets of match params. + * Check if each of them can be used in its own matcher. + */ + if (!mlx5hws_bwc_match_params_is_complex(ctx, + match_criteria, + mask_1) && + !mlx5hws_bwc_match_params_is_complex(ctx, + match_criteria, + mask_2)) + break; + } + + if (i == num_of_combinations) { + /* We've scanned all the combinations, but to no avail */ + mlx5hws_err(ctx, "Complex matcher: couldn't find match params combination\n"); + res = -EINVAL; + goto free_fc; + } + + kfree(fc); + return 0; + +free_fc: + kfree(fc); +free_params: + hws_bwc_matcher_complex_params_destroy(mask_1, mask_2); +out: + return res; +} + +static int +hws_bwc_isolated_table_create(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_table *table) +{ + struct mlx5hws_cmd_ft_modify_attr ft_attr = {0}; + struct mlx5hws_context *ctx = table->ctx; + struct mlx5hws_table_attr tbl_attr = {0}; + struct mlx5hws_table *isolated_tbl; + int ret = 0; + + tbl_attr.type = table->type; + tbl_attr.level = table->level; + + bwc_matcher->complex->isolated_tbl = + mlx5hws_table_create(ctx, &tbl_attr); + isolated_tbl = bwc_matcher->complex->isolated_tbl; + if (!isolated_tbl) + return -EINVAL; + + /* Set the default miss of the isolated table to + * point to the end anchor of the original matcher. 
+ */ + mlx5hws_cmd_set_attr_connect_miss_tbl(ctx, + isolated_tbl->fw_ft_type, + isolated_tbl->type, + &ft_attr); + ft_attr.table_miss_id = bwc_matcher->matcher->end_ft_id; + + ret = mlx5hws_cmd_flow_table_modify(ctx->mdev, + &ft_attr, + isolated_tbl->ft_id); + if (ret) { + mlx5hws_err(ctx, "Failed setting isolated tbl default miss\n"); + goto destroy_tbl; + } + + return 0; + +destroy_tbl: + mlx5hws_table_destroy(isolated_tbl); + return ret; +} + +static void hws_bwc_isolated_table_destroy(struct mlx5hws_table *isolated_tbl) +{ + /* This table is isolated - no table is pointing to it, no need to + * disconnect it from anywhere, it won't affect any other table's miss. + */ + mlx5hws_table_destroy(isolated_tbl); +} + +static int +hws_bwc_isolated_matcher_create(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_table *table, + u8 match_criteria_enable, + struct mlx5hws_match_parameters *mask) +{ + struct mlx5hws_table *isolated_tbl = bwc_matcher->complex->isolated_tbl; + struct mlx5hws_bwc_matcher *isolated_bwc_matcher; + struct mlx5hws_context *ctx = table->ctx; + int ret; + + isolated_bwc_matcher = kzalloc(sizeof(*bwc_matcher), GFP_KERNEL); + if (!isolated_bwc_matcher) + return -ENOMEM; + + bwc_matcher->complex->isolated_bwc_matcher = isolated_bwc_matcher; + + /* Isolated BWC matcher needs access to the first BWC matcher */ + isolated_bwc_matcher->complex_first_bwc_matcher = bwc_matcher; + + /* Isolated matcher needs to match on REG_C_6, + * so make sure its criteria bit is on. + */ + match_criteria_enable |= MLX5HWS_DEFINER_MATCH_CRITERIA_MISC2; + + ret = mlx5hws_bwc_matcher_create_simple(isolated_bwc_matcher, + isolated_tbl, + 0, + match_criteria_enable, + mask, + NULL); + if (ret) { + mlx5hws_err(ctx, "Complex matcher: failed creating isolated BWC matcher\n"); + goto free_matcher; + } + + return 0; + +free_matcher: + kfree(bwc_matcher->complex->isolated_bwc_matcher); + return ret; +} + +static void +hws_bwc_isolated_matcher_destroy(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); + kfree(bwc_matcher); +} + +static int +hws_bwc_isolated_actions_create(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_table *table) +{ + struct mlx5hws_table *isolated_tbl = bwc_matcher->complex->isolated_tbl; + u8 modify_hdr_action[MLX5_ST_SZ_BYTES(set_action_in)] = {0}; + struct mlx5hws_context *ctx = table->ctx; + struct mlx5hws_action_mh_pattern ptrn; + int ret = 0; + + /* Create action to jump to isolated table */ + + bwc_matcher->complex->action_go_to_tbl = + mlx5hws_action_create_dest_table(ctx, + isolated_tbl, + MLX5HWS_ACTION_FLAG_HWS_FDB); + if (!bwc_matcher->complex->action_go_to_tbl) { + mlx5hws_err(ctx, "Complex matcher: failed to create go-to-tbl action\n"); + return -EINVAL; + } + + /* Create modify header action to set REG_C_6 */ + + MLX5_SET(set_action_in, modify_hdr_action, + action_type, MLX5_MODIFICATION_TYPE_SET); + MLX5_SET(set_action_in, modify_hdr_action, + field, MLX5_MODI_META_REG_C_6); + MLX5_SET(set_action_in, modify_hdr_action, + length, 0); /* zero means length of 32 */ + MLX5_SET(set_action_in, modify_hdr_action, offset, 0); + MLX5_SET(set_action_in, modify_hdr_action, data, 0); + + ptrn.data = (void *)modify_hdr_action; + ptrn.sz = MLX5HWS_ACTION_DOUBLE_SIZE; + + bwc_matcher->complex->action_metadata = + mlx5hws_action_create_modify_header(ctx, 1, &ptrn, 0, + MLX5HWS_ACTION_FLAG_HWS_FDB); + if (!bwc_matcher->complex->action_metadata) { + ret = -EINVAL; + goto destroy_action_go_to_tbl; + } + + /* Create last action */ 
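+	/* Taken together, the three actions created by this function form
+	 * the fixed chain that every rule on the first matcher executes:
+	 * set REG_C_6 to the rule's tag, jump to the isolated table,
+	 * last. A rule wires them up roughly as follows (sketch only -
+	 * the real assignment happens in mlx5hws_bwc_rule_create_complex()):
+	 *
+	 *	rule_actions[0].action = complex->action_metadata;
+	 *	rule_actions[1].action = complex->action_go_to_tbl;
+	 *	rule_actions[2].action = complex->action_last;
+	 */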
+ + bwc_matcher->complex->action_last = + mlx5hws_action_create_last(ctx, MLX5HWS_ACTION_FLAG_HWS_FDB); + if (!bwc_matcher->complex->action_last) { + mlx5hws_err(ctx, "Complex matcher: failed to create last action\n"); + ret = -EINVAL; + goto destroy_action_metadata; + } + + return 0; + +destroy_action_metadata: + mlx5hws_action_destroy(bwc_matcher->complex->action_metadata); +destroy_action_go_to_tbl: + mlx5hws_action_destroy(bwc_matcher->complex->action_go_to_tbl); + return ret; +} + +static void +hws_bwc_isolated_actions_destroy(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + mlx5hws_action_destroy(bwc_matcher->complex->action_last); + mlx5hws_action_destroy(bwc_matcher->complex->action_metadata); + mlx5hws_action_destroy(bwc_matcher->complex->action_go_to_tbl); +} + int mlx5hws_bwc_matcher_create_complex(struct mlx5hws_bwc_matcher *bwc_matcher, struct mlx5hws_table *table, u32 priority, u8 match_criteria_enable, struct mlx5hws_match_parameters *mask) { - mlx5hws_err(table->ctx, "Complex matcher is not supported yet\n"); - return -EOPNOTSUPP; + enum mlx5hws_action_type complex_init_action_types[3]; + struct mlx5hws_bwc_matcher *isolated_bwc_matcher; + struct mlx5hws_match_parameters mask_1 = {0}; + struct mlx5hws_match_parameters mask_2 = {0}; + struct mlx5hws_context *ctx = table->ctx; + int ret; + + ret = hws_bwc_matcher_complex_params_create(table->ctx, + match_criteria_enable, + mask, &mask_1, &mask_2); + if (ret) + goto err; + + bwc_matcher->complex = + kzalloc(sizeof(*bwc_matcher->complex), GFP_KERNEL); + if (!bwc_matcher->complex) { + ret = -ENOMEM; + goto free_masks; + } + + ret = rhashtable_init(&bwc_matcher->complex->refcount_hash, + &hws_refcount_hash); + if (ret) { + mlx5hws_err(ctx, "Complex matcher: failed to initialize rhashtable\n"); + goto free_complex; + } + + mutex_init(&bwc_matcher->complex->hash_lock); + ida_init(&bwc_matcher->complex->metadata_ida); + + /* Create initial action template for the first matcher. + * Usually the initial AT is just dummy, but in case of complex + * matcher we know exactly which actions should it have. 
+ */ + + complex_init_action_types[0] = MLX5HWS_ACTION_TYP_MODIFY_HDR; + complex_init_action_types[1] = MLX5HWS_ACTION_TYP_TBL; + complex_init_action_types[2] = MLX5HWS_ACTION_TYP_LAST; + + /* Create the first matcher */ + + ret = mlx5hws_bwc_matcher_create_simple(bwc_matcher, + table, + priority, + match_criteria_enable, + &mask_1, + complex_init_action_types); + if (ret) + goto destroy_ida; + + /* Create isolated table to hold the second isolated matcher */ + + ret = hws_bwc_isolated_table_create(bwc_matcher, table); + if (ret) { + mlx5hws_err(ctx, "Complex matcher: failed creating isolated table\n"); + goto destroy_first_matcher; + } + + /* Now create the second BWC matcher - the isolated one */ + + ret = hws_bwc_isolated_matcher_create(bwc_matcher, table, + match_criteria_enable, &mask_2); + if (ret) { + mlx5hws_err(ctx, "Complex matcher: failed creating isolated matcher\n"); + goto destroy_isolated_tbl; + } + + /* Create action for isolated matcher's rules */ + + ret = hws_bwc_isolated_actions_create(bwc_matcher, table); + if (ret) { + mlx5hws_err(ctx, "Complex matcher: failed creating isolated actions\n"); + goto destroy_isolated_matcher; + } + + hws_bwc_matcher_complex_params_destroy(&mask_1, &mask_2); + return 0; + +destroy_isolated_matcher: + isolated_bwc_matcher = bwc_matcher->complex->isolated_bwc_matcher; + hws_bwc_isolated_matcher_destroy(isolated_bwc_matcher); +destroy_isolated_tbl: + hws_bwc_isolated_table_destroy(bwc_matcher->complex->isolated_tbl); +destroy_first_matcher: + mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); +destroy_ida: + ida_destroy(&bwc_matcher->complex->metadata_ida); + mutex_destroy(&bwc_matcher->complex->hash_lock); + rhashtable_destroy(&bwc_matcher->complex->refcount_hash); +free_complex: + kfree(bwc_matcher->complex); + bwc_matcher->complex = NULL; +free_masks: + hws_bwc_matcher_complex_params_destroy(&mask_1, &mask_2); +err: + return ret; } void mlx5hws_bwc_matcher_destroy_complex(struct mlx5hws_bwc_matcher *bwc_matcher) { - /* nothing to do here */ + struct mlx5hws_bwc_matcher *isolated_bwc_matcher = + bwc_matcher->complex->isolated_bwc_matcher; + + hws_bwc_isolated_actions_destroy(bwc_matcher); + hws_bwc_isolated_matcher_destroy(isolated_bwc_matcher); + hws_bwc_isolated_table_destroy(bwc_matcher->complex->isolated_tbl); + mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); + ida_destroy(&bwc_matcher->complex->metadata_ida); + mutex_destroy(&bwc_matcher->complex->hash_lock); + rhashtable_destroy(&bwc_matcher->complex->refcount_hash); + kfree(bwc_matcher->complex); + bwc_matcher->complex = NULL; +} + +static void +hws_bwc_matcher_complex_hash_lock(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + mutex_lock(&bwc_matcher->complex->hash_lock); +} + +static void +hws_bwc_matcher_complex_hash_unlock(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + mutex_unlock(&bwc_matcher->complex->hash_lock); +} + +static int +hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, + struct mlx5hws_match_parameters *params) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_bwc_complex_rule_hash_node *node, *old_node; + struct rhashtable *refcount_hash; + int i; + + bwc_rule->complex_hash_node = NULL; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (unlikely(!node)) + return -ENOMEM; + + node->tag = ida_alloc(&bwc_matcher->complex->metadata_ida, GFP_KERNEL); + refcount_set(&node->refcount, 1); + + /* Clear match buffer - turn off all the unrelated fields + * in accordance with the match params mask for the first + * matcher 
out of the two parts of the complex matcher.
+	 * The resulting mask is the key for the hash.
+	 */
+	for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++)
+		node->match_buf[i] = params->match_buf[i] &
+				     bwc_matcher->mt->match_param[i];
+
+	refcount_hash = &bwc_matcher->complex->refcount_hash;
+	old_node = rhashtable_lookup_get_insert_fast(refcount_hash,
+						     &node->hash_node,
+						     hws_refcount_hash);
+	if (old_node) {
+		/* Rule with the same tag already exists - update refcount */
+		refcount_inc(&old_node->refcount);
+		/* Let the new rule use the same tag as the existing rule.
+		 * Note that we don't have any indication for the rule creation
+		 * process that a rule with similar matching params already
+		 * exists - no harm done when this rule is overwritten by
+		 * the same STE.
+		 * There's some performance advantage in skipping such cases,
+		 * so this is left for future optimizations.
+		 */
+		ida_free(&bwc_matcher->complex->metadata_ida, node->tag);
+		kfree(node);
+		node = old_node;
+	}
+
+	bwc_rule->complex_hash_node = node;
+	return 0;
+}
+
+static void
+hws_bwc_rule_complex_hash_node_put(struct mlx5hws_bwc_rule *bwc_rule,
+				   bool *is_last_rule)
+{
+	struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
+	struct mlx5hws_bwc_complex_rule_hash_node *node;
+
+	if (is_last_rule)
+		*is_last_rule = false;
+
+	node = bwc_rule->complex_hash_node;
+	if (refcount_dec_and_test(&node->refcount)) {
+		rhashtable_remove_fast(&bwc_matcher->complex->refcount_hash,
+				       &node->hash_node,
+				       hws_refcount_hash);
+		ida_free(&bwc_matcher->complex->metadata_ida, node->tag);
+		kfree(node);
+		if (is_last_rule)
+			*is_last_rule = true;
+	}
+
+	bwc_rule->complex_hash_node = NULL;
+}
+
 int mlx5hws_bwc_rule_create_complex(struct mlx5hws_bwc_rule *bwc_rule,
@@ -70,19 +1144,271 @@ int mlx5hws_bwc_rule_create_complex(struct mlx5hws_bwc_rule *bwc_rule,
 				    struct mlx5hws_rule_action rule_actions[],
 				    u16 bwc_queue_idx)
 {
-	mlx5hws_err(bwc_rule->bwc_matcher->matcher->tbl->ctx,
-		    "Complex rule is not supported yet\n");
-	return -EOPNOTSUPP;
+	struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
+	struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx;
+	u8 modify_hdr_action[MLX5_ST_SZ_BYTES(set_action_in)] = {0};
+	struct mlx5hws_rule_action rule_actions_1[3] = {0};
+	struct mlx5hws_bwc_matcher *isolated_bwc_matcher;
+	u32 *match_buf_2;
+	u32 metadata_val;
+	int ret = 0;
+
+	isolated_bwc_matcher = bwc_matcher->complex->isolated_bwc_matcher;
+	bwc_rule->isolated_bwc_rule =
+		mlx5hws_bwc_rule_alloc(isolated_bwc_matcher);
+	if (unlikely(!bwc_rule->isolated_bwc_rule))
+		return -ENOMEM;
+
+	hws_bwc_matcher_complex_hash_lock(bwc_matcher);
+
+	/* Get a new hash node for this complex rule.
+	 * If this is a unique set of match params for the first matcher,
+	 * we will get a new hash node with a newly allocated IDA tag.
+	 * Otherwise we will get an existing node with its IDA tag and an
+	 * updated refcount.
+	 */
+	ret = hws_bwc_rule_complex_hash_node_get(bwc_rule, params);
+	if (unlikely(ret)) {
+		mlx5hws_err(ctx, "Complex rule: failed getting RHT node for this rule\n");
+		goto free_isolated_rule;
+	}
+
+	/* No need to clear match buffer's fields in accordance with what
+	 * will actually be matched on first and second matchers.
+	 * Both matchers were created with the appropriate masks
+	 * and each of them holds the appropriate field copy array,
+	 * so rule creation will use only the fields that will be copied
+	 * in accordance with setters in field copy array.
+	 * We do, however, need to temporarily allocate a match buffer
+	 * for the second (isolated) rule in order not to modify the
+	 * user's match params buffer.
+	 */
+
+	match_buf_2 = kmemdup(params->match_buf,
+			      MLX5_ST_SZ_BYTES(fte_match_param),
+			      GFP_KERNEL);
+	if (unlikely(!match_buf_2)) {
+		mlx5hws_err(ctx, "Complex rule: failed allocating match_buf\n");
+		ret = -ENOMEM;
+		goto hash_node_put;
+	}
+
+	/* On 2nd matcher, use unique 32-bit ID as a matching tag */
+	metadata_val = bwc_rule->complex_hash_node->tag;
+	MLX5_SET(fte_match_param, match_buf_2,
+		 misc_parameters_2.metadata_reg_c_6, metadata_val);
+
+	/* Isolated rule's rule_actions contain all the original actions */
+	ret = mlx5hws_bwc_rule_create_simple(bwc_rule->isolated_bwc_rule,
+					     match_buf_2,
+					     rule_actions,
+					     flow_source,
+					     bwc_queue_idx);
+	kfree(match_buf_2);
+	if (unlikely(ret)) {
+		mlx5hws_err(ctx,
+			    "Complex rule: failed creating isolated BWC rule (%d)\n",
+			    ret);
+		goto hash_node_put;
+	}
+
+	/* First rule's rule_actions contain setting metadata and
+	 * jumping to the isolated table that contains the second matcher.
+	 * Set metadata value to a unique value for this rule.
+	 */
+
+	MLX5_SET(set_action_in, modify_hdr_action,
+		 action_type, MLX5_MODIFICATION_TYPE_SET);
+	MLX5_SET(set_action_in, modify_hdr_action,
+		 field, MLX5_MODI_META_REG_C_6);
+	MLX5_SET(set_action_in, modify_hdr_action,
+		 length, 0); /* zero means length of 32 */
+	MLX5_SET(set_action_in, modify_hdr_action,
+		 offset, 0);
+	MLX5_SET(set_action_in, modify_hdr_action,
+		 data, metadata_val);
+
+	rule_actions_1[0].action = bwc_matcher->complex->action_metadata;
+	rule_actions_1[0].modify_header.offset = 0;
+	rule_actions_1[0].modify_header.data = modify_hdr_action;
+
+	rule_actions_1[1].action = bwc_matcher->complex->action_go_to_tbl;
+	rule_actions_1[2].action = bwc_matcher->complex->action_last;
+
+	ret = mlx5hws_bwc_rule_create_simple(bwc_rule,
+					     params->match_buf,
+					     rule_actions_1,
+					     flow_source,
+					     bwc_queue_idx);
+
+	if (unlikely(ret)) {
+		mlx5hws_err(ctx,
+			    "Complex rule: failed creating first BWC rule (%d)\n",
+			    ret);
+		goto destroy_isolated_rule;
+	}
+
+	hws_bwc_matcher_complex_hash_unlock(bwc_matcher);
+
+	return 0;
+
+destroy_isolated_rule:
+	mlx5hws_bwc_rule_destroy_simple(bwc_rule->isolated_bwc_rule);
+hash_node_put:
+	hws_bwc_rule_complex_hash_node_put(bwc_rule, NULL);
+free_isolated_rule:
+	hws_bwc_matcher_complex_hash_unlock(bwc_matcher);
+	mlx5hws_bwc_rule_free(bwc_rule->isolated_bwc_rule);
+	return ret;
 }
 
 int mlx5hws_bwc_rule_destroy_complex(struct mlx5hws_bwc_rule *bwc_rule)
 {
-	return 0;
+	struct mlx5hws_context *ctx = bwc_rule->bwc_matcher->matcher->tbl->ctx;
+	struct mlx5hws_bwc_rule *isolated_bwc_rule;
+	int ret_isolated, ret;
+	bool is_last_rule;
+
+	hws_bwc_matcher_complex_hash_lock(bwc_rule->bwc_matcher);
+
+	hws_bwc_rule_complex_hash_node_put(bwc_rule, &is_last_rule);
+	bwc_rule->rule->skip_delete = !is_last_rule;
+
+	ret = mlx5hws_bwc_rule_destroy_simple(bwc_rule);
+	if (unlikely(ret))
+		mlx5hws_err(ctx, "BWC complex rule: failed destroying first rule\n");
+
+	isolated_bwc_rule = bwc_rule->isolated_bwc_rule;
+	ret_isolated = mlx5hws_bwc_rule_destroy_simple(isolated_bwc_rule);
+	if (unlikely(ret_isolated))
+		mlx5hws_err(ctx, "BWC complex rule: failed destroying second (isolated) rule\n");
+
+	hws_bwc_matcher_complex_hash_unlock(bwc_rule->bwc_matcher);
+
+	mlx5hws_bwc_rule_free(isolated_bwc_rule);
+
+	return ret || ret_isolated;
+}
+
+static void
+hws_bwc_matcher_clear_hash_rtcs(struct mlx5hws_bwc_matcher *bwc_matcher)
+{
+	struct 
mlx5hws_bwc_complex_rule_hash_node *node; + struct rhashtable_iter iter; + + rhashtable_walk_enter(&bwc_matcher->complex->refcount_hash, &iter); + rhashtable_walk_start(&iter); + + while ((node = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(node)) + continue; + node->rtc_valid = false; + } + + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); } -int mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) +int +mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) { - mlx5hws_err(bwc_matcher->matcher->tbl->ctx, - "Moving complex rule is not supported yet\n"); - return -EOPNOTSUPP; + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_matcher *matcher = bwc_matcher->matcher; + bool move_error = false, poll_error = false; + u16 bwc_queues = mlx5hws_bwc_queues(ctx); + struct mlx5hws_bwc_rule *tmp_bwc_rule; + struct mlx5hws_rule_attr rule_attr; + struct mlx5hws_table *isolated_tbl; + struct mlx5hws_rule *tmp_rule; + struct list_head *rules_list; + u32 expected_completions = 1; + u32 end_ft_id; + int i, ret; + + /* We are rehashing the matcher that is the first part of the complex + * matcher. Need to update the isolated matcher to point to the end_ft + * of this new matcher. This needs to be done before moving any rules + * to prevent possible steering loops. + */ + isolated_tbl = bwc_matcher->complex->isolated_tbl; + end_ft_id = bwc_matcher->matcher->resize_dst->end_ft_id; + ret = mlx5hws_matcher_update_end_ft_isolated(isolated_tbl, end_ft_id); + if (ret) { + mlx5hws_err(ctx, + "Failed updating end_ft of isolated matcher (%d)\n", + ret); + return ret; + } + + hws_bwc_matcher_clear_hash_rtcs(bwc_matcher); + + mlx5hws_bwc_rule_fill_attr(bwc_matcher, 0, 0, &rule_attr); + + for (i = 0; i < bwc_queues; i++) { + rules_list = &bwc_matcher->rules[i]; + if (list_empty(rules_list)) + continue; + + rule_attr.queue_id = mlx5hws_bwc_get_queue_id(ctx, i); + + list_for_each_entry(tmp_bwc_rule, rules_list, list_node) { + /* Check if a rule with similar tag has already + * been moved. + */ + if (tmp_bwc_rule->complex_hash_node->rtc_valid) { + /* This rule is a duplicate of rule with similar + * tag that has already been moved earlier. + * Just update this rule's RTCs. + */ + tmp_bwc_rule->rule->rtc_0 = + tmp_bwc_rule->complex_hash_node->rtc_0; + tmp_bwc_rule->rule->rtc_1 = + tmp_bwc_rule->complex_hash_node->rtc_1; + tmp_bwc_rule->rule->matcher = + tmp_bwc_rule->rule->matcher->resize_dst; + continue; + } + + /* First time we're moving rule with this tag. + * Move it for real. + */ + tmp_rule = tmp_bwc_rule->rule; + tmp_rule->skip_delete = false; + ret = mlx5hws_matcher_resize_rule_move(matcher, + tmp_rule, + &rule_attr); + if (unlikely(ret && !move_error)) { + mlx5hws_err(ctx, + "Moving complex BWC rule failed (%d), attempting to move rest of the rules\n", + ret); + move_error = true; + } + + expected_completions = 1; + ret = mlx5hws_bwc_queue_poll(ctx, + rule_attr.queue_id, + &expected_completions, + true); + if (unlikely(ret && !poll_error)) { + mlx5hws_err(ctx, + "Moving complex BWC rule: poll failed (%d), attempting to move rest of the rules\n", + ret); + poll_error = true; + } + + /* Done moving the rule to the new matcher, + * now update RTCs for all the duplicated rules. 
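+	 * (Worked example: if three complex rules share tag 7, all three
+	 * appear on these per-queue lists. The first one encountered is
+	 * moved for real; once its rtc_0/rtc_1 are cached in the shared
+	 * hash node and rtc_valid is set, the remaining two take the
+	 * branch above and only copy the cached RTC IDs and retarget
+	 * their matcher to resize_dst.)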
+ */ + tmp_bwc_rule->complex_hash_node->rtc_0 = + tmp_bwc_rule->rule->rtc_0; + tmp_bwc_rule->complex_hash_node->rtc_1 = + tmp_bwc_rule->rule->rtc_1; + + tmp_bwc_rule->complex_hash_node->rtc_valid = true; + } + } + + if (move_error || poll_error) + ret = -EINVAL; + + return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.h index 340f0688e394..a6887c7e39d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.h @@ -4,6 +4,27 @@ #ifndef HWS_BWC_COMPLEX_H_ #define HWS_BWC_COMPLEX_H_ +struct mlx5hws_bwc_complex_rule_hash_node { + u32 match_buf[MLX5_ST_SZ_DW_MATCH_PARAM]; + u32 tag; + refcount_t refcount; + bool rtc_valid; + u32 rtc_0; + u32 rtc_1; + struct rhash_head hash_node; +}; + +struct mlx5hws_bwc_matcher_complex_data { + struct mlx5hws_table *isolated_tbl; + struct mlx5hws_bwc_matcher *isolated_bwc_matcher; + struct mlx5hws_action *action_metadata; + struct mlx5hws_action *action_go_to_tbl; + struct mlx5hws_action *action_last; + struct rhashtable refcount_hash; + struct mutex hash_lock; /* Protect the refcount rhashtable */ + struct ida metadata_ida; +}; + bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, u8 match_criteria_enable, struct mlx5hws_match_parameters *mask); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c index e8f98c109b99..9c83753e4592 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c @@ -406,7 +406,6 @@ int mlx5hws_cmd_rtc_create(struct mlx5_core_dev *mdev, MLX5_SET(rtc, attr, match_definer_1, rtc_attr->match_definer_1); MLX5_SET(rtc, attr, stc_id, rtc_attr->stc_base); MLX5_SET(rtc, attr, ste_table_base_id, rtc_attr->ste_base); - MLX5_SET(rtc, attr, ste_table_offset, rtc_attr->ste_offset); MLX5_SET(rtc, attr, miss_flow_table_id, rtc_attr->miss_ft_id); MLX5_SET(rtc, attr, reparse_mode, rtc_attr->reparse_mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h index 51d9e0291ac1..fa6bff210266 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h @@ -70,7 +70,6 @@ struct mlx5hws_cmd_rtc_create_attr { u32 pd; u32 stc_base; u32 ste_base; - u32 ste_offset; u32 miss_ft_id; bool fw_gen_wqe; u8 update_index_mode; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c index 9cda2774fd64..428dae869706 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c @@ -34,7 +34,6 @@ static int hws_context_pools_init(struct mlx5hws_context *ctx) /* Create an STC pool per FT type */ pool_attr.pool_type = MLX5HWS_POOL_TYPE_STC; - pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STC_POOL; max_log_sz = min(MLX5HWS_POOL_STC_LOG_SZ, ctx->caps->stc_alloc_log_max); pool_attr.alloc_log_sz = max(max_log_sz, ctx->caps->stc_alloc_log_gran); @@ -159,10 +158,16 @@ static int hws_context_init_hws(struct mlx5hws_context *ctx, if (ret) goto pools_uninit; + ret = mlx5hws_action_ste_pool_init(ctx); + if (ret) + goto close_queues; + INIT_LIST_HEAD(&ctx->tbl_list); return 0; +close_queues: + 
mlx5hws_send_queues_close(ctx); pools_uninit: hws_context_pools_uninit(ctx); uninit_pd: @@ -175,6 +180,7 @@ static void hws_context_uninit_hws(struct mlx5hws_context *ctx) if (!(ctx->flags & MLX5HWS_CONTEXT_FLAG_HWS_SUPPORT)) return; + mlx5hws_action_ste_pool_uninit(ctx); mlx5hws_send_queues_close(ctx); hws_context_pools_uninit(ctx); hws_context_uninit_pd(ctx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h index 38c3647444ad..3f8938c73dc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h @@ -39,6 +39,8 @@ struct mlx5hws_context { struct mlx5hws_cmd_query_caps *caps; u32 pd_num; struct mlx5hws_pool *stc_pool; + struct mlx5hws_action_ste_pool *action_ste_pool; /* One per queue */ + struct delayed_work action_ste_cleanup; struct mlx5hws_context_common_res common_res; struct mlx5hws_pattern_cache *pattern_cache; struct mlx5hws_definer_cache *definer_cache; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c index 696275fd0ce2..91568d6c1dac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c @@ -118,7 +118,6 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma { enum mlx5hws_table_type tbl_type = matcher->tbl->type; struct mlx5hws_cmd_ft_query_attr ft_attr = {0}; - struct mlx5hws_pool_chunk *ste; struct mlx5hws_pool *ste_pool; u64 icm_addr_0 = 0; u64 icm_addr_1 = 0; @@ -134,12 +133,11 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma matcher->end_ft_id, matcher->col_matcher ? 
HWS_PTR_TO_ID(matcher->col_matcher) : 0); - ste = &matcher->match_ste.ste; ste_pool = matcher->match_ste.pool; if (ste_pool) { - ste_0_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste); + ste_0_id = mlx5hws_pool_get_base_id(ste_pool); if (tbl_type == MLX5HWS_TABLE_TYPE_FDB) - ste_1_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste); + ste_1_id = mlx5hws_pool_get_base_mirror_id(ste_pool); } seq_printf(f, ",%d,%d,%d,%d", @@ -148,19 +146,6 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma matcher->match_ste.rtc_1_id, (int)ste_1_id); - ste = &matcher->action_ste.ste; - ste_pool = matcher->action_ste.pool; - if (ste_pool) { - ste_0_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste); - if (tbl_type == MLX5HWS_TABLE_TYPE_FDB) - ste_1_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste); - else - ste_1_id = -1; - } else { - ste_0_id = -1; - ste_1_id = -1; - } - ft_attr.type = matcher->tbl->fw_ft_type; ret = mlx5hws_cmd_flow_table_query(matcher->tbl->ctx->mdev, matcher->end_ft_id, @@ -170,10 +155,7 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma if (ret) return ret; - seq_printf(f, ",%d,%d,%d,%d,%d,0x%llx,0x%llx\n", - matcher->action_ste.rtc_0_id, (int)ste_0_id, - matcher->action_ste.rtc_1_id, (int)ste_1_id, - 0, + seq_printf(f, ",-1,-1,-1,-1,0,0x%llx,0x%llx\n", mlx5hws_debug_icm_to_idx(icm_addr_0), mlx5hws_debug_icm_to_idx(icm_addr_1)); @@ -387,14 +369,17 @@ static int hws_debug_dump_context_stc(struct seq_file *f, struct mlx5hws_context if (!stc_pool) return 0; - if (stc_pool->resource[0]) { - ret = hws_debug_dump_context_stc_resource(f, ctx, stc_pool->resource[0]); + if (stc_pool->resource) { + ret = hws_debug_dump_context_stc_resource(f, ctx, + stc_pool->resource); if (ret) return ret; } - if (stc_pool->mirror_resource[0]) { - ret = hws_debug_dump_context_stc_resource(f, ctx, stc_pool->mirror_resource[0]); + if (stc_pool->mirror_resource) { + struct mlx5hws_pool_resource *res = stc_pool->mirror_resource; + + ret = hws_debug_dump_context_stc_resource(f, ctx, res); if (ret) return ret; } @@ -402,10 +387,41 @@ static int hws_debug_dump_context_stc(struct seq_file *f, struct mlx5hws_context return 0; } +static void +hws_debug_dump_action_ste_table(struct seq_file *f, + struct mlx5hws_action_ste_table *action_tbl) +{ + int ste_0_id = mlx5hws_pool_get_base_id(action_tbl->pool); + int ste_1_id = mlx5hws_pool_get_base_mirror_id(action_tbl->pool); + + seq_printf(f, "%d,0x%llx,%d,%d,%d,%d\n", + MLX5HWS_DEBUG_RES_TYPE_ACTION_STE_TABLE, + HWS_PTR_TO_ID(action_tbl), + action_tbl->rtc_0_id, ste_0_id, + action_tbl->rtc_1_id, ste_1_id); +} + +static void hws_debug_dump_action_ste_pool(struct seq_file *f, + struct mlx5hws_action_ste_pool *pool) +{ + struct mlx5hws_action_ste_table *action_tbl; + enum mlx5hws_pool_optimize opt; + + mutex_lock(&pool->lock); + for (opt = MLX5HWS_POOL_OPTIMIZE_NONE; opt < MLX5HWS_POOL_OPTIMIZE_MAX; + opt++) { + list_for_each_entry(action_tbl, &pool->elems[opt].available, + list_node) { + hws_debug_dump_action_ste_table(f, action_tbl); + } + } + mutex_unlock(&pool->lock); +} + static int hws_debug_dump_context(struct seq_file *f, struct mlx5hws_context *ctx) { struct mlx5hws_table *tbl; - int ret; + int ret, i; ret = hws_debug_dump_context_info(f, ctx); if (ret) @@ -425,6 +441,9 @@ static int hws_debug_dump_context(struct seq_file *f, struct mlx5hws_context *ct return ret; } + for (i = 0; i < ctx->queues; i++) + hws_debug_dump_action_ste_pool(f, &ctx->action_ste_pool[i]); + return 0; } diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h index e44e7ae28f93..89c396f9f266 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h @@ -26,6 +26,8 @@ enum mlx5hws_debug_res_type { MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_HASH_DEFINER = 4205, MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_RANGE_DEFINER = 4206, MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_COMPARE_MATCH_DEFINER = 4207, + + MLX5HWS_DEBUG_RES_TYPE_ACTION_STE_TABLE = 4300, }; static inline u64 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c index c8cc0c8115f5..5cc0dc002ac1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c @@ -158,6 +158,218 @@ struct mlx5hws_definer_conv_data { u32 match_flags; }; +#define HWS_DEFINER_ENTRY(name)[MLX5HWS_DEFINER_FNAME_##name] = #name + +static const char * const hws_definer_fname_to_str[] = { + HWS_DEFINER_ENTRY(ETH_SMAC_47_16_O), + HWS_DEFINER_ENTRY(ETH_SMAC_47_16_I), + HWS_DEFINER_ENTRY(ETH_SMAC_15_0_O), + HWS_DEFINER_ENTRY(ETH_SMAC_15_0_I), + HWS_DEFINER_ENTRY(ETH_DMAC_47_16_O), + HWS_DEFINER_ENTRY(ETH_DMAC_47_16_I), + HWS_DEFINER_ENTRY(ETH_DMAC_15_0_O), + HWS_DEFINER_ENTRY(ETH_DMAC_15_0_I), + HWS_DEFINER_ENTRY(ETH_TYPE_O), + HWS_DEFINER_ENTRY(ETH_TYPE_I), + HWS_DEFINER_ENTRY(ETH_L3_TYPE_O), + HWS_DEFINER_ENTRY(ETH_L3_TYPE_I), + HWS_DEFINER_ENTRY(VLAN_TYPE_O), + HWS_DEFINER_ENTRY(VLAN_TYPE_I), + HWS_DEFINER_ENTRY(VLAN_FIRST_PRIO_O), + HWS_DEFINER_ENTRY(VLAN_FIRST_PRIO_I), + HWS_DEFINER_ENTRY(VLAN_CFI_O), + HWS_DEFINER_ENTRY(VLAN_CFI_I), + HWS_DEFINER_ENTRY(VLAN_ID_O), + HWS_DEFINER_ENTRY(VLAN_ID_I), + HWS_DEFINER_ENTRY(VLAN_SECOND_TYPE_O), + HWS_DEFINER_ENTRY(VLAN_SECOND_TYPE_I), + HWS_DEFINER_ENTRY(VLAN_SECOND_PRIO_O), + HWS_DEFINER_ENTRY(VLAN_SECOND_PRIO_I), + HWS_DEFINER_ENTRY(VLAN_SECOND_CFI_O), + HWS_DEFINER_ENTRY(VLAN_SECOND_CFI_I), + HWS_DEFINER_ENTRY(VLAN_SECOND_ID_O), + HWS_DEFINER_ENTRY(VLAN_SECOND_ID_I), + HWS_DEFINER_ENTRY(IPV4_IHL_O), + HWS_DEFINER_ENTRY(IPV4_IHL_I), + HWS_DEFINER_ENTRY(IP_DSCP_O), + HWS_DEFINER_ENTRY(IP_DSCP_I), + HWS_DEFINER_ENTRY(IP_ECN_O), + HWS_DEFINER_ENTRY(IP_ECN_I), + HWS_DEFINER_ENTRY(IP_TTL_O), + HWS_DEFINER_ENTRY(IP_TTL_I), + HWS_DEFINER_ENTRY(IPV4_DST_O), + HWS_DEFINER_ENTRY(IPV4_DST_I), + HWS_DEFINER_ENTRY(IPV4_SRC_O), + HWS_DEFINER_ENTRY(IPV4_SRC_I), + HWS_DEFINER_ENTRY(IP_VERSION_O), + HWS_DEFINER_ENTRY(IP_VERSION_I), + HWS_DEFINER_ENTRY(IP_FRAG_O), + HWS_DEFINER_ENTRY(IP_FRAG_I), + HWS_DEFINER_ENTRY(IP_LEN_O), + HWS_DEFINER_ENTRY(IP_LEN_I), + HWS_DEFINER_ENTRY(IP_TOS_O), + HWS_DEFINER_ENTRY(IP_TOS_I), + HWS_DEFINER_ENTRY(IPV6_FLOW_LABEL_O), + HWS_DEFINER_ENTRY(IPV6_FLOW_LABEL_I), + HWS_DEFINER_ENTRY(IPV6_DST_127_96_O), + HWS_DEFINER_ENTRY(IPV6_DST_95_64_O), + HWS_DEFINER_ENTRY(IPV6_DST_63_32_O), + HWS_DEFINER_ENTRY(IPV6_DST_31_0_O), + HWS_DEFINER_ENTRY(IPV6_DST_127_96_I), + HWS_DEFINER_ENTRY(IPV6_DST_95_64_I), + HWS_DEFINER_ENTRY(IPV6_DST_63_32_I), + HWS_DEFINER_ENTRY(IPV6_DST_31_0_I), + HWS_DEFINER_ENTRY(IPV6_SRC_127_96_O), + HWS_DEFINER_ENTRY(IPV6_SRC_95_64_O), + HWS_DEFINER_ENTRY(IPV6_SRC_63_32_O), + HWS_DEFINER_ENTRY(IPV6_SRC_31_0_O), + HWS_DEFINER_ENTRY(IPV6_SRC_127_96_I), + HWS_DEFINER_ENTRY(IPV6_SRC_95_64_I), + HWS_DEFINER_ENTRY(IPV6_SRC_63_32_I), + HWS_DEFINER_ENTRY(IPV6_SRC_31_0_I), + 
HWS_DEFINER_ENTRY(IP_PROTOCOL_O), + HWS_DEFINER_ENTRY(IP_PROTOCOL_I), + HWS_DEFINER_ENTRY(L4_SPORT_O), + HWS_DEFINER_ENTRY(L4_SPORT_I), + HWS_DEFINER_ENTRY(L4_DPORT_O), + HWS_DEFINER_ENTRY(L4_DPORT_I), + HWS_DEFINER_ENTRY(TCP_FLAGS_I), + HWS_DEFINER_ENTRY(TCP_FLAGS_O), + HWS_DEFINER_ENTRY(TCP_SEQ_NUM), + HWS_DEFINER_ENTRY(TCP_ACK_NUM), + HWS_DEFINER_ENTRY(GTP_TEID), + HWS_DEFINER_ENTRY(GTP_MSG_TYPE), + HWS_DEFINER_ENTRY(GTP_EXT_FLAG), + HWS_DEFINER_ENTRY(GTP_NEXT_EXT_HDR), + HWS_DEFINER_ENTRY(GTP_EXT_HDR_PDU), + HWS_DEFINER_ENTRY(GTP_EXT_HDR_QFI), + HWS_DEFINER_ENTRY(GTPU_DW0), + HWS_DEFINER_ENTRY(GTPU_FIRST_EXT_DW0), + HWS_DEFINER_ENTRY(GTPU_DW2), + HWS_DEFINER_ENTRY(FLEX_PARSER_0), + HWS_DEFINER_ENTRY(FLEX_PARSER_1), + HWS_DEFINER_ENTRY(FLEX_PARSER_2), + HWS_DEFINER_ENTRY(FLEX_PARSER_3), + HWS_DEFINER_ENTRY(FLEX_PARSER_4), + HWS_DEFINER_ENTRY(FLEX_PARSER_5), + HWS_DEFINER_ENTRY(FLEX_PARSER_6), + HWS_DEFINER_ENTRY(FLEX_PARSER_7), + HWS_DEFINER_ENTRY(VPORT_REG_C_0), + HWS_DEFINER_ENTRY(VXLAN_FLAGS), + HWS_DEFINER_ENTRY(VXLAN_VNI), + HWS_DEFINER_ENTRY(VXLAN_GPE_FLAGS), + HWS_DEFINER_ENTRY(VXLAN_GPE_RSVD0), + HWS_DEFINER_ENTRY(VXLAN_GPE_PROTO), + HWS_DEFINER_ENTRY(VXLAN_GPE_VNI), + HWS_DEFINER_ENTRY(VXLAN_GPE_RSVD1), + HWS_DEFINER_ENTRY(GENEVE_OPT_LEN), + HWS_DEFINER_ENTRY(GENEVE_OAM), + HWS_DEFINER_ENTRY(GENEVE_PROTO), + HWS_DEFINER_ENTRY(GENEVE_VNI), + HWS_DEFINER_ENTRY(SOURCE_QP), + HWS_DEFINER_ENTRY(SOURCE_GVMI), + HWS_DEFINER_ENTRY(REG_0), + HWS_DEFINER_ENTRY(REG_1), + HWS_DEFINER_ENTRY(REG_2), + HWS_DEFINER_ENTRY(REG_3), + HWS_DEFINER_ENTRY(REG_4), + HWS_DEFINER_ENTRY(REG_5), + HWS_DEFINER_ENTRY(REG_6), + HWS_DEFINER_ENTRY(REG_7), + HWS_DEFINER_ENTRY(REG_8), + HWS_DEFINER_ENTRY(REG_9), + HWS_DEFINER_ENTRY(REG_10), + HWS_DEFINER_ENTRY(REG_11), + HWS_DEFINER_ENTRY(REG_A), + HWS_DEFINER_ENTRY(REG_B), + HWS_DEFINER_ENTRY(GRE_KEY_PRESENT), + HWS_DEFINER_ENTRY(GRE_C), + HWS_DEFINER_ENTRY(GRE_K), + HWS_DEFINER_ENTRY(GRE_S), + HWS_DEFINER_ENTRY(GRE_PROTOCOL), + HWS_DEFINER_ENTRY(GRE_OPT_KEY), + HWS_DEFINER_ENTRY(GRE_OPT_SEQ), + HWS_DEFINER_ENTRY(GRE_OPT_CHECKSUM), + HWS_DEFINER_ENTRY(INTEGRITY_O), + HWS_DEFINER_ENTRY(INTEGRITY_I), + HWS_DEFINER_ENTRY(ICMP_DW1), + HWS_DEFINER_ENTRY(ICMP_DW2), + HWS_DEFINER_ENTRY(ICMP_DW3), + HWS_DEFINER_ENTRY(IPSEC_SPI), + HWS_DEFINER_ENTRY(IPSEC_SEQUENCE_NUMBER), + HWS_DEFINER_ENTRY(IPSEC_SYNDROME), + HWS_DEFINER_ENTRY(MPLS0_O), + HWS_DEFINER_ENTRY(MPLS1_O), + HWS_DEFINER_ENTRY(MPLS2_O), + HWS_DEFINER_ENTRY(MPLS3_O), + HWS_DEFINER_ENTRY(MPLS4_O), + HWS_DEFINER_ENTRY(MPLS0_I), + HWS_DEFINER_ENTRY(MPLS1_I), + HWS_DEFINER_ENTRY(MPLS2_I), + HWS_DEFINER_ENTRY(MPLS3_I), + HWS_DEFINER_ENTRY(MPLS4_I), + HWS_DEFINER_ENTRY(FLEX_PARSER0_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER1_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER2_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER3_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER4_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER5_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER6_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER7_OK), + HWS_DEFINER_ENTRY(OKS2_MPLS0_O), + HWS_DEFINER_ENTRY(OKS2_MPLS1_O), + HWS_DEFINER_ENTRY(OKS2_MPLS2_O), + HWS_DEFINER_ENTRY(OKS2_MPLS3_O), + HWS_DEFINER_ENTRY(OKS2_MPLS4_O), + HWS_DEFINER_ENTRY(OKS2_MPLS0_I), + HWS_DEFINER_ENTRY(OKS2_MPLS1_I), + HWS_DEFINER_ENTRY(OKS2_MPLS2_I), + HWS_DEFINER_ENTRY(OKS2_MPLS3_I), + HWS_DEFINER_ENTRY(OKS2_MPLS4_I), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_0), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_1), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_2), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_3), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_4), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_5), + 
HWS_DEFINER_ENTRY(GENEVE_OPT_OK_6), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_7), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_0), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_1), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_2), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_3), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_4), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_5), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_6), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_7), + HWS_DEFINER_ENTRY(IB_L4_OPCODE), + HWS_DEFINER_ENTRY(IB_L4_QPN), + HWS_DEFINER_ENTRY(IB_L4_A), + HWS_DEFINER_ENTRY(RANDOM_NUM), + HWS_DEFINER_ENTRY(PTYPE_L2_O), + HWS_DEFINER_ENTRY(PTYPE_L2_I), + HWS_DEFINER_ENTRY(PTYPE_L3_O), + HWS_DEFINER_ENTRY(PTYPE_L3_I), + HWS_DEFINER_ENTRY(PTYPE_L4_O), + HWS_DEFINER_ENTRY(PTYPE_L4_I), + HWS_DEFINER_ENTRY(PTYPE_L4_EXT_O), + HWS_DEFINER_ENTRY(PTYPE_L4_EXT_I), + HWS_DEFINER_ENTRY(PTYPE_FRAG_O), + HWS_DEFINER_ENTRY(PTYPE_FRAG_I), + HWS_DEFINER_ENTRY(TNL_HDR_0), + HWS_DEFINER_ENTRY(TNL_HDR_1), + HWS_DEFINER_ENTRY(TNL_HDR_2), + HWS_DEFINER_ENTRY(TNL_HDR_3), + [MLX5HWS_DEFINER_FNAME_MAX] = "DEFINER_FNAME_UNKNOWN", +}; + +const char *mlx5hws_definer_fname_to_str(enum mlx5hws_definer_fname fname) +{ + if (fname > MLX5HWS_DEFINER_FNAME_MAX) + fname = MLX5HWS_DEFINER_FNAME_MAX; + return hws_definer_fname_to_str[fname]; +} + static void hws_definer_ones_set(struct mlx5hws_definer_fc *fc, void *match_param, @@ -509,7 +721,7 @@ static int hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, u32 *match_param) { - bool is_s_ipv6, is_d_ipv6, smac_set, dmac_set; + bool is_ipv6, smac_set, dmac_set, ip_addr_set, ip_ver_set; struct mlx5hws_definer_fc *fc = cd->fc; struct mlx5hws_definer_fc *curr_fc; u32 *s_ipv6, *d_ipv6; @@ -521,6 +733,20 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, return -EINVAL; } + ip_addr_set = HWS_IS_FLD_SET_SZ(match_param, + outer_headers.src_ipv4_src_ipv6, + 0x80) || + HWS_IS_FLD_SET_SZ(match_param, + outer_headers.dst_ipv4_dst_ipv6, 0x80); + ip_ver_set = HWS_IS_FLD_SET(match_param, outer_headers.ip_version) || + HWS_IS_FLD_SET(match_param, outer_headers.ethertype); + + if (ip_addr_set && !ip_ver_set) { + mlx5hws_err(cd->ctx, + "Unsupported match on IP address without version or ethertype\n"); + return -EINVAL; + } + /* L2 Check ethertype */ HWS_SET_HDR(fc, match_param, ETH_TYPE_O, outer_headers.ethertype, @@ -570,10 +796,16 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, outer_headers.dst_ipv4_dst_ipv6.ipv6_layout); /* Assume IPv6 is used if ipv6 bits are set */ - is_s_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2]; - is_d_ipv6 = d_ipv6[0] || d_ipv6[1] || d_ipv6[2]; + is_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2] || + d_ipv6[0] || d_ipv6[1] || d_ipv6[2]; + + /* IHL is an IPv4-specific field. 
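The HWS_DEFINER_ENTRY table and mlx5hws_definer_fname_to_str above use the classic designated-initializer trick: each enum value indexes its own name string, and out-of-range input is clamped to a sentinel slot so the lookup can never walk off the array. The same pattern in a few lines of standalone C (a toy enum, not the mlx5 one):

#include <stdio.h>

enum fname { FNAME_ETH_TYPE, FNAME_IP_TTL, FNAME_L4_DPORT, FNAME_MAX };

#define ENTRY(name) [FNAME_##name] = #name

static const char * const fname_str[] = {
	ENTRY(ETH_TYPE),
	ENTRY(IP_TTL),
	ENTRY(L4_DPORT),
	[FNAME_MAX] = "FNAME_UNKNOWN",	/* sentinel for bad input */
};

static const char *fname_to_str(enum fname f)
{
	if (f > FNAME_MAX)
		f = FNAME_MAX;	/* clamp instead of indexing out of bounds */
	return fname_str[f];
}

int main(void)
{
	printf("%s %s\n", fname_to_str(FNAME_IP_TTL), fname_to_str(99));
	return 0;
}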
*/ + if (is_ipv6 && HWS_IS_FLD_SET(match_param, outer_headers.ipv4_ihl)) { + mlx5hws_err(cd->ctx, "Unsupported match on IPv6 address and IPv4 IHL\n"); + return -EINVAL; + } - if (is_s_ipv6) { + if (is_ipv6) { /* Handle IPv6 source address */ HWS_SET_HDR(fc, match_param, IPV6_SRC_127_96_O, outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96, @@ -587,13 +819,6 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, HWS_SET_HDR(fc, match_param, IPV6_SRC_31_0_O, outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, ipv6_src_outer.ipv6_address_31_0); - } else { - /* Handle IPv4 source address */ - HWS_SET_HDR(fc, match_param, IPV4_SRC_O, - outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, - ipv4_src_dest_outer.source_address); - } - if (is_d_ipv6) { /* Handle IPv6 destination address */ HWS_SET_HDR(fc, match_param, IPV6_DST_127_96_O, outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96, @@ -608,6 +833,10 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0, ipv6_dst_outer.ipv6_address_31_0); } else { + /* Handle IPv4 source address */ + HWS_SET_HDR(fc, match_param, IPV4_SRC_O, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, + ipv4_src_dest_outer.source_address); /* Handle IPv4 destination address */ HWS_SET_HDR(fc, match_param, IPV4_DST_O, outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0, @@ -665,7 +894,7 @@ static int hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd, u32 *match_param) { - bool is_s_ipv6, is_d_ipv6, smac_set, dmac_set; + bool is_ipv6, smac_set, dmac_set, ip_addr_set, ip_ver_set; struct mlx5hws_definer_fc *fc = cd->fc; struct mlx5hws_definer_fc *curr_fc; u32 *s_ipv6, *d_ipv6; @@ -677,6 +906,20 @@ hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd, return -EINVAL; } + ip_addr_set = HWS_IS_FLD_SET_SZ(match_param, + inner_headers.src_ipv4_src_ipv6, + 0x80) || + HWS_IS_FLD_SET_SZ(match_param, + inner_headers.dst_ipv4_dst_ipv6, 0x80); + ip_ver_set = HWS_IS_FLD_SET(match_param, inner_headers.ip_version) || + HWS_IS_FLD_SET(match_param, inner_headers.ethertype); + + if (ip_addr_set && !ip_ver_set) { + mlx5hws_err(cd->ctx, + "Unsupported match on IP address without version or ethertype\n"); + return -EINVAL; + } + /* L2 Check ethertype */ HWS_SET_HDR(fc, match_param, ETH_TYPE_I, inner_headers.ethertype, @@ -728,10 +971,16 @@ hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd, inner_headers.dst_ipv4_dst_ipv6.ipv6_layout); /* Assume IPv6 is used if ipv6 bits are set */ - is_s_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2]; - is_d_ipv6 = d_ipv6[0] || d_ipv6[1] || d_ipv6[2]; + is_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2] || + d_ipv6[0] || d_ipv6[1] || d_ipv6[2]; - if (is_s_ipv6) { + /* IHL is an IPv4-specific field. 
*/ + if (is_ipv6 && HWS_IS_FLD_SET(match_param, inner_headers.ipv4_ihl)) { + mlx5hws_err(cd->ctx, "Unsupported match on IPv6 address and IPv4 IHL\n"); + return -EINVAL; + } + + if (is_ipv6) { /* Handle IPv6 source address */ HWS_SET_HDR(fc, match_param, IPV6_SRC_127_96_I, inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96, @@ -745,13 +994,6 @@ hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd, HWS_SET_HDR(fc, match_param, IPV6_SRC_31_0_I, inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, ipv6_src_inner.ipv6_address_31_0); - } else { - /* Handle IPv4 source address */ - HWS_SET_HDR(fc, match_param, IPV4_SRC_I, - inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, - ipv4_src_dest_inner.source_address); - } - if (is_d_ipv6) { /* Handle IPv6 destination address */ HWS_SET_HDR(fc, match_param, IPV6_DST_127_96_I, inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96, @@ -766,6 +1008,10 @@ hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd, inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0, ipv6_dst_inner.ipv6_address_31_0); } else { + /* Handle IPv4 source address */ + HWS_SET_HDR(fc, match_param, IPV4_SRC_I, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, + ipv4_src_dest_inner.source_address); /* Handle IPv4 destination address */ HWS_SET_HDR(fc, match_param, IPV4_DST_I, inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h index 5c1a2086efba..62da55389331 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h @@ -831,4 +831,6 @@ mlx5hws_definer_conv_match_params_to_compressed_fc(struct mlx5hws_context *ctx, u32 *match_param, int *fc_sz); +const char *mlx5hws_definer_fname_to_str(enum mlx5hws_definer_fname fname); + #endif /* HWS_DEFINER_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h index 30ccd635b505..21279d503117 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h @@ -17,6 +17,7 @@ #include "context.h" #include "table.h" #include "send.h" +#include "action_ste_pool.h" #include "rule.h" #include "cmd.h" #include "action.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c index b61864b32053..ce28ee1c0e41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c @@ -3,25 +3,6 @@ #include "internal.h" -enum mlx5hws_matcher_rtc_type { - HWS_MATCHER_RTC_TYPE_MATCH, - HWS_MATCHER_RTC_TYPE_STE_ARRAY, - HWS_MATCHER_RTC_TYPE_MAX, -}; - -static const char * const mlx5hws_matcher_rtc_type_str[] = { - [HWS_MATCHER_RTC_TYPE_MATCH] = "MATCH", - [HWS_MATCHER_RTC_TYPE_STE_ARRAY] = "STE_ARRAY", - [HWS_MATCHER_RTC_TYPE_MAX] = "UNKNOWN", -}; - -static const char *hws_matcher_rtc_type_to_str(enum mlx5hws_matcher_rtc_type rtc_type) -{ - if (rtc_type > HWS_MATCHER_RTC_TYPE_MAX) - rtc_type = HWS_MATCHER_RTC_TYPE_MAX; - return mlx5hws_matcher_rtc_type_str[rtc_type]; -} - static bool hws_matcher_requires_col_tbl(u8 log_num_of_rules) { /* Collision table concatenation is done only for large rule tables */ @@ -42,19 +23,199 @@ 
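The outer- and inner-header converters above now enforce the same two sanity rules before building a definer: an IP address match is only accepted when the IP version (or ethertype) is also matched, and the single is_ipv6 flag (source or destination bits set) can never be combined with the IPv4-only IHL field. Distilled into a standalone C checker, with illustrative field names rather than the mlx5 match-param layout:

#include <stdbool.h>
#include <stdio.h>

struct ip_match {
	bool src_or_dst_addr_set; /* any bit of the src/dst IP mask set */
	bool ip_version_set;      /* ip_version or ethertype matched */
	bool ipv6_bits_set;       /* upper 96 bits of either address nonzero */
	bool ipv4_ihl_set;        /* IHL is IPv4-specific */
};

/* Returns 0 if the match combination is valid, -1 otherwise. */
static int check_ip_match(const struct ip_match *m)
{
	if (m->src_or_dst_addr_set && !m->ip_version_set)
		return -1; /* address without version is ambiguous */
	if (m->ipv6_bits_set && m->ipv4_ihl_set)
		return -1; /* IHL cannot coexist with an IPv6 address */
	return 0;
}

int main(void)
{
	struct ip_match bad = { .src_or_dst_addr_set = true };

	printf("valid=%d\n", check_ip_match(&bad) == 0);
	return 0;
}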
static void hws_matcher_destroy_end_ft(struct mlx5hws_matcher *matcher) mlx5hws_table_destroy_default_ft(matcher->tbl, matcher->end_ft_id); } +int mlx5hws_matcher_update_end_ft_isolated(struct mlx5hws_table *tbl, + u32 miss_ft_id) +{ + struct mlx5hws_matcher *tmp_matcher; + + if (list_empty(&tbl->matchers_list)) + return -EINVAL; + + /* Update isolated_matcher_end_ft_id attribute for all + * the matchers in isolated table. + */ + list_for_each_entry(tmp_matcher, &tbl->matchers_list, list_node) + tmp_matcher->attr.isolated_matcher_end_ft_id = miss_ft_id; + + tmp_matcher = list_last_entry(&tbl->matchers_list, + struct mlx5hws_matcher, + list_node); + + return mlx5hws_table_ft_set_next_ft(tbl->ctx, + tmp_matcher->end_ft_id, + tbl->fw_ft_type, + miss_ft_id); +} + +static int hws_matcher_connect_end_ft_isolated(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_table *tbl = matcher->tbl; + u32 end_ft_id; + int ret; + + /* Reset end_ft next RTCs */ + ret = mlx5hws_table_ft_set_next_rtc(tbl->ctx, + matcher->end_ft_id, + matcher->tbl->fw_ft_type, + 0, 0); + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to reset FT's next RTCs\n"); + return ret; + } + + /* Connect isolated matcher's end_ft to the complex matcher's end FT */ + end_ft_id = matcher->attr.isolated_matcher_end_ft_id; + ret = mlx5hws_table_ft_set_next_ft(tbl->ctx, + matcher->end_ft_id, + matcher->tbl->fw_ft_type, + end_ft_id); + + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to set FT's miss_ft_id\n"); + return ret; + } + + return 0; +} + +static int hws_matcher_create_end_ft_isolated(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_table *tbl = matcher->tbl; + int ret; + + ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, + tbl, + &matcher->end_ft_id); + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to create end flow table\n"); + return ret; + } + + ret = hws_matcher_connect_end_ft_isolated(matcher); + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to connect end FT\n"); + goto destroy_default_ft; + } + + return 0; + +destroy_default_ft: + mlx5hws_table_destroy_default_ft(tbl, matcher->end_ft_id); + return ret; +} + static int hws_matcher_create_end_ft(struct mlx5hws_matcher *matcher) { struct mlx5hws_table *tbl = matcher->tbl; int ret; - ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl, &matcher->end_ft_id); + if (mlx5hws_matcher_is_isolated(matcher)) + ret = hws_matcher_create_end_ft_isolated(matcher); + else + ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl, + &matcher->end_ft_id); + if (ret) { mlx5hws_err(tbl->ctx, "Failed to create matcher end flow table\n"); return ret; } + return 0; } +static int hws_matcher_connect_isolated_first(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_table *tbl = matcher->tbl; + struct mlx5hws_context *ctx = tbl->ctx; + int ret; + + /* Isolated matcher's end_ft is already pointing to the end_ft + * of the complex matcher - it was set at creation of end_ft, + * so no need to connect it. + * We still need to connect the isolated table's start FT to + * this matcher's RTC. 
+ */ + ret = mlx5hws_table_ft_set_next_rtc(ctx, + tbl->ft_id, + tbl->fw_ft_type, + matcher->match_ste.rtc_0_id, + matcher->match_ste.rtc_1_id); + if (ret) { + mlx5hws_err(ctx, "Isolated matcher: failed to connect start FT to match RTC\n"); + return ret; + } + + /* Reset table's FT default miss (drop refcount) */ + ret = mlx5hws_table_ft_set_default_next_ft(tbl, tbl->ft_id); + if (ret) { + mlx5hws_err(ctx, "Isolated matcher: failed to reset table ft default miss\n"); + return ret; + } + + list_add(&matcher->list_node, &tbl->matchers_list); + + return ret; +} + +static int hws_matcher_connect_isolated_last(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_table *tbl = matcher->tbl; + struct mlx5hws_context *ctx = tbl->ctx; + struct mlx5hws_matcher *last; + int ret; + + last = list_last_entry(&tbl->matchers_list, + struct mlx5hws_matcher, + list_node); + + /* New matcher's end_ft is already pointing to the end_ft of + * the complex matcher. + * Connect previous matcher's end_ft to this new matcher RTC. + */ + ret = mlx5hws_table_ft_set_next_rtc(ctx, + last->end_ft_id, + tbl->fw_ft_type, + matcher->match_ste.rtc_0_id, + matcher->match_ste.rtc_1_id); + if (ret) { + mlx5hws_err(ctx, + "Isolated matcher: failed to connect matcher end_ft to new match RTC\n"); + return ret; + } + + /* Reset prev matcher FT default miss (drop refcount) */ + ret = mlx5hws_table_ft_set_default_next_ft(tbl, last->end_ft_id); + if (ret) { + mlx5hws_err(ctx, "Isolated matcher: failed to reset matcher ft default miss\n"); + return ret; + } + + /* Insert after the last matcher */ + list_add(&matcher->list_node, &last->list_node); + + return 0; +} + +static int hws_matcher_connect_isolated(struct mlx5hws_matcher *matcher) +{ + /* Isolated matcher is expected to be the only one in its table. + * However, it can have a collision matcher, and it can go through + * the rehash process, in which case we will temporarily have both old and + * new matchers in the isolated table. + * Check if this is the first matcher in the isolated table. + */ + if (list_empty(&matcher->tbl->matchers_list)) + return hws_matcher_connect_isolated_first(matcher); + + /* If this wasn't the first matcher, then we have 3 possible cases: + * - this is a collision matcher for the first matcher + * - this is a new rehash dest matcher + * - this is a collision matcher for the new rehash dest matcher + * The logic to add a new matcher is the same for all these cases.
+ */ + return hws_matcher_connect_isolated_last(matcher); +} + static int hws_matcher_connect(struct mlx5hws_matcher *matcher) { struct mlx5hws_table *tbl = matcher->tbl; @@ -64,6 +225,9 @@ static int hws_matcher_connect(struct mlx5hws_matcher *matcher) struct mlx5hws_matcher *tmp_matcher; int ret; + if (mlx5hws_matcher_is_isolated(matcher)) + return hws_matcher_connect_isolated(matcher); + /* Find location in matcher list */ if (list_empty(&tbl->matchers_list)) { list_add(&matcher->list_node, &tbl->matchers_list); @@ -140,6 +304,92 @@ remove_from_list: return ret; } +static int hws_matcher_disconnect_isolated(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_matcher *first, *last, *prev, *next; + struct mlx5hws_table *tbl = matcher->tbl; + struct mlx5hws_context *ctx = tbl->ctx; + u32 end_ft_id; + int ret; + + first = list_first_entry(&tbl->matchers_list, + struct mlx5hws_matcher, + list_node); + last = list_last_entry(&tbl->matchers_list, + struct mlx5hws_matcher, + list_node); + prev = list_prev_entry(matcher, list_node); + next = list_next_entry(matcher, list_node); + + list_del_init(&matcher->list_node); + + if (first == last) { + /* This was the only matcher in the list. + * Reset isolated table FT next RTCs and connect it + * to the whole complex matcher end FT instead. + */ + ret = mlx5hws_table_ft_set_next_rtc(ctx, + tbl->ft_id, + tbl->fw_ft_type, + 0, 0); + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to reset FT's next RTCs\n"); + return ret; + } + + end_ft_id = matcher->attr.isolated_matcher_end_ft_id; + ret = mlx5hws_table_ft_set_next_ft(tbl->ctx, + tbl->ft_id, + tbl->fw_ft_type, + end_ft_id); + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to set FT's miss_ft_id\n"); + return ret; + } + + return 0; + } + + /* At this point we know that there are more matchers in the list */ + + if (matcher == first) { + /* We've disconnected the first matcher. + * Now update isolated table default FT. + */ + if (!next) + return -EINVAL; + return mlx5hws_table_ft_set_next_rtc(ctx, + tbl->ft_id, + tbl->fw_ft_type, + next->match_ste.rtc_0_id, + next->match_ste.rtc_1_id); + } + + if (matcher == last) { + /* If we've disconnected the last matcher - update prev + * matcher's end_ft to point to the complex matcher end_ft. + */ + if (!prev) + return -EINVAL; + return hws_matcher_connect_end_ft_isolated(prev); + } + + /* This wasn't the first or the last matcher, which means that it has + * both prev and next matchers. Note that this only happens if we're + * disconnecting collision matcher of the old matcher during rehash. 
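Connecting and disconnecting isolated matchers, as implemented above, is ordinary singly-chained list surgery expressed in hardware terms: the table's start FT points at the first matcher's match RTC, each matcher's end FT points at the next matcher's RTC, and the last end FT points at the complex matcher's end FT. Removing a node therefore just repoints its predecessor at its successor. A toy C version of the relink, with plain pointers standing in for FT/RTC IDs:

#include <stdio.h>

struct matcher {
	int rtc_id;              /* where the previous hop jumps to */
	struct matcher *next;    /* next matcher; NULL = complex end FT */
};

/* Unlink m: whatever pointed at m must now point at m->next. */
static void disconnect(struct matcher **pprev, struct matcher *m)
{
	*pprev = m->next;  /* prev end FT -> next match RTC (or end FT) */
}

int main(void)
{
	struct matcher c = { .rtc_id = 3, .next = NULL };
	struct matcher b = { .rtc_id = 2, .next = &c };
	struct matcher a = { .rtc_id = 1, .next = &b };
	struct matcher *head = &a;

	disconnect(&a.next, &b);   /* drop the middle matcher */
	printf("%d -> %d\n", head->rtc_id, head->next->rtc_id);
	return 0;
}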
+ */ + if (!prev || !next || + !(matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)) + return -EINVAL; + + /* Update prev end FT to point to next match RTC */ + return mlx5hws_table_ft_set_next_rtc(ctx, + prev->end_ft_id, + tbl->fw_ft_type, + next->match_ste.rtc_0_id, + next->match_ste.rtc_1_id); +} + static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher) { struct mlx5hws_matcher *next = NULL, *prev = NULL; @@ -147,6 +397,9 @@ static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher) u32 prev_ft_id = tbl->ft_id; int ret; + if (mlx5hws_matcher_is_isolated(matcher)) + return hws_matcher_disconnect_isolated(matcher); + if (!list_is_first(&matcher->list_node, &tbl->matchers_list)) { prev = list_prev_entry(matcher, list_node); prev_ft_id = prev->end_ft_id; @@ -197,149 +450,96 @@ static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher) static void hws_matcher_set_rtc_attr_sz(struct mlx5hws_matcher *matcher, struct mlx5hws_cmd_rtc_create_attr *rtc_attr, - enum mlx5hws_matcher_rtc_type rtc_type, bool is_mirror) { - struct mlx5hws_pool_chunk *ste = &matcher->action_ste.ste; enum mlx5hws_matcher_flow_src flow_src = matcher->attr.optimize_flow_src; - bool is_match_rtc = rtc_type == HWS_MATCHER_RTC_TYPE_MATCH; if ((flow_src == MLX5HWS_MATCHER_FLOW_SRC_VPORT && !is_mirror) || (flow_src == MLX5HWS_MATCHER_FLOW_SRC_WIRE && is_mirror)) { /* Optimize FDB RTC */ rtc_attr->log_size = 0; rtc_attr->log_depth = 0; - } else { - /* Keep original values */ - rtc_attr->log_size = is_match_rtc ? matcher->attr.table.sz_row_log : ste->order; - rtc_attr->log_depth = is_match_rtc ? matcher->attr.table.sz_col_log : 0; } } -static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher, - enum mlx5hws_matcher_rtc_type rtc_type) +static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher) { struct mlx5hws_matcher_attr *attr = &matcher->attr; struct mlx5hws_cmd_rtc_create_attr rtc_attr = {0}; struct mlx5hws_match_template *mt = matcher->mt; struct mlx5hws_context *ctx = matcher->tbl->ctx; - struct mlx5hws_action_default_stc *default_stc; - struct mlx5hws_matcher_action_ste *action_ste; struct mlx5hws_table *tbl = matcher->tbl; - struct mlx5hws_pool *ste_pool, *stc_pool; - struct mlx5hws_pool_chunk *ste; - u32 *rtc_0_id, *rtc_1_id; u32 obj_id; int ret; - switch (rtc_type) { - case HWS_MATCHER_RTC_TYPE_MATCH: - rtc_0_id = &matcher->match_ste.rtc_0_id; - rtc_1_id = &matcher->match_ste.rtc_1_id; - ste_pool = matcher->match_ste.pool; - ste = &matcher->match_ste.ste; - ste->order = attr->table.sz_col_log + attr->table.sz_row_log; - - rtc_attr.log_size = attr->table.sz_row_log; - rtc_attr.log_depth = attr->table.sz_col_log; - rtc_attr.is_frst_jumbo = mlx5hws_matcher_mt_is_jumbo(mt); - rtc_attr.is_scnd_range = 0; - rtc_attr.miss_ft_id = matcher->end_ft_id; - - if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) { - /* The usual Hash Table */ - rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_HASH; - - /* The first mt is used since all share the same definer */ - rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer); - } else if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX) { - rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET; - rtc_attr.num_hash_definer = 1; - - if (attr->distribute_mode == MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH) { - /* Hash Split Table */ - rtc_attr.access_index_mode = MLX5_IFC_RTC_STE_ACCESS_MODE_BY_HASH; - rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer); - } else if (attr->distribute_mode == 
MLX5HWS_MATCHER_DISTRIBUTE_BY_LINEAR) { - /* Linear Lookup Table */ - rtc_attr.access_index_mode = MLX5_IFC_RTC_STE_ACCESS_MODE_LINEAR; - rtc_attr.match_definer_0 = ctx->caps->linear_match_definer; - } - } - - /* Match pool requires implicit allocation */ - ret = mlx5hws_pool_chunk_alloc(ste_pool, ste); - if (ret) { - mlx5hws_err(ctx, "Failed to allocate STE for %s RTC", - hws_matcher_rtc_type_to_str(rtc_type)); - return ret; + rtc_attr.log_size = attr->table.sz_row_log; + rtc_attr.log_depth = attr->table.sz_col_log; + rtc_attr.is_frst_jumbo = mlx5hws_matcher_mt_is_jumbo(mt); + rtc_attr.is_scnd_range = 0; + rtc_attr.miss_ft_id = matcher->end_ft_id; + + if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) { + /* The usual Hash Table */ + rtc_attr.update_index_mode = + MLX5_IFC_RTC_STE_UPDATE_MODE_BY_HASH; + + /* The first mt is used since all share the same definer */ + rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer); + } else if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX) { + rtc_attr.update_index_mode = + MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET; + rtc_attr.num_hash_definer = 1; + + if (attr->distribute_mode == + MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH) { + /* Hash Split Table */ + rtc_attr.access_index_mode = + MLX5_IFC_RTC_STE_ACCESS_MODE_BY_HASH; + rtc_attr.match_definer_0 = + mlx5hws_definer_get_id(mt->definer); + } else if (attr->distribute_mode == + MLX5HWS_MATCHER_DISTRIBUTE_BY_LINEAR) { + /* Linear Lookup Table */ + rtc_attr.access_index_mode = + MLX5_IFC_RTC_STE_ACCESS_MODE_LINEAR; + rtc_attr.match_definer_0 = + ctx->caps->linear_match_definer; } - break; - - case HWS_MATCHER_RTC_TYPE_STE_ARRAY: - action_ste = &matcher->action_ste; - - rtc_0_id = &action_ste->rtc_0_id; - rtc_1_id = &action_ste->rtc_1_id; - ste_pool = action_ste->pool; - ste = &action_ste->ste; - /* Action RTC size calculation: - * log((max number of rules in matcher) * - * (max number of action STEs per rule) * - * (2 to support writing new STEs for update rule)) - */ - ste->order = ilog2(roundup_pow_of_two(action_ste->max_stes)) + - attr->table.sz_row_log + - MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT; - rtc_attr.log_size = ste->order; - rtc_attr.log_depth = 0; - rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET; - /* The action STEs use the default always hit definer */ - rtc_attr.match_definer_0 = ctx->caps->trivial_match_definer; - rtc_attr.is_frst_jumbo = false; - rtc_attr.miss_ft_id = 0; - break; - - default: - mlx5hws_err(ctx, "HWS Invalid RTC type\n"); - return -EINVAL; } - obj_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste); + obj_id = mlx5hws_pool_get_base_id(matcher->match_ste.pool); rtc_attr.pd = ctx->pd_num; rtc_attr.ste_base = obj_id; - rtc_attr.ste_offset = ste->offset; rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx); rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(tbl->type, false); - hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, rtc_type, false); + hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, false); /* STC is a single resource (obj_id), use any STC for the ID */ - stc_pool = ctx->stc_pool; - default_stc = ctx->common_res.default_stc; - obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, &default_stc->default_hit); + obj_id = mlx5hws_pool_get_base_id(ctx->stc_pool); rtc_attr.stc_base = obj_id; - ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_0_id); + ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, + &matcher->match_ste.rtc_0_id); if (ret) { - mlx5hws_err(ctx, "Failed to create matcher RTC of type %s", - 
hws_matcher_rtc_type_to_str(rtc_type)); - goto free_ste; + mlx5hws_err(ctx, "Failed to create matcher RTC\n"); + return ret; } if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) { - obj_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste); + obj_id = mlx5hws_pool_get_base_mirror_id( + matcher->match_ste.pool); rtc_attr.ste_base = obj_id; rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(tbl->type, true); - obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, &default_stc->default_hit); + obj_id = mlx5hws_pool_get_base_mirror_id(ctx->stc_pool); rtc_attr.stc_base = obj_id; - hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, rtc_type, true); + hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, true); - ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_1_id); + ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, + &matcher->match_ste.rtc_1_id); if (ret) { - mlx5hws_err(ctx, "Failed to create peer matcher RTC of type %s", - hws_matcher_rtc_type_to_str(rtc_type)); + mlx5hws_err(ctx, "Failed to create mirror matcher RTC\n"); goto destroy_rtc_0; } } @@ -347,46 +547,18 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher, return 0; destroy_rtc_0: - mlx5hws_cmd_rtc_destroy(ctx->mdev, *rtc_0_id); -free_ste: - if (rtc_type == HWS_MATCHER_RTC_TYPE_MATCH) - mlx5hws_pool_chunk_free(ste_pool, ste); + mlx5hws_cmd_rtc_destroy(ctx->mdev, matcher->match_ste.rtc_0_id); return ret; } -static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher, - enum mlx5hws_matcher_rtc_type rtc_type) +static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher) { - struct mlx5hws_matcher_action_ste *action_ste; - struct mlx5hws_table *tbl = matcher->tbl; - struct mlx5hws_pool_chunk *ste; - struct mlx5hws_pool *ste_pool; - u32 rtc_0_id, rtc_1_id; - - switch (rtc_type) { - case HWS_MATCHER_RTC_TYPE_MATCH: - rtc_0_id = matcher->match_ste.rtc_0_id; - rtc_1_id = matcher->match_ste.rtc_1_id; - ste_pool = matcher->match_ste.pool; - ste = &matcher->match_ste.ste; - break; - case HWS_MATCHER_RTC_TYPE_STE_ARRAY: - action_ste = &matcher->action_ste; - rtc_0_id = action_ste->rtc_0_id; - rtc_1_id = action_ste->rtc_1_id; - ste_pool = action_ste->pool; - ste = &action_ste->ste; - break; - default: - return; - } + struct mlx5_core_dev *mdev = matcher->tbl->ctx->mdev; - if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) - mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, rtc_1_id); + if (matcher->tbl->type == MLX5HWS_TABLE_TYPE_FDB) + mlx5hws_cmd_rtc_destroy(mdev, matcher->match_ste.rtc_1_id); - mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, rtc_0_id); - if (rtc_type == HWS_MATCHER_RTC_TYPE_MATCH) - mlx5hws_pool_chunk_free(ste_pool, ste); + mlx5hws_cmd_rtc_destroy(mdev, matcher->match_ste.rtc_0_id); } static int @@ -454,85 +626,17 @@ static int hws_matcher_check_and_process_at(struct mlx5hws_matcher *matcher, return 0; } -static int hws_matcher_resize_init(struct mlx5hws_matcher *src_matcher) -{ - struct mlx5hws_matcher_resize_data *resize_data; - - resize_data = kzalloc(sizeof(*resize_data), GFP_KERNEL); - if (!resize_data) - return -ENOMEM; - - resize_data->max_stes = src_matcher->action_ste.max_stes; - - resize_data->stc = src_matcher->action_ste.stc; - resize_data->rtc_0_id = src_matcher->action_ste.rtc_0_id; - resize_data->rtc_1_id = src_matcher->action_ste.rtc_1_id; - resize_data->pool = src_matcher->action_ste.max_stes ? 
- src_matcher->action_ste.pool : NULL; - - /* Place the new resized matcher on the dst matcher's list */ - list_add(&resize_data->list_node, &src_matcher->resize_dst->resize_data); - - /* Move all the previous resized matchers to the dst matcher's list */ - while (!list_empty(&src_matcher->resize_data)) { - resize_data = list_first_entry(&src_matcher->resize_data, - struct mlx5hws_matcher_resize_data, - list_node); - list_del_init(&resize_data->list_node); - list_add(&resize_data->list_node, &src_matcher->resize_dst->resize_data); - } - - return 0; -} - -static void hws_matcher_resize_uninit(struct mlx5hws_matcher *matcher) -{ - struct mlx5hws_matcher_resize_data *resize_data; - - if (!mlx5hws_matcher_is_resizable(matcher)) - return; - - while (!list_empty(&matcher->resize_data)) { - resize_data = list_first_entry(&matcher->resize_data, - struct mlx5hws_matcher_resize_data, - list_node); - list_del_init(&resize_data->list_node); - - if (resize_data->max_stes) { - mlx5hws_action_free_single_stc(matcher->tbl->ctx, - matcher->tbl->type, - &resize_data->stc); - - if (matcher->tbl->type == MLX5HWS_TABLE_TYPE_FDB) - mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, - resize_data->rtc_1_id); - - mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, - resize_data->rtc_0_id); - - if (resize_data->pool) - mlx5hws_pool_destroy(resize_data->pool); - } - - kfree(resize_data); - } -} - static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher) { bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt); - struct mlx5hws_cmd_stc_modify_attr stc_attr = {0}; - struct mlx5hws_matcher_action_ste *action_ste; - struct mlx5hws_table *tbl = matcher->tbl; - struct mlx5hws_pool_attr pool_attr = {0}; - struct mlx5hws_context *ctx = tbl->ctx; - u32 required_stes; - u8 max_stes = 0; + struct mlx5hws_context *ctx = matcher->tbl->ctx; + u8 required_stes, max_stes; int i, ret; if (matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION) return 0; + max_stes = 0; for (i = 0; i < matcher->num_of_at; i++) { struct mlx5hws_action_template *at = &matcher->at[i]; @@ -548,75 +652,33 @@ static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher) /* Future: Optimize reparse */ } - /* There are no additional STEs required for matcher */ - if (!max_stes) - return 0; - - matcher->action_ste.max_stes = max_stes; - - action_ste = &matcher->action_ste; - - /* Allocate action STE mempool */ - pool_attr.table_type = tbl->type; - pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE; - pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL; - /* Pool size is similar to action RTC size */ - pool_attr.alloc_log_sz = ilog2(roundup_pow_of_two(action_ste->max_stes)) + - matcher->attr.table.sz_row_log + - MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT; - hws_matcher_set_pool_attr(&pool_attr, matcher); - action_ste->pool = mlx5hws_pool_create(ctx, &pool_attr); - if (!action_ste->pool) { - mlx5hws_err(ctx, "Failed to create action ste pool\n"); - return -EINVAL; - } - - /* Allocate action RTC */ - ret = hws_matcher_create_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY); - if (ret) { - mlx5hws_err(ctx, "Failed to create action RTC\n"); - goto free_ste_pool; - } - - /* Allocate STC for jumps to STE */ - stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; - stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE; - stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; - stc_attr.ste_table.ste = action_ste->ste; - stc_attr.ste_table.ste_pool = action_ste->pool; - stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer; - - ret = 
mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl->type, - &action_ste->stc); - if (ret) { - mlx5hws_err(ctx, "Failed to create action jump to table STC\n"); - goto free_rtc; - } + matcher->num_of_action_stes = max_stes; return 0; - -free_rtc: - hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY); -free_ste_pool: - mlx5hws_pool_destroy(action_ste->pool); - return ret; } -static void hws_matcher_unbind_at(struct mlx5hws_matcher *matcher) +static void hws_matcher_set_ip_version_match(struct mlx5hws_matcher *matcher) { - struct mlx5hws_matcher_action_ste *action_ste; - struct mlx5hws_table *tbl = matcher->tbl; - - action_ste = &matcher->action_ste; - - if (!action_ste->max_stes || - matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION || - mlx5hws_matcher_is_in_resize(matcher)) - return; + int i; - mlx5hws_action_free_single_stc(tbl->ctx, tbl->type, &action_ste->stc); - hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY); - mlx5hws_pool_destroy(action_ste->pool); + for (i = 0; i < matcher->mt->fc_sz; i++) { + switch (matcher->mt->fc[i].fname) { + case MLX5HWS_DEFINER_FNAME_ETH_TYPE_O: + matcher->matches_outer_ethertype = 1; + break; + case MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_O: + matcher->matches_outer_ip_version = 1; + break; + case MLX5HWS_DEFINER_FNAME_ETH_TYPE_I: + matcher->matches_inner_ethertype = 1; + break; + case MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_I: + matcher->matches_inner_ip_version = 1; + break; + default: + break; + } + } } static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher) @@ -635,10 +697,11 @@ static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher) } } + hws_matcher_set_ip_version_match(matcher); + /* Create an STE pool per matcher*/ pool_attr.table_type = matcher->tbl->type; pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE; - pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_MATCHER_STE_POOL; pool_attr.alloc_log_sz = matcher->attr.table.sz_col_log + matcher->attr.table.sz_row_log; hws_matcher_set_pool_attr(&pool_attr, matcher); @@ -740,6 +803,8 @@ hws_matcher_process_attr(struct mlx5hws_cmd_query_caps *caps, attr->table.sz_col_log = hws_matcher_rules_to_tbl_depth(attr->rule.num_log); matcher->flags |= attr->resizable ? MLX5HWS_MATCHER_FLAGS_RESIZABLE : 0; + matcher->flags |= attr->isolated_matcher_end_ft_id ? 
+ MLX5HWS_MATCHER_FLAGS_ISOLATED : 0; return hws_matcher_check_attr_sz(caps, matcher); } @@ -761,10 +826,10 @@ static int hws_matcher_create_and_connect(struct mlx5hws_matcher *matcher) /* Create matcher end flow table anchor */ ret = hws_matcher_create_end_ft(matcher); if (ret) - goto unbind_at; + goto unbind_mt; /* Allocate the RTC for the new matcher */ - ret = hws_matcher_create_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH); + ret = hws_matcher_create_rtc(matcher); if (ret) goto destroy_end_ft; @@ -776,11 +841,9 @@ static int hws_matcher_create_and_connect(struct mlx5hws_matcher *matcher) return 0; destroy_rtc: - hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH); + hws_matcher_destroy_rtc(matcher); destroy_end_ft: hws_matcher_destroy_end_ft(matcher); -unbind_at: - hws_matcher_unbind_at(matcher); unbind_mt: hws_matcher_unbind_mt(matcher); return ret; @@ -788,11 +851,9 @@ unbind_mt: static void hws_matcher_destroy_and_disconnect(struct mlx5hws_matcher *matcher) { - hws_matcher_resize_uninit(matcher); hws_matcher_disconnect(matcher); - hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH); + hws_matcher_destroy_rtc(matcher); hws_matcher_destroy_end_ft(matcher); - hws_matcher_unbind_at(matcher); hws_matcher_unbind_mt(matcher); } @@ -814,8 +875,6 @@ hws_matcher_create_col_matcher(struct mlx5hws_matcher *matcher) if (!col_matcher) return -ENOMEM; - INIT_LIST_HEAD(&col_matcher->resize_data); - col_matcher->tbl = matcher->tbl; col_matcher->mt = matcher->mt; col_matcher->at = matcher->at; @@ -832,6 +891,8 @@ hws_matcher_create_col_matcher(struct mlx5hws_matcher *matcher) col_matcher->attr.table.sz_row_log -= MLX5HWS_MATCHER_ASSURED_ROW_RATIO; col_matcher->attr.max_num_of_at_attach = matcher->attr.max_num_of_at_attach; + col_matcher->attr.isolated_matcher_end_ft_id = + matcher->attr.isolated_matcher_end_ft_id; ret = hws_matcher_process_attr(ctx->caps, col_matcher); if (ret) @@ -869,8 +930,6 @@ static int hws_matcher_init(struct mlx5hws_matcher *matcher) struct mlx5hws_context *ctx = matcher->tbl->ctx; int ret; - INIT_LIST_HEAD(&matcher->resize_data); - mutex_lock(&ctx->ctrl_lock); /* Allocate matcher resource and connect to the packet pipe */ @@ -905,18 +964,44 @@ static int hws_matcher_uninit(struct mlx5hws_matcher *matcher) return 0; } +static int hws_matcher_grow_at_array(struct mlx5hws_matcher *matcher) +{ + void *p; + + if (matcher->size_of_at_array >= MLX5HWS_MATCHER_MAX_AT) + return -ENOMEM; + + matcher->size_of_at_array *= 2; + p = krealloc(matcher->at, + matcher->size_of_at_array * sizeof(*matcher->at), + __GFP_ZERO | GFP_KERNEL); + if (!p) { + matcher->size_of_at_array /= 2; + return -ENOMEM; + } + + matcher->at = p; + + return 0; +} + int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher, struct mlx5hws_action_template *at) { bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt); - struct mlx5hws_context *ctx = matcher->tbl->ctx; u32 required_stes; int ret; - if (!matcher->attr.max_num_of_at_attach) { - mlx5hws_dbg(ctx, "Num of current at (%d) exceed allowed value\n", - matcher->num_of_at); - return -EOPNOTSUPP; + if (unlikely(matcher->num_of_at >= matcher->size_of_at_array)) { + ret = hws_matcher_grow_at_array(matcher); + if (ret) + return ret; + + if (matcher->col_matcher) { + ret = hws_matcher_grow_at_array(matcher->col_matcher); + if (ret) + return ret; + } } ret = hws_matcher_check_and_process_at(matcher, at); @@ -924,15 +1009,11 @@ int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher, return ret; required_stes = at->num_of_action_stes - 
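hws_matcher_grow_at_array above replaces the old fixed max_num_of_at_attach budget: when the AT array fills, its capacity doubles up to MLX5HWS_MATCHER_MAX_AT, and on allocation failure the size field is rolled back so the matcher state stays consistent. The same grow-with-rollback shape in standalone C; note that plain realloc, unlike krealloc with __GFP_ZERO, does not zero the newly grown tail:

#include <stdio.h>
#include <stdlib.h>

#define MAX_AT 128

struct at { int dummy; };

struct matcher {
	struct at *at;
	unsigned int num_of_at;
	unsigned int size_of_at_array;
};

static int grow_at_array(struct matcher *m)
{
	void *p;

	if (m->size_of_at_array >= MAX_AT)
		return -1;                       /* hard cap reached */

	m->size_of_at_array *= 2;
	p = realloc(m->at, m->size_of_at_array * sizeof(*m->at));
	if (!p) {
		m->size_of_at_array /= 2;        /* roll back on failure */
		return -1;
	}
	m->at = p;
	return 0;
}

int main(void)
{
	struct matcher m = { .size_of_at_array = 1 };

	m.at = calloc(m.size_of_at_array, sizeof(*m.at));
	if (!m.at || grow_at_array(&m))
		return 1;
	printf("capacity=%u\n", m.size_of_at_array);
	free(m.at);
	return 0;
}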
(!is_jumbo || at->only_term); - if (matcher->action_ste.max_stes < required_stes) { - mlx5hws_dbg(ctx, "Required STEs [%d] exceeds initial action template STE [%d]\n", - required_stes, matcher->action_ste.max_stes); - return -ENOMEM; - } + if (matcher->num_of_action_stes < required_stes) + matcher->num_of_action_stes = required_stes; matcher->at[matcher->num_of_at] = *at; matcher->num_of_at += 1; - matcher->attr.max_num_of_at_attach -= 1; if (matcher->col_matcher) matcher->col_matcher->num_of_at = matcher->num_of_at; @@ -960,8 +1041,9 @@ hws_matcher_set_templates(struct mlx5hws_matcher *matcher, if (!matcher->mt) return -ENOMEM; - matcher->at = kvcalloc(num_of_at + matcher->attr.max_num_of_at_attach, - sizeof(*matcher->at), + matcher->size_of_at_array = + num_of_at + matcher->attr.max_num_of_at_attach; + matcher->at = kvcalloc(matcher->size_of_at_array, sizeof(*matcher->at), GFP_KERNEL); if (!matcher->at) { mlx5hws_err(ctx, "Failed to allocate action template array\n"); @@ -1110,7 +1192,7 @@ static int hws_matcher_resize_precheck(struct mlx5hws_matcher *src_matcher, return -EINVAL; } - if (src_matcher->action_ste.max_stes > dst_matcher->action_ste.max_stes) { + if (src_matcher->num_of_action_stes > dst_matcher->num_of_action_stes) { mlx5hws_err(ctx, "Src/dst matcher max STEs mismatch\n"); return -EINVAL; } @@ -1139,10 +1221,6 @@ int mlx5hws_matcher_resize_set_target(struct mlx5hws_matcher *src_matcher, src_matcher->resize_dst = dst_matcher; - ret = hws_matcher_resize_init(src_matcher); - if (ret) - src_matcher->resize_dst = NULL; - out: mutex_unlock(&src_matcher->tbl->ctx->ctrl_lock); return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h index 020de70270c5..32e83cddcd60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h @@ -23,6 +23,9 @@ */ #define MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT 1 +/* Maximum number of action templates that can be attached to a matcher. 
*/ +#define MLX5HWS_MATCHER_MAX_AT 128 + enum mlx5hws_matcher_offset { MLX5HWS_MATCHER_OFFSET_TAG_DW1 = 12, MLX5HWS_MATCHER_OFFSET_TAG_DW0 = 13, @@ -31,6 +34,7 @@ enum mlx5hws_matcher_offset { enum mlx5hws_matcher_flags { MLX5HWS_MATCHER_FLAGS_COLLISION = 1 << 2, MLX5HWS_MATCHER_FLAGS_RESIZABLE = 1 << 3, + MLX5HWS_MATCHER_FLAGS_ISOLATED = 1 << 4, }; struct mlx5hws_match_template { @@ -42,28 +46,15 @@ struct mlx5hws_match_template { }; struct mlx5hws_matcher_match_ste { - struct mlx5hws_pool_chunk ste; u32 rtc_0_id; u32 rtc_1_id; struct mlx5hws_pool *pool; }; -struct mlx5hws_matcher_action_ste { - struct mlx5hws_pool_chunk ste; - struct mlx5hws_pool_chunk stc; - u32 rtc_0_id; - u32 rtc_1_id; - struct mlx5hws_pool *pool; - u8 max_stes; -}; - -struct mlx5hws_matcher_resize_data { - struct mlx5hws_pool_chunk stc; - u32 rtc_0_id; - u32 rtc_1_id; - struct mlx5hws_pool *pool; - u8 max_stes; - struct list_head list_node; +enum { + MLX5HWS_MATCHER_IPV_UNSET = 0, + MLX5HWS_MATCHER_IPV_4 = 1, + MLX5HWS_MATCHER_IPV_6 = 2, }; struct mlx5hws_matcher { @@ -72,16 +63,22 @@ struct mlx5hws_matcher { struct mlx5hws_match_template *mt; struct mlx5hws_action_template *at; u8 num_of_at; + u8 size_of_at_array; u8 num_of_mt; + u8 num_of_action_stes; /* enum mlx5hws_matcher_flags */ u8 flags; + u8 matches_outer_ethertype:1; + u8 matches_outer_ip_version:1; + u8 matches_inner_ethertype:1; + u8 matches_inner_ip_version:1; + u8 outer_ip_version:2; + u8 inner_ip_version:2; u32 end_ft_id; struct mlx5hws_matcher *col_matcher; struct mlx5hws_matcher *resize_dst; struct mlx5hws_matcher_match_ste match_ste; - struct mlx5hws_matcher_action_ste action_ste; struct list_head list_node; - struct list_head resize_data; }; static inline bool @@ -100,9 +97,17 @@ static inline bool mlx5hws_matcher_is_in_resize(struct mlx5hws_matcher *matcher) return !!matcher->resize_dst; } +static inline bool mlx5hws_matcher_is_isolated(struct mlx5hws_matcher *matcher) +{ + return !!(matcher->flags & MLX5HWS_MATCHER_FLAGS_ISOLATED); +} + static inline bool mlx5hws_matcher_is_insert_by_idx(struct mlx5hws_matcher *matcher) { return matcher->attr.insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX; } +int mlx5hws_matcher_update_end_ft_isolated(struct mlx5hws_table *tbl, + u32 miss_ft_id); + #endif /* HWS_MATCHER_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h index 5121951f2778..fbd63369da10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h @@ -119,6 +119,8 @@ struct mlx5hws_matcher_attr { }; /* Optional AT attach configuration - Max number of additional AT */ u8 max_num_of_at_attach; + /* Optional end FT (miss FT ID) for match RTC (for isolated matcher) */ + u32 isolated_matcher_end_ft_id; }; struct mlx5hws_rule_attr { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c index 50a81d360bb2..7e37d6e9eb83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c @@ -20,15 +20,14 @@ static void hws_pool_free_one_resource(struct mlx5hws_pool_resource *resource) kfree(resource); } -static void hws_pool_resource_free(struct mlx5hws_pool *pool, - int resource_idx) +static void hws_pool_resource_free(struct mlx5hws_pool *pool) { - hws_pool_free_one_resource(pool->resource[resource_idx]); - 
pool->resource[resource_idx] = NULL; + hws_pool_free_one_resource(pool->resource); + pool->resource = NULL; if (pool->tbl_type == MLX5HWS_TABLE_TYPE_FDB) { - hws_pool_free_one_resource(pool->mirror_resource[resource_idx]); - pool->mirror_resource[resource_idx] = NULL; + hws_pool_free_one_resource(pool->mirror_resource); + pool->mirror_resource = NULL; } } @@ -61,10 +60,8 @@ hws_pool_create_one_resource(struct mlx5hws_pool *pool, u32 log_range, ret = -EINVAL; } - if (ret) { - mlx5hws_err(pool->ctx, "Failed to allocate resource objects\n"); + if (ret) goto free_resource; - } resource->pool = pool; resource->range = 1 << log_range; @@ -77,199 +74,114 @@ free_resource: return NULL; } -static int -hws_pool_resource_alloc(struct mlx5hws_pool *pool, u32 log_range, int idx) +static int hws_pool_resource_alloc(struct mlx5hws_pool *pool) { struct mlx5hws_pool_resource *resource; u32 fw_ft_type, opt_log_range; fw_ft_type = mlx5hws_table_get_res_fw_ft_type(pool->tbl_type, false); - opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_ORIG ? 0 : log_range; + opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_MIRROR ? + 0 : pool->alloc_log_sz; resource = hws_pool_create_one_resource(pool, opt_log_range, fw_ft_type); if (!resource) { - mlx5hws_err(pool->ctx, "Failed allocating resource\n"); + mlx5hws_err(pool->ctx, "Failed to allocate resource\n"); return -EINVAL; } - pool->resource[idx] = resource; + pool->resource = resource; if (pool->tbl_type == MLX5HWS_TABLE_TYPE_FDB) { struct mlx5hws_pool_resource *mirror_resource; fw_ft_type = mlx5hws_table_get_res_fw_ft_type(pool->tbl_type, true); - opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_MIRROR ? 0 : log_range; + opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_ORIG ? + 0 : pool->alloc_log_sz; mirror_resource = hws_pool_create_one_resource(pool, opt_log_range, fw_ft_type); if (!mirror_resource) { - mlx5hws_err(pool->ctx, "Failed allocating mirrored resource\n"); + mlx5hws_err(pool->ctx, "Failed to allocate mirrored resource\n"); hws_pool_free_one_resource(resource); - pool->resource[idx] = NULL; + pool->resource = NULL; return -EINVAL; } - pool->mirror_resource[idx] = mirror_resource; + pool->mirror_resource = mirror_resource; } return 0; } -static unsigned long *hws_pool_create_and_init_bitmap(u32 log_range) -{ - unsigned long *cur_bmp; - - cur_bmp = bitmap_zalloc(1 << log_range, GFP_KERNEL); - if (!cur_bmp) - return NULL; - - bitmap_fill(cur_bmp, 1 << log_range); - - return cur_bmp; -} - -static void hws_pool_buddy_db_put_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static int hws_pool_buddy_init(struct mlx5hws_pool *pool) { struct mlx5hws_buddy_mem *buddy; - buddy = pool->db.buddy_manager->buddies[chunk->resource_idx]; + buddy = mlx5hws_buddy_create(pool->alloc_log_sz); if (!buddy) { - mlx5hws_err(pool->ctx, "No such buddy (%d)\n", chunk->resource_idx); - return; - } - - mlx5hws_buddy_free_mem(buddy, chunk->offset, chunk->order); -} - -static struct mlx5hws_buddy_mem * -hws_pool_buddy_get_next_buddy(struct mlx5hws_pool *pool, int idx, - u32 order, bool *is_new_buddy) -{ - static struct mlx5hws_buddy_mem *buddy; - u32 new_buddy_size; - - buddy = pool->db.buddy_manager->buddies[idx]; - if (buddy) - return buddy; - - new_buddy_size = max(pool->alloc_log_sz, order); - *is_new_buddy = true; - buddy = mlx5hws_buddy_create(new_buddy_size); - if (!buddy) { - mlx5hws_err(pool->ctx, "Failed to create buddy order: %d index: %d\n", - new_buddy_size, idx); - return NULL; + mlx5hws_err(pool->ctx, "Failed to create 
buddy order: %zu\n", + pool->alloc_log_sz); + return -ENOMEM; } - if (hws_pool_resource_alloc(pool, new_buddy_size, idx) != 0) { - mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n", - pool->type, new_buddy_size, idx); + if (hws_pool_resource_alloc(pool) != 0) { + mlx5hws_err(pool->ctx, "Failed to create resource type: %d size %zu\n", + pool->type, pool->alloc_log_sz); mlx5hws_buddy_cleanup(buddy); - return NULL; + return -ENOMEM; } - pool->db.buddy_manager->buddies[idx] = buddy; + pool->db.buddy = buddy; - return buddy; + return 0; } -static int hws_pool_buddy_get_mem_chunk(struct mlx5hws_pool *pool, - int order, - u32 *buddy_idx, - int *seg) +static int hws_pool_buddy_db_get_chunk(struct mlx5hws_pool *pool, + struct mlx5hws_pool_chunk *chunk) { - struct mlx5hws_buddy_mem *buddy; - bool new_mem = false; - int ret = 0; - int i; - - *seg = -1; - - /* Find the next free place from the buddy array */ - while (*seg < 0) { - for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) { - buddy = hws_pool_buddy_get_next_buddy(pool, i, - order, - &new_mem); - if (!buddy) { - ret = -ENOMEM; - goto out; - } - - *seg = mlx5hws_buddy_alloc_mem(buddy, order); - if (*seg >= 0) - goto found; - - if (pool->flags & MLX5HWS_POOL_FLAGS_ONE_RESOURCE) { - mlx5hws_err(pool->ctx, - "Fail to allocate seg for one resource pool\n"); - ret = -ENOMEM; - goto out; - } - - if (new_mem) { - /* We have new memory pool, should be place for us */ - mlx5hws_err(pool->ctx, - "No memory for order: %d with buddy no: %d\n", - order, i); - ret = -ENOMEM; - goto out; - } - } + struct mlx5hws_buddy_mem *buddy = pool->db.buddy; + + if (!buddy) { + mlx5hws_err(pool->ctx, "Bad buddy state\n"); + return -EINVAL; } -found: - *buddy_idx = i; -out: - return ret; + chunk->offset = mlx5hws_buddy_alloc_mem(buddy, chunk->order); + if (chunk->offset >= 0) + return 0; + + return -ENOMEM; } -static int hws_pool_buddy_db_get_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static void hws_pool_buddy_db_put_chunk(struct mlx5hws_pool *pool, + struct mlx5hws_pool_chunk *chunk) { - int ret = 0; + struct mlx5hws_buddy_mem *buddy; - /* Go over the buddies and find next free slot */ - ret = hws_pool_buddy_get_mem_chunk(pool, chunk->order, - &chunk->resource_idx, - &chunk->offset); - if (ret) - mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n", - chunk->order); + buddy = pool->db.buddy; + if (!buddy) { + mlx5hws_err(pool->ctx, "Bad buddy state\n"); + return; + } - return ret; + mlx5hws_buddy_free_mem(buddy, chunk->offset, chunk->order); } static void hws_pool_buddy_db_uninit(struct mlx5hws_pool *pool) { struct mlx5hws_buddy_mem *buddy; - int i; - - for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) { - buddy = pool->db.buddy_manager->buddies[i]; - if (buddy) { - mlx5hws_buddy_cleanup(buddy); - kfree(buddy); - pool->db.buddy_manager->buddies[i] = NULL; - } - } - kfree(pool->db.buddy_manager); + buddy = pool->db.buddy; + if (buddy) { + mlx5hws_buddy_cleanup(buddy); + kfree(buddy); + pool->db.buddy = NULL; + } } -static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool, u32 log_range) +static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool) { - pool->db.buddy_manager = kzalloc(sizeof(*pool->db.buddy_manager), GFP_KERNEL); - if (!pool->db.buddy_manager) - return -ENOMEM; - - if (pool->flags & MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE) { - bool new_buddy; + int ret; - if (!hws_pool_buddy_get_next_buddy(pool, 0, log_range, &new_buddy)) { - mlx5hws_err(pool->ctx, - "Failed allocating 
memory on create log_sz: %d\n", log_range); - kfree(pool->db.buddy_manager); - return -ENOMEM; - } - } + ret = hws_pool_buddy_init(pool); + if (ret) + return ret; pool->p_db_uninit = &hws_pool_buddy_db_uninit; pool->p_get_chunk = &hws_pool_buddy_db_get_chunk; @@ -278,261 +190,105 @@ static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool, u32 log_range) return 0; } -static int hws_pool_create_resource_on_index(struct mlx5hws_pool *pool, - u32 alloc_size, int idx) -{ - int ret = hws_pool_resource_alloc(pool, alloc_size, idx); - - if (ret) { - mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n", - pool->type, alloc_size, idx); - return ret; - } - - return 0; -} - -static struct mlx5hws_pool_elements * -hws_pool_element_create_new_elem(struct mlx5hws_pool *pool, u32 order, int idx) +static unsigned long *hws_pool_create_and_init_bitmap(u32 log_range) { - struct mlx5hws_pool_elements *elem; - u32 alloc_size; - - alloc_size = pool->alloc_log_sz; + unsigned long *bitmap; - elem = kzalloc(sizeof(*elem), GFP_KERNEL); - if (!elem) + bitmap = bitmap_zalloc(1 << log_range, GFP_KERNEL); + if (!bitmap) return NULL; - /* Sharing the same resource, also means that all the elements are with size 1 */ - if ((pool->flags & MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS) && - !(pool->flags & MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK)) { - /* Currently all chunks in size 1 */ - elem->bitmap = hws_pool_create_and_init_bitmap(alloc_size - order); - if (!elem->bitmap) { - mlx5hws_err(pool->ctx, - "Failed to create bitmap type: %d: size %d index: %d\n", - pool->type, alloc_size, idx); - goto free_elem; - } - - elem->log_size = alloc_size - order; - } - - if (hws_pool_create_resource_on_index(pool, alloc_size, idx)) { - mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n", - pool->type, alloc_size, idx); - goto free_db; - } - - pool->db.element_manager->elements[idx] = elem; - - return elem; + bitmap_fill(bitmap, 1 << log_range); -free_db: - bitmap_free(elem->bitmap); -free_elem: - kfree(elem); - return NULL; + return bitmap; } -static int hws_pool_element_find_seg(struct mlx5hws_pool_elements *elem, int *seg) +static int hws_pool_bitmap_init(struct mlx5hws_pool *pool) { - unsigned int segment, size; - - size = 1 << elem->log_size; + unsigned long *bitmap; - segment = find_first_bit(elem->bitmap, size); - if (segment >= size) { - elem->is_full = true; + bitmap = hws_pool_create_and_init_bitmap(pool->alloc_log_sz); + if (!bitmap) { + mlx5hws_err(pool->ctx, "Failed to create bitmap order: %zu\n", + pool->alloc_log_sz); return -ENOMEM; } - bitmap_clear(elem->bitmap, segment, 1); - *seg = segment; - return 0; -} - -static int -hws_pool_onesize_element_get_mem_chunk(struct mlx5hws_pool *pool, u32 order, - u32 *idx, int *seg) -{ - struct mlx5hws_pool_elements *elem; - - elem = pool->db.element_manager->elements[0]; - if (!elem) - elem = hws_pool_element_create_new_elem(pool, order, 0); - if (!elem) - goto err_no_elem; - - if (hws_pool_element_find_seg(elem, seg) != 0) { - mlx5hws_err(pool->ctx, "No more resources (last request order: %d)\n", order); + if (hws_pool_resource_alloc(pool) != 0) { + mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %zu\n", + pool->type, pool->alloc_log_sz); + bitmap_free(bitmap); return -ENOMEM; } - *idx = 0; - elem->num_of_elements++; - return 0; + pool->db.bitmap = bitmap; -err_no_elem: - mlx5hws_err(pool->ctx, "Failed to allocate element for order: %d\n", order); - return -ENOMEM; -} - -static int 
-hws_pool_general_element_get_mem_chunk(struct mlx5hws_pool *pool, u32 order, - u32 *idx, int *seg) -{ - int ret, i; - - for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) { - if (!pool->resource[i]) { - ret = hws_pool_create_resource_on_index(pool, order, i); - if (ret) - goto err_no_res; - *idx = i; - *seg = 0; /* One memory slot in that element */ - return 0; - } - } - - mlx5hws_err(pool->ctx, "No more resources (last request order: %d)\n", order); - return -ENOMEM; - -err_no_res: - mlx5hws_err(pool->ctx, "Failed to allocate element for order: %d\n", order); - return -ENOMEM; + return 0; } -static int hws_pool_general_element_db_get_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static int hws_pool_bitmap_db_get_chunk(struct mlx5hws_pool *pool, + struct mlx5hws_pool_chunk *chunk) { - int ret; - - /* Go over all memory elements and find/allocate free slot */ - ret = hws_pool_general_element_get_mem_chunk(pool, chunk->order, - &chunk->resource_idx, - &chunk->offset); - if (ret) - mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n", - chunk->order); + unsigned long *bitmap, size; - return ret; -} + if (chunk->order != 0) { + mlx5hws_err(pool->ctx, "Pool only supports order 0 allocs\n"); + return -EINVAL; + } -static void hws_pool_general_element_db_put_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) -{ - if (unlikely(!pool->resource[chunk->resource_idx])) - pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx); + bitmap = pool->db.bitmap; + if (!bitmap) { + mlx5hws_err(pool->ctx, "Bad bitmap state\n"); + return -EINVAL; + } - if (pool->flags & MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE) - hws_pool_resource_free(pool, chunk->resource_idx); -} + size = 1 << pool->alloc_log_sz; -static void hws_pool_general_element_db_uninit(struct mlx5hws_pool *pool) -{ - (void)pool; -} + chunk->offset = find_first_bit(bitmap, size); + if (chunk->offset >= size) + return -ENOMEM; -/* This memory management works as the following: - * - At start doesn't allocate no mem at all. - * - When new request for chunk arrived: - * allocate resource and give it. - * - When free that chunk: - * the resource is freed. 
- */ -static int hws_pool_general_element_db_init(struct mlx5hws_pool *pool) -{ - pool->p_db_uninit = &hws_pool_general_element_db_uninit; - pool->p_get_chunk = &hws_pool_general_element_db_get_chunk; - pool->p_put_chunk = &hws_pool_general_element_db_put_chunk; + bitmap_clear(bitmap, chunk->offset, 1); return 0; } -static void hws_onesize_element_db_destroy_element(struct mlx5hws_pool *pool, - struct mlx5hws_pool_elements *elem, - struct mlx5hws_pool_chunk *chunk) -{ - if (unlikely(!pool->resource[chunk->resource_idx])) - pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx); - - hws_pool_resource_free(pool, chunk->resource_idx); - kfree(elem); - pool->db.element_manager->elements[chunk->resource_idx] = NULL; -} - -static void hws_onesize_element_db_put_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static void hws_pool_bitmap_db_put_chunk(struct mlx5hws_pool *pool, + struct mlx5hws_pool_chunk *chunk) { - struct mlx5hws_pool_elements *elem; + unsigned long *bitmap; - if (unlikely(chunk->resource_idx)) - pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx); - - elem = pool->db.element_manager->elements[chunk->resource_idx]; - if (!elem) { - mlx5hws_err(pool->ctx, "No such element (%d)\n", chunk->resource_idx); + bitmap = pool->db.bitmap; + if (!bitmap) { + mlx5hws_err(pool->ctx, "Bad bitmap state\n"); return; } - bitmap_set(elem->bitmap, chunk->offset, 1); - elem->is_full = false; - elem->num_of_elements--; - - if (pool->flags & MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE && - !elem->num_of_elements) - hws_onesize_element_db_destroy_element(pool, elem, chunk); + bitmap_set(bitmap, chunk->offset, 1); } -static int hws_onesize_element_db_get_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static void hws_pool_bitmap_db_uninit(struct mlx5hws_pool *pool) { - int ret = 0; + unsigned long *bitmap; - /* Go over all memory elements and find/allocate free slot */ - ret = hws_pool_onesize_element_get_mem_chunk(pool, chunk->order, - &chunk->resource_idx, - &chunk->offset); - if (ret) - mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n", - chunk->order); - - return ret; -} - -static void hws_onesize_element_db_uninit(struct mlx5hws_pool *pool) -{ - struct mlx5hws_pool_elements *elem; - int i; - - for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) { - elem = pool->db.element_manager->elements[i]; - if (elem) { - bitmap_free(elem->bitmap); - kfree(elem); - pool->db.element_manager->elements[i] = NULL; - } + bitmap = pool->db.bitmap; + if (bitmap) { + bitmap_free(bitmap); + pool->db.bitmap = NULL; } - kfree(pool->db.element_manager); } -/* This memory management works as the following: - * - At start doesn't allocate no mem at all. - * - When new request for chunk arrived: - * aloocate the first and only slot of memory/resource - * when it ended return error. 
- */ -static int hws_pool_onesize_element_db_init(struct mlx5hws_pool *pool) +static int hws_pool_bitmap_db_init(struct mlx5hws_pool *pool) { - pool->db.element_manager = kzalloc(sizeof(*pool->db.element_manager), GFP_KERNEL); - if (!pool->db.element_manager) - return -ENOMEM; + int ret; - pool->p_db_uninit = &hws_onesize_element_db_uninit; - pool->p_get_chunk = &hws_onesize_element_db_get_chunk; - pool->p_put_chunk = &hws_onesize_element_db_put_chunk; + ret = hws_pool_bitmap_init(pool); + if (ret) + return ret; + + pool->p_db_uninit = &hws_pool_bitmap_db_uninit; + pool->p_get_chunk = &hws_pool_bitmap_db_get_chunk; + pool->p_put_chunk = &hws_pool_bitmap_db_put_chunk; return 0; } @@ -542,15 +298,14 @@ static int hws_pool_db_init(struct mlx5hws_pool *pool, { int ret; - if (db_type == MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE) - ret = hws_pool_general_element_db_init(pool); - else if (db_type == MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE) - ret = hws_pool_onesize_element_db_init(pool); + if (db_type == MLX5HWS_POOL_DB_TYPE_BITMAP) + ret = hws_pool_bitmap_db_init(pool); else - ret = hws_pool_buddy_db_init(pool, pool->alloc_log_sz); + ret = hws_pool_buddy_db_init(pool); if (ret) { - mlx5hws_err(pool->ctx, "Failed to init general db : %d (ret: %d)\n", db_type, ret); + mlx5hws_err(pool->ctx, "Failed to init pool type: %d (ret: %d)\n", + db_type, ret); return ret; } @@ -569,6 +324,8 @@ int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool, mutex_lock(&pool->lock); ret = pool->p_get_chunk(pool, chunk); + if (ret == 0) + pool->available_elems -= 1 << chunk->order; mutex_unlock(&pool->lock); return ret; @@ -579,6 +336,7 @@ void mlx5hws_pool_chunk_free(struct mlx5hws_pool *pool, { mutex_lock(&pool->lock); pool->p_put_chunk(pool, chunk); + pool->available_elems += 1 << chunk->order; mutex_unlock(&pool->lock); } @@ -599,17 +357,13 @@ mlx5hws_pool_create(struct mlx5hws_context *ctx, struct mlx5hws_pool_attr *pool_ pool->tbl_type = pool_attr->table_type; pool->opt_type = pool_attr->opt_type; - /* Support general db */ - if (pool->flags == (MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE | - MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK)) - res_db_type = MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE; - else if (pool->flags == (MLX5HWS_POOL_FLAGS_ONE_RESOURCE | - MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS)) - res_db_type = MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE; - else + if (pool->flags & MLX5HWS_POOL_FLAG_BUDDY) res_db_type = MLX5HWS_POOL_DB_TYPE_BUDDY; + else + res_db_type = MLX5HWS_POOL_DB_TYPE_BITMAP; pool->alloc_log_sz = pool_attr->alloc_log_sz; + pool->available_elems = 1 << pool_attr->alloc_log_sz; if (hws_pool_db_init(pool, res_db_type)) goto free_pool; @@ -623,18 +377,17 @@ free_pool: return NULL; } -int mlx5hws_pool_destroy(struct mlx5hws_pool *pool) +void mlx5hws_pool_destroy(struct mlx5hws_pool *pool) { - int i; - mutex_destroy(&pool->lock); - for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) - if (pool->resource[i]) - hws_pool_resource_free(pool, i); + if (pool->available_elems != 1 << pool->alloc_log_sz) + mlx5hws_err(pool->ctx, "Attempting to destroy non-empty pool\n"); + + if (pool->resource) + hws_pool_resource_free(pool); hws_pool_db_unint(pool); kfree(pool); - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h index 621298b352b2..33e33d5f1fb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h @@ -6,16 +6,12 @@ #define MLX5HWS_POOL_STC_LOG_SZ 15 
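[Editor's aside] The pool.c rewrite above collapses the old "general", "one-size" and buddy element managers into a single resource per pool with exactly two allocation databases: a plain bitmap for order-0 chunks and a buddy allocator for larger orders, plus an available_elems counter that mlx5hws_pool_empty()/mlx5hws_pool_full() read under the pool lock. Below is a minimal userspace model of the bitmap path, a sketch only: all toy_* names are hypothetical stand-ins for bitmap_zalloc()/find_first_bit() and friends, not the driver code.

```c
/* Toy model of an order-0 bitmap pool: one bit per element (set means
 * free), plus a free counter mirroring pool->available_elems.
 * Illustration only; assumed names, not mlx5hws code.
 */
#include <stdio.h>
#include <stdlib.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

struct toy_pool {
	unsigned long *bitmap;	/* bit set => slot free */
	size_t log_sz;		/* pool holds 1 << log_sz slots */
	size_t available;	/* mirrors pool->available_elems */
};

static struct toy_pool *toy_pool_create(size_t log_sz)
{
	size_t nbits = 1UL << log_sz;
	size_t nlongs = (nbits + BITS_PER_LONG - 1) / BITS_PER_LONG;
	struct toy_pool *p = calloc(1, sizeof(*p));

	if (!p)
		return NULL;
	p->bitmap = calloc(nlongs, sizeof(unsigned long));
	if (!p->bitmap) {
		free(p);
		return NULL;
	}
	/* All slots start free, like bitmap_fill() in the driver. */
	for (size_t i = 0; i < nbits; i++)
		p->bitmap[i / BITS_PER_LONG] |= 1UL << (i % BITS_PER_LONG);
	p->log_sz = log_sz;
	p->available = nbits;
	return p;
}

/* Returns a free offset, or -1 when empty (the driver returns -ENOMEM). */
static int toy_pool_alloc(struct toy_pool *p)
{
	size_t nbits = 1UL << p->log_sz;

	for (size_t i = 0; i < nbits; i++) {
		unsigned long mask = 1UL << (i % BITS_PER_LONG);

		if (p->bitmap[i / BITS_PER_LONG] & mask) {
			p->bitmap[i / BITS_PER_LONG] &= ~mask; /* claim it */
			p->available--;
			return (int)i;
		}
	}
	return -1;
}

static void toy_pool_free(struct toy_pool *p, int offset)
{
	p->bitmap[offset / BITS_PER_LONG] |= 1UL << (offset % BITS_PER_LONG);
	p->available++;
}

int main(void)
{
	struct toy_pool *p = toy_pool_create(3); /* 8 slots */
	int a = toy_pool_alloc(p), b = toy_pool_alloc(p);

	printf("got %d and %d, %zu free\n", a, b, p->available);
	toy_pool_free(p, a);
	toy_pool_free(p, b);
	printf("full again: %s\n",
	       p->available == (1UL << p->log_sz) ? "yes" : "no");
	free(p->bitmap);
	free(p);
	return 0;
}
```

The buddy path follows the same accounting, only with chunk->order possibly nonzero, so the counter moves by 1 << order per alloc/free, which is exactly what mlx5hws_pool_chunk_alloc()/mlx5hws_pool_chunk_free() do above.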
-#define MLX5HWS_POOL_RESOURCE_ARR_SZ 100 - enum mlx5hws_pool_type { MLX5HWS_POOL_TYPE_STE, MLX5HWS_POOL_TYPE_STC, }; struct mlx5hws_pool_chunk { - u32 resource_idx; - /* Internal offset, relative to base index */ int offset; int order; }; @@ -27,35 +23,17 @@ struct mlx5hws_pool_resource { }; enum mlx5hws_pool_flags { - /* Only a one resource in that pool */ - MLX5HWS_POOL_FLAGS_ONE_RESOURCE = 1 << 0, - MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE = 1 << 1, - /* No sharing resources between chunks */ - MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK = 1 << 2, - /* All objects are in the same size */ - MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS = 1 << 3, - /* Managed by buddy allocator */ - MLX5HWS_POOL_FLAGS_BUDDY_MANAGED = 1 << 4, - /* Allocate pool_type memory on pool creation */ - MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE = 1 << 5, - - /* These values should be used by the caller */ - MLX5HWS_POOL_FLAGS_FOR_STC_POOL = - MLX5HWS_POOL_FLAGS_ONE_RESOURCE | - MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS, - MLX5HWS_POOL_FLAGS_FOR_MATCHER_STE_POOL = - MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE | - MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK, - MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL = - MLX5HWS_POOL_FLAGS_ONE_RESOURCE | - MLX5HWS_POOL_FLAGS_BUDDY_MANAGED | - MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE, + /* Managed by a buddy allocator. If this is not set only allocations of + * order 0 are supported. + */ + MLX5HWS_POOL_FLAG_BUDDY = BIT(0), }; enum mlx5hws_pool_optimize { MLX5HWS_POOL_OPTIMIZE_NONE = 0x0, MLX5HWS_POOL_OPTIMIZE_ORIG = 0x1, MLX5HWS_POOL_OPTIMIZE_MIRROR = 0x2, + MLX5HWS_POOL_OPTIMIZE_MAX = 0x3, }; struct mlx5hws_pool_attr { @@ -68,34 +46,17 @@ struct mlx5hws_pool_attr { }; enum mlx5hws_db_type { - /* Uses for allocating chunk of big memory, each element has its own resource in the FW*/ - MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE, - /* One resource only, all the elements are with same one size */ - MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE, - /* Many resources, the memory allocated with buddy mechanism */ + /* Uses a bitmap, supports only allocations of order 0. */ + MLX5HWS_POOL_DB_TYPE_BITMAP, + /* Entries are managed using a buddy mechanism. 
*/ MLX5HWS_POOL_DB_TYPE_BUDDY, }; -struct mlx5hws_buddy_manager { - struct mlx5hws_buddy_mem *buddies[MLX5HWS_POOL_RESOURCE_ARR_SZ]; -}; - -struct mlx5hws_pool_elements { - u32 num_of_elements; - unsigned long *bitmap; - u32 log_size; - bool is_full; -}; - -struct mlx5hws_element_manager { - struct mlx5hws_pool_elements *elements[MLX5HWS_POOL_RESOURCE_ARR_SZ]; -}; - struct mlx5hws_pool_db { enum mlx5hws_db_type type; union { - struct mlx5hws_element_manager *element_manager; - struct mlx5hws_buddy_manager *buddy_manager; + unsigned long *bitmap; + struct mlx5hws_buddy_mem *buddy; }; }; @@ -111,11 +72,11 @@ struct mlx5hws_pool { enum mlx5hws_pool_flags flags; struct mutex lock; /* protect the pool */ size_t alloc_log_sz; + size_t available_elems; enum mlx5hws_table_type tbl_type; enum mlx5hws_pool_optimize opt_type; - struct mlx5hws_pool_resource *resource[MLX5HWS_POOL_RESOURCE_ARR_SZ]; - struct mlx5hws_pool_resource *mirror_resource[MLX5HWS_POOL_RESOURCE_ARR_SZ]; - /* DB */ + struct mlx5hws_pool_resource *resource; + struct mlx5hws_pool_resource *mirror_resource; struct mlx5hws_pool_db db; /* Functions */ mlx5hws_pool_unint_db p_db_uninit; @@ -127,7 +88,7 @@ struct mlx5hws_pool * mlx5hws_pool_create(struct mlx5hws_context *ctx, struct mlx5hws_pool_attr *pool_attr); -int mlx5hws_pool_destroy(struct mlx5hws_pool *pool); +void mlx5hws_pool_destroy(struct mlx5hws_pool *pool); int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool, struct mlx5hws_pool_chunk *chunk); @@ -135,17 +96,37 @@ int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool, void mlx5hws_pool_chunk_free(struct mlx5hws_pool *pool, struct mlx5hws_pool_chunk *chunk); -static inline u32 -mlx5hws_pool_chunk_get_base_id(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static inline u32 mlx5hws_pool_get_base_id(struct mlx5hws_pool *pool) { - return pool->resource[chunk->resource_idx]->base_id; + return pool->resource->base_id; } -static inline u32 -mlx5hws_pool_chunk_get_base_mirror_id(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static inline u32 mlx5hws_pool_get_base_mirror_id(struct mlx5hws_pool *pool) { - return pool->mirror_resource[chunk->resource_idx]->base_id; + return pool->mirror_resource->base_id; +} + +static inline bool +mlx5hws_pool_empty(struct mlx5hws_pool *pool) +{ + bool ret; + + mutex_lock(&pool->lock); + ret = pool->available_elems == 0; + mutex_unlock(&pool->lock); + + return ret; +} + +static inline bool +mlx5hws_pool_full(struct mlx5hws_pool *pool) +{ + bool ret; + + mutex_lock(&pool->lock); + ret = pool->available_elems == (1 << pool->alloc_log_sz); + mutex_unlock(&pool->lock); + + return ret; } #endif /* MLX5HWS_POOL_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c index a27a2d5ffc7b..5342a4cc7194 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c @@ -195,44 +195,30 @@ hws_rule_load_delete_info(struct mlx5hws_rule *rule, } } -static int hws_rule_alloc_action_ste(struct mlx5hws_rule *rule) +static int mlx5hws_rule_alloc_action_ste(struct mlx5hws_rule *rule, + u16 queue_id, bool skip_rx, + bool skip_tx) { struct mlx5hws_matcher *matcher = rule->matcher; - struct mlx5hws_matcher_action_ste *action_ste; - struct mlx5hws_pool_chunk ste = {0}; - int ret; - - action_ste = &matcher->action_ste; - ste.order = ilog2(roundup_pow_of_two(action_ste->max_stes)); - ret = mlx5hws_pool_chunk_alloc(action_ste->pool, 
&ste); - if (unlikely(ret)) { - mlx5hws_err(matcher->tbl->ctx, - "Failed to allocate STE for rule actions"); - return ret; - } - - rule->action_ste.pool = matcher->action_ste.pool; - rule->action_ste.num_stes = matcher->action_ste.max_stes; - rule->action_ste.index = ste.offset; + struct mlx5hws_context *ctx = matcher->tbl->ctx; - return 0; + rule->action_ste.ste.order = + ilog2(roundup_pow_of_two(matcher->num_of_action_stes)); + return mlx5hws_action_ste_chunk_alloc(&ctx->action_ste_pool[queue_id], + skip_rx, skip_tx, + &rule->action_ste); } -void mlx5hws_rule_free_action_ste(struct mlx5hws_rule_action_ste_info *action_ste) +void mlx5hws_rule_free_action_ste(struct mlx5hws_action_ste_chunk *action_ste) { - struct mlx5hws_pool_chunk ste = {0}; - - if (!action_ste->num_stes) + if (!action_ste->action_tbl) return; - ste.order = ilog2(roundup_pow_of_two(action_ste->num_stes)); - ste.offset = action_ste->index; - /* This release is safe only when the rule match STE was deleted * (when the rule is being deleted) or replaced with the new STE that * isn't pointing to old action STEs (when the rule is being updated). */ - mlx5hws_pool_chunk_free(action_ste->pool, &ste); + mlx5hws_action_ste_chunk_free(action_ste); } static void hws_rule_create_init(struct mlx5hws_rule *rule, @@ -250,22 +236,15 @@ static void hws_rule_create_init(struct mlx5hws_rule *rule, rule->rtc_0 = 0; rule->rtc_1 = 0; - rule->action_ste.pool = NULL; - rule->action_ste.num_stes = 0; - rule->action_ste.index = -1; - rule->status = MLX5HWS_RULE_STATUS_CREATING; } else { rule->status = MLX5HWS_RULE_STATUS_UPDATING; + /* Save the old action STE info so we can free it after writing + * new action STEs and a corresponding match STE. + */ + rule->old_action_ste = rule->action_ste; } - /* Initialize the old action STE info - shallow-copy action_ste. - * In create flow this will set old_action_ste fields to initial values. - * In update flow this will save the existing action STE info, - * so that we will later use it to free old STEs. - */ - rule->old_action_ste = rule->action_ste; - rule->pending_wqes = 0; /* Init default send STE attributes */ @@ -277,7 +256,6 @@ static void hws_rule_create_init(struct mlx5hws_rule *rule, /* Init default action apply */ apply->tbl_type = tbl->type; apply->common_res = &ctx->common_res; - apply->jump_to_action_stc = matcher->action_ste.stc.offset; apply->require_dep = 0; } @@ -353,17 +331,24 @@ static int hws_rule_create_hws(struct mlx5hws_rule *rule, if (action_stes) { /* Allocate action STEs for rules that need more than match STE */ - ret = hws_rule_alloc_action_ste(rule); + ret = mlx5hws_rule_alloc_action_ste(rule, attr->queue_id, + !!ste_attr.rtc_0, + !!ste_attr.rtc_1); if (ret) { mlx5hws_err(ctx, "Failed to allocate action memory %d", ret); mlx5hws_send_abort_new_dep_wqe(queue); return ret; } + apply.jump_to_action_stc = + rule->action_ste.action_tbl->stc.offset; /* Skip RX/TX based on the dep_wqe init */ - ste_attr.rtc_0 = dep_wqe->rtc_0 ? matcher->action_ste.rtc_0_id : 0; - ste_attr.rtc_1 = dep_wqe->rtc_1 ? matcher->action_ste.rtc_1_id : 0; + ste_attr.rtc_0 = dep_wqe->rtc_0 ? + rule->action_ste.action_tbl->rtc_0_id : 0; + ste_attr.rtc_1 = dep_wqe->rtc_1 ? 
+ rule->action_ste.action_tbl->rtc_1_id : 0; /* Action STEs are written to a specific index last to first */ - ste_attr.direct_index = rule->action_ste.index + action_stes; + ste_attr.direct_index = + rule->action_ste.ste.offset + action_stes; apply.next_direct_idx = ste_attr.direct_index; } else { apply.next_direct_idx = 0; @@ -670,6 +655,124 @@ int mlx5hws_rule_move_hws_add(struct mlx5hws_rule *rule, return 0; } +static u8 hws_rule_ethertype_to_matcher_ipv(u32 ethertype) +{ + switch (ethertype) { + case ETH_P_IP: + return MLX5HWS_MATCHER_IPV_4; + case ETH_P_IPV6: + return MLX5HWS_MATCHER_IPV_6; + default: + return MLX5HWS_MATCHER_IPV_UNSET; + } +} + +static u8 hws_rule_ip_version_to_matcher_ipv(u32 ip_version) +{ + switch (ip_version) { + case 4: + return MLX5HWS_MATCHER_IPV_4; + case 6: + return MLX5HWS_MATCHER_IPV_6; + default: + return MLX5HWS_MATCHER_IPV_UNSET; + } +} + +static int hws_rule_check_outer_ip_version(struct mlx5hws_matcher *matcher, + u32 *match_param) +{ + struct mlx5hws_context *ctx = matcher->tbl->ctx; + u8 outer_ipv_ether = MLX5HWS_MATCHER_IPV_UNSET; + u8 outer_ipv_ip = MLX5HWS_MATCHER_IPV_UNSET; + u8 outer_ipv, ver; + + if (matcher->matches_outer_ethertype) { + ver = MLX5_GET(fte_match_param, match_param, + outer_headers.ethertype); + outer_ipv_ether = hws_rule_ethertype_to_matcher_ipv(ver); + } + if (matcher->matches_outer_ip_version) { + ver = MLX5_GET(fte_match_param, match_param, + outer_headers.ip_version); + outer_ipv_ip = hws_rule_ip_version_to_matcher_ipv(ver); + } + + if (outer_ipv_ether != MLX5HWS_MATCHER_IPV_UNSET && + outer_ipv_ip != MLX5HWS_MATCHER_IPV_UNSET && + outer_ipv_ether != outer_ipv_ip) { + mlx5hws_err(ctx, "Rule matches on inconsistent outer ethertype and ip version\n"); + return -EINVAL; + } + + outer_ipv = outer_ipv_ether != MLX5HWS_MATCHER_IPV_UNSET ? + outer_ipv_ether : outer_ipv_ip; + if (outer_ipv != MLX5HWS_MATCHER_IPV_UNSET && + matcher->outer_ip_version != MLX5HWS_MATCHER_IPV_UNSET && + outer_ipv != matcher->outer_ip_version) { + mlx5hws_err(ctx, "Matcher and rule disagree on outer IP version\n"); + return -EINVAL; + } + matcher->outer_ip_version = outer_ipv; + + return 0; +} + +static int hws_rule_check_inner_ip_version(struct mlx5hws_matcher *matcher, + u32 *match_param) +{ + struct mlx5hws_context *ctx = matcher->tbl->ctx; + u8 inner_ipv_ether = MLX5HWS_MATCHER_IPV_UNSET; + u8 inner_ipv_ip = MLX5HWS_MATCHER_IPV_UNSET; + u8 inner_ipv, ver; + + if (matcher->matches_inner_ethertype) { + ver = MLX5_GET(fte_match_param, match_param, + inner_headers.ethertype); + inner_ipv_ether = hws_rule_ethertype_to_matcher_ipv(ver); + } + if (matcher->matches_inner_ip_version) { + ver = MLX5_GET(fte_match_param, match_param, + inner_headers.ip_version); + inner_ipv_ip = hws_rule_ip_version_to_matcher_ipv(ver); + } + + if (inner_ipv_ether != MLX5HWS_MATCHER_IPV_UNSET && + inner_ipv_ip != MLX5HWS_MATCHER_IPV_UNSET && + inner_ipv_ether != inner_ipv_ip) { + mlx5hws_err(ctx, "Rule matches on inconsistent inner ethertype and ip version\n"); + return -EINVAL; + } + + inner_ipv = inner_ipv_ether != MLX5HWS_MATCHER_IPV_UNSET ? 
+ inner_ipv_ether : inner_ipv_ip; + if (inner_ipv != MLX5HWS_MATCHER_IPV_UNSET && + matcher->inner_ip_version != MLX5HWS_MATCHER_IPV_UNSET && + inner_ipv != matcher->inner_ip_version) { + mlx5hws_err(ctx, "Matcher and rule disagree on inner IP version\n"); + return -EINVAL; + } + matcher->inner_ip_version = inner_ipv; + + return 0; +} + +static int hws_rule_check_ip_version(struct mlx5hws_matcher *matcher, + u32 *match_param) +{ + int ret; + + ret = hws_rule_check_outer_ip_version(matcher, match_param); + if (unlikely(ret)) + return ret; + + ret = hws_rule_check_inner_ip_version(matcher, match_param); + if (unlikely(ret)) + return ret; + + return 0; +} + int mlx5hws_rule_create(struct mlx5hws_matcher *matcher, u8 mt_idx, u32 *match_param, @@ -680,6 +783,10 @@ int mlx5hws_rule_create(struct mlx5hws_matcher *matcher, { int ret; + ret = hws_rule_check_ip_version(matcher, match_param); + if (unlikely(ret)) + return ret; + rule_handle->matcher = matcher; ret = hws_rule_enqueue_precheck_create(rule_handle, attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h index b5ee94ac449b..1c47a9c11572 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h @@ -43,12 +43,6 @@ struct mlx5hws_rule_match_tag { }; }; -struct mlx5hws_rule_action_ste_info { - struct mlx5hws_pool *pool; - int index; /* STE array index */ - u8 num_stes; -}; - struct mlx5hws_rule_resize_info { u32 rtc_0; u32 rtc_1; @@ -64,8 +58,8 @@ struct mlx5hws_rule { struct mlx5hws_rule_match_tag tag; struct mlx5hws_rule_resize_info *resize_info; }; - struct mlx5hws_rule_action_ste_info action_ste; - struct mlx5hws_rule_action_ste_info old_action_ste; + struct mlx5hws_action_ste_chunk action_ste; + struct mlx5hws_action_ste_chunk old_action_ste; u32 rtc_0; /* The RTC into which the STE was inserted */ u32 rtc_1; /* The RTC into which the STE was inserted */ u8 status; /* enum mlx5hws_rule_status */ @@ -75,7 +69,7 @@ struct mlx5hws_rule { */ }; -void mlx5hws_rule_free_action_ste(struct mlx5hws_rule_action_ste_info *action_ste); +void mlx5hws_rule_free_action_ste(struct mlx5hws_action_ste_chunk *action_ste); int mlx5hws_rule_move_hws_remove(struct mlx5hws_rule *rule, void *queue, void *user_data); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c index cb6abc4ab7df..c4b22be19a9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c @@ -344,18 +344,133 @@ hws_send_engine_update_rule_resize(struct mlx5hws_send_engine *queue, } } +static void hws_send_engine_dump_error_cqe(struct mlx5hws_send_engine *queue, + struct mlx5hws_send_ring_priv *priv, + struct mlx5_cqe64 *cqe) +{ + u8 wqe_opcode = cqe ? be32_to_cpu(cqe->sop_drop_qpn) >> 24 : 0; + struct mlx5hws_context *ctx = priv->rule->matcher->tbl->ctx; + u32 opcode = cqe ? get_cqe_opcode(cqe) : 0; + struct mlx5hws_rule *rule = priv->rule; + + /* If something bad happens and lots of rules are failing, we don't + * want to pollute dmesg. Print only the first bad cqe per engine, + * the one that started the avalanche. 
+ */ + if (queue->error_cqe_printed) + return; + + queue->error_cqe_printed = true; + + if (mlx5hws_rule_move_in_progress(rule)) + mlx5hws_err(ctx, + "--- rule 0x%08llx: error completion moving rule: phase %s, wqes left %d\n", + HWS_PTR_TO_ID(rule), + rule->resize_info->state == + MLX5HWS_RULE_RESIZE_STATE_WRITING ? "WRITING" : + rule->resize_info->state == + MLX5HWS_RULE_RESIZE_STATE_DELETING ? "DELETING" : + "UNKNOWN", + rule->pending_wqes); + else + mlx5hws_err(ctx, + "--- rule 0x%08llx: error completion %s (%d), wqes left %d\n", + HWS_PTR_TO_ID(rule), + rule->status == + MLX5HWS_RULE_STATUS_CREATING ? "CREATING" : + rule->status == + MLX5HWS_RULE_STATUS_DELETING ? "DELETING" : + rule->status == + MLX5HWS_RULE_STATUS_FAILING ? "FAILING" : + rule->status == + MLX5HWS_RULE_STATUS_UPDATING ? "UPDATING" : "NA", + rule->status, + rule->pending_wqes); + + mlx5hws_err(ctx, " rule 0x%08llx: matcher 0x%llx %s\n", + HWS_PTR_TO_ID(rule), + HWS_PTR_TO_ID(rule->matcher), + (rule->matcher->flags & MLX5HWS_MATCHER_FLAGS_ISOLATED) ? + "(isolated)" : ""); + + if (!cqe) { + mlx5hws_err(ctx, " rule 0x%08llx: no CQE\n", + HWS_PTR_TO_ID(rule)); + return; + } + + mlx5hws_err(ctx, " rule 0x%08llx: cqe->opcode = %d %s\n", + HWS_PTR_TO_ID(rule), opcode, + opcode == MLX5_CQE_REQ ? "(MLX5_CQE_REQ)" : + opcode == MLX5_CQE_REQ_ERR ? "(MLX5_CQE_REQ_ERR)" : " "); + + if (opcode == MLX5_CQE_REQ_ERR) { + struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe; + + mlx5hws_err(ctx, + " rule 0x%08llx: |--- hw_error_syndrome = 0x%x\n", + HWS_PTR_TO_ID(rule), + err_cqe->rsvd1[16]); + mlx5hws_err(ctx, + " rule 0x%08llx: |--- hw_syndrome_type = 0x%x\n", + HWS_PTR_TO_ID(rule), + err_cqe->rsvd1[17] >> 4); + mlx5hws_err(ctx, + " rule 0x%08llx: |--- vendor_err_synd = 0x%x\n", + HWS_PTR_TO_ID(rule), + err_cqe->vendor_err_synd); + mlx5hws_err(ctx, + " rule 0x%08llx: |--- syndrome = 0x%x\n", + HWS_PTR_TO_ID(rule), + err_cqe->syndrome); + } + + mlx5hws_err(ctx, + " rule 0x%08llx: cqe->byte_cnt = 0x%08x\n", + HWS_PTR_TO_ID(rule), be32_to_cpu(cqe->byte_cnt)); + mlx5hws_err(ctx, + " rule 0x%08llx: |-- UPDATE STATUS = %s\n", + HWS_PTR_TO_ID(rule), + (be32_to_cpu(cqe->byte_cnt) & 0x80000000) ? + "FAILURE" : "SUCCESS"); + mlx5hws_err(ctx, + " rule 0x%08llx: |------- SYNDROME = %s\n", + HWS_PTR_TO_ID(rule), + ((be32_to_cpu(cqe->byte_cnt) & 0x00000003) == 1) ? + "SET_FLOW_FAIL" : + ((be32_to_cpu(cqe->byte_cnt) & 0x00000003) == 2) ? + "DISABLE_FLOW_FAIL" : "UNKNOWN"); + mlx5hws_err(ctx, + " rule 0x%08llx: cqe->sop_drop_qpn = 0x%08x\n", + HWS_PTR_TO_ID(rule), be32_to_cpu(cqe->sop_drop_qpn)); + mlx5hws_err(ctx, + " rule 0x%08llx: |-send wqe opcode = 0x%02x %s\n", + HWS_PTR_TO_ID(rule), wqe_opcode, + wqe_opcode == MLX5HWS_WQE_OPCODE_TBL_ACCESS ? + "(MLX5HWS_WQE_OPCODE_TBL_ACCESS)" : "(UNKNOWN)"); + mlx5hws_err(ctx, + " rule 0x%08llx: |------------ qpn = 0x%06x\n", + HWS_PTR_TO_ID(rule), + be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff); +} + static void hws_send_engine_update_rule(struct mlx5hws_send_engine *queue, struct mlx5hws_send_ring_priv *priv, u16 wqe_cnt, - enum mlx5hws_flow_op_status *status) + enum mlx5hws_flow_op_status *status, + struct mlx5_cqe64 *cqe) { priv->rule->pending_wqes--; - if (*status == MLX5HWS_FLOW_OP_ERROR) { + if (unlikely(*status == MLX5HWS_FLOW_OP_ERROR)) { if (priv->retry_id) { + /* If there is a retry_id, then it's not an error yet, + * retry to insert this rule in the collision RTC. 
+ */ hws_send_engine_retry_post_send(queue, priv, wqe_cnt); return; } + hws_send_engine_dump_error_cqe(queue, priv, cqe); /* Some part of the rule failed */ priv->rule->status = MLX5HWS_RULE_STATUS_FAILING; *priv->used_id = 0; @@ -420,7 +535,8 @@ static void hws_send_engine_update(struct mlx5hws_send_engine *queue, if (priv->user_data) { if (priv->rule) { - hws_send_engine_update_rule(queue, priv, wqe_cnt, &status); + hws_send_engine_update_rule(queue, priv, wqe_cnt, + &status, cqe); /* Completion is provided on the last rule WQE */ if (priv->rule->pending_wqes) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h index f833092235c1..3fb8e99309b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h @@ -140,6 +140,7 @@ struct mlx5hws_send_engine { u16 used_entries; u16 num_entries; bool err; + bool error_cqe_printed; struct mutex lock; /* Protects the send engine */ }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c index ab1297531232..568f691733f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c @@ -342,10 +342,10 @@ int mlx5hws_table_ft_set_next_rtc(struct mlx5hws_context *ctx, return mlx5hws_cmd_flow_table_modify(ctx->mdev, &ft_attr, ft_id); } -static int hws_table_ft_set_next_ft(struct mlx5hws_context *ctx, - u32 ft_id, - u32 fw_ft_type, - u32 next_ft_id) +int mlx5hws_table_ft_set_next_ft(struct mlx5hws_context *ctx, + u32 ft_id, + u32 fw_ft_type, + u32 next_ft_id) { struct mlx5hws_cmd_ft_modify_attr ft_attr = {0}; @@ -389,10 +389,10 @@ int mlx5hws_table_connect_to_miss_table(struct mlx5hws_table *src_tbl, if (dst_tbl) { if (list_empty(&dst_tbl->matchers_list)) { /* Connect src_tbl last_ft to dst_tbl start anchor */ - ret = hws_table_ft_set_next_ft(src_tbl->ctx, - last_ft_id, - src_tbl->fw_ft_type, - dst_tbl->ft_id); + ret = mlx5hws_table_ft_set_next_ft(src_tbl->ctx, + last_ft_id, + src_tbl->fw_ft_type, + dst_tbl->ft_id); if (ret) return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h index dd50420eec9e..0400cce0c317 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h @@ -65,4 +65,9 @@ int mlx5hws_table_ft_set_next_rtc(struct mlx5hws_context *ctx, u32 rtc_0_id, u32 rtc_1_id); +int mlx5hws_table_ft_set_next_ft(struct mlx5hws_context *ctx, + u32 ft_id, + u32 fw_ft_type, + u32 next_ft_id); + #endif /* MLX5HWS_TABLE_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3080ea032e7f..618957d65663 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1159,63 +1159,31 @@ static int mlxsw_sp_set_features(struct net_device *dev, return 0; } -static int mlxsw_sp_port_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct ifreq *ifr) +static int mlxsw_sp_port_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { - struct hwtstamp_config config; - int err; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - err = 
mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, - &config); - if (err) - return err; - - if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) - return -EFAULT; + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - return 0; + return mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, + config, extack); } -static int mlxsw_sp_port_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct ifreq *ifr) +static int mlxsw_sp_port_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *config) { - struct hwtstamp_config config; - int err; - - err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port, - &config); - if (err) - return err; - - if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) - return -EFAULT; + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - return 0; + return mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port, + config); } static inline void mlxsw_sp_port_ptp_clear(struct mlxsw_sp_port *mlxsw_sp_port) { - struct hwtstamp_config config = {0}; - - mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config); -} - -static int -mlxsw_sp_port_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct kernel_hwtstamp_config config = {}; - switch (cmd) { - case SIOCSHWTSTAMP: - return mlxsw_sp_port_hwtstamp_set(mlxsw_sp_port, ifr); - case SIOCGHWTSTAMP: - return mlxsw_sp_port_hwtstamp_get(mlxsw_sp_port, ifr); - default: - return -EOPNOTSUPP; - } + mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config, + NULL); } static const struct net_device_ops mlxsw_sp_port_netdev_ops = { @@ -1232,7 +1200,8 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid, .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, .ndo_set_features = mlxsw_sp_set_features, - .ndo_eth_ioctl = mlxsw_sp_port_ioctl, + .ndo_hwtstamp_get = mlxsw_sp_port_hwtstamp_get, + .ndo_hwtstamp_set = mlxsw_sp_port_hwtstamp_set, }; static int diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 37cd1d002b3b..b03ff9e044f9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -233,9 +233,10 @@ struct mlxsw_sp_ptp_ops { u16 local_port); int (*hwtstamp_get)(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config); int (*hwtstamp_set)(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void (*shaper_work)(struct work_struct *work); int (*get_ts_info)(struct mlxsw_sp *mlxsw_sp, struct kernel_ethtool_ts_info *info); @@ -351,7 +352,7 @@ struct mlxsw_sp_port { struct mlxsw_sp_flow_block *eg_flow_block; struct { struct delayed_work shaper_dw; - struct hwtstamp_config hwtstamp_config; + struct kernel_hwtstamp_config hwtstamp_config; u16 ing_types; u16 egr_types; struct mlxsw_sp_ptp_port_stats stats; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 3f64cdbabfa3..0a8fb9c842d3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -262,7 +262,7 @@ err_port_pause_configure: } struct mlxsw_sp_port_hw_stats { - char str[ETH_GSTRING_LEN]; + char str[ETH_GSTRING_LEN] __nonstring; u64 (*getter)(const 
char *payload); bool cells_bytes; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index ca8b9d18fbb9..e8182dd76c7d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -46,7 +46,7 @@ struct mlxsw_sp2_ptp_state { refcount_t ptp_port_enabled_ref; /* Number of ports with time stamping * enabled. */ - struct hwtstamp_config config; + struct kernel_hwtstamp_config config; struct mutex lock; /* Protects 'config' and HW configuration. */ }; @@ -1083,14 +1083,14 @@ void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state_common) } int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { *config = mlxsw_sp_port->ptp.hwtstamp_config; return 0; } static int -mlxsw_sp1_ptp_get_message_types(const struct hwtstamp_config *config, +mlxsw_sp1_ptp_get_message_types(const struct kernel_hwtstamp_config *config, u16 *p_ing_types, u16 *p_egr_types, enum hwtstamp_rx_filters *p_rx_filter) { @@ -1246,7 +1246,8 @@ void mlxsw_sp1_ptp_shaper_work(struct work_struct *work) } int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { enum hwtstamp_rx_filters rx_filter; u16 ing_types; @@ -1270,7 +1271,7 @@ int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, if (err) return err; - /* Notify the ioctl caller what we are actually timestamping. */ + /* Notify the caller what we are actually timestamping. */ config->rx_filter = rx_filter; return 0; @@ -1451,7 +1452,7 @@ void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, } int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { struct mlxsw_sp2_ptp_state *ptp_state; @@ -1465,7 +1466,7 @@ int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, } static int -mlxsw_sp2_ptp_get_message_types(const struct hwtstamp_config *config, +mlxsw_sp2_ptp_get_message_types(const struct kernel_hwtstamp_config *config, u16 *p_ing_types, u16 *p_egr_types, enum hwtstamp_rx_filters *p_rx_filter) { @@ -1542,7 +1543,7 @@ static int mlxsw_sp2_ptp_mtpcpc_set(struct mlxsw_sp *mlxsw_sp, bool ptp_trap_en, static int mlxsw_sp2_ptp_enable(struct mlxsw_sp *mlxsw_sp, u16 ing_types, u16 egr_types, - struct hwtstamp_config new_config) + struct kernel_hwtstamp_config new_config) { struct mlxsw_sp2_ptp_state *ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp); int err; @@ -1556,7 +1557,7 @@ static int mlxsw_sp2_ptp_enable(struct mlxsw_sp *mlxsw_sp, u16 ing_types, } static int mlxsw_sp2_ptp_disable(struct mlxsw_sp *mlxsw_sp, - struct hwtstamp_config new_config) + struct kernel_hwtstamp_config new_config) { struct mlxsw_sp2_ptp_state *ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp); int err; @@ -1571,7 +1572,7 @@ static int mlxsw_sp2_ptp_disable(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp2_ptp_configure_port(struct mlxsw_sp_port *mlxsw_sp_port, u16 ing_types, u16 egr_types, - struct hwtstamp_config new_config) + struct kernel_hwtstamp_config new_config) { struct mlxsw_sp2_ptp_state *ptp_state; int err; @@ -1592,7 +1593,7 @@ static int mlxsw_sp2_ptp_configure_port(struct mlxsw_sp_port *mlxsw_sp_port, } static int mlxsw_sp2_ptp_deconfigure_port(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config new_config) + struct kernel_hwtstamp_config 
new_config) { struct mlxsw_sp2_ptp_state *ptp_state; int err; @@ -1614,11 +1615,12 @@ err_ptp_disable: } int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { + struct kernel_hwtstamp_config new_config; struct mlxsw_sp2_ptp_state *ptp_state; enum hwtstamp_rx_filters rx_filter; - struct hwtstamp_config new_config; u16 new_ing_types, new_egr_types; bool ptp_enabled; int err; @@ -1652,7 +1654,7 @@ int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port->ptp.ing_types = new_ing_types; mlxsw_sp_port->ptp.egr_types = new_egr_types; - /* Notify the ioctl caller what we are actually timestamping. */ + /* Notify the caller what we are actually timestamping. */ config->rx_filter = rx_filter; mutex_unlock(&ptp_state->lock); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h index 102db9060135..df37f1470830 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h @@ -34,10 +34,11 @@ void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, u64 timestamp); int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config); int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void mlxsw_sp1_ptp_shaper_work(struct work_struct *work); @@ -65,10 +66,11 @@ void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, u16 local_port); int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config); int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, struct kernel_ethtool_ts_info *info); @@ -117,14 +119,15 @@ mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, static inline int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { return -EOPNOTSUPP; } static inline int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } @@ -181,14 +184,15 @@ static inline void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, static inline int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { return -EOPNOTSUPP; } static inline int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/meta/Kconfig b/drivers/net/ethernet/meta/Kconfig index 831921b9d4d5..3ba527514f1e 100644 --- a/drivers/net/ethernet/meta/Kconfig +++ b/drivers/net/ethernet/meta/Kconfig @@ -27,6 +27,7 @@ config FBNIC select NET_DEVLINK select PAGE_POOL select PHYLINK + select PLDMFW help This driver supports Meta Platforms Host Network Interface. 
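[Editor's aside] The mlxsw conversion above drops the driver's private SIOCSHWTSTAMP/SIOCGHWTSTAMP ioctl handler in favor of the dedicated ndo_hwtstamp_get/ndo_hwtstamp_set callbacks, which receive a kernel_hwtstamp_config already copied from user space and can report failures through extack. The following is a compile-in-context sketch of that pattern for a hypothetical driver; all foo_* names are assumptions, only the ndo signatures are taken from the diff itself.

```c
/* Sketch of the ndo_hwtstamp_get()/ndo_hwtstamp_set() pattern.
 * Hypothetical "foo" driver; intended to illustrate the callback
 * shape, not any real device.
 */
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/net_tstamp.h>

struct foo_port {
	struct kernel_hwtstamp_config hwtstamp_config;
};

static int foo_hwtstamp_get(struct net_device *dev,
			    struct kernel_hwtstamp_config *config)
{
	struct foo_port *port = netdev_priv(dev);

	/* No copy_to_user() here: the core handles user memory. */
	*config = port->hwtstamp_config;
	return 0;
}

static int foo_hwtstamp_set(struct net_device *dev,
			    struct kernel_hwtstamp_config *config,
			    struct netlink_ext_ack *extack)
{
	struct foo_port *port = netdev_priv(dev);

	if (config->tx_type != HWTSTAMP_TX_OFF &&
	    config->tx_type != HWTSTAMP_TX_ON) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported TX timestamp type");
		return -ERANGE;
	}

	/* Program the hardware here, then report what is actually being
	 * timestamped, the same way the mlxsw code rewrites
	 * config->rx_filter above.
	 */
	config->rx_filter = HWTSTAMP_FILTER_ALL;
	port->hwtstamp_config = *config;
	return 0;
}

static const struct net_device_ops foo_netdev_ops = {
	.ndo_hwtstamp_get = foo_hwtstamp_get,
	.ndo_hwtstamp_set = foo_hwtstamp_set,
};
```

The gain is that copy_from_user()/copy_to_user() and the ioctl switch disappear from every driver, and a rejected configuration can carry a human-readable extack message instead of a bare errno, which is why the converted mlxsw callbacks grow the extack parameter.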
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index de6b1a340f55..65815d4f379e 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -19,6 +19,7 @@ struct fbnic_napi_vector; #define FBNIC_MAX_NAPI_VECTORS 128u +#define FBNIC_MBX_CMPL_SLOTS 4 struct fbnic_dev { struct device *dev; @@ -42,7 +43,7 @@ struct fbnic_dev { struct fbnic_fw_mbx mbx[FBNIC_IPC_MBX_INDICES]; struct fbnic_fw_cap fw_cap; - struct fbnic_fw_completion *cmpl_data; + struct fbnic_fw_completion *cmpl_data[FBNIC_MBX_CMPL_SLOTS]; /* Lock protecting Tx Mailbox queue to prevent possible races */ spinlock_t fw_tx_lock; @@ -81,6 +82,9 @@ struct fbnic_dev { /* Local copy of hardware statistics */ struct fbnic_hw_stats hw_stats; + + /* Lock protecting access to hw_stats */ + spinlock_t hw_stats_lock; }; /* Reserve entry 0 in the MSI-X "others" array until we have filled all diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index 51bee8072420..36393a17d92d 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -397,6 +397,15 @@ enum { #define FBNIC_TCE_DROP_CTRL_TTI_FRM_DROP_EN CSR_BIT(1) #define FBNIC_TCE_DROP_CTRL_TTI_TBI_DROP_EN CSR_BIT(2) +#define FBNIC_TCE_TTI_CM_DROP_PKTS 0x0403e /* 0x100f8 */ +#define FBNIC_TCE_TTI_CM_DROP_BYTE_L 0x0403f /* 0x100fc */ +#define FBNIC_TCE_TTI_CM_DROP_BYTE_H 0x04040 /* 0x10100 */ +#define FBNIC_TCE_TTI_FRAME_DROP_PKTS 0x04041 /* 0x10104 */ +#define FBNIC_TCE_TTI_FRAME_DROP_BYTE_L 0x04042 /* 0x10108 */ +#define FBNIC_TCE_TTI_FRAME_DROP_BYTE_H 0x04043 /* 0x1010c */ +#define FBNIC_TCE_TBI_DROP_PKTS 0x04044 /* 0x10110 */ +#define FBNIC_TCE_TBI_DROP_BYTE_L 0x04045 /* 0x10114 */ + #define FBNIC_TCE_TCAM_IDX2DEST_MAP 0x0404A /* 0x10128 */ #define FBNIC_TCE_TCAM_IDX2DEST_MAP_DEST_ID_0 CSR_GENMASK(3, 0) enum { @@ -432,6 +441,11 @@ enum { #define FBNIC_TMI_SOP_PROT_CTRL 0x04400 /* 0x11000 */ #define FBNIC_TMI_DROP_CTRL 0x04401 /* 0x11004 */ #define FBNIC_TMI_DROP_CTRL_EN CSR_BIT(0) +#define FBNIC_TMI_DROP_PKTS 0x04402 /* 0x11008 */ +#define FBNIC_TMI_DROP_BYTE_L 0x04403 /* 0x1100c */ +#define FBNIC_TMI_ILLEGAL_PTP_REQS 0x04409 /* 0x11024 */ +#define FBNIC_TMI_GOOD_PTP_TS 0x0440a /* 0x11028 */ +#define FBNIC_TMI_BAD_PTP_TS 0x0440b /* 0x1102c */ #define FBNIC_CSR_END_TMI 0x0443f /* CSR section delimiter */ /* Precision Time Protocol Registers */ @@ -485,6 +499,14 @@ enum { FBNIC_RXB_FIFO_INDICES = 8 }; +enum { + FBNIC_RXB_INTF_NET = 0, + FBNIC_RXB_INTF_RBT = 1, + /* Unused */ + /* Unused */ + FBNIC_RXB_INTF_INDICES = 4 +}; + #define FBNIC_RXB_CT_SIZE(n) (0x08000 + (n)) /* 0x20000 + 4*n */ #define FBNIC_RXB_CT_SIZE_CNT 8 #define FBNIC_RXB_CT_SIZE_HEADER CSR_GENMASK(5, 0) @@ -866,6 +888,12 @@ enum { #define FBNIC_QUEUE_TWQ1_BAL 0x022 /* 0x088 */ #define FBNIC_QUEUE_TWQ1_BAH 0x023 /* 0x08c */ +/* Tx Work Queue Statistics Registers */ +#define FBNIC_QUEUE_TWQ0_PKT_CNT 0x062 /* 0x188 */ +#define FBNIC_QUEUE_TWQ0_ERR_CNT 0x063 /* 0x18c */ +#define FBNIC_QUEUE_TWQ1_PKT_CNT 0x072 /* 0x1c8 */ +#define FBNIC_QUEUE_TWQ1_ERR_CNT 0x073 /* 0x1cc */ + /* Tx Completion Queue Registers */ #define FBNIC_QUEUE_TCQ_CTL 0x080 /* 0x200 */ #define FBNIC_QUEUE_TCQ_CTL_RESET CSR_BIT(0) @@ -955,6 +983,12 @@ enum { FBNIC_QUEUE_RDE_CTL1_PAYLD_PACK_RSS = 2, }; +/* Rx Per CQ Statistics Counters */ +#define FBNIC_QUEUE_RDE_PKT_CNT 0x2a2 /* 0xa88 */ +#define FBNIC_QUEUE_RDE_PKT_ERR_CNT 0x2a3 /* 0xa8c */ +#define 
FBNIC_QUEUE_RDE_CQ_DROP_CNT 0x2a4 /* 0xa90 */ +#define FBNIC_QUEUE_RDE_BDQ_DROP_CNT 0x2a5 /* 0xa94 */ + /* Rx Interrupt Manager Registers */ #define FBNIC_QUEUE_RIM_CTL 0x2c0 /* 0xb00 */ #define FBNIC_QUEUE_RIM_CTL_MSIX_MASK CSR_GENMASK(7, 0) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c index 0072d612215e..71d9461a0d1b 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c @@ -3,10 +3,12 @@ #include <linux/unaligned.h> #include <linux/pci.h> +#include <linux/pldmfw.h> #include <linux/types.h> #include <net/devlink.h> #include "fbnic.h" +#include "fbnic_tlv.h" #define FBNIC_SN_STR_LEN 24 @@ -109,8 +111,264 @@ static int fbnic_devlink_info_get(struct devlink *devlink, return 0; } +static bool +fbnic_pldm_match_record(struct pldmfw *context, struct pldmfw_record *record) +{ + struct pldmfw_desc_tlv *desc; + u32 anti_rollback_ver = 0; + struct devlink *devlink; + struct fbnic_dev *fbd; + struct pci_dev *pdev; + + /* First, use the standard PCI matching function */ + if (!pldmfw_op_pci_match_record(context, record)) + return false; + + pdev = to_pci_dev(context->dev); + fbd = pci_get_drvdata(pdev); + devlink = priv_to_devlink(fbd); + + /* If PCI match is successful, check for vendor-specific descriptors */ + list_for_each_entry(desc, &record->descs, entry) { + if (desc->type != PLDM_DESC_ID_VENDOR_DEFINED) + continue; + + if (desc->size < 21 || desc->data[0] != 1 || + desc->data[1] != 15) + continue; + + if (memcmp(desc->data + 2, "AntiRollbackVer", 15) != 0) + continue; + + anti_rollback_ver = get_unaligned_le32(desc->data + 17); + break; + } + + /* Compare versions and return error if they do not match */ + if (anti_rollback_ver < fbd->fw_cap.anti_rollback_version) { + char buf[128]; + + snprintf(buf, sizeof(buf), + "New firmware anti-rollback version (0x%x) is older than device version (0x%x)!", + anti_rollback_ver, fbd->fw_cap.anti_rollback_version); + devlink_flash_update_status_notify(devlink, buf, + "Anti-Rollback", 0, 0); + + return false; + } + + return true; +} + +static int +fbnic_flash_start(struct fbnic_dev *fbd, struct pldmfw_component *component) +{ + struct fbnic_fw_completion *cmpl; + int err; + + cmpl = kzalloc(sizeof(*cmpl), GFP_KERNEL); + if (!cmpl) + return -ENOMEM; + + fbnic_fw_init_cmpl(cmpl, FBNIC_TLV_MSG_ID_FW_START_UPGRADE_REQ); + err = fbnic_fw_xmit_fw_start_upgrade(fbd, cmpl, + component->identifier, + component->component_size); + if (err) + goto cmpl_free; + + /* Wait for firmware to ack firmware upgrade start */ + if (wait_for_completion_timeout(&cmpl->done, 10 * HZ)) + err = cmpl->result; + else + err = -ETIMEDOUT; + + fbnic_fw_clear_cmpl(fbd, cmpl); +cmpl_free: + fbnic_fw_put_cmpl(cmpl); + + return err; +} + +static int +fbnic_flash_component(struct pldmfw *context, + struct pldmfw_component *component) +{ + const u8 *data = component->component_data; + const u32 size = component->component_size; + struct fbnic_fw_completion *cmpl; + const char *component_name; + struct devlink *devlink; + struct fbnic_dev *fbd; + struct pci_dev *pdev; + u32 offset = 0; + u32 length = 0; + char buf[32]; + int err; + + pdev = to_pci_dev(context->dev); + fbd = pci_get_drvdata(pdev); + devlink = priv_to_devlink(fbd); + + switch (component->identifier) { + case QSPI_SECTION_CMRT: + component_name = "boot1"; + break; + case QSPI_SECTION_CONTROL_FW: + component_name = "boot2"; + break; + case QSPI_SECTION_OPTION_ROM: + component_name = "option-rom"; + break; + 
default: + snprintf(buf, sizeof(buf), "Unknown component ID %u!", + component->identifier); + devlink_flash_update_status_notify(devlink, buf, NULL, 0, + size); + return -EINVAL; + } + + /* Once firmware receives the request to start upgrading it responds + * with two messages: + * 1. An ACK that it received the message and possible error code + * indicating that an upgrade is not currently possible. + * 2. A request for the first chunk of data + * + * Setup completions for write before issuing the start message so + * the driver can catch both messages. + */ + cmpl = kzalloc(sizeof(*cmpl), GFP_KERNEL); + if (!cmpl) + return -ENOMEM; + + fbnic_fw_init_cmpl(cmpl, FBNIC_TLV_MSG_ID_FW_WRITE_CHUNK_REQ); + err = fbnic_mbx_set_cmpl(fbd, cmpl); + if (err) + goto cmpl_free; + + devlink_flash_update_timeout_notify(devlink, "Initializing", + component_name, 15); + err = fbnic_flash_start(fbd, component); + if (err) + goto err_no_msg; + + while (offset < size) { + if (!wait_for_completion_timeout(&cmpl->done, 15 * HZ)) { + err = -ETIMEDOUT; + break; + } + + err = cmpl->result; + if (err) + break; + + /* Verify firmware is requesting the next chunk in the seq. */ + if (cmpl->u.fw_update.offset != offset + length) { + err = -EFAULT; + break; + } + + offset = cmpl->u.fw_update.offset; + length = cmpl->u.fw_update.length; + + if (length > TLV_MAX_DATA || offset + length > size) { + err = -EFAULT; + break; + } + + devlink_flash_update_status_notify(devlink, "Flashing", + component_name, + offset, size); + + /* Mailbox will set length to 0 once it receives the finish + * message. + */ + if (!length) + continue; + + reinit_completion(&cmpl->done); + err = fbnic_fw_xmit_fw_write_chunk(fbd, data, offset, length, + 0); + if (err) + break; + } + + if (err) { + fbnic_fw_xmit_fw_write_chunk(fbd, NULL, 0, 0, err); +err_no_msg: + snprintf(buf, sizeof(buf), "Mailbox encountered error %d!", + err); + devlink_flash_update_status_notify(devlink, buf, + component_name, 0, 0); + } + + fbnic_fw_clear_cmpl(fbd, cmpl); +cmpl_free: + fbnic_fw_put_cmpl(cmpl); + + return err; +} + +static const struct pldmfw_ops fbnic_pldmfw_ops = { + .match_record = fbnic_pldm_match_record, + .flash_component = fbnic_flash_component, +}; + +static int +fbnic_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + struct fbnic_dev *fbd = devlink_priv(devlink); + const struct firmware *fw = params->fw; + struct device *dev = fbd->dev; + struct pldmfw context; + char *err_msg; + int err; + + context.ops = &fbnic_pldmfw_ops; + context.dev = dev; + + err = pldmfw_flash_image(&context, fw); + if (err) { + switch (err) { + case -EINVAL: + err_msg = "Invalid image"; + break; + case -EOPNOTSUPP: + err_msg = "Unsupported image"; + break; + case -ENOMEM: + err_msg = "Out of memory"; + break; + case -EFAULT: + err_msg = "Invalid header"; + break; + case -ENOENT: + err_msg = "No matching record"; + break; + case -ENODEV: + err_msg = "No matching device"; + break; + case -ETIMEDOUT: + err_msg = "Timed out waiting for reply"; + break; + default: + err_msg = "Unknown error"; + break; + } + + NL_SET_ERR_MSG_FMT_MOD(extack, + "Failed to flash PLDM Image: %s (error: %d)", + err_msg, err); + } + + return err; +} + static const struct devlink_ops fbnic_devlink_ops = { - .info_get = fbnic_devlink_info_get, + .info_get = fbnic_devlink_info_get, + .flash_update = fbnic_devlink_flash_update, }; void fbnic_devlink_free(struct fbnic_dev *fbd) diff --git 
a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index 0a751a2aaf73..5c7556c8c4c5 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -27,6 +27,19 @@ struct fbnic_stat { FBNIC_STAT_FIELDS(fbnic_hw_stats, name, stat) static const struct fbnic_stat fbnic_gstrings_hw_stats[] = { + /* TTI */ + FBNIC_HW_STAT("tti_cm_drop_frames", tti.cm_drop.frames), + FBNIC_HW_STAT("tti_cm_drop_bytes", tti.cm_drop.bytes), + FBNIC_HW_STAT("tti_frame_drop_frames", tti.frame_drop.frames), + FBNIC_HW_STAT("tti_frame_drop_bytes", tti.frame_drop.bytes), + FBNIC_HW_STAT("tti_tbi_drop_frames", tti.tbi_drop.frames), + FBNIC_HW_STAT("tti_tbi_drop_bytes", tti.tbi_drop.bytes), + + /* TMI */ + FBNIC_HW_STAT("ptp_illegal_req", tmi.ptp_illegal_req), + FBNIC_HW_STAT("ptp_good_ts", tmi.ptp_good_ts), + FBNIC_HW_STAT("ptp_bad_ts", tmi.ptp_bad_ts), + /* RPC */ FBNIC_HW_STAT("rpc_unkn_etype", rpc.unkn_etype), FBNIC_HW_STAT("rpc_unkn_ext_hdr", rpc.unkn_ext_hdr), @@ -39,7 +52,64 @@ static const struct fbnic_stat fbnic_gstrings_hw_stats[] = { }; #define FBNIC_HW_FIXED_STATS_LEN ARRAY_SIZE(fbnic_gstrings_hw_stats) -#define FBNIC_HW_STATS_LEN FBNIC_HW_FIXED_STATS_LEN + +#define FBNIC_RXB_ENQUEUE_STAT(name, stat) \ + FBNIC_STAT_FIELDS(fbnic_rxb_enqueue_stats, name, stat) + +static const struct fbnic_stat fbnic_gstrings_rxb_enqueue_stats[] = { + FBNIC_RXB_ENQUEUE_STAT("rxb_integrity_err%u", integrity_err), + FBNIC_RXB_ENQUEUE_STAT("rxb_mac_err%u", mac_err), + FBNIC_RXB_ENQUEUE_STAT("rxb_parser_err%u", parser_err), + FBNIC_RXB_ENQUEUE_STAT("rxb_frm_err%u", frm_err), + + FBNIC_RXB_ENQUEUE_STAT("rxb_drbo%u_frames", drbo.frames), + FBNIC_RXB_ENQUEUE_STAT("rxb_drbo%u_bytes", drbo.bytes), +}; + +#define FBNIC_HW_RXB_ENQUEUE_STATS_LEN \ + ARRAY_SIZE(fbnic_gstrings_rxb_enqueue_stats) + +#define FBNIC_RXB_FIFO_STAT(name, stat) \ + FBNIC_STAT_FIELDS(fbnic_rxb_fifo_stats, name, stat) + +static const struct fbnic_stat fbnic_gstrings_rxb_fifo_stats[] = { + FBNIC_RXB_FIFO_STAT("rxb_fifo%u_drop", trans_drop), + FBNIC_RXB_FIFO_STAT("rxb_fifo%u_dropped_frames", drop.frames), + FBNIC_RXB_FIFO_STAT("rxb_fifo%u_ecn", trans_ecn), + FBNIC_RXB_FIFO_STAT("rxb_fifo%u_level", level), +}; + +#define FBNIC_HW_RXB_FIFO_STATS_LEN ARRAY_SIZE(fbnic_gstrings_rxb_fifo_stats) + +#define FBNIC_RXB_DEQUEUE_STAT(name, stat) \ + FBNIC_STAT_FIELDS(fbnic_rxb_dequeue_stats, name, stat) + +static const struct fbnic_stat fbnic_gstrings_rxb_dequeue_stats[] = { + FBNIC_RXB_DEQUEUE_STAT("rxb_intf%u_frames", intf.frames), + FBNIC_RXB_DEQUEUE_STAT("rxb_intf%u_bytes", intf.bytes), + FBNIC_RXB_DEQUEUE_STAT("rxb_pbuf%u_frames", pbuf.frames), + FBNIC_RXB_DEQUEUE_STAT("rxb_pbuf%u_bytes", pbuf.bytes), +}; + +#define FBNIC_HW_RXB_DEQUEUE_STATS_LEN \ + ARRAY_SIZE(fbnic_gstrings_rxb_dequeue_stats) + +#define FBNIC_HW_Q_STAT(name, stat) \ + FBNIC_STAT_FIELDS(fbnic_hw_q_stats, name, stat.value) + +static const struct fbnic_stat fbnic_gstrings_hw_q_stats[] = { + FBNIC_HW_Q_STAT("rde_%u_pkt_err", rde_pkt_err), + FBNIC_HW_Q_STAT("rde_%u_pkt_cq_drop", rde_pkt_cq_drop), + FBNIC_HW_Q_STAT("rde_%u_pkt_bdq_drop", rde_pkt_bdq_drop), +}; + +#define FBNIC_HW_Q_STATS_LEN ARRAY_SIZE(fbnic_gstrings_hw_q_stats) +#define FBNIC_HW_STATS_LEN \ + (FBNIC_HW_FIXED_STATS_LEN + \ + FBNIC_HW_RXB_ENQUEUE_STATS_LEN * FBNIC_RXB_ENQUEUE_INDICES + \ + FBNIC_HW_RXB_FIFO_STATS_LEN * FBNIC_RXB_FIFO_INDICES + \ + FBNIC_HW_RXB_DEQUEUE_STATS_LEN * FBNIC_RXB_DEQUEUE_INDICES + \ + FBNIC_HW_Q_STATS_LEN * 
FBNIC_MAX_QUEUES) static void fbnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) @@ -298,31 +368,125 @@ err_free_clone: return err; } -static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data) +static void fbnic_get_rxb_enqueue_strings(u8 **data, unsigned int idx) +{ + const struct fbnic_stat *stat; + int i; + + stat = fbnic_gstrings_rxb_enqueue_stats; + for (i = 0; i < FBNIC_HW_RXB_ENQUEUE_STATS_LEN; i++, stat++) + ethtool_sprintf(data, stat->string, idx); +} + +static void fbnic_get_rxb_fifo_strings(u8 **data, unsigned int idx) +{ + const struct fbnic_stat *stat; + int i; + + stat = fbnic_gstrings_rxb_fifo_stats; + for (i = 0; i < FBNIC_HW_RXB_FIFO_STATS_LEN; i++, stat++) + ethtool_sprintf(data, stat->string, idx); +} + +static void fbnic_get_rxb_dequeue_strings(u8 **data, unsigned int idx) { + const struct fbnic_stat *stat; int i; + stat = fbnic_gstrings_rxb_dequeue_stats; + for (i = 0; i < FBNIC_HW_RXB_DEQUEUE_STATS_LEN; i++, stat++) + ethtool_sprintf(data, stat->string, idx); +} + +static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data) +{ + const struct fbnic_stat *stat; + int i, idx; + switch (sset) { case ETH_SS_STATS: - for (i = 0; i < FBNIC_HW_STATS_LEN; i++) + for (i = 0; i < FBNIC_HW_FIXED_STATS_LEN; i++) ethtool_puts(&data, fbnic_gstrings_hw_stats[i].string); + + for (i = 0; i < FBNIC_RXB_ENQUEUE_INDICES; i++) + fbnic_get_rxb_enqueue_strings(&data, i); + + for (i = 0; i < FBNIC_RXB_FIFO_INDICES; i++) + fbnic_get_rxb_fifo_strings(&data, i); + + for (i = 0; i < FBNIC_RXB_DEQUEUE_INDICES; i++) + fbnic_get_rxb_dequeue_strings(&data, i); + + for (idx = 0; idx < FBNIC_MAX_QUEUES; idx++) { + stat = fbnic_gstrings_hw_q_stats; + + for (i = 0; i < FBNIC_HW_Q_STATS_LEN; i++, stat++) + ethtool_sprintf(&data, stat->string, idx); + } break; } } +static void fbnic_report_hw_stats(const struct fbnic_stat *stat, + const void *base, int len, u64 **data) +{ + while (len--) { + u8 *curr = (u8 *)base + stat->offset; + + **data = *(u64 *)curr; + + stat++; + (*data)++; + } +} + static void fbnic_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct fbnic_net *fbn = netdev_priv(dev); - const struct fbnic_stat *stat; + struct fbnic_dev *fbd = fbn->fbd; int i; fbnic_get_hw_stats(fbn->fbd); - for (i = 0; i < FBNIC_HW_STATS_LEN; i++) { - stat = &fbnic_gstrings_hw_stats[i]; - data[i] = *(u64 *)((u8 *)&fbn->fbd->hw_stats + stat->offset); + spin_lock(&fbd->hw_stats_lock); + fbnic_report_hw_stats(fbnic_gstrings_hw_stats, &fbd->hw_stats, + FBNIC_HW_FIXED_STATS_LEN, &data); + + for (i = 0; i < FBNIC_RXB_ENQUEUE_INDICES; i++) { + const struct fbnic_rxb_enqueue_stats *enq; + + enq = &fbd->hw_stats.rxb.enq[i]; + fbnic_report_hw_stats(fbnic_gstrings_rxb_enqueue_stats, + enq, FBNIC_HW_RXB_ENQUEUE_STATS_LEN, + &data); + } + + for (i = 0; i < FBNIC_RXB_FIFO_INDICES; i++) { + const struct fbnic_rxb_fifo_stats *fifo; + + fifo = &fbd->hw_stats.rxb.fifo[i]; + fbnic_report_hw_stats(fbnic_gstrings_rxb_fifo_stats, + fifo, FBNIC_HW_RXB_FIFO_STATS_LEN, + &data); + } + + for (i = 0; i < FBNIC_RXB_DEQUEUE_INDICES; i++) { + const struct fbnic_rxb_dequeue_stats *deq; + + deq = &fbd->hw_stats.rxb.deq[i]; + fbnic_report_hw_stats(fbnic_gstrings_rxb_dequeue_stats, + deq, FBNIC_HW_RXB_DEQUEUE_STATS_LEN, + &data); + } + + for (i = 0; i < FBNIC_MAX_QUEUES; i++) { + const struct fbnic_hw_q_stats *hw_q = &fbd->hw_stats.hw_q[i]; + + fbnic_report_hw_stats(fbnic_gstrings_hw_q_stats, hw_q, + FBNIC_HW_Q_STATS_LEN, &data); } + 
spin_unlock(&fbd->hw_stats_lock); } static int fbnic_get_sset_count(struct net_device *dev, int sset) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 3d9636a6c968..6a803a59dc25 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -237,6 +237,44 @@ static int fbnic_mbx_map_tlv_msg(struct fbnic_dev *fbd, return err; } +static int fbnic_mbx_set_cmpl_slot(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data) +{ + struct fbnic_fw_mbx *tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; + int free = -EXFULL; + int i; + + if (!tx_mbx->ready) + return -ENODEV; + + for (i = 0; i < FBNIC_MBX_CMPL_SLOTS; i++) { + if (!fbd->cmpl_data[i]) + free = i; + else if (fbd->cmpl_data[i]->msg_type == cmpl_data->msg_type) + return -EEXIST; + } + + if (free == -EXFULL) + return -EXFULL; + + fbd->cmpl_data[free] = cmpl_data; + + return 0; +} + +static void fbnic_mbx_clear_cmpl_slot(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data) +{ + int i; + + for (i = 0; i < FBNIC_MBX_CMPL_SLOTS; i++) { + if (fbd->cmpl_data[i] == cmpl_data) { + fbd->cmpl_data[i] = NULL; + break; + } + } +} + static void fbnic_mbx_process_tx_msgs(struct fbnic_dev *fbd) { struct fbnic_fw_mbx *tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; @@ -258,6 +296,19 @@ static void fbnic_mbx_process_tx_msgs(struct fbnic_dev *fbd) tx_mbx->head = head; } +int fbnic_mbx_set_cmpl(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data) +{ + unsigned long flags; + int err; + + spin_lock_irqsave(&fbd->fw_tx_lock, flags); + err = fbnic_mbx_set_cmpl_slot(fbd, cmpl_data); + spin_unlock_irqrestore(&fbd->fw_tx_lock, flags); + + return err; +} + static int fbnic_mbx_map_req_w_cmpl(struct fbnic_dev *fbd, struct fbnic_tlv_msg *msg, struct fbnic_fw_completion *cmpl_data) @@ -266,23 +317,20 @@ static int fbnic_mbx_map_req_w_cmpl(struct fbnic_dev *fbd, int err; spin_lock_irqsave(&fbd->fw_tx_lock, flags); - - /* If we are already waiting on a completion then abort */ - if (cmpl_data && fbd->cmpl_data) { - err = -EBUSY; - goto unlock_mbx; + if (cmpl_data) { + err = fbnic_mbx_set_cmpl_slot(fbd, cmpl_data); + if (err) + goto unlock_mbx; } - /* Record completion location and submit request */ - if (cmpl_data) - fbd->cmpl_data = cmpl_data; - err = fbnic_mbx_map_msg(fbd, FBNIC_IPC_MBX_TX_IDX, msg, le16_to_cpu(msg->hdr.len) * sizeof(u32), 1); - /* If msg failed then clear completion data for next caller */ + /* If we successfully reserved a completion and msg failed + * then clear completion data for next caller + */ if (err && cmpl_data) - fbd->cmpl_data = NULL; + fbnic_mbx_clear_cmpl_slot(fbd, cmpl_data); unlock_mbx: spin_unlock_irqrestore(&fbd->fw_tx_lock, flags); @@ -304,12 +352,18 @@ fbnic_fw_get_cmpl_by_type(struct fbnic_dev *fbd, u32 msg_type) { struct fbnic_fw_completion *cmpl_data = NULL; unsigned long flags; + int i; spin_lock_irqsave(&fbd->fw_tx_lock, flags); - if (fbd->cmpl_data && fbd->cmpl_data->msg_type == msg_type) { - cmpl_data = fbd->cmpl_data; - kref_get(&fbd->cmpl_data->ref_count); + for (i = 0; i < FBNIC_MBX_CMPL_SLOTS; i++) { + if (fbd->cmpl_data[i] && + fbd->cmpl_data[i]->msg_type == msg_type) { + cmpl_data = fbd->cmpl_data[i]; + kref_get(&cmpl_data->ref_count); + break; + } } + spin_unlock_irqrestore(&fbd->fw_tx_lock, flags); return cmpl_data; @@ -464,6 +518,7 @@ static const struct fbnic_tlv_index fbnic_fw_cap_resp_index[] = { FBNIC_TLV_ATTR_U32(FBNIC_FW_CAP_RESP_UEFI_VERSION), 
FBNIC_TLV_ATTR_STRING(FBNIC_FW_CAP_RESP_UEFI_COMMIT_STR, FBNIC_FW_CAP_RESP_COMMIT_MAX_SIZE), + FBNIC_TLV_ATTR_U32(FBNIC_FW_CAP_RESP_ANTI_ROLLBACK_VERSION), FBNIC_TLV_ATTR_LAST }; @@ -586,6 +641,9 @@ static int fbnic_fw_parse_cap_resp(void *opaque, struct fbnic_tlv_msg **results) if (results[FBNIC_FW_CAP_RESP_BMC_ALL_MULTI] || !bmc_present) fbd->fw_cap.all_multi = all_multi; + fbd->fw_cap.anti_rollback_version = + fta_get_uint(results, FBNIC_FW_CAP_RESP_ANTI_ROLLBACK_VERSION); + return 0; } @@ -708,6 +766,188 @@ void fbnic_fw_check_heartbeat(struct fbnic_dev *fbd) dev_warn(fbd->dev, "Failed to send heartbeat message\n"); } +int fbnic_fw_xmit_fw_start_upgrade(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data, + unsigned int id, unsigned int len) +{ + struct fbnic_tlv_msg *msg; + int err; + + if (!fbnic_fw_present(fbd)) + return -ENODEV; + + if (!len) + return -EINVAL; + + msg = fbnic_tlv_msg_alloc(FBNIC_TLV_MSG_ID_FW_START_UPGRADE_REQ); + if (!msg) + return -ENOMEM; + + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_START_UPGRADE_SECTION, id); + if (err) + goto free_message; + + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_START_UPGRADE_IMAGE_LENGTH, + len); + if (err) + goto free_message; + + err = fbnic_mbx_map_req_w_cmpl(fbd, msg, cmpl_data); + if (err) + goto free_message; + + return 0; + +free_message: + free_page((unsigned long)msg); + return err; +} + +static const struct fbnic_tlv_index fbnic_fw_start_upgrade_resp_index[] = { + FBNIC_TLV_ATTR_S32(FBNIC_FW_START_UPGRADE_ERROR), + FBNIC_TLV_ATTR_LAST +}; + +static int fbnic_fw_parse_fw_start_upgrade_resp(void *opaque, + struct fbnic_tlv_msg **results) +{ + struct fbnic_fw_completion *cmpl_data; + struct fbnic_dev *fbd = opaque; + u32 msg_type; + s32 err; + + /* Verify we have a completion pointer */ + msg_type = FBNIC_TLV_MSG_ID_FW_START_UPGRADE_REQ; + cmpl_data = fbnic_fw_get_cmpl_by_type(fbd, msg_type); + if (!cmpl_data) + return -ENOSPC; + + /* Check for errors */ + err = fta_get_sint(results, FBNIC_FW_START_UPGRADE_ERROR); + + cmpl_data->result = err; + complete(&cmpl_data->done); + fbnic_fw_put_cmpl(cmpl_data); + + return 0; +} + +int fbnic_fw_xmit_fw_write_chunk(struct fbnic_dev *fbd, + const u8 *data, u32 offset, u16 length, + int cancel_error) +{ + struct fbnic_tlv_msg *msg; + int err; + + msg = fbnic_tlv_msg_alloc(FBNIC_TLV_MSG_ID_FW_WRITE_CHUNK_RESP); + if (!msg) + return -ENOMEM; + + /* Report error to FW to cancel upgrade */ + if (cancel_error) { + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_WRITE_CHUNK_ERROR, + cancel_error); + if (err) + goto free_message; + } + + if (data) { + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_WRITE_CHUNK_OFFSET, + offset); + if (err) + goto free_message; + + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_WRITE_CHUNK_LENGTH, + length); + if (err) + goto free_message; + + err = fbnic_tlv_attr_put_value(msg, FBNIC_FW_WRITE_CHUNK_DATA, + data + offset, length); + if (err) + goto free_message; + } + + err = fbnic_mbx_map_tlv_msg(fbd, msg); + if (err) + goto free_message; + + return 0; + +free_message: + free_page((unsigned long)msg); + return err; +} + +static const struct fbnic_tlv_index fbnic_fw_write_chunk_req_index[] = { + FBNIC_TLV_ATTR_U32(FBNIC_FW_WRITE_CHUNK_OFFSET), + FBNIC_TLV_ATTR_U32(FBNIC_FW_WRITE_CHUNK_LENGTH), + FBNIC_TLV_ATTR_LAST +}; + +static int fbnic_fw_parse_fw_write_chunk_req(void *opaque, + struct fbnic_tlv_msg **results) +{ + struct fbnic_fw_completion *cmpl_data; + struct fbnic_dev *fbd = opaque; + u32 msg_type; + u32 offset; + u32 length; + + /* Verify we have a 
completion pointer */ + msg_type = FBNIC_TLV_MSG_ID_FW_WRITE_CHUNK_REQ; + cmpl_data = fbnic_fw_get_cmpl_by_type(fbd, msg_type); + if (!cmpl_data) + return -ENOSPC; + + /* Pull length/offset pair and mark it as complete */ + offset = fta_get_uint(results, FBNIC_FW_WRITE_CHUNK_OFFSET); + length = fta_get_uint(results, FBNIC_FW_WRITE_CHUNK_LENGTH); + cmpl_data->u.fw_update.offset = offset; + cmpl_data->u.fw_update.length = length; + + complete(&cmpl_data->done); + fbnic_fw_put_cmpl(cmpl_data); + + return 0; +} + +static const struct fbnic_tlv_index fbnic_fw_finish_upgrade_req_index[] = { + FBNIC_TLV_ATTR_S32(FBNIC_FW_FINISH_UPGRADE_ERROR), + FBNIC_TLV_ATTR_LAST +}; + +static int fbnic_fw_parse_fw_finish_upgrade_req(void *opaque, + struct fbnic_tlv_msg **results) +{ + struct fbnic_fw_completion *cmpl_data; + struct fbnic_dev *fbd = opaque; + u32 msg_type; + s32 err; + + /* Verify we have a completion pointer */ + msg_type = FBNIC_TLV_MSG_ID_FW_WRITE_CHUNK_REQ; + cmpl_data = fbnic_fw_get_cmpl_by_type(fbd, msg_type); + if (!cmpl_data) + return -ENOSPC; + + /* Check for errors */ + err = fta_get_sint(results, FBNIC_FW_FINISH_UPGRADE_ERROR); + + /* Close out update by incrementing offset by length which should + * match the total size of the component. Set length to 0 since no + * new chunks will be requested. + */ + cmpl_data->u.fw_update.offset += cmpl_data->u.fw_update.length; + cmpl_data->u.fw_update.length = 0; + + cmpl_data->result = err; + complete(&cmpl_data->done); + fbnic_fw_put_cmpl(cmpl_data); + + return 0; +} + /** * fbnic_fw_xmit_tsene_read_msg - Create and transmit a sensor read request * @fbd: FBNIC device structure @@ -792,6 +1032,15 @@ static const struct fbnic_tlv_parser fbnic_fw_tlv_parser[] = { fbnic_fw_parse_ownership_resp), FBNIC_TLV_PARSER(HEARTBEAT_RESP, fbnic_heartbeat_resp_index, fbnic_fw_parse_heartbeat_resp), + FBNIC_TLV_PARSER(FW_START_UPGRADE_RESP, + fbnic_fw_start_upgrade_resp_index, + fbnic_fw_parse_fw_start_upgrade_resp), + FBNIC_TLV_PARSER(FW_WRITE_CHUNK_REQ, + fbnic_fw_write_chunk_req_index, + fbnic_fw_parse_fw_write_chunk_req), + FBNIC_TLV_PARSER(FW_FINISH_UPGRADE_REQ, + fbnic_fw_finish_upgrade_req_index, + fbnic_fw_parse_fw_finish_upgrade_req), FBNIC_TLV_PARSER(TSENE_READ_RESP, fbnic_tsene_read_resp_index, fbnic_fw_parse_tsene_read_resp), @@ -921,10 +1170,16 @@ static void __fbnic_fw_evict_cmpl(struct fbnic_fw_completion *cmpl_data) static void fbnic_mbx_evict_all_cmpl(struct fbnic_dev *fbd) { - if (fbd->cmpl_data) { - __fbnic_fw_evict_cmpl(fbd->cmpl_data); - fbd->cmpl_data = NULL; + int i; + + for (i = 0; i < FBNIC_MBX_CMPL_SLOTS; i++) { + struct fbnic_fw_completion *cmpl_data = fbd->cmpl_data[i]; + + if (cmpl_data) + __fbnic_fw_evict_cmpl(cmpl_data); } + + memset(fbd->cmpl_data, 0, sizeof(fbd->cmpl_data)); } void fbnic_mbx_flush_tx(struct fbnic_dev *fbd) @@ -985,12 +1240,13 @@ void fbnic_fw_init_cmpl(struct fbnic_fw_completion *fw_cmpl, kref_init(&fw_cmpl->ref_count); } -void fbnic_fw_clear_compl(struct fbnic_dev *fbd) +void fbnic_fw_clear_cmpl(struct fbnic_dev *fbd, + struct fbnic_fw_completion *fw_cmpl) { unsigned long flags; spin_lock_irqsave(&fbd->fw_tx_lock, flags); - fbd->cmpl_data = NULL; + fbnic_mbx_clear_cmpl_slot(fbd, fw_cmpl); spin_unlock_irqrestore(&fbd->fw_tx_lock, flags); } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h index a3618e7826c2..6baac10fd688 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h @@ -42,6 +42,7 @@ struct 
fbnic_fw_cap { u8 all_multi : 1; u8 link_speed; u8 link_fec; + u32 anti_rollback_version; }; struct fbnic_fw_completion { @@ -51,6 +52,10 @@ struct fbnic_fw_completion { int result; union { struct { + u32 offset; + u32 length; + } fw_update; + struct { s32 millivolts; s32 millidegrees; } tsene; @@ -59,17 +64,26 @@ struct fbnic_fw_completion { void fbnic_mbx_init(struct fbnic_dev *fbd); void fbnic_mbx_clean(struct fbnic_dev *fbd); +int fbnic_mbx_set_cmpl(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data); void fbnic_mbx_poll(struct fbnic_dev *fbd); int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd); void fbnic_mbx_flush_tx(struct fbnic_dev *fbd); int fbnic_fw_xmit_ownership_msg(struct fbnic_dev *fbd, bool take_ownership); int fbnic_fw_init_heartbeat(struct fbnic_dev *fbd, bool poll); void fbnic_fw_check_heartbeat(struct fbnic_dev *fbd); +int fbnic_fw_xmit_fw_start_upgrade(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data, + unsigned int id, unsigned int len); +int fbnic_fw_xmit_fw_write_chunk(struct fbnic_dev *fbd, + const u8 *data, u32 offset, u16 length, + int cancel_error); int fbnic_fw_xmit_tsene_read_msg(struct fbnic_dev *fbd, struct fbnic_fw_completion *cmpl_data); void fbnic_fw_init_cmpl(struct fbnic_fw_completion *cmpl_data, u32 msg_type); -void fbnic_fw_clear_compl(struct fbnic_dev *fbd); +void fbnic_fw_clear_cmpl(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data); void fbnic_fw_put_cmpl(struct fbnic_fw_completion *cmpl_data); #define fbnic_mk_full_fw_ver_str(_rev_id, _delim, _commit, _str, _str_sz) \ @@ -86,6 +100,15 @@ do { \ #define fbnic_mk_fw_ver_str(_rev_id, _str) \ fbnic_mk_full_fw_ver_str(_rev_id, "", "", _str, sizeof(_str)) +enum { + QSPI_SECTION_CMRT = 0, + QSPI_SECTION_CONTROL_FW = 1, + QSPI_SECTION_UCODE = 2, + QSPI_SECTION_OPTION_ROM = 3, + QSPI_SECTION_USER = 4, + QSPI_SECTION_INVALID, +}; + #define FW_HEARTBEAT_PERIOD (10 * HZ) enum { @@ -95,6 +118,12 @@ enum { FBNIC_TLV_MSG_ID_OWNERSHIP_RESP = 0x13, FBNIC_TLV_MSG_ID_HEARTBEAT_REQ = 0x14, FBNIC_TLV_MSG_ID_HEARTBEAT_RESP = 0x15, + FBNIC_TLV_MSG_ID_FW_START_UPGRADE_REQ = 0x22, + FBNIC_TLV_MSG_ID_FW_START_UPGRADE_RESP = 0x23, + FBNIC_TLV_MSG_ID_FW_WRITE_CHUNK_REQ = 0x24, + FBNIC_TLV_MSG_ID_FW_WRITE_CHUNK_RESP = 0x25, + FBNIC_TLV_MSG_ID_FW_FINISH_UPGRADE_REQ = 0x28, + FBNIC_TLV_MSG_ID_FW_FINISH_UPGRADE_RESP = 0x29, FBNIC_TLV_MSG_ID_TSENE_READ_REQ = 0x3C, FBNIC_TLV_MSG_ID_TSENE_READ_RESP = 0x3D, }; @@ -122,6 +151,7 @@ enum { FBNIC_FW_CAP_RESP_STORED_CMRT_COMMIT_STR = 0x10, FBNIC_FW_CAP_RESP_UEFI_VERSION = 0x11, FBNIC_FW_CAP_RESP_UEFI_COMMIT_STR = 0x12, + FBNIC_FW_CAP_RESP_ANTI_ROLLBACK_VERSION = 0x15, FBNIC_FW_CAP_RESP_MSG_MAX }; @@ -149,4 +179,25 @@ enum { FBNIC_FW_OWNERSHIP_FLAG = 0x0, FBNIC_FW_OWNERSHIP_MSG_MAX }; + +enum { + FBNIC_FW_START_UPGRADE_ERROR = 0x0, + FBNIC_FW_START_UPGRADE_SECTION = 0x1, + FBNIC_FW_START_UPGRADE_IMAGE_LENGTH = 0x2, + FBNIC_FW_START_UPGRADE_MSG_MAX +}; + +enum { + FBNIC_FW_WRITE_CHUNK_OFFSET = 0x0, + FBNIC_FW_WRITE_CHUNK_LENGTH = 0x1, + FBNIC_FW_WRITE_CHUNK_DATA = 0x2, + FBNIC_FW_WRITE_CHUNK_ERROR = 0x3, + FBNIC_FW_WRITE_CHUNK_MSG_MAX +}; + +enum { + FBNIC_FW_FINISH_UPGRADE_ERROR = 0x0, + FBNIC_FW_FINISH_UPGRADE_MSG_MAX +}; + #endif /* _FBNIC_FW_H_ */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c index 89ac6bc8c7fc..4223d8100e64 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c @@ -70,6 +70,100 @@ static void 
fbnic_hw_stat_rd64(struct fbnic_dev *fbd, u32 reg, s32 offset, stat->u.old_reg_value_64 = new_reg_value; } +static void fbnic_reset_tmi_stats(struct fbnic_dev *fbd, + struct fbnic_tmi_stats *tmi) +{ + fbnic_hw_stat_rst32(fbd, FBNIC_TMI_DROP_PKTS, &tmi->drop.frames); + fbnic_hw_stat_rst64(fbd, FBNIC_TMI_DROP_BYTE_L, 1, &tmi->drop.bytes); + + fbnic_hw_stat_rst32(fbd, + FBNIC_TMI_ILLEGAL_PTP_REQS, + &tmi->ptp_illegal_req); + fbnic_hw_stat_rst32(fbd, FBNIC_TMI_GOOD_PTP_TS, &tmi->ptp_good_ts); + fbnic_hw_stat_rst32(fbd, FBNIC_TMI_BAD_PTP_TS, &tmi->ptp_bad_ts); +} + +static void fbnic_get_tmi_stats32(struct fbnic_dev *fbd, + struct fbnic_tmi_stats *tmi) +{ + fbnic_hw_stat_rd32(fbd, FBNIC_TMI_DROP_PKTS, &tmi->drop.frames); + + fbnic_hw_stat_rd32(fbd, + FBNIC_TMI_ILLEGAL_PTP_REQS, + &tmi->ptp_illegal_req); + fbnic_hw_stat_rd32(fbd, FBNIC_TMI_GOOD_PTP_TS, &tmi->ptp_good_ts); + fbnic_hw_stat_rd32(fbd, FBNIC_TMI_BAD_PTP_TS, &tmi->ptp_bad_ts); +} + +static void fbnic_get_tmi_stats(struct fbnic_dev *fbd, + struct fbnic_tmi_stats *tmi) +{ + fbnic_hw_stat_rd64(fbd, FBNIC_TMI_DROP_BYTE_L, 1, &tmi->drop.bytes); +} + +static void fbnic_reset_tti_stats(struct fbnic_dev *fbd, + struct fbnic_tti_stats *tti) +{ + fbnic_hw_stat_rst32(fbd, + FBNIC_TCE_TTI_CM_DROP_PKTS, + &tti->cm_drop.frames); + fbnic_hw_stat_rst64(fbd, + FBNIC_TCE_TTI_CM_DROP_BYTE_L, + 1, + &tti->cm_drop.bytes); + + fbnic_hw_stat_rst32(fbd, + FBNIC_TCE_TTI_FRAME_DROP_PKTS, + &tti->frame_drop.frames); + fbnic_hw_stat_rst64(fbd, + FBNIC_TCE_TTI_FRAME_DROP_BYTE_L, + 1, + &tti->frame_drop.bytes); + + fbnic_hw_stat_rst32(fbd, + FBNIC_TCE_TBI_DROP_PKTS, + &tti->tbi_drop.frames); + fbnic_hw_stat_rst64(fbd, + FBNIC_TCE_TBI_DROP_BYTE_L, + 1, + &tti->tbi_drop.bytes); +} + +static void fbnic_get_tti_stats32(struct fbnic_dev *fbd, + struct fbnic_tti_stats *tti) +{ + fbnic_hw_stat_rd32(fbd, + FBNIC_TCE_TTI_CM_DROP_PKTS, + &tti->cm_drop.frames); + + fbnic_hw_stat_rd32(fbd, + FBNIC_TCE_TTI_FRAME_DROP_PKTS, + &tti->frame_drop.frames); + + fbnic_hw_stat_rd32(fbd, + FBNIC_TCE_TBI_DROP_PKTS, + &tti->tbi_drop.frames); +} + +static void fbnic_get_tti_stats(struct fbnic_dev *fbd, + struct fbnic_tti_stats *tti) +{ + fbnic_hw_stat_rd64(fbd, + FBNIC_TCE_TTI_CM_DROP_BYTE_L, + 1, + &tti->cm_drop.bytes); + + fbnic_hw_stat_rd64(fbd, + FBNIC_TCE_TTI_FRAME_DROP_BYTE_L, + 1, + &tti->frame_drop.bytes); + + fbnic_hw_stat_rd64(fbd, + FBNIC_TCE_TBI_DROP_BYTE_L, + 1, + &tti->tbi_drop.bytes); +} + static void fbnic_reset_rpc_stats(struct fbnic_dev *fbd, struct fbnic_rpc_stats *rpc) { @@ -117,6 +211,221 @@ static void fbnic_get_rpc_stats32(struct fbnic_dev *fbd, &rpc->ovr_size_err); } +static void fbnic_reset_rxb_fifo_stats(struct fbnic_dev *fbd, int i, + struct fbnic_rxb_fifo_stats *fifo) +{ + fbnic_hw_stat_rst32(fbd, FBNIC_RXB_DROP_FRMS_STS(i), + &fifo->drop.frames); + fbnic_hw_stat_rst64(fbd, FBNIC_RXB_DROP_BYTES_STS_L(i), 1, + &fifo->drop.bytes); + + fbnic_hw_stat_rst32(fbd, FBNIC_RXB_TRUN_FRMS_STS(i), + &fifo->trunc.frames); + fbnic_hw_stat_rst64(fbd, FBNIC_RXB_TRUN_BYTES_STS_L(i), 1, + &fifo->trunc.bytes); + + fbnic_hw_stat_rst32(fbd, FBNIC_RXB_TRANS_DROP_STS(i), + &fifo->trans_drop); + fbnic_hw_stat_rst32(fbd, FBNIC_RXB_TRANS_ECN_STS(i), + &fifo->trans_ecn); + + fifo->level.u.old_reg_value_32 = 0; +} + +static void fbnic_reset_rxb_enq_stats(struct fbnic_dev *fbd, int i, + struct fbnic_rxb_enqueue_stats *enq) +{ + fbnic_hw_stat_rst32(fbd, FBNIC_RXB_DRBO_FRM_CNT_SRC(i), + &enq->drbo.frames); + fbnic_hw_stat_rst64(fbd, FBNIC_RXB_DRBO_BYTE_CNT_SRC_L(i), 4, + &enq->drbo.bytes); + + 
fbnic_hw_stat_rst32(fbd, FBNIC_RXB_INTEGRITY_ERR(i), + &enq->integrity_err); + fbnic_hw_stat_rst32(fbd, FBNIC_RXB_MAC_ERR(i), + &enq->mac_err); + fbnic_hw_stat_rst32(fbd, FBNIC_RXB_PARSER_ERR(i), + &enq->parser_err); + fbnic_hw_stat_rst32(fbd, FBNIC_RXB_FRM_ERR(i), + &enq->frm_err); +} + +static void fbnic_reset_rxb_deq_stats(struct fbnic_dev *fbd, int i, + struct fbnic_rxb_dequeue_stats *deq) +{ + fbnic_hw_stat_rst32(fbd, FBNIC_RXB_INTF_FRM_CNT_DST(i), + &deq->intf.frames); + fbnic_hw_stat_rst64(fbd, FBNIC_RXB_INTF_BYTE_CNT_DST_L(i), 4, + &deq->intf.bytes); + + fbnic_hw_stat_rst32(fbd, FBNIC_RXB_PBUF_FRM_CNT_DST(i), + &deq->pbuf.frames); + fbnic_hw_stat_rst64(fbd, FBNIC_RXB_PBUF_BYTE_CNT_DST_L(i), 4, + &deq->pbuf.bytes); +} + +static void fbnic_reset_rxb_stats(struct fbnic_dev *fbd, + struct fbnic_rxb_stats *rxb) +{ + int i; + + for (i = 0; i < FBNIC_RXB_FIFO_INDICES; i++) + fbnic_reset_rxb_fifo_stats(fbd, i, &rxb->fifo[i]); + + for (i = 0; i < FBNIC_RXB_INTF_INDICES; i++) { + fbnic_reset_rxb_enq_stats(fbd, i, &rxb->enq[i]); + fbnic_reset_rxb_deq_stats(fbd, i, &rxb->deq[i]); + } +} + +static void fbnic_get_rxb_fifo_stats32(struct fbnic_dev *fbd, int i, + struct fbnic_rxb_fifo_stats *fifo) +{ + fbnic_hw_stat_rd32(fbd, FBNIC_RXB_DROP_FRMS_STS(i), + &fifo->drop.frames); + fbnic_hw_stat_rd32(fbd, FBNIC_RXB_TRUN_FRMS_STS(i), + &fifo->trunc.frames); + + fbnic_hw_stat_rd32(fbd, FBNIC_RXB_TRANS_DROP_STS(i), + &fifo->trans_drop); + fbnic_hw_stat_rd32(fbd, FBNIC_RXB_TRANS_ECN_STS(i), + &fifo->trans_ecn); + + fifo->level.value = rd32(fbd, FBNIC_RXB_PBUF_FIFO_LEVEL(i)); +} + +static void fbnic_get_rxb_fifo_stats(struct fbnic_dev *fbd, int i, + struct fbnic_rxb_fifo_stats *fifo) +{ + fbnic_hw_stat_rd64(fbd, FBNIC_RXB_DROP_BYTES_STS_L(i), 1, + &fifo->drop.bytes); + fbnic_hw_stat_rd64(fbd, FBNIC_RXB_TRUN_BYTES_STS_L(i), 1, + &fifo->trunc.bytes); + + fbnic_get_rxb_fifo_stats32(fbd, i, fifo); +} + +static void fbnic_get_rxb_enq_stats32(struct fbnic_dev *fbd, int i, + struct fbnic_rxb_enqueue_stats *enq) +{ + fbnic_hw_stat_rd32(fbd, FBNIC_RXB_DRBO_FRM_CNT_SRC(i), + &enq->drbo.frames); + + fbnic_hw_stat_rd32(fbd, FBNIC_RXB_INTEGRITY_ERR(i), + &enq->integrity_err); + fbnic_hw_stat_rd32(fbd, FBNIC_RXB_MAC_ERR(i), + &enq->mac_err); + fbnic_hw_stat_rd32(fbd, FBNIC_RXB_PARSER_ERR(i), + &enq->parser_err); + fbnic_hw_stat_rd32(fbd, FBNIC_RXB_FRM_ERR(i), + &enq->frm_err); +} + +static void fbnic_get_rxb_enq_stats(struct fbnic_dev *fbd, int i, + struct fbnic_rxb_enqueue_stats *enq) +{ + fbnic_hw_stat_rd64(fbd, FBNIC_RXB_DRBO_BYTE_CNT_SRC_L(i), 4, + &enq->drbo.bytes); + + fbnic_get_rxb_enq_stats32(fbd, i, enq); +} + +static void fbnic_get_rxb_deq_stats32(struct fbnic_dev *fbd, int i, + struct fbnic_rxb_dequeue_stats *deq) +{ + fbnic_hw_stat_rd32(fbd, FBNIC_RXB_INTF_FRM_CNT_DST(i), + &deq->intf.frames); + fbnic_hw_stat_rd32(fbd, FBNIC_RXB_PBUF_FRM_CNT_DST(i), + &deq->pbuf.frames); +} + +static void fbnic_get_rxb_deq_stats(struct fbnic_dev *fbd, int i, + struct fbnic_rxb_dequeue_stats *deq) +{ + fbnic_hw_stat_rd64(fbd, FBNIC_RXB_INTF_BYTE_CNT_DST_L(i), 4, + &deq->intf.bytes); + fbnic_hw_stat_rd64(fbd, FBNIC_RXB_PBUF_BYTE_CNT_DST_L(i), 4, + &deq->pbuf.bytes); + + fbnic_get_rxb_deq_stats32(fbd, i, deq); +} + +static void fbnic_get_rxb_stats32(struct fbnic_dev *fbd, + struct fbnic_rxb_stats *rxb) +{ + int i; + + for (i = 0; i < FBNIC_RXB_FIFO_INDICES; i++) + fbnic_get_rxb_fifo_stats32(fbd, i, &rxb->fifo[i]); + + for (i = 0; i < FBNIC_RXB_INTF_INDICES; i++) { + fbnic_get_rxb_enq_stats32(fbd, i, &rxb->enq[i]); + 
fbnic_get_rxb_deq_stats32(fbd, i, &rxb->deq[i]); + } +} + +static void fbnic_get_rxb_stats(struct fbnic_dev *fbd, + struct fbnic_rxb_stats *rxb) +{ + int i; + + for (i = 0; i < FBNIC_RXB_FIFO_INDICES; i++) + fbnic_get_rxb_fifo_stats(fbd, i, &rxb->fifo[i]); + + for (i = 0; i < FBNIC_RXB_INTF_INDICES; i++) { + fbnic_get_rxb_enq_stats(fbd, i, &rxb->enq[i]); + fbnic_get_rxb_deq_stats(fbd, i, &rxb->deq[i]); + } +} + +static void fbnic_reset_hw_rxq_stats(struct fbnic_dev *fbd, + struct fbnic_hw_q_stats *hw_q) +{ + int i; + + for (i = 0; i < fbd->max_num_queues; i++, hw_q++) { + u32 base = FBNIC_QUEUE(i); + + fbnic_hw_stat_rst32(fbd, + base + FBNIC_QUEUE_RDE_PKT_ERR_CNT, + &hw_q->rde_pkt_err); + fbnic_hw_stat_rst32(fbd, + base + FBNIC_QUEUE_RDE_CQ_DROP_CNT, + &hw_q->rde_pkt_cq_drop); + fbnic_hw_stat_rst32(fbd, + base + FBNIC_QUEUE_RDE_BDQ_DROP_CNT, + &hw_q->rde_pkt_bdq_drop); + } +} + +static void fbnic_get_hw_rxq_stats32(struct fbnic_dev *fbd, + struct fbnic_hw_q_stats *hw_q) +{ + int i; + + for (i = 0; i < fbd->max_num_queues; i++, hw_q++) { + u32 base = FBNIC_QUEUE(i); + + fbnic_hw_stat_rd32(fbd, + base + FBNIC_QUEUE_RDE_PKT_ERR_CNT, + &hw_q->rde_pkt_err); + fbnic_hw_stat_rd32(fbd, + base + FBNIC_QUEUE_RDE_CQ_DROP_CNT, + &hw_q->rde_pkt_cq_drop); + fbnic_hw_stat_rd32(fbd, + base + FBNIC_QUEUE_RDE_BDQ_DROP_CNT, + &hw_q->rde_pkt_bdq_drop); + } +} + +void fbnic_get_hw_q_stats(struct fbnic_dev *fbd, + struct fbnic_hw_q_stats *hw_q) +{ + spin_lock(&fbd->hw_stats_lock); + fbnic_get_hw_rxq_stats32(fbd, hw_q); + spin_unlock(&fbd->hw_stats_lock); +} + static void fbnic_reset_pcie_stats_asic(struct fbnic_dev *fbd, struct fbnic_pcie_stats *pcie) { @@ -203,18 +512,40 @@ static void fbnic_get_pcie_stats_asic64(struct fbnic_dev *fbd, void fbnic_reset_hw_stats(struct fbnic_dev *fbd) { + spin_lock(&fbd->hw_stats_lock); + fbnic_reset_tmi_stats(fbd, &fbd->hw_stats.tmi); + fbnic_reset_tti_stats(fbd, &fbd->hw_stats.tti); fbnic_reset_rpc_stats(fbd, &fbd->hw_stats.rpc); + fbnic_reset_rxb_stats(fbd, &fbd->hw_stats.rxb); + fbnic_reset_hw_rxq_stats(fbd, fbd->hw_stats.hw_q); fbnic_reset_pcie_stats_asic(fbd, &fbd->hw_stats.pcie); + spin_unlock(&fbd->hw_stats_lock); } -void fbnic_get_hw_stats32(struct fbnic_dev *fbd) +static void __fbnic_get_hw_stats32(struct fbnic_dev *fbd) { + fbnic_get_tmi_stats32(fbd, &fbd->hw_stats.tmi); + fbnic_get_tti_stats32(fbd, &fbd->hw_stats.tti); fbnic_get_rpc_stats32(fbd, &fbd->hw_stats.rpc); + fbnic_get_rxb_stats32(fbd, &fbd->hw_stats.rxb); + fbnic_get_hw_rxq_stats32(fbd, fbd->hw_stats.hw_q); +} + +void fbnic_get_hw_stats32(struct fbnic_dev *fbd) +{ + spin_lock(&fbd->hw_stats_lock); + __fbnic_get_hw_stats32(fbd); + spin_unlock(&fbd->hw_stats_lock); } void fbnic_get_hw_stats(struct fbnic_dev *fbd) { - fbnic_get_hw_stats32(fbd); + spin_lock(&fbd->hw_stats_lock); + __fbnic_get_hw_stats32(fbd); + fbnic_get_tmi_stats(fbd, &fbd->hw_stats.tmi); + fbnic_get_tti_stats(fbd, &fbd->hw_stats.tti); + fbnic_get_rxb_stats(fbd, &fbd->hw_stats.rxb); fbnic_get_pcie_stats_asic64(fbd, &fbd->hw_stats.pcie); + spin_unlock(&fbd->hw_stats_lock); } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h index 78df56b87745..07e54bb75bf3 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h @@ -17,6 +17,11 @@ struct fbnic_stat_counter { bool reported; }; +struct fbnic_hw_stat { + struct fbnic_stat_counter frames; + struct fbnic_stat_counter bytes; +}; + struct fbnic_eth_mac_stats { struct 
fbnic_stat_counter FramesTransmittedOK; struct fbnic_stat_counter FramesReceivedOK; @@ -37,12 +42,49 @@ struct fbnic_mac_stats { struct fbnic_eth_mac_stats eth_mac; }; +struct fbnic_tmi_stats { + struct fbnic_hw_stat drop; + struct fbnic_stat_counter ptp_illegal_req, ptp_good_ts, ptp_bad_ts; +}; + +struct fbnic_tti_stats { + struct fbnic_hw_stat cm_drop, frame_drop, tbi_drop; +}; + struct fbnic_rpc_stats { struct fbnic_stat_counter unkn_etype, unkn_ext_hdr; struct fbnic_stat_counter ipv4_frag, ipv6_frag, ipv4_esp, ipv6_esp; struct fbnic_stat_counter tcp_opt_err, out_of_hdr_err, ovr_size_err; }; +struct fbnic_rxb_enqueue_stats { + struct fbnic_hw_stat drbo; + struct fbnic_stat_counter integrity_err, mac_err; + struct fbnic_stat_counter parser_err, frm_err; +}; + +struct fbnic_rxb_fifo_stats { + struct fbnic_hw_stat drop, trunc; + struct fbnic_stat_counter trans_drop, trans_ecn; + struct fbnic_stat_counter level; +}; + +struct fbnic_rxb_dequeue_stats { + struct fbnic_hw_stat intf, pbuf; +}; + +struct fbnic_rxb_stats { + struct fbnic_rxb_enqueue_stats enq[FBNIC_RXB_ENQUEUE_INDICES]; + struct fbnic_rxb_fifo_stats fifo[FBNIC_RXB_FIFO_INDICES]; + struct fbnic_rxb_dequeue_stats deq[FBNIC_RXB_DEQUEUE_INDICES]; +}; + +struct fbnic_hw_q_stats { + struct fbnic_stat_counter rde_pkt_err; + struct fbnic_stat_counter rde_pkt_cq_drop; + struct fbnic_stat_counter rde_pkt_bdq_drop; +}; + struct fbnic_pcie_stats { struct fbnic_stat_counter ob_rd_tlp, ob_rd_dword; struct fbnic_stat_counter ob_wr_tlp, ob_wr_dword; @@ -55,13 +97,19 @@ struct fbnic_pcie_stats { struct fbnic_hw_stats { struct fbnic_mac_stats mac; + struct fbnic_tmi_stats tmi; + struct fbnic_tti_stats tti; struct fbnic_rpc_stats rpc; + struct fbnic_rxb_stats rxb; + struct fbnic_hw_q_stats hw_q[FBNIC_MAX_QUEUES]; struct fbnic_pcie_stats pcie; }; u64 fbnic_stat_rd64(struct fbnic_dev *fbd, u32 reg, u32 offset); void fbnic_reset_hw_stats(struct fbnic_dev *fbd); +void fbnic_get_hw_q_stats(struct fbnic_dev *fbd, + struct fbnic_hw_q_stats *hw_q); void fbnic_get_hw_stats32(struct fbnic_dev *fbd); void fbnic_get_hw_stats(struct fbnic_dev *fbd); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c index dde4a37116e2..4ba6f8d10775 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c @@ -744,7 +744,7 @@ static int fbnic_mac_get_sensor_asic(struct fbnic_dev *fbd, int id, *val = *sensor; exit_cleanup: - fbnic_fw_clear_compl(fbd); + fbnic_fw_clear_cmpl(fbd, fw_cmpl); exit_free: fbnic_fw_put_cmpl(fw_cmpl); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index 2524d9b88d59..aa812c63d5af 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -404,12 +404,16 @@ static int fbnic_hwtstamp_set(struct net_device *netdev, static void fbnic_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats64) { + u64 rx_bytes, rx_packets, rx_dropped = 0, rx_errors = 0; u64 tx_bytes, tx_packets, tx_dropped = 0; - u64 rx_bytes, rx_packets, rx_dropped = 0; struct fbnic_net *fbn = netdev_priv(dev); + struct fbnic_dev *fbd = fbn->fbd; struct fbnic_queue_stats *stats; + u64 rx_over = 0, rx_missed = 0; unsigned int start, i; + fbnic_get_hw_stats(fbd); + stats = &fbn->tx_stats; tx_bytes = stats->bytes; @@ -420,6 +424,12 @@ static void fbnic_get_stats64(struct net_device *dev, stats64->tx_packets = tx_packets; stats64->tx_dropped = tx_dropped; + /* Record 
drops from Tx HW Datapath */ + tx_dropped += fbd->hw_stats.tmi.drop.frames.value + + fbd->hw_stats.tti.cm_drop.frames.value + + fbd->hw_stats.tti.frame_drop.frames.value + + fbd->hw_stats.tti.tbi_drop.frames.value; + for (i = 0; i < fbn->num_tx_queues; i++) { struct fbnic_ring *txr = fbn->tx[i]; @@ -445,9 +455,34 @@ static void fbnic_get_stats64(struct net_device *dev, rx_packets = stats->packets; rx_dropped = stats->dropped; + spin_lock(&fbd->hw_stats_lock); + /* Record drops for the host FIFOs. + * 4: network to Host, 6: BMC to Host + * Exclude the BMC and MC FIFOs as those stats may contain drops + * due to unrelated items such as TCAM misses. They are still + * accessible through the ethtool stats. + */ + i = FBNIC_RXB_FIFO_HOST; + rx_missed += fbd->hw_stats.rxb.fifo[i].drop.frames.value; + i = FBNIC_RXB_FIFO_BMC_TO_HOST; + rx_missed += fbd->hw_stats.rxb.fifo[i].drop.frames.value; + + for (i = 0; i < fbd->max_num_queues; i++) { + /* Report packets dropped due to CQ/BDQ being full/empty */ + rx_over += fbd->hw_stats.hw_q[i].rde_pkt_cq_drop.value; + rx_over += fbd->hw_stats.hw_q[i].rde_pkt_bdq_drop.value; + + /* Report packets with errors */ + rx_errors += fbd->hw_stats.hw_q[i].rde_pkt_err.value; + } + spin_unlock(&fbd->hw_stats_lock); + stats64->rx_bytes = rx_bytes; stats64->rx_packets = rx_packets; stats64->rx_dropped = rx_dropped; + stats64->rx_over_errors = rx_over; + stats64->rx_errors = rx_errors; + stats64->rx_missed_errors = rx_missed; for (i = 0; i < fbn->num_rx_queues; i++) { struct fbnic_ring *rxr = fbn->rx[i]; @@ -487,6 +522,7 @@ static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx, { struct fbnic_net *fbn = netdev_priv(dev); struct fbnic_ring *rxr = fbn->rx[idx]; + struct fbnic_dev *fbd = fbn->fbd; struct fbnic_queue_stats *stats; u64 bytes, packets, alloc_fail; u64 csum_complete, csum_none; @@ -510,6 +546,15 @@ static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx, rx->alloc_fail = alloc_fail; rx->csum_complete = csum_complete; rx->csum_none = csum_none; + + fbnic_get_hw_q_stats(fbd, fbd->hw_stats.hw_q); + + spin_lock(&fbd->hw_stats_lock); + rx->hw_drop_overruns = fbd->hw_stats.hw_q[idx].rde_pkt_cq_drop.value + + fbd->hw_stats.hw_q[idx].rde_pkt_bdq_drop.value; + rx->hw_drops = fbd->hw_stats.hw_q[idx].rde_pkt_err.value + + rx->hw_drop_overruns; + spin_unlock(&fbd->hw_stats_lock); } static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index 4e8595239c0f..249d3ef862d5 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -6,6 +6,7 @@ #include <linux/pci.h> #include <linux/rtnetlink.h> #include <linux/types.h> +#include <net/devlink.h> #include "fbnic.h" #include "fbnic_drvinfo.h" @@ -292,6 +293,7 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) fbnic_devlink_register(fbd); fbnic_dbg_fbd_init(fbd); + spin_lock_init(&fbd->hw_stats_lock); /* Capture snapshot of hardware stats so netdev can calculate delta */ fbnic_reset_hw_stats(fbd); @@ -387,8 +389,12 @@ static int fbnic_pm_suspend(struct device *dev) rtnl_unlock(); null_uc_addr: + devl_lock(priv_to_devlink(fbd)); + fbnic_fw_free_mbx(fbd); + devl_unlock(priv_to_devlink(fbd)); + /* Free the IRQs so they aren't trying to occupy sleeping CPUs */ fbnic_free_irqs(fbd); @@ -419,11 +425,15 @@ static int __fbnic_pm_resume(struct device *dev) fbd->mac->init_regs(fbd); + devl_lock(priv_to_devlink(fbd)); 
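/* Annotation, not part of the patch: the devlink instance lock taken
 * here in resume, and around fbnic_fw_free_mbx() in the suspend path
 * above, presumably serializes mailbox teardown and bring-up against
 * devlink operations such as the new flash_update, which depends on
 * that same firmware mailbox.
 */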
+ /* Re-enable mailbox */ err = fbnic_fw_request_mbx(fbd); if (err) goto err_free_irqs; + devl_unlock(priv_to_devlink(fbd)); + /* No netdev means there isn't a network interface to bring up */ if (fbnic_init_failure(fbd)) return 0; diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index e2d6bfb5d693..73dfc85fa67e 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -2499,8 +2499,7 @@ static int lan743x_rx_process_buffer(struct lan743x_rx *rx) /* save existing skb, allocate new skb and map to dma */ skb = buffer_info->skb; - if (lan743x_rx_init_ring_element(rx, rx->last_head, - GFP_ATOMIC | GFP_DMA)) { + if (lan743x_rx_init_ring_element(rx, rx->last_head, GFP_ATOMIC)) { /* failed to allocate next skb. * Memory is very low. * Drop this packet and reuse buffer. diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c index 0be44dcb3393..b07f5b099a2b 100644 --- a/drivers/net/ethernet/microchip/lan743x_ptp.c +++ b/drivers/net/ethernet/microchip/lan743x_ptp.c @@ -463,10 +463,6 @@ static int lan743x_ptp_perout(struct lan743x_adapter *adapter, int on, struct lan743x_ptp_perout *perout = &ptp->perout[index]; int ret = 0; - /* Reject requests with unsupported flags */ - if (perout_request->flags & ~PTP_PEROUT_DUTY_CYCLE) - return -EOPNOTSUPP; - if (on) { perout_pin = ptp_find_pin(ptp->ptp_clock, PTP_PF_PEROUT, perout_request->index); @@ -942,12 +938,6 @@ static int lan743x_ptp_io_extts(struct lan743x_adapter *adapter, int on, extts = &ptp->extts[index]; - if (extts_request->flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_FALLING_EDGE | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - if (on) { extts_pin = ptp_find_pin(ptp->ptp_clock, PTP_PF_EXTTS, index); if (extts_pin < 0) @@ -1543,6 +1533,10 @@ int lan743x_ptp_open(struct lan743x_adapter *adapter) ptp->ptp_clock_info.n_per_out = LAN743X_PTP_N_EVENT_CHAN; ptp->ptp_clock_info.n_pins = n_pins; ptp->ptp_clock_info.pps = LAN743X_PTP_N_PPS; + ptp->ptp_clock_info.supported_extts_flags = PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS; + ptp->ptp_clock_info.supported_perout_flags = PTP_PEROUT_DUTY_CYCLE; ptp->ptp_clock_info.pin_config = ptp->pin_config; ptp->ptp_clock_info.adjfine = lan743x_ptpci_adjfine; ptp->ptp_clock_info.adjtime = lan743x_ptpci_adjtime; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c index 63905bb5a63a..098406e2e5bb 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c @@ -815,10 +815,6 @@ static int lan966x_ptp_perout(struct ptp_clock_info *ptp, bool pps = false; int pin; - if (rq->perout.flags & ~(PTP_PEROUT_DUTY_CYCLE | - PTP_PEROUT_PHASE)) - return -EOPNOTSUPP; - pin = ptp_find_pin(phc->clock, PTP_PF_PEROUT, rq->perout.index); if (pin == -1 || pin >= LAN966X_PHC_PINS_NUM) return -EINVAL; @@ -917,12 +913,6 @@ static int lan966x_ptp_extts(struct ptp_clock_info *ptp, if (lan966x->ptp_ext_irq <= 0) return -EOPNOTSUPP; - /* Reject requests with unsupported flags */ - if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - pin = ptp_find_pin(phc->clock, PTP_PF_EXTTS, rq->extts.index); if (pin == -1 || pin >= LAN966X_PHC_PINS_NUM) return -EINVAL; @@ -978,6 +968,10 @@ static struct ptp_clock_info lan966x_ptp_clock_info = { .n_per_out = LAN966X_PHC_PINS_NUM, .n_ext_ts 
= LAN966X_PHC_PINS_NUM, .n_pins = LAN966X_PHC_PINS_NUM, + .supported_extts_flags = PTP_RISING_EDGE | + PTP_STRICT_FLAGS, + .supported_perout_flags = PTP_PEROUT_DUTY_CYCLE | + PTP_PEROUT_PHASE, }; static int lan966x_ptp_phc_init(struct lan966x *lan966x, diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 7663d196eaf8..469784d3a1a6 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -870,23 +870,30 @@ static int ocelot_set_features(struct net_device *dev, static int ocelot_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { + return phy_mii_ioctl(dev->phydev, ifr, cmd); +} + +static int ocelot_port_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *cfg) +{ struct ocelot_port_private *priv = netdev_priv(dev); struct ocelot *ocelot = priv->port.ocelot; int port = priv->port.index; - /* If the attached PHY device isn't capable of timestamping operations, - * use our own (when possible). - */ - if (!phy_has_hwtstamp(dev->phydev) && ocelot->ptp) { - switch (cmd) { - case SIOCSHWTSTAMP: - return ocelot_hwstamp_set(ocelot, port, ifr); - case SIOCGHWTSTAMP: - return ocelot_hwstamp_get(ocelot, port, ifr); - } - } + ocelot_hwstamp_get(ocelot, port, cfg); - return phy_mii_ioctl(dev->phydev, ifr, cmd); + return 0; +} + +static int ocelot_port_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) +{ + struct ocelot_port_private *priv = netdev_priv(dev); + struct ocelot *ocelot = priv->port.ocelot; + int port = priv->port.index; + + return ocelot_hwstamp_set(ocelot, port, cfg, extack); } static int ocelot_change_mtu(struct net_device *dev, int new_mtu) @@ -917,6 +924,8 @@ static const struct net_device_ops ocelot_port_netdev_ops = { .ndo_set_features = ocelot_set_features, .ndo_setup_tc = ocelot_setup_tc, .ndo_eth_ioctl = ocelot_ioctl, + .ndo_hwtstamp_get = ocelot_port_hwtstamp_get, + .ndo_hwtstamp_set = ocelot_port_hwtstamp_set, }; struct net_device *ocelot_port_to_netdev(struct ocelot *ocelot, int port) diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c index cc1088988da0..88b5422cc2a0 100644 --- a/drivers/net/ethernet/mscc/ocelot_ptp.c +++ b/drivers/net/ethernet/mscc/ocelot_ptp.c @@ -211,11 +211,6 @@ int ocelot_ptp_enable(struct ptp_clock_info *ptp, switch (rq->type) { case PTP_CLK_REQ_PEROUT: - /* Reject requests with unsupported flags */ - if (rq->perout.flags & ~(PTP_PEROUT_DUTY_CYCLE | - PTP_PEROUT_PHASE)) - return -EOPNOTSUPP; - pin = ptp_find_pin(ocelot->ptp_clock, PTP_PF_PEROUT, rq->perout.index); if (pin == 0) @@ -519,47 +514,42 @@ static int ocelot_ptp_tx_type_to_cmd(int tx_type, int *ptp_cmd) return 0; } -int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr) +void ocelot_hwstamp_get(struct ocelot *ocelot, int port, + struct kernel_hwtstamp_config *cfg) { struct ocelot_port *ocelot_port = ocelot->ports[port]; - struct hwtstamp_config cfg = {}; switch (ocelot_port->ptp_cmd) { case IFH_REW_OP_TWO_STEP_PTP: - cfg.tx_type = HWTSTAMP_TX_ON; + cfg->tx_type = HWTSTAMP_TX_ON; break; case IFH_REW_OP_ORIGIN_PTP: - cfg.tx_type = HWTSTAMP_TX_ONESTEP_SYNC; + cfg->tx_type = HWTSTAMP_TX_ONESTEP_SYNC; break; default: - cfg.tx_type = HWTSTAMP_TX_OFF; + cfg->tx_type = HWTSTAMP_TX_OFF; break; } - cfg.rx_filter = ocelot_traps_to_ptp_rx_filter(ocelot_port->trap_proto); - - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? 
-EFAULT : 0; + cfg->rx_filter = ocelot_traps_to_ptp_rx_filter(ocelot_port->trap_proto); } EXPORT_SYMBOL(ocelot_hwstamp_get); -int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) +int ocelot_hwstamp_set(struct ocelot *ocelot, int port, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { struct ocelot_port *ocelot_port = ocelot->ports[port]; - int ptp_cmd, old_ptp_cmd = ocelot_port->ptp_cmd; bool l2 = false, l4 = false; - struct hwtstamp_config cfg; - bool old_l2, old_l4; + int ptp_cmd; int err; - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; - /* Tx type sanity check */ - err = ocelot_ptp_tx_type_to_cmd(cfg.tx_type, &ptp_cmd); + err = ocelot_ptp_tx_type_to_cmd(cfg->tx_type, &ptp_cmd); if (err) return err; - switch (cfg.rx_filter) { + switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: break; case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: @@ -582,27 +572,15 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) return -ERANGE; } - old_l2 = ocelot_port->trap_proto & OCELOT_PROTO_PTP_L2; - old_l4 = ocelot_port->trap_proto & OCELOT_PROTO_PTP_L4; - err = ocelot_setup_ptp_traps(ocelot, port, l2, l4); if (err) return err; ocelot_port->ptp_cmd = ptp_cmd; - cfg.rx_filter = ocelot_traps_to_ptp_rx_filter(ocelot_port->trap_proto); - - if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg))) { - err = -EFAULT; - goto out_restore_ptp_traps; - } + cfg->rx_filter = ocelot_traps_to_ptp_rx_filter(ocelot_port->trap_proto); return 0; -out_restore_ptp_traps: - ocelot_setup_ptp_traps(ocelot, port, old_l2, old_l4); - ocelot_port->ptp_cmd = old_ptp_cmd; - return err; } EXPORT_SYMBOL(ocelot_hwstamp_set); diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 055b55651a49..498eec8ae61d 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -108,6 +108,8 @@ static struct ptp_clock_info ocelot_ptp_clock_info = { .n_ext_ts = 0, .n_per_out = OCELOT_PTP_PINS_NUM, .n_pins = OCELOT_PTP_PINS_NUM, + .supported_perout_flags = PTP_PEROUT_DUTY_CYCLE | + PTP_PEROUT_PHASE, .pps = 0, .gettime64 = ocelot_ptp_gettime64, .settime64 = ocelot_ptp_settime64, diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 05606692e631..dd279788cf9e 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -846,7 +846,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENOMEM; SET_NETDEV_DEV(dev, &pdev->dev); - i = pci_request_regions(pdev, DRV_NAME); + i = pcim_request_all_regions(pdev, DRV_NAME); if (i) goto err_pci_request_regions; diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c index f1c6c47564b1..08086eb76996 100644 --- a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c @@ -779,7 +779,7 @@ nfp_nfd3_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, case NFP_NET_META_CSUM: meta->csum_type = CHECKSUM_COMPLETE; meta->csum = - (__force __wsum)__get_unaligned_cpu32(data); + (__force __wsum)get_unaligned((u32 *)data); data += 4; break; case NFP_NET_META_RESYNC_INFO: diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c index ebeb6ab4465c..ab3cd06ed63e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -779,7 
+779,7 @@ nfp_nfdk_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, case NFP_NET_META_CSUM: meta->csum_type = CHECKSUM_COMPLETE; meta->csum = - (__force __wsum)__get_unaligned_cpu32(data); + (__force __wsum)get_unaligned((u32 *)data); data += 4; break; case NFP_NET_META_RESYNC_INFO: diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 95514fabadf2..28997ddab966 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2538,7 +2538,7 @@ nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info, nn->dp.num_r_vecs, num_online_cpus()); nn->max_r_vecs = nn->dp.num_r_vecs; - nn->dp.xsk_pools = kcalloc(nn->max_r_vecs, sizeof(nn->dp.xsk_pools), + nn->dp.xsk_pools = kcalloc(nn->max_r_vecs, sizeof(*nn->dp.xsk_pools), GFP_KERNEL); if (!nn->dp.xsk_pools) { err = -ENOMEM; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index a2d4336d2766..92f30ff2d631 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -948,64 +948,20 @@ static int ionic_get_tunable(struct net_device *netdev, return 0; } -static int ionic_get_module_info(struct net_device *netdev, - struct ethtool_modinfo *modinfo) - -{ - struct ionic_lif *lif = netdev_priv(netdev); - struct ionic_dev *idev = &lif->ionic->idev; - struct ionic_xcvr_status *xcvr; - struct sfp_eeprom_base *sfp; - - xcvr = &idev->port_info->status.xcvr; - sfp = (struct sfp_eeprom_base *) xcvr->sprom; - - /* report the module data type and length */ - switch (sfp->phys_id) { - case SFF8024_ID_SFP: - modinfo->type = ETH_MODULE_SFF_8079; - modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; - break; - case SFF8024_ID_QSFP_8436_8636: - case SFF8024_ID_QSFP28_8636: - case SFF8024_ID_QSFP_PLUS_CMIS: - modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; - break; - default: - netdev_info(netdev, "unknown xcvr type 0x%02x\n", - xcvr->sprom[0]); - modinfo->type = 0; - modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; - break; - } - - return 0; -} - -static int ionic_get_module_eeprom(struct net_device *netdev, - struct ethtool_eeprom *ee, - u8 *data) +static int ionic_do_module_copy(u8 *dst, u8 *src, u32 len) { - struct ionic_lif *lif = netdev_priv(netdev); - struct ionic_dev *idev = &lif->ionic->idev; - struct ionic_xcvr_status *xcvr; - char tbuf[sizeof(xcvr->sprom)]; + char tbuf[sizeof_field(struct ionic_xcvr_status, sprom)]; int count = 10; - u32 len; /* The NIC keeps the module prom up-to-date in the DMA space * so we can simply copy the module bytes into the data buffer. 
*/ - xcvr = &idev->port_info->status.xcvr; - len = min_t(u32, sizeof(xcvr->sprom), ee->len); - do { - memcpy(data, &xcvr->sprom[ee->offset], len); - memcpy(tbuf, &xcvr->sprom[ee->offset], len); + memcpy(dst, src, len); + memcpy(tbuf, src, len); /* Let's make sure we got a consistent copy */ - if (!memcmp(data, tbuf, len)) + if (!memcmp(dst, tbuf, len)) break; } while (--count); @@ -1016,6 +972,48 @@ static int ionic_get_module_eeprom(struct net_device *netdev, return 0; } +static int ionic_get_module_eeprom_by_page(struct net_device *netdev, + const struct ethtool_module_eeprom *page_data, + struct netlink_ext_ack *extack) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_dev *idev = &lif->ionic->idev; + u32 err = -EINVAL; + u8 *src; + + if (!page_data->length) + return -EINVAL; + + if (page_data->bank != 0) { + NL_SET_ERR_MSG_MOD(extack, "Only bank 0 is supported"); + return -EINVAL; + } + + switch (page_data->page) { + case 0: + src = &idev->port_info->status.xcvr.sprom[page_data->offset]; + break; + case 1: + src = &idev->port_info->sprom_page1[page_data->offset - 128]; + break; + case 2: + src = &idev->port_info->sprom_page2[page_data->offset - 128]; + break; + case 17: + src = &idev->port_info->sprom_page17[page_data->offset - 128]; + break; + default: + return -EOPNOTSUPP; + } + + memset(page_data->data, 0, page_data->length); + err = ionic_do_module_copy(page_data->data, src, page_data->length); + if (err) + return err; + + return page_data->length; +} + static int ionic_get_ts_info(struct net_device *netdev, struct kernel_ethtool_ts_info *info) { @@ -1161,8 +1159,7 @@ static const struct ethtool_ops ionic_ethtool_ops = { .set_rxfh = ionic_set_rxfh, .get_tunable = ionic_get_tunable, .set_tunable = ionic_set_tunable, - .get_module_info = ionic_get_module_info, - .get_module_eeprom = ionic_get_module_eeprom, + .get_module_eeprom_by_page = ionic_get_module_eeprom_by_page, .get_pauseparam = ionic_get_pauseparam, .set_pauseparam = ionic_set_pauseparam, .get_fecparam = ionic_get_fecparam, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index 830c8adbfbee..f1ddbe9994a3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -2839,6 +2839,10 @@ union ionic_port_identity { * @status: Port status data * @stats: Port statistics data * @mgmt_stats: Port management statistics data + * @sprom_epage: Extended Transceiver sprom + * @sprom_page1: Extended Transceiver sprom, page 1 + * @sprom_page2: Extended Transceiver sprom, page 2 + * @sprom_page17: Extended Transceiver sprom, page 17 * @rsvd: reserved byte(s) * @pb_stats: uplink pb drop stats */ @@ -2849,8 +2853,17 @@ struct ionic_port_info { struct ionic_port_stats stats; struct ionic_mgmt_port_stats mgmt_stats; }; - /* room for pb_stats to start at 2k offset */ - u8 rsvd[760]; + union { + u8 sprom_epage[384]; + struct { + u8 sprom_page1[128]; + u8 sprom_page2[128]; + u8 sprom_page17[128]; + }; + }; + u8 rsvd[376]; + + /* pb_stats must start at 2k offset */ struct ionic_port_pb_stats pb_stats; }; diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index b7def3b54937..016b575861b9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -939,7 +939,6 @@ u16 qed_get_cm_pq_idx_ofld_mtc(struct qed_hwfn *p_hwfn, u8 tc); u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc); /* doorbell recovery mechanism */ -void 
qed_db_recovery_dp(struct qed_hwfn *p_hwfn); void qed_db_recovery_execute(struct qed_hwfn *p_hwfn); bool qed_edpm_enabled(struct qed_hwfn *p_hwfn); diff --git a/drivers/net/ethernet/qlogic/qed/qed_dbg_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_dbg_hsi.h index f6cd1b3efdfd..27e91d0d39f8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dbg_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dbg_hsi.h @@ -1305,37 +1305,6 @@ enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn, char *results_buf); /** - * qed_print_mcp_trace_results_cont(): Prints MCP Trace results, and - * keeps the MCP trace meta data allocated, to support continuous MCP Trace - * parsing. After the continuous parsing ends, mcp_trace_free_meta_data should - * be called to free the meta data. - * - * @p_hwfn: HW device data. - * @dump_buf: MVP trace dump buffer, starting from the header. - * @results_buf: Buffer for printing the mcp trace results. - * - * Return: Error if the parsing fails, ok otherwise. - */ -enum dbg_status qed_print_mcp_trace_results_cont(struct qed_hwfn *p_hwfn, - u32 *dump_buf, - char *results_buf); - -/** - * qed_print_mcp_trace_line(): Prints MCP Trace results for a single line - * - * @p_hwfn: HW device data. - * @dump_buf: MCP trace dump buffer, starting from the header. - * @num_dumped_bytes: Number of bytes that were dumped. - * @results_buf: Buffer for printing the mcp trace results. - * - * Return: Error if the parsing fails, ok otherwise. - */ -enum dbg_status qed_print_mcp_trace_line(struct qed_hwfn *p_hwfn, - u8 *dump_buf, - u32 num_dumped_bytes, - char *results_buf); - -/** * qed_mcp_trace_free_meta_data(): Frees the MCP Trace meta data. * Should be called after continuous MCP Trace parsing. * diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index 464a72afb758..9c3d3dd2f847 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -7614,31 +7614,6 @@ enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn, results_buf, &parsed_buf_size, true); } -enum dbg_status qed_print_mcp_trace_results_cont(struct qed_hwfn *p_hwfn, - u32 *dump_buf, - char *results_buf) -{ - u32 parsed_buf_size; - - return qed_parse_mcp_trace_dump(p_hwfn, dump_buf, results_buf, - &parsed_buf_size, false); -} - -enum dbg_status qed_print_mcp_trace_line(struct qed_hwfn *p_hwfn, - u8 *dump_buf, - u32 num_dumped_bytes, - char *results_buf) -{ - u32 parsed_results_bytes; - - return qed_parse_mcp_trace_buf(p_hwfn, - dump_buf, - num_dumped_bytes, - 0, - num_dumped_bytes, - results_buf, &parsed_results_bytes); -} - /* Frees the specified MCP Trace meta data */ void qed_mcp_trace_free_meta_data(struct qed_hwfn *p_hwfn) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 86a93cac2647..9659ce5b0712 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -255,25 +255,6 @@ static void qed_db_recovery_teardown(struct qed_hwfn *p_hwfn) p_hwfn->db_recovery_info.db_recovery_counter = 0; } -/* Print the content of the doorbell recovery mechanism */ -void qed_db_recovery_dp(struct qed_hwfn *p_hwfn) -{ - struct qed_db_recovery_entry *db_entry = NULL; - - DP_NOTICE(p_hwfn, - "Displaying doorbell recovery database. 
Counter was %d\n", - p_hwfn->db_recovery_info.db_recovery_counter); - - /* Protect the list */ - spin_lock_bh(&p_hwfn->db_recovery_info.lock); - list_for_each_entry(db_entry, - &p_hwfn->db_recovery_info.list, list_entry) { - qed_db_recovery_dp_entry(p_hwfn, db_entry, "Printing"); - } - - spin_unlock_bh(&p_hwfn->db_recovery_info.lock); -} - /* Ring the doorbell of a single doorbell recovery entry */ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, struct qed_db_recovery_entry *db_entry) diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index ed1a84542ad2..10e355397cee 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -2666,58 +2666,6 @@ void qed_gft_config(struct qed_hwfn *p_hwfn, void qed_enable_context_validation(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); -/** - * qed_calc_session_ctx_validation(): Calcualte validation byte for - * session context. - * - * @p_ctx_mem: Pointer to context memory. - * @ctx_size: Context size. - * @ctx_type: Context type. - * @cid: Context cid. - * - * Return: Void. - */ -void qed_calc_session_ctx_validation(void *p_ctx_mem, - u16 ctx_size, u8 ctx_type, u32 cid); - -/** - * qed_calc_task_ctx_validation(): Calcualte validation byte for task - * context. - * - * @p_ctx_mem: Pointer to context memory. - * @ctx_size: Context size. - * @ctx_type: Context type. - * @tid: Context tid. - * - * Return: Void. - */ -void qed_calc_task_ctx_validation(void *p_ctx_mem, - u16 ctx_size, u8 ctx_type, u32 tid); - -/** - * qed_memset_session_ctx(): Memset session context to 0 while - * preserving validation bytes. - * - * @p_ctx_mem: Pointer to context memory. - * @ctx_size: Size to initialzie. - * @ctx_type: Context type. - * - * Return: Void. - */ -void qed_memset_session_ctx(void *p_ctx_mem, u32 ctx_size, u8 ctx_type); - -/** - * qed_memset_task_ctx(): Memset task context to 0 while preserving - * validation bytes. - * - * @p_ctx_mem: Pointer to context memory. - * @ctx_size: size to initialzie. - * @ctx_type: context type. - * - * Return: Void. - */ -void qed_memset_task_ctx(void *p_ctx_mem, u32 ctx_size, u8 ctx_type); - #define NUM_STORMS 6 /** diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c index 9e5f0dbc8a07..9907973399dc 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hw.c +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c @@ -69,17 +69,6 @@ int qed_ptt_pool_alloc(struct qed_hwfn *p_hwfn) return 0; } -void qed_ptt_invalidate(struct qed_hwfn *p_hwfn) -{ - struct qed_ptt *p_ptt; - int i; - - for (i = 0; i < PXP_EXTERNAL_BAR_PF_WINDOW_NUM; i++) { - p_ptt = &p_hwfn->p_ptt_pool->ptts[i]; - p_ptt->pxp.offset = QED_BAR_INVALID_OFFSET; - } -} - void qed_ptt_pool_free(struct qed_hwfn *p_hwfn) { kfree(p_hwfn->p_ptt_pool); diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.h b/drivers/net/ethernet/qlogic/qed/qed_hw.h index e535983ce21b..3c98f58a184f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hw.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.h @@ -62,15 +62,6 @@ enum _dmae_cmd_crc_mask { void qed_gtt_init(struct qed_hwfn *p_hwfn); /** - * qed_ptt_invalidate(): Forces all ptt entries to be re-configured - * - * @p_hwfn: HW device data. - * - * Return: Void. - */ -void qed_ptt_invalidate(struct qed_hwfn *p_hwfn); - -/** * qed_ptt_pool_alloc(): Allocate and initialize PTT pool. * * @p_hwfn: HW device data. 
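The qed_init_fw_funcs.c diff just below removes the CDU context-validation helpers together with the comment describing the byte layout they hash. For readers tracking what the removal discards, here is a self-contained sketch of that computation. The configuration bits are hard-coded assumptions standing in for CDU_VALIDATION_DEFAULT_CFG (CID/region/type all used, type-A byte, active bit set), since that value is not quoted here; the CRC itself matches crc8_populate_msb(table, 0x07) with CRC8_INIT_VALUE.

#include <stdint.h>
#include <stdio.h>

/* Bitwise equivalent of crc8_populate_msb(table, 0x07) + crc8() with
 * CRC8_INIT_VALUE (0xff): MSB-first CRC8, polynomial x^8 + x^2 + x + 1.
 */
static uint8_t crc8_msb_07(const uint8_t *buf, int len)
{
	uint8_t crc = 0xff;

	while (len--) {
		crc ^= *buf++;
		for (int i = 0; i < 8; i++)
			crc = crc & 0x80 ? (uint8_t)(crc << 1) ^ 0x07
					 : (uint8_t)(crc << 1);
	}
	return crc;
}

static uint8_t calc_cdu_validation_byte(uint8_t conn_type, uint8_t region,
					uint32_t cid)
{
	/* String-to-compress, per the removed comment:
	 * [31:8] = {CID[31:20], CID[11:0]}, [7:4] = region, [3:0] = type
	 */
	uint32_t s = (cid & 0xFFF00000) | ((cid & 0xFFF) << 8) |
		     ((uint32_t)(region & 0xF) << 4) | (conn_type & 0xF);
	/* The CRC runs over the big-endian representation (cpu_to_be32) */
	uint8_t be[4] = { (uint8_t)(s >> 24), (uint8_t)(s >> 16),
			  (uint8_t)(s >> 8), (uint8_t)s };
	uint8_t crc = crc8_msb_07(be, 4);

	/* Assumed type-A layout: [7] = active bit, [6:0] = crc[6:0] */
	return 0x80 | (crc & 0x7F);
}

int main(void)
{
	/* e.g. connection type 7, region 3 (X context), CID 0x1234 */
	printf("validation byte: 0x%02x\n",
	       calc_cdu_validation_byte(7, 3, 0x1234));
	return 0;
}

The removed qed_calc_session_ctx_validation() applied this byte at the region 3/4/5 offsets from con_region_offsets[], and qed_calc_task_ctx_validation() at the region 1 offset from task_region_offsets[], both of which the hunk also deletes.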
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c index 407029a36fa1..aa20bb8caa9a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c +++ b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c @@ -18,16 +18,6 @@ #define CDU_VALIDATION_DEFAULT_CFG CDU_CONTEXT_VALIDATION_DEFAULT_CFG -static u16 con_region_offsets[3][NUM_OF_CONNECTION_TYPES] = { - {400, 336, 352, 368, 304, 384, 416, 352}, /* region 3 offsets */ - {528, 496, 416, 512, 448, 512, 544, 480}, /* region 4 offsets */ - {608, 544, 496, 576, 576, 592, 624, 560} /* region 5 offsets */ -}; - -static u16 task_region_offsets[1][NUM_OF_CONNECTION_TYPES] = { - {240, 240, 112, 0, 0, 0, 0, 96} /* region 1 offsets */ -}; - /* General constants */ #define QM_PQ_MEM_4KB(pq_size) (pq_size ? DIV_ROUND_UP((pq_size + 1) * \ QM_PQ_ELEMENT_SIZE, \ @@ -1576,134 +1566,6 @@ void qed_gft_config(struct qed_hwfn *p_hwfn, qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_GFT, 1); } -DECLARE_CRC8_TABLE(cdu_crc8_table); - -/* Calculate and return CDU validation byte per connection type/region/cid */ -static u8 qed_calc_cdu_validation_byte(u8 conn_type, u8 region, u32 cid) -{ - const u8 validation_cfg = CDU_VALIDATION_DEFAULT_CFG; - u8 crc, validation_byte = 0; - static u8 crc8_table_valid; /* automatically initialized to 0 */ - u32 validation_string = 0; - __be32 data_to_crc; - - if (!crc8_table_valid) { - crc8_populate_msb(cdu_crc8_table, 0x07); - crc8_table_valid = 1; - } - - /* The CRC is calculated on the String-to-compress: - * [31:8] = {CID[31:20],CID[11:0]} - * [7:4] = Region - * [3:0] = Type - */ - if ((validation_cfg >> CDU_CONTEXT_VALIDATION_CFG_USE_CID) & 1) - validation_string |= (cid & 0xFFF00000) | ((cid & 0xFFF) << 8); - - if ((validation_cfg >> CDU_CONTEXT_VALIDATION_CFG_USE_REGION) & 1) - validation_string |= ((region & 0xF) << 4); - - if ((validation_cfg >> CDU_CONTEXT_VALIDATION_CFG_USE_TYPE) & 1) - validation_string |= (conn_type & 0xF); - - /* Convert to big-endian and calculate CRC8 */ - data_to_crc = cpu_to_be32(validation_string); - crc = crc8(cdu_crc8_table, (u8 *)&data_to_crc, sizeof(data_to_crc), - CRC8_INIT_VALUE); - - /* The validation byte [7:0] is composed: - * for type A validation - * [7] = active configuration bit - * [6:0] = crc[6:0] - * - * for type B validation - * [7] = active configuration bit - * [6:3] = connection_type[3:0] - * [2:0] = crc[2:0] - */ - validation_byte |= - ((validation_cfg >> - CDU_CONTEXT_VALIDATION_CFG_USE_ACTIVE) & 1) << 7; - - if ((validation_cfg >> - CDU_CONTEXT_VALIDATION_CFG_VALIDATION_TYPE_SHIFT) & 1) - validation_byte |= ((conn_type & 0xF) << 3) | (crc & 0x7); - else - validation_byte |= crc & 0x7F; - - return validation_byte; -} - -/* Calcualte and set validation bytes for session context */ -void qed_calc_session_ctx_validation(void *p_ctx_mem, - u16 ctx_size, u8 ctx_type, u32 cid) -{ - u8 *x_val_ptr, *t_val_ptr, *u_val_ptr, *p_ctx; - - p_ctx = (u8 * const)p_ctx_mem; - x_val_ptr = &p_ctx[con_region_offsets[0][ctx_type]]; - t_val_ptr = &p_ctx[con_region_offsets[1][ctx_type]]; - u_val_ptr = &p_ctx[con_region_offsets[2][ctx_type]]; - - memset(p_ctx, 0, ctx_size); - - *x_val_ptr = qed_calc_cdu_validation_byte(ctx_type, 3, cid); - *t_val_ptr = qed_calc_cdu_validation_byte(ctx_type, 4, cid); - *u_val_ptr = qed_calc_cdu_validation_byte(ctx_type, 5, cid); -} - -/* Calcualte and set validation bytes for task context */ -void qed_calc_task_ctx_validation(void *p_ctx_mem, - u16 ctx_size, u8 ctx_type, u32 tid) -{ - u8 *p_ctx, 
*region1_val_ptr; - - p_ctx = (u8 * const)p_ctx_mem; - region1_val_ptr = &p_ctx[task_region_offsets[0][ctx_type]]; - - memset(p_ctx, 0, ctx_size); - - *region1_val_ptr = qed_calc_cdu_validation_byte(ctx_type, 1, tid); -} - -/* Memset session context to 0 while preserving validation bytes */ -void qed_memset_session_ctx(void *p_ctx_mem, u32 ctx_size, u8 ctx_type) -{ - u8 *x_val_ptr, *t_val_ptr, *u_val_ptr, *p_ctx; - u8 x_val, t_val, u_val; - - p_ctx = (u8 * const)p_ctx_mem; - x_val_ptr = &p_ctx[con_region_offsets[0][ctx_type]]; - t_val_ptr = &p_ctx[con_region_offsets[1][ctx_type]]; - u_val_ptr = &p_ctx[con_region_offsets[2][ctx_type]]; - - x_val = *x_val_ptr; - t_val = *t_val_ptr; - u_val = *u_val_ptr; - - memset(p_ctx, 0, ctx_size); - - *x_val_ptr = x_val; - *t_val_ptr = t_val; - *u_val_ptr = u_val; -} - -/* Memset task context to 0 while preserving validation bytes */ -void qed_memset_task_ctx(void *p_ctx_mem, u32 ctx_size, u8 ctx_type) -{ - u8 *p_ctx, *region1_val_ptr; - u8 region1_val; - - p_ctx = (u8 * const)p_ctx_mem; - region1_val_ptr = &p_ctx[task_region_offsets[0][ctx_type]]; - - region1_val = *region1_val_ptr; - - memset(p_ctx, 0, ctx_size); - - *region1_val_ptr = region1_val; -} - /* Enable and configure context validation */ void qed_enable_context_validation(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) diff --git a/drivers/net/ethernet/qualcomm/Kconfig b/drivers/net/ethernet/qualcomm/Kconfig index 9210ff360fdc..a4434eb38950 100644 --- a/drivers/net/ethernet/qualcomm/Kconfig +++ b/drivers/net/ethernet/qualcomm/Kconfig @@ -52,7 +52,6 @@ config QCOM_EMAC depends on HAS_DMA && HAS_IOMEM select CRC32 select PHYLIB - select MDIO_DEVRES help This driver supports the Qualcomm Technologies, Inc. Gigabit Ethernet Media Access Controller (EMAC). The controller diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h index 7a194a8ab989..f05231030925 100644 --- a/drivers/net/ethernet/realtek/r8169.h +++ b/drivers/net/ethernet/realtek/r8169.h @@ -64,16 +64,14 @@ enum mac_version { /* support for RTL_GIGA_MAC_VER_50 has been removed */ RTL_GIGA_MAC_VER_51, RTL_GIGA_MAC_VER_52, - RTL_GIGA_MAC_VER_53, /* support for RTL_GIGA_MAC_VER_60 has been removed */ RTL_GIGA_MAC_VER_61, RTL_GIGA_MAC_VER_63, RTL_GIGA_MAC_VER_64, - RTL_GIGA_MAC_VER_65, RTL_GIGA_MAC_VER_66, RTL_GIGA_MAC_VER_70, - RTL_GIGA_MAC_VER_71, - RTL_GIGA_MAC_NONE + RTL_GIGA_MAC_NONE, + RTL_GIGA_MAC_VER_LAST = RTL_GIGA_MAC_NONE - 1 }; struct rtl8169_private; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 4eebd9cb40a3..7bf71a675362 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -91,61 +91,114 @@ #define JUMBO_9K (9 * SZ_1K - VLAN_ETH_HLEN - ETH_FCS_LEN) #define JUMBO_16K (SZ_16K - VLAN_ETH_HLEN - ETH_FCS_LEN) -static const struct { +static const struct rtl_chip_info { + u16 mask; + u16 val; + enum mac_version mac_version; const char *name; const char *fw_name; } rtl_chip_infos[] = { - /* PCI devices. */ - [RTL_GIGA_MAC_VER_02] = {"RTL8169s" }, - [RTL_GIGA_MAC_VER_03] = {"RTL8110s" }, - [RTL_GIGA_MAC_VER_04] = {"RTL8169sb/8110sb" }, - [RTL_GIGA_MAC_VER_05] = {"RTL8169sc/8110sc" }, - [RTL_GIGA_MAC_VER_06] = {"RTL8169sc/8110sc" }, - /* PCI-E devices. 
*/ - [RTL_GIGA_MAC_VER_07] = {"RTL8102e" }, - [RTL_GIGA_MAC_VER_08] = {"RTL8102e" }, - [RTL_GIGA_MAC_VER_09] = {"RTL8102e/RTL8103e" }, - [RTL_GIGA_MAC_VER_10] = {"RTL8101e/RTL8100e" }, - [RTL_GIGA_MAC_VER_14] = {"RTL8401" }, - [RTL_GIGA_MAC_VER_17] = {"RTL8168b/8111b" }, - [RTL_GIGA_MAC_VER_18] = {"RTL8168cp/8111cp" }, - [RTL_GIGA_MAC_VER_19] = {"RTL8168c/8111c" }, - [RTL_GIGA_MAC_VER_20] = {"RTL8168c/8111c" }, - [RTL_GIGA_MAC_VER_21] = {"RTL8168c/8111c" }, - [RTL_GIGA_MAC_VER_22] = {"RTL8168c/8111c" }, - [RTL_GIGA_MAC_VER_23] = {"RTL8168cp/8111cp" }, - [RTL_GIGA_MAC_VER_24] = {"RTL8168cp/8111cp" }, - [RTL_GIGA_MAC_VER_25] = {"RTL8168d/8111d", FIRMWARE_8168D_1}, - [RTL_GIGA_MAC_VER_26] = {"RTL8168d/8111d", FIRMWARE_8168D_2}, - [RTL_GIGA_MAC_VER_28] = {"RTL8168dp/8111dp" }, - [RTL_GIGA_MAC_VER_29] = {"RTL8105e", FIRMWARE_8105E_1}, - [RTL_GIGA_MAC_VER_30] = {"RTL8105e", FIRMWARE_8105E_1}, - [RTL_GIGA_MAC_VER_31] = {"RTL8168dp/8111dp" }, - [RTL_GIGA_MAC_VER_32] = {"RTL8168e/8111e", FIRMWARE_8168E_1}, - [RTL_GIGA_MAC_VER_33] = {"RTL8168e/8111e", FIRMWARE_8168E_2}, - [RTL_GIGA_MAC_VER_34] = {"RTL8168evl/8111evl", FIRMWARE_8168E_3}, - [RTL_GIGA_MAC_VER_35] = {"RTL8168f/8111f", FIRMWARE_8168F_1}, - [RTL_GIGA_MAC_VER_36] = {"RTL8168f/8111f", FIRMWARE_8168F_2}, - [RTL_GIGA_MAC_VER_37] = {"RTL8402", FIRMWARE_8402_1 }, - [RTL_GIGA_MAC_VER_38] = {"RTL8411", FIRMWARE_8411_1 }, - [RTL_GIGA_MAC_VER_39] = {"RTL8106e", FIRMWARE_8106E_1}, - [RTL_GIGA_MAC_VER_40] = {"RTL8168g/8111g", FIRMWARE_8168G_2}, - [RTL_GIGA_MAC_VER_42] = {"RTL8168gu/8111gu", FIRMWARE_8168G_3}, - [RTL_GIGA_MAC_VER_43] = {"RTL8106eus", FIRMWARE_8106E_2}, - [RTL_GIGA_MAC_VER_44] = {"RTL8411b", FIRMWARE_8411_2 }, - [RTL_GIGA_MAC_VER_46] = {"RTL8168h/8111h", FIRMWARE_8168H_2}, - [RTL_GIGA_MAC_VER_48] = {"RTL8107e", FIRMWARE_8107E_2}, - [RTL_GIGA_MAC_VER_51] = {"RTL8168ep/8111ep" }, - [RTL_GIGA_MAC_VER_52] = {"RTL8168fp/RTL8117", FIRMWARE_8168FP_3}, - [RTL_GIGA_MAC_VER_53] = {"RTL8168fp/RTL8117", }, - [RTL_GIGA_MAC_VER_61] = {"RTL8125A", FIRMWARE_8125A_3}, - /* reserve 62 for CFG_METHOD_4 in the vendor driver */ - [RTL_GIGA_MAC_VER_63] = {"RTL8125B", FIRMWARE_8125B_2}, - [RTL_GIGA_MAC_VER_64] = {"RTL8125D", FIRMWARE_8125D_1}, - [RTL_GIGA_MAC_VER_65] = {"RTL8125D", FIRMWARE_8125D_2}, - [RTL_GIGA_MAC_VER_66] = {"RTL8125BP", FIRMWARE_8125BP_2}, - [RTL_GIGA_MAC_VER_70] = {"RTL8126A", FIRMWARE_8126A_2}, - [RTL_GIGA_MAC_VER_71] = {"RTL8126A", FIRMWARE_8126A_3}, + /* 8126A family. */ + { 0x7cf, 0x64a, RTL_GIGA_MAC_VER_70, "RTL8126A", FIRMWARE_8126A_3 }, + { 0x7cf, 0x649, RTL_GIGA_MAC_VER_70, "RTL8126A", FIRMWARE_8126A_2 }, + + /* 8125BP family. */ + { 0x7cf, 0x681, RTL_GIGA_MAC_VER_66, "RTL8125BP", FIRMWARE_8125BP_2 }, + + /* 8125D family. */ + { 0x7cf, 0x689, RTL_GIGA_MAC_VER_64, "RTL8125D", FIRMWARE_8125D_2 }, + { 0x7cf, 0x688, RTL_GIGA_MAC_VER_64, "RTL8125D", FIRMWARE_8125D_1 }, + + /* 8125B family. */ + { 0x7cf, 0x641, RTL_GIGA_MAC_VER_63, "RTL8125B", FIRMWARE_8125B_2 }, + + /* 8125A family. */ + { 0x7cf, 0x609, RTL_GIGA_MAC_VER_61, "RTL8125A", FIRMWARE_8125A_3 }, + + /* RTL8117 */ + { 0x7cf, 0x54b, RTL_GIGA_MAC_VER_52, "RTL8168fp/RTL8117" }, + { 0x7cf, 0x54a, RTL_GIGA_MAC_VER_52, "RTL8168fp/RTL8117", + FIRMWARE_8168FP_3 }, + + /* 8168EP family. */ + { 0x7cf, 0x502, RTL_GIGA_MAC_VER_51, "RTL8168ep/8111ep" }, + + /* 8168H family. 
*/ + { 0x7cf, 0x541, RTL_GIGA_MAC_VER_46, "RTL8168h/8111h", + FIRMWARE_8168H_2 }, + /* Realtek calls it RTL8168M, but it's handled like RTL8168H */ + { 0x7cf, 0x6c0, RTL_GIGA_MAC_VER_46, "RTL8168M", FIRMWARE_8168H_2 }, + + /* 8168G family. */ + { 0x7cf, 0x5c8, RTL_GIGA_MAC_VER_44, "RTL8411b", FIRMWARE_8411_2 }, + { 0x7cf, 0x509, RTL_GIGA_MAC_VER_42, "RTL8168gu/8111gu", + FIRMWARE_8168G_3 }, + { 0x7cf, 0x4c0, RTL_GIGA_MAC_VER_40, "RTL8168g/8111g", + FIRMWARE_8168G_2 }, + + /* 8168F family. */ + { 0x7c8, 0x488, RTL_GIGA_MAC_VER_38, "RTL8411", FIRMWARE_8411_1 }, + { 0x7cf, 0x481, RTL_GIGA_MAC_VER_36, "RTL8168f/8111f", + FIRMWARE_8168F_2 }, + { 0x7cf, 0x480, RTL_GIGA_MAC_VER_35, "RTL8168f/8111f", + FIRMWARE_8168F_1 }, + + /* 8168E family. */ + { 0x7c8, 0x2c8, RTL_GIGA_MAC_VER_34, "RTL8168evl/8111evl", + FIRMWARE_8168E_3 }, + { 0x7cf, 0x2c1, RTL_GIGA_MAC_VER_32, "RTL8168e/8111e", + FIRMWARE_8168E_1 }, + { 0x7c8, 0x2c0, RTL_GIGA_MAC_VER_33, "RTL8168e/8111e", + FIRMWARE_8168E_2 }, + + /* 8168D family. */ + { 0x7cf, 0x281, RTL_GIGA_MAC_VER_25, "RTL8168d/8111d", + FIRMWARE_8168D_1 }, + { 0x7c8, 0x280, RTL_GIGA_MAC_VER_26, "RTL8168d/8111d", + FIRMWARE_8168D_2 }, + + /* 8168DP family. */ + { 0x7cf, 0x28a, RTL_GIGA_MAC_VER_28, "RTL8168dp/8111dp" }, + { 0x7cf, 0x28b, RTL_GIGA_MAC_VER_31, "RTL8168dp/8111dp" }, + + /* 8168C family. */ + { 0x7cf, 0x3c9, RTL_GIGA_MAC_VER_23, "RTL8168cp/8111cp" }, + { 0x7cf, 0x3c8, RTL_GIGA_MAC_VER_18, "RTL8168cp/8111cp" }, + { 0x7c8, 0x3c8, RTL_GIGA_MAC_VER_24, "RTL8168cp/8111cp" }, + { 0x7cf, 0x3c0, RTL_GIGA_MAC_VER_19, "RTL8168c/8111c" }, + { 0x7cf, 0x3c2, RTL_GIGA_MAC_VER_20, "RTL8168c/8111c" }, + { 0x7cf, 0x3c3, RTL_GIGA_MAC_VER_21, "RTL8168c/8111c" }, + { 0x7c8, 0x3c0, RTL_GIGA_MAC_VER_22, "RTL8168c/8111c" }, + + /* 8168B family. */ + { 0x7c8, 0x380, RTL_GIGA_MAC_VER_17, "RTL8168b/8111b" }, + /* This one is very old and rare, support has been removed. + * { 0x7c8, 0x300, RTL_GIGA_MAC_VER_11, "RTL8168b/8111b" }, + */ + + /* 8101 family. */ + { 0x7c8, 0x448, RTL_GIGA_MAC_VER_39, "RTL8106e", FIRMWARE_8106E_1 }, + { 0x7c8, 0x440, RTL_GIGA_MAC_VER_37, "RTL8402", FIRMWARE_8402_1 }, + { 0x7cf, 0x409, RTL_GIGA_MAC_VER_29, "RTL8105e", FIRMWARE_8105E_1 }, + { 0x7c8, 0x408, RTL_GIGA_MAC_VER_30, "RTL8105e", FIRMWARE_8105E_1 }, + { 0x7cf, 0x349, RTL_GIGA_MAC_VER_08, "RTL8102e" }, + { 0x7cf, 0x249, RTL_GIGA_MAC_VER_08, "RTL8102e" }, + { 0x7cf, 0x348, RTL_GIGA_MAC_VER_07, "RTL8102e" }, + { 0x7cf, 0x248, RTL_GIGA_MAC_VER_07, "RTL8102e" }, + { 0x7cf, 0x240, RTL_GIGA_MAC_VER_14, "RTL8401" }, + { 0x7c8, 0x348, RTL_GIGA_MAC_VER_09, "RTL8102e/RTL8103e" }, + { 0x7c8, 0x248, RTL_GIGA_MAC_VER_09, "RTL8102e/RTL8103e" }, + { 0x7c8, 0x340, RTL_GIGA_MAC_VER_10, "RTL8101e/RTL8100e" }, + + /* 8110 family. 
*/ + { 0xfc8, 0x980, RTL_GIGA_MAC_VER_06, "RTL8169sc/8110sc" }, + { 0xfc8, 0x180, RTL_GIGA_MAC_VER_05, "RTL8169sc/8110sc" }, + { 0xfc8, 0x100, RTL_GIGA_MAC_VER_04, "RTL8169sb/8110sb" }, + { 0xfc8, 0x040, RTL_GIGA_MAC_VER_03, "RTL8110s" }, + { 0xfc8, 0x008, RTL_GIGA_MAC_VER_02, "RTL8169s" }, + + /* Catch-all */ + { 0x000, 0x000, RTL_GIGA_MAC_NONE } }; static const struct pci_device_id rtl8169_pci_tbl[] = { @@ -777,7 +830,7 @@ static bool rtl_is_8168evl_up(struct rtl8169_private *tp) { return tp->mac_version >= RTL_GIGA_MAC_VER_34 && tp->mac_version != RTL_GIGA_MAC_VER_39 && - tp->mac_version <= RTL_GIGA_MAC_VER_53; + tp->mac_version <= RTL_GIGA_MAC_VER_52; } static bool rtl_supports_eee(struct rtl8169_private *tp) @@ -945,9 +998,7 @@ void r8169_get_led_name(struct rtl8169_private *tp, int idx, static void r8168fp_adjust_ocp_cmd(struct rtl8169_private *tp, u32 *cmd, int type) { /* based on RTL8168FP_OOBMAC_BASE in vendor driver */ - if (type == ERIAR_OOB && - (tp->mac_version == RTL_GIGA_MAC_VER_52 || - tp->mac_version == RTL_GIGA_MAC_VER_53)) + if (type == ERIAR_OOB && tp->mac_version == RTL_GIGA_MAC_VER_52) *cmd |= 0xf70 << 18; } @@ -1236,7 +1287,7 @@ static void rtl_writephy(struct rtl8169_private *tp, int location, int val) case RTL_GIGA_MAC_VER_31: r8168dp_2_mdio_write(tp, location, val); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_71: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_LAST: r8168g_mdio_write(tp, location, val); break; default: @@ -1251,7 +1302,7 @@ static int rtl_readphy(struct rtl8169_private *tp, int location) case RTL_GIGA_MAC_VER_28: case RTL_GIGA_MAC_VER_31: return r8168dp_2_mdio_read(tp, location); - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_71: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_LAST: return r8168g_mdio_read(tp, location); default: return r8169_mdio_read(tp, location); @@ -1447,7 +1498,7 @@ static enum rtl_dash_type rtl_get_dash_type(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_28: case RTL_GIGA_MAC_VER_31: return RTL_DASH_DP; - case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_53: + case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_52: return RTL_DASH_EP; case RTL_GIGA_MAC_VER_66: return RTL_DASH_25_BP; @@ -1603,7 +1654,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) break; case RTL_GIGA_MAC_VER_34: case RTL_GIGA_MAC_VER_37: - case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_71: + case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_LAST: r8169_mod_reg8_cond(tp, Config2, PME_SIGNAL, wolopts); break; default: @@ -2076,7 +2127,7 @@ static void rtl_set_eee_txidle_timer(struct rtl8169_private *tp) tp->tx_lpi_timer = timer_val; r8168_mac_ocp_write(tp, 0xe048, timer_val); break; - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_71: + case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_LAST: tp->tx_lpi_timer = timer_val; RTL_W16(tp, EEE_TXIDLE_TIMER_8125, timer_val); break; @@ -2265,151 +2316,30 @@ static const struct ethtool_ops rtl8169_ethtool_ops = { .get_eth_ctrl_stats = rtl8169_get_eth_ctrl_stats, }; -static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii) +static const struct rtl_chip_info *rtl8169_get_chip_version(u16 xid, bool gmii) { - /* - * The driver currently handles the 8168Bf and the 8168Be identically - * but they can be identified more specifically through the test below - * if needed: - * - * (RTL_R32(tp, TxConfig) & 0x700000) == 0x500000 ? 8168Bf : 8168Be - * - * Same thing for the 8101Eb and the 8101Ec: - * - * (RTL_R32(tp, TxConfig) & 0x700000) == 0x200000 ? 
8101Eb : 8101Ec - */ - static const struct rtl_mac_info { - u16 mask; - u16 val; - enum mac_version ver; - } mac_info[] = { - /* 8126A family. */ - { 0x7cf, 0x64a, RTL_GIGA_MAC_VER_71 }, - { 0x7cf, 0x649, RTL_GIGA_MAC_VER_70 }, - - /* 8125BP family. */ - { 0x7cf, 0x681, RTL_GIGA_MAC_VER_66 }, - - /* 8125D family. */ - { 0x7cf, 0x689, RTL_GIGA_MAC_VER_65 }, - { 0x7cf, 0x688, RTL_GIGA_MAC_VER_64 }, - - /* 8125B family. */ - { 0x7cf, 0x641, RTL_GIGA_MAC_VER_63 }, - - /* 8125A family. */ - { 0x7cf, 0x609, RTL_GIGA_MAC_VER_61 }, - /* It seems only XID 609 made it to the mass market. - * { 0x7cf, 0x608, RTL_GIGA_MAC_VER_60 }, - * { 0x7c8, 0x608, RTL_GIGA_MAC_VER_61 }, - */ - - /* RTL8117 */ - { 0x7cf, 0x54b, RTL_GIGA_MAC_VER_53 }, - { 0x7cf, 0x54a, RTL_GIGA_MAC_VER_52 }, - - /* 8168EP family. */ - { 0x7cf, 0x502, RTL_GIGA_MAC_VER_51 }, - /* It seems this chip version never made it to - * the wild. Let's disable detection. - * { 0x7cf, 0x501, RTL_GIGA_MAC_VER_50 }, - * { 0x7cf, 0x500, RTL_GIGA_MAC_VER_49 }, - */ - - /* 8168H family. */ - { 0x7cf, 0x541, RTL_GIGA_MAC_VER_46 }, - /* It seems this chip version never made it to - * the wild. Let's disable detection. - * { 0x7cf, 0x540, RTL_GIGA_MAC_VER_45 }, - */ - /* Realtek calls it RTL8168M, but it's handled like RTL8168H */ - { 0x7cf, 0x6c0, RTL_GIGA_MAC_VER_46 }, - - /* 8168G family. */ - { 0x7cf, 0x5c8, RTL_GIGA_MAC_VER_44 }, - { 0x7cf, 0x509, RTL_GIGA_MAC_VER_42 }, - /* It seems this chip version never made it to - * the wild. Let's disable detection. - * { 0x7cf, 0x4c1, RTL_GIGA_MAC_VER_41 }, - */ - { 0x7cf, 0x4c0, RTL_GIGA_MAC_VER_40 }, - - /* 8168F family. */ - { 0x7c8, 0x488, RTL_GIGA_MAC_VER_38 }, - { 0x7cf, 0x481, RTL_GIGA_MAC_VER_36 }, - { 0x7cf, 0x480, RTL_GIGA_MAC_VER_35 }, - - /* 8168E family. */ - { 0x7c8, 0x2c8, RTL_GIGA_MAC_VER_34 }, - { 0x7cf, 0x2c1, RTL_GIGA_MAC_VER_32 }, - { 0x7c8, 0x2c0, RTL_GIGA_MAC_VER_33 }, - - /* 8168D family. */ - { 0x7cf, 0x281, RTL_GIGA_MAC_VER_25 }, - { 0x7c8, 0x280, RTL_GIGA_MAC_VER_26 }, - - /* 8168DP family. */ - /* It seems this early RTL8168dp version never made it to - * the wild. Support has been removed. - * { 0x7cf, 0x288, RTL_GIGA_MAC_VER_27 }, - */ - { 0x7cf, 0x28a, RTL_GIGA_MAC_VER_28 }, - { 0x7cf, 0x28b, RTL_GIGA_MAC_VER_31 }, - - /* 8168C family. */ - { 0x7cf, 0x3c9, RTL_GIGA_MAC_VER_23 }, - { 0x7cf, 0x3c8, RTL_GIGA_MAC_VER_18 }, - { 0x7c8, 0x3c8, RTL_GIGA_MAC_VER_24 }, - { 0x7cf, 0x3c0, RTL_GIGA_MAC_VER_19 }, - { 0x7cf, 0x3c2, RTL_GIGA_MAC_VER_20 }, - { 0x7cf, 0x3c3, RTL_GIGA_MAC_VER_21 }, - { 0x7c8, 0x3c0, RTL_GIGA_MAC_VER_22 }, - - /* 8168B family. */ - { 0x7c8, 0x380, RTL_GIGA_MAC_VER_17 }, - /* This one is very old and rare, support has been removed. - * { 0x7c8, 0x300, RTL_GIGA_MAC_VER_11 }, - */ - - /* 8101 family. */ - { 0x7c8, 0x448, RTL_GIGA_MAC_VER_39 }, - { 0x7c8, 0x440, RTL_GIGA_MAC_VER_37 }, - { 0x7cf, 0x409, RTL_GIGA_MAC_VER_29 }, - { 0x7c8, 0x408, RTL_GIGA_MAC_VER_30 }, - { 0x7cf, 0x349, RTL_GIGA_MAC_VER_08 }, - { 0x7cf, 0x249, RTL_GIGA_MAC_VER_08 }, - { 0x7cf, 0x348, RTL_GIGA_MAC_VER_07 }, - { 0x7cf, 0x248, RTL_GIGA_MAC_VER_07 }, - { 0x7cf, 0x240, RTL_GIGA_MAC_VER_14 }, - { 0x7c8, 0x348, RTL_GIGA_MAC_VER_09 }, - { 0x7c8, 0x248, RTL_GIGA_MAC_VER_09 }, - { 0x7c8, 0x340, RTL_GIGA_MAC_VER_10 }, - - /* 8110 family. 
*/ - { 0xfc8, 0x980, RTL_GIGA_MAC_VER_06 }, - { 0xfc8, 0x180, RTL_GIGA_MAC_VER_05 }, - { 0xfc8, 0x100, RTL_GIGA_MAC_VER_04 }, - { 0xfc8, 0x040, RTL_GIGA_MAC_VER_03 }, - { 0xfc8, 0x008, RTL_GIGA_MAC_VER_02 }, - - /* Catch-all */ - { 0x000, 0x000, RTL_GIGA_MAC_NONE } + /* Chips combining a 1Gbps MAC with a 100Mbps PHY */ + static const struct rtl_chip_info rtl8106eus_info = { + .mac_version = RTL_GIGA_MAC_VER_43, + .name = "RTL8106eus", + .fw_name = FIRMWARE_8106E_2, + }; + static const struct rtl_chip_info rtl8107e_info = { + .mac_version = RTL_GIGA_MAC_VER_48, + .name = "RTL8107e", + .fw_name = FIRMWARE_8107E_2, }; - const struct rtl_mac_info *p = mac_info; - enum mac_version ver; + const struct rtl_chip_info *p = rtl_chip_infos; while ((xid & p->mask) != p->val) p++; - ver = p->ver; - if (ver != RTL_GIGA_MAC_NONE && !gmii) { - if (ver == RTL_GIGA_MAC_VER_42) - ver = RTL_GIGA_MAC_VER_43; - else if (ver == RTL_GIGA_MAC_VER_46) - ver = RTL_GIGA_MAC_VER_48; - } + if (p->mac_version == RTL_GIGA_MAC_VER_42 && !gmii) + return &rtl8106eus_info; + if (p->mac_version == RTL_GIGA_MAC_VER_46 && !gmii) + return &rtl8107e_info; - return ver; + return p; } static void rtl_release_firmware(struct rtl8169_private *tp) @@ -2553,13 +2483,13 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_38: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_52: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF); break; case RTL_GIGA_MAC_VER_61: RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST); break; - case RTL_GIGA_MAC_VER_63 ... RTL_GIGA_MAC_VER_71: + case RTL_GIGA_MAC_VER_63 ... RTL_GIGA_MAC_VER_LAST: RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST | RX_PAUSE_SLOT_ON); break; @@ -2684,14 +2614,14 @@ DECLARE_RTL_COND(rtl_rxtx_empty_cond_2) static void rtl_wait_txrx_fifo_empty(struct rtl8169_private *tp) { switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_52: rtl_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 42); rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42); break; case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_61: rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42); break; - case RTL_GIGA_MAC_VER_63 ... RTL_GIGA_MAC_VER_71: + case RTL_GIGA_MAC_VER_63 ... 
RTL_GIGA_MAC_VER_LAST: RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq); rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42); rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond_2, 100, 42); @@ -2852,10 +2782,23 @@ static u32 rtl_csi_read(struct rtl8169_private *tp, int addr) RTL_R32(tp, CSIDR) : ~0; } +static void rtl_csi_mod(struct rtl8169_private *tp, int addr, + u32 mask, u32 set) +{ + u32 val; + + WARN(addr % 4, "Invalid CSI address %#x\n", addr); + + netdev_notice_once(tp->dev, + "No native access to PCI extended config space, falling back to CSI\n"); + + val = rtl_csi_read(tp, addr); + rtl_csi_write(tp, addr, (val & ~mask) | set); +} + static void rtl_disable_zrxdc_timeout(struct rtl8169_private *tp) { struct pci_dev *pdev = tp->pci_dev; - u32 csi; int rc; u8 val; @@ -2872,16 +2815,12 @@ static void rtl_disable_zrxdc_timeout(struct rtl8169_private *tp) } } - netdev_notice_once(tp->dev, - "No native access to PCI extended config space, falling back to CSI\n"); - csi = rtl_csi_read(tp, RTL_GEN3_RELATED_OFF); - rtl_csi_write(tp, RTL_GEN3_RELATED_OFF, csi & ~RTL_GEN3_ZRXDC_NONCOMPL); + rtl_csi_mod(tp, RTL_GEN3_RELATED_OFF, RTL_GEN3_ZRXDC_NONCOMPL, 0); } static void rtl_set_aspm_entry_latency(struct rtl8169_private *tp, u8 val) { struct pci_dev *pdev = tp->pci_dev; - u32 csi; /* According to Realtek the value at config space address 0x070f * controls the L0s/L1 entrance latency. We try standard ECAM access @@ -2893,10 +2832,7 @@ static void rtl_set_aspm_entry_latency(struct rtl8169_private *tp, u8 val) pci_write_config_byte(pdev, 0x070f, val) == PCIBIOS_SUCCESSFUL) return; - netdev_notice_once(tp->dev, - "No native access to PCI extended config space, falling back to CSI\n"); - csi = rtl_csi_read(tp, 0x070c) & 0x00ffffff; - rtl_csi_write(tp, 0x070c, csi | val << 24); + rtl_csi_mod(tp, 0x070c, 0xff000000, val << 24); } static void rtl_set_def_aspm_entry_latency(struct rtl8169_private *tp) @@ -2960,7 +2896,7 @@ static void rtl_enable_exit_l1(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_37 ... RTL_GIGA_MAC_VER_38: rtl_eri_set_bits(tp, 0xd4, 0x0c00); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_71: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_LAST: r8168_mac_ocp_modify(tp, 0xc0ac, 0, 0x1f80); break; default: @@ -2974,7 +2910,7 @@ static void rtl_disable_exit_l1(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38: rtl_eri_clear_bits(tp, 0xd4, 0x1f00); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_71: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_LAST: r8168_mac_ocp_modify(tp, 0xc0ac, 0x1f80, 0); break; default: @@ -3001,7 +2937,6 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) rtl_mod_config5(tp, 0, ASPM_en); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_70: - case RTL_GIGA_MAC_VER_71: val8 = RTL_R8(tp, INT_CFG0_8125) | INT_CFG0_CLKREQEN; RTL_W8(tp, INT_CFG0_8125, val8); break; @@ -3012,7 +2947,7 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) switch (tp->mac_version) { case RTL_GIGA_MAC_VER_46 ... RTL_GIGA_MAC_VER_48: - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_71: + case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_LAST: /* reset ephy tx/rx disable timer */ r8168_mac_ocp_modify(tp, 0xe094, 0xff00, 0); /* chip can trigger L1.2 */ @@ -3024,7 +2959,7 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) } else { switch (tp->mac_version) { case RTL_GIGA_MAC_VER_46 ... RTL_GIGA_MAC_VER_48: - case RTL_GIGA_MAC_VER_61 ... 
RTL_GIGA_MAC_VER_71: + case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_LAST: r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, 0); break; default: @@ -3033,7 +2968,6 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) switch (tp->mac_version) { case RTL_GIGA_MAC_VER_70: - case RTL_GIGA_MAC_VER_71: val8 = RTL_R8(tp, INT_CFG0_8125) & ~INT_CFG0_CLKREQEN; RTL_W8(tp, INT_CFG0_8125, val8); break; @@ -3753,12 +3687,10 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp) /* disable new tx descriptor format */ r8168_mac_ocp_modify(tp, 0xeb58, 0x0001, 0x0000); - if (tp->mac_version == RTL_GIGA_MAC_VER_70 || - tp->mac_version == RTL_GIGA_MAC_VER_71) + if (tp->mac_version == RTL_GIGA_MAC_VER_70) RTL_W8(tp, 0xD8, RTL_R8(tp, 0xD8) & ~0x02); - if (tp->mac_version == RTL_GIGA_MAC_VER_70 || - tp->mac_version == RTL_GIGA_MAC_VER_71) + if (tp->mac_version == RTL_GIGA_MAC_VER_70) r8168_mac_ocp_modify(tp, 0xe614, 0x0700, 0x0400); else if (tp->mac_version == RTL_GIGA_MAC_VER_63) r8168_mac_ocp_modify(tp, 0xe614, 0x0700, 0x0200); @@ -3776,8 +3708,7 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp) r8168_mac_ocp_modify(tp, 0xe056, 0x00f0, 0x0030); r8168_mac_ocp_modify(tp, 0xe040, 0x1000, 0x0000); r8168_mac_ocp_modify(tp, 0xea1c, 0x0003, 0x0001); - if (tp->mac_version == RTL_GIGA_MAC_VER_70 || - tp->mac_version == RTL_GIGA_MAC_VER_71) + if (tp->mac_version == RTL_GIGA_MAC_VER_70) r8168_mac_ocp_modify(tp, 0xea1c, 0x0300, 0x0000); else r8168_mac_ocp_modify(tp, 0xea1c, 0x0004, 0x0000); @@ -3893,14 +3824,11 @@ static void rtl_hw_config(struct rtl8169_private *tp) [RTL_GIGA_MAC_VER_48] = rtl_hw_start_8168h_1, [RTL_GIGA_MAC_VER_51] = rtl_hw_start_8168ep_3, [RTL_GIGA_MAC_VER_52] = rtl_hw_start_8117, - [RTL_GIGA_MAC_VER_53] = rtl_hw_start_8117, [RTL_GIGA_MAC_VER_61] = rtl_hw_start_8125a_2, [RTL_GIGA_MAC_VER_63] = rtl_hw_start_8125b, [RTL_GIGA_MAC_VER_64] = rtl_hw_start_8125d, - [RTL_GIGA_MAC_VER_65] = rtl_hw_start_8125d, [RTL_GIGA_MAC_VER_66] = rtl_hw_start_8125d, [RTL_GIGA_MAC_VER_70] = rtl_hw_start_8126a, - [RTL_GIGA_MAC_VER_71] = rtl_hw_start_8126a, }; if (hw_configs[tp->mac_version]) @@ -3917,14 +3845,12 @@ static void rtl_hw_start_8125(struct rtl8169_private *tp) switch (tp->mac_version) { case RTL_GIGA_MAC_VER_61: case RTL_GIGA_MAC_VER_64: - case RTL_GIGA_MAC_VER_65: case RTL_GIGA_MAC_VER_66: for (i = 0xa00; i < 0xb00; i += 4) RTL_W32(tp, i, 0); break; case RTL_GIGA_MAC_VER_63: case RTL_GIGA_MAC_VER_70: - case RTL_GIGA_MAC_VER_71: for (i = 0xa00; i < 0xa80; i += 4) RTL_W32(tp, i, 0); RTL_W16(tp, INT_CFG1_8125, 0x0000); @@ -4156,7 +4082,7 @@ static void rtl8169_cleanup(struct rtl8169_private *tp) RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq); rtl_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_71: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_LAST: rtl_enable_rxdvgate(tp); fsleep(2000); break; @@ -4313,7 +4239,7 @@ static unsigned int rtl_quirk_packet_padto(struct rtl8169_private *tp, switch (tp->mac_version) { case RTL_GIGA_MAC_VER_34: - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_71: + case RTL_GIGA_MAC_VER_61 ... 
RTL_GIGA_MAC_VER_LAST: padto = max_t(unsigned int, padto, ETH_ZLEN); break; default: @@ -5101,10 +5027,8 @@ static void rtl_shutdown(struct pci_dev *pdev) /* Restore original MAC address */ rtl_rar_set(tp, tp->dev->perm_addr); - if (system_state == SYSTEM_POWER_OFF && !tp->dash_enabled) { - pci_wake_from_d3(pdev, tp->saved_wolopts); - pci_set_power_state(pdev, PCI_D3hot); - } + if (system_state == SYSTEM_POWER_OFF && !tp->dash_enabled) + pci_prepare_to_sleep(pdev); } static void rtl_remove_one(struct pci_dev *pdev) @@ -5358,13 +5282,13 @@ static void rtl_hw_init_8125(struct rtl8169_private *tp) static void rtl_hw_initialize(struct rtl8169_private *tp) { switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_53: + case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_52: rtl8168ep_stop_cmac(tp); fallthrough; case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_48: rtl_hw_init_8168g(tp); break; - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_71: + case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_LAST: rtl_hw_init_8125(tp); break; default: @@ -5389,7 +5313,7 @@ static int rtl_jumbo_max(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_24: return JUMBO_6K; /* RTL8125/8126 */ - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_71: + case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_LAST: return JUMBO_16K; default: return JUMBO_9K; @@ -5434,9 +5358,9 @@ static bool rtl_aspm_is_safe(struct rtl8169_private *tp) static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { + const struct rtl_chip_info *chip; struct rtl8169_private *tp; int jumbo_max, region, rc; - enum mac_version chipset; struct net_device *dev; u32 txconfig; u16 xid; @@ -5486,12 +5410,13 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) xid = (txconfig >> 20) & 0xfcf; /* Identify chip attached to board */ - chipset = rtl8169_get_mac_version(xid, tp->supports_gmii); - if (chipset == RTL_GIGA_MAC_NONE) + chip = rtl8169_get_chip_version(xid, tp->supports_gmii); + if (chip->mac_version == RTL_GIGA_MAC_NONE) return dev_err_probe(&pdev->dev, -ENODEV, "unknown chip XID %03x, contact r8169 maintainers (see MAINTAINERS file)\n", xid); - tp->mac_version = chipset; + tp->mac_version = chip->mac_version; + tp->fw_name = chip->fw_name; /* Disable ASPM L1 as that cause random device stop working * problems as well as full system hangs for some PCIe devices users. 
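The probe path above now resolves everything about a chip from a single ordered table: rtl8169_get_chip_version() walks rtl_chip_infos[] until the masked XID matches an entry, and the zero-mask catch-all at the end guarantees the walk terminates on RTL_GIGA_MAC_NONE. The following is a minimal, self-contained sketch of that lookup pattern, not the driver code; the table contents and names here are illustrative.

#include <stdio.h>

/* Standalone sketch of the (mask, val) lookup used by
 * rtl8169_get_chip_version(): the first entry whose masked XID
 * matches wins, so specific masks (0x7cf) must precede broader
 * ones (0x7c8) covering the same values, and the zero-mask
 * catch-all matches everything, ending the walk.
 */
struct chip_info {
	unsigned short mask;
	unsigned short val;
	const char *name;	/* NULL stands in for RTL_GIGA_MAC_NONE */
};

static const struct chip_info chips[] = {
	{ 0x7cf, 0x64a, "RTL8126A" },
	{ 0x7cf, 0x641, "RTL8125B" },
	{ 0x7c8, 0x640, "illustrative broader entry" },
	{ 0x000, 0x000, NULL },	/* catch-all terminator */
};

static const struct chip_info *chip_lookup(unsigned short xid)
{
	const struct chip_info *p = chips;

	while ((xid & p->mask) != p->val)
		p++;
	return p;
}

int main(void)
{
	const struct chip_info *p;

	printf("%s\n", chip_lookup(0x641)->name);	/* exact 0x7cf hit */
	printf("%s\n", chip_lookup(0x643)->name);	/* broader 0x7c8 hit */

	p = chip_lookup(0x123);				/* reaches catch-all */
	printf("%s\n", p->name ? p->name : "none");
	return 0;
}

Merging the old name/firmware array into this detection table is what lets the patch drop the duplicate XID list from rtl8169_get_mac_version() and retire the unused RTL_GIGA_MAC_VER_53/65/71 enum values.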
@@ -5596,8 +5521,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rtl_set_irq_mask(tp); - tp->fw_name = rtl_chip_infos[chipset].fw_name; - tp->counters = dmam_alloc_coherent (&pdev->dev, sizeof(*tp->counters), &tp->counters_phys_addr, GFP_KERNEL); @@ -5622,7 +5545,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } netdev_info(dev, "%s, %pM, XID %03x, IRQ %d\n", - rtl_chip_infos[chipset].name, dev->dev_addr, xid, tp->irq); + chip->name, dev->dev_addr, xid, tp->irq); if (jumbo_max) netdev_info(dev, "jumbo features [frames: %d bytes, tx checksumming: %s]\n", diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c index cf95e579c65d..5403f8202c79 100644 --- a/drivers/net/ethernet/realtek/r8169_phy_config.c +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -50,6 +50,15 @@ static void r8168g_phy_param(struct phy_device *phydev, u16 parm, phy_restore_page(phydev, oldpage, 0); } +static void rtl8125_phy_param(struct phy_device *phydev, u16 parm, + u16 mask, u16 val) +{ + phy_lock_mdio_bus(phydev); + __phy_write_mmd(phydev, MDIO_MMD_VEND2, 0xb87c, parm); + __phy_modify_mmd(phydev, MDIO_MMD_VEND2, 0xb87e, mask, val); + phy_unlock_mdio_bus(phydev); +} + struct phy_reg { u16 reg; u16 val; @@ -1004,12 +1013,8 @@ static void rtl8125a_2_hw_phy_config(struct rtl8169_private *tp, phy_write_paged(phydev, 0xac5, 0x16, 0x01ff); phy_modify_paged(phydev, 0xac8, 0x15, 0x00f0, 0x0030); - phy_write(phydev, 0x1f, 0x0b87); - phy_write(phydev, 0x16, 0x80a2); - phy_write(phydev, 0x17, 0x0153); - phy_write(phydev, 0x16, 0x809c); - phy_write(phydev, 0x17, 0x0153); - phy_write(phydev, 0x1f, 0x0000); + rtl8125_phy_param(phydev, 0x80a2, 0xffff, 0x0153); + rtl8125_phy_param(phydev, 0x809c, 0xffff, 0x0153); phy_write(phydev, 0x1f, 0x0a43); phy_write(phydev, 0x13, 0x81B3); @@ -1061,14 +1066,9 @@ static void rtl8125b_hw_phy_config(struct rtl8169_private *tp, phy_modify_paged(phydev, 0xac4, 0x13, 0x00f0, 0x0090); phy_modify_paged(phydev, 0xad3, 0x10, 0x0003, 0x0001); - phy_write(phydev, 0x1f, 0x0b87); - phy_write(phydev, 0x16, 0x80f5); - phy_write(phydev, 0x17, 0x760e); - phy_write(phydev, 0x16, 0x8107); - phy_write(phydev, 0x17, 0x360e); - phy_write(phydev, 0x16, 0x8551); - phy_modify(phydev, 0x17, 0xff00, 0x0800); - phy_write(phydev, 0x1f, 0x0000); + rtl8125_phy_param(phydev, 0x80f5, 0xffff, 0x760e); + rtl8125_phy_param(phydev, 0x8107, 0xffff, 0x360e); + rtl8125_phy_param(phydev, 0x8551, 0xff00, 0x0800); phy_modify_paged(phydev, 0xbf0, 0x10, 0xe000, 0xa000); phy_modify_paged(phydev, 0xbf4, 0x13, 0x0f00, 0x0300); @@ -1110,12 +1110,8 @@ static void rtl8125bp_hw_phy_config(struct rtl8169_private *tp, r8168g_phy_param(phydev, 0x8010, 0x0800, 0x0000); - phy_write(phydev, 0x1f, 0x0b87); - phy_write(phydev, 0x16, 0x8088); - phy_modify(phydev, 0x17, 0xff00, 0x9000); - phy_write(phydev, 0x16, 0x808f); - phy_modify(phydev, 0x17, 0xff00, 0x9000); - phy_write(phydev, 0x1f, 0x0000); + rtl8125_phy_param(phydev, 0x8088, 0xff00, 0x9000); + rtl8125_phy_param(phydev, 0x808f, 0xff00, 0x9000); r8168g_phy_param(phydev, 0x8174, 0x2000, 0x1800); @@ -1180,14 +1176,11 @@ void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev, [RTL_GIGA_MAC_VER_48] = rtl8168h_2_hw_phy_config, [RTL_GIGA_MAC_VER_51] = rtl8168ep_2_hw_phy_config, [RTL_GIGA_MAC_VER_52] = rtl8117_hw_phy_config, - [RTL_GIGA_MAC_VER_53] = rtl8117_hw_phy_config, [RTL_GIGA_MAC_VER_61] = rtl8125a_2_hw_phy_config, [RTL_GIGA_MAC_VER_63] = 
rtl8125b_hw_phy_config, [RTL_GIGA_MAC_VER_64] = rtl8125d_hw_phy_config, - [RTL_GIGA_MAC_VER_65] = rtl8125d_hw_phy_config, [RTL_GIGA_MAC_VER_66] = rtl8125bp_hw_phy_config, [RTL_GIGA_MAC_VER_70] = rtl8126a_hw_phy_config, - [RTL_GIGA_MAC_VER_71] = rtl8126a_hw_phy_config, }; if (phy_configs[ver]) diff --git a/drivers/net/ethernet/realtek/rtase/rtase.h b/drivers/net/ethernet/realtek/rtase/rtase.h index 2bbfcad613ab..498cfe4d0cac 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase.h +++ b/drivers/net/ethernet/realtek/rtase/rtase.h @@ -170,6 +170,7 @@ enum rtase_registers { #define RTASE_TC_MODE_MASK GENMASK(11, 10) RTASE_TOKSEL = 0x2046, + RTASE_TXQCRDT_0 = 0x2500, RTASE_RFIFONFULL = 0x4406, RTASE_INT_MITI_TX = 0x0A00, RTASE_INT_MITI_RX = 0x0A80, @@ -259,6 +260,12 @@ union rtase_rx_desc { #define RTASE_VLAN_TAG_MASK GENMASK(15, 0) #define RTASE_RX_PKT_SIZE_MASK GENMASK(13, 0) +/* txqos hardware definitions */ +#define RTASE_1T_CLOCK 64 +#define RTASE_1T_POWER 10000000 +#define RTASE_IDLESLOPE_INT_SHIFT 25 +#define RTASE_IDLESLOPE_INT_MASK GENMASK(31, 25) + #define RTASE_IVEC_NAME_SIZE (IFNAMSIZ + 10) struct rtase_int_vector { @@ -294,6 +301,13 @@ struct rtase_ring { u64 alloc_fail; }; +struct rtase_txqos { + int hicredit; + int locredit; + int idleslope; + int sendslope; +}; + struct rtase_stats { u64 tx_dropped; u64 rx_dropped; @@ -313,6 +327,7 @@ struct rtase_private { struct page_pool *page_pool; struct rtase_ring tx_ring[RTASE_NUM_TX_QUEUE]; + struct rtase_txqos tx_qos[RTASE_NUM_TX_QUEUE]; struct rtase_ring rx_ring[RTASE_NUM_RX_QUEUE]; struct rtase_counters *tally_vaddr; dma_addr_t tally_paddr; diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index 55b8d3666153..0efe7668e498 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -1114,7 +1114,7 @@ static int rtase_open(struct net_device *dev) /* request other interrupts to handle multiqueue */ for (i = 1; i < tp->int_nums; i++) { ivec = &tp->int_vector[i]; - snprintf(ivec->name, sizeof(ivec->name), "%s_int%i", + snprintf(ivec->name, sizeof(ivec->name), "%s_int%u", tp->dev->name, i); ret = request_irq(ivec->irq, rtase_q_interrupt, 0, ivec->name, ivec); @@ -1661,6 +1661,65 @@ static void rtase_get_stats64(struct net_device *dev, stats->rx_length_errors = tp->stats.rx_length_errors; } +static void rtase_set_hw_cbs(const struct rtase_private *tp, u32 queue) +{ + u32 idle = tp->tx_qos[queue].idleslope * RTASE_1T_CLOCK; + u32 val, i; + + val = u32_encode_bits(idle / RTASE_1T_POWER, RTASE_IDLESLOPE_INT_MASK); + idle %= RTASE_1T_POWER; + + for (i = 1; i <= RTASE_IDLESLOPE_INT_SHIFT; i++) { + idle *= 2; + if ((idle / RTASE_1T_POWER) == 1) + val |= BIT(RTASE_IDLESLOPE_INT_SHIFT - i); + + idle %= RTASE_1T_POWER; + } + + rtase_w32(tp, RTASE_TXQCRDT_0 + queue * 4, val); +} + +static int rtase_setup_tc_cbs(struct rtase_private *tp, + const struct tc_cbs_qopt_offload *qopt) +{ + int queue = qopt->queue; + + if (queue < 0 || queue >= tp->func_tx_queue_num) + return -EINVAL; + + if (!qopt->enable) { + tp->tx_qos[queue].hicredit = 0; + tp->tx_qos[queue].locredit = 0; + tp->tx_qos[queue].idleslope = 0; + tp->tx_qos[queue].sendslope = 0; + + rtase_w32(tp, RTASE_TXQCRDT_0 + queue * 4, 0); + } else { + tp->tx_qos[queue].hicredit = qopt->hicredit; + tp->tx_qos[queue].locredit = qopt->locredit; + tp->tx_qos[queue].idleslope = qopt->idleslope; + tp->tx_qos[queue].sendslope = qopt->sendslope; + + rtase_set_hw_cbs(tp, queue); + } + + return 0; 
+} + +static int rtase_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct rtase_private *tp = netdev_priv(dev); + + switch (type) { + case TC_SETUP_QDISC_CBS: + return rtase_setup_tc_cbs(tp, type_data); + default: + return -EOPNOTSUPP; + } +} + static netdev_features_t rtase_fix_features(struct net_device *dev, netdev_features_t features) { @@ -1696,6 +1755,7 @@ static const struct net_device_ops rtase_netdev_ops = { .ndo_change_mtu = rtase_change_mtu, .ndo_tx_timeout = rtase_tx_timeout, .ndo_get_stats64 = rtase_get_stats64, + .ndo_setup_tc = rtase_setup_tc, .ndo_fix_features = rtase_fix_features, .ndo_set_features = rtase_set_features, }; diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c index b4365906669f..226c6c0ab945 100644 --- a/drivers/net/ethernet/renesas/ravb_ptp.c +++ b/drivers/net/ethernet/renesas/ravb_ptp.c @@ -176,12 +176,6 @@ static int ravb_ptp_extts(struct ptp_clock_info *ptp, struct net_device *ndev = priv->ndev; unsigned long flags; - /* Reject requests with unsupported flags */ - if (req->flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_FALLING_EDGE)) - return -EOPNOTSUPP; - if (req->index) return -EINVAL; @@ -212,10 +206,6 @@ static int ravb_ptp_perout(struct ptp_clock_info *ptp, unsigned long flags; int error = 0; - /* Reject requests with unsupported flags */ - if (req->flags) - return -EOPNOTSUPP; - if (req->index) return -EINVAL; @@ -287,6 +277,7 @@ static const struct ptp_clock_info ravb_ptp_info = { .max_adj = 50000000, .n_ext_ts = N_EXT_TS, .n_per_out = N_PER_OUT, + .supported_extts_flags = PTP_RISING_EDGE | PTP_FALLING_EDGE, .adjfine = ravb_ptp_adjfine, .adjtime = ravb_ptp_adjtime, .gettime64 = ravb_ptp_gettime64, diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index d5db26103d82..19985d13e243 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -1933,7 +1933,7 @@ static int ofdpa_port_fdb(struct ofdpa_port *ofdpa_port, spin_unlock_irqrestore(&ofdpa->fdb_tbl_lock, lock_flags); /* Check if adding and already exists, or removing and can't find */ - if (!found != !removing) { + if (!found == removing) { kfree(fdb); if (!found && removing) return 0; diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 332cbd725900..df869f82cae8 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -468,7 +468,7 @@ static int sis900_probe(struct pci_dev *pci_dev, SET_NETDEV_DEV(net_dev, &pci_dev->dev); /* We do a request_region() to register /proc/ioports info. */ - ret = pci_request_regions(pci_dev, "sis900"); + ret = pcim_request_all_regions(pci_dev, "sis900"); if (ret) goto err_out; diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 3c820ef56775..67fa879b1e52 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -3,6 +3,7 @@ config STMMAC_ETH tristate "STMicroelectronics Multi-Gigabit Ethernet driver" depends on HAS_IOMEM && HAS_DMA depends on PTP_1588_CLOCK_OPTIONAL + depends on ETHTOOL_NETLINK select MII select PCS_XPCS select PAGE_POOL @@ -131,6 +132,17 @@ config DWMAC_QCOM_ETHQOS This selects the Qualcomm ETHQOS glue layer support for the stmmac device driver. 
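The new rtase_set_hw_cbs() above converts the CBS idleslope (scaled by RTASE_1T_CLOCK and expressed in units of RTASE_1T_POWER, i.e. 10^7) into a credit register with a 7-bit integer field in bits 31:25 and 25 fractional bits below it, producing the fraction one bit at a time by doubling the remainder. Below is a self-contained sketch of that fixed-point conversion under illustrative names (encode_7_25, SCALE), written as plain userspace C rather than the driver's register write.

#include <stdint.h>
#include <stdio.h>

#define FRAC_BITS 25
#define SCALE 10000000ULL	/* plays the RTASE_1T_POWER role */

/* Encode (num / SCALE) as 7.25 fixed point the way
 * rtase_set_hw_cbs() builds its register value: the integer
 * quotient goes in the top 7 bits, then binary long division
 * yields the fraction (double the remainder, emit a 1 each
 * time it crosses the denominator).
 */
static uint32_t encode_7_25(uint64_t num)
{
	uint32_t val = (uint32_t)(num / SCALE) << FRAC_BITS;
	uint64_t rem = num % SCALE;
	int i;

	for (i = 1; i <= FRAC_BITS; i++) {
		rem *= 2;
		if (rem / SCALE == 1)	/* rem < 2 * SCALE, so 0 or 1 */
			val |= UINT32_C(1) << (FRAC_BITS - i);
		rem %= SCALE;
	}
	return val;
}

int main(void)
{
	/* 2.5 in these units: integer part 2, fraction 0.1 binary,
	 * so the expected encoding is 0x05000000.
	 */
	printf("0x%08x\n", (unsigned int)encode_7_25(25000000));
	return 0;
}

In the driver the input is tx_qos[queue].idleslope * RTASE_1T_CLOCK and the result lands in RTASE_TXQCRDT_0 + queue * 4, with a zero write disabling shaping for that queue.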
+config DWMAC_RENESAS_GBETH + tristate "Renesas RZ/V2H(P) GBETH support" + default ARCH_RENESAS + depends on OF && (ARCH_RENESAS || COMPILE_TEST) + help + Support for Gigabit Ethernet Interface (GBETH) on Renesas + RZ/V2H(P) SoCs. + + This selects the Renesas RZ/V2H(P) SoC specific glue layer support + for the stmmac device driver. + config DWMAC_ROCKCHIP tristate "Rockchip dwmac support" default ARCH_ROCKCHIP diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 594883fb4164..b591d93f8503 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -6,7 +6,7 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \ mmc_core.o stmmac_hwtstamp.o stmmac_ptp.o dwmac4_descs.o \ dwmac4_dma.o dwmac4_lib.o dwmac4_core.o dwmac5.o hwif.o \ stmmac_tc.o dwxgmac2_core.o dwxgmac2_dma.o dwxgmac2_descs.o \ - stmmac_xdp.o stmmac_est.o stmmac_fpe.o \ + stmmac_xdp.o stmmac_est.o stmmac_fpe.o stmmac_vlan.o \ $(stmmac-y) stmmac-$(CONFIG_STMMAC_SELFTESTS) += stmmac_selftests.o @@ -20,6 +20,7 @@ obj-$(CONFIG_DWMAC_LPC18XX) += dwmac-lpc18xx.o obj-$(CONFIG_DWMAC_MEDIATEK) += dwmac-mediatek.o obj-$(CONFIG_DWMAC_MESON) += dwmac-meson.o dwmac-meson8b.o obj-$(CONFIG_DWMAC_QCOM_ETHQOS) += dwmac-qcom-ethqos.o +obj-$(CONFIG_DWMAC_RENESAS_GBETH) += dwmac-renesas-gbeth.o obj-$(CONFIG_DWMAC_ROCKCHIP) += dwmac-rk.o obj-$(CONFIG_DWMAC_RZN1) += dwmac-rzn1.o obj-$(CONFIG_DWMAC_S32) += dwmac-s32.o diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 412b07e77945..ea5da5793362 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -602,6 +602,7 @@ struct mac_device_info { const struct stmmac_tc_ops *tc; const struct stmmac_mmc_ops *mmc; const struct stmmac_est_ops *est; + const struct stmmac_vlan_ops *vlan; struct dw_xpcs *xpcs; struct phylink_pcs *phylink_pcs; struct mii_regs mii; /* MII register Addresses */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c index 37fe7c288878..84072c8ed741 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c @@ -65,13 +65,12 @@ anarion_config_dt(struct platform_device *pdev, { struct anarion_gmac *gmac; void __iomem *ctl_block; - int err; ctl_block = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(ctl_block)) { - err = PTR_ERR(ctl_block); - dev_err(&pdev->dev, "Cannot get reset region (%d)!\n", err); - return ERR_PTR(err); + dev_err(&pdev->dev, "Cannot get reset region (%pe)!\n", + ctl_block); + return ERR_CAST(ctl_block); } gmac = devm_kzalloc(&pdev->dev, sizeof(*gmac), GFP_KERNEL); @@ -80,17 +79,11 @@ anarion_config_dt(struct platform_device *pdev, gmac->ctl_block = ctl_block; - switch (plat_dat->phy_interface) { - case PHY_INTERFACE_MODE_RGMII: - fallthrough; - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII_RXID: - case PHY_INTERFACE_MODE_RGMII_TXID: + if (phy_interface_mode_is_rgmii(plat_dat->phy_interface)) { gmac->phy_intf_sel = GMAC_CONFIG_INTF_RGMII; - break; - default: - dev_err(&pdev->dev, "Unsupported phy-mode (%d)\n", - plat_dat->phy_interface); + } else { + dev_err(&pdev->dev, "Unsupported phy-mode (%s)\n", + phy_modes(plat_dat->phy_interface)); return ERR_PTR(-ENOTSUPP); } @@ -118,10 +111,9 @@ static int anarion_dwmac_probe(struct platform_device *pdev) plat_dat->init = 
anarion_gmac_init; plat_dat->exit = anarion_gmac_exit; - anarion_gmac_init(pdev, gmac); plat_dat->bsp_priv = gmac; - return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); } static const struct of_device_id anarion_dwmac_match[] = { @@ -132,7 +124,6 @@ MODULE_DEVICE_TABLE(of, anarion_dwmac_match); static struct platform_driver anarion_dwmac_driver = { .probe = anarion_dwmac_probe, - .remove = stmmac_pltfr_remove, .driver = { .name = "anarion-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index cd431f84f34f..09ae16e026eb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -29,21 +29,10 @@ struct tegra_eqos { void __iomem *regs; struct reset_control *rst; - struct clk *clk_slave; struct gpio_desc *reset; }; -static struct clk *dwc_eth_find_clk(struct plat_stmmacenet_data *plat_dat, - const char *name) -{ - for (int i = 0; i < plat_dat->num_clks; i++) - if (strcmp(plat_dat->clks[i].id, name) == 0) - return plat_dat->clks[i].clk; - - return NULL; -} - static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat_dat) { @@ -132,7 +121,7 @@ static int dwc_qos_probe(struct platform_device *pdev, struct plat_stmmacenet_data *plat_dat, struct stmmac_resources *stmmac_res) { - plat_dat->pclk = dwc_eth_find_clk(plat_dat, "phy_ref_clk"); + plat_dat->pclk = stmmac_pltfr_find_clk(plat_dat, "phy_ref_clk"); return 0; } @@ -147,10 +136,11 @@ static int dwc_qos_probe(struct platform_device *pdev, #define AUTO_CAL_STATUS 0x880c #define AUTO_CAL_STATUS_ACTIVE BIT(31) -static void tegra_eqos_fix_speed(void *priv, int speed, unsigned int mode) +static void tegra_eqos_fix_speed(void *bsp_priv, int speed, unsigned int mode) { - struct tegra_eqos *eqos = priv; + struct tegra_eqos *eqos = bsp_priv; bool needs_calibration = false; + struct stmmac_priv *priv; u32 value; int err; @@ -169,6 +159,11 @@ static void tegra_eqos_fix_speed(void *priv, int speed, unsigned int mode) } if (needs_calibration) { + priv = netdev_priv(dev_get_drvdata(eqos->dev)); + + /* Calibration should be done with the MDIO bus idle */ + mutex_lock(&priv->mii->mdio_lock); + /* calibrate */ value = readl(eqos->regs + SDMEMCOMPPADCTRL); value |= SDMEMCOMPPADCTRL_PAD_E_INPUT_OR_E_PWRD; @@ -202,6 +197,8 @@ static void tegra_eqos_fix_speed(void *priv, int speed, unsigned int mode) value = readl(eqos->regs + SDMEMCOMPPADCTRL); value &= ~SDMEMCOMPPADCTRL_PAD_E_INPUT_OR_E_PWRD; writel(value, eqos->regs + SDMEMCOMPPADCTRL); + + mutex_unlock(&priv->mii->mdio_lock); } else { value = readl(eqos->regs + AUTO_CAL_CONFIG); value &= ~AUTO_CAL_CONFIG_ENABLE; @@ -209,20 +206,6 @@ static void tegra_eqos_fix_speed(void *priv, int speed, unsigned int mode) } } -static int tegra_eqos_init(struct platform_device *pdev, void *priv) -{ - struct tegra_eqos *eqos = priv; - unsigned long rate; - u32 value; - - rate = clk_get_rate(eqos->clk_slave); - - value = (rate / 1000000) - 1; - writel(value, eqos->regs + GMAC_1US_TIC_COUNTER); - - return 0; -} - static int tegra_eqos_probe(struct platform_device *pdev, struct plat_stmmacenet_data *plat_dat, struct stmmac_resources *res) @@ -237,12 +220,11 @@ static int tegra_eqos_probe(struct platform_device *pdev, eqos->dev = &pdev->dev; eqos->regs = res->addr; - eqos->clk_slave = plat_dat->stmmac_clk; if (!is_of_node(dev->fwnode)) goto 
bypass_clk_reset_gpio; - plat_dat->clk_tx_i = dwc_eth_find_clk(plat_dat, "tx"); + plat_dat->clk_tx_i = stmmac_pltfr_find_clk(plat_dat, "tx"); eqos->reset = devm_gpiod_get(&pdev->dev, "phy-reset", GPIOD_OUT_HIGH); if (IS_ERR(eqos->reset)) { @@ -277,17 +259,12 @@ static int tegra_eqos_probe(struct platform_device *pdev, bypass_clk_reset_gpio: plat_dat->fix_mac_speed = tegra_eqos_fix_speed; plat_dat->set_clk_tx_rate = stmmac_set_clk_tx_rate; - plat_dat->init = tegra_eqos_init; plat_dat->bsp_priv = eqos; - plat_dat->flags |= STMMAC_FLAG_SPH_DISABLE; - - err = tegra_eqos_init(pdev, eqos); - if (err < 0) - goto reset; + plat_dat->flags |= STMMAC_FLAG_SPH_DISABLE | + STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP; return 0; -reset: - reset_control_assert(eqos->rst); + reset_phy: gpiod_set_value(eqos->reset, 1); @@ -362,8 +339,8 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) if (ret) return dev_err_probe(&pdev->dev, ret, "Failed to enable clocks\n"); - plat_dat->stmmac_clk = dwc_eth_find_clk(plat_dat, - data->stmmac_clk_name); + plat_dat->stmmac_clk = stmmac_pltfr_find_clk(plat_dat, + data->stmmac_clk_name); if (data->probe) ret = data->probe(pdev, plat_dat, &stmmac_res); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 5d279fa54b3e..889e2bb6f7f5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -379,10 +379,6 @@ static int imx_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - ret = imx_dwmac_init(pdev, dwmac); - if (ret) - goto err_dwmac_init; - if (dwmac->ops->fix_mac_speed) { plat_dat->fix_mac_speed = dwmac->ops->fix_mac_speed; } else if (!dwmac->ops->mac_rgmii_txclk_auto_adj) { @@ -392,16 +388,10 @@ static int imx_dwmac_probe(struct platform_device *pdev) dwmac->plat_dat->fix_soc_reset = dwmac->ops->fix_soc_reset; - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + ret = stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); if (ret) - goto err_drv_probe; - - return 0; + imx_dwmac_clks_config(dwmac, false); -err_drv_probe: - imx_dwmac_exit(pdev, plat_dat->bsp_priv); -err_dwmac_init: - imx_dwmac_clks_config(dwmac, false); return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index 066783d66422..15abe214131f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -56,6 +56,7 @@ enum ingenic_mac_version { struct ingenic_mac { const struct ingenic_soc_info *soc_info; + struct plat_stmmacenet_data *plat_dat; struct device *dev; struct regmap *regmap; @@ -70,13 +71,13 @@ struct ingenic_soc_info { int (*set_mode)(struct plat_stmmacenet_data *plat_dat); }; -static int ingenic_mac_init(struct plat_stmmacenet_data *plat_dat) +static int ingenic_mac_init(struct platform_device *pdev, void *bsp_priv) { - struct ingenic_mac *mac = plat_dat->bsp_priv; + struct ingenic_mac *mac = bsp_priv; int ret; if (mac->soc_info->set_mode) { - ret = mac->soc_info->set_mode(plat_dat); + ret = mac->soc_info->set_mode(mac->plat_dat); if (ret) return ret; } @@ -284,44 +285,14 @@ static int ingenic_mac_probe(struct platform_device *pdev) mac->soc_info = data; mac->dev = &pdev->dev; + mac->plat_dat = plat_dat; plat_dat->bsp_priv = mac; + plat_dat->init = ingenic_mac_init; - ret = ingenic_mac_init(plat_dat); - if (ret) - return ret; - - return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); -} - -#ifdef CONFIG_PM_SLEEP 
-static int ingenic_mac_suspend(struct device *dev) -{ - int ret; - - ret = stmmac_suspend(dev); - - return ret; + return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); } -static int ingenic_mac_resume(struct device *dev) -{ - struct net_device *ndev = dev_get_drvdata(dev); - struct stmmac_priv *priv = netdev_priv(ndev); - int ret; - - ret = ingenic_mac_init(priv->plat); - if (ret) - return ret; - - ret = stmmac_resume(dev); - - return ret; -} -#endif /* CONFIG_PM_SLEEP */ - -static SIMPLE_DEV_PM_OPS(ingenic_mac_pm_ops, ingenic_mac_suspend, ingenic_mac_resume); - static struct ingenic_soc_info jz4775_soc_info = { .version = ID_JZ4775, .mask = MACPHYC_TXCLK_SEL_MASK | MACPHYC_SOFT_RST_MASK | MACPHYC_PHY_INFT_MASK, @@ -370,10 +341,9 @@ MODULE_DEVICE_TABLE(of, ingenic_mac_of_matches); static struct platform_driver ingenic_mac_driver = { .probe = ingenic_mac_probe, - .remove = stmmac_pltfr_remove, .driver = { .name = "ingenic-mac", - .pm = pm_ptr(&ingenic_mac_pm_ops), + .pm = &stmmac_pltfr_pm_ops, .of_match_table = ingenic_mac_of_matches, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c index 599def7b3a64..4ea7b0a803d7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c @@ -113,16 +113,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev) plat_dat->clk_tx_i = dwmac->tx_clk; plat_dat->set_clk_tx_rate = stmmac_set_clk_tx_rate; - plat_dat->bsp_priv = dwmac; - plat_dat->eee_usecs_rate = plat_dat->clk_ptp_rate; - - if (plat_dat->eee_usecs_rate > 0) { - u32 tx_lpi_usec; - - tx_lpi_usec = (plat_dat->eee_usecs_rate / 1000000) - 1; - writel(tx_lpi_usec, stmmac_res.addr + GMAC_1US_TIC_COUNTER); - } ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index c8bb9265bbb4..9a47015254bb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -284,28 +284,28 @@ static void intel_serdes_powerdown(struct net_device *ndev, void *intel_data) } } -static void intel_speed_mode_2500(struct net_device *ndev, void *intel_data) +static void tgl_get_interfaces(struct stmmac_priv *priv, void *bsp_priv, + unsigned long *interfaces) { - struct intel_priv_data *intel_priv = intel_data; - struct stmmac_priv *priv = netdev_priv(ndev); - int serdes_phy_addr = 0; - u32 data = 0; - - serdes_phy_addr = intel_priv->mdio_adhoc_addr; + struct intel_priv_data *intel_priv = bsp_priv; + phy_interface_t interface; + int data; /* Determine the link speed mode: 2.5Gbps/1Gbps */ - data = mdiobus_read(priv->mii, serdes_phy_addr, - SERDES_GCR); + data = mdiobus_read(priv->mii, intel_priv->mdio_adhoc_addr, SERDES_GCR); + if (data < 0) + return; - if (((data & SERDES_LINK_MODE_MASK) >> SERDES_LINK_MODE_SHIFT) == - SERDES_LINK_MODE_2G5) { + if (FIELD_GET(SERDES_LINK_MODE_MASK, data) == SERDES_LINK_MODE_2G5) { dev_info(priv->device, "Link Speed Mode: 2.5Gbps\n"); - priv->plat->max_speed = 2500; - priv->plat->phy_interface = PHY_INTERFACE_MODE_2500BASEX; priv->plat->mdio_bus_data->default_an_inband = false; + interface = PHY_INTERFACE_MODE_2500BASEX; } else { - priv->plat->max_speed = 1000; + interface = PHY_INTERFACE_MODE_SGMII; } + + __set_bit(interface, interfaces); + priv->plat->phy_interface = interface; } /* Program PTP Clock Frequency for different variant of @@ -682,7 
+682,6 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->axi->axi_blen[2] = 16; plat->ptp_max_adj = plat->clk_ptp_rate; - plat->eee_usecs_rate = plat->clk_ptp_rate; /* Set system clock */ sprintf(clk_name, "%s-%s", "stmmac", pci_name(pdev)); @@ -933,7 +932,7 @@ static int tgl_common_data(struct pci_dev *pdev, plat->rx_queues_to_use = 6; plat->tx_queues_to_use = 4; plat->clk_ptp_rate = 204800000; - plat->speed_mode_2500 = intel_speed_mode_2500; + plat->get_interfaces = tgl_get_interfaces; plat->safety_feat_cfg->tsoee = 1; plat->safety_feat_cfg->mrxpee = 0; @@ -952,7 +951,6 @@ static int tgl_sgmii_phy0_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->bus_id = 1; - plat->phy_interface = PHY_INTERFACE_MODE_SGMII; plat->serdes_powerup = intel_serdes_powerup; plat->serdes_powerdown = intel_serdes_powerdown; return tgl_common_data(pdev, plat); @@ -966,7 +964,6 @@ static int tgl_sgmii_phy1_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->bus_id = 2; - plat->phy_interface = PHY_INTERFACE_MODE_SGMII; plat->serdes_powerup = intel_serdes_powerup; plat->serdes_powerdown = intel_serdes_powerdown; return tgl_common_data(pdev, plat); @@ -980,7 +977,6 @@ static int adls_sgmii_phy0_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->bus_id = 1; - plat->phy_interface = PHY_INTERFACE_MODE_SGMII; /* SerDes power up and power down are done in BIOS for ADL */ @@ -995,7 +991,6 @@ static int adls_sgmii_phy1_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->bus_id = 2; - plat->phy_interface = PHY_INTERFACE_MODE_SGMII; /* SerDes power up and power down are done in BIOS for ADL */ @@ -1313,13 +1308,6 @@ static int intel_eth_pci_probe(struct pci_dev *pdev, memset(&res, 0, sizeof(res)); res.addr = pcim_iomap_table(pdev)[0]; - if (plat->eee_usecs_rate > 0) { - u32 tx_lpi_usec; - - tx_lpi_usec = (plat->eee_usecs_rate / 1000000) - 1; - writel(tx_lpi_usec, res.addr + GMAC_1US_TIC_COUNTER); - } - ret = stmmac_config_multi_msi(pdev, plat, &res); if (ret) { ret = stmmac_config_single_msi(pdev, plat, &res); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h index a12f8e65f89f..7511c224b312 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h @@ -21,7 +21,6 @@ #define SERDES_RATE_MASK GENMASK(9, 8) #define SERDES_PCLK_MASK GENMASK(14, 12) /* PCLK rate to PHY */ #define SERDES_LINK_MODE_MASK GENMASK(2, 1) -#define SERDES_LINK_MODE_SHIFT 1 #define SERDES_PWR_ST_SHIFT 4 #define SERDES_PWR_ST_P0 0x0 #define SERDES_PWR_ST_P3 0x3 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 1a93787056a7..e1591e6217d4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -66,12 +66,14 @@ DMA_STATUS_TPS | DMA_STATUS_TI | \ DMA_STATUS_MSK_COMMON_LOONGSON) -#define PCI_DEVICE_ID_LOONGSON_GMAC 0x7a03 +#define PCI_DEVICE_ID_LOONGSON_GMAC1 0x7a03 +#define PCI_DEVICE_ID_LOONGSON_GMAC2 0x7a23 #define PCI_DEVICE_ID_LOONGSON_GNET 0x7a13 -#define DWMAC_CORE_LS_MULTICHAN 0x10 /* Loongson custom ID */ -#define CHANNEL_NUM 8 +#define DWMAC_CORE_MULTICHAN_V1 0x10 /* Loongson custom ID 0x10 */ +#define DWMAC_CORE_MULTICHAN_V2 0x12 /* Loongson custom ID 0x12 */ struct loongson_data { + u32 multichan; u32 loongson_id; struct device *dev; }; @@ -83,6 +85,8 @@ struct stmmac_pci_info { static void 
loongson_default_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { + struct loongson_data *ld = plat->bsp_priv; + /* Get bus_id, this can be overwritten later */ plat->bus_id = pci_dev_id(pdev); @@ -116,31 +120,37 @@ static void loongson_default_data(struct pci_dev *pdev, plat->dma_cfg->pbl = 32; plat->dma_cfg->pblx8 = true; -} - -static int loongson_gmac_data(struct pci_dev *pdev, - struct plat_stmmacenet_data *plat) -{ - struct loongson_data *ld; - int i; - - ld = plat->bsp_priv; - loongson_default_data(pdev, plat); - - if (ld->loongson_id == DWMAC_CORE_LS_MULTICHAN) { - plat->rx_queues_to_use = CHANNEL_NUM; - plat->tx_queues_to_use = CHANNEL_NUM; + switch (ld->loongson_id) { + case DWMAC_CORE_MULTICHAN_V1: + ld->multichan = 1; + plat->rx_queues_to_use = 8; + plat->tx_queues_to_use = 8; /* Only channel 0 supports checksum, * so turn off checksum to enable multiple channels. */ - for (i = 1; i < CHANNEL_NUM; i++) + for (int i = 1; i < 8; i++) plat->tx_queues_cfg[i].coe_unsupported = 1; - } else { + + break; + case DWMAC_CORE_MULTICHAN_V2: + ld->multichan = 1; + plat->rx_queues_to_use = 4; + plat->tx_queues_to_use = 4; + break; + default: + ld->multichan = 0; plat->tx_queues_to_use = 1; plat->rx_queues_to_use = 1; + break; } +} + +static int loongson_gmac_data(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) +{ + loongson_default_data(pdev, plat); plat->phy_interface = PHY_INTERFACE_MODE_RGMII_ID; @@ -172,27 +182,8 @@ static void loongson_gnet_fix_speed(void *priv, int speed, unsigned int mode) static int loongson_gnet_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { - struct loongson_data *ld; - int i; - - ld = plat->bsp_priv; - loongson_default_data(pdev, plat); - if (ld->loongson_id == DWMAC_CORE_LS_MULTICHAN) { - plat->rx_queues_to_use = CHANNEL_NUM; - plat->tx_queues_to_use = CHANNEL_NUM; - - /* Only channel 0 supports checksum, - * so turn off checksum to enable multiple channels. - */ - for (i = 1; i < CHANNEL_NUM; i++) - plat->tx_queues_cfg[i].coe_unsupported = 1; - } else { - plat->tx_queues_to_use = 1; - plat->rx_queues_to_use = 1; - } - plat->phy_interface = PHY_INTERFACE_MODE_GMII; plat->mdio_bus_data->phy_mask = ~(u32)BIT(2); plat->fix_mac_speed = loongson_gnet_fix_speed; @@ -350,14 +341,14 @@ static struct mac_device_info *loongson_dwmac_setup(void *apriv) return NULL; /* The Loongson GMAC and GNET devices are based on the DW GMAC - * v3.50a and v3.73a IP-cores. But the HW designers have changed the - * GMAC_VERSION.SNPSVER field to the custom 0x10 value on the - * network controllers with the multi-channels feature + * v3.50a and v3.73a IP-cores. But the HW designers have changed + * the GMAC_VERSION.SNPSVER field to the custom 0x10/0x12 value + * on the network controllers with the multi-channels feature * available to emphasize the differences: multiple DMA-channels, * AV feature and GMAC_INT_STATUS CSR flags layout. Get back the * original value so the correct HW-interface would be selected. */ - if (ld->loongson_id == DWMAC_CORE_LS_MULTICHAN) { + if (ld->multichan) { priv->synopsys_id = DWMAC_CORE_3_70; *dma = dwmac1000_dma_ops; dma->init_chan = loongson_dwmac_dma_init_channel; @@ -378,13 +369,13 @@ static struct mac_device_info *loongson_dwmac_setup(void *apriv) if (mac->multicast_filter_bins) mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); - /* Loongson GMAC doesn't support the flow control. LS2K2000 - * GNET doesn't support the half-duplex link mode. + /* Loongson GMAC doesn't support the flow control. 
Loongson GNET + * without multi-channel doesn't support the half-duplex link mode. */ - if (pdev->device == PCI_DEVICE_ID_LOONGSON_GMAC) { + if (pdev->device != PCI_DEVICE_ID_LOONGSON_GNET) { mac->link.caps = MAC_10 | MAC_100 | MAC_1000; } else { - if (ld->loongson_id == DWMAC_CORE_LS_MULTICHAN) + if (ld->multichan) mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000; else @@ -413,9 +404,11 @@ static int loongson_dwmac_msi_config(struct pci_dev *pdev, struct plat_stmmacenet_data *plat, struct stmmac_resources *res) { - int i, ret, vecs; + int i, ch_num, ret, vecs; + + ch_num = min(plat->tx_queues_to_use, plat->rx_queues_to_use); - vecs = roundup_pow_of_two(CHANNEL_NUM * 2 + 1); + vecs = roundup_pow_of_two(ch_num * 2 + 1); ret = pci_alloc_irq_vectors(pdev, vecs, vecs, PCI_IRQ_MSI); if (ret < 0) { dev_warn(&pdev->dev, "Failed to allocate MSI IRQs\n"); @@ -424,14 +417,12 @@ static int loongson_dwmac_msi_config(struct pci_dev *pdev, res->irq = pci_irq_vector(pdev, 0); - for (i = 0; i < plat->rx_queues_to_use; i++) { - res->rx_irq[CHANNEL_NUM - 1 - i] = - pci_irq_vector(pdev, 1 + i * 2); + for (i = 0; i < ch_num; i++) { + res->rx_irq[ch_num - 1 - i] = pci_irq_vector(pdev, 1 + i * 2); } - for (i = 0; i < plat->tx_queues_to_use; i++) { - res->tx_irq[CHANNEL_NUM - 1 - i] = - pci_irq_vector(pdev, 2 + i * 2); + for (i = 0; i < ch_num; i++) { + res->tx_irq[ch_num - 1 - i] = pci_irq_vector(pdev, 2 + i * 2); } plat->flags |= STMMAC_FLAG_MULTI_MSI_EN; @@ -593,7 +584,7 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id goto err_disable_device; /* Use the common MAC IRQ if per-channel MSIs allocation failed */ - if (ld->loongson_id == DWMAC_CORE_LS_MULTICHAN) + if (ld->multichan) loongson_dwmac_msi_config(pdev, plat, &res); ret = stmmac_dvr_probe(&pdev->dev, plat, &res); @@ -605,7 +596,7 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id err_plat_clear: if (dev_of_node(&pdev->dev)) loongson_dwmac_dt_clear(pdev, plat); - if (ld->loongson_id == DWMAC_CORE_LS_MULTICHAN) + if (ld->multichan) loongson_dwmac_msi_clear(pdev); err_disable_device: pci_disable_device(pdev); @@ -624,7 +615,7 @@ static void loongson_dwmac_remove(struct pci_dev *pdev) if (dev_of_node(&pdev->dev)) loongson_dwmac_dt_clear(pdev, priv->plat); - if (ld->loongson_id == DWMAC_CORE_LS_MULTICHAN) + if (ld->multichan) loongson_dwmac_msi_clear(pdev); pci_disable_device(pdev); @@ -669,7 +660,8 @@ static SIMPLE_DEV_PM_OPS(loongson_dwmac_pm_ops, loongson_dwmac_suspend, loongson_dwmac_resume); static const struct pci_device_id loongson_dwmac_id_table[] = { - { PCI_DEVICE_DATA(LOONGSON, GMAC, &loongson_gmac_pci_info) }, + { PCI_DEVICE_DATA(LOONGSON, GMAC1, &loongson_gmac_pci_info) }, + { PCI_DEVICE_DATA(LOONGSON, GMAC2, &loongson_gmac_pci_info) }, { PCI_DEVICE_DATA(LOONGSON, GNET, &loongson_gnet_pci_info) }, {} }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index d178d5ddc7c7..39421d6a34e4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -581,7 +581,6 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev, int i; priv_plat->phy_mode = plat->phy_interface; - plat->mac_interface = priv_plat->phy_mode; if (priv_plat->mac_wol) plat->flags &= ~STMMAC_FLAG_USE_PHY_WOL; else diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c 
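Before the dwmac-qcom-ethqos changes, a quick model of the Loongson MSI layout above: vector 0 carries the common MAC interrupt, then each DMA channel i owns the pair 1 + 2*i (RX) and 2 + 2*i (TX), and the driver fills rx_irq[]/tx_irq[] from the top down. A standalone sketch of that mapping, with ch_num = 4 mirroring the DWMAC_CORE_MULTICHAN_V2 case (the value is illustrative only):

    #include <stdio.h>

    int main(void)
    {
            int ch_num = 4;

            /* roundup_pow_of_two(4 * 2 + 1) = 16 vectors are actually
             * requested; only 2 * ch_num + 1 of them are wired up.
             */
            printf("vector 0 -> common MAC IRQ\n");
            for (int i = 0; i < ch_num; i++) {
                    printf("vector %d -> rx_irq[%d]\n",
                           1 + i * 2, ch_num - 1 - i);
                    printf("vector %d -> tx_irq[%d]\n",
                           2 + i * 2, ch_num - 1 - i);
            }
            return 0;
    }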
index 0e4da216f942..e30bdf72331a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -106,12 +106,11 @@ struct qcom_ethqos { struct platform_device *pdev; void __iomem *rgmii_base; void __iomem *mac_base; - int (*configure_func)(struct qcom_ethqos *ethqos); + int (*configure_func)(struct qcom_ethqos *ethqos, int speed); unsigned int link_clk_rate; struct clk *link_clk; struct phy *serdes_phy; - int speed; int serdes_speed; phy_interface_t phy_mode; @@ -385,7 +384,7 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos) return 0; } -static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) +static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) { struct device *dev = &ethqos->pdev->dev; int phase_shift; @@ -412,7 +411,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) rgmii_updatel(ethqos, RGMII_CONFIG_INTF_SEL, 0, RGMII_IO_MACRO_CONFIG); - switch (ethqos->speed) { + switch (speed) { case SPEED_1000: rgmii_updatel(ethqos, RGMII_CONFIG_DDR_MODE, RGMII_CONFIG_DDR_MODE, RGMII_IO_MACRO_CONFIG); @@ -532,14 +531,14 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) loopback, RGMII_IO_MACRO_CONFIG); break; default: - dev_err(dev, "Invalid speed %d\n", ethqos->speed); + dev_err(dev, "Invalid speed %d\n", speed); return -EINVAL; } return 0; } -static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos) +static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos, int speed) { struct device *dev = &ethqos->pdev->dev; volatile unsigned int dll_lock; @@ -562,7 +561,7 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos) SDCC_DLL_CONFIG_PDN, SDCC_HC_REG_DLL_CONFIG); if (ethqos->has_emac_ge_3) { - if (ethqos->speed == SPEED_1000) { + if (speed == SPEED_1000) { rgmii_writel(ethqos, 0x1800000, SDCC_TEST_CTL); rgmii_writel(ethqos, 0x2C010800, SDCC_USR_CTL); rgmii_writel(ethqos, 0xA001, SDCC_HC_REG_DLL_CONFIG2); @@ -580,7 +579,7 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos) rgmii_updatel(ethqos, SDCC_DLL_CONFIG_PDN, 0, SDCC_HC_REG_DLL_CONFIG); - if (ethqos->speed != SPEED_100 && ethqos->speed != SPEED_10) { + if (speed != SPEED_100 && speed != SPEED_10) { /* Set DLL_EN */ rgmii_updatel(ethqos, SDCC_DLL_CONFIG_DLL_EN, SDCC_DLL_CONFIG_DLL_EN, SDCC_HC_REG_DLL_CONFIG); @@ -607,10 +606,10 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos) dev_err(dev, "Timeout while waiting for DLL lock\n"); } - if (ethqos->speed == SPEED_1000) + if (speed == SPEED_1000) ethqos_dll_configure(ethqos); - ethqos_rgmii_macro_init(ethqos); + ethqos_rgmii_macro_init(ethqos, speed); return 0; } @@ -626,7 +625,7 @@ static void ethqos_set_serdes_speed(struct qcom_ethqos *ethqos, int speed) /* On interface toggle MAC registers gets reset. 
* Configure MAC block for SGMII on ethernet phy link up */ -static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos) +static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos, int speed) { struct net_device *dev = platform_get_drvdata(ethqos->pdev); struct stmmac_priv *priv = netdev_priv(dev); @@ -634,7 +633,7 @@ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos) val = readl(ethqos->mac_base + MAC_CTRL_REG); - switch (ethqos->speed) { + switch (speed) { case SPEED_2500: val &= ~ETHQOS_MAC_CTRL_PORT_SEL; rgmii_updatel(ethqos, RGMII_CONFIG2_RGMII_CLK_SEL_CFG, @@ -673,17 +672,9 @@ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos) return val; } -static void qcom_ethqos_speed_mode_2500(struct net_device *ndev, void *data) +static int ethqos_configure(struct qcom_ethqos *ethqos, int speed) { - struct stmmac_priv *priv = netdev_priv(ndev); - - priv->plat->max_speed = 2500; - priv->plat->phy_interface = PHY_INTERFACE_MODE_2500BASEX; -} - -static int ethqos_configure(struct qcom_ethqos *ethqos) -{ - return ethqos->configure_func(ethqos); + return ethqos->configure_func(ethqos, speed); } static void ethqos_fix_mac_speed(void *priv, int speed, unsigned int mode) @@ -691,9 +682,8 @@ static void ethqos_fix_mac_speed(void *priv, int speed, unsigned int mode) struct qcom_ethqos *ethqos = priv; qcom_ethqos_set_sgmii_loopback(ethqos, false); - ethqos->speed = speed; ethqos_update_link_clk(ethqos, speed); - ethqos_configure(ethqos); + ethqos_configure(ethqos, speed); } static int qcom_ethqos_serdes_powerup(struct net_device *ndev, void *priv) @@ -709,7 +699,7 @@ static int qcom_ethqos_serdes_powerup(struct net_device *ndev, void *priv) if (ret) return ret; - return phy_set_speed(ethqos->serdes_phy, ethqos->speed); + return phy_set_speed(ethqos->serdes_phy, ethqos->serdes_speed); } static void qcom_ethqos_serdes_powerdown(struct net_device *ndev, void *priv) @@ -803,8 +793,6 @@ static int qcom_ethqos_probe(struct platform_device *pdev) ethqos->configure_func = ethqos_configure_rgmii; break; case PHY_INTERFACE_MODE_2500BASEX: - plat_dat->speed_mode_2500 = qcom_ethqos_speed_mode_2500; - fallthrough; case PHY_INTERFACE_MODE_SGMII: ethqos->configure_func = ethqos_configure_sgmii; break; @@ -847,7 +835,6 @@ static int qcom_ethqos_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(ethqos->serdes_phy), "Failed to get serdes phy\n"); - ethqos->speed = SPEED_1000; ethqos->serdes_speed = SPEED_1000; ethqos_update_link_clk(ethqos, SPEED_1000); ethqos_set_func_clk_en(ethqos); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c new file mode 100644 index 000000000000..9a774046455b --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * dwmac-renesas-gbeth.c - DWMAC Specific Glue layer for Renesas GBETH + * + * The Rx and Tx clocks are supplied as follows for the GBETH IP. 
+ * + * Rx / Tx + * -------+------------- on / off ------- + * | + * | Rx-180 / Tx-180 + * +---- not ---- on / off ------- + * + * Copyright (C) 2025 Renesas Electronics Corporation + */ + +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/reset.h> + +#include "stmmac_platform.h" + +struct renesas_gbeth { + struct plat_stmmacenet_data *plat_dat; + struct reset_control *rstc; + struct device *dev; +}; + +static const char *const renesas_gbeth_clks[] = { + "tx", "tx-180", "rx", "rx-180", +}; + +static int renesas_gbeth_init(struct platform_device *pdev, void *priv) +{ + struct plat_stmmacenet_data *plat_dat; + struct renesas_gbeth *gbeth = priv; + int ret; + + plat_dat = gbeth->plat_dat; + + ret = reset_control_deassert(gbeth->rstc); + if (ret) { + dev_err(gbeth->dev, "Reset deassert failed\n"); + return ret; + } + + ret = clk_bulk_prepare_enable(plat_dat->num_clks, + plat_dat->clks); + if (ret) + reset_control_assert(gbeth->rstc); + + return ret; +} + +static void renesas_gbeth_exit(struct platform_device *pdev, void *priv) +{ + struct plat_stmmacenet_data *plat_dat; + struct renesas_gbeth *gbeth = priv; + int ret; + + plat_dat = gbeth->plat_dat; + + clk_bulk_disable_unprepare(plat_dat->num_clks, plat_dat->clks); + + ret = reset_control_assert(gbeth->rstc); + if (ret) + dev_err(gbeth->dev, "Reset assert failed\n"); +} + +static int renesas_gbeth_probe(struct platform_device *pdev) +{ + struct plat_stmmacenet_data *plat_dat; + struct stmmac_resources stmmac_res; + struct device *dev = &pdev->dev; + struct renesas_gbeth *gbeth; + unsigned int i; + int err; + + err = stmmac_get_platform_resources(pdev, &stmmac_res); + if (err) + return dev_err_probe(dev, err, + "failed to get resources\n"); + + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); + if (IS_ERR(plat_dat)) + return dev_err_probe(dev, PTR_ERR(plat_dat), + "dt configuration failed\n"); + + gbeth = devm_kzalloc(dev, sizeof(*gbeth), GFP_KERNEL); + if (!gbeth) + return -ENOMEM; + + plat_dat->num_clks = ARRAY_SIZE(renesas_gbeth_clks); + plat_dat->clks = devm_kcalloc(dev, plat_dat->num_clks, + sizeof(*plat_dat->clks), GFP_KERNEL); + if (!plat_dat->clks) + return -ENOMEM; + + for (i = 0; i < plat_dat->num_clks; i++) + plat_dat->clks[i].id = renesas_gbeth_clks[i]; + + err = devm_clk_bulk_get(dev, plat_dat->num_clks, plat_dat->clks); + if (err < 0) + return err; + + plat_dat->clk_tx_i = stmmac_pltfr_find_clk(plat_dat, "tx"); + if (!plat_dat->clk_tx_i) + return dev_err_probe(dev, -EINVAL, + "error finding tx clock\n"); + + gbeth->rstc = devm_reset_control_get_exclusive(dev, NULL); + if (IS_ERR(gbeth->rstc)) + return PTR_ERR(gbeth->rstc); + + gbeth->dev = dev; + gbeth->plat_dat = plat_dat; + plat_dat->bsp_priv = gbeth; + plat_dat->set_clk_tx_rate = stmmac_set_clk_tx_rate; + plat_dat->init = renesas_gbeth_init; + plat_dat->exit = renesas_gbeth_exit; + plat_dat->flags |= STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY | + STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP | + STMMAC_FLAG_SPH_DISABLE; + + return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); +} + +static const struct of_device_id renesas_gbeth_match[] = { + { .compatible = "renesas,rzv2h-gbeth", }, + { /* Sentinel */ } +}; +MODULE_DEVICE_TABLE(of, renesas_gbeth_match); + +static struct platform_driver renesas_gbeth_driver = { + .probe = renesas_gbeth_probe, + .driver = { + .name = "renesas-gbeth", + .of_match_table = renesas_gbeth_match, + }, +}; +module_platform_driver(renesas_gbeth_driver); + 
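The new Renesas glue above also models the probe shape this series converts the older glue drivers below (socfpga, sti, sun7i) to: resources are acquired once with devm helpers, the init/exit callbacks only toggle them, and devm_stmmac_pltfr_probe() takes over remove and suspend/resume through the shared stmmac_pltfr_pm_ops. A minimal sketch of that shape; the foo_* names are placeholders, not an existing driver:

    /* Hypothetical callbacks matching plat_dat->init/exit: enable
     * clocks and deassert resets, and the exact reverse.
     */
    static int foo_dwmac_init(struct platform_device *pdev, void *bsp_priv);
    static void foo_dwmac_exit(struct platform_device *pdev, void *bsp_priv);

    static int foo_dwmac_probe(struct platform_device *pdev)
    {
            struct plat_stmmacenet_data *plat_dat;
            struct stmmac_resources stmmac_res;
            int ret;

            ret = stmmac_get_platform_resources(pdev, &stmmac_res);
            if (ret)
                    return ret;

            plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
            if (IS_ERR(plat_dat))
                    return PTR_ERR(plat_dat);

            plat_dat->init = foo_dwmac_init;
            plat_dat->exit = foo_dwmac_exit;

            /* No .remove callback and no driver-specific PM ops are
             * needed: the core invokes exit/init around suspend/resume.
             */
            return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res);
    }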
+MODULE_AUTHOR("Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>"); +MODULE_DESCRIPTION("Renesas GBETH DWMAC Specific Glue layer"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 116855658559..72b50f6d72f4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -50,6 +50,7 @@ struct socfpga_dwmac { u32 reg_offset; u32 reg_shift; struct device *dev; + struct plat_stmmacenet_data *plat_dat; struct regmap *sys_mgr_base_addr; struct reset_control *stmmac_rst; struct reset_control *stmmac_ocp_rst; @@ -58,17 +59,15 @@ struct socfpga_dwmac { void __iomem *sgmii_adapter_base; bool f2h_ptp_ref_clk; const struct socfpga_dwmac_ops *ops; - struct mdio_device *pcs_mdiodev; }; -static void socfpga_dwmac_fix_mac_speed(void *priv, int speed, unsigned int mode) +static void socfpga_dwmac_fix_mac_speed(void *bsp_priv, int speed, + unsigned int mode) { - struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv; + struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)bsp_priv; + struct stmmac_priv *priv = netdev_priv(dev_get_drvdata(dwmac->dev)); void __iomem *splitter_base = dwmac->splitter_base; void __iomem *sgmii_adapter_base = dwmac->sgmii_adapter_base; - struct device *dev = dwmac->dev; - struct net_device *ndev = dev_get_drvdata(dev); - struct phy_device *phy_dev = ndev->phydev; u32 val; if (sgmii_adapter_base) @@ -95,7 +94,9 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, int speed, unsigned int mode writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG); } - if (phy_dev && sgmii_adapter_base) + if ((priv->plat->phy_interface == PHY_INTERFACE_MODE_SGMII || + priv->plat->phy_interface == PHY_INTERFACE_MODE_1000BASEX) && + sgmii_adapter_base) writew(SGMII_ADAPTER_ENABLE, sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); } @@ -233,10 +234,7 @@ err_node_put: static int socfpga_get_plat_phymode(struct socfpga_dwmac *dwmac) { - struct net_device *ndev = dev_get_drvdata(dwmac->dev); - struct stmmac_priv *priv = netdev_priv(ndev); - - return priv->plat->mac_interface; + return dwmac->plat_dat->mac_interface; } static void socfpga_sgmii_config(struct socfpga_dwmac *dwmac, bool enable) @@ -258,6 +256,7 @@ static int socfpga_set_phy_mode_common(int phymode, u32 *val) case PHY_INTERFACE_MODE_MII: case PHY_INTERFACE_MODE_GMII: case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_1000BASEX: *val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII; break; case PHY_INTERFACE_MODE_RMII: @@ -435,6 +434,13 @@ static struct phylink_pcs *socfpga_dwmac_select_pcs(struct stmmac_priv *priv, return priv->hw->phylink_pcs; } +static int socfpga_dwmac_init(struct platform_device *pdev, void *bsp_priv) +{ + struct socfpga_dwmac *dwmac = bsp_priv; + + return dwmac->ops->set_phy_mode(dwmac); +} + static int socfpga_dwmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; @@ -442,8 +448,6 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; int ret; struct socfpga_dwmac *dwmac; - struct net_device *ndev; - struct stmmac_priv *stpriv; const struct socfpga_dwmac_ops *ops; ops = device_get_match_data(&pdev->dev); @@ -479,9 +483,17 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) return ret; } + /* The socfpga driver needs to control the stmmac reset to set the phy + * mode. Create a copy of the core reset handle so it can be used by + * the driver later. 
+ */ + dwmac->stmmac_rst = plat_dat->stmmac_rst; dwmac->ops = ops; + dwmac->plat_dat = plat_dat; + plat_dat->bsp_priv = dwmac; plat_dat->fix_mac_speed = socfpga_dwmac_fix_mac_speed; + plat_dat->init = socfpga_dwmac_init; plat_dat->pcs_init = socfpga_dwmac_pcs_init; plat_dat->pcs_exit = socfpga_dwmac_pcs_exit; plat_dat->select_pcs = socfpga_dwmac_select_pcs; @@ -489,67 +501,9 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) plat_dat->riwt_off = 1; - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); - if (ret) - return ret; - - ndev = platform_get_drvdata(pdev); - stpriv = netdev_priv(ndev); - - /* The socfpga driver needs to control the stmmac reset to set the phy - * mode. Create a copy of the core reset handle so it can be used by - * the driver later. - */ - dwmac->stmmac_rst = stpriv->plat->stmmac_rst; - - ret = ops->set_phy_mode(dwmac); - if (ret) - goto err_dvr_remove; - - return 0; - -err_dvr_remove: - stmmac_dvr_remove(&pdev->dev); - - return ret; + return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); } -#ifdef CONFIG_PM_SLEEP -static int socfpga_dwmac_resume(struct device *dev) -{ - struct net_device *ndev = dev_get_drvdata(dev); - struct stmmac_priv *priv = netdev_priv(ndev); - struct socfpga_dwmac *dwmac_priv = get_stmmac_bsp_priv(dev); - - dwmac_priv->ops->set_phy_mode(priv->plat->bsp_priv); - - return stmmac_resume(dev); -} -#endif /* CONFIG_PM_SLEEP */ - -static int __maybe_unused socfpga_dwmac_runtime_suspend(struct device *dev) -{ - struct net_device *ndev = dev_get_drvdata(dev); - struct stmmac_priv *priv = netdev_priv(ndev); - - stmmac_bus_clks_config(priv, false); - - return 0; -} - -static int __maybe_unused socfpga_dwmac_runtime_resume(struct device *dev) -{ - struct net_device *ndev = dev_get_drvdata(dev); - struct stmmac_priv *priv = netdev_priv(ndev); - - return stmmac_bus_clks_config(priv, true); -} - -static const struct dev_pm_ops socfpga_dwmac_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(stmmac_suspend, socfpga_dwmac_resume) - SET_RUNTIME_PM_OPS(socfpga_dwmac_runtime_suspend, socfpga_dwmac_runtime_resume, NULL) -}; - static const struct socfpga_dwmac_ops socfpga_gen5_ops = { .set_phy_mode = socfpga_gen5_set_phy_mode, }; @@ -567,10 +521,9 @@ MODULE_DEVICE_TABLE(of, socfpga_dwmac_match); static struct platform_driver socfpga_dwmac_driver = { .probe = socfpga_dwmac_probe, - .remove = stmmac_pltfr_remove, .driver = { .name = "socfpga-dwmac", - .pm = &socfpga_dwmac_pm_ops, + .pm = &stmmac_pltfr_pm_ops, .of_match_table = socfpga_dwmac_match, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index be57c6c12c1c..53d5ce1f6dc6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -23,12 +23,7 @@ #define DWMAC_50MHZ 50000000 -#define IS_PHY_IF_MODE_RGMII(iface) (iface == PHY_INTERFACE_MODE_RGMII || \ - iface == PHY_INTERFACE_MODE_RGMII_ID || \ - iface == PHY_INTERFACE_MODE_RGMII_RXID || \ - iface == PHY_INTERFACE_MODE_RGMII_TXID) - -#define IS_PHY_IF_MODE_GBIT(iface) (IS_PHY_IF_MODE_RGMII(iface) || \ +#define IS_PHY_IF_MODE_GBIT(iface) (phy_interface_mode_is_rgmii(iface) || \ iface == PHY_INTERFACE_MODE_GMII) /* STiH4xx register definitions (STiH407/STiH410 families) @@ -148,7 +143,7 @@ static void stih4xx_fix_retime_src(void *priv, int spd, unsigned int mode) src = TX_RETIME_SRC_CLKGEN; freq = DWMAC_50MHZ; } - } else if (IS_PHY_IF_MODE_RGMII(dwmac->interface)) { + } else if (phy_interface_mode_is_rgmii(dwmac->interface)) 
{ /* On GiGa clk source can be either ext or from clkgen */ freq = rgmii_clock(spd); @@ -238,6 +233,29 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, return 0; } +static int sti_dwmac_init(struct platform_device *pdev, void *bsp_priv) +{ + struct sti_dwmac *dwmac = bsp_priv; + int ret; + + ret = clk_prepare_enable(dwmac->clk); + if (ret) + return ret; + + ret = sti_dwmac_set_mode(dwmac); + if (ret) + clk_disable_unprepare(dwmac->clk); + + return ret; +} + +static void sti_dwmac_exit(struct platform_device *pdev, void *bsp_priv) +{ + struct sti_dwmac *dwmac = bsp_priv; + + clk_disable_unprepare(dwmac->clk); +} + static int sti_dwmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; @@ -274,59 +292,12 @@ static int sti_dwmac_probe(struct platform_device *pdev) plat_dat->bsp_priv = dwmac; plat_dat->fix_mac_speed = data->fix_retime_src; + plat_dat->init = sti_dwmac_init; + plat_dat->exit = sti_dwmac_exit; - ret = clk_prepare_enable(dwmac->clk); - if (ret) - return ret; - - ret = sti_dwmac_set_mode(dwmac); - if (ret) - goto disable_clk; - - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); - if (ret) - goto disable_clk; - - return 0; - -disable_clk: - clk_disable_unprepare(dwmac->clk); - - return ret; -} - -static void sti_dwmac_remove(struct platform_device *pdev) -{ - struct sti_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev); - - stmmac_dvr_remove(&pdev->dev); - - clk_disable_unprepare(dwmac->clk); + return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); } -static int sti_dwmac_suspend(struct device *dev) -{ - struct sti_dwmac *dwmac = get_stmmac_bsp_priv(dev); - int ret = stmmac_suspend(dev); - - clk_disable_unprepare(dwmac->clk); - - return ret; -} - -static int sti_dwmac_resume(struct device *dev) -{ - struct sti_dwmac *dwmac = get_stmmac_bsp_priv(dev); - - clk_prepare_enable(dwmac->clk); - sti_dwmac_set_mode(dwmac); - - return stmmac_resume(dev); -} - -static DEFINE_SIMPLE_DEV_PM_OPS(sti_dwmac_pm_ops, sti_dwmac_suspend, - sti_dwmac_resume); - static const struct sti_dwmac_of_data stih4xx_dwmac_data = { .fix_retime_src = stih4xx_fix_retime_src, }; @@ -339,10 +310,9 @@ MODULE_DEVICE_TABLE(of, sti_dwmac_match); static struct platform_driver sti_dwmac_driver = { .probe = sti_dwmac_probe, - .remove = sti_dwmac_remove, .driver = { .name = "sti-dwmac", - .pm = pm_sleep_ptr(&sti_dwmac_pm_ops), + .pm = &stmmac_pltfr_pm_ops, .of_match_table = sti_dwmac_match, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index c3d321192581..1eb16eec9c0d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -119,7 +119,7 @@ struct stm32_ops { u32 syscfg_clr_off; }; -static int stm32_dwmac_clk_enable(struct stm32_dwmac *dwmac, bool resume) +static int stm32_dwmac_clk_enable(struct stm32_dwmac *dwmac) { int ret; @@ -127,11 +127,9 @@ static int stm32_dwmac_clk_enable(struct stm32_dwmac *dwmac, bool resume) if (ret) goto err_clk_tx; - if (!dwmac->ops->clk_rx_enable_in_suspend || !resume) { - ret = clk_prepare_enable(dwmac->clk_rx); - if (ret) - goto err_clk_rx; - } + ret = clk_prepare_enable(dwmac->clk_rx); + if (ret) + goto err_clk_rx; ret = clk_prepare_enable(dwmac->syscfg_clk); if (ret) @@ -148,15 +146,14 @@ static int stm32_dwmac_clk_enable(struct stm32_dwmac *dwmac, bool resume) err_clk_eth_ck: clk_disable_unprepare(dwmac->syscfg_clk); err_syscfg_clk: - if (!dwmac->ops->clk_rx_enable_in_suspend || !resume) - 
clk_disable_unprepare(dwmac->clk_rx); + clk_disable_unprepare(dwmac->clk_rx); err_clk_rx: clk_disable_unprepare(dwmac->clk_tx); err_clk_tx: return ret; } -static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat, bool resume) +static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat) { struct stm32_dwmac *dwmac = plat_dat->bsp_priv; int ret; @@ -167,7 +164,7 @@ static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat, bool resume) return ret; } - return stm32_dwmac_clk_enable(dwmac, resume); + return stm32_dwmac_clk_enable(dwmac); } static int stm32mp1_select_ethck_external(struct plat_stmmacenet_data *plat_dat) @@ -382,12 +379,10 @@ static int stm32mcu_set_mode(struct plat_stmmacenet_data *plat_dat) SYSCFG_MCU_ETH_MASK, val << 23); } -static void stm32_dwmac_clk_disable(struct stm32_dwmac *dwmac, bool suspend) +static void stm32_dwmac_clk_disable(struct stm32_dwmac *dwmac) { clk_disable_unprepare(dwmac->clk_tx); - if (!dwmac->ops->clk_rx_enable_in_suspend || !suspend) - clk_disable_unprepare(dwmac->clk_rx); - + clk_disable_unprepare(dwmac->clk_rx); clk_disable_unprepare(dwmac->syscfg_clk); if (dwmac->enable_eth_ck) clk_disable_unprepare(dwmac->clk_eth_ck); @@ -541,18 +536,32 @@ static int stm32_dwmac_probe(struct platform_device *pdev) plat_dat->flags |= STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP; plat_dat->bsp_priv = dwmac; - ret = stm32_dwmac_init(plat_dat, false); + ret = stm32_dwmac_init(plat_dat); if (ret) return ret; + /* If this platform requires the clock to be running in suspend, + * prepare and enable the receive clock an additional time to keep + * it running. + */ + if (dwmac->ops->clk_rx_enable_in_suspend) { + ret = clk_prepare_enable(dwmac->clk_rx); + if (ret) + goto err_clk_disable; + } + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) - goto err_clk_disable; + goto err_clk_disable_suspend; return 0; +err_clk_disable_suspend: + if (dwmac->ops->clk_rx_enable_in_suspend) + clk_disable_unprepare(dwmac->clk_rx); + err_clk_disable: - stm32_dwmac_clk_disable(dwmac, false); + stm32_dwmac_clk_disable(dwmac); return ret; } @@ -565,7 +574,15 @@ static void stm32_dwmac_remove(struct platform_device *pdev) stmmac_dvr_remove(&pdev->dev); - stm32_dwmac_clk_disable(dwmac, false); + /* If this platform requires the clock to be running in suspend, + * we need to disable and unprepare the receive clock an additional + * time to balance the extra clk_prepare_enable() in the probe + * function. 
+ */ + if (dwmac->ops->clk_rx_enable_in_suspend) + clk_disable_unprepare(dwmac->clk_rx); + + stm32_dwmac_clk_disable(dwmac); if (dwmac->irq_pwr_wakeup >= 0) { dev_pm_clear_wake_irq(&pdev->dev); @@ -596,7 +613,7 @@ static int stm32_dwmac_suspend(struct device *dev) if (ret) return ret; - stm32_dwmac_clk_disable(dwmac, true); + stm32_dwmac_clk_disable(dwmac); if (dwmac->ops->suspend) ret = dwmac->ops->suspend(dwmac); @@ -614,7 +631,7 @@ static int stm32_dwmac_resume(struct device *dev) if (dwmac->ops->resume) dwmac->ops->resume(dwmac); - ret = stm32_dwmac_init(priv->plat, true); + ret = stm32_dwmac_init(priv->plat); if (ret) return ret; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 85723a78793a..fd6518e252e3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1239,14 +1239,10 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - ret = sun8i_dwmac_init(pdev, plat_dat->bsp_priv); + ret = stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); if (ret) goto dwmac_syscon; - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); - if (ret) - goto dwmac_exit; - ndev = dev_get_drvdata(&pdev->dev); priv = netdev_priv(ndev); @@ -1283,9 +1279,7 @@ dwmac_mux: clk_put(gmac->ephy_clk); dwmac_remove: pm_runtime_put_noidle(&pdev->dev); - stmmac_dvr_remove(&pdev->dev); -dwmac_exit: - sun8i_dwmac_exit(pdev, gmac); + stmmac_pltfr_remove(pdev); dwmac_syscon: sun8i_dwmac_unset_syscon(gmac); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index 9f098ff0ff05..1eadcf5d1ad6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -72,28 +72,28 @@ static void sun7i_gmac_exit(struct platform_device *pdev, void *priv) regulator_disable(gmac->regulator); } -static void sun7i_fix_speed(void *priv, int speed, unsigned int mode) +static int sun7i_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i, + phy_interface_t interface, int speed) { - struct sunxi_priv_data *gmac = priv; - - /* only GMII mode requires us to reconfigure the clock lines */ - if (gmac->interface != PHY_INTERFACE_MODE_GMII) - return; - - if (gmac->clk_enabled) { - clk_disable(gmac->tx_clk); - gmac->clk_enabled = 0; - } - clk_unprepare(gmac->tx_clk); - - if (speed == 1000) { - clk_set_rate(gmac->tx_clk, SUN7I_GMAC_GMII_RGMII_RATE); - clk_prepare_enable(gmac->tx_clk); - gmac->clk_enabled = 1; - } else { - clk_set_rate(gmac->tx_clk, SUN7I_GMAC_MII_RATE); - clk_prepare(gmac->tx_clk); + struct sunxi_priv_data *gmac = bsp_priv; + + if (interface == PHY_INTERFACE_MODE_GMII) { + if (gmac->clk_enabled) { + clk_disable(gmac->tx_clk); + gmac->clk_enabled = 0; + } + clk_unprepare(gmac->tx_clk); + + if (speed == 1000) { + clk_set_rate(gmac->tx_clk, SUN7I_GMAC_GMII_RGMII_RATE); + clk_prepare_enable(gmac->tx_clk); + gmac->clk_enabled = 1; + } else { + clk_set_rate(gmac->tx_clk, SUN7I_GMAC_MII_RATE); + clk_prepare(gmac->tx_clk); + } } + return 0; } static int sun7i_gmac_probe(struct platform_device *pdev) @@ -140,24 +140,11 @@ static int sun7i_gmac_probe(struct platform_device *pdev) plat_dat->bsp_priv = gmac; plat_dat->init = sun7i_gmac_init; plat_dat->exit = sun7i_gmac_exit; - plat_dat->fix_mac_speed = sun7i_fix_speed; + plat_dat->set_clk_tx_rate = sun7i_set_clk_tx_rate; plat_dat->tx_fifo_size = 4096; plat_dat->rx_fifo_size = 16384; - ret = sun7i_gmac_init(pdev, 
plat_dat->bsp_priv); - if (ret) - return ret; - - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); - if (ret) - goto err_gmac_exit; - - return 0; - -err_gmac_exit: - sun7i_gmac_exit(pdev, plat_dat->bsp_priv); - - return ret; + return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); } static const struct of_device_id sun7i_dwmac_match[] = { @@ -168,7 +155,6 @@ MODULE_DEVICE_TABLE(of, sun7i_dwmac_match); static struct platform_driver sun7i_dwmac_driver = { .probe = sun7i_gmac_probe, - .remove = stmmac_pltfr_remove, .driver = { .name = "sun7i-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index 33cf99797df5..5e6ac82a89b9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -51,21 +51,14 @@ struct visconti_eth { u32 phy_intf_sel; struct clk *phy_ref_clk; struct device *dev; - spinlock_t lock; /* lock to protect register update */ }; -static void visconti_eth_fix_mac_speed(void *priv, int speed, unsigned int mode) +static int visconti_eth_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i, + phy_interface_t interface, int speed) { - struct visconti_eth *dwmac = priv; + struct visconti_eth *dwmac = bsp_priv; struct net_device *netdev = dev_get_drvdata(dwmac->dev); unsigned int val, clk_sel_val = 0; - unsigned long flags; - - spin_lock_irqsave(&dwmac->lock, flags); - - /* adjust link */ - val = readl(dwmac->reg + MAC_CTRL_REG); - val &= ~(GMAC_CONFIG_PS | GMAC_CONFIG_FES); switch (speed) { case SPEED_1000: @@ -77,24 +70,19 @@ static void visconti_eth_fix_mac_speed(void *priv, int speed, unsigned int mode) clk_sel_val = ETHER_CLK_SEL_FREQ_SEL_25M; if (dwmac->phy_intf_sel == ETHER_CONFIG_INTF_RMII) clk_sel_val = ETHER_CLK_SEL_DIV_SEL_2; - val |= GMAC_CONFIG_PS | GMAC_CONFIG_FES; break; case SPEED_10: if (dwmac->phy_intf_sel == ETHER_CONFIG_INTF_RGMII) clk_sel_val = ETHER_CLK_SEL_FREQ_SEL_2P5M; if (dwmac->phy_intf_sel == ETHER_CONFIG_INTF_RMII) clk_sel_val = ETHER_CLK_SEL_DIV_SEL_20; - val |= GMAC_CONFIG_PS; break; default: /* No bit control */ netdev_err(netdev, "Unsupported speed request (%d)", speed); - spin_unlock_irqrestore(&dwmac->lock, flags); - return; + return -EINVAL; } - writel(val, dwmac->reg + MAC_CTRL_REG); - /* Stop internal clock */ val = readl(dwmac->reg + REG_ETHER_CLOCK_SEL); val &= ~(ETHER_CLK_SEL_RMII_CLK_EN | ETHER_CLK_SEL_RX_TX_CLK_EN); @@ -136,7 +124,7 @@ static void visconti_eth_fix_mac_speed(void *priv, int speed, unsigned int mode) break; } - spin_unlock_irqrestore(&dwmac->lock, flags); + return 0; } static int visconti_eth_init_hw(struct platform_device *pdev, struct plat_stmmacenet_data *plat_dat) @@ -228,11 +216,10 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev) if (!dwmac) return -ENOMEM; - spin_lock_init(&dwmac->lock); dwmac->reg = stmmac_res.addr; dwmac->dev = &pdev->dev; plat_dat->bsp_priv = dwmac; - plat_dat->fix_mac_speed = visconti_eth_fix_mac_speed; + plat_dat->set_clk_tx_rate = visconti_eth_set_clk_tx_rate; ret = visconti_eth_clock_probe(pdev, plat_dat); if (ret) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 42fe29a4e300..f4694fd576f5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -17,11 +17,7 @@ #define GMAC_EXT_CONFIG 0x00000004 #define GMAC_PACKET_FILTER 0x00000008 #define GMAC_HASH_TAB(x) (0x10 + (x) * 4) 
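The hunk below begins the other main theme of this patch: the VLAN register definitions and callbacks duplicated between dwmac4 and dwxgmac2 move into a shared stmmac_vlan module, reached through the new hwif .vlan ops table further down. The removed dwmac4_get_num_vlan() later in this file decoded HW_FEATURE3.NRVF into a filter count; assuming the shared stmmac_get_num_vlan() keeps that decode, it reduces to a small table:

    /* NRVF field value -> number of VLAN filters; anything above 5
     * falls back to a single filter, as in the removed switch.
     */
    static unsigned int nrvf_to_num_vlan(unsigned int nrvf)
    {
            static const unsigned int num_vlan[] = { 1, 4, 8, 16, 24, 32 };

            return nrvf < ARRAY_SIZE(num_vlan) ? num_vlan[nrvf] : 1;
    }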
-#define GMAC_VLAN_TAG 0x00000050 -#define GMAC_VLAN_TAG_DATA 0x00000054 -#define GMAC_VLAN_HASH_TABLE 0x00000058 #define GMAC_RX_FLOW_CTRL 0x00000090 -#define GMAC_VLAN_INCL 0x00000060 #define GMAC_QX_TX_FLOW_CTRL(x) (0x70 + x * 4) #define GMAC_TXQ_PRTY_MAP0 0x98 #define GMAC_TXQ_PRTY_MAP1 0x9C @@ -31,7 +27,6 @@ #define GMAC_RXQ_CTRL3 0x000000ac #define GMAC_INT_STATUS 0x000000b0 #define GMAC_INT_EN 0x000000b4 -#define GMAC_1US_TIC_COUNTER 0x000000dc #define GMAC_PCS_BASE 0x000000e0 #define GMAC_PHYIF_CONTROL_STATUS 0x000000f8 #define GMAC_PMT 0x000000c0 @@ -82,42 +77,6 @@ #define GMAC_MAX_PERFECT_ADDRESSES 128 -/* MAC VLAN */ -#define GMAC_VLAN_EDVLP BIT(26) -#define GMAC_VLAN_VTHM BIT(25) -#define GMAC_VLAN_DOVLTC BIT(20) -#define GMAC_VLAN_ESVL BIT(18) -#define GMAC_VLAN_ETV BIT(16) -#define GMAC_VLAN_VID GENMASK(15, 0) -#define GMAC_VLAN_VLTI BIT(20) -#define GMAC_VLAN_CSVL BIT(19) -#define GMAC_VLAN_VLC GENMASK(17, 16) -#define GMAC_VLAN_VLC_SHIFT 16 -#define GMAC_VLAN_VLHT GENMASK(15, 0) - -/* MAC VLAN Tag */ -#define GMAC_VLAN_TAG_VID GENMASK(15, 0) -#define GMAC_VLAN_TAG_ETV BIT(16) - -/* MAC VLAN Tag Control */ -#define GMAC_VLAN_TAG_CTRL_OB BIT(0) -#define GMAC_VLAN_TAG_CTRL_CT BIT(1) -#define GMAC_VLAN_TAG_CTRL_OFS_MASK GENMASK(6, 2) -#define GMAC_VLAN_TAG_CTRL_OFS_SHIFT 2 -#define GMAC_VLAN_TAG_CTRL_EVLS_MASK GENMASK(22, 21) -#define GMAC_VLAN_TAG_CTRL_EVLS_SHIFT 21 -#define GMAC_VLAN_TAG_CTRL_EVLRXS BIT(24) - -#define GMAC_VLAN_TAG_STRIP_NONE (0x0 << GMAC_VLAN_TAG_CTRL_EVLS_SHIFT) -#define GMAC_VLAN_TAG_STRIP_PASS (0x1 << GMAC_VLAN_TAG_CTRL_EVLS_SHIFT) -#define GMAC_VLAN_TAG_STRIP_FAIL (0x2 << GMAC_VLAN_TAG_CTRL_EVLS_SHIFT) -#define GMAC_VLAN_TAG_STRIP_ALL (0x3 << GMAC_VLAN_TAG_CTRL_EVLS_SHIFT) - -/* MAC VLAN Tag Data/Filter */ -#define GMAC_VLAN_TAG_DATA_VID GENMASK(15, 0) -#define GMAC_VLAN_TAG_DATA_VEN BIT(16) -#define GMAC_VLAN_TAG_DATA_ETV BIT(17) - /* MAC RX Queue Enable */ #define GMAC_RX_QUEUE_CLEAR(queue) ~(GENMASK(1, 0) << ((queue) * 2)) #define GMAC_RX_AV_QUEUE_ENABLE(queue) BIT((queue) * 2) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index cc4ddf608652..9c2549d4100f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -18,6 +18,7 @@ #include "stmmac.h" #include "stmmac_fpe.h" #include "stmmac_pcs.h" +#include "stmmac_vlan.h" #include "dwmac4.h" #include "dwmac5.h" @@ -448,165 +449,6 @@ static void dwmac4_set_eee_timer(struct mac_device_info *hw, int ls, int tw) writel(value, ioaddr + GMAC4_LPI_TIMER_CTRL); } -static void dwmac4_write_single_vlan(struct net_device *dev, u16 vid) -{ - void __iomem *ioaddr = (void __iomem *)dev->base_addr; - u32 val; - - val = readl(ioaddr + GMAC_VLAN_TAG); - val &= ~GMAC_VLAN_TAG_VID; - val |= GMAC_VLAN_TAG_ETV | vid; - - writel(val, ioaddr + GMAC_VLAN_TAG); -} - -static int dwmac4_write_vlan_filter(struct net_device *dev, - struct mac_device_info *hw, - u8 index, u32 data) -{ - void __iomem *ioaddr = (void __iomem *)dev->base_addr; - int ret; - u32 val; - - if (index >= hw->num_vlan) - return -EINVAL; - - writel(data, ioaddr + GMAC_VLAN_TAG_DATA); - - val = readl(ioaddr + GMAC_VLAN_TAG); - val &= ~(GMAC_VLAN_TAG_CTRL_OFS_MASK | - GMAC_VLAN_TAG_CTRL_CT | - GMAC_VLAN_TAG_CTRL_OB); - val |= (index << GMAC_VLAN_TAG_CTRL_OFS_SHIFT) | GMAC_VLAN_TAG_CTRL_OB; - - writel(val, ioaddr + GMAC_VLAN_TAG); - - ret = readl_poll_timeout(ioaddr + GMAC_VLAN_TAG, val, - !(val & GMAC_VLAN_TAG_CTRL_OB), - 1000, 
500000); - if (ret) { - netdev_err(dev, "Timeout accessing MAC_VLAN_Tag_Filter\n"); - return -EBUSY; - } - - return 0; -} - -static int dwmac4_add_hw_vlan_rx_fltr(struct net_device *dev, - struct mac_device_info *hw, - __be16 proto, u16 vid) -{ - int index = -1; - u32 val = 0; - int i, ret; - - if (vid > 4095) - return -EINVAL; - - /* Single Rx VLAN Filter */ - if (hw->num_vlan == 1) { - /* For single VLAN filter, VID 0 means VLAN promiscuous */ - if (vid == 0) { - netdev_warn(dev, "Adding VLAN ID 0 is not supported\n"); - return -EPERM; - } - - if (hw->vlan_filter[0] & GMAC_VLAN_TAG_VID) { - netdev_err(dev, "Only single VLAN ID supported\n"); - return -EPERM; - } - - hw->vlan_filter[0] = vid; - dwmac4_write_single_vlan(dev, vid); - - return 0; - } - - /* Extended Rx VLAN Filter Enable */ - val |= GMAC_VLAN_TAG_DATA_ETV | GMAC_VLAN_TAG_DATA_VEN | vid; - - for (i = 0; i < hw->num_vlan; i++) { - if (hw->vlan_filter[i] == val) - return 0; - else if (!(hw->vlan_filter[i] & GMAC_VLAN_TAG_DATA_VEN)) - index = i; - } - - if (index == -1) { - netdev_err(dev, "MAC_VLAN_Tag_Filter full (size: %0u)\n", - hw->num_vlan); - return -EPERM; - } - - ret = dwmac4_write_vlan_filter(dev, hw, index, val); - - if (!ret) - hw->vlan_filter[index] = val; - - return ret; -} - -static int dwmac4_del_hw_vlan_rx_fltr(struct net_device *dev, - struct mac_device_info *hw, - __be16 proto, u16 vid) -{ - int i, ret = 0; - - /* Single Rx VLAN Filter */ - if (hw->num_vlan == 1) { - if ((hw->vlan_filter[0] & GMAC_VLAN_TAG_VID) == vid) { - hw->vlan_filter[0] = 0; - dwmac4_write_single_vlan(dev, 0); - } - return 0; - } - - /* Extended Rx VLAN Filter Enable */ - for (i = 0; i < hw->num_vlan; i++) { - if ((hw->vlan_filter[i] & GMAC_VLAN_TAG_DATA_VID) == vid) { - ret = dwmac4_write_vlan_filter(dev, hw, i, 0); - - if (!ret) - hw->vlan_filter[i] = 0; - else - return ret; - } - } - - return ret; -} - -static void dwmac4_restore_hw_vlan_rx_fltr(struct net_device *dev, - struct mac_device_info *hw) -{ - void __iomem *ioaddr = hw->pcsr; - u32 value; - u32 hash; - u32 val; - int i; - - /* Single Rx VLAN Filter */ - if (hw->num_vlan == 1) { - dwmac4_write_single_vlan(dev, hw->vlan_filter[0]); - return; - } - - /* Extended Rx VLAN Filter Enable */ - for (i = 0; i < hw->num_vlan; i++) { - if (hw->vlan_filter[i] & GMAC_VLAN_TAG_DATA_VEN) { - val = hw->vlan_filter[i]; - dwmac4_write_vlan_filter(dev, hw, i, val); - } - } - - hash = readl(ioaddr + GMAC_VLAN_HASH_TABLE); - if (hash & GMAC_VLAN_VLHT) { - value = readl(ioaddr + GMAC_VLAN_TAG); - value |= GMAC_VLAN_VTHM; - writel(value, ioaddr + GMAC_VLAN_TAG); - } -} - static void dwmac4_set_filter(struct mac_device_info *hw, struct net_device *dev) { @@ -965,45 +807,6 @@ static void dwmac4_set_mac_loopback(void __iomem *ioaddr, bool enable) writel(value, ioaddr + GMAC_CONFIG); } -static void dwmac4_update_vlan_hash(struct mac_device_info *hw, u32 hash, - u16 perfect_match, bool is_double) -{ - void __iomem *ioaddr = hw->pcsr; - u32 value; - - writel(hash, ioaddr + GMAC_VLAN_HASH_TABLE); - - value = readl(ioaddr + GMAC_VLAN_TAG); - - if (hash) { - value |= GMAC_VLAN_VTHM | GMAC_VLAN_ETV; - if (is_double) { - value |= GMAC_VLAN_EDVLP; - value |= GMAC_VLAN_ESVL; - value |= GMAC_VLAN_DOVLTC; - } - - writel(value, ioaddr + GMAC_VLAN_TAG); - } else if (perfect_match) { - u32 value = GMAC_VLAN_ETV; - - if (is_double) { - value |= GMAC_VLAN_EDVLP; - value |= GMAC_VLAN_ESVL; - value |= GMAC_VLAN_DOVLTC; - } - - writel(value | perfect_match, ioaddr + GMAC_VLAN_TAG); - } else { - value &= ~(GMAC_VLAN_VTHM | 
GMAC_VLAN_ETV); - value &= ~(GMAC_VLAN_EDVLP | GMAC_VLAN_ESVL); - value &= ~GMAC_VLAN_DOVLTC; - value &= ~GMAC_VLAN_VID; - - writel(value, ioaddr + GMAC_VLAN_TAG); - } -} - static void dwmac4_sarc_configure(void __iomem *ioaddr, int val) { u32 value = readl(ioaddr + GMAC_CONFIG); @@ -1014,19 +817,6 @@ static void dwmac4_sarc_configure(void __iomem *ioaddr, int val) writel(value, ioaddr + GMAC_CONFIG); } -static void dwmac4_enable_vlan(struct mac_device_info *hw, u32 type) -{ - void __iomem *ioaddr = hw->pcsr; - u32 value; - - value = readl(ioaddr + GMAC_VLAN_INCL); - value |= GMAC_VLAN_VLTI; - value |= GMAC_VLAN_CSVL; /* Only use SVLAN */ - value &= ~GMAC_VLAN_VLC; - value |= (type << GMAC_VLAN_VLC_SHIFT) & GMAC_VLAN_VLC; - writel(value, ioaddr + GMAC_VLAN_INCL); -} - static void dwmac4_set_arp_offload(struct mac_device_info *hw, bool en, u32 addr) { @@ -1143,35 +933,6 @@ static int dwmac4_config_l4_filter(struct mac_device_info *hw, u32 filter_no, return 0; } -static void dwmac4_rx_hw_vlan(struct mac_device_info *hw, - struct dma_desc *rx_desc, struct sk_buff *skb) -{ - if (hw->desc->get_rx_vlan_valid(rx_desc)) { - u16 vid = hw->desc->get_rx_vlan_tci(rx_desc); - - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); - } -} - -static void dwmac4_set_hw_vlan_mode(struct mac_device_info *hw) -{ - void __iomem *ioaddr = hw->pcsr; - u32 value = readl(ioaddr + GMAC_VLAN_TAG); - - value &= ~GMAC_VLAN_TAG_CTRL_EVLS_MASK; - - if (hw->hw_vlan_en) - /* Always strip VLAN on Receive */ - value |= GMAC_VLAN_TAG_STRIP_ALL; - else - /* Do not strip VLAN on Receive */ - value |= GMAC_VLAN_TAG_STRIP_NONE; - - /* Enable outer VLAN Tag in Rx DMA descriptor */ - value |= GMAC_VLAN_TAG_CTRL_EVLRXS; - writel(value, ioaddr + GMAC_VLAN_TAG); -} - const struct stmmac_ops dwmac4_ops = { .core_init = dwmac4_core_init, .update_caps = dwmac4_update_caps, @@ -1201,17 +962,10 @@ const struct stmmac_ops dwmac4_ops = { .debug = dwmac4_debug, .set_filter = dwmac4_set_filter, .set_mac_loopback = dwmac4_set_mac_loopback, - .update_vlan_hash = dwmac4_update_vlan_hash, .sarc_configure = dwmac4_sarc_configure, - .enable_vlan = dwmac4_enable_vlan, .set_arp_offload = dwmac4_set_arp_offload, .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, - .add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr, - .del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr, - .restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr, - .rx_hw_vlan = dwmac4_rx_hw_vlan, - .set_hw_vlan_mode = dwmac4_set_hw_vlan_mode, }; const struct stmmac_ops dwmac410_ops = { @@ -1244,18 +998,11 @@ const struct stmmac_ops dwmac410_ops = { .set_filter = dwmac4_set_filter, .flex_pps_config = dwmac5_flex_pps_config, .set_mac_loopback = dwmac4_set_mac_loopback, - .update_vlan_hash = dwmac4_update_vlan_hash, .sarc_configure = dwmac4_sarc_configure, - .enable_vlan = dwmac4_enable_vlan, .set_arp_offload = dwmac4_set_arp_offload, .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, .fpe_map_preemption_class = dwmac5_fpe_map_preemption_class, - .add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr, - .del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr, - .restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr, - .rx_hw_vlan = dwmac4_rx_hw_vlan, - .set_hw_vlan_mode = dwmac4_set_hw_vlan_mode, }; const struct stmmac_ops dwmac510_ops = { @@ -1292,51 +1039,13 @@ const struct stmmac_ops dwmac510_ops = { .rxp_config = dwmac5_rxp_config, .flex_pps_config = dwmac5_flex_pps_config, .set_mac_loopback = 
dwmac4_set_mac_loopback, - .update_vlan_hash = dwmac4_update_vlan_hash, .sarc_configure = dwmac4_sarc_configure, - .enable_vlan = dwmac4_enable_vlan, .set_arp_offload = dwmac4_set_arp_offload, .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, .fpe_map_preemption_class = dwmac5_fpe_map_preemption_class, - .add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr, - .del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr, - .restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr, - .rx_hw_vlan = dwmac4_rx_hw_vlan, - .set_hw_vlan_mode = dwmac4_set_hw_vlan_mode, }; -static u32 dwmac4_get_num_vlan(void __iomem *ioaddr) -{ - u32 val, num_vlan; - - val = readl(ioaddr + GMAC_HW_FEATURE3); - switch (val & GMAC_HW_FEAT_NRVF) { - case 0: - num_vlan = 1; - break; - case 1: - num_vlan = 4; - break; - case 2: - num_vlan = 8; - break; - case 3: - num_vlan = 16; - break; - case 4: - num_vlan = 24; - break; - case 5: - num_vlan = 32; - break; - default: - num_vlan = 1; - } - - return num_vlan; -} - int dwmac4_setup(struct stmmac_priv *priv) { struct mac_device_info *mac = priv->hw; @@ -1368,7 +1077,7 @@ int dwmac4_setup(struct stmmac_priv *priv) mac->mii.reg_mask = GENMASK(20, 16); mac->mii.clk_csr_shift = 8; mac->mii.clk_csr_mask = GENMASK(11, 8); - mac->num_vlan = dwmac4_get_num_vlan(priv->ioaddr); + mac->num_vlan = stmmac_get_num_vlan(priv->ioaddr); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index a03f5d771566..0d408ee17f33 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -57,19 +57,6 @@ #define XGMAC_FILTER_PR BIT(0) #define XGMAC_HASH_TABLE(x) (0x00000010 + (x) * 4) #define XGMAC_MAX_HASH_TABLE 8 -#define XGMAC_VLAN_TAG 0x00000050 -#define XGMAC_VLAN_EDVLP BIT(26) -#define XGMAC_VLAN_VTHM BIT(25) -#define XGMAC_VLAN_DOVLTC BIT(20) -#define XGMAC_VLAN_ESVL BIT(18) -#define XGMAC_VLAN_ETV BIT(16) -#define XGMAC_VLAN_VID GENMASK(15, 0) -#define XGMAC_VLAN_HASH_TABLE 0x00000058 -#define XGMAC_VLAN_INCL 0x00000060 -#define XGMAC_VLAN_VLTI BIT(20) -#define XGMAC_VLAN_CSVL BIT(19) -#define XGMAC_VLAN_VLC GENMASK(17, 16) -#define XGMAC_VLAN_VLC_SHIFT 16 #define XGMAC_RXQ_CTRL0 0x000000a0 #define XGMAC_RXQEN(x) GENMASK((x) * 2 + 1, (x) * 2) #define XGMAC_RXQEN_SHIFT(x) ((x) * 2) @@ -477,6 +464,7 @@ #define XGMAC_RDES3_RSV BIT(26) #define XGMAC_RDES3_L34T GENMASK(23, 20) #define XGMAC_RDES3_L34T_SHIFT 20 +#define XGMAC_RDES3_ET_LT GENMASK(19, 16) #define XGMAC_L34T_IP4TCP 0x1 #define XGMAC_L34T_IP4UDP 0x2 #define XGMAC_L34T_IP6TCP 0x9 @@ -486,6 +474,17 @@ #define XGMAC_RDES3_TSD BIT(6) #define XGMAC_RDES3_TSA BIT(4) +/* RDES0 (write back format) */ +#define XGMAC_RDES0_VLAN_TAG_MASK GENMASK(15, 0) + +/* Error Type or L2 Type(ET/LT) Field Number */ +#define XGMAC_ET_LT_VLAN_STAG 8 +#define XGMAC_ET_LT_VLAN_CTAG 9 +#define XGMAC_ET_LT_DVLAN_CTAG_CTAG 10 +#define XGMAC_ET_LT_DVLAN_STAG_STAG 11 +#define XGMAC_ET_LT_DVLAN_CTAG_STAG 12 +#define XGMAC_ET_LT_DVLAN_STAG_CTAG 13 + extern const struct stmmac_ops dwxgmac210_ops; extern const struct stmmac_ops dwxlgmac2_ops; extern const struct stmmac_dma_ops dwxgmac210_dma_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index a6d395c6bacd..6cadf8de4fdf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -10,6 +10,7 @@ #include "stmmac.h" #include 
"stmmac_fpe.h" #include "stmmac_ptp.h" +#include "stmmac_vlan.h" #include "dwxlgmac2.h" #include "dwxgmac2.h" @@ -614,76 +615,6 @@ static int dwxgmac2_rss_configure(struct mac_device_info *hw, return 0; } -static void dwxgmac2_update_vlan_hash(struct mac_device_info *hw, u32 hash, - u16 perfect_match, bool is_double) -{ - void __iomem *ioaddr = hw->pcsr; - - writel(hash, ioaddr + XGMAC_VLAN_HASH_TABLE); - - if (hash) { - u32 value = readl(ioaddr + XGMAC_PACKET_FILTER); - - value |= XGMAC_FILTER_VTFE; - - writel(value, ioaddr + XGMAC_PACKET_FILTER); - - value = readl(ioaddr + XGMAC_VLAN_TAG); - - value |= XGMAC_VLAN_VTHM | XGMAC_VLAN_ETV; - if (is_double) { - value |= XGMAC_VLAN_EDVLP; - value |= XGMAC_VLAN_ESVL; - value |= XGMAC_VLAN_DOVLTC; - } else { - value &= ~XGMAC_VLAN_EDVLP; - value &= ~XGMAC_VLAN_ESVL; - value &= ~XGMAC_VLAN_DOVLTC; - } - - value &= ~XGMAC_VLAN_VID; - writel(value, ioaddr + XGMAC_VLAN_TAG); - } else if (perfect_match) { - u32 value = readl(ioaddr + XGMAC_PACKET_FILTER); - - value |= XGMAC_FILTER_VTFE; - - writel(value, ioaddr + XGMAC_PACKET_FILTER); - - value = readl(ioaddr + XGMAC_VLAN_TAG); - - value &= ~XGMAC_VLAN_VTHM; - value |= XGMAC_VLAN_ETV; - if (is_double) { - value |= XGMAC_VLAN_EDVLP; - value |= XGMAC_VLAN_ESVL; - value |= XGMAC_VLAN_DOVLTC; - } else { - value &= ~XGMAC_VLAN_EDVLP; - value &= ~XGMAC_VLAN_ESVL; - value &= ~XGMAC_VLAN_DOVLTC; - } - - value &= ~XGMAC_VLAN_VID; - writel(value | perfect_match, ioaddr + XGMAC_VLAN_TAG); - } else { - u32 value = readl(ioaddr + XGMAC_PACKET_FILTER); - - value &= ~XGMAC_FILTER_VTFE; - - writel(value, ioaddr + XGMAC_PACKET_FILTER); - - value = readl(ioaddr + XGMAC_VLAN_TAG); - - value &= ~(XGMAC_VLAN_VTHM | XGMAC_VLAN_ETV); - value &= ~(XGMAC_VLAN_EDVLP | XGMAC_VLAN_ESVL); - value &= ~XGMAC_VLAN_DOVLTC; - value &= ~XGMAC_VLAN_VID; - - writel(value, ioaddr + XGMAC_VLAN_TAG); - } -} - struct dwxgmac3_error_desc { bool valid; const char *desc; @@ -1300,19 +1231,6 @@ static void dwxgmac2_sarc_configure(void __iomem *ioaddr, int val) writel(value, ioaddr + XGMAC_TX_CONFIG); } -static void dwxgmac2_enable_vlan(struct mac_device_info *hw, u32 type) -{ - void __iomem *ioaddr = hw->pcsr; - u32 value; - - value = readl(ioaddr + XGMAC_VLAN_INCL); - value |= XGMAC_VLAN_VLTI; - value |= XGMAC_VLAN_CSVL; /* Only use SVLAN */ - value &= ~XGMAC_VLAN_VLC; - value |= (type << XGMAC_VLAN_VLC_SHIFT) & XGMAC_VLAN_VLC; - writel(value, ioaddr + XGMAC_VLAN_INCL); -} - static int dwxgmac2_filter_wait(struct mac_device_info *hw) { void __iomem *ioaddr = hw->pcsr; @@ -1534,12 +1452,10 @@ const struct stmmac_ops dwxgmac210_ops = { .safety_feat_dump = dwxgmac3_safety_feat_dump, .set_mac_loopback = dwxgmac2_set_mac_loopback, .rss_configure = dwxgmac2_rss_configure, - .update_vlan_hash = dwxgmac2_update_vlan_hash, .rxp_config = dwxgmac3_rxp_config, .get_mac_tx_timestamp = dwxgmac2_get_mac_tx_timestamp, .flex_pps_config = dwxgmac2_flex_pps_config, .sarc_configure = dwxgmac2_sarc_configure, - .enable_vlan = dwxgmac2_enable_vlan, .config_l3_filter = dwxgmac2_config_l3_filter, .config_l4_filter = dwxgmac2_config_l4_filter, .set_arp_offload = dwxgmac2_set_arp_offload, @@ -1590,12 +1506,10 @@ const struct stmmac_ops dwxlgmac2_ops = { .safety_feat_dump = dwxgmac3_safety_feat_dump, .set_mac_loopback = dwxgmac2_set_mac_loopback, .rss_configure = dwxgmac2_rss_configure, - .update_vlan_hash = dwxgmac2_update_vlan_hash, .rxp_config = dwxgmac3_rxp_config, .get_mac_tx_timestamp = dwxgmac2_get_mac_tx_timestamp, .flex_pps_config = dwxgmac2_flex_pps_config, 
.sarc_configure = dwxgmac2_sarc_configure, - .enable_vlan = dwxgmac2_enable_vlan, .config_l3_filter = dwxgmac2_config_l3_filter, .config_l4_filter = dwxgmac2_config_l4_filter, .set_arp_offload = dwxgmac2_set_arp_offload, @@ -1638,6 +1552,7 @@ int dwxgmac2_setup(struct stmmac_priv *priv) mac->mii.reg_mask = GENMASK(15, 0); mac->mii.clk_csr_shift = 19; mac->mii.clk_csr_mask = GENMASK(21, 19); + mac->num_vlan = stmmac_get_num_vlan(priv->ioaddr); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index 389aad7b5c1e..a2980482fcce 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -4,6 +4,7 @@ * stmmac XGMAC support. */ +#include <linux/bitfield.h> #include <linux/stmmac.h> #include "common.h" #include "dwxgmac2.h" @@ -69,6 +70,21 @@ static int dwxgmac2_get_tx_ls(struct dma_desc *p) return (le32_to_cpu(p->des3) & XGMAC_RDES3_LD) > 0; } +static u16 dwxgmac2_wrback_get_rx_vlan_tci(struct dma_desc *p) +{ + return le32_to_cpu(p->des0) & XGMAC_RDES0_VLAN_TAG_MASK; +} + +static bool dwxgmac2_wrback_get_rx_vlan_valid(struct dma_desc *p) +{ + u32 et_lt; + + et_lt = FIELD_GET(XGMAC_RDES3_ET_LT, le32_to_cpu(p->des3)); + + return et_lt >= XGMAC_ET_LT_VLAN_STAG && + et_lt <= XGMAC_ET_LT_DVLAN_STAG_CTAG; +} + static int dwxgmac2_get_rx_frame_len(struct dma_desc *p, int rx_coe) { return (le32_to_cpu(p->des3) & XGMAC_RDES3_PL); @@ -351,6 +367,8 @@ const struct stmmac_desc_ops dwxgmac210_desc_ops = { .set_tx_owner = dwxgmac2_set_tx_owner, .set_rx_owner = dwxgmac2_set_rx_owner, .get_tx_ls = dwxgmac2_get_tx_ls, + .get_rx_vlan_tci = dwxgmac2_wrback_get_rx_vlan_tci, + .get_rx_vlan_valid = dwxgmac2_wrback_get_rx_vlan_valid, .get_rx_frame_len = dwxgmac2_get_rx_frame_len, .enable_tx_timestamp = dwxgmac2_enable_tx_timestamp, .get_tx_timestamp_status = dwxgmac2_get_tx_timestamp_status, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 31bdbab9a46c..99635b37044a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -9,6 +9,7 @@ #include "stmmac_fpe.h" #include "stmmac_ptp.h" #include "stmmac_est.h" +#include "stmmac_vlan.h" #include "dwmac4_descs.h" #include "dwxgmac2.h" @@ -120,6 +121,7 @@ static const struct stmmac_hwif_entry { const void *tc; const void *mmc; const void *est; + const void *vlan; int (*setup)(struct stmmac_priv *priv); int (*quirks)(struct stmmac_priv *priv); } stmmac_hw[] = { @@ -175,6 +177,7 @@ static const struct stmmac_hwif_entry { .desc = &dwmac4_desc_ops, .dma = &dwmac4_dma_ops, .mac = &dwmac4_ops, + .vlan = &dwmac_vlan_ops, .hwtimestamp = &stmmac_ptp, .ptp = &stmmac_ptp_clock_ops, .mode = NULL, @@ -197,6 +200,7 @@ static const struct stmmac_hwif_entry { .desc = &dwmac4_desc_ops, .dma = &dwmac4_dma_ops, .mac = &dwmac410_ops, + .vlan = &dwmac_vlan_ops, .hwtimestamp = &stmmac_ptp, .ptp = &stmmac_ptp_clock_ops, .mode = &dwmac4_ring_mode_ops, @@ -219,6 +223,7 @@ static const struct stmmac_hwif_entry { .desc = &dwmac4_desc_ops, .dma = &dwmac410_dma_ops, .mac = &dwmac410_ops, + .vlan = &dwmac_vlan_ops, .hwtimestamp = &stmmac_ptp, .ptp = &stmmac_ptp_clock_ops, .mode = &dwmac4_ring_mode_ops, @@ -241,6 +246,7 @@ static const struct stmmac_hwif_entry { .desc = &dwmac4_desc_ops, .dma = &dwmac410_dma_ops, .mac = &dwmac510_ops, + .vlan = &dwmac_vlan_ops, .hwtimestamp = &stmmac_ptp, .ptp = &stmmac_ptp_clock_ops, .mode = &dwmac4_ring_mode_ops, @@ 
-264,6 +270,7 @@ static const struct stmmac_hwif_entry { .desc = &dwxgmac210_desc_ops, .dma = &dwxgmac210_dma_ops, .mac = &dwxgmac210_ops, + .vlan = &dwxgmac210_vlan_ops, .hwtimestamp = &stmmac_ptp, .ptp = &stmmac_ptp_clock_ops, .mode = NULL, @@ -287,6 +294,7 @@ static const struct stmmac_hwif_entry { .desc = &dwxgmac210_desc_ops, .dma = &dwxgmac210_dma_ops, .mac = &dwxlgmac2_ops, + .vlan = &dwxlgmac2_vlan_ops, .hwtimestamp = &stmmac_ptp, .ptp = &stmmac_ptp_clock_ops, .mode = NULL, @@ -368,6 +376,7 @@ int stmmac_hwif_init(struct stmmac_priv *priv) mac->tc = mac->tc ? : entry->tc; mac->mmc = mac->mmc ? : entry->mmc; mac->est = mac->est ? : entry->est; + mac->vlan = mac->vlan ? : entry->vlan; priv->hw = mac; priv->fpe_cfg.reg = entry->regs.fpe_reg; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 27c63a9fc163..ae4efffb785f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -398,21 +398,6 @@ struct stmmac_ops { /* RSS */ int (*rss_configure)(struct mac_device_info *hw, struct stmmac_rss *cfg, u32 num_rxq); - /* VLAN */ - void (*update_vlan_hash)(struct mac_device_info *hw, u32 hash, - u16 perfect_match, bool is_double); - void (*enable_vlan)(struct mac_device_info *hw, u32 type); - void (*rx_hw_vlan)(struct mac_device_info *hw, struct dma_desc *rx_desc, - struct sk_buff *skb); - void (*set_hw_vlan_mode)(struct mac_device_info *hw); - int (*add_hw_vlan_rx_fltr)(struct net_device *dev, - struct mac_device_info *hw, - __be16 proto, u16 vid); - int (*del_hw_vlan_rx_fltr)(struct net_device *dev, - struct mac_device_info *hw, - __be16 proto, u16 vid); - void (*restore_hw_vlan_rx_fltr)(struct net_device *dev, - struct mac_device_info *hw); /* TX Timestamp */ int (*get_mac_tx_timestamp)(struct mac_device_info *hw, u64 *ts); /* Source Address Insertion / Replacement */ @@ -498,20 +483,6 @@ struct stmmac_ops { stmmac_do_void_callback(__priv, mac, set_mac_loopback, __args) #define stmmac_rss_configure(__priv, __args...) \ stmmac_do_callback(__priv, mac, rss_configure, __args) -#define stmmac_update_vlan_hash(__priv, __args...) \ - stmmac_do_void_callback(__priv, mac, update_vlan_hash, __args) -#define stmmac_enable_vlan(__priv, __args...) \ - stmmac_do_void_callback(__priv, mac, enable_vlan, __args) -#define stmmac_rx_hw_vlan(__priv, __args...) \ - stmmac_do_void_callback(__priv, mac, rx_hw_vlan, __args) -#define stmmac_set_hw_vlan_mode(__priv, __args...) \ - stmmac_do_void_callback(__priv, mac, set_hw_vlan_mode, __args) -#define stmmac_add_hw_vlan_rx_fltr(__priv, __args...) \ - stmmac_do_callback(__priv, mac, add_hw_vlan_rx_fltr, __args) -#define stmmac_del_hw_vlan_rx_fltr(__priv, __args...) \ - stmmac_do_callback(__priv, mac, del_hw_vlan_rx_fltr, __args) -#define stmmac_restore_hw_vlan_rx_fltr(__priv, __args...) \ - stmmac_do_void_callback(__priv, mac, restore_hw_vlan_rx_fltr, __args) #define stmmac_get_mac_tx_timestamp(__priv, __args...) \ stmmac_do_callback(__priv, mac, get_mac_tx_timestamp, __args) #define stmmac_sarc_configure(__priv, __args...) \ @@ -659,6 +630,39 @@ struct stmmac_est_ops { #define stmmac_est_irq_status(__priv, __args...) 
\ stmmac_do_void_callback(__priv, est, irq_status, __args) +struct stmmac_vlan_ops { + /* VLAN */ + void (*update_vlan_hash)(struct mac_device_info *hw, u32 hash, + u16 perfect_match, bool is_double); + void (*enable_vlan)(struct mac_device_info *hw, u32 type); + void (*rx_hw_vlan)(struct mac_device_info *hw, struct dma_desc *rx_desc, + struct sk_buff *skb); + void (*set_hw_vlan_mode)(struct mac_device_info *hw); + int (*add_hw_vlan_rx_fltr)(struct net_device *dev, + struct mac_device_info *hw, + __be16 proto, u16 vid); + int (*del_hw_vlan_rx_fltr)(struct net_device *dev, + struct mac_device_info *hw, + __be16 proto, u16 vid); + void (*restore_hw_vlan_rx_fltr)(struct net_device *dev, + struct mac_device_info *hw); +}; + +#define stmmac_update_vlan_hash(__priv, __args...) \ + stmmac_do_void_callback(__priv, vlan, update_vlan_hash, __args) +#define stmmac_enable_vlan(__priv, __args...) \ + stmmac_do_void_callback(__priv, vlan, enable_vlan, __args) +#define stmmac_rx_hw_vlan(__priv, __args...) \ + stmmac_do_void_callback(__priv, vlan, rx_hw_vlan, __args) +#define stmmac_set_hw_vlan_mode(__priv, __args...) \ + stmmac_do_void_callback(__priv, vlan, set_hw_vlan_mode, __args) +#define stmmac_add_hw_vlan_rx_fltr(__priv, __args...) \ + stmmac_do_callback(__priv, vlan, add_hw_vlan_rx_fltr, __args) +#define stmmac_del_hw_vlan_rx_fltr(__priv, __args...) \ + stmmac_do_callback(__priv, vlan, del_hw_vlan_rx_fltr, __args) +#define stmmac_restore_hw_vlan_rx_fltr(__priv, __args...) \ + stmmac_do_void_callback(__priv, vlan, restore_hw_vlan_rx_fltr, __args) + struct stmmac_regs_off { const struct stmmac_fpe_reg *fpe_reg; u32 ptp_off; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index bddfa0f4aa21..1686e559f66e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -149,21 +149,9 @@ struct stmmac_channel { }; struct stmmac_fpe_cfg { - /* Serialize access to MAC Merge state between ethtool requests - * and link state updates. 
- */ - spinlock_t lock; - + struct ethtool_mmsv mmsv; const struct stmmac_fpe_reg *reg; - u32 fpe_csr; /* MAC_FPE_CTRL_STS reg cache */ - - enum ethtool_mm_verify_status status; - struct timer_list verify_timer; - bool verify_enabled; - int verify_retries; - bool pmac_enabled; - u32 verify_time; - bool tx_enabled; + u32 fpe_csr; /* MAC_FPE_CTRL_STS reg cache */ }; struct stmmac_tc_entry { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 918a32f8fda8..f702f7b7bf9f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -37,7 +37,7 @@ #define ETHTOOL_DMA_OFFSET 55 struct stmmac_stats { - char stat_string[ETH_GSTRING_LEN]; + char stat_string[ETH_GSTRING_LEN] __nonstring; int sizeof_stat; int stat_offset; }; @@ -1210,36 +1210,16 @@ static int stmmac_get_mm(struct net_device *ndev, struct ethtool_mm_state *state) { struct stmmac_priv *priv = netdev_priv(ndev); - unsigned long flags; u32 frag_size; if (!stmmac_fpe_supported(priv)) return -EOPNOTSUPP; - spin_lock_irqsave(&priv->fpe_cfg.lock, flags); - - state->max_verify_time = STMMAC_FPE_MM_MAX_VERIFY_TIME_MS; - state->verify_enabled = priv->fpe_cfg.verify_enabled; - state->pmac_enabled = priv->fpe_cfg.pmac_enabled; - state->verify_time = priv->fpe_cfg.verify_time; - state->tx_enabled = priv->fpe_cfg.tx_enabled; - state->verify_status = priv->fpe_cfg.status; state->rx_min_frag_size = ETH_ZLEN; - - /* FPE active if common tx_enabled and - * (verification success or disabled(forced)) - */ - if (state->tx_enabled && - (state->verify_status == ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED || - state->verify_status == ETHTOOL_MM_VERIFY_STATUS_DISABLED)) - state->tx_active = true; - else - state->tx_active = false; - frag_size = stmmac_fpe_get_add_frag_size(priv); state->tx_min_frag_size = ethtool_mm_frag_size_add_to_min(frag_size); - spin_unlock_irqrestore(&priv->fpe_cfg.lock, flags); + ethtool_mmsv_get_mm(&priv->fpe_cfg.mmsv, state); return 0; } @@ -1248,8 +1228,6 @@ static int stmmac_set_mm(struct net_device *ndev, struct ethtool_mm_cfg *cfg, struct netlink_ext_ack *extack) { struct stmmac_priv *priv = netdev_priv(ndev); - struct stmmac_fpe_cfg *fpe_cfg = &priv->fpe_cfg; - unsigned long flags; u32 frag_size; int err; @@ -1258,23 +1236,8 @@ static int stmmac_set_mm(struct net_device *ndev, struct ethtool_mm_cfg *cfg, if (err) return err; - /* Wait for the verification that's currently in progress to finish */ - timer_shutdown_sync(&fpe_cfg->verify_timer); - - spin_lock_irqsave(&fpe_cfg->lock, flags); - - fpe_cfg->verify_enabled = cfg->verify_enabled; - fpe_cfg->pmac_enabled = cfg->pmac_enabled; - fpe_cfg->verify_time = cfg->verify_time; - fpe_cfg->tx_enabled = cfg->tx_enabled; - - if (!cfg->verify_enabled) - fpe_cfg->status = ETHTOOL_MM_VERIFY_STATUS_DISABLED; - stmmac_fpe_set_add_frag_size(priv, frag_size); - stmmac_fpe_apply(priv); - - spin_unlock_irqrestore(&fpe_cfg->lock, flags); + ethtool_mmsv_set_mm(&priv->fpe_cfg.mmsv, cfg); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_fpe.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_fpe.c index 3a4bee029c7f..75b470ee621a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_fpe.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_fpe.c @@ -27,12 +27,6 @@ #define STMMAC_MAC_FPE_CTRL_STS_SVER BIT(1) #define STMMAC_MAC_FPE_CTRL_STS_EFPE BIT(0) -/* FPE link-partner hand-shaking mPacket type */ -enum stmmac_mpacket_type { - MPACKET_VERIFY = 0, - 
MPACKET_RESPONSE = 1, -}; - struct stmmac_fpe_reg { const u32 mac_fpe_reg; /* offset of MAC_FPE_CTRL_STS */ const u32 mtl_fpe_reg; /* offset of MTL_FPE_CTRL_STS */ @@ -48,10 +42,10 @@ bool stmmac_fpe_supported(struct stmmac_priv *priv) priv->hw->mac->fpe_map_preemption_class; } -static void stmmac_fpe_configure(struct stmmac_priv *priv, bool tx_enable, - bool pmac_enable) +static void stmmac_fpe_configure_tx(struct ethtool_mmsv *mmsv, bool tx_enable) { - struct stmmac_fpe_cfg *cfg = &priv->fpe_cfg; + struct stmmac_fpe_cfg *cfg = container_of(mmsv, struct stmmac_fpe_cfg, mmsv); + struct stmmac_priv *priv = container_of(cfg, struct stmmac_priv, fpe_cfg); const struct stmmac_fpe_reg *reg = cfg->reg; u32 num_rxq = priv->plat->rx_queues_to_use; void __iomem *ioaddr = priv->ioaddr; @@ -68,6 +62,15 @@ static void stmmac_fpe_configure(struct stmmac_priv *priv, bool tx_enable, cfg->fpe_csr = 0; } writel(cfg->fpe_csr, ioaddr + reg->mac_fpe_reg); +} + +static void stmmac_fpe_configure_pmac(struct ethtool_mmsv *mmsv, bool pmac_enable) +{ + struct stmmac_fpe_cfg *cfg = container_of(mmsv, struct stmmac_fpe_cfg, mmsv); + struct stmmac_priv *priv = container_of(cfg, struct stmmac_priv, fpe_cfg); + const struct stmmac_fpe_reg *reg = cfg->reg; + void __iomem *ioaddr = priv->ioaddr; + u32 value; value = readl(ioaddr + reg->int_en_reg); @@ -85,47 +88,45 @@ static void stmmac_fpe_configure(struct stmmac_priv *priv, bool tx_enable, writel(value, ioaddr + reg->int_en_reg); } -static void stmmac_fpe_send_mpacket(struct stmmac_priv *priv, - enum stmmac_mpacket_type type) +static void stmmac_fpe_send_mpacket(struct ethtool_mmsv *mmsv, + enum ethtool_mpacket type) { - const struct stmmac_fpe_reg *reg = priv->fpe_cfg.reg; + struct stmmac_fpe_cfg *cfg = container_of(mmsv, struct stmmac_fpe_cfg, mmsv); + struct stmmac_priv *priv = container_of(cfg, struct stmmac_priv, fpe_cfg); + const struct stmmac_fpe_reg *reg = cfg->reg; void __iomem *ioaddr = priv->ioaddr; - u32 value = priv->fpe_cfg.fpe_csr; + u32 value = cfg->fpe_csr; - if (type == MPACKET_VERIFY) + if (type == ETHTOOL_MPACKET_VERIFY) value |= STMMAC_MAC_FPE_CTRL_STS_SVER; - else if (type == MPACKET_RESPONSE) + else if (type == ETHTOOL_MPACKET_RESPONSE) value |= STMMAC_MAC_FPE_CTRL_STS_SRSP; writel(value, ioaddr + reg->mac_fpe_reg); } +static const struct ethtool_mmsv_ops stmmac_mmsv_ops = { + .configure_tx = stmmac_fpe_configure_tx, + .configure_pmac = stmmac_fpe_configure_pmac, + .send_mpacket = stmmac_fpe_send_mpacket, +}; + static void stmmac_fpe_event_status(struct stmmac_priv *priv, int status) { struct stmmac_fpe_cfg *fpe_cfg = &priv->fpe_cfg; + struct ethtool_mmsv *mmsv = &fpe_cfg->mmsv; - /* This is interrupt context, just spin_lock() */ - spin_lock(&fpe_cfg->lock); - - if (!fpe_cfg->pmac_enabled || status == FPE_EVENT_UNKNOWN) - goto unlock_out; + if (status == FPE_EVENT_UNKNOWN) + return; - /* LP has sent verify mPacket */ if ((status & FPE_EVENT_RVER) == FPE_EVENT_RVER) - stmmac_fpe_send_mpacket(priv, MPACKET_RESPONSE); + ethtool_mmsv_event_handle(mmsv, ETHTOOL_MMSV_LP_SENT_VERIFY_MPACKET); - /* Local has sent verify mPacket */ - if ((status & FPE_EVENT_TVER) == FPE_EVENT_TVER && - fpe_cfg->status != ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED) - fpe_cfg->status = ETHTOOL_MM_VERIFY_STATUS_VERIFYING; + if ((status & FPE_EVENT_TVER) == FPE_EVENT_TVER) + ethtool_mmsv_event_handle(mmsv, ETHTOOL_MMSV_LD_SENT_VERIFY_MPACKET); - /* LP has sent response mPacket */ - if ((status & FPE_EVENT_RRSP) == FPE_EVENT_RRSP && - fpe_cfg->status == 
ETHTOOL_MM_VERIFY_STATUS_VERIFYING) - fpe_cfg->status = ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED; - -unlock_out: - spin_unlock(&fpe_cfg->lock); + if ((status & FPE_EVENT_RRSP) == FPE_EVENT_RRSP) + ethtool_mmsv_event_handle(mmsv, ETHTOOL_MMSV_LP_SENT_RESPONSE_MPACKET); } void stmmac_fpe_irq_status(struct stmmac_priv *priv) @@ -164,119 +165,16 @@ void stmmac_fpe_irq_status(struct stmmac_priv *priv) stmmac_fpe_event_status(priv, status); } -/** - * stmmac_fpe_verify_timer - Timer for MAC Merge verification - * @t: timer_list struct containing private info - * - * Verify the MAC Merge capability in the local TX direction, by - * transmitting Verify mPackets up to 3 times. Wait until link - * partner responds with a Response mPacket, otherwise fail. - */ -static void stmmac_fpe_verify_timer(struct timer_list *t) -{ - struct stmmac_fpe_cfg *fpe_cfg = from_timer(fpe_cfg, t, verify_timer); - struct stmmac_priv *priv = container_of(fpe_cfg, struct stmmac_priv, - fpe_cfg); - unsigned long flags; - bool rearm = false; - - spin_lock_irqsave(&fpe_cfg->lock, flags); - - switch (fpe_cfg->status) { - case ETHTOOL_MM_VERIFY_STATUS_INITIAL: - case ETHTOOL_MM_VERIFY_STATUS_VERIFYING: - if (fpe_cfg->verify_retries != 0) { - stmmac_fpe_send_mpacket(priv, MPACKET_VERIFY); - rearm = true; - } else { - fpe_cfg->status = ETHTOOL_MM_VERIFY_STATUS_FAILED; - } - - fpe_cfg->verify_retries--; - break; - - case ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED: - stmmac_fpe_configure(priv, true, true); - break; - - default: - break; - } - - if (rearm) { - mod_timer(&fpe_cfg->verify_timer, - jiffies + msecs_to_jiffies(fpe_cfg->verify_time)); - } - - spin_unlock_irqrestore(&fpe_cfg->lock, flags); -} - -static void stmmac_fpe_verify_timer_arm(struct stmmac_fpe_cfg *fpe_cfg) -{ - if (fpe_cfg->pmac_enabled && fpe_cfg->tx_enabled && - fpe_cfg->verify_enabled && - fpe_cfg->status != ETHTOOL_MM_VERIFY_STATUS_FAILED && - fpe_cfg->status != ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED) { - timer_setup(&fpe_cfg->verify_timer, stmmac_fpe_verify_timer, 0); - mod_timer(&fpe_cfg->verify_timer, jiffies); - } -} - void stmmac_fpe_init(struct stmmac_priv *priv) { - priv->fpe_cfg.verify_retries = STMMAC_FPE_MM_MAX_VERIFY_RETRIES; - priv->fpe_cfg.verify_time = STMMAC_FPE_MM_MAX_VERIFY_TIME_MS; - priv->fpe_cfg.status = ETHTOOL_MM_VERIFY_STATUS_DISABLED; - timer_setup(&priv->fpe_cfg.verify_timer, stmmac_fpe_verify_timer, 0); - spin_lock_init(&priv->fpe_cfg.lock); + ethtool_mmsv_init(&priv->fpe_cfg.mmsv, priv->dev, + &stmmac_mmsv_ops); if ((!priv->fpe_cfg.reg || !priv->hw->mac->fpe_map_preemption_class) && priv->dma_cap.fpesel) dev_info(priv->device, "FPE is not supported by driver.\n"); } -void stmmac_fpe_apply(struct stmmac_priv *priv) -{ - struct stmmac_fpe_cfg *fpe_cfg = &priv->fpe_cfg; - - /* If verification is disabled, configure FPE right away. - * Otherwise let the timer code do it. 
- */ - if (!fpe_cfg->verify_enabled) { - stmmac_fpe_configure(priv, fpe_cfg->tx_enabled, - fpe_cfg->pmac_enabled); - } else { - fpe_cfg->status = ETHTOOL_MM_VERIFY_STATUS_INITIAL; - fpe_cfg->verify_retries = STMMAC_FPE_MM_MAX_VERIFY_RETRIES; - - if (netif_running(priv->dev)) - stmmac_fpe_verify_timer_arm(fpe_cfg); - } -} - -void stmmac_fpe_link_state_handle(struct stmmac_priv *priv, bool is_up) -{ - struct stmmac_fpe_cfg *fpe_cfg = &priv->fpe_cfg; - unsigned long flags; - - timer_shutdown_sync(&fpe_cfg->verify_timer); - - spin_lock_irqsave(&fpe_cfg->lock, flags); - - if (is_up && fpe_cfg->pmac_enabled) { - /* VERIFY process requires pmac enabled when NIC comes up */ - stmmac_fpe_configure(priv, false, true); - - /* New link => maybe new partner => new verification process */ - stmmac_fpe_apply(priv); - } else { - /* No link => turn off EFPE */ - stmmac_fpe_configure(priv, false, false); - } - - spin_unlock_irqrestore(&fpe_cfg->lock, flags); -} - int stmmac_fpe_get_add_frag_size(struct stmmac_priv *priv) { const struct stmmac_fpe_reg *reg = priv->fpe_cfg.reg; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_fpe.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_fpe.h index b884eac7142d..3fc46acf7001 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_fpe.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_fpe.h @@ -9,15 +9,10 @@ #include <linux/types.h> #include <linux/netdevice.h> -#define STMMAC_FPE_MM_MAX_VERIFY_RETRIES 3 -#define STMMAC_FPE_MM_MAX_VERIFY_TIME_MS 128 - struct stmmac_priv; -void stmmac_fpe_link_state_handle(struct stmmac_priv *priv, bool is_up); bool stmmac_fpe_supported(struct stmmac_priv *priv); void stmmac_fpe_init(struct stmmac_priv *priv); -void stmmac_fpe_apply(struct stmmac_priv *priv); void stmmac_fpe_irq_status(struct stmmac_priv *priv); int stmmac_fpe_get_add_frag_size(struct stmmac_priv *priv); void stmmac_fpe_set_add_frag_size(struct stmmac_priv *priv, u32 add_frag_size); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 59d07d0d3369..6612f9f4b9f5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -946,7 +946,7 @@ static void stmmac_mac_link_down(struct phylink_config *config, stmmac_set_eee_pls(priv, priv->hw, false); if (stmmac_fpe_supported(priv)) - stmmac_fpe_link_state_handle(priv, false); + ethtool_mmsv_link_state_handle(&priv->fpe_cfg.mmsv, false); } static void stmmac_mac_link_up(struct phylink_config *config, @@ -1064,7 +1064,7 @@ static void stmmac_mac_link_up(struct phylink_config *config, stmmac_set_eee_pls(priv, priv->hw, true); if (stmmac_fpe_supported(priv)) - stmmac_fpe_link_state_handle(priv, true); + ethtool_mmsv_link_state_handle(&priv->fpe_cfg.mmsv, true); if (priv->plat->flags & STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY) stmmac_hwtstamp_correct_latency(priv, priv); @@ -1258,20 +1258,22 @@ static int stmmac_init_phy(struct net_device *dev) static int stmmac_phy_setup(struct stmmac_priv *priv) { struct stmmac_mdio_bus_data *mdio_bus_data; - int mode = priv->plat->phy_interface; + struct phylink_config *config; struct fwnode_handle *fwnode; struct phylink_pcs *pcs; struct phylink *phylink; - priv->phylink_config.dev = &priv->dev->dev; - priv->phylink_config.type = PHYLINK_NETDEV; - priv->phylink_config.mac_managed_pm = true; + config = &priv->phylink_config; + + config->dev = &priv->dev->dev; + config->type = PHYLINK_NETDEV; + config->mac_managed_pm = true; /* Stmmac always requires an RX clock for 
hardware initialization */ - priv->phylink_config.mac_requires_rxc = true; + config->mac_requires_rxc = true; if (!(priv->plat->flags & STMMAC_FLAG_RX_CLK_RUNS_IN_LPI)) - priv->phylink_config.eee_rx_clk_stop_enable = true; + config->eee_rx_clk_stop_enable = true; /* Set the default transmit clock stop bit based on the platform glue */ priv->tx_lpi_clk_stop = priv->plat->flags & @@ -1279,13 +1281,22 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) mdio_bus_data = priv->plat->mdio_bus_data; if (mdio_bus_data) - priv->phylink_config.default_an_inband = - mdio_bus_data->default_an_inband; + config->default_an_inband = mdio_bus_data->default_an_inband; + + /* Get the PHY interface modes (at the PHY end of the link) that + * are supported by the platform. + */ + if (priv->plat->get_interfaces) + priv->plat->get_interfaces(priv, priv->plat->bsp_priv, + config->supported_interfaces); - /* Set the platform/firmware specified interface mode. Note, phylink - * deals with the PHY interface mode, not the MAC interface mode. + /* Set the platform/firmware specified interface mode if the + * supported interfaces have not already been provided using + * phy_interface as a last resort. */ - __set_bit(mode, priv->phylink_config.supported_interfaces); + if (phy_interface_empty(config->supported_interfaces)) + __set_bit(priv->plat->phy_interface, + config->supported_interfaces); /* If we have an xpcs, it defines which PHY interfaces are supported. */ if (priv->hw->xpcs) @@ -1294,29 +1305,27 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) pcs = priv->hw->phylink_pcs; if (pcs) - phy_interface_or(priv->phylink_config.supported_interfaces, - priv->phylink_config.supported_interfaces, + phy_interface_or(config->supported_interfaces, + config->supported_interfaces, pcs->supported_interfaces); if (priv->dma_cap.eee) { /* Assume all supported interfaces also support LPI */ - memcpy(priv->phylink_config.lpi_interfaces, - priv->phylink_config.supported_interfaces, - sizeof(priv->phylink_config.lpi_interfaces)); + memcpy(config->lpi_interfaces, config->supported_interfaces, + sizeof(config->lpi_interfaces)); /* All full duplex speeds above 100Mbps are supported */ - priv->phylink_config.lpi_capabilities = ~(MAC_1000FD - 1) | - MAC_100FD; - priv->phylink_config.lpi_timer_default = eee_timer * 1000; - priv->phylink_config.eee_enabled_default = true; + config->lpi_capabilities = ~(MAC_1000FD - 1) | MAC_100FD; + config->lpi_timer_default = eee_timer * 1000; + config->eee_enabled_default = true; } fwnode = priv->plat->port_node; if (!fwnode) fwnode = dev_fwnode(priv->device); - phylink = phylink_create(&priv->phylink_config, fwnode, - mode, &stmmac_phylink_mac_ops); + phylink = phylink_create(config, fwnode, priv->plat->phy_interface, + &stmmac_phylink_mac_ops); if (IS_ERR(phylink)) return PTR_ERR(phylink); @@ -4152,7 +4161,7 @@ static int stmmac_release(struct net_device *dev) stmmac_release_ptp(priv); if (stmmac_fpe_supported(priv)) - timer_shutdown_sync(&priv->fpe_cfg.verify_timer); + ethtool_mmsv_stop(&priv->fpe_cfg.mmsv); pm_runtime_put(priv->device); @@ -4488,8 +4497,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) if (priv->sarc_type) stmmac_set_desc_sarc(priv, first, priv->sarc_type); - skb_tx_timestamp(skb); - if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en)) { /* declare that device is doing timestamping */ @@ -4522,6 +4529,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) } 
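/* Editor's annotation -- not part of the patch. This hunk and the matching
 * one in stmmac_xmit() below move skb_tx_timestamp() from the start of the
 * transmit path to just before the descriptors are flushed to the DMA
 * engine. The kernel timestamping documentation asks drivers to take the
 * software TX timestamp as close to handing the frame to hardware as
 * possible; stamping the skb while the descriptors are still being built
 * records the time too early. A minimal sketch of the resulting ordering,
 * with hypothetical foo_* helpers standing in for the stmmac internals:
 */
static netdev_tx_t foo_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);

	foo_fill_tx_descriptors(dev, skb);	/* build the DMA descriptors */
	netdev_tx_sent_queue(txq, skb->len);	/* BQL accounting */
	skb_tx_timestamp(skb);			/* as late as possible */
	foo_flush_tx_descriptors(dev);		/* doorbell: hardware owns the frame */

	return NETDEV_TX_OK;
}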
netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); + skb_tx_timestamp(skb); stmmac_flush_tx_descriptors(priv, queue); stmmac_tx_timer_arm(priv, queue); @@ -4765,8 +4773,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (priv->sarc_type) stmmac_set_desc_sarc(priv, first, priv->sarc_type); - skb_tx_timestamp(skb); - /* Ready to fill the first descriptor and set the OWN bit w/o any * problems because all the descriptors are actually ready to be * passed to the DMA engine. @@ -4813,7 +4819,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); stmmac_enable_dma_transmission(priv, priv->ioaddr, queue); - + skb_tx_timestamp(skb); stmmac_flush_tx_descriptors(priv, queue); stmmac_tx_timer_arm(priv, queue); @@ -7644,7 +7650,7 @@ int stmmac_dvr_probe(struct device *device, #ifdef STMMAC_VLAN_TAG_USED /* Both mac100 and gmac support receive VLAN tag detection */ ndev->features |= NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX; - if (priv->plat->has_gmac4) { + if (priv->plat->has_gmac4 || priv->plat->has_xgmac) { ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; priv->hw->hw_vlan_en = true; } @@ -7727,9 +7733,6 @@ int stmmac_dvr_probe(struct device *device, goto error_mdio_register; } - if (priv->plat->speed_mode_2500) - priv->plat->speed_mode_2500(ndev, priv->plat->bsp_priv); - ret = stmmac_pcs_setup(ndev); if (ret) goto error_pcs_setup; @@ -7871,7 +7874,7 @@ int stmmac_suspend(struct device *dev) rtnl_unlock(); if (stmmac_fpe_supported(priv)) - timer_shutdown_sync(&priv->fpe_cfg.verify_timer); + ethtool_mmsv_stop(&priv->fpe_cfg.mmsv); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index c73eff6a56b8..43c869f64c39 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -709,6 +709,17 @@ devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) #endif /* CONFIG_OF */ EXPORT_SYMBOL_GPL(devm_stmmac_probe_config_dt); +struct clk *stmmac_pltfr_find_clk(struct plat_stmmacenet_data *plat_dat, + const char *name) +{ + for (int i = 0; i < plat_dat->num_clks; i++) + if (strcmp(plat_dat->clks[i].id, name) == 0) + return plat_dat->clks[i].clk; + + return NULL; +} +EXPORT_SYMBOL_GPL(stmmac_pltfr_find_clk); + int stmmac_get_platform_resources(struct platform_device *pdev, struct stmmac_resources *stmmac_res) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h index 72dc1a32e46d..6e6561e29d6e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h @@ -14,6 +14,9 @@ struct plat_stmmacenet_data * devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac); +struct clk *stmmac_pltfr_find_clk(struct plat_stmmacenet_data *plat_dat, + const char *name); + int stmmac_get_platform_resources(struct platform_device *pdev, struct stmmac_resources *stmmac_res); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c new file mode 100644 index 000000000000..0b6f6228ae35 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c @@ -0,0 +1,374 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025, Altera Corporation + * stmmac VLAN (802.1Q) handling + */ + +#include "stmmac.h" +#include 
"stmmac_vlan.h" + +static void vlan_write_single(struct net_device *dev, u16 vid) +{ + void __iomem *ioaddr = (void __iomem *)dev->base_addr; + u32 val; + + val = readl(ioaddr + VLAN_TAG); + val &= ~VLAN_TAG_VID; + val |= VLAN_TAG_ETV | vid; + + writel(val, ioaddr + VLAN_TAG); +} + +static int vlan_write_filter(struct net_device *dev, + struct mac_device_info *hw, + u8 index, u32 data) +{ + void __iomem *ioaddr = (void __iomem *)dev->base_addr; + int ret; + u32 val; + + if (index >= hw->num_vlan) + return -EINVAL; + + writel(data, ioaddr + VLAN_TAG_DATA); + + val = readl(ioaddr + VLAN_TAG); + val &= ~(VLAN_TAG_CTRL_OFS_MASK | + VLAN_TAG_CTRL_CT | + VLAN_TAG_CTRL_OB); + val |= (index << VLAN_TAG_CTRL_OFS_SHIFT) | VLAN_TAG_CTRL_OB; + + writel(val, ioaddr + VLAN_TAG); + + ret = readl_poll_timeout(ioaddr + VLAN_TAG, val, + !(val & VLAN_TAG_CTRL_OB), + 1000, 500000); + if (ret) { + netdev_err(dev, "Timeout accessing MAC_VLAN_Tag_Filter\n"); + return -EBUSY; + } + + return 0; +} + +static int vlan_add_hw_rx_fltr(struct net_device *dev, + struct mac_device_info *hw, + __be16 proto, u16 vid) +{ + int index = -1; + u32 val = 0; + int i, ret; + + if (vid > 4095) + return -EINVAL; + + /* Single Rx VLAN Filter */ + if (hw->num_vlan == 1) { + /* For single VLAN filter, VID 0 means VLAN promiscuous */ + if (vid == 0) { + netdev_warn(dev, "Adding VLAN ID 0 is not supported\n"); + return -EPERM; + } + + if (hw->vlan_filter[0] & VLAN_TAG_VID) { + netdev_err(dev, "Only single VLAN ID supported\n"); + return -EPERM; + } + + hw->vlan_filter[0] = vid; + vlan_write_single(dev, vid); + + return 0; + } + + /* Extended Rx VLAN Filter Enable */ + val |= VLAN_TAG_DATA_ETV | VLAN_TAG_DATA_VEN | vid; + + for (i = 0; i < hw->num_vlan; i++) { + if (hw->vlan_filter[i] == val) + return 0; + else if (!(hw->vlan_filter[i] & VLAN_TAG_DATA_VEN)) + index = i; + } + + if (index == -1) { + netdev_err(dev, "MAC_VLAN_Tag_Filter full (size: %0u)\n", + hw->num_vlan); + return -EPERM; + } + + ret = vlan_write_filter(dev, hw, index, val); + + if (!ret) + hw->vlan_filter[index] = val; + + return ret; +} + +static int vlan_del_hw_rx_fltr(struct net_device *dev, + struct mac_device_info *hw, + __be16 proto, u16 vid) +{ + int i, ret = 0; + + /* Single Rx VLAN Filter */ + if (hw->num_vlan == 1) { + if ((hw->vlan_filter[0] & VLAN_TAG_VID) == vid) { + hw->vlan_filter[0] = 0; + vlan_write_single(dev, 0); + } + return 0; + } + + /* Extended Rx VLAN Filter Enable */ + for (i = 0; i < hw->num_vlan; i++) { + if ((hw->vlan_filter[i] & VLAN_TAG_DATA_VID) == vid) { + ret = vlan_write_filter(dev, hw, i, 0); + + if (!ret) + hw->vlan_filter[i] = 0; + else + return ret; + } + } + + return ret; +} + +static void vlan_restore_hw_rx_fltr(struct net_device *dev, + struct mac_device_info *hw) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value; + u32 hash; + u32 val; + int i; + + /* Single Rx VLAN Filter */ + if (hw->num_vlan == 1) { + vlan_write_single(dev, hw->vlan_filter[0]); + return; + } + + /* Extended Rx VLAN Filter Enable */ + for (i = 0; i < hw->num_vlan; i++) { + if (hw->vlan_filter[i] & VLAN_TAG_DATA_VEN) { + val = hw->vlan_filter[i]; + vlan_write_filter(dev, hw, i, val); + } + } + + hash = readl(ioaddr + VLAN_HASH_TABLE); + if (hash & VLAN_VLHT) { + value = readl(ioaddr + VLAN_TAG); + value |= VLAN_VTHM; + writel(value, ioaddr + VLAN_TAG); + } +} + +static void vlan_update_hash(struct mac_device_info *hw, u32 hash, + u16 perfect_match, bool is_double) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value; + + writel(hash, ioaddr + VLAN_HASH_TABLE); + + 
value = readl(ioaddr + VLAN_TAG); + + if (hash) { + value |= VLAN_VTHM | VLAN_ETV; + if (is_double) { + value |= VLAN_EDVLP; + value |= VLAN_ESVL; + value |= VLAN_DOVLTC; + } + + writel(value, ioaddr + VLAN_TAG); + } else if (perfect_match) { + u32 value = VLAN_ETV; + + if (is_double) { + value |= VLAN_EDVLP; + value |= VLAN_ESVL; + value |= VLAN_DOVLTC; + } + + writel(value | perfect_match, ioaddr + VLAN_TAG); + } else { + value &= ~(VLAN_VTHM | VLAN_ETV); + value &= ~(VLAN_EDVLP | VLAN_ESVL); + value &= ~VLAN_DOVLTC; + value &= ~VLAN_VID; + + writel(value, ioaddr + VLAN_TAG); + } +} + +static void vlan_enable(struct mac_device_info *hw, u32 type) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value; + + value = readl(ioaddr + VLAN_INCL); + value |= VLAN_VLTI; + value |= VLAN_CSVL; /* Only use SVLAN */ + value &= ~VLAN_VLC; + value |= (type << VLAN_VLC_SHIFT) & VLAN_VLC; + writel(value, ioaddr + VLAN_INCL); +} + +static void vlan_rx_hw(struct mac_device_info *hw, + struct dma_desc *rx_desc, struct sk_buff *skb) +{ + if (hw->desc->get_rx_vlan_valid(rx_desc)) { + u16 vid = hw->desc->get_rx_vlan_tci(rx_desc); + + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); + } +} + +static void vlan_set_hw_mode(struct mac_device_info *hw) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value = readl(ioaddr + VLAN_TAG); + + value &= ~VLAN_TAG_CTRL_EVLS_MASK; + + if (hw->hw_vlan_en) + /* Always strip VLAN on Receive */ + value |= VLAN_TAG_STRIP_ALL; + else + /* Do not strip VLAN on Receive */ + value |= VLAN_TAG_STRIP_NONE; + + /* Enable outer VLAN Tag in Rx DMA descriptor */ + value |= VLAN_TAG_CTRL_EVLRXS; + writel(value, ioaddr + VLAN_TAG); +} + +static void dwxgmac2_update_vlan_hash(struct mac_device_info *hw, u32 hash, + u16 perfect_match, bool is_double) +{ + void __iomem *ioaddr = hw->pcsr; + + writel(hash, ioaddr + VLAN_HASH_TABLE); + + if (hash) { + u32 value = readl(ioaddr + XGMAC_PACKET_FILTER); + + value |= XGMAC_FILTER_VTFE; + + writel(value, ioaddr + XGMAC_PACKET_FILTER); + + value = readl(ioaddr + VLAN_TAG); + + value |= VLAN_VTHM | VLAN_ETV; + if (is_double) { + value |= VLAN_EDVLP; + value |= VLAN_ESVL; + value |= VLAN_DOVLTC; + } else { + value &= ~VLAN_EDVLP; + value &= ~VLAN_ESVL; + value &= ~VLAN_DOVLTC; + } + + value &= ~VLAN_VID; + writel(value, ioaddr + VLAN_TAG); + } else if (perfect_match) { + u32 value = readl(ioaddr + XGMAC_PACKET_FILTER); + + value |= XGMAC_FILTER_VTFE; + + writel(value, ioaddr + XGMAC_PACKET_FILTER); + + value = readl(ioaddr + VLAN_TAG); + + value &= ~VLAN_VTHM; + value |= VLAN_ETV; + if (is_double) { + value |= VLAN_EDVLP; + value |= VLAN_ESVL; + value |= VLAN_DOVLTC; + } else { + value &= ~VLAN_EDVLP; + value &= ~VLAN_ESVL; + value &= ~VLAN_DOVLTC; + } + + value &= ~VLAN_VID; + writel(value | perfect_match, ioaddr + VLAN_TAG); + } else { + u32 value = readl(ioaddr + XGMAC_PACKET_FILTER); + + value &= ~XGMAC_FILTER_VTFE; + + writel(value, ioaddr + XGMAC_PACKET_FILTER); + + value = readl(ioaddr + VLAN_TAG); + + value &= ~(VLAN_VTHM | VLAN_ETV); + value &= ~(VLAN_EDVLP | VLAN_ESVL); + value &= ~VLAN_DOVLTC; + value &= ~VLAN_VID; + + writel(value, ioaddr + VLAN_TAG); + } +} + +const struct stmmac_vlan_ops dwmac_vlan_ops = { + .update_vlan_hash = vlan_update_hash, + .enable_vlan = vlan_enable, + .add_hw_vlan_rx_fltr = vlan_add_hw_rx_fltr, + .del_hw_vlan_rx_fltr = vlan_del_hw_rx_fltr, + .restore_hw_vlan_rx_fltr = vlan_restore_hw_rx_fltr, + .rx_hw_vlan = vlan_rx_hw, + .set_hw_vlan_mode = vlan_set_hw_mode, +}; + +const struct stmmac_vlan_ops dwxlgmac2_vlan_ops = { + 
.update_vlan_hash = dwxgmac2_update_vlan_hash, + .enable_vlan = vlan_enable, +}; + +const struct stmmac_vlan_ops dwxgmac210_vlan_ops = { + .update_vlan_hash = dwxgmac2_update_vlan_hash, + .enable_vlan = vlan_enable, + .add_hw_vlan_rx_fltr = vlan_add_hw_rx_fltr, + .del_hw_vlan_rx_fltr = vlan_del_hw_rx_fltr, + .restore_hw_vlan_rx_fltr = vlan_restore_hw_rx_fltr, + .rx_hw_vlan = vlan_rx_hw, + .set_hw_vlan_mode = vlan_set_hw_mode, +}; + +u32 stmmac_get_num_vlan(void __iomem *ioaddr) +{ + u32 val, num_vlan; + + val = readl(ioaddr + HW_FEATURE3); + switch (val & VLAN_HW_FEAT_NRVF) { + case 0: + num_vlan = 1; + break; + case 1: + num_vlan = 4; + break; + case 2: + num_vlan = 8; + break; + case 3: + num_vlan = 16; + break; + case 4: + num_vlan = 24; + break; + case 5: + num_vlan = 32; + break; + default: + num_vlan = 1; + } + + return num_vlan; +} diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.h new file mode 100644 index 000000000000..c24f89a9049b --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2025, Altera Corporation + * stmmac VLAN(802.1Q) handling + */ + +#ifndef __STMMAC_VLAN_H__ +#define __STMMAC_VLAN_H__ + +#include <linux/bitfield.h> +#include "dwxgmac2.h" + +#define VLAN_TAG 0x00000050 +#define VLAN_TAG_DATA 0x00000054 +#define VLAN_HASH_TABLE 0x00000058 +#define VLAN_INCL 0x00000060 + +/* MAC VLAN */ +#define VLAN_EDVLP BIT(26) +#define VLAN_VTHM BIT(25) +#define VLAN_DOVLTC BIT(20) +#define VLAN_ESVL BIT(18) +#define VLAN_ETV BIT(16) +#define VLAN_VID GENMASK(15, 0) +#define VLAN_VLTI BIT(20) +#define VLAN_CSVL BIT(19) +#define VLAN_VLC GENMASK(17, 16) +#define VLAN_VLC_SHIFT 16 +#define VLAN_VLHT GENMASK(15, 0) + +/* MAC VLAN Tag */ +#define VLAN_TAG_VID GENMASK(15, 0) +#define VLAN_TAG_ETV BIT(16) + +/* MAC VLAN Tag Control */ +#define VLAN_TAG_CTRL_OB BIT(0) +#define VLAN_TAG_CTRL_CT BIT(1) +#define VLAN_TAG_CTRL_OFS_MASK GENMASK(6, 2) +#define VLAN_TAG_CTRL_OFS_SHIFT 2 +#define VLAN_TAG_CTRL_EVLS_MASK GENMASK(22, 21) +#define VLAN_TAG_CTRL_EVLS_SHIFT 21 +#define VLAN_TAG_CTRL_EVLRXS BIT(24) + +#define VLAN_TAG_STRIP_NONE FIELD_PREP(VLAN_TAG_CTRL_EVLS_MASK, 0x0) +#define VLAN_TAG_STRIP_PASS FIELD_PREP(VLAN_TAG_CTRL_EVLS_MASK, 0x1) +#define VLAN_TAG_STRIP_FAIL FIELD_PREP(VLAN_TAG_CTRL_EVLS_MASK, 0x2) +#define VLAN_TAG_STRIP_ALL FIELD_PREP(VLAN_TAG_CTRL_EVLS_MASK, 0x3) + +/* MAC VLAN Tag Data/Filter */ +#define VLAN_TAG_DATA_VID GENMASK(15, 0) +#define VLAN_TAG_DATA_VEN BIT(16) +#define VLAN_TAG_DATA_ETV BIT(17) + +/* MAC VLAN HW FEAT */ +#define HW_FEATURE3 0x00000128 +#define VLAN_HW_FEAT_NRVF GENMASK(2, 0) + +extern const struct stmmac_vlan_ops dwmac_vlan_ops; +extern const struct stmmac_vlan_ops dwxgmac210_vlan_ops; +extern const struct stmmac_vlan_ops dwxlgmac2_vlan_ops; + +u32 stmmac_get_num_vlan(void __iomem *ioaddr); + +#endif /* __STMMAC_VLAN_H__ */ diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 1e6d2335293d..988ce9119306 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -427,7 +427,7 @@ static void am65_cpsw_nuss_ndo_host_tx_timeout(struct net_device *ndev, if (netif_tx_queue_stopped(netif_txq)) { /* try recover if stopped by us */ - txq_trans_update(netif_txq); + txq_trans_update(ndev, netif_txq); netif_tx_wake_queue(netif_txq); } } @@ -2679,7 +2679,9 @@ static int 
am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) goto of_node_put; ret = of_get_mac_address(port_np, port->slave.mac_addr); - if (ret) { + if (ret == -EPROBE_DEFER) { + goto of_node_put; + } else if (ret) { am65_cpsw_am654_get_efuse_macid(port_np, port->port_id, port->slave.mac_addr); @@ -2760,7 +2762,7 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx) mutex_init(&ndev_priv->mm_lock); port->qos.link_speed = SPEED_UNKNOWN; SET_NETDEV_DEV(port->ndev, dev); - port->ndev->dev.of_node = port->slave.port_np; + device_set_node(&port->ndev->dev, of_fwnode_handle(port->slave.port_np)); eth_hw_addr_set(port->ndev, port->slave.mac_addr); @@ -3561,6 +3563,16 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) return ret; } + am65_cpsw_nuss_get_ver(common); + + ret = am65_cpsw_nuss_init_host_p(common); + if (ret) + goto err_pm_clear; + + ret = am65_cpsw_nuss_init_slave_ports(common); + if (ret) + goto err_pm_clear; + node = of_get_child_by_name(dev->of_node, "mdio"); if (!node) { dev_warn(dev, "MDIO node not found\n"); @@ -3577,16 +3589,6 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) } of_node_put(node); - am65_cpsw_nuss_get_ver(common); - - ret = am65_cpsw_nuss_init_host_p(common); - if (ret) - goto err_of_clear; - - ret = am65_cpsw_nuss_init_slave_ports(common); - if (ret) - goto err_of_clear; - /* init common data */ ale_params.dev = dev; ale_params.ale_ageout = AM65_CPSW_ALE_AGEOUT_DEFAULT; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index a984b7d84e5e..54c24cd3d3be 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1156,6 +1156,27 @@ static void cpsw_ndo_poll_controller(struct net_device *ndev) } #endif +/* We need a custom implementation of phy_do_ioctl_running() because in switch + * mode, dev->phydev may be different than the phy of the active_slave. We need + * to operate on the locally saved phy instead. 
+ */ +static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd) +{ + struct cpsw_priv *priv = netdev_priv(dev); + struct cpsw_common *cpsw = priv->cpsw; + int slave_no = cpsw_slave_index(cpsw, priv); + struct phy_device *phy; + + if (!netif_running(dev)) + return -EINVAL; + + phy = cpsw->slaves[slave_no].phy; + if (phy) + return phy_mii_ioctl(phy, req, cmd); + + return -EOPNOTSUPP; +} + static const struct net_device_ops cpsw_netdev_ops = { .ndo_open = cpsw_ndo_open, .ndo_stop = cpsw_ndo_stop, @@ -1174,6 +1195,8 @@ static const struct net_device_ops cpsw_netdev_ops = { .ndo_setup_tc = cpsw_ndo_setup_tc, .ndo_bpf = cpsw_ndo_bpf, .ndo_xdp_xmit = cpsw_ndo_xdp_xmit, + .ndo_hwtstamp_get = cpsw_hwtstamp_get, + .ndo_hwtstamp_set = cpsw_hwtstamp_set, }; static void cpsw_get_drvinfo(struct net_device *ndev, @@ -1646,6 +1669,9 @@ static int cpsw_probe(struct platform_device *pdev) ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX; ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_NDO_XMIT; + /* Hijack PHY timestamping requests in order to block them */ + if (!cpsw->data.dual_emac) + ndev->see_all_hwtstamp_requests = true; ndev->netdev_ops = &cpsw_netdev_ops; ndev->ethtool_ops = &cpsw_ethtool_ops; diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 5b5b52e4e7a7..8b9e2078c602 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -1132,7 +1132,7 @@ static const struct net_device_ops cpsw_netdev_ops = { .ndo_stop = cpsw_ndo_stop, .ndo_start_xmit = cpsw_ndo_start_xmit, .ndo_set_mac_address = cpsw_ndo_set_mac_address, - .ndo_eth_ioctl = cpsw_ndo_ioctl, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = cpsw_ndo_tx_timeout, .ndo_set_rx_mode = cpsw_ndo_set_rx_mode, @@ -1147,6 +1147,8 @@ static const struct net_device_ops cpsw_netdev_ops = { .ndo_bpf = cpsw_ndo_bpf, .ndo_xdp_xmit = cpsw_ndo_xdp_xmit, .ndo_get_port_parent_id = cpsw_get_port_parent_id, + .ndo_hwtstamp_get = cpsw_hwtstamp_get, + .ndo_hwtstamp_set = cpsw_hwtstamp_set, }; static void cpsw_get_drvinfo(struct net_device *ndev, diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index 6fe4edabba44..bc4fdf17a99e 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -614,24 +614,29 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv) writel_relaxed(ETH_P_8021Q, &cpsw->regs->vlan_ltype); } -static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) +int cpsw_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { struct cpsw_priv *priv = netdev_priv(dev); struct cpsw_common *cpsw = priv->cpsw; - struct hwtstamp_config cfg; + + /* This will only execute if dev->see_all_hwtstamp_requests is set */ + if (cfg->source != HWTSTAMP_SOURCE_NETDEV) { + NL_SET_ERR_MSG_MOD(extack, + "Switch mode only supports MAC timestamping"); + return -EOPNOTSUPP; + } if (cpsw->version != CPSW_VERSION_1 && cpsw->version != CPSW_VERSION_2 && cpsw->version != CPSW_VERSION_3) return -EOPNOTSUPP; - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; - - if (cfg.tx_type != HWTSTAMP_TX_OFF && cfg.tx_type != HWTSTAMP_TX_ON) + if (cfg->tx_type != HWTSTAMP_TX_OFF && cfg->tx_type != HWTSTAMP_TX_ON) return -ERANGE; - switch (cfg.rx_filter) { + switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: priv->rx_ts_enabled = 0; break; 
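Annotation (editorial, not part of the patch): the cpsw_priv.c hunks above and below convert cpsw from the ifreq-based SIOCSHWTSTAMP/SIOCGHWTSTAMP ioctl handlers to the ndo_hwtstamp_set()/ndo_hwtstamp_get() callbacks. The core now does the copy_from_user()/copy_to_user() on struct hwtstamp_config itself and hands the driver a kernel-space struct kernel_hwtstamp_config, which also carries the request source (cfg->source) that the switch-mode check above inspects when dev->see_all_hwtstamp_requests is set; the same entry points can then serve non-ioctl callers such as the ethtool netlink timestamping interface. A minimal sketch of such a callback pair, assuming a hypothetical foo_priv with the same tx_ts_enabled/rx_ts_enabled bookkeeping as cpsw:

#include <linux/netdevice.h>
#include <linux/net_tstamp.h>
#include <linux/netlink.h>

/* Hypothetical private state, standing in for struct cpsw_priv. */
struct foo_priv {
	bool tx_ts_enabled;
	u32 rx_ts_enabled;
};

static int foo_hwtstamp_set(struct net_device *dev,
			    struct kernel_hwtstamp_config *cfg,
			    struct netlink_ext_ack *extack)
{
	struct foo_priv *priv = netdev_priv(dev);

	if (cfg->tx_type != HWTSTAMP_TX_OFF && cfg->tx_type != HWTSTAMP_TX_ON)
		return -ERANGE;

	switch (cfg->rx_filter) {
	case HWTSTAMP_FILTER_NONE:
		priv->rx_ts_enabled = 0;
		break;
	case HWTSTAMP_FILTER_PTP_V2_EVENT:
		priv->rx_ts_enabled = HWTSTAMP_FILTER_PTP_V2_EVENT;
		break;
	default:
		NL_SET_ERR_MSG_MOD(extack, "RX filter not supported");
		return -ERANGE;
	}

	priv->tx_ts_enabled = cfg->tx_type == HWTSTAMP_TX_ON;

	/* No copy_to_user() here: on success the core copies any
	 * adjusted config back to the ioctl or netlink caller.
	 */
	return 0;
}

static int foo_hwtstamp_get(struct net_device *dev,
			    struct kernel_hwtstamp_config *cfg)
{
	struct foo_priv *priv = netdev_priv(dev);

	cfg->tx_type = priv->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
	cfg->rx_filter = priv->rx_ts_enabled;

	return 0;
}

The pair is then wired into struct net_device_ops via .ndo_hwtstamp_get/.ndo_hwtstamp_set, exactly as the cpsw_netdev_ops hunks above do for the real driver.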
@@ -651,13 +656,13 @@ static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: priv->rx_ts_enabled = HWTSTAMP_FILTER_PTP_V2_EVENT; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; break; default: return -ERANGE; } - priv->tx_ts_enabled = cfg.tx_type == HWTSTAMP_TX_ON; + priv->tx_ts_enabled = cfg->tx_type == HWTSTAMP_TX_ON; switch (cpsw->version) { case CPSW_VERSION_1: @@ -671,65 +676,40 @@ static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) WARN_ON(1); } - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; + return 0; } -static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) +int cpsw_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *cfg) { struct cpsw_common *cpsw = ndev_to_cpsw(dev); struct cpsw_priv *priv = netdev_priv(dev); - struct hwtstamp_config cfg; if (cpsw->version != CPSW_VERSION_1 && cpsw->version != CPSW_VERSION_2 && cpsw->version != CPSW_VERSION_3) return -EOPNOTSUPP; - cfg.flags = 0; - cfg.tx_type = priv->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - cfg.rx_filter = priv->rx_ts_enabled; + cfg->tx_type = priv->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + cfg->rx_filter = priv->rx_ts_enabled; - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; + return 0; } #else -static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) +int cpsw_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *cfg) { return -EOPNOTSUPP; } -static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) +int cpsw_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } #endif /*CONFIG_TI_CPTS*/ -int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd) -{ - struct cpsw_priv *priv = netdev_priv(dev); - struct cpsw_common *cpsw = priv->cpsw; - int slave_no = cpsw_slave_index(cpsw, priv); - struct phy_device *phy; - - if (!netif_running(dev)) - return -EINVAL; - - phy = cpsw->slaves[slave_no].phy; - - if (!phy_has_hwtstamp(phy)) { - switch (cmd) { - case SIOCSHWTSTAMP: - return cpsw_hwtstamp_set(dev, req); - case SIOCGHWTSTAMP: - return cpsw_hwtstamp_get(dev, req); - } - } - - if (phy) - return phy_mii_ioctl(phy, req, cmd); - - return -EOPNOTSUPP; -} - int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate) { struct cpsw_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index f2fc55d9295d..91add8925e23 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -461,7 +461,6 @@ void soft_reset(const char *module, void __iomem *reg); void cpsw_set_slave_mac(struct cpsw_slave *slave, struct cpsw_priv *priv); void cpsw_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue); int cpsw_need_resplit(struct cpsw_common *cpsw); -int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd); int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate); int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type, void *type_data); @@ -469,6 +468,11 @@ bool cpsw_shp_is_off(struct cpsw_priv *priv); void cpsw_cbs_resume(struct cpsw_slave *slave, struct cpsw_priv *priv); void cpsw_mqprio_resume(struct cpsw_slave *slave, struct cpsw_priv *priv); void cpsw_qos_clsflower_resume(struct cpsw_priv *priv); 
+int cpsw_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *cfg); +int cpsw_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack); /* ethtool */ u32 cpsw_get_msglevel(struct net_device *ndev); diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index d88a0180294e..5b8fdb882172 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -1329,10 +1329,28 @@ void icssg_ndo_get_stats64(struct net_device *ndev, stats->rx_over_errors = emac_get_stat_by_name(emac, "rx_over_errors"); stats->multicast = emac_get_stat_by_name(emac, "rx_multicast_frames"); - stats->rx_errors = ndev->stats.rx_errors; - stats->rx_dropped = ndev->stats.rx_dropped; + stats->rx_errors = ndev->stats.rx_errors + + emac_get_stat_by_name(emac, "FW_RX_ERROR") + + emac_get_stat_by_name(emac, "FW_RX_EOF_SHORT_FRMERR") + + emac_get_stat_by_name(emac, "FW_RX_B0_DROP_EARLY_EOF") + + emac_get_stat_by_name(emac, "FW_RX_EXP_FRAG_Q_DROP") + + emac_get_stat_by_name(emac, "FW_RX_FIFO_OVERRUN"); + stats->rx_dropped = ndev->stats.rx_dropped + + emac_get_stat_by_name(emac, "FW_DROPPED_PKT") + + emac_get_stat_by_name(emac, "FW_INF_PORT_DISABLED") + + emac_get_stat_by_name(emac, "FW_INF_SAV") + + emac_get_stat_by_name(emac, "FW_INF_SA_DL") + + emac_get_stat_by_name(emac, "FW_INF_PORT_BLOCKED") + + emac_get_stat_by_name(emac, "FW_INF_DROP_TAGGED") + + emac_get_stat_by_name(emac, "FW_INF_DROP_PRIOTAGGED") + + emac_get_stat_by_name(emac, "FW_INF_DROP_NOTAG") + + emac_get_stat_by_name(emac, "FW_INF_DROP_NOTMEMBER"); stats->tx_errors = ndev->stats.tx_errors; - stats->tx_dropped = ndev->stats.tx_dropped; + stats->tx_dropped = ndev->stats.tx_dropped + + emac_get_stat_by_name(emac, "FW_RTU_PKT_DROP") + + emac_get_stat_by_name(emac, "FW_TX_DROPPED_PACKET") + + emac_get_stat_by_name(emac, "FW_TX_TS_DROPPED_PACKET") + + emac_get_stat_by_name(emac, "FW_TX_JUMBO_FRM_CUTOFF"); } EXPORT_SYMBOL_GPL(icssg_ndo_get_stats64); diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index b6be4aa57a61..23c465f1ce7f 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -54,7 +54,7 @@ #define ICSSG_MAX_RFLOWS 8 /* per slice */ -#define ICSSG_NUM_PA_STATS 4 +#define ICSSG_NUM_PA_STATS 32 #define ICSSG_NUM_MIIG_STATS 60 /* Number of ICSSG related stats */ #define ICSSG_NUM_STATS (ICSSG_NUM_MIIG_STATS + ICSSG_NUM_PA_STATS) diff --git a/drivers/net/ethernet/ti/icssg/icssg_stats.c b/drivers/net/ethernet/ti/icssg/icssg_stats.c index 6f0edae38ea2..e8241e998aa9 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_stats.c +++ b/drivers/net/ethernet/ti/icssg/icssg_stats.c @@ -11,7 +11,6 @@ #define ICSSG_TX_PACKET_OFFSET 0xA0 #define ICSSG_TX_BYTE_OFFSET 0xEC -#define ICSSG_FW_STATS_BASE 0x0248 static u32 stats_base[] = { 0x54c, /* Slice 0 stats start */ 0xb18, /* Slice 1 stats start */ @@ -46,9 +45,8 @@ void emac_update_hardware_stats(struct prueth_emac *emac) if (prueth->pa_stats) { for (i = 0; i < ARRAY_SIZE(icssg_all_pa_stats); i++) { - reg = ICSSG_FW_STATS_BASE + - icssg_all_pa_stats[i].offset * - PRUETH_NUM_MACS + slice * sizeof(u32); + reg = icssg_all_pa_stats[i].offset + + slice * sizeof(u32); regmap_read(prueth->pa_stats, reg, &val); emac->pa_stats[i] += val; } @@ -80,7 +78,7 @@ int emac_get_stat_by_name(struct prueth_emac *emac, char *stat_name) if (emac->prueth->pa_stats) { for (i = 0; i 
< ARRAY_SIZE(icssg_all_pa_stats); i++) { if (!strcmp(icssg_all_pa_stats[i].name, stat_name)) - return emac->pa_stats[icssg_all_pa_stats[i].offset / sizeof(u32)]; + return emac->pa_stats[i]; } } diff --git a/drivers/net/ethernet/ti/icssg/icssg_stats.h b/drivers/net/ethernet/ti/icssg/icssg_stats.h index e88b919f532c..5ec0b38e0c67 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_stats.h +++ b/drivers/net/ethernet/ti/icssg/icssg_stats.h @@ -155,24 +155,10 @@ static const struct icssg_miig_stats icssg_all_miig_stats[] = { ICSSG_MIIG_STATS(tx_bytes, true), }; -/** - * struct pa_stats_regs - ICSSG Firmware maintained PA Stats register - * @fw_rx_cnt: Number of valid packets sent by Rx PRU to Host on PSI - * @fw_tx_cnt: Number of valid packets copied by RTU0 to Tx queues - * @fw_tx_pre_overflow: Host Egress Q (Pre-emptible) Overflow Counter - * @fw_tx_exp_overflow: Host Egress Q (Express) Overflow Counter - */ -struct pa_stats_regs { - u32 fw_rx_cnt; - u32 fw_tx_cnt; - u32 fw_tx_pre_overflow; - u32 fw_tx_exp_overflow; -}; - -#define ICSSG_PA_STATS(field) \ -{ \ - #field, \ - offsetof(struct pa_stats_regs, field), \ +#define ICSSG_PA_STATS(field) \ +{ \ + #field, \ + field, \ } struct icssg_pa_stats { @@ -181,10 +167,38 @@ struct icssg_pa_stats { }; static const struct icssg_pa_stats icssg_all_pa_stats[] = { - ICSSG_PA_STATS(fw_rx_cnt), - ICSSG_PA_STATS(fw_tx_cnt), - ICSSG_PA_STATS(fw_tx_pre_overflow), - ICSSG_PA_STATS(fw_tx_exp_overflow), + ICSSG_PA_STATS(FW_RTU_PKT_DROP), + ICSSG_PA_STATS(FW_Q0_OVERFLOW), + ICSSG_PA_STATS(FW_Q1_OVERFLOW), + ICSSG_PA_STATS(FW_Q2_OVERFLOW), + ICSSG_PA_STATS(FW_Q3_OVERFLOW), + ICSSG_PA_STATS(FW_Q4_OVERFLOW), + ICSSG_PA_STATS(FW_Q5_OVERFLOW), + ICSSG_PA_STATS(FW_Q6_OVERFLOW), + ICSSG_PA_STATS(FW_Q7_OVERFLOW), + ICSSG_PA_STATS(FW_DROPPED_PKT), + ICSSG_PA_STATS(FW_RX_ERROR), + ICSSG_PA_STATS(FW_RX_DS_INVALID), + ICSSG_PA_STATS(FW_TX_DROPPED_PACKET), + ICSSG_PA_STATS(FW_TX_TS_DROPPED_PACKET), + ICSSG_PA_STATS(FW_INF_PORT_DISABLED), + ICSSG_PA_STATS(FW_INF_SAV), + ICSSG_PA_STATS(FW_INF_SA_DL), + ICSSG_PA_STATS(FW_INF_PORT_BLOCKED), + ICSSG_PA_STATS(FW_INF_DROP_TAGGED), + ICSSG_PA_STATS(FW_INF_DROP_PRIOTAGGED), + ICSSG_PA_STATS(FW_INF_DROP_NOTAG), + ICSSG_PA_STATS(FW_INF_DROP_NOTMEMBER), + ICSSG_PA_STATS(FW_RX_EOF_SHORT_FRMERR), + ICSSG_PA_STATS(FW_RX_B0_DROP_EARLY_EOF), + ICSSG_PA_STATS(FW_TX_JUMBO_FRM_CUTOFF), + ICSSG_PA_STATS(FW_RX_EXP_FRAG_Q_DROP), + ICSSG_PA_STATS(FW_RX_FIFO_OVERRUN), + ICSSG_PA_STATS(FW_CUT_THR_PKT), + ICSSG_PA_STATS(FW_HOST_RX_PKT_CNT), + ICSSG_PA_STATS(FW_HOST_TX_PKT_CNT), + ICSSG_PA_STATS(FW_HOST_EGRESS_Q_PRE_OVERFLOW), + ICSSG_PA_STATS(FW_HOST_EGRESS_Q_EXP_OVERFLOW), }; #endif /* __NET_TI_ICSSG_STATS_H */ diff --git a/drivers/net/ethernet/ti/icssg/icssg_switch_map.h b/drivers/net/ethernet/ti/icssg/icssg_switch_map.h index 424a7e945ea8..490a9cc06fb0 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_switch_map.h +++ b/drivers/net/ethernet/ti/icssg/icssg_switch_map.h @@ -231,4 +231,37 @@ /* Start of 32 bits PA_STAT counters */ #define PA_STAT_32b_START_OFFSET 0x0080 +#define FW_RTU_PKT_DROP 0x0088 +#define FW_Q0_OVERFLOW 0x0090 +#define FW_Q1_OVERFLOW 0x0098 +#define FW_Q2_OVERFLOW 0x00A0 +#define FW_Q3_OVERFLOW 0x00A8 +#define FW_Q4_OVERFLOW 0x00B0 +#define FW_Q5_OVERFLOW 0x00B8 +#define FW_Q6_OVERFLOW 0x00C0 +#define FW_Q7_OVERFLOW 0x00C8 +#define FW_DROPPED_PKT 0x00F8 +#define FW_RX_ERROR 0x0100 +#define FW_RX_DS_INVALID 0x0108 +#define FW_TX_DROPPED_PACKET 0x0110 +#define FW_TX_TS_DROPPED_PACKET 0x0118 +#define FW_INF_PORT_DISABLED 0x0120 
+#define FW_INF_SAV 0x0128 +#define FW_INF_SA_DL 0x0130 +#define FW_INF_PORT_BLOCKED 0x0138 +#define FW_INF_DROP_TAGGED 0x0140 +#define FW_INF_DROP_PRIOTAGGED 0x0148 +#define FW_INF_DROP_NOTAG 0x0150 +#define FW_INF_DROP_NOTMEMBER 0x0158 +#define FW_RX_EOF_SHORT_FRMERR 0x0188 +#define FW_RX_B0_DROP_EARLY_EOF 0x0190 +#define FW_TX_JUMBO_FRM_CUTOFF 0x0198 +#define FW_RX_EXP_FRAG_Q_DROP 0x01A0 +#define FW_RX_FIFO_OVERRUN 0x01A8 +#define FW_CUT_THR_PKT 0x01B0 +#define FW_HOST_RX_PKT_CNT 0x0248 +#define FW_HOST_TX_PKT_CNT 0x0250 +#define FW_HOST_EGRESS_Q_PRE_OVERFLOW 0x0258 +#define FW_HOST_EGRESS_Q_EXP_OVERFLOW 0x0260 + #endif /* __NET_TI_ICSSG_SWITCH_MAP_H */ diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index e4d993f31374..a75bca1243e3 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -8,6 +8,7 @@ #include <linux/if_vlan.h> #include <linux/interrupt.h> +#include <linux/irq.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> @@ -16,6 +17,7 @@ #include <linux/cache.h> #include <linux/debugfs.h> #include <linux/seq_file.h> +#include <linux/string_choices.h> #include <linux/spi/spi.h> #include <linux/of_net.h> @@ -45,7 +47,6 @@ struct mse102x_stats { u64 xfer_err; - u64 invalid_cmd; u64 invalid_ctr; u64 invalid_dft; u64 invalid_len; @@ -56,7 +57,6 @@ struct mse102x_stats { static const char mse102x_gstrings_stats[][ETH_GSTRING_LEN] = { "SPI transfer errors", - "Invalid command", "Invalid CTR", "Invalid DFT", "Invalid frame length", @@ -85,6 +85,8 @@ struct mse102x_net_spi { struct spi_message spi_msg; struct spi_transfer spi_xfer; + bool valid_cmd_received; + #ifdef CONFIG_DEBUG_FS struct dentry *device_root; #endif @@ -98,16 +100,18 @@ static int mse102x_info_show(struct seq_file *s, void *what) { struct mse102x_net_spi *mses = s->private; - seq_printf(s, "TX ring size : %u\n", + seq_printf(s, "TX ring size : %u\n", skb_queue_len(&mses->mse102x.txq)); - seq_printf(s, "IRQ : %d\n", + seq_printf(s, "IRQ : %d\n", mses->spidev->irq); - seq_printf(s, "SPI effective speed : %lu\n", + seq_printf(s, "SPI effective speed : %lu\n", (unsigned long)mses->spi_xfer.effective_speed_hz); - seq_printf(s, "SPI mode : %x\n", + seq_printf(s, "SPI mode : %x\n", mses->spidev->mode); + seq_printf(s, "Received valid CMD once : %s\n", + str_yes_no(mses->valid_cmd_received)); return 0; } @@ -194,10 +198,10 @@ static int mse102x_rx_cmd_spi(struct mse102x_net *mse, u8 *rxb) } else if (*cmd != cpu_to_be16(DET_CMD)) { net_dbg_ratelimited("%s: Unexpected response (0x%04x)\n", __func__, *cmd); - mse->stats.invalid_cmd++; ret = -EIO; } else { memcpy(rxb, trx + 2, 2); + mses->valid_cmd_received = true; } return ret; @@ -306,7 +310,7 @@ static void mse102x_dump_packet(const char *msg, int len, const char *data) data, len, true); } -static void mse102x_rx_pkt_spi(struct mse102x_net *mse) +static irqreturn_t mse102x_rx_pkt_spi(struct mse102x_net *mse) { struct sk_buff *skb; unsigned int rxalign; @@ -315,31 +319,20 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) __be16 rx = 0; u16 cmd_resp; u8 *rxpkt; - int ret; mse102x_tx_cmd_spi(mse, CMD_CTR); - ret = mse102x_rx_cmd_spi(mse, (u8 *)&rx); - cmd_resp = be16_to_cpu(rx); - - if (ret || ((cmd_resp & CMD_MASK) != CMD_RTS)) { + if (mse102x_rx_cmd_spi(mse, (u8 *)&rx)) { usleep_range(50, 100); + return IRQ_NONE; + } - mse102x_tx_cmd_spi(mse, CMD_CTR); - ret = mse102x_rx_cmd_spi(mse, (u8 *)&rx); - if (ret) - return; - - cmd_resp = 
be16_to_cpu(rx); - if ((cmd_resp & CMD_MASK) != CMD_RTS) { - net_dbg_ratelimited("%s: Unexpected response (0x%04x)\n", - __func__, cmd_resp); - mse->stats.invalid_rts++; - drop = true; - goto drop; - } - - net_dbg_ratelimited("%s: Unexpected response to first CMD\n", - __func__); + cmd_resp = be16_to_cpu(rx); + if ((cmd_resp & CMD_MASK) != CMD_RTS) { + net_dbg_ratelimited("%s: Unexpected response (0x%04x)\n", + __func__, cmd_resp); + mse->stats.invalid_rts++; + drop = true; + goto drop; } rxlen = cmd_resp & LEN_MASK; @@ -360,7 +353,7 @@ drop: rxalign = ALIGN(rxlen + DET_SOF_LEN + DET_DFT_LEN, 4); skb = netdev_alloc_skb_ip_align(mse->ndev, rxalign); if (!skb) - return; + return IRQ_NONE; /* 2 bytes Start of frame (before ethernet header) * 2 bytes Data frame tail (after ethernet frame) @@ -370,7 +363,7 @@ drop: if (mse102x_rx_frame_spi(mse, rxpkt, rxlen, drop)) { mse->ndev->stats.rx_errors++; dev_kfree_skb(skb); - return; + return IRQ_HANDLED; } if (netif_msg_pktdata(mse)) @@ -381,6 +374,8 @@ drop: mse->ndev->stats.rx_packets++; mse->ndev->stats.rx_bytes += rxlen; + + return IRQ_HANDLED; } static int mse102x_tx_pkt_spi(struct mse102x_net *mse, struct sk_buff *txb, @@ -512,20 +507,36 @@ static irqreturn_t mse102x_irq(int irq, void *_mse) { struct mse102x_net *mse = _mse; struct mse102x_net_spi *mses = to_mse102x_spi(mse); + irqreturn_t ret; mutex_lock(&mses->lock); - mse102x_rx_pkt_spi(mse); + ret = mse102x_rx_pkt_spi(mse); mutex_unlock(&mses->lock); - return IRQ_HANDLED; + return ret; } static int mse102x_net_open(struct net_device *ndev) { + struct irq_data *irq_data = irq_get_irq_data(ndev->irq); struct mse102x_net *mse = netdev_priv(ndev); struct mse102x_net_spi *mses = to_mse102x_spi(mse); int ret; + if (!irq_data) { + netdev_err(ndev, "Invalid IRQ: %d\n", ndev->irq); + return -EINVAL; + } + + switch (irqd_get_trigger_type(irq_data)) { + case IRQ_TYPE_LEVEL_HIGH: + case IRQ_TYPE_LEVEL_LOW: + break; + default: + netdev_warn_once(ndev, "Only IRQ type level recommended, please update your device tree firmware.\n"); + break; + } + ret = request_threaded_irq(ndev->irq, NULL, mse102x_irq, IRQF_ONESHOT, ndev->name, mse); if (ret < 0) { @@ -543,7 +554,8 @@ static int mse102x_net_open(struct net_device *ndev) * So poll for possible packet(s) to re-arm the interrupt. */ mutex_lock(&mses->lock); - mse102x_rx_pkt_spi(mse); + if (mse102x_rx_pkt_spi(mse) == IRQ_NONE) + mse102x_rx_pkt_spi(mse); mutex_unlock(&mses->lock); netif_dbg(mse, ifup, ndev, "network device up\n"); diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig index 47e3e8434b9e..e5fc942c28cc 100644 --- a/drivers/net/ethernet/wangxun/Kconfig +++ b/drivers/net/ethernet/wangxun/Kconfig @@ -40,7 +40,7 @@ config NGBE will be called ngbe. config TXGBE - tristate "Wangxun(R) 10GbE PCI Express adapters support" + tristate "Wangxun(R) 10/25/40GbE PCI Express adapters support" depends on PCI depends on COMMON_CLK depends on I2C_DESIGNWARE_PLATFORM @@ -55,7 +55,7 @@ config TXGBE select PCS_XPCS select LIBWX help - This driver supports Wangxun(R) 10GbE PCI Express family of + This driver supports Wangxun(R) 10/25/40GbE PCI Express family of adapters. 
More specific information on configuring the driver is in diff --git a/drivers/net/ethernet/wangxun/libwx/Makefile b/drivers/net/ethernet/wangxun/libwx/Makefile index e9f0f1f2309b..9b78b604a94e 100644 --- a/drivers/net/ethernet/wangxun/libwx/Makefile +++ b/drivers/net/ethernet/wangxun/libwx/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_LIBWX) += libwx.o -libwx-objs := wx_hw.o wx_lib.o wx_ethtool.o wx_ptp.o +libwx-objs := wx_hw.o wx_lib.o wx_ethtool.o wx_ptp.o wx_mbx.o wx_sriov.o diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 490d34233d38..143cc1088eea 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -10,6 +10,7 @@ #include "wx_type.h" #include "wx_lib.h" +#include "wx_sriov.h" #include "wx_hw.h" static int wx_phy_read_reg_mdi(struct mii_bus *bus, int phy_addr, int devnum, int regnum) @@ -970,11 +971,28 @@ static void wx_sync_mac_table(struct wx *wx) } } +static void wx_full_sync_mac_table(struct wx *wx) +{ + int i; + + for (i = 0; i < wx->mac.num_rar_entries; i++) { + if (wx->mac_table[i].state & WX_MAC_STATE_IN_USE) { + wx_set_rar(wx, i, + wx->mac_table[i].addr, + wx->mac_table[i].pools, + WX_PSR_MAC_SWC_AD_H_AV); + } else { + wx_clear_rar(wx, i); + } + wx->mac_table[i].state &= ~(WX_MAC_STATE_MODIFIED); + } +} + /* this function destroys the first RAR entry */ void wx_mac_set_default_filter(struct wx *wx, u8 *addr) { memcpy(&wx->mac_table[0].addr, addr, ETH_ALEN); - wx->mac_table[0].pools = 1ULL; + wx->mac_table[0].pools = BIT(VMDQ_P(0)); wx->mac_table[0].state = (WX_MAC_STATE_DEFAULT | WX_MAC_STATE_IN_USE); wx_set_rar(wx, 0, wx->mac_table[0].addr, wx->mac_table[0].pools, @@ -999,7 +1017,7 @@ void wx_flush_sw_mac_table(struct wx *wx) } EXPORT_SYMBOL(wx_flush_sw_mac_table); -static int wx_add_mac_filter(struct wx *wx, u8 *addr, u16 pool) +int wx_add_mac_filter(struct wx *wx, u8 *addr, u16 pool) { u32 i; @@ -1030,7 +1048,7 @@ static int wx_add_mac_filter(struct wx *wx, u8 *addr, u16 pool) return -ENOMEM; } -static int wx_del_mac_filter(struct wx *wx, u8 *addr, u16 pool) +int wx_del_mac_filter(struct wx *wx, u8 *addr, u16 pool) { u32 i; @@ -1212,6 +1230,35 @@ static void wx_update_mc_addr_list(struct wx *wx, struct net_device *netdev) wx_dbg(wx, "Update mc addr list Complete\n"); } +static void wx_restore_vf_multicasts(struct wx *wx) +{ + u32 i, j, vector_bit, vector_reg; + struct vf_data_storage *vfinfo; + + for (i = 0; i < wx->num_vfs; i++) { + u32 vmolr = rd32(wx, WX_PSR_VM_L2CTL(i)); + + vfinfo = &wx->vfinfo[i]; + for (j = 0; j < vfinfo->num_vf_mc_hashes; j++) { + wx->addr_ctrl.mta_in_use++; + vector_reg = WX_PSR_MC_TBL_REG(vfinfo->vf_mc_hashes[j]); + vector_bit = WX_PSR_MC_TBL_BIT(vfinfo->vf_mc_hashes[j]); + wr32m(wx, WX_PSR_MC_TBL(vector_reg), + BIT(vector_bit), BIT(vector_bit)); + /* errata 5: maintain a copy of the reg table conf */ + wx->mac.mta_shadow[vector_reg] |= BIT(vector_bit); + } + if (vfinfo->num_vf_mc_hashes) + vmolr |= WX_PSR_VM_L2CTL_ROMPE; + else + vmolr &= ~WX_PSR_VM_L2CTL_ROMPE; + wr32(wx, WX_PSR_VM_L2CTL(i), vmolr); + } + + /* Restore any VF macvlans */ + wx_full_sync_mac_table(wx); +} + /** * wx_write_mc_addr_list - write multicast addresses to MTA * @netdev: network interface device structure @@ -1229,6 +1276,9 @@ static int wx_write_mc_addr_list(struct net_device *netdev) wx_update_mc_addr_list(wx, netdev); + if (test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags)) + wx_restore_vf_multicasts(wx); + return netdev_mc_count(netdev); } @@ -1249,7 +1299,7 @@ int 
wx_set_mac(struct net_device *netdev, void *p) if (retval) return retval; - wx_del_mac_filter(wx, wx->mac.addr, 0); + wx_del_mac_filter(wx, wx->mac.addr, VMDQ_P(0)); eth_hw_addr_set(netdev, addr->sa_data); memcpy(wx->mac.addr, addr->sa_data, netdev->addr_len); @@ -1351,6 +1401,10 @@ static int wx_hpbthresh(struct wx *wx) /* Calculate delay value for device */ dv_id = WX_DV(link, tc); + /* Loopback switch introduces additional latency */ + if (test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags)) + dv_id += WX_B2BT(tc); + /* Delay value is calculated in bit times convert to KB */ kb = WX_BT2KB(dv_id); rx_pba = rd32(wx, WX_RDB_PB_SZ(0)) >> WX_RDB_PB_SZ_SHIFT; @@ -1406,12 +1460,107 @@ static void wx_pbthresh_setup(struct wx *wx) wx->fc.low_water = 0; } +static void wx_set_ethertype_anti_spoofing(struct wx *wx, bool enable, int vf) +{ + u32 pfvfspoof, reg_offset, vf_shift; + + vf_shift = WX_VF_IND_SHIFT(vf); + reg_offset = WX_VF_REG_OFFSET(vf); + + pfvfspoof = rd32(wx, WX_TDM_ETYPE_AS(reg_offset)); + if (enable) + pfvfspoof |= BIT(vf_shift); + else + pfvfspoof &= ~BIT(vf_shift); + wr32(wx, WX_TDM_ETYPE_AS(reg_offset), pfvfspoof); +} + +int wx_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting) +{ + u32 index = WX_VF_REG_OFFSET(vf), vf_bit = WX_VF_IND_SHIFT(vf); + struct wx *wx = netdev_priv(netdev); + u32 regval; + + if (vf >= wx->num_vfs) + return -EINVAL; + + wx->vfinfo[vf].spoofchk_enabled = setting; + + regval = (setting << vf_bit); + wr32m(wx, WX_TDM_MAC_AS(index), regval | BIT(vf_bit), regval); + + if (wx->vfinfo[vf].vlan_count) + wr32m(wx, WX_TDM_VLAN_AS(index), regval | BIT(vf_bit), regval); + + return 0; +} + +static void wx_configure_virtualization(struct wx *wx) +{ + u16 pool = wx->num_rx_pools; + u32 reg_offset, vf_shift; + u32 i; + + if (!test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags)) + return; + + wr32m(wx, WX_PSR_VM_CTL, + WX_PSR_VM_CTL_POOL_MASK | WX_PSR_VM_CTL_REPLEN, + FIELD_PREP(WX_PSR_VM_CTL_POOL_MASK, VMDQ_P(0)) | + WX_PSR_VM_CTL_REPLEN); + while (pool--) + wr32m(wx, WX_PSR_VM_L2CTL(pool), + WX_PSR_VM_L2CTL_AUPE, WX_PSR_VM_L2CTL_AUPE); + + if (wx->mac.type == wx_mac_em) { + vf_shift = BIT(VMDQ_P(0)); + /* Enable only the PF pools for Tx/Rx */ + wr32(wx, WX_RDM_VF_RE(0), vf_shift); + wr32(wx, WX_TDM_VF_TE(0), vf_shift); + } else { + vf_shift = WX_VF_IND_SHIFT(VMDQ_P(0)); + reg_offset = WX_VF_REG_OFFSET(VMDQ_P(0)); + + /* Enable only the PF pools for Tx/Rx */ + wr32(wx, WX_RDM_VF_RE(reg_offset), GENMASK(31, vf_shift)); + wr32(wx, WX_RDM_VF_RE(reg_offset ^ 1), reg_offset - 1); + wr32(wx, WX_TDM_VF_TE(reg_offset), GENMASK(31, vf_shift)); + wr32(wx, WX_TDM_VF_TE(reg_offset ^ 1), reg_offset - 1); + } + + /* clear VLAN promisc flag so VFTA will be updated if necessary */ + clear_bit(WX_FLAG_VLAN_PROMISC, wx->flags); + + for (i = 0; i < wx->num_vfs; i++) { + if (!wx->vfinfo[i].spoofchk_enabled) + wx_set_vf_spoofchk(wx->netdev, i, false); + /* enable ethertype anti spoofing if hw supports it */ + wx_set_ethertype_anti_spoofing(wx, true, i); + } +} + static void wx_configure_port(struct wx *wx) { u32 value, i; - value = WX_CFG_PORT_CTL_D_VLAN | WX_CFG_PORT_CTL_QINQ; + if (wx->mac.type == wx_mac_em) { + value = (wx->num_vfs == 0) ? 
+ WX_CFG_PORT_CTL_NUM_VT_NONE : + WX_CFG_PORT_CTL_NUM_VT_8; + } else { + if (test_bit(WX_FLAG_VMDQ_ENABLED, wx->flags)) { + if (wx->ring_feature[RING_F_RSS].indices == 4) + value = WX_CFG_PORT_CTL_NUM_VT_32; + else + value = WX_CFG_PORT_CTL_NUM_VT_64; + } else { + value = 0; + } + } + + value |= WX_CFG_PORT_CTL_D_VLAN | WX_CFG_PORT_CTL_QINQ; wr32m(wx, WX_CFG_PORT_CTL, + WX_CFG_PORT_CTL_NUM_VT_MASK | WX_CFG_PORT_CTL_D_VLAN | WX_CFG_PORT_CTL_QINQ, value); @@ -1472,6 +1621,83 @@ static void wx_vlan_strip_control(struct wx *wx, bool enable) } } +static void wx_vlan_promisc_enable(struct wx *wx) +{ + u32 vlnctrl, i, vind, bits, reg_idx; + + vlnctrl = rd32(wx, WX_PSR_VLAN_CTL); + if (test_bit(WX_FLAG_VMDQ_ENABLED, wx->flags)) { + /* we need to keep the VLAN filter on in SRIOV */ + vlnctrl |= WX_PSR_VLAN_CTL_VFE; + wr32(wx, WX_PSR_VLAN_CTL, vlnctrl); + } else { + vlnctrl &= ~WX_PSR_VLAN_CTL_VFE; + wr32(wx, WX_PSR_VLAN_CTL, vlnctrl); + return; + } + /* We are already in VLAN promisc, nothing to do */ + if (test_bit(WX_FLAG_VLAN_PROMISC, wx->flags)) + return; + /* Set flag so we don't redo unnecessary work */ + set_bit(WX_FLAG_VLAN_PROMISC, wx->flags); + /* Add PF to all active pools */ + for (i = WX_PSR_VLAN_SWC_ENTRIES; --i;) { + wr32(wx, WX_PSR_VLAN_SWC_IDX, i); + vind = WX_VF_IND_SHIFT(VMDQ_P(0)); + reg_idx = WX_VF_REG_OFFSET(VMDQ_P(0)); + bits = rd32(wx, WX_PSR_VLAN_SWC_VM(reg_idx)); + bits |= BIT(vind); + wr32(wx, WX_PSR_VLAN_SWC_VM(reg_idx), bits); + } + /* Set all bits in the VLAN filter table array */ + for (i = 0; i < wx->mac.vft_size; i++) + wr32(wx, WX_PSR_VLAN_TBL(i), U32_MAX); +} + +static void wx_scrub_vfta(struct wx *wx) +{ + u32 i, vid, bits, vfta, vind, vlvf, reg_idx; + + for (i = WX_PSR_VLAN_SWC_ENTRIES; --i;) { + wr32(wx, WX_PSR_VLAN_SWC_IDX, i); + vlvf = rd32(wx, WX_PSR_VLAN_SWC_IDX); + /* pull VLAN ID from VLVF */ + vid = vlvf & ~WX_PSR_VLAN_SWC_VIEN; + if (vlvf & WX_PSR_VLAN_SWC_VIEN) { + /* if PF is part of this then continue */ + if (test_bit(vid, wx->active_vlans)) + continue; + } + /* remove PF from the pool */ + vind = WX_VF_IND_SHIFT(VMDQ_P(0)); + reg_idx = WX_VF_REG_OFFSET(VMDQ_P(0)); + bits = rd32(wx, WX_PSR_VLAN_SWC_VM(reg_idx)); + bits &= ~BIT(vind); + wr32(wx, WX_PSR_VLAN_SWC_VM(reg_idx), bits); + } + /* extract values from vft_shadow and write back to VFTA */ + for (i = 0; i < wx->mac.vft_size; i++) { + vfta = wx->mac.vft_shadow[i]; + wr32(wx, WX_PSR_VLAN_TBL(i), vfta); + } +} + +static void wx_vlan_promisc_disable(struct wx *wx) +{ + u32 vlnctrl; + + /* configure vlan filtering */ + vlnctrl = rd32(wx, WX_PSR_VLAN_CTL); + vlnctrl |= WX_PSR_VLAN_CTL_VFE; + wr32(wx, WX_PSR_VLAN_CTL, vlnctrl); + /* We are not in VLAN promisc, nothing to do */ + if (!test_bit(WX_FLAG_VLAN_PROMISC, wx->flags)) + return; + /* Set flag so we don't redo unnecessary work */ + clear_bit(WX_FLAG_VLAN_PROMISC, wx->flags); + wx_scrub_vfta(wx); +} + void wx_set_rx_mode(struct net_device *netdev) { struct wx *wx = netdev_priv(netdev); @@ -1484,7 +1710,7 @@ void wx_set_rx_mode(struct net_device *netdev) /* Check for Promiscuous and All Multicast modes */ fctrl = rd32(wx, WX_PSR_CTL); fctrl &= ~(WX_PSR_CTL_UPE | WX_PSR_CTL_MPE); - vmolr = rd32(wx, WX_PSR_VM_L2CTL(0)); + vmolr = rd32(wx, WX_PSR_VM_L2CTL(VMDQ_P(0))); vmolr &= ~(WX_PSR_VM_L2CTL_UPE | WX_PSR_VM_L2CTL_MPE | WX_PSR_VM_L2CTL_ROPE | @@ -1505,7 +1731,10 @@ void wx_set_rx_mode(struct net_device *netdev) fctrl |= WX_PSR_CTL_UPE | WX_PSR_CTL_MPE; /* pf don't want packets routing to vf, so clear UPE */ vmolr |= WX_PSR_VM_L2CTL_MPE; - vlnctrl 
&= ~WX_PSR_VLAN_CTL_VFE; + if (test_bit(WX_FLAG_VMDQ_ENABLED, wx->flags) && + test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags)) + vlnctrl |= WX_PSR_VLAN_CTL_VFE; + features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; } if (netdev->flags & IFF_ALLMULTI) { @@ -1528,7 +1757,7 @@ void wx_set_rx_mode(struct net_device *netdev) * sufficient space to store all the addresses then enable * unicast promiscuous mode */ - count = wx_write_uc_addr_list(netdev, 0); + count = wx_write_uc_addr_list(netdev, VMDQ_P(0)); if (count < 0) { vmolr &= ~WX_PSR_VM_L2CTL_ROPE; vmolr |= WX_PSR_VM_L2CTL_UPE; @@ -1546,7 +1775,7 @@ void wx_set_rx_mode(struct net_device *netdev) wr32(wx, WX_PSR_VLAN_CTL, vlnctrl); wr32(wx, WX_PSR_CTL, fctrl); - wr32(wx, WX_PSR_VM_L2CTL(0), vmolr); + wr32(wx, WX_PSR_VM_L2CTL(VMDQ_P(0)), vmolr); if ((features & NETIF_F_HW_VLAN_CTAG_RX) && (features & NETIF_F_HW_VLAN_STAG_RX)) @@ -1554,6 +1783,10 @@ void wx_set_rx_mode(struct net_device *netdev) else wx_vlan_strip_control(wx, false); + if (features & NETIF_F_HW_VLAN_CTAG_FILTER) + wx_vlan_promisc_disable(wx); + else + wx_vlan_promisc_enable(wx); } EXPORT_SYMBOL(wx_set_rx_mode); @@ -1803,6 +2036,13 @@ static void wx_setup_reta(struct wx *wx) u32 random_key_size = WX_RSS_KEY_SIZE / 4; u32 i, j; + if (test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags)) { + if (wx->mac.type == wx_mac_em) + rss_i = 1; + else + rss_i = rss_i < 4 ? 4 : rss_i; + } + /* Fill out hash function seeds */ for (i = 0; i < random_key_size; i++) wr32(wx, WX_RDB_RSSRK(i), wx->rss_key[i]); @@ -1820,10 +2060,42 @@ static void wx_setup_reta(struct wx *wx) wx_store_reta(wx); } +#define WX_RDB_RSS_PL_2 FIELD_PREP(GENMASK(31, 29), 1) +#define WX_RDB_RSS_PL_4 FIELD_PREP(GENMASK(31, 29), 2) +static void wx_setup_psrtype(struct wx *wx) +{ + int rss_i = wx->ring_feature[RING_F_RSS].indices; + u32 psrtype; + int pool; + + psrtype = WX_RDB_PL_CFG_L4HDR | + WX_RDB_PL_CFG_L3HDR | + WX_RDB_PL_CFG_L2HDR | + WX_RDB_PL_CFG_TUN_OUTL2HDR | + WX_RDB_PL_CFG_TUN_TUNHDR; + + if (wx->mac.type == wx_mac_em) { + for_each_set_bit(pool, &wx->fwd_bitmask, 8) + wr32(wx, WX_RDB_PL_CFG(VMDQ_P(pool)), psrtype); + } else { + if (rss_i > 3) + psrtype |= WX_RDB_RSS_PL_4; + else if (rss_i > 1) + psrtype |= WX_RDB_RSS_PL_2; + + for_each_set_bit(pool, &wx->fwd_bitmask, 32) + wr32(wx, WX_RDB_PL_CFG(VMDQ_P(pool)), psrtype); + } +} + static void wx_setup_mrqc(struct wx *wx) { u32 rss_field = 0; + /* VT, and RSS do not coexist at the same time */ + if (test_bit(WX_FLAG_VMDQ_ENABLED, wx->flags)) + return; + /* Disable indicating checksum in descriptor, enables RSS hash */ wr32m(wx, WX_PSR_CTL, WX_PSR_CTL_PCSD, WX_PSR_CTL_PCSD); @@ -1853,16 +2125,11 @@ static void wx_setup_mrqc(struct wx *wx) **/ void wx_configure_rx(struct wx *wx) { - u32 psrtype, i; int ret; + u32 i; wx_disable_rx(wx); - - psrtype = WX_RDB_PL_CFG_L4HDR | - WX_RDB_PL_CFG_L3HDR | - WX_RDB_PL_CFG_L2HDR | - WX_RDB_PL_CFG_TUN_TUNHDR; - wr32(wx, WX_RDB_PL_CFG(0), psrtype); + wx_setup_psrtype(wx); /* enable hw crc stripping */ wr32m(wx, WX_RSC_CTL, WX_RSC_CTL_CRC_STRIP, WX_RSC_CTL_CRC_STRIP); @@ -1910,6 +2177,7 @@ void wx_configure(struct wx *wx) { wx_set_rxpba(wx); wx_pbthresh_setup(wx); + wx_configure_virtualization(wx); wx_configure_port(wx); wx_set_rx_mode(wx->netdev); @@ -2268,7 +2536,7 @@ static int wx_set_vlvf(struct wx *wx, u32 vlan, u32 vind, bool vlan_on, * * Turn on/off specified VLAN in the VLAN filter table. 
**/ -static int wx_set_vfta(struct wx *wx, u32 vlan, u32 vind, bool vlan_on) +int wx_set_vfta(struct wx *wx, u32 vlan, u32 vind, bool vlan_on) { u32 bitindex, vfta, targetbit; bool vfta_changed = false; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h index b883342bb576..91c1d6135045 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h @@ -26,9 +26,12 @@ void wx_init_eeprom_params(struct wx *wx); void wx_get_mac_addr(struct wx *wx, u8 *mac_addr); void wx_init_rx_addrs(struct wx *wx); void wx_mac_set_default_filter(struct wx *wx, u8 *addr); +int wx_add_mac_filter(struct wx *wx, u8 *addr, u16 pool); +int wx_del_mac_filter(struct wx *wx, u8 *addr, u16 pool); void wx_flush_sw_mac_table(struct wx *wx); int wx_set_mac(struct net_device *netdev, void *p); void wx_disable_rx(struct wx *wx); +int wx_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); int wx_disable_sec_rx_path(struct wx *wx); void wx_enable_sec_rx_path(struct wx *wx); void wx_set_rx_mode(struct net_device *netdev); @@ -42,6 +45,7 @@ int wx_stop_adapter(struct wx *wx); void wx_reset_misc(struct wx *wx); int wx_get_pcie_msix_counts(struct wx *wx, u16 *msix_count, u16 max_msix_count); int wx_sw_init(struct wx *wx); +int wx_set_vfta(struct wx *wx, u32 vlan, u32 vind, bool vlan_on); int wx_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid); int wx_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid); int wx_fc_enable(struct wx *wx, bool tx_pause, bool rx_pause); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index e69eaa65e0de..2a808afeb414 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1620,6 +1620,65 @@ void wx_napi_disable_all(struct wx *wx) } EXPORT_SYMBOL(wx_napi_disable_all); +static bool wx_set_vmdq_queues(struct wx *wx) +{ + u16 vmdq_i = wx->ring_feature[RING_F_VMDQ].limit; + u16 rss_i = wx->ring_feature[RING_F_RSS].limit; + u16 rss_m = WX_RSS_DISABLED_MASK; + u16 vmdq_m = 0; + + /* only proceed if VMDq is enabled */ + if (!test_bit(WX_FLAG_VMDQ_ENABLED, wx->flags)) + return false; + /* Add starting offset to total pool count */ + vmdq_i += wx->ring_feature[RING_F_VMDQ].offset; + + if (wx->mac.type == wx_mac_sp) { + /* double check we are limited to maximum pools */ + vmdq_i = min_t(u16, 64, vmdq_i); + + /* 64 pool mode with 2 queues per pool, or + * 16/32/64 pool mode with 1 queue per pool + */ + if (vmdq_i > 32 || rss_i < 4) { + vmdq_m = WX_VMDQ_2Q_MASK; + rss_m = WX_RSS_2Q_MASK; + rss_i = min_t(u16, rss_i, 2); + /* 32 pool mode with 4 queues per pool */ + } else { + vmdq_m = WX_VMDQ_4Q_MASK; + rss_m = WX_RSS_4Q_MASK; + rss_i = 4; + } + } else { + /* double check we are limited to maximum pools */ + vmdq_i = min_t(u16, 8, vmdq_i); + + /* when VMDQ on, disable RSS */ + rss_i = 1; + } + + /* remove the starting offset from the pool count */ + vmdq_i -= wx->ring_feature[RING_F_VMDQ].offset; + + /* save features for later use */ + wx->ring_feature[RING_F_VMDQ].indices = vmdq_i; + wx->ring_feature[RING_F_VMDQ].mask = vmdq_m; + + /* limit RSS based on user input and save for later use */ + wx->ring_feature[RING_F_RSS].indices = rss_i; + wx->ring_feature[RING_F_RSS].mask = rss_m; + + wx->queues_per_pool = rss_i;/*maybe same to num_rx_queues_per_pool*/ + wx->num_rx_pools = vmdq_i; + wx->num_rx_queues_per_pool = rss_i; + + wx->num_rx_queues = vmdq_i * rss_i; + 
wx->num_tx_queues = vmdq_i * rss_i;
+
+	return true;
+}
+
 /**
  * wx_set_rss_queues: Allocate queues for RSS
  * @wx: board private structure to initialize
@@ -1634,6 +1693,10 @@ static void wx_set_rss_queues(struct wx *wx)
 
 	/* set mask for 16 queue limit of RSS */
 	f = &wx->ring_feature[RING_F_RSS];
+	if (wx->mac.type == wx_mac_sp)
+		f->mask = WX_RSS_64Q_MASK;
+	else
+		f->mask = WX_RSS_8Q_MASK;
 	f->indices = f->limit;
 
 	if (!(test_bit(WX_FLAG_FDIR_CAPABLE, wx->flags)))
@@ -1666,6 +1729,9 @@ static void wx_set_num_queues(struct wx *wx)
 	wx->num_tx_queues = 1;
 	wx->queues_per_pool = 1;
 
+	if (wx_set_vmdq_queues(wx))
+		return;
+
 	wx_set_rss_queues(wx);
 }
 
@@ -1746,6 +1812,10 @@ static int wx_set_interrupt_capability(struct wx *wx)
 	if (ret == 0 || (ret == -ENOMEM))
 		return ret;
 
+	/* Disable VMDq support */
+	dev_warn(&wx->pdev->dev, "Disabling VMDq support\n");
+	clear_bit(WX_FLAG_VMDQ_ENABLED, wx->flags);
+
 	/* Disable RSS */
 	dev_warn(&wx->pdev->dev, "Disabling RSS support\n");
 	wx->ring_feature[RING_F_RSS].limit = 1;
@@ -1772,6 +1842,49 @@ static int wx_set_interrupt_capability(struct wx *wx)
 	return 0;
 }
 
+static bool wx_cache_ring_vmdq(struct wx *wx)
+{
+	struct wx_ring_feature *vmdq = &wx->ring_feature[RING_F_VMDQ];
+	struct wx_ring_feature *rss = &wx->ring_feature[RING_F_RSS];
+	u16 reg_idx;
+	int i;
+
+	/* only proceed if VMDq is enabled */
+	if (!test_bit(WX_FLAG_VMDQ_ENABLED, wx->flags))
+		return false;
+
+	if (wx->mac.type == wx_mac_sp) {
+		/* start at VMDq register offset for SR-IOV enabled setups */
+		reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
+		for (i = 0; i < wx->num_rx_queues; i++, reg_idx++) {
+			/* If we are greater than indices move to next pool */
+			if ((reg_idx & ~vmdq->mask) >= rss->indices)
+				reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
+			wx->rx_ring[i]->reg_idx = reg_idx;
+		}
+		reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
+		for (i = 0; i < wx->num_tx_queues; i++, reg_idx++) {
+			/* If we are greater than indices move to next pool */
+			if ((reg_idx & rss->mask) >= rss->indices)
+				reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
+			wx->tx_ring[i]->reg_idx = reg_idx;
+		}
+	} else {
+		/* start at VMDq register offset for SR-IOV enabled setups */
+		reg_idx = vmdq->offset;
+		for (i = 0; i < wx->num_rx_queues; i++)
+			/* If we are greater than indices move to next pool */
+			wx->rx_ring[i]->reg_idx = reg_idx + i;
+
+		reg_idx = vmdq->offset;
+		for (i = 0; i < wx->num_tx_queues; i++)
+			/* If we are greater than indices move to next pool */
+			wx->tx_ring[i]->reg_idx = reg_idx + i;
+	}
+
+	return true;
+}
+
 /**
  * wx_cache_ring_rss - Descriptor ring to register mapping for RSS
  * @wx: board private structure to initialize
@@ -1783,6 +1896,9 @@ static void wx_cache_ring_rss(struct wx *wx)
 {
 	u16 i;
 
+	if (wx_cache_ring_vmdq(wx))
+		return;
+
 	for (i = 0; i < wx->num_rx_queues; i++)
 		wx->rx_ring[i]->reg_idx = i;
 
@@ -2181,7 +2297,8 @@ static void wx_set_ivar(struct wx *wx, s8 direction,
 		wr32(wx, WX_PX_MISC_IVAR, ivar);
 	} else {
 		/* tx or rx causes */
-		msix_vector += 1; /* offset for queue vectors */
+		if (!(wx->mac.type == wx_mac_em && wx->num_vfs == 7))
+			msix_vector += 1; /* offset for queue vectors */
 		msix_vector |= WX_PX_IVAR_ALLOC_VAL;
 		index = ((16 * (queue & 1)) + (8 * direction));
 		ivar = rd32(wx, WX_PX_IVAR(queue >> 1));
@@ -2233,10 +2350,17 @@ void wx_configure_vectors(struct wx *wx)
 {
 	struct pci_dev *pdev = wx->pdev;
 	u32 eitrsel = 0;
-	u16 v_idx;
+	u16 v_idx, i;
 
 	if (pdev->msix_enabled) {
 		/* Populate MSIX to EITR Select */
+		if (wx->mac.type == wx_mac_sp) {
+			if (wx->num_vfs >= 32)
+
eitrsel = BIT(wx->num_vfs % 32) - 1; + } else if (wx->mac.type == wx_mac_em) { + for (i = 0; i < wx->num_vfs; i++) + eitrsel |= BIT(i); + } wr32(wx, WX_PX_ITRSEL, eitrsel); /* use EIAM to auto-mask when MSI-X interrupt is asserted * this saves a register write for every interrupt @@ -2876,6 +3000,33 @@ netdev_features_t wx_fix_features(struct net_device *netdev, } EXPORT_SYMBOL(wx_fix_features); +#define WX_MAX_TUNNEL_HDR_LEN 80 +netdev_features_t wx_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) +{ + struct wx *wx = netdev_priv(netdev); + + if (!skb->encapsulation) + return features; + + if (wx->mac.type == wx_mac_em) + return features & ~NETIF_F_CSUM_MASK; + + if (unlikely(skb_inner_mac_header(skb) - skb_transport_header(skb) > + WX_MAX_TUNNEL_HDR_LEN)) + return features & ~NETIF_F_CSUM_MASK; + + if (skb->inner_protocol_type == ENCAP_TYPE_ETHER && + skb->inner_protocol != htons(ETH_P_IP) && + skb->inner_protocol != htons(ETH_P_IPV6) && + skb->inner_protocol != htons(ETH_P_TEB)) + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + + return features; +} +EXPORT_SYMBOL(wx_features_check); + void wx_set_ring(struct wx *wx, u32 new_tx_count, u32 new_rx_count, struct wx_ring *temp_ring) { diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_lib.h index fdeb0c315b75..49f8bde36ddb 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.h @@ -33,7 +33,10 @@ void wx_get_stats64(struct net_device *netdev, int wx_set_features(struct net_device *netdev, netdev_features_t features); netdev_features_t wx_fix_features(struct net_device *netdev, netdev_features_t features); +netdev_features_t wx_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features); void wx_set_ring(struct wx *wx, u32 new_tx_count, u32 new_rx_count, struct wx_ring *temp_ring); -#endif /* _NGBE_LIB_H_ */ +#endif /* _WX_LIB_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_mbx.c b/drivers/net/ethernet/wangxun/libwx/wx_mbx.c new file mode 100644 index 000000000000..73af5f11c3bd --- /dev/null +++ b/drivers/net/ethernet/wangxun/libwx/wx_mbx.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. 
*/ + +#include <linux/pci.h> +#include "wx_type.h" +#include "wx_mbx.h" + +/** + * wx_obtain_mbx_lock_pf - obtain mailbox lock + * @wx: pointer to the HW structure + * @vf: the VF index + * + * Return: return 0 on success and -EBUSY on failure + **/ +static int wx_obtain_mbx_lock_pf(struct wx *wx, u16 vf) +{ + int count = 5; + u32 mailbox; + + while (count--) { + /* Take ownership of the buffer */ + wr32(wx, WX_PXMAILBOX(vf), WX_PXMAILBOX_PFU); + + /* reserve mailbox for vf use */ + mailbox = rd32(wx, WX_PXMAILBOX(vf)); + if (mailbox & WX_PXMAILBOX_PFU) + return 0; + else if (count) + udelay(10); + } + wx_err(wx, "Failed to obtain mailbox lock for PF%d", vf); + + return -EBUSY; +} + +static int wx_check_for_bit_pf(struct wx *wx, u32 mask, int index) +{ + u32 mbvficr = rd32(wx, WX_MBVFICR(index)); + + if (!(mbvficr & mask)) + return -EBUSY; + wr32(wx, WX_MBVFICR(index), mask); + + return 0; +} + +/** + * wx_check_for_ack_pf - checks to see if the VF has acked + * @wx: pointer to the HW structure + * @vf: the VF index + * + * Return: return 0 if the VF has set the status bit or else -EBUSY + **/ +int wx_check_for_ack_pf(struct wx *wx, u16 vf) +{ + u32 index = vf / 16, vf_bit = vf % 16; + + return wx_check_for_bit_pf(wx, + FIELD_PREP(WX_MBVFICR_VFACK_MASK, + BIT(vf_bit)), + index); +} + +/** + * wx_check_for_msg_pf - checks to see if the VF has sent mail + * @wx: pointer to the HW structure + * @vf: the VF index + * + * Return: return 0 if the VF has got req bit or else -EBUSY + **/ +int wx_check_for_msg_pf(struct wx *wx, u16 vf) +{ + u32 index = vf / 16, vf_bit = vf % 16; + + return wx_check_for_bit_pf(wx, + FIELD_PREP(WX_MBVFICR_VFREQ_MASK, + BIT(vf_bit)), + index); +} + +/** + * wx_write_mbx_pf - Places a message in the mailbox + * @wx: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * @vf: the VF index + * + * Return: return 0 on success and -EINVAL/-EBUSY on failure + **/ +int wx_write_mbx_pf(struct wx *wx, u32 *msg, u16 size, u16 vf) +{ + struct wx_mbx_info *mbx = &wx->mbx; + int ret, i; + + /* mbx->size is up to 15 */ + if (size > mbx->size) { + wx_err(wx, "Invalid mailbox message size %d", size); + return -EINVAL; + } + + /* lock the mailbox to prevent pf/vf race condition */ + ret = wx_obtain_mbx_lock_pf(wx, vf); + if (ret) + return ret; + + /* flush msg and acks as we are overwriting the message buffer */ + wx_check_for_msg_pf(wx, vf); + wx_check_for_ack_pf(wx, vf); + + /* copy the caller specified message to the mailbox memory buffer */ + for (i = 0; i < size; i++) + wr32a(wx, WX_PXMBMEM(vf), i, msg[i]); + + /* Interrupt VF to tell it a message has been sent and release buffer */ + /* set mirrored mailbox flags */ + wr32a(wx, WX_PXMBMEM(vf), WX_VXMAILBOX_SIZE, WX_PXMAILBOX_STS); + wr32(wx, WX_PXMAILBOX(vf), WX_PXMAILBOX_STS); + + return 0; +} + +/** + * wx_read_mbx_pf - Read a message from the mailbox + * @wx: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * @vf: the VF index + * + * Return: return 0 on success and -EBUSY on failure + **/ +int wx_read_mbx_pf(struct wx *wx, u32 *msg, u16 size, u16 vf) +{ + struct wx_mbx_info *mbx = &wx->mbx; + int ret; + u16 i; + + /* limit read to size of mailbox and mbx->size is up to 15 */ + if (size > mbx->size) + size = mbx->size; + + /* lock the mailbox to prevent pf/vf race condition */ + ret = wx_obtain_mbx_lock_pf(wx, vf); + if (ret) + return ret; + + for (i = 0; i < size; i++) + msg[i] = rd32a(wx, WX_PXMBMEM(vf), i); + + /* Acknowledge the message and release 
buffer */ + /* set mirrored mailbox flags */ + wr32a(wx, WX_PXMBMEM(vf), WX_VXMAILBOX_SIZE, WX_PXMAILBOX_ACK); + wr32(wx, WX_PXMAILBOX(vf), WX_PXMAILBOX_ACK); + + return 0; +} + +/** + * wx_check_for_rst_pf - checks to see if the VF has reset + * @wx: pointer to the HW structure + * @vf: the VF index + * + * Return: return 0 on success and -EBUSY on failure + **/ +int wx_check_for_rst_pf(struct wx *wx, u16 vf) +{ + u32 reg_offset = WX_VF_REG_OFFSET(vf); + u32 vf_shift = WX_VF_IND_SHIFT(vf); + u32 vflre = 0; + + vflre = rd32(wx, WX_VFLRE(reg_offset)); + if (!(vflre & BIT(vf_shift))) + return -EBUSY; + wr32(wx, WX_VFLREC(reg_offset), BIT(vf_shift)); + + return 0; +} diff --git a/drivers/net/ethernet/wangxun/libwx/wx_mbx.h b/drivers/net/ethernet/wangxun/libwx/wx_mbx.h new file mode 100644 index 000000000000..05aae138dbc3 --- /dev/null +++ b/drivers/net/ethernet/wangxun/libwx/wx_mbx.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. */ +#ifndef _WX_MBX_H_ +#define _WX_MBX_H_ + +#define WX_VXMAILBOX_SIZE 15 + +/* PF Registers */ +#define WX_PXMAILBOX(i) (0x600 + (4 * (i))) /* i=[0,63] */ +#define WX_PXMAILBOX_STS BIT(0) /* Initiate message send to VF */ +#define WX_PXMAILBOX_ACK BIT(1) /* Ack message recv'd from VF */ +#define WX_PXMAILBOX_PFU BIT(3) /* PF owns the mailbox buffer */ + +#define WX_PXMBMEM(i) (0x5000 + (64 * (i))) /* i=[0,63] */ + +#define WX_VFLRE(i) (0x4A0 + (4 * (i))) /* i=[0,1] */ +#define WX_VFLREC(i) (0x4A8 + (4 * (i))) /* i=[0,1] */ + +/* SR-IOV specific macros */ +#define WX_MBVFICR(i) (0x480 + (4 * (i))) /* i=[0,3] */ +#define WX_MBVFICR_VFREQ_MASK GENMASK(15, 0) +#define WX_MBVFICR_VFACK_MASK GENMASK(31, 16) + +#define WX_VT_MSGTYPE_ACK BIT(31) +#define WX_VT_MSGTYPE_NACK BIT(30) +#define WX_VT_MSGTYPE_CTS BIT(29) +#define WX_VT_MSGINFO_SHIFT 16 +#define WX_VT_MSGINFO_MASK GENMASK(23, 16) + +enum wx_pfvf_api_rev { + wx_mbox_api_null, + wx_mbox_api_13 = 4, /* API version 1.3 */ + wx_mbox_api_unknown, /* indicates that API version is not known */ +}; + +/* mailbox API */ +#define WX_VF_RESET 0x01 /* VF requests reset */ +#define WX_VF_SET_MAC_ADDR 0x02 /* VF requests PF to set MAC addr */ +#define WX_VF_SET_MULTICAST 0x03 /* VF requests PF to set MC addr */ +#define WX_VF_SET_VLAN 0x04 /* VF requests PF to set VLAN */ +#define WX_VF_SET_LPE 0x05 /* VF requests PF to set VMOLR.LPE */ +#define WX_VF_SET_MACVLAN 0x06 /* VF requests PF unicast filter */ +#define WX_VF_API_NEGOTIATE 0x08 /* negotiate API version */ +#define WX_VF_GET_QUEUES 0x09 /* get queue configuration */ +#define WX_VF_GET_RETA 0x0a /* VF request for RETA */ +#define WX_VF_GET_RSS_KEY 0x0b /* get RSS key */ +#define WX_VF_UPDATE_XCAST_MODE 0x0c +#define WX_VF_GET_LINK_STATE 0x10 /* get vf link state */ +#define WX_VF_GET_FW_VERSION 0x11 /* get fw version */ + +#define WX_VF_BACKUP 0x8001 /* VF requests backup */ + +#define WX_PF_CONTROL_MSG BIT(8) /* PF control message */ +#define WX_PF_NOFITY_VF_LINK_STATUS 0x1 +#define WX_PF_NOFITY_VF_NET_NOT_RUNNING BIT(31) + +#define WX_VF_TX_QUEUES 1 /* number of Tx queues supported */ +#define WX_VF_RX_QUEUES 2 /* number of Rx queues supported */ +#define WX_VF_TRANS_VLAN 3 /* Indication of port vlan */ +#define WX_VF_DEF_QUEUE 4 /* Default queue offset */ + +#define WX_VF_PERMADDR_MSG_LEN 4 + +enum wxvf_xcast_modes { + WXVF_XCAST_MODE_NONE = 0, + WXVF_XCAST_MODE_MULTI, + WXVF_XCAST_MODE_ALLMULTI, + WXVF_XCAST_MODE_PROMISC, +}; + +int wx_write_mbx_pf(struct wx *wx, u32 *msg, u16 size, u16 vf); 
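+/* Editor's sketch (illustrative, not part of the original patch): a PF-side
+ * caller typically pairs these helpers the way wx_rcv_msg_from_vf() in
+ * wx_sriov.c does, e.g.:
+ *
+ *	u32 buf[WX_VXMAILBOX_SIZE];
+ *
+ *	if (!wx_check_for_msg_pf(wx, vf) &&
+ *	    !wx_read_mbx_pf(wx, buf, WX_VXMAILBOX_SIZE, vf)) {
+ *		(handle the request in buf[0], then acknowledge it)
+ *		buf[0] |= WX_VT_MSGTYPE_ACK;
+ *		wx_write_mbx_pf(wx, buf, 1, vf);
+ *	}
+ *
+ * The REQ/ACK bits in WX_MBVFICR are write-one-to-clear, so the check
+ * helpers consume the event as they test for it.
+ */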
+int wx_read_mbx_pf(struct wx *wx, u32 *msg, u16 size, u16 vf); +int wx_check_for_rst_pf(struct wx *wx, u16 mbx_id); +int wx_check_for_msg_pf(struct wx *wx, u16 mbx_id); +int wx_check_for_ack_pf(struct wx *wx, u16 mbx_id); + +#endif /* _WX_MBX_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c new file mode 100644 index 000000000000..52e6a6faf715 --- /dev/null +++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c @@ -0,0 +1,909 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. */ + +#include <linux/etherdevice.h> +#include <linux/pci.h> + +#include "wx_type.h" +#include "wx_hw.h" +#include "wx_mbx.h" +#include "wx_sriov.h" + +static void wx_vf_configuration(struct pci_dev *pdev, int event_mask) +{ + bool enable = !!WX_VF_ENABLE_CHECK(event_mask); + struct wx *wx = pci_get_drvdata(pdev); + u32 vfn = WX_VF_NUM_GET(event_mask); + + if (enable) + eth_zero_addr(wx->vfinfo[vfn].vf_mac_addr); +} + +static int wx_alloc_vf_macvlans(struct wx *wx, u8 num_vfs) +{ + struct vf_macvlans *mv_list; + int num_vf_macvlans, i; + + /* Initialize list of VF macvlans */ + INIT_LIST_HEAD(&wx->vf_mvs.mvlist); + + num_vf_macvlans = wx->mac.num_rar_entries - + (WX_MAX_PF_MACVLANS + 1 + num_vfs); + if (!num_vf_macvlans) + return -EINVAL; + + mv_list = kcalloc(num_vf_macvlans, sizeof(struct vf_macvlans), + GFP_KERNEL); + if (!mv_list) + return -ENOMEM; + + for (i = 0; i < num_vf_macvlans; i++) { + mv_list[i].vf = -1; + mv_list[i].free = true; + list_add(&mv_list[i].mvlist, &wx->vf_mvs.mvlist); + } + wx->mv_list = mv_list; + + return 0; +} + +static void wx_sriov_clear_data(struct wx *wx) +{ + /* set num VFs to 0 to prevent access to vfinfo */ + wx->num_vfs = 0; + + /* free VF control structures */ + kfree(wx->vfinfo); + wx->vfinfo = NULL; + + /* free macvlan list */ + kfree(wx->mv_list); + wx->mv_list = NULL; + + /* set default pool back to 0 */ + wr32m(wx, WX_PSR_VM_CTL, WX_PSR_VM_CTL_POOL_MASK, 0); + wx->ring_feature[RING_F_VMDQ].offset = 0; + + clear_bit(WX_FLAG_SRIOV_ENABLED, wx->flags); + /* Disable VMDq flag so device will be set in NM mode */ + if (wx->ring_feature[RING_F_VMDQ].limit == 1) + clear_bit(WX_FLAG_VMDQ_ENABLED, wx->flags); +} + +static int __wx_enable_sriov(struct wx *wx, u8 num_vfs) +{ + int i, ret = 0; + u32 value = 0; + + set_bit(WX_FLAG_SRIOV_ENABLED, wx->flags); + wx_err(wx, "SR-IOV enabled with %d VFs\n", num_vfs); + + /* Enable VMDq flag so device will be set in VM mode */ + set_bit(WX_FLAG_VMDQ_ENABLED, wx->flags); + if (!wx->ring_feature[RING_F_VMDQ].limit) + wx->ring_feature[RING_F_VMDQ].limit = 1; + wx->ring_feature[RING_F_VMDQ].offset = num_vfs; + + wx->vfinfo = kcalloc(num_vfs, sizeof(struct vf_data_storage), + GFP_KERNEL); + if (!wx->vfinfo) + return -ENOMEM; + + ret = wx_alloc_vf_macvlans(wx, num_vfs); + if (ret) + return ret; + + /* Initialize default switching mode VEB */ + wr32m(wx, WX_PSR_CTL, WX_PSR_CTL_SW_EN, WX_PSR_CTL_SW_EN); + + for (i = 0; i < num_vfs; i++) { + /* enable spoof checking for all VFs */ + wx->vfinfo[i].spoofchk_enabled = true; + wx->vfinfo[i].link_enable = true; + /* untrust all VFs */ + wx->vfinfo[i].trusted = false; + /* set the default xcast mode */ + wx->vfinfo[i].xcast_mode = WXVF_XCAST_MODE_NONE; + } + + if (wx->mac.type == wx_mac_em) { + value = WX_CFG_PORT_CTL_NUM_VT_8; + } else { + if (num_vfs < 32) + value = WX_CFG_PORT_CTL_NUM_VT_32; + else + value = WX_CFG_PORT_CTL_NUM_VT_64; + } + wr32m(wx, WX_CFG_PORT_CTL, + 
WX_CFG_PORT_CTL_NUM_VT_MASK, + value); + + return ret; +} + +static void wx_sriov_reinit(struct wx *wx) +{ + rtnl_lock(); + wx->setup_tc(wx->netdev, netdev_get_num_tc(wx->netdev)); + rtnl_unlock(); +} + +void wx_disable_sriov(struct wx *wx) +{ + if (!pci_vfs_assigned(wx->pdev)) + pci_disable_sriov(wx->pdev); + else + wx_err(wx, "Unloading driver while VFs are assigned.\n"); + + /* clear flags and free allloced data */ + wx_sriov_clear_data(wx); +} +EXPORT_SYMBOL(wx_disable_sriov); + +static int wx_pci_sriov_enable(struct pci_dev *dev, + int num_vfs) +{ + struct wx *wx = pci_get_drvdata(dev); + int err = 0, i; + + err = __wx_enable_sriov(wx, num_vfs); + if (err) + return err; + + wx->num_vfs = num_vfs; + for (i = 0; i < wx->num_vfs; i++) + wx_vf_configuration(dev, (i | WX_VF_ENABLE)); + + /* reset before enabling SRIOV to avoid mailbox issues */ + wx_sriov_reinit(wx); + + err = pci_enable_sriov(dev, num_vfs); + if (err) { + wx_err(wx, "Failed to enable PCI sriov: %d\n", err); + goto err_out; + } + + return num_vfs; +err_out: + wx_sriov_clear_data(wx); + return err; +} + +static void wx_pci_sriov_disable(struct pci_dev *dev) +{ + struct wx *wx = pci_get_drvdata(dev); + + wx_disable_sriov(wx); + wx_sriov_reinit(wx); +} + +int wx_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct wx *wx = pci_get_drvdata(pdev); + int err; + + if (!num_vfs) { + if (!pci_vfs_assigned(pdev)) { + wx_pci_sriov_disable(pdev); + return 0; + } + + wx_err(wx, "can't free VFs because some are assigned to VMs.\n"); + return -EBUSY; + } + + err = wx_pci_sriov_enable(pdev, num_vfs); + if (err) + return err; + + return num_vfs; +} +EXPORT_SYMBOL(wx_pci_sriov_configure); + +static int wx_set_vf_mac(struct wx *wx, u16 vf, const u8 *mac_addr) +{ + u8 hw_addr[ETH_ALEN]; + int ret = 0; + + ether_addr_copy(hw_addr, mac_addr); + wx_del_mac_filter(wx, wx->vfinfo[vf].vf_mac_addr, vf); + ret = wx_add_mac_filter(wx, hw_addr, vf); + if (ret >= 0) + ether_addr_copy(wx->vfinfo[vf].vf_mac_addr, mac_addr); + else + eth_zero_addr(wx->vfinfo[vf].vf_mac_addr); + + return ret; +} + +static void wx_set_vmolr(struct wx *wx, u16 vf, bool aupe) +{ + u32 vmolr = rd32(wx, WX_PSR_VM_L2CTL(vf)); + + vmolr |= WX_PSR_VM_L2CTL_BAM; + if (aupe) + vmolr |= WX_PSR_VM_L2CTL_AUPE; + else + vmolr &= ~WX_PSR_VM_L2CTL_AUPE; + wr32(wx, WX_PSR_VM_L2CTL(vf), vmolr); +} + +static void wx_set_vmvir(struct wx *wx, u16 vid, u16 qos, u16 vf) +{ + u32 vmvir = vid | (qos << VLAN_PRIO_SHIFT) | + WX_TDM_VLAN_INS_VLANA_DEFAULT; + + wr32(wx, WX_TDM_VLAN_INS(vf), vmvir); +} + +static int wx_set_vf_vlan(struct wx *wx, int add, int vid, u16 vf) +{ + if (!vid && !add) + return 0; + + return wx_set_vfta(wx, vid, vf, (bool)add); +} + +static void wx_set_vlan_anti_spoofing(struct wx *wx, bool enable, int vf) +{ + u32 index = WX_VF_REG_OFFSET(vf), vf_bit = WX_VF_IND_SHIFT(vf); + u32 pfvfspoof; + + pfvfspoof = rd32(wx, WX_TDM_VLAN_AS(index)); + if (enable) + pfvfspoof |= BIT(vf_bit); + else + pfvfspoof &= ~BIT(vf_bit); + wr32(wx, WX_TDM_VLAN_AS(index), pfvfspoof); +} + +static void wx_write_qde(struct wx *wx, u32 vf, u32 qde) +{ + struct wx_ring_feature *vmdq = &wx->ring_feature[RING_F_VMDQ]; + u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask); + u32 reg = 0, n = vf * q_per_pool / 32; + u32 i = vf * q_per_pool; + + reg = rd32(wx, WX_RDM_PF_QDE(n)); + for (i = (vf * q_per_pool - n * 32); + i < ((vf + 1) * q_per_pool - n * 32); + i++) { + if (qde == 1) + reg |= qde << i; + else + reg &= qde << i; + } + + wr32(wx, WX_RDM_PF_QDE(n), reg); +} + +static void 
wx_clear_vmvir(struct wx *wx, u32 vf) +{ + wr32(wx, WX_TDM_VLAN_INS(vf), 0); +} + +static void wx_ping_vf(struct wx *wx, int vf) +{ + u32 ping = WX_PF_CONTROL_MSG; + + if (wx->vfinfo[vf].clear_to_send) + ping |= WX_VT_MSGTYPE_CTS; + wx_write_mbx_pf(wx, &ping, 1, vf); +} + +static void wx_set_vf_rx_tx(struct wx *wx, int vf) +{ + u32 index = WX_VF_REG_OFFSET(vf), vf_bit = WX_VF_IND_SHIFT(vf); + u32 reg_cur_tx, reg_cur_rx, reg_req_tx, reg_req_rx; + + reg_cur_tx = rd32(wx, WX_TDM_VF_TE(index)); + reg_cur_rx = rd32(wx, WX_RDM_VF_RE(index)); + + if (wx->vfinfo[vf].link_enable) { + reg_req_tx = reg_cur_tx | BIT(vf_bit); + reg_req_rx = reg_cur_rx | BIT(vf_bit); + /* Enable particular VF */ + if (reg_cur_tx != reg_req_tx) + wr32(wx, WX_TDM_VF_TE(index), reg_req_tx); + if (reg_cur_rx != reg_req_rx) + wr32(wx, WX_RDM_VF_RE(index), reg_req_rx); + } else { + reg_req_tx = BIT(vf_bit); + reg_req_rx = BIT(vf_bit); + /* Disable particular VF */ + if (reg_cur_tx & reg_req_tx) + wr32(wx, WX_TDM_VFTE_CLR(index), reg_req_tx); + if (reg_cur_rx & reg_req_rx) + wr32(wx, WX_RDM_VFRE_CLR(index), reg_req_rx); + } +} + +static int wx_get_vf_queues(struct wx *wx, u32 *msgbuf, u32 vf) +{ + struct wx_ring_feature *vmdq = &wx->ring_feature[RING_F_VMDQ]; + unsigned int default_tc = 0; + + msgbuf[WX_VF_TX_QUEUES] = __ALIGN_MASK(1, ~vmdq->mask); + msgbuf[WX_VF_RX_QUEUES] = __ALIGN_MASK(1, ~vmdq->mask); + + if (wx->vfinfo[vf].pf_vlan || wx->vfinfo[vf].pf_qos) + msgbuf[WX_VF_TRANS_VLAN] = 1; + else + msgbuf[WX_VF_TRANS_VLAN] = 0; + + /* notify VF of default queue */ + msgbuf[WX_VF_DEF_QUEUE] = default_tc; + + return 0; +} + +static void wx_vf_reset_event(struct wx *wx, u16 vf) +{ + struct vf_data_storage *vfinfo = &wx->vfinfo[vf]; + u8 num_tcs = netdev_get_num_tc(wx->netdev); + + /* add PF assigned VLAN */ + wx_set_vf_vlan(wx, true, vfinfo->pf_vlan, vf); + + /* reset offloads to defaults */ + wx_set_vmolr(wx, vf, !vfinfo->pf_vlan); + + /* set outgoing tags for VFs */ + if (!vfinfo->pf_vlan && !vfinfo->pf_qos && !num_tcs) { + wx_clear_vmvir(wx, vf); + } else { + if (vfinfo->pf_qos || !num_tcs) + wx_set_vmvir(wx, vfinfo->pf_vlan, + vfinfo->pf_qos, vf); + else + wx_set_vmvir(wx, vfinfo->pf_vlan, + wx->default_up, vf); + } + + /* reset multicast table array for vf */ + wx->vfinfo[vf].num_vf_mc_hashes = 0; + + /* Flush and reset the mta with the new values */ + wx_set_rx_mode(wx->netdev); + + wx_del_mac_filter(wx, wx->vfinfo[vf].vf_mac_addr, vf); + /* reset VF api back to unknown */ + wx->vfinfo[vf].vf_api = wx_mbox_api_null; +} + +static void wx_vf_reset_msg(struct wx *wx, u16 vf) +{ + const u8 *vf_mac = wx->vfinfo[vf].vf_mac_addr; + struct net_device *dev = wx->netdev; + u32 msgbuf[5] = {0, 0, 0, 0, 0}; + u8 *addr = (u8 *)(&msgbuf[1]); + u32 reg = 0, index, vf_bit; + int pf_max_frame; + + /* reset the filters for the device */ + wx_vf_reset_event(wx, vf); + + /* set vf mac address */ + if (!is_zero_ether_addr(vf_mac)) + wx_set_vf_mac(wx, vf, vf_mac); + + index = WX_VF_REG_OFFSET(vf); + vf_bit = WX_VF_IND_SHIFT(vf); + + /* force drop enable for all VF Rx queues */ + wx_write_qde(wx, vf, 1); + + /* set transmit and receive for vf */ + wx_set_vf_rx_tx(wx, vf); + + pf_max_frame = dev->mtu + ETH_HLEN; + + if (pf_max_frame > ETH_FRAME_LEN) + reg = BIT(vf_bit); + wr32(wx, WX_RDM_VFRE_CLR(index), reg); + + /* enable VF mailbox for further messages */ + wx->vfinfo[vf].clear_to_send = true; + + /* reply to reset with ack and vf mac address */ + msgbuf[0] = WX_VF_RESET; + if (!is_zero_ether_addr(vf_mac)) { + msgbuf[0] |= WX_VT_MSGTYPE_ACK; 
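+		/* Editor's note (illustrative, not in the original patch):
+		 * the reply spans WX_VF_PERMADDR_MSG_LEN (4) dwords: word 0
+		 * carries WX_VF_RESET plus ACK/NACK, words 1-2 hold the
+		 * permanent MAC copied below, and word 3 the mc_filter_type
+		 * the VF needs for multicast hashing.
+		 */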
+ memcpy(addr, vf_mac, ETH_ALEN); + } else { + msgbuf[0] |= WX_VT_MSGTYPE_NACK; + wx_err(wx, "VF %d has no MAC address assigned", vf); + } + + msgbuf[3] = wx->mac.mc_filter_type; + wx_write_mbx_pf(wx, msgbuf, WX_VF_PERMADDR_MSG_LEN, vf); +} + +static int wx_set_vf_mac_addr(struct wx *wx, u32 *msgbuf, u16 vf) +{ + const u8 *new_mac = ((u8 *)(&msgbuf[1])); + int ret; + + if (!is_valid_ether_addr(new_mac)) { + wx_err(wx, "VF %d attempted to set invalid mac\n", vf); + return -EINVAL; + } + + if (wx->vfinfo[vf].pf_set_mac && + memcmp(wx->vfinfo[vf].vf_mac_addr, new_mac, ETH_ALEN)) { + wx_err(wx, + "VF %d attempt to set a MAC but it already had a MAC.", + vf); + return -EBUSY; + } + + ret = wx_set_vf_mac(wx, vf, new_mac); + if (ret < 0) + return ret; + + return 0; +} + +static void wx_set_vf_multicasts(struct wx *wx, u32 *msgbuf, u32 vf) +{ + struct vf_data_storage *vfinfo = &wx->vfinfo[vf]; + u16 entries = (msgbuf[0] & WX_VT_MSGINFO_MASK) + >> WX_VT_MSGINFO_SHIFT; + u32 vmolr = rd32(wx, WX_PSR_VM_L2CTL(vf)); + u32 vector_bit, vector_reg, mta_reg, i; + u16 *hash_list = (u16 *)&msgbuf[1]; + + /* only so many hash values supported */ + entries = min_t(u16, entries, WX_MAX_VF_MC_ENTRIES); + vfinfo->num_vf_mc_hashes = entries; + + for (i = 0; i < entries; i++) + vfinfo->vf_mc_hashes[i] = hash_list[i]; + + for (i = 0; i < vfinfo->num_vf_mc_hashes; i++) { + vector_reg = WX_PSR_MC_TBL_REG(vfinfo->vf_mc_hashes[i]); + vector_bit = WX_PSR_MC_TBL_BIT(vfinfo->vf_mc_hashes[i]); + mta_reg = wx->mac.mta_shadow[vector_reg]; + mta_reg |= BIT(vector_bit); + wx->mac.mta_shadow[vector_reg] = mta_reg; + wr32(wx, WX_PSR_MC_TBL(vector_reg), mta_reg); + } + vmolr |= WX_PSR_VM_L2CTL_ROMPE; + wr32(wx, WX_PSR_VM_L2CTL(vf), vmolr); +} + +static void wx_set_vf_lpe(struct wx *wx, u32 max_frame, u32 vf) +{ + u32 index, vf_bit, vfre; + u32 max_frs, reg_val; + + /* determine VF receive enable location */ + index = WX_VF_REG_OFFSET(vf); + vf_bit = WX_VF_IND_SHIFT(vf); + + vfre = rd32(wx, WX_RDM_VF_RE(index)); + vfre |= BIT(vf_bit); + wr32(wx, WX_RDM_VF_RE(index), vfre); + + /* pull current max frame size from hardware */ + max_frs = DIV_ROUND_UP(max_frame, 1024); + reg_val = rd32(wx, WX_MAC_WDG_TIMEOUT) & WX_MAC_WDG_TIMEOUT_WTO_MASK; + if (max_frs > (reg_val + WX_MAC_WDG_TIMEOUT_WTO_DELTA)) + wr32(wx, WX_MAC_WDG_TIMEOUT, + max_frs - WX_MAC_WDG_TIMEOUT_WTO_DELTA); +} + +static int wx_find_vlvf_entry(struct wx *wx, u32 vlan) +{ + int regindex; + u32 vlvf; + + /* short cut the special case */ + if (vlan == 0) + return 0; + + /* Search for the vlan id in the VLVF entries */ + for (regindex = 1; regindex < WX_PSR_VLAN_SWC_ENTRIES; regindex++) { + wr32(wx, WX_PSR_VLAN_SWC_IDX, regindex); + vlvf = rd32(wx, WX_PSR_VLAN_SWC); + if ((vlvf & VLAN_VID_MASK) == vlan) + break; + } + + /* Return a negative value if not found */ + if (regindex >= WX_PSR_VLAN_SWC_ENTRIES) + regindex = -EINVAL; + + return regindex; +} + +static int wx_set_vf_macvlan(struct wx *wx, + u16 vf, int index, unsigned char *mac_addr) +{ + struct vf_macvlans *entry; + struct list_head *pos; + int retval = 0; + + if (index <= 1) { + list_for_each(pos, &wx->vf_mvs.mvlist) { + entry = list_entry(pos, struct vf_macvlans, mvlist); + if (entry->vf == vf) { + entry->vf = -1; + entry->free = true; + entry->is_macvlan = false; + wx_del_mac_filter(wx, entry->vf_macvlan, vf); + } + } + } + + if (!index) + return 0; + + entry = NULL; + list_for_each(pos, &wx->vf_mvs.mvlist) { + entry = list_entry(pos, struct vf_macvlans, mvlist); + if (entry->free) + break; + } + + if (!entry || 
!entry->free)
+		return -ENOSPC;
+
+	retval = wx_add_mac_filter(wx, mac_addr, vf);
+	if (retval >= 0) {
+		entry->free = false;
+		entry->is_macvlan = true;
+		entry->vf = vf;
+		memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN);
+	}
+
+	return retval;
+}
+
+static int wx_set_vf_vlan_msg(struct wx *wx, u32 *msgbuf, u16 vf)
+{
+	int add = (msgbuf[0] & WX_VT_MSGINFO_MASK) >> WX_VT_MSGINFO_SHIFT;
+	int vid = (msgbuf[1] & WX_PSR_VLAN_SWC_VLANID_MASK);
+	int ret;
+
+	if (add)
+		wx->vfinfo[vf].vlan_count++;
+	else if (wx->vfinfo[vf].vlan_count)
+		wx->vfinfo[vf].vlan_count--;
+
+	/* in case of promiscuous mode any VLAN filter set for a VF must
+	 * also have the PF pool added to it.
+	 */
+	if (add && wx->netdev->flags & IFF_PROMISC)
+		wx_set_vf_vlan(wx, add, vid, VMDQ_P(0));
+
+	ret = wx_set_vf_vlan(wx, add, vid, vf);
+	if (!ret && wx->vfinfo[vf].spoofchk_enabled)
+		wx_set_vlan_anti_spoofing(wx, true, vf);
+
+	/* Go through all the checks to see if the VLAN filter should
+	 * be wiped completely.
+	 */
+	if (!add && wx->netdev->flags & IFF_PROMISC) {
+		u32 bits = 0, vlvf;
+		int reg_ndx;
+
+		reg_ndx = wx_find_vlvf_entry(wx, vid);
+		if (reg_ndx < 0)
+			return -ENOSPC;
+		wr32(wx, WX_PSR_VLAN_SWC_IDX, reg_ndx);
+		vlvf = rd32(wx, WX_PSR_VLAN_SWC);
+		/* See if any other pools are set for this VLAN filter
+		 * entry other than the PF.
+		 */
+		if (VMDQ_P(0) < 32) {
+			bits = rd32(wx, WX_PSR_VLAN_SWC_VM_L);
+			bits &= ~BIT(VMDQ_P(0));
+			if (wx->mac.type != wx_mac_em)
+				bits |= rd32(wx, WX_PSR_VLAN_SWC_VM_H);
+		} else {
+			if (wx->mac.type != wx_mac_em)
+				bits = rd32(wx, WX_PSR_VLAN_SWC_VM_H);
+			bits &= ~BIT(VMDQ_P(0) % 32);
+			bits |= rd32(wx, WX_PSR_VLAN_SWC_VM_L);
+		}
+		/* If the filter was removed then ensure PF pool bit
+		 * is cleared if the PF only added itself to the pool
+		 * because the PF is in promiscuous mode.
+		 */
+		if ((vlvf & VLAN_VID_MASK) == vid && !bits)
+			wx_set_vf_vlan(wx, add, vid, VMDQ_P(0));
+	}
+
+	return 0;
+}
+
+static int wx_set_vf_macvlan_msg(struct wx *wx, u32 *msgbuf, u16 vf)
+{
+	int index = (msgbuf[0] & WX_VT_MSGINFO_MASK) >>
+		    WX_VT_MSGINFO_SHIFT;
+	u8 *new_mac = ((u8 *)(&msgbuf[1]));
+	int err;
+
+	if (wx->vfinfo[vf].pf_set_mac && index > 0) {
+		wx_err(wx, "VF %d requested a MACVLAN filter but was denied\n", vf);
+		return -EINVAL;
+	}
+
+	/* A non-zero index indicates the VF is setting a filter */
+	if (index) {
+		if (!is_valid_ether_addr(new_mac)) {
+			wx_err(wx, "VF %d attempted to set invalid mac\n", vf);
+			return -EINVAL;
+		}
+		/* If the VF is allowed to set MAC filters then turn off
+		 * anti-spoofing to avoid false positives.
+ */ + if (wx->vfinfo[vf].spoofchk_enabled) + wx_set_vf_spoofchk(wx->netdev, vf, false); + } + + err = wx_set_vf_macvlan(wx, vf, index, new_mac); + if (err == -ENOSPC) + wx_err(wx, + "VF %d request MACVLAN filter but there is no space\n", + vf); + if (err < 0) + return err; + + return 0; +} + +static int wx_negotiate_vf_api(struct wx *wx, u32 *msgbuf, u32 vf) +{ + int api = msgbuf[1]; + + switch (api) { + case wx_mbox_api_13: + wx->vfinfo[vf].vf_api = api; + return 0; + default: + wx_err(wx, "VF %d requested invalid api version %u\n", vf, api); + return -EINVAL; + } +} + +static int wx_get_vf_link_state(struct wx *wx, u32 *msgbuf, u32 vf) +{ + msgbuf[1] = wx->vfinfo[vf].link_enable; + + return 0; +} + +static int wx_get_fw_version(struct wx *wx, u32 *msgbuf, u32 vf) +{ + unsigned long fw_version = 0ULL; + int ret = 0; + + ret = kstrtoul(wx->eeprom_id, 16, &fw_version); + if (ret) + return -EOPNOTSUPP; + msgbuf[1] = fw_version; + + return 0; +} + +static int wx_update_vf_xcast_mode(struct wx *wx, u32 *msgbuf, u32 vf) +{ + int xcast_mode = msgbuf[1]; + u32 vmolr, disable, enable; + + if (wx->vfinfo[vf].xcast_mode == xcast_mode) + return 0; + + switch (xcast_mode) { + case WXVF_XCAST_MODE_NONE: + disable = WX_PSR_VM_L2CTL_BAM | WX_PSR_VM_L2CTL_ROMPE | + WX_PSR_VM_L2CTL_MPE | WX_PSR_VM_L2CTL_UPE | + WX_PSR_VM_L2CTL_VPE; + enable = 0; + break; + case WXVF_XCAST_MODE_MULTI: + disable = WX_PSR_VM_L2CTL_MPE | WX_PSR_VM_L2CTL_UPE | + WX_PSR_VM_L2CTL_VPE; + enable = WX_PSR_VM_L2CTL_BAM | WX_PSR_VM_L2CTL_ROMPE; + break; + case WXVF_XCAST_MODE_ALLMULTI: + disable = WX_PSR_VM_L2CTL_UPE | WX_PSR_VM_L2CTL_VPE; + enable = WX_PSR_VM_L2CTL_BAM | WX_PSR_VM_L2CTL_ROMPE | + WX_PSR_VM_L2CTL_MPE; + break; + case WXVF_XCAST_MODE_PROMISC: + disable = 0; + enable = WX_PSR_VM_L2CTL_BAM | WX_PSR_VM_L2CTL_ROMPE | + WX_PSR_VM_L2CTL_MPE | WX_PSR_VM_L2CTL_UPE | + WX_PSR_VM_L2CTL_VPE; + break; + default: + return -EOPNOTSUPP; + } + + vmolr = rd32(wx, WX_PSR_VM_L2CTL(vf)); + vmolr &= ~disable; + vmolr |= enable; + wr32(wx, WX_PSR_VM_L2CTL(vf), vmolr); + + wx->vfinfo[vf].xcast_mode = xcast_mode; + msgbuf[1] = xcast_mode; + + return 0; +} + +static void wx_rcv_msg_from_vf(struct wx *wx, u16 vf) +{ + u16 mbx_size = WX_VXMAILBOX_SIZE; + u32 msgbuf[WX_VXMAILBOX_SIZE]; + int retval; + + retval = wx_read_mbx_pf(wx, msgbuf, mbx_size, vf); + if (retval) { + wx_err(wx, "Error receiving message from VF\n"); + return; + } + + /* this is a message we already processed, do nothing */ + if (msgbuf[0] & (WX_VT_MSGTYPE_ACK | WX_VT_MSGTYPE_NACK)) + return; + + if (msgbuf[0] == WX_VF_RESET) { + wx_vf_reset_msg(wx, vf); + return; + } + + /* until the vf completes a virtual function reset it should not be + * allowed to start any configuration. 
+ */ + if (!wx->vfinfo[vf].clear_to_send) { + msgbuf[0] |= WX_VT_MSGTYPE_NACK; + wx_write_mbx_pf(wx, msgbuf, 1, vf); + return; + } + + switch ((msgbuf[0] & U16_MAX)) { + case WX_VF_SET_MAC_ADDR: + retval = wx_set_vf_mac_addr(wx, msgbuf, vf); + break; + case WX_VF_SET_MULTICAST: + wx_set_vf_multicasts(wx, msgbuf, vf); + retval = 0; + break; + case WX_VF_SET_VLAN: + retval = wx_set_vf_vlan_msg(wx, msgbuf, vf); + break; + case WX_VF_SET_LPE: + wx_set_vf_lpe(wx, msgbuf[1], vf); + retval = 0; + break; + case WX_VF_SET_MACVLAN: + retval = wx_set_vf_macvlan_msg(wx, msgbuf, vf); + break; + case WX_VF_API_NEGOTIATE: + retval = wx_negotiate_vf_api(wx, msgbuf, vf); + break; + case WX_VF_GET_QUEUES: + retval = wx_get_vf_queues(wx, msgbuf, vf); + break; + case WX_VF_GET_LINK_STATE: + retval = wx_get_vf_link_state(wx, msgbuf, vf); + break; + case WX_VF_GET_FW_VERSION: + retval = wx_get_fw_version(wx, msgbuf, vf); + break; + case WX_VF_UPDATE_XCAST_MODE: + retval = wx_update_vf_xcast_mode(wx, msgbuf, vf); + break; + case WX_VF_BACKUP: + break; + default: + wx_err(wx, "Unhandled Msg %8.8x\n", msgbuf[0]); + break; + } + + /* notify the VF of the results of what it sent us */ + if (retval) + msgbuf[0] |= WX_VT_MSGTYPE_NACK; + else + msgbuf[0] |= WX_VT_MSGTYPE_ACK; + + msgbuf[0] |= WX_VT_MSGTYPE_CTS; + + wx_write_mbx_pf(wx, msgbuf, mbx_size, vf); +} + +static void wx_rcv_ack_from_vf(struct wx *wx, u16 vf) +{ + u32 msg = WX_VT_MSGTYPE_NACK; + + /* if device isn't clear to send it shouldn't be reading either */ + if (!wx->vfinfo[vf].clear_to_send) + wx_write_mbx_pf(wx, &msg, 1, vf); +} + +void wx_msg_task(struct wx *wx) +{ + u16 vf; + + for (vf = 0; vf < wx->num_vfs; vf++) { + /* process any reset requests */ + if (!wx_check_for_rst_pf(wx, vf)) + wx_vf_reset_event(wx, vf); + + /* process any messages pending */ + if (!wx_check_for_msg_pf(wx, vf)) + wx_rcv_msg_from_vf(wx, vf); + + /* process any acks */ + if (!wx_check_for_ack_pf(wx, vf)) + wx_rcv_ack_from_vf(wx, vf); + } +} +EXPORT_SYMBOL(wx_msg_task); + +void wx_disable_vf_rx_tx(struct wx *wx) +{ + wr32(wx, WX_TDM_VFTE_CLR(0), U32_MAX); + wr32(wx, WX_RDM_VFRE_CLR(0), U32_MAX); + if (wx->mac.type != wx_mac_em) { + wr32(wx, WX_TDM_VFTE_CLR(1), U32_MAX); + wr32(wx, WX_RDM_VFRE_CLR(1), U32_MAX); + } +} +EXPORT_SYMBOL(wx_disable_vf_rx_tx); + +void wx_ping_all_vfs_with_link_status(struct wx *wx, bool link_up) +{ + u32 msgbuf[2] = {0, 0}; + u16 i; + + if (!wx->num_vfs) + return; + msgbuf[0] = WX_PF_NOFITY_VF_LINK_STATUS | WX_PF_CONTROL_MSG; + if (link_up) + msgbuf[1] = FIELD_PREP(GENMASK(31, 1), wx->speed) | link_up; + if (wx->notify_down) + msgbuf[1] |= WX_PF_NOFITY_VF_NET_NOT_RUNNING; + for (i = 0; i < wx->num_vfs; i++) { + if (wx->vfinfo[i].clear_to_send) + msgbuf[0] |= WX_VT_MSGTYPE_CTS; + wx_write_mbx_pf(wx, msgbuf, 2, i); + } +} +EXPORT_SYMBOL(wx_ping_all_vfs_with_link_status); + +static void wx_set_vf_link_state(struct wx *wx, int vf, int state) +{ + wx->vfinfo[vf].link_state = state; + switch (state) { + case IFLA_VF_LINK_STATE_AUTO: + if (netif_running(wx->netdev)) + wx->vfinfo[vf].link_enable = true; + else + wx->vfinfo[vf].link_enable = false; + break; + case IFLA_VF_LINK_STATE_ENABLE: + wx->vfinfo[vf].link_enable = true; + break; + case IFLA_VF_LINK_STATE_DISABLE: + wx->vfinfo[vf].link_enable = false; + break; + } + /* restart the VF */ + wx->vfinfo[vf].clear_to_send = false; + wx_ping_vf(wx, vf); + + wx_set_vf_rx_tx(wx, vf); +} + +void wx_set_all_vfs(struct wx *wx) +{ + int i; + + for (i = 0; i < wx->num_vfs; i++) + wx_set_vf_link_state(wx, i, 
wx->vfinfo[i].link_state); +} +EXPORT_SYMBOL(wx_set_all_vfs); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.h b/drivers/net/ethernet/wangxun/libwx/wx_sriov.h new file mode 100644 index 000000000000..8a3a47bb5815 --- /dev/null +++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. */ + +#ifndef _WX_SRIOV_H_ +#define _WX_SRIOV_H_ + +#define WX_VF_ENABLE_CHECK(_m) FIELD_GET(BIT(31), (_m)) +#define WX_VF_NUM_GET(_m) FIELD_GET(GENMASK(5, 0), (_m)) +#define WX_VF_ENABLE BIT(31) + +void wx_disable_sriov(struct wx *wx); +int wx_pci_sriov_configure(struct pci_dev *pdev, int num_vfs); +void wx_msg_task(struct wx *wx); +void wx_disable_vf_rx_tx(struct wx *wx); +void wx_ping_all_vfs_with_link_status(struct wx *wx, bool link_up); +void wx_set_all_vfs(struct wx *wx); + +#endif /* _WX_SRIOV_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 4c545b2aa997..8cfed5d4ebf7 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -20,8 +20,13 @@ /* MSI-X capability fields masks */ #define WX_PCIE_MSIX_TBL_SZ_MASK 0x7FF #define WX_PCI_LINK_STATUS 0xB2 +#define WX_MAX_PF_MACVLANS 15 +#define WX_MAX_VF_MC_ENTRIES 30 /**************** Global Registers ****************************/ +#define WX_VF_REG_OFFSET(_v) FIELD_GET(GENMASK(15, 5), (_v)) +#define WX_VF_IND_SHIFT(_v) FIELD_GET(GENMASK(4, 0), (_v)) + /* chip control Registers */ #define WX_MIS_PWR 0x10000 #define WX_MIS_RST 0x1000C @@ -76,6 +81,9 @@ #define WX_MAC_LXONOFFRXC 0x11E0C /*********************** Receive DMA registers **************************/ +#define WX_RDM_VF_RE(_i) (0x12004 + ((_i) * 4)) +#define WX_RDM_PF_QDE(_i) (0x12080 + ((_i) * 4)) +#define WX_RDM_VFRE_CLR(_i) (0x120A0 + ((_i) * 4)) #define WX_RDM_DRP_PKT 0x12500 #define WX_RDM_PKT_CNT 0x12504 #define WX_RDM_BYTE_CNT_LSB 0x12508 @@ -84,12 +92,17 @@ /************************* Port Registers ************************************/ /* port cfg Registers */ #define WX_CFG_PORT_CTL 0x14400 +#define WX_CFG_PORT_CTL_PFRSTD BIT(14) #define WX_CFG_PORT_CTL_DRV_LOAD BIT(3) #define WX_CFG_PORT_CTL_QINQ BIT(2) #define WX_CFG_PORT_CTL_D_VLAN BIT(0) /* double vlan*/ #define WX_CFG_TAG_TPID(_i) (0x14430 + ((_i) * 4)) #define WX_CFG_PORT_CTL_NUM_VT_MASK GENMASK(13, 12) /* number of TVs */ +#define WX_CFG_PORT_CTL_NUM_VT_NONE 0 +#define WX_CFG_PORT_CTL_NUM_VT_8 FIELD_PREP(GENMASK(13, 12), 1) +#define WX_CFG_PORT_CTL_NUM_VT_32 FIELD_PREP(GENMASK(13, 12), 2) +#define WX_CFG_PORT_CTL_NUM_VT_64 FIELD_PREP(GENMASK(13, 12), 3) /* GPIO Registers */ #define WX_GPIO_DR 0x14800 @@ -112,6 +125,11 @@ /*********************** Transmit DMA registers **************************/ /* transmit global control */ #define WX_TDM_CTL 0x18000 +#define WX_TDM_VF_TE(_i) (0x18004 + ((_i) * 4)) +#define WX_TDM_MAC_AS(_i) (0x18060 + ((_i) * 4)) +#define WX_TDM_VLAN_AS(_i) (0x18070 + ((_i) * 4)) +#define WX_TDM_VFTE_CLR(_i) (0x180A0 + ((_i) * 4)) + /* TDM CTL BIT */ #define WX_TDM_CTL_TE BIT(0) /* Transmit Enable */ #define WX_TDM_PB_THRE(_i) (0x18020 + ((_i) * 4)) @@ -165,6 +183,7 @@ /******************************* PSR Registers *******************************/ /* psr control */ #define WX_PSR_CTL 0x15000 +#define WX_PSR_VM_CTL 0x151B0 /* Header split receive */ #define WX_PSR_CTL_SW_EN BIT(18) #define WX_PSR_CTL_RSC_ACK BIT(17) @@ -201,12 +220,17 @@ #define WX_PSR_1588_CTL_VALID BIT(0) /* 
mcast/ucast overflow tbl */
 #define WX_PSR_MC_TBL(_i)            (0x15200 + ((_i) * 4))
+#define WX_PSR_MC_TBL_REG(_i)        FIELD_GET(GENMASK(11, 5), (_i))
+#define WX_PSR_MC_TBL_BIT(_i)        FIELD_GET(GENMASK(4, 0), (_i))
 #define WX_PSR_UC_TBL(_i)            (0x15400 + ((_i) * 4))
+#define WX_PSR_VM_CTL_REPLEN         BIT(30) /* replication enabled */
+#define WX_PSR_VM_CTL_POOL_MASK      GENMASK(12, 7)
 /* VM L2 control */
 #define WX_PSR_VM_L2CTL(_i)          (0x15600 + ((_i) * 4))
 #define WX_PSR_VM_L2CTL_UPE          BIT(4) /* unicast promiscuous */
 #define WX_PSR_VM_L2CTL_VACC         BIT(6) /* accept nomatched vlan */
+#define WX_PSR_VM_L2CTL_VPE          BIT(7) /* vlan promiscuous mode */
 #define WX_PSR_VM_L2CTL_AUPE         BIT(8) /* accept untagged packets */
 #define WX_PSR_VM_L2CTL_ROMPE        BIT(9) /* accept packets in MTA tbl */
 #define WX_PSR_VM_L2CTL_ROPE         BIT(10) /* accept packets in UC tbl */
@@ -245,10 +269,12 @@
 #define WX_PSR_VLAN_SWC              0x16220
 #define WX_PSR_VLAN_SWC_VM_L         0x16224
 #define WX_PSR_VLAN_SWC_VM_H         0x16228
+#define WX_PSR_VLAN_SWC_VM(_i)       (0x16224 + ((_i) * 4))
 #define WX_PSR_VLAN_SWC_IDX          0x16230 /* 64 vlan entries */
 /* VLAN pool filtering masks */
 #define WX_PSR_VLAN_SWC_VIEN         BIT(31) /* filter is valid */
 #define WX_PSR_VLAN_SWC_ENTRIES      64
+#define WX_PSR_VLAN_SWC_VLANID_MASK  GENMASK(11, 0)
 
 /********************************* RSEC **************************************/
 /* general rsec */
@@ -259,6 +285,13 @@
 #define WX_RSC_ST                    0x17004
 #define WX_RSC_ST_RSEC_RDY           BIT(0)
 
+/*********************** Transmit DMA registers **************************/
+/* transmit global control */
+#define WX_TDM_ETYPE_AS(_i)          (0x18058 + ((_i) * 4))
+#define WX_TDM_VLAN_INS(_i)          (0x18100 + ((_i) * 4))
+/* Per VF Port VLAN insertion rules */
+#define WX_TDM_VLAN_INS_VLANA_DEFAULT BIT(30) /* Always use default VLAN*/
+
 /****************************** TDB ******************************************/
 #define WX_TDB_PB_SZ(_i)             (0x1CC00 + ((_i) * 4))
 #define WX_TXPKT_SIZE_MAX            0xA /* Max Tx Packet size */
@@ -328,6 +361,9 @@
 #define WX_MAC_WDG_TIMEOUT           0x1100C
 #define WX_MAC_RX_FLOW_CTRL          0x11090
 #define WX_MAC_RX_FLOW_CTRL_RFE      BIT(0) /* receive fc enable */
+
+#define WX_MAC_WDG_TIMEOUT_WTO_MASK  GENMASK(3, 0)
+#define WX_MAC_WDG_TIMEOUT_WTO_DELTA 2
 /* MDIO Registers */
 #define WX_MSCA                      0x11200
 #define WX_MSCA_RA(v)                FIELD_PREP(U16_MAX, v)
@@ -417,6 +453,15 @@ enum WX_MSCA_CMD_value {
 /* Number of 80 microseconds we wait for PCI Express master disable */
 #define WX_PCI_MASTER_DISABLE_TIMEOUT 80000
 
+#define WX_RSS_64Q_MASK              0x3F
+#define WX_RSS_8Q_MASK               0x7
+#define WX_RSS_4Q_MASK               0x3
+#define WX_RSS_2Q_MASK               0x1
+#define WX_RSS_DISABLED_MASK         0x0
+
+#define WX_VMDQ_4Q_MASK              0x7C
+#define WX_VMDQ_2Q_MASK              0x7E
+
 /****************** Manageability Host Interface defines ********************/
 #define WX_HI_MAX_BLOCK_BYTE_LENGTH  256 /* Num of bytes in range */
 #define WX_HI_COMMAND_TIMEOUT        1000 /* Process HI command limit */
@@ -484,7 +529,7 @@ enum WX_MSCA_CMD_value {
 #define WX_REQ_TX_DESCRIPTOR_MULTIPLE 128
 
 #define WX_MAX_JUMBO_FRAME_SIZE      9432 /* max payload 9414 */
-#define VMDQ_P(p)                    p
+#define VMDQ_P(p)                    ((p) + wx->ring_feature[RING_F_VMDQ].offset)
 
 /* Supported Rx Buffer Sizes */
 #define WX_RXBUFFER_256              256 /* Used for skb receive header */
@@ -778,6 +823,10 @@ struct wx_bus_info {
 	u16 device;
 };
 
+struct wx_mbx_info {
+	u16 size;
+};
+
 struct wx_thermal_sensor_data {
 	s16 temp;
 	s16 alarm_thresh;
@@ -1051,6 +1100,7 @@ struct wx_ring_feature {
 
 enum wx_ring_f_enum {
 	RING_F_NONE = 0,
+	RING_F_VMDQ,
 	RING_F_RSS,
 	RING_F_FDIR,
 	RING_F_ARRAY_SIZE /* must be last in enum set */
@@ -1106,8 +1156,38 @@ enum
wx_state { WX_STATE_NBITS /* must be last */ }; +struct vf_data_storage { + struct pci_dev *vfdev; + unsigned char vf_mac_addr[ETH_ALEN]; + bool spoofchk_enabled; + bool link_enable; + bool trusted; + int xcast_mode; + unsigned int vf_api; + bool clear_to_send; + u16 pf_vlan; /* When set, guest VLAN config not allowed. */ + u16 pf_qos; + bool pf_set_mac; + + u16 vf_mc_hashes[WX_MAX_VF_MC_ENTRIES]; + u16 num_vf_mc_hashes; + u16 vlan_count; + int link_state; +}; + +struct vf_macvlans { + struct list_head mvlist; + int vf; + bool free; + bool is_macvlan; + u8 vf_macvlan[ETH_ALEN]; +}; + enum wx_pf_flags { WX_FLAG_SWFW_RING, + WX_FLAG_VMDQ_ENABLED, + WX_FLAG_VLAN_PROMISC, + WX_FLAG_SRIOV_ENABLED, WX_FLAG_FDIR_CAPABLE, WX_FLAG_FDIR_HASH, WX_FLAG_FDIR_PERFECT, @@ -1128,6 +1208,7 @@ struct wx { struct pci_dev *pdev; struct net_device *netdev; struct wx_bus_info bus; + struct wx_mbx_info mbx; struct wx_mac_info mac; enum em_mac_type mac_type; enum sp_media_type media_type; @@ -1151,6 +1232,7 @@ struct wx { u8 swfw_index; /* PHY stuff */ + bool notify_down; unsigned int link; int speed; int duplex; @@ -1182,6 +1264,8 @@ struct wx { struct wx_ring *tx_ring[64] ____cacheline_aligned_in_smp; struct wx_ring *rx_ring[64]; struct wx_q_vector *q_vector[64]; + int num_rx_pools; + int num_rx_queues_per_pool; unsigned int queues_per_pool; struct msix_entry *msix_q_entries; @@ -1203,6 +1287,7 @@ struct wx { u32 wol; u16 bd_number; + bool default_up; struct wx_hw_stats stats; u64 tx_busy; @@ -1211,10 +1296,16 @@ struct wx { u64 hw_csum_rx_good; u64 hw_csum_rx_error; u64 alloc_rx_buff_failed; + unsigned int num_vfs; + struct vf_data_storage *vfinfo; + struct vf_macvlans vf_mvs; + struct vf_macvlans *mv_list; + unsigned long fwd_bitmask; u32 atr_sample_rate; void (*atr)(struct wx_ring *ring, struct wx_tx_buffer *first, u8 ptype); void (*configure_fdir)(struct wx *wx); + int (*setup_tc)(struct net_device *netdev, u8 tc); void (*do_reset)(struct net_device *netdev); int (*ptp_setup_sdp)(struct wx *wx); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index 91b3055a5a9f..b5022c49dc5e 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -15,6 +15,8 @@ #include "../libwx/wx_hw.h" #include "../libwx/wx_lib.h" #include "../libwx/wx_ptp.h" +#include "../libwx/wx_mbx.h" +#include "../libwx/wx_sriov.h" #include "ngbe_type.h" #include "ngbe_mdio.h" #include "ngbe_hw.h" @@ -129,6 +131,10 @@ static int ngbe_sw_init(struct wx *wx) wx->tx_work_limit = NGBE_DEFAULT_TX_WORK; wx->rx_work_limit = NGBE_DEFAULT_RX_WORK; + wx->mbx.size = WX_VXMAILBOX_SIZE; + wx->setup_tc = ngbe_setup_tc; + set_bit(0, &wx->fwd_bitmask); + return 0; } @@ -200,12 +206,10 @@ static irqreturn_t ngbe_intr(int __always_unused irq, void *data) return IRQ_HANDLED; } -static irqreturn_t ngbe_msix_other(int __always_unused irq, void *data) +static irqreturn_t __ngbe_msix_misc(struct wx *wx, u32 eicr) { - struct wx *wx = data; - u32 eicr; - - eicr = wx_misc_isb(wx, WX_ISB_MISC); + if (eicr & NGBE_PX_MISC_IC_VF_MBOX) + wx_msg_task(wx); if (unlikely(eicr & NGBE_PX_MISC_IC_TIMESYNC)) wx_ptp_check_pps_event(wx); @@ -217,6 +221,35 @@ static irqreturn_t ngbe_msix_other(int __always_unused irq, void *data) return IRQ_HANDLED; } +static irqreturn_t ngbe_msix_misc(int __always_unused irq, void *data) +{ + struct wx *wx = data; + u32 eicr; + + eicr = wx_misc_isb(wx, WX_ISB_MISC); + + return __ngbe_msix_misc(wx, eicr); +} + +static irqreturn_t 
ngbe_misc_and_queue(int __always_unused irq, void *data) +{ + struct wx_q_vector *q_vector; + struct wx *wx = data; + u32 eicr; + + eicr = wx_misc_isb(wx, WX_ISB_MISC); + if (!eicr) { + /* queue */ + q_vector = wx->q_vector[0]; + napi_schedule_irqoff(&q_vector->napi); + if (netif_running(wx->netdev)) + ngbe_irq_enable(wx, true); + return IRQ_HANDLED; + } + + return __ngbe_msix_misc(wx, eicr); +} + /** * ngbe_request_msix_irqs - Initialize MSI-X interrupts * @wx: board private structure * @@ -249,8 +282,16 @@ static int ngbe_request_msix_irqs(struct wx *wx) } } - err = request_irq(wx->msix_entry->vector, - ngbe_msix_other, 0, netdev->name, wx); + /* Due to hardware design, when num_vfs < 7 the PF can use vector 0 for + * misc and vector 1 for the queue. But when num_vfs == 7, vector[1] is + * assigned to vf6, so misc and the queue must share interrupt vector[0]. + */ + if (wx->num_vfs == 7) + err = request_irq(wx->msix_entry->vector, + ngbe_misc_and_queue, 0, netdev->name, wx); + else + err = request_irq(wx->msix_entry->vector, + ngbe_msix_misc, 0, netdev->name, wx); if (err) { wx_err(wx, "request_irq for msix_other failed: %d\n", err); @@ -302,6 +343,22 @@ static void ngbe_disable_device(struct wx *wx) struct net_device *netdev = wx->netdev; u32 i; + if (wx->num_vfs) { + /* Clear EITR Select mapping */ + wr32(wx, WX_PX_ITRSEL, 0); + + /* Mark all the VFs as inactive */ + for (i = 0; i < wx->num_vfs; i++) + wx->vfinfo[i].clear_to_send = 0; + wx->notify_down = true; + /* ping all the active vfs to let them know we are going down */ + wx_ping_all_vfs_with_link_status(wx, false); + wx->notify_down = false; + + /* Disable all VFTE/VFRE TX/RX */ + wx_disable_vf_rx_tx(wx); + } + /* disable all enabled rx queues */ for (i = 0; i < wx->num_rx_queues; i++) /* this call also flushes the previous write */ @@ -324,12 +381,19 @@ static void ngbe_disable_device(struct wx *wx) wx_update_stats(wx); } +static void ngbe_reset(struct wx *wx) +{ + wx_flush_sw_mac_table(wx); + wx_mac_set_default_filter(wx, wx->mac.addr); + if (test_bit(WX_STATE_PTP_RUNNING, wx->state)) + wx_ptp_reset(wx); +} + void ngbe_down(struct wx *wx) { phylink_stop(wx->phylink); ngbe_disable_device(wx); - if (test_bit(WX_STATE_PTP_RUNNING, wx->state)) - wx_ptp_reset(wx); + ngbe_reset(wx); wx_clean_all_tx_rings(wx); wx_clean_all_rx_rings(wx); } @@ -352,6 +416,11 @@ void ngbe_up(struct wx *wx) ngbe_sfp_modules_txrx_powerctl(wx, true); phylink_start(wx->phylink); + /* Set PF Reset Done bit so PF/VF Mail Ops can work */ + wr32m(wx, WX_CFG_PORT_CTL, + WX_CFG_PORT_CTL_PFRSTD, WX_CFG_PORT_CTL_PFRSTD); + if (wx->num_vfs) + wx_ping_all_vfs_with_link_status(wx, false); } /** @@ -518,6 +587,7 @@ static const struct net_device_ops ngbe_netdev_ops = { .ndo_set_rx_mode = wx_set_rx_mode, .ndo_set_features = wx_set_features, .ndo_fix_features = wx_fix_features, + .ndo_features_check = wx_features_check, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = wx_set_mac, .ndo_get_stats64 = wx_get_stats64, @@ -596,6 +666,10 @@ static int ngbe_probe(struct pci_dev *pdev, goto err_pci_release_regions; } + /* The emerald supports up to 8 VFs per PF, but the physical + * function also needs one pool for basic networking. 
+ */ + pci_sriov_set_totalvfs(pdev, NGBE_MAX_VFS_DRV_LIMIT); wx->driver_name = ngbe_driver_name; ngbe_set_ethtool_ops(netdev); netdev->netdev_ops = &ngbe_netdev_ops; @@ -744,6 +818,7 @@ static void ngbe_remove(struct pci_dev *pdev) struct net_device *netdev; netdev = wx->netdev; + wx_disable_sriov(wx); unregister_netdev(netdev); phylink_destroy(wx->phylink); pci_release_selected_regions(pdev, @@ -803,6 +878,7 @@ static struct pci_driver ngbe_driver = { .suspend = ngbe_suspend, .resume = ngbe_resume, .shutdown = ngbe_shutdown, + .sriov_configure = wx_pci_sriov_configure, }; module_pci_driver(ngbe_driver); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c index ea1d7e9a91f3..c63bb6e6f405 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c @@ -9,6 +9,7 @@ #include "../libwx/wx_type.h" #include "../libwx/wx_ptp.h" #include "../libwx/wx_hw.h" +#include "../libwx/wx_sriov.h" #include "ngbe_type.h" #include "ngbe_mdio.h" @@ -70,6 +71,8 @@ static void ngbe_mac_link_down(struct phylink_config *config, wx->speed = SPEED_UNKNOWN; if (test_bit(WX_STATE_PTP_RUNNING, wx->state)) wx_ptp_reset_cyclecounter(wx); + /* ping all the active vfs to let them know we are going down */ + wx_ping_all_vfs_with_link_status(wx, false); } static void ngbe_mac_link_up(struct phylink_config *config, @@ -114,6 +117,8 @@ static void ngbe_mac_link_up(struct phylink_config *config, wx->last_rx_ptp_check = jiffies; if (test_bit(WX_STATE_PTP_RUNNING, wx->state)) wx_ptp_reset_cyclecounter(wx); + /* ping all the active vfs to let them know we are going up */ + wx_ping_all_vfs_with_link_status(wx, true); } static const struct phylink_mac_ops ngbe_mac_ops = { diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h index 992adbb98c7d..bb74263f0498 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h @@ -73,12 +73,14 @@ #define NGBE_PX_MISC_IEN_TIMESYNC BIT(11) #define NGBE_PX_MISC_IEN_ETH_LK BIT(18) #define NGBE_PX_MISC_IEN_INT_ERR BIT(20) +#define NGBE_PX_MISC_IC_VF_MBOX BIT(23) #define NGBE_PX_MISC_IEN_GPIO BIT(26) #define NGBE_PX_MISC_IEN_MASK ( \ NGBE_PX_MISC_IEN_DEV_RST | \ NGBE_PX_MISC_IEN_TIMESYNC | \ NGBE_PX_MISC_IEN_ETH_LK | \ NGBE_PX_MISC_IEN_INT_ERR | \ + NGBE_PX_MISC_IC_VF_MBOX | \ NGBE_PX_MISC_IEN_GPIO) /* Extended Interrupt Cause Read */ @@ -134,6 +136,7 @@ #define NGBE_MAX_RXD 8192 #define NGBE_MIN_RXD 128 +#define NGBE_MAX_VFS_DRV_LIMIT 7 extern char ngbe_driver_name[]; void ngbe_down(struct wx *wx); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c index 8658a51ee810..19878f02d956 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c @@ -7,6 +7,7 @@ #include "../libwx/wx_type.h" #include "../libwx/wx_lib.h" #include "../libwx/wx_hw.h" +#include "../libwx/wx_sriov.h" #include "txgbe_type.h" #include "txgbe_phy.h" #include "txgbe_irq.h" @@ -109,8 +110,15 @@ static irqreturn_t txgbe_misc_irq_handle(int irq, void *data) struct wx *wx = txgbe->wx; u32 eicr; - if (wx->pdev->msix_enabled) + if (wx->pdev->msix_enabled) { + eicr = wx_misc_isb(wx, WX_ISB_MISC); + txgbe->eicr = eicr; + if (eicr & TXGBE_PX_MISC_IC_VF_MBOX) { + wx_msg_task(txgbe->wx); + wx_intr_enable(wx, TXGBE_INTR_MISC); + } return IRQ_WAKE_THREAD; + } eicr = wx_misc_isb(wx, WX_ISB_VEC0); if (!eicr) { @@ -129,6 +137,8 @@ 
static irqreturn_t txgbe_misc_irq_handle(int irq, void *data) q_vector = wx->q_vector[0]; napi_schedule_irqoff(&q_vector->napi); + txgbe->eicr = wx_misc_isb(wx, WX_ISB_MISC); + return IRQ_WAKE_THREAD; } @@ -140,7 +150,7 @@ static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data) unsigned int sub_irq; u32 eicr; - eicr = wx_misc_isb(wx, WX_ISB_MISC); + eicr = txgbe->eicr; if (eicr & (TXGBE_PX_MISC_ETH_LK | TXGBE_PX_MISC_ETH_LKDN | TXGBE_PX_MISC_ETH_AN)) { sub_irq = irq_find_mapping(txgbe->misc.domain, TXGBE_IRQ_LINK); @@ -183,7 +193,7 @@ int txgbe_setup_misc_irq(struct txgbe *txgbe) if (wx->mac.type == wx_mac_aml) goto skip_sp_irq; - txgbe->misc.nirqs = 1; + txgbe->misc.nirqs = TXGBE_IRQ_MAX; txgbe->misc.domain = irq_domain_add_simple(NULL, txgbe->misc.nirqs, 0, &txgbe_misc_irq_domain_ops, txgbe); if (!txgbe->misc.domain) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 38206a46693b..f57d84628e07 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -8,6 +8,7 @@ #include <linux/string.h> #include <linux/etherdevice.h> #include <linux/phylink.h> +#include <net/udp_tunnel.h> #include <net/ip.h> #include <linux/if_vlan.h> @@ -15,6 +16,8 @@ #include "../libwx/wx_lib.h" #include "../libwx/wx_ptp.h" #include "../libwx/wx_hw.h" +#include "../libwx/wx_mbx.h" +#include "../libwx/wx_sriov.h" #include "txgbe_type.h" #include "txgbe_hw.h" #include "txgbe_phy.h" @@ -117,6 +120,12 @@ static void txgbe_up_complete(struct wx *wx) /* enable transmits */ netif_tx_start_all_queues(netdev); + + /* Set PF Reset Done bit so PF/VF Mail Ops can work */ + wr32m(wx, WX_CFG_PORT_CTL, WX_CFG_PORT_CTL_PFRSTD, + WX_CFG_PORT_CTL_PFRSTD); + /* update Rx/Tx settings for all active VFs */ + wx_set_all_vfs(wx); } static void txgbe_reset(struct wx *wx) @@ -165,6 +174,16 @@ static void txgbe_disable_device(struct wx *wx) wx_err(wx, "%s: invalid bus lan id %d\n", __func__, wx->bus.func); + if (wx->num_vfs) { + /* Clear EITR Select mapping */ + wr32(wx, WX_PX_ITRSEL, 0); + /* Mark all the VFs as inactive */ + for (i = 0; i < wx->num_vfs; i++) + wx->vfinfo[i].clear_to_send = 0; + /* update Rx/Tx settings for all active VFs */ + wx_set_all_vfs(wx); + } + if (!(((wx->subsystem_device_id & WX_NCSI_MASK) == WX_NCSI_SUP) || ((wx->subsystem_device_id & WX_WOL_MASK) == WX_WOL_SUP))) { /* disable mac transmitter */ @@ -307,12 +326,15 @@ static int txgbe_sw_init(struct wx *wx) /* set default ring sizes */ wx->tx_ring_count = TXGBE_DEFAULT_TXD; wx->rx_ring_count = TXGBE_DEFAULT_RXD; + wx->mbx.size = WX_VXMAILBOX_SIZE; /* set default work limits */ wx->tx_work_limit = TXGBE_DEFAULT_TX_WORK; wx->rx_work_limit = TXGBE_DEFAULT_RX_WORK; + wx->setup_tc = txgbe_setup_tc; wx->do_reset = txgbe_do_reset; + set_bit(0, &wx->fwd_bitmask); switch (wx->mac.type) { case wx_mac_sp: @@ -516,6 +538,39 @@ void txgbe_do_reset(struct net_device *netdev) txgbe_reset(wx); } +static int txgbe_udp_tunnel_sync(struct net_device *dev, unsigned int table) +{ + struct wx *wx = netdev_priv(dev); + struct udp_tunnel_info ti; + + udp_tunnel_nic_get_port(dev, table, 0, &ti); + switch (ti.type) { + case UDP_TUNNEL_TYPE_VXLAN: + wr32(wx, TXGBE_CFG_VXLAN, ntohs(ti.port)); + break; + case UDP_TUNNEL_TYPE_VXLAN_GPE: + wr32(wx, TXGBE_CFG_VXLAN_GPE, ntohs(ti.port)); + break; + case UDP_TUNNEL_TYPE_GENEVE: + wr32(wx, TXGBE_CFG_GENEVE, ntohs(ti.port)); + break; + default: + break; + } + + return 0; +} + +static const struct udp_tunnel_nic_info 
txgbe_udp_tunnels = { + .sync_table = txgbe_udp_tunnel_sync, + .flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY, + .tables = { + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN_GPE, }, + { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, }, + }, +}; + static const struct net_device_ops txgbe_netdev_ops = { .ndo_open = txgbe_open, .ndo_stop = txgbe_close, @@ -524,6 +579,7 @@ .ndo_set_rx_mode = wx_set_rx_mode, .ndo_set_features = wx_set_features, .ndo_fix_features = wx_fix_features, + .ndo_features_check = wx_features_check, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = wx_set_mac, .ndo_get_stats64 = wx_get_stats64, @@ -604,9 +660,14 @@ static int txgbe_probe(struct pci_dev *pdev, goto err_pci_release_regions; } + /* The sapphire supports up to 63 VFs per PF, but the physical + * function also needs one pool for basic networking. + */ + pci_sriov_set_totalvfs(pdev, TXGBE_MAX_VFS_DRV_LIMIT); wx->driver_name = txgbe_driver_name; txgbe_set_ethtool_ops(netdev); netdev->netdev_ops = &txgbe_netdev_ops; + netdev->udp_tunnel_nic_info = &txgbe_udp_tunnels; /* setup the private structure */ err = txgbe_sw_init(wx); @@ -652,6 +713,7 @@ static int txgbe_probe(struct pci_dev *pdev, netdev->features |= NETIF_F_HIGHDMA; netdev->hw_features |= NETIF_F_GRO; netdev->features |= NETIF_F_GRO; + netdev->features |= NETIF_F_RX_UDP_TUNNEL_PORT; netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; @@ -795,6 +857,7 @@ static void txgbe_remove(struct pci_dev *pdev) struct net_device *netdev; netdev = wx->netdev; + wx_disable_sriov(wx); unregister_netdev(netdev); txgbe_remove_phy(txgbe); @@ -817,11 +880,12 @@ static struct pci_driver txgbe_driver = { .probe = txgbe_probe, .remove = txgbe_remove, .shutdown = txgbe_shutdown, + .sriov_configure = wx_pci_sriov_configure, }; module_pci_driver(txgbe_driver); MODULE_DEVICE_TABLE(pci, txgbe_pci_tbl); MODULE_AUTHOR("Beijing WangXun Technology Co., Ltd, <software@trustnetic.com>"); -MODULE_DESCRIPTION("WangXun(R) 10 Gigabit PCI Express Network Driver"); +MODULE_DESCRIPTION("WangXun(R) 10/25/40 Gigabit PCI Express Network Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 85f022ceef4f..1863cfd27ee7 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -16,6 +16,8 @@ #include "../libwx/wx_type.h" #include "../libwx/wx_lib.h" #include "../libwx/wx_ptp.h" +#include "../libwx/wx_sriov.h" +#include "../libwx/wx_mbx.h" #include "../libwx/wx_hw.h" #include "txgbe_type.h" #include "txgbe_phy.h" @@ -184,6 +186,8 @@ static void txgbe_mac_link_down(struct phylink_config *config, wx->speed = SPEED_UNKNOWN; if (test_bit(WX_STATE_PTP_RUNNING, wx->state)) wx_ptp_reset_cyclecounter(wx); + /* ping all the active vfs to let them know we are going down */ + wx_ping_all_vfs_with_link_status(wx, false); } static void txgbe_mac_link_up(struct phylink_config *config, @@ -225,6 +229,8 @@ static void txgbe_mac_link_up(struct phylink_config *config, wx->last_rx_ptp_check = jiffies; if (test_bit(WX_STATE_PTP_RUNNING, wx->state)) wx_ptp_reset_cyclecounter(wx); + /* ping all the active vfs to let them know we are going up */ + wx_ping_all_vfs_with_link_status(wx, true); } static int txgbe_mac_prepare(struct phylink_config *config, unsigned int mode, diff --git 
a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h index 3938985355ed..a32b19d71ea2 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h @@ -8,4 +8,4 @@ irqreturn_t txgbe_link_irq_handler(int irq, void *data); int txgbe_init_phy(struct txgbe *txgbe); void txgbe_remove_phy(struct txgbe *txgbe); -#endif /* _TXGBE_NODE_H_ */ +#endif /* _TXGBE_PHY_H_ */ diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index f423012dec22..261a83308568 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -77,15 +77,20 @@ #define TXGBE_PX_MISC_ETH_LK BIT(18) #define TXGBE_PX_MISC_ETH_AN BIT(19) #define TXGBE_PX_MISC_INT_ERR BIT(20) +#define TXGBE_PX_MISC_IC_VF_MBOX BIT(23) #define TXGBE_PX_MISC_GPIO BIT(26) #define TXGBE_PX_MISC_IEN_MASK \ (TXGBE_PX_MISC_ETH_LKDN | TXGBE_PX_MISC_DEV_RST | \ TXGBE_PX_MISC_ETH_EVENT | TXGBE_PX_MISC_ETH_LK | \ - TXGBE_PX_MISC_ETH_AN | TXGBE_PX_MISC_INT_ERR) + TXGBE_PX_MISC_ETH_AN | TXGBE_PX_MISC_INT_ERR | \ + TXGBE_PX_MISC_IC_VF_MBOX) /* Port cfg registers */ #define TXGBE_CFG_PORT_ST 0x14404 #define TXGBE_CFG_PORT_ST_LINK_UP BIT(0) +#define TXGBE_CFG_VXLAN 0x14410 +#define TXGBE_CFG_VXLAN_GPE 0x14414 +#define TXGBE_CFG_GENEVE 0x14418 /* I2C registers */ #define TXGBE_I2C_BASE 0x14900 @@ -176,6 +181,8 @@ #define TXGBE_SP_RX_PB_SIZE 512 #define TXGBE_SP_TDB_PB_SZ (160 * 1024) /* 160KB Packet Buffer */ +#define TXGBE_MAX_VFS_DRV_LIMIT 63 + #define TXGBE_DEFAULT_ATR_SAMPLE_RATE 20 /* Software ATR hash keys */ @@ -350,6 +357,7 @@ struct txgbe { struct clk *clk; struct gpio_chip *gpio; unsigned int link_irq; + u32 eicr; /* flow director */ struct hlist_head fdir_filter_list; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 054abf283ab3..1b7a653c1f4e 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2980,7 +2980,7 @@ static int axienet_probe(struct platform_device *pdev) } } if (!IS_ENABLED(CONFIG_64BIT) && lp->features & XAE_FEATURE_DMA_64BIT) { - dev_err(&pdev->dev, "64-bit addressable DMA is not compatible with 32-bit archecture\n"); + dev_err(&pdev->dev, "64-bit addressable DMA is not compatible with 32-bit architecture\n"); ret = -EINVAL; goto cleanup_clk; } diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index a2ab1c150822..e1e7f65553e7 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -394,16 +394,20 @@ static void ixp_tx_timestamp(struct port *port, struct sk_buff *skb) __raw_writel(TX_SNAPSHOT_LOCKED, ®s->channel[ch].ch_event); } -static int hwtstamp_set(struct net_device *netdev, struct ifreq *ifr) +static int ixp4xx_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { - struct hwtstamp_config cfg; struct ixp46x_ts_regs *regs; struct port *port = netdev_priv(netdev); int ret; int ch; - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; + if (!cpu_is_ixp46x()) + return -EOPNOTSUPP; + + if (!netif_running(netdev)) + return -EINVAL; ret = ixp46x_ptp_find(&port->timesync_regs, &port->phc_index); if (ret) @@ -412,10 +416,10 @@ static int hwtstamp_set(struct net_device *netdev, struct ifreq *ifr) ch = PORT2CHANNEL(port); regs = 
port->timesync_regs; - if (cfg.tx_type != HWTSTAMP_TX_OFF && cfg.tx_type != HWTSTAMP_TX_ON) + if (cfg->tx_type != HWTSTAMP_TX_OFF && cfg->tx_type != HWTSTAMP_TX_ON) return -ERANGE; - switch (cfg.rx_filter) { + switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: port->hwts_rx_en = 0; break; @@ -431,39 +435,45 @@ static int hwtstamp_set(struct net_device *netdev, struct ifreq *ifr) return -ERANGE; } - port->hwts_tx_en = cfg.tx_type == HWTSTAMP_TX_ON; + port->hwts_tx_en = cfg->tx_type == HWTSTAMP_TX_ON; /* Clear out any old time stamps. */ __raw_writel(TX_SNAPSHOT_LOCKED | RX_SNAPSHOT_LOCKED, ®s->channel[ch].ch_event); - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; + return 0; } -static int hwtstamp_get(struct net_device *netdev, struct ifreq *ifr) +static int ixp4xx_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *cfg) { - struct hwtstamp_config cfg; struct port *port = netdev_priv(netdev); - cfg.flags = 0; - cfg.tx_type = port->hwts_tx_en ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + if (!cpu_is_ixp46x()) + return -EOPNOTSUPP; + + if (!netif_running(netdev)) + return -EINVAL; + + cfg->flags = 0; + cfg->tx_type = port->hwts_tx_en ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; switch (port->hwts_rx_en) { case 0: - cfg.rx_filter = HWTSTAMP_FILTER_NONE; + cfg->rx_filter = HWTSTAMP_FILTER_NONE; break; case PTP_SLAVE_MODE: - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_SYNC; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_SYNC; break; case PTP_MASTER_MODE: - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ; break; default: WARN_ON_ONCE(1); return -ERANGE; } - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; + return 0; } static int ixp4xx_mdio_cmd(struct mii_bus *bus, int phy_id, int location, @@ -985,21 +995,6 @@ static void eth_set_mcast_list(struct net_device *dev) } -static int eth_ioctl(struct net_device *dev, struct ifreq *req, int cmd) -{ - if (!netif_running(dev)) - return -EINVAL; - - if (cpu_is_ixp46x()) { - if (cmd == SIOCSHWTSTAMP) - return hwtstamp_set(dev, req); - if (cmd == SIOCGHWTSTAMP) - return hwtstamp_get(dev, req); - } - - return phy_mii_ioctl(dev->phydev, req, cmd); -} - /* ethtool support */ static void ixp4xx_get_drvinfo(struct net_device *dev, @@ -1433,9 +1428,11 @@ static const struct net_device_ops ixp4xx_netdev_ops = { .ndo_change_mtu = ixp4xx_eth_change_mtu, .ndo_start_xmit = eth_xmit, .ndo_set_rx_mode = eth_set_mcast_list, - .ndo_eth_ioctl = eth_ioctl, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, + .ndo_hwtstamp_get = ixp4xx_hwtstamp_get, + .ndo_hwtstamp_set = ixp4xx_hwtstamp_set, }; static struct eth_plat_info *ixp4xx_of_get_platdata(struct device *dev) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 66e38ce9cd1d..ffc15a432689 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1946,22 +1946,14 @@ static __net_init int geneve_init_net(struct net *net) return 0; } -static void geneve_destroy_tunnels(struct net *net, struct list_head *head) +static void __net_exit geneve_exit_rtnl_net(struct net *net, + struct list_head *dev_to_kill) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_dev *geneve, *next; list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) - geneve_dellink(geneve->dev, head); -} - -static void __net_exit geneve_exit_batch_rtnl(struct list_head *net_list, - struct list_head *dev_to_kill) -{ - struct net *net; - - 
list_for_each_entry(net, net_list, exit_list) - geneve_destroy_tunnels(net, dev_to_kill); + geneve_dellink(geneve->dev, dev_to_kill); } static void __net_exit geneve_exit_net(struct net *net) @@ -1973,7 +1965,7 @@ static void __net_exit geneve_exit_net(struct net *net) static struct pernet_operations geneve_net_ops = { .init = geneve_init_net, - .exit_batch_rtnl = geneve_exit_batch_rtnl, + .exit_rtnl = geneve_exit_rtnl_net, .exit = geneve_exit_net, .id = &geneve_net_id, .size = sizeof(struct geneve_net), diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index ef793607890d..d4dec741c7f4 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -2475,23 +2475,19 @@ static int __net_init gtp_net_init(struct net *net) return 0; } -static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list, - struct list_head *dev_to_kill) +static void __net_exit gtp_net_exit_rtnl(struct net *net, + struct list_head *dev_to_kill) { - struct net *net; - - list_for_each_entry(net, net_list, exit_list) { - struct gtp_net *gn = net_generic(net, gtp_net_id); - struct gtp_dev *gtp, *gtp_next; + struct gtp_net *gn = net_generic(net, gtp_net_id); + struct gtp_dev *gtp, *gtp_next; - list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list) - gtp_dellink(gtp->dev, dev_to_kill); - } + list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list) + gtp_dellink(gtp->dev, dev_to_kill); } static struct pernet_operations gtp_net_ops = { .init = gtp_net_init, - .exit_batch_rtnl = gtp_net_exit_batch_rtnl, + .exit_rtnl = gtp_net_exit_rtnl, .id = >p_net_id, .size = sizeof(struct gtp_net), }; diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 9e366f275406..5fda7a0fcce0 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -1074,7 +1074,7 @@ static int baycom_siocdevprivate(struct net_device *dev, struct ifreq *ifr, return 0; case HDLCDRVCTL_DRIVERNAME: - strscpy_pad(hi.data.drivername, "baycom_epp", sizeof(hi.data.drivername)); + strscpy_pad(hi.data.drivername, "baycom_epp"); break; case HDLCDRVCTL_GETMODE: @@ -1091,8 +1091,7 @@ static int baycom_siocdevprivate(struct net_device *dev, struct ifreq *ifr, return baycom_setmode(bc, hi.data.modename); case HDLCDRVCTL_MODELIST: - strscpy_pad(hi.data.modename, "intclk,extclk,intmodem,extmodem,divider=x", - sizeof(hi.data.modename)); + strscpy_pad(hi.data.modename, "intclk,extclk,intmodem,extmodem,divider=x"); break; case HDLCDRVCTL_MODEMPARMASK: diff --git a/drivers/net/ipa/data/ipa_data-v3.1.c b/drivers/net/ipa/data/ipa_data-v3.1.c index e902d731776d..65dba4729155 100644 --- a/drivers/net/ipa/data/ipa_data-v3.1.c +++ b/drivers/net/ipa/data/ipa_data-v3.1.c @@ -493,7 +493,6 @@ static const struct ipa_mem_data ipa_mem_data = { .local = ipa_mem_local_data, .imem_addr = 0x146bd000, .imem_size = 0x00002000, - .smem_id = 497, .smem_size = 0x00002000, }; diff --git a/drivers/net/ipa/data/ipa_data-v3.5.1.c b/drivers/net/ipa/data/ipa_data-v3.5.1.c index f632aab56f4c..315e617a8eeb 100644 --- a/drivers/net/ipa/data/ipa_data-v3.5.1.c +++ b/drivers/net/ipa/data/ipa_data-v3.5.1.c @@ -374,7 +374,6 @@ static const struct ipa_mem_data ipa_mem_data = { .local = ipa_mem_local_data, .imem_addr = 0x146bd000, .imem_size = 0x00002000, - .smem_id = 497, .smem_size = 0x00002000, }; diff --git a/drivers/net/ipa/data/ipa_data-v4.11.c b/drivers/net/ipa/data/ipa_data-v4.11.c index c1428483ca34..f5d66779c2fb 100644 --- a/drivers/net/ipa/data/ipa_data-v4.11.c +++ b/drivers/net/ipa/data/ipa_data-v4.11.c @@ -367,7 +367,6 
@@ static const struct ipa_mem_data ipa_mem_data = { .local = ipa_mem_local_data, .imem_addr = 0x146a8000, .imem_size = 0x00002000, - .smem_id = 497, .smem_size = 0x00009000, }; diff --git a/drivers/net/ipa/data/ipa_data-v4.2.c b/drivers/net/ipa/data/ipa_data-v4.2.c index 2c7e8cb429b9..f5ed5d745aeb 100644 --- a/drivers/net/ipa/data/ipa_data-v4.2.c +++ b/drivers/net/ipa/data/ipa_data-v4.2.c @@ -340,7 +340,6 @@ static const struct ipa_mem_data ipa_mem_data = { .local = ipa_mem_local_data, .imem_addr = 0x146a8000, .imem_size = 0x00002000, - .smem_id = 497, .smem_size = 0x00002000, }; diff --git a/drivers/net/ipa/data/ipa_data-v4.5.c b/drivers/net/ipa/data/ipa_data-v4.5.c index 57dc78c526b0..730d8c43a45c 100644 --- a/drivers/net/ipa/data/ipa_data-v4.5.c +++ b/drivers/net/ipa/data/ipa_data-v4.5.c @@ -418,7 +418,6 @@ static const struct ipa_mem_data ipa_mem_data = { .local = ipa_mem_local_data, .imem_addr = 0x14688000, .imem_size = 0x00003000, - .smem_id = 497, .smem_size = 0x00009000, }; diff --git a/drivers/net/ipa/data/ipa_data-v4.7.c b/drivers/net/ipa/data/ipa_data-v4.7.c index 41f212209993..5e1d9049c62b 100644 --- a/drivers/net/ipa/data/ipa_data-v4.7.c +++ b/drivers/net/ipa/data/ipa_data-v4.7.c @@ -360,7 +360,6 @@ static const struct ipa_mem_data ipa_mem_data = { .local = ipa_mem_local_data, .imem_addr = 0x146a8000, .imem_size = 0x00002000, - .smem_id = 497, .smem_size = 0x00009000, }; diff --git a/drivers/net/ipa/data/ipa_data-v4.9.c b/drivers/net/ipa/data/ipa_data-v4.9.c index 4eb9c909d5b3..da472a2a2e29 100644 --- a/drivers/net/ipa/data/ipa_data-v4.9.c +++ b/drivers/net/ipa/data/ipa_data-v4.9.c @@ -416,7 +416,6 @@ static const struct ipa_mem_data ipa_mem_data = { .local = ipa_mem_local_data, .imem_addr = 0x146bd000, .imem_size = 0x00002000, - .smem_id = 497, .smem_size = 0x00009000, }; diff --git a/drivers/net/ipa/data/ipa_data-v5.0.c b/drivers/net/ipa/data/ipa_data-v5.0.c index 050580c99b65..bc5722e4b053 100644 --- a/drivers/net/ipa/data/ipa_data-v5.0.c +++ b/drivers/net/ipa/data/ipa_data-v5.0.c @@ -442,7 +442,6 @@ static const struct ipa_mem_data ipa_mem_data = { .local = ipa_mem_local_data, .imem_addr = 0x14688000, .imem_size = 0x00003000, - .smem_id = 497, .smem_size = 0x00009000, }; diff --git a/drivers/net/ipa/data/ipa_data-v5.5.c b/drivers/net/ipa/data/ipa_data-v5.5.c index 0e6663e22533..741ae21d9d78 100644 --- a/drivers/net/ipa/data/ipa_data-v5.5.c +++ b/drivers/net/ipa/data/ipa_data-v5.5.c @@ -448,7 +448,6 @@ static const struct ipa_mem_data ipa_mem_data = { .local = ipa_mem_local_data, .imem_addr = 0x14688000, .imem_size = 0x00002000, - .smem_id = 497, .smem_size = 0x0000b000, }; diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h index d88cbbbf18b7..2fd03f0799b2 100644 --- a/drivers/net/ipa/ipa_data.h +++ b/drivers/net/ipa/ipa_data.h @@ -180,7 +180,6 @@ struct ipa_resource_data { * @local: array of IPA-local memory region descriptors * @imem_addr: physical address of IPA region within IMEM * @imem_size: size in bytes of IPA IMEM region - * @smem_id: item identifier for IPA region within SMEM memory * @smem_size: size in bytes of the IPA SMEM region */ struct ipa_mem_data { @@ -188,7 +187,6 @@ struct ipa_mem_data { const struct ipa_mem *local; u32 imem_addr; u32 imem_size; - u32 smem_id; u32 smem_size; }; diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c index dee985eb08cb..835a3c9c1fd4 100644 --- a/drivers/net/ipa/ipa_mem.c +++ b/drivers/net/ipa/ipa_mem.c @@ -26,6 +26,8 @@ /* SMEM host id representing the modem. 
*/ #define QCOM_SMEM_HOST_MODEM 1 +#define SMEM_IPA_FILTER_TABLE 497 + const struct ipa_mem *ipa_mem_find(struct ipa *ipa, enum ipa_mem_id mem_id) { u32 i; @@ -509,7 +511,6 @@ static void ipa_imem_exit(struct ipa *ipa) /** * ipa_smem_init() - Initialize SMEM memory used by the IPA * @ipa: IPA pointer - * @item: Item ID of SMEM memory * @size: Size (bytes) of SMEM memory region * * SMEM is a managed block of shared DRAM, from which numbered "items" @@ -523,7 +524,7 @@ static void ipa_imem_exit(struct ipa *ipa) * * Note: @size and the item address are not guaranteed to be page-aligned. */ -static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size) +static int ipa_smem_init(struct ipa *ipa, size_t size) { struct device *dev = ipa->dev; struct iommu_domain *domain; @@ -545,25 +546,25 @@ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size) * The item might have already been allocated, in which case we * use it unless the size isn't what we expect. */ - ret = qcom_smem_alloc(QCOM_SMEM_HOST_MODEM, item, size); + ret = qcom_smem_alloc(QCOM_SMEM_HOST_MODEM, SMEM_IPA_FILTER_TABLE, size); if (ret && ret != -EEXIST) { - dev_err(dev, "error %d allocating size %zu SMEM item %u\n", - ret, size, item); + dev_err(dev, "error %d allocating size %zu SMEM item\n", + ret, size); return ret; } /* Now get the address of the SMEM memory region */ - virt = qcom_smem_get(QCOM_SMEM_HOST_MODEM, item, &actual); + virt = qcom_smem_get(QCOM_SMEM_HOST_MODEM, SMEM_IPA_FILTER_TABLE, &actual); if (IS_ERR(virt)) { ret = PTR_ERR(virt); - dev_err(dev, "error %d getting SMEM item %u\n", ret, item); + dev_err(dev, "error %d getting SMEM item\n", ret); return ret; } /* In case the region was already allocated, verify the size */ if (ret && actual != size) { - dev_err(dev, "SMEM item %u has size %zu, expected %zu\n", - item, actual, size); + dev_err(dev, "SMEM item has size %zu, expected %zu\n", - actual, size); return -EINVAL; } @@ -659,7 +660,7 @@ int ipa_mem_init(struct ipa *ipa, struct platform_device *pdev, if (ret) goto err_unmap; - ret = ipa_smem_init(ipa, mem_data->smem_id, mem_data->smem_size); + ret = ipa_smem_init(ipa, mem_data->smem_size); if (ret) goto err_imem_exit; diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index ca62188a317a..e3e65772c599 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -219,7 +219,7 @@ void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type) unsigned int ipvlan_mac_hash(const unsigned char *addr) { - u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2), + u32 hash = jhash_1word(get_unaligned((u32 *)(addr + 2)), ipvlan_jhash_secret); return hash & IPVLAN_MAC_FILTER_MASK; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d0dfa6bca6cc..7045b1d58754 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -254,7 +254,7 @@ static u32 macvlan_hash_mix(const struct macvlan_dev *vlan) static unsigned int mc_hash(const struct macvlan_dev *vlan, const unsigned char *addr) { - u32 val = __get_unaligned_cpu32(addr + 2); + u32 val = get_unaligned((u32 *)(addr + 2)); val ^= macvlan_hash_mix(vlan); return hash_32(val, MACVLAN_MC_FILTER_BITS); diff --git a/drivers/net/mdio/Kconfig b/drivers/net/mdio/Kconfig index 4a7a303be2f7..d3219ca194ec 100644 --- a/drivers/net/mdio/Kconfig +++ b/drivers/net/mdio/Kconfig @@ -19,33 +19,25 @@ config MDIO_BUS reflects whether the mdio_bus/mdio_device code is built as a loadable module or built-in. 
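The Kconfig hunk that follows restructures the MDIO accessor entries around a single if PHYLIB guard: every symbol inside a Kconfig if/endif block inherits the guard condition as an implicit dependency, so the old "def_tristate PHYLIB" plus explicit "depends on" pattern collapses into a plain def_tristate. A minimal sketch of the equivalence, using OF_MDIO from this hunk (abbreviated, not the full entries):

# Before: the phylib dependency is repeated per entry
config OF_MDIO
	def_tristate PHYLIB
	depends on OF
	depends on PHYLIB

# After: the surrounding block supplies the PHYLIB dependency
if PHYLIB

config OF_MDIO
	def_tristate OF

endif # PHYLIB

Both spellings resolve to the same tristate value (the symbol tracks OF, capped by PHYLIB); the block form simply removes the per-entry repetition.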
+if PHYLIB + config FWNODE_MDIO - def_tristate PHYLIB - depends on (ACPI || OF) || COMPILE_TEST + def_tristate (ACPI || OF) || COMPILE_TEST select FIXED_PHY help FWNODE MDIO bus (Ethernet PHY) accessors config OF_MDIO - def_tristate PHYLIB - depends on OF - depends on PHYLIB + def_tristate OF select FIXED_PHY help OpenFirmware MDIO bus (Ethernet PHY) accessors config ACPI_MDIO - def_tristate PHYLIB - depends on ACPI - depends on PHYLIB + def_tristate ACPI help ACPI MDIO bus (Ethernet PHY) accessors -if MDIO_BUS - -config MDIO_DEVRES - tristate - config MDIO_SUN4I tristate "Allwinner sun4i MDIO interface support" depends on ARCH_SUNXI || COMPILE_TEST @@ -65,7 +57,6 @@ config MDIO_ASPEED tristate "ASPEED MDIO bus controller" depends on ARCH_ASPEED || COMPILE_TEST depends on OF_MDIO && HAS_IOMEM - depends on MDIO_DEVRES help This module provides a driver for the independent MDIO bus controllers found in the ASPEED AST2600 SoC. This is a driver for the @@ -135,7 +126,6 @@ config MDIO_I2C config MDIO_MVUSB tristate "Marvell USB to MDIO Adapter" depends on USB - select MDIO_DEVRES help A USB to MDIO converter present on development boards for Marvell's Link Street family of Ethernet switches. @@ -143,7 +133,6 @@ config MDIO_MVUSB config MDIO_MSCC_MIIM tristate "Microsemi MIIM interface support" depends on HAS_IOMEM && REGMAP_MMIO - select MDIO_DEVRES help This driver supports the MIIM (MDIO) interface found in the network switches of the Microsemi SoCs; it is recommended to switch on @@ -161,7 +150,6 @@ config MDIO_OCTEON depends on (64BIT && OF_MDIO) || COMPILE_TEST depends on HAS_IOMEM select MDIO_CAVIUM - select MDIO_DEVRES help This module provides a driver for the Octeon and ThunderX MDIO buses. It is required by the Octeon and ThunderX ethernet device @@ -171,7 +159,6 @@ config MDIO_IPQ4019 tristate "Qualcomm IPQ4019 MDIO interface support" depends on HAS_IOMEM && OF_MDIO depends on COMMON_CLK - depends on MDIO_DEVRES help This driver supports the MDIO interface found in Qualcomm IPQ40xx, IPQ60xx, IPQ807x and IPQ50xx series SoCs. @@ -180,11 +167,17 @@ config MDIO_IPQ8064 tristate "Qualcomm IPQ8064 MDIO interface support" depends on HAS_IOMEM && OF_MDIO depends on MFD_SYSCON - depends on MDIO_DEVRES help This driver supports the MDIO interface found in the network interface units of the IPQ8064 SoC. +config MDIO_REALTEK_RTL9300 + tristate "Realtek RTL9300 MDIO interface support" + depends on MACH_REALTEK_RTL || COMPILE_TEST + help + This driver supports the MDIO interface found in the Realtek + RTL9300 family of Ethernet switches with integrated SoC. 
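Note that the new MDIO_REALTEK_RTL9300 entry above sits inside the same if PHYLIB block, so a build only needs to set the controller option plus its platform dependency; the dropped MDIO_DEVRES selects elsewhere in this hunk suggest the devres MDIO helpers now come with the core unconditionally. A hypothetical .config fragment enabling the driver as a module (symbol names are from this patch and from the existing MACH_REALTEK_RTL platform option; the exact platform selection depends on the target):

# Hypothetical fragment for an RTL9300-based MIPS target
CONFIG_PHYLIB=y
CONFIG_MACH_REALTEK_RTL=y
# or CONFIG_COMPILE_TEST=y for build coverage on other targets
CONFIG_MDIO_REALTEK_RTL9300=m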
+ config MDIO_REGMAP tristate help @@ -201,7 +194,6 @@ config MDIO_THUNDER depends on 64BIT depends on PCI select MDIO_CAVIUM - select MDIO_DEVRES help This driver supports the MDIO interfaces found on Cavium ThunderX SoCs when the MDIO bus device appears as a PCI diff --git a/drivers/net/mdio/Makefile b/drivers/net/mdio/Makefile index 1015f0db4531..c23778e73890 100644 --- a/drivers/net/mdio/Makefile +++ b/drivers/net/mdio/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_MDIO_MOXART) += mdio-moxart.o obj-$(CONFIG_MDIO_MSCC_MIIM) += mdio-mscc-miim.o obj-$(CONFIG_MDIO_MVUSB) += mdio-mvusb.o obj-$(CONFIG_MDIO_OCTEON) += mdio-octeon.o +obj-$(CONFIG_MDIO_REALTEK_RTL9300) += mdio-realtek-rtl9300.o obj-$(CONFIG_MDIO_REGMAP) += mdio-regmap.o obj-$(CONFIG_MDIO_SUN4I) += mdio-sun4i.o obj-$(CONFIG_MDIO_THUNDER) += mdio-thunder.o diff --git a/drivers/net/mdio/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c index 074d96328f41..b6e30bdf5325 100644 --- a/drivers/net/mdio/mdio-bcm-unimac.c +++ b/drivers/net/mdio/mdio-bcm-unimac.c @@ -334,9 +334,9 @@ static SIMPLE_DEV_PM_OPS(unimac_mdio_pm_ops, NULL, unimac_mdio_resume); static const struct of_device_id unimac_mdio_ids[] = { + { .compatible = "brcm,asp-v3.0-mdio", }, { .compatible = "brcm,asp-v2.2-mdio", }, { .compatible = "brcm,asp-v2.1-mdio", }, - { .compatible = "brcm,asp-v2.0-mdio", }, { .compatible = "brcm,bcm6846-mdio", }, { .compatible = "brcm,genet-mdio-v5", }, { .compatible = "brcm,genet-mdio-v4", }, diff --git a/drivers/net/mdio/mdio-realtek-rtl9300.c b/drivers/net/mdio/mdio-realtek-rtl9300.c new file mode 100644 index 000000000000..33694c3ff9a7 --- /dev/null +++ b/drivers/net/mdio/mdio-realtek-rtl9300.c @@ -0,0 +1,522 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * MDIO controller for RTL9300 switches with integrated SoC. + * + * The MDIO communication is abstracted by the switch. At the software level + * communication uses the switch port to address the PHY. We work out the + * mapping based on the MDIO bus described in device tree and phandles on the + * ethernet-ports property. 
+ */ + +#include <linux/bitfield.h> +#include <linux/bitmap.h> +#include <linux/bits.h> +#include <linux/find.h> +#include <linux/mdio.h> +#include <linux/mfd/syscon.h> +#include <linux/mod_devicetable.h> +#include <linux/mutex.h> +#include <linux/of_mdio.h> +#include <linux/phy.h> +#include <linux/platform_device.h> +#include <linux/property.h> +#include <linux/regmap.h> + +#define SMI_GLB_CTRL 0xca00 +#define GLB_CTRL_INTF_SEL(intf) BIT(16 + (intf)) +#define SMI_PORT0_15_POLLING_SEL 0xca08 +#define SMI_ACCESS_PHY_CTRL_0 0xcb70 +#define SMI_ACCESS_PHY_CTRL_1 0xcb74 +#define PHY_CTRL_REG_ADDR GENMASK(24, 20) +#define PHY_CTRL_PARK_PAGE GENMASK(19, 15) +#define PHY_CTRL_MAIN_PAGE GENMASK(14, 3) +#define PHY_CTRL_WRITE BIT(2) +#define PHY_CTRL_READ 0 +#define PHY_CTRL_TYPE_C45 BIT(1) +#define PHY_CTRL_TYPE_C22 0 +#define PHY_CTRL_CMD BIT(0) +#define PHY_CTRL_FAIL BIT(25) +#define SMI_ACCESS_PHY_CTRL_2 0xcb78 +#define PHY_CTRL_INDATA GENMASK(31, 16) +#define PHY_CTRL_DATA GENMASK(15, 0) +#define SMI_ACCESS_PHY_CTRL_3 0xcb7c +#define PHY_CTRL_MMD_DEVAD GENMASK(20, 16) +#define PHY_CTRL_MMD_REG GENMASK(15, 0) +#define SMI_PORT0_5_ADDR_CTRL 0xcb80 + +#define MAX_PORTS 28 +#define MAX_SMI_BUSSES 4 +#define MAX_SMI_ADDR 0x1f + +struct rtl9300_mdio_priv { + struct regmap *regmap; + struct mutex lock; /* protect HW access */ + DECLARE_BITMAP(valid_ports, MAX_PORTS); + u8 smi_bus[MAX_PORTS]; + u8 smi_addr[MAX_PORTS]; + bool smi_bus_is_c45[MAX_SMI_BUSSES]; + struct mii_bus *bus[MAX_SMI_BUSSES]; +}; + +struct rtl9300_mdio_chan { + struct rtl9300_mdio_priv *priv; + u8 mdio_bus; +}; + +static int rtl9300_mdio_phy_to_port(struct mii_bus *bus, int phy_id) +{ + struct rtl9300_mdio_chan *chan = bus->priv; + struct rtl9300_mdio_priv *priv; + int i; + + priv = chan->priv; + + for_each_set_bit(i, priv->valid_ports, MAX_PORTS) + if (priv->smi_bus[i] == chan->mdio_bus && + priv->smi_addr[i] == phy_id) + return i; + + return -ENOENT; +} + +static int rtl9300_mdio_wait_ready(struct rtl9300_mdio_priv *priv) +{ + struct regmap *regmap = priv->regmap; + u32 val; + + lockdep_assert_held(&priv->lock); + + return regmap_read_poll_timeout(regmap, SMI_ACCESS_PHY_CTRL_1, + val, !(val & PHY_CTRL_CMD), 10, 1000); +} + +static int rtl9300_mdio_read_c22(struct mii_bus *bus, int phy_id, int regnum) +{ + struct rtl9300_mdio_chan *chan = bus->priv; + struct rtl9300_mdio_priv *priv; + struct regmap *regmap; + int port; + u32 val; + int err; + + priv = chan->priv; + regmap = priv->regmap; + + port = rtl9300_mdio_phy_to_port(bus, phy_id); + if (port < 0) + return port; + + mutex_lock(&priv->lock); + err = rtl9300_mdio_wait_ready(priv); + if (err) + goto out_err; + + err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_2, FIELD_PREP(PHY_CTRL_INDATA, port)); + if (err) + goto out_err; + + val = FIELD_PREP(PHY_CTRL_REG_ADDR, regnum) | + FIELD_PREP(PHY_CTRL_PARK_PAGE, 0x1f) | + FIELD_PREP(PHY_CTRL_MAIN_PAGE, 0xfff) | + PHY_CTRL_READ | PHY_CTRL_TYPE_C22 | PHY_CTRL_CMD; + err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_1, val); + if (err) + goto out_err; + + err = rtl9300_mdio_wait_ready(priv); + if (err) + goto out_err; + + err = regmap_read(regmap, SMI_ACCESS_PHY_CTRL_2, &val); + if (err) + goto out_err; + + mutex_unlock(&priv->lock); + return FIELD_GET(PHY_CTRL_DATA, val); + +out_err: + mutex_unlock(&priv->lock); + return err; +} + +static int rtl9300_mdio_write_c22(struct mii_bus *bus, int phy_id, int regnum, u16 value) +{ + struct rtl9300_mdio_chan *chan = bus->priv; + struct rtl9300_mdio_priv *priv; + struct regmap *regmap; + int port; + 
u32 val; + int err; + + priv = chan->priv; + regmap = priv->regmap; + + port = rtl9300_mdio_phy_to_port(bus, phy_id); + if (port < 0) + return port; + + mutex_lock(&priv->lock); + err = rtl9300_mdio_wait_ready(priv); + if (err) + goto out_err; + + err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_0, BIT(port)); + if (err) + goto out_err; + + err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_2, FIELD_PREP(PHY_CTRL_INDATA, value)); + if (err) + goto out_err; + + val = FIELD_PREP(PHY_CTRL_REG_ADDR, regnum) | + FIELD_PREP(PHY_CTRL_PARK_PAGE, 0x1f) | + FIELD_PREP(PHY_CTRL_MAIN_PAGE, 0xfff) | + PHY_CTRL_WRITE | PHY_CTRL_TYPE_C22 | PHY_CTRL_CMD; + err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_1, val); + if (err) + goto out_err; + + err = regmap_read_poll_timeout(regmap, SMI_ACCESS_PHY_CTRL_1, + val, !(val & PHY_CTRL_CMD), 10, 100); + if (err) + goto out_err; + + if (val & PHY_CTRL_FAIL) { + err = -ENXIO; + goto out_err; + } + + mutex_unlock(&priv->lock); + return 0; + +out_err: + mutex_unlock(&priv->lock); + return err; +} + +static int rtl9300_mdio_read_c45(struct mii_bus *bus, int phy_id, int dev_addr, int regnum) +{ + struct rtl9300_mdio_chan *chan = bus->priv; + struct rtl9300_mdio_priv *priv; + struct regmap *regmap; + int port; + u32 val; + int err; + + priv = chan->priv; + regmap = priv->regmap; + + port = rtl9300_mdio_phy_to_port(bus, phy_id); + if (port < 0) + return port; + + mutex_lock(&priv->lock); + err = rtl9300_mdio_wait_ready(priv); + if (err) + goto out_err; + + val = FIELD_PREP(PHY_CTRL_INDATA, port); + err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_2, val); + if (err) + goto out_err; + + val = FIELD_PREP(PHY_CTRL_MMD_DEVAD, dev_addr) | + FIELD_PREP(PHY_CTRL_MMD_REG, regnum); + err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_3, val); + if (err) + goto out_err; + + err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_1, + PHY_CTRL_READ | PHY_CTRL_TYPE_C45 | PHY_CTRL_CMD); + if (err) + goto out_err; + + err = rtl9300_mdio_wait_ready(priv); + if (err) + goto out_err; + + err = regmap_read(regmap, SMI_ACCESS_PHY_CTRL_2, &val); + if (err) + goto out_err; + + mutex_unlock(&priv->lock); + return FIELD_GET(PHY_CTRL_DATA, val); + +out_err: + mutex_unlock(&priv->lock); + return err; +} + +static int rtl9300_mdio_write_c45(struct mii_bus *bus, int phy_id, int dev_addr, + int regnum, u16 value) +{ + struct rtl9300_mdio_chan *chan = bus->priv; + struct rtl9300_mdio_priv *priv; + struct regmap *regmap; + int port; + u32 val; + int err; + + priv = chan->priv; + regmap = priv->regmap; + + port = rtl9300_mdio_phy_to_port(bus, phy_id); + if (port < 0) + return port; + + mutex_lock(&priv->lock); + err = rtl9300_mdio_wait_ready(priv); + if (err) + goto out_err; + + err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_0, BIT(port)); + if (err) + goto out_err; + + val = FIELD_PREP(PHY_CTRL_INDATA, value); + err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_2, val); + if (err) + goto out_err; + + val = FIELD_PREP(PHY_CTRL_MMD_DEVAD, dev_addr) | + FIELD_PREP(PHY_CTRL_MMD_REG, regnum); + err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_3, val); + if (err) + goto out_err; + + err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_1, + PHY_CTRL_TYPE_C45 | PHY_CTRL_WRITE | PHY_CTRL_CMD); + if (err) + goto out_err; + + err = regmap_read_poll_timeout(regmap, SMI_ACCESS_PHY_CTRL_1, + val, !(val & PHY_CTRL_CMD), 10, 100); + if (err) + goto out_err; + + if (val & PHY_CTRL_FAIL) { + err = -ENXIO; + goto out_err; + } + + mutex_unlock(&priv->lock); + return 0; + +out_err: + mutex_unlock(&priv->lock); + return err; +} + +static int 
rtl9300_mdiobus_init(struct rtl9300_mdio_priv *priv) +{ + u32 glb_ctrl_mask = 0, glb_ctrl_val = 0; + struct regmap *regmap = priv->regmap; + u32 port_addr[5] = { 0 }; + u32 poll_sel[2] = { 0 }; + int i, err; + + /* Associate the port with the SMI interface and PHY */ + for_each_set_bit(i, priv->valid_ports, MAX_PORTS) { + int pos; + + pos = (i % 6) * 5; + port_addr[i / 6] |= (priv->smi_addr[i] & 0x1f) << pos; + + pos = (i % 16) * 2; + poll_sel[i / 16] |= (priv->smi_bus[i] & 0x3) << pos; + } + + /* Put the interfaces into C45 mode if required */ + glb_ctrl_mask = GENMASK(19, 16); + for (i = 0; i < MAX_SMI_BUSSES; i++) + if (priv->smi_bus_is_c45[i]) + glb_ctrl_val |= GLB_CTRL_INTF_SEL(i); + + err = regmap_bulk_write(regmap, SMI_PORT0_5_ADDR_CTRL, + port_addr, 5); + if (err) + return err; + + err = regmap_bulk_write(regmap, SMI_PORT0_15_POLLING_SEL, + poll_sel, 2); + if (err) + return err; + + err = regmap_update_bits(regmap, SMI_GLB_CTRL, + glb_ctrl_mask, glb_ctrl_val); + if (err) + return err; + + return 0; +} + +static int rtl9300_mdiobus_probe_one(struct device *dev, struct rtl9300_mdio_priv *priv, + struct fwnode_handle *node) +{ + struct rtl9300_mdio_chan *chan; + struct fwnode_handle *child; + struct mii_bus *bus; + u32 mdio_bus; + int err; + + err = fwnode_property_read_u32(node, "reg", &mdio_bus); + if (err) + return err; + + /* The MDIO accesses from the kernel work with the PHY polling unit in + * the switch. We need to tell the PPU to operate either in GPHY (i.e. + * clause 22) or 10GPHY mode (i.e. clause 45). + * + * We select 10GPHY mode if there is at least one PHY that declares + * compatible = "ethernet-phy-ieee802.3-c45". This does mean we can't + * support both c45 and c22 on the same MDIO bus. + */ + fwnode_for_each_child_node(node, child) + if (fwnode_device_is_compatible(child, "ethernet-phy-ieee802.3-c45")) + priv->smi_bus_is_c45[mdio_bus] = true; + + bus = devm_mdiobus_alloc_size(dev, sizeof(*chan)); + if (!bus) + return -ENOMEM; + + bus->name = "Realtek Switch MDIO Bus"; + if (priv->smi_bus_is_c45[mdio_bus]) { + bus->read_c45 = rtl9300_mdio_read_c45; + bus->write_c45 = rtl9300_mdio_write_c45; + } else { + bus->read = rtl9300_mdio_read_c22; + bus->write = rtl9300_mdio_write_c22; + } + bus->parent = dev; + chan = bus->priv; + chan->mdio_bus = mdio_bus; + chan->priv = priv; + + snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d", dev_name(dev), mdio_bus); + + err = devm_of_mdiobus_register(dev, bus, to_of_node(node)); + if (err) + return dev_err_probe(dev, err, "cannot register MDIO bus\n"); + + return 0; +} + +/* The mdio-controller is part of a switch block so we parse the sibling + * ethernet-ports node and build a mapping of the switch port to MDIO bus/addr + * based on the phy-handle. 
+ */ +static int rtl9300_mdiobus_map_ports(struct device *dev) +{ + struct rtl9300_mdio_priv *priv = dev_get_drvdata(dev); + struct device *parent = dev->parent; + struct fwnode_handle *port; + int err; + + struct fwnode_handle *ports __free(fwnode_handle) = + device_get_named_child_node(parent, "ethernet-ports"); + if (!ports) + return dev_err_probe(dev, -EINVAL, "%pfwP missing ethernet-ports\n", + dev_fwnode(parent)); + + fwnode_for_each_child_node(ports, port) { + struct device_node *mdio_dn; + u32 addr; + u32 bus; + u32 pn; + + struct device_node *phy_dn __free(device_node) = + of_parse_phandle(to_of_node(port), "phy-handle", 0); + /* skip ports without phys */ + if (!phy_dn) + continue; + + mdio_dn = phy_dn->parent; + /* only map ports that are connected to this mdio-controller */ + if (mdio_dn->parent != dev->of_node) + continue; + + err = fwnode_property_read_u32(port, "reg", &pn); + if (err) + return err; + + if (pn >= MAX_PORTS) + return dev_err_probe(dev, -EINVAL, "illegal port number %d\n", pn); + + if (test_bit(pn, priv->valid_ports)) + return dev_err_probe(dev, -EINVAL, "duplicated port number %d\n", pn); + + err = of_property_read_u32(mdio_dn, "reg", &bus); + if (err) + return err; + + if (bus >= MAX_SMI_BUSSES) + return dev_err_probe(dev, -EINVAL, "illegal smi bus number %d\n", bus); + + err = of_property_read_u32(phy_dn, "reg", &addr); + if (err) + return err; + + __set_bit(pn, priv->valid_ports); + priv->smi_bus[pn] = bus; + priv->smi_addr[pn] = addr; + } + + return 0; +} + +static int rtl9300_mdiobus_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct rtl9300_mdio_priv *priv; + struct fwnode_handle *child; + int err; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + err = devm_mutex_init(dev, &priv->lock); + if (err) + return err; + + priv->regmap = syscon_node_to_regmap(dev->parent->of_node); + if (IS_ERR(priv->regmap)) + return PTR_ERR(priv->regmap); + + platform_set_drvdata(pdev, priv); + + err = rtl9300_mdiobus_map_ports(dev); + if (err) + return err; + + device_for_each_child_node(dev, child) { + err = rtl9300_mdiobus_probe_one(dev, priv, child); + if (err) + return err; + } + + err = rtl9300_mdiobus_init(priv); + if (err) + return dev_err_probe(dev, err, "failed to initialise MDIO bus controller\n"); + + return 0; +} + +static const struct of_device_id rtl9300_mdio_ids[] = { + { .compatible = "realtek,rtl9301-mdio" }, + {} +}; +MODULE_DEVICE_TABLE(of, rtl9300_mdio_ids); + +static struct platform_driver rtl9300_mdio_driver = { + .probe = rtl9300_mdiobus_probe, + .driver = { + .name = "mdio-rtl9300", + .of_match_table = rtl9300_mdio_ids, + }, +}; + +module_platform_driver(rtl9300_mdio_driver); + +MODULE_DESCRIPTION("RTL9300 MDIO driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/mdio/mdio-thunder.c b/drivers/net/mdio/mdio-thunder.c index 1e1aa72b1eff..a3047f7258a7 100644 --- a/drivers/net/mdio/mdio-thunder.c +++ b/drivers/net/mdio/mdio-thunder.c @@ -40,16 +40,16 @@ static int thunder_mdiobus_pci_probe(struct pci_dev *pdev, return err; } - err = pci_request_regions(pdev, KBUILD_MODNAME); + err = pcim_request_all_regions(pdev, KBUILD_MODNAME); if (err) { - dev_err(&pdev->dev, "pci_request_regions failed\n"); + dev_err(&pdev->dev, "pcim_request_all_regions failed\n"); goto err_disable_device; } nexus->bar0 = pcim_iomap(pdev, 0, pci_resource_len(pdev, 0)); if (!nexus->bar0) { err = -ENOMEM; - goto err_release_regions; + goto err_disable_device; } i = 0; @@ -107,9 +107,6 @@ static int 
thunder_mdiobus_pci_probe(struct pci_dev *pdev, } return 0; -err_release_regions: - pci_release_regions(pdev); - err_disable_device: pci_set_drvdata(pdev, NULL); return err; @@ -129,7 +126,6 @@ static void thunder_mdiobus_pci_remove(struct pci_dev *pdev) mdiobus_unregister(bus->mii_bus); oct_mdio_writeq(0, bus->register_base + SMI_EN); } - pci_release_regions(pdev); pci_set_drvdata(pdev, NULL); } diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 0e0321a7ddd7..2aa999345fe1 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -29,6 +29,7 @@ #include <net/pkt_cls.h> #include <net/rtnetlink.h> #include <net/udp_tunnel.h> +#include <net/busy_poll.h> #include "netdevsim.h" @@ -357,6 +358,7 @@ static int nsim_rcv(struct nsim_rq *rq, int budget) break; skb = skb_dequeue(&rq->skb_queue); + skb_mark_napi_id(skb, &rq->napi); netif_receive_skb(skb); } diff --git a/drivers/net/ovpn/Makefile b/drivers/net/ovpn/Makefile new file mode 100644 index 000000000000..229be66167e1 --- /dev/null +++ b/drivers/net/ovpn/Makefile @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# ovpn -- OpenVPN data channel offload in kernel space +# +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +obj-$(CONFIG_OVPN) := ovpn.o +ovpn-y += bind.o +ovpn-y += crypto.o +ovpn-y += crypto_aead.o +ovpn-y += main.o +ovpn-y += io.o +ovpn-y += netlink.o +ovpn-y += netlink-gen.o +ovpn-y += peer.o +ovpn-y += pktid.o +ovpn-y += socket.o +ovpn-y += stats.o +ovpn-y += tcp.o +ovpn-y += udp.o diff --git a/drivers/net/ovpn/bind.c b/drivers/net/ovpn/bind.c new file mode 100644 index 000000000000..24d2788a277e --- /dev/null +++ b/drivers/net/ovpn/bind.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel offload + * + * Copyright (C) 2012-2025 OpenVPN, Inc. + * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#include <linux/netdevice.h> +#include <linux/socket.h> + +#include "ovpnpriv.h" +#include "bind.h" +#include "peer.h" + +/** + * ovpn_bind_from_sockaddr - retrieve binding matching sockaddr + * @ss: the sockaddr to match + * + * Return: the bind matching the passed sockaddr, or an ERR_PTR on failure + */ +struct ovpn_bind *ovpn_bind_from_sockaddr(const struct sockaddr_storage *ss) +{ + struct ovpn_bind *bind; + size_t sa_len; + + if (ss->ss_family == AF_INET) + sa_len = sizeof(struct sockaddr_in); + else if (ss->ss_family == AF_INET6) + sa_len = sizeof(struct sockaddr_in6); + else + return ERR_PTR(-EAFNOSUPPORT); + + bind = kzalloc(sizeof(*bind), GFP_ATOMIC); + if (unlikely(!bind)) + return ERR_PTR(-ENOMEM); + + memcpy(&bind->remote, ss, sa_len); + + return bind; +} + +/** + * ovpn_bind_reset - assign new binding to peer + * @peer: the peer whose binding has to be replaced + * @new: the new bind to assign + */ +void ovpn_bind_reset(struct ovpn_peer *peer, struct ovpn_bind *new) +{ + lockdep_assert_held(&peer->lock); + + kfree_rcu(rcu_replace_pointer(peer->bind, new, + lockdep_is_held(&peer->lock)), rcu); +} diff --git a/drivers/net/ovpn/bind.h b/drivers/net/ovpn/bind.h new file mode 100644 index 000000000000..4e0b8398bfd9 --- /dev/null +++ b/drivers/net/ovpn/bind.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* OpenVPN data channel offload + * + * Copyright (C) 2012-2025 OpenVPN, Inc. 
+ * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#ifndef _NET_OVPN_OVPNBIND_H_ +#define _NET_OVPN_OVPNBIND_H_ + +#include <net/ip.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/rcupdate.h> +#include <linux/skbuff.h> +#include <linux/spinlock.h> + +struct ovpn_peer; + +/** + * union ovpn_sockaddr - basic transport layer address + * @in4: IPv4 address + * @in6: IPv6 address + */ +union ovpn_sockaddr { + struct sockaddr_in in4; + struct sockaddr_in6 in6; +}; + +/** + * struct ovpn_bind - remote peer binding + * @remote: the remote peer sockaddr + * @local: local endpoint used to talk to the peer + * @local.ipv4: local IPv4 used to talk to the peer + * @local.ipv6: local IPv6 used to talk to the peer + * @rcu: used to schedule RCU cleanup job + */ +struct ovpn_bind { + union ovpn_sockaddr remote; /* remote sockaddr */ + + union { + struct in_addr ipv4; + struct in6_addr ipv6; + } local; + + struct rcu_head rcu; +}; + +/** + * ovpn_bind_skb_src_match - match packet source with binding + * @bind: the binding to match + * @skb: the packet to match + * + * Return: true if the packet source matches the remote peer sockaddr + * in the binding + */ +static inline bool ovpn_bind_skb_src_match(const struct ovpn_bind *bind, + const struct sk_buff *skb) +{ + const union ovpn_sockaddr *remote; + + if (unlikely(!bind)) + return false; + + remote = &bind->remote; + + switch (skb->protocol) { + case htons(ETH_P_IP): + if (unlikely(remote->in4.sin_family != AF_INET)) + return false; + + if (unlikely(remote->in4.sin_addr.s_addr != ip_hdr(skb)->saddr)) + return false; + + if (unlikely(remote->in4.sin_port != udp_hdr(skb)->source)) + return false; + break; + case htons(ETH_P_IPV6): + if (unlikely(remote->in6.sin6_family != AF_INET6)) + return false; + + if (unlikely(!ipv6_addr_equal(&remote->in6.sin6_addr, + &ipv6_hdr(skb)->saddr))) + return false; + + if (unlikely(remote->in6.sin6_port != udp_hdr(skb)->source)) + return false; + break; + default: + return false; + } + + return true; +} + +struct ovpn_bind *ovpn_bind_from_sockaddr(const struct sockaddr_storage *sa); +void ovpn_bind_reset(struct ovpn_peer *peer, struct ovpn_bind *bind); + +#endif /* _NET_OVPN_OVPNBIND_H_ */ diff --git a/drivers/net/ovpn/crypto.c b/drivers/net/ovpn/crypto.c new file mode 100644 index 000000000000..90580e32052f --- /dev/null +++ b/drivers/net/ovpn/crypto.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#include <linux/types.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <uapi/linux/ovpn.h> + +#include "ovpnpriv.h" +#include "main.h" +#include "pktid.h" +#include "crypto_aead.h" +#include "crypto.h" + +static void ovpn_ks_destroy_rcu(struct rcu_head *head) +{ + struct ovpn_crypto_key_slot *ks; + + ks = container_of(head, struct ovpn_crypto_key_slot, rcu); + ovpn_aead_crypto_key_slot_destroy(ks); +} + +void ovpn_crypto_key_slot_release(struct kref *kref) +{ + struct ovpn_crypto_key_slot *ks; + + ks = container_of(kref, struct ovpn_crypto_key_slot, refcount); + call_rcu(&ks->rcu, ovpn_ks_destroy_rcu); +} + +/* can only be invoked when all peer references have been dropped (i.e. 
RCU + * release routine) + */ +void ovpn_crypto_state_release(struct ovpn_crypto_state *cs) +{ + struct ovpn_crypto_key_slot *ks; + + ks = rcu_access_pointer(cs->slots[0]); + if (ks) { + RCU_INIT_POINTER(cs->slots[0], NULL); + ovpn_crypto_key_slot_put(ks); + } + + ks = rcu_access_pointer(cs->slots[1]); + if (ks) { + RCU_INIT_POINTER(cs->slots[1], NULL); + ovpn_crypto_key_slot_put(ks); + } +} + +/* removes the key matching the specified id from the crypto context */ +bool ovpn_crypto_kill_key(struct ovpn_crypto_state *cs, u8 key_id) +{ + struct ovpn_crypto_key_slot *ks = NULL; + + spin_lock_bh(&cs->lock); + if (rcu_access_pointer(cs->slots[0])->key_id == key_id) { + ks = rcu_replace_pointer(cs->slots[0], NULL, + lockdep_is_held(&cs->lock)); + } else if (rcu_access_pointer(cs->slots[1])->key_id == key_id) { + ks = rcu_replace_pointer(cs->slots[1], NULL, + lockdep_is_held(&cs->lock)); + } + spin_unlock_bh(&cs->lock); + + if (ks) + ovpn_crypto_key_slot_put(ks); + + /* let the caller know if a key was actually killed */ + return ks; +} + +/* Reset the ovpn_crypto_state object in a way that is atomic + * to RCU readers. + */ +int ovpn_crypto_state_reset(struct ovpn_crypto_state *cs, + const struct ovpn_peer_key_reset *pkr) +{ + struct ovpn_crypto_key_slot *old = NULL, *new; + u8 idx; + + if (pkr->slot != OVPN_KEY_SLOT_PRIMARY && + pkr->slot != OVPN_KEY_SLOT_SECONDARY) + return -EINVAL; + + new = ovpn_aead_crypto_key_slot_new(&pkr->key); + if (IS_ERR(new)) + return PTR_ERR(new); + + spin_lock_bh(&cs->lock); + idx = cs->primary_idx; + switch (pkr->slot) { + case OVPN_KEY_SLOT_PRIMARY: + old = rcu_replace_pointer(cs->slots[idx], new, + lockdep_is_held(&cs->lock)); + break; + case OVPN_KEY_SLOT_SECONDARY: + old = rcu_replace_pointer(cs->slots[!idx], new, + lockdep_is_held(&cs->lock)); + break; + } + spin_unlock_bh(&cs->lock); + + if (old) + ovpn_crypto_key_slot_put(old); + + return 0; +} + +void ovpn_crypto_key_slot_delete(struct ovpn_crypto_state *cs, + enum ovpn_key_slot slot) +{ + struct ovpn_crypto_key_slot *ks = NULL; + u8 idx; + + if (slot != OVPN_KEY_SLOT_PRIMARY && + slot != OVPN_KEY_SLOT_SECONDARY) { + pr_warn("Invalid slot to release: %u\n", slot); + return; + } + + spin_lock_bh(&cs->lock); + idx = cs->primary_idx; + switch (slot) { + case OVPN_KEY_SLOT_PRIMARY: + ks = rcu_replace_pointer(cs->slots[idx], NULL, + lockdep_is_held(&cs->lock)); + break; + case OVPN_KEY_SLOT_SECONDARY: + ks = rcu_replace_pointer(cs->slots[!idx], NULL, + lockdep_is_held(&cs->lock)); + break; + } + spin_unlock_bh(&cs->lock); + + if (!ks) { + pr_debug("Key slot already released: %u\n", slot); + return; + } + + pr_debug("deleting key slot %u, key_id=%u\n", slot, ks->key_id); + ovpn_crypto_key_slot_put(ks); +} + +void ovpn_crypto_key_slots_swap(struct ovpn_crypto_state *cs) +{ + const struct ovpn_crypto_key_slot *old_primary, *old_secondary; + u8 idx; + + spin_lock_bh(&cs->lock); + idx = cs->primary_idx; + old_primary = rcu_dereference_protected(cs->slots[idx], + lockdep_is_held(&cs->lock)); + old_secondary = rcu_dereference_protected(cs->slots[!idx], + lockdep_is_held(&cs->lock)); + /* perform real swap by switching the index of the primary key */ + WRITE_ONCE(cs->primary_idx, !cs->primary_idx); + + pr_debug("key swapped: (old primary) %d <-> (new primary) %d\n", + old_primary ? old_primary->key_id : -1, + old_secondary ? 
old_secondary->key_id : -1); + + spin_unlock_bh(&cs->lock); +} + +/** + * ovpn_crypto_config_get - populate keyconf object with non-sensible key data + * @cs: the crypto state to extract the key data from + * @slot: the specific slot to inspect + * @keyconf: the output object to populate + * + * Return: 0 on success or a negative error code otherwise + */ +int ovpn_crypto_config_get(struct ovpn_crypto_state *cs, + enum ovpn_key_slot slot, + struct ovpn_key_config *keyconf) +{ + struct ovpn_crypto_key_slot *ks; + int idx; + + switch (slot) { + case OVPN_KEY_SLOT_PRIMARY: + idx = cs->primary_idx; + break; + case OVPN_KEY_SLOT_SECONDARY: + idx = !cs->primary_idx; + break; + default: + return -EINVAL; + } + + rcu_read_lock(); + ks = rcu_dereference(cs->slots[idx]); + if (!ks) { + rcu_read_unlock(); + return -ENOENT; + } + + keyconf->cipher_alg = ovpn_aead_crypto_alg(ks); + keyconf->key_id = ks->key_id; + rcu_read_unlock(); + + return 0; +} diff --git a/drivers/net/ovpn/crypto.h b/drivers/net/ovpn/crypto.h new file mode 100644 index 000000000000..0e284fec3a75 --- /dev/null +++ b/drivers/net/ovpn/crypto.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#ifndef _NET_OVPN_OVPNCRYPTO_H_ +#define _NET_OVPN_OVPNCRYPTO_H_ + +#include "pktid.h" +#include "proto.h" + +/* info needed for both encrypt and decrypt directions */ +struct ovpn_key_direction { + const u8 *cipher_key; + size_t cipher_key_size; + const u8 *nonce_tail; /* only needed for GCM modes */ + size_t nonce_tail_size; /* only needed for GCM modes */ +}; + +/* all info for a particular symmetric key (primary or secondary) */ +struct ovpn_key_config { + enum ovpn_cipher_alg cipher_alg; + u8 key_id; + struct ovpn_key_direction encrypt; + struct ovpn_key_direction decrypt; +}; + +/* used to pass settings from netlink to the crypto engine */ +struct ovpn_peer_key_reset { + enum ovpn_key_slot slot; + struct ovpn_key_config key; +}; + +struct ovpn_crypto_key_slot { + u8 key_id; + + struct crypto_aead *encrypt; + struct crypto_aead *decrypt; + u8 nonce_tail_xmit[OVPN_NONCE_TAIL_SIZE]; + u8 nonce_tail_recv[OVPN_NONCE_TAIL_SIZE]; + + struct ovpn_pktid_recv pid_recv ____cacheline_aligned_in_smp; + struct ovpn_pktid_xmit pid_xmit ____cacheline_aligned_in_smp; + struct kref refcount; + struct rcu_head rcu; +}; + +struct ovpn_crypto_state { + struct ovpn_crypto_key_slot __rcu *slots[2]; + u8 primary_idx; + + /* protects primary and secondary slots */ + spinlock_t lock; +}; + +static inline bool ovpn_crypto_key_slot_hold(struct ovpn_crypto_key_slot *ks) +{ + return kref_get_unless_zero(&ks->refcount); +} + +static inline void ovpn_crypto_state_init(struct ovpn_crypto_state *cs) +{ + RCU_INIT_POINTER(cs->slots[0], NULL); + RCU_INIT_POINTER(cs->slots[1], NULL); + cs->primary_idx = 0; + spin_lock_init(&cs->lock); +} + +static inline struct ovpn_crypto_key_slot * +ovpn_crypto_key_id_to_slot(const struct ovpn_crypto_state *cs, u8 key_id) +{ + struct ovpn_crypto_key_slot *ks; + u8 idx; + + if (unlikely(!cs)) + return NULL; + + rcu_read_lock(); + idx = READ_ONCE(cs->primary_idx); + ks = rcu_dereference(cs->slots[idx]); + if (ks && ks->key_id == key_id) { + if (unlikely(!ovpn_crypto_key_slot_hold(ks))) + ks = NULL; + goto out; + } + + ks = rcu_dereference(cs->slots[!idx]); + if (ks && ks->key_id == key_id) { + if (unlikely(!ovpn_crypto_key_slot_hold(ks))) + ks = NULL; + 
goto out; + } + + /* when both key slots are occupied but no matching key ID is found, ks + * has to be reset to NULL to avoid carrying a stale pointer + */ + ks = NULL; +out: + rcu_read_unlock(); + + return ks; +} + +static inline struct ovpn_crypto_key_slot * +ovpn_crypto_key_slot_primary(const struct ovpn_crypto_state *cs) +{ + struct ovpn_crypto_key_slot *ks; + + rcu_read_lock(); + ks = rcu_dereference(cs->slots[cs->primary_idx]); + if (unlikely(ks && !ovpn_crypto_key_slot_hold(ks))) + ks = NULL; + rcu_read_unlock(); + + return ks; +} + +void ovpn_crypto_key_slot_release(struct kref *kref); + +static inline void ovpn_crypto_key_slot_put(struct ovpn_crypto_key_slot *ks) +{ + kref_put(&ks->refcount, ovpn_crypto_key_slot_release); +} + +int ovpn_crypto_state_reset(struct ovpn_crypto_state *cs, + const struct ovpn_peer_key_reset *pkr); + +void ovpn_crypto_key_slot_delete(struct ovpn_crypto_state *cs, + enum ovpn_key_slot slot); + +void ovpn_crypto_state_release(struct ovpn_crypto_state *cs); + +void ovpn_crypto_key_slots_swap(struct ovpn_crypto_state *cs); + +int ovpn_crypto_config_get(struct ovpn_crypto_state *cs, + enum ovpn_key_slot slot, + struct ovpn_key_config *keyconf); + +bool ovpn_crypto_kill_key(struct ovpn_crypto_state *cs, u8 key_id); + +#endif /* _NET_OVPN_OVPNCRYPTO_H_ */ diff --git a/drivers/net/ovpn/crypto_aead.c b/drivers/net/ovpn/crypto_aead.c new file mode 100644 index 000000000000..74ee639ac868 --- /dev/null +++ b/drivers/net/ovpn/crypto_aead.c @@ -0,0 +1,383 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#include <crypto/aead.h> +#include <linux/skbuff.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/udp.h> + +#include "ovpnpriv.h" +#include "main.h" +#include "io.h" +#include "pktid.h" +#include "crypto_aead.h" +#include "crypto.h" +#include "peer.h" +#include "proto.h" +#include "skb.h" + +#define OVPN_AUTH_TAG_SIZE 16 +#define OVPN_AAD_SIZE (OVPN_OPCODE_SIZE + OVPN_NONCE_WIRE_SIZE) + +#define ALG_NAME_AES "gcm(aes)" +#define ALG_NAME_CHACHAPOLY "rfc7539(chacha20,poly1305)" + +static int ovpn_aead_encap_overhead(const struct ovpn_crypto_key_slot *ks) +{ + return OVPN_OPCODE_SIZE + /* OP header size */ + sizeof(u32) + /* Packet ID */ + crypto_aead_authsize(ks->encrypt); /* Auth Tag */ +} + +int ovpn_aead_encrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, + struct sk_buff *skb) +{ + const unsigned int tag_size = crypto_aead_authsize(ks->encrypt); + struct aead_request *req; + struct sk_buff *trailer; + struct scatterlist *sg; + int nfrags, ret; + u32 pktid, op; + u8 *iv; + + ovpn_skb_cb(skb)->peer = peer; + ovpn_skb_cb(skb)->ks = ks; + + /* Sample AEAD header format: + * 48000001 00000005 7e7046bd 444a7e28 cc6387b1 64a4d6c1 380275a... + * [ OP32 ] [seq # ] [ auth tag ] [ payload ... 
] + * [4-byte + * IV head] + */ + + /* check that there's enough headroom in the skb for packet + * encapsulation + */ + if (unlikely(skb_cow_head(skb, OVPN_HEAD_ROOM))) + return -ENOBUFS; + + /* get number of skb frags and ensure that packet data is writable */ + nfrags = skb_cow_data(skb, 0, &trailer); + if (unlikely(nfrags < 0)) + return nfrags; + + if (unlikely(nfrags + 2 > (MAX_SKB_FRAGS + 2))) + return -ENOSPC; + + /* sg may be required by async crypto */ + ovpn_skb_cb(skb)->sg = kmalloc(sizeof(*ovpn_skb_cb(skb)->sg) * + (nfrags + 2), GFP_ATOMIC); + if (unlikely(!ovpn_skb_cb(skb)->sg)) + return -ENOMEM; + + sg = ovpn_skb_cb(skb)->sg; + + /* sg table: + * 0: op, wire nonce (AD, len=OVPN_OP_SIZE_V2+OVPN_NONCE_WIRE_SIZE), + * 1, 2, 3, ..., n: payload, + * n+1: auth_tag (len=tag_size) + */ + sg_init_table(sg, nfrags + 2); + + /* build scatterlist to encrypt packet payload */ + ret = skb_to_sgvec_nomark(skb, sg + 1, 0, skb->len); + if (unlikely(nfrags != ret)) + return -EINVAL; + + /* append auth_tag onto scatterlist */ + __skb_push(skb, tag_size); + sg_set_buf(sg + nfrags + 1, skb->data, tag_size); + + /* obtain packet ID, which is used both as a first + * 4 bytes of nonce and last 4 bytes of associated data. + */ + ret = ovpn_pktid_xmit_next(&ks->pid_xmit, &pktid); + if (unlikely(ret < 0)) + return ret; + + /* iv may be required by async crypto */ + ovpn_skb_cb(skb)->iv = kmalloc(OVPN_NONCE_SIZE, GFP_ATOMIC); + if (unlikely(!ovpn_skb_cb(skb)->iv)) + return -ENOMEM; + + iv = ovpn_skb_cb(skb)->iv; + + /* concat 4 bytes packet id and 8 bytes nonce tail into 12 bytes + * nonce + */ + ovpn_pktid_aead_write(pktid, ks->nonce_tail_xmit, iv); + + /* make space for packet id and push it to the front */ + __skb_push(skb, OVPN_NONCE_WIRE_SIZE); + memcpy(skb->data, iv, OVPN_NONCE_WIRE_SIZE); + + /* add packet op as head of additional data */ + op = ovpn_opcode_compose(OVPN_DATA_V2, ks->key_id, peer->id); + __skb_push(skb, OVPN_OPCODE_SIZE); + BUILD_BUG_ON(sizeof(op) != OVPN_OPCODE_SIZE); + *((__force __be32 *)skb->data) = htonl(op); + + /* AEAD Additional data */ + sg_set_buf(sg, skb->data, OVPN_AAD_SIZE); + + req = aead_request_alloc(ks->encrypt, GFP_ATOMIC); + if (unlikely(!req)) + return -ENOMEM; + + ovpn_skb_cb(skb)->req = req; + + /* setup async crypto operation */ + aead_request_set_tfm(req, ks->encrypt); + aead_request_set_callback(req, 0, ovpn_encrypt_post, skb); + aead_request_set_crypt(req, sg, sg, + skb->len - ovpn_aead_encap_overhead(ks), iv); + aead_request_set_ad(req, OVPN_AAD_SIZE); + + /* encrypt it */ + return crypto_aead_encrypt(req); +} + +int ovpn_aead_decrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, + struct sk_buff *skb) +{ + const unsigned int tag_size = crypto_aead_authsize(ks->decrypt); + int ret, payload_len, nfrags; + unsigned int payload_offset; + struct aead_request *req; + struct sk_buff *trailer; + struct scatterlist *sg; + u8 *iv; + + payload_offset = OVPN_AAD_SIZE + tag_size; + payload_len = skb->len - payload_offset; + + ovpn_skb_cb(skb)->payload_offset = payload_offset; + ovpn_skb_cb(skb)->peer = peer; + ovpn_skb_cb(skb)->ks = ks; + + /* sanity check on packet size, payload size must be >= 0 */ + if (unlikely(payload_len < 0)) + return -EINVAL; + + /* Prepare the skb data buffer to be accessed up until the auth tag. + * This is required because this area is directly mapped into the sg + * list. 
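+	 * (pskb_may_pull() right below guarantees those bytes sit in the
+	 * skb linear area.)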
+ */ + if (unlikely(!pskb_may_pull(skb, payload_offset))) + return -ENODATA; + + /* get number of skb frags and ensure that packet data is writable */ + nfrags = skb_cow_data(skb, 0, &trailer); + if (unlikely(nfrags < 0)) + return nfrags; + + if (unlikely(nfrags + 2 > (MAX_SKB_FRAGS + 2))) + return -ENOSPC; + + /* sg may be required by async crypto */ + ovpn_skb_cb(skb)->sg = kmalloc(sizeof(*ovpn_skb_cb(skb)->sg) * + (nfrags + 2), GFP_ATOMIC); + if (unlikely(!ovpn_skb_cb(skb)->sg)) + return -ENOMEM; + + sg = ovpn_skb_cb(skb)->sg; + + /* sg table: + * 0: op, wire nonce (AD, len=OVPN_OPCODE_SIZE+OVPN_NONCE_WIRE_SIZE), + * 1, 2, 3, ..., n: payload, + * n+1: auth_tag (len=tag_size) + */ + sg_init_table(sg, nfrags + 2); + + /* packet op is head of additional data */ + sg_set_buf(sg, skb->data, OVPN_AAD_SIZE); + + /* build scatterlist to decrypt packet payload */ + ret = skb_to_sgvec_nomark(skb, sg + 1, payload_offset, payload_len); + if (unlikely(nfrags != ret)) + return -EINVAL; + + /* append auth_tag onto scatterlist */ + sg_set_buf(sg + nfrags + 1, skb->data + OVPN_AAD_SIZE, tag_size); + + /* iv may be required by async crypto */ + ovpn_skb_cb(skb)->iv = kmalloc(OVPN_NONCE_SIZE, GFP_ATOMIC); + if (unlikely(!ovpn_skb_cb(skb)->iv)) + return -ENOMEM; + + iv = ovpn_skb_cb(skb)->iv; + + /* copy nonce into IV buffer */ + memcpy(iv, skb->data + OVPN_OPCODE_SIZE, OVPN_NONCE_WIRE_SIZE); + memcpy(iv + OVPN_NONCE_WIRE_SIZE, ks->nonce_tail_recv, + OVPN_NONCE_TAIL_SIZE); + + req = aead_request_alloc(ks->decrypt, GFP_ATOMIC); + if (unlikely(!req)) + return -ENOMEM; + + ovpn_skb_cb(skb)->req = req; + + /* setup async crypto operation */ + aead_request_set_tfm(req, ks->decrypt); + aead_request_set_callback(req, 0, ovpn_decrypt_post, skb); + aead_request_set_crypt(req, sg, sg, payload_len + tag_size, iv); + + aead_request_set_ad(req, OVPN_AAD_SIZE); + + /* decrypt it */ + return crypto_aead_decrypt(req); +} + +/* Initialize a struct crypto_aead object */ +static struct crypto_aead *ovpn_aead_init(const char *title, + const char *alg_name, + const unsigned char *key, + unsigned int keylen) +{ + struct crypto_aead *aead; + int ret; + + aead = crypto_alloc_aead(alg_name, 0, 0); + if (IS_ERR(aead)) { + ret = PTR_ERR(aead); + pr_err("%s crypto_alloc_aead failed, err=%d\n", title, ret); + aead = NULL; + goto error; + } + + ret = crypto_aead_setkey(aead, key, keylen); + if (ret) { + pr_err("%s crypto_aead_setkey size=%u failed, err=%d\n", title, + keylen, ret); + goto error; + } + + ret = crypto_aead_setauthsize(aead, OVPN_AUTH_TAG_SIZE); + if (ret) { + pr_err("%s crypto_aead_setauthsize failed, err=%d\n", title, + ret); + goto error; + } + + /* basic AEAD assumption */ + if (crypto_aead_ivsize(aead) != OVPN_NONCE_SIZE) { + pr_err("%s IV size must be %d\n", title, OVPN_NONCE_SIZE); + ret = -EINVAL; + goto error; + } + + pr_debug("********* Cipher %s (%s)\n", alg_name, title); + pr_debug("*** IV size=%u\n", crypto_aead_ivsize(aead)); + pr_debug("*** req size=%u\n", crypto_aead_reqsize(aead)); + pr_debug("*** block size=%u\n", crypto_aead_blocksize(aead)); + pr_debug("*** auth size=%u\n", crypto_aead_authsize(aead)); + pr_debug("*** alignmask=0x%x\n", crypto_aead_alignmask(aead)); + + return aead; + +error: + crypto_free_aead(aead); + return ERR_PTR(ret); +} + +void ovpn_aead_crypto_key_slot_destroy(struct ovpn_crypto_key_slot *ks) +{ + if (!ks) + return; + + crypto_free_aead(ks->encrypt); + crypto_free_aead(ks->decrypt); + kfree(ks); +} + +struct ovpn_crypto_key_slot * +ovpn_aead_crypto_key_slot_new(const struct 
ovpn_key_config *kc) +{ + struct ovpn_crypto_key_slot *ks = NULL; + const char *alg_name; + int ret; + + /* validate crypto alg */ + switch (kc->cipher_alg) { + case OVPN_CIPHER_ALG_AES_GCM: + alg_name = ALG_NAME_AES; + break; + case OVPN_CIPHER_ALG_CHACHA20_POLY1305: + alg_name = ALG_NAME_CHACHAPOLY; + break; + default: + return ERR_PTR(-EOPNOTSUPP); + } + + if (kc->encrypt.nonce_tail_size != OVPN_NONCE_TAIL_SIZE || + kc->decrypt.nonce_tail_size != OVPN_NONCE_TAIL_SIZE) + return ERR_PTR(-EINVAL); + + /* build the key slot */ + ks = kmalloc(sizeof(*ks), GFP_KERNEL); + if (!ks) + return ERR_PTR(-ENOMEM); + + ks->encrypt = NULL; + ks->decrypt = NULL; + kref_init(&ks->refcount); + ks->key_id = kc->key_id; + + ks->encrypt = ovpn_aead_init("encrypt", alg_name, + kc->encrypt.cipher_key, + kc->encrypt.cipher_key_size); + if (IS_ERR(ks->encrypt)) { + ret = PTR_ERR(ks->encrypt); + ks->encrypt = NULL; + goto destroy_ks; + } + + ks->decrypt = ovpn_aead_init("decrypt", alg_name, + kc->decrypt.cipher_key, + kc->decrypt.cipher_key_size); + if (IS_ERR(ks->decrypt)) { + ret = PTR_ERR(ks->decrypt); + ks->decrypt = NULL; + goto destroy_ks; + } + + memcpy(ks->nonce_tail_xmit, kc->encrypt.nonce_tail, + OVPN_NONCE_TAIL_SIZE); + memcpy(ks->nonce_tail_recv, kc->decrypt.nonce_tail, + OVPN_NONCE_TAIL_SIZE); + + /* init packet ID generation/validation */ + ovpn_pktid_xmit_init(&ks->pid_xmit); + ovpn_pktid_recv_init(&ks->pid_recv); + + return ks; + +destroy_ks: + ovpn_aead_crypto_key_slot_destroy(ks); + return ERR_PTR(ret); +} + +enum ovpn_cipher_alg ovpn_aead_crypto_alg(struct ovpn_crypto_key_slot *ks) +{ + const char *alg_name; + + if (!ks->encrypt) + return OVPN_CIPHER_ALG_NONE; + + alg_name = crypto_tfm_alg_name(crypto_aead_tfm(ks->encrypt)); + + if (!strcmp(alg_name, ALG_NAME_AES)) + return OVPN_CIPHER_ALG_AES_GCM; + else if (!strcmp(alg_name, ALG_NAME_CHACHAPOLY)) + return OVPN_CIPHER_ALG_CHACHA20_POLY1305; + else + return OVPN_CIPHER_ALG_NONE; +} diff --git a/drivers/net/ovpn/crypto_aead.h b/drivers/net/ovpn/crypto_aead.h new file mode 100644 index 000000000000..65a2ff307898 --- /dev/null +++ b/drivers/net/ovpn/crypto_aead.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#ifndef _NET_OVPN_OVPNAEAD_H_ +#define _NET_OVPN_OVPNAEAD_H_ + +#include "crypto.h" + +#include <asm/types.h> +#include <linux/skbuff.h> + +int ovpn_aead_encrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, + struct sk_buff *skb); +int ovpn_aead_decrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, + struct sk_buff *skb); + +struct ovpn_crypto_key_slot * +ovpn_aead_crypto_key_slot_new(const struct ovpn_key_config *kc); +void ovpn_aead_crypto_key_slot_destroy(struct ovpn_crypto_key_slot *ks); + +enum ovpn_cipher_alg ovpn_aead_crypto_alg(struct ovpn_crypto_key_slot *ks); + +#endif /* _NET_OVPN_OVPNAEAD_H_ */ diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c new file mode 100644 index 000000000000..dd8a8055d967 --- /dev/null +++ b/drivers/net/ovpn/io.c @@ -0,0 +1,446 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel offload + * + * Copyright (C) 2019-2025 OpenVPN, Inc. 
+ * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#include <crypto/aead.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/gro_cells.h> +#include <net/gso.h> +#include <net/ip.h> + +#include "ovpnpriv.h" +#include "peer.h" +#include "io.h" +#include "bind.h" +#include "crypto.h" +#include "crypto_aead.h" +#include "netlink.h" +#include "proto.h" +#include "tcp.h" +#include "udp.h" +#include "skb.h" +#include "socket.h" + +const unsigned char ovpn_keepalive_message[OVPN_KEEPALIVE_SIZE] = { + 0x2a, 0x18, 0x7b, 0xf3, 0x64, 0x1e, 0xb4, 0xcb, + 0x07, 0xed, 0x2d, 0x0a, 0x98, 0x1f, 0xc7, 0x48 +}; + +/** + * ovpn_is_keepalive - check if skb contains a keepalive message + * @skb: packet to check + * + * Assumes that the first byte of skb->data is defined. + * + * Return: true if skb contains a keepalive or false otherwise + */ +static bool ovpn_is_keepalive(struct sk_buff *skb) +{ + if (*skb->data != ovpn_keepalive_message[0]) + return false; + + if (skb->len != OVPN_KEEPALIVE_SIZE) + return false; + + if (!pskb_may_pull(skb, OVPN_KEEPALIVE_SIZE)) + return false; + + return !memcmp(skb->data, ovpn_keepalive_message, OVPN_KEEPALIVE_SIZE); +} + +/* Called after decrypt to write the IP packet to the device. + * This method is expected to manage/free the skb. + */ +static void ovpn_netdev_write(struct ovpn_peer *peer, struct sk_buff *skb) +{ + unsigned int pkt_len; + int ret; + + /* we can't guarantee the packet wasn't corrupted before entering the + * VPN, therefore we give other layers a chance to check that + */ + skb->ip_summed = CHECKSUM_NONE; + + /* skb hash for transport packet no longer valid after decapsulation */ + skb_clear_hash(skb); + + /* post-decrypt scrub -- prepare to inject encapsulated packet onto the + * interface, based on __skb_tunnel_rx() in dst.h + */ + skb->dev = peer->ovpn->dev; + skb_set_queue_mapping(skb, 0); + skb_scrub_packet(skb, true); + + /* network header reset in ovpn_decrypt_post() */ + skb_reset_transport_header(skb); + skb_reset_inner_headers(skb); + + /* cause packet to be "received" by the interface */ + pkt_len = skb->len; + ret = gro_cells_receive(&peer->ovpn->gro_cells, skb); + if (likely(ret == NET_RX_SUCCESS)) { + /* update RX stats with the size of decrypted packet */ + ovpn_peer_stats_increment_rx(&peer->vpn_stats, pkt_len); + dev_dstats_rx_add(peer->ovpn->dev, pkt_len); + } +} + +void ovpn_decrypt_post(void *data, int ret) +{ + struct ovpn_crypto_key_slot *ks; + unsigned int payload_offset = 0; + struct sk_buff *skb = data; + struct ovpn_socket *sock; + struct ovpn_peer *peer; + __be16 proto; + __be32 *pid; + + /* crypto is happening asynchronously. 
this function will be called + * again later by the crypto callback with a proper return code + */ + if (unlikely(ret == -EINPROGRESS)) + return; + + payload_offset = ovpn_skb_cb(skb)->payload_offset; + ks = ovpn_skb_cb(skb)->ks; + peer = ovpn_skb_cb(skb)->peer; + + /* crypto is done, cleanup skb CB and its members */ + kfree(ovpn_skb_cb(skb)->iv); + kfree(ovpn_skb_cb(skb)->sg); + aead_request_free(ovpn_skb_cb(skb)->req); + + if (unlikely(ret < 0)) + goto drop; + + /* PID sits after the op */ + pid = (__force __be32 *)(skb->data + OVPN_OPCODE_SIZE); + ret = ovpn_pktid_recv(&ks->pid_recv, ntohl(*pid), 0); + if (unlikely(ret < 0)) { + net_err_ratelimited("%s: PKT ID RX error for peer %u: %d\n", + netdev_name(peer->ovpn->dev), peer->id, + ret); + goto drop; + } + + /* keep track of last received authenticated packet for keepalive */ + WRITE_ONCE(peer->last_recv, ktime_get_real_seconds()); + + rcu_read_lock(); + sock = rcu_dereference(peer->sock); + if (sock && sock->sock->sk->sk_protocol == IPPROTO_UDP) + /* check if this peer changed local or remote endpoint */ + ovpn_peer_endpoints_update(peer, skb); + rcu_read_unlock(); + + /* point to encapsulated IP packet */ + __skb_pull(skb, payload_offset); + + /* check if this is a valid datapacket that has to be delivered to the + * ovpn interface + */ + skb_reset_network_header(skb); + proto = ovpn_ip_check_protocol(skb); + if (unlikely(!proto)) { + /* check if null packet */ + if (unlikely(!pskb_may_pull(skb, 1))) { + net_info_ratelimited("%s: NULL packet received from peer %u\n", + netdev_name(peer->ovpn->dev), + peer->id); + goto drop; + } + + if (ovpn_is_keepalive(skb)) { + net_dbg_ratelimited("%s: ping received from peer %u\n", + netdev_name(peer->ovpn->dev), + peer->id); + /* we drop the packet, but this is not a failure */ + consume_skb(skb); + goto drop_nocount; + } + + net_info_ratelimited("%s: unsupported protocol received from peer %u\n", + netdev_name(peer->ovpn->dev), peer->id); + goto drop; + } + skb->protocol = proto; + + /* perform Reverse Path Filtering (RPF) */ + if (unlikely(!ovpn_peer_check_by_src(peer->ovpn, skb, peer))) { + if (skb->protocol == htons(ETH_P_IPV6)) + net_dbg_ratelimited("%s: RPF dropped packet from peer %u, src: %pI6c\n", + netdev_name(peer->ovpn->dev), + peer->id, &ipv6_hdr(skb)->saddr); + else + net_dbg_ratelimited("%s: RPF dropped packet from peer %u, src: %pI4\n", + netdev_name(peer->ovpn->dev), + peer->id, &ip_hdr(skb)->saddr); + goto drop; + } + + ovpn_netdev_write(peer, skb); + /* skb is passed to upper layer - don't free it */ + skb = NULL; +drop: + if (unlikely(skb)) + dev_dstats_rx_dropped(peer->ovpn->dev); + kfree_skb(skb); +drop_nocount: + if (likely(peer)) + ovpn_peer_put(peer); + if (likely(ks)) + ovpn_crypto_key_slot_put(ks); +} + +/* RX path entry point: decrypt packet and forward it to the device */ +void ovpn_recv(struct ovpn_peer *peer, struct sk_buff *skb) +{ + struct ovpn_crypto_key_slot *ks; + u8 key_id; + + ovpn_peer_stats_increment_rx(&peer->link_stats, skb->len); + + /* get the key slot matching the key ID in the received packet */ + key_id = ovpn_key_id_from_skb(skb); + ks = ovpn_crypto_key_id_to_slot(&peer->crypto, key_id); + if (unlikely(!ks)) { + net_info_ratelimited("%s: no available key for peer %u, key-id: %u\n", + netdev_name(peer->ovpn->dev), peer->id, + key_id); + dev_dstats_rx_dropped(peer->ovpn->dev); + kfree_skb(skb); + ovpn_peer_put(peer); + return; + } + + memset(ovpn_skb_cb(skb), 0, sizeof(struct ovpn_cb)); + ovpn_decrypt_post(skb, ovpn_aead_decrypt(peer, ks, skb)); +} + 
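Taken together, the encrypt and decrypt paths above agree on one wire layout: a 32-bit DATA_V2 op word (5-bit opcode, 3-bit key ID, 24-bit peer ID), a 4-byte packet ID that doubles as the head of the 12-byte AEAD nonce, the encrypted payload, and a 16-byte auth tag. The standalone C sketch below reproduces that header math; it is illustrative only, the helper names are hypothetical (not the kernel's proto.h API), and the bit split is inferred from the sample header "48000001 00000005" quoted in ovpn_aead_encrypt().

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical names; illustrative sketch only, not kernel code. */
#define SKETCH_DATA_V2 9u	/* 0x48 >> 3, per the sample header above */

static uint32_t sketch_opcode_compose(uint8_t opcode, uint8_t key_id,
				      uint32_t peer_id)
{
	/* 5-bit opcode | 3-bit key ID | 24-bit peer ID, host order */
	return ((uint32_t)opcode << 27) | ((uint32_t)(key_id & 0x7) << 24) |
	       (peer_id & 0x00ffffffu);
}

static void sketch_nonce(uint32_t pktid, const uint8_t tail[8], uint8_t iv[12])
{
	/* nonce head: the 4-byte packet ID, which also travels on the wire */
	iv[0] = pktid >> 24;
	iv[1] = pktid >> 16;
	iv[2] = pktid >> 8;
	iv[3] = pktid;
	/* nonce tail: 8 per-key bytes that are never transmitted */
	memcpy(iv + 4, tail, 8);
}

int main(void)
{
	uint8_t tail[8] = { 0 }, iv[12];

	sketch_nonce(5, tail, iv);
	/* reproduces "48000001 00000005" from the sample AEAD header */
	printf("op=0x%08x pktid=%02x%02x%02x%02x\n",
	       sketch_opcode_compose(SKETCH_DATA_V2, 0, 1),
	       iv[0], iv[1], iv[2], iv[3]);
	return 0;
}

Both words are big-endian on the wire, which is why ovpn_aead_encrypt() writes the op with htonl() and copies the packet ID straight out of the nonce head.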
+void ovpn_encrypt_post(void *data, int ret)
+{
+	struct ovpn_crypto_key_slot *ks;
+	struct sk_buff *skb = data;
+	struct ovpn_socket *sock;
+	struct ovpn_peer *peer;
+	unsigned int orig_len;
+
+	/* encryption is happening asynchronously. This function will be
+	 * called later by the crypto callback with a proper return value
+	 */
+	if (unlikely(ret == -EINPROGRESS))
+		return;
+
+	ks = ovpn_skb_cb(skb)->ks;
+	peer = ovpn_skb_cb(skb)->peer;
+
+	/* crypto is done, cleanup skb CB and its members */
+	kfree(ovpn_skb_cb(skb)->iv);
+	kfree(ovpn_skb_cb(skb)->sg);
+	aead_request_free(ovpn_skb_cb(skb)->req);
+
+	if (unlikely(ret == -ERANGE)) {
+		/* we ran out of IVs and we must kill the key as it can't be
+		 * used anymore
+		 */
+		netdev_warn(peer->ovpn->dev,
+			    "killing key %u for peer %u\n", ks->key_id,
+			    peer->id);
+		if (ovpn_crypto_kill_key(&peer->crypto, ks->key_id))
+			/* let userspace know so that a new key must be negotiated */
+			ovpn_nl_key_swap_notify(peer, ks->key_id);
+
+		goto err;
+	}
+
+	if (unlikely(ret < 0))
+		goto err;
+
+	skb_mark_not_on_list(skb);
+	orig_len = skb->len;
+
+	rcu_read_lock();
+	sock = rcu_dereference(peer->sock);
+	if (unlikely(!sock))
+		goto err_unlock;
+
+	switch (sock->sock->sk->sk_protocol) {
+	case IPPROTO_UDP:
+		ovpn_udp_send_skb(peer, sock->sock, skb);
+		break;
+	case IPPROTO_TCP:
+		ovpn_tcp_send_skb(peer, sock->sock, skb);
+		break;
+	default:
+		/* no transport configured yet */
+		goto err_unlock;
+	}
+
+	ovpn_peer_stats_increment_tx(&peer->link_stats, orig_len);
+	/* keep track of last sent packet for keepalive */
+	WRITE_ONCE(peer->last_sent, ktime_get_real_seconds());
+	/* skb passed down the stack - don't free it */
+	skb = NULL;
+err_unlock:
+	rcu_read_unlock();
+err:
+	if (unlikely(skb))
+		dev_dstats_tx_dropped(peer->ovpn->dev);
+	if (likely(peer))
+		ovpn_peer_put(peer);
+	if (likely(ks))
+		ovpn_crypto_key_slot_put(ks);
+	kfree_skb(skb);
+}
+
+static bool ovpn_encrypt_one(struct ovpn_peer *peer, struct sk_buff *skb)
+{
+	struct ovpn_crypto_key_slot *ks;
+
+	/* get primary key to be used for encrypting data */
+	ks = ovpn_crypto_key_slot_primary(&peer->crypto);
+	if (unlikely(!ks))
+		return false;
+
+	/* take a reference to the peer because the crypto code may run async.
+ * ovpn_encrypt_post() will release it upon completion + */ + if (unlikely(!ovpn_peer_hold(peer))) { + DEBUG_NET_WARN_ON_ONCE(1); + ovpn_crypto_key_slot_put(ks); + return false; + } + + memset(ovpn_skb_cb(skb), 0, sizeof(struct ovpn_cb)); + ovpn_encrypt_post(skb, ovpn_aead_encrypt(peer, ks, skb)); + return true; +} + +/* send skb to connected peer, if any */ +static void ovpn_send(struct ovpn_priv *ovpn, struct sk_buff *skb, + struct ovpn_peer *peer) +{ + struct sk_buff *curr, *next; + + /* this might be a GSO-segmented skb list: process each skb + * independently + */ + skb_list_walk_safe(skb, curr, next) { + if (unlikely(!ovpn_encrypt_one(peer, curr))) { + dev_dstats_tx_dropped(ovpn->dev); + kfree_skb(curr); + } + } + + ovpn_peer_put(peer); +} + +/* Send user data to the network + */ +netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ovpn_priv *ovpn = netdev_priv(dev); + struct sk_buff *segments, *curr, *next; + struct sk_buff_head skb_list; + struct ovpn_peer *peer; + __be16 proto; + int ret; + + /* reset netfilter state */ + nf_reset_ct(skb); + + /* verify IP header size in network packet */ + proto = ovpn_ip_check_protocol(skb); + if (unlikely(!proto || skb->protocol != proto)) + goto drop; + + if (skb_is_gso(skb)) { + segments = skb_gso_segment(skb, 0); + if (IS_ERR(segments)) { + ret = PTR_ERR(segments); + net_err_ratelimited("%s: cannot segment payload packet: %d\n", + netdev_name(dev), ret); + goto drop; + } + + consume_skb(skb); + skb = segments; + } + + /* from this moment on, "skb" might be a list */ + + __skb_queue_head_init(&skb_list); + skb_list_walk_safe(skb, curr, next) { + skb_mark_not_on_list(curr); + + curr = skb_share_check(curr, GFP_ATOMIC); + if (unlikely(!curr)) { + net_err_ratelimited("%s: skb_share_check failed for payload packet\n", + netdev_name(dev)); + dev_dstats_tx_dropped(ovpn->dev); + continue; + } + + __skb_queue_tail(&skb_list, curr); + } + skb_list.prev->next = NULL; + + /* retrieve peer serving the destination IP of this packet */ + peer = ovpn_peer_get_by_dst(ovpn, skb); + if (unlikely(!peer)) { + net_dbg_ratelimited("%s: no peer to send data to\n", + netdev_name(ovpn->dev)); + goto drop; + } + + ovpn_peer_stats_increment_tx(&peer->vpn_stats, skb->len); + ovpn_send(ovpn, skb_list.next, peer); + + return NETDEV_TX_OK; + +drop: + dev_dstats_tx_dropped(ovpn->dev); + skb_tx_error(skb); + kfree_skb_list(skb); + return NET_XMIT_DROP; +} + +/** + * ovpn_xmit_special - encrypt and transmit an out-of-band message to peer + * @peer: peer to send the message to + * @data: message content + * @len: message length + * + * Assumes that caller holds a reference to peer, which will be + * passed to ovpn_send() + */ +void ovpn_xmit_special(struct ovpn_peer *peer, const void *data, + const unsigned int len) +{ + struct ovpn_priv *ovpn; + struct sk_buff *skb; + + ovpn = peer->ovpn; + if (unlikely(!ovpn)) { + ovpn_peer_put(peer); + return; + } + + skb = alloc_skb(256 + len, GFP_ATOMIC); + if (unlikely(!skb)) { + ovpn_peer_put(peer); + return; + } + + skb_reserve(skb, 128); + skb->priority = TC_PRIO_BESTEFFORT; + __skb_put_data(skb, data, len); + + ovpn_send(ovpn, skb, peer); +} diff --git a/drivers/net/ovpn/io.h b/drivers/net/ovpn/io.h new file mode 100644 index 000000000000..db9e10f9077c --- /dev/null +++ b/drivers/net/ovpn/io.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* OpenVPN data channel offload + * + * Copyright (C) 2019-2025 OpenVPN, Inc. 
+ * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#ifndef _NET_OVPN_OVPN_H_ +#define _NET_OVPN_OVPN_H_ + +/* DATA_V2 header size with AEAD encryption */ +#define OVPN_HEAD_ROOM (OVPN_OPCODE_SIZE + OVPN_NONCE_WIRE_SIZE + \ + 16 /* AEAD TAG length */ + \ + max(sizeof(struct udphdr), sizeof(struct tcphdr)) +\ + max(sizeof(struct ipv6hdr), sizeof(struct iphdr))) + +/* max padding required by encryption */ +#define OVPN_MAX_PADDING 16 + +#define OVPN_KEEPALIVE_SIZE 16 +extern const unsigned char ovpn_keepalive_message[OVPN_KEEPALIVE_SIZE]; + +netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev); + +void ovpn_recv(struct ovpn_peer *peer, struct sk_buff *skb); +void ovpn_xmit_special(struct ovpn_peer *peer, const void *data, + const unsigned int len); + +void ovpn_encrypt_post(void *data, int ret); +void ovpn_decrypt_post(void *data, int ret); + +#endif /* _NET_OVPN_OVPN_H_ */ diff --git a/drivers/net/ovpn/main.c b/drivers/net/ovpn/main.c new file mode 100644 index 000000000000..0acb0934c1be --- /dev/null +++ b/drivers/net/ovpn/main.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: Antonio Quartulli <antonio@openvpn.net> + * James Yonan <james@openvpn.net> + */ + +#include <linux/ethtool.h> +#include <linux/genetlink.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <net/gro_cells.h> +#include <net/ip.h> +#include <net/rtnetlink.h> +#include <uapi/linux/if_arp.h> + +#include "ovpnpriv.h" +#include "main.h" +#include "netlink.h" +#include "io.h" +#include "peer.h" +#include "proto.h" +#include "tcp.h" +#include "udp.h" + +static void ovpn_priv_free(struct net_device *net) +{ + struct ovpn_priv *ovpn = netdev_priv(net); + + kfree(ovpn->peers); +} + +static int ovpn_mp_alloc(struct ovpn_priv *ovpn) +{ + struct in_device *dev_v4; + int i; + + if (ovpn->mode != OVPN_MODE_MP) + return 0; + + dev_v4 = __in_dev_get_rtnl(ovpn->dev); + if (dev_v4) { + /* disable redirects as Linux gets confused by ovpn + * handling same-LAN routing. + * This happens because a multipeer interface is used as + * relay point between hosts in the same subnet, while + * in a classic LAN this would not be needed because the + * two hosts would be able to talk directly. 
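+		 * Without this, the kernel would emit ICMP redirects telling
+		 * those hosts to reach each other directly, bypassing ovpn.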
+ */ + IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false); + IPV4_DEVCONF_ALL(dev_net(ovpn->dev), SEND_REDIRECTS) = false; + } + + /* the peer container is fairly large, therefore we allocate it only in + * MP mode + */ + ovpn->peers = kzalloc(sizeof(*ovpn->peers), GFP_KERNEL); + if (!ovpn->peers) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(ovpn->peers->by_id); i++) { + INIT_HLIST_HEAD(&ovpn->peers->by_id[i]); + INIT_HLIST_NULLS_HEAD(&ovpn->peers->by_vpn_addr4[i], i); + INIT_HLIST_NULLS_HEAD(&ovpn->peers->by_vpn_addr6[i], i); + INIT_HLIST_NULLS_HEAD(&ovpn->peers->by_transp_addr[i], i); + } + + return 0; +} + +static int ovpn_net_init(struct net_device *dev) +{ + struct ovpn_priv *ovpn = netdev_priv(dev); + int err = gro_cells_init(&ovpn->gro_cells, dev); + + if (err < 0) + return err; + + err = ovpn_mp_alloc(ovpn); + if (err < 0) { + gro_cells_destroy(&ovpn->gro_cells); + return err; + } + + return 0; +} + +static void ovpn_net_uninit(struct net_device *dev) +{ + struct ovpn_priv *ovpn = netdev_priv(dev); + + gro_cells_destroy(&ovpn->gro_cells); +} + +static const struct net_device_ops ovpn_netdev_ops = { + .ndo_init = ovpn_net_init, + .ndo_uninit = ovpn_net_uninit, + .ndo_start_xmit = ovpn_net_xmit, +}; + +static const struct device_type ovpn_type = { + .name = OVPN_FAMILY_NAME, +}; + +static const struct nla_policy ovpn_policy[IFLA_OVPN_MAX + 1] = { + [IFLA_OVPN_MODE] = NLA_POLICY_RANGE(NLA_U8, OVPN_MODE_P2P, + OVPN_MODE_MP), +}; + +/** + * ovpn_dev_is_valid - check if the netdevice is of type 'ovpn' + * @dev: the interface to check + * + * Return: whether the netdevice is of type 'ovpn' + */ +bool ovpn_dev_is_valid(const struct net_device *dev) +{ + return dev->netdev_ops == &ovpn_netdev_ops; +} + +static void ovpn_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strscpy(info->driver, "ovpn", sizeof(info->driver)); + strscpy(info->bus_info, "ovpn", sizeof(info->bus_info)); +} + +static const struct ethtool_ops ovpn_ethtool_ops = { + .get_drvinfo = ovpn_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_ts_info = ethtool_op_get_ts_info, +}; + +static void ovpn_setup(struct net_device *dev) +{ + netdev_features_t feat = NETIF_F_SG | NETIF_F_GSO | + NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA; + + dev->needs_free_netdev = true; + + dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; + + dev->ethtool_ops = &ovpn_ethtool_ops; + dev->netdev_ops = &ovpn_netdev_ops; + + dev->priv_destructor = ovpn_priv_free; + + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->mtu = ETH_DATA_LEN - OVPN_HEAD_ROOM; + dev->min_mtu = IPV4_MIN_MTU; + dev->max_mtu = IP_MAX_MTU - OVPN_HEAD_ROOM; + + dev->type = ARPHRD_NONE; + dev->flags = IFF_POINTOPOINT | IFF_NOARP; + dev->priv_flags |= IFF_NO_QUEUE; + + dev->lltx = true; + dev->features |= feat; + dev->hw_features |= feat; + dev->hw_enc_features |= feat; + + dev->needed_headroom = ALIGN(OVPN_HEAD_ROOM, 4); + dev->needed_tailroom = OVPN_MAX_PADDING; + + SET_NETDEV_DEVTYPE(dev, &ovpn_type); +} + +static int ovpn_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, + struct netlink_ext_ack *extack) +{ + struct ovpn_priv *ovpn = netdev_priv(dev); + struct nlattr **data = params->data; + enum ovpn_mode mode = OVPN_MODE_P2P; + + if (data && data[IFLA_OVPN_MODE]) { + mode = nla_get_u8(data[IFLA_OVPN_MODE]); + netdev_dbg(dev, "setting device mode: %u\n", mode); + } + + ovpn->dev = dev; + ovpn->mode = mode; + spin_lock_init(&ovpn->lock); + INIT_DELAYED_WORK(&ovpn->keepalive_work, ovpn_peer_keepalive_work); + + /* Set carrier 
explicitly after registration, this way state is + * clearly defined. + * + * In case of MP interfaces we keep the carrier always on. + * + * Carrier for P2P interfaces is initially off and it is then + * switched on and off when the remote peer is added or deleted. + */ + if (ovpn->mode == OVPN_MODE_MP) + netif_carrier_on(dev); + else + netif_carrier_off(dev); + + return register_netdevice(dev); +} + +static void ovpn_dellink(struct net_device *dev, struct list_head *head) +{ + struct ovpn_priv *ovpn = netdev_priv(dev); + + cancel_delayed_work_sync(&ovpn->keepalive_work); + ovpn_peers_free(ovpn, NULL, OVPN_DEL_PEER_REASON_TEARDOWN); + unregister_netdevice_queue(dev, head); +} + +static int ovpn_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ovpn_priv *ovpn = netdev_priv(dev); + + if (nla_put_u8(skb, IFLA_OVPN_MODE, ovpn->mode)) + return -EMSGSIZE; + + return 0; +} + +static struct rtnl_link_ops ovpn_link_ops = { + .kind = "ovpn", + .netns_refund = false, + .priv_size = sizeof(struct ovpn_priv), + .setup = ovpn_setup, + .policy = ovpn_policy, + .maxtype = IFLA_OVPN_MAX, + .newlink = ovpn_newlink, + .dellink = ovpn_dellink, + .fill_info = ovpn_fill_info, +}; + +static int __init ovpn_init(void) +{ + int err = rtnl_link_register(&ovpn_link_ops); + + if (err) { + pr_err("ovpn: can't register rtnl link ops: %d\n", err); + return err; + } + + err = ovpn_nl_register(); + if (err) { + pr_err("ovpn: can't register netlink family: %d\n", err); + goto unreg_rtnl; + } + + ovpn_tcp_init(); + + return 0; + +unreg_rtnl: + rtnl_link_unregister(&ovpn_link_ops); + return err; +} + +static __exit void ovpn_cleanup(void) +{ + ovpn_nl_unregister(); + rtnl_link_unregister(&ovpn_link_ops); + + rcu_barrier(); +} + +module_init(ovpn_init); +module_exit(ovpn_cleanup); + +MODULE_DESCRIPTION("OpenVPN data channel offload (ovpn)"); +MODULE_AUTHOR("Antonio Quartulli <antonio@openvpn.net>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ovpn/main.h b/drivers/net/ovpn/main.h new file mode 100644 index 000000000000..017cd0100765 --- /dev/null +++ b/drivers/net/ovpn/main.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. 
+ * + * Author: Antonio Quartulli <antonio@openvpn.net> + */ + +#ifndef _NET_OVPN_MAIN_H_ +#define _NET_OVPN_MAIN_H_ + +bool ovpn_dev_is_valid(const struct net_device *dev); + +#endif /* _NET_OVPN_MAIN_H_ */ diff --git a/drivers/net/ovpn/netlink-gen.c b/drivers/net/ovpn/netlink-gen.c new file mode 100644 index 000000000000..58e1a4342378 --- /dev/null +++ b/drivers/net/ovpn/netlink-gen.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/ovpn.yaml */ +/* YNL-GEN kernel source */ + +#include <net/netlink.h> +#include <net/genetlink.h> + +#include "netlink-gen.h" + +#include <uapi/linux/ovpn.h> + +/* Integer value ranges */ +static const struct netlink_range_validation ovpn_a_peer_id_range = { + .max = 16777215ULL, +}; + +static const struct netlink_range_validation ovpn_a_keyconf_peer_id_range = { + .max = 16777215ULL, +}; + +/* Common nested types */ +const struct nla_policy ovpn_keyconf_nl_policy[OVPN_A_KEYCONF_DECRYPT_DIR + 1] = { + [OVPN_A_KEYCONF_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_keyconf_peer_id_range), + [OVPN_A_KEYCONF_SLOT] = NLA_POLICY_MAX(NLA_U32, 1), + [OVPN_A_KEYCONF_KEY_ID] = NLA_POLICY_MAX(NLA_U32, 7), + [OVPN_A_KEYCONF_CIPHER_ALG] = NLA_POLICY_MAX(NLA_U32, 2), + [OVPN_A_KEYCONF_ENCRYPT_DIR] = NLA_POLICY_NESTED(ovpn_keydir_nl_policy), + [OVPN_A_KEYCONF_DECRYPT_DIR] = NLA_POLICY_NESTED(ovpn_keydir_nl_policy), +}; + +const struct nla_policy ovpn_keydir_nl_policy[OVPN_A_KEYDIR_NONCE_TAIL + 1] = { + [OVPN_A_KEYDIR_CIPHER_KEY] = NLA_POLICY_MAX_LEN(256), + [OVPN_A_KEYDIR_NONCE_TAIL] = NLA_POLICY_EXACT_LEN(OVPN_NONCE_TAIL_SIZE), +}; + +const struct nla_policy ovpn_peer_nl_policy[OVPN_A_PEER_LINK_TX_PACKETS + 1] = { + [OVPN_A_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_peer_id_range), + [OVPN_A_PEER_REMOTE_IPV4] = { .type = NLA_BE32, }, + [OVPN_A_PEER_REMOTE_IPV6] = NLA_POLICY_EXACT_LEN(16), + [OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID] = { .type = NLA_U32, }, + [OVPN_A_PEER_REMOTE_PORT] = NLA_POLICY_MIN(NLA_BE16, 1), + [OVPN_A_PEER_SOCKET] = { .type = NLA_U32, }, + [OVPN_A_PEER_SOCKET_NETNSID] = { .type = NLA_S32, }, + [OVPN_A_PEER_VPN_IPV4] = { .type = NLA_BE32, }, + [OVPN_A_PEER_VPN_IPV6] = NLA_POLICY_EXACT_LEN(16), + [OVPN_A_PEER_LOCAL_IPV4] = { .type = NLA_BE32, }, + [OVPN_A_PEER_LOCAL_IPV6] = NLA_POLICY_EXACT_LEN(16), + [OVPN_A_PEER_LOCAL_PORT] = NLA_POLICY_MIN(NLA_BE16, 1), + [OVPN_A_PEER_KEEPALIVE_INTERVAL] = { .type = NLA_U32, }, + [OVPN_A_PEER_KEEPALIVE_TIMEOUT] = { .type = NLA_U32, }, + [OVPN_A_PEER_DEL_REASON] = NLA_POLICY_MAX(NLA_U32, 4), + [OVPN_A_PEER_VPN_RX_BYTES] = { .type = NLA_UINT, }, + [OVPN_A_PEER_VPN_TX_BYTES] = { .type = NLA_UINT, }, + [OVPN_A_PEER_VPN_RX_PACKETS] = { .type = NLA_UINT, }, + [OVPN_A_PEER_VPN_TX_PACKETS] = { .type = NLA_UINT, }, + [OVPN_A_PEER_LINK_RX_BYTES] = { .type = NLA_UINT, }, + [OVPN_A_PEER_LINK_TX_BYTES] = { .type = NLA_UINT, }, + [OVPN_A_PEER_LINK_RX_PACKETS] = { .type = NLA_UINT, }, + [OVPN_A_PEER_LINK_TX_PACKETS] = { .type = NLA_UINT, }, +}; + +/* OVPN_CMD_PEER_NEW - do */ +static const struct nla_policy ovpn_peer_new_nl_policy[OVPN_A_PEER + 1] = { + [OVPN_A_IFINDEX] = { .type = NLA_U32, }, + [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_nl_policy), +}; + +/* OVPN_CMD_PEER_SET - do */ +static const struct nla_policy ovpn_peer_set_nl_policy[OVPN_A_PEER + 1] = { + [OVPN_A_IFINDEX] = { .type = NLA_U32, }, + [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_nl_policy), +}; + +/* OVPN_CMD_PEER_GET - do */ 
+static const struct nla_policy ovpn_peer_get_do_nl_policy[OVPN_A_PEER + 1] = { + [OVPN_A_IFINDEX] = { .type = NLA_U32, }, + [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_nl_policy), +}; + +/* OVPN_CMD_PEER_GET - dump */ +static const struct nla_policy ovpn_peer_get_dump_nl_policy[OVPN_A_IFINDEX + 1] = { + [OVPN_A_IFINDEX] = { .type = NLA_U32, }, +}; + +/* OVPN_CMD_PEER_DEL - do */ +static const struct nla_policy ovpn_peer_del_nl_policy[OVPN_A_PEER + 1] = { + [OVPN_A_IFINDEX] = { .type = NLA_U32, }, + [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_nl_policy), +}; + +/* OVPN_CMD_KEY_NEW - do */ +static const struct nla_policy ovpn_key_new_nl_policy[OVPN_A_KEYCONF + 1] = { + [OVPN_A_IFINDEX] = { .type = NLA_U32, }, + [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_nl_policy), +}; + +/* OVPN_CMD_KEY_GET - do */ +static const struct nla_policy ovpn_key_get_nl_policy[OVPN_A_KEYCONF + 1] = { + [OVPN_A_IFINDEX] = { .type = NLA_U32, }, + [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_nl_policy), +}; + +/* OVPN_CMD_KEY_SWAP - do */ +static const struct nla_policy ovpn_key_swap_nl_policy[OVPN_A_KEYCONF + 1] = { + [OVPN_A_IFINDEX] = { .type = NLA_U32, }, + [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_nl_policy), +}; + +/* OVPN_CMD_KEY_DEL - do */ +static const struct nla_policy ovpn_key_del_nl_policy[OVPN_A_KEYCONF + 1] = { + [OVPN_A_IFINDEX] = { .type = NLA_U32, }, + [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_nl_policy), +}; + +/* Ops table for ovpn */ +static const struct genl_split_ops ovpn_nl_ops[] = { + { + .cmd = OVPN_CMD_PEER_NEW, + .pre_doit = ovpn_nl_pre_doit, + .doit = ovpn_nl_peer_new_doit, + .post_doit = ovpn_nl_post_doit, + .policy = ovpn_peer_new_nl_policy, + .maxattr = OVPN_A_PEER, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = OVPN_CMD_PEER_SET, + .pre_doit = ovpn_nl_pre_doit, + .doit = ovpn_nl_peer_set_doit, + .post_doit = ovpn_nl_post_doit, + .policy = ovpn_peer_set_nl_policy, + .maxattr = OVPN_A_PEER, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = OVPN_CMD_PEER_GET, + .pre_doit = ovpn_nl_pre_doit, + .doit = ovpn_nl_peer_get_doit, + .post_doit = ovpn_nl_post_doit, + .policy = ovpn_peer_get_do_nl_policy, + .maxattr = OVPN_A_PEER, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = OVPN_CMD_PEER_GET, + .dumpit = ovpn_nl_peer_get_dumpit, + .policy = ovpn_peer_get_dump_nl_policy, + .maxattr = OVPN_A_IFINDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP, + }, + { + .cmd = OVPN_CMD_PEER_DEL, + .pre_doit = ovpn_nl_pre_doit, + .doit = ovpn_nl_peer_del_doit, + .post_doit = ovpn_nl_post_doit, + .policy = ovpn_peer_del_nl_policy, + .maxattr = OVPN_A_PEER, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = OVPN_CMD_KEY_NEW, + .pre_doit = ovpn_nl_pre_doit, + .doit = ovpn_nl_key_new_doit, + .post_doit = ovpn_nl_post_doit, + .policy = ovpn_key_new_nl_policy, + .maxattr = OVPN_A_KEYCONF, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = OVPN_CMD_KEY_GET, + .pre_doit = ovpn_nl_pre_doit, + .doit = ovpn_nl_key_get_doit, + .post_doit = ovpn_nl_post_doit, + .policy = ovpn_key_get_nl_policy, + .maxattr = OVPN_A_KEYCONF, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = OVPN_CMD_KEY_SWAP, + .pre_doit = ovpn_nl_pre_doit, + .doit = ovpn_nl_key_swap_doit, + .post_doit = ovpn_nl_post_doit, + .policy = ovpn_key_swap_nl_policy, + .maxattr = OVPN_A_KEYCONF, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = OVPN_CMD_KEY_DEL, + .pre_doit = ovpn_nl_pre_doit, + .doit = ovpn_nl_key_del_doit, + .post_doit 
= ovpn_nl_post_doit, + .policy = ovpn_key_del_nl_policy, + .maxattr = OVPN_A_KEYCONF, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, +}; + +static const struct genl_multicast_group ovpn_nl_mcgrps[] = { + [OVPN_NLGRP_PEERS] = { "peers", }, +}; + +struct genl_family ovpn_nl_family __ro_after_init = { + .name = OVPN_FAMILY_NAME, + .version = OVPN_FAMILY_VERSION, + .netnsok = true, + .parallel_ops = true, + .module = THIS_MODULE, + .split_ops = ovpn_nl_ops, + .n_split_ops = ARRAY_SIZE(ovpn_nl_ops), + .mcgrps = ovpn_nl_mcgrps, + .n_mcgrps = ARRAY_SIZE(ovpn_nl_mcgrps), +}; diff --git a/drivers/net/ovpn/netlink-gen.h b/drivers/net/ovpn/netlink-gen.h new file mode 100644 index 000000000000..66a4e4a0a055 --- /dev/null +++ b/drivers/net/ovpn/netlink-gen.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/ovpn.yaml */ +/* YNL-GEN kernel header */ + +#ifndef _LINUX_OVPN_GEN_H +#define _LINUX_OVPN_GEN_H + +#include <net/netlink.h> +#include <net/genetlink.h> + +#include <uapi/linux/ovpn.h> + +/* Common nested types */ +extern const struct nla_policy ovpn_keyconf_nl_policy[OVPN_A_KEYCONF_DECRYPT_DIR + 1]; +extern const struct nla_policy ovpn_keydir_nl_policy[OVPN_A_KEYDIR_NONCE_TAIL + 1]; +extern const struct nla_policy ovpn_peer_nl_policy[OVPN_A_PEER_LINK_TX_PACKETS + 1]; + +int ovpn_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info); +void +ovpn_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info); + +int ovpn_nl_peer_new_doit(struct sk_buff *skb, struct genl_info *info); +int ovpn_nl_peer_set_doit(struct sk_buff *skb, struct genl_info *info); +int ovpn_nl_peer_get_doit(struct sk_buff *skb, struct genl_info *info); +int ovpn_nl_peer_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int ovpn_nl_peer_del_doit(struct sk_buff *skb, struct genl_info *info); +int ovpn_nl_key_new_doit(struct sk_buff *skb, struct genl_info *info); +int ovpn_nl_key_get_doit(struct sk_buff *skb, struct genl_info *info); +int ovpn_nl_key_swap_doit(struct sk_buff *skb, struct genl_info *info); +int ovpn_nl_key_del_doit(struct sk_buff *skb, struct genl_info *info); + +enum { + OVPN_NLGRP_PEERS, +}; + +extern struct genl_family ovpn_nl_family; + +#endif /* _LINUX_OVPN_GEN_H */ diff --git a/drivers/net/ovpn/netlink.c b/drivers/net/ovpn/netlink.c new file mode 100644 index 000000000000..bea03913bfb1 --- /dev/null +++ b/drivers/net/ovpn/netlink.c @@ -0,0 +1,1258 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. 
+ * + * Author: Antonio Quartulli <antonio@openvpn.net> + */ + +#include <linux/netdevice.h> +#include <linux/types.h> +#include <net/genetlink.h> + +#include <uapi/linux/ovpn.h> + +#include "ovpnpriv.h" +#include "main.h" +#include "netlink.h" +#include "netlink-gen.h" +#include "bind.h" +#include "crypto.h" +#include "peer.h" +#include "socket.h" + +MODULE_ALIAS_GENL_FAMILY(OVPN_FAMILY_NAME); + +/** + * ovpn_get_dev_from_attrs - retrieve the ovpn private data from the netdevice + * a netlink message is targeting + * @net: network namespace where to look for the interface + * @info: generic netlink info from the user request + * @tracker: tracker object to be used for the netdev reference acquisition + * + * Return: the ovpn private data, if found, or an error otherwise + */ +static struct ovpn_priv * +ovpn_get_dev_from_attrs(struct net *net, const struct genl_info *info, + netdevice_tracker *tracker) +{ + struct ovpn_priv *ovpn; + struct net_device *dev; + int ifindex; + + if (GENL_REQ_ATTR_CHECK(info, OVPN_A_IFINDEX)) + return ERR_PTR(-EINVAL); + + ifindex = nla_get_u32(info->attrs[OVPN_A_IFINDEX]); + + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, ifindex); + if (!dev) { + rcu_read_unlock(); + NL_SET_ERR_MSG_MOD(info->extack, + "ifindex does not match any interface"); + return ERR_PTR(-ENODEV); + } + + if (!ovpn_dev_is_valid(dev)) { + rcu_read_unlock(); + NL_SET_ERR_MSG_MOD(info->extack, + "specified interface is not ovpn"); + NL_SET_BAD_ATTR(info->extack, info->attrs[OVPN_A_IFINDEX]); + return ERR_PTR(-EINVAL); + } + + ovpn = netdev_priv(dev); + netdev_hold(dev, tracker, GFP_ATOMIC); + rcu_read_unlock(); + + return ovpn; +} + +int ovpn_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + netdevice_tracker *tracker = (netdevice_tracker *)&info->user_ptr[1]; + struct ovpn_priv *ovpn = ovpn_get_dev_from_attrs(genl_info_net(info), + info, tracker); + + if (IS_ERR(ovpn)) + return PTR_ERR(ovpn); + + info->user_ptr[0] = ovpn; + + return 0; +} + +void ovpn_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + netdevice_tracker *tracker = (netdevice_tracker *)&info->user_ptr[1]; + struct ovpn_priv *ovpn = info->user_ptr[0]; + + if (ovpn) + netdev_put(ovpn->dev, tracker); +} + +static bool ovpn_nl_attr_sockaddr_remote(struct nlattr **attrs, + struct sockaddr_storage *ss) +{ + struct sockaddr_in6 *sin6; + struct sockaddr_in *sin; + struct in6_addr *in6; + __be16 port = 0; + __be32 *in; + + ss->ss_family = AF_UNSPEC; + + if (attrs[OVPN_A_PEER_REMOTE_PORT]) + port = nla_get_be16(attrs[OVPN_A_PEER_REMOTE_PORT]); + + if (attrs[OVPN_A_PEER_REMOTE_IPV4]) { + ss->ss_family = AF_INET; + in = nla_data(attrs[OVPN_A_PEER_REMOTE_IPV4]); + } else if (attrs[OVPN_A_PEER_REMOTE_IPV6]) { + ss->ss_family = AF_INET6; + in6 = nla_data(attrs[OVPN_A_PEER_REMOTE_IPV6]); + } else { + return false; + } + + switch (ss->ss_family) { + case AF_INET6: + /* If this is a regular IPv6 just break and move on, + * otherwise switch to AF_INET and extract the IPv4 accordingly + */ + if (!ipv6_addr_v4mapped(in6)) { + sin6 = (struct sockaddr_in6 *)ss; + sin6->sin6_port = port; + memcpy(&sin6->sin6_addr, in6, sizeof(*in6)); + break; + } + + /* v4-mapped-v6 address */ + ss->ss_family = AF_INET; + in = &in6->s6_addr32[3]; + fallthrough; + case AF_INET: + sin = (struct sockaddr_in *)ss; + sin->sin_port = port; + sin->sin_addr.s_addr = *in; + break; + } + + return true; +} + +static u8 *ovpn_nl_attr_local_ip(struct nlattr **attrs) +{ + u8 
*addr6;
+
+	if (!attrs[OVPN_A_PEER_LOCAL_IPV4] && !attrs[OVPN_A_PEER_LOCAL_IPV6])
+		return NULL;
+
+	if (attrs[OVPN_A_PEER_LOCAL_IPV4])
+		return nla_data(attrs[OVPN_A_PEER_LOCAL_IPV4]);
+
+	addr6 = nla_data(attrs[OVPN_A_PEER_LOCAL_IPV6]);
+	/* this is an IPv4-mapped IPv6 address, therefore extract the actual
+	 * v4 address from the last 4 bytes
+	 */
+	if (ipv6_addr_v4mapped((struct in6_addr *)addr6))
+		return addr6 + 12;
+
+	return addr6;
+}
+
+static sa_family_t ovpn_nl_family_get(struct nlattr *addr4,
+				      struct nlattr *addr6)
+{
+	if (addr4)
+		return AF_INET;
+
+	if (addr6) {
+		if (ipv6_addr_v4mapped((struct in6_addr *)nla_data(addr6)))
+			return AF_INET;
+		return AF_INET6;
+	}
+
+	return AF_UNSPEC;
+}
+
+static int ovpn_nl_peer_precheck(struct ovpn_priv *ovpn,
+				 struct genl_info *info,
+				 struct nlattr **attrs)
+{
+	sa_family_t local_fam, remote_fam;
+
+	if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_PEER], attrs,
+			      OVPN_A_PEER_ID))
+		return -EINVAL;
+
+	if (attrs[OVPN_A_PEER_REMOTE_IPV4] && attrs[OVPN_A_PEER_REMOTE_IPV6]) {
+		NL_SET_ERR_MSG_MOD(info->extack,
+				   "cannot specify both a remote IPv4 and IPv6 address");
+		return -EINVAL;
+	}
+
+	if (!attrs[OVPN_A_PEER_REMOTE_IPV4] &&
+	    !attrs[OVPN_A_PEER_REMOTE_IPV6] && attrs[OVPN_A_PEER_REMOTE_PORT]) {
+		NL_SET_ERR_MSG_MOD(info->extack,
+				   "cannot specify remote port without IP address");
+		return -EINVAL;
+	}
+
+	if ((attrs[OVPN_A_PEER_REMOTE_IPV4] ||
+	     attrs[OVPN_A_PEER_REMOTE_IPV6]) &&
+	    !attrs[OVPN_A_PEER_REMOTE_PORT]) {
+		NL_SET_ERR_MSG_MOD(info->extack,
+				   "cannot specify remote IP address without port");
+		return -EINVAL;
+	}
+
+	if (!attrs[OVPN_A_PEER_REMOTE_IPV4] &&
+	    attrs[OVPN_A_PEER_LOCAL_IPV4]) {
+		NL_SET_ERR_MSG_MOD(info->extack,
+				   "cannot specify local IPv4 address without remote");
+		return -EINVAL;
+	}
+
+	if (!attrs[OVPN_A_PEER_REMOTE_IPV6] &&
+	    attrs[OVPN_A_PEER_LOCAL_IPV6]) {
+		NL_SET_ERR_MSG_MOD(info->extack,
+				   "cannot specify local IPv6 address without remote");
+		return -EINVAL;
+	}
+
+	/* check that local and remote address families are the same even
+	 * after parsing v4mapped IPv6 addresses.
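+	 * E.g. a v4-mapped IPv6 remote (AF_INET after parsing) combined
+	 * with a plain IPv6 local address must be rejected.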
+ * (if addresses are not provided, family will be AF_UNSPEC and + * the check is skipped) + */ + local_fam = ovpn_nl_family_get(attrs[OVPN_A_PEER_LOCAL_IPV4], + attrs[OVPN_A_PEER_LOCAL_IPV6]); + remote_fam = ovpn_nl_family_get(attrs[OVPN_A_PEER_REMOTE_IPV4], + attrs[OVPN_A_PEER_REMOTE_IPV6]); + if (local_fam != AF_UNSPEC && remote_fam != AF_UNSPEC && + local_fam != remote_fam) { + NL_SET_ERR_MSG_MOD(info->extack, + "mismatching local and remote address families"); + return -EINVAL; + } + + if (remote_fam != AF_INET6 && attrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]) { + NL_SET_ERR_MSG_MOD(info->extack, + "cannot specify scope id without remote IPv6 address"); + return -EINVAL; + } + + /* VPN IPs are needed only in MP mode for selecting the right peer */ + if (ovpn->mode == OVPN_MODE_P2P && (attrs[OVPN_A_PEER_VPN_IPV4] || + attrs[OVPN_A_PEER_VPN_IPV6])) { + NL_SET_ERR_MSG_FMT_MOD(info->extack, + "unexpected VPN IP in P2P mode"); + return -EINVAL; + } + + if ((attrs[OVPN_A_PEER_KEEPALIVE_INTERVAL] && + !attrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]) || + (!attrs[OVPN_A_PEER_KEEPALIVE_INTERVAL] && + attrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT])) { + NL_SET_ERR_MSG_FMT_MOD(info->extack, + "keepalive interval and timeout are required together"); + return -EINVAL; + } + + return 0; +} + +/** + * ovpn_nl_peer_modify - modify the peer attributes according to the incoming msg + * @peer: the peer to modify + * @info: generic netlink info from the user request + * @attrs: the attributes from the user request + * + * Return: a negative error code in case of failure, 0 on success or 1 on + * success and the VPN IPs have been modified (requires rehashing in MP + * mode) + */ +static int ovpn_nl_peer_modify(struct ovpn_peer *peer, struct genl_info *info, + struct nlattr **attrs) +{ + struct sockaddr_storage ss = {}; + void *local_ip = NULL; + u32 interv, timeout; + bool rehash = false; + int ret; + + spin_lock_bh(&peer->lock); + + if (ovpn_nl_attr_sockaddr_remote(attrs, &ss)) { + /* we carry the local IP in a generic container. + * ovpn_peer_reset_sockaddr() will properly interpret it + * based on ss.ss_family + */ + local_ip = ovpn_nl_attr_local_ip(attrs); + + /* set peer sockaddr */ + ret = ovpn_peer_reset_sockaddr(peer, &ss, local_ip); + if (ret < 0) { + NL_SET_ERR_MSG_FMT_MOD(info->extack, + "cannot set peer sockaddr: %d", + ret); + goto err_unlock; + } + dst_cache_reset(&peer->dst_cache); + } + + if (attrs[OVPN_A_PEER_VPN_IPV4]) { + rehash = true; + peer->vpn_addrs.ipv4.s_addr = + nla_get_in_addr(attrs[OVPN_A_PEER_VPN_IPV4]); + } + + if (attrs[OVPN_A_PEER_VPN_IPV6]) { + rehash = true; + peer->vpn_addrs.ipv6 = + nla_get_in6_addr(attrs[OVPN_A_PEER_VPN_IPV6]); + } + + /* when setting the keepalive, both parameters have to be configured */ + if (attrs[OVPN_A_PEER_KEEPALIVE_INTERVAL] && + attrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]) { + interv = nla_get_u32(attrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]); + timeout = nla_get_u32(attrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]); + ovpn_peer_keepalive_set(peer, interv, timeout); + } + + netdev_dbg(peer->ovpn->dev, + "modify peer id=%u endpoint=%pIScp VPN-IPv4=%pI4 VPN-IPv6=%pI6c\n", + peer->id, &ss, + &peer->vpn_addrs.ipv4.s_addr, &peer->vpn_addrs.ipv6); + + spin_unlock_bh(&peer->lock); + + return rehash ? 
1 : 0;
+err_unlock:
+	spin_unlock_bh(&peer->lock);
+	return ret;
+}
+
+int ovpn_nl_peer_new_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *attrs[OVPN_A_PEER_MAX + 1];
+	struct ovpn_priv *ovpn = info->user_ptr[0];
+	struct ovpn_socket *ovpn_sock;
+	struct socket *sock = NULL;
+	struct ovpn_peer *peer;
+	u32 sockfd, peer_id;
+	int ret;
+
+	if (GENL_REQ_ATTR_CHECK(info, OVPN_A_PEER))
+		return -EINVAL;
+
+	ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER],
+			       ovpn_peer_nl_policy, info->extack);
+	if (ret)
+		return ret;
+
+	ret = ovpn_nl_peer_precheck(ovpn, info, attrs);
+	if (ret < 0)
+		return ret;
+
+	if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_PEER], attrs,
+			      OVPN_A_PEER_SOCKET))
+		return -EINVAL;
+
+	/* in MP mode VPN IPs are required for selecting the right peer */
+	if (ovpn->mode == OVPN_MODE_MP && !attrs[OVPN_A_PEER_VPN_IPV4] &&
+	    !attrs[OVPN_A_PEER_VPN_IPV6]) {
+		NL_SET_ERR_MSG_FMT_MOD(info->extack,
+				       "VPN IP must be provided in MP mode");
+		return -EINVAL;
+	}
+
+	peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]);
+	peer = ovpn_peer_new(ovpn, peer_id);
+	if (IS_ERR(peer)) {
+		NL_SET_ERR_MSG_FMT_MOD(info->extack,
+				       "cannot create new peer object for peer %u: %ld",
+				       peer_id, PTR_ERR(peer));
+		return PTR_ERR(peer);
+	}
+
+	/* look up the fd in the kernel table and extract the socket object */
+	sockfd = nla_get_u32(attrs[OVPN_A_PEER_SOCKET]);
+	/* sockfd_lookup() increases sock's refcounter */
+	sock = sockfd_lookup(sockfd, &ret);
+	if (!sock) {
+		NL_SET_ERR_MSG_FMT_MOD(info->extack,
+				       "cannot look up peer socket (fd=%u): %d",
+				       sockfd, ret);
+		ret = -ENOTSOCK;
+		goto peer_release;
+	}
+
+	/* The remote endpoint can be configured only when UDP is used as
+	 * the transport protocol, so that ovpn knows where to send packets.
+	 */
+	if (sock->sk->sk_protocol == IPPROTO_UDP &&
+	    !attrs[OVPN_A_PEER_REMOTE_IPV4] &&
+	    !attrs[OVPN_A_PEER_REMOTE_IPV6]) {
+		NL_SET_ERR_MSG_FMT_MOD(info->extack,
+				       "missing remote IP address for UDP socket");
+		sockfd_put(sock);
+		ret = -EINVAL;
+		goto peer_release;
+	}
+
+	/* In case of TCP, the socket is connected to the peer and ovpn
+	 * will just send bytes over it, without the need to specify a
+	 * destination.
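+	 * A connected TCP socket already pins the remote address, so a
+	 * remote IP passed by userspace would be redundant at best and
+	 * contradictory at worst; it is therefore rejected below.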
+	 */
+	if (sock->sk->sk_protocol == IPPROTO_TCP &&
+	    (attrs[OVPN_A_PEER_REMOTE_IPV4] ||
+	     attrs[OVPN_A_PEER_REMOTE_IPV6])) {
+		NL_SET_ERR_MSG_FMT_MOD(info->extack,
+				       "unexpected remote IP address with TCP socket");
+		sockfd_put(sock);
+		ret = -EINVAL;
+		goto peer_release;
+	}
+
+	ovpn_sock = ovpn_socket_new(sock, peer);
+	/* at this point we unconditionally drop the reference to the socket:
+	 * - in case of error, the socket has to be dropped
+	 * - in case of success, the socket is configured and userspace is
+	 *   left owning the reference, so that it can trigger the final
+	 *   close()
+	 */
+	sockfd_put(sock);
+	if (IS_ERR(ovpn_sock)) {
+		NL_SET_ERR_MSG_FMT_MOD(info->extack,
+				       "cannot encapsulate socket: %ld",
+				       PTR_ERR(ovpn_sock));
+		ret = -ENOTSOCK;
+		goto peer_release;
+	}
+
+	rcu_assign_pointer(peer->sock, ovpn_sock);
+
+	ret = ovpn_nl_peer_modify(peer, info, attrs);
+	if (ret < 0)
+		goto sock_release;
+
+	ret = ovpn_peer_add(ovpn, peer);
+	if (ret < 0) {
+		NL_SET_ERR_MSG_FMT_MOD(info->extack,
+				       "cannot add new peer (id=%u) to hashtable: %d",
+				       peer->id, ret);
+		goto sock_release;
+	}
+
+	return 0;
+
+sock_release:
+	ovpn_socket_release(peer);
+peer_release:
+	/* release right away because peer was not yet hashed, thus it is not
+	 * used in any context
+	 */
+	ovpn_peer_release(peer);
+
+	return ret;
+}
+
+int ovpn_nl_peer_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *attrs[OVPN_A_PEER_MAX + 1];
+	struct ovpn_priv *ovpn = info->user_ptr[0];
+	struct ovpn_socket *sock;
+	struct ovpn_peer *peer;
+	u32 peer_id;
+	int ret;
+
+	if (GENL_REQ_ATTR_CHECK(info, OVPN_A_PEER))
+		return -EINVAL;
+
+	ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER],
+			       ovpn_peer_nl_policy, info->extack);
+	if (ret)
+		return ret;
+
+	ret = ovpn_nl_peer_precheck(ovpn, info, attrs);
+	if (ret < 0)
+		return ret;
+
+	if (attrs[OVPN_A_PEER_SOCKET]) {
+		NL_SET_ERR_MSG_FMT_MOD(info->extack,
+				       "socket cannot be modified");
+		return -EINVAL;
+	}
+
+	peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]);
+	peer = ovpn_peer_get_by_id(ovpn, peer_id);
+	if (!peer) {
+		NL_SET_ERR_MSG_FMT_MOD(info->extack,
+				       "cannot find peer with id %u", peer_id);
+		return -ENOENT;
+	}
+
+	/* when using a TCP socket the remote IP is not expected */
+	rcu_read_lock();
+	sock = rcu_dereference(peer->sock);
+	if (sock && sock->sock->sk->sk_protocol == IPPROTO_TCP &&
+	    (attrs[OVPN_A_PEER_REMOTE_IPV4] ||
+	     attrs[OVPN_A_PEER_REMOTE_IPV6])) {
+		rcu_read_unlock();
+		NL_SET_ERR_MSG_FMT_MOD(info->extack,
+				       "unexpected remote IP address with TCP socket");
+		ovpn_peer_put(peer);
+		return -EINVAL;
+	}
+	rcu_read_unlock();
+
+	spin_lock_bh(&ovpn->lock);
+	ret = ovpn_nl_peer_modify(peer, info, attrs);
+	if (ret < 0) {
+		spin_unlock_bh(&ovpn->lock);
+		ovpn_peer_put(peer);
+		return ret;
+	}
+
+	/* ret == 1 means that VPN IPv4/6 has been modified and rehashing
+	 * is required
+	 */
+	if (ret > 0)
+		ovpn_peer_hash_vpn_ip(peer);
+	spin_unlock_bh(&ovpn->lock);
+	ovpn_peer_put(peer);
+
+	return 0;
+}
+
+static int ovpn_nl_send_peer(struct sk_buff *skb, const struct genl_info *info,
+			     const struct ovpn_peer *peer, u32 portid, u32 seq,
+			     int flags)
+{
+	const struct ovpn_bind *bind;
+	struct ovpn_socket *sock;
+	int ret = -EMSGSIZE;
+	struct nlattr *attr;
+	__be16 local_port;
+	void *hdr;
+	int id;
+
+	hdr = genlmsg_put(skb, portid, seq, &ovpn_nl_family, flags,
+			  OVPN_CMD_PEER_GET);
+	if (!hdr)
+		return -ENOBUFS;
+
+	attr = nla_nest_start(skb, OVPN_A_PEER);
+	if (!attr)
+		goto err;
+
+	rcu_read_lock();
+	sock =
rcu_dereference(peer->sock); + if (!sock) { + ret = -EINVAL; + goto err_unlock; + } + + if (!net_eq(genl_info_net(info), sock_net(sock->sock->sk))) { + id = peernet2id_alloc(genl_info_net(info), + sock_net(sock->sock->sk), + GFP_ATOMIC); + if (nla_put_s32(skb, OVPN_A_PEER_SOCKET_NETNSID, id)) + goto err_unlock; + } + local_port = inet_sk(sock->sock->sk)->inet_sport; + rcu_read_unlock(); + + if (nla_put_u32(skb, OVPN_A_PEER_ID, peer->id)) + goto err; + + if (peer->vpn_addrs.ipv4.s_addr != htonl(INADDR_ANY)) + if (nla_put_in_addr(skb, OVPN_A_PEER_VPN_IPV4, + peer->vpn_addrs.ipv4.s_addr)) + goto err; + + if (!ipv6_addr_equal(&peer->vpn_addrs.ipv6, &in6addr_any)) + if (nla_put_in6_addr(skb, OVPN_A_PEER_VPN_IPV6, + &peer->vpn_addrs.ipv6)) + goto err; + + if (nla_put_u32(skb, OVPN_A_PEER_KEEPALIVE_INTERVAL, + peer->keepalive_interval) || + nla_put_u32(skb, OVPN_A_PEER_KEEPALIVE_TIMEOUT, + peer->keepalive_timeout)) + goto err; + + rcu_read_lock(); + bind = rcu_dereference(peer->bind); + if (bind) { + if (bind->remote.in4.sin_family == AF_INET) { + if (nla_put_in_addr(skb, OVPN_A_PEER_REMOTE_IPV4, + bind->remote.in4.sin_addr.s_addr) || + nla_put_net16(skb, OVPN_A_PEER_REMOTE_PORT, + bind->remote.in4.sin_port) || + nla_put_in_addr(skb, OVPN_A_PEER_LOCAL_IPV4, + bind->local.ipv4.s_addr)) + goto err_unlock; + } else if (bind->remote.in4.sin_family == AF_INET6) { + if (nla_put_in6_addr(skb, OVPN_A_PEER_REMOTE_IPV6, + &bind->remote.in6.sin6_addr) || + nla_put_u32(skb, OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID, + bind->remote.in6.sin6_scope_id) || + nla_put_net16(skb, OVPN_A_PEER_REMOTE_PORT, + bind->remote.in6.sin6_port) || + nla_put_in6_addr(skb, OVPN_A_PEER_LOCAL_IPV6, + &bind->local.ipv6)) + goto err_unlock; + } + } + rcu_read_unlock(); + + if (nla_put_net16(skb, OVPN_A_PEER_LOCAL_PORT, local_port) || + /* VPN RX stats */ + nla_put_uint(skb, OVPN_A_PEER_VPN_RX_BYTES, + atomic64_read(&peer->vpn_stats.rx.bytes)) || + nla_put_uint(skb, OVPN_A_PEER_VPN_RX_PACKETS, + atomic64_read(&peer->vpn_stats.rx.packets)) || + /* VPN TX stats */ + nla_put_uint(skb, OVPN_A_PEER_VPN_TX_BYTES, + atomic64_read(&peer->vpn_stats.tx.bytes)) || + nla_put_uint(skb, OVPN_A_PEER_VPN_TX_PACKETS, + atomic64_read(&peer->vpn_stats.tx.packets)) || + /* link RX stats */ + nla_put_uint(skb, OVPN_A_PEER_LINK_RX_BYTES, + atomic64_read(&peer->link_stats.rx.bytes)) || + nla_put_uint(skb, OVPN_A_PEER_LINK_RX_PACKETS, + atomic64_read(&peer->link_stats.rx.packets)) || + /* link TX stats */ + nla_put_uint(skb, OVPN_A_PEER_LINK_TX_BYTES, + atomic64_read(&peer->link_stats.tx.bytes)) || + nla_put_uint(skb, OVPN_A_PEER_LINK_TX_PACKETS, + atomic64_read(&peer->link_stats.tx.packets))) + goto err; + + nla_nest_end(skb, attr); + genlmsg_end(skb, hdr); + + return 0; +err_unlock: + rcu_read_unlock(); +err: + genlmsg_cancel(skb, hdr); + return ret; +} + +int ovpn_nl_peer_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *attrs[OVPN_A_PEER_MAX + 1]; + struct ovpn_priv *ovpn = info->user_ptr[0]; + struct ovpn_peer *peer; + struct sk_buff *msg; + u32 peer_id; + int ret; + + if (GENL_REQ_ATTR_CHECK(info, OVPN_A_PEER)) + return -EINVAL; + + ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER], + ovpn_peer_nl_policy, info->extack); + if (ret) + return ret; + + if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_PEER], attrs, + OVPN_A_PEER_ID)) + return -EINVAL; + + peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]); + peer = ovpn_peer_get_by_id(ovpn, peer_id); + if (!peer) { + NL_SET_ERR_MSG_FMT_MOD(info->extack, + "cannot find 
peer with id %u", peer_id);
+		return -ENOENT;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ret = ovpn_nl_send_peer(msg, info, peer, info->snd_portid,
+				info->snd_seq, 0);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		goto err;
+	}
+
+	ret = genlmsg_reply(msg, info);
+err:
+	ovpn_peer_put(peer);
+	return ret;
+}
+
+int ovpn_nl_peer_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const struct genl_info *info = genl_info_dump(cb);
+	int bkt, last_idx = cb->args[1], dumped = 0;
+	netdevice_tracker tracker;
+	struct ovpn_priv *ovpn;
+	struct ovpn_peer *peer;
+
+	ovpn = ovpn_get_dev_from_attrs(sock_net(cb->skb->sk), info, &tracker);
+	if (IS_ERR(ovpn))
+		return PTR_ERR(ovpn);
+
+	if (ovpn->mode == OVPN_MODE_P2P) {
+		/* if we already dumped a peer it means we are done */
+		if (last_idx)
+			goto out;
+
+		rcu_read_lock();
+		peer = rcu_dereference(ovpn->peer);
+		if (peer) {
+			if (ovpn_nl_send_peer(skb, info, peer,
+					      NETLINK_CB(cb->skb).portid,
+					      cb->nlh->nlmsg_seq,
+					      NLM_F_MULTI) == 0)
+				dumped++;
+		}
+		rcu_read_unlock();
+	} else {
+		rcu_read_lock();
+		hash_for_each_rcu(ovpn->peers->by_id, bkt, peer,
+				  hash_entry_id) {
+			/* skip peers that were already dumped by previous
+			 * invocations
+			 */
+			if (last_idx > 0) {
+				last_idx--;
+				continue;
+			}
+
+			if (ovpn_nl_send_peer(skb, info, peer,
+					      NETLINK_CB(cb->skb).portid,
+					      cb->nlh->nlmsg_seq,
+					      NLM_F_MULTI) < 0)
+				break;
+
+			/* count peers being dumped during this invocation */
+			dumped++;
+		}
+		rcu_read_unlock();
+	}
+
+out:
+	netdev_put(ovpn->dev, &tracker);
+
+	/* sum up peers dumped in this message, so that at the next invocation
+	 * we can continue from where we left off
+	 */
+	cb->args[1] += dumped;
+	return skb->len;
+}
+
+int ovpn_nl_peer_del_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *attrs[OVPN_A_PEER_MAX + 1];
+	struct ovpn_priv *ovpn = info->user_ptr[0];
+	struct ovpn_peer *peer;
+	u32 peer_id;
+	int ret;
+
+	if (GENL_REQ_ATTR_CHECK(info, OVPN_A_PEER))
+		return -EINVAL;
+
+	ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER],
+			       ovpn_peer_nl_policy, info->extack);
+	if (ret)
+		return ret;
+
+	if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_PEER], attrs,
+			      OVPN_A_PEER_ID))
+		return -EINVAL;
+
+	peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]);
+	peer = ovpn_peer_get_by_id(ovpn, peer_id);
+	if (!peer) {
+		NL_SET_ERR_MSG_FMT_MOD(info->extack,
+				       "cannot find peer with id %u", peer_id);
+		return -ENOENT;
+	}
+
+	netdev_dbg(ovpn->dev, "del peer %u\n", peer->id);
+	ret = ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_USERSPACE);
+	ovpn_peer_put(peer);
+
+	return ret;
+}
+
+static int ovpn_nl_get_key_dir(struct genl_info *info, struct nlattr *key,
+			       enum ovpn_cipher_alg cipher,
+			       struct ovpn_key_direction *dir)
+{
+	struct nlattr *attrs[OVPN_A_KEYDIR_MAX + 1];
+	int ret;
+
+	ret = nla_parse_nested(attrs, OVPN_A_KEYDIR_MAX, key,
+			       ovpn_keydir_nl_policy, info->extack);
+	if (ret)
+		return ret;
+
+	switch (cipher) {
+	case OVPN_CIPHER_ALG_AES_GCM:
+	case OVPN_CIPHER_ALG_CHACHA20_POLY1305:
+		if (NL_REQ_ATTR_CHECK(info->extack, key, attrs,
+				      OVPN_A_KEYDIR_CIPHER_KEY) ||
+		    NL_REQ_ATTR_CHECK(info->extack, key, attrs,
+				      OVPN_A_KEYDIR_NONCE_TAIL))
+			return -EINVAL;
+
+		dir->cipher_key = nla_data(attrs[OVPN_A_KEYDIR_CIPHER_KEY]);
+		dir->cipher_key_size = nla_len(attrs[OVPN_A_KEYDIR_CIPHER_KEY]);
+
+		/* These algorithms require a 96-bit nonce, which is
+		 * constructed by combining the 4-byte packet ID with the
+		 * 8-byte nonce tail provided by userspace
+		 */
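+		/* e.g. packet ID 0x00000001 combined with the nonce tail
+		 * aa bb cc dd ee ff 00 11 yields the 12-byte AEAD nonce
+		 * 00 00 00 01 aa bb cc dd ee ff 00 11 (values illustrative)
+		 */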
dir->nonce_tail = nla_data(attrs[OVPN_A_KEYDIR_NONCE_TAIL]); + dir->nonce_tail_size = nla_len(attrs[OVPN_A_KEYDIR_NONCE_TAIL]); + break; + default: + NL_SET_ERR_MSG_MOD(info->extack, "unsupported cipher"); + return -EINVAL; + } + + return 0; +} + +/** + * ovpn_nl_key_new_doit - configure a new key for the specified peer + * @skb: incoming netlink message + * @info: genetlink metadata + * + * This function allows the user to install a new key in the peer crypto + * state. + * Each peer has two 'slots', namely 'primary' and 'secondary', where + * keys can be installed. The key in the 'primary' slot is used for + * encryption, while both keys can be used for decryption by matching the + * key ID carried in the incoming packet. + * + * The user is responsible for rotating keys when necessary. The user + * may fetch peer traffic statistics via netlink in order to better + * identify the right time to rotate keys. + * The renegotiation follows these steps: + * 1. a new key is computed by the user and is installed in the 'secondary' + * slot + * 2. at user discretion (usually after a predetermined time) 'primary' and + * 'secondary' contents are swapped and the new key starts being used for + * encryption, while the old key is kept around for decryption of late + * packets. + * + * Return: 0 on success or a negative error code otherwise. + */ +int ovpn_nl_key_new_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *attrs[OVPN_A_KEYCONF_MAX + 1]; + struct ovpn_priv *ovpn = info->user_ptr[0]; + struct ovpn_peer_key_reset pkr; + struct ovpn_peer *peer; + u32 peer_id; + int ret; + + if (GENL_REQ_ATTR_CHECK(info, OVPN_A_KEYCONF)) + return -EINVAL; + + ret = nla_parse_nested(attrs, OVPN_A_KEYCONF_MAX, + info->attrs[OVPN_A_KEYCONF], + ovpn_keyconf_nl_policy, info->extack); + if (ret) + return ret; + + if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs, + OVPN_A_KEYCONF_PEER_ID)) + return -EINVAL; + + if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs, + OVPN_A_KEYCONF_SLOT) || + NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs, + OVPN_A_KEYCONF_KEY_ID) || + NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs, + OVPN_A_KEYCONF_CIPHER_ALG) || + NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs, + OVPN_A_KEYCONF_ENCRYPT_DIR) || + NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs, + OVPN_A_KEYCONF_DECRYPT_DIR)) + return -EINVAL; + + pkr.slot = nla_get_u32(attrs[OVPN_A_KEYCONF_SLOT]); + pkr.key.key_id = nla_get_u32(attrs[OVPN_A_KEYCONF_KEY_ID]); + pkr.key.cipher_alg = nla_get_u32(attrs[OVPN_A_KEYCONF_CIPHER_ALG]); + + ret = ovpn_nl_get_key_dir(info, attrs[OVPN_A_KEYCONF_ENCRYPT_DIR], + pkr.key.cipher_alg, &pkr.key.encrypt); + if (ret < 0) + return ret; + + ret = ovpn_nl_get_key_dir(info, attrs[OVPN_A_KEYCONF_DECRYPT_DIR], + pkr.key.cipher_alg, &pkr.key.decrypt); + if (ret < 0) + return ret; + + peer_id = nla_get_u32(attrs[OVPN_A_KEYCONF_PEER_ID]); + peer = ovpn_peer_get_by_id(ovpn, peer_id); + if (!peer) { + NL_SET_ERR_MSG_FMT_MOD(info->extack, + "no peer with id %u to set key for", + peer_id); + return -ENOENT; + } + + ret = ovpn_crypto_state_reset(&peer->crypto, &pkr); + if (ret < 0) { + NL_SET_ERR_MSG_FMT_MOD(info->extack, + "cannot install new key for peer %u", + peer_id); + goto out; + } + + netdev_dbg(ovpn->dev, "new key installed (id=%u) for peer %u\n", + pkr.key.key_id, peer_id); +out: + ovpn_peer_put(peer); + return ret; +} + +static int ovpn_nl_send_key(struct sk_buff *skb, 
const struct genl_info *info,
+			    u32 peer_id, enum ovpn_key_slot slot,
+			    const struct ovpn_key_config *keyconf)
+{
+	struct nlattr *attr;
+	void *hdr;
+
+	hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, &ovpn_nl_family,
+			  0, OVPN_CMD_KEY_GET);
+	if (!hdr)
+		return -ENOBUFS;
+
+	attr = nla_nest_start(skb, OVPN_A_KEYCONF);
+	if (!attr)
+		goto err;
+
+	if (nla_put_u32(skb, OVPN_A_KEYCONF_PEER_ID, peer_id))
+		goto err;
+
+	if (nla_put_u32(skb, OVPN_A_KEYCONF_SLOT, slot) ||
+	    nla_put_u32(skb, OVPN_A_KEYCONF_KEY_ID, keyconf->key_id) ||
+	    nla_put_u32(skb, OVPN_A_KEYCONF_CIPHER_ALG, keyconf->cipher_alg))
+		goto err;
+
+	nla_nest_end(skb, attr);
+	genlmsg_end(skb, hdr);
+
+	return 0;
+err:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+int ovpn_nl_key_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *attrs[OVPN_A_KEYCONF_MAX + 1];
+	struct ovpn_priv *ovpn = info->user_ptr[0];
+	struct ovpn_key_config keyconf = { 0 };
+	enum ovpn_key_slot slot;
+	struct ovpn_peer *peer;
+	struct sk_buff *msg;
+	u32 peer_id;
+	int ret;
+
+	if (GENL_REQ_ATTR_CHECK(info, OVPN_A_KEYCONF))
+		return -EINVAL;
+
+	ret = nla_parse_nested(attrs, OVPN_A_KEYCONF_MAX,
+			       info->attrs[OVPN_A_KEYCONF],
+			       ovpn_keyconf_nl_policy, info->extack);
+	if (ret)
+		return ret;
+
+	if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs,
+			      OVPN_A_KEYCONF_PEER_ID))
+		return -EINVAL;
+
+	if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs,
+			      OVPN_A_KEYCONF_SLOT))
+		return -EINVAL;
+
+	peer_id = nla_get_u32(attrs[OVPN_A_KEYCONF_PEER_ID]);
+	peer = ovpn_peer_get_by_id(ovpn, peer_id);
+	if (!peer) {
+		NL_SET_ERR_MSG_FMT_MOD(info->extack,
+				       "cannot find peer with id %u", peer_id);
+		return -ENOENT;
+	}
+
+	slot = nla_get_u32(attrs[OVPN_A_KEYCONF_SLOT]);
+
+	ret = ovpn_crypto_config_get(&peer->crypto, slot, &keyconf);
+	if (ret < 0) {
+		NL_SET_ERR_MSG_FMT_MOD(info->extack,
+				       "cannot extract key from slot %u for peer %u",
+				       slot, peer_id);
+		goto err;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ret = ovpn_nl_send_key(msg, info, peer->id, slot, &keyconf);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		goto err;
+	}
+
+	ret = genlmsg_reply(msg, info);
+err:
+	ovpn_peer_put(peer);
+	return ret;
+}
+
+int ovpn_nl_key_swap_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ovpn_priv *ovpn = info->user_ptr[0];
+	struct nlattr *attrs[OVPN_A_KEYCONF_MAX + 1];
+	struct ovpn_peer *peer;
+	u32 peer_id;
+	int ret;
+
+	if (GENL_REQ_ATTR_CHECK(info, OVPN_A_KEYCONF))
+		return -EINVAL;
+
+	ret = nla_parse_nested(attrs, OVPN_A_KEYCONF_MAX,
+			       info->attrs[OVPN_A_KEYCONF],
+			       ovpn_keyconf_nl_policy, info->extack);
+	if (ret)
+		return ret;
+
+	if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs,
+			      OVPN_A_KEYCONF_PEER_ID))
+		return -EINVAL;
+
+	peer_id = nla_get_u32(attrs[OVPN_A_KEYCONF_PEER_ID]);
+	peer = ovpn_peer_get_by_id(ovpn, peer_id);
+	if (!peer) {
+		NL_SET_ERR_MSG_FMT_MOD(info->extack,
+				       "no peer with id %u to swap keys for",
+				       peer_id);
+		return -ENOENT;
+	}
+
+	ovpn_crypto_key_slots_swap(&peer->crypto);
+	ovpn_peer_put(peer);
+
+	return 0;
+}
+
+int ovpn_nl_key_del_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *attrs[OVPN_A_KEYCONF_MAX + 1];
+	struct ovpn_priv *ovpn = info->user_ptr[0];
+	enum ovpn_key_slot slot;
+	struct ovpn_peer *peer;
+	u32 peer_id;
+	int ret;
+
+	if (GENL_REQ_ATTR_CHECK(info, OVPN_A_KEYCONF))
+		return -EINVAL;
+
+	ret = nla_parse_nested(attrs, OVPN_A_KEYCONF_MAX,
+			       info->attrs[OVPN_A_KEYCONF],
+			       ovpn_keyconf_nl_policy, info->extack);
+	if (ret)
+		return ret;
+
+	if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs,
+			      OVPN_A_KEYCONF_PEER_ID))
+		return -EINVAL;
+
+	if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs,
+			      OVPN_A_KEYCONF_SLOT))
+		return -EINVAL;
+
+	peer_id = nla_get_u32(attrs[OVPN_A_KEYCONF_PEER_ID]);
+	slot = nla_get_u32(attrs[OVPN_A_KEYCONF_SLOT]);
+
+	peer = ovpn_peer_get_by_id(ovpn, peer_id);
+	if (!peer) {
+		NL_SET_ERR_MSG_FMT_MOD(info->extack,
+				       "no peer with id %u to delete key for",
+				       peer_id);
+		return -ENOENT;
+	}
+
+	ovpn_crypto_key_slot_delete(&peer->crypto, slot);
+	ovpn_peer_put(peer);
+
+	return 0;
+}
+
+/**
+ * ovpn_nl_peer_del_notify - notify userspace about peer being deleted
+ * @peer: the peer being deleted
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+int ovpn_nl_peer_del_notify(struct ovpn_peer *peer)
+{
+	struct ovpn_socket *sock;
+	struct sk_buff *msg;
+	struct nlattr *attr;
+	int ret = -EMSGSIZE;
+	void *hdr;
+
+	netdev_info(peer->ovpn->dev, "deleting peer with id %u, reason %d\n",
+		    peer->id, peer->delete_reason);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, 0, 0, &ovpn_nl_family, 0, OVPN_CMD_PEER_DEL_NTF);
+	if (!hdr) {
+		ret = -ENOBUFS;
+		goto err_free_msg;
+	}
+
+	if (nla_put_u32(msg, OVPN_A_IFINDEX, peer->ovpn->dev->ifindex))
+		goto err_cancel_msg;
+
+	attr = nla_nest_start(msg, OVPN_A_PEER);
+	if (!attr)
+		goto err_cancel_msg;
+
+	if (nla_put_u32(msg, OVPN_A_PEER_DEL_REASON, peer->delete_reason))
+		goto err_cancel_msg;
+
+	if (nla_put_u32(msg, OVPN_A_PEER_ID, peer->id))
+		goto err_cancel_msg;
+
+	nla_nest_end(msg, attr);
+
+	genlmsg_end(msg, hdr);
+
+	rcu_read_lock();
+	sock = rcu_dereference(peer->sock);
+	if (!sock) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+	genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sock->sk),
+				msg, 0, OVPN_NLGRP_PEERS, GFP_ATOMIC);
+	rcu_read_unlock();
+
+	return 0;
+
+err_unlock:
+	rcu_read_unlock();
+err_cancel_msg:
+	genlmsg_cancel(msg, hdr);
+err_free_msg:
+	nlmsg_free(msg);
+	return ret;
+}
+
+/**
+ * ovpn_nl_key_swap_notify - notify userspace peer's key must be renewed
+ * @peer: the peer whose key needs to be renewed
+ * @key_id: the ID of the key that needs to be renewed
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+int ovpn_nl_key_swap_notify(struct ovpn_peer *peer, u8 key_id)
+{
+	struct ovpn_socket *sock;
+	struct nlattr *k_attr;
+	struct sk_buff *msg;
+	int ret = -EMSGSIZE;
+	void *hdr;
+
+	netdev_info(peer->ovpn->dev, "peer with id %u must rekey - primary key unusable.\n",
+		    peer->id);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, 0, 0, &ovpn_nl_family, 0, OVPN_CMD_KEY_SWAP_NTF);
+	if (!hdr) {
+		ret = -ENOBUFS;
+		goto err_free_msg;
+	}
+
+	if (nla_put_u32(msg, OVPN_A_IFINDEX, peer->ovpn->dev->ifindex))
+		goto err_cancel_msg;
+
+	k_attr = nla_nest_start(msg, OVPN_A_KEYCONF);
+	if (!k_attr)
+		goto err_cancel_msg;
+
+	if (nla_put_u32(msg, OVPN_A_KEYCONF_PEER_ID, peer->id))
+		goto err_cancel_msg;
+
+	if (nla_put_u32(msg, OVPN_A_KEYCONF_KEY_ID, key_id))
+		goto err_cancel_msg;
+
+	nla_nest_end(msg, k_attr);
+	genlmsg_end(msg, hdr);
+
+	rcu_read_lock();
+	sock = rcu_dereference(peer->sock);
+	if (!sock) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+	genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sock->sk),
+				msg, 0, OVPN_NLGRP_PEERS, GFP_ATOMIC);
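+	/* the multicast call consumes msg on both success and failure,
+	 * hence no nlmsg_free() on this path
+	 */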
+	rcu_read_unlock();
+
+	return 0;
+err_unlock:
+	rcu_read_unlock();
+err_cancel_msg:
+	genlmsg_cancel(msg, hdr);
+err_free_msg:
+	nlmsg_free(msg);
+	return ret;
+}
+
+/**
+ * ovpn_nl_register - perform any needed registration in the NL subsystem
+ *
+ * Return: 0 on success, a negative error code otherwise
+ */
+int __init ovpn_nl_register(void)
+{
+	int ret = genl_register_family(&ovpn_nl_family);
+
+	if (ret) {
+		pr_err("ovpn: genl_register_family failed: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * ovpn_nl_unregister - undo any module wide netlink registration
+ */
+void ovpn_nl_unregister(void)
+{
+	genl_unregister_family(&ovpn_nl_family);
+}
diff --git a/drivers/net/ovpn/netlink.h b/drivers/net/ovpn/netlink.h
new file mode 100644
index 000000000000..8615dfc3c472
--- /dev/null
+++ b/drivers/net/ovpn/netlink.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_NETLINK_H_
+#define _NET_OVPN_NETLINK_H_
+
+int ovpn_nl_register(void);
+void ovpn_nl_unregister(void);
+
+int ovpn_nl_peer_del_notify(struct ovpn_peer *peer);
+int ovpn_nl_key_swap_notify(struct ovpn_peer *peer, u8 key_id);
+
+#endif /* _NET_OVPN_NETLINK_H_ */
diff --git a/drivers/net/ovpn/ovpnpriv.h b/drivers/net/ovpn/ovpnpriv.h
new file mode 100644
index 000000000000..5898f6adada7
--- /dev/null
+++ b/drivers/net/ovpn/ovpnpriv.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2019-2025 OpenVPN, Inc.
+ *
+ * Author: James Yonan <james@openvpn.net>
+ *	   Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_OVPNSTRUCT_H_
+#define _NET_OVPN_OVPNSTRUCT_H_
+
+#include <linux/workqueue.h>
+#include <net/gro_cells.h>
+#include <uapi/linux/if_link.h>
+#include <uapi/linux/ovpn.h>
+
+/**
+ * struct ovpn_peer_collection - container of peers for MultiPeer mode
+ * @by_id: table of peers indexed by ID
+ * @by_vpn_addr4: table of peers indexed by VPN IPv4 address (items can be
+ *		  rehashed on the fly due to peer IP change)
+ * @by_vpn_addr6: table of peers indexed by VPN IPv6 address (items can be
+ *		  rehashed on the fly due to peer IP change)
+ * @by_transp_addr: table of peers indexed by transport address (items can be
+ *		    rehashed on the fly due to peer IP change)
+ */
+struct ovpn_peer_collection {
+	DECLARE_HASHTABLE(by_id, 12);
+	struct hlist_nulls_head by_vpn_addr4[1 << 12];
+	struct hlist_nulls_head by_vpn_addr6[1 << 12];
+	struct hlist_nulls_head by_transp_addr[1 << 12];
+};
+
+/**
+ * struct ovpn_priv - per ovpn interface state
+ * @dev: the actual netdev representing the tunnel
+ * @mode: device operation mode (i.e. p2p, mp, ..)
+ * @lock: protect this object + * @peers: data structures holding multi-peer references + * @peer: in P2P mode, this is the only remote peer + * @gro_cells: pointer to the Generic Receive Offload cell + * @keepalive_work: struct used to schedule keepalive periodic job + */ +struct ovpn_priv { + struct net_device *dev; + enum ovpn_mode mode; + spinlock_t lock; /* protect writing to the ovpn_priv object */ + struct ovpn_peer_collection *peers; + struct ovpn_peer __rcu *peer; + struct gro_cells gro_cells; + struct delayed_work keepalive_work; +}; + +#endif /* _NET_OVPN_OVPNSTRUCT_H_ */ diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c new file mode 100644 index 000000000000..a37f89fffb02 --- /dev/null +++ b/drivers/net/ovpn/peer.c @@ -0,0 +1,1365 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#include <linux/skbuff.h> +#include <linux/list.h> +#include <linux/hashtable.h> +#include <net/ip6_route.h> + +#include "ovpnpriv.h" +#include "bind.h" +#include "pktid.h" +#include "crypto.h" +#include "io.h" +#include "main.h" +#include "netlink.h" +#include "peer.h" +#include "socket.h" + +static void unlock_ovpn(struct ovpn_priv *ovpn, + struct llist_head *release_list) + __releases(&ovpn->lock) +{ + struct ovpn_peer *peer; + + spin_unlock_bh(&ovpn->lock); + + llist_for_each_entry(peer, release_list->first, release_entry) { + ovpn_socket_release(peer); + ovpn_peer_put(peer); + } +} + +/** + * ovpn_peer_keepalive_set - configure keepalive values for peer + * @peer: the peer to configure + * @interval: outgoing keepalive interval + * @timeout: incoming keepalive timeout + */ +void ovpn_peer_keepalive_set(struct ovpn_peer *peer, u32 interval, u32 timeout) +{ + time64_t now = ktime_get_real_seconds(); + + netdev_dbg(peer->ovpn->dev, + "scheduling keepalive for peer %u: interval=%u timeout=%u\n", + peer->id, interval, timeout); + + peer->keepalive_interval = interval; + WRITE_ONCE(peer->last_sent, now); + peer->keepalive_xmit_exp = now + interval; + + peer->keepalive_timeout = timeout; + WRITE_ONCE(peer->last_recv, now); + peer->keepalive_recv_exp = now + timeout; + + /* now that interval and timeout have been changed, kick + * off the worker so that the next delay can be recomputed + */ + mod_delayed_work(system_wq, &peer->ovpn->keepalive_work, 0); +} + +/** + * ovpn_peer_keepalive_send - periodic worker sending keepalive packets + * @work: pointer to the work member of the related peer object + * + * NOTE: the reference to peer is not dropped because it gets inherited + * by ovpn_xmit_special() + */ +static void ovpn_peer_keepalive_send(struct work_struct *work) +{ + struct ovpn_peer *peer = container_of(work, struct ovpn_peer, + keepalive_work); + + local_bh_disable(); + ovpn_xmit_special(peer, ovpn_keepalive_message, + sizeof(ovpn_keepalive_message)); + local_bh_enable(); +} + +/** + * ovpn_peer_new - allocate and initialize a new peer object + * @ovpn: the openvpn instance inside which the peer should be created + * @id: the ID assigned to this peer + * + * Return: a pointer to the new peer on success or an error code otherwise + */ +struct ovpn_peer *ovpn_peer_new(struct ovpn_priv *ovpn, u32 id) +{ + struct ovpn_peer *peer; + int ret; + + /* alloc and init peer object */ + peer = kzalloc(sizeof(*peer), GFP_KERNEL); + if (!peer) + return ERR_PTR(-ENOMEM); + + peer->id = id; + peer->ovpn = ovpn; + + 
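+	/* VPN IPs start unset ('any'): they are only assigned once
+	 * userspace provides them via a peer-new/peer-set request
+	 */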
peer->vpn_addrs.ipv4.s_addr = htonl(INADDR_ANY); + peer->vpn_addrs.ipv6 = in6addr_any; + + RCU_INIT_POINTER(peer->bind, NULL); + ovpn_crypto_state_init(&peer->crypto); + spin_lock_init(&peer->lock); + kref_init(&peer->refcount); + ovpn_peer_stats_init(&peer->vpn_stats); + ovpn_peer_stats_init(&peer->link_stats); + INIT_WORK(&peer->keepalive_work, ovpn_peer_keepalive_send); + + ret = dst_cache_init(&peer->dst_cache, GFP_KERNEL); + if (ret < 0) { + netdev_err(ovpn->dev, + "cannot initialize dst cache for peer %u\n", + peer->id); + kfree(peer); + return ERR_PTR(ret); + } + + netdev_hold(ovpn->dev, &peer->dev_tracker, GFP_KERNEL); + + return peer; +} + +/** + * ovpn_peer_reset_sockaddr - recreate binding for peer + * @peer: peer to recreate the binding for + * @ss: sockaddr to use as remote endpoint for the binding + * @local_ip: local IP for the binding + * + * Return: 0 on success or a negative error code otherwise + */ +int ovpn_peer_reset_sockaddr(struct ovpn_peer *peer, + const struct sockaddr_storage *ss, + const void *local_ip) +{ + struct ovpn_bind *bind; + size_t ip_len; + + lockdep_assert_held(&peer->lock); + + /* create new ovpn_bind object */ + bind = ovpn_bind_from_sockaddr(ss); + if (IS_ERR(bind)) + return PTR_ERR(bind); + + if (local_ip) { + if (ss->ss_family == AF_INET) { + ip_len = sizeof(struct in_addr); + } else if (ss->ss_family == AF_INET6) { + ip_len = sizeof(struct in6_addr); + } else { + net_dbg_ratelimited("%s: invalid family %u for remote endpoint for peer %u\n", + netdev_name(peer->ovpn->dev), + ss->ss_family, peer->id); + kfree(bind); + return -EINVAL; + } + + memcpy(&bind->local, local_ip, ip_len); + } + + /* set binding */ + ovpn_bind_reset(peer, bind); + + return 0; +} + +/* variable name __tbl2 needs to be different from __tbl1 + * in the macro below to avoid confusing clang + */ +#define ovpn_get_hash_slot(_tbl, _key, _key_len) ({ \ + typeof(_tbl) *__tbl2 = &(_tbl); \ + jhash(_key, _key_len, 0) % HASH_SIZE(*__tbl2); \ +}) + +#define ovpn_get_hash_head(_tbl, _key, _key_len) ({ \ + typeof(_tbl) *__tbl1 = &(_tbl); \ + &(*__tbl1)[ovpn_get_hash_slot(*__tbl1, _key, _key_len)];\ +}) + +/** + * ovpn_peer_endpoints_update - update remote or local endpoint for peer + * @peer: peer to update the remote endpoint for + * @skb: incoming packet to retrieve the source/destination address from + */ +void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb) +{ + struct hlist_nulls_head *nhead; + struct sockaddr_storage ss; + struct sockaddr_in6 *sa6; + bool reset_cache = false; + struct sockaddr_in *sa; + struct ovpn_bind *bind; + const void *local_ip; + size_t salen = 0; + + spin_lock_bh(&peer->lock); + bind = rcu_dereference_protected(peer->bind, + lockdep_is_held(&peer->lock)); + if (unlikely(!bind)) + goto unlock; + + switch (skb->protocol) { + case htons(ETH_P_IP): + /* float check */ + if (unlikely(!ovpn_bind_skb_src_match(bind, skb))) { + /* unconditionally save local endpoint in case + * of float, as it may have changed as well + */ + local_ip = &ip_hdr(skb)->daddr; + sa = (struct sockaddr_in *)&ss; + sa->sin_family = AF_INET; + sa->sin_addr.s_addr = ip_hdr(skb)->saddr; + sa->sin_port = udp_hdr(skb)->source; + salen = sizeof(*sa); + reset_cache = true; + break; + } + + /* if no float happened, let's double check if the local endpoint + * has changed + */ + if (unlikely(bind->local.ipv4.s_addr != ip_hdr(skb)->daddr)) { + net_dbg_ratelimited("%s: learning local IPv4 for peer %d (%pI4 -> %pI4)\n", + netdev_name(peer->ovpn->dev), + peer->id, 
+					    &bind->local.ipv4.s_addr,
+					    &ip_hdr(skb)->daddr);
+			bind->local.ipv4.s_addr = ip_hdr(skb)->daddr;
+			reset_cache = true;
+		}
+		break;
+	case htons(ETH_P_IPV6):
+		/* float check */
+		if (unlikely(!ovpn_bind_skb_src_match(bind, skb))) {
+			/* unconditionally save local endpoint in case
+			 * of float, as it may have changed as well
+			 */
+			local_ip = &ipv6_hdr(skb)->daddr;
+			sa6 = (struct sockaddr_in6 *)&ss;
+			sa6->sin6_family = AF_INET6;
+			sa6->sin6_addr = ipv6_hdr(skb)->saddr;
+			sa6->sin6_port = udp_hdr(skb)->source;
+			sa6->sin6_scope_id = ipv6_iface_scope_id(&ipv6_hdr(skb)->saddr,
+								 skb->skb_iif);
+			salen = sizeof(*sa6);
+			reset_cache = true;
+			break;
+		}
+
+		/* if no float happened, let's double check if the local endpoint
+		 * has changed
+		 */
+		if (unlikely(!ipv6_addr_equal(&bind->local.ipv6,
+					      &ipv6_hdr(skb)->daddr))) {
+			net_dbg_ratelimited("%s: learning local IPv6 for peer %d (%pI6c -> %pI6c)\n",
+					    netdev_name(peer->ovpn->dev),
+					    peer->id, &bind->local.ipv6,
+					    &ipv6_hdr(skb)->daddr);
+			bind->local.ipv6 = ipv6_hdr(skb)->daddr;
+			reset_cache = true;
+		}
+		break;
+	default:
+		goto unlock;
+	}
+
+	if (unlikely(reset_cache))
+		dst_cache_reset(&peer->dst_cache);
+
+	/* if the peer did not float, we can bail out now */
+	if (likely(!salen))
+		goto unlock;
+
+	if (unlikely(ovpn_peer_reset_sockaddr(peer,
+					      (struct sockaddr_storage *)&ss,
+					      local_ip) < 0))
+		goto unlock;
+
+	net_dbg_ratelimited("%s: peer %d floated to %pIScp\n",
+			    netdev_name(peer->ovpn->dev), peer->id, &ss);
+
+	spin_unlock_bh(&peer->lock);
+
+	/* rehashing is required only in MP mode as P2P has one peer
+	 * only and thus there is no hashtable
+	 */
+	if (peer->ovpn->mode == OVPN_MODE_MP) {
+		spin_lock_bh(&peer->ovpn->lock);
+		spin_lock_bh(&peer->lock);
+		bind = rcu_dereference_protected(peer->bind,
+						 lockdep_is_held(&peer->lock));
+		if (unlikely(!bind)) {
+			spin_unlock_bh(&peer->lock);
+			spin_unlock_bh(&peer->ovpn->lock);
+			return;
+		}
+
+		/* This function may be invoked concurrently, therefore another
+		 * float may have happened in parallel: perform rehashing
+		 * using the peer->bind->remote directly as key
+		 */
+
+		switch (bind->remote.in4.sin_family) {
+		case AF_INET:
+			salen = sizeof(*sa);
+			break;
+		case AF_INET6:
+			salen = sizeof(*sa6);
+			break;
+		}
+
+		/* remove old hashing */
+		hlist_nulls_del_init_rcu(&peer->hash_entry_transp_addr);
+		/* re-add with new transport address */
+		nhead = ovpn_get_hash_head(peer->ovpn->peers->by_transp_addr,
+					   &bind->remote, salen);
+		hlist_nulls_add_head_rcu(&peer->hash_entry_transp_addr, nhead);
+		spin_unlock_bh(&peer->lock);
+		spin_unlock_bh(&peer->ovpn->lock);
+	}
+	return;
+unlock:
+	spin_unlock_bh(&peer->lock);
+}
+
+/**
+ * ovpn_peer_release_rcu - RCU callback performing last peer release steps
+ * @head: RCU member of the ovpn_peer
+ */
+static void ovpn_peer_release_rcu(struct rcu_head *head)
+{
+	struct ovpn_peer *peer = container_of(head, struct ovpn_peer, rcu);
+
+	/* this call will immediately free the dst_cache, therefore we
+	 * perform it in the RCU callback, when all contexts are done
+	 */
+	dst_cache_destroy(&peer->dst_cache);
+	kfree(peer);
+}
+
+/**
+ * ovpn_peer_release - release peer private members
+ * @peer: the peer to release
+ */
+void ovpn_peer_release(struct ovpn_peer *peer)
+{
+	ovpn_crypto_state_release(&peer->crypto);
+	spin_lock_bh(&peer->lock);
+	ovpn_bind_reset(peer, NULL);
+	spin_unlock_bh(&peer->lock);
+	call_rcu(&peer->rcu, ovpn_peer_release_rcu);
+	netdev_put(peer->ovpn->dev, &peer->dev_tracker);
+}
+
+/**
+ * ovpn_peer_release_kref - callback for kref_put
* @kref: the kref object belonging to the peer + */ +void ovpn_peer_release_kref(struct kref *kref) +{ + struct ovpn_peer *peer = container_of(kref, struct ovpn_peer, refcount); + + ovpn_peer_release(peer); +} + +/** + * ovpn_peer_skb_to_sockaddr - fill sockaddr with skb source address + * @skb: the packet to extract data from + * @ss: the sockaddr to fill + * + * Return: sockaddr length on success or -1 otherwise + */ +static int ovpn_peer_skb_to_sockaddr(struct sk_buff *skb, + struct sockaddr_storage *ss) +{ + struct sockaddr_in6 *sa6; + struct sockaddr_in *sa4; + + switch (skb->protocol) { + case htons(ETH_P_IP): + sa4 = (struct sockaddr_in *)ss; + sa4->sin_family = AF_INET; + sa4->sin_addr.s_addr = ip_hdr(skb)->saddr; + sa4->sin_port = udp_hdr(skb)->source; + return sizeof(*sa4); + case htons(ETH_P_IPV6): + sa6 = (struct sockaddr_in6 *)ss; + sa6->sin6_family = AF_INET6; + sa6->sin6_addr = ipv6_hdr(skb)->saddr; + sa6->sin6_port = udp_hdr(skb)->source; + return sizeof(*sa6); + } + + return -1; +} + +/** + * ovpn_nexthop_from_skb4 - retrieve IPv4 nexthop for outgoing skb + * @skb: the outgoing packet + * + * Return: the IPv4 of the nexthop + */ +static __be32 ovpn_nexthop_from_skb4(struct sk_buff *skb) +{ + const struct rtable *rt = skb_rtable(skb); + + if (rt && rt->rt_uses_gateway) + return rt->rt_gw4; + + return ip_hdr(skb)->daddr; +} + +/** + * ovpn_nexthop_from_skb6 - retrieve IPv6 nexthop for outgoing skb + * @skb: the outgoing packet + * + * Return: the IPv6 of the nexthop + */ +static struct in6_addr ovpn_nexthop_from_skb6(struct sk_buff *skb) +{ + const struct rt6_info *rt = skb_rt6_info(skb); + + if (!rt || !(rt->rt6i_flags & RTF_GATEWAY)) + return ipv6_hdr(skb)->daddr; + + return rt->rt6i_gateway; +} + +/** + * ovpn_peer_get_by_vpn_addr4 - retrieve peer by its VPN IPv4 address + * @ovpn: the openvpn instance to search + * @addr: VPN IPv4 to use as search key + * + * Refcounter is not increased for the returned peer. + * + * Return: the peer if found or NULL otherwise + */ +static struct ovpn_peer *ovpn_peer_get_by_vpn_addr4(struct ovpn_priv *ovpn, + __be32 addr) +{ + struct hlist_nulls_head *nhead; + struct hlist_nulls_node *ntmp; + struct ovpn_peer *tmp; + unsigned int slot; + +begin: + slot = ovpn_get_hash_slot(ovpn->peers->by_vpn_addr4, &addr, + sizeof(addr)); + nhead = &ovpn->peers->by_vpn_addr4[slot]; + + hlist_nulls_for_each_entry_rcu(tmp, ntmp, nhead, hash_entry_addr4) + if (addr == tmp->vpn_addrs.ipv4.s_addr) + return tmp; + + /* item may have moved during lookup - check nulls and restart + * if that's the case + */ + if (get_nulls_value(ntmp) != slot) + goto begin; + + return NULL; +} + +/** + * ovpn_peer_get_by_vpn_addr6 - retrieve peer by its VPN IPv6 address + * @ovpn: the openvpn instance to search + * @addr: VPN IPv6 to use as search key + * + * Refcounter is not increased for the returned peer. 
+ * + * Return: the peer if found or NULL otherwise + */ +static struct ovpn_peer *ovpn_peer_get_by_vpn_addr6(struct ovpn_priv *ovpn, + struct in6_addr *addr) +{ + struct hlist_nulls_head *nhead; + struct hlist_nulls_node *ntmp; + struct ovpn_peer *tmp; + unsigned int slot; + +begin: + slot = ovpn_get_hash_slot(ovpn->peers->by_vpn_addr6, addr, + sizeof(*addr)); + nhead = &ovpn->peers->by_vpn_addr6[slot]; + + hlist_nulls_for_each_entry_rcu(tmp, ntmp, nhead, hash_entry_addr6) + if (ipv6_addr_equal(addr, &tmp->vpn_addrs.ipv6)) + return tmp; + + /* item may have moved during lookup - check nulls and restart + * if that's the case + */ + if (get_nulls_value(ntmp) != slot) + goto begin; + + return NULL; +} + +/** + * ovpn_peer_transp_match - check if sockaddr and peer binding match + * @peer: the peer to get the binding from + * @ss: the sockaddr to match + * + * Return: true if sockaddr and binding match or false otherwise + */ +static bool ovpn_peer_transp_match(const struct ovpn_peer *peer, + const struct sockaddr_storage *ss) +{ + struct ovpn_bind *bind = rcu_dereference(peer->bind); + struct sockaddr_in6 *sa6; + struct sockaddr_in *sa4; + + if (unlikely(!bind)) + return false; + + if (ss->ss_family != bind->remote.in4.sin_family) + return false; + + switch (ss->ss_family) { + case AF_INET: + sa4 = (struct sockaddr_in *)ss; + if (sa4->sin_addr.s_addr != bind->remote.in4.sin_addr.s_addr) + return false; + if (sa4->sin_port != bind->remote.in4.sin_port) + return false; + break; + case AF_INET6: + sa6 = (struct sockaddr_in6 *)ss; + if (!ipv6_addr_equal(&sa6->sin6_addr, + &bind->remote.in6.sin6_addr)) + return false; + if (sa6->sin6_port != bind->remote.in6.sin6_port) + return false; + break; + default: + return false; + } + + return true; +} + +/** + * ovpn_peer_get_by_transp_addr_p2p - get peer by transport address in a P2P + * instance + * @ovpn: the openvpn instance to search + * @ss: the transport socket address + * + * Return: the peer if found or NULL otherwise + */ +static struct ovpn_peer * +ovpn_peer_get_by_transp_addr_p2p(struct ovpn_priv *ovpn, + struct sockaddr_storage *ss) +{ + struct ovpn_peer *tmp, *peer = NULL; + + rcu_read_lock(); + tmp = rcu_dereference(ovpn->peer); + if (likely(tmp && ovpn_peer_transp_match(tmp, ss) && + ovpn_peer_hold(tmp))) + peer = tmp; + rcu_read_unlock(); + + return peer; +} + +/** + * ovpn_peer_get_by_transp_addr - retrieve peer by transport address + * @ovpn: the openvpn instance to search + * @skb: the skb to retrieve the source transport address from + * + * Return: a pointer to the peer if found or NULL otherwise + */ +struct ovpn_peer *ovpn_peer_get_by_transp_addr(struct ovpn_priv *ovpn, + struct sk_buff *skb) +{ + struct ovpn_peer *tmp, *peer = NULL; + struct sockaddr_storage ss = { 0 }; + struct hlist_nulls_head *nhead; + struct hlist_nulls_node *ntmp; + unsigned int slot; + ssize_t sa_len; + + sa_len = ovpn_peer_skb_to_sockaddr(skb, &ss); + if (unlikely(sa_len < 0)) + return NULL; + + if (ovpn->mode == OVPN_MODE_P2P) + return ovpn_peer_get_by_transp_addr_p2p(ovpn, &ss); + + rcu_read_lock(); +begin: + slot = ovpn_get_hash_slot(ovpn->peers->by_transp_addr, &ss, sa_len); + nhead = &ovpn->peers->by_transp_addr[slot]; + + hlist_nulls_for_each_entry_rcu(tmp, ntmp, nhead, + hash_entry_transp_addr) { + if (!ovpn_peer_transp_match(tmp, &ss)) + continue; + + if (!ovpn_peer_hold(tmp)) + continue; + + peer = tmp; + break; + } + + /* item may have moved during lookup - check nulls and restart + * if that's the case + */ + if (!peer && get_nulls_value(ntmp) 
!= slot) + goto begin; + rcu_read_unlock(); + + return peer; +} + +/** + * ovpn_peer_get_by_id_p2p - get peer by ID in a P2P instance + * @ovpn: the openvpn instance to search + * @peer_id: the ID of the peer to find + * + * Return: the peer if found or NULL otherwise + */ +static struct ovpn_peer *ovpn_peer_get_by_id_p2p(struct ovpn_priv *ovpn, + u32 peer_id) +{ + struct ovpn_peer *tmp, *peer = NULL; + + rcu_read_lock(); + tmp = rcu_dereference(ovpn->peer); + if (likely(tmp && tmp->id == peer_id && ovpn_peer_hold(tmp))) + peer = tmp; + rcu_read_unlock(); + + return peer; +} + +/** + * ovpn_peer_get_by_id - retrieve peer by ID + * @ovpn: the openvpn instance to search + * @peer_id: the unique peer identifier to match + * + * Return: a pointer to the peer if found or NULL otherwise + */ +struct ovpn_peer *ovpn_peer_get_by_id(struct ovpn_priv *ovpn, u32 peer_id) +{ + struct ovpn_peer *tmp, *peer = NULL; + struct hlist_head *head; + + if (ovpn->mode == OVPN_MODE_P2P) + return ovpn_peer_get_by_id_p2p(ovpn, peer_id); + + head = ovpn_get_hash_head(ovpn->peers->by_id, &peer_id, + sizeof(peer_id)); + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp, head, hash_entry_id) { + if (tmp->id != peer_id) + continue; + + if (!ovpn_peer_hold(tmp)) + continue; + + peer = tmp; + break; + } + rcu_read_unlock(); + + return peer; +} + +static void ovpn_peer_remove(struct ovpn_peer *peer, + enum ovpn_del_peer_reason reason, + struct llist_head *release_list) +{ + lockdep_assert_held(&peer->ovpn->lock); + + switch (peer->ovpn->mode) { + case OVPN_MODE_MP: + /* prevent double remove */ + if (hlist_unhashed(&peer->hash_entry_id)) + return; + + hlist_del_init_rcu(&peer->hash_entry_id); + hlist_nulls_del_init_rcu(&peer->hash_entry_addr4); + hlist_nulls_del_init_rcu(&peer->hash_entry_addr6); + hlist_nulls_del_init_rcu(&peer->hash_entry_transp_addr); + break; + case OVPN_MODE_P2P: + /* prevent double remove */ + if (peer != rcu_access_pointer(peer->ovpn->peer)) + return; + + RCU_INIT_POINTER(peer->ovpn->peer, NULL); + /* in P2P mode the carrier is switched off when the peer is + * deleted so that third party protocols can react accordingly + */ + netif_carrier_off(peer->ovpn->dev); + break; + } + + peer->delete_reason = reason; + ovpn_nl_peer_del_notify(peer); + + /* append to provided list for later socket release and ref drop */ + llist_add(&peer->release_entry, release_list); +} + +/** + * ovpn_peer_get_by_dst - Lookup peer to send skb to + * @ovpn: the private data representing the current VPN session + * @skb: the skb to extract the destination address from + * + * This function takes a tunnel packet and looks up the peer to send it to + * after encapsulation. The skb is expected to be the in-tunnel packet, without + * any OpenVPN related header. + * + * Assume that the IP header is accessible in the skb data. + * + * Return: the peer if found or NULL otherwise. 
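+ * For example, a packet whose destination 10.8.0.5 is routed via the
+ * gateway 10.8.0.1 is matched against the peer owning VPN IP 10.8.0.1,
+ * since the nexthop (not the final destination) is the lookup key
+ * (addresses illustrative).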
+ */ +struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_priv *ovpn, + struct sk_buff *skb) +{ + struct ovpn_peer *peer = NULL; + struct in6_addr addr6; + __be32 addr4; + + /* in P2P mode, no matter the destination, packets are always sent to + * the single peer listening on the other side + */ + if (ovpn->mode == OVPN_MODE_P2P) { + rcu_read_lock(); + peer = rcu_dereference(ovpn->peer); + if (unlikely(peer && !ovpn_peer_hold(peer))) + peer = NULL; + rcu_read_unlock(); + return peer; + } + + rcu_read_lock(); + switch (skb->protocol) { + case htons(ETH_P_IP): + addr4 = ovpn_nexthop_from_skb4(skb); + peer = ovpn_peer_get_by_vpn_addr4(ovpn, addr4); + break; + case htons(ETH_P_IPV6): + addr6 = ovpn_nexthop_from_skb6(skb); + peer = ovpn_peer_get_by_vpn_addr6(ovpn, &addr6); + break; + } + + if (unlikely(peer && !ovpn_peer_hold(peer))) + peer = NULL; + rcu_read_unlock(); + + return peer; +} + +/** + * ovpn_nexthop_from_rt4 - look up the IPv4 nexthop for the given destination + * @ovpn: the private data representing the current VPN session + * @dest: the destination to be looked up + * + * Looks up in the IPv4 system routing table the IP of the nexthop to be used + * to reach the destination passed as argument. If no nexthop can be found, the + * destination itself is returned as it probably has to be used as nexthop. + * + * Return: the IP of the next hop if found or dest itself otherwise + */ +static __be32 ovpn_nexthop_from_rt4(struct ovpn_priv *ovpn, __be32 dest) +{ + struct rtable *rt; + struct flowi4 fl = { + .daddr = dest + }; + + rt = ip_route_output_flow(dev_net(ovpn->dev), &fl, NULL); + if (IS_ERR(rt)) { + net_dbg_ratelimited("%s: no route to host %pI4\n", + netdev_name(ovpn->dev), &dest); + /* if we end up here this packet is probably going to be + * thrown away later + */ + return dest; + } + + if (!rt->rt_uses_gateway) + goto out; + + dest = rt->rt_gw4; +out: + ip_rt_put(rt); + return dest; +} + +/** + * ovpn_nexthop_from_rt6 - look up the IPv6 nexthop for the given destination + * @ovpn: the private data representing the current VPN session + * @dest: the destination to be looked up + * + * Looks up in the IPv6 system routing table the IP of the nexthop to be used + * to reach the destination passed as argument. If no nexthop can be found, the + * destination itself is returned as it probably has to be used as nexthop. 
+ * + * Return: the IP of the next hop if found or dest itself otherwise + */ +static struct in6_addr ovpn_nexthop_from_rt6(struct ovpn_priv *ovpn, + struct in6_addr dest) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct dst_entry *entry; + struct rt6_info *rt; + struct flowi6 fl = { + .daddr = dest, + }; + + entry = ipv6_stub->ipv6_dst_lookup_flow(dev_net(ovpn->dev), NULL, &fl, + NULL); + if (IS_ERR(entry)) { + net_dbg_ratelimited("%s: no route to host %pI6c\n", + netdev_name(ovpn->dev), &dest); + /* if we end up here this packet is probably going to be + * thrown away later + */ + return dest; + } + + rt = dst_rt6_info(entry); + + if (!(rt->rt6i_flags & RTF_GATEWAY)) + goto out; + + dest = rt->rt6i_gateway; +out: + dst_release((struct dst_entry *)rt); +#endif + return dest; +} + +/** + * ovpn_peer_check_by_src - check that skb source is routed via peer + * @ovpn: the openvpn instance to search + * @skb: the packet to extract source address from + * @peer: the peer to check against the source address + * + * Return: true if the peer is matching or false otherwise + */ +bool ovpn_peer_check_by_src(struct ovpn_priv *ovpn, struct sk_buff *skb, + struct ovpn_peer *peer) +{ + bool match = false; + struct in6_addr addr6; + __be32 addr4; + + if (ovpn->mode == OVPN_MODE_P2P) { + /* in P2P mode, no matter the destination, packets are always + * sent to the single peer listening on the other side + */ + return peer == rcu_access_pointer(ovpn->peer); + } + + /* This function performs a reverse path check, therefore we now + * lookup the nexthop we would use if we wanted to route a packet + * to the source IP. If the nexthop matches the sender we know the + * latter is valid and we allow the packet to come in + */ + + switch (skb->protocol) { + case htons(ETH_P_IP): + addr4 = ovpn_nexthop_from_rt4(ovpn, ip_hdr(skb)->saddr); + rcu_read_lock(); + match = (peer == ovpn_peer_get_by_vpn_addr4(ovpn, addr4)); + rcu_read_unlock(); + break; + case htons(ETH_P_IPV6): + addr6 = ovpn_nexthop_from_rt6(ovpn, ipv6_hdr(skb)->saddr); + rcu_read_lock(); + match = (peer == ovpn_peer_get_by_vpn_addr6(ovpn, &addr6)); + rcu_read_unlock(); + break; + } + + return match; +} + +void ovpn_peer_hash_vpn_ip(struct ovpn_peer *peer) +{ + struct hlist_nulls_head *nhead; + + lockdep_assert_held(&peer->ovpn->lock); + + /* rehashing makes sense only in multipeer mode */ + if (peer->ovpn->mode != OVPN_MODE_MP) + return; + + if (peer->vpn_addrs.ipv4.s_addr != htonl(INADDR_ANY)) { + /* remove potential old hashing */ + hlist_nulls_del_init_rcu(&peer->hash_entry_addr4); + + nhead = ovpn_get_hash_head(peer->ovpn->peers->by_vpn_addr4, + &peer->vpn_addrs.ipv4, + sizeof(peer->vpn_addrs.ipv4)); + hlist_nulls_add_head_rcu(&peer->hash_entry_addr4, nhead); + } + + if (!ipv6_addr_any(&peer->vpn_addrs.ipv6)) { + /* remove potential old hashing */ + hlist_nulls_del_init_rcu(&peer->hash_entry_addr6); + + nhead = ovpn_get_hash_head(peer->ovpn->peers->by_vpn_addr6, + &peer->vpn_addrs.ipv6, + sizeof(peer->vpn_addrs.ipv6)); + hlist_nulls_add_head_rcu(&peer->hash_entry_addr6, nhead); + } +} + +/** + * ovpn_peer_add_mp - add peer to related tables in a MP instance + * @ovpn: the instance to add the peer to + * @peer: the peer to add + * + * Return: 0 on success or a negative error code otherwise + */ +static int ovpn_peer_add_mp(struct ovpn_priv *ovpn, struct ovpn_peer *peer) +{ + struct sockaddr_storage sa = { 0 }; + struct hlist_nulls_head *nhead; + struct sockaddr_in6 *sa6; + struct sockaddr_in *sa4; + struct ovpn_bind *bind; + struct ovpn_peer *tmp; + size_t 
salen; + int ret = 0; + + spin_lock_bh(&ovpn->lock); + /* do not add duplicates */ + tmp = ovpn_peer_get_by_id(ovpn, peer->id); + if (tmp) { + ovpn_peer_put(tmp); + ret = -EEXIST; + goto out; + } + + bind = rcu_dereference_protected(peer->bind, true); + /* peers connected via TCP have bind == NULL */ + if (bind) { + switch (bind->remote.in4.sin_family) { + case AF_INET: + sa4 = (struct sockaddr_in *)&sa; + + sa4->sin_family = AF_INET; + sa4->sin_addr.s_addr = bind->remote.in4.sin_addr.s_addr; + sa4->sin_port = bind->remote.in4.sin_port; + salen = sizeof(*sa4); + break; + case AF_INET6: + sa6 = (struct sockaddr_in6 *)&sa; + + sa6->sin6_family = AF_INET6; + sa6->sin6_addr = bind->remote.in6.sin6_addr; + sa6->sin6_port = bind->remote.in6.sin6_port; + salen = sizeof(*sa6); + break; + default: + ret = -EPROTONOSUPPORT; + goto out; + } + + nhead = ovpn_get_hash_head(ovpn->peers->by_transp_addr, &sa, + salen); + hlist_nulls_add_head_rcu(&peer->hash_entry_transp_addr, nhead); + } + + hlist_add_head_rcu(&peer->hash_entry_id, + ovpn_get_hash_head(ovpn->peers->by_id, &peer->id, + sizeof(peer->id))); + + ovpn_peer_hash_vpn_ip(peer); +out: + spin_unlock_bh(&ovpn->lock); + return ret; +} + +/** + * ovpn_peer_add_p2p - add peer to related tables in a P2P instance + * @ovpn: the instance to add the peer to + * @peer: the peer to add + * + * Return: 0 on success or a negative error code otherwise + */ +static int ovpn_peer_add_p2p(struct ovpn_priv *ovpn, struct ovpn_peer *peer) +{ + LLIST_HEAD(release_list); + struct ovpn_peer *tmp; + + spin_lock_bh(&ovpn->lock); + /* in p2p mode it is possible to have a single peer only, therefore the + * old one is released and substituted by the new one + */ + tmp = rcu_dereference_protected(ovpn->peer, + lockdep_is_held(&ovpn->lock)); + if (tmp) + ovpn_peer_remove(tmp, OVPN_DEL_PEER_REASON_TEARDOWN, + &release_list); + + rcu_assign_pointer(ovpn->peer, peer); + /* in P2P mode the carrier is switched on when the peer is added */ + netif_carrier_on(ovpn->dev); + unlock_ovpn(ovpn, &release_list); + + return 0; +} + +/** + * ovpn_peer_add - add peer to the related tables + * @ovpn: the openvpn instance the peer belongs to + * @peer: the peer object to add + * + * Assume refcounter was increased by caller + * + * Return: 0 on success or a negative error code otherwise + */ +int ovpn_peer_add(struct ovpn_priv *ovpn, struct ovpn_peer *peer) +{ + switch (ovpn->mode) { + case OVPN_MODE_MP: + return ovpn_peer_add_mp(ovpn, peer); + case OVPN_MODE_P2P: + return ovpn_peer_add_p2p(ovpn, peer); + } + + return -EOPNOTSUPP; +} + +/** + * ovpn_peer_del_mp - delete peer from related tables in a MP instance + * @peer: the peer to delete + * @reason: reason why the peer was deleted (sent to userspace) + * @release_list: list where delete peer should be appended + * + * Return: 0 on success or a negative error code otherwise + */ +static int ovpn_peer_del_mp(struct ovpn_peer *peer, + enum ovpn_del_peer_reason reason, + struct llist_head *release_list) +{ + struct ovpn_peer *tmp; + int ret = -ENOENT; + + lockdep_assert_held(&peer->ovpn->lock); + + tmp = ovpn_peer_get_by_id(peer->ovpn, peer->id); + if (tmp == peer) { + ovpn_peer_remove(peer, reason, release_list); + ret = 0; + } + + if (tmp) + ovpn_peer_put(tmp); + + return ret; +} + +/** + * ovpn_peer_del_p2p - delete peer from related tables in a P2P instance + * @peer: the peer to delete + * @reason: reason why the peer was deleted (sent to userspace) + * @release_list: list where delete peer should be appended + * + * Return: 0 on success 
or a negative error code otherwise + */ +static int ovpn_peer_del_p2p(struct ovpn_peer *peer, + enum ovpn_del_peer_reason reason, + struct llist_head *release_list) +{ + struct ovpn_peer *tmp; + + lockdep_assert_held(&peer->ovpn->lock); + + tmp = rcu_dereference_protected(peer->ovpn->peer, + lockdep_is_held(&peer->ovpn->lock)); + if (tmp != peer) + return -ENOENT; + + ovpn_peer_remove(peer, reason, release_list); + + return 0; +} + +/** + * ovpn_peer_del - delete peer from related tables + * @peer: the peer object to delete + * @reason: reason for deleting peer (will be sent to userspace) + * + * Return: 0 on success or a negative error code otherwise + */ +int ovpn_peer_del(struct ovpn_peer *peer, enum ovpn_del_peer_reason reason) +{ + LLIST_HEAD(release_list); + int ret = -EOPNOTSUPP; + + spin_lock_bh(&peer->ovpn->lock); + switch (peer->ovpn->mode) { + case OVPN_MODE_MP: + ret = ovpn_peer_del_mp(peer, reason, &release_list); + break; + case OVPN_MODE_P2P: + ret = ovpn_peer_del_p2p(peer, reason, &release_list); + break; + default: + break; + } + unlock_ovpn(peer->ovpn, &release_list); + + return ret; +} + +/** + * ovpn_peer_release_p2p - release peer upon P2P device teardown + * @ovpn: the instance being torn down + * @sk: if not NULL, release peer only if it's using this specific socket + * @reason: the reason for releasing the peer + */ +static void ovpn_peer_release_p2p(struct ovpn_priv *ovpn, struct sock *sk, + enum ovpn_del_peer_reason reason) +{ + struct ovpn_socket *ovpn_sock; + LLIST_HEAD(release_list); + struct ovpn_peer *peer; + + spin_lock_bh(&ovpn->lock); + peer = rcu_dereference_protected(ovpn->peer, + lockdep_is_held(&ovpn->lock)); + if (!peer) { + spin_unlock_bh(&ovpn->lock); + return; + } + + if (sk) { + ovpn_sock = rcu_access_pointer(peer->sock); + if (!ovpn_sock || ovpn_sock->sock->sk != sk) { + spin_unlock_bh(&ovpn->lock); + ovpn_peer_put(peer); + return; + } + } + + ovpn_peer_remove(peer, reason, &release_list); + unlock_ovpn(ovpn, &release_list); +} + +static void ovpn_peers_release_mp(struct ovpn_priv *ovpn, struct sock *sk, + enum ovpn_del_peer_reason reason) +{ + struct ovpn_socket *ovpn_sock; + LLIST_HEAD(release_list); + struct ovpn_peer *peer; + struct hlist_node *tmp; + int bkt; + + spin_lock_bh(&ovpn->lock); + hash_for_each_safe(ovpn->peers->by_id, bkt, tmp, peer, hash_entry_id) { + bool remove = true; + + /* if a socket was passed as argument, skip all peers except + * those using it + */ + if (sk) { + rcu_read_lock(); + ovpn_sock = rcu_dereference(peer->sock); + remove = ovpn_sock && ovpn_sock->sock->sk == sk; + rcu_read_unlock(); + } + + if (remove) + ovpn_peer_remove(peer, reason, &release_list); + } + unlock_ovpn(ovpn, &release_list); +} + +/** + * ovpn_peers_free - free all peers in the instance + * @ovpn: the instance whose peers should be released + * @sk: if not NULL, only peers using this socket are removed and the socket + * is released immediately + * @reason: the reason for releasing all peers + */ +void ovpn_peers_free(struct ovpn_priv *ovpn, struct sock *sk, + enum ovpn_del_peer_reason reason) +{ + switch (ovpn->mode) { + case OVPN_MODE_P2P: + ovpn_peer_release_p2p(ovpn, sk, reason); + break; + case OVPN_MODE_MP: + ovpn_peers_release_mp(ovpn, sk, reason); + break; + } +} + +static time64_t ovpn_peer_keepalive_work_single(struct ovpn_peer *peer, + time64_t now, + struct llist_head *release_list) +{ + time64_t last_recv, last_sent, next_run1, next_run2; + unsigned long timeout, interval; + bool expired; + + spin_lock_bh(&peer->lock); + /* we 
expect both timers to be configured at the same time, + * therefore bail out if either is not set + */ + if (!peer->keepalive_timeout || !peer->keepalive_interval) { + spin_unlock_bh(&peer->lock); + return 0; + } + + /* check for peer timeout */ + expired = false; + timeout = peer->keepalive_timeout; + last_recv = READ_ONCE(peer->last_recv); + if (now < last_recv + timeout) { + peer->keepalive_recv_exp = last_recv + timeout; + next_run1 = peer->keepalive_recv_exp; + } else if (peer->keepalive_recv_exp > now) { + next_run1 = peer->keepalive_recv_exp; + } else { + expired = true; + } + + if (expired) { + /* peer is dead -> kill it and move on */ + spin_unlock_bh(&peer->lock); + netdev_dbg(peer->ovpn->dev, "peer %u expired\n", + peer->id); + ovpn_peer_remove(peer, OVPN_DEL_PEER_REASON_EXPIRED, + release_list); + return 0; + } + + /* check for peer keepalive */ + expired = false; + interval = peer->keepalive_interval; + last_sent = READ_ONCE(peer->last_sent); + if (now < last_sent + interval) { + peer->keepalive_xmit_exp = last_sent + interval; + next_run2 = peer->keepalive_xmit_exp; + } else if (peer->keepalive_xmit_exp > now) { + next_run2 = peer->keepalive_xmit_exp; + } else { + expired = true; + next_run2 = now + interval; + } + spin_unlock_bh(&peer->lock); + + if (expired) { + /* a keepalive packet is required */ + netdev_dbg(peer->ovpn->dev, + "sending keepalive to peer %u\n", + peer->id); + if (schedule_work(&peer->keepalive_work)) + ovpn_peer_hold(peer); + } + + if (next_run1 < next_run2) + return next_run1; + + return next_run2; +} + +static time64_t ovpn_peer_keepalive_work_mp(struct ovpn_priv *ovpn, + time64_t now, + struct llist_head *release_list) +{ + time64_t tmp_next_run, next_run = 0; + struct hlist_node *tmp; + struct ovpn_peer *peer; + int bkt; + + lockdep_assert_held(&ovpn->lock); + + hash_for_each_safe(ovpn->peers->by_id, bkt, tmp, peer, hash_entry_id) { + tmp_next_run = ovpn_peer_keepalive_work_single(peer, now, + release_list); + if (!tmp_next_run) + continue; + + /* the next worker run will be scheduled based on the shortest + * required interval across all peers + */ + if (!next_run || tmp_next_run < next_run) + next_run = tmp_next_run; + } + + return next_run; +} + +static time64_t ovpn_peer_keepalive_work_p2p(struct ovpn_priv *ovpn, + time64_t now, + struct llist_head *release_list) +{ + struct ovpn_peer *peer; + time64_t next_run = 0; + + lockdep_assert_held(&ovpn->lock); + + peer = rcu_dereference_protected(ovpn->peer, + lockdep_is_held(&ovpn->lock)); + if (peer) + next_run = ovpn_peer_keepalive_work_single(peer, now, + release_list); + + return next_run; +} + +/** + * ovpn_peer_keepalive_work - run keepalive logic on each known peer + * @work: pointer to the work member of the related ovpn object + * + * Each peer has two timers (if configured): + * 1. peer timeout: when no data is received for a certain interval, + * the peer is considered dead and it gets killed. + * 2. peer keepalive: when no data is sent to a certain peer for a + * certain interval, a special 'keepalive' packet is explicitly sent. + * + * This function iterates across the whole peer collection while + * checking the timers described above. 
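/* A minimal userspace sketch of the two-timer check performed above for
 * each peer, assuming second-granularity timestamps. peer_check() and
 * its return values are illustrative stand-ins, not driver API: the
 * driver additionally caches the expiries under peer->lock and defers
 * the keepalive transmission to a workqueue.
 */
#include <stdint.h>
#include <stdio.h>

enum peer_state { PEER_OK, PEER_SEND_KEEPALIVE, PEER_EXPIRED };

static enum peer_state peer_check(int64_t now, int64_t last_recv,
				  int64_t timeout, int64_t last_sent,
				  int64_t interval, int64_t *next_run)
{
	enum peer_state state = PEER_OK;
	int64_t recv_exp = last_recv + timeout;		/* peer timeout */
	int64_t xmit_exp = last_sent + interval;	/* keepalive timer */

	if (now >= recv_exp)
		return PEER_EXPIRED;		/* nothing received: kill peer */

	if (now >= xmit_exp) {
		state = PEER_SEND_KEEPALIVE;	/* we stayed silent too long */
		xmit_exp = now + interval;	/* next keepalive from now */
	}

	/* re-run at the earlier of the two expiries */
	*next_run = recv_exp < xmit_exp ? recv_exp : xmit_exp;
	return state;
}

int main(void)
{
	int64_t next;
	enum peer_state s = peer_check(100, 95, 60, 70, 25, &next);

	/* keepalive is due (state 1) and the next check runs at t=125 */
	printf("state=%d next_run=%lld\n", s, (long long)next);
	return 0;
}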
+ */ +void ovpn_peer_keepalive_work(struct work_struct *work) +{ + struct ovpn_priv *ovpn = container_of(work, struct ovpn_priv, + keepalive_work.work); + time64_t next_run = 0, now = ktime_get_real_seconds(); + LLIST_HEAD(release_list); + + spin_lock_bh(&ovpn->lock); + switch (ovpn->mode) { + case OVPN_MODE_MP: + next_run = ovpn_peer_keepalive_work_mp(ovpn, now, + &release_list); + break; + case OVPN_MODE_P2P: + next_run = ovpn_peer_keepalive_work_p2p(ovpn, now, + &release_list); + break; + } + + /* prevent rearming if the interface is being destroyed */ + if (next_run > 0 && + READ_ONCE(ovpn->dev->reg_state) == NETREG_REGISTERED) { + netdev_dbg(ovpn->dev, + "scheduling keepalive work: now=%llu next_run=%llu delta=%llu\n", + next_run, now, next_run - now); + schedule_delayed_work(&ovpn->keepalive_work, + (next_run - now) * HZ); + } + unlock_ovpn(ovpn, &release_list); +} diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h new file mode 100644 index 000000000000..a1423f2b09e0 --- /dev/null +++ b/drivers/net/ovpn/peer.h @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#ifndef _NET_OVPN_OVPNPEER_H_ +#define _NET_OVPN_OVPNPEER_H_ + +#include <net/dst_cache.h> +#include <net/strparser.h> + +#include "crypto.h" +#include "socket.h" +#include "stats.h" + +/** + * struct ovpn_peer - the main remote peer object + * @ovpn: main openvpn instance this peer belongs to + * @dev_tracker: reference tracker for associated dev + * @id: unique identifier + * @vpn_addrs: IP addresses assigned over the tunnel + * @vpn_addrs.ipv4: IPv4 assigned to peer on the tunnel + * @vpn_addrs.ipv6: IPv6 assigned to peer on the tunnel + * @hash_entry_id: entry in the peer ID hashtable + * @hash_entry_addr4: entry in the peer IPv4 hashtable + * @hash_entry_addr6: entry in the peer IPv6 hashtable + * @hash_entry_transp_addr: entry in the peer transport address hashtable + * @sock: the socket being used to talk to this peer + * @tcp: keeps track of TCP specific state + * @tcp.strp: stream parser context (TCP only) + * @tcp.user_queue: received packets that have to go to userspace (TCP only) + * @tcp.out_queue: packets on hold while socket is taken by user (TCP only) + * @tcp.tx_in_progress: true if TX is already ongoing (TCP only) + * @tcp.out_msg.skb: packet scheduled for sending (TCP only) + * @tcp.out_msg.offset: offset where next send should start (TCP only) + * @tcp.out_msg.len: remaining data to send within packet (TCP only) + * @tcp.sk_cb.sk_data_ready: pointer to original cb (TCP only) + * @tcp.sk_cb.sk_write_space: pointer to original cb (TCP only) + * @tcp.sk_cb.prot: pointer to original prot object (TCP only) + * @tcp.sk_cb.ops: pointer to the original prot_ops object (TCP only) + * @crypto: the crypto configuration (ciphers, keys, etc..) 
+ * @dst_cache: cache for dst_entry used to send to peer + * @bind: remote peer binding + * @keepalive_interval: seconds after which a new keepalive should be sent + * @keepalive_xmit_exp: future timestamp when next keepalive should be sent + * @last_sent: timestamp of the last successfully sent packet + * @keepalive_timeout: seconds after which an inactive peer is considered dead + * @keepalive_recv_exp: future timestamp when the peer should expire + * @last_recv: timestamp of the last authenticated received packet + * @vpn_stats: per-peer in-VPN TX/RX stats + * @link_stats: per-peer link/transport TX/RX stats + * @delete_reason: why peer was deleted (i.e. timeout, transport error, ..) + * @lock: protects binding to peer (bind) and keepalive* fields + * @refcount: reference counter + * @rcu: used to free peer in an RCU safe way + * @release_entry: entry for the socket release list + * @keepalive_work: used to schedule keepalive sending + */ +struct ovpn_peer { + struct ovpn_priv *ovpn; + netdevice_tracker dev_tracker; + u32 id; + struct { + struct in_addr ipv4; + struct in6_addr ipv6; + } vpn_addrs; + struct hlist_node hash_entry_id; + struct hlist_nulls_node hash_entry_addr4; + struct hlist_nulls_node hash_entry_addr6; + struct hlist_nulls_node hash_entry_transp_addr; + struct ovpn_socket __rcu *sock; + + struct { + struct strparser strp; + struct sk_buff_head user_queue; + struct sk_buff_head out_queue; + bool tx_in_progress; + + struct { + struct sk_buff *skb; + int offset; + int len; + } out_msg; + + struct { + void (*sk_data_ready)(struct sock *sk); + void (*sk_write_space)(struct sock *sk); + struct proto *prot; + const struct proto_ops *ops; + } sk_cb; + + struct work_struct defer_del_work; + } tcp; + struct ovpn_crypto_state crypto; + struct dst_cache dst_cache; + struct ovpn_bind __rcu *bind; + unsigned long keepalive_interval; + unsigned long keepalive_xmit_exp; + time64_t last_sent; + unsigned long keepalive_timeout; + unsigned long keepalive_recv_exp; + time64_t last_recv; + struct ovpn_peer_stats vpn_stats; + struct ovpn_peer_stats link_stats; + enum ovpn_del_peer_reason delete_reason; + spinlock_t lock; /* protects bind and keepalive* */ + struct kref refcount; + struct rcu_head rcu; + struct llist_node release_entry; + struct work_struct keepalive_work; +}; + +/** + * ovpn_peer_hold - increase reference counter + * @peer: the peer whose counter should be increased + * + * Return: true if the counter was increased or false if it was zero already + */ +static inline bool ovpn_peer_hold(struct ovpn_peer *peer) +{ + return kref_get_unless_zero(&peer->refcount); +} + +void ovpn_peer_release(struct ovpn_peer *peer); +void ovpn_peer_release_kref(struct kref *kref); + +/** + * ovpn_peer_put - decrease reference counter + * @peer: the peer whose counter should be decreased + */ +static inline void ovpn_peer_put(struct ovpn_peer *peer) +{ + kref_put(&peer->refcount, ovpn_peer_release_kref); +} + +struct ovpn_peer *ovpn_peer_new(struct ovpn_priv *ovpn, u32 id); +int ovpn_peer_add(struct ovpn_priv *ovpn, struct ovpn_peer *peer); +int ovpn_peer_del(struct ovpn_peer *peer, enum ovpn_del_peer_reason reason); +void ovpn_peers_free(struct ovpn_priv *ovpn, struct sock *sock, + enum ovpn_del_peer_reason reason); + +struct ovpn_peer *ovpn_peer_get_by_transp_addr(struct ovpn_priv *ovpn, + struct sk_buff *skb); +struct ovpn_peer *ovpn_peer_get_by_id(struct ovpn_priv *ovpn, u32 peer_id); +struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_priv *ovpn, + struct sk_buff *skb); +void 
ovpn_peer_hash_vpn_ip(struct ovpn_peer *peer); +bool ovpn_peer_check_by_src(struct ovpn_priv *ovpn, struct sk_buff *skb, + struct ovpn_peer *peer); + +void ovpn_peer_keepalive_set(struct ovpn_peer *peer, u32 interval, u32 timeout); +void ovpn_peer_keepalive_work(struct work_struct *work); + +void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb); +int ovpn_peer_reset_sockaddr(struct ovpn_peer *peer, + const struct sockaddr_storage *ss, + const void *local_ip); + +#endif /* _NET_OVPN_OVPNPEER_H_ */ diff --git a/drivers/net/ovpn/pktid.c b/drivers/net/ovpn/pktid.c new file mode 100644 index 000000000000..2f29049897e3 --- /dev/null +++ b/drivers/net/ovpn/pktid.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: Antonio Quartulli <antonio@openvpn.net> + * James Yonan <james@openvpn.net> + */ + +#include <linux/atomic.h> +#include <linux/jiffies.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/types.h> + +#include "ovpnpriv.h" +#include "main.h" +#include "pktid.h" + +void ovpn_pktid_xmit_init(struct ovpn_pktid_xmit *pid) +{ + atomic_set(&pid->seq_num, 1); +} + +void ovpn_pktid_recv_init(struct ovpn_pktid_recv *pr) +{ + memset(pr, 0, sizeof(*pr)); + spin_lock_init(&pr->lock); +} + +/* Packet replay detection. + * Allows ID backtrack of up to REPLAY_WINDOW_SIZE - 1. + */ +int ovpn_pktid_recv(struct ovpn_pktid_recv *pr, u32 pkt_id, u32 pkt_time) +{ + const unsigned long now = jiffies; + int ret; + + /* ID must not be zero */ + if (unlikely(pkt_id == 0)) + return -EINVAL; + + spin_lock_bh(&pr->lock); + + /* expire backtracks at or below pr->id after PKTID_RECV_EXPIRE time */ + if (unlikely(time_after_eq(now, pr->expire))) + pr->id_floor = pr->id; + + /* time changed? 
*/ + if (unlikely(pkt_time != pr->time)) { + if (pkt_time > pr->time) { + /* time moved forward, accept */ + pr->base = 0; + pr->extent = 0; + pr->id = 0; + pr->time = pkt_time; + pr->id_floor = 0; + } else { + /* time moved backward, reject */ + ret = -ETIME; + goto out; + } + } + + if (likely(pkt_id == pr->id + 1)) { + /* well-formed ID sequence (incremented by 1) */ + pr->base = REPLAY_INDEX(pr->base, -1); + pr->history[pr->base / 8] |= (1 << (pr->base % 8)); + if (pr->extent < REPLAY_WINDOW_SIZE) + ++pr->extent; + pr->id = pkt_id; + } else if (pkt_id > pr->id) { + /* ID jumped forward by more than one */ + const unsigned int delta = pkt_id - pr->id; + + if (delta < REPLAY_WINDOW_SIZE) { + unsigned int i; + + pr->base = REPLAY_INDEX(pr->base, -delta); + pr->history[pr->base / 8] |= (1 << (pr->base % 8)); + pr->extent += delta; + if (pr->extent > REPLAY_WINDOW_SIZE) + pr->extent = REPLAY_WINDOW_SIZE; + for (i = 1; i < delta; ++i) { + unsigned int newb = REPLAY_INDEX(pr->base, i); + + pr->history[newb / 8] &= ~BIT(newb % 8); + } + } else { + pr->base = 0; + pr->extent = REPLAY_WINDOW_SIZE; + memset(pr->history, 0, sizeof(pr->history)); + pr->history[0] = 1; + } + pr->id = pkt_id; + } else { + /* ID backtrack */ + const unsigned int delta = pr->id - pkt_id; + + if (delta > pr->max_backtrack) + pr->max_backtrack = delta; + if (delta < pr->extent) { + if (pkt_id > pr->id_floor) { + const unsigned int ri = REPLAY_INDEX(pr->base, + delta); + u8 *p = &pr->history[ri / 8]; + const u8 mask = (1 << (ri % 8)); + + if (*p & mask) { + ret = -EINVAL; + goto out; + } + *p |= mask; + } else { + ret = -EINVAL; + goto out; + } + } else { + ret = -EINVAL; + goto out; + } + } + + pr->expire = now + PKTID_RECV_EXPIRE; + ret = 0; +out: + spin_unlock_bh(&pr->lock); + return ret; +} diff --git a/drivers/net/ovpn/pktid.h b/drivers/net/ovpn/pktid.h new file mode 100644 index 000000000000..0262d026d15e --- /dev/null +++ b/drivers/net/ovpn/pktid.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: Antonio Quartulli <antonio@openvpn.net> + * James Yonan <james@openvpn.net> + */ + +#ifndef _NET_OVPN_OVPNPKTID_H_ +#define _NET_OVPN_OVPNPKTID_H_ + +#include "proto.h" + +/* If no packets received for this length of time, set a backtrack floor + * at highest received packet ID thus far. + */ +#define PKTID_RECV_EXPIRE (30 * HZ) + +/* Packet-ID state for transmitter */ +struct ovpn_pktid_xmit { + atomic_t seq_num; +}; + +/* replay window sizing in bytes = 2^REPLAY_WINDOW_ORDER */ +#define REPLAY_WINDOW_ORDER 8 + +#define REPLAY_WINDOW_BYTES BIT(REPLAY_WINDOW_ORDER) +#define REPLAY_WINDOW_SIZE (REPLAY_WINDOW_BYTES * 8) +#define REPLAY_INDEX(base, i) (((base) + (i)) & (REPLAY_WINDOW_SIZE - 1)) + +/* Packet-ID state for receiver. + * Other than lock member, can be zeroed to initialize. 
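/* A condensed, runnable sketch of the sliding replay window implemented
 * by ovpn_pktid_recv() above, assuming a 64-bit mask instead of the
 * driver's REPLAY_WINDOW_SIZE-bit history and ignoring the timestamp
 * and id_floor handling; replay_check() is illustrative only.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define WINDOW 64

static uint64_t history;	/* bit d set: ID (highest - d) seen */
static uint32_t highest;	/* highest packet ID accepted so far */

static bool replay_check(uint32_t id)
{
	if (id == 0)
		return false;			/* ID 0 is invalid */

	if (id > highest) {
		uint32_t delta = id - highest;

		/* slide the window forward, marking the new ID as seen */
		history = delta < WINDOW ? (history << delta) | 1 : 1;
		highest = id;
		return true;
	}

	uint32_t delta = highest - id;

	if (delta >= WINDOW)
		return false;			/* too old to track */
	if (history & (1ULL << delta))
		return false;			/* replay: already seen */
	history |= 1ULL << delta;		/* backtrack: accept once */
	return true;
}

int main(void)
{
	uint32_t ids[] = { 1, 2, 5, 3, 3, 70, 5 };

	for (unsigned int i = 0; i < sizeof(ids) / sizeof(ids[0]); i++)
		printf("id=%u -> %s\n", ids[i],
		       replay_check(ids[i]) ? "accept" : "reject");
	return 0;
}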
+ */ +struct ovpn_pktid_recv { + /* "sliding window" bitmask of recent packet IDs received */ + u8 history[REPLAY_WINDOW_BYTES]; + /* bit position of deque base in history */ + unsigned int base; + /* extent (in bits) of deque in history */ + unsigned int extent; + /* expiration of history in jiffies */ + unsigned long expire; + /* highest sequence number received */ + u32 id; + /* highest time stamp received */ + u32 time; + /* we will only accept backtrack IDs > id_floor */ + u32 id_floor; + unsigned int max_backtrack; + /* protects entire packet ID state */ + spinlock_t lock; +}; + +/* Get the next packet ID for xmit */ +static inline int ovpn_pktid_xmit_next(struct ovpn_pktid_xmit *pid, u32 *pktid) +{ + const u32 seq_num = atomic_fetch_add_unless(&pid->seq_num, 1, 0); + /* when the 32bit space is exhausted, we return an error because the packet + * ID is used to create the cipher IV and we do not want to reuse the + * same value more than once + */ + if (unlikely(!seq_num)) + return -ERANGE; + + *pktid = seq_num; + + return 0; +} + +/* Write 12-byte AEAD IV to dest */ +static inline void ovpn_pktid_aead_write(const u32 pktid, + const u8 nt[], + unsigned char *dest) +{ + *(__force __be32 *)(dest) = htonl(pktid); + BUILD_BUG_ON(4 + OVPN_NONCE_TAIL_SIZE != OVPN_NONCE_SIZE); + memcpy(dest + 4, nt, OVPN_NONCE_TAIL_SIZE); +} + +void ovpn_pktid_xmit_init(struct ovpn_pktid_xmit *pid); +void ovpn_pktid_recv_init(struct ovpn_pktid_recv *pr); + +int ovpn_pktid_recv(struct ovpn_pktid_recv *pr, u32 pkt_id, u32 pkt_time); + +#endif /* _NET_OVPN_OVPNPKTID_H_ */ diff --git a/drivers/net/ovpn/proto.h b/drivers/net/ovpn/proto.h new file mode 100644 index 000000000000..b7d285b4d9c1 --- /dev/null +++ b/drivers/net/ovpn/proto.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: Antonio Quartulli <antonio@openvpn.net> + * James Yonan <james@openvpn.net> + */ + +#ifndef _NET_OVPN_PROTO_H_ +#define _NET_OVPN_PROTO_H_ + +#include "main.h" + +#include <linux/bitfield.h> +#include <linux/skbuff.h> + +/* When the OpenVPN protocol is run in AEAD mode, use + * the OpenVPN packet ID as the AEAD nonce: + * + * 00000005 521c3b01 4308c041 + * [seq # ] [ nonce_tail ] + * [ 12-byte full IV ] -> OVPN_NONCE_SIZE + * [4-bytes -> OVPN_NONCE_WIRE_SIZE + * on wire] + */ + +/* nonce size (96bits) as required by AEAD ciphers */ +#define OVPN_NONCE_SIZE 12 +/* last 8 bytes of AEAD nonce: provided by userspace and usually derived + * from key material generated during TLS handshake + */ +#define OVPN_NONCE_TAIL_SIZE 8 + +/* OpenVPN nonce size reduced by 8-byte nonce tail -- this is the + * size of the AEAD Associated Data (AD) sent over the wire + * and is normally the head of the IV + */ +#define OVPN_NONCE_WIRE_SIZE (OVPN_NONCE_SIZE - OVPN_NONCE_TAIL_SIZE) + +#define OVPN_OPCODE_SIZE 4 /* DATA_V2 opcode size */ +#define OVPN_OPCODE_KEYID_MASK 0x07000000 +#define OVPN_OPCODE_PKTTYPE_MASK 0xF8000000 +#define OVPN_OPCODE_PEERID_MASK 0x00FFFFFF + +/* packet opcodes of interest to us */ +#define OVPN_DATA_V1 6 /* data channel v1 packet */ +#define OVPN_DATA_V2 9 /* data channel v2 packet */ + +#define OVPN_PEER_ID_UNDEF 0x00FFFFFF + +/** + * ovpn_opcode_from_skb - extract OP code from skb at specified offset + * @skb: the packet to extract the OP code from + * @offset: the offset in the data buffer where the OP code is located + * + * Note: this function assumes that the skb head was pulled enough + * to access the first 4 bytes.
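/* A standalone sketch of the DATA_V2 header word laid out by the masks
 * above: 5 bits of packet type, 3 bits of key ID and 24 bits of peer ID
 * in one big-endian 32-bit word. Plain shifts stand in for
 * FIELD_PREP()/FIELD_GET(); the sample values are made up.
 */
#include <stdint.h>
#include <stdio.h>

#define PKTTYPE_SHIFT	27		/* OVPN_OPCODE_PKTTYPE_MASK 0xF8000000 */
#define KEYID_SHIFT	24		/* OVPN_OPCODE_KEYID_MASK   0x07000000 */
#define PEERID_MASK	0x00FFFFFFu	/* OVPN_OPCODE_PEERID_MASK */

static uint32_t opcode_compose(uint8_t opcode, uint8_t key_id, uint32_t peer_id)
{
	return ((uint32_t)opcode << PKTTYPE_SHIFT) |
	       ((uint32_t)(key_id & 0x7) << KEYID_SHIFT) |
	       (peer_id & PEERID_MASK);
}

int main(void)
{
	uint32_t word = opcode_compose(9 /* OVPN_DATA_V2 */, 2, 0x1234);

	/* prints: word=0x4a001234 opcode=9 key=2 peer=0x001234 */
	printf("word=0x%08x opcode=%u key=%u peer=0x%06x\n", word,
	       word >> PKTTYPE_SHIFT, (word >> KEYID_SHIFT) & 0x7,
	       word & PEERID_MASK);
	return 0;
}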
+ * + * Return: the OP code + */ +static inline u8 ovpn_opcode_from_skb(const struct sk_buff *skb, u16 offset) +{ + u32 opcode = be32_to_cpu(*(__be32 *)(skb->data + offset)); + + return FIELD_GET(OVPN_OPCODE_PKTTYPE_MASK, opcode); +} + +/** + * ovpn_peer_id_from_skb - extract peer ID from skb at specified offset + * @skb: the packet to extract the peer ID from + * @offset: the offset in the data buffer where the OP code is located + * + * Note: this function assumes that the skb head was pulled enough + * to access the first 4 bytes. + * + * Return: the peer ID + */ +static inline u32 ovpn_peer_id_from_skb(const struct sk_buff *skb, u16 offset) +{ + u32 opcode = be32_to_cpu(*(__be32 *)(skb->data + offset)); + + return FIELD_GET(OVPN_OPCODE_PEERID_MASK, opcode); +} + +/** + * ovpn_key_id_from_skb - extract key ID from the skb head + * @skb: the packet to extract the key ID from + * + * Note: this function assumes that the skb head was pulled enough + * to access the first 4 bytes. + * + * Return: the key ID + */ +static inline u8 ovpn_key_id_from_skb(const struct sk_buff *skb) +{ + u32 opcode = be32_to_cpu(*(__be32 *)skb->data); + + return FIELD_GET(OVPN_OPCODE_KEYID_MASK, opcode); +} + +/** + * ovpn_opcode_compose - combine OP code, key ID and peer ID to wire format + * @opcode: the OP code + * @key_id: the key ID + * @peer_id: the peer ID + * + * Return: a 4-byte integer obtained by combining all input values following the + * OpenVPN wire format. This integer can then be written to the packet header. + */ +static inline u32 ovpn_opcode_compose(u8 opcode, u8 key_id, u32 peer_id) +{ + return FIELD_PREP(OVPN_OPCODE_PKTTYPE_MASK, opcode) | + FIELD_PREP(OVPN_OPCODE_KEYID_MASK, key_id) | + FIELD_PREP(OVPN_OPCODE_PEERID_MASK, peer_id); +} + +#endif /* _NET_OVPN_OVPNPROTO_H_ */ diff --git a/drivers/net/ovpn/skb.h b/drivers/net/ovpn/skb.h new file mode 100644 index 000000000000..64430880f1da --- /dev/null +++ b/drivers/net/ovpn/skb.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: Antonio Quartulli <antonio@openvpn.net> + * James Yonan <james@openvpn.net> + */ + +#ifndef _NET_OVPN_SKB_H_ +#define _NET_OVPN_SKB_H_ + +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/skbuff.h> +#include <linux/socket.h> +#include <linux/types.h> + +struct ovpn_cb { + struct ovpn_peer *peer; + struct ovpn_crypto_key_slot *ks; + struct aead_request *req; + struct scatterlist *sg; + u8 *iv; + unsigned int payload_offset; + bool nosignal; +}; + +static inline struct ovpn_cb *ovpn_skb_cb(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct ovpn_cb) > sizeof(skb->cb)); + return (struct ovpn_cb *)skb->cb; +} + +/* Return IP protocol version from skb header. + * Return 0 if protocol is not IPv4/IPv6 or cannot be read.
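/* ovpn_ip_check_protocol() above keys off the IP version nibble. A tiny
 * standalone illustration of that probe; the sample bytes are made up
 * (0x45 = typical IPv4 header byte, 0x60 = typical IPv6 header byte).
 */
#include <stdint.h>
#include <stdio.h>

static const char *ip_version(const uint8_t *pkt)
{
	switch (pkt[0] >> 4) {	/* top nibble of the first header byte */
	case 4:
		return "IPv4";
	case 6:
		return "IPv6";
	default:
		return "not IP";
	}
}

int main(void)
{
	const uint8_t v4 = 0x45, v6 = 0x60, other = 0x00;

	printf("%s %s %s\n", ip_version(&v4), ip_version(&v6),
	       ip_version(&other));	/* prints: IPv4 IPv6 not IP */
	return 0;
}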
+ */ +static inline __be16 ovpn_ip_check_protocol(struct sk_buff *skb) +{ + __be16 proto = 0; + + /* skb could be non-linear, + * make sure IP header is in non-fragmented part + */ + if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) + return 0; + + if (ip_hdr(skb)->version == 4) { + proto = htons(ETH_P_IP); + } else if (ip_hdr(skb)->version == 6) { + if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) + return 0; + proto = htons(ETH_P_IPV6); + } + + return proto; +} + +#endif /* _NET_OVPN_SKB_H_ */ diff --git a/drivers/net/ovpn/socket.c b/drivers/net/ovpn/socket.c new file mode 100644 index 000000000000..a83cbab72591 --- /dev/null +++ b/drivers/net/ovpn/socket.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/udp.h> + +#include "ovpnpriv.h" +#include "main.h" +#include "io.h" +#include "peer.h" +#include "socket.h" +#include "tcp.h" +#include "udp.h" + +static void ovpn_socket_release_kref(struct kref *kref) +{ + struct ovpn_socket *sock = container_of(kref, struct ovpn_socket, + refcount); + + if (sock->sock->sk->sk_protocol == IPPROTO_UDP) + ovpn_udp_socket_detach(sock); + else if (sock->sock->sk->sk_protocol == IPPROTO_TCP) + ovpn_tcp_socket_detach(sock); +} + +/** + * ovpn_socket_put - decrease reference counter + * @peer: peer whose socket reference counter should be decreased + * @sock: the RCU protected peer socket + * + * This function is only used internally. Users wishing to release + * references to the ovpn_socket should use ovpn_socket_release(). + * + * Return: true if the socket was released, false otherwise + */ +static bool ovpn_socket_put(struct ovpn_peer *peer, struct ovpn_socket *sock) +{ + return kref_put(&sock->refcount, ovpn_socket_release_kref); +} + +/** + * ovpn_socket_release - release resources owned by socket user + * @peer: peer whose socket should be released + * + * This function should be invoked when the peer is being removed + * and wants to drop its link to the socket. + * + * In case of UDP, the detach routine will drop a reference to the + * ovpn netdev, pointed to by the ovpn_socket. + * + * In case of TCP, releasing the socket will drop + * the refcounter of the peer it is linked to, thus allowing the peer + * to disappear as well. + * + * This function is expected to be invoked exactly once per peer. + * + * NOTE: this function may sleep + */ +void ovpn_socket_release(struct ovpn_peer *peer) +{ + struct ovpn_socket *sock; + bool released; + + might_sleep(); + + sock = rcu_replace_pointer(peer->sock, NULL, true); + /* release may be invoked after socket was detached */ + if (!sock) + return; + + /* sanity check: we should not end up here if the socket + * was already closed + */ + if (!sock->sock->sk) { + DEBUG_NET_WARN_ON_ONCE(1); + return; + } + + /* Drop the reference while holding the sock lock to avoid a + * concurrent ovpn_socket_new() call messing with a partially + * detached socket. + * + * Holding the lock ensures that a socket with refcnt 0 is fully + * detached before it can be picked by a concurrent reader.
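/* A minimal userspace analogue of the ordering described above: the
 * last reference is dropped while the owner lock is held, a grace
 * period flushes any concurrent reader, and only then is the memory
 * freed. grace_period_wait() is a stand-in for synchronize_rcu(); all
 * names here are hypothetical.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	int refcnt;		/* reaches zero only under ->lock */
	pthread_mutex_t lock;
};

static void grace_period_wait(void)
{
	/* stand-in: after this, no reader holds a stale pointer */
}

static void obj_release(struct obj *o)
{
	bool released;

	pthread_mutex_lock(&o->lock);		/* lock_sock() analogue */
	released = --o->refcnt == 0;
	pthread_mutex_unlock(&o->lock);

	grace_period_wait();			/* synchronize_rcu() analogue */

	if (released) {
		free(o);	/* plain free is safe: readers are gone */
		printf("object freed\n");
	}
}

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	pthread_mutex_init(&o->lock, NULL);
	o->refcnt = 1;
	obj_release(o);
	return 0;
}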
+ */ + lock_sock(sock->sock->sk); + released = ovpn_socket_put(peer, sock); + release_sock(sock->sock->sk); + + /* align all readers with sk_user_data being NULL */ + synchronize_rcu(); + + /* following cleanup should happen with lock released */ + if (released) { + if (sock->sock->sk->sk_protocol == IPPROTO_UDP) { + netdev_put(sock->ovpn->dev, &sock->dev_tracker); + } else if (sock->sock->sk->sk_protocol == IPPROTO_TCP) { + /* wait for TCP jobs to terminate */ + ovpn_tcp_socket_wait_finish(sock); + ovpn_peer_put(sock->peer); + } + /* we can call plain kfree() because we already waited one RCU + * period due to synchronize_rcu() + */ + kfree(sock); + } +} + +static bool ovpn_socket_hold(struct ovpn_socket *sock) +{ + return kref_get_unless_zero(&sock->refcount); +} + +static int ovpn_socket_attach(struct ovpn_socket *sock, struct ovpn_peer *peer) +{ + if (sock->sock->sk->sk_protocol == IPPROTO_UDP) + return ovpn_udp_socket_attach(sock, peer->ovpn); + else if (sock->sock->sk->sk_protocol == IPPROTO_TCP) + return ovpn_tcp_socket_attach(sock, peer); + + return -EOPNOTSUPP; +} + +/** + * ovpn_socket_new - create a new socket and initialize it + * @sock: the kernel socket to embed + * @peer: the peer reachable via this socket + * + * Return: an openvpn socket on success or a negative error code otherwise + */ +struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer) +{ + struct ovpn_socket *ovpn_sock; + int ret; + + lock_sock(sock->sk); + + /* a TCP socket can only be owned by a single peer, therefore there + * can't be any other user + */ + if (sock->sk->sk_protocol == IPPROTO_TCP && sock->sk->sk_user_data) { + ovpn_sock = ERR_PTR(-EBUSY); + goto sock_release; + } + + /* a UDP socket can be shared across multiple peers, but we must make + * sure it is not owned by something else + */ + if (sock->sk->sk_protocol == IPPROTO_UDP) { + u8 type = READ_ONCE(udp_sk(sock->sk)->encap_type); + + /* socket owned by other encapsulation module */ + if (type && type != UDP_ENCAP_OVPNINUDP) { + ovpn_sock = ERR_PTR(-EBUSY); + goto sock_release; + } + + rcu_read_lock(); + ovpn_sock = rcu_dereference_sk_user_data(sock->sk); + if (ovpn_sock) { + /* socket owned by another ovpn instance, we can't use it */ + if (ovpn_sock->ovpn != peer->ovpn) { + ovpn_sock = ERR_PTR(-EBUSY); + rcu_read_unlock(); + goto sock_release; + } + + /* this socket is already owned by this instance, + * therefore we can increase the refcounter and + * use it as expected + */ + if (WARN_ON(!ovpn_socket_hold(ovpn_sock))) { + /* this should never happen because setting + * the refcnt to 0 and detaching the socket + * is expected to be atomic + */ + ovpn_sock = ERR_PTR(-EAGAIN); + rcu_read_unlock(); + goto sock_release; + } + + rcu_read_unlock(); + goto sock_release; + } + rcu_read_unlock(); + } + + /* socket is not owned: attach to this ovpn instance */ + + ovpn_sock = kzalloc(sizeof(*ovpn_sock), GFP_KERNEL); + if (!ovpn_sock) { + ovpn_sock = ERR_PTR(-ENOMEM); + goto sock_release; + } + + ovpn_sock->sock = sock; + kref_init(&ovpn_sock->refcount); + + ret = ovpn_socket_attach(ovpn_sock, peer); + if (ret < 0) { + kfree(ovpn_sock); + ovpn_sock = ERR_PTR(ret); + goto sock_release; + } + + /* TCP sockets are per-peer, therefore they are linked to their unique + * peer + */ + if (sock->sk->sk_protocol == IPPROTO_TCP) { + INIT_WORK(&ovpn_sock->tcp_tx_work, ovpn_tcp_tx_work); + ovpn_sock->peer = peer; + ovpn_peer_hold(peer); + } else if (sock->sk->sk_protocol == IPPROTO_UDP) { + /* in UDP we only link the ovpn instance since 
the socket is + * shared among multiple peers + */ + ovpn_sock->ovpn = peer->ovpn; + netdev_hold(peer->ovpn->dev, &ovpn_sock->dev_tracker, + GFP_KERNEL); + } + + rcu_assign_sk_user_data(sock->sk, ovpn_sock); +sock_release: + release_sock(sock->sk); + return ovpn_sock; +} diff --git a/drivers/net/ovpn/socket.h b/drivers/net/ovpn/socket.h new file mode 100644 index 000000000000..00d856b1a5d8 --- /dev/null +++ b/drivers/net/ovpn/socket.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#ifndef _NET_OVPN_SOCK_H_ +#define _NET_OVPN_SOCK_H_ + +#include <linux/net.h> +#include <linux/kref.h> +#include <net/sock.h> + +struct ovpn_priv; +struct ovpn_peer; + +/** + * struct ovpn_socket - a kernel socket referenced in the ovpn code + * @ovpn: ovpn instance owning this socket (UDP only) + * @dev_tracker: reference tracker for associated dev (UDP only) + * @peer: unique peer transmitting over this socket (TCP only) + * @sock: the low level sock object + * @refcount: amount of contexts currently referencing this object + * @work: member used to schedule release routine (it may block) + * @tcp_tx_work: work for deferring outgoing packet processing (TCP only) + */ +struct ovpn_socket { + union { + struct { + struct ovpn_priv *ovpn; + netdevice_tracker dev_tracker; + }; + struct ovpn_peer *peer; + }; + + struct socket *sock; + struct kref refcount; + struct work_struct work; + struct work_struct tcp_tx_work; +}; + +struct ovpn_socket *ovpn_socket_new(struct socket *sock, + struct ovpn_peer *peer); +void ovpn_socket_release(struct ovpn_peer *peer); + +#endif /* _NET_OVPN_SOCK_H_ */ diff --git a/drivers/net/ovpn/stats.c b/drivers/net/ovpn/stats.c new file mode 100644 index 000000000000..d637143473bb --- /dev/null +++ b/drivers/net/ovpn/stats.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + */ + +#include <linux/atomic.h> + +#include "stats.h" + +void ovpn_peer_stats_init(struct ovpn_peer_stats *ps) +{ + atomic64_set(&ps->rx.bytes, 0); + atomic64_set(&ps->rx.packets, 0); + + atomic64_set(&ps->tx.bytes, 0); + atomic64_set(&ps->tx.packets, 0); +} diff --git a/drivers/net/ovpn/stats.h b/drivers/net/ovpn/stats.h new file mode 100644 index 000000000000..53433d8b6c33 --- /dev/null +++ b/drivers/net/ovpn/stats.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* OpenVPN data channel offload + * + * Copyright (C) 2020-2025 OpenVPN, Inc. 
+ * + * Author: James Yonan <james@openvpn.net> + * Antonio Quartulli <antonio@openvpn.net> + * Lev Stipakov <lev@openvpn.net> + */ + +#ifndef _NET_OVPN_OVPNSTATS_H_ +#define _NET_OVPN_OVPNSTATS_H_ + +/* one stat */ +struct ovpn_peer_stat { + atomic64_t bytes; + atomic64_t packets; +}; + +/* rx and tx stats combined */ +struct ovpn_peer_stats { + struct ovpn_peer_stat rx; + struct ovpn_peer_stat tx; +}; + +void ovpn_peer_stats_init(struct ovpn_peer_stats *ps); + +static inline void ovpn_peer_stats_increment(struct ovpn_peer_stat *stat, + const unsigned int n) +{ + atomic64_add(n, &stat->bytes); + atomic64_inc(&stat->packets); +} + +static inline void ovpn_peer_stats_increment_rx(struct ovpn_peer_stats *stats, + const unsigned int n) +{ + ovpn_peer_stats_increment(&stats->rx, n); +} + +static inline void ovpn_peer_stats_increment_tx(struct ovpn_peer_stats *stats, + const unsigned int n) +{ + ovpn_peer_stats_increment(&stats->tx, n); +} + +#endif /* _NET_OVPN_OVPNSTATS_H_ */ diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c new file mode 100644 index 000000000000..7c42d84987ad --- /dev/null +++ b/drivers/net/ovpn/tcp.c @@ -0,0 +1,598 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel offload + * + * Copyright (C) 2019-2025 OpenVPN, Inc. + * + * Author: Antonio Quartulli <antonio@openvpn.net> + */ + +#include <linux/skbuff.h> +#include <net/hotdata.h> +#include <net/inet_common.h> +#include <net/ipv6.h> +#include <net/tcp.h> +#include <net/transp_v6.h> +#include <net/route.h> +#include <trace/events/sock.h> + +#include "ovpnpriv.h" +#include "main.h" +#include "io.h" +#include "peer.h" +#include "proto.h" +#include "skb.h" +#include "tcp.h" + +#define OVPN_TCP_DEPTH_NESTING 2 +#if OVPN_TCP_DEPTH_NESTING == SINGLE_DEPTH_NESTING +#error "OVPN TCP requires its own lockdep subclass" +#endif + +static struct proto ovpn_tcp_prot __ro_after_init; +static struct proto_ops ovpn_tcp_ops __ro_after_init; +static struct proto ovpn_tcp6_prot __ro_after_init; +static struct proto_ops ovpn_tcp6_ops __ro_after_init; + +static int ovpn_tcp_parse(struct strparser *strp, struct sk_buff *skb) +{ + struct strp_msg *rxm = strp_msg(skb); + __be16 blen; + u16 len; + int err; + + /* when packets are written to the TCP stream, they are prepended with + * two bytes indicating the actual packet size. 
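/* A runnable userspace sketch of this two-byte framing: every packet on
 * the TCP stream is preceded by its length in network byte order, which
 * is what ovpn_tcp_parse() recovers. frame_parse() is an illustrative
 * stand-in and skips the driver's error handling.
 */
#include <arpa/inet.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Return the total frame size (2-byte header plus payload) when a whole
 * frame is available, or 0 when more bytes are needed.
 */
static size_t frame_parse(const uint8_t *buf, size_t avail)
{
	uint16_t blen;

	if (avail < 2)
		return 0;
	memcpy(&blen, buf, sizeof(blen));
	blen = ntohs(blen);
	return avail < (size_t)blen + 2 ? 0 : (size_t)blen + 2;
}

int main(void)
{
	/* a 3-byte packet "abc" followed by a truncated 2-byte packet */
	const uint8_t stream[] = { 0x00, 0x03, 'a', 'b', 'c', 0x00, 0x02, 'x' };

	printf("frame1=%zu\n", frame_parse(stream, sizeof(stream)));	/* 5 */
	printf("frame2=%zu\n", frame_parse(stream + 5, 3));		/* 0 */
	return 0;
}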
+ * Parse accordingly and return the actual size (including the size + * header) + */ + + if (skb->len < rxm->offset + 2) + return 0; + + err = skb_copy_bits(skb, rxm->offset, &blen, sizeof(blen)); + if (err < 0) + return err; + + len = be16_to_cpu(blen); + if (len < 2) + return -EINVAL; + + return len + 2; +} + +/* queue skb for sending to userspace via recvmsg on the socket */ +static void ovpn_tcp_to_userspace(struct ovpn_peer *peer, struct sock *sk, + struct sk_buff *skb) +{ + skb_set_owner_r(skb, sk); + memset(skb->cb, 0, sizeof(skb->cb)); + skb_queue_tail(&peer->tcp.user_queue, skb); + peer->tcp.sk_cb.sk_data_ready(sk); +} + +static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb) +{ + struct ovpn_peer *peer = container_of(strp, struct ovpn_peer, tcp.strp); + struct strp_msg *msg = strp_msg(skb); + size_t pkt_len = msg->full_len - 2; + size_t off = msg->offset + 2; + u8 opcode; + + /* ensure skb->data points to the beginning of the openvpn packet */ + if (!pskb_pull(skb, off)) { + net_warn_ratelimited("%s: packet too small for peer %u\n", + netdev_name(peer->ovpn->dev), peer->id); + goto err; + } + + /* strparser does not trim the skb for us, therefore we do it now */ + if (pskb_trim(skb, pkt_len) != 0) { + net_warn_ratelimited("%s: trimming skb failed for peer %u\n", + netdev_name(peer->ovpn->dev), peer->id); + goto err; + } + + /* we need the first 4 bytes of data to be accessible + * to extract the opcode and the key ID later on + */ + if (!pskb_may_pull(skb, OVPN_OPCODE_SIZE)) { + net_warn_ratelimited("%s: packet too small to fetch opcode for peer %u\n", + netdev_name(peer->ovpn->dev), peer->id); + goto err; + } + + /* DATA_V2 packets are handled in kernel, the rest goes to user space */ + opcode = ovpn_opcode_from_skb(skb, 0); + if (unlikely(opcode != OVPN_DATA_V2)) { + if (opcode == OVPN_DATA_V1) { + net_warn_ratelimited("%s: DATA_V1 detected on the TCP stream\n", + netdev_name(peer->ovpn->dev)); + goto err; + } + + /* The packet size header must be there when sending the packet + * to userspace, therefore we put it back + */ + skb_push(skb, 2); + ovpn_tcp_to_userspace(peer, strp->sk, skb); + return; + } + + /* hold reference to peer as required by ovpn_recv(). 
+ * + * NOTE: in this context we should already be holding a reference to + * this peer, therefore ovpn_peer_hold() is not expected to fail + */ + if (WARN_ON(!ovpn_peer_hold(peer))) + goto err; + + ovpn_recv(peer, skb); + return; +err: + dev_dstats_rx_dropped(peer->ovpn->dev); + kfree_skb(skb); + ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_ERROR); +} + +static int ovpn_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int flags, int *addr_len) +{ + int err = 0, off, copied = 0, ret; + struct ovpn_socket *sock; + struct ovpn_peer *peer; + struct sk_buff *skb; + + rcu_read_lock(); + sock = rcu_dereference_sk_user_data(sk); + if (unlikely(!sock || !sock->peer || !ovpn_peer_hold(sock->peer))) { + rcu_read_unlock(); + return -EBADF; + } + peer = sock->peer; + rcu_read_unlock(); + + skb = __skb_recv_datagram(sk, &peer->tcp.user_queue, flags, &off, &err); + if (!skb) { + if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN) { + ret = 0; + goto out; + } + ret = err; + goto out; + } + + copied = len; + if (copied > skb->len) + copied = skb->len; + else if (copied < skb->len) + msg->msg_flags |= MSG_TRUNC; + + err = skb_copy_datagram_msg(skb, 0, msg, copied); + if (unlikely(err)) { + kfree_skb(skb); + ret = err; + goto out; + } + + if (flags & MSG_TRUNC) + copied = skb->len; + kfree_skb(skb); + ret = copied; +out: + ovpn_peer_put(peer); + return ret; +} + +void ovpn_tcp_socket_detach(struct ovpn_socket *ovpn_sock) +{ + struct ovpn_peer *peer = ovpn_sock->peer; + struct socket *sock = ovpn_sock->sock; + + strp_stop(&peer->tcp.strp); + skb_queue_purge(&peer->tcp.user_queue); + + /* restore CBs that were saved in ovpn_sock_set_tcp_cb() */ + sock->sk->sk_data_ready = peer->tcp.sk_cb.sk_data_ready; + sock->sk->sk_write_space = peer->tcp.sk_cb.sk_write_space; + sock->sk->sk_prot = peer->tcp.sk_cb.prot; + sock->sk->sk_socket->ops = peer->tcp.sk_cb.ops; + + rcu_assign_sk_user_data(sock->sk, NULL); +} + +void ovpn_tcp_socket_wait_finish(struct ovpn_socket *sock) +{ + struct ovpn_peer *peer = sock->peer; + + /* NOTE: we don't wait for peer->tcp.defer_del_work to finish: + * either the worker is not running or this function + * was invoked by that worker. + */ + + cancel_work_sync(&sock->tcp_tx_work); + strp_done(&peer->tcp.strp); + + skb_queue_purge(&peer->tcp.out_queue); + kfree_skb(peer->tcp.out_msg.skb); + peer->tcp.out_msg.skb = NULL; +} + +static void ovpn_tcp_send_sock(struct ovpn_peer *peer, struct sock *sk) +{ + struct sk_buff *skb = peer->tcp.out_msg.skb; + int ret, flags; + + if (!skb) + return; + + if (peer->tcp.tx_in_progress) + return; + + peer->tcp.tx_in_progress = true; + + do { + flags = ovpn_skb_cb(skb)->nosignal ? MSG_NOSIGNAL : 0; + ret = skb_send_sock_locked_with_flags(sk, skb, + peer->tcp.out_msg.offset, + peer->tcp.out_msg.len, + flags); + if (unlikely(ret < 0)) { + if (ret == -EAGAIN) + goto out; + + net_warn_ratelimited("%s: TCP error to peer %u: %d\n", + netdev_name(peer->ovpn->dev), + peer->id, ret); + + /* in case of TCP error we can't recover the VPN + * stream therefore we abort the connection + */ + ovpn_peer_hold(peer); + schedule_work(&peer->tcp.defer_del_work); + + /* we bail out immediately and keep tx_in_progress set + * to true. 
This way we prevent more TX attempts + * which would lead to more invocations of + * schedule_work() + */ + return; + } + + peer->tcp.out_msg.len -= ret; + peer->tcp.out_msg.offset += ret; + } while (peer->tcp.out_msg.len > 0); + + if (!peer->tcp.out_msg.len) { + preempt_disable(); + dev_dstats_tx_add(peer->ovpn->dev, skb->len); + preempt_enable(); + } + + kfree_skb(peer->tcp.out_msg.skb); + peer->tcp.out_msg.skb = NULL; + peer->tcp.out_msg.len = 0; + peer->tcp.out_msg.offset = 0; + +out: + peer->tcp.tx_in_progress = false; +} + +void ovpn_tcp_tx_work(struct work_struct *work) +{ + struct ovpn_socket *sock; + + sock = container_of(work, struct ovpn_socket, tcp_tx_work); + + lock_sock(sock->sock->sk); + if (sock->peer) + ovpn_tcp_send_sock(sock->peer, sock->sock->sk); + release_sock(sock->sock->sk); +} + +static void ovpn_tcp_send_sock_skb(struct ovpn_peer *peer, struct sock *sk, + struct sk_buff *skb) +{ + if (peer->tcp.out_msg.skb) + ovpn_tcp_send_sock(peer, sk); + + if (peer->tcp.out_msg.skb) { + dev_dstats_tx_dropped(peer->ovpn->dev); + kfree_skb(skb); + return; + } + + peer->tcp.out_msg.skb = skb; + peer->tcp.out_msg.len = skb->len; + peer->tcp.out_msg.offset = 0; + ovpn_tcp_send_sock(peer, sk); +} + +void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct socket *sock, + struct sk_buff *skb) +{ + u16 len = skb->len; + + *(__be16 *)__skb_push(skb, sizeof(u16)) = htons(len); + + spin_lock_nested(&sock->sk->sk_lock.slock, OVPN_TCP_DEPTH_NESTING); + if (sock_owned_by_user(sock->sk)) { + if (skb_queue_len(&peer->tcp.out_queue) >= + READ_ONCE(net_hotdata.max_backlog)) { + dev_dstats_tx_dropped(peer->ovpn->dev); + kfree_skb(skb); + goto unlock; + } + __skb_queue_tail(&peer->tcp.out_queue, skb); + } else { + ovpn_tcp_send_sock_skb(peer, sock->sk, skb); + } +unlock: + spin_unlock(&sock->sk->sk_lock.slock); +} + +static void ovpn_tcp_release(struct sock *sk) +{ + struct sk_buff_head queue; + struct ovpn_socket *sock; + struct ovpn_peer *peer; + struct sk_buff *skb; + + rcu_read_lock(); + sock = rcu_dereference_sk_user_data(sk); + if (!sock) { + rcu_read_unlock(); + return; + } + + peer = sock->peer; + + /* during initialization this function is called before + * assigning sock->peer + */ + if (unlikely(!peer || !ovpn_peer_hold(peer))) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + + __skb_queue_head_init(&queue); + skb_queue_splice_init(&peer->tcp.out_queue, &queue); + + while ((skb = __skb_dequeue(&queue))) + ovpn_tcp_send_sock_skb(peer, sk, skb); + + peer->tcp.sk_cb.prot->release_cb(sk); + ovpn_peer_put(peer); +} + +static int ovpn_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +{ + struct ovpn_socket *sock; + int ret, linear = PAGE_SIZE; + struct ovpn_peer *peer; + struct sk_buff *skb; + + lock_sock(sk); + rcu_read_lock(); + sock = rcu_dereference_sk_user_data(sk); + if (unlikely(!sock || !sock->peer || !ovpn_peer_hold(sock->peer))) { + rcu_read_unlock(); + release_sock(sk); + return -EIO; + } + rcu_read_unlock(); + peer = sock->peer; + + if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL)) { + ret = -EOPNOTSUPP; + goto peer_free; + } + + if (peer->tcp.out_msg.skb) { + ret = -EAGAIN; + goto peer_free; + } + + if (size < linear) + linear = size; + + skb = sock_alloc_send_pskb(sk, linear, size - linear, + msg->msg_flags & MSG_DONTWAIT, &ret, 0); + if (!skb) { + net_err_ratelimited("%s: skb alloc failed: %d\n", + netdev_name(peer->ovpn->dev), ret); + goto peer_free; + } + + skb_put(skb, linear); + skb->len = size; + skb->data_len = size - linear; + + ret = 
skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); + if (ret) { + kfree_skb(skb); + net_err_ratelimited("%s: skb copy from iter failed: %d\n", + netdev_name(peer->ovpn->dev), ret); + goto peer_free; + } + + ovpn_skb_cb(skb)->nosignal = msg->msg_flags & MSG_NOSIGNAL; + ovpn_tcp_send_sock_skb(peer, sk, skb); + ret = size; +peer_free: + release_sock(sk); + ovpn_peer_put(peer); + return ret; +} + +static int ovpn_tcp_disconnect(struct sock *sk, int flags) +{ + return -EBUSY; +} + +static void ovpn_tcp_data_ready(struct sock *sk) +{ + struct ovpn_socket *sock; + + trace_sk_data_ready(sk); + + rcu_read_lock(); + sock = rcu_dereference_sk_user_data(sk); + if (likely(sock && sock->peer)) + strp_data_ready(&sock->peer->tcp.strp); + rcu_read_unlock(); +} + +static void ovpn_tcp_write_space(struct sock *sk) +{ + struct ovpn_socket *sock; + + rcu_read_lock(); + sock = rcu_dereference_sk_user_data(sk); + if (likely(sock && sock->peer)) { + schedule_work(&sock->tcp_tx_work); + sock->peer->tcp.sk_cb.sk_write_space(sk); + } + rcu_read_unlock(); +} + +static void ovpn_tcp_build_protos(struct proto *new_prot, + struct proto_ops *new_ops, + const struct proto *orig_prot, + const struct proto_ops *orig_ops); + +static void ovpn_tcp_peer_del_work(struct work_struct *work) +{ + struct ovpn_peer *peer = container_of(work, struct ovpn_peer, + tcp.defer_del_work); + + ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_ERROR); + ovpn_peer_put(peer); +} + +/* Set TCP encapsulation callbacks */ +int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock, + struct ovpn_peer *peer) +{ + struct socket *sock = ovpn_sock->sock; + struct strp_callbacks cb = { + .rcv_msg = ovpn_tcp_rcv, + .parse_msg = ovpn_tcp_parse, + }; + int ret; + + /* make sure no pre-existing encapsulation handler exists */ + if (sock->sk->sk_user_data) + return -EBUSY; + + /* only a fully connected socket is expected. 
Connection should be + * handled in userspace + */ + if (sock->sk->sk_state != TCP_ESTABLISHED) { + net_err_ratelimited("%s: provided TCP socket is not in ESTABLISHED state: %d\n", + netdev_name(peer->ovpn->dev), + sock->sk->sk_state); + return -EINVAL; + } + + ret = strp_init(&peer->tcp.strp, sock->sk, &cb); + if (ret < 0) { + DEBUG_NET_WARN_ON_ONCE(1); + return ret; + } + + INIT_WORK(&peer->tcp.defer_del_work, ovpn_tcp_peer_del_work); + + __sk_dst_reset(sock->sk); + skb_queue_head_init(&peer->tcp.user_queue); + skb_queue_head_init(&peer->tcp.out_queue); + + /* save current CBs so that they can be restored upon socket release */ + peer->tcp.sk_cb.sk_data_ready = sock->sk->sk_data_ready; + peer->tcp.sk_cb.sk_write_space = sock->sk->sk_write_space; + peer->tcp.sk_cb.prot = sock->sk->sk_prot; + peer->tcp.sk_cb.ops = sock->sk->sk_socket->ops; + + /* assign our static CBs and prot/ops */ + sock->sk->sk_data_ready = ovpn_tcp_data_ready; + sock->sk->sk_write_space = ovpn_tcp_write_space; + + if (sock->sk->sk_family == AF_INET) { + sock->sk->sk_prot = &ovpn_tcp_prot; + sock->sk->sk_socket->ops = &ovpn_tcp_ops; + } else { + sock->sk->sk_prot = &ovpn_tcp6_prot; + sock->sk->sk_socket->ops = &ovpn_tcp6_ops; + } + + /* avoid using task_frag */ + sock->sk->sk_allocation = GFP_ATOMIC; + sock->sk->sk_use_task_frag = false; + + /* enqueue the RX worker */ + strp_check_rcv(&peer->tcp.strp); + + return 0; +} + +static void ovpn_tcp_close(struct sock *sk, long timeout) +{ + struct ovpn_socket *sock; + struct ovpn_peer *peer; + + rcu_read_lock(); + sock = rcu_dereference_sk_user_data(sk); + if (!sock || !sock->peer || !ovpn_peer_hold(sock->peer)) { + rcu_read_unlock(); + return; + } + peer = sock->peer; + rcu_read_unlock(); + + ovpn_peer_del(sock->peer, OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT); + peer->tcp.sk_cb.prot->close(sk, timeout); + ovpn_peer_put(peer); +} + +static __poll_t ovpn_tcp_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + __poll_t mask = datagram_poll(file, sock, wait); + struct ovpn_socket *ovpn_sock; + + rcu_read_lock(); + ovpn_sock = rcu_dereference_sk_user_data(sock->sk); + if (ovpn_sock && ovpn_sock->peer && + !skb_queue_empty(&ovpn_sock->peer->tcp.user_queue)) + mask |= EPOLLIN | EPOLLRDNORM; + rcu_read_unlock(); + + return mask; +} + +static void ovpn_tcp_build_protos(struct proto *new_prot, + struct proto_ops *new_ops, + const struct proto *orig_prot, + const struct proto_ops *orig_ops) +{ + memcpy(new_prot, orig_prot, sizeof(*new_prot)); + memcpy(new_ops, orig_ops, sizeof(*new_ops)); + new_prot->recvmsg = ovpn_tcp_recvmsg; + new_prot->sendmsg = ovpn_tcp_sendmsg; + new_prot->disconnect = ovpn_tcp_disconnect; + new_prot->close = ovpn_tcp_close; + new_prot->release_cb = ovpn_tcp_release; + new_ops->poll = ovpn_tcp_poll; +} + +/* Initialize TCP static objects */ +void __init ovpn_tcp_init(void) +{ + ovpn_tcp_build_protos(&ovpn_tcp_prot, &ovpn_tcp_ops, &tcp_prot, + &inet_stream_ops); + +#if IS_ENABLED(CONFIG_IPV6) + ovpn_tcp_build_protos(&ovpn_tcp6_prot, &ovpn_tcp6_ops, &tcpv6_prot, + &inet6_stream_ops); +#endif +} diff --git a/drivers/net/ovpn/tcp.h b/drivers/net/ovpn/tcp.h new file mode 100644 index 000000000000..10aefa834cf3 --- /dev/null +++ b/drivers/net/ovpn/tcp.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* OpenVPN data channel offload + * + * Copyright (C) 2019-2025 OpenVPN, Inc. 
+ * + * Author: Antonio Quartulli <antonio@openvpn.net> + */ + +#ifndef _NET_OVPN_TCP_H_ +#define _NET_OVPN_TCP_H_ + +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/types.h> + +#include "peer.h" +#include "skb.h" +#include "socket.h" + +void __init ovpn_tcp_init(void); + +int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock, + struct ovpn_peer *peer); +void ovpn_tcp_socket_detach(struct ovpn_socket *ovpn_sock); +void ovpn_tcp_socket_wait_finish(struct ovpn_socket *sock); + +/* Prepare skb and enqueue it for sending to peer. + * + * Preparation consists of prepending the skb payload with its size. + * Required by the OpenVPN protocol in order to extract packets from + * the TCP stream on the receiver side. + */ +void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct socket *sock, struct sk_buff *skb); +void ovpn_tcp_tx_work(struct work_struct *work); + +#endif /* _NET_OVPN_TCP_H_ */ diff --git a/drivers/net/ovpn/udp.c b/drivers/net/ovpn/udp.c new file mode 100644 index 000000000000..c9e189056f33 --- /dev/null +++ b/drivers/net/ovpn/udp.c @@ -0,0 +1,439 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel offload + * + * Copyright (C) 2019-2025 OpenVPN, Inc. + * + * Author: Antonio Quartulli <antonio@openvpn.net> + */ + +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/skbuff.h> +#include <linux/socket.h> +#include <linux/udp.h> +#include <net/addrconf.h> +#include <net/dst_cache.h> +#include <net/route.h> +#include <net/ipv6_stubs.h> +#include <net/transp_v6.h> +#include <net/udp.h> +#include <net/udp_tunnel.h> + +#include "ovpnpriv.h" +#include "main.h" +#include "bind.h" +#include "io.h" +#include "peer.h" +#include "proto.h" +#include "socket.h" +#include "udp.h" + +/* Retrieve the corresponding ovpn object from a UDP socket. + * rcu_read_lock must be held on entry + */ +static struct ovpn_socket *ovpn_socket_from_udp_sock(struct sock *sk) +{ + struct ovpn_socket *ovpn_sock; + + if (unlikely(READ_ONCE(udp_sk(sk)->encap_type) != UDP_ENCAP_OVPNINUDP)) + return NULL; + + ovpn_sock = rcu_dereference_sk_user_data(sk); + if (unlikely(!ovpn_sock)) + return NULL; + + /* make sure that sk matches our stored transport socket */ + if (unlikely(!ovpn_sock->sock || sk != ovpn_sock->sock->sk)) + return NULL; + + return ovpn_sock; +} + +/** + * ovpn_udp_encap_recv - Start processing a received UDP packet. + * @sk: socket over which the packet was received + * @skb: the received packet + * + * If the first byte of the payload is: + * - DATA_V2 the packet is accepted for further processing, + * - DATA_V1 the packet is dropped as not supported, + * - anything else the packet is forwarded to the UDP stack for + * delivery to user space. + * + * Return: + * 0 if skb was consumed or dropped + * >0 if skb should be passed up to userspace as UDP (packet not consumed) + * <0 if skb should be resubmitted as proto -N (packet not consumed) + */ +static int ovpn_udp_encap_recv(struct sock *sk, struct sk_buff *skb) +{ + struct ovpn_socket *ovpn_sock; + struct ovpn_priv *ovpn; + struct ovpn_peer *peer; + u32 peer_id; + u8 opcode; + + ovpn_sock = ovpn_socket_from_udp_sock(sk); + if (unlikely(!ovpn_sock)) { + net_err_ratelimited("ovpn: %s invoked on non ovpn socket\n", + __func__); + goto drop_noovpn; + } + + ovpn = ovpn_sock->ovpn; + if (unlikely(!ovpn)) { + net_err_ratelimited("ovpn: cannot obtain ovpn object from UDP socket\n"); + goto drop_noovpn; + } + + /* Make sure the first 4 bytes of the skb data buffer after the UDP + * header are accessible.
+ * They are required to fetch the OP code, the key ID and the peer ID. + */ + if (unlikely(!pskb_may_pull(skb, sizeof(struct udphdr) + + OVPN_OPCODE_SIZE))) { + net_dbg_ratelimited("%s: packet too small from UDP socket\n", + netdev_name(ovpn->dev)); + goto drop; + } + + opcode = ovpn_opcode_from_skb(skb, sizeof(struct udphdr)); + if (unlikely(opcode != OVPN_DATA_V2)) { + /* DATA_V1 is not supported */ + if (opcode == OVPN_DATA_V1) + goto drop; + + /* unknown or control packet: let it bubble up to userspace */ + return 1; + } + + peer_id = ovpn_peer_id_from_skb(skb, sizeof(struct udphdr)); + /* some OpenVPN server implementations send data packets with the + * peer-id set to UNDEF. In this case we skip the peer lookup by peer-id + * and we try with the transport address + */ + if (peer_id == OVPN_PEER_ID_UNDEF) + peer = ovpn_peer_get_by_transp_addr(ovpn, skb); + else + peer = ovpn_peer_get_by_id(ovpn, peer_id); + + if (unlikely(!peer)) + goto drop; + + /* pop off outer UDP header */ + __skb_pull(skb, sizeof(struct udphdr)); + ovpn_recv(peer, skb); + return 0; + +drop: + dev_dstats_rx_dropped(ovpn->dev); +drop_noovpn: + kfree_skb(skb); + return 0; +} + +/** + * ovpn_udp4_output - send IPv4 packet over udp socket + * @peer: the destination peer + * @bind: the binding related to the destination peer + * @cache: dst cache + * @sk: the socket to send the packet over + * @skb: the packet to send + * + * Return: 0 on success or a negative error code otherwise + */ +static int ovpn_udp4_output(struct ovpn_peer *peer, struct ovpn_bind *bind, + struct dst_cache *cache, struct sock *sk, + struct sk_buff *skb) +{ + struct rtable *rt; + struct flowi4 fl = { + .saddr = bind->local.ipv4.s_addr, + .daddr = bind->remote.in4.sin_addr.s_addr, + .fl4_sport = inet_sk(sk)->inet_sport, + .fl4_dport = bind->remote.in4.sin_port, + .flowi4_proto = sk->sk_protocol, + .flowi4_mark = sk->sk_mark, + }; + int ret; + + local_bh_disable(); + rt = dst_cache_get_ip4(cache, &fl.saddr); + if (rt) + goto transmit; + + if (unlikely(!inet_confirm_addr(sock_net(sk), NULL, 0, fl.saddr, + RT_SCOPE_HOST))) { + /* we may end up here when the cached address is not usable + * anymore. 
In this case we reset address/cache and perform a + * new look up + */ + fl.saddr = 0; + spin_lock_bh(&peer->lock); + bind->local.ipv4.s_addr = 0; + spin_unlock_bh(&peer->lock); + dst_cache_reset(cache); + } + + rt = ip_route_output_flow(sock_net(sk), &fl, sk); + if (IS_ERR(rt) && PTR_ERR(rt) == -EINVAL) { + fl.saddr = 0; + spin_lock_bh(&peer->lock); + bind->local.ipv4.s_addr = 0; + spin_unlock_bh(&peer->lock); + dst_cache_reset(cache); + + rt = ip_route_output_flow(sock_net(sk), &fl, sk); + } + + if (IS_ERR(rt)) { + ret = PTR_ERR(rt); + net_dbg_ratelimited("%s: no route to host %pISpc: %d\n", + netdev_name(peer->ovpn->dev), + &bind->remote.in4, + ret); + goto err; + } + dst_cache_set_ip4(cache, &rt->dst, fl.saddr); + +transmit: + udp_tunnel_xmit_skb(rt, sk, skb, fl.saddr, fl.daddr, 0, + ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, + fl.fl4_dport, false, sk->sk_no_check_tx); + ret = 0; +err: + local_bh_enable(); + return ret; +} + +#if IS_ENABLED(CONFIG_IPV6) +/** + * ovpn_udp6_output - send IPv6 packet over udp socket + * @peer: the destination peer + * @bind: the binding related to the destination peer + * @cache: dst cache + * @sk: the socket to send the packet over + * @skb: the packet to send + * + * Return: 0 on success or a negative error code otherwise + */ +static int ovpn_udp6_output(struct ovpn_peer *peer, struct ovpn_bind *bind, + struct dst_cache *cache, struct sock *sk, + struct sk_buff *skb) +{ + struct dst_entry *dst; + int ret; + + struct flowi6 fl = { + .saddr = bind->local.ipv6, + .daddr = bind->remote.in6.sin6_addr, + .fl6_sport = inet_sk(sk)->inet_sport, + .fl6_dport = bind->remote.in6.sin6_port, + .flowi6_proto = sk->sk_protocol, + .flowi6_mark = sk->sk_mark, + .flowi6_oif = bind->remote.in6.sin6_scope_id, + }; + + local_bh_disable(); + dst = dst_cache_get_ip6(cache, &fl.saddr); + if (dst) + goto transmit; + + if (unlikely(!ipv6_chk_addr(sock_net(sk), &fl.saddr, NULL, 0))) { + /* we may end up here when the cached address is not usable + * anymore. In this case we reset address/cache and perform a + * new look up + */ + fl.saddr = in6addr_any; + spin_lock_bh(&peer->lock); + bind->local.ipv6 = in6addr_any; + spin_unlock_bh(&peer->lock); + dst_cache_reset(cache); + } + + dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sk), sk, &fl, NULL); + if (IS_ERR(dst)) { + ret = PTR_ERR(dst); + net_dbg_ratelimited("%s: no route to host %pISpc: %d\n", + netdev_name(peer->ovpn->dev), + &bind->remote.in6, ret); + goto err; + } + dst_cache_set_ip6(cache, dst, &fl.saddr); + +transmit: + udp_tunnel6_xmit_skb(dst, sk, skb, skb->dev, &fl.saddr, &fl.daddr, 0, + ip6_dst_hoplimit(dst), 0, fl.fl6_sport, + fl.fl6_dport, udp_get_no_check6_tx(sk)); + ret = 0; +err: + local_bh_enable(); + return ret; +} +#endif + +/** + * ovpn_udp_output - transmit skb using udp-tunnel + * @peer: the destination peer + * @cache: dst cache + * @sk: the socket to send the packet over + * @skb: the packet to send + * + * rcu_read_lock should be held on entry. + * On return, the skb is consumed. 
+ * + * Return: 0 on success or a negative error code otherwise + */ +static int ovpn_udp_output(struct ovpn_peer *peer, struct dst_cache *cache, + struct sock *sk, struct sk_buff *skb) +{ + struct ovpn_bind *bind; + int ret; + + /* set sk to null if skb is already orphaned */ + if (!skb->destructor) + skb->sk = NULL; + + rcu_read_lock(); + bind = rcu_dereference(peer->bind); + if (unlikely(!bind)) { + net_warn_ratelimited("%s: no bind for remote peer %u\n", + netdev_name(peer->ovpn->dev), peer->id); + ret = -ENODEV; + goto out; + } + + switch (bind->remote.in4.sin_family) { + case AF_INET: + ret = ovpn_udp4_output(peer, bind, cache, sk, skb); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + ret = ovpn_udp6_output(peer, bind, cache, sk, skb); + break; +#endif + default: + ret = -EAFNOSUPPORT; + break; + } + +out: + rcu_read_unlock(); + return ret; +} + +/** + * ovpn_udp_send_skb - prepare skb and send it over via UDP + * @peer: the destination peer + * @sock: the RCU protected peer socket + * @skb: the packet to send + */ +void ovpn_udp_send_skb(struct ovpn_peer *peer, struct socket *sock, + struct sk_buff *skb) +{ + int ret = -1; + + skb->dev = peer->ovpn->dev; + /* no checksum performed at this layer */ + skb->ip_summed = CHECKSUM_NONE; + + /* get socket info */ + if (unlikely(!sock)) { + net_warn_ratelimited("%s: no sock for remote peer %u\n", + netdev_name(peer->ovpn->dev), peer->id); + goto out; + } + + /* crypto layer -> transport (UDP) */ + ret = ovpn_udp_output(peer, &peer->dst_cache, sock->sk, skb); +out: + if (unlikely(ret < 0)) { + kfree_skb(skb); + return; + } +} + +static void ovpn_udp_encap_destroy(struct sock *sk) +{ + struct ovpn_socket *sock; + struct ovpn_priv *ovpn; + + rcu_read_lock(); + sock = rcu_dereference_sk_user_data(sk); + if (!sock || !sock->ovpn) { + rcu_read_unlock(); + return; + } + ovpn = sock->ovpn; + rcu_read_unlock(); + + ovpn_peers_free(ovpn, sk, OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT); +} + +/** + * ovpn_udp_socket_attach - set udp-tunnel CBs on socket and link it to ovpn + * @ovpn_sock: socket to configure + * @ovpn: the openvp instance to link + * + * After invoking this function, the sock will be controlled by ovpn so that + * any incoming packet may be processed by ovpn first. + * + * Return: 0 on success or a negative error code otherwise + */ +int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock, + struct ovpn_priv *ovpn) +{ + struct udp_tunnel_sock_cfg cfg = { + .encap_type = UDP_ENCAP_OVPNINUDP, + .encap_rcv = ovpn_udp_encap_recv, + .encap_destroy = ovpn_udp_encap_destroy, + }; + struct socket *sock = ovpn_sock->sock; + struct ovpn_socket *old_data; + int ret; + + /* make sure no pre-existing encapsulation handler exists */ + rcu_read_lock(); + old_data = rcu_dereference_sk_user_data(sock->sk); + if (!old_data) { + /* socket is currently unused - we can take it */ + rcu_read_unlock(); + setup_udp_tunnel_sock(sock_net(sock->sk), sock, &cfg); + return 0; + } + + /* socket is in use. We need to understand if it's owned by this ovpn + * instance or by something else. + * In the former case, we can increase the refcounter and happily + * use it, because the same UDP socket is expected to be shared among + * different peers. 
+ * + * Unlike TCP, a single UDP socket can be used to talk to many remote + * hosts and therefore openvpn instantiates only one for all its peers + */ + if ((READ_ONCE(udp_sk(sock->sk)->encap_type) == UDP_ENCAP_OVPNINUDP) && + old_data->ovpn == ovpn) { + netdev_dbg(ovpn->dev, + "provided socket already owned by this interface\n"); + ret = -EALREADY; + } else { + netdev_dbg(ovpn->dev, + "provided socket already taken by other user\n"); + ret = -EBUSY; + } + rcu_read_unlock(); + + return ret; +} + +/** + * ovpn_udp_socket_detach - clean udp-tunnel status for this socket + * @ovpn_sock: the socket to clean + */ +void ovpn_udp_socket_detach(struct ovpn_socket *ovpn_sock) +{ + struct udp_tunnel_sock_cfg cfg = { }; + + setup_udp_tunnel_sock(sock_net(ovpn_sock->sock->sk), ovpn_sock->sock, + &cfg); +} diff --git a/drivers/net/ovpn/udp.h b/drivers/net/ovpn/udp.h new file mode 100644 index 000000000000..9994eb6e0428 --- /dev/null +++ b/drivers/net/ovpn/udp.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* OpenVPN data channel offload + * + * Copyright (C) 2019-2025 OpenVPN, Inc. + * + * Author: Antonio Quartulli <antonio@openvpn.net> + */ + +#ifndef _NET_OVPN_UDP_H_ +#define _NET_OVPN_UDP_H_ + +#include <net/sock.h> + +struct ovpn_peer; +struct ovpn_priv; +struct socket; + +int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock, + struct ovpn_priv *ovpn); +void ovpn_udp_socket_detach(struct ovpn_socket *ovpn_sock); + +void ovpn_udp_send_skb(struct ovpn_peer *peer, struct socket *sock, + struct sk_buff *skb); + +#endif /* _NET_OVPN_UDP_H_ */ diff --git a/drivers/net/pfcp.c b/drivers/net/pfcp.c index f873a92d2445..28e6bc4a1f14 100644 --- a/drivers/net/pfcp.c +++ b/drivers/net/pfcp.c @@ -245,30 +245,21 @@ static int __net_init pfcp_net_init(struct net *net) return 0; } -static void __net_exit pfcp_net_exit(struct net *net) +static void __net_exit pfcp_net_exit_rtnl(struct net *net, + struct list_head *dev_to_kill) { struct pfcp_net *pn = net_generic(net, pfcp_net_id); struct pfcp_dev *pfcp, *pfcp_next; - struct net_device *dev; - LIST_HEAD(list); - - rtnl_lock(); - for_each_netdev(net, dev) - if (dev->rtnl_link_ops == &pfcp_link_ops) - pfcp_dellink(dev, &list); list_for_each_entry_safe(pfcp, pfcp_next, &pn->pfcp_dev_list, list) - pfcp_dellink(pfcp->dev, &list); - - unregister_netdevice_many(&list); - rtnl_unlock(); + pfcp_dellink(pfcp->dev, dev_to_kill); } static struct pernet_operations pfcp_net_ops = { - .init = pfcp_net_init, - .exit = pfcp_net_exit, - .id = &pfcp_net_id, - .size = sizeof(struct pfcp_net), + .init = pfcp_net_init, + .exit_rtnl = pfcp_net_exit_rtnl, + .id = &pfcp_net_id, + .size = sizeof(struct pfcp_net), }; static int __init pfcp_init(void) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index d29f9f7fd2e1..677d56e06539 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -5,7 +5,6 @@ config PHYLINK tristate - depends on NETDEVICES select PHYLIB select SWPHY help @@ -15,9 +14,7 @@ menuconfig PHYLIB tristate "PHY Device support and infrastructure" - depends on NETDEVICES select MDIO_DEVICE - select MDIO_DEVRES help Ethernet controllers are usually attached to PHY devices.
This option provides infrastructure for diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 23ce205ae91d..59ac3a9a3177 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -3,7 +3,7 @@ libphy-y := phy.o phy-c45.o phy-core.o phy_device.o \ linkmode.o phy_link_topology.o \ - phy_package.o phy_caps.o + phy_package.o phy_caps.o mdio_bus_provider.o mdio-bus-y += mdio_bus.o mdio_device.o ifdef CONFIG_MDIO_DEVICE @@ -20,7 +20,7 @@ obj-y += stubs.o else obj-$(CONFIG_MDIO_DEVICE) += mdio-bus.o endif -obj-$(CONFIG_MDIO_DEVRES) += mdio_devres.o +obj-$(CONFIG_PHYLIB) += mdio_devres.o libphy-$(CONFIG_SWPHY) += swphy.o libphy-$(CONFIG_LED_TRIGGER_PHY) += phy_led_triggers.o libphy-$(CONFIG_OPEN_ALLIANCE_HELPERS) += open_alliance_helpers.o diff --git a/drivers/net/phy/air_en8811h.c b/drivers/net/phy/air_en8811h.c index e9fd24cb7270..57fbd8df9438 100644 --- a/drivers/net/phy/air_en8811h.c +++ b/drivers/net/phy/air_en8811h.c @@ -11,6 +11,7 @@ * Copyright (C) 2023 Airoha Technology Corp. */ +#include <linux/clk-provider.h> #include <linux/phy.h> #include <linux/firmware.h> #include <linux/property.h> @@ -115,6 +116,11 @@ #define EN8811H_GPIO_OUTPUT 0xcf8b8 #define EN8811H_GPIO_OUTPUT_345 (BIT(3) | BIT(4) | BIT(5)) +#define EN8811H_HWTRAP1 0xcf914 +#define EN8811H_HWTRAP1_CKO BIT(12) +#define EN8811H_CLK_CGM 0xcf958 +#define EN8811H_CLK_CGM_CKO BIT(26) + #define EN8811H_FW_CTRL_1 0x0f0018 #define EN8811H_FW_CTRL_1_START 0x0 #define EN8811H_FW_CTRL_1_FINISH 0x1 @@ -142,10 +148,15 @@ struct led { unsigned long state; }; +#define clk_hw_to_en8811h_priv(_hw) \ + container_of(_hw, struct en8811h_priv, hw) + struct en8811h_priv { - u32 firmware_version; - bool mcu_needs_restart; - struct led led[EN8811H_LED_COUNT]; + u32 firmware_version; + bool mcu_needs_restart; + struct led led[EN8811H_LED_COUNT]; + struct clk_hw hw; + struct phy_device *phydev; }; enum { @@ -806,6 +817,86 @@ static int en8811h_led_hw_is_supported(struct phy_device *phydev, u8 index, return 0; }; +static unsigned long en8811h_clk_recalc_rate(struct clk_hw *hw, + unsigned long parent) +{ + struct en8811h_priv *priv = clk_hw_to_en8811h_priv(hw); + struct phy_device *phydev = priv->phydev; + u32 pbus_value; + int ret; + + ret = air_buckpbus_reg_read(phydev, EN8811H_HWTRAP1, &pbus_value); + if (ret < 0) + return ret; + + return (pbus_value & EN8811H_HWTRAP1_CKO) ? 
50000000 : 25000000; +} + +static int en8811h_clk_enable(struct clk_hw *hw) +{ + struct en8811h_priv *priv = clk_hw_to_en8811h_priv(hw); + struct phy_device *phydev = priv->phydev; + + return air_buckpbus_reg_modify(phydev, EN8811H_CLK_CGM, + EN8811H_CLK_CGM_CKO, + EN8811H_CLK_CGM_CKO); +} + +static void en8811h_clk_disable(struct clk_hw *hw) +{ + struct en8811h_priv *priv = clk_hw_to_en8811h_priv(hw); + struct phy_device *phydev = priv->phydev; + + air_buckpbus_reg_modify(phydev, EN8811H_CLK_CGM, + EN8811H_CLK_CGM_CKO, 0); +} + +static int en8811h_clk_is_enabled(struct clk_hw *hw) +{ + struct en8811h_priv *priv = clk_hw_to_en8811h_priv(hw); + struct phy_device *phydev = priv->phydev; + u32 pbus_value; + int ret; + + ret = air_buckpbus_reg_read(phydev, EN8811H_CLK_CGM, &pbus_value); + if (ret < 0) + return ret; + + return (pbus_value & EN8811H_CLK_CGM_CKO); +} + +static const struct clk_ops en8811h_clk_ops = { + .recalc_rate = en8811h_clk_recalc_rate, + .enable = en8811h_clk_enable, + .disable = en8811h_clk_disable, + .is_enabled = en8811h_clk_is_enabled, +}; + +static int en8811h_clk_provider_setup(struct device *dev, struct clk_hw *hw) +{ + struct clk_init_data init; + int ret; + + if (!IS_ENABLED(CONFIG_COMMON_CLK)) + return 0; + + init.name = devm_kasprintf(dev, GFP_KERNEL, "%s-cko", + fwnode_get_name(dev_fwnode(dev))); + if (!init.name) + return -ENOMEM; + + init.ops = &en8811h_clk_ops; + init.flags = 0; + init.num_parents = 0; + hw->init = &init; + + ret = devm_clk_hw_register(dev, hw); + if (ret) + return ret; + + return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, hw); +} + static int en8811h_probe(struct phy_device *phydev) { struct en8811h_priv *priv; @@ -838,6 +929,12 @@ static int en8811h_probe(struct phy_device *phydev) return ret; } + priv->phydev = phydev; + /* Co-Clock Output */ + ret = en8811h_clk_provider_setup(&phydev->mdio.dev, &priv->hw); + if (ret) + return ret; + /* Configure led gpio pins as output */ ret = air_buckpbus_reg_modify(phydev, EN8811H_GPIO_OUTPUT, EN8811H_GPIO_OUTPUT_345, diff --git a/drivers/net/phy/aquantia/aquantia_main.c b/drivers/net/phy/aquantia/aquantia_main.c index 08b1c9cc902b..77a48635d7bf 100644 --- a/drivers/net/phy/aquantia/aquantia_main.c +++ b/drivers/net/phy/aquantia/aquantia_main.c @@ -516,8 +516,7 @@ static int aqr105_read_status(struct phy_device *phydev) if (!phydev->link || phydev->autoneg == AUTONEG_DISABLE) return 0; - /** - * The status register is not immediately correct on line side link up. + /* The status register is not immediately correct on line side link up. * Poll periodically until it reflects the correct ON state. * Only return fail for read error, timeout defaults to OFF state. */ @@ -634,8 +633,7 @@ static int aqr107_read_status(struct phy_device *phydev) if (!phydev->link || phydev->autoneg == AUTONEG_DISABLE) return 0; - /** - * The status register is not immediately correct on line side link up. + /* The status register is not immediately correct on line side link up. * Poll periodically until it reflects the correct ON state. * Only return fail for read error, timeout defaults to OFF state. 
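/*
 * [Editorial aside] clk_hw_to_en8811h_priv() in the en8811h hunk above is
 * the standard container_of() idiom: the clk framework hands the driver a
 * pointer to the clk_hw member, and the driver recovers the enclosing
 * private structure by subtracting the member's offset. A standalone
 * illustration with hypothetical types (plain C, runnable in userspace):
 */
#include <stddef.h>	/* offsetof */

struct sketch_priv {
	int firmware_version;
	struct { int dummy; } hw;	/* stand-in for struct clk_hw */
};

#define sketch_priv_from_hw(ptr) \
	((struct sketch_priv *)((char *)(ptr) - offsetof(struct sketch_priv, hw)))

/* Given "struct sketch_priv p;", sketch_priv_from_hw(&p.hw) yields &p. */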
*/ diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 85e231451093..daab555721df 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -478,13 +478,6 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp, switch (rq->type) { case PTP_CLK_REQ_EXTTS: - /* Reject requests with unsupported flags */ - if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_FALLING_EDGE | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - /* Reject requests to enable time stamping on both edges. */ if ((rq->extts.flags & PTP_STRICT_FLAGS) && (rq->extts.flags & PTP_ENABLE_FEATURE) && @@ -513,9 +506,6 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp, return 0; case PTP_CLK_REQ_PEROUT: - /* Reject requests with unsupported flags */ - if (rq->perout.flags) - return -EOPNOTSUPP; if (rq->perout.index >= N_PER_OUT) return -EINVAL; return periodic_output(clock, rq, on, rq->perout.index); @@ -1002,6 +992,9 @@ static void dp83640_clock_init(struct dp83640_clock *clock, struct mii_bus *bus) clock->caps.n_per_out = N_PER_OUT; clock->caps.n_pins = DP83640_N_PINS; clock->caps.pps = 0; + clock->caps.supported_extts_flags = PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS; clock->caps.adjfine = ptp_dp83640_adjfine; clock->caps.adjtime = ptp_dp83640_adjtime; clock->caps.gettime64 = ptp_dp83640_gettime; diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index e32013eb0186..01255dada600 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -33,6 +33,7 @@ #define MII_DP83822_MLEDCR 0x25 #define MII_DP83822_LDCTRL 0x403 #define MII_DP83822_LEDCFG1 0x460 +#define MII_DP83822_IOCTRL 0x461 #define MII_DP83822_IOCTRL1 0x462 #define MII_DP83822_IOCTRL2 0x463 #define MII_DP83822_GENCFG 0x465 @@ -118,6 +119,9 @@ #define DP83822_LEDCFG1_LED1_CTRL GENMASK(11, 8) #define DP83822_LEDCFG1_LED3_CTRL GENMASK(7, 4) +/* IOCTRL bits */ +#define DP83822_IOCTRL_MAC_IMPEDANCE_CTRL GENMASK(4, 1) + /* IOCTRL1 bits */ #define DP83822_IOCTRL1_GPIO3_CTRL GENMASK(10, 8) #define DP83822_IOCTRL1_GPIO3_CTRL_LED3 BIT(0) @@ -202,6 +206,7 @@ struct dp83822_private { u32 gpio2_clk_out; bool led_pin_enable[DP83822_MAX_LED_PINS]; int tx_amplitude_100base_tx_index; + int mac_termination_index; }; static int dp83822_config_wol(struct phy_device *phydev, @@ -533,6 +538,12 @@ static int dp83822_config_init(struct phy_device *phydev) FIELD_PREP(DP83822_100BASE_TX_LINE_DRIVER_SWING, dp83822->tx_amplitude_100base_tx_index)); + if (dp83822->mac_termination_index >= 0) + phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_IOCTRL, + DP83822_IOCTRL_MAC_IMPEDANCE_CTRL, + FIELD_PREP(DP83822_IOCTRL_MAC_IMPEDANCE_CTRL, + dp83822->mac_termination_index)); + err = dp83822_config_init_leds(phydev); if (err) return err; @@ -736,6 +747,10 @@ static const u32 tx_amplitude_100base_tx_gain[] = { 93, 95, 97, 98, 100, 102, 103, 105, }; +static const u32 mac_termination[] = { + 99, 91, 84, 78, 73, 69, 65, 61, 58, 55, 53, 50, 48, 46, 44, 43, +}; + static int dp83822_of_init_leds(struct phy_device *phydev) { struct device_node *node = phydev->mdio.dev.of_node; @@ -852,6 +867,23 @@ static int dp83822_of_init(struct phy_device *phydev) } } + ret = phy_get_mac_termination(phydev, dev, &val); + if (!ret) { + for (i = 0; i < ARRAY_SIZE(mac_termination); i++) { + if (mac_termination[i] == val) { + dp83822->mac_termination_index = i; + break; + } + } + + if (dp83822->mac_termination_index < 0) { + phydev_err(phydev, + "Invalid value for mac-termination-ohms property (%u)\n", + val); + return 
-EINVAL; + } + } + return dp83822_of_init_leds(phydev); } @@ -931,6 +963,7 @@ static int dp8382x_probe(struct phy_device *phydev) return -ENOMEM; dp83822->tx_amplitude_100base_tx_index = -1; + dp83822->mac_termination_index = -1; phydev->priv = dp83822; return 0; diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 063266cafe9c..deeefb962566 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -92,11 +92,6 @@ #define DP83867_STRAP_STS1_RESERVED BIT(11) /* STRAP_STS2 bits */ -#define DP83867_STRAP_STS2_CLK_SKEW_TX_MASK GENMASK(6, 4) -#define DP83867_STRAP_STS2_CLK_SKEW_TX_SHIFT 4 -#define DP83867_STRAP_STS2_CLK_SKEW_RX_MASK GENMASK(2, 0) -#define DP83867_STRAP_STS2_CLK_SKEW_RX_SHIFT 0 -#define DP83867_STRAP_STS2_CLK_SKEW_NONE BIT(2) #define DP83867_STRAP_STS2_STRAP_FLD BIT(10) /* PHY CTRL bits */ @@ -111,10 +106,8 @@ /* RGMIIDCTL bits */ #define DP83867_RGMII_TX_CLK_DELAY_MAX 0xf #define DP83867_RGMII_TX_CLK_DELAY_SHIFT 4 -#define DP83867_RGMII_TX_CLK_DELAY_INV (DP83867_RGMII_TX_CLK_DELAY_MAX + 1) #define DP83867_RGMII_RX_CLK_DELAY_MAX 0xf #define DP83867_RGMII_RX_CLK_DELAY_SHIFT 0 -#define DP83867_RGMII_RX_CLK_DELAY_INV (DP83867_RGMII_RX_CLK_DELAY_MAX + 1) /* IO_MUX_CFG bits */ #define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MASK 0x1f @@ -506,48 +499,6 @@ static int dp83867_config_port_mirroring(struct phy_device *phydev) return 0; } -static int dp83867_verify_rgmii_cfg(struct phy_device *phydev) -{ - struct dp83867_private *dp83867 = phydev->priv; - - /* Existing behavior was to use default pin strapping delay in rgmii - * mode, but rgmii should have meant no delay. Warn existing users. - */ - if (phydev->interface == PHY_INTERFACE_MODE_RGMII) { - const u16 val = phy_read_mmd(phydev, DP83867_DEVADDR, - DP83867_STRAP_STS2); - const u16 txskew = (val & DP83867_STRAP_STS2_CLK_SKEW_TX_MASK) >> - DP83867_STRAP_STS2_CLK_SKEW_TX_SHIFT; - const u16 rxskew = (val & DP83867_STRAP_STS2_CLK_SKEW_RX_MASK) >> - DP83867_STRAP_STS2_CLK_SKEW_RX_SHIFT; - - if (txskew != DP83867_STRAP_STS2_CLK_SKEW_NONE || - rxskew != DP83867_STRAP_STS2_CLK_SKEW_NONE) - phydev_warn(phydev, - "PHY has delays via pin strapping, but phy-mode = 'rgmii'\n" - "Should be 'rgmii-id' to use internal delays txskew:%x rxskew:%x\n", - txskew, rxskew); - } - - /* RX delay *must* be specified if internal delay of RX is used. */ - if ((phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || - phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) && - dp83867->rx_id_delay == DP83867_RGMII_RX_CLK_DELAY_INV) { - phydev_err(phydev, "ti,rx-internal-delay must be specified\n"); - return -EINVAL; - } - - /* TX delay *must* be specified if internal delay of TX is used. 
*/ - if ((phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || - phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) && - dp83867->tx_id_delay == DP83867_RGMII_TX_CLK_DELAY_INV) { - phydev_err(phydev, "ti,tx-internal-delay must be specified\n"); - return -EINVAL; - } - - return 0; -} - #if IS_ENABLED(CONFIG_OF_MDIO) static int dp83867_of_init_io_impedance(struct phy_device *phydev) { @@ -631,7 +582,7 @@ static int dp83867_of_init(struct phy_device *phydev) dp83867->sgmii_ref_clk_en = of_property_read_bool(of_node, "ti,sgmii-ref-clock-output-enable"); - dp83867->rx_id_delay = DP83867_RGMII_RX_CLK_DELAY_INV; + dp83867->rx_id_delay = DP83867_RGMIIDCTL_2_00_NS; ret = of_property_read_u32(of_node, "ti,rx-internal-delay", &dp83867->rx_id_delay); if (!ret && dp83867->rx_id_delay > DP83867_RGMII_RX_CLK_DELAY_MAX) { @@ -641,7 +592,7 @@ static int dp83867_of_init(struct phy_device *phydev) return -EINVAL; } - dp83867->tx_id_delay = DP83867_RGMII_TX_CLK_DELAY_INV; + dp83867->tx_id_delay = DP83867_RGMIIDCTL_2_00_NS; ret = of_property_read_u32(of_node, "ti,tx-internal-delay", &dp83867->tx_id_delay); if (!ret && dp83867->tx_id_delay > DP83867_RGMII_TX_CLK_DELAY_MAX) { @@ -761,7 +712,6 @@ static int dp83867_config_init(struct phy_device *phydev) { struct dp83867_private *dp83867 = phydev->priv; int ret, val, bs; - u16 delay; /* Force speed optimization for the PHY even if it strapped */ ret = phy_modify(phydev, DP83867_CFG2, DP83867_DOWNSHIFT_EN, @@ -769,10 +719,6 @@ static int dp83867_config_init(struct phy_device *phydev) if (ret) return ret; - ret = dp83867_verify_rgmii_cfg(phydev); - if (ret) - return ret; - /* RX_DV/RX_CTRL strapped in mode 1 or mode 2 workaround */ if (dp83867->rxctrl_strap_quirk) phy_clear_bits_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4, @@ -836,13 +782,7 @@ static int dp83867_config_init(struct phy_device *phydev) if (ret) return ret; - /* If rgmii mode with no internal delay is selected, we do NOT use - * aligned mode as one might expect. Instead we use the PHY's default - * based on pin strapping. And the "mode 0" default is to *use* - * internal delay with a value of 7 (2.00 ns). 
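/*
 * [Editorial aside] The dp83867 change above replaces the old
 * invalid-sentinel scheme with the usual "optional DT property with a
 * default" pattern: of_property_read_u32() writes its output only on
 * success, so pre-loading the default is enough. A sketch (hypothetical
 * helper; DP83867_RGMIIDCTL_2_00_NS comes from dt-bindings/net/ti-dp83867.h):
 */
static u32 sketch_read_rx_delay(const struct device_node *of_node)
{
	u32 rx_delay = DP83867_RGMIIDCTL_2_00_NS;	/* default: 2.00 ns */

	/* overwrites rx_delay only if the property exists and parses */
	of_property_read_u32(of_node, "ti,rx-internal-delay", &rx_delay);

	return rx_delay;
}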
- * - * Set up RGMII delays - */ + /* Set up RGMII delays */ val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_RGMIICTL); val &= ~(DP83867_RGMII_TX_CLK_DELAY_EN | DP83867_RGMII_RX_CLK_DELAY_EN); @@ -857,15 +797,9 @@ static int dp83867_config_init(struct phy_device *phydev) phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_RGMIICTL, val); - delay = 0; - if (dp83867->rx_id_delay != DP83867_RGMII_RX_CLK_DELAY_INV) - delay |= dp83867->rx_id_delay; - if (dp83867->tx_id_delay != DP83867_RGMII_TX_CLK_DELAY_INV) - delay |= dp83867->tx_id_delay << - DP83867_RGMII_TX_CLK_DELAY_SHIFT; - phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_RGMIIDCTL, - delay); + dp83867->rx_id_delay | + (dp83867->tx_id_delay << DP83867_RGMII_TX_CLK_DELAY_SHIFT)); } /* If specified, set io impedance */ diff --git a/drivers/net/phy/marvell-88q2xxx.c b/drivers/net/phy/marvell-88q2xxx.c index 23e1f0521f54..f3d83b04c953 100644 --- a/drivers/net/phy/marvell-88q2xxx.c +++ b/drivers/net/phy/marvell-88q2xxx.c @@ -119,7 +119,6 @@ #define MV88Q2XXX_LED_INDEX_GPIO 1 struct mv88q2xxx_priv { - bool enable_temp; bool enable_led0; }; @@ -482,49 +481,6 @@ static int mv88q2xxx_config_aneg(struct phy_device *phydev) return phydev->drv->soft_reset(phydev); } -static int mv88q2xxx_config_init(struct phy_device *phydev) -{ - struct mv88q2xxx_priv *priv = phydev->priv; - int ret; - - /* The 88Q2XXX PHYs do have the extended ability register available, but - * register MDIO_PMA_EXTABLE where they should signalize it does not - * work according to specification. Therefore, we force it here. - */ - phydev->pma_extable = MDIO_PMA_EXTABLE_BT1; - - /* Configure interrupt with default settings, output is driven low for - * active interrupt and high for inactive. - */ - if (phy_interrupt_is_valid(phydev)) { - ret = phy_set_bits_mmd(phydev, MDIO_MMD_PCS, - MDIO_MMD_PCS_MV_GPIO_INT_CTRL, - MDIO_MMD_PCS_MV_GPIO_INT_CTRL_TRI_DIS); - if (ret < 0) - return ret; - } - - /* Enable LED function and disable TX disable feature on LED/TX_ENABLE */ - if (priv->enable_led0) { - ret = phy_clear_bits_mmd(phydev, MDIO_MMD_PCS, - MDIO_MMD_PCS_MV_RESET_CTRL, - MDIO_MMD_PCS_MV_RESET_CTRL_TX_DISABLE); - if (ret < 0) - return ret; - } - - /* Enable temperature sense */ - if (priv->enable_temp) { - ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, - MDIO_MMD_PCS_MV_TEMP_SENSOR2, - MDIO_MMD_PCS_MV_TEMP_SENSOR2_DIS_MASK, 0); - if (ret < 0) - return ret; - } - - return 0; -} - static int mv88q2xxx_get_sqi(struct phy_device *phydev) { int ret; @@ -667,6 +623,12 @@ static int mv88q2xxx_resume(struct phy_device *phydev) } #if IS_ENABLED(CONFIG_HWMON) +static int mv88q2xxx_enable_temp_sense(struct phy_device *phydev) +{ + return phy_modify_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_TEMP_SENSOR2, + MDIO_MMD_PCS_MV_TEMP_SENSOR2_DIS_MASK, 0); +} + static const struct hwmon_channel_info * const mv88q2xxx_hwmon_info[] = { HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_ALARM), NULL @@ -762,11 +724,13 @@ static const struct hwmon_chip_info mv88q2xxx_hwmon_chip_info = { static int mv88q2xxx_hwmon_probe(struct phy_device *phydev) { - struct mv88q2xxx_priv *priv = phydev->priv; struct device *dev = &phydev->mdio.dev; struct device *hwmon; + int ret; - priv->enable_temp = true; + ret = mv88q2xxx_enable_temp_sense(phydev); + if (ret < 0) + return ret; hwmon = devm_hwmon_device_register_with_info(dev, NULL, phydev, &mv88q2xxx_hwmon_chip_info, @@ -776,6 +740,11 @@ static int mv88q2xxx_hwmon_probe(struct phy_device *phydev) } #else +static int mv88q2xxx_enable_temp_sense(struct phy_device 
*phydev) +{ + return 0; +} + static int mv88q2xxx_hwmon_probe(struct phy_device *phydev) { return 0; @@ -828,6 +797,7 @@ static int mv88q2xxx_leds_probe(struct phy_device *phydev) static int mv88q2xxx_probe(struct phy_device *phydev) { struct mv88q2xxx_priv *priv; + int ret; priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -835,22 +805,53 @@ static int mv88q2xxx_probe(struct phy_device *phydev) phydev->priv = priv; - return 0; + ret = mv88q2xxx_leds_probe(phydev); + if (ret) + return ret; + + return mv88q2xxx_hwmon_probe(phydev); } -static int mv88q222x_probe(struct phy_device *phydev) +static int mv88q2xxx_config_init(struct phy_device *phydev) { + struct mv88q2xxx_priv *priv = phydev->priv; int ret; - ret = mv88q2xxx_probe(phydev); - if (ret) - return ret; + /* The 88Q2XXX PHYs do have the extended ability register available, but + * the MDIO_PMA_EXTABLE register, where they should signal it, does not + * work according to the specification. Therefore, we force it here. + */ + phydev->pma_extable = MDIO_PMA_EXTABLE_BT1; - ret = mv88q2xxx_leds_probe(phydev); - if (ret) + /* Configure interrupt with default settings, output is driven low for + * active interrupt and high for inactive. + */ + if (phy_interrupt_is_valid(phydev)) { + ret = phy_set_bits_mmd(phydev, MDIO_MMD_PCS, + MDIO_MMD_PCS_MV_GPIO_INT_CTRL, + MDIO_MMD_PCS_MV_GPIO_INT_CTRL_TRI_DIS); + if (ret < 0) + return ret; + } + + /* Enable LED function and disable TX disable feature on LED/TX_ENABLE */ + if (priv->enable_led0) { + ret = phy_clear_bits_mmd(phydev, MDIO_MMD_PCS, + MDIO_MMD_PCS_MV_RESET_CTRL, + MDIO_MMD_PCS_MV_RESET_CTRL_TX_DISABLE); + if (ret < 0) + return ret; + } + + /* Enable temperature sense again. There might have been a hard reset + * of the PHY, in which case the register content is restored to + * defaults and we need to enable it again.
+ */ + ret = mv88q2xxx_enable_temp_sense(phydev); + if (ret < 0) return ret; - return mv88q2xxx_hwmon_probe(phydev); + return 0; } static int mv88q2110_config_init(struct phy_device *phydev) @@ -1118,7 +1119,7 @@ static struct phy_driver mv88q2xxx_driver[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "mv88q2220", .flags = PHY_POLL_CABLE_TEST, - .probe = mv88q222x_probe, + .probe = mv88q2xxx_probe, .get_features = mv88q2xxx_get_features, .config_aneg = mv88q2xxx_config_aneg, .aneg_done = genphy_c45_aneg_done, diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index ede596c1a69d..f5ccbe33a384 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -8,17 +8,14 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/delay.h> #include <linux/device.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/gpio/consumer.h> #include <linux/init.h> -#include <linux/interrupt.h> #include <linux/io.h> #include <linux/kernel.h> -#include <linux/micrel_phy.h> #include <linux/mii.h> #include <linux/mm.h> #include <linux/module.h> @@ -27,7 +24,6 @@ #include <linux/of_mdio.h> #include <linux/phy.h> #include <linux/reset.h> -#include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/string.h> @@ -37,8 +33,6 @@ #define CREATE_TRACE_POINTS #include <trace/events/mdio.h> -#include "mdio-boardinfo.h" - static int mdiobus_register_gpiod(struct mdio_device *mdiodev) { /* Deassert the optional reset signal */ @@ -137,45 +131,6 @@ bool mdiobus_is_registered_device(struct mii_bus *bus, int addr) EXPORT_SYMBOL(mdiobus_is_registered_device); /** - * mdiobus_alloc_size - allocate a mii_bus structure - * @size: extra amount of memory to allocate for private storage. - * If non-zero, then bus->priv is points to that memory. - * - * Description: called by a bus driver to allocate an mii_bus - * structure to fill in. - */ -struct mii_bus *mdiobus_alloc_size(size_t size) -{ - struct mii_bus *bus; - size_t aligned_size = ALIGN(sizeof(*bus), NETDEV_ALIGN); - size_t alloc_size; - int i; - - /* If we alloc extra space, it should be aligned */ - if (size) - alloc_size = aligned_size + size; - else - alloc_size = sizeof(*bus); - - bus = kzalloc(alloc_size, GFP_KERNEL); - if (!bus) - return NULL; - - bus->state = MDIOBUS_ALLOCATED; - if (size) - bus->priv = (void *)bus + aligned_size; - - /* Initialise the interrupts to polling and 64-bit seqcounts */ - for (i = 0; i < PHY_MAX_ADDR; i++) { - bus->irq[i] = PHY_POLL; - u64_stats_init(&bus->stats[i].syncp); - } - - return bus; -} -EXPORT_SYMBOL(mdiobus_alloc_size); - -/** * mdiobus_release - mii_bus device release callback * @d: the target struct device that contains the mii_bus * @@ -403,11 +358,12 @@ static const struct attribute_group *mdio_bus_groups[] = { NULL, }; -static struct class mdio_bus_class = { +const struct class mdio_bus_class = { .name = "mdio_bus", .dev_release = mdiobus_release, .dev_groups = mdio_bus_groups, }; +EXPORT_SYMBOL_GPL(mdio_bus_class); /** * mdio_find_bus - Given the name of a mdiobus, find the mii_bus. @@ -451,422 +407,8 @@ struct mii_bus *of_mdio_find_bus(struct device_node *mdio_bus_np) return d ? to_mii_bus(d) : NULL; } EXPORT_SYMBOL(of_mdio_find_bus); - -/* Walk the list of subnodes of a mdio bus and look for a node that - * matches the mdio device's address with its 'reg' property. If - * found, set the of_node pointer for the mdio device. 
This allows - * auto-probed phy devices to be supplied with information passed in - * via DT. - * If a PHY package is found, PHY is searched also there. - */ -static int of_mdiobus_find_phy(struct device *dev, struct mdio_device *mdiodev, - struct device_node *np) -{ - struct device_node *child; - - for_each_available_child_of_node(np, child) { - int addr; - - if (of_node_name_eq(child, "ethernet-phy-package")) { - /* Validate PHY package reg presence */ - if (!of_property_present(child, "reg")) { - of_node_put(child); - return -EINVAL; - } - - if (!of_mdiobus_find_phy(dev, mdiodev, child)) { - /* The refcount for the PHY package will be - * incremented later when PHY join the Package. - */ - of_node_put(child); - return 0; - } - - continue; - } - - addr = of_mdio_parse_addr(dev, child); - if (addr < 0) - continue; - - if (addr == mdiodev->addr) { - device_set_node(dev, of_fwnode_handle(child)); - /* The refcount on "child" is passed to the mdio - * device. Do _not_ use of_node_put(child) here. - */ - return 0; - } - } - - return -ENODEV; -} - -static void of_mdiobus_link_mdiodev(struct mii_bus *bus, - struct mdio_device *mdiodev) -{ - struct device *dev = &mdiodev->dev; - - if (dev->of_node || !bus->dev.of_node) - return; - - of_mdiobus_find_phy(dev, mdiodev, bus->dev.of_node); -} -#else /* !IS_ENABLED(CONFIG_OF_MDIO) */ -static inline void of_mdiobus_link_mdiodev(struct mii_bus *mdio, - struct mdio_device *mdiodev) -{ -} #endif -/** - * mdiobus_create_device - create a full MDIO device given - * a mdio_board_info structure - * @bus: MDIO bus to create the devices on - * @bi: mdio_board_info structure describing the devices - * - * Returns 0 on success or < 0 on error. - */ -static int mdiobus_create_device(struct mii_bus *bus, - struct mdio_board_info *bi) -{ - struct mdio_device *mdiodev; - int ret = 0; - - mdiodev = mdio_device_create(bus, bi->mdio_addr); - if (IS_ERR(mdiodev)) - return -ENODEV; - - strscpy(mdiodev->modalias, bi->modalias, - sizeof(mdiodev->modalias)); - mdiodev->bus_match = mdio_device_bus_match; - mdiodev->dev.platform_data = (void *)bi->platform_data; - - ret = mdio_device_register(mdiodev); - if (ret) - mdio_device_free(mdiodev); - - return ret; -} - -static struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr, bool c45) -{ - struct phy_device *phydev = ERR_PTR(-ENODEV); - struct fwnode_handle *fwnode; - char node_name[16]; - int err; - - phydev = get_phy_device(bus, addr, c45); - if (IS_ERR(phydev)) - return phydev; - - /* For DT, see if the auto-probed phy has a corresponding child - * in the bus node, and set the of_node pointer in this case. - */ - of_mdiobus_link_mdiodev(bus, &phydev->mdio); - - /* Search for a swnode for the phy in the swnode hierarchy of the bus. - * If there is no swnode for the phy provided, just ignore it. - */ - if (dev_fwnode(&bus->dev) && !dev_fwnode(&phydev->mdio.dev)) { - snprintf(node_name, sizeof(node_name), "ethernet-phy@%d", - addr); - fwnode = fwnode_get_named_child_node(dev_fwnode(&bus->dev), - node_name); - if (fwnode) - device_set_node(&phydev->mdio.dev, fwnode); - } - - err = phy_device_register(phydev); - if (err) { - phy_device_free(phydev); - return ERR_PTR(-ENODEV); - } - - return phydev; -} - -/** - * mdiobus_scan_c22 - scan one address on a bus for C22 MDIO devices. - * @bus: mii_bus to scan - * @addr: address on bus to scan - * - * This function scans one address on the MDIO bus, looking for - * devices which can be identified using a vendor/product ID in - * registers 2 and 3. 
Not all MDIO devices have such registers, but - * PHY devices typically do. Hence this function assumes anything - * found is a PHY, or can be treated as a PHY. Other MDIO devices, - * such as switches, will probably not be found during the scan. - */ -struct phy_device *mdiobus_scan_c22(struct mii_bus *bus, int addr) -{ - return mdiobus_scan(bus, addr, false); -} -EXPORT_SYMBOL(mdiobus_scan_c22); - -/** - * mdiobus_scan_c45 - scan one address on a bus for C45 MDIO devices. - * @bus: mii_bus to scan - * @addr: address on bus to scan - * - * This function scans one address on the MDIO bus, looking for - * devices which can be identified using a vendor/product ID in - * registers 2 and 3. Not all MDIO devices have such registers, but - * PHY devices typically do. Hence this function assumes anything - * found is a PHY, or can be treated as a PHY. Other MDIO devices, - * such as switches, will probably not be found during the scan. - */ -static struct phy_device *mdiobus_scan_c45(struct mii_bus *bus, int addr) -{ - return mdiobus_scan(bus, addr, true); -} - -static int mdiobus_scan_bus_c22(struct mii_bus *bus) -{ - int i; - - for (i = 0; i < PHY_MAX_ADDR; i++) { - if ((bus->phy_mask & BIT(i)) == 0) { - struct phy_device *phydev; - - phydev = mdiobus_scan_c22(bus, i); - if (IS_ERR(phydev) && (PTR_ERR(phydev) != -ENODEV)) - return PTR_ERR(phydev); - } - } - return 0; -} - -static int mdiobus_scan_bus_c45(struct mii_bus *bus) -{ - int i; - - for (i = 0; i < PHY_MAX_ADDR; i++) { - if ((bus->phy_mask & BIT(i)) == 0) { - struct phy_device *phydev; - - /* Don't scan C45 if we already have a C22 device */ - if (bus->mdio_map[i]) - continue; - - phydev = mdiobus_scan_c45(bus, i); - if (IS_ERR(phydev) && (PTR_ERR(phydev) != -ENODEV)) - return PTR_ERR(phydev); - } - } - return 0; -} - -/* There are some C22 PHYs which do bad things when where is a C45 - * transaction on the bus, like accepting a read themselves, and - * stomping over the true devices reply, to performing a write to - * themselves which was intended for another device. Now that C22 - * devices have been found, see if any of them are bad for C45, and if we - * should skip the C45 scan. - */ -static bool mdiobus_prevent_c45_scan(struct mii_bus *bus) -{ - int i; - - for (i = 0; i < PHY_MAX_ADDR; i++) { - struct phy_device *phydev; - u32 oui; - - phydev = mdiobus_get_phy(bus, i); - if (!phydev) - continue; - oui = phydev->phy_id >> 10; - - if (oui == MICREL_OUI) - return true; - } - return false; -} - -/** - * __mdiobus_register - bring up all the PHYs on a given bus and attach them to bus - * @bus: target mii_bus - * @owner: module containing bus accessor functions - * - * Description: Called by a bus driver to bring up all the PHYs - * on a given bus, and attach them to the bus. Drivers should use - * mdiobus_register() rather than __mdiobus_register() unless they - * need to pass a specific owner module. MDIO devices which are not - * PHYs will not be brought up by this function. They are expected - * to be explicitly listed in DT and instantiated by of_mdiobus_register(). - * - * Returns 0 on success or < 0 on error. 
- */ -int __mdiobus_register(struct mii_bus *bus, struct module *owner) -{ - struct mdio_device *mdiodev; - struct gpio_desc *gpiod; - bool prevent_c45_scan; - int i, err; - - if (!bus || !bus->name) - return -EINVAL; - - /* An access method always needs both read and write operations */ - if (!!bus->read != !!bus->write || !!bus->read_c45 != !!bus->write_c45) - return -EINVAL; - - /* At least one method is mandatory */ - if (!bus->read && !bus->read_c45) - return -EINVAL; - - if (bus->parent && bus->parent->of_node) - bus->parent->of_node->fwnode.flags |= - FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD; - - WARN(bus->state != MDIOBUS_ALLOCATED && - bus->state != MDIOBUS_UNREGISTERED, - "%s: not in ALLOCATED or UNREGISTERED state\n", bus->id); - - bus->owner = owner; - bus->dev.parent = bus->parent; - bus->dev.class = &mdio_bus_class; - bus->dev.groups = NULL; - dev_set_name(&bus->dev, "%s", bus->id); - - /* If the bus state is allocated, we're registering a fresh bus - * that may have a fwnode associated with it. Grab a reference - * to the fwnode. This will be dropped when the bus is released. - * If the bus was set to unregistered, it means that the bus was - * previously registered, and we've already grabbed a reference. - */ - if (bus->state == MDIOBUS_ALLOCATED) - fwnode_handle_get(dev_fwnode(&bus->dev)); - - /* We need to set state to MDIOBUS_UNREGISTERED to correctly release - * the device in mdiobus_free() - * - * State will be updated later in this function in case of success - */ - bus->state = MDIOBUS_UNREGISTERED; - - err = device_register(&bus->dev); - if (err) { - pr_err("mii_bus %s failed to register\n", bus->id); - return -EINVAL; - } - - mutex_init(&bus->mdio_lock); - mutex_init(&bus->shared_lock); - - /* assert bus level PHY GPIO reset */ - gpiod = devm_gpiod_get_optional(&bus->dev, "reset", GPIOD_OUT_HIGH); - if (IS_ERR(gpiod)) { - err = dev_err_probe(&bus->dev, PTR_ERR(gpiod), - "mii_bus %s couldn't get reset GPIO\n", - bus->id); - device_del(&bus->dev); - return err; - } else if (gpiod) { - bus->reset_gpiod = gpiod; - fsleep(bus->reset_delay_us); - gpiod_set_value_cansleep(gpiod, 0); - if (bus->reset_post_delay_us > 0) - fsleep(bus->reset_post_delay_us); - } - - if (bus->reset) { - err = bus->reset(bus); - if (err) - goto error_reset_gpiod; - } - - if (bus->read) { - err = mdiobus_scan_bus_c22(bus); - if (err) - goto error; - } - - prevent_c45_scan = mdiobus_prevent_c45_scan(bus); - - if (!prevent_c45_scan && bus->read_c45) { - err = mdiobus_scan_bus_c45(bus); - if (err) - goto error; - } - - mdiobus_setup_mdiodev_from_board_info(bus, mdiobus_create_device); - - bus->state = MDIOBUS_REGISTERED; - dev_dbg(&bus->dev, "probed\n"); - return 0; - -error: - for (i = 0; i < PHY_MAX_ADDR; i++) { - mdiodev = bus->mdio_map[i]; - if (!mdiodev) - continue; - - mdiodev->device_remove(mdiodev); - mdiodev->device_free(mdiodev); - } -error_reset_gpiod: - /* Put PHYs in RESET to save power */ - if (bus->reset_gpiod) - gpiod_set_value_cansleep(bus->reset_gpiod, 1); - - device_del(&bus->dev); - return err; -} -EXPORT_SYMBOL(__mdiobus_register); - -void mdiobus_unregister(struct mii_bus *bus) -{ - struct mdio_device *mdiodev; - int i; - - if (WARN_ON_ONCE(bus->state != MDIOBUS_REGISTERED)) - return; - bus->state = MDIOBUS_UNREGISTERED; - - for (i = 0; i < PHY_MAX_ADDR; i++) { - mdiodev = bus->mdio_map[i]; - if (!mdiodev) - continue; - - if (mdiodev->reset_gpio) - gpiod_put(mdiodev->reset_gpio); - - mdiodev->device_remove(mdiodev); - mdiodev->device_free(mdiodev); - } - - /* Put PHYs in RESET to 
save power */ - if (bus->reset_gpiod) - gpiod_set_value_cansleep(bus->reset_gpiod, 1); - - device_del(&bus->dev); -} -EXPORT_SYMBOL(mdiobus_unregister); - -/** - * mdiobus_free - free a struct mii_bus - * @bus: mii_bus to free - * - * This function releases the reference to the underlying device - * object in the mii_bus. If this is the last reference, the mii_bus - * will be freed. - */ -void mdiobus_free(struct mii_bus *bus) -{ - /* For compatibility with error handling in drivers. */ - if (bus->state == MDIOBUS_ALLOCATED) { - kfree(bus); - return; - } - - WARN(bus->state != MDIOBUS_UNREGISTERED, - "%s: not in UNREGISTERED state\n", bus->id); - bus->state = MDIOBUS_RELEASED; - - put_device(&bus->dev); -} -EXPORT_SYMBOL(mdiobus_free); - static void mdiobus_stats_acct(struct mdio_bus_stats *stats, bool op, int ret) { preempt_disable(); diff --git a/drivers/net/phy/mdio_bus_provider.c b/drivers/net/phy/mdio_bus_provider.c new file mode 100644 index 000000000000..65850e36284d --- /dev/null +++ b/drivers/net/phy/mdio_bus_provider.c @@ -0,0 +1,484 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* MDIO Bus provider interface + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/errno.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/gpio/consumer.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/micrel_phy.h> +#include <linux/mii.h> +#include <linux/mm.h> +#include <linux/netdevice.h> +#include <linux/of_device.h> +#include <linux/of_mdio.h> +#include <linux/phy.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/uaccess.h> +#include <linux/unistd.h> + +#include "mdio-boardinfo.h" + +/** + * mdiobus_alloc_size - allocate a mii_bus structure + * @size: extra amount of memory to allocate for private storage. + * If non-zero, then bus->priv points to that memory. + * + * Description: called by a bus driver to allocate an mii_bus + * structure to fill in. + */ +struct mii_bus *mdiobus_alloc_size(size_t size) +{ + struct mii_bus *bus; + size_t aligned_size = ALIGN(sizeof(*bus), NETDEV_ALIGN); + size_t alloc_size; + int i; + + /* If we alloc extra space, it should be aligned */ + if (size) + alloc_size = aligned_size + size; + else + alloc_size = sizeof(*bus); + + bus = kzalloc(alloc_size, GFP_KERNEL); + if (!bus) + return NULL; + + bus->state = MDIOBUS_ALLOCATED; + if (size) + bus->priv = (void *)bus + aligned_size; + + /* Initialise the interrupts to polling and 64-bit seqcounts */ + for (i = 0; i < PHY_MAX_ADDR; i++) { + bus->irq[i] = PHY_POLL; + u64_stats_init(&bus->stats[i].syncp); + } + + return bus; +} +EXPORT_SYMBOL(mdiobus_alloc_size); + +#if IS_ENABLED(CONFIG_OF_MDIO) +/* Walk the list of subnodes of a mdio bus and look for a node that + * matches the mdio device's address with its 'reg' property. If + * found, set the of_node pointer for the mdio device. This allows + * auto-probed phy devices to be supplied with information passed in + * via DT. + * If a PHY package is found, the PHY is searched for there as well.
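/*
 * [Editorial aside] mdiobus_alloc_size() above co-allocates the caller's
 * private area directly behind struct mii_bus, aligned to NETDEV_ALIGN, so
 * a single kzalloc() serves both. The same trick in miniature (hypothetical
 * structure; ALIGN() and kzalloc() are the kernel helpers used above):
 */
struct sketch_bus {
	void *priv;
	/* ... bus bookkeeping ... */
};

static struct sketch_bus *sketch_alloc(size_t priv_size)
{
	size_t aligned = ALIGN(sizeof(struct sketch_bus), NETDEV_ALIGN);
	struct sketch_bus *bus = kzalloc(aligned + priv_size, GFP_KERNEL);

	if (bus && priv_size)
		bus->priv = (void *)bus + aligned;	/* same allocation */

	return bus;
}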
+ */ +static int of_mdiobus_find_phy(struct device *dev, struct mdio_device *mdiodev, + struct device_node *np) +{ + struct device_node *child; + + for_each_available_child_of_node(np, child) { + int addr; + + if (of_node_name_eq(child, "ethernet-phy-package")) { + /* Validate PHY package reg presence */ + if (!of_property_present(child, "reg")) { + of_node_put(child); + return -EINVAL; + } + + if (!of_mdiobus_find_phy(dev, mdiodev, child)) { + /* The refcount for the PHY package will be + * incremented later when the PHY joins the package. + */ + of_node_put(child); + return 0; + } + + continue; + } + + addr = of_mdio_parse_addr(dev, child); + if (addr < 0) + continue; + + if (addr == mdiodev->addr) { + device_set_node(dev, of_fwnode_handle(child)); + /* The refcount on "child" is passed to the mdio + * device. Do _not_ use of_node_put(child) here. + */ + return 0; + } + } + + return -ENODEV; +} + +static void of_mdiobus_link_mdiodev(struct mii_bus *bus, + struct mdio_device *mdiodev) +{ + struct device *dev = &mdiodev->dev; + + if (dev->of_node || !bus->dev.of_node) + return; + + of_mdiobus_find_phy(dev, mdiodev, bus->dev.of_node); +} +#endif + +/** + * mdiobus_create_device - create a full MDIO device given + * a mdio_board_info structure + * @bus: MDIO bus to create the devices on + * @bi: mdio_board_info structure describing the devices + * + * Returns 0 on success or < 0 on error. + */ +static int mdiobus_create_device(struct mii_bus *bus, + struct mdio_board_info *bi) +{ + struct mdio_device *mdiodev; + int ret = 0; + + mdiodev = mdio_device_create(bus, bi->mdio_addr); + if (IS_ERR(mdiodev)) + return -ENODEV; + + strscpy(mdiodev->modalias, bi->modalias, + sizeof(mdiodev->modalias)); + mdiodev->bus_match = mdio_device_bus_match; + mdiodev->dev.platform_data = (void *)bi->platform_data; + + ret = mdio_device_register(mdiodev); + if (ret) + mdio_device_free(mdiodev); + + return ret; +} + +static struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr, bool c45) +{ + struct phy_device *phydev = ERR_PTR(-ENODEV); + struct fwnode_handle *fwnode; + char node_name[16]; + int err; + + phydev = get_phy_device(bus, addr, c45); + if (IS_ERR(phydev)) + return phydev; + +#if IS_ENABLED(CONFIG_OF_MDIO) + /* For DT, see if the auto-probed phy has a corresponding child + * in the bus node, and set the of_node pointer in this case. + */ + of_mdiobus_link_mdiodev(bus, &phydev->mdio); +#endif + + /* Search for a swnode for the phy in the swnode hierarchy of the bus. + * If there is no swnode for the phy provided, just ignore it. + */ + if (dev_fwnode(&bus->dev) && !dev_fwnode(&phydev->mdio.dev)) { + snprintf(node_name, sizeof(node_name), "ethernet-phy@%d", + addr); + fwnode = fwnode_get_named_child_node(dev_fwnode(&bus->dev), + node_name); + if (fwnode) + device_set_node(&phydev->mdio.dev, fwnode); + } + + err = phy_device_register(phydev); + if (err) { + phy_device_free(phydev); + return ERR_PTR(-ENODEV); + } + + return phydev; +} + +/** + * mdiobus_scan_c22 - scan one address on a bus for C22 MDIO devices. + * @bus: mii_bus to scan + * @addr: address on bus to scan + * + * This function scans one address on the MDIO bus, looking for + * devices which can be identified using a vendor/product ID in + * registers 2 and 3. Not all MDIO devices have such registers, but + * PHY devices typically do. Hence this function assumes anything + * found is a PHY, or can be treated as a PHY. Other MDIO devices, + * such as switches, will probably not be found during the scan.
+ */ +struct phy_device *mdiobus_scan_c22(struct mii_bus *bus, int addr) +{ + return mdiobus_scan(bus, addr, false); +} +EXPORT_SYMBOL(mdiobus_scan_c22); + +/** + * mdiobus_scan_c45 - scan one address on a bus for C45 MDIO devices. + * @bus: mii_bus to scan + * @addr: address on bus to scan + * + * This function scans one address on the MDIO bus, looking for + * devices which can be identified using a vendor/product ID in + * registers 2 and 3. Not all MDIO devices have such registers, but + * PHY devices typically do. Hence this function assumes anything + * found is a PHY, or can be treated as a PHY. Other MDIO devices, + * such as switches, will probably not be found during the scan. + */ +static struct phy_device *mdiobus_scan_c45(struct mii_bus *bus, int addr) +{ + return mdiobus_scan(bus, addr, true); +} + +static int mdiobus_scan_bus_c22(struct mii_bus *bus) +{ + int i; + + for (i = 0; i < PHY_MAX_ADDR; i++) { + if ((bus->phy_mask & BIT(i)) == 0) { + struct phy_device *phydev; + + phydev = mdiobus_scan_c22(bus, i); + if (IS_ERR(phydev) && (PTR_ERR(phydev) != -ENODEV)) + return PTR_ERR(phydev); + } + } + return 0; +} + +static int mdiobus_scan_bus_c45(struct mii_bus *bus) +{ + int i; + + for (i = 0; i < PHY_MAX_ADDR; i++) { + if ((bus->phy_mask & BIT(i)) == 0) { + struct phy_device *phydev; + + /* Don't scan C45 if we already have a C22 device */ + if (bus->mdio_map[i]) + continue; + + phydev = mdiobus_scan_c45(bus, i); + if (IS_ERR(phydev) && (PTR_ERR(phydev) != -ENODEV)) + return PTR_ERR(phydev); + } + } + return 0; +} + +/* There are some C22 PHYs which do bad things when there is a C45 + * transaction on the bus, like accepting a read themselves, and + * stomping over the true device's reply, or performing a write to + * themselves which was intended for another device. Now that C22 + * devices have been found, see if any of them are bad for C45, and if we + * should skip the C45 scan. + */ +static bool mdiobus_prevent_c45_scan(struct mii_bus *bus) +{ + int i; + + for (i = 0; i < PHY_MAX_ADDR; i++) { + struct phy_device *phydev; + u32 oui; + + phydev = mdiobus_get_phy(bus, i); + if (!phydev) + continue; + oui = phydev->phy_id >> 10; + + if (oui == MICREL_OUI) + return true; + } + return false; +} + +/** + * __mdiobus_register - bring up all the PHYs on a given bus and attach them to bus + * @bus: target mii_bus + * @owner: module containing bus accessor functions + * + * Description: Called by a bus driver to bring up all the PHYs + * on a given bus, and attach them to the bus. Drivers should use + * mdiobus_register() rather than __mdiobus_register() unless they + * need to pass a specific owner module. MDIO devices which are not + * PHYs will not be brought up by this function. They are expected + * to be explicitly listed in DT and instantiated by of_mdiobus_register(). + * + * Returns 0 on success or < 0 on error.
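/*
 * [Editorial sketch] The kernel-doc above describes __mdiobus_register();
 * a typical bus provider reaches it through the mdiobus_register() wrapper,
 * which passes THIS_MODULE as the owner. The usual call sequence, with
 * hypothetical accessors and error handling trimmed:
 */
static int sketch_c22_read(struct mii_bus *bus, int addr, int regnum)
{
	return 0xffff;	/* hypothetical: pretend no device responds */
}

static int sketch_c22_write(struct mii_bus *bus, int addr, int regnum,
			    u16 val)
{
	return 0;
}

static int sketch_mdio_probe(struct device *dev)
{
	struct mii_bus *bus;
	int err;

	bus = mdiobus_alloc_size(sizeof(u32));	/* one u32 of private storage */
	if (!bus)
		return -ENOMEM;

	bus->name = "sketch-mdio";
	snprintf(bus->id, MII_BUS_ID_SIZE, "sketch-%s", dev_name(dev));
	bus->parent = dev;
	bus->read = sketch_c22_read;	/* read/write must come as a pair */
	bus->write = sketch_c22_write;
	bus->phy_mask = ~GENMASK(3, 0);	/* set bits = addresses NOT scanned */

	err = mdiobus_register(bus);
	if (err)
		mdiobus_free(bus);

	return err;
}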
+ */ +int __mdiobus_register(struct mii_bus *bus, struct module *owner) +{ + struct mdio_device *mdiodev; + struct gpio_desc *gpiod; + bool prevent_c45_scan; + int i, err; + + if (!bus || !bus->name) + return -EINVAL; + + /* An access method always needs both read and write operations */ + if (!!bus->read != !!bus->write || !!bus->read_c45 != !!bus->write_c45) + return -EINVAL; + + /* At least one method is mandatory */ + if (!bus->read && !bus->read_c45) + return -EINVAL; + + if (bus->parent && bus->parent->of_node) + bus->parent->of_node->fwnode.flags |= + FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD; + + WARN(bus->state != MDIOBUS_ALLOCATED && + bus->state != MDIOBUS_UNREGISTERED, + "%s: not in ALLOCATED or UNREGISTERED state\n", bus->id); + + bus->owner = owner; + bus->dev.parent = bus->parent; + bus->dev.class = &mdio_bus_class; + bus->dev.groups = NULL; + dev_set_name(&bus->dev, "%s", bus->id); + + /* If the bus state is allocated, we're registering a fresh bus + * that may have a fwnode associated with it. Grab a reference + * to the fwnode. This will be dropped when the bus is released. + * If the bus was set to unregistered, it means that the bus was + * previously registered, and we've already grabbed a reference. + */ + if (bus->state == MDIOBUS_ALLOCATED) + fwnode_handle_get(dev_fwnode(&bus->dev)); + + /* We need to set state to MDIOBUS_UNREGISTERED to correctly release + * the device in mdiobus_free() + * + * State will be updated later in this function in case of success + */ + bus->state = MDIOBUS_UNREGISTERED; + + err = device_register(&bus->dev); + if (err) { + pr_err("mii_bus %s failed to register\n", bus->id); + return -EINVAL; + } + + mutex_init(&bus->mdio_lock); + mutex_init(&bus->shared_lock); + + /* assert bus level PHY GPIO reset */ + gpiod = devm_gpiod_get_optional(&bus->dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(gpiod)) { + err = dev_err_probe(&bus->dev, PTR_ERR(gpiod), + "mii_bus %s couldn't get reset GPIO\n", + bus->id); + device_del(&bus->dev); + return err; + } else if (gpiod) { + bus->reset_gpiod = gpiod; + fsleep(bus->reset_delay_us); + gpiod_set_value_cansleep(gpiod, 0); + if (bus->reset_post_delay_us > 0) + fsleep(bus->reset_post_delay_us); + } + + if (bus->reset) { + err = bus->reset(bus); + if (err) + goto error_reset_gpiod; + } + + if (bus->read) { + err = mdiobus_scan_bus_c22(bus); + if (err) + goto error; + } + + prevent_c45_scan = mdiobus_prevent_c45_scan(bus); + + if (!prevent_c45_scan && bus->read_c45) { + err = mdiobus_scan_bus_c45(bus); + if (err) + goto error; + } + + mdiobus_setup_mdiodev_from_board_info(bus, mdiobus_create_device); + + bus->state = MDIOBUS_REGISTERED; + dev_dbg(&bus->dev, "probed\n"); + return 0; + +error: + for (i = 0; i < PHY_MAX_ADDR; i++) { + mdiodev = bus->mdio_map[i]; + if (!mdiodev) + continue; + + mdiodev->device_remove(mdiodev); + mdiodev->device_free(mdiodev); + } +error_reset_gpiod: + /* Put PHYs in RESET to save power */ + if (bus->reset_gpiod) + gpiod_set_value_cansleep(bus->reset_gpiod, 1); + + device_del(&bus->dev); + return err; +} +EXPORT_SYMBOL(__mdiobus_register); + +void mdiobus_unregister(struct mii_bus *bus) +{ + struct mdio_device *mdiodev; + int i; + + if (WARN_ON_ONCE(bus->state != MDIOBUS_REGISTERED)) + return; + bus->state = MDIOBUS_UNREGISTERED; + + for (i = 0; i < PHY_MAX_ADDR; i++) { + mdiodev = bus->mdio_map[i]; + if (!mdiodev) + continue; + + if (mdiodev->reset_gpio) + gpiod_put(mdiodev->reset_gpio); + + mdiodev->device_remove(mdiodev); + mdiodev->device_free(mdiodev); + } + + /* Put PHYs in RESET to 
save power */ + if (bus->reset_gpiod) + gpiod_set_value_cansleep(bus->reset_gpiod, 1); + + device_del(&bus->dev); +} +EXPORT_SYMBOL(mdiobus_unregister); + +/** + * mdiobus_free - free a struct mii_bus + * @bus: mii_bus to free + * + * This function releases the reference to the underlying device + * object in the mii_bus. If this is the last reference, the mii_bus + * will be freed. + */ +void mdiobus_free(struct mii_bus *bus) +{ + /* For compatibility with error handling in drivers. */ + if (bus->state == MDIOBUS_ALLOCATED) { + kfree(bus); + return; + } + + WARN(bus->state != MDIOBUS_UNREGISTERED, + "%s: not in UNREGISTERED state\n", bus->id); + bus->state = MDIOBUS_RELEASED; + + put_device(&bus->dev); +} +EXPORT_SYMBOL(mdiobus_free); diff --git a/drivers/net/phy/mediatek/Kconfig b/drivers/net/phy/mediatek/Kconfig index 2a8ac5aed0f8..4308002bb82c 100644 --- a/drivers/net/phy/mediatek/Kconfig +++ b/drivers/net/phy/mediatek/Kconfig @@ -15,8 +15,9 @@ config MEDIATEK_GE_PHY config MEDIATEK_GE_SOC_PHY tristate "MediaTek SoC Ethernet PHYs" - depends on (ARM64 && ARCH_MEDIATEK) || COMPILE_TEST - depends on NVMEM_MTK_EFUSE + depends on ARM64 || COMPILE_TEST + depends on ARCH_AIROHA || (ARCH_MEDIATEK && NVMEM_MTK_EFUSE) || \ + COMPILE_TEST select MTK_NET_PHYLIB help Supports MediaTek SoC built-in Gigabit Ethernet PHYs. diff --git a/drivers/net/phy/mediatek/mtk-ge-soc.c b/drivers/net/phy/mediatek/mtk-ge-soc.c index 175cf5239bba..cd09684780a4 100644 --- a/drivers/net/phy/mediatek/mtk-ge-soc.c +++ b/drivers/net/phy/mediatek/mtk-ge-soc.c @@ -11,8 +11,11 @@ #include "../phylib.h" #include "mtk.h" +#define MTK_PHY_MAX_LEDS 2 + #define MTK_GPHY_ID_MT7981 0x03a29461 #define MTK_GPHY_ID_MT7988 0x03a29481 +#define MTK_GPHY_ID_AN7581 0x03a294c1 #define MTK_EXT_PAGE_ACCESS 0x1f #define MTK_PHY_PAGE_STANDARD 0x0000 @@ -1406,6 +1409,52 @@ static int mt7981_phy_probe(struct phy_device *phydev) return mt798x_phy_calibration(phydev); } +static int an7581_phy_probe(struct phy_device *phydev) +{ + struct mtk_socphy_priv *priv; + struct pinctrl *pinctrl; + + /* Toggle pinctrl to enable PHY LED */ + pinctrl = devm_pinctrl_get_select(&phydev->mdio.dev, "gbe-led"); + if (IS_ERR(pinctrl)) + dev_err(&phydev->mdio.bus->dev, + "Failed to setup PHY LED pinctrl\n"); + + priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + phydev->priv = priv; + + return 0; +} + +static int an7581_phy_led_polarity_set(struct phy_device *phydev, int index, + unsigned long modes) +{ + u16 val = 0; + u32 mode; + + if (index >= MTK_PHY_MAX_LEDS) + return -EINVAL; + + for_each_set_bit(mode, &modes, __PHY_LED_MODES_NUM) { + switch (mode) { + case PHY_LED_ACTIVE_LOW: + val = MTK_PHY_LED_ON_POLARITY; + break; + case PHY_LED_ACTIVE_HIGH: + break; + default: + return -EINVAL; + } + } + + return phy_modify_mmd(phydev, MDIO_MMD_VEND2, index ? 
+ MTK_PHY_LED1_ON_CTRL : MTK_PHY_LED0_ON_CTRL, + MTK_PHY_LED_ON_POLARITY, val); +} + static struct phy_driver mtk_socphy_driver[] = { { PHY_ID_MATCH_EXACT(MTK_GPHY_ID_MT7981), @@ -1441,6 +1490,17 @@ static struct phy_driver mtk_socphy_driver[] = { .led_hw_control_set = mt798x_phy_led_hw_control_set, .led_hw_control_get = mt798x_phy_led_hw_control_get, }, + { + PHY_ID_MATCH_EXACT(MTK_GPHY_ID_AN7581), + .name = "Airoha AN7581 PHY", + .probe = an7581_phy_probe, + .led_blink_set = mt798x_phy_led_blink_set, + .led_brightness_set = mt798x_phy_led_brightness_set, + .led_hw_is_supported = mt798x_phy_led_hw_is_supported, + .led_hw_control_set = mt798x_phy_led_hw_control_set, + .led_hw_control_get = mt798x_phy_led_hw_control_get, + .led_polarity_set = an7581_phy_led_polarity_set, + }, }; module_phy_driver(mtk_socphy_driver); @@ -1448,6 +1508,7 @@ module_phy_driver(mtk_socphy_driver); static const struct mdio_device_id __maybe_unused mtk_socphy_tbl[] = { { PHY_ID_MATCH_EXACT(MTK_GPHY_ID_MT7981) }, { PHY_ID_MATCH_EXACT(MTK_GPHY_ID_MT7988) }, + { PHY_ID_MATCH_EXACT(MTK_GPHY_ID_AN7581) }, { } }; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index e2c6569d8c45..c2e5be404f07 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -3230,10 +3230,6 @@ static int lan8814_ptp_perout(struct ptp_clock_info *ptpci, int pulse_width; int pin, event; - /* Reject requests with unsupported flags */ - if (rq->perout.flags & ~PTP_PEROUT_DUTY_CYCLE) - return -EOPNOTSUPP; - mutex_lock(&shared->shared_lock); event = rq->perout.index; pin = ptp_find_pin(shared->ptp_clock, PTP_PF_PEROUT, event); @@ -3400,11 +3396,6 @@ static int lan8814_ptp_extts(struct ptp_clock_info *ptpci, struct phy_device *phydev = shared->phydev; int pin; - if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | - PTP_EXTTS_EDGES | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - pin = ptp_find_pin(shared->ptp_clock, PTP_PF_EXTTS, rq->extts.index); if (pin == -1 || pin != LAN8814_PTP_EXTTS_NUM) @@ -3911,6 +3902,10 @@ static int lan8814_ptp_probe_once(struct phy_device *phydev) shared->ptp_clock_info.n_ext_ts = LAN8814_PTP_EXTTS_NUM; shared->ptp_clock_info.n_pins = LAN8814_PTP_GPIO_NUM; shared->ptp_clock_info.pps = 0; + shared->ptp_clock_info.supported_extts_flags = PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS; + shared->ptp_clock_info.supported_perout_flags = PTP_PEROUT_DUTY_CYCLE; shared->ptp_clock_info.pin_config = shared->pin_config; shared->ptp_clock_info.n_per_out = LAN8814_PTP_PEROUT_NUM; shared->ptp_clock_info.adjfine = lan8814_ptpci_adjfine; @@ -5062,9 +5057,6 @@ static int lan8841_ptp_perout(struct ptp_clock_info *ptp, int pin; int ret; - if (rq->perout.flags & ~PTP_PEROUT_DUTY_CYCLE) - return -EOPNOTSUPP; - pin = ptp_find_pin(ptp_priv->ptp_clock, PTP_PF_PEROUT, rq->perout.index); if (pin == -1 || pin >= LAN8841_PTP_GPIO_NUM) return -EINVAL; @@ -5308,6 +5300,7 @@ static struct ptp_clock_info lan8841_ptp_clock_info = { .n_per_out = LAN8841_PTP_GPIO_NUM, .n_ext_ts = LAN8841_PTP_GPIO_NUM, .n_pins = LAN8841_PTP_GPIO_NUM, + .supported_perout_flags = PTP_PEROUT_DUTY_CYCLE, }; #define LAN8841_OPERATION_MODE_STRAP_LOW_REGISTER 3 diff --git a/drivers/net/phy/microchip_rds_ptp.c b/drivers/net/phy/microchip_rds_ptp.c index 3e6bf10cdeed..e6514ce04c29 100644 --- a/drivers/net/phy/microchip_rds_ptp.c +++ b/drivers/net/phy/microchip_rds_ptp.c @@ -224,10 +224,6 @@ static int mchp_rds_ptp_perout(struct ptp_clock_info *ptpci, struct phy_device *phydev = clock->phydev; int ret, event_pin, pulsewidth; - /* Reject requests with 
unsupported flags */ - if (perout->flags & ~PTP_PEROUT_DUTY_CYCLE) - return -EOPNOTSUPP; - event_pin = ptp_find_pin(clock->ptp_clock, PTP_PF_PEROUT, perout->index); if (event_pin != clock->event_pin) @@ -1259,6 +1255,7 @@ struct mchp_rds_ptp_clock *mchp_rds_ptp_probe(struct phy_device *phydev, u8 mmd, clock->caps.pps = 0; clock->caps.n_pins = MCHP_RDS_PTP_N_PIN; clock->caps.n_per_out = MCHP_RDS_PTP_N_PEROUT; + clock->caps.supported_perout_flags = PTP_PEROUT_DUTY_CYCLE; clock->caps.pin_config = clock->pin_config; clock->caps.adjfine = mchp_rds_ptp_ltc_adjfine; clock->caps.adjtime = mchp_rds_ptp_ltc_adjtime; diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 250a018d5546..f11dd32494c3 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -763,9 +763,6 @@ static int nxp_c45_perout_enable(struct nxp_c45_phy *priv, struct phy_device *phydev = priv->phydev; int pin; - if (perout->flags & ~PTP_PEROUT_PHASE) - return -EOPNOTSUPP; - pin = ptp_find_pin(priv->ptp_clock, PTP_PF_PEROUT, perout->index); if (pin < 0) return pin; @@ -861,12 +858,6 @@ static int nxp_c45_extts_enable(struct nxp_c45_phy *priv, const struct nxp_c45_phy_data *data = nxp_c45_get_data(priv->phydev); int pin; - if (extts->flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_FALLING_EDGE | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - /* Sampling on both edges is not supported */ if ((extts->flags & PTP_RISING_EDGE) && (extts->flags & PTP_FALLING_EDGE) && @@ -962,6 +953,10 @@ static int nxp_c45_init_ptp_clock(struct nxp_c45_phy *priv) .n_pins = ARRAY_SIZE(nxp_c45_ptp_pins), .n_ext_ts = 1, .n_per_out = 1, + .supported_extts_flags = PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS, + .supported_perout_flags = PTP_PEROUT_PHASE, }; priv->ptp_clock = ptp_clock_register(&priv->caps, diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index cc1bfd22fb81..2eb735e68dd8 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2975,6 +2975,21 @@ int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev, } EXPORT_SYMBOL_GPL(phy_get_tx_amplitude_gain); +/** + * phy_get_mac_termination - stores MAC termination in @val + * @phydev: phy_device struct + * @dev: pointer to the devices device struct + * @val: MAC termination + * + * Returns: 0 on success, < 0 on failure + */ +int phy_get_mac_termination(struct phy_device *phydev, struct device *dev, + u32 *val) +{ + return phy_get_u32_property(dev, "mac-termination-ohms", val); +} +EXPORT_SYMBOL_GPL(phy_get_mac_termination); + static int phy_led_set_brightness(struct led_classdev *led_cdev, enum led_brightness value) { @@ -3236,18 +3251,6 @@ struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode) EXPORT_SYMBOL(fwnode_phy_find_device); /** - * device_phy_find_device - For the given device, get the phy_device - * @dev: Pointer to the given device - * - * Refer return conditions of fwnode_phy_find_device(). - */ -struct phy_device *device_phy_find_device(struct device *dev) -{ - return fwnode_phy_find_device(dev_fwnode(dev)); -} -EXPORT_SYMBOL_GPL(device_phy_find_device); - -/** * fwnode_get_phy_node - Get the phy_node using the named reference. * @fwnode: Pointer to fwnode from which phy_node has to be obtained. 
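/*
 * [Editorial aside] The recurring PTP change in this series (dp83640,
 * lan8814/lan8841, microchip_rds_ptp and nxp-c45 above) drops open-coded
 * flag validation from the ->enable() paths and instead declares what the
 * clock supports, letting the PTP core reject anything else up front. A
 * hypothetical capability declaration in that style:
 */
static const struct ptp_clock_info sketch_ptp_caps = {
	.owner = THIS_MODULE,
	.name = "sketch-phc",
	.n_ext_ts = 1,
	.n_per_out = 1,
	/* the core filters requests against these masks before ->enable() */
	.supported_extts_flags = PTP_RISING_EDGE |
				 PTP_FALLING_EDGE |
				 PTP_STRICT_FLAGS,
	.supported_perout_flags = PTP_PEROUT_DUTY_CYCLE,
	/* .adjfine/.gettime64/.enable etc. as usual */
};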
* @@ -3262,12 +3265,12 @@ struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode) /* Only phy-handle is used for ACPI */ phy_node = fwnode_find_reference(fwnode, "phy-handle", 0); - if (is_acpi_node(fwnode) || !IS_ERR(phy_node)) + if (!IS_ERR(phy_node) || is_acpi_node(fwnode)) return phy_node; phy_node = fwnode_find_reference(fwnode, "phy", 0); - if (IS_ERR(phy_node)) - phy_node = fwnode_find_reference(fwnode, "phy-device", 0); - return phy_node; + if (!IS_ERR(phy_node)) + return phy_node; + return fwnode_find_reference(fwnode, "phy-device", 0); } EXPORT_SYMBOL_GPL(fwnode_get_phy_node); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 1bdd5d8bb5b0..0faa3d97e06b 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -24,13 +24,6 @@ #include "sfp.h" #include "swphy.h" -#define SUPPORTED_INTERFACES \ - (SUPPORTED_TP | SUPPORTED_MII | SUPPORTED_FIBRE | \ - SUPPORTED_BNC | SUPPORTED_AUI | SUPPORTED_Backplane) -#define ADVERTISED_INTERFACES \ - (ADVERTISED_TP | ADVERTISED_MII | ADVERTISED_FIBRE | \ - ADVERTISED_BNC | ADVERTISED_AUI | ADVERTISED_Backplane) - enum { PHYLINK_DISABLE_STOPPED, PHYLINK_DISABLE_LINK, diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 893c82479671..301fbe141b9b 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -10,6 +10,7 @@ #include <linux/bitops.h> #include <linux/of.h> #include <linux/phy.h> +#include <linux/netdevice.h> #include <linux/module.h> #include <linux/delay.h> #include <linux/clk.h> @@ -17,9 +18,15 @@ #include "realtek.h" -#define RTL821x_PHYSR 0x11 -#define RTL821x_PHYSR_DUPLEX BIT(13) -#define RTL821x_PHYSR_SPEED GENMASK(15, 14) +#define RTL8201F_IER 0x13 + +#define RTL8201F_ISR 0x1e +#define RTL8201F_ISR_ANERR BIT(15) +#define RTL8201F_ISR_DUPLEX BIT(13) +#define RTL8201F_ISR_LINK BIT(11) +#define RTL8201F_ISR_MASK (RTL8201F_ISR_ANERR | \ + RTL8201F_ISR_DUPLEX | \ + RTL8201F_ISR_LINK) #define RTL821x_INER 0x12 #define RTL8211B_INER_INIT 0x6400 @@ -29,15 +36,48 @@ #define RTL821x_INSR 0x13 #define RTL821x_EXT_PAGE_SELECT 0x1e + #define RTL821x_PAGE_SELECT 0x1f +#define RTL821x_SET_EXT_PAGE 0x07 + +/* RTL8211E extension page 44/0x2c */ +#define RTL8211E_LEDCR_EXT_PAGE 0x2c +#define RTL8211E_LEDCR1 0x1a +#define RTL8211E_LEDCR1_ACT_TXRX BIT(4) +#define RTL8211E_LEDCR1_MASK BIT(4) +#define RTL8211E_LEDCR1_SHIFT 1 + +#define RTL8211E_LEDCR2 0x1c +#define RTL8211E_LEDCR2_LINK_1000 BIT(2) +#define RTL8211E_LEDCR2_LINK_100 BIT(1) +#define RTL8211E_LEDCR2_LINK_10 BIT(0) +#define RTL8211E_LEDCR2_MASK GENMASK(2, 0) +#define RTL8211E_LEDCR2_SHIFT 4 + +/* RTL8211E extension page 164/0xa4 */ +#define RTL8211E_RGMII_EXT_PAGE 0xa4 +#define RTL8211E_RGMII_DELAY 0x1c +#define RTL8211E_CTRL_DELAY BIT(13) +#define RTL8211E_TX_DELAY BIT(12) +#define RTL8211E_RX_DELAY BIT(11) +#define RTL8211E_DELAY_MASK GENMASK(13, 11) +/* RTL8211F PHY configuration */ +#define RTL8211F_PHYCR_PAGE 0xa43 #define RTL8211F_PHYCR1 0x18 +#define RTL8211F_ALDPS_PLL_OFF BIT(1) +#define RTL8211F_ALDPS_ENABLE BIT(2) +#define RTL8211F_ALDPS_XTAL_OFF BIT(12) + #define RTL8211F_PHYCR2 0x19 #define RTL8211F_CLKOUT_EN BIT(0) #define RTL8211F_PHYCR2_PHY_EEE_ENABLE BIT(5) +#define RTL8211F_INSR_PAGE 0xa43 #define RTL8211F_INSR 0x1d +/* RTL8211F LED configuration */ +#define RTL8211F_LEDCR_PAGE 0xd04 #define RTL8211F_LEDCR 0x10 #define RTL8211F_LEDCR_MODE BIT(15) #define RTL8211F_LEDCR_ACT_TXRX BIT(4) @@ -47,25 +87,32 @@ #define 
RTL8211F_LEDCR_MASK GENMASK(4, 0) #define RTL8211F_LEDCR_SHIFT 5 +/* RTL8211F RGMII configuration */ +#define RTL8211F_RGMII_PAGE 0xd08 + +#define RTL8211F_TXCR 0x11 #define RTL8211F_TX_DELAY BIT(8) + +#define RTL8211F_RXCR 0x15 #define RTL8211F_RX_DELAY BIT(3) -#define RTL8211F_ALDPS_PLL_OFF BIT(1) -#define RTL8211F_ALDPS_ENABLE BIT(2) -#define RTL8211F_ALDPS_XTAL_OFF BIT(12) +/* RTL8211F WOL interrupt configuration */ +#define RTL8211F_INTBCR_PAGE 0xd40 +#define RTL8211F_INTBCR 0x16 +#define RTL8211F_INTBCR_INTB_PMEB BIT(5) -#define RTL8211E_CTRL_DELAY BIT(13) -#define RTL8211E_TX_DELAY BIT(12) -#define RTL8211E_RX_DELAY BIT(11) +/* RTL8211F WOL settings */ +#define RTL8211F_WOL_SETTINGS_PAGE 0xd8a +#define RTL8211F_WOL_SETTINGS_EVENTS 16 +#define RTL8211F_WOL_EVENT_MAGIC BIT(12) +#define RTL8211F_WOL_SETTINGS_STATUS 17 +#define RTL8211F_WOL_STATUS_RESET (BIT(15) | 0x1fff) -#define RTL8201F_ISR 0x1e -#define RTL8201F_ISR_ANERR BIT(15) -#define RTL8201F_ISR_DUPLEX BIT(13) -#define RTL8201F_ISR_LINK BIT(11) -#define RTL8201F_ISR_MASK (RTL8201F_ISR_ANERR | \ - RTL8201F_ISR_DUPLEX | \ - RTL8201F_ISR_LINK) -#define RTL8201F_IER 0x13 +/* RTL8211F Unique physical and multicast address (WOL) */ +#define RTL8211F_PHYSICAL_ADDR_PAGE 0xd8c +#define RTL8211F_PHYSICAL_ADDR_WORD0 16 +#define RTL8211F_PHYSICAL_ADDR_WORD1 17 +#define RTL8211F_PHYSICAL_ADDR_WORD2 18 #define RTL822X_VND1_SERDES_OPTION 0x697a #define RTL822X_VND1_SERDES_OPTION_MODE_MASK GENMASK(5, 0) @@ -112,7 +159,8 @@ #define RTL_8221B_VN_CG 0x001cc84a #define RTL_8251B 0x001cc862 -#define RTL8211F_LED_COUNT 3 +/* RTL8211E and RTL8211F support up to three LEDs */ +#define RTL8211x_LED_COUNT 3 MODULE_DESCRIPTION("Realtek PHY driver"); MODULE_AUTHOR("Johnson Leung"); @@ -123,6 +171,7 @@ struct rtl821x_priv { u16 phycr2; bool has_phycr2; struct clk *clk; + u32 saved_wolopts; }; static int rtl821x_read_page(struct phy_device *phydev) @@ -135,6 +184,36 @@ static int rtl821x_write_page(struct phy_device *phydev, int page) return __phy_write(phydev, RTL821x_PAGE_SELECT, page); } +static int rtl821x_read_ext_page(struct phy_device *phydev, u16 ext_page, + u32 regnum) +{ + int oldpage, ret = 0; + + oldpage = phy_select_page(phydev, RTL821x_SET_EXT_PAGE); + if (oldpage >= 0) { + ret = __phy_write(phydev, RTL821x_EXT_PAGE_SELECT, ext_page); + if (ret == 0) + ret = __phy_read(phydev, regnum); + } + + return phy_restore_page(phydev, oldpage, ret); +} + +static int rtl821x_modify_ext_page(struct phy_device *phydev, u16 ext_page, + u32 regnum, u16 mask, u16 set) +{ + int oldpage, ret = 0; + + oldpage = phy_select_page(phydev, RTL821x_SET_EXT_PAGE); + if (oldpage >= 0) { + ret = __phy_write(phydev, RTL821x_EXT_PAGE_SELECT, ext_page); + if (ret == 0) + ret = __phy_modify(phydev, regnum, mask, set); + } + + return phy_restore_page(phydev, oldpage, ret); +} + static int rtl821x_probe(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; @@ -151,7 +230,7 @@ static int rtl821x_probe(struct phy_device *phydev) return dev_err_probe(dev, PTR_ERR(priv->clk), "failed to get phy clock\n"); - ret = phy_read_paged(phydev, 0xa43, RTL8211F_PHYCR1); + ret = phy_read_paged(phydev, RTL8211F_PHYCR_PAGE, RTL8211F_PHYCR1); if (ret < 0) return ret; @@ -161,7 +240,7 @@ priv->has_phycr2 = !(phy_id == RTL_8211FVD_PHYID); if (priv->has_phycr2) { - ret = phy_read_paged(phydev, 0xa43, RTL8211F_PHYCR2); + ret = phy_read_paged(phydev, RTL8211F_PHYCR_PAGE, RTL8211F_PHYCR2); if (ret < 0) return ret; @@ -197,7 +276,7 @@
static int rtl8211f_ack_interrupt(struct phy_device *phydev) { int err; - err = phy_read_paged(phydev, 0xa43, RTL8211F_INSR); + err = phy_read_paged(phydev, RTL8211F_INSR_PAGE, RTL8211F_INSR); return (err < 0) ? err : 0; } @@ -340,7 +419,7 @@ static irqreturn_t rtl8211f_handle_interrupt(struct phy_device *phydev) { int irq_status; - irq_status = phy_read_paged(phydev, 0xa43, RTL8211F_INSR); + irq_status = phy_read_paged(phydev, RTL8211F_INSR_PAGE, RTL8211F_INSR); if (irq_status < 0) { phy_error(phydev); return IRQ_NONE; @@ -354,6 +433,53 @@ static irqreturn_t rtl8211f_handle_interrupt(struct phy_device *phydev) return IRQ_HANDLED; } +static void rtl8211f_get_wol(struct phy_device *dev, struct ethtool_wolinfo *wol) +{ + wol->supported = WAKE_MAGIC; + if (phy_read_paged(dev, RTL8211F_WOL_SETTINGS_PAGE, RTL8211F_WOL_SETTINGS_EVENTS) + & RTL8211F_WOL_EVENT_MAGIC) + wol->wolopts = WAKE_MAGIC; +} + +static int rtl8211f_set_wol(struct phy_device *dev, struct ethtool_wolinfo *wol) +{ + const u8 *mac_addr = dev->attached_dev->dev_addr; + int oldpage; + + oldpage = phy_save_page(dev); + if (oldpage < 0) + goto err; + + if (wol->wolopts & WAKE_MAGIC) { + /* Store the device address for the magic packet */ + rtl821x_write_page(dev, RTL8211F_PHYSICAL_ADDR_PAGE); + __phy_write(dev, RTL8211F_PHYSICAL_ADDR_WORD0, mac_addr[1] << 8 | (mac_addr[0])); + __phy_write(dev, RTL8211F_PHYSICAL_ADDR_WORD1, mac_addr[3] << 8 | (mac_addr[2])); + __phy_write(dev, RTL8211F_PHYSICAL_ADDR_WORD2, mac_addr[5] << 8 | (mac_addr[4])); + + /* Enable magic packet matching and reset WOL status */ + rtl821x_write_page(dev, RTL8211F_WOL_SETTINGS_PAGE); + __phy_write(dev, RTL8211F_WOL_SETTINGS_EVENTS, RTL8211F_WOL_EVENT_MAGIC); + __phy_write(dev, RTL8211F_WOL_SETTINGS_STATUS, RTL8211F_WOL_STATUS_RESET); + + /* Enable the WOL interrupt */ + rtl821x_write_page(dev, RTL8211F_INTBCR_PAGE); + __phy_set_bits(dev, RTL8211F_INTBCR, RTL8211F_INTBCR_INTB_PMEB); + } else { + /* Disable the WOL interrupt */ + rtl821x_write_page(dev, RTL8211F_INTBCR_PAGE); + __phy_clear_bits(dev, RTL8211F_INTBCR, RTL8211F_INTBCR_INTB_PMEB); + + /* Disable magic packet matching and reset WOL status */ + rtl821x_write_page(dev, RTL8211F_WOL_SETTINGS_PAGE); + __phy_write(dev, RTL8211F_WOL_SETTINGS_EVENTS, 0); + __phy_write(dev, RTL8211F_WOL_SETTINGS_STATUS, RTL8211F_WOL_STATUS_RESET); + } + +err: + return phy_restore_page(dev, oldpage, 0); +} + static int rtl8211_config_aneg(struct phy_device *phydev) { int ret; @@ -390,7 +516,7 @@ static int rtl8211f_config_init(struct phy_device *phydev) u16 val_txdly, val_rxdly; int ret; - ret = phy_modify_paged_changed(phydev, 0xa43, RTL8211F_PHYCR1, + ret = phy_modify_paged_changed(phydev, RTL8211F_PHYCR_PAGE, RTL8211F_PHYCR1, RTL8211F_ALDPS_PLL_OFF | RTL8211F_ALDPS_ENABLE | RTL8211F_ALDPS_XTAL_OFF, priv->phycr1); if (ret < 0) { @@ -424,7 +550,8 @@ static int rtl8211f_config_init(struct phy_device *phydev) return 0; } - ret = phy_modify_paged_changed(phydev, 0xd08, 0x11, RTL8211F_TX_DELAY, + ret = phy_modify_paged_changed(phydev, RTL8211F_RGMII_PAGE, + RTL8211F_TXCR, RTL8211F_TX_DELAY, val_txdly); if (ret < 0) { dev_err(dev, "Failed to update the TX delay register\n"); @@ -439,7 +566,8 @@ static int rtl8211f_config_init(struct phy_device *phydev) str_enabled_disabled(val_txdly)); } - ret = phy_modify_paged_changed(phydev, 0xd08, 0x15, RTL8211F_RX_DELAY, + ret = phy_modify_paged_changed(phydev, RTL8211F_RGMII_PAGE, + RTL8211F_RXCR, RTL8211F_RX_DELAY, val_rxdly); if (ret < 0) { dev_err(dev, "Failed to update the RX delay 
register\n"); @@ -455,14 +583,15 @@ static int rtl8211f_config_init(struct phy_device *phydev) } /* Disable PHY-mode EEE so LPI is passed to the MAC */ - ret = phy_modify_paged(phydev, 0xa43, RTL8211F_PHYCR2, + ret = phy_modify_paged(phydev, RTL8211F_PHYCR_PAGE, RTL8211F_PHYCR2, RTL8211F_PHYCR2_PHY_EEE_ENABLE, 0); if (ret) return ret; if (priv->has_phycr2) { - ret = phy_modify_paged(phydev, 0xa43, RTL8211F_PHYCR2, - RTL8211F_CLKOUT_EN, priv->phycr2); + ret = phy_modify_paged(phydev, RTL8211F_PHYCR_PAGE, + RTL8211F_PHYCR2, RTL8211F_CLKOUT_EN, + priv->phycr2); if (ret < 0) { dev_err(dev, "clkout configuration failed: %pe\n", ERR_PTR(ret)); @@ -509,7 +638,7 @@ static int rtl821x_resume(struct phy_device *phydev) return 0; } -static int rtl8211f_led_hw_is_supported(struct phy_device *phydev, u8 index, +static int rtl8211x_led_hw_is_supported(struct phy_device *phydev, u8 index, unsigned long rules) { const unsigned long mask = BIT(TRIGGER_NETDEV_LINK_10) | @@ -528,9 +657,11 @@ static int rtl8211f_led_hw_is_supported(struct phy_device *phydev, u8 index, * rates and Active indication always at all three 10+100+1000 * link rates. * This code currently uses mode B only. + * + * RTL8211E PHY LED has one mode, which works like RTL8211F mode B. */ - if (index >= RTL8211F_LED_COUNT) + if (index >= RTL8211x_LED_COUNT) return -EINVAL; /* Filter out any other unsupported triggers. */ @@ -549,7 +680,7 @@ static int rtl8211f_led_hw_control_get(struct phy_device *phydev, u8 index, { int val; - if (index >= RTL8211F_LED_COUNT) + if (index >= RTL8211x_LED_COUNT) return -EINVAL; val = phy_read_paged(phydev, 0xd04, RTL8211F_LEDCR); @@ -560,17 +691,17 @@ static int rtl8211f_led_hw_control_get(struct phy_device *phydev, u8 index, val &= RTL8211F_LEDCR_MASK; if (val & RTL8211F_LEDCR_LINK_10) - set_bit(TRIGGER_NETDEV_LINK_10, rules); + __set_bit(TRIGGER_NETDEV_LINK_10, rules); if (val & RTL8211F_LEDCR_LINK_100) - set_bit(TRIGGER_NETDEV_LINK_100, rules); + __set_bit(TRIGGER_NETDEV_LINK_100, rules); if (val & RTL8211F_LEDCR_LINK_1000) - set_bit(TRIGGER_NETDEV_LINK_1000, rules); + __set_bit(TRIGGER_NETDEV_LINK_1000, rules); if (val & RTL8211F_LEDCR_ACT_TXRX) { - set_bit(TRIGGER_NETDEV_RX, rules); - set_bit(TRIGGER_NETDEV_TX, rules); + __set_bit(TRIGGER_NETDEV_RX, rules); + __set_bit(TRIGGER_NETDEV_TX, rules); } return 0; @@ -582,7 +713,7 @@ static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, const u16 mask = RTL8211F_LEDCR_MASK << (RTL8211F_LEDCR_SHIFT * index); u16 reg = 0; - if (index >= RTL8211F_LED_COUNT) + if (index >= RTL8211x_LED_COUNT) return -EINVAL; if (test_bit(TRIGGER_NETDEV_LINK_10, &rules)) @@ -605,9 +736,86 @@ static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, return phy_modify_paged(phydev, 0xd04, RTL8211F_LEDCR, mask, reg); } +static int rtl8211e_led_hw_control_get(struct phy_device *phydev, u8 index, + unsigned long *rules) +{ + int ret; + u16 cr1, cr2; + + if (index >= RTL8211x_LED_COUNT) + return -EINVAL; + + ret = rtl821x_read_ext_page(phydev, RTL8211E_LEDCR_EXT_PAGE, + RTL8211E_LEDCR1); + if (ret < 0) + return ret; + + cr1 = ret >> RTL8211E_LEDCR1_SHIFT * index; + if (cr1 & RTL8211E_LEDCR1_ACT_TXRX) { + __set_bit(TRIGGER_NETDEV_RX, rules); + __set_bit(TRIGGER_NETDEV_TX, rules); + } + + ret = rtl821x_read_ext_page(phydev, RTL8211E_LEDCR_EXT_PAGE, + RTL8211E_LEDCR2); + if (ret < 0) + return ret; + + cr2 = ret >> RTL8211E_LEDCR2_SHIFT * index; + if (cr2 & RTL8211E_LEDCR2_LINK_10) + __set_bit(TRIGGER_NETDEV_LINK_10, rules); + + if (cr2 & 
RTL8211E_LEDCR2_LINK_100) + __set_bit(TRIGGER_NETDEV_LINK_100, rules); + + if (cr2 & RTL8211E_LEDCR2_LINK_1000) + __set_bit(TRIGGER_NETDEV_LINK_1000, rules); + + return ret; +} + +static int rtl8211e_led_hw_control_set(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + const u16 cr1mask = + RTL8211E_LEDCR1_MASK << (RTL8211E_LEDCR1_SHIFT * index); + const u16 cr2mask = + RTL8211E_LEDCR2_MASK << (RTL8211E_LEDCR2_SHIFT * index); + u16 cr1 = 0, cr2 = 0; + int ret; + + if (index >= RTL8211x_LED_COUNT) + return -EINVAL; + + if (test_bit(TRIGGER_NETDEV_RX, &rules) || + test_bit(TRIGGER_NETDEV_TX, &rules)) { + cr1 |= RTL8211E_LEDCR1_ACT_TXRX; + } + + cr1 <<= RTL8211E_LEDCR1_SHIFT * index; + ret = rtl821x_modify_ext_page(phydev, RTL8211E_LEDCR_EXT_PAGE, + RTL8211E_LEDCR1, cr1mask, cr1); + if (ret < 0) + return ret; + + if (test_bit(TRIGGER_NETDEV_LINK_10, &rules)) + cr2 |= RTL8211E_LEDCR2_LINK_10; + + if (test_bit(TRIGGER_NETDEV_LINK_100, &rules)) + cr2 |= RTL8211E_LEDCR2_LINK_100; + + if (test_bit(TRIGGER_NETDEV_LINK_1000, &rules)) + cr2 |= RTL8211E_LEDCR2_LINK_1000; + + cr2 <<= RTL8211E_LEDCR2_SHIFT * index; + ret = rtl821x_modify_ext_page(phydev, RTL8211E_LEDCR_EXT_PAGE, + RTL8211E_LEDCR2, cr2mask, cr2); + + return ret; +} + static int rtl8211e_config_init(struct phy_device *phydev) { - int ret = 0, oldpage; u16 val; /* enable TX/RX delay for rgmii-* modes, and disable them for rgmii. */ @@ -637,20 +845,9 @@ static int rtl8211e_config_init(struct phy_device *phydev) * 12 = RX Delay, 11 = TX Delay * 10:0 = Test && debug settings reserved by realtek */ - oldpage = phy_select_page(phydev, 0x7); - if (oldpage < 0) - goto err_restore_page; - - ret = __phy_write(phydev, RTL821x_EXT_PAGE_SELECT, 0xa4); - if (ret) - goto err_restore_page; - - ret = __phy_modify(phydev, 0x1c, RTL8211E_CTRL_DELAY - | RTL8211E_TX_DELAY | RTL8211E_RX_DELAY, - val); - -err_restore_page: - return phy_restore_page(phydev, oldpage, ret); + return rtl821x_modify_ext_page(phydev, RTL8211E_RGMII_EXT_PAGE, + RTL8211E_RGMII_DELAY, + RTL8211E_DELAY_MASK, val); } static int rtl8211b_suspend(struct phy_device *phydev) @@ -1392,6 +1589,9 @@ static struct phy_driver realtek_drvs[] = { .resume = genphy_resume, .read_page = rtl821x_read_page, .write_page = rtl821x_write_page, + .led_hw_is_supported = rtl8211x_led_hw_is_supported, + .led_hw_control_get = rtl8211e_led_hw_control_get, + .led_hw_control_set = rtl8211e_led_hw_control_set, }, { PHY_ID_MATCH_EXACT(0x001cc916), .name = "RTL8211F Gigabit Ethernet", @@ -1400,12 +1600,14 @@ static struct phy_driver realtek_drvs[] = { .read_status = rtlgen_read_status, .config_intr = &rtl8211f_config_intr, .handle_interrupt = rtl8211f_handle_interrupt, + .set_wol = rtl8211f_set_wol, + .get_wol = rtl8211f_get_wol, .suspend = rtl821x_suspend, .resume = rtl821x_resume, .read_page = rtl821x_read_page, .write_page = rtl821x_write_page, .flags = PHY_ALWAYS_CALL_SUSPEND, - .led_hw_is_supported = rtl8211f_led_hw_is_supported, + .led_hw_is_supported = rtl8211x_led_hw_is_supported, .led_hw_control_get = rtl8211f_led_hw_control_get, .led_hw_control_set = rtl8211f_led_hw_control_set, }, { diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 53463767cc43..def84e87e05b 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1131,6 +1131,8 @@ static const struct file_operations ppp_device_fops = { .llseek = noop_llseek, }; +static void ppp_nl_dellink(struct net_device *dev, struct list_head *head); + static __net_init int ppp_init_net(struct net 
*net) { struct ppp_net *pn = net_generic(net, ppp_net_id); @@ -1146,28 +1148,20 @@ static __net_init int ppp_init_net(struct net *net) return 0; } -static __net_exit void ppp_exit_net(struct net *net) +static __net_exit void ppp_exit_rtnl_net(struct net *net, + struct list_head *dev_to_kill) { struct ppp_net *pn = net_generic(net, ppp_net_id); - struct net_device *dev; - struct net_device *aux; struct ppp *ppp; - LIST_HEAD(list); int id; - rtnl_lock(); - for_each_netdev_safe(net, dev, aux) { - if (dev->netdev_ops == &ppp_netdev_ops) - unregister_netdevice_queue(dev, &list); - } - idr_for_each_entry(&pn->units_idr, ppp, id) - /* Skip devices already unregistered by previous loop */ - if (!net_eq(dev_net(ppp->dev), net)) - unregister_netdevice_queue(ppp->dev, &list); + ppp_nl_dellink(ppp->dev, dev_to_kill); +} - unregister_netdevice_many(&list); - rtnl_unlock(); +static __net_exit void ppp_exit_net(struct net *net) +{ + struct ppp_net *pn = net_generic(net, ppp_net_id); mutex_destroy(&pn->all_ppp_mutex); idr_destroy(&pn->units_idr); @@ -1177,6 +1171,7 @@ static __net_exit void ppp_exit_net(struct net *net) static struct pernet_operations ppp_net_ops = { .init = ppp_init_net, + .exit_rtnl = ppp_exit_rtnl_net, .exit = ppp_exit_net, .id = &ppp_net_id, .size = sizeof(struct ppp_net), diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 3c360d4f0635..370b32fc2588 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -101,9 +101,7 @@ config USB_RTL8152 select MII select PHYLIB select CRC32 - select CRYPTO - select CRYPTO_HASH - select CRYPTO_SHA256 + select CRYPTO_LIB_SHA256 help This option adds support for Realtek RTL8152 based USB 2.0 10/100 Ethernet adapters and RTL8153 based USB 3.0 10/100/1000 diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h index 74162190bccc..8531b804021a 100644 --- a/drivers/net/usb/asix.h +++ b/drivers/net/usb/asix.h @@ -224,7 +224,6 @@ int asix_write_rx_ctl(struct usbnet *dev, u16 mode, int in_pm); u16 asix_read_medium_status(struct usbnet *dev, int in_pm); int asix_write_medium_mode(struct usbnet *dev, u16 mode, int in_pm); -void asix_adjust_link(struct net_device *netdev); int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm); diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 72ffc89b477a..7fd763917ae2 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -414,28 +414,6 @@ int asix_write_medium_mode(struct usbnet *dev, u16 mode, int in_pm) return ret; } -/* set MAC link settings according to information from phylib */ -void asix_adjust_link(struct net_device *netdev) -{ - struct phy_device *phydev = netdev->phydev; - struct usbnet *dev = netdev_priv(netdev); - u16 mode = 0; - - if (phydev->link) { - mode = AX88772_MEDIUM_DEFAULT; - - if (phydev->duplex == DUPLEX_HALF) - mode &= ~AX_MEDIUM_FD; - - if (phydev->speed != SPEED_100) - mode &= ~AX_MEDIUM_PS; - } - - asix_write_medium_mode(dev, mode, 0); - phy_print_status(phydev); - usbnet_link_change(dev, phydev->link, 0); -} - int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm) { int ret; diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index da24941a6e44..9b0318fb50b5 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -752,7 +752,6 @@ static void ax88772_mac_link_down(struct phylink_config *config, struct usbnet *dev = netdev_priv(to_net_dev(config->dev)); asix_write_medium_mode(dev, 0, 0); - usbnet_link_change(dev, 
false, false); } static void ax88772_mac_link_up(struct phylink_config *config, @@ -783,7 +782,6 @@ static void ax88772_mac_link_up(struct phylink_config *config, m |= AX_MEDIUM_RFC; asix_write_medium_mode(dev, m, 0); - usbnet_link_change(dev, true, false); } static const struct phylink_mac_ops ax88772_phylink_mac_ops = { @@ -1350,10 +1348,9 @@ static const struct driver_info ax88772_info = { .description = "ASIX AX88772 USB 2.0 Ethernet", .bind = ax88772_bind, .unbind = ax88772_unbind, - .status = asix_status, .reset = ax88772_reset, .stop = ax88772_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | FLAG_MULTI_PACKET, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_MULTI_PACKET, .rx_fixup = asix_rx_fixup_common, .tx_fixup = asix_tx_fixup, }; @@ -1362,11 +1359,9 @@ static const struct driver_info ax88772b_info = { .description = "ASIX AX88772B USB 2.0 Ethernet", .bind = ax88772_bind, .unbind = ax88772_unbind, - .status = asix_status, .reset = ax88772_reset, .stop = ax88772_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | - FLAG_MULTI_PACKET, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_MULTI_PACKET, .rx_fixup = asix_rx_fixup_common, .tx_fixup = asix_tx_fixup, .data = FLAG_EEPROM_MAC, @@ -1376,11 +1371,9 @@ static const struct driver_info lxausb_t1l_info = { .description = "Linux Automation GmbH USB 10Base-T1L", .bind = ax88772_bind, .unbind = ax88772_unbind, - .status = asix_status, .reset = ax88772_reset, .stop = ax88772_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | - FLAG_MULTI_PACKET, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_MULTI_PACKET, .rx_fixup = asix_rx_fixup_common, .tx_fixup = asix_tx_fixup, .data = FLAG_EEPROM_MAC, @@ -1412,10 +1405,8 @@ static const struct driver_info hg20f9_info = { .description = "HG20F9 USB 2.0 Ethernet", .bind = ax88772_bind, .unbind = ax88772_unbind, - .status = asix_status, .reset = ax88772_reset, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | - FLAG_MULTI_PACKET, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_MULTI_PACKET, .rx_fixup = asix_rx_fixup_common, .tx_fixup = asix_tx_fixup, .data = FLAG_EEPROM_MAC, diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index e4f1663b6204..58e3589e3b89 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1554,10 +1554,12 @@ static void lan78xx_set_multicast(struct net_device *netdev) schedule_work(&pdata->set_multicast); } +static int lan78xx_configure_flowcontrol(struct lan78xx_net *dev, + bool tx_pause, bool rx_pause); + static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex, u16 lcladv, u16 rmtadv) { - u32 flow = 0, fct_flow = 0; u8 cap; if (dev->fc_autoneg) @@ -1565,27 +1567,13 @@ static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex, else cap = dev->fc_request_control; - if (cap & FLOW_CTRL_TX) - flow |= (FLOW_CR_TX_FCEN_ | 0xFFFF); - - if (cap & FLOW_CTRL_RX) - flow |= FLOW_CR_RX_FCEN_; - - if (dev->udev->speed == USB_SPEED_SUPER) - fct_flow = FLOW_CTRL_THRESHOLD(FLOW_ON_SS, FLOW_OFF_SS); - else if (dev->udev->speed == USB_SPEED_HIGH) - fct_flow = FLOW_CTRL_THRESHOLD(FLOW_ON_HS, FLOW_OFF_HS); - netif_dbg(dev, link, dev->net, "rx pause %s, tx pause %s", (cap & FLOW_CTRL_RX ? "enabled" : "disabled"), (cap & FLOW_CTRL_TX ? 
"enabled" : "disabled")); - lan78xx_write_reg(dev, FCT_FLOW, fct_flow); - - /* threshold value should be set before enabling flow */ - lan78xx_write_reg(dev, FLOW, flow); - - return 0; + return lan78xx_configure_flowcontrol(dev, + cap & FLOW_CTRL_TX, + cap & FLOW_CTRL_RX); } static void lan78xx_rx_urb_submit_all(struct lan78xx_net *dev); @@ -1636,15 +1624,30 @@ exit_unlock: return ret; } +/** + * lan78xx_phy_int_ack - Acknowledge PHY interrupt + * @dev: pointer to the LAN78xx device structure + * + * This function acknowledges the PHY interrupt by setting the + * INT_STS_PHY_INT_ bit in the interrupt status register (INT_STS). + * + * Return: 0 on success or a negative error code on failure. + */ +static int lan78xx_phy_int_ack(struct lan78xx_net *dev) +{ + return lan78xx_write_reg(dev, INT_STS, INT_STS_PHY_INT_); +} + +static int lan78xx_configure_usb(struct lan78xx_net *dev, int speed); + static int lan78xx_link_reset(struct lan78xx_net *dev) { struct phy_device *phydev = dev->net->phydev; struct ethtool_link_ksettings ecmd; int ladv, radv, ret, link; - u32 buf; /* clear LAN78xx interrupt status */ - ret = lan78xx_write_reg(dev, INT_STS, INT_STS_PHY_INT_); + ret = lan78xx_phy_int_ack(dev); if (unlikely(ret < 0)) return ret; @@ -1667,36 +1670,9 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) phy_ethtool_ksettings_get(phydev, &ecmd); - if (dev->udev->speed == USB_SPEED_SUPER) { - if (ecmd.base.speed == 1000) { - /* disable U2 */ - ret = lan78xx_read_reg(dev, USB_CFG1, &buf); - if (ret < 0) - return ret; - buf &= ~USB_CFG1_DEV_U2_INIT_EN_; - ret = lan78xx_write_reg(dev, USB_CFG1, buf); - if (ret < 0) - return ret; - /* enable U1 */ - ret = lan78xx_read_reg(dev, USB_CFG1, &buf); - if (ret < 0) - return ret; - buf |= USB_CFG1_DEV_U1_INIT_EN_; - ret = lan78xx_write_reg(dev, USB_CFG1, buf); - if (ret < 0) - return ret; - } else { - /* enable U1 & U2 */ - ret = lan78xx_read_reg(dev, USB_CFG1, &buf); - if (ret < 0) - return ret; - buf |= USB_CFG1_DEV_U2_INIT_EN_; - buf |= USB_CFG1_DEV_U1_INIT_EN_; - ret = lan78xx_write_reg(dev, USB_CFG1, buf); - if (ret < 0) - return ret; - } - } + ret = lan78xx_configure_usb(dev, ecmd.base.speed); + if (ret < 0) + return ret; ladv = phy_read(phydev, MII_ADVERTISE); if (ladv < 0) @@ -2508,48 +2484,323 @@ static void lan78xx_remove_irq_domain(struct lan78xx_net *dev) dev->domain_data.irqdomain = NULL; } -static struct phy_device *lan7801_phy_init(struct lan78xx_net *dev) +/** + * lan78xx_configure_usb - Configure USB link power settings + * @dev: pointer to the LAN78xx device structure + * @speed: negotiated Ethernet link speed (in Mbps) + * + * This function configures U1/U2 link power management for SuperSpeed + * USB devices based on the current Ethernet link speed. It uses the + * USB_CFG1 register to enable or disable U1 and U2 low-power states. + * + * Note: Only LAN7800 and LAN7801 support SuperSpeed (USB 3.x). + * LAN7850 is a High-Speed-only (USB 2.0) device and is skipped. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +static int lan78xx_configure_usb(struct lan78xx_net *dev, int speed) { - u32 buf; + u32 mask, val; int ret; + + /* Only configure USB settings for SuperSpeed devices */ + if (dev->udev->speed != USB_SPEED_SUPER) + return 0; + + /* LAN7850 does not support USB 3.x */ + if (dev->chipid == ID_REV_CHIP_ID_7850_) { + netdev_warn_once(dev->net, "Unexpected SuperSpeed for LAN7850 (USB 2.0 only)\n"); + return 0; + } + + switch (speed) { + case SPEED_1000: + /* Disable U2, enable U1 */ + ret = lan78xx_update_reg(dev, USB_CFG1, + USB_CFG1_DEV_U2_INIT_EN_, 0); + if (ret < 0) + return ret; + + return lan78xx_update_reg(dev, USB_CFG1, + USB_CFG1_DEV_U1_INIT_EN_, + USB_CFG1_DEV_U1_INIT_EN_); + + case SPEED_100: + case SPEED_10: + /* Enable both U1 and U2 */ + mask = USB_CFG1_DEV_U1_INIT_EN_ | USB_CFG1_DEV_U2_INIT_EN_; + val = mask; + return lan78xx_update_reg(dev, USB_CFG1, mask, val); + + default: + netdev_warn(dev->net, "Unsupported link speed: %d\n", speed); + return -EINVAL; + } +} + +/** + * lan78xx_configure_flowcontrol - Set MAC and FIFO flow control configuration + * @dev: pointer to the LAN78xx device structure + * @tx_pause: enable transmission of pause frames + * @rx_pause: enable reception of pause frames + * + * This function configures the LAN78xx flow control settings by writing + * to the FLOW and FCT_FLOW registers. The pause time is set to the + * maximum allowed value (65535 quanta). FIFO thresholds are selected + * based on USB speed. + * + * The Pause Time field is measured in units of 512-bit times (quanta): + * - At 1 Gbps: 1 quanta = 512 ns → max ~33.6 ms pause + * - At 100 Mbps: 1 quanta = 5.12 µs → max ~335 ms pause + * - At 10 Mbps: 1 quanta = 51.2 µs → max ~3.3 s pause + * + * Flow control thresholds (FCT_FLOW) are used to trigger pause/resume: + * - RXUSED is the number of bytes used in the RX FIFO + * - Flow is turned ON when RXUSED ≥ FLOW_ON threshold + * - Flow is turned OFF when RXUSED ≤ FLOW_OFF threshold + * - Both thresholds are encoded in units of 512 bytes (rounded up) + * + * Thresholds differ by USB speed because available USB bandwidth + * affects how fast packets can be drained from the RX FIFO: + * - USB 3.x (SuperSpeed): + * FLOW_ON = 9216 bytes → 18 units + * FLOW_OFF = 4096 bytes → 8 units + * - USB 2.0 (High-Speed): + * FLOW_ON = 8704 bytes → 17 units + * FLOW_OFF = 1024 bytes → 2 units + * + * Note: The FCT_FLOW register must be configured before enabling TX pause + * (i.e., before setting FLOW_CR_TX_FCEN_), as required by the hardware. + * + * Return: 0 on success or a negative error code on failure. 
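+ *
+ * Worked example (editorial, using the SuperSpeed numbers above):
+ * 9216 / 512 = 18 and 4096 / 512 = 8, so FCT_FLOW encodes to
+ * (8 << 8) | 18. A caller enabling pause in both directions then
+ * needs only:
+ *
+ *	ret = lan78xx_configure_flowcontrol(dev, true, true);
+ *	if (ret < 0)
+ *		return ret;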
+ */ +static int lan78xx_configure_flowcontrol(struct lan78xx_net *dev, + bool tx_pause, bool rx_pause) +{ + /* Use maximum pause time: 65535 quanta (512-bit times) */ + const u32 pause_time_quanta = 65535; + u32 fct_flow = 0; + u32 flow = 0; + int ret; + + /* Prepare MAC flow control bits */ + if (tx_pause) + flow |= FLOW_CR_TX_FCEN_ | pause_time_quanta; + + if (rx_pause) + flow |= FLOW_CR_RX_FCEN_; + + /* Select RX FIFO thresholds based on USB speed + * + * FCT_FLOW layout: + * bits [6:0] FLOW_ON threshold (RXUSED ≥ ON → assert pause) + * bits [14:8] FLOW_OFF threshold (RXUSED ≤ OFF → deassert pause) + * thresholds are expressed in units of 512 bytes + */ + switch (dev->udev->speed) { + case USB_SPEED_SUPER: + fct_flow = FLOW_CTRL_THRESHOLD(FLOW_ON_SS, FLOW_OFF_SS); + break; + case USB_SPEED_HIGH: + fct_flow = FLOW_CTRL_THRESHOLD(FLOW_ON_HS, FLOW_OFF_HS); + break; + default: + netdev_warn(dev->net, "Unsupported USB speed: %d\n", + dev->udev->speed); + return -EINVAL; + } + + /* Step 1: Write FIFO thresholds before enabling pause frames */ + ret = lan78xx_write_reg(dev, FCT_FLOW, fct_flow); + if (ret < 0) + return ret; + + /* Step 2: Enable MAC pause functionality */ + return lan78xx_write_reg(dev, FLOW, flow); +} + +/** + * lan78xx_register_fixed_phy() - Register a fallback fixed PHY + * @dev: LAN78xx device + * + * Registers a fixed PHY with 1 Gbps full duplex. This is used in special cases + * like EVB-KSZ9897-1, where LAN7801 acts as a USB-to-Ethernet interface to a + * switch without a visible PHY. + * + * Return: pointer to the registered fixed PHY, or ERR_PTR() on error. + */ +static struct phy_device *lan78xx_register_fixed_phy(struct lan78xx_net *dev) +{ struct fixed_phy_status fphy_status = { .link = 1, .speed = SPEED_1000, .duplex = DUPLEX_FULL, }; + + netdev_info(dev->net, + "No PHY found on LAN7801 – registering fixed PHY (e.g. EVB-KSZ9897-1)\n"); + + return fixed_phy_register(PHY_POLL, &fphy_status, NULL); +} + +/** + * lan78xx_get_phy() - Probe or register PHY device and set interface mode + * @dev: LAN78xx device structure + * + * This function attempts to find a PHY on the MDIO bus. If no PHY is found + * and the chip is LAN7801, it registers a fixed PHY as fallback. It also + * sets dev->interface based on chip ID and detected PHY type. + * + * Return: a valid PHY device pointer, or ERR_PTR() on failure. + */ +static struct phy_device *lan78xx_get_phy(struct lan78xx_net *dev) +{ struct phy_device *phydev; + /* Attempt to locate a PHY on the MDIO bus */ phydev = phy_find_first(dev->mdiobus); - if (!phydev) { - netdev_dbg(dev->net, "PHY Not Found!! Registering Fixed PHY\n"); - phydev = fixed_phy_register(PHY_POLL, &fphy_status, NULL); - if (IS_ERR(phydev)) { - netdev_err(dev->net, "No PHY/fixed_PHY found\n"); - return NULL; + + switch (dev->chipid) { + case ID_REV_CHIP_ID_7801_: + if (phydev) { + /* External RGMII PHY detected */ + dev->interface = PHY_INTERFACE_MODE_RGMII_ID; + phydev->is_internal = false; + + if (!phydev->drv) + netdev_warn(dev->net, + "PHY driver not found – assuming RGMII delays are on PCB or strapped for the PHY\n"); + + return phydev; } - netdev_dbg(dev->net, "Registered FIXED PHY\n"); + dev->interface = PHY_INTERFACE_MODE_RGMII; + /* No PHY found – fallback to fixed PHY (e.g. 
KSZ switch board) */ + return lan78xx_register_fixed_phy(dev); + + case ID_REV_CHIP_ID_7800_: + case ID_REV_CHIP_ID_7850_: + if (!phydev) + return ERR_PTR(-ENODEV); + + /* These use internal GMII-connected PHY */ + dev->interface = PHY_INTERFACE_MODE_GMII; + phydev->is_internal = true; + return phydev; + + default: + netdev_err(dev->net, "Unknown CHIP ID: 0x%08x\n", dev->chipid); + return ERR_PTR(-ENODEV); + } +} + +/** + * lan78xx_mac_prepare_for_phy() - Preconfigure MAC-side interface settings + * @dev: LAN78xx device + * + * Configure MAC-side registers according to dev->interface, which should be + * set by lan78xx_get_phy(). + * + * - For PHY_INTERFACE_MODE_RGMII: + * Enable MAC-side TXC delay. This mode seems to be used in a special setup + * without a real PHY, likely on EVB-KSZ9897-1. In that design, LAN7801 is + * connected to the KSZ9897 switch, and the link timing is expected to be + * hardwired (e.g. via strapping or board layout). No devicetree support is + * assumed here. + * + * - For PHY_INTERFACE_MODE_RGMII_ID: + * Disable MAC-side delay and rely on the PHY driver to provide delay. + * + * - For GMII, no MAC-specific config is needed. + * + * Return: 0 on success or a negative error code. + */ +static int lan78xx_mac_prepare_for_phy(struct lan78xx_net *dev) +{ + int ret; + + switch (dev->interface) { + case PHY_INTERFACE_MODE_RGMII: + /* Enable MAC-side TX clock delay */ ret = lan78xx_write_reg(dev, MAC_RGMII_ID, MAC_RGMII_ID_TXC_DELAY_EN_); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00); - ret = lan78xx_read_reg(dev, HW_CFG, &buf); - buf |= HW_CFG_CLK125_EN_; - buf |= HW_CFG_REFCLK25_EN_; - ret = lan78xx_write_reg(dev, HW_CFG, buf); - } else { - if (!phydev->drv) { - netdev_err(dev->net, "no PHY driver found\n"); - return NULL; - } - dev->interface = PHY_INTERFACE_MODE_RGMII_ID; - /* The PHY driver is responsible to configure proper RGMII - * interface delays. Disable RGMII delays on MAC side. - */ - lan78xx_write_reg(dev, MAC_RGMII_ID, 0); + if (ret < 0) + return ret; + + ret = lan78xx_update_reg(dev, HW_CFG, + HW_CFG_CLK125_EN_ | HW_CFG_REFCLK25_EN_, + HW_CFG_CLK125_EN_ | HW_CFG_REFCLK25_EN_); + if (ret < 0) + return ret; + + break; + + case PHY_INTERFACE_MODE_RGMII_ID: + /* Disable MAC-side TXC delay, PHY provides it */ + ret = lan78xx_write_reg(dev, MAC_RGMII_ID, 0); + if (ret < 0) + return ret; + + break; - phydev->is_internal = false; + case PHY_INTERFACE_MODE_GMII: + /* No MAC-specific configuration required */ + break; + + default: + netdev_warn(dev->net, "Unsupported interface mode: %d\n", + dev->interface); + break; } - return phydev; + + return 0; +} + +/** + * lan78xx_configure_leds_from_dt() - Configure LED enables based on DT + * @dev: LAN78xx device + * @phydev: PHY device (must be valid) + * + * Reads "microchip,led-modes" property from the PHY's DT node and enables + * the corresponding number of LEDs by writing to HW_CFG. + * + * This helper preserves the original logic, enabling up to 4 LEDs. + * If the property is not present, this function does nothing. + * + * Return: 0 on success or a negative error code. 
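+ *
+ * Usage sketch (editorial, matching lan78xx_phy_init() below): PHY
+ * discovery must run first so that dev->interface is valid here:
+ *
+ *	phydev = lan78xx_get_phy(dev);
+ *	if (IS_ERR(phydev))
+ *		return PTR_ERR(phydev);
+ *
+ *	ret = lan78xx_mac_prepare_for_phy(dev);
+ *	if (ret < 0)
+ *		goto free_phy;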
+ */ +static int lan78xx_configure_leds_from_dt(struct lan78xx_net *dev, + struct phy_device *phydev) +{ + struct device_node *np = phydev->mdio.dev.of_node; + u32 reg; + int len, ret; + + if (!np) + return 0; + + len = of_property_count_elems_of_size(np, "microchip,led-modes", + sizeof(u32)); + if (len < 0) + return 0; + + ret = lan78xx_read_reg(dev, HW_CFG, &reg); + if (ret < 0) + return ret; + + reg &= ~(HW_CFG_LED0_EN_ | HW_CFG_LED1_EN_ | + HW_CFG_LED2_EN_ | HW_CFG_LED3_EN_); + + reg |= (len > 0) * HW_CFG_LED0_EN_ | + (len > 1) * HW_CFG_LED1_EN_ | + (len > 2) * HW_CFG_LED2_EN_ | + (len > 3) * HW_CFG_LED3_EN_; + + return lan78xx_write_reg(dev, HW_CFG, reg); } static int lan78xx_phy_init(struct lan78xx_net *dev) @@ -2559,30 +2810,13 @@ u32 mii_adv; struct phy_device *phydev; - switch (dev->chipid) { - case ID_REV_CHIP_ID_7801_: - phydev = lan7801_phy_init(dev); - if (!phydev) { - netdev_err(dev->net, "lan7801: PHY Init Failed"); - return -EIO; - } - break; - - case ID_REV_CHIP_ID_7800_: - case ID_REV_CHIP_ID_7850_: - phydev = phy_find_first(dev->mdiobus); - if (!phydev) { - netdev_err(dev->net, "no PHY found\n"); - return -EIO; - } - phydev->is_internal = true; - dev->interface = PHY_INTERFACE_MODE_GMII; - break; + phydev = lan78xx_get_phy(dev); + if (IS_ERR(phydev)) + return PTR_ERR(phydev); - default: - netdev_err(dev->net, "Unknown CHIP ID found\n"); - return -EIO; - } + ret = lan78xx_mac_prepare_for_phy(dev); + if (ret < 0) + goto free_phy; /* if phyirq is not set, use polling mode in phylib */ if (dev->domain_data.phyirq > 0) @@ -2624,33 +2858,23 @@ phy_support_eee(phydev); - if (phydev->mdio.dev.of_node) { - u32 reg; - int len; - - len = of_property_count_elems_of_size(phydev->mdio.dev.of_node, - "microchip,led-modes", - sizeof(u32)); - if (len >= 0) { - /* Ensure the appropriate LEDs are enabled */ - lan78xx_read_reg(dev, HW_CFG, &reg); - reg &= ~(HW_CFG_LED0_EN_ | - HW_CFG_LED1_EN_ | - HW_CFG_LED2_EN_ | - HW_CFG_LED3_EN_); - reg |= (len > 0) * HW_CFG_LED0_EN_ | - (len > 1) * HW_CFG_LED1_EN_ | - (len > 2) * HW_CFG_LED2_EN_ | - (len > 3) * HW_CFG_LED3_EN_; - lan78xx_write_reg(dev, HW_CFG, reg); - } - } + ret = lan78xx_configure_leds_from_dt(dev, phydev); + if (ret) + goto free_phy; genphy_config_aneg(phydev); dev->fc_autoneg = phydev->autoneg; return 0; + +free_phy: + if (phy_is_pseudo_fixed_link(phydev)) { + fixed_phy_unregister(phydev); + phy_device_free(phydev); + } + + return ret; } static int lan78xx_set_rx_max_frame_length(struct lan78xx_net *dev, int size) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 2cab046749a9..67f5d30ffcba 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -26,7 +26,7 @@ #include <linux/atomic.h> #include <linux/acpi.h> #include <linux/firmware.h> -#include <crypto/hash.h> +#include <crypto/sha2.h> #include <linux/usb/r8152.h> #include <net/gso.h> @@ -4628,48 +4628,16 @@ out: static long rtl8152_fw_verify_checksum(struct r8152 *tp, struct fw_header *fw_hdr, size_t size) { - unsigned char checksum[sizeof(fw_hdr->checksum)]; - struct crypto_shash *alg; - struct shash_desc *sdesc; - size_t len; - long rc; - - alg = crypto_alloc_shash("sha256", 0, 0); - if (IS_ERR(alg)) { - rc = PTR_ERR(alg); - goto out; - } - - if (crypto_shash_digestsize(alg) != sizeof(fw_hdr->checksum)) { - rc = -EFAULT; - dev_err(&tp->intf->dev, "digestsize incorrect (%u)\n", - crypto_shash_digestsize(alg)); - goto free_shash; - } + u8
checksum[sizeof(fw_hdr->checksum)]; - len = sizeof(*sdesc) + crypto_shash_descsize(alg); - sdesc = kmalloc(len, GFP_KERNEL); - if (!sdesc) { - rc = -ENOMEM; - goto free_shash; - } - sdesc->tfm = alg; - - len = size - sizeof(fw_hdr->checksum); - rc = crypto_shash_digest(sdesc, fw_hdr->version, len, checksum); - kfree(sdesc); - if (rc) - goto free_shash; + BUILD_BUG_ON(sizeof(checksum) != SHA256_DIGEST_SIZE); + sha256(fw_hdr->version, size - sizeof(checksum), checksum); - if (memcmp(fw_hdr->checksum, checksum, sizeof(fw_hdr->checksum))) { + if (memcmp(fw_hdr->checksum, checksum, sizeof(checksum))) { dev_err(&tp->intf->dev, "checksum fail\n"); - rc = -EFAULT; + return -EFAULT; } - -free_shash: - crypto_free_shash(alg); -out: - return rc; + return 0; } static long rtl8152_check_firmware(struct r8152 *tp, struct rtl_fw *rtl_fw) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 7bb53961c0ea..e58a0f1b5c5b 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -307,12 +307,10 @@ static void __veth_xdp_flush(struct veth_rq *rq) static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb) { - if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) { - dev_kfree_skb_any(skb); - return NET_RX_DROP; - } + if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) + return NETDEV_TX_BUSY; /* signal qdisc layer */ - return NET_RX_SUCCESS; + return NET_RX_SUCCESS; /* same as NETDEV_TX_OK */ } static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb, @@ -346,11 +344,11 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) { struct veth_priv *rcv_priv, *priv = netdev_priv(dev); struct veth_rq *rq = NULL; - int ret = NETDEV_TX_OK; + struct netdev_queue *txq; struct net_device *rcv; int length = skb->len; bool use_napi = false; - int rxq; + int ret, rxq; rcu_read_lock(); rcv = rcu_dereference(priv->peer); @@ -373,17 +371,45 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) } skb_tx_timestamp(skb); - if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) { + + ret = veth_forward_skb(rcv, skb, rq, use_napi); + switch (ret) { + case NET_RX_SUCCESS: /* same as NETDEV_TX_OK */ if (!use_napi) dev_sw_netstats_tx_add(dev, 1, length); else __veth_xdp_flush(rq); - } else { + break; + case NETDEV_TX_BUSY: + /* If a qdisc is attached to our virtual device, returning + * NETDEV_TX_BUSY is allowed. + */ + txq = netdev_get_tx_queue(dev, rxq); + + if (qdisc_txq_has_no_queue(txq)) { + dev_kfree_skb_any(skb); + goto drop; + } + /* Restore Eth hdr pulled by dev_forward_skb/eth_type_trans */ + __skb_push(skb, ETH_HLEN); + /* Rely on prior successful packets having started the NAPI + * consumer via __veth_xdp_flush(). Cancel the TXQ stop if the + * consumer has stopped, paired with the empty check in veth_poll().
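+ * The sequence is the usual stop/wake pattern: stop the queue, let the
+ * barrier publish the stopped state, then re-check ring emptiness. If
+ * the consumer drained the ring in between, nothing else would wake the
+ * queue, so the producer wakes it here itself.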
+ */ + netif_tx_stop_queue(txq); + smp_mb__after_atomic(); + if (unlikely(__ptr_ring_empty(&rq->xdp_ring))) + netif_tx_wake_queue(txq); + break; + case NET_RX_DROP: /* same as NET_XMIT_DROP */ drop: atomic64_inc(&priv->dropped); ret = NET_XMIT_DROP; + break; + default: + net_crit_ratelimited("%s(%s): Invalid return code(%d)", + __func__, dev->name, ret); } - rcu_read_unlock(); return ret; @@ -874,9 +900,17 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget, struct veth_xdp_tx_bq *bq, struct veth_stats *stats) { + struct veth_priv *priv = netdev_priv(rq->dev); + int queue_idx = rq->xdp_rxq.queue_index; + struct netdev_queue *peer_txq; + struct net_device *peer_dev; int i, done = 0, n_xdpf = 0; void *xdpf[VETH_XDP_BATCH]; + /* NAPI functions as RCU section */ + peer_dev = rcu_dereference_check(priv->peer, rcu_read_lock_bh_held()); + peer_txq = netdev_get_tx_queue(peer_dev, queue_idx); + for (i = 0; i < budget; i++) { void *ptr = __ptr_ring_consume(&rq->xdp_ring); @@ -925,6 +959,9 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget, rq->stats.vs.xdp_packets += done; u64_stats_update_end(&rq->stats.syncp); + if (unlikely(netif_tx_queue_stopped(peer_txq))) + netif_tx_wake_queue(peer_txq); + return done; } diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 7168b33adadb..9a4beea6ee0c 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -343,15 +343,13 @@ unlock: static bool qdisc_tx_is_default(const struct net_device *dev) { struct netdev_queue *txq; - struct Qdisc *qdisc; if (dev->num_tx_queues > 1) return false; txq = netdev_get_tx_queue(dev, 0); - qdisc = rcu_access_pointer(txq->qdisc); - return !qdisc->enqueue; + return qdisc_txq_has_no_queue(txq); } /* Local traffic destined to local address. Reinsert the packet to rx diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 9ccc3f09f71b..a56d7239b127 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -15,6 +15,7 @@ #include <linux/igmp.h> #include <linux/if_ether.h> #include <linux/ethtool.h> +#include <linux/rhashtable.h> #include <net/arp.h> #include <net/ndisc.h> #include <net/gro.h> @@ -63,8 +64,12 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan); static void vxlan_vs_del_dev(struct vxlan_dev *vxlan); -/* salt for hash table */ -static u32 vxlan_salt __read_mostly; +static const struct rhashtable_params vxlan_fdb_rht_params = { + .head_offset = offsetof(struct vxlan_fdb, rhnode), + .key_offset = offsetof(struct vxlan_fdb, key), + .key_len = sizeof(struct vxlan_fdb_key), + .automatic_shrinking = true, +}; static inline bool vxlan_collect_metadata(struct vxlan_sock *vs) { @@ -186,7 +191,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, } else if (nh) { ndm->ndm_family = nh_family; } - send_eth = !is_zero_ether_addr(fdb->eth_addr); + send_eth = !is_zero_ether_addr(fdb->key.eth_addr); } else ndm->ndm_family = AF_BRIDGE; ndm->ndm_state = fdb->state; @@ -201,7 +206,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, peernet2id(dev_net(vxlan->dev), vxlan->net))) goto nla_put_failure; - if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) + if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.eth_addr)) goto nla_put_failure; if (nh) { if (nla_put_u32(skb, NDA_NH_ID, nh_id)) @@ -223,9 +228,9 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, goto nla_put_failure; } - if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->vni && + if ((vxlan->cfg.flags & 
VXLAN_F_COLLECT_METADATA) && fdb->key.vni && nla_put_u32(skb, NDA_SRC_VNI, - be32_to_cpu(fdb->vni))) + be32_to_cpu(fdb->key.vni))) goto nla_put_failure; ci.ndm_used = jiffies_to_clock_t(now - READ_ONCE(fdb->used)); @@ -293,8 +298,8 @@ static void vxlan_fdb_switchdev_notifier_info(const struct vxlan_dev *vxlan, fdb_info->remote_port = rd->remote_port; fdb_info->remote_vni = rd->remote_vni; fdb_info->remote_ifindex = rd->remote_ifindex; - memcpy(fdb_info->eth_addr, fdb->eth_addr, ETH_ALEN); - fdb_info->vni = fdb->vni; + memcpy(fdb_info->eth_addr, fdb->key.eth_addr, ETH_ALEN); + fdb_info->vni = fdb->key.vni; fdb_info->offloaded = rd->offloaded; fdb_info->added_by_user = fdb->flags & NTF_VXLAN_ADDED_BY_USER; } @@ -366,67 +371,42 @@ static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN]) }; struct vxlan_rdst remote = { }; - memcpy(f.eth_addr, eth_addr, ETH_ALEN); + memcpy(f.key.eth_addr, eth_addr, ETH_ALEN); vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL); } -/* Hash Ethernet address */ -static u32 eth_hash(const unsigned char *addr) -{ - u64 value = get_unaligned((u64 *)addr); - - /* only want 6 bytes */ -#ifdef __BIG_ENDIAN - value >>= 16; -#else - value <<= 16; -#endif - return hash_64(value, FDB_HASH_BITS); -} - -u32 eth_vni_hash(const unsigned char *addr, __be32 vni) +/* Look up Ethernet address in forwarding table */ +static struct vxlan_fdb *vxlan_find_mac_rcu(struct vxlan_dev *vxlan, + const u8 *mac, __be32 vni) { - /* use 1 byte of OUI and 3 bytes of NIC */ - u32 key = get_unaligned((u32 *)(addr + 2)); - - return jhash_2words(key, vni, vxlan_salt) & (FDB_HASH_SIZE - 1); -} + struct vxlan_fdb_key key; -u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni) -{ - if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) - return eth_vni_hash(mac, vni); + memset(&key, 0, sizeof(key)); + memcpy(key.eth_addr, mac, sizeof(key.eth_addr)); + if (!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)) + key.vni = vxlan->default_dst.remote_vni; else - return eth_hash(mac); -} + key.vni = vni; -/* Hash chain to use given mac address */ -static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan, - const u8 *mac, __be32 vni) -{ - return &vxlan->fdb_head[fdb_head_index(vxlan, mac, vni)]; + return rhashtable_lookup(&vxlan->fdb_hash_tbl, &key, + vxlan_fdb_rht_params); } -/* Look up Ethernet address in forwarding table */ -static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan, - const u8 *mac, __be32 vni) +static struct vxlan_fdb *vxlan_find_mac_tx(struct vxlan_dev *vxlan, + const u8 *mac, __be32 vni) { - struct hlist_head *head = vxlan_fdb_head(vxlan, mac, vni); struct vxlan_fdb *f; - hlist_for_each_entry_rcu(f, head, hlist) { - if (ether_addr_equal(mac, f->eth_addr)) { - if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) { - if (vni == f->vni) - return f; - } else { - return f; - } - } + f = vxlan_find_mac_rcu(vxlan, mac, vni); + if (f) { + unsigned long now = jiffies; + + if (READ_ONCE(f->used) != now) + WRITE_ONCE(f->used, now); } - return NULL; + return f; } static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, @@ -434,13 +414,11 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, { struct vxlan_fdb *f; - f = __vxlan_find_mac(vxlan, mac, vni); - if (f) { - unsigned long now = jiffies; + lockdep_assert_held_once(&vxlan->hash_lock); - if (READ_ONCE(f->used) != now) - WRITE_ONCE(f->used, now); - } + rcu_read_lock(); + f = vxlan_find_mac_rcu(vxlan, mac, vni); + rcu_read_unlock(); return f; } @@ -480,7 +458,7 @@ 
int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, rcu_read_lock(); - f = __vxlan_find_mac(vxlan, eth_addr, vni); + f = vxlan_find_mac_rcu(vxlan, eth_addr, vni); if (!f) { rc = -ENOENT; goto out; @@ -517,32 +495,28 @@ int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, struct vxlan_dev *vxlan; struct vxlan_rdst *rdst; struct vxlan_fdb *f; - unsigned int h; int rc = 0; if (!netif_is_vxlan(dev)) return -EINVAL; vxlan = netdev_priv(dev); - for (h = 0; h < FDB_HASH_SIZE; ++h) { - spin_lock_bh(&vxlan->hash_lock[h]); - hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist) { - if (f->vni == vni) { - list_for_each_entry(rdst, &f->remotes, list) { - rc = vxlan_fdb_notify_one(nb, vxlan, - f, rdst, - extack); - if (rc) - goto unlock; - } + spin_lock_bh(&vxlan->hash_lock); + hlist_for_each_entry(f, &vxlan->fdb_list, fdb_node) { + if (f->key.vni == vni) { + list_for_each_entry(rdst, &f->remotes, list) { + rc = vxlan_fdb_notify_one(nb, vxlan, f, rdst, + extack); + if (rc) + goto unlock; } } - spin_unlock_bh(&vxlan->hash_lock[h]); } + spin_unlock_bh(&vxlan->hash_lock); return 0; unlock: - spin_unlock_bh(&vxlan->hash_lock[h]); + spin_unlock_bh(&vxlan->hash_lock); return rc; } EXPORT_SYMBOL_GPL(vxlan_fdb_replay); @@ -552,20 +526,19 @@ void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni) struct vxlan_dev *vxlan; struct vxlan_rdst *rdst; struct vxlan_fdb *f; - unsigned int h; if (!netif_is_vxlan(dev)) return; vxlan = netdev_priv(dev); - for (h = 0; h < FDB_HASH_SIZE; ++h) { - spin_lock_bh(&vxlan->hash_lock[h]); - hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist) - if (f->vni == vni) - list_for_each_entry(rdst, &f->remotes, list) - rdst->offloaded = false; - spin_unlock_bh(&vxlan->hash_lock[h]); + spin_lock_bh(&vxlan->hash_lock); + hlist_for_each_entry(f, &vxlan->fdb_list, fdb_node) { + if (f->key.vni == vni) { + list_for_each_entry(rdst, &f->remotes, list) + rdst->offloaded = false; + } } + spin_unlock_bh(&vxlan->hash_lock); } EXPORT_SYMBOL_GPL(vxlan_fdb_clear_offload); @@ -610,10 +583,10 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, if (rd == NULL) return -ENOMEM; - if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) { - kfree(rd); - return -ENOMEM; - } + /* The driver can work correctly without a dst cache, so do not treat + * dst cache initialization errors as fatal. 
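+ * A failed init (kept quiet by __GFP_NOWARN) just leaves the per-CPU
+ * storage absent, in which case the dst_cache_*() helpers behave as
+ * permanent misses and each packet falls back to a full route lookup.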
+ */ + dst_cache_init(&rd->dst_cache, GFP_ATOMIC | __GFP_NOWARN); rd->remote_ip = *ip; rd->remote_port = port; @@ -803,27 +776,20 @@ static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, const u8 *mac, f = kmalloc(sizeof(*f), GFP_ATOMIC); if (!f) return NULL; + memset(&f->key, 0, sizeof(f->key)); f->state = state; f->flags = ndm_flags; f->updated = f->used = jiffies; - f->vni = src_vni; + f->key.vni = src_vni; f->nh = NULL; RCU_INIT_POINTER(f->vdev, vxlan); INIT_LIST_HEAD(&f->nh_list); INIT_LIST_HEAD(&f->remotes); - memcpy(f->eth_addr, mac, ETH_ALEN); + memcpy(f->key.eth_addr, mac, ETH_ALEN); return f; } -static void vxlan_fdb_insert(struct vxlan_dev *vxlan, const u8 *mac, - __be32 src_vni, struct vxlan_fdb *f) -{ - ++vxlan->addrcnt; - hlist_add_head_rcu(&f->hlist, - vxlan_fdb_head(vxlan, mac, src_vni)); -} - static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb, u32 nhid, struct netlink_ext_ack *extack) { @@ -913,10 +879,27 @@ int vxlan_fdb_create(struct vxlan_dev *vxlan, if (rc < 0) goto errout; + rc = rhashtable_lookup_insert_fast(&vxlan->fdb_hash_tbl, &f->rhnode, + vxlan_fdb_rht_params); + if (rc) + goto destroy_remote; + + ++vxlan->addrcnt; + hlist_add_head_rcu(&f->fdb_node, &vxlan->fdb_list); + *fdb = f; return 0; +destroy_remote: + if (rcu_access_pointer(f->nh)) { + list_del_rcu(&f->nh_list); + nexthop_put(rtnl_dereference(f->nh)); + } else { + list_del(&rd->list); + dst_cache_destroy(&rd->dst_cache); + kfree(rd); + } errout: kfree(f); return rc; @@ -953,7 +936,7 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, { struct vxlan_rdst *rd; - netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr); + netdev_dbg(vxlan->dev, "delete %pM\n", f->key.eth_addr); --vxlan->addrcnt; if (do_notify) { @@ -966,7 +949,9 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, swdev_notify, NULL); } - hlist_del_rcu(&f->hlist); + hlist_del_init_rcu(&f->fdb_node); + rhashtable_remove_fast(&vxlan->fdb_hash_tbl, &f->rhnode, + vxlan_fdb_rht_params); list_del_rcu(&f->nh_list); call_rcu(&f->rcu, vxlan_fdb_free); } @@ -1024,8 +1009,8 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, if ((flags & NLM_F_REPLACE)) { /* Only change unicasts */ - if (!(is_multicast_ether_addr(f->eth_addr) || - is_zero_ether_addr(f->eth_addr))) { + if (!(is_multicast_ether_addr(f->key.eth_addr) || + is_zero_ether_addr(f->key.eth_addr))) { if (nhid) { rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack); if (rc < 0) @@ -1041,8 +1026,8 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, } } if ((flags & NLM_F_APPEND) && - (is_multicast_ether_addr(f->eth_addr) || - is_zero_ether_addr(f->eth_addr))) { + (is_multicast_ether_addr(f->key.eth_addr) || + is_zero_ether_addr(f->key.eth_addr))) { rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd); if (rc < 0) @@ -1101,7 +1086,6 @@ static int vxlan_fdb_update_create(struct vxlan_dev *vxlan, if (rc < 0) return rc; - vxlan_fdb_insert(vxlan, mac, src_vni, f); rc = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH, swdev_notify, extack); if (rc) @@ -1125,7 +1109,7 @@ int vxlan_fdb_update(struct vxlan_dev *vxlan, { struct vxlan_fdb *f; - f = __vxlan_find_mac(vxlan, mac, src_vni); + f = vxlan_find_mac(vxlan, mac, src_vni); if (f) { if (flags & NLM_F_EXCL) { netdev_dbg(vxlan->dev, @@ -1253,7 +1237,6 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], __be16 port; __be32 src_vni, vni; u32 ifindex, nhid; - u32 hash_index; int err; if (!(ndm->ndm_state & 
(NUD_PERMANENT|NUD_REACHABLE))) { @@ -1273,13 +1256,12 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], if (vxlan->default_dst.remote_ip.sa.sa_family != ip.sa.sa_family) return -EAFNOSUPPORT; - hash_index = fdb_head_index(vxlan, addr, src_vni); - spin_lock_bh(&vxlan->hash_lock[hash_index]); + spin_lock_bh(&vxlan->hash_lock); err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags, port, src_vni, vni, ifindex, ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER, nhid, true, extack); - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); if (!err) *notified = true; @@ -1296,7 +1278,7 @@ int __vxlan_fdb_delete(struct vxlan_dev *vxlan, struct vxlan_fdb *f; int err = -ENOENT; - f = __vxlan_find_mac(vxlan, addr, src_vni); + f = vxlan_find_mac(vxlan, addr, src_vni); if (!f) return err; @@ -1330,7 +1312,6 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], union vxlan_addr ip; __be32 src_vni, vni; u32 ifindex, nhid; - u32 hash_index; __be16 port; int err; @@ -1339,11 +1320,10 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], if (err) return err; - hash_index = fdb_head_index(vxlan, addr, src_vni); - spin_lock_bh(&vxlan->hash_lock[hash_index]); + spin_lock_bh(&vxlan->hash_lock); err = __vxlan_fdb_delete(vxlan, addr, ip, port, src_vni, vni, ifindex, true); - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); if (!err) *notified = true; @@ -1358,52 +1338,46 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, { struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; struct vxlan_dev *vxlan = netdev_priv(dev); - unsigned int h; + struct vxlan_fdb *f; int err = 0; - for (h = 0; h < FDB_HASH_SIZE; ++h) { - struct vxlan_fdb *f; - - rcu_read_lock(); - hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { - struct vxlan_rdst *rd; - - if (rcu_access_pointer(f->nh)) { - if (*idx < ctx->fdb_idx) - goto skip_nh; - err = vxlan_fdb_info(skb, vxlan, f, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWNEIGH, - NLM_F_MULTI, NULL); - if (err < 0) { - rcu_read_unlock(); - goto out; - } -skip_nh: - *idx += 1; - continue; + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &vxlan->fdb_list, fdb_node) { + struct vxlan_rdst *rd; + + if (rcu_access_pointer(f->nh)) { + if (*idx < ctx->fdb_idx) + goto skip_nh; + err = vxlan_fdb_info(skb, vxlan, f, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, NLM_F_MULTI, NULL); + if (err < 0) { + rcu_read_unlock(); + goto out; } +skip_nh: + *idx += 1; + continue; + } - list_for_each_entry_rcu(rd, &f->remotes, list) { - if (*idx < ctx->fdb_idx) - goto skip; - - err = vxlan_fdb_info(skb, vxlan, f, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWNEIGH, - NLM_F_MULTI, rd); - if (err < 0) { - rcu_read_unlock(); - goto out; - } -skip: - *idx += 1; + list_for_each_entry_rcu(rd, &f->remotes, list) { + if (*idx < ctx->fdb_idx) + goto skip; + + err = vxlan_fdb_info(skb, vxlan, f, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, NLM_F_MULTI, rd); + if (err < 0) { + rcu_read_unlock(); + goto out; } +skip: + *idx += 1; } - rcu_read_unlock(); } + rcu_read_unlock(); out: return err; } @@ -1427,7 +1401,7 @@ static int vxlan_fdb_get(struct sk_buff *skb, rcu_read_lock(); - f = __vxlan_find_mac(vxlan, addr, vni); + f = vxlan_find_mac_rcu(vxlan, addr, vni); if (!f) { NL_SET_ERR_MSG(extack, "Fdb entry not found"); err = -ENOENT; @@ -1463,7 +1437,7 @@ static enum skb_drop_reason vxlan_snoop(struct 
net_device *dev, ifindex = src_ifindex; #endif - f = __vxlan_find_mac(vxlan, src_mac, vni); + f = vxlan_find_mac_rcu(vxlan, src_mac, vni); if (likely(f)) { struct vxlan_rdst *rdst = first_remote_rcu(f); unsigned long now = jiffies; @@ -1491,10 +1465,8 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev, rdst->remote_ip = *src_ip; vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL); } else { - u32 hash_index = fdb_head_index(vxlan, src_mac, vni); - /* learned new entry */ - spin_lock(&vxlan->hash_lock[hash_index]); + spin_lock(&vxlan->hash_lock); /* close off race between vxlan_flush and incoming packets */ if (netif_running(dev)) @@ -1505,7 +1477,7 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev, vni, vxlan->default_dst.remote_vni, ifindex, NTF_SELF, 0, true, NULL); - spin_unlock(&vxlan->hash_lock[hash_index]); + spin_unlock(&vxlan->hash_lock); } return SKB_NOT_DROPPED_YET; @@ -1916,12 +1888,15 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) goto out; } - f = vxlan_find_mac(vxlan, n->ha, vni); + rcu_read_lock(); + f = vxlan_find_mac_tx(vxlan, n->ha, vni); if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) { /* bridge-local neighbor */ neigh_release(n); + rcu_read_unlock(); goto out; } + rcu_read_unlock(); reply = arp_create(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, n->ha, sha); @@ -2080,7 +2055,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) goto out; } - f = vxlan_find_mac(vxlan, n->ha, vni); + f = vxlan_find_mac_tx(vxlan, n->ha, vni); if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) { /* bridge-local neighbor */ neigh_release(n); @@ -2648,14 +2623,10 @@ static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev, memset(&nh_rdst, 0, sizeof(struct vxlan_rdst)); hash = skb_get_hash(skb); - rcu_read_lock(); nh = rcu_dereference(f->nh); - if (!nh) { - rcu_read_unlock(); + if (!nh) goto drop; - } do_xmit = vxlan_fdb_nh_path_select(nh, hash, &nh_rdst); - rcu_read_unlock(); if (likely(do_xmit)) vxlan_xmit_one(skb, dev, vni, &nh_rdst, did_rsc); @@ -2782,7 +2753,8 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) } eth = eth_hdr(skb); - f = vxlan_find_mac(vxlan, eth->h_dest, vni); + rcu_read_lock(); + f = vxlan_find_mac_tx(vxlan, eth->h_dest, vni); did_rsc = false; if (f && (f->flags & NTF_ROUTER) && (vxlan->cfg.flags & VXLAN_F_RSC) && @@ -2790,11 +2762,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) ntohs(eth->h_proto) == ETH_P_IPV6)) { did_rsc = route_shortcircuit(dev, skb); if (did_rsc) - f = vxlan_find_mac(vxlan, eth->h_dest, vni); + f = vxlan_find_mac_tx(vxlan, eth->h_dest, vni); } if (f == NULL) { - f = vxlan_find_mac(vxlan, all_zeros_mac, vni); + f = vxlan_find_mac_tx(vxlan, all_zeros_mac, vni); if (f == NULL) { if ((vxlan->cfg.flags & VXLAN_F_L2MISS) && !is_multicast_ether_addr(eth->h_dest)) @@ -2804,7 +2776,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); kfree_skb_reason(skb, SKB_DROP_REASON_NO_TX_TARGET); - return NETDEV_TX_OK; + goto out; } } @@ -2829,6 +2801,8 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) kfree_skb_reason(skb, SKB_DROP_REASON_NO_TX_TARGET); } +out: + rcu_read_unlock(); return NETDEV_TX_OK; } @@ -2837,38 +2811,36 @@ static void vxlan_cleanup(struct timer_list *t) { struct vxlan_dev *vxlan = from_timer(vxlan, t, age_timer); unsigned long 
next_timer = jiffies + FDB_AGE_INTERVAL; - unsigned int h; + struct vxlan_fdb *f; if (!netif_running(vxlan->dev)) return; - for (h = 0; h < FDB_HASH_SIZE; ++h) { - struct hlist_node *p, *n; - - spin_lock(&vxlan->hash_lock[h]); - hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) { - struct vxlan_fdb *f - = container_of(p, struct vxlan_fdb, hlist); - unsigned long timeout; + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &vxlan->fdb_list, fdb_node) { + unsigned long timeout; - if (f->state & (NUD_PERMANENT | NUD_NOARP)) - continue; + if (f->state & (NUD_PERMANENT | NUD_NOARP)) + continue; - if (f->flags & NTF_EXT_LEARNED) - continue; + if (f->flags & NTF_EXT_LEARNED) + continue; - timeout = READ_ONCE(f->updated) + vxlan->cfg.age_interval * HZ; - if (time_before_eq(timeout, jiffies)) { - netdev_dbg(vxlan->dev, - "garbage collect %pM\n", - f->eth_addr); + timeout = READ_ONCE(f->updated) + vxlan->cfg.age_interval * HZ; + if (time_before_eq(timeout, jiffies)) { + spin_lock(&vxlan->hash_lock); + if (!hlist_unhashed(&f->fdb_node)) { + netdev_dbg(vxlan->dev, "garbage collect %pM\n", + f->key.eth_addr); f->state = NUD_STALE; vxlan_fdb_destroy(vxlan, f, true, true); - } else if (time_before(timeout, next_timer)) - next_timer = timeout; + } + spin_unlock(&vxlan->hash_lock); + } else if (time_before(timeout, next_timer)) { + next_timer = timeout; } - spin_unlock(&vxlan->hash_lock[h]); } + rcu_read_unlock(); mod_timer(&vxlan->age_timer, next_timer); } @@ -2903,10 +2875,14 @@ static int vxlan_init(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); int err; + err = rhashtable_init(&vxlan->fdb_hash_tbl, &vxlan_fdb_rht_params); + if (err) + return err; + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) { err = vxlan_vnigroup_init(vxlan); if (err) - return err; + goto err_rhashtable_destroy; } err = gro_cells_init(&vxlan->gro_cells, dev); @@ -2925,21 +2901,11 @@ err_gro_cells_destroy: err_vnigroup_uninit: if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) vxlan_vnigroup_uninit(vxlan); +err_rhashtable_destroy: + rhashtable_destroy(&vxlan->fdb_hash_tbl); return err; } -static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni) -{ - struct vxlan_fdb *f; - u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, vni); - - spin_lock_bh(&vxlan->hash_lock[hash_index]); - f = __vxlan_find_mac(vxlan, all_zeros_mac, vni); - if (f) - vxlan_fdb_destroy(vxlan, f, true, true); - spin_unlock_bh(&vxlan->hash_lock[hash_index]); -} - static void vxlan_uninit(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); @@ -2951,7 +2917,7 @@ static void vxlan_uninit(struct net_device *dev) gro_cells_destroy(&vxlan->gro_cells); - vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni); + rhashtable_destroy(&vxlan->fdb_hash_tbl); } /* Start ageing timer and join group when device is brought up */ @@ -2992,7 +2958,8 @@ struct vxlan_fdb_flush_desc { static bool vxlan_fdb_is_default_entry(const struct vxlan_fdb *f, const struct vxlan_dev *vxlan) { - return is_zero_ether_addr(f->eth_addr) && f->vni == vxlan->cfg.vni; + return is_zero_ether_addr(f->key.eth_addr) && + f->key.vni == vxlan->cfg.vni; } static bool vxlan_fdb_nhid_matches(const struct vxlan_fdb *f, u32 nhid) @@ -3015,7 +2982,7 @@ static bool vxlan_fdb_flush_matches(const struct vxlan_fdb *f, if (desc->ignore_default_entry && vxlan_fdb_is_default_entry(f, vxlan)) return false; - if (desc->src_vni && f->vni != desc->src_vni) + if (desc->src_vni && f->key.vni != desc->src_vni) return false; if (desc->nhid && !vxlan_fdb_nhid_matches(f, desc->nhid)) @@ -3071,33 
+3038,32 @@ static void vxlan_flush(struct vxlan_dev *vxlan, const struct vxlan_fdb_flush_desc *desc) { bool match_remotes = vxlan_fdb_flush_should_match_remotes(desc); - unsigned int h; - - for (h = 0; h < FDB_HASH_SIZE; ++h) { - struct hlist_node *p, *n; + struct vxlan_fdb *f; - spin_lock_bh(&vxlan->hash_lock[h]); - hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) { - struct vxlan_fdb *f - = container_of(p, struct vxlan_fdb, hlist); + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &vxlan->fdb_list, fdb_node) { + if (!vxlan_fdb_flush_matches(f, vxlan, desc)) + continue; - if (!vxlan_fdb_flush_matches(f, vxlan, desc)) - continue; + spin_lock_bh(&vxlan->hash_lock); + if (hlist_unhashed(&f->fdb_node)) + goto unlock; - if (match_remotes) { - bool destroy_fdb = false; + if (match_remotes) { + bool destroy_fdb = false; - vxlan_fdb_flush_match_remotes(f, vxlan, desc, - &destroy_fdb); + vxlan_fdb_flush_match_remotes(f, vxlan, desc, + &destroy_fdb); - if (!destroy_fdb) - continue; - } - - vxlan_fdb_destroy(vxlan, f, true, true); + if (!destroy_fdb) + goto unlock; } - spin_unlock_bh(&vxlan->hash_lock[h]); + + vxlan_fdb_destroy(vxlan, f, true, true); +unlock: + spin_unlock_bh(&vxlan->hash_lock); } + rcu_read_unlock(); } static const struct nla_policy vxlan_del_bulk_policy[NDA_MAX + 1] = { @@ -3185,7 +3151,7 @@ static int vxlan_stop(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_fdb_flush_desc desc = { - /* Default entry is deleted at vxlan_uninit. */ + /* Default entry is deleted at vxlan_dellink. */ .ignore_default_entry = true, .state = 0, .state_mask = NUD_PERMANENT | NUD_NOARP, @@ -3348,7 +3314,6 @@ static void vxlan_offload_rx_ports(struct net_device *dev, bool push) static void vxlan_setup(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); - unsigned int h; eth_hw_addr_random(dev); ether_setup(dev); @@ -3375,15 +3340,13 @@ static void vxlan_setup(struct net_device *dev) dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; INIT_LIST_HEAD(&vxlan->next); + spin_lock_init(&vxlan->hash_lock); timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE); vxlan->dev = dev; - for (h = 0; h < FDB_HASH_SIZE; ++h) { - spin_lock_init(&vxlan->hash_lock[h]); - INIT_HLIST_HEAD(&vxlan->fdb_head[h]); - } + INIT_HLIST_HEAD(&vxlan->fdb_list); } static void vxlan_ether_setup(struct net_device *dev) @@ -3960,8 +3923,6 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_dev *vxlan = netdev_priv(dev); struct net_device *remote_dev = NULL; - struct vxlan_fdb *f = NULL; - bool unregister = false; struct vxlan_rdst *dst; int err; @@ -3972,72 +3933,54 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, dev->ethtool_ops = &vxlan_ethtool_ops; - /* create an fdb entry for a valid default destination */ - if (!vxlan_addr_any(&dst->remote_ip)) { - err = vxlan_fdb_create(vxlan, all_zeros_mac, - &dst->remote_ip, - NUD_REACHABLE | NUD_PERMANENT, - vxlan->cfg.dst_port, - dst->remote_vni, - dst->remote_vni, - dst->remote_ifindex, - NTF_SELF, 0, &f, extack); - if (err) - return err; - } - err = register_netdevice(dev); if (err) - goto errout; - unregister = true; + return err; if (dst->remote_ifindex) { remote_dev = __dev_get_by_index(net, dst->remote_ifindex); if (!remote_dev) { err = -ENODEV; - goto errout; + goto unregister; } err = netdev_upper_dev_link(remote_dev, dev, extack); if (err) - goto errout; + goto unregister; + + dst->remote_dev = remote_dev; } err = 
rtnl_configure_link(dev, NULL, 0, NULL); if (err < 0) goto unlink; - if (f) { - vxlan_fdb_insert(vxlan, all_zeros_mac, dst->remote_vni, f); - - /* notify default fdb entry */ - err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), - RTM_NEWNEIGH, true, extack); - if (err) { - vxlan_fdb_destroy(vxlan, f, false, false); - if (remote_dev) - netdev_upper_dev_unlink(remote_dev, dev); - goto unregister; - } + /* create an fdb entry for a valid default destination */ + if (!vxlan_addr_any(&dst->remote_ip)) { + spin_lock_bh(&vxlan->hash_lock); + err = vxlan_fdb_update(vxlan, all_zeros_mac, + &dst->remote_ip, + NUD_REACHABLE | NUD_PERMANENT, + NLM_F_EXCL | NLM_F_CREATE, + vxlan->cfg.dst_port, + dst->remote_vni, + dst->remote_vni, + dst->remote_ifindex, + NTF_SELF, 0, true, extack); + spin_unlock_bh(&vxlan->hash_lock); + if (err) + goto unlink; } list_add(&vxlan->next, &vn->vxlan_list); - if (remote_dev) - dst->remote_dev = remote_dev; + return 0; + unlink: if (remote_dev) netdev_upper_dev_unlink(remote_dev, dev); -errout: - /* unregister_netdevice() destroys the default FDB entry with deletion - * notification. But the addition notification was not sent yet, so - * destroy the entry by hand here. - */ - if (f) - __vxlan_fdb_free(f); unregister: - if (unregister) - unregister_netdevice(dev); + unregister_netdevice(dev); return err; } @@ -4454,9 +4397,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], /* handle default dst entry */ if (rem_ip_changed) { - u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni); - - spin_lock_bh(&vxlan->hash_lock[hash_index]); + spin_lock_bh(&vxlan->hash_lock); if (!vxlan_addr_any(&conf.remote_ip)) { err = vxlan_fdb_update(vxlan, all_zeros_mac, &conf.remote_ip, @@ -4467,7 +4408,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], conf.remote_ifindex, NTF_SELF, 0, true, extack); if (err) { - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); netdev_adjacent_change_abort(dst->remote_dev, lowerdev, dev); return err; @@ -4481,7 +4422,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], dst->remote_vni, dst->remote_ifindex, true); - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); /* If vni filtering device, also update fdb entries of * all vnis that were using default remote ip @@ -4518,10 +4459,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], static void vxlan_dellink(struct net_device *dev, struct list_head *head) { struct vxlan_dev *vxlan = netdev_priv(dev); - struct vxlan_fdb_flush_desc desc = { - /* Default entry is deleted at vxlan_uninit. 
*/ - .ignore_default_entry = true, - }; + struct vxlan_fdb_flush_desc desc = {}; vxlan_flush(vxlan, &desc); @@ -4784,13 +4722,10 @@ vxlan_fdb_offloaded_set(struct net_device *dev, struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_rdst *rdst; struct vxlan_fdb *f; - u32 hash_index; - - hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni); - spin_lock_bh(&vxlan->hash_lock[hash_index]); + spin_lock_bh(&vxlan->hash_lock); - f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); + f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); if (!f) goto out; @@ -4804,7 +4739,7 @@ vxlan_fdb_offloaded_set(struct net_device *dev, rdst->offloaded = fdb_info->offloaded; out: - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); } static int @@ -4813,13 +4748,11 @@ vxlan_fdb_external_learn_add(struct net_device *dev, { struct vxlan_dev *vxlan = netdev_priv(dev); struct netlink_ext_ack *extack; - u32 hash_index; int err; - hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni); extack = switchdev_notifier_info_to_extack(&fdb_info->info); - spin_lock_bh(&vxlan->hash_lock[hash_index]); + spin_lock_bh(&vxlan->hash_lock); err = vxlan_fdb_update(vxlan, fdb_info->eth_addr, &fdb_info->remote_ip, NUD_REACHABLE, NLM_F_CREATE | NLM_F_REPLACE, @@ -4829,7 +4762,7 @@ vxlan_fdb_external_learn_add(struct net_device *dev, fdb_info->remote_ifindex, NTF_USE | NTF_SELF | NTF_EXT_LEARNED, 0, false, extack); - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); return err; } @@ -4840,13 +4773,11 @@ vxlan_fdb_external_learn_del(struct net_device *dev, { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_fdb *f; - u32 hash_index; int err = 0; - hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni); - spin_lock_bh(&vxlan->hash_lock[hash_index]); + spin_lock_bh(&vxlan->hash_lock); - f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); + f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); if (!f) err = -ENOENT; else if (f->flags & NTF_EXT_LEARNED) @@ -4858,7 +4789,7 @@ vxlan_fdb_external_learn_del(struct net_device *dev, fdb_info->remote_ifindex, false); - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); return err; } @@ -4907,18 +4838,15 @@ static void vxlan_fdb_nh_flush(struct nexthop *nh) { struct vxlan_fdb *fdb; struct vxlan_dev *vxlan; - u32 hash_index; rcu_read_lock(); list_for_each_entry_rcu(fdb, &nh->fdb_list, nh_list) { vxlan = rcu_dereference(fdb->vdev); WARN_ON(!vxlan); - hash_index = fdb_head_index(vxlan, fdb->eth_addr, - vxlan->default_dst.remote_vni); - spin_lock_bh(&vxlan->hash_lock[hash_index]); - if (!hlist_unhashed(&fdb->hlist)) + spin_lock_bh(&vxlan->hash_lock); + if (!hlist_unhashed(&fdb->fdb_node)) vxlan_fdb_destroy(vxlan, fdb, false, false); - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); } rcu_read_unlock(); } @@ -4966,19 +4894,15 @@ static void __net_exit vxlan_destroy_tunnels(struct vxlan_net *vn, vxlan_dellink(vxlan->dev, dev_to_kill); } -static void __net_exit vxlan_exit_batch_rtnl(struct list_head *net_list, - struct list_head *dev_to_kill) +static void __net_exit vxlan_exit_rtnl(struct net *net, + struct list_head *dev_to_kill) { - struct net *net; - - ASSERT_RTNL(); - list_for_each_entry(net, net_list, exit_list) { - struct vxlan_net *vn = net_generic(net, vxlan_net_id); + struct vxlan_net *vn = net_generic(net, vxlan_net_id); - __unregister_nexthop_notifier(net, 
&vn->nexthop_notifier_block); + ASSERT_RTNL_NET(net); - vxlan_destroy_tunnels(vn, dev_to_kill); - } + __unregister_nexthop_notifier(net, &vn->nexthop_notifier_block); + vxlan_destroy_tunnels(vn, dev_to_kill); } static void __net_exit vxlan_exit_net(struct net *net) @@ -4992,7 +4916,7 @@ static void __net_exit vxlan_exit_net(struct net *net) static struct pernet_operations vxlan_net_ops = { .init = vxlan_init_net, - .exit_batch_rtnl = vxlan_exit_batch_rtnl, + .exit_rtnl = vxlan_exit_rtnl, .exit = vxlan_exit_net, .id = &vxlan_net_id, .size = sizeof(struct vxlan_net), @@ -5002,8 +4926,6 @@ static int __init vxlan_init_module(void) { int rc; - get_random_bytes(&vxlan_salt, sizeof(vxlan_salt)); - rc = register_pernet_subsys(&vxlan_net_ops); if (rc) goto out1; diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h index 76a351a997d5..d328aed9feef 100644 --- a/drivers/net/vxlan/vxlan_private.h +++ b/drivers/net/vxlan/vxlan_private.h @@ -24,18 +24,23 @@ struct vxlan_net { struct notifier_block nexthop_notifier_block; }; +struct vxlan_fdb_key { + u8 eth_addr[ETH_ALEN]; + __be32 vni; +}; + /* Forwarding table entry */ struct vxlan_fdb { - struct hlist_node hlist; /* linked list of entries */ + struct rhash_head rhnode; struct rcu_head rcu; unsigned long updated; /* jiffies */ unsigned long used; struct list_head remotes; - u8 eth_addr[ETH_ALEN]; + struct vxlan_fdb_key key; u16 state; /* see ndm_state */ - __be32 vni; u16 flags; /* see ndm_flags and below */ struct list_head nh_list; + struct hlist_node fdb_node; struct nexthop __rcu *nh; struct vxlan_dev __rcu *vdev; }; diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c index 06d19e90eadb..186d0660669a 100644 --- a/drivers/net/vxlan/vxlan_vnifilter.c +++ b/drivers/net/vxlan/vxlan_vnifilter.c @@ -411,13 +411,12 @@ static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb struct tunnel_msg *tmsg; struct net_device *dev; - if (cb->nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct tunnel_msg))) { + tmsg = nlmsg_payload(cb->nlh, sizeof(*tmsg)); + if (!tmsg) { NL_SET_ERR_MSG(cb->extack, "Invalid msg length"); return -EINVAL; } - tmsg = nlmsg_data(cb->nlh); - if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) { NL_SET_ERR_MSG(cb->extack, "Invalid tunnelmsg flags in ancillary header"); return -EINVAL; @@ -483,11 +482,9 @@ static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni, struct netlink_ext_ack *extack) { struct vxlan_rdst *dst = &vxlan->default_dst; - u32 hash_index; int err = 0; - hash_index = fdb_head_index(vxlan, all_zeros_mac, vni); - spin_lock_bh(&vxlan->hash_lock[hash_index]); + spin_lock_bh(&vxlan->hash_lock); if (remote_ip && !vxlan_addr_any(remote_ip)) { err = vxlan_fdb_update(vxlan, all_zeros_mac, remote_ip, @@ -499,7 +496,7 @@ static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni, dst->remote_ifindex, NTF_SELF, 0, true, extack); if (err) { - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); return err; } } @@ -512,7 +509,7 @@ static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni, dst->remote_ifindex, true); } - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); return err; } @@ -628,10 +625,7 @@ static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, */ if (!vxlan_addr_any(&vninode->remote_ip) || !vxlan_addr_any(&dst->remote_ip)) { - u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, - vninode->vni); - - 
spin_lock_bh(&vxlan->hash_lock[hash_index]); + spin_lock_bh(&vxlan->hash_lock); __vxlan_fdb_delete(vxlan, all_zeros_mac, (vxlan_addr_any(&vninode->remote_ip) ? dst->remote_ip : vninode->remote_ip), @@ -639,7 +633,7 @@ static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, vninode->vni, vninode->vni, dst->remote_ifindex, true); - spin_unlock_bh(&vxlan->hash_lock[hash_index]); + spin_unlock_bh(&vxlan->hash_lock); } if (vxlan->dev->flags & IFF_UP) { diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index db9f9ebcb62d..eb8b35b6224d 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -497,7 +497,7 @@ static int ath10k_ahb_resource_init(struct ath10k *ar) ath10k_dbg(ar, ATH10K_DBG_BOOT, "irq: %d\n", ar_ahb->irq); - ath10k_dbg(ar, ATH10K_DBG_BOOT, "mem: 0x%pK mem_len: %lu gcc mem: 0x%pK tcsr_mem: 0x%pK\n", + ath10k_dbg(ar, ATH10K_DBG_BOOT, "mem: 0x%p mem_len: %lu gcc mem: 0x%p tcsr_mem: 0x%p\n", ar_ahb->mem, ar_ahb->mem_len, ar_ahb->gcc_mem, ar_ahb->tcsr_mem); return 0; diff --git a/drivers/net/wireless/ath/ath10k/bmi.c b/drivers/net/wireless/ath/ath10k/bmi.c index 9a4f8e815412..48efdc71d54d 100644 --- a/drivers/net/wireless/ath/ath10k/bmi.c +++ b/drivers/net/wireless/ath/ath10k/bmi.c @@ -349,7 +349,7 @@ static int ath10k_bmi_lz_data_large(struct ath10k *ar, const void *buffer, u32 l int ret; size_t buf_len; - ath10k_dbg(ar, ATH10K_DBG_BMI, "large bmi lz data buffer 0x%pK length %d\n", + ath10k_dbg(ar, ATH10K_DBG_BMI, "large bmi lz data buffer 0x%p length %d\n", buffer, length); if (ar->bmi.done_sent) { @@ -395,7 +395,7 @@ int ath10k_bmi_lz_data(struct ath10k *ar, const void *buffer, u32 length) u32 txlen; int ret; - ath10k_dbg(ar, ATH10K_DBG_BMI, "bmi lz data buffer 0x%pK length %d\n", + ath10k_dbg(ar, ATH10K_DBG_BMI, "bmi lz data buffer 0x%p length %d\n", buffer, length); if (ar->bmi.done_sent) { @@ -461,7 +461,7 @@ int ath10k_bmi_fast_download(struct ath10k *ar, int ret; ath10k_dbg(ar, ATH10K_DBG_BMI, - "bmi fast download address 0x%x buffer 0x%pK length %d\n", + "bmi fast download address 0x%x buffer 0x%p length %d\n", address, buffer, length); ret = ath10k_bmi_lz_stream_start(ar, address); diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c index afae4a8027f8..4fc81ae17a8a 100644 --- a/drivers/net/wireless/ath/ath10k/ce.c +++ b/drivers/net/wireless/ath/ath10k/ce.c @@ -1388,7 +1388,7 @@ static int ath10k_ce_init_src_ring(struct ath10k *ar, ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries); ath10k_dbg(ar, ATH10K_DBG_BOOT, - "boot init ce src ring id %d entries %d base_addr %pK\n", + "boot init ce src ring id %d entries %d base_addr %p\n", ce_id, nentries, src_ring->base_addr_owner_space); return 0; @@ -1426,7 +1426,7 @@ static int ath10k_ce_init_dest_ring(struct ath10k *ar, ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries); ath10k_dbg(ar, ATH10K_DBG_BOOT, - "boot ce dest ring id %d entries %d base_addr %pK\n", + "boot ce dest ring id %d entries %d base_addr %p\n", ce_id, nentries, dest_ring->base_addr_owner_space); return 0; diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 6d336e39d673..fe3a8f4a1cc1 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -1200,7 +1200,7 @@ static int ath10k_download_fw(struct ath10k *ar) } ath10k_dbg(ar, ATH10K_DBG_BOOT, - "boot uploading firmware image %pK len %d\n", + "boot uploading firmware image %p len %d\n", data, 
data_len); /* Check if device supports to download firmware via @@ -1826,7 +1826,7 @@ static int ath10k_download_and_run_otp(struct ath10k *ar) if (!ar->running_fw->fw_file.otp_data || !ar->running_fw->fw_file.otp_len) { - ath10k_warn(ar, "Not running otp, calibration will be incorrect (otp-data %pK otp_len %zd)!\n", + ath10k_warn(ar, "Not running otp, calibration will be incorrect (otp-data %p otp_len %zd)!\n", ar->running_fw->fw_file.otp_data, ar->running_fw->fw_file.otp_len); return 0; diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c index a6e21ce90bad..2da08dfebd3e 100644 --- a/drivers/net/wireless/ath/ath10k/htc.c +++ b/drivers/net/wireless/ath/ath10k/htc.c @@ -34,7 +34,7 @@ static struct sk_buff *ath10k_htc_build_tx_ctrl_skb(void *ar) skb_cb = ATH10K_SKB_CB(skb); memset(skb_cb, 0, sizeof(*skb_cb)); - ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: skb %pK\n", __func__, skb); + ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: skb %p\n", __func__, skb); return skb; } @@ -54,7 +54,7 @@ void ath10k_htc_notify_tx_completion(struct ath10k_htc_ep *ep, struct ath10k *ar = ep->htc->ar; struct ath10k_htc_hdr *hdr; - ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: ep %d skb %pK\n", __func__, + ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: ep %d skb %p\n", __func__, ep->eid, skb); /* A corner case where the copy completion is reaching to host but still @@ -515,7 +515,7 @@ void ath10k_htc_rx_completion_handler(struct ath10k *ar, struct sk_buff *skb) /* zero length packet with trailer data, just drop these */ goto out; - ath10k_dbg(ar, ATH10K_DBG_HTC, "htc rx completion ep %d skb %pK\n", + ath10k_dbg(ar, ATH10K_DBG_HTC, "htc rx completion ep %d skb %p\n", eid, skb); ep->ep_ops.ep_rx_complete(ar, skb); diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 83eab7479f06..52981052e211 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -1373,7 +1373,7 @@ static void ath10k_process_rx(struct ath10k *ar, struct sk_buff *skb) } ath10k_dbg(ar, ATH10K_DBG_DATA, - "rx skb %pK len %u peer %pM %s %s sn %u %s%s%s%s%s%s %srate_idx %u vht_nss %u freq %u band %u flag 0x%x fcs-err %i mic-err %i amsdu-more %i\n", + "rx skb %p len %u peer %pM %s %s sn %u %s%s%s%s%s%s %srate_idx %u vht_nss %u freq %u band %u flag 0x%x fcs-err %i mic-err %i amsdu-more %i\n", skb, skb->len, ieee80211_get_SA(hdr), diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index c61b95a928da..8c7ffea0fa44 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -875,7 +875,7 @@ static void ath10k_peer_map_cleanup(struct ath10k *ar, struct ath10k_peer *peer) */ for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) { if (ar->peer_map[i] == peer) { - ath10k_warn(ar, "removing stale peer_map entry for %pM (ptr %pK idx %d)\n", + ath10k_warn(ar, "removing stale peer_map entry for %pM (ptr %p idx %d)\n", peer->addr, peer, i); ar->peer_map[i] = NULL; } @@ -4063,7 +4063,7 @@ static int ath10k_mac_tx(struct ath10k *ar, if (!noque_offchan && info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) { if (!ath10k_mac_tx_frm_has_freq(ar)) { - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac queued offchannel skb %pK len %d\n", + ath10k_dbg(ar, ATH10K_DBG_MAC, "mac queued offchannel skb %p len %d\n", skb, skb->len); skb_queue_tail(&ar->offchan_tx_queue, skb); @@ -4126,7 +4126,7 @@ void ath10k_offchan_tx_work(struct work_struct *work) mutex_lock(&ar->conf_mutex); - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac offchannel skb %pK 
len %d\n", + ath10k_dbg(ar, ATH10K_DBG_MAC, "mac offchannel skb %p len %d\n", skb, skb->len); hdr = (struct ieee80211_hdr *)skb->data; @@ -4181,7 +4181,7 @@ void ath10k_offchan_tx_work(struct work_struct *work) time_left = wait_for_completion_timeout(&ar->offchan_tx_completed, 3 * HZ); if (time_left == 0) - ath10k_warn(ar, "timed out waiting for offchannel skb %pK, len: %d\n", + ath10k_warn(ar, "timed out waiting for offchannel skb %p, len: %d\n", skb, skb->len); if (!peer && tmp_peer_created) { @@ -7604,7 +7604,7 @@ static int ath10k_sta_state(struct ieee80211_hw *hw, * Existing station deletion. */ ath10k_dbg(ar, ATH10K_DBG_STA, - "mac vdev %d peer delete %pM sta %pK (sta gone)\n", + "mac vdev %d peer delete %pM sta %p (sta gone)\n", arvif->vdev_id, sta->addr, sta); if (sta->tdls) { @@ -7631,7 +7631,7 @@ static int ath10k_sta_state(struct ieee80211_hw *hw, continue; if (peer->sta == sta) { - ath10k_warn(ar, "found sta peer %pM (ptr %pK id %d) entry on vdev %i after it was supposedly removed\n", + ath10k_warn(ar, "found sta peer %pM (ptr %p id %d) entry on vdev %i after it was supposedly removed\n", sta->addr, peer, i, arvif->vdev_id); peer->sta = NULL; @@ -8811,7 +8811,7 @@ ath10k_mac_op_add_chanctx(struct ieee80211_hw *hw, struct ath10k *ar = hw->priv; ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx add freq %u width %d ptr %pK\n", + "mac chanctx add freq %u width %d ptr %p\n", ctx->def.chan->center_freq, ctx->def.width, ctx); mutex_lock(&ar->conf_mutex); @@ -8835,7 +8835,7 @@ ath10k_mac_op_remove_chanctx(struct ieee80211_hw *hw, struct ath10k *ar = hw->priv; ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx remove freq %u width %d ptr %pK\n", + "mac chanctx remove freq %u width %d ptr %p\n", ctx->def.chan->center_freq, ctx->def.width, ctx); mutex_lock(&ar->conf_mutex); @@ -8900,7 +8900,7 @@ ath10k_mac_op_change_chanctx(struct ieee80211_hw *hw, mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx change freq %u width %d ptr %pK changed %x\n", + "mac chanctx change freq %u width %d ptr %p changed %x\n", ctx->def.chan->center_freq, ctx->def.width, ctx, changed); /* This shouldn't really happen because channel switching should use @@ -8959,7 +8959,7 @@ ath10k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx assign ptr %pK vdev_id %i\n", + "mac chanctx assign ptr %p vdev_id %i\n", ctx, arvif->vdev_id); if (WARN_ON(arvif->is_started)) { @@ -9039,7 +9039,7 @@ ath10k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw, mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx unassign ptr %pK vdev_id %i\n", + "mac chanctx unassign ptr %p vdev_id %i\n", ctx, arvif->vdev_id); WARN_ON(!arvif->is_started); diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index fb2c60ee433c..20ec0a6d0f71 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -3411,7 +3411,7 @@ static int ath10k_pci_claim(struct ath10k *ar) goto err_region; } - ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot pci_mem 0x%pK\n", ar_pci->mem); + ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot pci_mem 0x%p\n", ar_pci->mem); return 0; err_region: diff --git a/drivers/net/wireless/ath/ath10k/testmode.c b/drivers/net/wireless/ath/ath10k/testmode.c index 7a9b9bbcdbfc..3fcefc55b74f 100644 --- a/drivers/net/wireless/ath/ath10k/testmode.c +++ b/drivers/net/wireless/ath/ath10k/testmode.c @@ -35,7 +35,7 @@ bool ath10k_tm_event_wmi(struct ath10k *ar, u32 cmd_id, struct 
sk_buff *skb) int ret; ath10k_dbg(ar, ATH10K_DBG_TESTMODE, - "testmode event wmi cmd_id %d skb %pK skb->len %d\n", + "testmode event wmi cmd_id %d skb %p skb->len %d\n", cmd_id, skb, skb->len); ath10k_dbg_dump(ar, ATH10K_DBG_TESTMODE, NULL, "", skb->data, skb->len); @@ -397,7 +397,7 @@ static int ath10k_tm_cmd_wmi(struct ath10k *ar, struct nlattr *tb[]) cmd_id = nla_get_u32(tb[ATH10K_TM_ATTR_WMI_CMDID]); ath10k_dbg(ar, ATH10K_DBG_TESTMODE, - "testmode cmd wmi cmd_id %d buf %pK buf_len %d\n", + "testmode cmd wmi cmd_id %d buf %p buf_len %d\n", cmd_id, buf, buf_len); ath10k_dbg_dump(ar, ATH10K_DBG_TESTMODE, NULL, "", buf, buf_len); diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c index da3bc35e41aa..493bfb410aff 100644 --- a/drivers/net/wireless/ath/ath10k/txrx.c +++ b/drivers/net/wireless/ath/ath10k/txrx.c @@ -35,7 +35,7 @@ static void ath10k_report_offchan_tx(struct ath10k *ar, struct sk_buff *skb) complete(&ar->offchan_tx_completed); ar->offchan_tx_skb = NULL; /* just for sanity */ - ath10k_dbg(ar, ATH10K_DBG_HTT, "completed offchannel skb %pK\n", skb); + ath10k_dbg(ar, ATH10K_DBG_HTT, "completed offchannel skb %p\n", skb); out: spin_unlock_bh(&ar->data_lock); } diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c index 3b51b7f52130..1732a4f98418 100644 --- a/drivers/net/wireless/ath/ath10k/usb.c +++ b/drivers/net/wireless/ath/ath10k/usb.c @@ -131,7 +131,7 @@ static void ath10k_usb_recv_complete(struct urb *urb) int status = 0; ath10k_dbg(ar, ATH10K_DBG_USB_BULK, - "usb recv pipe %d stat %d len %d urb 0x%pK\n", + "usb recv pipe %d stat %d len %d urb 0x%p\n", pipe->logical_pipe_num, urb->status, urb->actual_length, urb); @@ -230,7 +230,7 @@ static void ath10k_usb_post_recv_transfers(struct ath10k *ar, ath10k_usb_recv_complete, urb_context); ath10k_dbg(ar, ATH10K_DBG_USB_BULK, - "usb bulk recv submit %d 0x%x ep 0x%2.2x len %d buf 0x%pK\n", + "usb bulk recv submit %d 0x%x ep 0x%2.2x len %d buf 0x%p\n", recv_pipe->logical_pipe_num, recv_pipe->usb_pipe_handle, recv_pipe->ep_address, ATH10K_USB_RX_BUFFER_SIZE, urb_context->skb); diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 5e061f7525a6..df6a24f8f8d5 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -2029,7 +2029,7 @@ ath10k_wmi_op_gen_mgmt_tx(struct ath10k *ar, struct sk_buff *msdu) ether_addr_copy(cmd->hdr.peer_macaddr.addr, ieee80211_get_DA(hdr)); memcpy(cmd->buf, msdu->data, msdu->len); - ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi mgmt tx skb %pK len %d ftype %02x stype %02x\n", + ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi mgmt tx skb %p len %d ftype %02x stype %02x\n", msdu, skb->len, fc & IEEE80211_FCTL_FTYPE, fc & IEEE80211_FCTL_STYPE); trace_ath10k_tx_hdr(ar, skb->data, skb->len); @@ -2637,7 +2637,7 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) status->boottime_ns = ktime_get_boottime_ns(); ath10k_dbg(ar, ATH10K_DBG_MGMT, - "event mgmt rx skb %pK len %d ftype %02x stype %02x\n", + "event mgmt rx skb %p len %d ftype %02x stype %02x\n", skb, skb->len, fc & IEEE80211_FCTL_FTYPE, fc & IEEE80211_FCTL_STYPE); diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c index 2f862f8f10ca..fde1ce43c499 100644 --- a/drivers/net/wireless/ath/ath11k/ahb.c +++ b/drivers/net/wireless/ath/ath11k/ahb.c @@ -413,7 +413,7 @@ static int ath11k_ahb_power_up(struct ath11k_base *ab) return ret; } -static void ath11k_ahb_power_down(struct 
ath11k_base *ab) +static void ath11k_ahb_power_down(struct ath11k_base *ab, bool is_suspend) { struct ath11k_ahb *ab_ahb = ath11k_ahb_priv(ab); @@ -1280,7 +1280,7 @@ static void ath11k_ahb_remove(struct platform_device *pdev) struct ath11k_base *ab = platform_get_drvdata(pdev); if (test_bit(ATH11K_FLAG_QMI_FAIL, &ab->dev_flags)) { - ath11k_ahb_power_down(ab); + ath11k_ahb_power_down(ab, false); ath11k_debugfs_soc_destroy(ab); ath11k_qmi_deinit_service(ab); goto qmi_fail; } diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index 3d39ff85ba94..2e9f8a5e61e4 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -907,12 +907,51 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { }, }; -static inline struct ath11k_pdev *ath11k_core_get_single_pdev(struct ath11k_base *ab) -{ - WARN_ON(!ab->hw_params.single_pdev_only); - - return &ab->pdevs[0]; -} +static const struct dmi_system_id ath11k_pm_quirk_table[] = { + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21J4"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K4"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K6"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K8"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21KA"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21F9"), + }, + }, + {} +}; void ath11k_fw_stats_pdevs_free(struct list_head *head) { @@ -972,23 +1011,33 @@ bool ath11k_core_coldboot_cal_support(struct ath11k_base *ab) return ab->hw_params.coldboot_cal_mm; } -int ath11k_core_suspend(struct ath11k_base *ab) +/* Check if we need to continue with suspend/resume operation. + * Return: + * a negative value: an error happened, don't continue. + * 0: no error, but don't continue. + * a positive value: no error, do continue. + */ +static int ath11k_core_continue_suspend_resume(struct ath11k_base *ab) { - int ret; - struct ath11k_pdev *pdev; struct ath11k *ar; if (!ab->hw_params.supports_suspend) return -EOPNOTSUPP; /* so far single_pdev_only chips have supports_suspend as true - * and only the first pdev is valid. + * so pass 0 as a dummy pdev_id here. */ - pdev = ath11k_core_get_single_pdev(ab); - ar = pdev->ar; + ar = ab->pdevs[0].ar; if (!ar || ar->state != ATH11K_STATE_OFF) return 0; + return 1; +} + +static int ath11k_core_suspend_wow(struct ath11k_base *ab) +{ + int ret; + ret = ath11k_dp_rx_pktlog_stop(ab, true); if (ret) { ath11k_warn(ab, "failed to stop dp rx (and timer) pktlog during suspend: %d\n", ret); return ret; } - ret = ath11k_mac_wait_tx_complete(ar); + /* So far only single_pdev_only devices can reach here, + * so it is valid to handle the first, and the only, pdev.
+ */ + ret = ath11k_mac_wait_tx_complete(ab->pdevs[0].ar); if (ret) { ath11k_warn(ab, "failed to wait tx complete: %d\n", ret); return ret; } @@ -1029,24 +1081,146 @@ int ath11k_core_suspend(struct ath11k_base *ab) return 0; } + +static int ath11k_core_suspend_default(struct ath11k_base *ab) +{ + int ret; + + ret = ath11k_dp_rx_pktlog_stop(ab, true); + if (ret) { + ath11k_warn(ab, "failed to stop dp rx (and timer) pktlog during suspend: %d\n", + ret); + return ret; + } + + /* So far only single_pdev_only devices can reach here, + * so it is valid to handle the first, and the only, pdev. + */ + ret = ath11k_mac_wait_tx_complete(ab->pdevs[0].ar); + if (ret) { + ath11k_warn(ab, "failed to wait tx complete: %d\n", ret); + return ret; + } + + ret = ath11k_dp_rx_pktlog_stop(ab, false); + if (ret) { + ath11k_warn(ab, "failed to stop dp rx pktlog during suspend: %d\n", + ret); + return ret; + } + + ath11k_ce_stop_shadow_timers(ab); + ath11k_dp_stop_shadow_timers(ab); + + /* PM framework skips suspend_late/resume_early callbacks + * if other devices report errors in their suspend callbacks. + * However ath11k_core_resume() would still be called because + * here we return success, thus the kernel puts us on dpm_suspended_list. + * Since we won't go through a power down/up cycle, there is + * no chance to call complete(&ab->restart_completed) in + * ath11k_core_restart(), making ath11k_core_resume() time out. + * So call it here to avoid this issue. This also works in case + * no error happens and suspend_late/resume_early do get called, + * because it will be reinitialized in ath11k_core_resume_early(). + */ + complete(&ab->restart_completed); + + return 0; +} + +int ath11k_core_suspend(struct ath11k_base *ab) +{ + int ret; + + ret = ath11k_core_continue_suspend_resume(ab); + if (ret <= 0) + return ret; + + if (ab->actual_pm_policy == ATH11K_PM_WOW) + return ath11k_core_suspend_wow(ab); + + return ath11k_core_suspend_default(ab); +} EXPORT_SYMBOL(ath11k_core_suspend); -int ath11k_core_resume(struct ath11k_base *ab) +int ath11k_core_suspend_late(struct ath11k_base *ab) { int ret; - struct ath11k_pdev *pdev; + + ret = ath11k_core_continue_suspend_resume(ab); + if (ret <= 0) + return ret; + + if (ab->actual_pm_policy == ATH11K_PM_WOW) + return 0; + + ath11k_hif_irq_disable(ab); + ath11k_hif_ce_irq_disable(ab); + + ath11k_hif_power_down(ab, true); + + return 0; +} +EXPORT_SYMBOL(ath11k_core_suspend_late); + +int ath11k_core_resume_early(struct ath11k_base *ab) +{ + int ret; + + ret = ath11k_core_continue_suspend_resume(ab); + if (ret <= 0) + return ret; + + if (ab->actual_pm_policy == ATH11K_PM_WOW) + return 0; + + reinit_completion(&ab->restart_completed); + ret = ath11k_hif_power_up(ab); + if (ret) + ath11k_warn(ab, "failed to power up hif during resume: %d\n", ret); + + return ret; +} +EXPORT_SYMBOL(ath11k_core_resume_early); + +static int ath11k_core_resume_default(struct ath11k_base *ab) +{ struct ath11k *ar; + long time_left; + int ret; - if (!ab->hw_params.supports_suspend) - return -EOPNOTSUPP; + time_left = wait_for_completion_timeout(&ab->restart_completed, + ATH11K_RESET_TIMEOUT_HZ); + if (time_left == 0) { + ath11k_warn(ab, "timeout while waiting for restart complete"); + return -ETIMEDOUT; + } - /* so far signle_pdev_only chips have supports_suspend as true - * and only the first pdev is valid. + /* So far only single_pdev_only devices can reach here, + * so it is valid to handle the first, and the only, pdev.
*/ - pdev = ath11k_core_get_single_pdev(ab); - ar = pdev->ar; - if (!ar || ar->state != ATH11K_STATE_OFF) - return 0; + ar = ab->pdevs[0].ar; + if (ab->hw_params.current_cc_support && + ar->alpha2[0] != 0 && ar->alpha2[1] != 0) { + ret = ath11k_reg_set_cc(ar); + if (ret) { + ath11k_warn(ab, "failed to set country code during resume: %d\n", + ret); + return ret; + } + } + + ret = ath11k_dp_rx_pktlog_start(ab); + if (ret) + ath11k_warn(ab, "failed to start rx pktlog during resume: %d\n", + ret); + + return ret; +} + +static int ath11k_core_resume_wow(struct ath11k_base *ab) +{ + int ret; ret = ath11k_hif_resume(ab); if (ret) { @@ -1072,6 +1246,20 @@ int ath11k_core_resume(struct ath11k_base *ab) return 0; } + +int ath11k_core_resume(struct ath11k_base *ab) +{ + int ret; + + ret = ath11k_core_continue_suspend_resume(ab); + if (ret <= 0) + return ret; + + if (ab->actual_pm_policy == ATH11K_PM_WOW) + return ath11k_core_resume_wow(ab); + + return ath11k_core_resume_default(ab); +} EXPORT_SYMBOL(ath11k_core_resume); static void ath11k_core_check_cc_code_bdfext(const struct dmi_header *hdr, void *data) @@ -2050,6 +2238,7 @@ err_hal_srng_deinit: void ath11k_core_halt(struct ath11k *ar) { struct ath11k_base *ab = ar->ab; + struct list_head *pos, *n; lockdep_assert_held(&ar->conf_mutex); @@ -2065,7 +2254,12 @@ void ath11k_core_halt(struct ath11k *ar) rcu_assign_pointer(ab->pdevs_active[ar->pdev_idx], NULL); synchronize_rcu(); - INIT_LIST_HEAD(&ar->arvifs); + + spin_lock_bh(&ar->data_lock); + list_for_each_safe(pos, n, &ar->arvifs) + list_del_init(pos); + spin_unlock_bh(&ar->data_lock); + idr_init(&ar->txmgmt_idr); } @@ -2205,6 +2399,8 @@ static void ath11k_core_restart(struct work_struct *work) if (!ab->is_reset) ath11k_core_post_reconfigure_recovery(ab); + + complete(&ab->restart_completed); } static void ath11k_core_reset(struct work_struct *work) @@ -2275,7 +2471,7 @@ static void ath11k_core_reset(struct work_struct *work) ath11k_hif_irq_disable(ab); ath11k_hif_ce_irq_disable(ab); - ath11k_hif_power_down(ab); + ath11k_hif_power_down(ab, false); ath11k_hif_power_up(ab); ath11k_dbg(ab, ATH11K_DBG_BOOT, "reset started\n"); @@ -2325,10 +2521,62 @@ int ath11k_core_pre_init(struct ath11k_base *ab) } EXPORT_SYMBOL(ath11k_core_pre_init); +static int ath11k_core_pm_notify(struct notifier_block *nb, + unsigned long action, void *nouse) +{ + struct ath11k_base *ab = container_of(nb, struct ath11k_base, + pm_nb); + + switch (action) { + case PM_SUSPEND_PREPARE: + ab->actual_pm_policy = ab->pm_policy; + break; + case PM_HIBERNATION_PREPARE: + ab->actual_pm_policy = ATH11K_PM_DEFAULT; + break; + default: + break; + } + + return NOTIFY_OK; +} + +static int ath11k_core_pm_notifier_register(struct ath11k_base *ab) +{ + ab->pm_nb.notifier_call = ath11k_core_pm_notify; + return register_pm_notifier(&ab->pm_nb); +} + +void ath11k_core_pm_notifier_unregister(struct ath11k_base *ab) +{ + int ret; + + ret = unregister_pm_notifier(&ab->pm_nb); + if (ret) + /* just warn here, nothing can be done in the failure case */ + ath11k_warn(ab, "failed to unregister PM notifier %d\n", ret); +} +EXPORT_SYMBOL(ath11k_core_pm_notifier_unregister); + int ath11k_core_init(struct ath11k_base *ab) { + const struct dmi_system_id *dmi_id; int ret; + dmi_id = dmi_first_match(ath11k_pm_quirk_table); + if (dmi_id) + ab->pm_policy = (kernel_ulong_t)dmi_id->driver_data; + else + ab->pm_policy = ATH11K_PM_DEFAULT; + + ath11k_dbg(ab, ATH11K_DBG_BOOT, "pm policy %u\n", ab->pm_policy); + + ret = ath11k_core_pm_notifier_register(ab); + if (ret) {
+ ath11k_err(ab, "failed to register PM notifier: %d\n", ret); + return ret; + } + ret = ath11k_core_soc_create(ab); if (ret) { ath11k_err(ab, "failed to create soc core: %d\n", ret); @@ -2348,9 +2596,10 @@ void ath11k_core_deinit(struct ath11k_base *ab) mutex_unlock(&ab->core_lock); - ath11k_hif_power_down(ab); + ath11k_hif_power_down(ab, false); ath11k_mac_destroy(ab); ath11k_core_soc_destroy(ab); + ath11k_core_pm_notifier_unregister(ab); } EXPORT_SYMBOL(ath11k_core_deinit); @@ -2401,6 +2650,7 @@ struct ath11k_base *ath11k_core_alloc(struct device *dev, size_t priv_size, timer_setup(&ab->rx_replenish_retry, ath11k_ce_rx_replenish_retry, 0); init_completion(&ab->htc_suspend); init_completion(&ab->wow.wakeup_completed); + init_completion(&ab->restart_completed); ab->dev = dev; ab->hif.bus = bus; diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 1a3d0de4afde..339d4fca1ed5 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -16,6 +16,7 @@ #include <linux/rhashtable.h> #include <linux/average.h> #include <linux/firmware.h> +#include <linux/suspend.h> #include "qmi.h" #include "htc.h" @@ -892,6 +893,11 @@ struct ath11k_msi_config { u16 hw_rev; }; +enum ath11k_pm_policy { + ATH11K_PM_DEFAULT, + ATH11K_PM_WOW, +}; + /* Master structure to hold the hw data which may be used in core module */ struct ath11k_base { enum ath11k_hw_rev hw_rev; @@ -1050,6 +1056,8 @@ struct ath11k_base { DECLARE_BITMAP(fw_features, ATH11K_FW_FEATURE_COUNT); } fw; + struct completion restart_completed; + #ifdef CONFIG_NL80211_TESTMODE struct { u32 data_pos; @@ -1058,6 +1066,10 @@ struct ath11k_base { } testmode; #endif + enum ath11k_pm_policy pm_policy; + enum ath11k_pm_policy actual_pm_policy; + struct notifier_block pm_nb; + /* must be last */ u8 drv_priv[] __aligned(sizeof(void *)); }; @@ -1249,8 +1261,10 @@ void ath11k_core_free_bdf(struct ath11k_base *ab, struct ath11k_board_data *bd); int ath11k_core_check_dt(struct ath11k_base *ath11k); int ath11k_core_check_smbios(struct ath11k_base *ab); void ath11k_core_halt(struct ath11k *ar); +int ath11k_core_resume_early(struct ath11k_base *ab); int ath11k_core_resume(struct ath11k_base *ab); int ath11k_core_suspend(struct ath11k_base *ab); +int ath11k_core_suspend_late(struct ath11k_base *ab); void ath11k_core_pre_reconfigure_recovery(struct ath11k_base *ab); bool ath11k_core_coldboot_cal_support(struct ath11k_base *ab); @@ -1322,4 +1336,6 @@ static inline const char *ath11k_bus_str(enum ath11k_bus bus) return "unknown"; } +void ath11k_core_pm_notifier_unregister(struct ath11k_base *ab); + #endif /* _CORE_H_ */ diff --git a/drivers/net/wireless/ath/ath11k/hif.h b/drivers/net/wireless/ath/ath11k/hif.h index 770c39ff99b4..cd9c4b838246 100644 --- a/drivers/net/wireless/ath/ath11k/hif.h +++ b/drivers/net/wireless/ath/ath11k/hif.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2019-2020 The Linux Foundation. All rights reserved. - * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef _HIF_H_ @@ -18,7 +18,7 @@ struct ath11k_hif_ops { int (*start)(struct ath11k_base *ab); void (*stop)(struct ath11k_base *ab); int (*power_up)(struct ath11k_base *ab); - void (*power_down)(struct ath11k_base *ab); + void (*power_down)(struct ath11k_base *ab, bool is_suspend); int (*suspend)(struct ath11k_base *ab); int (*resume)(struct ath11k_base *ab); int (*map_service_to_pipe)(struct ath11k_base *ab, u16 service_id, @@ -68,12 +68,18 @@ static inline void ath11k_hif_irq_disable(struct ath11k_base *ab) static inline int ath11k_hif_power_up(struct ath11k_base *ab) { + if (!ab->hif.ops->power_up) + return -EOPNOTSUPP; + return ab->hif.ops->power_up(ab); } -static inline void ath11k_hif_power_down(struct ath11k_base *ab) +static inline void ath11k_hif_power_down(struct ath11k_base *ab, bool is_suspend) { - ab->hif.ops->power_down(ab); + if (!ab->hif.ops->power_down) + return; + + ab->hif.ops->power_down(ab, is_suspend); } static inline int ath11k_hif_suspend(struct ath11k_base *ab) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 97816916abac..8191ee14a1fd 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -1531,8 +1531,14 @@ static int ath11k_mac_set_vif_params(struct ath11k_vif *arvif, static struct ath11k_vif *ath11k_mac_get_tx_arvif(struct ath11k_vif *arvif) { - if (arvif->vif->mbssid_tx_vif) - return ath11k_vif_to_arvif(arvif->vif->mbssid_tx_vif); + struct ieee80211_bss_conf *link_conf, *tx_bss_conf; + + lockdep_assert_wiphy(arvif->ar->hw->wiphy); + + link_conf = &arvif->vif->bss_conf; + tx_bss_conf = wiphy_dereference(arvif->ar->hw->wiphy, link_conf->tx_bss_conf); + if (tx_bss_conf) + return ath11k_vif_to_arvif(tx_bss_conf->vif); return NULL; } diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c index fc77eac83e95..acd76e9392d3 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.c +++ b/drivers/net/wireless/ath/ath11k/mhi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2020 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include <linux/msi.h> @@ -454,9 +454,17 @@ int ath11k_mhi_start(struct ath11k_pci *ab_pci) return 0; } -void ath11k_mhi_stop(struct ath11k_pci *ab_pci) +void ath11k_mhi_stop(struct ath11k_pci *ab_pci, bool is_suspend) { - mhi_power_down(ab_pci->mhi_ctrl, true); + /* During suspend we need to use the mhi_power_down_keep_dev() + * workaround, otherwise ath11k_core_resume() will time out + * during resume. + */ + if (is_suspend) + mhi_power_down_keep_dev(ab_pci->mhi_ctrl, true); + else + mhi_power_down(ab_pci->mhi_ctrl, true); + mhi_unprepare_after_power_down(ab_pci->mhi_ctrl); } diff --git a/drivers/net/wireless/ath/ath11k/mhi.h b/drivers/net/wireless/ath/ath11k/mhi.h index 651470091bd5..5c5c2b03c81f 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.h +++ b/drivers/net/wireless/ath/ath11k/mhi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2020 The Linux Foundation. All rights reserved. - * Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022, 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved.
*/ #ifndef _ATH11K_MHI_H #define _ATH11K_MHI_H @@ -18,7 +18,7 @@ #define MHICTRL_RESET_MASK 0x2 int ath11k_mhi_start(struct ath11k_pci *ar_pci); -void ath11k_mhi_stop(struct ath11k_pci *ar_pci); +void ath11k_mhi_stop(struct ath11k_pci *ar_pci, bool is_suspend); int ath11k_mhi_register(struct ath11k_pci *ar_pci); void ath11k_mhi_unregister(struct ath11k_pci *ar_pci); void ath11k_mhi_set_mhictrl_reset(struct ath11k_base *ab); diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c index 412f4a134e4a..78444f8ea153 100644 --- a/drivers/net/wireless/ath/ath11k/pci.c +++ b/drivers/net/wireless/ath/ath11k/pci.c @@ -821,7 +821,7 @@ static int ath11k_pci_power_up(struct ath11k_base *ab) return 0; } -static void ath11k_pci_power_down(struct ath11k_base *ab) +static void ath11k_pci_power_down(struct ath11k_base *ab, bool is_suspend) { struct ath11k_pci *ab_pci = ath11k_pci_priv(ab); @@ -832,7 +832,7 @@ static void ath11k_pci_power_down(struct ath11k_base *ab) ath11k_pci_msi_disable(ab_pci); - ath11k_mhi_stop(ab_pci); + ath11k_mhi_stop(ab_pci, is_suspend); clear_bit(ATH11K_FLAG_DEVICE_INIT_DONE, &ab->dev_flags); ath11k_pci_sw_reset(ab_pci->ab, false); } @@ -929,7 +929,7 @@ static int ath11k_pci_probe(struct pci_dev *pdev, { struct ath11k_base *ab; struct ath11k_pci *ab_pci; - u32 soc_hw_version_major, soc_hw_version_minor, addr; + u32 soc_hw_version_major, soc_hw_version_minor; int ret; u32 sub_version; @@ -955,8 +955,7 @@ static int ath11k_pci_probe(struct pci_dev *pdev, * from DT. If memory is reserved from DT for FW, ath11k driver need not * allocate memory. */ - ret = of_property_read_u32(ab->dev->of_node, "memory-region", &addr); - if (!ret) + if (of_property_present(ab->dev->of_node, "memory-region")) set_bit(ATH11K_FLAG_FIXED_MEM_RGN, &ab->dev_flags); ret = ath11k_pci_claim(ab_pci, pdev); @@ -1161,9 +1160,10 @@ static void ath11k_pci_remove(struct pci_dev *pdev) ath11k_pci_set_irq_affinity_hint(ab_pci, NULL); if (test_bit(ATH11K_FLAG_QMI_FAIL, &ab->dev_flags)) { - ath11k_pci_power_down(ab); + ath11k_pci_power_down(ab, false); ath11k_debugfs_soc_destroy(ab); ath11k_qmi_deinit_service(ab); + ath11k_core_pm_notifier_unregister(ab); goto qmi_fail; } @@ -1192,7 +1192,7 @@ static void ath11k_pci_shutdown(struct pci_dev *pdev) struct ath11k_pci *ab_pci = ath11k_pci_priv(ab); ath11k_pci_set_irq_affinity_hint(ab_pci, NULL); - ath11k_pci_power_down(ab); + ath11k_pci_power_down(ab, false); } static __maybe_unused int ath11k_pci_pm_suspend(struct device *dev) @@ -1229,9 +1229,39 @@ static __maybe_unused int ath11k_pci_pm_resume(struct device *dev) return ret; } -static SIMPLE_DEV_PM_OPS(ath11k_pci_pm_ops, - ath11k_pci_pm_suspend, - ath11k_pci_pm_resume); +static __maybe_unused int ath11k_pci_pm_suspend_late(struct device *dev) +{ + struct ath11k_base *ab = dev_get_drvdata(dev); + int ret; + + ret = ath11k_core_suspend_late(ab); + if (ret) + ath11k_warn(ab, "failed to late suspend core: %d\n", ret); + + /* Similar to ath11k_pci_pm_suspend(), we return success here + * even if an error happens, to allow system suspend/hibernation to survive.
+ */ + return 0; +} + +static __maybe_unused int ath11k_pci_pm_resume_early(struct device *dev) +{ + struct ath11k_base *ab = dev_get_drvdata(dev); + int ret; + + ret = ath11k_core_resume_early(ab); + if (ret) + ath11k_warn(ab, "failed to early resume core: %d\n", ret); + + return ret; +} + +static const struct dev_pm_ops __maybe_unused ath11k_pci_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(ath11k_pci_pm_suspend, + ath11k_pci_pm_resume) + SET_LATE_SYSTEM_SLEEP_PM_OPS(ath11k_pci_pm_suspend_late, + ath11k_pci_pm_resume_early) +}; static struct pci_driver ath11k_pci_driver = { .name = "ath11k_pci", diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c index 4f8b08ed1bbc..47b9d4126d3a 100644 --- a/drivers/net/wireless/ath/ath11k/qmi.c +++ b/drivers/net/wireless/ath/ath11k/qmi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include <linux/elf.h> @@ -2887,7 +2887,7 @@ int ath11k_qmi_fwreset_from_cold_boot(struct ath11k_base *ab) } /* reset the firmware */ - ath11k_hif_power_down(ab); + ath11k_hif_power_down(ab, false); ath11k_hif_power_up(ab); ath11k_dbg(ab, ATH11K_DBG_QMI, "exit wait for cold boot done\n"); return 0; diff --git a/drivers/net/wireless/ath/ath11k/testmode.c b/drivers/net/wireless/ath/ath11k/testmode.c index 9be1cd742339..a9751ea2a0b7 100644 --- a/drivers/net/wireless/ath/ath11k/testmode.c +++ b/drivers/net/wireless/ath/ath11k/testmode.c @@ -107,7 +107,7 @@ static int ath11k_tm_process_event(struct ath11k_base *ab, u32 cmd_id, u32 pdev_id; ath11k_dbg(ab, ATH11K_DBG_TESTMODE, - "event wmi cmd_id %d ftm event msg %pK datalen %d\n", + "event wmi cmd_id %d ftm event msg %p datalen %d\n", cmd_id, ftm_msg, length); ath11k_dbg_dump(ab, ATH11K_DBG_TESTMODE, NULL, "", ftm_msg, length); pdev_id = DP_HW2SW_MACID(ftm_msg->seg_hdr.pdev_id); diff --git a/drivers/net/wireless/ath/ath12k/Kconfig b/drivers/net/wireless/ath/ath12k/Kconfig index 52a1bb19e3da..b3b15e1eb282 100644 --- a/drivers/net/wireless/ath/ath12k/Kconfig +++ b/drivers/net/wireless/ath/ath12k/Kconfig @@ -15,6 +15,14 @@ config ATH12K If you choose to build a module, it'll be called ath12k. +config ATH12K_AHB + bool "QTI ath12k AHB support" + depends on ATH12K && REMOTEPROC + select QCOM_MDT_LOADER + select QCOM_SCM + help + Enable support for Ath12k AHB bus chipsets, example IPQ5332. + config ATH12K_DEBUG bool "ath12k debugging" depends on ATH12K diff --git a/drivers/net/wireless/ath/ath12k/Makefile b/drivers/net/wireless/ath/ath12k/Makefile index 60644cb42c76..d95ee525a6cd 100644 --- a/drivers/net/wireless/ath/ath12k/Makefile +++ b/drivers/net/wireless/ath/ath12k/Makefile @@ -23,6 +23,7 @@ ath12k-y += core.o \ fw.o \ p2p.o +ath12k-$(CONFIG_ATH12K_AHB) += ahb.o ath12k-$(CONFIG_ATH12K_DEBUGFS) += debugfs.o debugfs_htt_stats.o debugfs_sta.o ath12k-$(CONFIG_ACPI) += acpi.o ath12k-$(CONFIG_ATH12K_TRACING) += trace.o diff --git a/drivers/net/wireless/ath/ath12k/ahb.c b/drivers/net/wireless/ath/ath12k/ahb.c new file mode 100644 index 000000000000..636dfe237a79 --- /dev/null +++ b/drivers/net/wireless/ath/ath12k/ahb.c @@ -0,0 +1,1156 @@ +// SPDX-License-Identifier: BSD-3-Clause-Clear +/* + * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. + * Copyright (c) 2022-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ + +#include <linux/dma-mapping.h> +#include <linux/firmware/qcom/qcom_scm.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/remoteproc.h> +#include <linux/soc/qcom/mdt_loader.h> +#include <linux/soc/qcom/smem_state.h> +#include "ahb.h" +#include "debug.h" +#include "hif.h" + +static const struct of_device_id ath12k_ahb_of_match[] = { + { .compatible = "qcom,ipq5332-wifi", + .data = (void *)ATH12K_HW_IPQ5332_HW10, + }, + { } +}; + +MODULE_DEVICE_TABLE(of, ath12k_ahb_of_match); + +#define ATH12K_IRQ_CE0_OFFSET 4 +#define ATH12K_MAX_UPDS 1 +#define ATH12K_UPD_IRQ_WRD_LEN 18 +static const char ath12k_userpd_irq[][9] = {"spawn", + "ready", + "stop-ack"}; + +static const char *irq_name[ATH12K_IRQ_NUM_MAX] = { + "misc-pulse1", + "misc-latch", + "sw-exception", + "watchdog", + "ce0", + "ce1", + "ce2", + "ce3", + "ce4", + "ce5", + "ce6", + "ce7", + "ce8", + "ce9", + "ce10", + "ce11", + "host2wbm-desc-feed", + "host2reo-re-injection", + "host2reo-command", + "host2rxdma-monitor-ring3", + "host2rxdma-monitor-ring2", + "host2rxdma-monitor-ring1", + "reo2ost-exception", + "wbm2host-rx-release", + "reo2host-status", + "reo2host-destination-ring4", + "reo2host-destination-ring3", + "reo2host-destination-ring2", + "reo2host-destination-ring1", + "rxdma2host-monitor-destination-mac3", + "rxdma2host-monitor-destination-mac2", + "rxdma2host-monitor-destination-mac1", + "ppdu-end-interrupts-mac3", + "ppdu-end-interrupts-mac2", + "ppdu-end-interrupts-mac1", + "rxdma2host-monitor-status-ring-mac3", + "rxdma2host-monitor-status-ring-mac2", + "rxdma2host-monitor-status-ring-mac1", + "host2rxdma-host-buf-ring-mac3", + "host2rxdma-host-buf-ring-mac2", + "host2rxdma-host-buf-ring-mac1", + "rxdma2host-destination-ring-mac3", + "rxdma2host-destination-ring-mac2", + "rxdma2host-destination-ring-mac1", + "host2tcl-input-ring4", + "host2tcl-input-ring3", + "host2tcl-input-ring2", + "host2tcl-input-ring1", + "wbm2host-tx-completions-ring4", + "wbm2host-tx-completions-ring3", + "wbm2host-tx-completions-ring2", + "wbm2host-tx-completions-ring1", + "tcl2host-status-ring", +}; + +enum ext_irq_num { + host2wbm_desc_feed = 16, + host2reo_re_injection, + host2reo_command, + host2rxdma_monitor_ring3, + host2rxdma_monitor_ring2, + host2rxdma_monitor_ring1, + reo2host_exception, + wbm2host_rx_release, + reo2host_status, + reo2host_destination_ring4, + reo2host_destination_ring3, + reo2host_destination_ring2, + reo2host_destination_ring1, + rxdma2host_monitor_destination_mac3, + rxdma2host_monitor_destination_mac2, + rxdma2host_monitor_destination_mac1, + ppdu_end_interrupts_mac3, + ppdu_end_interrupts_mac2, + ppdu_end_interrupts_mac1, + rxdma2host_monitor_status_ring_mac3, + rxdma2host_monitor_status_ring_mac2, + rxdma2host_monitor_status_ring_mac1, + host2rxdma_host_buf_ring_mac3, + host2rxdma_host_buf_ring_mac2, + host2rxdma_host_buf_ring_mac1, + rxdma2host_destination_ring_mac3, + rxdma2host_destination_ring_mac2, + rxdma2host_destination_ring_mac1, + host2tcl_input_ring4, + host2tcl_input_ring3, + host2tcl_input_ring2, + host2tcl_input_ring1, + wbm2host_tx_completions_ring4, + wbm2host_tx_completions_ring3, + wbm2host_tx_completions_ring2, + wbm2host_tx_completions_ring1, + tcl2host_status_ring, +}; + +static u32 ath12k_ahb_read32(struct ath12k_base *ab, u32 offset) +{ + if (ab->ce_remap && offset < HAL_SEQ_WCSS_CMEM_OFFSET) + return ioread32(ab->mem_ce + offset); + return ioread32(ab->mem + offset); +} + +static void ath12k_ahb_write32(struct ath12k_base *ab, u32 
offset, + u32 value) +{ + if (ab->ce_remap && offset < HAL_SEQ_WCSS_CMEM_OFFSET) + iowrite32(value, ab->mem_ce + offset); + else + iowrite32(value, ab->mem + offset); +} + +static void ath12k_ahb_cancel_workqueue(struct ath12k_base *ab) +{ + int i; + + for (i = 0; i < ab->hw_params->ce_count; i++) { + struct ath12k_ce_pipe *ce_pipe = &ab->ce.ce_pipe[i]; + + if (ath12k_ce_get_attr_flags(ab, i) & CE_ATTR_DIS_INTR) + continue; + + cancel_work_sync(&ce_pipe->intr_wq); + } +} + +static void ath12k_ahb_ext_grp_disable(struct ath12k_ext_irq_grp *irq_grp) +{ + int i; + + for (i = 0; i < irq_grp->num_irq; i++) + disable_irq_nosync(irq_grp->ab->irq_num[irq_grp->irqs[i]]); +} + +static void __ath12k_ahb_ext_irq_disable(struct ath12k_base *ab) +{ + int i; + + for (i = 0; i < ATH12K_EXT_IRQ_GRP_NUM_MAX; i++) { + struct ath12k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i]; + + ath12k_ahb_ext_grp_disable(irq_grp); + if (irq_grp->napi_enabled) { + napi_synchronize(&irq_grp->napi); + napi_disable(&irq_grp->napi); + irq_grp->napi_enabled = false; + } + } +} + +static void ath12k_ahb_ext_grp_enable(struct ath12k_ext_irq_grp *irq_grp) +{ + int i; + + for (i = 0; i < irq_grp->num_irq; i++) + enable_irq(irq_grp->ab->irq_num[irq_grp->irqs[i]]); +} + +static void ath12k_ahb_setbit32(struct ath12k_base *ab, u8 bit, u32 offset) +{ + u32 val; + + val = ath12k_ahb_read32(ab, offset); + ath12k_ahb_write32(ab, offset, val | BIT(bit)); +} + +static void ath12k_ahb_clearbit32(struct ath12k_base *ab, u8 bit, u32 offset) +{ + u32 val; + + val = ath12k_ahb_read32(ab, offset); + ath12k_ahb_write32(ab, offset, val & ~BIT(bit)); +} + +static void ath12k_ahb_ce_irq_enable(struct ath12k_base *ab, u16 ce_id) +{ + const struct ce_attr *ce_attr; + const struct ce_ie_addr *ce_ie_addr = ab->hw_params->ce_ie_addr; + u32 ie1_reg_addr, ie2_reg_addr, ie3_reg_addr; + + ie1_reg_addr = ce_ie_addr->ie1_reg_addr; + ie2_reg_addr = ce_ie_addr->ie2_reg_addr; + ie3_reg_addr = ce_ie_addr->ie3_reg_addr; + + ce_attr = &ab->hw_params->host_ce_config[ce_id]; + if (ce_attr->src_nentries) + ath12k_ahb_setbit32(ab, ce_id, ie1_reg_addr); + + if (ce_attr->dest_nentries) { + ath12k_ahb_setbit32(ab, ce_id, ie2_reg_addr); + ath12k_ahb_setbit32(ab, ce_id + CE_HOST_IE_3_SHIFT, + ie3_reg_addr); + } +} + +static void ath12k_ahb_ce_irq_disable(struct ath12k_base *ab, u16 ce_id) +{ + const struct ce_attr *ce_attr; + const struct ce_ie_addr *ce_ie_addr = ab->hw_params->ce_ie_addr; + u32 ie1_reg_addr, ie2_reg_addr, ie3_reg_addr; + + ie1_reg_addr = ce_ie_addr->ie1_reg_addr; + ie2_reg_addr = ce_ie_addr->ie2_reg_addr; + ie3_reg_addr = ce_ie_addr->ie3_reg_addr; + + ce_attr = &ab->hw_params->host_ce_config[ce_id]; + if (ce_attr->src_nentries) + ath12k_ahb_clearbit32(ab, ce_id, ie1_reg_addr); + + if (ce_attr->dest_nentries) { + ath12k_ahb_clearbit32(ab, ce_id, ie2_reg_addr); + ath12k_ahb_clearbit32(ab, ce_id + CE_HOST_IE_3_SHIFT, + ie3_reg_addr); + } +} + +static void ath12k_ahb_sync_ce_irqs(struct ath12k_base *ab) +{ + int i; + int irq_idx; + + for (i = 0; i < ab->hw_params->ce_count; i++) { + if (ath12k_ce_get_attr_flags(ab, i) & CE_ATTR_DIS_INTR) + continue; + + irq_idx = ATH12K_IRQ_CE0_OFFSET + i; + synchronize_irq(ab->irq_num[irq_idx]); + } +} + +static void ath12k_ahb_sync_ext_irqs(struct ath12k_base *ab) +{ + int i, j; + int irq_idx; + + for (i = 0; i < ATH12K_EXT_IRQ_GRP_NUM_MAX; i++) { + struct ath12k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i]; + + for (j = 0; j < irq_grp->num_irq; j++) { + irq_idx = irq_grp->irqs[j]; + synchronize_irq(ab->irq_num[irq_idx]); + } + } 
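+ + /* disable_irq_nosync() does not wait for running handlers, so the + * synchronize_irq() calls above are what guarantee that no DP + * interrupt handler is still in flight once this returns. + */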
+} + +static void ath12k_ahb_ce_irqs_enable(struct ath12k_base *ab) +{ + int i; + + for (i = 0; i < ab->hw_params->ce_count; i++) { + if (ath12k_ce_get_attr_flags(ab, i) & CE_ATTR_DIS_INTR) + continue; + ath12k_ahb_ce_irq_enable(ab, i); + } +} + +static void ath12k_ahb_ce_irqs_disable(struct ath12k_base *ab) +{ + int i; + + for (i = 0; i < ab->hw_params->ce_count; i++) { + if (ath12k_ce_get_attr_flags(ab, i) & CE_ATTR_DIS_INTR) + continue; + ath12k_ahb_ce_irq_disable(ab, i); + } +} + +static int ath12k_ahb_start(struct ath12k_base *ab) +{ + ath12k_ahb_ce_irqs_enable(ab); + ath12k_ce_rx_post_buf(ab); + + return 0; +} + +static void ath12k_ahb_ext_irq_enable(struct ath12k_base *ab) +{ + struct ath12k_ext_irq_grp *irq_grp; + int i; + + for (i = 0; i < ATH12K_EXT_IRQ_GRP_NUM_MAX; i++) { + irq_grp = &ab->ext_irq_grp[i]; + if (!irq_grp->napi_enabled) { + napi_enable(&irq_grp->napi); + irq_grp->napi_enabled = true; + } + ath12k_ahb_ext_grp_enable(irq_grp); + } +} + +static void ath12k_ahb_ext_irq_disable(struct ath12k_base *ab) +{ + __ath12k_ahb_ext_irq_disable(ab); + ath12k_ahb_sync_ext_irqs(ab); +} + +static void ath12k_ahb_stop(struct ath12k_base *ab) +{ + if (!test_bit(ATH12K_FLAG_CRASH_FLUSH, &ab->dev_flags)) + ath12k_ahb_ce_irqs_disable(ab); + ath12k_ahb_sync_ce_irqs(ab); + ath12k_ahb_cancel_workqueue(ab); + timer_delete_sync(&ab->rx_replenish_retry); + ath12k_ce_cleanup_pipes(ab); +} + +static int ath12k_ahb_power_up(struct ath12k_base *ab) +{ + struct ath12k_ahb *ab_ahb = ath12k_ab_to_ahb(ab); + char fw_name[ATH12K_USERPD_FW_NAME_LEN]; + char fw2_name[ATH12K_USERPD_FW_NAME_LEN]; + struct device *dev = ab->dev; + const struct firmware *fw, *fw2; + struct reserved_mem *rmem = NULL; + unsigned long time_left; + phys_addr_t mem_phys; + void *mem_region; + size_t mem_size; + u32 pasid; + int ret; + + rmem = ath12k_core_get_reserved_mem(ab, 0); + if (!rmem) + return -ENODEV; + + mem_phys = rmem->base; + mem_size = rmem->size; + mem_region = devm_memremap(dev, mem_phys, mem_size, MEMREMAP_WC); + if (IS_ERR(mem_region)) { + ath12k_err(ab, "unable to map memory region: %pa+%pa\n", + &rmem->base, &rmem->size); + return PTR_ERR(mem_region); + } + + snprintf(fw_name, sizeof(fw_name), "%s/%s/%s%d%s", ATH12K_FW_DIR, + ab->hw_params->fw.dir, ATH12K_AHB_FW_PREFIX, ab_ahb->userpd_id, + ATH12K_AHB_FW_SUFFIX); + + ret = request_firmware(&fw, fw_name, dev); + if (ret < 0) { + ath12k_err(ab, "request_firmware failed\n"); + return ret; + } + + ath12k_dbg(ab, ATH12K_DBG_AHB, "Booting fw image %s, size %zd\n", fw_name, + fw->size); + + if (!fw->size) { + ath12k_err(ab, "Invalid firmware size\n"); + ret = -EINVAL; + goto err_fw; + } + + pasid = (u32_encode_bits(ab_ahb->userpd_id, ATH12K_USERPD_ID_MASK)) | + ATH12K_AHB_UPD_SWID; + + /* Load FW image to a reserved memory location */ + ret = qcom_mdt_load(dev, fw, fw_name, pasid, mem_region, mem_phys, mem_size, + &mem_phys); + if (ret) { + ath12k_err(ab, "Failed to load MDT segments: %d\n", ret); + goto err_fw; + } + + snprintf(fw2_name, sizeof(fw2_name), "%s/%s/%s", ATH12K_FW_DIR, + ab->hw_params->fw.dir, ATH12K_AHB_FW2); + + ret = request_firmware(&fw2, fw2_name, dev); + if (ret < 0) { + ath12k_err(ab, "request_firmware failed\n"); + goto err_fw; + } + + ath12k_dbg(ab, ATH12K_DBG_AHB, "Booting fw image %s, size %zd\n", fw2_name, + fw2->size); + + if (!fw2->size) { + ath12k_err(ab, "Invalid firmware size\n"); + ret = -EINVAL; + goto err_fw2; + } + + ret = qcom_mdt_load_no_init(dev, fw2, fw2_name, pasid, mem_region, mem_phys, + mem_size, &mem_phys); + if (ret) { + 
ath12k_err(ab, "Failed to load MDT segments: %d\n", ret); + goto err_fw2; + } + + /* Authenticate FW image using peripheral ID */ + ret = qcom_scm_pas_auth_and_reset(pasid); + if (ret) { + ath12k_err(ab, "failed to boot the remote processor %d\n", ret); + goto err_fw2; + } + + /* Instruct Q6 to spawn userPD thread */ + ret = qcom_smem_state_update_bits(ab_ahb->spawn_state, BIT(ab_ahb->spawn_bit), + BIT(ab_ahb->spawn_bit)); + if (ret) { + ath12k_err(ab, "Failed to update spawn state %d\n", ret); + goto err_fw2; + } + + time_left = wait_for_completion_timeout(&ab_ahb->userpd_spawned, + ATH12K_USERPD_SPAWN_TIMEOUT); + if (!time_left) { + ath12k_err(ab, "UserPD spawn wait timed out\n"); + ret = -ETIMEDOUT; + goto err_fw2; + } + + time_left = wait_for_completion_timeout(&ab_ahb->userpd_ready, + ATH12K_USERPD_READY_TIMEOUT); + if (!time_left) { + ath12k_err(ab, "UserPD ready wait timed out\n"); + ret = -ETIMEDOUT; + goto err_fw2; + } + + qcom_smem_state_update_bits(ab_ahb->spawn_state, BIT(ab_ahb->spawn_bit), 0); + + ath12k_dbg(ab, ATH12K_DBG_AHB, "UserPD%d is now UP\n", ab_ahb->userpd_id); + +err_fw2: + release_firmware(fw2); +err_fw: + release_firmware(fw); + return ret; +} + +static void ath12k_ahb_power_down(struct ath12k_base *ab, bool is_suspend) +{ + struct ath12k_ahb *ab_ahb = ath12k_ab_to_ahb(ab); + unsigned long time_left; + u32 pasid; + int ret; + + qcom_smem_state_update_bits(ab_ahb->stop_state, BIT(ab_ahb->stop_bit), + BIT(ab_ahb->stop_bit)); + + time_left = wait_for_completion_timeout(&ab_ahb->userpd_stopped, + ATH12K_USERPD_STOP_TIMEOUT); + if (!time_left) { + ath12k_err(ab, "UserPD stop wait timed out\n"); + return; + } + + qcom_smem_state_update_bits(ab_ahb->stop_state, BIT(ab_ahb->stop_bit), 0); + + pasid = (u32_encode_bits(ab_ahb->userpd_id, ATH12K_USERPD_ID_MASK)) | + ATH12K_AHB_UPD_SWID; + /* Release the firmware */ + ret = qcom_scm_pas_shutdown(pasid); + if (ret) + ath12k_err(ab, "scm pas shutdown failed for userPD%d: %d\n", + ab_ahb->userpd_id, ret); +} + +static void ath12k_ahb_init_qmi_ce_config(struct ath12k_base *ab) +{ + struct ath12k_qmi_ce_cfg *cfg = &ab->qmi.ce_cfg; + + cfg->tgt_ce_len = ab->hw_params->target_ce_count; + cfg->tgt_ce = ab->hw_params->target_ce_config; + cfg->svc_to_ce_map_len = ab->hw_params->svc_to_ce_map_len; + cfg->svc_to_ce_map = ab->hw_params->svc_to_ce_map; + ab->qmi.service_ins_id = ab->hw_params->qmi_service_ins_id; +} + +static void ath12k_ahb_ce_workqueue(struct work_struct *work) +{ + struct ath12k_ce_pipe *ce_pipe = from_work(ce_pipe, work, intr_wq); + + ath12k_ce_per_engine_service(ce_pipe->ab, ce_pipe->pipe_num); + + ath12k_ahb_ce_irq_enable(ce_pipe->ab, ce_pipe->pipe_num); +} + +static irqreturn_t ath12k_ahb_ce_interrupt_handler(int irq, void *arg) +{ + struct ath12k_ce_pipe *ce_pipe = arg; + + /* last interrupt received for this CE */ + ce_pipe->timestamp = jiffies; + + ath12k_ahb_ce_irq_disable(ce_pipe->ab, ce_pipe->pipe_num); + + queue_work(system_bh_wq, &ce_pipe->intr_wq); + + return IRQ_HANDLED; +} + +static int ath12k_ahb_ext_grp_napi_poll(struct napi_struct *napi, int budget) +{ + struct ath12k_ext_irq_grp *irq_grp = container_of(napi, + struct ath12k_ext_irq_grp, + napi); + struct ath12k_base *ab = irq_grp->ab; + int work_done; + + work_done = ath12k_dp_service_srng(ab, irq_grp, budget); + if (work_done < budget) { + napi_complete_done(napi, work_done); + ath12k_ahb_ext_grp_enable(irq_grp); + } + + if (work_done > budget) + work_done = budget; + + return work_done; +} + +static irqreturn_t ath12k_ahb_ext_interrupt_handler(int 
irq, void *arg) +{ + struct ath12k_ext_irq_grp *irq_grp = arg; + + /* last interrupt received for this group */ + irq_grp->timestamp = jiffies; + + ath12k_ahb_ext_grp_disable(irq_grp); + + napi_schedule(&irq_grp->napi); + + return IRQ_HANDLED; +} + +static int ath12k_ahb_config_ext_irq(struct ath12k_base *ab) +{ + const struct ath12k_hw_ring_mask *ring_mask; + struct ath12k_ext_irq_grp *irq_grp; + const struct hal_ops *hal_ops; + int i, j, irq, irq_idx, ret; + u32 num_irq; + + ring_mask = ab->hw_params->ring_mask; + hal_ops = ab->hw_params->hal_ops; + for (i = 0; i < ATH12K_EXT_IRQ_GRP_NUM_MAX; i++) { + irq_grp = &ab->ext_irq_grp[i]; + num_irq = 0; + + irq_grp->ab = ab; + irq_grp->grp_id = i; + + irq_grp->napi_ndev = alloc_netdev_dummy(0); + if (!irq_grp->napi_ndev) + return -ENOMEM; + + netif_napi_add(irq_grp->napi_ndev, &irq_grp->napi, + ath12k_ahb_ext_grp_napi_poll); + + for (j = 0; j < ATH12K_EXT_IRQ_NUM_MAX; j++) { + /* For TX ring, ensure that the ring mask and the + * tcl_to_wbm_rbm_map point to the same ring number. + */ + if (ring_mask->tx[i] & + BIT(hal_ops->tcl_to_wbm_rbm_map[j].wbm_ring_num)) { + irq_grp->irqs[num_irq++] = + wbm2host_tx_completions_ring1 - j; + } + + if (ring_mask->rx[i] & BIT(j)) { + irq_grp->irqs[num_irq++] = + reo2host_destination_ring1 - j; + } + + if (ring_mask->rx_err[i] & BIT(j)) + irq_grp->irqs[num_irq++] = reo2host_exception; + + if (ring_mask->rx_wbm_rel[i] & BIT(j)) + irq_grp->irqs[num_irq++] = wbm2host_rx_release; + + if (ring_mask->reo_status[i] & BIT(j)) + irq_grp->irqs[num_irq++] = reo2host_status; + + if (ring_mask->rx_mon_dest[i] & BIT(j)) + irq_grp->irqs[num_irq++] = + rxdma2host_monitor_destination_mac1; + } + + irq_grp->num_irq = num_irq; + + for (j = 0; j < irq_grp->num_irq; j++) { + irq_idx = irq_grp->irqs[j]; + + irq = platform_get_irq_byname(ab->pdev, + irq_name[irq_idx]); + ab->irq_num[irq_idx] = irq; + irq_set_status_flags(irq, IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY); + ret = devm_request_irq(ab->dev, irq, + ath12k_ahb_ext_interrupt_handler, + IRQF_TRIGGER_RISING, + irq_name[irq_idx], irq_grp); + if (ret) + ath12k_warn(ab, "failed request_irq for %d\n", irq); + } + } + + return 0; +} + +static int ath12k_ahb_config_irq(struct ath12k_base *ab) +{ + int irq, irq_idx, i; + int ret; + + /* Configure CE irqs */ + for (i = 0; i < ab->hw_params->ce_count; i++) { + struct ath12k_ce_pipe *ce_pipe = &ab->ce.ce_pipe[i]; + + if (ath12k_ce_get_attr_flags(ab, i) & CE_ATTR_DIS_INTR) + continue; + + irq_idx = ATH12K_IRQ_CE0_OFFSET + i; + + INIT_WORK(&ce_pipe->intr_wq, ath12k_ahb_ce_workqueue); + irq = platform_get_irq_byname(ab->pdev, irq_name[irq_idx]); + ret = devm_request_irq(ab->dev, irq, ath12k_ahb_ce_interrupt_handler, + IRQF_TRIGGER_RISING, irq_name[irq_idx], + ce_pipe); + if (ret) + return ret; + + ab->irq_num[irq_idx] = irq; + } + + /* Configure external interrupts */ + ret = ath12k_ahb_config_ext_irq(ab); + + return ret; +} + +static int ath12k_ahb_map_service_to_pipe(struct ath12k_base *ab, u16 service_id, + u8 *ul_pipe, u8 *dl_pipe) +{ + const struct service_to_pipe *entry; + bool ul_set = false, dl_set = false; + u32 pipedir; + int i; + + for (i = 0; i < ab->hw_params->svc_to_ce_map_len; i++) { + entry = &ab->hw_params->svc_to_ce_map[i]; + + if (__le32_to_cpu(entry->service_id) != service_id) + continue; + + pipedir = __le32_to_cpu(entry->pipedir); + if (pipedir == PIPEDIR_IN || pipedir == PIPEDIR_INOUT) { + WARN_ON(dl_set); + *dl_pipe = __le32_to_cpu(entry->pipenum); + dl_set = true; + } + + if (pipedir == PIPEDIR_OUT || pipedir == 
PIPEDIR_INOUT) { + WARN_ON(ul_set); + *ul_pipe = __le32_to_cpu(entry->pipenum); + ul_set = true; + } + } + + if (WARN_ON(!ul_set || !dl_set)) + return -ENOENT; + + return 0; +} + +static const struct ath12k_hif_ops ath12k_ahb_hif_ops_ipq5332 = { + .start = ath12k_ahb_start, + .stop = ath12k_ahb_stop, + .read32 = ath12k_ahb_read32, + .write32 = ath12k_ahb_write32, + .irq_enable = ath12k_ahb_ext_irq_enable, + .irq_disable = ath12k_ahb_ext_irq_disable, + .map_service_to_pipe = ath12k_ahb_map_service_to_pipe, + .power_up = ath12k_ahb_power_up, + .power_down = ath12k_ahb_power_down, +}; + +static irqreturn_t ath12k_userpd_irq_handler(int irq, void *data) +{ + struct ath12k_base *ab = data; + struct ath12k_ahb *ab_ahb = ath12k_ab_to_ahb(ab); + + if (irq == ab_ahb->userpd_irq_num[ATH12K_USERPD_SPAWN_IRQ]) { + complete(&ab_ahb->userpd_spawned); + } else if (irq == ab_ahb->userpd_irq_num[ATH12K_USERPD_READY_IRQ]) { + complete(&ab_ahb->userpd_ready); + } else if (irq == ab_ahb->userpd_irq_num[ATH12K_USERPD_STOP_ACK_IRQ]) { + complete(&ab_ahb->userpd_stopped); + } else { + ath12k_err(ab, "Invalid userpd interrupt\n"); + return IRQ_NONE; + } + + return IRQ_HANDLED; +} + +static int ath12k_ahb_config_rproc_irq(struct ath12k_base *ab) +{ + struct ath12k_ahb *ab_ahb = ath12k_ab_to_ahb(ab); + int i, ret; + char *upd_irq_name; + + for (i = 0; i < ATH12K_USERPD_MAX_IRQ; i++) { + ab_ahb->userpd_irq_num[i] = platform_get_irq_byname(ab->pdev, + ath12k_userpd_irq[i]); + if (ab_ahb->userpd_irq_num[i] < 0) + return ab_ahb->userpd_irq_num[i]; + + upd_irq_name = devm_kzalloc(&ab->pdev->dev, ATH12K_UPD_IRQ_WRD_LEN, + GFP_KERNEL); + if (!upd_irq_name) + return -ENOMEM; + + scnprintf(upd_irq_name, ATH12K_UPD_IRQ_WRD_LEN, "UserPD%u-%s", + ab_ahb->userpd_id, ath12k_userpd_irq[i]); + ret = devm_request_threaded_irq(&ab->pdev->dev, ab_ahb->userpd_irq_num[i], + NULL, ath12k_userpd_irq_handler, + IRQF_TRIGGER_RISING | IRQF_ONESHOT, + upd_irq_name, ab); + if (ret) + return dev_err_probe(&ab->pdev->dev, ret, + "Request %s irq failed: %d\n", + ath12k_userpd_irq[i], ret); + } + + ab_ahb->spawn_state = devm_qcom_smem_state_get(&ab->pdev->dev, "spawn", + &ab_ahb->spawn_bit); + if (IS_ERR(ab_ahb->spawn_state)) + return dev_err_probe(&ab->pdev->dev, PTR_ERR(ab_ahb->spawn_state), + "Failed to acquire spawn state\n"); + + ab_ahb->stop_state = devm_qcom_smem_state_get(&ab->pdev->dev, "stop", + &ab_ahb->stop_bit); + if (IS_ERR(ab_ahb->stop_state)) + return dev_err_probe(&ab->pdev->dev, PTR_ERR(ab_ahb->stop_state), + "Failed to acquire stop state\n"); + + init_completion(&ab_ahb->userpd_spawned); + init_completion(&ab_ahb->userpd_ready); + init_completion(&ab_ahb->userpd_stopped); + return 0; +} + +static int ath12k_ahb_root_pd_state_notifier(struct notifier_block *nb, + const unsigned long event, void *data) +{ + struct ath12k_ahb *ab_ahb = container_of(nb, struct ath12k_ahb, root_pd_nb); + struct ath12k_base *ab = ab_ahb->ab; + + if (event == ATH12K_RPROC_AFTER_POWERUP) { + ath12k_dbg(ab, ATH12K_DBG_AHB, "Root PD is UP\n"); + complete(&ab_ahb->rootpd_ready); + } + + return 0; +} + +static int ath12k_ahb_register_rproc_notifier(struct ath12k_base *ab) +{ + struct ath12k_ahb *ab_ahb = ath12k_ab_to_ahb(ab); + + ab_ahb->root_pd_nb.notifier_call = ath12k_ahb_root_pd_state_notifier; + init_completion(&ab_ahb->rootpd_ready); + + ab_ahb->root_pd_notifier = qcom_register_ssr_notifier(ab_ahb->tgt_rproc->name, + &ab_ahb->root_pd_nb); + if (IS_ERR(ab_ahb->root_pd_notifier)) + return PTR_ERR(ab_ahb->root_pd_notifier); + + return 0; +} + +static 
void ath12k_ahb_unregister_rproc_notifier(struct ath12k_base *ab) +{ + struct ath12k_ahb *ab_ahb = ath12k_ab_to_ahb(ab); + + if (!ab_ahb->root_pd_notifier) { + ath12k_err(ab, "Rproc notifier not registered\n"); + return; + } + + qcom_unregister_ssr_notifier(ab_ahb->root_pd_notifier, + &ab_ahb->root_pd_nb); + ab_ahb->root_pd_notifier = NULL; +} + +static int ath12k_ahb_get_rproc(struct ath12k_base *ab) +{ + struct ath12k_ahb *ab_ahb = ath12k_ab_to_ahb(ab); + struct device *dev = ab->dev; + struct device_node *np; + struct rproc *prproc; + + np = of_parse_phandle(dev->of_node, "qcom,rproc", 0); + if (!np) { + ath12k_err(ab, "failed to get q6_rproc handle\n"); + return -ENOENT; + } + + prproc = rproc_get_by_phandle(np->phandle); + of_node_put(np); + if (!prproc) + return dev_err_probe(&ab->pdev->dev, -EPROBE_DEFER, + "failed to get rproc\n"); + + ab_ahb->tgt_rproc = prproc; + + return 0; +} + +static int ath12k_ahb_boot_root_pd(struct ath12k_base *ab) +{ + struct ath12k_ahb *ab_ahb = ath12k_ab_to_ahb(ab); + unsigned long time_left; + int ret; + + ret = rproc_boot(ab_ahb->tgt_rproc); + if (ret < 0) { + ath12k_err(ab, "RootPD boot failed\n"); + return ret; + } + + time_left = wait_for_completion_timeout(&ab_ahb->rootpd_ready, + ATH12K_ROOTPD_READY_TIMEOUT); + if (!time_left) { + ath12k_err(ab, "RootPD ready wait timed out\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static int ath12k_ahb_configure_rproc(struct ath12k_base *ab) +{ + struct ath12k_ahb *ab_ahb = ath12k_ab_to_ahb(ab); + int ret; + + ret = ath12k_ahb_get_rproc(ab); + if (ret < 0) + return ret; + + ret = ath12k_ahb_register_rproc_notifier(ab); + if (ret < 0) { + ret = dev_err_probe(&ab->pdev->dev, ret, + "failed to register rproc notifier\n"); + goto err_put_rproc; + } + + if (ab_ahb->tgt_rproc->state != RPROC_RUNNING) { + ret = ath12k_ahb_boot_root_pd(ab); + if (ret < 0) { + ath12k_err(ab, "failed to boot the remote processor Q6\n"); + goto err_unreg_notifier; + } + } + + return ath12k_ahb_config_rproc_irq(ab); + +err_unreg_notifier: + ath12k_ahb_unregister_rproc_notifier(ab); + +err_put_rproc: + rproc_put(ab_ahb->tgt_rproc); + return ret; +} + +static void ath12k_ahb_deconfigure_rproc(struct ath12k_base *ab) +{ + struct ath12k_ahb *ab_ahb = ath12k_ab_to_ahb(ab); + + ath12k_ahb_unregister_rproc_notifier(ab); + rproc_put(ab_ahb->tgt_rproc); +} + +static int ath12k_ahb_resource_init(struct ath12k_base *ab) +{ + struct ath12k_ahb *ab_ahb = ath12k_ab_to_ahb(ab); + struct platform_device *pdev = ab->pdev; + struct resource *mem_res; + int ret; + + ab->mem = devm_platform_get_and_ioremap_resource(pdev, 0, &mem_res); + if (IS_ERR(ab->mem)) { + ret = dev_err_probe(&pdev->dev, PTR_ERR(ab->mem), "ioremap error\n"); + goto out; + } + + ab->mem_len = resource_size(mem_res); + + if (ab->hw_params->ce_remap) { + const struct ce_remap *ce_remap = ab->hw_params->ce_remap; + /* CE register space is moved out of WCSS and the space is not + * contiguous, hence remapping the CE registers to a new space + * for accessing them. 
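+ * The remap base and size come from the per-chip hw_params (struct + * ce_remap); ath12k_ahb_read32()/ath12k_ahb_write32() steer accesses + * below HAL_SEQ_WCSS_CMEM_OFFSET to this mapping.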
+ */ + ab->mem_ce = ioremap(ce_remap->base, ce_remap->size); + if (!ab->mem_ce) { + dev_err(&pdev->dev, "ce ioremap error\n"); + ret = -ENOMEM; + goto err_mem_unmap; + } + ab->ce_remap = true; + ab->ce_remap_base_addr = HAL_IPQ5332_CE_WFSS_REG_BASE; + } + + ab_ahb->xo_clk = devm_clk_get(ab->dev, "xo"); + if (IS_ERR(ab_ahb->xo_clk)) { + ret = dev_err_probe(&pdev->dev, PTR_ERR(ab_ahb->xo_clk), + "failed to get xo clock\n"); + goto err_mem_ce_unmap; + } + + ret = clk_prepare_enable(ab_ahb->xo_clk); + if (ret) { + dev_err(&pdev->dev, "failed to enable gcc_xo_clk: %d\n", ret); + goto err_clock_deinit; + } + + return 0; + +err_clock_deinit: + devm_clk_put(ab->dev, ab_ahb->xo_clk); + +err_mem_ce_unmap: + ab_ahb->xo_clk = NULL; + if (ab->hw_params->ce_remap) + iounmap(ab->mem_ce); + +err_mem_unmap: + ab->mem_ce = NULL; + devm_iounmap(ab->dev, ab->mem); + +out: + ab->mem = NULL; + return ret; +} + +static void ath12k_ahb_resource_deinit(struct ath12k_base *ab) +{ + struct ath12k_ahb *ab_ahb = ath12k_ab_to_ahb(ab); + + if (ab->mem) + devm_iounmap(ab->dev, ab->mem); + + if (ab->mem_ce) + iounmap(ab->mem_ce); + + ab->mem = NULL; + ab->mem_ce = NULL; + + clk_disable_unprepare(ab_ahb->xo_clk); + devm_clk_put(ab->dev, ab_ahb->xo_clk); + ab_ahb->xo_clk = NULL; +} + +static int ath12k_ahb_probe(struct platform_device *pdev) +{ + struct ath12k_base *ab; + const struct ath12k_hif_ops *hif_ops; + struct ath12k_ahb *ab_ahb; + enum ath12k_hw_rev hw_rev; + u32 addr, userpd_id; + int ret; + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(&pdev->dev, "Failed to set 32-bit coherent dma\n"); + return ret; + } + + ab = ath12k_core_alloc(&pdev->dev, sizeof(struct ath12k_ahb), + ATH12K_BUS_AHB); + if (!ab) + return -ENOMEM; + + hw_rev = (enum ath12k_hw_rev)(kernel_ulong_t)of_device_get_match_data(&pdev->dev); + switch (hw_rev) { + case ATH12K_HW_IPQ5332_HW10: + hif_ops = &ath12k_ahb_hif_ops_ipq5332; + userpd_id = ATH12K_IPQ5332_USERPD_ID; + break; + default: + ret = -EOPNOTSUPP; + goto err_core_free; + } + + ab->hif.ops = hif_ops; + ab->pdev = pdev; + ab->hw_rev = hw_rev; + platform_set_drvdata(pdev, ab); + ab_ahb = ath12k_ab_to_ahb(ab); + ab_ahb->ab = ab; + ab_ahb->userpd_id = userpd_id; + + /* Set fixed_mem_region to true for platforms that support fixed memory + * reservation from DT. If memory is reserved from DT for FW, the ath12k + * driver need not allocate memory.
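+ * Without such a reservation, the firmware memory segments are + * allocated by the driver at runtime as part of the QMI target + * memory exchange.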
+ */ + if (!of_property_read_u32(ab->dev->of_node, "memory-region", &addr)) + set_bit(ATH12K_FLAG_FIXED_MEM_REGION, &ab->dev_flags); + + ret = ath12k_core_pre_init(ab); + if (ret) + goto err_core_free; + + ret = ath12k_ahb_resource_init(ab); + if (ret) + goto err_core_free; + + ret = ath12k_hal_srng_init(ab); + if (ret) + goto err_resource_deinit; + + ret = ath12k_ce_alloc_pipes(ab); + if (ret) { + ath12k_err(ab, "failed to allocate ce pipes: %d\n", ret); + goto err_hal_srng_deinit; + } + + ath12k_ahb_init_qmi_ce_config(ab); + + ret = ath12k_ahb_configure_rproc(ab); + if (ret) + goto err_ce_free; + + ret = ath12k_ahb_config_irq(ab); + if (ret) { + ath12k_err(ab, "failed to configure irq: %d\n", ret); + goto err_rproc_deconfigure; + } + + ret = ath12k_core_init(ab); + if (ret) { + ath12k_err(ab, "failed to init core: %d\n", ret); + goto err_rproc_deconfigure; + } + + return 0; + +err_rproc_deconfigure: + ath12k_ahb_deconfigure_rproc(ab); + +err_ce_free: + ath12k_ce_free_pipes(ab); + +err_hal_srng_deinit: + ath12k_hal_srng_deinit(ab); + +err_resource_deinit: + ath12k_ahb_resource_deinit(ab); + +err_core_free: + ath12k_core_free(ab); + platform_set_drvdata(pdev, NULL); + + return ret; +} + +static void ath12k_ahb_remove_prepare(struct ath12k_base *ab) +{ + unsigned long left; + + if (test_bit(ATH12K_FLAG_RECOVERY, &ab->dev_flags)) { + left = wait_for_completion_timeout(&ab->driver_recovery, + ATH12K_AHB_RECOVERY_TIMEOUT); + if (!left) + ath12k_warn(ab, "failed to receive recovery response completion\n"); + } + + set_bit(ATH12K_FLAG_UNREGISTERING, &ab->dev_flags); + cancel_work_sync(&ab->restart_work); + cancel_work_sync(&ab->qmi.event_work); +} + +static void ath12k_ahb_free_resources(struct ath12k_base *ab) +{ + struct platform_device *pdev = ab->pdev; + + ath12k_hal_srng_deinit(ab); + ath12k_ce_free_pipes(ab); + ath12k_ahb_resource_deinit(ab); + ath12k_ahb_deconfigure_rproc(ab); + ath12k_core_free(ab); + platform_set_drvdata(pdev, NULL); +} + +static void ath12k_ahb_remove(struct platform_device *pdev) +{ + struct ath12k_base *ab = platform_get_drvdata(pdev); + + if (test_bit(ATH12K_FLAG_QMI_FAIL, &ab->dev_flags)) { + ath12k_ahb_power_down(ab, false); + ath12k_qmi_deinit_service(ab); + goto qmi_fail; + } + + ath12k_ahb_remove_prepare(ab); + ath12k_core_deinit(ab); + +qmi_fail: + ath12k_ahb_free_resources(ab); +} + +static struct platform_driver ath12k_ahb_driver = { + .driver = { + .name = "ath12k_ahb", + .of_match_table = ath12k_ahb_of_match, + }, + .probe = ath12k_ahb_probe, + .remove = ath12k_ahb_remove, +}; + +int ath12k_ahb_init(void) +{ + return platform_driver_register(&ath12k_ahb_driver); +} + +void ath12k_ahb_exit(void) +{ + platform_driver_unregister(&ath12k_ahb_driver); +} diff --git a/drivers/net/wireless/ath/ath12k/ahb.h b/drivers/net/wireless/ath/ath12k/ahb.h new file mode 100644 index 000000000000..d56244b20a6a --- /dev/null +++ b/drivers/net/wireless/ath/ath12k/ahb.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: BSD-3-Clause-Clear */ +/* + * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. + * Copyright (c) 2022-2025, Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ +#ifndef ATH12K_AHB_H +#define ATH12K_AHB_H + +#include <linux/clk.h> +#include <linux/remoteproc/qcom_rproc.h> +#include "core.h" + +#define ATH12K_AHB_RECOVERY_TIMEOUT (3 * HZ) + +#define ATH12K_AHB_SMP2P_SMEM_MSG GENMASK(15, 0) +#define ATH12K_AHB_SMP2P_SMEM_SEQ_NO GENMASK(31, 16) +#define ATH12K_AHB_SMP2P_SMEM_VALUE_MASK 0xFFFFFFFF +#define ATH12K_PCI_CE_WAKE_IRQ 2 +#define ATH12K_PCI_IRQ_CE0_OFFSET 3 +#define ATH12K_ROOTPD_READY_TIMEOUT (5 * HZ) +#define ATH12K_RPROC_AFTER_POWERUP QCOM_SSR_AFTER_POWERUP +#define ATH12K_AHB_FW_PREFIX "q6_fw" +#define ATH12K_AHB_FW_SUFFIX ".mdt" +#define ATH12K_AHB_FW2 "iu_fw.mdt" +#define ATH12K_AHB_UPD_SWID 0x12 +#define ATH12K_USERPD_SPAWN_TIMEOUT (5 * HZ) +#define ATH12K_USERPD_READY_TIMEOUT (10 * HZ) +#define ATH12K_USERPD_STOP_TIMEOUT (5 * HZ) +#define ATH12K_USERPD_ID_MASK GENMASK(9, 8) +#define ATH12K_USERPD_FW_NAME_LEN 35 + +enum ath12k_ahb_smp2p_msg_id { + ATH12K_AHB_POWER_SAVE_ENTER = 1, + ATH12K_AHB_POWER_SAVE_EXIT, +}; + +enum ath12k_ahb_userpd_irq { + ATH12K_USERPD_SPAWN_IRQ, + ATH12K_USERPD_READY_IRQ, + ATH12K_USERPD_STOP_ACK_IRQ, + ATH12K_USERPD_MAX_IRQ, +}; + +struct ath12k_base; + +struct ath12k_ahb { + struct ath12k_base *ab; + struct rproc *tgt_rproc; + struct clk *xo_clk; + struct completion rootpd_ready; + struct notifier_block root_pd_nb; + void *root_pd_notifier; + struct qcom_smem_state *spawn_state; + struct qcom_smem_state *stop_state; + struct completion userpd_spawned; + struct completion userpd_ready; + struct completion userpd_stopped; + u32 userpd_id; + u32 spawn_bit; + u32 stop_bit; + int userpd_irq_num[ATH12K_USERPD_MAX_IRQ]; +}; + +static inline struct ath12k_ahb *ath12k_ab_to_ahb(struct ath12k_base *ab) +{ + return (struct ath12k_ahb *)ab->drv_priv; +} + +#ifdef CONFIG_ATH12K_AHB +int ath12k_ahb_init(void); +void ath12k_ahb_exit(void); +#else +static inline int ath12k_ahb_init(void) +{ + return 0; +} + +static inline void ath12k_ahb_exit(void) {}; +#endif +#endif diff --git a/drivers/net/wireless/ath/ath12k/ce.c b/drivers/net/wireless/ath/ath12k/ce.c index be0d669d31fc..7a5656aceebe 100644 --- a/drivers/net/wireless/ath/ath12k/ce.c +++ b/drivers/net/wireless/ath/ath12k/ce.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2022, 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "dp_rx.h" @@ -219,6 +219,96 @@ const struct ce_attr ath12k_host_ce_config_wcn7850[] = { }; +const struct ce_attr ath12k_host_ce_config_ipq5332[] = { + /* CE0: host->target HTC control and raw streams */ + { + .flags = CE_ATTR_FLAGS, + .src_nentries = 16, + .src_sz_max = 2048, + .dest_nentries = 0, + }, + /* CE1: target->host HTT + HTC control */ + { + .flags = CE_ATTR_FLAGS, + .src_nentries = 0, + .src_sz_max = 2048, + .dest_nentries = 512, + .recv_cb = ath12k_htc_rx_completion_handler, + }, + /* CE2: target->host WMI */ + { + .flags = CE_ATTR_FLAGS, + .src_nentries = 0, + .src_sz_max = 2048, + .dest_nentries = 128, + .recv_cb = ath12k_htc_rx_completion_handler, + }, + /* CE3: host->target WMI */ + { + .flags = CE_ATTR_FLAGS, + .src_nentries = 32, + .src_sz_max = 2048, + .dest_nentries = 0, + }, + /* CE4: host->target HTT */ + { + .flags = CE_ATTR_FLAGS | CE_ATTR_DIS_INTR, + .src_nentries = 2048, + .src_sz_max = 256, + .dest_nentries = 0, + }, + /* CE5: target -> host PKTLOG */ + { + .flags = CE_ATTR_FLAGS, + .src_nentries = 0, + .src_sz_max = 2048, + .dest_nentries = 512, + .recv_cb = ath12k_dp_htt_htc_t2h_msg_handler, + }, + /* CE6: Target autonomous HIF_memcpy */ + { + .flags = CE_ATTR_FLAGS | CE_ATTR_DIS_INTR, + .src_nentries = 0, + .src_sz_max = 0, + .dest_nentries = 0, + }, + /* CE7: CV Prefetch */ + { + .flags = CE_ATTR_FLAGS | CE_ATTR_DIS_INTR, + .src_nentries = 0, + .src_sz_max = 0, + .dest_nentries = 0, + }, + /* CE8: Target HIF memcpy (Generic HIF memcpy) */ + { + .flags = CE_ATTR_FLAGS | CE_ATTR_DIS_INTR, + .src_nentries = 0, + .src_sz_max = 0, + .dest_nentries = 0, + }, + /* CE9: WMI logging/CFR/Spectral/Radar */ + { + .flags = CE_ATTR_FLAGS, + .src_nentries = 0, + .src_sz_max = 2048, + .dest_nentries = 128, + }, + /* CE10: Unused */ + { + .flags = CE_ATTR_FLAGS | CE_ATTR_DIS_INTR, + .src_nentries = 0, + .src_sz_max = 0, + .dest_nentries = 0, + }, + /* CE11: Unused */ + { + .flags = CE_ATTR_FLAGS | CE_ATTR_DIS_INTR, + .src_nentries = 0, + .src_sz_max = 0, + .dest_nentries = 0, + }, +}; + static int ath12k_ce_rx_buf_enqueue_pipe(struct ath12k_ce_pipe *pipe, struct sk_buff *skb, dma_addr_t paddr) { diff --git a/drivers/net/wireless/ath/ath12k/ce.h b/drivers/net/wireless/ath/ath12k/ce.h index 1a14b9fb86b8..57f75899ee03 100644 --- a/drivers/net/wireless/ath/ath12k/ce.h +++ b/drivers/net/wireless/ath/ath12k/ce.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2022, 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved.
*/ #ifndef ATH12K_CE_H @@ -39,8 +39,8 @@ #define PIPEDIR_INOUT_H2H 4 /* bidirectional, host to host */ /* CE address/mask */ -#define CE_HOST_IE_ADDRESS 0x00A1803C -#define CE_HOST_IE_2_ADDRESS 0x00A18040 +#define CE_HOST_IE_ADDRESS 0x75804C +#define CE_HOST_IE_2_ADDRESS 0x758050 #define CE_HOST_IE_3_ADDRESS CE_HOST_IE_ADDRESS #define CE_HOST_IE_3_SHIFT 0xC @@ -76,6 +76,17 @@ struct ce_pipe_config { __le32 reserved; }; +struct ce_ie_addr { + u32 ie1_reg_addr; + u32 ie2_reg_addr; + u32 ie3_reg_addr; +}; + +struct ce_remap { + u32 base; + u32 size; +}; + struct ce_attr { /* CE_ATTR_* values */ unsigned int flags; @@ -164,6 +175,7 @@ struct ath12k_ce { extern const struct ce_attr ath12k_host_ce_config_qcn9274[]; extern const struct ce_attr ath12k_host_ce_config_wcn7850[]; +extern const struct ce_attr ath12k_host_ce_config_ipq5332[]; void ath12k_ce_cleanup_pipes(struct ath12k_base *ab); void ath12k_ce_rx_replenish_retry(struct timer_list *t); diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 0b2dec081c6e..7eccd9cf9036 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -10,15 +10,18 @@ #include <linux/firmware.h> #include <linux/of.h> #include <linux/of_graph.h> +#include "ahb.h" #include "core.h" #include "dp_tx.h" #include "dp_rx.h" #include "debug.h" -#include "hif.h" -#include "fw.h" #include "debugfs.h" +#include "fw.h" +#include "hif.h" +#include "pci.h" #include "wow.h" +static int ahb_err, pci_err; unsigned int ath12k_debug_mask; module_param_named(debug_mask, ath12k_debug_mask, uint, 0644); MODULE_PARM_DESC(debug_mask, "Debugging mask"); @@ -612,9 +615,74 @@ u32 ath12k_core_get_max_num_tids(struct ath12k_base *ab) return TARGET_NUM_TIDS(SINGLE); } +struct reserved_mem *ath12k_core_get_reserved_mem(struct ath12k_base *ab, + int index) +{ + struct device *dev = ab->dev; + struct reserved_mem *rmem; + struct device_node *node; + + node = of_parse_phandle(dev->of_node, "memory-region", index); + if (!node) { + ath12k_dbg(ab, ATH12K_DBG_BOOT, + "failed to parse memory-region for index %d\n", index); + return NULL; + } + + rmem = of_reserved_mem_lookup(node); + of_node_put(node); + if (!rmem) { + ath12k_dbg(ab, ATH12K_DBG_BOOT, + "unable to get memory-region for index %d\n", index); + return NULL; + } + + return rmem; +} + +static inline +void ath12k_core_to_group_ref_get(struct ath12k_base *ab) +{ + struct ath12k_hw_group *ag = ab->ag; + + lockdep_assert_held(&ag->mutex); + + if (ab->hw_group_ref) { + ath12k_dbg(ab, ATH12K_DBG_BOOT, "core already attached to group %d\n", + ag->id); + return; + } + + ab->hw_group_ref = true; + ag->num_started++; + + ath12k_dbg(ab, ATH12K_DBG_BOOT, "core attached to group %d, num_started %d\n", + ag->id, ag->num_started); +} + +static inline +void ath12k_core_to_group_ref_put(struct ath12k_base *ab) +{ + struct ath12k_hw_group *ag = ab->ag; + + lockdep_assert_held(&ag->mutex); + + if (!ab->hw_group_ref) { + ath12k_dbg(ab, ATH12K_DBG_BOOT, "core already de-attached from group %d\n", + ag->id); + return; + } + + ab->hw_group_ref = false; + ag->num_started--; + + ath12k_dbg(ab, ATH12K_DBG_BOOT, "core de-attached from group %d, num_started %d\n", + ag->id, ag->num_started); +} + static void ath12k_core_stop(struct ath12k_base *ab) { - ath12k_core_stopped(ab); + ath12k_core_to_group_ref_put(ab); if (!test_bit(ATH12K_FLAG_CRASH_FLUSH, &ab->dev_flags)) ath12k_qmi_firmware_stop(ab); @@ -851,9 +919,8 @@ static int ath12k_core_start(struct ath12k_base *ab) 
ath12k_acpi_set_dsm_func(ab); - if (!test_bit(ATH12K_FLAG_RECOVERY, &ab->dev_flags)) - /* Indicate the core start in the appropriate group */ - ath12k_core_started(ab); + /* Indicate the core start in the appropriate group */ + ath12k_core_to_group_ref_get(ab); return 0; @@ -891,6 +958,9 @@ static void ath12k_core_hw_group_stop(struct ath12k_hw_group *ag) ab = ag->ab[i]; if (!ab) continue; + + clear_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); + ath12k_core_device_cleanup(ab); } @@ -1026,6 +1096,8 @@ core_pdev_create: mutex_lock(&ab->core_lock); + set_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); + ret = ath12k_core_pdev_create(ab); if (ret) { ath12k_err(ab, "failed to create pdev core %d\n", ret); @@ -1084,6 +1156,59 @@ bool ath12k_core_hw_group_start_ready(struct ath12k_hw_group *ag) return (ag->num_started == ag->num_devices); } +static void ath12k_fw_stats_pdevs_free(struct list_head *head) +{ + struct ath12k_fw_stats_pdev *i, *tmp; + + list_for_each_entry_safe(i, tmp, head, list) { + list_del(&i->list); + kfree(i); + } +} + +void ath12k_fw_stats_bcn_free(struct list_head *head) +{ + struct ath12k_fw_stats_bcn *i, *tmp; + + list_for_each_entry_safe(i, tmp, head, list) { + list_del(&i->list); + kfree(i); + } +} + +static void ath12k_fw_stats_vdevs_free(struct list_head *head) +{ + struct ath12k_fw_stats_vdev *i, *tmp; + + list_for_each_entry_safe(i, tmp, head, list) { + list_del(&i->list); + kfree(i); + } +} + +void ath12k_fw_stats_init(struct ath12k *ar) +{ + INIT_LIST_HEAD(&ar->fw_stats.vdevs); + INIT_LIST_HEAD(&ar->fw_stats.pdevs); + INIT_LIST_HEAD(&ar->fw_stats.bcn); + init_completion(&ar->fw_stats_complete); +} + +void ath12k_fw_stats_free(struct ath12k_fw_stats *stats) +{ + ath12k_fw_stats_pdevs_free(&stats->pdevs); + ath12k_fw_stats_vdevs_free(&stats->vdevs); + ath12k_fw_stats_bcn_free(&stats->bcn); +} + +void ath12k_fw_stats_reset(struct ath12k *ar) +{ + spin_lock_bh(&ar->data_lock); + ar->fw_stats.fw_stats_done = false; + ath12k_fw_stats_free(&ar->fw_stats); + spin_unlock_bh(&ar->data_lock); +} + static void ath12k_core_trigger_partner(struct ath12k_base *ab) { struct ath12k_hw_group *ag = ab->ag; @@ -1246,6 +1371,7 @@ static void ath12k_rfkill_work(struct work_struct *work) void ath12k_core_halt(struct ath12k *ar) { + struct list_head *pos, *n; struct ath12k_base *ab = ar->ab; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -1258,10 +1384,16 @@ void ath12k_core_halt(struct ath12k *ar) cancel_delayed_work_sync(&ar->scan.timeout); cancel_work_sync(&ar->regd_update_work); cancel_work_sync(&ab->rfkill_work); + cancel_work_sync(&ab->update_11d_work); rcu_assign_pointer(ab->pdevs_active[ar->pdev_idx], NULL); synchronize_rcu(); - INIT_LIST_HEAD(&ar->arvifs); + + spin_lock_bh(&ar->data_lock); + list_for_each_safe(pos, n, &ar->arvifs) + list_del_init(pos); + spin_unlock_bh(&ar->data_lock); + idr_init(&ar->txmgmt_idr); } @@ -1285,14 +1417,28 @@ static void ath12k_core_pre_reconfigure_recovery(struct ath12k_base *ab) ah->state == ATH12K_HW_STATE_TM) continue; + wiphy_lock(ah->hw->wiphy); + + /* If queue 0 is stopped, it is safe to assume that all + * other queues are stopped by driver via + * ieee80211_stop_queues() below. 
This means, there is + * no need to stop it again and hence continue + */ + if (ieee80211_queue_stopped(ah->hw, 0)) { + wiphy_unlock(ah->hw->wiphy); + continue; + } + ieee80211_stop_queues(ah->hw); for (j = 0; j < ah->num_radio; j++) { ar = &ah->radio[j]; ath12k_mac_drain_tx(ar); + ar->state_11d = ATH12K_11D_IDLE; + complete(&ar->completed_11d_scan); complete(&ar->scan.started); - complete(&ar->scan.completed); + complete_all(&ar->scan.completed); complete(&ar->scan.on_channel); complete(&ar->peer_assoc_done); complete(&ar->peer_delete_done); @@ -1306,13 +1452,47 @@ static void ath12k_core_pre_reconfigure_recovery(struct ath12k_base *ab) ath12k_mac_tx_mgmt_pending_free, ar); idr_destroy(&ar->txmgmt_idr); wake_up(&ar->txmgmt_empty_waitq); + + ar->monitor_vdev_id = -1; + ar->monitor_vdev_created = false; + ar->monitor_started = false; } + + wiphy_unlock(ah->hw->wiphy); } wake_up(&ab->wmi_ab.tx_credits_wq); wake_up(&ab->peer_mapping_wq); } +static void ath12k_update_11d(struct work_struct *work) +{ + struct ath12k_base *ab = container_of(work, struct ath12k_base, update_11d_work); + struct ath12k *ar; + struct ath12k_pdev *pdev; + struct wmi_set_current_country_arg arg = {}; + int ret, i; + + spin_lock_bh(&ab->base_lock); + memcpy(&arg.alpha2, &ab->new_alpha2, 2); + spin_unlock_bh(&ab->base_lock); + + ath12k_dbg(ab, ATH12K_DBG_WMI, "update 11d new cc %c%c\n", + arg.alpha2[0], arg.alpha2[1]); + + for (i = 0; i < ab->num_radios; i++) { + pdev = &ab->pdevs[i]; + ar = pdev->ar; + + memcpy(&ar->alpha2, &arg.alpha2, 2); + ret = ath12k_wmi_send_set_current_country_cmd(ar, &arg); + if (ret) + ath12k_warn(ar->ab, + "pdev id %d failed set current country code: %d\n", + i, ret); + } +} + static void ath12k_core_post_reconfigure_recovery(struct ath12k_base *ab) { struct ath12k_hw_group *ag = ab->ag; @@ -1386,19 +1566,30 @@ static void ath12k_core_restart(struct work_struct *work) ath12k_dbg(ab, ATH12K_DBG_BOOT, "reset success\n"); } + mutex_lock(&ag->mutex); + + if (!ath12k_core_hw_group_start_ready(ag)) { + mutex_unlock(&ag->mutex); + goto exit_restart; + } + for (i = 0; i < ag->num_hw; i++) { - ah = ath12k_ag_to_ah(ab->ag, i); + ah = ath12k_ag_to_ah(ag, i); ieee80211_restart_hw(ah->hw); } + + mutex_unlock(&ag->mutex); } +exit_restart: complete(&ab->restart_completed); } static void ath12k_core_reset(struct work_struct *work) { struct ath12k_base *ab = container_of(work, struct ath12k_base, reset_work); - int reset_count, fail_cont_count; + struct ath12k_hw_group *ag = ab->ag; + int reset_count, fail_cont_count, i; long time_left; if (!(test_bit(ATH12K_FLAG_QMI_FW_READY_COMPLETE, &ab->dev_flags))) { @@ -1457,9 +1648,34 @@ static void ath12k_core_reset(struct work_struct *work) ath12k_hif_ce_irq_disable(ab); ath12k_hif_power_down(ab, false); - ath12k_hif_power_up(ab); - ath12k_dbg(ab, ATH12K_DBG_BOOT, "reset started\n"); + /* prepare for power up */ + ab->qmi.num_radios = U8_MAX; + + mutex_lock(&ag->mutex); + ath12k_core_to_group_ref_put(ab); + + if (ag->num_started > 0) { + ath12k_dbg(ab, ATH12K_DBG_BOOT, + "waiting for %d partner device(s) to reset\n", + ag->num_started); + mutex_unlock(&ag->mutex); + return; + } + + /* Prepare MLO global memory region for power up */ + ath12k_qmi_reset_mlo_mem(ag); + + for (i = 0; i < ag->num_devices; i++) { + ab = ag->ab[i]; + if (!ab) + continue; + + ath12k_hif_power_up(ab); + ath12k_dbg(ab, ATH12K_DBG_BOOT, "reset started\n"); + } + + mutex_unlock(&ag->mutex); } int ath12k_core_pre_init(struct ath12k_base *ab) @@ -1843,20 +2059,18 @@ void 
ath12k_core_hw_group_set_mlo_capable(struct ath12k_hw_group *ag) lockdep_assert_held(&ag->mutex); - /* If more than one devices are grouped, then inter MLO - * functionality can work still independent of whether internally - * each device supports single_chip_mlo or not. - * Only when there is one device, then disable for WCN chipsets - * till the required driver implementation is in place. - */ if (ag->num_devices == 1) { ab = ag->ab[0]; - - /* WCN chipsets does not advertise in firmware features - * hence skip checking - */ - if (ab->hw_params->def_num_link) + /* QCN9274 firmware uses firmware IE for MLO advertisement */ + if (ab->fw.fw_features_valid) { + ag->mlo_capable = + ath12k_fw_feature_supported(ab, ATH12K_FW_FEATURE_MLO); return; + } + + /* while WCN7850 firmware uses QMI single_chip_mlo_support bit */ + ag->mlo_capable = ab->single_chip_mlo_support; + return; } ag->mlo_capable = true; @@ -1869,7 +2083,7 @@ void ath12k_core_hw_group_set_mlo_capable(struct ath12k_hw_group *ag) /* even if 1 device's firmware feature indicates MLO * unsupported, make MLO unsupported for the whole group */ - if (!test_bit(ATH12K_FW_FEATURE_MLO, ab->fw.fw_features)) { + if (!ath12k_fw_feature_supported(ab, ATH12K_FW_FEATURE_MLO)) { ag->mlo_capable = false; return; } @@ -1966,6 +2180,7 @@ struct ath12k_base *ath12k_core_alloc(struct device *dev, size_t priv_size, INIT_WORK(&ab->reset_work, ath12k_core_reset); INIT_WORK(&ab->rfkill_work, ath12k_rfkill_work); INIT_WORK(&ab->dump_work, ath12k_coredump_upload); + INIT_WORK(&ab->update_11d_work, ath12k_update_11d); timer_setup(&ab->rx_replenish_retry, ath12k_ce_rx_replenish_retry, 0); init_completion(&ab->htc_suspend); @@ -1975,6 +2190,7 @@ struct ath12k_base *ath12k_core_alloc(struct device *dev, size_t priv_size, ab->dev = dev; ab->hif.bus = bus; ab->qmi.num_radios = U8_MAX; + ab->single_chip_mlo_support = false; /* Device index used to identify the devices in a group. * @@ -1995,5 +2211,31 @@ err_sc_free: return NULL; } -MODULE_DESCRIPTION("Core module for Qualcomm Atheros 802.11be wireless LAN cards."); +static int ath12k_init(void) +{ + ahb_err = ath12k_ahb_init(); + if (ahb_err) + pr_warn("Failed to initialize ath12k AHB device: %d\n", ahb_err); + + pci_err = ath12k_pci_init(); + if (pci_err) + pr_warn("Failed to initialize ath12k PCI device: %d\n", pci_err); + + /* If both failed, return one of the failures (arbitrary) */ + return ahb_err && pci_err ? 
ahb_err : 0; +} + +static void ath12k_exit(void) +{ + if (!pci_err) + ath12k_pci_exit(); + + if (!ahb_err) + ath12k_ahb_exit(); +} + +module_init(ath12k_init); +module_exit(ath12k_exit); + +MODULE_DESCRIPTION("Driver support for Qualcomm Technologies 802.11be WLAN devices"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 3fac4f00d383..4b8f434e3e9a 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -14,6 +14,7 @@ #include <linux/dmi.h> #include <linux/ctype.h> #include <linux/firmware.h> +#include <linux/of_reserved_mem.h> #include <linux/panic_notifier.h> #include <linux/average.h> #include "qmi.h" @@ -147,7 +148,8 @@ struct ath12k_skb_rxcb { enum ath12k_hw_rev { ATH12K_HW_QCN9274_HW10, ATH12K_HW_QCN9274_HW20, - ATH12K_HW_WCN7850_HW20 + ATH12K_HW_WCN7850_HW20, + ATH12K_HW_IPQ5332_HW10, }; enum ath12k_firmware_mode { @@ -160,6 +162,7 @@ enum ath12k_firmware_mode { #define ATH12K_IRQ_NUM_MAX 57 #define ATH12K_EXT_IRQ_NUM_MAX 16 +#define ATH12K_MAX_TCL_RING_NUM 3 struct ath12k_ext_irq_grp { struct ath12k_base *ab; @@ -219,6 +222,12 @@ enum ath12k_scan_state { ATH12K_SCAN_ABORTING, }; +enum ath12k_11d_state { + ATH12K_11D_IDLE, + ATH12K_11D_PREPARING, + ATH12K_11D_RUNNING, +}; + enum ath12k_hw_group_flags { ATH12K_GROUP_FLAG_REGISTERED, ATH12K_GROUP_FLAG_UNREGISTER, @@ -238,6 +247,7 @@ enum ath12k_dev_flags { ATH12K_FLAG_EXT_IRQ_ENABLED, ATH12K_FLAG_QMI_FW_READY_COMPLETE, ATH12K_FLAG_FTM_SEGMENTED, + ATH12K_FLAG_FIXED_MEM_REGION, }; struct ath12k_tx_conf { @@ -295,14 +305,18 @@ struct ath12k_link_vif { int txpower; bool rsnie_present; bool wpaie_present; - struct ieee80211_chanctx_conf chanctx; u8 vdev_stats_id; u32 punct_bitmap; u8 link_id; struct ath12k_vif *ahvif; struct ath12k_rekey_data rekey_data; + struct ath12k_link_stats link_stats; + spinlock_t link_stats_lock; /* Protects updates to link_stats */ u8 current_cntdown_counter; + + /* only used in station mode */ + bool is_sta_assoc_link; }; struct ath12k_vif { @@ -364,6 +378,8 @@ struct ath12k_vif_iter { #define HAL_RX_MAX_NSS 8 #define HAL_RX_MAX_NUM_LEGACY_RATES 12 +#define ATH12K_SCAN_TIMEOUT_HZ (20 * HZ) + struct ath12k_rx_peer_rate_stats { u64 ht_mcs_count[HAL_RX_MAX_MCS_HT + 1]; u64 vht_mcs_count[HAL_RX_MAX_MCS_VHT + 1]; @@ -519,6 +535,12 @@ struct ath12k_link_sta { u8 link_idx; }; +struct ath12k_reoq_buf { + void *vaddr; + dma_addr_t paddr_aligned; + u32 size; +}; + struct ath12k_sta { struct ath12k_vif *ahvif; enum hal_pn_type pn_type; @@ -531,13 +553,25 @@ struct ath12k_sta { u8 num_peer; enum ieee80211_sta_state state; + + struct ath12k_reoq_buf reoq_bufs[IEEE80211_NUM_TIDS + 1]; }; -#define ATH12K_MIN_5G_FREQ 4150 -#define ATH12K_MIN_6G_FREQ 5925 -#define ATH12K_MAX_6G_FREQ 7115 +#define ATH12K_HALF_20MHZ_BW 10 +#define ATH12K_2GHZ_MIN_CENTER 2412 +#define ATH12K_2GHZ_MAX_CENTER 2484 +#define ATH12K_5GHZ_MIN_CENTER 4900 +#define ATH12K_5GHZ_MAX_CENTER 5920 +#define ATH12K_6GHZ_MIN_CENTER 5935 +#define ATH12K_6GHZ_MAX_CENTER 7115 +#define ATH12K_MIN_2GHZ_FREQ (ATH12K_2GHZ_MIN_CENTER - ATH12K_HALF_20MHZ_BW - 1) +#define ATH12K_MAX_2GHZ_FREQ (ATH12K_2GHZ_MAX_CENTER + ATH12K_HALF_20MHZ_BW + 1) +#define ATH12K_MIN_5GHZ_FREQ (ATH12K_5GHZ_MIN_CENTER - ATH12K_HALF_20MHZ_BW) +#define ATH12K_MAX_5GHZ_FREQ (ATH12K_5GHZ_MAX_CENTER + ATH12K_HALF_20MHZ_BW) +#define ATH12K_MIN_6GHZ_FREQ (ATH12K_6GHZ_MIN_CENTER - ATH12K_HALF_20MHZ_BW) +#define ATH12K_MAX_6GHZ_FREQ (ATH12K_6GHZ_MAX_CENTER + 
ATH12K_HALF_20MHZ_BW) #define ATH12K_NUM_CHANS 101 -#define ATH12K_MAX_5G_CHAN 173 +#define ATH12K_MAX_5GHZ_CHAN 173 enum ath12k_hw_state { ATH12K_HW_STATE_OFF, @@ -728,7 +762,6 @@ struct ath12k { #endif bool dfs_block_radar_events; - bool monitor_conf_enabled; bool monitor_vdev_created; bool monitor_started; int monitor_vdev_id; @@ -737,12 +770,20 @@ struct ath12k { bool nlo_enabled; + /* Protected by wiphy::mtx lock. */ + u32 vdev_id_11d_scan; + struct completion completed_11d_scan; + enum ath12k_11d_state state_11d; + u8 alpha2[REG_ALPHA2_LEN]; + bool regdom_set_by_user; + struct completion fw_stats_complete; struct completion mlo_setup_done; u32 mlo_setup_status; u8 ftm_msgref; struct ath12k_fw_stats fw_stats; + unsigned long last_tx_power_update; }; struct ath12k_hw { @@ -852,6 +893,11 @@ struct ath12k_soc_dp_stats { struct ath12k_soc_dp_tx_err_stats tx_err; }; +struct ath12k_reg_freq { + u32 start_freq; + u32 end_freq; +}; + struct ath12k_mlo_memory { struct target_mem_chunk chunk[ATH12K_QMI_WLANFW_MAX_NUM_MEM_SEG_V01]; int mlo_mem_size; @@ -923,6 +969,10 @@ struct ath12k_base { void __iomem *mem; unsigned long mem_len; + void __iomem *mem_ce; + u32 ce_remap_base_addr; + bool ce_remap; + struct { enum ath12k_bus bus; const struct ath12k_hif_ops *ops; @@ -1011,6 +1061,8 @@ struct ath12k_base { /* continuous recovery fail count */ atomic_t fail_cont_count; unsigned long reset_fail_timeout; + struct work_struct update_11d_work; + u8 new_alpha2[2]; struct { /* protected by data_lock */ u32 fw_crash_counter; @@ -1051,6 +1103,7 @@ struct ath12k_base { size_t m3_len; DECLARE_BITMAP(fw_features, ATH12K_FW_FEATURE_COUNT); + bool fw_features_valid; } fw; const struct hal_rx_ops *hal_rx_ops; @@ -1087,6 +1140,14 @@ struct ath12k_base { struct ath12k_wsi_info wsi_info; enum ath12k_firmware_mode fw_mode; struct ath12k_ftm_event_obj ftm_event_obj; + bool hw_group_ref; + + /* Denote whether MLO is possible within the device */ + bool single_chip_mlo_support; + + struct ath12k_reg_freq reg_freq_2ghz; + struct ath12k_reg_freq reg_freq_5ghz; + struct ath12k_reg_freq reg_freq_6ghz; /* must be last */ u8 drv_priv[] __aligned(sizeof(void *)); @@ -1215,6 +1276,12 @@ u32 ath12k_core_get_max_peers_per_radio(struct ath12k_base *ab); u32 ath12k_core_get_max_num_tids(struct ath12k_base *ab); void ath12k_core_hw_group_set_mlo_capable(struct ath12k_hw_group *ag); +void ath12k_fw_stats_init(struct ath12k *ar); +void ath12k_fw_stats_bcn_free(struct list_head *head); +void ath12k_fw_stats_free(struct ath12k_fw_stats *stats); +void ath12k_fw_stats_reset(struct ath12k *ar); +struct reserved_mem *ath12k_core_get_reserved_mem(struct ath12k_base *ab, + int index); static inline const char *ath12k_scan_state_str(enum ath12k_scan_state state) { @@ -1284,6 +1351,8 @@ static inline const char *ath12k_bus_str(enum ath12k_bus bus) switch (bus) { case ATH12K_BUS_PCI: return "pci"; + case ATH12K_BUS_AHB: + return "ahb"; } return "unknown"; @@ -1333,20 +1402,6 @@ static inline struct ath12k_hw_group *ath12k_ab_to_ag(struct ath12k_base *ab) return ab->ag; } -static inline void ath12k_core_started(struct ath12k_base *ab) -{ - lockdep_assert_held(&ab->ag->mutex); - - ab->ag->num_started++; -} - -static inline void ath12k_core_stopped(struct ath12k_base *ab) -{ - lockdep_assert_held(&ab->ag->mutex); - - ab->ag->num_started--; -} - static inline struct ath12k_base *ath12k_ag_to_ab(struct ath12k_hw_group *ag, u8 device_id) { diff --git a/drivers/net/wireless/ath/ath12k/debugfs.c b/drivers/net/wireless/ath/ath12k/debugfs.c index 
57002215ddf1..bab7be9c5a38 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs.c +++ b/drivers/net/wireless/ath/ath12k/debugfs.c @@ -88,8 +88,8 @@ static int ath12k_get_tpc_ctl_mode_idx(struct wmi_tpc_stats_arg *tpc_stats, u32 chan_freq = le32_to_cpu(tpc_stats->tpc_config.chan_freq); u8 band; - band = ((chan_freq > ATH12K_MIN_6G_FREQ) ? NL80211_BAND_6GHZ : - ((chan_freq > ATH12K_MIN_5G_FREQ) ? NL80211_BAND_5GHZ : + band = ((chan_freq > ATH12K_MIN_6GHZ_FREQ) ? NL80211_BAND_6GHZ : + ((chan_freq > ATH12K_MIN_5GHZ_FREQ) ? NL80211_BAND_5GHZ : NL80211_BAND_2GHZ)); if (band == NL80211_BAND_5GHZ || band == NL80211_BAND_6GHZ) { @@ -833,6 +833,124 @@ static const struct file_operations fops_extd_rx_stats = { .open = simple_open, }; +static int ath12k_open_link_stats(struct inode *inode, struct file *file) +{ + struct ath12k_vif *ahvif = inode->i_private; + size_t len = 0, buf_len = (PAGE_SIZE * 2); + struct ath12k_link_stats linkstat; + struct ath12k_link_vif *arvif; + unsigned long links_map; + struct wiphy *wiphy; + int link_id, i; + char *buf; + + if (!ahvif) + return -EINVAL; + + buf = kzalloc(buf_len, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + wiphy = ahvif->ah->hw->wiphy; + wiphy_lock(wiphy); + + links_map = ahvif->links_map; + for_each_set_bit(link_id, &links_map, + IEEE80211_MLD_MAX_NUM_LINKS) { + arvif = rcu_dereference_protected(ahvif->link[link_id], + lockdep_is_held(&wiphy->mtx)); + + spin_lock_bh(&arvif->link_stats_lock); + linkstat = arvif->link_stats; + spin_unlock_bh(&arvif->link_stats_lock); + + len += scnprintf(buf + len, buf_len - len, + "link[%d] Tx Unicast Frames Enqueued = %d\n", + link_id, linkstat.tx_enqueued); + len += scnprintf(buf + len, buf_len - len, + "link[%d] Tx Broadcast Frames Enqueued = %d\n", + link_id, linkstat.tx_bcast_mcast); + len += scnprintf(buf + len, buf_len - len, + "link[%d] Tx Frames Completed = %d\n", + link_id, linkstat.tx_completed); + len += scnprintf(buf + len, buf_len - len, + "link[%d] Tx Frames Dropped = %d\n", + link_id, linkstat.tx_dropped); + + len += scnprintf(buf + len, buf_len - len, + "link[%d] Tx Frame descriptor Encap Type = ", + link_id); + + len += scnprintf(buf + len, buf_len - len, + " raw:%d", + linkstat.tx_encap_type[0]); + + len += scnprintf(buf + len, buf_len - len, + " native_wifi:%d", + linkstat.tx_encap_type[1]); + + len += scnprintf(buf + len, buf_len - len, + " ethernet:%d", + linkstat.tx_encap_type[2]); + + len += scnprintf(buf + len, buf_len - len, + "\nlink[%d] Tx Frame descriptor Encrypt Type = ", + link_id); + + for (i = 0; i < HAL_ENCRYPT_TYPE_MAX; i++) { + len += scnprintf(buf + len, buf_len - len, + " %d:%d", i, + linkstat.tx_encrypt_type[i]); + } + len += scnprintf(buf + len, buf_len - len, + "\nlink[%d] Tx Frame descriptor Type = buffer:%d extension:%d\n", + link_id, linkstat.tx_desc_type[0], + linkstat.tx_desc_type[1]); + + len += scnprintf(buf + len, buf_len - len, + "------------------------------------------------------\n"); + } + + wiphy_unlock(wiphy); + + file->private_data = buf; + + return 0; +} + +static int ath12k_release_link_stats(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static ssize_t ath12k_read_link_stats(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + const char *buf = file->private_data; + size_t len = strlen(buf); + + return simple_read_from_buffer(user_buf, count, ppos, buf, len); +} + +static const struct file_operations ath12k_fops_link_stats = { + .open = ath12k_open_link_stats, + .release = 
ath12k_release_link_stats, + .read = ath12k_read_link_stats, + .owner = THIS_MODULE, + .llseek = default_llseek, +}; + +void ath12k_debugfs_op_vif_add(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); + + debugfs_create_file("link_stats", 0400, vif->debugfs_dir, ahvif, + &ath12k_fops_link_stats); +} + void ath12k_debugfs_soc_create(struct ath12k_base *ab) { bool dput_needed; @@ -870,102 +988,6 @@ void ath12k_debugfs_soc_destroy(struct ath12k_base *ab) */ } -static void ath12k_fw_stats_pdevs_free(struct list_head *head) -{ - struct ath12k_fw_stats_pdev *i, *tmp; - - list_for_each_entry_safe(i, tmp, head, list) { - list_del(&i->list); - kfree(i); - } -} - -static void ath12k_fw_stats_bcn_free(struct list_head *head) -{ - struct ath12k_fw_stats_bcn *i, *tmp; - - list_for_each_entry_safe(i, tmp, head, list) { - list_del(&i->list); - kfree(i); - } -} - -static void ath12k_fw_stats_vdevs_free(struct list_head *head) -{ - struct ath12k_fw_stats_vdev *i, *tmp; - - list_for_each_entry_safe(i, tmp, head, list) { - list_del(&i->list); - kfree(i); - } -} - -void ath12k_debugfs_fw_stats_reset(struct ath12k *ar) -{ - spin_lock_bh(&ar->data_lock); - ar->fw_stats.fw_stats_done = false; - ath12k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); - ath12k_fw_stats_bcn_free(&ar->fw_stats.bcn); - ath12k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); - spin_unlock_bh(&ar->data_lock); -} - -static int ath12k_debugfs_fw_stats_request(struct ath12k *ar, - struct ath12k_fw_stats_req_params *param) -{ - struct ath12k_base *ab = ar->ab; - unsigned long timeout, time_left; - int ret; - - lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); - - /* FW stats can get split when exceeding the stats data buffer limit. - * In that case, since there is no end marking for the back-to-back - * received 'update stats' event, we keep a 3 seconds timeout in case, - * fw_stats_done is not marked yet - */ - timeout = jiffies + msecs_to_jiffies(3 * 1000); - - ath12k_debugfs_fw_stats_reset(ar); - - reinit_completion(&ar->fw_stats_complete); - - ret = ath12k_wmi_send_stats_request_cmd(ar, param->stats_id, - param->vdev_id, param->pdev_id); - - if (ret) { - ath12k_warn(ab, "could not request fw stats (%d)\n", - ret); - return ret; - } - - time_left = wait_for_completion_timeout(&ar->fw_stats_complete, - 1 * HZ); - /* If the wait timed out, return -ETIMEDOUT */ - if (!time_left) - return -ETIMEDOUT; - - /* Firmware sends WMI_UPDATE_STATS_EVENTID back-to-back - * when stats data buffer limit is reached. fw_stats_complete - * is completed once host receives first event from firmware, but - * still end might not be marked in the TLV. 
- * Below loop is to confirm that firmware completed sending all the event - * and fw_stats_done is marked true when end is marked in the TLV - */ - for (;;) { - if (time_after(jiffies, timeout)) - break; - - spin_lock_bh(&ar->data_lock); - if (ar->fw_stats.fw_stats_done) { - spin_unlock_bh(&ar->data_lock); - break; - } - spin_unlock_bh(&ar->data_lock); - } - return 0; -} - void ath12k_debugfs_fw_stats_process(struct ath12k *ar, struct ath12k_fw_stats *stats) @@ -1022,10 +1044,6 @@ ath12k_debugfs_fw_stats_process(struct ath12k *ar, num_bcn = 0; } } - if (stats->stats_id == WMI_REQUEST_PDEV_STAT) { - list_splice_tail_init(&stats->pdevs, &ar->fw_stats.pdevs); - ar->fw_stats.fw_stats_done = true; - } } static int ath12k_open_vdev_stats(struct inode *inode, struct file *file) @@ -1052,7 +1070,7 @@ static int ath12k_open_vdev_stats(struct inode *inode, struct file *file) param.vdev_id = 0; param.stats_id = WMI_REQUEST_VDEV_STAT; - ret = ath12k_debugfs_fw_stats_request(ar, &param); + ret = ath12k_mac_get_fw_stats(ar, &param); if (ret) { ath12k_warn(ar->ab, "failed to request fw vdev stats: %d\n", ret); return ret; @@ -1117,7 +1135,7 @@ static int ath12k_open_bcn_stats(struct inode *inode, struct file *file) continue; param.vdev_id = arvif->vdev_id; - ret = ath12k_debugfs_fw_stats_request(ar, &param); + ret = ath12k_mac_get_fw_stats(ar, &param); if (ret) { ath12k_warn(ar->ab, "failed to request fw bcn stats: %d\n", ret); return ret; @@ -1184,7 +1202,7 @@ static int ath12k_open_pdev_stats(struct inode *inode, struct file *file) param.vdev_id = 0; param.stats_id = WMI_REQUEST_PDEV_STAT; - ret = ath12k_debugfs_fw_stats_request(ar, &param); + ret = ath12k_mac_get_fw_stats(ar, &param); if (ret) { ath12k_warn(ab, "failed to request fw pdev stats: %d\n", ret); return ret; @@ -1239,11 +1257,7 @@ void ath12k_debugfs_fw_stats_register(struct ath12k *ar) debugfs_create_file("pdev_stats", 0600, fwstats_dir, ar, &fops_pdev_stats); - INIT_LIST_HEAD(&ar->fw_stats.vdevs); - INIT_LIST_HEAD(&ar->fw_stats.bcn); - INIT_LIST_HEAD(&ar->fw_stats.pdevs); - - init_completion(&ar->fw_stats_complete); + ath12k_fw_stats_init(ar); } void ath12k_debugfs_register(struct ath12k *ar) diff --git a/drivers/net/wireless/ath/ath12k/debugfs.h b/drivers/net/wireless/ath/ath12k/debugfs.h index d7041297d5d8..74a255a27886 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs.h +++ b/drivers/net/wireless/ath/ath12k/debugfs.h @@ -14,7 +14,8 @@ void ath12k_debugfs_register(struct ath12k *ar); void ath12k_debugfs_unregister(struct ath12k *ar); void ath12k_debugfs_fw_stats_process(struct ath12k *ar, struct ath12k_fw_stats *stats); -void ath12k_debugfs_fw_stats_reset(struct ath12k *ar); +void ath12k_debugfs_op_vif_add(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); static inline bool ath12k_debugfs_is_extd_rx_stats_enabled(struct ath12k *ar) { @@ -129,10 +130,6 @@ static inline void ath12k_debugfs_fw_stats_process(struct ath12k *ar, { } -static inline void ath12k_debugfs_fw_stats_reset(struct ath12k *ar) -{ -} - static inline bool ath12k_debugfs_is_extd_rx_stats_enabled(struct ath12k *ar) { return false; @@ -142,6 +139,11 @@ static inline int ath12k_debugfs_rx_filter(struct ath12k *ar) { return 0; } + +static inline void ath12k_debugfs_op_vif_add(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ +} #endif /* CONFIG_ATH12K_DEBUGFS */ #endif /* _ATH12K_DEBUGFS_H_ */ diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c index 1c0d5fa39a8d..aeaf970339d4 100644 ---
a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c @@ -5377,6 +5377,9 @@ static ssize_t ath12k_write_htt_stats_type(struct file *file, const int size = 32; int num_args; + if (count > size) + return -EINVAL; + char *buf __free(kfree) = kzalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM; diff --git a/drivers/net/wireless/ath/ath12k/dp.c b/drivers/net/wireless/ath/ath12k/dp.c index 50c36e6ea102..ad873013e46c 100644 --- a/drivers/net/wireless/ath/ath12k/dp.c +++ b/drivers/net/wireless/ath/ath12k/dp.c @@ -1206,11 +1206,19 @@ static void ath12k_dp_cc_cleanup(struct ath12k_base *ab) if (!skb) continue; + skb_cb = ATH12K_SKB_CB(skb); + if (skb_cb->paddr_ext_desc) { + dma_unmap_single(ab->dev, + skb_cb->paddr_ext_desc, + tx_desc_info->skb_ext_desc->len, + DMA_TO_DEVICE); + dev_kfree_skb_any(tx_desc_info->skb_ext_desc); + } + /* if we are unregistering, hw would've been destroyed and * ar is no longer valid. */ if (!(test_bit(ATH12K_FLAG_UNREGISTERING, &ab->dev_flags))) { - skb_cb = ATH12K_SKB_CB(skb); ar = skb_cb->ar; if (atomic_dec_and_test(&ar->dp.num_tx_pending)) @@ -1261,22 +1269,24 @@ static void ath12k_dp_reoq_lut_cleanup(struct ath12k_base *ab) if (!ab->hw_params->reoq_lut_support) return; - if (dp->reoq_lut.vaddr) { + if (dp->reoq_lut.vaddr_unaligned) { ath12k_hif_write32(ab, HAL_SEQ_WCSS_UMAC_REO_REG + HAL_REO1_QDESC_LUT_BASE0(ab), 0); - dma_free_coherent(ab->dev, DP_REOQ_LUT_SIZE, - dp->reoq_lut.vaddr, dp->reoq_lut.paddr); - dp->reoq_lut.vaddr = NULL; + dma_free_coherent(ab->dev, dp->reoq_lut.size, + dp->reoq_lut.vaddr_unaligned, + dp->reoq_lut.paddr_unaligned); + dp->reoq_lut.vaddr_unaligned = NULL; } - if (dp->ml_reoq_lut.vaddr) { + if (dp->ml_reoq_lut.vaddr_unaligned) { ath12k_hif_write32(ab, HAL_SEQ_WCSS_UMAC_REO_REG + HAL_REO1_QDESC_LUT_BASE1(ab), 0); - dma_free_coherent(ab->dev, DP_REOQ_LUT_SIZE, - dp->ml_reoq_lut.vaddr, dp->ml_reoq_lut.paddr); - dp->ml_reoq_lut.vaddr = NULL; + dma_free_coherent(ab->dev, dp->ml_reoq_lut.size, + dp->ml_reoq_lut.vaddr_unaligned, + dp->ml_reoq_lut.paddr_unaligned); + dp->ml_reoq_lut.vaddr_unaligned = NULL; } } @@ -1608,39 +1618,67 @@ free: return ret; } +static int ath12k_dp_alloc_reoq_lut(struct ath12k_base *ab, + struct ath12k_reo_q_addr_lut *lut) +{ + lut->size = DP_REOQ_LUT_SIZE + HAL_REO_QLUT_ADDR_ALIGN - 1; + lut->vaddr_unaligned = dma_alloc_coherent(ab->dev, lut->size, + &lut->paddr_unaligned, + GFP_KERNEL | __GFP_ZERO); + if (!lut->vaddr_unaligned) + return -ENOMEM; + + lut->vaddr = PTR_ALIGN(lut->vaddr_unaligned, HAL_REO_QLUT_ADDR_ALIGN); + lut->paddr = lut->paddr_unaligned + + ((unsigned long)lut->vaddr - (unsigned long)lut->vaddr_unaligned); + return 0; +} + static int ath12k_dp_reoq_lut_setup(struct ath12k_base *ab) { struct ath12k_dp *dp = &ab->dp; + u32 val; + int ret; if (!ab->hw_params->reoq_lut_support) return 0; - dp->reoq_lut.vaddr = dma_alloc_coherent(ab->dev, - DP_REOQ_LUT_SIZE, - &dp->reoq_lut.paddr, - GFP_KERNEL | __GFP_ZERO); - if (!dp->reoq_lut.vaddr) { + ret = ath12k_dp_alloc_reoq_lut(ab, &dp->reoq_lut); + if (ret) { ath12k_warn(ab, "failed to allocate memory for reoq table"); - return -ENOMEM; + return ret; } - dp->ml_reoq_lut.vaddr = dma_alloc_coherent(ab->dev, - DP_REOQ_LUT_SIZE, - &dp->ml_reoq_lut.paddr, - GFP_KERNEL | __GFP_ZERO); - if (!dp->ml_reoq_lut.vaddr) { + ret = ath12k_dp_alloc_reoq_lut(ab, &dp->ml_reoq_lut); + if (ret) { ath12k_warn(ab, "failed to allocate memory for ML reoq table"); - dma_free_coherent(ab->dev, DP_REOQ_LUT_SIZE, - dp->reoq_lut.vaddr, 
dp->reoq_lut.paddr); - dp->reoq_lut.vaddr = NULL; - return -ENOMEM; + dma_free_coherent(ab->dev, dp->reoq_lut.size, + dp->reoq_lut.vaddr_unaligned, + dp->reoq_lut.paddr_unaligned); + dp->reoq_lut.vaddr_unaligned = NULL; + return ret; } + /* Bits in the register have address [39:8] LUT base address to be + * allocated such that LSBs are assumed to be zero. Also, current + * design supports paddr up to 4 GB max hence it fits in 32 bit + * register only + */ + ath12k_hif_write32(ab, HAL_SEQ_WCSS_UMAC_REO_REG + HAL_REO1_QDESC_LUT_BASE0(ab), - dp->reoq_lut.paddr); + dp->reoq_lut.paddr >> 8); + ath12k_hif_write32(ab, HAL_SEQ_WCSS_UMAC_REO_REG + HAL_REO1_QDESC_LUT_BASE1(ab), dp->ml_reoq_lut.paddr >> 8); + val = ath12k_hif_read32(ab, HAL_SEQ_WCSS_UMAC_REO_REG + HAL_REO1_QDESC_ADDR(ab)); + + ath12k_hif_write32(ab, HAL_SEQ_WCSS_UMAC_REO_REG + HAL_REO1_QDESC_ADDR(ab), + val | HAL_REO_QDESC_ADDR_READ_LUT_ENABLE); + + ath12k_hif_write32(ab, HAL_SEQ_WCSS_UMAC_REO_REG + HAL_REO1_QDESC_MAX_PEERID(ab), + HAL_REO_QDESC_MAX_PEERID); + return 0; } diff --git a/drivers/net/wireless/ath/ath12k/dp.h b/drivers/net/wireless/ath/ath12k/dp.h index 75435a931548..706d766d8c81 100644 --- a/drivers/net/wireless/ath/ath12k/dp.h +++ b/drivers/net/wireless/ath/ath12k/dp.h @@ -7,6 +7,7 @@ #ifndef ATH12K_DP_H #define ATH12K_DP_H +#include "hal_desc.h" #include "hal_rx.h" #include "hw.h" @@ -106,6 +107,8 @@ struct dp_mon_mpdu { struct list_head list; struct sk_buff *head; struct sk_buff *tail; + u32 err_bitmap; + u8 decap_format; }; #define DP_MON_MAX_STATUS_BUF 32 @@ -293,11 +296,18 @@ struct ath12k_rx_desc_info { struct ath12k_tx_desc_info { struct list_head list; struct sk_buff *skb; + struct sk_buff *skb_ext_desc; u32 desc_id; /* Cookie */ u8 mac_id; u8 pool_id; }; +struct ath12k_tx_desc_params { + struct sk_buff *skb; + struct sk_buff *skb_ext_desc; + u8 mac_id; +}; + struct ath12k_spt_info { dma_addr_t paddr; u64 *vaddr; @@ -309,8 +319,21 @@ struct ath12k_reo_queue_ref { } __packed; struct ath12k_reo_q_addr_lut { - dma_addr_t paddr; + u32 *vaddr_unaligned; u32 *vaddr; + dma_addr_t paddr_unaligned; + dma_addr_t paddr; + u32 size; +}; + +struct ath12k_link_stats { + u32 tx_enqueued; + u32 tx_completed; + u32 tx_bcast_mcast; + u32 tx_dropped; + u32 tx_encap_type[HAL_TCL_ENCAP_TYPE_MAX]; + u32 tx_encrypt_type[HAL_ENCRYPT_TYPE_MAX]; + u32 tx_desc_type[HAL_TCL_DESC_TYPE_MAX]; }; struct ath12k_dp { diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.c b/drivers/net/wireless/ath/ath12k/dp_mon.c index d22800e89485..17117315ec7b 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.c +++ b/drivers/net/wireless/ath/ath12k/dp_mon.c @@ -1647,7 +1647,7 @@ ath12k_dp_mon_rx_parse_status_tlv(struct ath12k *ar, u32_get_bits(info[0], HAL_RX_MPDU_START_INFO0_PPDU_ID); } - break; + return HAL_RX_MON_STATUS_MPDU_START; } case HAL_RX_MSDU_START: /* TODO: add msdu start parsing logic */ @@ -1688,55 +1688,168 @@ ath12k_dp_mon_rx_parse_status_tlv(struct ath12k *ar, return HAL_RX_MON_STATUS_PPDU_NOT_DONE; } -static void ath12k_dp_mon_rx_msdus_set_payload(struct ath12k *ar, - struct sk_buff *head_msdu, - struct sk_buff *tail_msdu) -{ - u32 rx_pkt_offset, l2_hdr_offset; +static void +ath12k_dp_mon_fill_rx_stats_info(struct ath12k *ar, + struct hal_rx_mon_ppdu_info *ppdu_info, + struct ieee80211_rx_status *rx_status) +{ + u32 center_freq = ppdu_info->freq; + + rx_status->freq = center_freq; + rx_status->bw = ath12k_mac_bw_to_mac80211_bw(ppdu_info->bw); + rx_status->nss = ppdu_info->nss; + rx_status->rate_idx = 0; + rx_status->encoding = RX_ENC_LEGACY; 
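+ /* No per-PPDU signal value is populated here, so mark it invalid; + * ath12k_dp_mon_fill_rx_rate() later refines the encoding, GI and + * rate index from the preamble type. + */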
+ rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL; + + if (center_freq >= ATH12K_MIN_6GHZ_FREQ && + center_freq <= ATH12K_MAX_6GHZ_FREQ) { + rx_status->band = NL80211_BAND_6GHZ; + } else if (center_freq >= ATH12K_MIN_2GHZ_FREQ && + center_freq <= ATH12K_MAX_2GHZ_FREQ) { + rx_status->band = NL80211_BAND_2GHZ; + } else if (center_freq >= ATH12K_MIN_5GHZ_FREQ && + center_freq <= ATH12K_MAX_5GHZ_FREQ) { + rx_status->band = NL80211_BAND_5GHZ; + } else { + rx_status->band = NUM_NL80211_BANDS; + } +} - rx_pkt_offset = ar->ab->hal.hal_desc_sz; - l2_hdr_offset = - ath12k_dp_rx_h_l3pad(ar->ab, (struct hal_rx_desc *)tail_msdu->data); - skb_pull(head_msdu, rx_pkt_offset + l2_hdr_offset); +static void +ath12k_dp_mon_fill_rx_rate(struct ath12k *ar, + struct hal_rx_mon_ppdu_info *ppdu_info, + struct ieee80211_rx_status *rx_status) +{ + struct ieee80211_supported_band *sband; + enum rx_msdu_start_pkt_type pkt_type; + u8 rate_mcs, nss, sgi; + bool is_cck; + + pkt_type = ppdu_info->preamble_type; + rate_mcs = ppdu_info->rate; + nss = ppdu_info->nss; + sgi = ppdu_info->gi; + + switch (pkt_type) { + case RX_MSDU_START_PKT_TYPE_11A: + case RX_MSDU_START_PKT_TYPE_11B: + is_cck = (pkt_type == RX_MSDU_START_PKT_TYPE_11B); + if (rx_status->band < NUM_NL80211_BANDS) { + sband = &ar->mac.sbands[rx_status->band]; + rx_status->rate_idx = ath12k_mac_hw_rate_to_idx(sband, rate_mcs, + is_cck); + } + break; + case RX_MSDU_START_PKT_TYPE_11N: + rx_status->encoding = RX_ENC_HT; + if (rate_mcs > ATH12K_HT_MCS_MAX) { + ath12k_warn(ar->ab, + "Received with invalid mcs in HT mode %d\n", + rate_mcs); + break; + } + rx_status->rate_idx = rate_mcs + (8 * (nss - 1)); + if (sgi) + rx_status->enc_flags |= RX_ENC_FLAG_SHORT_GI; + break; + case RX_MSDU_START_PKT_TYPE_11AC: + rx_status->encoding = RX_ENC_VHT; + rx_status->rate_idx = rate_mcs; + if (rate_mcs > ATH12K_VHT_MCS_MAX) { + ath12k_warn(ar->ab, + "Received with invalid mcs in VHT mode %d\n", + rate_mcs); + break; + } + if (sgi) + rx_status->enc_flags |= RX_ENC_FLAG_SHORT_GI; + break; + case RX_MSDU_START_PKT_TYPE_11AX: + rx_status->rate_idx = rate_mcs; + if (rate_mcs > ATH12K_HE_MCS_MAX) { + ath12k_warn(ar->ab, + "Received with invalid mcs in HE mode %d\n", + rate_mcs); + break; + } + rx_status->encoding = RX_ENC_HE; + rx_status->he_gi = ath12k_he_gi_to_nl80211_he_gi(sgi); + break; + case RX_MSDU_START_PKT_TYPE_11BE: + rx_status->rate_idx = rate_mcs; + if (rate_mcs > ATH12K_EHT_MCS_MAX) { + ath12k_warn(ar->ab, + "Received with invalid mcs in EHT mode %d\n", + rate_mcs); + break; + } + rx_status->encoding = RX_ENC_EHT; + rx_status->he_gi = ath12k_he_gi_to_nl80211_he_gi(sgi); + break; + default: + ath12k_dbg(ar->ab, ATH12K_DBG_DATA, + "monitor receives invalid preamble type %d", + pkt_type); + break; + } } static struct sk_buff * ath12k_dp_mon_rx_merg_msdus(struct ath12k *ar, - struct sk_buff *head_msdu, struct sk_buff *tail_msdu, - struct ieee80211_rx_status *rxs, bool *fcs_err) + struct dp_mon_mpdu *mon_mpdu, + struct hal_rx_mon_ppdu_info *ppdu_info, + struct ieee80211_rx_status *rxs) { struct ath12k_base *ab = ar->ab; struct sk_buff *msdu, *mpdu_buf, *prev_buf, *head_frag_list; - struct hal_rx_desc *rx_desc, *tail_rx_desc; - u8 *hdr_desc, *dest, decap_format; + struct sk_buff *head_msdu; + struct hal_rx_desc *rx_desc; + u8 *hdr_desc, *dest, decap_format = mon_mpdu->decap_format; struct ieee80211_hdr_3addr *wh; - u32 err_bitmap, frag_list_sum_len = 0; + struct ieee80211_channel *channel; + u32 frag_list_sum_len = 0; + u8 channel_num = ppdu_info->chan_num; mpdu_buf = NULL; + head_msdu = 
mon_mpdu->head; if (!head_msdu) goto err_merge_fail; - rx_desc = (struct hal_rx_desc *)head_msdu->data; - tail_rx_desc = (struct hal_rx_desc *)tail_msdu->data; + ath12k_dp_mon_fill_rx_stats_info(ar, ppdu_info, rxs); - err_bitmap = ath12k_dp_rx_h_mpdu_err(ab, tail_rx_desc); - if (err_bitmap & HAL_RX_MPDU_ERR_FCS) - *fcs_err = true; + if (unlikely(rxs->band == NUM_NL80211_BANDS || + !ath12k_ar_to_hw(ar)->wiphy->bands[rxs->band])) { + ath12k_dbg(ar->ab, ATH12K_DBG_DATA, + "sband is NULL for status band %d channel_num %d center_freq %d pdev_id %d\n", + rxs->band, channel_num, ppdu_info->freq, ar->pdev_idx); - decap_format = ath12k_dp_rx_h_decap_type(ab, tail_rx_desc); + spin_lock_bh(&ar->data_lock); + channel = ar->rx_channel; + if (channel) { + rxs->band = channel->band; + channel_num = + ieee80211_frequency_to_channel(channel->center_freq); + } + spin_unlock_bh(&ar->data_lock); + } - ath12k_dp_rx_h_ppdu(ar, tail_rx_desc, rxs); + if (rxs->band < NUM_NL80211_BANDS) + rxs->freq = ieee80211_channel_to_frequency(channel_num, + rxs->band); + + ath12k_dp_mon_fill_rx_rate(ar, ppdu_info, rxs); if (decap_format == DP_RX_DECAP_TYPE_RAW) { - ath12k_dp_mon_rx_msdus_set_payload(ar, head_msdu, tail_msdu); + skb_pull(head_msdu, ATH12K_MON_RX_PKT_OFFSET); prev_buf = head_msdu; msdu = head_msdu->next; head_frag_list = NULL; while (msdu) { - ath12k_dp_mon_rx_msdus_set_payload(ar, msdu, tail_msdu); + skb_pull(msdu, ATH12K_MON_RX_PKT_OFFSET); if (!head_frag_list) head_frag_list = msdu; @@ -1748,7 +1861,7 @@ ath12k_dp_mon_rx_merg_msdus(struct ath12k *ar, prev_buf->next = NULL; - skb_trim(prev_buf, prev_buf->len - HAL_RX_FCS_LEN); + skb_trim(prev_buf, prev_buf->len); if (head_frag_list) { skb_shinfo(head_msdu)->frag_list = head_frag_list; head_msdu->data_len = frag_list_sum_len; @@ -1771,7 +1884,7 @@ ath12k_dp_mon_rx_merg_msdus(struct ath12k *ar, msdu = head_msdu; while (msdu) { - ath12k_dp_mon_rx_msdus_set_payload(ar, msdu, tail_msdu); + skb_pull(msdu, ATH12K_MON_RX_PKT_OFFSET); if (qos_pkt) { dest = skb_push(msdu, sizeof(__le16)); if (!dest) @@ -1954,7 +2067,8 @@ static void ath12k_dp_mon_update_radiotap(struct ath12k *ar, static void ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct *napi, struct sk_buff *msdu, - struct ieee80211_rx_status *status) + struct ieee80211_rx_status *status, + u8 decap) { static const struct ieee80211_radiotap_he known = { .data1 = cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_DATA_MCS_KNOWN | @@ -1966,10 +2080,12 @@ static void ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct struct ieee80211_sta *pubsta = NULL; struct ath12k_peer *peer; struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); - u8 decap = DP_RX_DECAP_TYPE_RAW; + struct ath12k_dp_rx_info rx_info; bool is_mcbc = rxcb->is_mcbc; bool is_eapol_tkip = rxcb->is_eapol; + status->link_valid = 0; + if ((status->encoding == RX_ENC_HE) && !(status->flag & RX_FLAG_RADIOTAP_HE) && !(status->flag & RX_FLAG_SKIP_MONITOR)) { he = skb_push(msdu, sizeof(known)); @@ -1977,10 +2093,9 @@ static void ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct status->flag |= RX_FLAG_RADIOTAP_HE; } - if (!(status->flag & RX_FLAG_ONLY_MONITOR)) - decap = ath12k_dp_rx_h_decap_type(ar->ab, rxcb->rx_desc); spin_lock_bh(&ar->ab->base_lock); - peer = ath12k_dp_rx_h_find_peer(ar->ab, msdu); + rx_info.addr2_present = false; + peer = ath12k_dp_rx_h_find_peer(ar->ab, msdu, &rx_info); if (peer && peer->sta) { pubsta = peer->sta; if (pubsta->valid_links) { @@ -2035,25 +2150,23 @@ static void 
ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct } static int ath12k_dp_mon_rx_deliver(struct ath12k *ar, - struct sk_buff *head_msdu, struct sk_buff *tail_msdu, + struct dp_mon_mpdu *mon_mpdu, struct hal_rx_mon_ppdu_info *ppduinfo, struct napi_struct *napi) { struct ath12k_pdev_dp *dp = &ar->dp; struct sk_buff *mon_skb, *skb_next, *header; struct ieee80211_rx_status *rxs = &dp->rx_status; - bool fcs_err = false; + u8 decap = DP_RX_DECAP_TYPE_RAW; - mon_skb = ath12k_dp_mon_rx_merg_msdus(ar, - head_msdu, tail_msdu, - rxs, &fcs_err); + mon_skb = ath12k_dp_mon_rx_merg_msdus(ar, mon_mpdu, ppduinfo, rxs); if (!mon_skb) goto mon_deliver_fail; header = mon_skb; rxs->flag = 0; - if (fcs_err) + if (mon_mpdu->err_bitmap & HAL_RX_MPDU_ERR_FCS) rxs->flag = RX_FLAG_FAILED_FCS_CRC; do { @@ -2070,8 +2183,12 @@ static int ath12k_dp_mon_rx_deliver(struct ath12k *ar, rxs->flag |= RX_FLAG_ALLOW_SAME_PN; } rxs->flag |= RX_FLAG_ONLY_MONITOR; + + if (!(rxs->flag & RX_FLAG_ONLY_MONITOR)) + decap = mon_mpdu->decap_format; + ath12k_dp_mon_update_radiotap(ar, ppduinfo, mon_skb, rxs); - ath12k_dp_mon_rx_deliver_msdu(ar, napi, mon_skb, rxs); + ath12k_dp_mon_rx_deliver_msdu(ar, napi, mon_skb, rxs, decap); mon_skb = skb_next; } while (mon_skb); rxs->flag = 0; @@ -2079,7 +2196,7 @@ static int ath12k_dp_mon_rx_deliver(struct ath12k *ar, return 0; mon_deliver_fail: - mon_skb = head_msdu; + mon_skb = mon_mpdu->head; while (mon_skb) { skb_next = mon_skb->next; dev_kfree_skb_any(mon_skb); @@ -2088,6 +2205,144 @@ mon_deliver_fail: return -EINVAL; } +static int ath12k_dp_pkt_set_pktlen(struct sk_buff *skb, u32 len) +{ + if (skb->len > len) { + skb_trim(skb, len); + } else { + if (skb_tailroom(skb) < len - skb->len) { + if ((pskb_expand_head(skb, 0, + len - skb->len - skb_tailroom(skb), + GFP_ATOMIC))) { + return -ENOMEM; + } + } + skb_put(skb, (len - skb->len)); + } + + return 0; +} + +static void ath12k_dp_mon_parse_rx_msdu_end_err(u32 info, u32 *errmap) +{ + if (info & RX_MSDU_END_INFO13_FCS_ERR) + *errmap |= HAL_RX_MPDU_ERR_FCS; + + if (info & RX_MSDU_END_INFO13_DECRYPT_ERR) + *errmap |= HAL_RX_MPDU_ERR_DECRYPT; + + if (info & RX_MSDU_END_INFO13_TKIP_MIC_ERR) + *errmap |= HAL_RX_MPDU_ERR_TKIP_MIC; + + if (info & RX_MSDU_END_INFO13_A_MSDU_ERROR) + *errmap |= HAL_RX_MPDU_ERR_AMSDU_ERR; + + if (info & RX_MSDU_END_INFO13_OVERFLOW_ERR) + *errmap |= HAL_RX_MPDU_ERR_OVERFLOW; + + if (info & RX_MSDU_END_INFO13_MSDU_LEN_ERR) + *errmap |= HAL_RX_MPDU_ERR_MSDU_LEN; + + if (info & RX_MSDU_END_INFO13_MPDU_LEN_ERR) + *errmap |= HAL_RX_MPDU_ERR_MPDU_LEN; +} + +static int +ath12k_dp_mon_parse_status_msdu_end(struct ath12k_mon_data *pmon, + const struct hal_rx_msdu_end *msdu_end) +{ + struct dp_mon_mpdu *mon_mpdu = pmon->mon_mpdu; + + ath12k_dp_mon_parse_rx_msdu_end_err(__le32_to_cpu(msdu_end->info2), + &mon_mpdu->err_bitmap); + + mon_mpdu->decap_format = le32_get_bits(msdu_end->info1, + RX_MSDU_END_INFO11_DECAP_FORMAT); + + return 0; +} + +static int +ath12k_dp_mon_parse_status_buf(struct ath12k *ar, + struct ath12k_mon_data *pmon, + const struct dp_mon_packet_info *packet_info) +{ + struct ath12k_base *ab = ar->ab; + struct dp_rxdma_mon_ring *buf_ring = &ab->dp.rxdma_mon_buf_ring; + struct sk_buff *msdu; + int buf_id; + u32 offset; + + buf_id = u32_get_bits(packet_info->cookie, DP_RXDMA_BUF_COOKIE_BUF_ID); + + spin_lock_bh(&buf_ring->idr_lock); + msdu = idr_remove(&buf_ring->bufs_idr, buf_id); + spin_unlock_bh(&buf_ring->idr_lock); + + if (unlikely(!msdu)) { + ath12k_warn(ab, "mon dest desc with inval buf_id %d\n", 
buf_id); + return 0; + } + + dma_unmap_single(ab->dev, ATH12K_SKB_RXCB(msdu)->paddr, + msdu->len + skb_tailroom(msdu), + DMA_FROM_DEVICE); + + offset = packet_info->dma_length + ATH12K_MON_RX_DOT11_OFFSET; + if (ath12k_dp_pkt_set_pktlen(msdu, offset)) { + dev_kfree_skb_any(msdu); + goto dest_replenish; + } + + if (!pmon->mon_mpdu->head) + pmon->mon_mpdu->head = msdu; + else + pmon->mon_mpdu->tail->next = msdu; + + pmon->mon_mpdu->tail = msdu; + +dest_replenish: + ath12k_dp_mon_buf_replenish(ab, buf_ring, 1); + + return 0; +} + +static int +ath12k_dp_mon_parse_rx_dest_tlv(struct ath12k *ar, + struct ath12k_mon_data *pmon, + enum hal_rx_mon_status hal_status, + const void *tlv_data) +{ + switch (hal_status) { + case HAL_RX_MON_STATUS_MPDU_START: + if (WARN_ON_ONCE(pmon->mon_mpdu)) + break; + + pmon->mon_mpdu = kzalloc(sizeof(*pmon->mon_mpdu), GFP_ATOMIC); + if (!pmon->mon_mpdu) + return -ENOMEM; + break; + case HAL_RX_MON_STATUS_BUF_ADDR: + return ath12k_dp_mon_parse_status_buf(ar, pmon, tlv_data); + case HAL_RX_MON_STATUS_MPDU_END: + /* If no MSDU then free empty MPDU */ + if (pmon->mon_mpdu->tail) { + pmon->mon_mpdu->tail->next = NULL; + list_add_tail(&pmon->mon_mpdu->list, &pmon->dp_rx_mon_mpdu_list); + } else { + kfree(pmon->mon_mpdu); + } + pmon->mon_mpdu = NULL; + break; + case HAL_RX_MON_STATUS_MSDU_END: + return ath12k_dp_mon_parse_status_msdu_end(pmon, tlv_data); + default: + break; + } + + return 0; +} + static enum hal_rx_mon_status ath12k_dp_mon_parse_rx_dest(struct ath12k *ar, struct ath12k_mon_data *pmon, struct sk_buff *skb) @@ -2114,14 +2369,20 @@ ath12k_dp_mon_parse_rx_dest(struct ath12k *ar, struct ath12k_mon_data *pmon, tlv_len = le64_get_bits(tlv->tl, HAL_TLV_64_HDR_LEN); hal_status = ath12k_dp_mon_rx_parse_status_tlv(ar, pmon, tlv); + + if (ar->monitor_started && + ath12k_dp_mon_parse_rx_dest_tlv(ar, pmon, hal_status, tlv->value)) + return HAL_RX_MON_STATUS_PPDU_DONE; + ptr += sizeof(*tlv) + tlv_len; ptr = PTR_ALIGN(ptr, HAL_TLV_64_ALIGN); - if ((ptr - skb->data) >= DP_RX_BUFFER_SIZE) + if ((ptr - skb->data) > skb->len) break; } while ((hal_status == HAL_RX_MON_STATUS_PPDU_NOT_DONE) || (hal_status == HAL_RX_MON_STATUS_BUF_ADDR) || + (hal_status == HAL_RX_MON_STATUS_MPDU_START) || (hal_status == HAL_RX_MON_STATUS_MPDU_END) || (hal_status == HAL_RX_MON_STATUS_MSDU_END)); @@ -2141,23 +2402,21 @@ ath12k_dp_mon_rx_parse_mon_status(struct ath12k *ar, struct hal_rx_mon_ppdu_info *ppdu_info = &pmon->mon_ppdu_info; struct dp_mon_mpdu *tmp; struct dp_mon_mpdu *mon_mpdu = pmon->mon_mpdu; - struct sk_buff *head_msdu, *tail_msdu; - enum hal_rx_mon_status hal_status = HAL_RX_MON_STATUS_BUF_DONE; + enum hal_rx_mon_status hal_status; - ath12k_dp_mon_parse_rx_dest(ar, pmon, skb); + hal_status = ath12k_dp_mon_parse_rx_dest(ar, pmon, skb); + if (hal_status != HAL_RX_MON_STATUS_PPDU_DONE) + return hal_status; list_for_each_entry_safe(mon_mpdu, tmp, &pmon->dp_rx_mon_mpdu_list, list) { list_del(&mon_mpdu->list); - head_msdu = mon_mpdu->head; - tail_msdu = mon_mpdu->tail; - if (head_msdu && tail_msdu) { - ath12k_dp_mon_rx_deliver(ar, head_msdu, - tail_msdu, ppdu_info, napi); - } + if (mon_mpdu->head && mon_mpdu->tail) + ath12k_dp_mon_rx_deliver(ar, mon_mpdu, ppdu_info, napi); kfree(mon_mpdu); } + return hal_status; } @@ -2838,16 +3097,13 @@ ath12k_dp_mon_tx_process_ppdu_info(struct ath12k *ar, struct dp_mon_tx_ppdu_info *tx_ppdu_info) { struct dp_mon_mpdu *tmp, *mon_mpdu; - struct sk_buff *head_msdu, *tail_msdu; list_for_each_entry_safe(mon_mpdu, tmp, &tx_ppdu_info->dp_tx_mon_mpdu_list, list) 
{ list_del(&mon_mpdu->list); - head_msdu = mon_mpdu->head; - tail_msdu = mon_mpdu->tail; - if (head_msdu) - ath12k_dp_mon_rx_deliver(ar, head_msdu, tail_msdu, + if (mon_mpdu->head) + ath12k_dp_mon_rx_deliver(ar, mon_mpdu, &tx_ppdu_info->rx_status, napi); kfree(mon_mpdu); @@ -2949,11 +3205,10 @@ static void ath12k_dp_mon_rx_update_peer_su_stats(struct ath12k *ar, struct ath12k_rx_peer_stats *rx_stats = arsta->rx_stats; u32 num_msdu; - if (!rx_stats) - return; - arsta->rssi_comb = ppdu_info->rssi_comb; ewma_avg_rssi_add(&arsta->avg_rssi, ppdu_info->rssi_comb); + if (!rx_stats) + return; num_msdu = ppdu_info->tcp_msdu_count + ppdu_info->tcp_ack_msdu_count + ppdu_info->udp_msdu_count + ppdu_info->other_msdu_count; @@ -3126,14 +3381,12 @@ ath12k_dp_mon_rx_update_user_stats(struct ath12k *ar, ahsta = ath12k_sta_to_ahsta(peer->sta); arsta = &ahsta->deflink; + arsta->rssi_comb = ppdu_info->rssi_comb; + ewma_avg_rssi_add(&arsta->avg_rssi, ppdu_info->rssi_comb); rx_stats = arsta->rx_stats; - if (!rx_stats) return; - arsta->rssi_comb = ppdu_info->rssi_comb; - ewma_avg_rssi_add(&arsta->avg_rssi, ppdu_info->rssi_comb); - num_msdu = user_stats->tcp_msdu_count + user_stats->tcp_ack_msdu_count + user_stats->udp_msdu_count + user_stats->other_msdu_count; @@ -3346,7 +3599,7 @@ move_next: ath12k_dp_mon_rx_memset_ppdu_info(ppdu_info); while ((skb = __skb_dequeue(&skb_list))) { - hal_status = ath12k_dp_mon_parse_rx_dest(ar, pmon, skb); + hal_status = ath12k_dp_mon_rx_parse_mon_status(ar, pmon, skb, napi); if (hal_status != HAL_RX_MON_STATUS_PPDU_DONE) { ppdu_info->ppdu_continuation = true; dev_kfree_skb_any(skb); diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.h b/drivers/net/wireless/ath/ath12k/dp_mon.h index e4368eb42aca..9f3adee51cb2 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.h +++ b/drivers/net/wireless/ath/ath12k/dp_mon.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2019-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef ATH12K_DP_MON_H @@ -9,6 +9,9 @@ #include "core.h" +#define ATH12K_MON_RX_DOT11_OFFSET 5 +#define ATH12K_MON_RX_PKT_OFFSET 8 + enum dp_monitor_mode { ATH12K_DP_TX_MONITOR_MODE, ATH12K_DP_RX_MONITOR_MODE diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 75bf4211ad42..1abfbd15f13c 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -228,12 +228,6 @@ static void ath12k_dp_rx_desc_get_crypto_header(struct ath12k_base *ab, ab->hal_rx_ops->rx_desc_get_crypto_header(desc, crypto_hdr, enctype); } -static u16 ath12k_dp_rxdesc_get_mpdu_frame_ctrl(struct ath12k_base *ab, - struct hal_rx_desc *desc) -{ - return ab->hal_rx_ops->rx_desc_get_mpdu_frame_ctl(desc); -} - static inline u8 ath12k_dp_rx_get_msdu_src_link(struct ath12k_base *ab, struct hal_rx_desc *desc) { @@ -556,9 +550,9 @@ void ath12k_dp_rx_reo_cmd_list_cleanup(struct ath12k_base *ab) spin_lock_bh(&dp->reo_cmd_lock); list_for_each_entry_safe(cmd, tmp, &dp->reo_cmd_list, list) { list_del(&cmd->list); - dma_unmap_single(ab->dev, cmd->data.paddr, - cmd->data.size, DMA_BIDIRECTIONAL); - kfree(cmd->data.vaddr); + dma_unmap_single(ab->dev, cmd->data.qbuf.paddr_aligned, + cmd->data.qbuf.size, DMA_BIDIRECTIONAL); + kfree(cmd->data.qbuf.vaddr); kfree(cmd); } @@ -566,9 +560,9 @@ void ath12k_dp_rx_reo_cmd_list_cleanup(struct ath12k_base *ab) &dp->reo_cmd_cache_flush_list, list) { list_del(&cmd_cache->list); dp->reo_cmd_cache_flush_count--; - dma_unmap_single(ab->dev, cmd_cache->data.paddr, - cmd_cache->data.size, DMA_BIDIRECTIONAL); - kfree(cmd_cache->data.vaddr); + dma_unmap_single(ab->dev, cmd_cache->data.qbuf.paddr_aligned, + cmd_cache->data.qbuf.size, DMA_BIDIRECTIONAL); + kfree(cmd_cache->data.qbuf.vaddr); kfree(cmd_cache); } spin_unlock_bh(&dp->reo_cmd_lock); @@ -583,10 +577,10 @@ static void ath12k_dp_reo_cmd_free(struct ath12k_dp *dp, void *ctx, ath12k_warn(dp->ab, "failed to flush rx tid hw desc, tid %d status %d\n", rx_tid->tid, status); - dma_unmap_single(dp->ab->dev, rx_tid->paddr, rx_tid->size, + dma_unmap_single(dp->ab->dev, rx_tid->qbuf.paddr_aligned, rx_tid->qbuf.size, DMA_BIDIRECTIONAL); - kfree(rx_tid->vaddr); - rx_tid->vaddr = NULL; + kfree(rx_tid->qbuf.vaddr); + rx_tid->qbuf.vaddr = NULL; } static int ath12k_dp_reo_cmd_send(struct ath12k_base *ab, struct ath12k_dp_rx_tid *rx_tid, @@ -641,13 +635,13 @@ static void ath12k_dp_reo_cache_flush(struct ath12k_base *ab, unsigned long tot_desc_sz, desc_sz; int ret; - tot_desc_sz = rx_tid->size; + tot_desc_sz = rx_tid->qbuf.size; desc_sz = ath12k_hal_reo_qdesc_size(0, HAL_DESC_REO_NON_QOS_TID); while (tot_desc_sz > desc_sz) { tot_desc_sz -= desc_sz; - cmd.addr_lo = lower_32_bits(rx_tid->paddr + tot_desc_sz); - cmd.addr_hi = upper_32_bits(rx_tid->paddr); + cmd.addr_lo = lower_32_bits(rx_tid->qbuf.paddr_aligned + tot_desc_sz); + cmd.addr_hi = upper_32_bits(rx_tid->qbuf.paddr_aligned); ret = ath12k_dp_reo_cmd_send(ab, rx_tid, HAL_REO_CMD_FLUSH_CACHE, &cmd, NULL); @@ -658,8 +652,8 @@ static void ath12k_dp_reo_cache_flush(struct ath12k_base *ab, } memset(&cmd, 0, sizeof(cmd)); - cmd.addr_lo = lower_32_bits(rx_tid->paddr); - cmd.addr_hi = upper_32_bits(rx_tid->paddr); + cmd.addr_lo = lower_32_bits(rx_tid->qbuf.paddr_aligned); + cmd.addr_hi = upper_32_bits(rx_tid->qbuf.paddr_aligned); cmd.flag = HAL_REO_CMD_FLG_NEED_STATUS; ret = ath12k_dp_reo_cmd_send(ab, rx_tid, HAL_REO_CMD_FLUSH_CACHE, @@ -667,10 +661,10 @@ static void ath12k_dp_reo_cache_flush(struct ath12k_base *ab, if (ret) { 
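/* No completion callback will run after a send failure, so the * queue buffer is unmapped and freed right here. */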
ath12k_err(ab, "failed to send HAL_REO_CMD_FLUSH_CACHE cmd, tid %d (%d)\n", rx_tid->tid, ret); - dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size, + dma_unmap_single(ab->dev, rx_tid->qbuf.paddr_aligned, rx_tid->qbuf.size, DMA_BIDIRECTIONAL); - kfree(rx_tid->vaddr); - rx_tid->vaddr = NULL; + kfree(rx_tid->qbuf.vaddr); + rx_tid->qbuf.vaddr = NULL; } } @@ -729,10 +723,10 @@ static void ath12k_dp_rx_tid_del_func(struct ath12k_dp *dp, void *ctx, return; free_desc: - dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size, + dma_unmap_single(ab->dev, rx_tid->qbuf.paddr_aligned, rx_tid->qbuf.size, DMA_BIDIRECTIONAL); - kfree(rx_tid->vaddr); - rx_tid->vaddr = NULL; + kfree(rx_tid->qbuf.vaddr); + rx_tid->qbuf.vaddr = NULL; } static void ath12k_peer_rx_tid_qref_setup(struct ath12k_base *ab, u16 peer_id, u16 tid, @@ -762,6 +756,7 @@ static void ath12k_peer_rx_tid_qref_setup(struct ath12k_base *ab, u16 peer_id, u qref->info1 = u32_encode_bits(upper_32_bits(paddr), BUFFER_ADDR_INFO1_ADDR) | u32_encode_bits(tid, DP_REO_QREF_NUM); + ath12k_hal_reo_shared_qaddr_cache_clear(ab); } static void ath12k_peer_rx_tid_qref_reset(struct ath12k_base *ab, u16 peer_id, u16 tid) @@ -801,8 +796,8 @@ void ath12k_dp_rx_peer_tid_delete(struct ath12k *ar, return; cmd.flag = HAL_REO_CMD_FLG_NEED_STATUS; - cmd.addr_lo = lower_32_bits(rx_tid->paddr); - cmd.addr_hi = upper_32_bits(rx_tid->paddr); + cmd.addr_lo = lower_32_bits(rx_tid->qbuf.paddr_aligned); + cmd.addr_hi = upper_32_bits(rx_tid->qbuf.paddr_aligned); cmd.upd0 = HAL_REO_CMD_UPD0_VLD; ret = ath12k_dp_reo_cmd_send(ar->ab, rx_tid, HAL_REO_CMD_UPDATE_RX_QUEUE, &cmd, @@ -810,10 +805,10 @@ void ath12k_dp_rx_peer_tid_delete(struct ath12k *ar, if (ret) { ath12k_err(ar->ab, "failed to send HAL_REO_CMD_UPDATE_RX_QUEUE cmd, tid %d (%d)\n", tid, ret); - dma_unmap_single(ar->ab->dev, rx_tid->paddr, rx_tid->size, - DMA_BIDIRECTIONAL); - kfree(rx_tid->vaddr); - rx_tid->vaddr = NULL; + dma_unmap_single(ar->ab->dev, rx_tid->qbuf.paddr_aligned, + rx_tid->qbuf.size, DMA_BIDIRECTIONAL); + kfree(rx_tid->qbuf.vaddr); + rx_tid->qbuf.vaddr = NULL; } if (peer->mlo) @@ -909,8 +904,8 @@ static int ath12k_peer_rx_tid_reo_update(struct ath12k *ar, struct ath12k_hal_reo_cmd cmd = {0}; int ret; - cmd.addr_lo = lower_32_bits(rx_tid->paddr); - cmd.addr_hi = upper_32_bits(rx_tid->paddr); + cmd.addr_lo = lower_32_bits(rx_tid->qbuf.paddr_aligned); + cmd.addr_hi = upper_32_bits(rx_tid->qbuf.paddr_aligned); cmd.flag = HAL_REO_CMD_FLG_NEED_STATUS; cmd.upd0 = HAL_REO_CMD_UPD0_BA_WINDOW_SIZE; cmd.ba_window_size = ba_win_sz; @@ -934,18 +929,67 @@ static int ath12k_peer_rx_tid_reo_update(struct ath12k *ar, return 0; } +static int ath12k_dp_rx_assign_reoq(struct ath12k_base *ab, + struct ath12k_sta *ahsta, + struct ath12k_dp_rx_tid *rx_tid, + u16 ssn, enum hal_pn_type pn_type) +{ + u32 ba_win_sz = rx_tid->ba_win_sz; + struct ath12k_reoq_buf *buf; + void *vaddr, *vaddr_aligned; + dma_addr_t paddr_aligned; + u8 tid = rx_tid->tid; + u32 hw_desc_sz; + int ret; + + buf = &ahsta->reoq_bufs[tid]; + if (!buf->vaddr) { + /* TODO: Optimize the memory allocation for qos tid based on + * the actual BA window size in REO tid update path. 
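+ * Until then, the non-QoS TID gets an exact-size descriptor while + * QoS TIDs are provisioned for the maximum window, DP_BA_WIN_SZ_MAX.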
+ */ + if (tid == HAL_DESC_REO_NON_QOS_TID) + hw_desc_sz = ath12k_hal_reo_qdesc_size(ba_win_sz, tid); + else + hw_desc_sz = ath12k_hal_reo_qdesc_size(DP_BA_WIN_SZ_MAX, tid); + + vaddr = kzalloc(hw_desc_sz + HAL_LINK_DESC_ALIGN - 1, GFP_ATOMIC); + if (!vaddr) + return -ENOMEM; + + vaddr_aligned = PTR_ALIGN(vaddr, HAL_LINK_DESC_ALIGN); + + ath12k_hal_reo_qdesc_setup(vaddr_aligned, tid, ba_win_sz, + ssn, pn_type); + + paddr_aligned = dma_map_single(ab->dev, vaddr_aligned, hw_desc_sz, + DMA_BIDIRECTIONAL); + ret = dma_mapping_error(ab->dev, paddr_aligned); + if (ret) { + kfree(vaddr); + return ret; + } + + buf->vaddr = vaddr; + buf->paddr_aligned = paddr_aligned; + buf->size = hw_desc_sz; + } + + rx_tid->qbuf = *buf; + rx_tid->active = true; + + return 0; +} + int ath12k_dp_rx_peer_tid_setup(struct ath12k *ar, const u8 *peer_mac, int vdev_id, u8 tid, u32 ba_win_sz, u16 ssn, enum hal_pn_type pn_type) { struct ath12k_base *ab = ar->ab; struct ath12k_dp *dp = &ab->dp; - struct hal_rx_reo_queue *addr_aligned; struct ath12k_peer *peer; + struct ath12k_sta *ahsta; struct ath12k_dp_rx_tid *rx_tid; - u32 hw_desc_sz; - void *vaddr; - dma_addr_t paddr; + dma_addr_t paddr_aligned; int ret; spin_lock_bh(&ab->base_lock); @@ -957,7 +1001,8 @@ int ath12k_dp_rx_peer_tid_setup(struct ath12k *ar, const u8 *peer_mac, int vdev_ return -ENOENT; } - if (!peer->primary_link) { + if (ab->hw_params->dp_primary_link_only && + !peer->primary_link) { spin_unlock_bh(&ab->base_lock); return 0; } @@ -977,9 +1022,9 @@ int ath12k_dp_rx_peer_tid_setup(struct ath12k *ar, const u8 *peer_mac, int vdev_ } rx_tid = &peer->rx_tid[tid]; + paddr_aligned = rx_tid->qbuf.paddr_aligned; /* Update the tid queue if it is already setup */ if (rx_tid->active) { - paddr = rx_tid->paddr; ret = ath12k_peer_rx_tid_reo_update(ar, peer, rx_tid, ba_win_sz, ssn, true); spin_unlock_bh(&ab->base_lock); @@ -991,8 +1036,8 @@ int ath12k_dp_rx_peer_tid_setup(struct ath12k *ar, const u8 *peer_mac, int vdev_ if (!ab->hw_params->reoq_lut_support) { ret = ath12k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac, - paddr, tid, 1, - ba_win_sz); + paddr_aligned, tid, + 1, ba_win_sz); if (ret) { ath12k_warn(ab, "failed to setup peer rx reorder queuefor tid %d: %d\n", tid, ret); @@ -1007,61 +1052,34 @@ int ath12k_dp_rx_peer_tid_setup(struct ath12k *ar, const u8 *peer_mac, int vdev_ rx_tid->ba_win_sz = ba_win_sz; - /* TODO: Optimize the memory allocation for qos tid based on - * the actual BA window size in REO tid update path. 
- */ - if (tid == HAL_DESC_REO_NON_QOS_TID) - hw_desc_sz = ath12k_hal_reo_qdesc_size(ba_win_sz, tid); - else - hw_desc_sz = ath12k_hal_reo_qdesc_size(DP_BA_WIN_SZ_MAX, tid); - - vaddr = kzalloc(hw_desc_sz + HAL_LINK_DESC_ALIGN - 1, GFP_ATOMIC); - if (!vaddr) { - spin_unlock_bh(&ab->base_lock); - return -ENOMEM; - } - - addr_aligned = PTR_ALIGN(vaddr, HAL_LINK_DESC_ALIGN); - - ath12k_hal_reo_qdesc_setup(addr_aligned, tid, ba_win_sz, - ssn, pn_type); - - paddr = dma_map_single(ab->dev, addr_aligned, hw_desc_sz, - DMA_BIDIRECTIONAL); - - ret = dma_mapping_error(ab->dev, paddr); + ahsta = ath12k_sta_to_ahsta(peer->sta); + ret = ath12k_dp_rx_assign_reoq(ab, ahsta, rx_tid, ssn, pn_type); if (ret) { spin_unlock_bh(&ab->base_lock); - goto err_mem_free; + ath12k_warn(ab, "failed to assign reoq buf for rx tid %u\n", tid); + return ret; } - rx_tid->vaddr = vaddr; - rx_tid->paddr = paddr; - rx_tid->size = hw_desc_sz; - rx_tid->active = true; - if (ab->hw_params->reoq_lut_support) { /* Update the REO queue LUT at the corresponding peer id * and tid with qaddr. */ if (peer->mlo) - ath12k_peer_rx_tid_qref_setup(ab, peer->ml_id, tid, paddr); + ath12k_peer_rx_tid_qref_setup(ab, peer->ml_id, tid, + paddr_aligned); else - ath12k_peer_rx_tid_qref_setup(ab, peer->peer_id, tid, paddr); + ath12k_peer_rx_tid_qref_setup(ab, peer->peer_id, tid, + paddr_aligned); spin_unlock_bh(&ab->base_lock); } else { spin_unlock_bh(&ab->base_lock); ret = ath12k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac, - paddr, tid, 1, ba_win_sz); + paddr_aligned, tid, 1, + ba_win_sz); } return ret; - -err_mem_free: - kfree(vaddr); - - return ret; } int ath12k_dp_rx_ampdu_start(struct ath12k *ar, @@ -1196,8 +1214,8 @@ int ath12k_dp_rx_peer_pn_replay_config(struct ath12k_link_vif *arvif, rx_tid = &peer->rx_tid[tid]; if (!rx_tid->active) continue; - cmd.addr_lo = lower_32_bits(rx_tid->paddr); - cmd.addr_hi = upper_32_bits(rx_tid->paddr); + cmd.addr_lo = lower_32_bits(rx_tid->qbuf.paddr_aligned); + cmd.addr_hi = upper_32_bits(rx_tid->qbuf.paddr_aligned); ret = ath12k_dp_reo_cmd_send(ab, rx_tid, HAL_REO_CMD_UPDATE_RX_QUEUE, &cmd, NULL); @@ -1823,6 +1841,7 @@ static int ath12k_dp_rx_msdu_coalesce(struct ath12k *ar, struct hal_rx_desc *ldesc; int space_extra, rem_len, buf_len; u32 hal_rx_desc_sz = ar->ab->hal.hal_desc_sz; + bool is_continuation; /* As the msdu is spread across multiple rx buffers, * find the offset to the start of msdu for computing @@ -1871,7 +1890,8 @@ static int ath12k_dp_rx_msdu_coalesce(struct ath12k *ar, rem_len = msdu_len - buf_first_len; while ((skb = __skb_dequeue(msdu_list)) != NULL && rem_len > 0) { rxcb = ATH12K_SKB_RXCB(skb); - if (rxcb->is_continuation) + is_continuation = rxcb->is_continuation; + if (is_continuation) buf_len = DP_RX_BUFFER_SIZE - hal_rx_desc_sz; else buf_len = rem_len; @@ -1889,7 +1909,7 @@ static int ath12k_dp_rx_msdu_coalesce(struct ath12k *ar, dev_kfree_skb_any(skb); rem_len -= buf_len; - if (!rxcb->is_continuation) + if (!is_continuation) break; } @@ -1914,21 +1934,14 @@ static struct sk_buff *ath12k_dp_rx_get_msdu_last_buf(struct sk_buff_head *msdu_ return NULL; } -static void ath12k_dp_rx_h_csum_offload(struct ath12k *ar, struct sk_buff *msdu) +static void ath12k_dp_rx_h_csum_offload(struct sk_buff *msdu, + struct ath12k_dp_rx_info *rx_info) { - struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); - struct ath12k_base *ab = ar->ab; - bool ip_csum_fail, l4_csum_fail; - - ip_csum_fail = ath12k_dp_rx_h_ip_cksum_fail(ab, rxcb->rx_desc); - l4_csum_fail = ath12k_dp_rx_h_l4_cksum_fail(ab, 
rxcb->rx_desc); - - msdu->ip_summed = (ip_csum_fail || l4_csum_fail) ? - CHECKSUM_NONE : CHECKSUM_UNNECESSARY; + msdu->ip_summed = (rx_info->ip_csum_fail || rx_info->l4_csum_fail) ? + CHECKSUM_NONE : CHECKSUM_UNNECESSARY; } -static int ath12k_dp_rx_crypto_mic_len(struct ath12k *ar, - enum hal_encrypt_type enctype) +int ath12k_dp_rx_crypto_mic_len(struct ath12k *ar, enum hal_encrypt_type enctype) { switch (enctype) { case HAL_ENCRYPT_TYPE_OPEN: @@ -2122,10 +2135,13 @@ static void ath12k_get_dot11_hdr_from_rx_desc(struct ath12k *ar, struct hal_rx_desc *rx_desc = rxcb->rx_desc; struct ath12k_base *ab = ar->ab; size_t hdr_len, crypto_len; - struct ieee80211_hdr *hdr; - u16 qos_ctl; - __le16 fc; - u8 *crypto_hdr; + struct ieee80211_hdr hdr; + __le16 qos_ctl; + u8 *crypto_hdr, mesh_ctrl; + + ath12k_dp_rx_desc_get_dot11_hdr(ab, rx_desc, &hdr); + hdr_len = ieee80211_hdrlen(hdr.frame_control); + mesh_ctrl = ath12k_dp_rx_h_mesh_ctl_present(ab, rx_desc); if (!(status->flag & RX_FLAG_IV_STRIPPED)) { crypto_len = ath12k_dp_rx_crypto_param_len(ar, enctype); @@ -2133,27 +2149,21 @@ static void ath12k_get_dot11_hdr_from_rx_desc(struct ath12k *ar, ath12k_dp_rx_desc_get_crypto_header(ab, rx_desc, crypto_hdr, enctype); } - fc = cpu_to_le16(ath12k_dp_rxdesc_get_mpdu_frame_ctrl(ab, rx_desc)); - hdr_len = ieee80211_hdrlen(fc); skb_push(msdu, hdr_len); - hdr = (struct ieee80211_hdr *)msdu->data; - hdr->frame_control = fc; - - /* Get wifi header from rx_desc */ - ath12k_dp_rx_desc_get_dot11_hdr(ab, rx_desc, hdr); + memcpy(msdu->data, &hdr, min(hdr_len, sizeof(hdr))); if (rxcb->is_mcbc) status->flag &= ~RX_FLAG_PN_VALIDATED; /* Add QOS header */ - if (ieee80211_is_data_qos(hdr->frame_control)) { - qos_ctl = rxcb->tid; - if (ath12k_dp_rx_h_mesh_ctl_present(ab, rx_desc)) - qos_ctl |= IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT; + if (ieee80211_is_data_qos(hdr.frame_control)) { + struct ieee80211_hdr *qos_ptr = (struct ieee80211_hdr *)msdu->data; - /* TODO: Add other QoS ctl fields when required */ - memcpy(msdu->data + (hdr_len - IEEE80211_QOS_CTL_LEN), - &qos_ctl, IEEE80211_QOS_CTL_LEN); + qos_ctl = cpu_to_le16(rxcb->tid & IEEE80211_QOS_CTL_TID_MASK); + if (mesh_ctrl) + qos_ctl |= cpu_to_le16(IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT); + + memcpy(ieee80211_get_qos_ctl(qos_ptr), &qos_ctl, IEEE80211_QOS_CTL_LEN); } } @@ -2229,10 +2239,10 @@ static void ath12k_dp_rx_h_undecap(struct ath12k *ar, struct sk_buff *msdu, } struct ath12k_peer * -ath12k_dp_rx_h_find_peer(struct ath12k_base *ab, struct sk_buff *msdu) +ath12k_dp_rx_h_find_peer(struct ath12k_base *ab, struct sk_buff *msdu, + struct ath12k_dp_rx_info *rx_info) { struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); - struct hal_rx_desc *rx_desc = rxcb->rx_desc; struct ath12k_peer *peer = NULL; lockdep_assert_held(&ab->base_lock); @@ -2243,40 +2253,41 @@ ath12k_dp_rx_h_find_peer(struct ath12k_base *ab, struct sk_buff *msdu) if (peer) return peer; - if (!rx_desc || !(ath12k_dp_rxdesc_mac_addr2_valid(ab, rx_desc))) - return NULL; + if (rx_info->addr2_present) + peer = ath12k_peer_find_by_addr(ab, rx_info->addr2); - peer = ath12k_peer_find_by_addr(ab, - ath12k_dp_rxdesc_get_mpdu_start_addr2(ab, - rx_desc)); return peer; } static void ath12k_dp_rx_h_mpdu(struct ath12k *ar, struct sk_buff *msdu, struct hal_rx_desc *rx_desc, - struct ieee80211_rx_status *rx_status) + struct ath12k_dp_rx_info *rx_info) { - bool fill_crypto_hdr; struct ath12k_base *ab = ar->ab; struct ath12k_skb_rxcb *rxcb; enum hal_encrypt_type enctype; bool is_decrypted = false; struct ieee80211_hdr *hdr; 
struct ath12k_peer *peer; + struct ieee80211_rx_status *rx_status = rx_info->rx_status; u32 err_bitmap; /* PN for multicast packets will be checked in mac80211 */ rxcb = ATH12K_SKB_RXCB(msdu); - fill_crypto_hdr = ath12k_dp_rx_h_is_da_mcbc(ar->ab, rx_desc); - rxcb->is_mcbc = fill_crypto_hdr; + rxcb->is_mcbc = rx_info->is_mcbc; if (rxcb->is_mcbc) - rxcb->peer_id = ath12k_dp_rx_h_peer_id(ar->ab, rx_desc); + rxcb->peer_id = rx_info->peer_id; spin_lock_bh(&ar->ab->base_lock); - peer = ath12k_dp_rx_h_find_peer(ar->ab, msdu); + peer = ath12k_dp_rx_h_find_peer(ar->ab, msdu, rx_info); if (peer) { + /* resetting mcbc bit because mcbc packets are unicast + * packets only for AP as STA sends unicast packets. + */ + rxcb->is_mcbc = rxcb->is_mcbc && !peer->ucast_ra_only; + if (rxcb->is_mcbc) enctype = peer->sec_type_grp; else @@ -2305,7 +2316,7 @@ static void ath12k_dp_rx_h_mpdu(struct ath12k *ar, if (is_decrypted) { rx_status->flag |= RX_FLAG_DECRYPTED | RX_FLAG_MMIC_STRIPPED; - if (fill_crypto_hdr) + if (rx_info->is_mcbc) rx_status->flag |= RX_FLAG_MIC_STRIPPED | RX_FLAG_ICV_STRIPPED; else @@ -2313,37 +2324,28 @@ static void ath12k_dp_rx_h_mpdu(struct ath12k *ar, RX_FLAG_PN_VALIDATED; } - ath12k_dp_rx_h_csum_offload(ar, msdu); + ath12k_dp_rx_h_csum_offload(msdu, rx_info); ath12k_dp_rx_h_undecap(ar, msdu, rx_desc, enctype, rx_status, is_decrypted); - if (!is_decrypted || fill_crypto_hdr) + if (!is_decrypted || rx_info->is_mcbc) return; - if (ath12k_dp_rx_h_decap_type(ar->ab, rx_desc) != - DP_RX_DECAP_TYPE_ETHERNET2_DIX) { + if (rx_info->decap_type != DP_RX_DECAP_TYPE_ETHERNET2_DIX) { hdr = (void *)msdu->data; hdr->frame_control &= ~__cpu_to_le16(IEEE80211_FCTL_PROTECTED); } } -static void ath12k_dp_rx_h_rate(struct ath12k *ar, struct hal_rx_desc *rx_desc, - struct ieee80211_rx_status *rx_status) +static void ath12k_dp_rx_h_rate(struct ath12k *ar, struct ath12k_dp_rx_info *rx_info) { - struct ath12k_base *ab = ar->ab; struct ieee80211_supported_band *sband; - enum rx_msdu_start_pkt_type pkt_type; - u8 bw; - u8 rate_mcs, nss; - u8 sgi; + struct ieee80211_rx_status *rx_status = rx_info->rx_status; + enum rx_msdu_start_pkt_type pkt_type = rx_info->pkt_type; + u8 bw = rx_info->bw, sgi = rx_info->sgi; + u8 rate_mcs = rx_info->rate_mcs, nss = rx_info->nss; bool is_cck; - pkt_type = ath12k_dp_rx_h_pkt_type(ab, rx_desc); - bw = ath12k_dp_rx_h_rx_bw(ab, rx_desc); - rate_mcs = ath12k_dp_rx_h_rate_mcs(ab, rx_desc); - nss = ath12k_dp_rx_h_nss(ab, rx_desc); - sgi = ath12k_dp_rx_h_sgi(ab, rx_desc); - switch (pkt_type) { case RX_MSDU_START_PKT_TYPE_11A: case RX_MSDU_START_PKT_TYPE_11B: @@ -2412,10 +2414,35 @@ static void ath12k_dp_rx_h_rate(struct ath12k *ar, struct hal_rx_desc *rx_desc, } } -void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct hal_rx_desc *rx_desc, - struct ieee80211_rx_status *rx_status) +void ath12k_dp_rx_h_fetch_info(struct ath12k_base *ab, struct hal_rx_desc *rx_desc, + struct ath12k_dp_rx_info *rx_info) { - struct ath12k_base *ab = ar->ab; + rx_info->ip_csum_fail = ath12k_dp_rx_h_ip_cksum_fail(ab, rx_desc); + rx_info->l4_csum_fail = ath12k_dp_rx_h_l4_cksum_fail(ab, rx_desc); + rx_info->is_mcbc = ath12k_dp_rx_h_is_da_mcbc(ab, rx_desc); + rx_info->decap_type = ath12k_dp_rx_h_decap_type(ab, rx_desc); + rx_info->pkt_type = ath12k_dp_rx_h_pkt_type(ab, rx_desc); + rx_info->sgi = ath12k_dp_rx_h_sgi(ab, rx_desc); + rx_info->rate_mcs = ath12k_dp_rx_h_rate_mcs(ab, rx_desc); + rx_info->bw = ath12k_dp_rx_h_rx_bw(ab, rx_desc); + rx_info->nss = ath12k_dp_rx_h_nss(ab, rx_desc); + rx_info->tid = 
ath12k_dp_rx_h_tid(ab, rx_desc); + rx_info->peer_id = ath12k_dp_rx_h_peer_id(ab, rx_desc); + rx_info->phy_meta_data = ath12k_dp_rx_h_freq(ab, rx_desc); + + if (ath12k_dp_rxdesc_mac_addr2_valid(ab, rx_desc)) { + ether_addr_copy(rx_info->addr2, + ath12k_dp_rxdesc_get_mpdu_start_addr2(ab, rx_desc)); + rx_info->addr2_present = true; + } + + ath12k_dbg_dump(ab, ATH12K_DBG_DATA, NULL, "rx_desc: ", + rx_desc, sizeof(*rx_desc)); +} + +void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct ath12k_dp_rx_info *rx_info) +{ + struct ieee80211_rx_status *rx_status = rx_info->rx_status; u8 channel_num; u32 center_freq, meta_data; struct ieee80211_channel *channel; @@ -2429,12 +2456,12 @@ void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct hal_rx_desc *rx_desc, rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL; - meta_data = ath12k_dp_rx_h_freq(ab, rx_desc); + meta_data = rx_info->phy_meta_data; channel_num = meta_data; center_freq = meta_data >> 16; - if (center_freq >= ATH12K_MIN_6G_FREQ && - center_freq <= ATH12K_MAX_6G_FREQ) { + if (center_freq >= ATH12K_MIN_6GHZ_FREQ && + center_freq <= ATH12K_MAX_6GHZ_FREQ) { rx_status->band = NL80211_BAND_6GHZ; rx_status->freq = center_freq; } else if (channel_num >= 1 && channel_num <= 14) { @@ -2450,20 +2477,18 @@ void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct hal_rx_desc *rx_desc, ieee80211_frequency_to_channel(channel->center_freq); } spin_unlock_bh(&ar->data_lock); - ath12k_dbg_dump(ar->ab, ATH12K_DBG_DATA, NULL, "rx_desc: ", - rx_desc, sizeof(*rx_desc)); } if (rx_status->band != NL80211_BAND_6GHZ) rx_status->freq = ieee80211_channel_to_frequency(channel_num, rx_status->band); - ath12k_dp_rx_h_rate(ar, rx_desc, rx_status); + ath12k_dp_rx_h_rate(ar, rx_info); } static void ath12k_dp_rx_deliver_msdu(struct ath12k *ar, struct napi_struct *napi, struct sk_buff *msdu, - struct ieee80211_rx_status *status) + struct ath12k_dp_rx_info *rx_info) { struct ath12k_base *ab = ar->ab; static const struct ieee80211_radiotap_he known = { @@ -2476,6 +2501,7 @@ static void ath12k_dp_rx_deliver_msdu(struct ath12k *ar, struct napi_struct *nap struct ieee80211_sta *pubsta; struct ath12k_peer *peer; struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); + struct ieee80211_rx_status *status = rx_info->rx_status; u8 decap = DP_RX_DECAP_TYPE_RAW; bool is_mcbc = rxcb->is_mcbc; bool is_eapol = rxcb->is_eapol; @@ -2488,10 +2514,10 @@ static void ath12k_dp_rx_deliver_msdu(struct ath12k *ar, struct napi_struct *nap } if (!(status->flag & RX_FLAG_ONLY_MONITOR)) - decap = ath12k_dp_rx_h_decap_type(ab, rxcb->rx_desc); + decap = rx_info->decap_type; spin_lock_bh(&ab->base_lock); - peer = ath12k_dp_rx_h_find_peer(ab, msdu); + peer = ath12k_dp_rx_h_find_peer(ab, msdu, rx_info); pubsta = peer ? 
peer->sta : NULL; @@ -2574,7 +2600,7 @@ static bool ath12k_dp_rx_check_nwifi_hdr_len_valid(struct ath12k_base *ab, static int ath12k_dp_rx_process_msdu(struct ath12k *ar, struct sk_buff *msdu, struct sk_buff_head *msdu_list, - struct ieee80211_rx_status *rx_status) + struct ath12k_dp_rx_info *rx_info) { struct ath12k_base *ab = ar->ab; struct hal_rx_desc *rx_desc, *lrx_desc; @@ -2634,10 +2660,11 @@ static int ath12k_dp_rx_process_msdu(struct ath12k *ar, goto free_out; } - ath12k_dp_rx_h_ppdu(ar, rx_desc, rx_status); - ath12k_dp_rx_h_mpdu(ar, msdu, rx_desc, rx_status); + ath12k_dp_rx_h_fetch_info(ab, rx_desc, rx_info); + ath12k_dp_rx_h_ppdu(ar, rx_info); + ath12k_dp_rx_h_mpdu(ar, msdu, rx_desc, rx_info); - rx_status->flag |= RX_FLAG_SKIP_MONITOR | RX_FLAG_DUP_VALIDATED; + rx_info->rx_status->flag |= RX_FLAG_SKIP_MONITOR | RX_FLAG_DUP_VALIDATED; return 0; @@ -2657,12 +2684,16 @@ static void ath12k_dp_rx_process_received_packets(struct ath12k_base *ab, struct ath12k *ar; struct ath12k_hw_link *hw_links = ag->hw_links; struct ath12k_base *partner_ab; + struct ath12k_dp_rx_info rx_info; u8 hw_link_id, pdev_id; int ret; if (skb_queue_empty(msdu_list)) return; + rx_info.addr2_present = false; + rx_info.rx_status = &rx_status; + rcu_read_lock(); while ((msdu = __skb_dequeue(msdu_list))) { @@ -2683,7 +2714,7 @@ static void ath12k_dp_rx_process_received_packets(struct ath12k_base *ab, continue; } - ret = ath12k_dp_rx_process_msdu(ar, msdu, msdu_list, &rx_status); + ret = ath12k_dp_rx_process_msdu(ar, msdu, msdu_list, &rx_info); if (ret) { ath12k_dbg(ab, ATH12K_DBG_DATA, "Unable to process msdu %d", ret); @@ -2691,7 +2722,7 @@ static void ath12k_dp_rx_process_received_packets(struct ath12k_base *ab, continue; } - ath12k_dp_rx_deliver_msdu(ar, napi, msdu, &rx_status); + ath12k_dp_rx_deliver_msdu(ar, napi, msdu, &rx_info); } rcu_read_unlock(); @@ -2984,6 +3015,7 @@ static int ath12k_dp_rx_h_verify_tkip_mic(struct ath12k *ar, struct ath12k_peer struct ieee80211_rx_status *rxs = IEEE80211_SKB_RXCB(msdu); struct ieee80211_key_conf *key_conf; struct ieee80211_hdr *hdr; + struct ath12k_dp_rx_info rx_info; u8 mic[IEEE80211_CCMP_MIC_LEN]; int head_len, tail_len, ret; size_t data_len; @@ -2994,6 +3026,9 @@ static int ath12k_dp_rx_h_verify_tkip_mic(struct ath12k *ar, struct ath12k_peer if (ath12k_dp_rx_h_enctype(ab, rx_desc) != HAL_ENCRYPT_TYPE_TKIP_MIC) return 0; + rx_info.addr2_present = false; + rx_info.rx_status = rxs; + hdr = (struct ieee80211_hdr *)(msdu->data + hal_rx_desc_sz); hdr_len = ieee80211_hdrlen(hdr->frame_control); head_len = hdr_len + hal_rx_desc_sz + IEEE80211_TKIP_IV_LEN; @@ -3020,6 +3055,8 @@ mic_fail: (ATH12K_SKB_RXCB(msdu))->is_first_msdu = true; (ATH12K_SKB_RXCB(msdu))->is_last_msdu = true; + ath12k_dp_rx_h_fetch_info(ab, rx_desc, &rx_info); + rxs->flag |= RX_FLAG_MMIC_ERROR | RX_FLAG_MMIC_STRIPPED | RX_FLAG_IV_STRIPPED | RX_FLAG_DECRYPTED; skb_pull(msdu, hal_rx_desc_sz); @@ -3027,7 +3064,7 @@ mic_fail: if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(ab, rx_desc, msdu))) return -EINVAL; - ath12k_dp_rx_h_ppdu(ar, rx_desc, rxs); + ath12k_dp_rx_h_ppdu(ar, &rx_info); ath12k_dp_rx_h_undecap(ar, msdu, rx_desc, HAL_ENCRYPT_TYPE_TKIP_MIC, rxs, true); ieee80211_rx(ath12k_ar_to_hw(ar), msdu); @@ -3242,8 +3279,15 @@ static int ath12k_dp_rx_h_defrag_reo_reinject(struct ath12k *ar, reo_ent_ring->rx_mpdu_info.peer_meta_data = reo_dest_ring->rx_mpdu_info.peer_meta_data; - reo_ent_ring->queue_addr_lo = cpu_to_le32(lower_32_bits(rx_tid->paddr)); - queue_addr_hi = upper_32_bits(rx_tid->paddr); + if 
(ab->hw_params->reoq_lut_support) { + reo_ent_ring->queue_addr_lo = reo_dest_ring->rx_mpdu_info.peer_meta_data; + queue_addr_hi = 0; + } else { + reo_ent_ring->queue_addr_lo = + cpu_to_le32(lower_32_bits(rx_tid->qbuf.paddr_aligned)); + queue_addr_hi = upper_32_bits(rx_tid->qbuf.paddr_aligned); + } + reo_ent_ring->info0 = le32_encode_bits(queue_addr_hi, HAL_REO_ENTR_RING_INFO0_QUEUE_ADDR_HI) | le32_encode_bits(dst_ind, @@ -3716,7 +3760,7 @@ static void ath12k_dp_rx_null_q_desc_sg_drop(struct ath12k *ar, } static int ath12k_dp_rx_h_null_q_desc(struct ath12k *ar, struct sk_buff *msdu, - struct ieee80211_rx_status *status, + struct ath12k_dp_rx_info *rx_info, struct sk_buff_head *msdu_list) { struct ath12k_base *ab = ar->ab; @@ -3772,11 +3816,11 @@ static int ath12k_dp_rx_h_null_q_desc(struct ath12k *ar, struct sk_buff *msdu, if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(ab, desc, msdu))) return -EINVAL; - ath12k_dp_rx_h_ppdu(ar, desc, status); - - ath12k_dp_rx_h_mpdu(ar, msdu, desc, status); + ath12k_dp_rx_h_fetch_info(ab, desc, rx_info); + ath12k_dp_rx_h_ppdu(ar, rx_info); + ath12k_dp_rx_h_mpdu(ar, msdu, desc, rx_info); - rxcb->tid = ath12k_dp_rx_h_tid(ab, desc); + rxcb->tid = rx_info->tid; /* Please note that caller will having the access to msdu and completing * rx with mac80211. Need not worry about cleaning up amsdu_list. @@ -3786,7 +3830,7 @@ static int ath12k_dp_rx_h_null_q_desc(struct ath12k *ar, struct sk_buff *msdu, } static bool ath12k_dp_rx_h_reo_err(struct ath12k *ar, struct sk_buff *msdu, - struct ieee80211_rx_status *status, + struct ath12k_dp_rx_info *rx_info, struct sk_buff_head *msdu_list) { struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); @@ -3796,7 +3840,7 @@ static bool ath12k_dp_rx_h_reo_err(struct ath12k *ar, struct sk_buff *msdu, switch (rxcb->err_code) { case HAL_REO_DEST_RING_ERROR_CODE_DESC_ADDR_ZERO: - if (ath12k_dp_rx_h_null_q_desc(ar, msdu, status, msdu_list)) + if (ath12k_dp_rx_h_null_q_desc(ar, msdu, rx_info, msdu_list)) drop = true; break; case HAL_REO_DEST_RING_ERROR_CODE_PN_CHECK_FAILED: @@ -3817,7 +3861,7 @@ static bool ath12k_dp_rx_h_reo_err(struct ath12k *ar, struct sk_buff *msdu, } static bool ath12k_dp_rx_h_tkip_mic_err(struct ath12k *ar, struct sk_buff *msdu, - struct ieee80211_rx_status *status) + struct ath12k_dp_rx_info *rx_info) { struct ath12k_base *ab = ar->ab; u16 msdu_len; @@ -3831,24 +3875,33 @@ static bool ath12k_dp_rx_h_tkip_mic_err(struct ath12k *ar, struct sk_buff *msdu, l3pad_bytes = ath12k_dp_rx_h_l3pad(ab, desc); msdu_len = ath12k_dp_rx_h_msdu_len(ab, desc); + + if ((hal_rx_desc_sz + l3pad_bytes + msdu_len) > DP_RX_BUFFER_SIZE) { + ath12k_dbg(ab, ATH12K_DBG_DATA, + "invalid msdu len in tkip mic err %u\n", msdu_len); + ath12k_dbg_dump(ab, ATH12K_DBG_DATA, NULL, "", desc, + sizeof(*desc)); + return true; + } + skb_put(msdu, hal_rx_desc_sz + l3pad_bytes + msdu_len); skb_pull(msdu, hal_rx_desc_sz + l3pad_bytes); if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(ab, desc, msdu))) return true; - ath12k_dp_rx_h_ppdu(ar, desc, status); + ath12k_dp_rx_h_ppdu(ar, rx_info); - status->flag |= (RX_FLAG_MMIC_STRIPPED | RX_FLAG_MMIC_ERROR | - RX_FLAG_DECRYPTED); + rx_info->rx_status->flag |= (RX_FLAG_MMIC_STRIPPED | RX_FLAG_MMIC_ERROR | + RX_FLAG_DECRYPTED); ath12k_dp_rx_h_undecap(ar, msdu, desc, - HAL_ENCRYPT_TYPE_TKIP_MIC, status, false); + HAL_ENCRYPT_TYPE_TKIP_MIC, rx_info->rx_status, false); return false; } static bool ath12k_dp_rx_h_rxdma_err(struct ath12k *ar, struct sk_buff *msdu, - struct ieee80211_rx_status *status) + struct 
ath12k_dp_rx_info *rx_info) { struct ath12k_base *ab = ar->ab; struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); @@ -3863,7 +3916,8 @@ static bool ath12k_dp_rx_h_rxdma_err(struct ath12k *ar, struct sk_buff *msdu, case HAL_REO_ENTR_RING_RXDMA_ECODE_TKIP_MIC_ERR: err_bitmap = ath12k_dp_rx_h_mpdu_err(ab, rx_desc); if (err_bitmap & HAL_RX_MPDU_ERR_TKIP_MIC) { - drop = ath12k_dp_rx_h_tkip_mic_err(ar, msdu, status); + ath12k_dp_rx_h_fetch_info(ab, rx_desc, rx_info); + drop = ath12k_dp_rx_h_tkip_mic_err(ar, msdu, rx_info); break; } fallthrough; @@ -3885,14 +3939,18 @@ static void ath12k_dp_rx_wbm_err(struct ath12k *ar, { struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); struct ieee80211_rx_status rxs = {0}; + struct ath12k_dp_rx_info rx_info; bool drop = true; + rx_info.addr2_present = false; + rx_info.rx_status = &rxs; + switch (rxcb->err_rel_src) { case HAL_WBM_REL_SRC_MODULE_REO: - drop = ath12k_dp_rx_h_reo_err(ar, msdu, &rxs, msdu_list); + drop = ath12k_dp_rx_h_reo_err(ar, msdu, &rx_info, msdu_list); break; case HAL_WBM_REL_SRC_MODULE_RXDMA: - drop = ath12k_dp_rx_h_rxdma_err(ar, msdu, &rxs); + drop = ath12k_dp_rx_h_rxdma_err(ar, msdu, &rx_info); break; default: /* msdu will get freed */ @@ -3904,7 +3962,7 @@ static void ath12k_dp_rx_wbm_err(struct ath12k *ar, return; } - ath12k_dp_rx_deliver_msdu(ar, napi, msdu, &rxs); + ath12k_dp_rx_deliver_msdu(ar, napi, msdu, &rx_info); } int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab, @@ -4480,6 +4538,8 @@ int ath12k_dp_rx_pdev_mon_attach(struct ath12k *ar) pmon->mon_last_linkdesc_paddr = 0; pmon->mon_last_buf_cookie = DP_RX_DESC_COOKIE_MAX + 1; + INIT_LIST_HEAD(&pmon->dp_rx_mon_mpdu_list); + pmon->mon_mpdu = NULL; spin_lock_init(&pmon->mon_lock); return 0; diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.h b/drivers/net/wireless/ath/ath12k/dp_rx.h index 88e42365a9d8..f5c05f17813b 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.h +++ b/drivers/net/wireless/ath/ath12k/dp_rx.h @@ -14,11 +14,9 @@ struct ath12k_dp_rx_tid { u8 tid; - u32 *vaddr; - dma_addr_t paddr; - u32 size; u32 ba_win_sz; bool active; + struct ath12k_reoq_buf qbuf; /* Info related to rx fragments */ u32 cur_sn; @@ -65,6 +63,24 @@ struct ath12k_dp_rx_rfc1042_hdr { __be16 snap_type; } __packed; +struct ath12k_dp_rx_info { + struct ieee80211_rx_status *rx_status; + u32 phy_meta_data; + u16 peer_id; + u8 decap_type; + u8 pkt_type; + u8 sgi; + u8 rate_mcs; + u8 bw; + u8 nss; + u8 addr2[ETH_ALEN]; + u8 tid; + bool ip_csum_fail; + bool l4_csum_fail; + bool is_mcbc; + bool addr2_present; +}; + static inline u32 ath12k_he_gi_to_nl80211_he_gi(u8 sgi) { u32 ret = 0; @@ -131,13 +147,13 @@ int ath12k_dp_rx_peer_frag_setup(struct ath12k *ar, const u8 *peer_mac, int vdev u8 ath12k_dp_rx_h_l3pad(struct ath12k_base *ab, struct hal_rx_desc *desc); struct ath12k_peer * -ath12k_dp_rx_h_find_peer(struct ath12k_base *ab, struct sk_buff *msdu); +ath12k_dp_rx_h_find_peer(struct ath12k_base *ab, struct sk_buff *msdu, + struct ath12k_dp_rx_info *rx_info); u8 ath12k_dp_rx_h_decap_type(struct ath12k_base *ab, struct hal_rx_desc *desc); u32 ath12k_dp_rx_h_mpdu_err(struct ath12k_base *ab, struct hal_rx_desc *desc); -void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct hal_rx_desc *rx_desc, - struct ieee80211_rx_status *rx_status); +void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct ath12k_dp_rx_info *rx_info); int ath12k_dp_rxdma_ring_sel_config_qcn9274(struct ath12k_base *ab); int ath12k_dp_rxdma_ring_sel_config_wcn7850(struct ath12k_base *ab); @@ -145,4 +161,9 @@ int ath12k_dp_htt_tlv_iter(struct 
ath12k_base *ab, const void *ptr, size_t len, int (*iter)(struct ath12k_base *ar, u16 tag, u16 len, const void *ptr, void *data), void *data); +void ath12k_dp_rx_h_fetch_info(struct ath12k_base *ab, struct hal_rx_desc *rx_desc, + struct ath12k_dp_rx_info *rx_info); + +int ath12k_dp_rx_crypto_mic_len(struct ath12k *ar, enum hal_encrypt_type enctype); + #endif /* ATH12K_DP_RX_H */ diff --git a/drivers/net/wireless/ath/ath12k/dp_tx.c b/drivers/net/wireless/ath/ath12k/dp_tx.c index ced232bf4aed..2136eeb278af 100644 --- a/drivers/net/wireless/ath/ath12k/dp_tx.c +++ b/drivers/net/wireless/ath/ath12k/dp_tx.c @@ -7,6 +7,7 @@ #include "core.h" #include "dp_tx.h" #include "debug.h" +#include "debugfs.h" #include "hw.h" #include "peer.h" #include "mac.h" @@ -83,6 +84,7 @@ static void ath12k_dp_tx_release_txbuf(struct ath12k_dp *dp, u8 pool_id) { spin_lock_bh(&dp->tx_desc_lock[pool_id]); + tx_desc->skb_ext_desc = NULL; list_move_tail(&tx_desc->list, &dp->tx_desc_free_list[pool_id]); spin_unlock_bh(&dp->tx_desc_lock[pool_id]); } @@ -219,7 +221,8 @@ out: } int ath12k_dp_tx(struct ath12k *ar, struct ath12k_link_vif *arvif, - struct sk_buff *skb, bool gsn_valid, int mcbc_gsn) + struct sk_buff *skb, bool gsn_valid, int mcbc_gsn, + bool is_mcast) { struct ath12k_base *ab = ar->ab; struct ath12k_dp *dp = &ab->dp; @@ -229,7 +232,7 @@ int ath12k_dp_tx(struct ath12k *ar, struct ath12k_link_vif *arvif, struct ath12k_skb_cb *skb_cb = ATH12K_SKB_CB(skb); struct hal_tcl_data_cmd *hal_tcl_desc; struct hal_tx_msdu_ext_desc *msg; - struct sk_buff *skb_ext_desc; + struct sk_buff *skb_ext_desc = NULL; struct hal_srng *tcl_ring; struct ieee80211_hdr *hdr = (void *)skb->data; struct ath12k_vif *ahvif = arvif->ahvif; @@ -415,23 +418,21 @@ map: if (ret < 0) { ath12k_dbg(ab, ATH12K_DBG_DP_TX, "Failed to add HTT meta data, dropping packet\n"); - kfree_skb(skb_ext_desc); - goto fail_unmap_dma; + goto fail_free_ext_skb; } } ti.paddr = dma_map_single(ab->dev, skb_ext_desc->data, skb_ext_desc->len, DMA_TO_DEVICE); ret = dma_mapping_error(ab->dev, ti.paddr); - if (ret) { - kfree_skb(skb_ext_desc); - goto fail_unmap_dma; - } + if (ret) + goto fail_free_ext_skb; ti.data_len = skb_ext_desc->len; ti.type = HAL_TCL_DESC_TYPE_EXT_DESC; skb_cb->paddr_ext_desc = ti.paddr; + tx_desc->skb_ext_desc = skb_ext_desc; } hal_ring_id = tx_ring->tcl_data_ring.ring_id; @@ -462,9 +463,20 @@ map: ring_selector++; } - goto fail_unmap_dma; + goto fail_unmap_dma_ext; } + spin_lock_bh(&arvif->link_stats_lock); + arvif->link_stats.tx_encap_type[ti.encap_type]++; + arvif->link_stats.tx_encrypt_type[ti.encrypt_type]++; + arvif->link_stats.tx_desc_type[ti.type]++; + + if (is_mcast) + arvif->link_stats.tx_bcast_mcast++; + else + arvif->link_stats.tx_enqueued++; + spin_unlock_bh(&arvif->link_stats_lock); + ath12k_hal_tx_cmd_desc_setup(ab, hal_tcl_desc, &ti); ath12k_hal_srng_access_end(ab, tcl_ring); @@ -478,16 +490,24 @@ map: return 0; -fail_unmap_dma: - dma_unmap_single(ab->dev, ti.paddr, ti.data_len, DMA_TO_DEVICE); - +fail_unmap_dma_ext: if (skb_cb->paddr_ext_desc) dma_unmap_single(ab->dev, skb_cb->paddr_ext_desc, - sizeof(struct hal_tx_msdu_ext_desc), + skb_ext_desc->len, DMA_TO_DEVICE); +fail_free_ext_skb: + kfree_skb(skb_ext_desc); + +fail_unmap_dma: + dma_unmap_single(ab->dev, ti.paddr, ti.data_len, DMA_TO_DEVICE); fail_remove_tx_buf: ath12k_dp_tx_release_txbuf(dp, tx_desc, pool_id); + + spin_lock_bh(&arvif->link_stats_lock); + arvif->link_stats.tx_dropped++; + spin_unlock_bh(&arvif->link_stats_lock); + if (tcl_ring_retry) goto tcl_ring_sel; @@ -495,20 
+515,23 @@ fail_remove_tx_buf: } static void ath12k_dp_tx_free_txbuf(struct ath12k_base *ab, - struct sk_buff *msdu, u8 mac_id, - struct dp_tx_ring *tx_ring) + struct dp_tx_ring *tx_ring, + struct ath12k_tx_desc_params *desc_params) { struct ath12k *ar; + struct sk_buff *msdu = desc_params->skb; struct ath12k_skb_cb *skb_cb; - u8 pdev_id = ath12k_hw_mac_id_to_pdev_id(ab->hw_params, mac_id); + u8 pdev_id = ath12k_hw_mac_id_to_pdev_id(ab->hw_params, desc_params->mac_id); skb_cb = ATH12K_SKB_CB(msdu); ar = ab->pdevs[pdev_id].ar; dma_unmap_single(ab->dev, skb_cb->paddr, msdu->len, DMA_TO_DEVICE); - if (skb_cb->paddr_ext_desc) + if (skb_cb->paddr_ext_desc) { dma_unmap_single(ab->dev, skb_cb->paddr_ext_desc, - sizeof(struct hal_tx_msdu_ext_desc), DMA_TO_DEVICE); + desc_params->skb_ext_desc->len, DMA_TO_DEVICE); + dev_kfree_skb_any(desc_params->skb_ext_desc); + } ieee80211_free_txskb(ar->ah->hw, msdu); @@ -518,13 +541,17 @@ static void ath12k_dp_tx_free_txbuf(struct ath12k_base *ab, static void ath12k_dp_tx_htt_tx_complete_buf(struct ath12k_base *ab, - struct sk_buff *msdu, + struct ath12k_tx_desc_params *desc_params, struct dp_tx_ring *tx_ring, struct ath12k_dp_htt_wbm_tx_status *ts) { struct ieee80211_tx_info *info; + struct ath12k_link_vif *arvif; struct ath12k_skb_cb *skb_cb; + struct ieee80211_vif *vif; + struct ath12k_vif *ahvif; struct ath12k *ar; + struct sk_buff *msdu = desc_params->skb; skb_cb = ATH12K_SKB_CB(msdu); info = IEEE80211_SKB_CB(msdu); @@ -535,9 +562,24 @@ ath12k_dp_tx_htt_tx_complete_buf(struct ath12k_base *ab, wake_up(&ar->dp.tx_empty_waitq); dma_unmap_single(ab->dev, skb_cb->paddr, msdu->len, DMA_TO_DEVICE); - if (skb_cb->paddr_ext_desc) + if (skb_cb->paddr_ext_desc) { dma_unmap_single(ab->dev, skb_cb->paddr_ext_desc, - sizeof(struct hal_tx_msdu_ext_desc), DMA_TO_DEVICE); + desc_params->skb_ext_desc->len, DMA_TO_DEVICE); + dev_kfree_skb_any(desc_params->skb_ext_desc); + } + + vif = skb_cb->vif; + if (vif) { + ahvif = ath12k_vif_to_ahvif(vif); + rcu_read_lock(); + arvif = rcu_dereference(ahvif->link[skb_cb->link_id]); + if (arvif) { + spin_lock_bh(&arvif->link_stats_lock); + arvif->link_stats.tx_completed++; + spin_unlock_bh(&arvif->link_stats_lock); + } + rcu_read_unlock(); + } memset(&info->status, 0, sizeof(info->status)); @@ -560,10 +602,9 @@ ath12k_dp_tx_htt_tx_complete_buf(struct ath12k_base *ab, } static void -ath12k_dp_tx_process_htt_tx_complete(struct ath12k_base *ab, - void *desc, u8 mac_id, - struct sk_buff *msdu, - struct dp_tx_ring *tx_ring) +ath12k_dp_tx_process_htt_tx_complete(struct ath12k_base *ab, void *desc, + struct dp_tx_ring *tx_ring, + struct ath12k_tx_desc_params *desc_params) { struct htt_tx_wbm_completion *status_desc; struct ath12k_dp_htt_wbm_tx_status ts = {0}; @@ -579,13 +620,14 @@ ath12k_dp_tx_process_htt_tx_complete(struct ath12k_base *ab, ts.acked = (wbm_status == HAL_WBM_REL_HTT_TX_COMP_STATUS_OK); ts.ack_rssi = le32_get_bits(status_desc->info2, HTT_TX_WBM_COMP_INFO2_ACK_RSSI); - ath12k_dp_tx_htt_tx_complete_buf(ab, msdu, tx_ring, &ts); + ath12k_dp_tx_htt_tx_complete_buf(ab, desc_params, tx_ring, &ts); break; case HAL_WBM_REL_HTT_TX_COMP_STATUS_DROP: case HAL_WBM_REL_HTT_TX_COMP_STATUS_TTL: case HAL_WBM_REL_HTT_TX_COMP_STATUS_REINJ: case HAL_WBM_REL_HTT_TX_COMP_STATUS_INSPECT: - ath12k_dp_tx_free_txbuf(ab, msdu, mac_id, tx_ring); + case HAL_WBM_REL_HTT_TX_COMP_STATUS_VDEVID_MISMATCH: + ath12k_dp_tx_free_txbuf(ab, tx_ring, desc_params); break; case HAL_WBM_REL_HTT_TX_COMP_STATUS_MEC_NOTIFY: /* This event is to be handled only when the driver 
decides to @@ -593,7 +635,7 @@ ath12k_dp_tx_process_htt_tx_complete(struct ath12k_base *ab, */ break; default: - ath12k_warn(ab, "Unknown htt tx status %d\n", wbm_status); + ath12k_warn(ab, "Unknown htt wbm tx status %d\n", wbm_status); break; } } @@ -717,13 +759,17 @@ static void ath12k_dp_tx_update_txcompl(struct ath12k *ar, struct hal_tx_status } static void ath12k_dp_tx_complete_msdu(struct ath12k *ar, - struct sk_buff *msdu, + struct ath12k_tx_desc_params *desc_params, struct hal_tx_status *ts) { struct ath12k_base *ab = ar->ab; struct ath12k_hw *ah = ar->ah; struct ieee80211_tx_info *info; + struct ath12k_link_vif *arvif; struct ath12k_skb_cb *skb_cb; + struct ieee80211_vif *vif; + struct ath12k_vif *ahvif; + struct sk_buff *msdu = desc_params->skb; if (WARN_ON_ONCE(ts->buf_rel_source != HAL_WBM_REL_SRC_MODULE_TQM)) { /* Must not happen */ @@ -733,9 +779,11 @@ static void ath12k_dp_tx_complete_msdu(struct ath12k *ar, skb_cb = ATH12K_SKB_CB(msdu); dma_unmap_single(ab->dev, skb_cb->paddr, msdu->len, DMA_TO_DEVICE); - if (skb_cb->paddr_ext_desc) + if (skb_cb->paddr_ext_desc) { dma_unmap_single(ab->dev, skb_cb->paddr_ext_desc, - sizeof(struct hal_tx_msdu_ext_desc), DMA_TO_DEVICE); + desc_params->skb_ext_desc->len, DMA_TO_DEVICE); + dev_kfree_skb_any(desc_params->skb_ext_desc); + } rcu_read_lock(); @@ -749,6 +797,17 @@ static void ath12k_dp_tx_complete_msdu(struct ath12k *ar, goto exit; } + vif = skb_cb->vif; + if (vif) { + ahvif = ath12k_vif_to_ahvif(vif); + arvif = rcu_dereference(ahvif->link[skb_cb->link_id]); + if (arvif) { + spin_lock_bh(&arvif->link_stats_lock); + arvif->link_stats.tx_completed++; + spin_unlock_bh(&arvif->link_stats_lock); + } + } + info = IEEE80211_SKB_CB(msdu); memset(&info->status, 0, sizeof(info->status)); @@ -842,11 +901,11 @@ void ath12k_dp_tx_completion_handler(struct ath12k_base *ab, int ring_id) int hal_ring_id = dp->tx_ring[ring_id].tcl_comp_ring.ring_id; struct hal_srng *status_ring = &ab->hal.srng_list[hal_ring_id]; struct ath12k_tx_desc_info *tx_desc = NULL; - struct sk_buff *msdu; struct hal_tx_status ts = { 0 }; + struct ath12k_tx_desc_params desc_params; struct dp_tx_ring *tx_ring = &dp->tx_ring[ring_id]; struct hal_wbm_release_ring *desc; - u8 mac_id, pdev_id; + u8 pdev_id; u64 desc_va; spin_lock_bh(&status_ring->lock); @@ -900,28 +959,27 @@ void ath12k_dp_tx_completion_handler(struct ath12k_base *ab, int ring_id) continue; } - msdu = tx_desc->skb; - mac_id = tx_desc->mac_id; + desc_params.mac_id = tx_desc->mac_id; + desc_params.skb = tx_desc->skb; + desc_params.skb_ext_desc = tx_desc->skb_ext_desc; /* Release descriptor as soon as extracting necessary info * to reduce contention */ ath12k_dp_tx_release_txbuf(dp, tx_desc, tx_desc->pool_id); if (ts.buf_rel_source == HAL_WBM_REL_SRC_MODULE_FW) { - ath12k_dp_tx_process_htt_tx_complete(ab, - (void *)tx_status, - mac_id, msdu, - tx_ring); + ath12k_dp_tx_process_htt_tx_complete(ab, (void *)tx_status, + tx_ring, &desc_params); continue; } - pdev_id = ath12k_hw_mac_id_to_pdev_id(ab->hw_params, mac_id); + pdev_id = ath12k_hw_mac_id_to_pdev_id(ab->hw_params, desc_params.mac_id); ar = ab->pdevs[pdev_id].ar; if (atomic_dec_and_test(&ar->dp.num_tx_pending)) wake_up(&ar->dp.tx_empty_waitq); - ath12k_dp_tx_complete_msdu(ar, msdu, &ts); + ath12k_dp_tx_complete_msdu(ar, &desc_params, &ts); } } @@ -1431,6 +1489,11 @@ int ath12k_dp_tx_htt_rx_monitor_mode_ring_config(struct ath12k *ar, bool reset) HTT_RX_MON_MO_CTRL_FILTER_FLASG3 | HTT_RX_MON_FP_DATA_FILTER_FLASG3 | HTT_RX_MON_MO_DATA_FILTER_FLASG3; + } else { + 
tlv_filter = ath12k_mac_mon_status_filter_default; + + if (ath12k_debugfs_is_extd_rx_stats_enabled(ar)) + tlv_filter.rx_filter = ath12k_debugfs_rx_filter(ar); } if (ab->hw_params->rxdma1_enable) { diff --git a/drivers/net/wireless/ath/ath12k/dp_tx.h b/drivers/net/wireless/ath/ath12k/dp_tx.h index a5904097dc34..10acdcf1fa8f 100644 --- a/drivers/net/wireless/ath/ath12k/dp_tx.h +++ b/drivers/net/wireless/ath/ath12k/dp_tx.h @@ -17,7 +17,8 @@ struct ath12k_dp_htt_wbm_tx_status { int ath12k_dp_tx_htt_h2t_ver_req_msg(struct ath12k_base *ab); int ath12k_dp_tx(struct ath12k *ar, struct ath12k_link_vif *arvif, - struct sk_buff *skb, bool gsn_valid, int mcbc_gsn); + struct sk_buff *skb, bool gsn_valid, int mcbc_gsn, + bool is_mcast); void ath12k_dp_tx_completion_handler(struct ath12k_base *ab, int ring_id); int ath12k_dp_tx_htt_h2t_ppdu_stats_req(struct ath12k *ar, u32 mask); diff --git a/drivers/net/wireless/ath/ath12k/fw.c b/drivers/net/wireless/ath/ath12k/fw.c index 5be4b2d4a19d..5ac497f80cad 100644 --- a/drivers/net/wireless/ath/ath12k/fw.c +++ b/drivers/net/wireless/ath/ath12k/fw.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* - * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "core.h" @@ -99,6 +99,8 @@ static int ath12k_fw_request_firmware_api_n(struct ath12k_base *ab, __set_bit(i, ab->fw.fw_features); } + ab->fw.fw_features_valid = true; + ath12k_dbg_dump(ab, ATH12K_DBG_BOOT, "features", "", ab->fw.fw_features, sizeof(ab->fw.fw_features)); @@ -169,3 +171,8 @@ void ath12k_fw_unmap(struct ath12k_base *ab) release_firmware(ab->fw.fw); memset(&ab->fw, 0, sizeof(ab->fw)); } + +bool ath12k_fw_feature_supported(struct ath12k_base *ab, enum ath12k_fw_features feat) +{ + return ab->fw.fw_features_valid && test_bit(feat, ab->fw.fw_features); +} diff --git a/drivers/net/wireless/ath/ath12k/fw.h b/drivers/net/wireless/ath/ath12k/fw.h index 273c003eff3b..7afaefed5086 100644 --- a/drivers/net/wireless/ath/ath12k/fw.h +++ b/drivers/net/wireless/ath/ath12k/fw.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* - * Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef ATH12K_FW_H @@ -32,5 +32,6 @@ enum ath12k_fw_features { void ath12k_fw_map(struct ath12k_base *ab); void ath12k_fw_unmap(struct ath12k_base *ab); +bool ath12k_fw_feature_supported(struct ath12k_base *ab, enum ath12k_fw_features feat); #endif /* ATH12K_FW_H */ diff --git a/drivers/net/wireless/ath/ath12k/hal.c b/drivers/net/wireless/ath/ath12k/hal.c index cd59ff8e6c7b..12d0f991a47f 100644 --- a/drivers/net/wireless/ath/ath12k/hal.c +++ b/drivers/net/wireless/ath/ath12k/hal.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include <linux/dma-mapping.h> #include "hal_tx.h" @@ -449,8 +449,8 @@ static u8 *ath12k_hw_qcn9274_rx_desc_mpdu_start_addr2(struct hal_rx_desc *desc) static bool ath12k_hw_qcn9274_rx_desc_is_da_mcbc(struct hal_rx_desc *desc) { - return __le32_to_cpu(desc->u.qcn9274.mpdu_start.info6) & - RX_MPDU_START_INFO6_MCAST_BCAST; + return __le16_to_cpu(desc->u.qcn9274.msdu_end.info5) & + RX_MSDU_END_INFO5_DA_IS_MCBC; } static void ath12k_hw_qcn9274_rx_desc_get_dot11_hdr(struct hal_rx_desc *desc, @@ -511,11 +511,6 @@ static void ath12k_hw_qcn9274_rx_desc_get_crypto_hdr(struct hal_rx_desc *desc, crypto_hdr[7] = HAL_RX_MPDU_INFO_PN_GET_BYTE2(desc->u.qcn9274.mpdu_start.pn[1]); } -static u16 ath12k_hw_qcn9274_rx_desc_get_mpdu_frame_ctl(struct hal_rx_desc *desc) -{ - return __le16_to_cpu(desc->u.qcn9274.mpdu_start.frame_ctrl); -} - static int ath12k_hal_srng_create_config_qcn9274(struct ath12k_base *ab) { struct ath12k_hal *hal = &ab->hal; @@ -552,9 +547,9 @@ static int ath12k_hal_srng_create_config_qcn9274(struct ath12k_base *ab) s->reg_start[1] = HAL_SEQ_WCSS_UMAC_REO_REG + HAL_REO_STATUS_HP; s = &hal->srng_config[HAL_TCL_DATA]; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_BASE_LSB; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_BASE_LSB(ab); s->reg_start[1] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_HP; - s->reg_size[0] = HAL_TCL2_RING_BASE_LSB - HAL_TCL1_RING_BASE_LSB; + s->reg_size[0] = HAL_TCL2_RING_BASE_LSB(ab) - HAL_TCL1_RING_BASE_LSB(ab); s->reg_size[1] = HAL_TCL2_RING_HP - HAL_TCL1_RING_HP; s = &hal->srng_config[HAL_TCL_CMD]; @@ -566,29 +561,29 @@ static int ath12k_hal_srng_create_config_qcn9274(struct ath12k_base *ab) s->reg_start[1] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL_STATUS_RING_HP; s = &hal->srng_config[HAL_CE_SRC]; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG + HAL_CE_DST_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG + HAL_CE_DST_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG - - HAL_SEQ_WCSS_UMAC_CE0_SRC_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG - - HAL_SEQ_WCSS_UMAC_CE0_SRC_REG; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) + HAL_CE_DST_RING_BASE_LSB; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) + HAL_CE_DST_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab); s = &hal->srng_config[HAL_CE_DST]; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_RING_BASE_LSB; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); s = &hal->srng_config[HAL_CE_DST_STATUS]; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_STATUS_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_STATUS_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - 
HAL_SEQ_WCSS_UMAC_CE0_DST_REG; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_STATUS_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); s = &hal->srng_config[HAL_WBM_IDLE_LINK]; s->reg_start[0] = HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM_IDLE_LINK_RING_BASE_LSB(ab); @@ -736,7 +731,6 @@ const struct hal_rx_ops hal_rx_qcn9274_ops = { .rx_desc_is_da_mcbc = ath12k_hw_qcn9274_rx_desc_is_da_mcbc, .rx_desc_get_dot11_hdr = ath12k_hw_qcn9274_rx_desc_get_dot11_hdr, .rx_desc_get_crypto_header = ath12k_hw_qcn9274_rx_desc_get_crypto_hdr, - .rx_desc_get_mpdu_frame_ctl = ath12k_hw_qcn9274_rx_desc_get_mpdu_frame_ctl, .dp_rx_h_msdu_done = ath12k_hw_qcn9274_dp_rx_h_msdu_done, .dp_rx_h_l4_cksum_fail = ath12k_hw_qcn9274_dp_rx_h_l4_cksum_fail, .dp_rx_h_ip_cksum_fail = ath12k_hw_qcn9274_dp_rx_h_ip_cksum_fail, @@ -908,8 +902,8 @@ static u8 *ath12k_hw_qcn9274_compact_rx_desc_mpdu_start_addr2(struct hal_rx_desc static bool ath12k_hw_qcn9274_compact_rx_desc_is_da_mcbc(struct hal_rx_desc *desc) { - return __le32_to_cpu(desc->u.qcn9274_compact.mpdu_start.info6) & - RX_MPDU_START_INFO6_MCAST_BCAST; + return __le16_to_cpu(desc->u.qcn9274_compact.msdu_end.info5) & + RX_MSDU_END_INFO5_DA_IS_MCBC; } static void ath12k_hw_qcn9274_compact_rx_desc_get_dot11_hdr(struct hal_rx_desc *desc, @@ -975,11 +969,6 @@ ath12k_hw_qcn9274_compact_rx_desc_get_crypto_hdr(struct hal_rx_desc *desc, HAL_RX_MPDU_INFO_PN_GET_BYTE2(desc->u.qcn9274_compact.mpdu_start.pn[1]); } -static u16 ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_frame_ctl(struct hal_rx_desc *desc) -{ - return __le16_to_cpu(desc->u.qcn9274_compact.mpdu_start.frame_ctrl); -} - static bool ath12k_hw_qcn9274_compact_dp_rx_h_msdu_done(struct hal_rx_desc *desc) { return !!le32_get_bits(desc->u.qcn9274_compact.msdu_end.info14, @@ -1080,8 +1069,6 @@ const struct hal_rx_ops hal_rx_qcn9274_compact_ops = { .rx_desc_is_da_mcbc = ath12k_hw_qcn9274_compact_rx_desc_is_da_mcbc, .rx_desc_get_dot11_hdr = ath12k_hw_qcn9274_compact_rx_desc_get_dot11_hdr, .rx_desc_get_crypto_header = ath12k_hw_qcn9274_compact_rx_desc_get_crypto_hdr, - .rx_desc_get_mpdu_frame_ctl = - ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_frame_ctl, .dp_rx_h_msdu_done = ath12k_hw_qcn9274_compact_dp_rx_h_msdu_done, .dp_rx_h_l4_cksum_fail = ath12k_hw_qcn9274_compact_dp_rx_h_l4_cksum_fail, .dp_rx_h_ip_cksum_fail = ath12k_hw_qcn9274_compact_dp_rx_h_ip_cksum_fail, @@ -1330,11 +1317,6 @@ static void ath12k_hw_wcn7850_rx_desc_get_crypto_hdr(struct hal_rx_desc *desc, crypto_hdr[7] = HAL_RX_MPDU_INFO_PN_GET_BYTE2(desc->u.wcn7850.mpdu_start.pn[1]); } -static u16 ath12k_hw_wcn7850_rx_desc_get_mpdu_frame_ctl(struct hal_rx_desc *desc) -{ - return __le16_to_cpu(desc->u.wcn7850.mpdu_start.frame_ctrl); -} - static int ath12k_hal_srng_create_config_wcn7850(struct ath12k_base *ab) { struct ath12k_hal *hal = &ab->hal; @@ -1371,9 +1353,9 @@ static int ath12k_hal_srng_create_config_wcn7850(struct ath12k_base *ab) s = &hal->srng_config[HAL_TCL_DATA]; s->max_rings = 5; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_BASE_LSB; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_BASE_LSB(ab); s->reg_start[1] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_HP; - s->reg_size[0] = HAL_TCL2_RING_BASE_LSB - HAL_TCL1_RING_BASE_LSB; + s->reg_size[0] = HAL_TCL2_RING_BASE_LSB(ab) - HAL_TCL1_RING_BASE_LSB(ab); s->reg_size[1] = HAL_TCL2_RING_HP - HAL_TCL1_RING_HP; s = 
&hal->srng_config[HAL_TCL_CMD]; @@ -1386,31 +1368,31 @@ static int ath12k_hal_srng_create_config_wcn7850(struct ath12k_base *ab) s = &hal->srng_config[HAL_CE_SRC]; s->max_rings = 12; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG + HAL_CE_DST_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG + HAL_CE_DST_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG - - HAL_SEQ_WCSS_UMAC_CE0_SRC_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG - - HAL_SEQ_WCSS_UMAC_CE0_SRC_REG; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) + HAL_CE_DST_RING_BASE_LSB; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) + HAL_CE_DST_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab); s = &hal->srng_config[HAL_CE_DST]; s->max_rings = 12; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_RING_BASE_LSB; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); s = &hal->srng_config[HAL_CE_DST_STATUS]; s->max_rings = 12; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_STATUS_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_STATUS_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_STATUS_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); s = &hal->srng_config[HAL_WBM_IDLE_LINK]; s->reg_start[0] = HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM_IDLE_LINK_RING_BASE_LSB(ab); @@ -1555,7 +1537,6 @@ const struct hal_rx_ops hal_rx_wcn7850_ops = { .rx_desc_is_da_mcbc = ath12k_hw_wcn7850_rx_desc_is_da_mcbc, .rx_desc_get_dot11_hdr = ath12k_hw_wcn7850_rx_desc_get_dot11_hdr, .rx_desc_get_crypto_header = ath12k_hw_wcn7850_rx_desc_get_crypto_hdr, - .rx_desc_get_mpdu_frame_ctl = ath12k_hw_wcn7850_rx_desc_get_mpdu_frame_ctl, .dp_rx_h_msdu_done = ath12k_hw_wcn7850_dp_rx_h_msdu_done, .dp_rx_h_l4_cksum_fail = ath12k_hw_wcn7850_dp_rx_h_l4_cksum_fail, .dp_rx_h_ip_cksum_fail = ath12k_hw_wcn7850_dp_rx_h_ip_cksum_fail, @@ -1756,7 +1737,7 @@ static void ath12k_hal_srng_src_hw_init(struct ath12k_base *ab, HAL_TCL1_RING_BASE_MSB_RING_BASE_ADDR_MSB) | u32_encode_bits((srng->entry_size * srng->num_entries), HAL_TCL1_RING_BASE_MSB_RING_SIZE); - ath12k_hif_write32(ab, reg_base + HAL_TCL1_RING_BASE_MSB_OFFSET, val); + ath12k_hif_write32(ab, reg_base + HAL_TCL1_RING_BASE_MSB_OFFSET(ab), val); val = u32_encode_bits(srng->entry_size, HAL_REO1_RING_ID_ENTRY_SIZE); ath12k_hif_write32(ab, reg_base + HAL_TCL1_RING_ID_OFFSET(ab), val); diff --git a/drivers/net/wireless/ath/ath12k/hal.h b/drivers/net/wireless/ath/ath12k/hal.h index 
94e2e8735958..4adef9240409 100644 --- a/drivers/net/wireless/ath/ath12k/hal.h +++ b/drivers/net/wireless/ath/ath12k/hal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef ATH12K_HAL_H @@ -11,6 +11,7 @@ #include "rx_desc.h" struct ath12k_base; +#define HAL_CE_REMAP_REG_BASE (ab->ce_remap_base_addr) #define HAL_LINK_DESC_SIZE (32 << 2) #define HAL_LINK_DESC_ALIGN 128 @@ -21,6 +22,7 @@ struct ath12k_base; #define HAL_MAX_AVAIL_BLK_RES 3 #define HAL_RING_BASE_ALIGN 8 +#define HAL_REO_QLUT_ADDR_ALIGN 256 #define HAL_WBM_IDLE_SCATTER_BUF_SIZE_MAX 32704 /* TODO: Check with hw team on the supported scatter buf size */ @@ -39,15 +41,21 @@ struct ath12k_base; #define HAL_OFFSET_FROM_HP_TO_TP 4 #define HAL_SHADOW_REG(x) (HAL_SHADOW_BASE_ADDR + (4 * (x))) +#define HAL_REO_QDESC_MAX_PEERID 8191 /* WCSS Relative address */ +#define HAL_SEQ_WCSS_CMEM_OFFSET 0x00100000 #define HAL_SEQ_WCSS_UMAC_OFFSET 0x00a00000 #define HAL_SEQ_WCSS_UMAC_REO_REG 0x00a38000 #define HAL_SEQ_WCSS_UMAC_TCL_REG 0x00a44000 -#define HAL_SEQ_WCSS_UMAC_CE0_SRC_REG 0x01b80000 -#define HAL_SEQ_WCSS_UMAC_CE0_DST_REG 0x01b81000 -#define HAL_SEQ_WCSS_UMAC_CE1_SRC_REG 0x01b82000 -#define HAL_SEQ_WCSS_UMAC_CE1_DST_REG 0x01b83000 +#define HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) \ + ((ab)->hw_params->regs->hal_umac_ce0_src_reg_base) +#define HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) \ + ((ab)->hw_params->regs->hal_umac_ce0_dest_reg_base) +#define HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) \ + ((ab)->hw_params->regs->hal_umac_ce1_src_reg_base) +#define HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) \ + ((ab)->hw_params->regs->hal_umac_ce1_dest_reg_base) #define HAL_SEQ_WCSS_UMAC_WBM_REG 0x00a34000 #define HAL_CE_WFSS_CE_REG_BASE 0x01b80000 @@ -57,8 +65,10 @@ struct ath12k_base; /* SW2TCL(x) R0 ring configuration address */ #define HAL_TCL1_RING_CMN_CTRL_REG 0x00000020 #define HAL_TCL1_RING_DSCP_TID_MAP 0x00000240 -#define HAL_TCL1_RING_BASE_LSB 0x00000900 -#define HAL_TCL1_RING_BASE_MSB 0x00000904 +#define HAL_TCL1_RING_BASE_LSB(ab) \ + ((ab)->hw_params->regs->hal_tcl1_ring_base_lsb) +#define HAL_TCL1_RING_BASE_MSB(ab) \ + ((ab)->hw_params->regs->hal_tcl1_ring_base_msb) #define HAL_TCL1_RING_ID(ab) ((ab)->hw_params->regs->hal_tcl1_ring_id) #define HAL_TCL1_RING_MISC(ab) \ ((ab)->hw_params->regs->hal_tcl1_ring_misc) @@ -76,30 +86,31 @@ struct ath12k_base; ((ab)->hw_params->regs->hal_tcl1_ring_msi1_base_msb) #define HAL_TCL1_RING_MSI1_DATA(ab) \ ((ab)->hw_params->regs->hal_tcl1_ring_msi1_data) -#define HAL_TCL2_RING_BASE_LSB 0x00000978 +#define HAL_TCL2_RING_BASE_LSB(ab) \ + ((ab)->hw_params->regs->hal_tcl2_ring_base_lsb) #define HAL_TCL_RING_BASE_LSB(ab) \ ((ab)->hw_params->regs->hal_tcl_ring_base_lsb) -#define HAL_TCL1_RING_MSI1_BASE_LSB_OFFSET(ab) \ - (HAL_TCL1_RING_MSI1_BASE_LSB(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_MSI1_BASE_MSB_OFFSET(ab) \ - (HAL_TCL1_RING_MSI1_BASE_MSB(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_MSI1_DATA_OFFSET(ab) \ - (HAL_TCL1_RING_MSI1_DATA(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_BASE_MSB_OFFSET \ - (HAL_TCL1_RING_BASE_MSB - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_ID_OFFSET(ab) \ - (HAL_TCL1_RING_ID(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_CONSR_INT_SETUP_IX0_OFFSET(ab) \ - (HAL_TCL1_RING_CONSUMER_INT_SETUP_IX0(ab) - 
HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_CONSR_INT_SETUP_IX1_OFFSET(ab) \ - (HAL_TCL1_RING_CONSUMER_INT_SETUP_IX1(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_TP_ADDR_LSB_OFFSET(ab) \ - (HAL_TCL1_RING_TP_ADDR_LSB(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_TP_ADDR_MSB_OFFSET(ab) \ - (HAL_TCL1_RING_TP_ADDR_MSB(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_MISC_OFFSET(ab) \ - (HAL_TCL1_RING_MISC(ab) - HAL_TCL1_RING_BASE_LSB) +#define HAL_TCL1_RING_MSI1_BASE_LSB_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_MSI1_BASE_LSB(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_MSI1_BASE_MSB_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_MSI1_BASE_MSB(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_MSI1_DATA_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_MSI1_DATA(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_BASE_MSB_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_BASE_MSB(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_ID_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_ID(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_CONSR_INT_SETUP_IX0_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_CONSUMER_INT_SETUP_IX0(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_CONSR_INT_SETUP_IX1_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_CONSUMER_INT_SETUP_IX1(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_TP_ADDR_LSB_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_TP_ADDR_LSB(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_TP_ADDR_MSB_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_TP_ADDR_MSB(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_MISC_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_MISC(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) /* SW2TCL(x) R2 ring pointers (head/tail) address */ #define HAL_TCL1_RING_HP 0x00002000 @@ -132,6 +143,8 @@ struct ath12k_base; #define HAL_REO1_DEST_RING_CTRL_IX_1 0x00000008 #define HAL_REO1_DEST_RING_CTRL_IX_2 0x0000000c #define HAL_REO1_DEST_RING_CTRL_IX_3 0x00000010 +#define HAL_REO1_QDESC_ADDR(ab) ((ab)->hw_params->regs->hal_reo1_qdesc_addr) +#define HAL_REO1_QDESC_MAX_PEERID(ab) ((ab)->hw_params->regs->hal_reo1_qdesc_max_peerid) #define HAL_REO1_SW_COOKIE_CFG0(ab) ((ab)->hw_params->regs->hal_reo1_sw_cookie_cfg0) #define HAL_REO1_SW_COOKIE_CFG1(ab) ((ab)->hw_params->regs->hal_reo1_sw_cookie_cfg1) #define HAL_REO1_QDESC_LUT_BASE0(ab) ((ab)->hw_params->regs->hal_reo1_qdesc_lut_base0) @@ -319,6 +332,8 @@ struct ath12k_base; #define HAL_REO1_SW_COOKIE_CFG_ALIGN BIT(18) #define HAL_REO1_SW_COOKIE_CFG_ENABLE BIT(19) #define HAL_REO1_SW_COOKIE_CFG_GLOBAL_ENABLE BIT(20) +#define HAL_REO_QDESC_ADDR_READ_LUT_ENABLE BIT(7) +#define HAL_REO_QDESC_ADDR_READ_CLEAR_QDESC_ARRAY BIT(6) /* CE ring bit field mask and shift */ #define HAL_CE_DST_R0_DEST_CTRL_MAX_LEN GENMASK(15, 0) @@ -365,6 +380,9 @@ struct ath12k_base; * ath12k_hal_rx_desc_get_err(). 
*/ +#define HAL_IPQ5332_CE_WFSS_REG_BASE 0x740000 +#define HAL_IPQ5332_CE_SIZE 0x100000 + enum hal_srng_ring_id { HAL_SRNG_RING_ID_REO2SW0 = 0, HAL_SRNG_RING_ID_REO2SW1, @@ -1068,7 +1086,6 @@ struct hal_rx_ops { bool (*rx_desc_is_da_mcbc)(struct hal_rx_desc *desc); void (*rx_desc_get_dot11_hdr)(struct hal_rx_desc *desc, struct ieee80211_hdr *hdr); - u16 (*rx_desc_get_mpdu_frame_ctl)(struct hal_rx_desc *desc); void (*rx_desc_get_crypto_header)(struct hal_rx_desc *desc, u8 *crypto_hdr, enum hal_encrypt_type enctype); @@ -1154,4 +1171,5 @@ int ath12k_hal_srng_update_shadow_config(struct ath12k_base *ab, void ath12k_hal_srng_shadow_config(struct ath12k_base *ab); void ath12k_hal_srng_shadow_update_hp_tp(struct ath12k_base *ab, struct hal_srng *srng); +void ath12k_hal_reo_shared_qaddr_cache_clear(struct ath12k_base *ab); #endif diff --git a/drivers/net/wireless/ath/ath12k/hal_desc.h b/drivers/net/wireless/ath/ath12k/hal_desc.h index 3e8983b85de8..49eededbfa9d 100644 --- a/drivers/net/wireless/ath/ath12k/hal_desc.h +++ b/drivers/net/wireless/ath/ath12k/hal_desc.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2022, 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "core.h" @@ -1284,11 +1284,13 @@ enum hal_tcl_encap_type { HAL_TCL_ENCAP_TYPE_NATIVE_WIFI, HAL_TCL_ENCAP_TYPE_ETHERNET, HAL_TCL_ENCAP_TYPE_802_3 = 3, + HAL_TCL_ENCAP_TYPE_MAX }; enum hal_tcl_desc_type { HAL_TCL_DESC_TYPE_BUFFER, HAL_TCL_DESC_TYPE_EXT_DESC, + HAL_TCL_DESC_TYPE_MAX, }; enum hal_wbm_htt_tx_comp_status { @@ -1298,6 +1300,7 @@ enum hal_wbm_htt_tx_comp_status { HAL_WBM_REL_HTT_TX_COMP_STATUS_REINJ, HAL_WBM_REL_HTT_TX_COMP_STATUS_INSPECT, HAL_WBM_REL_HTT_TX_COMP_STATUS_MEC_NOTIFY, + HAL_WBM_REL_HTT_TX_COMP_STATUS_VDEVID_MISMATCH, HAL_WBM_REL_HTT_TX_COMP_STATUS_MAX, }; @@ -1998,6 +2001,7 @@ struct hal_wbm_release_ring_cc_rx { #define HAL_WBM_RELEASE_INFO3_CONTINUATION BIT(2) #define HAL_WBM_RELEASE_INFO5_LOOPING_COUNT GENMASK(31, 28) +#define HAL_ENCRYPT_TYPE_MAX 12 struct hal_wbm_release_ring { struct ath12k_buffer_addr buf_addr_info; diff --git a/drivers/net/wireless/ath/ath12k/hal_rx.c b/drivers/net/wireless/ath/ath12k/hal_rx.c index ac17d6223fa7..98eeccc68fcd 100644 --- a/drivers/net/wireless/ath/ath12k/hal_rx.c +++ b/drivers/net/wireless/ath/ath12k/hal_rx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "debug.h" @@ -851,3 +851,20 @@ void ath12k_hal_reo_hw_setup(struct ath12k_base *ab, u32 ring_hash_map) ath12k_hif_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_3, ring_hash_map); } + +void ath12k_hal_reo_shared_qaddr_cache_clear(struct ath12k_base *ab) +{ + u32 val; + + lockdep_assert_held(&ab->base_lock); + val = ath12k_hif_read32(ab, HAL_SEQ_WCSS_UMAC_REO_REG + + HAL_REO1_QDESC_ADDR(ab)); + + val |= u32_encode_bits(1, HAL_REO_QDESC_ADDR_READ_CLEAR_QDESC_ARRAY); + ath12k_hif_write32(ab, HAL_SEQ_WCSS_UMAC_REO_REG + + HAL_REO1_QDESC_ADDR(ab), val); + + val &= ~HAL_REO_QDESC_ADDR_READ_CLEAR_QDESC_ARRAY; + ath12k_hif_write32(ab, HAL_SEQ_WCSS_UMAC_REO_REG + + HAL_REO1_QDESC_ADDR(ab), val); +} diff --git a/drivers/net/wireless/ath/ath12k/hal_rx.h b/drivers/net/wireless/ath/ath12k/hal_rx.h index 6bdcd0867d86..c753eb2a03ad 100644 --- a/drivers/net/wireless/ath/ath12k/hal_rx.h +++ b/drivers/net/wireless/ath/ath12k/hal_rx.h @@ -108,11 +108,12 @@ enum hal_rx_mon_status { HAL_RX_MON_STATUS_PPDU_DONE, HAL_RX_MON_STATUS_BUF_DONE, HAL_RX_MON_STATUS_BUF_ADDR, + HAL_RX_MON_STATUS_MPDU_START, HAL_RX_MON_STATUS_MPDU_END, HAL_RX_MON_STATUS_MSDU_END, }; -#define HAL_RX_MAX_MPDU 256 +#define HAL_RX_MAX_MPDU 1024 #define HAL_RX_NUM_WORDS_PER_PPDU_BITMAP (HAL_RX_MAX_MPDU >> 5) struct hal_rx_user_status { @@ -506,6 +507,18 @@ struct hal_rx_mpdu_start { __le32 rsvd2[16]; } __packed; +struct hal_rx_msdu_end { + __le32 info0; + __le32 rsvd0[9]; + __le16 info00; + __le16 info01; + __le32 rsvd00[8]; + __le32 info1; + __le32 rsvd1[10]; + __le32 info2; + __le32 rsvd2; +} __packed; + #define HAL_RX_PPDU_END_DURATION GENMASK(23, 0) struct hal_rx_ppdu_end_duration { __le32 rsvd0[9]; diff --git a/drivers/net/wireless/ath/ath12k/hw.c b/drivers/net/wireless/ath/ath12k/hw.c index a106ebed7870..a46d82857c5d 100644 --- a/drivers/net/wireless/ath/ath12k/hw.c +++ b/drivers/net/wireless/ath/ath12k/hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include <linux/types.h> @@ -535,6 +535,217 @@ static const struct service_to_pipe ath12k_target_service_to_ce_map_wlan_wcn7850 }, }; +static const struct ce_pipe_config ath12k_target_ce_config_wlan_ipq5332[] = { + /* host->target HTC control and raw streams */ + { + .pipenum = __cpu_to_le32(0), + .pipedir = __cpu_to_le32(PIPEDIR_OUT), + .nentries = __cpu_to_le32(32), + .nbytes_max = __cpu_to_le32(2048), + .flags = __cpu_to_le32(CE_ATTR_FLAGS), + .reserved = __cpu_to_le32(0), + }, + /* target->host HTT */ + { + .pipenum = __cpu_to_le32(1), + .pipedir = __cpu_to_le32(PIPEDIR_IN), + .nentries = __cpu_to_le32(32), + .nbytes_max = __cpu_to_le32(2048), + .flags = __cpu_to_le32(CE_ATTR_FLAGS), + .reserved = __cpu_to_le32(0), + }, + /* target->host WMI + HTC control */ + { + .pipenum = __cpu_to_le32(2), + .pipedir = __cpu_to_le32(PIPEDIR_IN), + .nentries = __cpu_to_le32(32), + .nbytes_max = __cpu_to_le32(2048), + .flags = __cpu_to_le32(CE_ATTR_FLAGS), + .reserved = __cpu_to_le32(0), + }, + /* host->target WMI */ + { + .pipenum = __cpu_to_le32(3), + .pipedir = __cpu_to_le32(PIPEDIR_OUT), + .nentries = __cpu_to_le32(32), + .nbytes_max = __cpu_to_le32(2048), + .flags = __cpu_to_le32(CE_ATTR_FLAGS), + .reserved = __cpu_to_le32(0), + }, + /* host->target HTT */ + { + .pipenum = __cpu_to_le32(4), + .pipedir = __cpu_to_le32(PIPEDIR_OUT), + .nentries = __cpu_to_le32(256), + .nbytes_max = __cpu_to_le32(256), + .flags = __cpu_to_le32(CE_ATTR_FLAGS | CE_ATTR_DIS_INTR), + .reserved = __cpu_to_le32(0), + }, + /* Target -> host PKTLOG */ + { + .pipenum = __cpu_to_le32(5), + .pipedir = __cpu_to_le32(PIPEDIR_IN), + .nentries = __cpu_to_le32(32), + .nbytes_max = __cpu_to_le32(2048), + .flags = __cpu_to_le32(CE_ATTR_FLAGS), + .reserved = __cpu_to_le32(0), + }, + /* Reserved for target autonomous HIF_memcpy */ + { + .pipenum = __cpu_to_le32(6), + .pipedir = __cpu_to_le32(PIPEDIR_INOUT), + .nentries = __cpu_to_le32(32), + .nbytes_max = __cpu_to_le32(16384), + .flags = __cpu_to_le32(CE_ATTR_FLAGS), + .reserved = __cpu_to_le32(0), + }, + /* CE7 Reserved for CV Prefetch */ + { + .pipenum = __cpu_to_le32(7), + .pipedir = __cpu_to_le32(PIPEDIR_OUT), + .nentries = __cpu_to_le32(32), + .nbytes_max = __cpu_to_le32(2048), + .flags = __cpu_to_le32(CE_ATTR_FLAGS), + .reserved = __cpu_to_le32(0), + }, + /* CE8 Reserved for target generic HIF memcpy */ + { + .pipenum = __cpu_to_le32(8), + .pipedir = __cpu_to_le32(PIPEDIR_INOUT), + .nentries = __cpu_to_le32(32), + .nbytes_max = __cpu_to_le32(16384), + .flags = __cpu_to_le32(CE_ATTR_FLAGS), + .reserved = __cpu_to_le32(0), + }, + /* CE9 WMI logging/CFR/Spectral/Radar/ */ + { + .pipenum = __cpu_to_le32(9), + .pipedir = __cpu_to_le32(PIPEDIR_IN), + .nentries = __cpu_to_le32(32), + .nbytes_max = __cpu_to_le32(2048), + .flags = __cpu_to_le32(CE_ATTR_FLAGS), + .reserved = __cpu_to_le32(0), + }, + /* Unused TBD */ + { + .pipenum = __cpu_to_le32(10), + .pipedir = __cpu_to_le32(PIPEDIR_NONE), + .nentries = __cpu_to_le32(0), + .nbytes_max = __cpu_to_le32(0), + .flags = __cpu_to_le32(0), + .reserved = __cpu_to_le32(0), + }, + /* Unused TBD */ + { + .pipenum = __cpu_to_le32(11), + .pipedir = __cpu_to_le32(PIPEDIR_NONE), + .nentries = __cpu_to_le32(0), + .nbytes_max = __cpu_to_le32(0), + .flags = __cpu_to_le32(0), + .reserved = __cpu_to_le32(0), + }, +}; + +static const struct service_to_pipe ath12k_target_service_to_ce_map_wlan_ipq5332[] = { + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_WMI_DATA_VO), + __cpu_to_le32(PIPEDIR_OUT), + __cpu_to_le32(3), + }, + { + 
__cpu_to_le32(ATH12K_HTC_SVC_ID_WMI_DATA_VO), + __cpu_to_le32(PIPEDIR_IN), + __cpu_to_le32(2), + }, + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_WMI_DATA_BK), + __cpu_to_le32(PIPEDIR_OUT), + __cpu_to_le32(3), + }, + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_WMI_DATA_BK), + __cpu_to_le32(PIPEDIR_IN), + __cpu_to_le32(2), + }, + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_WMI_DATA_BE), + __cpu_to_le32(PIPEDIR_OUT), + __cpu_to_le32(3), + }, + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_WMI_DATA_BE), + __cpu_to_le32(PIPEDIR_IN), + __cpu_to_le32(2), + }, + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_WMI_DATA_VI), + __cpu_to_le32(PIPEDIR_OUT), + __cpu_to_le32(3), + }, + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_WMI_DATA_VI), + __cpu_to_le32(PIPEDIR_IN), + __cpu_to_le32(2), + }, + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_WMI_CONTROL), + __cpu_to_le32(PIPEDIR_OUT), + __cpu_to_le32(3), + }, + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_WMI_CONTROL), + __cpu_to_le32(PIPEDIR_IN), + __cpu_to_le32(2), + }, + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_RSVD_CTRL), + __cpu_to_le32(PIPEDIR_OUT), + __cpu_to_le32(0), + }, + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_RSVD_CTRL), + __cpu_to_le32(PIPEDIR_IN), + __cpu_to_le32(1), + }, + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_TEST_RAW_STREAMS), + __cpu_to_le32(PIPEDIR_OUT), + __cpu_to_le32(0), + }, + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_TEST_RAW_STREAMS), + __cpu_to_le32(PIPEDIR_IN), + __cpu_to_le32(1), + }, + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_HTT_DATA_MSG), + __cpu_to_le32(PIPEDIR_OUT), + __cpu_to_le32(4), + }, + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_HTT_DATA_MSG), + __cpu_to_le32(PIPEDIR_IN), + __cpu_to_le32(1), + }, + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_PKT_LOG), + __cpu_to_le32(PIPEDIR_IN), + __cpu_to_le32(5), + }, + { + __cpu_to_le32(ATH12K_HTC_SVC_ID_WMI_CONTROL_DIAG), + __cpu_to_le32(PIPEDIR_IN), + __cpu_to_le32(9), + }, + /* (Additions here) */ + + { /* must be last */ + __cpu_to_le32(0), + __cpu_to_le32(0), + __cpu_to_le32(0), + }, +}; + static const struct ath12k_hw_ring_mask ath12k_hw_ring_mask_qcn9274 = { .tx = { ATH12K_TX_RING_MASK_0, @@ -577,6 +788,46 @@ static const struct ath12k_hw_ring_mask ath12k_hw_ring_mask_qcn9274 = { }, }; +static const struct ath12k_hw_ring_mask ath12k_hw_ring_mask_ipq5332 = { + .tx = { + ATH12K_TX_RING_MASK_0, + ATH12K_TX_RING_MASK_1, + ATH12K_TX_RING_MASK_2, + ATH12K_TX_RING_MASK_3, + }, + .rx_mon_dest = { + 0, 0, 0, 0, 0, 0, 0, 0, + ATH12K_RX_MON_RING_MASK_0, + }, + .rx = { + 0, 0, 0, 0, + ATH12K_RX_RING_MASK_0, + ATH12K_RX_RING_MASK_1, + ATH12K_RX_RING_MASK_2, + ATH12K_RX_RING_MASK_3, + }, + .rx_err = { + 0, 0, 0, + ATH12K_RX_ERR_RING_MASK_0, + }, + .rx_wbm_rel = { + 0, 0, 0, + ATH12K_RX_WBM_REL_RING_MASK_0, + }, + .reo_status = { + 0, 0, 0, + ATH12K_REO_STATUS_RING_MASK_0, + }, + .host2rxdma = { + 0, 0, 0, + ATH12K_HOST2RXDMA_RING_MASK_0, + }, + .tx_mon_dest = { + ATH12K_TX_MON_RING_MASK_0, + ATH12K_TX_MON_RING_MASK_1, + }, +}; + static const struct ath12k_hw_ring_mask ath12k_hw_ring_mask_wcn7850 = { .tx = { ATH12K_TX_RING_MASK_0, @@ -619,6 +870,9 @@ static const struct ath12k_hw_regs qcn9274_v1_regs = { .hal_tcl1_ring_msi1_base_msb = 0x0000094c, .hal_tcl1_ring_msi1_data = 0x00000950, .hal_tcl_ring_base_lsb = 0x00000b58, + .hal_tcl1_ring_base_lsb = 0x00000900, + .hal_tcl1_ring_base_msb = 0x00000904, + .hal_tcl2_ring_base_lsb = 0x00000978, /* TCL STATUS ring address */ .hal_tcl_status_ring_base_lsb = 0x00000d38, @@ -681,6 +935,12 @@ static const struct ath12k_hw_regs qcn9274_v1_regs = { /* REO status ring address */ .hal_reo_status_ring_base = 0x00000a84, + + /* CE base address */ + 
.hal_umac_ce0_src_reg_base = 0x01b80000, + .hal_umac_ce0_dest_reg_base = 0x01b81000, + .hal_umac_ce1_src_reg_base = 0x01b82000, + .hal_umac_ce1_dest_reg_base = 0x01b83000, }; static const struct ath12k_hw_regs qcn9274_v2_regs = { @@ -695,6 +955,9 @@ static const struct ath12k_hw_regs qcn9274_v2_regs = { .hal_tcl1_ring_msi1_base_msb = 0x0000094c, .hal_tcl1_ring_msi1_data = 0x00000950, .hal_tcl_ring_base_lsb = 0x00000b58, + .hal_tcl1_ring_base_lsb = 0x00000900, + .hal_tcl1_ring_base_msb = 0x00000904, + .hal_tcl2_ring_base_lsb = 0x00000978, /* TCL STATUS ring address */ .hal_tcl_status_ring_base_lsb = 0x00000d38, @@ -734,6 +997,8 @@ static const struct ath12k_hw_regs qcn9274_v2_regs = { .hal_reo1_sw_cookie_cfg1 = 0x00000070, .hal_reo1_qdesc_lut_base0 = 0x00000074, .hal_reo1_qdesc_lut_base1 = 0x00000078, + .hal_reo1_qdesc_addr = 0x0000007c, + .hal_reo1_qdesc_max_peerid = 0x00000088, .hal_reo1_ring_base_lsb = 0x00000500, .hal_reo1_ring_base_msb = 0x00000504, .hal_reo1_ring_id = 0x00000508, @@ -761,6 +1026,100 @@ static const struct ath12k_hw_regs qcn9274_v2_regs = { /* REO status ring address */ .hal_reo_status_ring_base = 0x00000aa0, + + /* CE base address */ + .hal_umac_ce0_src_reg_base = 0x01b80000, + .hal_umac_ce0_dest_reg_base = 0x01b81000, + .hal_umac_ce1_src_reg_base = 0x01b82000, + .hal_umac_ce1_dest_reg_base = 0x01b83000, +}; + +static const struct ath12k_hw_regs ipq5332_regs = { + /* SW2TCL(x) R0 ring configuration address */ + .hal_tcl1_ring_id = 0x00000918, + .hal_tcl1_ring_misc = 0x00000920, + .hal_tcl1_ring_tp_addr_lsb = 0x0000092c, + .hal_tcl1_ring_tp_addr_msb = 0x00000930, + .hal_tcl1_ring_consumer_int_setup_ix0 = 0x00000940, + .hal_tcl1_ring_consumer_int_setup_ix1 = 0x00000944, + .hal_tcl1_ring_msi1_base_lsb = 0x00000958, + .hal_tcl1_ring_msi1_base_msb = 0x0000095c, + .hal_tcl1_ring_base_lsb = 0x00000910, + .hal_tcl1_ring_base_msb = 0x00000914, + .hal_tcl1_ring_msi1_data = 0x00000960, + .hal_tcl2_ring_base_lsb = 0x00000988, + .hal_tcl_ring_base_lsb = 0x00000b68, + + /* TCL STATUS ring address */ + .hal_tcl_status_ring_base_lsb = 0x00000d48, + + /* REO DEST ring address */ + .hal_reo2_ring_base = 0x00000578, + .hal_reo1_misc_ctrl_addr = 0x00000b9c, + .hal_reo1_sw_cookie_cfg0 = 0x0000006c, + .hal_reo1_sw_cookie_cfg1 = 0x00000070, + .hal_reo1_qdesc_lut_base0 = 0x00000074, + .hal_reo1_qdesc_lut_base1 = 0x00000078, + .hal_reo1_ring_base_lsb = 0x00000500, + .hal_reo1_ring_base_msb = 0x00000504, + .hal_reo1_ring_id = 0x00000508, + .hal_reo1_ring_misc = 0x00000510, + .hal_reo1_ring_hp_addr_lsb = 0x00000514, + .hal_reo1_ring_hp_addr_msb = 0x00000518, + .hal_reo1_ring_producer_int_setup = 0x00000524, + .hal_reo1_ring_msi1_base_lsb = 0x00000548, + .hal_reo1_ring_msi1_base_msb = 0x0000054C, + .hal_reo1_ring_msi1_data = 0x00000550, + .hal_reo1_aging_thres_ix0 = 0x00000B28, + .hal_reo1_aging_thres_ix1 = 0x00000B2C, + .hal_reo1_aging_thres_ix2 = 0x00000B30, + .hal_reo1_aging_thres_ix3 = 0x00000B34, + + /* REO Exception ring address */ + .hal_reo2_sw0_ring_base = 0x000008c0, + + /* REO Reinject ring address */ + .hal_sw2reo_ring_base = 0x00000320, + .hal_sw2reo1_ring_base = 0x00000398, + + /* REO cmd ring address */ + .hal_reo_cmd_ring_base = 0x000002A8, + + /* REO status ring address */ + .hal_reo_status_ring_base = 0x00000aa0, + + /* WBM idle link ring address */ + .hal_wbm_idle_ring_base_lsb = 0x00000d3c, + .hal_wbm_idle_ring_misc_addr = 0x00000d4c, + .hal_wbm_r0_idle_list_cntl_addr = 0x00000240, + .hal_wbm_r0_idle_list_size_addr = 0x00000244, + .hal_wbm_scattered_ring_base_lsb = 
0x00000250, + .hal_wbm_scattered_ring_base_msb = 0x00000254, + .hal_wbm_scattered_desc_head_info_ix0 = 0x00000260, + .hal_wbm_scattered_desc_head_info_ix1 = 0x00000264, + .hal_wbm_scattered_desc_tail_info_ix0 = 0x00000270, + .hal_wbm_scattered_desc_tail_info_ix1 = 0x00000274, + .hal_wbm_scattered_desc_ptr_hp_addr = 0x0000027c, + + /* SW2WBM release ring address */ + .hal_wbm_sw_release_ring_base_lsb = 0x0000037c, + + /* WBM2SW release ring address */ + .hal_wbm0_release_ring_base_lsb = 0x00000e08, + .hal_wbm1_release_ring_base_lsb = 0x00000e80, + + /* PPE release ring address */ + .hal_ppe_rel_ring_base = 0x0000046c, + + /* CE address */ + .hal_umac_ce0_src_reg_base = 0x00740000 - + HAL_IPQ5332_CE_WFSS_REG_BASE, + .hal_umac_ce0_dest_reg_base = 0x00741000 - + HAL_IPQ5332_CE_WFSS_REG_BASE, + .hal_umac_ce1_src_reg_base = 0x00742000 - + HAL_IPQ5332_CE_WFSS_REG_BASE, + .hal_umac_ce1_dest_reg_base = 0x00743000 - + HAL_IPQ5332_CE_WFSS_REG_BASE, }; static const struct ath12k_hw_regs wcn7850_regs = { @@ -775,6 +1134,9 @@ static const struct ath12k_hw_regs wcn7850_regs = { .hal_tcl1_ring_msi1_base_msb = 0x0000094c, .hal_tcl1_ring_msi1_data = 0x00000950, .hal_tcl_ring_base_lsb = 0x00000b58, + .hal_tcl1_ring_base_lsb = 0x00000900, + .hal_tcl1_ring_base_msb = 0x00000904, + .hal_tcl2_ring_base_lsb = 0x00000978, /* TCL STATUS ring address */ .hal_tcl_status_ring_base_lsb = 0x00000d38, @@ -837,6 +1199,12 @@ static const struct ath12k_hw_regs wcn7850_regs = { /* REO status ring address */ .hal_reo_status_ring_base = 0x00000a84, + + /* CE base address */ + .hal_umac_ce0_src_reg_base = 0x01b80000, + .hal_umac_ce0_dest_reg_base = 0x01b81000, + .hal_umac_ce1_src_reg_base = 0x01b82000, + .hal_umac_ce1_dest_reg_base = 0x01b83000, }; static const struct ath12k_hw_hal_params ath12k_hw_hal_params_qcn9274 = { @@ -856,6 +1224,26 @@ static const struct ath12k_hw_hal_params ath12k_hw_hal_params_wcn7850 = { HAL_WBM_SW_COOKIE_CONV_CFG_WBM2SW4_EN, }; +static const struct ath12k_hw_hal_params ath12k_hw_hal_params_ipq5332 = { + .rx_buf_rbm = HAL_RX_BUF_RBM_SW3_BM, + .wbm2sw_cc_enable = HAL_WBM_SW_COOKIE_CONV_CFG_WBM2SW0_EN | + HAL_WBM_SW_COOKIE_CONV_CFG_WBM2SW1_EN | + HAL_WBM_SW_COOKIE_CONV_CFG_WBM2SW2_EN | + HAL_WBM_SW_COOKIE_CONV_CFG_WBM2SW3_EN | + HAL_WBM_SW_COOKIE_CONV_CFG_WBM2SW4_EN, +}; + +static const struct ce_ie_addr ath12k_ce_ie_addr_ipq5332 = { + .ie1_reg_addr = CE_HOST_IE_ADDRESS - HAL_IPQ5332_CE_WFSS_REG_BASE, + .ie2_reg_addr = CE_HOST_IE_2_ADDRESS - HAL_IPQ5332_CE_WFSS_REG_BASE, + .ie3_reg_addr = CE_HOST_IE_3_ADDRESS - HAL_IPQ5332_CE_WFSS_REG_BASE, +}; + +static const struct ce_remap ath12k_ce_remap_ipq5332 = { + .base = HAL_IPQ5332_CE_WFSS_REG_BASE, + .size = HAL_IPQ5332_CE_SIZE, +}; + static const struct ath12k_hw_params ath12k_hw_params[] = { { .name = "qcn9274 hw1.0", @@ -864,6 +1252,7 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .dir = "QCN9274/hw1.0", .board_size = 256 * 1024, .cal_offset = 128 * 1024, + .m3_loader = ath12k_m3_fw_loader_driver, }, .max_radios = 1, .single_pdev_only = false, @@ -899,7 +1288,7 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .download_calib = true, .supports_suspend = false, .tcl_ring_retry = true, - .reoq_lut_support = false, + .reoq_lut_support = true, .supports_shadow_regs = false, .num_tcl_banks = 48, @@ -932,6 +1321,14 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .iova_mask = 0, .supports_aspm = false, + + .ce_ie_addr = NULL, + .ce_remap = NULL, + .bdf_addr_offset = 0, + + .current_cc_support = false, + + 
.dp_primary_link_only = true, }, { .name = "wcn7850 hw2.0", @@ -941,6 +1338,7 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .dir = "WCN7850/hw2.0", .board_size = 256 * 1024, .cal_offset = 256 * 1024, + .m3_loader = ath12k_m3_fw_loader_driver, }, .max_radios = 1, @@ -1012,6 +1410,14 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .iova_mask = ATH12K_PCIE_MAX_PAYLOAD_SIZE - 1, .supports_aspm = true, + + .ce_ie_addr = NULL, + .ce_remap = NULL, + .bdf_addr_offset = 0, + + .current_cc_support = true, + + .dp_primary_link_only = false, }, { .name = "qcn9274 hw2.0", @@ -1020,6 +1426,7 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .dir = "QCN9274/hw2.0", .board_size = 256 * 1024, .cal_offset = 128 * 1024, + .m3_loader = ath12k_m3_fw_loader_driver, }, .max_radios = 2, .single_pdev_only = false, @@ -1049,7 +1456,7 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_MESH_POINT) | BIT(NL80211_IFTYPE_AP_VLAN), - .supports_monitor = false, + .supports_monitor = true, .idle_ps = false, .download_calib = true, @@ -1088,6 +1495,92 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .iova_mask = 0, .supports_aspm = false, + + .ce_ie_addr = NULL, + .ce_remap = NULL, + .bdf_addr_offset = 0, + + .current_cc_support = false, + + .dp_primary_link_only = true, + }, + { + .name = "ipq5332 hw1.0", + .hw_rev = ATH12K_HW_IPQ5332_HW10, + .fw = { + .dir = "IPQ5332/hw1.0", + .board_size = 256 * 1024, + .cal_offset = 128 * 1024, + .m3_loader = ath12k_m3_fw_loader_remoteproc, + }, + .max_radios = 1, + .single_pdev_only = false, + .qmi_service_ins_id = ATH12K_QMI_WLFW_SERVICE_INS_ID_V01_IPQ5332, + .internal_sleep_clock = false, + + .hw_ops = &qcn9274_ops, + .regs = &ipq5332_regs, + .ring_mask = &ath12k_hw_ring_mask_ipq5332, + + .host_ce_config = ath12k_host_ce_config_ipq5332, + .ce_count = 12, + .target_ce_config = ath12k_target_ce_config_wlan_ipq5332, + .target_ce_count = 12, + .svc_to_ce_map = ath12k_target_service_to_ce_map_wlan_ipq5332, + .svc_to_ce_map_len = 18, + + .hal_params = &ath12k_hw_hal_params_ipq5332, + + .rxdma1_enable = false, + .num_rxdma_per_pdev = 1, + .num_rxdma_dst_ring = 0, + .rx_mac_buf_ring = false, + .vdev_start_delay = false, + + .interface_modes = BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_AP) | + BIT(NL80211_IFTYPE_MESH_POINT), + .supports_monitor = false, + + .idle_ps = false, + .download_calib = true, + .supports_suspend = false, + .tcl_ring_retry = true, + .reoq_lut_support = false, + .supports_shadow_regs = false, + + .num_tcl_banks = 48, + .max_tx_ring = 4, + + .wmi_init = &ath12k_wmi_init_qcn9274, + + .hal_ops = &hal_qcn9274_ops, + + .qmi_cnss_feature_bitmap = BIT(CNSS_QDSS_CFG_MISS_V01), + + .rfkill_pin = 0, + .rfkill_cfg = 0, + .rfkill_on_level = 0, + + .rddm_size = 0, + + .def_num_link = 0, + .max_mlo_peer = 256, + + .otp_board_id_register = 0, + + .supports_sta_ps = false, + + .acpi_guid = NULL, + .supports_dynamic_smps_6ghz = false, + .iova_mask = 0, + .supports_aspm = false, + + .ce_ie_addr = &ath12k_ce_ie_addr_ipq5332, + .ce_remap = &ath12k_ce_remap_ipq5332, + .bdf_addr_offset = 0xC00000, + + .dp_primary_link_only = true, }, }; diff --git a/drivers/net/wireless/ath/ath12k/hw.h b/drivers/net/wireless/ath/ath12k/hw.h index 8d52182e28ae..024cfcd2cc15 100644 --- a/drivers/net/wireless/ath/ath12k/hw.h +++ b/drivers/net/wireless/ath/ath12k/hw.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2021 The Linux Foundation. 
All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef ATH12K_HW_H @@ -97,6 +97,7 @@ #define ATH12K_REGDB_FILE_NAME "regdb.bin" #define ATH12K_PCIE_MAX_PAYLOAD_SIZE 128 +#define ATH12K_IPQ5332_USERPD_ID 1 enum ath12k_hw_rate_cck { ATH12K_HW_RATE_CCK_LP_11M = 0, @@ -121,6 +122,7 @@ enum ath12k_hw_rate_ofdm { enum ath12k_bus { ATH12K_BUS_PCI, + ATH12K_BUS_AHB, }; #define ATH12K_EXT_IRQ_GRP_NUM_MAX 11 @@ -146,6 +148,11 @@ struct ath12k_hw_hal_params { u32 wbm2sw_cc_enable; }; +enum ath12k_m3_fw_loaders { + ath12k_m3_fw_loader_driver, + ath12k_m3_fw_loader_remoteproc, +}; + struct ath12k_hw_params { const char *name; u16 hw_rev; @@ -154,6 +161,7 @@ struct ath12k_hw_params { const char *dir; size_t board_size; size_t cal_offset; + enum ath12k_m3_fw_loaders m3_loader; } fw; u8 max_radios; @@ -190,6 +198,7 @@ struct ath12k_hw_params { bool reoq_lut_support:1; bool supports_shadow_regs:1; bool supports_aspm:1; + bool current_cc_support:1; u32 num_tcl_banks; u32 max_tx_ring; @@ -220,6 +229,13 @@ struct ath12k_hw_params { bool supports_dynamic_smps_6ghz; u32 iova_mask; + + const struct ce_ie_addr *ce_ie_addr; + const struct ce_remap *ce_remap; + u32 bdf_addr_offset; + + /* setup REO queue, frag etc only for primary link peer */ + bool dp_primary_link_only:1; }; struct ath12k_hw_ops { @@ -293,9 +309,15 @@ struct ath12k_hw_regs { u32 hal_tcl1_ring_msi1_base_msb; u32 hal_tcl1_ring_msi1_data; u32 hal_tcl_ring_base_lsb; + u32 hal_tcl1_ring_base_lsb; + u32 hal_tcl1_ring_base_msb; + u32 hal_tcl2_ring_base_lsb; u32 hal_tcl_status_ring_base_lsb; + u32 hal_reo1_qdesc_addr; + u32 hal_reo1_qdesc_max_peerid; + u32 hal_wbm_idle_ring_base_lsb; u32 hal_wbm_idle_ring_misc_addr; u32 hal_wbm_r0_idle_list_cntl_addr; @@ -316,6 +338,11 @@ struct ath12k_hw_regs { u32 pcie_qserdes_sysclk_en_sel; u32 pcie_pcs_osc_dtct_config_base; + u32 hal_umac_ce0_src_reg_base; + u32 hal_umac_ce0_dest_reg_base; + u32 hal_umac_ce1_src_reg_base; + u32 hal_umac_ce1_dest_reg_base; + u32 hal_ppe_rel_ring_base; u32 hal_reo2_ring_base; diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index dfa05f0ee6c9..4dae941c9615 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -582,12 +582,26 @@ static int ath12k_mac_vif_link_chan(struct ieee80211_vif *vif, u8 link_id, static struct ath12k_link_vif *ath12k_mac_get_tx_arvif(struct ath12k_link_vif *arvif) { + struct ieee80211_bss_conf *link_conf, *tx_bss_conf; + struct ath12k *ar = arvif->ar; struct ath12k_vif *tx_ahvif; - if (arvif->ahvif->vif->mbssid_tx_vif) { - tx_ahvif = ath12k_vif_to_ahvif(arvif->ahvif->vif->mbssid_tx_vif); - if (tx_ahvif) - return &tx_ahvif->deflink; + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + + link_conf = ath12k_mac_get_link_bss_conf(arvif); + if (!link_conf) { + ath12k_warn(ar->ab, + "unable to access bss link conf for link %u required to retrieve transmitting link conf\n", + arvif->link_id); + return NULL; + } + + tx_bss_conf = wiphy_dereference(ath12k_ar_to_hw(ar)->wiphy, + link_conf->tx_bss_conf); + if (tx_bss_conf) { + tx_ahvif = ath12k_vif_to_ahvif(tx_bss_conf->vif); + return wiphy_dereference(tx_ahvif->ah->hw->wiphy, + tx_ahvif->link[tx_bss_conf->link_id]); } return NULL; @@ -874,12 +888,12 @@ static bool ath12k_mac_band_match(enum nl80211_band band1, enum WMI_HOST_WLAN_BA { switch (band1) { case NL80211_BAND_2GHZ: - if (band2 & 
WMI_HOST_WLAN_2G_CAP) + if (band2 & WMI_HOST_WLAN_2GHZ_CAP) return true; break; case NL80211_BAND_5GHZ: case NL80211_BAND_6GHZ: - if (band2 & WMI_HOST_WLAN_5G_CAP) + if (band2 & WMI_HOST_WLAN_5GHZ_CAP) return true; break; default: @@ -980,7 +994,7 @@ static int ath12k_mac_txpower_recalc(struct ath12k *ar) ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "txpower to set in hw %d\n", txpower / 2); - if ((pdev->cap.supported_bands & WMI_HOST_WLAN_2G_CAP) && + if ((pdev->cap.supported_bands & WMI_HOST_WLAN_2GHZ_CAP) && ar->txpower_limit_2g != txpower) { param = WMI_PDEV_PARAM_TXPOWER_LIMIT2G; ret = ath12k_wmi_pdev_set_param(ar, param, @@ -990,7 +1004,7 @@ static int ath12k_mac_txpower_recalc(struct ath12k *ar) ar->txpower_limit_2g = txpower; } - if ((pdev->cap.supported_bands & WMI_HOST_WLAN_5G_CAP) && + if ((pdev->cap.supported_bands & WMI_HOST_WLAN_5GHZ_CAP) && ar->txpower_limit_5g != txpower) { param = WMI_PDEV_PARAM_TXPOWER_LIMIT5G; ret = ath12k_wmi_pdev_set_param(ar, param, @@ -1245,61 +1259,6 @@ static int ath12k_mac_monitor_vdev_stop(struct ath12k *ar) return ret; } -static int ath12k_mac_monitor_vdev_create(struct ath12k *ar) -{ - struct ath12k_pdev *pdev = ar->pdev; - struct ath12k_wmi_vdev_create_arg arg = {}; - int bit, ret; - u8 tmp_addr[6]; - - lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); - - if (ar->monitor_vdev_created) - return 0; - - if (ar->ab->free_vdev_map == 0) { - ath12k_warn(ar->ab, "failed to find free vdev id for monitor vdev\n"); - return -ENOMEM; - } - - bit = __ffs64(ar->ab->free_vdev_map); - - ar->monitor_vdev_id = bit; - - arg.if_id = ar->monitor_vdev_id; - arg.type = WMI_VDEV_TYPE_MONITOR; - arg.subtype = WMI_VDEV_SUBTYPE_NONE; - arg.pdev_id = pdev->pdev_id; - arg.if_stats_id = ATH12K_INVAL_VDEV_STATS_ID; - - if (pdev->cap.supported_bands & WMI_HOST_WLAN_2G_CAP) { - arg.chains[NL80211_BAND_2GHZ].tx = ar->num_tx_chains; - arg.chains[NL80211_BAND_2GHZ].rx = ar->num_rx_chains; - } - - if (pdev->cap.supported_bands & WMI_HOST_WLAN_5G_CAP) { - arg.chains[NL80211_BAND_5GHZ].tx = ar->num_tx_chains; - arg.chains[NL80211_BAND_5GHZ].rx = ar->num_rx_chains; - } - - ret = ath12k_wmi_vdev_create(ar, tmp_addr, &arg); - if (ret) { - ath12k_warn(ar->ab, "failed to request monitor vdev %i creation: %d\n", - ar->monitor_vdev_id, ret); - ar->monitor_vdev_id = -1; - return ret; - } - - ar->allocated_vdev_map |= 1LL << ar->monitor_vdev_id; - ar->ab->free_vdev_map &= ~(1LL << ar->monitor_vdev_id); - ar->num_created_vdevs++; - ar->monitor_vdev_created = true; - ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac monitor vdev %d created\n", - ar->monitor_vdev_id); - - return 0; -} - static int ath12k_mac_monitor_vdev_delete(struct ath12k *ar) { int ret; @@ -1336,19 +1295,9 @@ static int ath12k_mac_monitor_vdev_delete(struct ath12k *ar) return ret; } -static void -ath12k_mac_get_any_chandef_iter(struct ieee80211_hw *hw, - struct ieee80211_chanctx_conf *conf, - void *data) -{ - struct cfg80211_chan_def **def = data; - - *def = &conf->def; -} - static int ath12k_mac_monitor_start(struct ath12k *ar) { - struct cfg80211_chan_def *chandef = NULL; + struct ath12k_mac_get_any_chanctx_conf_arg arg; int ret; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -1356,25 +1305,33 @@ static int ath12k_mac_monitor_start(struct ath12k *ar) if (ar->monitor_started) return 0; + arg.ar = ar; + arg.chanctx_conf = NULL; ieee80211_iter_chan_contexts_atomic(ath12k_ar_to_hw(ar), - ath12k_mac_get_any_chandef_iter, - &chandef); - if (!chandef) + ath12k_mac_get_any_chanctx_conf_iter, + &arg); + if (!arg.chanctx_conf) return 0; - ret 
= ath12k_mac_monitor_vdev_start(ar, ar->monitor_vdev_id, chandef); + ret = ath12k_mac_monitor_vdev_start(ar, ar->monitor_vdev_id, + &arg.chanctx_conf->def); if (ret) { ath12k_warn(ar->ab, "failed to start monitor vdev: %d\n", ret); - ath12k_mac_monitor_vdev_delete(ar); + return ret; + } + + ret = ath12k_dp_tx_htt_monitor_mode_ring_config(ar, false); + if (ret) { + ath12k_warn(ar->ab, "failed to set monitor filter: %d\n", ret); return ret; } ar->monitor_started = true; ar->num_started_vdevs++; - ret = ath12k_dp_tx_htt_monitor_mode_ring_config(ar, false); - ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac monitor started ret %d\n", ret); - return ret; + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac monitor started\n"); + + return 0; } static int ath12k_mac_monitor_stop(struct ath12k *ar) @@ -1440,58 +1397,9 @@ err: return ret; } -static int ath12k_mac_config(struct ath12k *ar, u32 changed) -{ - struct ieee80211_hw *hw = ath12k_ar_to_hw(ar); - struct ieee80211_conf *conf = &hw->conf; - int ret = 0; - - lockdep_assert_wiphy(hw->wiphy); - - if (changed & IEEE80211_CONF_CHANGE_MONITOR) { - ar->monitor_conf_enabled = conf->flags & IEEE80211_CONF_MONITOR; - if (ar->monitor_conf_enabled) { - if (ar->monitor_vdev_created) - return ret; - ret = ath12k_mac_monitor_vdev_create(ar); - if (ret) - return ret; - ret = ath12k_mac_monitor_start(ar); - if (ret) - goto err_mon_del; - } else { - if (!ar->monitor_vdev_created) - return ret; - ret = ath12k_mac_monitor_stop(ar); - if (ret) - return ret; - ath12k_mac_monitor_vdev_delete(ar); - } - } - - return ret; - -err_mon_del: - ath12k_mac_monitor_vdev_delete(ar); - return ret; -} - static int ath12k_mac_op_config(struct ieee80211_hw *hw, u32 changed) { - struct ath12k_hw *ah = ath12k_hw_to_ah(hw); - struct ath12k *ar; - int ret; - - lockdep_assert_wiphy(hw->wiphy); - - ar = ath12k_ah_to_ar(ah, 0); - - ret = ath12k_mac_config(ar, changed); - if (ret) - ath12k_warn(ar->ab, "failed to update config pdev idx %d: %d\n", - ar->pdev_idx, ret); - - return ret; + return 0; } static int ath12k_mac_setup_bcn_p2p_ie(struct ath12k_link_vif *arvif, @@ -3377,6 +3285,11 @@ static void ath12k_bss_assoc(struct ath12k *ar, if (ret) ath12k_warn(ar->ab, "failed to set vdev %i OBSS PD parameters: %d\n", arvif->vdev_id, ret); + + if (test_bit(WMI_TLV_SERVICE_11D_OFFLOAD, ar->ab->wmi_ab.svc_map) && + ahvif->vdev_type == WMI_VDEV_TYPE_STA && + ahvif->vdev_subtype == WMI_VDEV_SUBTYPE_NONE) + ath12k_mac_11d_scan_stop_all(ar->ab); } static void ath12k_bss_disassoc(struct ath12k *ar, @@ -3450,7 +3363,10 @@ static void ath12k_recalculate_mgmt_rate(struct ath12k *ar, } sband = hw->wiphy->bands[def->chan->band]; - basic_rate_idx = ffs(bss_conf->basic_rates) - 1; + if (bss_conf->basic_rates) + basic_rate_idx = __ffs(bss_conf->basic_rates); + else + basic_rate_idx = 0; bitrate = sband->bitrates[basic_rate_idx].bitrate; hw_rate_code = ath12k_mac_get_rate_hw_value(bitrate); @@ -3495,6 +3411,9 @@ static void ath12k_mac_init_arvif(struct ath12k_vif *ahvif, arvif->ahvif = ahvif; arvif->link_id = _link_id; + /* Protects the datapath stats update on a per link basis */ + spin_lock_init(&arvif->link_stats_lock); + INIT_LIST_HEAD(&arvif->list); INIT_DELAYED_WORK(&arvif->connection_loss_work, ath12k_mac_vif_sta_connection_loss_work); @@ -3534,6 +3453,11 @@ static void ath12k_mac_remove_link_interface(struct ieee80211_hw *hw, ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac remove link interface (vdev %d link id %d)", arvif->vdev_id, arvif->link_id); + if (test_bit(WMI_TLV_SERVICE_11D_OFFLOAD, ar->ab->wmi_ab.svc_map) && + 
ahvif->vdev_type == WMI_VDEV_TYPE_STA && + ahvif->vdev_subtype == WMI_VDEV_SUBTYPE_NONE) + ath12k_mac_11d_scan_stop(ar); + if (ahvif->vdev_type == WMI_VDEV_TYPE_AP) { ret = ath12k_peer_delete(ar, arvif->vdev_id, arvif->bssid); if (ret) @@ -3567,6 +3491,8 @@ static struct ath12k_link_vif *ath12k_mac_assign_link_vif(struct ath12k_hw *ah, */ if (!ahvif->links_map && link_id != ATH12K_DEFAULT_SCAN_LINK) { arvif = &ahvif->deflink; + if (vif->type == NL80211_IFTYPE_STATION) + arvif->is_sta_assoc_link = true; } else { arvif = (struct ath12k_link_vif *) kzalloc(sizeof(struct ath12k_link_vif), GFP_KERNEL); @@ -3702,6 +3628,8 @@ static void ath12k_mac_op_vif_cfg_changed(struct ieee80211_hw *hw, unsigned long links = ahvif->links_map; struct ieee80211_bss_conf *info; struct ath12k_link_vif *arvif; + struct ieee80211_sta *sta; + struct ath12k_sta *ahsta; struct ath12k *ar; u8 link_id; @@ -3714,6 +3642,35 @@ static void ath12k_mac_op_vif_cfg_changed(struct ieee80211_hw *hw, } if (changed & BSS_CHANGED_ASSOC) { + if (vif->cfg.assoc) { + /* only in station mode we can get here, so it's safe + * to use ap_addr + */ + rcu_read_lock(); + sta = ieee80211_find_sta(vif, vif->cfg.ap_addr); + if (!sta) { + rcu_read_unlock(); + WARN_ONCE(1, "failed to find sta with addr %pM\n", + vif->cfg.ap_addr); + return; + } + + ahsta = ath12k_sta_to_ahsta(sta); + arvif = wiphy_dereference(hw->wiphy, + ahvif->link[ahsta->assoc_link_id]); + rcu_read_unlock(); + + ar = arvif->ar; + /* there is no reason for which an assoc link's + * bss info does not exist + */ + info = ath12k_mac_get_link_bss_conf(arvif); + ath12k_bss_assoc(ar, arvif, info); + + /* exclude assoc link as it is done above */ + links &= ~BIT(ahsta->assoc_link_id); + } + for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); if (!arvif || !arvif->ar) @@ -3983,12 +3940,16 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, band = def.chan->band; mcast_rate = info->mcast_rate[band]; - if (mcast_rate > 0) + if (mcast_rate > 0) { rateidx = mcast_rate - 1; - else - rateidx = ffs(info->basic_rates) - 1; + } else { + if (info->basic_rates) + rateidx = __ffs(info->basic_rates); + else + rateidx = 0; + } - if (ar->pdev->cap.supported_bands & WMI_HOST_WLAN_5G_CAP) + if (ar->pdev->cap.supported_bands & WMI_HOST_WLAN_5GHZ_CAP) rateidx += ATH12K_MAC_FIRST_OFDM_RATE_IDX; bitrate = ath12k_legacy_rates[rateidx].bitrate; @@ -4162,9 +4123,9 @@ ath12k_mac_select_scan_device(struct ieee80211_hw *hw, * split the hw request and perform multiple scans */ - if (center_freq < ATH12K_MIN_5G_FREQ) + if (center_freq < ATH12K_MIN_5GHZ_FREQ) band = NL80211_BAND_2GHZ; - else if (center_freq < ATH12K_MIN_6G_FREQ) + else if (center_freq < ATH12K_MIN_6GHZ_FREQ) band = NL80211_BAND_5GHZ; else band = NL80211_BAND_6GHZ; @@ -4194,7 +4155,7 @@ void __ath12k_mac_scan_finish(struct ath12k *ar) fallthrough; case ATH12K_SCAN_STARTING: cancel_delayed_work(&ar->scan.timeout); - complete(&ar->scan.completed); + complete_all(&ar->scan.completed); wiphy_work_queue(ar->ah->hw->wiphy, &ar->scan.vdev_clean_wk); break; } @@ -4376,6 +4337,145 @@ static int ath12k_start_scan(struct ath12k *ar, return 0; } +int ath12k_mac_get_fw_stats(struct ath12k *ar, + struct ath12k_fw_stats_req_params *param) +{ + struct ath12k_base *ab = ar->ab; + struct ath12k_hw *ah = ath12k_ar_to_ah(ar); + unsigned long timeout, time_left; + int ret; + + guard(mutex)(&ah->hw_mutex); + + if (ah->state != ATH12K_HW_STATE_ON) + return -ENETDOWN; + + /* FW stats can 
get split when exceeding the stats data buffer limit. + * In that case, since there is no end marking for the back-to-back + * received 'update stats' event, we keep a 3 second timeout in case + * fw_stats_done is not marked yet + */ + timeout = jiffies + msecs_to_jiffies(3 * 1000); + ath12k_fw_stats_reset(ar); + + reinit_completion(&ar->fw_stats_complete); + + ret = ath12k_wmi_send_stats_request_cmd(ar, param->stats_id, + param->vdev_id, param->pdev_id); + + if (ret) { + ath12k_warn(ab, "failed to request fw stats: %d\n", ret); + return ret; + } + + ath12k_dbg(ab, ATH12K_DBG_WMI, + "get fw stat pdev id %d vdev id %d stats id 0x%x\n", + param->pdev_id, param->vdev_id, param->stats_id); + + time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); + + if (!time_left) { + ath12k_warn(ab, "time out while waiting for get fw stats\n"); + return -ETIMEDOUT; + } + + /* Firmware sends WMI_UPDATE_STATS_EVENTID back-to-back + * when stats data buffer limit is reached. fw_stats_complete + * is completed once host receives first event from firmware, but + * still end might not be marked in the TLV. + * Below loop is to confirm that firmware completed sending all the events + * and fw_stats_done is marked true when end is marked in the TLV. + */ + for (;;) { + if (time_after(jiffies, timeout)) + break; + spin_lock_bh(&ar->data_lock); + if (ar->fw_stats.fw_stats_done) { + spin_unlock_bh(&ar->data_lock); + break; + } + spin_unlock_bh(&ar->data_lock); + } + return 0; +} + +static int ath12k_mac_op_get_txpower(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + unsigned int link_id, + int *dbm) +{ + struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); + struct ath12k_fw_stats_req_params params = {}; + struct ath12k_fw_stats_pdev *pdev; + struct ath12k_hw *ah = hw->priv; + struct ath12k_link_vif *arvif; + struct ath12k_base *ab; + struct ath12k *ar; + int ret; + + /* Final Tx power is minimum of Target Power, CTL power, Regulatory + * Power, PSD EIRP Power. We just know the Regulatory power from the + * regulatory rules obtained. FW knows all these powers and sets the min + * of these. Hence, we request the FW pdev stats in which FW reports + * the minimum of all vdev's channel Tx power. 
+ */ + lockdep_assert_wiphy(hw->wiphy); + + arvif = wiphy_dereference(ah->hw->wiphy, ahvif->link[link_id]); + if (!arvif || !arvif->ar) + return -EINVAL; + + ar = arvif->ar; + ab = ar->ab; + if (ah->state != ATH12K_HW_STATE_ON) + goto err_fallback; + + if (test_bit(ATH12K_FLAG_CAC_RUNNING, &ar->dev_flags)) + return -EAGAIN; + + /* Limit the requests to Firmware for fetching the tx power */ + if (ar->chan_tx_pwr != ATH12K_PDEV_TX_POWER_INVALID && + time_before(jiffies, + msecs_to_jiffies(ATH12K_PDEV_TX_POWER_REFRESH_TIME_MSECS) + + ar->last_tx_power_update)) + goto send_tx_power; + + params.pdev_id = ar->pdev->pdev_id; + params.vdev_id = arvif->vdev_id; + params.stats_id = WMI_REQUEST_PDEV_STAT; + ret = ath12k_mac_get_fw_stats(ar, ¶ms); + if (ret) { + ath12k_warn(ab, "failed to request fw pdev stats: %d\n", ret); + goto err_fallback; + } + + spin_lock_bh(&ar->data_lock); + pdev = list_first_entry_or_null(&ar->fw_stats.pdevs, + struct ath12k_fw_stats_pdev, list); + if (!pdev) { + spin_unlock_bh(&ar->data_lock); + goto err_fallback; + } + + /* tx power reported by firmware is in units of 0.5 dBm */ + ar->chan_tx_pwr = pdev->chan_tx_power / 2; + spin_unlock_bh(&ar->data_lock); + ar->last_tx_power_update = jiffies; + +send_tx_power: + *dbm = ar->chan_tx_pwr; + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "txpower fetched from firmware %d dBm\n", + *dbm); + return 0; + +err_fallback: + /* We didn't get txpower from FW. Hence, relying on vif->bss_conf.txpower */ + *dbm = vif->bss_conf.txpower; + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "txpower from firmware NaN, reported %d dBm\n", + *dbm); + return 0; +} + static u8 ath12k_mac_find_link_id_by_ar(struct ath12k_vif *ahvif, struct ath12k *ar) { @@ -4429,7 +4529,7 @@ static int ath12k_mac_op_hw_scan(struct ieee80211_hw *hw, return -EINVAL; /* check if any of the links of ML VIF is already started on - * radio(ar) correpsondig to given scan frequency and use it, + * radio(ar) corresponding to given scan frequency and use it, * if not use scan link (link 15) for scan purpose. 
*/ link_id = ath12k_mac_find_link_id_by_ar(ahvif, ar); @@ -4538,7 +4638,12 @@ static int ath12k_mac_op_hw_scan(struct ieee80211_hw *hw, ret = ath12k_start_scan(ar, arg); if (ret) { - ath12k_warn(ar->ab, "failed to start hw scan: %d\n", ret); + if (ret == -EBUSY) + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, + "scan engine is busy 11d state %d\n", ar->state_11d); + else + ath12k_warn(ar->ab, "failed to start hw scan: %d\n", ret); + spin_lock_bh(&ar->data_lock); ar->scan.state = ATH12K_SCAN_IDLE; spin_unlock_bh(&ar->data_lock); @@ -4565,6 +4670,11 @@ exit: kfree(arg); } + if (ar->state_11d == ATH12K_11D_PREPARING && + ahvif->vdev_type == WMI_VDEV_TYPE_STA && + ahvif->vdev_subtype == WMI_VDEV_SUBTYPE_NONE) + ath12k_mac_11d_scan_start(ar, arvif->vdev_id); + return ret; } @@ -4605,7 +4715,6 @@ static int ath12k_install_key(struct ath12k_link_vif *arvif, .macaddr = macaddr, }; struct ath12k_vif *ahvif = arvif->ahvif; - struct ieee80211_vif *vif = ath12k_ahvif_to_vif(ahvif); lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -4624,8 +4733,8 @@ static int ath12k_install_key(struct ath12k_link_vif *arvif, switch (key->cipher) { case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: arg.key_cipher = WMI_CIPHER_AES_CCM; - /* TODO: Re-check if flag is valid */ key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV_MGMT; break; case WLAN_CIPHER_SUITE_TKIP: @@ -4633,12 +4742,10 @@ static int ath12k_install_key(struct ath12k_link_vif *arvif, arg.key_txmic_len = 8; arg.key_rxmic_len = 8; break; - case WLAN_CIPHER_SUITE_CCMP_256: - arg.key_cipher = WMI_CIPHER_AES_CCM; - break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: arg.key_cipher = WMI_CIPHER_AES_GCM; + key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV_MGMT; break; default: ath12k_warn(ar->ab, "cipher %d is not supported\n", key->cipher); @@ -4658,7 +4765,7 @@ install: if (!wait_for_completion_timeout(&ar->install_key_done, 1 * HZ)) return -ETIMEDOUT; - if (ether_addr_equal(macaddr, vif->addr)) + if (ether_addr_equal(macaddr, arvif->bssid)) ahvif->key_cipher = key->cipher; return ar->install_key_status ? -EINVAL : 0; @@ -5524,10 +5631,13 @@ static int ath12k_mac_station_add(struct ath12k *ar, ar->max_num_stations); goto exit; } - arsta->rx_stats = kzalloc(sizeof(*arsta->rx_stats), GFP_KERNEL); - if (!arsta->rx_stats) { - ret = -ENOMEM; - goto dec_num_station; + + if (ath12k_debugfs_is_extd_rx_stats_enabled(ar) && !arsta->rx_stats) { + arsta->rx_stats = kzalloc(sizeof(*arsta->rx_stats), GFP_KERNEL); + if (!arsta->rx_stats) { + ret = -ENOMEM; + goto dec_num_station; + } } peer_param.vdev_id = arvif->vdev_id; @@ -5802,6 +5912,17 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, * link sta */ if (sta->mlo) { + /* For station mode, arvif->is_sta_assoc_link has been set when + * vdev starts. 
Make sure the arvif/arsta pair have the same setting + */ + if (vif->type == NL80211_IFTYPE_STATION && + !arsta->arvif->is_sta_assoc_link) { + ath12k_hw_warn(ah, "failed to verify assoc link setting with link id %u\n", + link_id); + ret = -EINVAL; + goto exit; + } + + arsta->is_assoc_link = true; + ahsta->assoc_link_id = link_id; } @@ -6475,7 +6596,7 @@ static void ath12k_mac_setup_ht_vht_cap(struct ath12k *ar, rate_cap_tx_chainmask = ar->cfg_tx_chainmask >> cap->tx_chain_mask_shift; rate_cap_rx_chainmask = ar->cfg_rx_chainmask >> cap->rx_chain_mask_shift; - if (cap->supported_bands & WMI_HOST_WLAN_2G_CAP) { + if (cap->supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { band = &ar->mac.sbands[NL80211_BAND_2GHZ]; ht_cap = cap->band[NL80211_BAND_2GHZ].ht_cap_info; if (ht_cap_info) @@ -6484,7 +6605,7 @@ static void ath12k_mac_setup_ht_vht_cap(struct ath12k *ar, rate_cap_rx_chainmask); } - if (cap->supported_bands & WMI_HOST_WLAN_5G_CAP && + if (cap->supported_bands & WMI_HOST_WLAN_5GHZ_CAP && (ar->ab->hw_params->single_pdev_only || !ar->supports_6ghz)) { band = &ar->mac.sbands[NL80211_BAND_5GHZ]; @@ -6661,6 +6782,8 @@ static void ath12k_mac_copy_he_cap(struct ath12k_band_cap *band_cap, switch (iftype) { case NL80211_IFTYPE_AP: + he_cap_elem->mac_cap_info[2] &= + ~IEEE80211_HE_MAC_CAP2_BCAST_TWT; he_cap_elem->phy_cap_info[3] &= ~IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_MASK; he_cap_elem->phy_cap_info[9] |= @@ -6893,7 +7016,7 @@ static void ath12k_mac_setup_sband_iftype_data(struct ath12k *ar, enum nl80211_band band; int count; - if (cap->supported_bands & WMI_HOST_WLAN_2G_CAP) { + if (cap->supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { band = NL80211_BAND_2GHZ; count = ath12k_mac_copy_sband_iftype_data(ar, cap, ar->mac.iftype[band], @@ -6903,7 +7026,7 @@ static void ath12k_mac_setup_sband_iftype_data(struct ath12k *ar, count); } - if (cap->supported_bands & WMI_HOST_WLAN_5G_CAP) { + if (cap->supported_bands & WMI_HOST_WLAN_5GHZ_CAP) { band = NL80211_BAND_5GHZ; count = ath12k_mac_copy_sband_iftype_data(ar, cap, ar->mac.iftype[band], @@ -6913,7 +7036,7 @@ static void ath12k_mac_setup_sband_iftype_data(struct ath12k *ar, count); } - if (cap->supported_bands & WMI_HOST_WLAN_5G_CAP && + if (cap->supported_bands & WMI_HOST_WLAN_5GHZ_CAP && ar->supports_6ghz) { band = NL80211_BAND_6GHZ; count = ath12k_mac_copy_sband_iftype_data(ar, cap, @@ -7041,14 +7164,17 @@ static int ath12k_mac_mgmt_tx_wmi(struct ath12k *ar, struct ath12k_link_vif *arv { struct ath12k_base *ab = ar->ab; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ath12k_skb_cb *skb_cb = ATH12K_SKB_CB(skb); struct ieee80211_tx_info *info; + enum hal_encrypt_type enctype; + unsigned int mic_len; dma_addr_t paddr; int buf_id; int ret; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); - ATH12K_SKB_CB(skb)->ar = ar; + skb_cb->ar = ar; spin_lock_bh(&ar->txmgmt_idr_lock); buf_id = idr_alloc(&ar->txmgmt_idr, skb, 0, ATH12K_TX_MGMT_NUM_PENDING_MAX, GFP_ATOMIC); @@ -7057,12 +7183,15 @@ static int ath12k_mac_mgmt_tx_wmi(struct ath12k *ar, struct ath12k_link_vif *arv return -ENOSPC; info = IEEE80211_SKB_CB(skb); - if (!(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP)) { + if ((skb_cb->flags & ATH12K_SKB_CIPHER_SET) && + !(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP)) { if ((ieee80211_is_action(hdr->frame_control) || ieee80211_is_deauth(hdr->frame_control) || ieee80211_is_disassoc(hdr->frame_control)) && ieee80211_has_protected(hdr->frame_control)) { - skb_put(skb, IEEE80211_CCMP_MIC_LEN); + enctype = ath12k_dp_tx_get_encrypt_type(skb_cb->cipher); + 
mic_len = ath12k_dp_rx_crypto_mic_len(ar, enctype); + skb_put(skb, mic_len); } } @@ -7073,7 +7202,7 @@ static int ath12k_mac_mgmt_tx_wmi(struct ath12k *ar, struct ath12k_link_vif *arv goto err_free_idr; } - ATH12K_SKB_CB(skb)->paddr = paddr; + skb_cb->paddr = paddr; ret = ath12k_wmi_mgmt_send(ar, arvif->vdev_id, buf_id, skb); if (ret) { @@ -7084,7 +7213,7 @@ static int ath12k_mac_mgmt_tx_wmi(struct ath12k *ar, struct ath12k_link_vif *arv return 0; err_unmap_buf: - dma_unmap_single(ab->dev, ATH12K_SKB_CB(skb)->paddr, + dma_unmap_single(ab->dev, skb_cb->paddr, skb->len, DMA_TO_DEVICE); err_free_idr: spin_lock_bh(&ar->txmgmt_idr_lock); @@ -7343,6 +7472,11 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw, u8 link_id; int ret; + if (ahvif->vdev_type == WMI_VDEV_TYPE_MONITOR) { + ieee80211_free_txskb(hw, skb); + return; + } + link_id = u32_get_bits(info->control.flags, IEEE80211_TX_CTRL_MLO_LINK); memset(skb_cb, 0, sizeof(*skb_cb)); skb_cb->vif = vif; @@ -7399,7 +7533,7 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw, if (!vif->valid_links || !is_mcast || test_bit(ATH12K_FLAG_RAW_MODE, &ar->ab->dev_flags)) { - ret = ath12k_dp_tx(ar, arvif, skb, false, 0); + ret = ath12k_dp_tx(ar, arvif, skb, false, 0, is_mcast); if (unlikely(ret)) { ath12k_warn(ar->ab, "failed to transmit frame %d\n", ret); ieee80211_free_txskb(ar->ah->hw, skb); @@ -7429,11 +7563,10 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw, info_flags); skb_cb = ATH12K_SKB_CB(msdu_copied); - info = IEEE80211_SKB_CB(msdu_copied); skb_cb->link_id = link_id; /* For open mode, skip peer find logic */ - if (unlikely(ahvif->key_cipher == WMI_CIPHER_NONE)) + if (unlikely(!ahvif->key_cipher)) goto skip_peer_find; spin_lock_bh(&tmp_ar->ab->base_lock); @@ -7452,7 +7585,6 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw, if (key) { skb_cb->cipher = key->cipher; skb_cb->flags |= ATH12K_SKB_CIPHER_SET; - info->control.hw_key = key; hdr = (struct ieee80211_hdr *)msdu_copied->data; if (!ieee80211_has_protected(hdr->frame_control)) @@ -7463,7 +7595,7 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw, skip_peer_find: ret = ath12k_dp_tx(tmp_ar, tmp_arvif, - msdu_copied, true, mcbc_gsn); + msdu_copied, true, mcbc_gsn, is_mcast); if (unlikely(ret)) { if (ret == -ENOMEM) { /* Drops are expected during heavy multicast @@ -7549,7 +7681,7 @@ static int ath12k_mac_start(struct ath12k *ar) 1, pdev->pdev_id); if (ret) { - ath12k_err(ab, "failed to enable PMF QOS: (%d\n", ret); + ath12k_err(ab, "failed to enable PMF QOS: %d\n", ret); goto err; } @@ -7594,12 +7726,13 @@ static int ath12k_mac_start(struct ath12k *ar) /* TODO: Do we need to enable ANI? */ - ath12k_reg_update_chan_list(ar); + ath12k_reg_update_chan_list(ar, false); ar->num_started_vdevs = 0; ar->num_created_vdevs = 0; ar->num_peers = 0; ar->allocated_vdev_map = 0; + ar->chan_tx_pwr = ATH12K_PDEV_TX_POWER_INVALID; /* Configure monitor status ring with default rx_filter to get rx status * such as rssi, rx_duration. 
@@ -7781,6 +7914,9 @@ static void ath12k_mac_stop(struct ath12k *ar) wiphy_work_cancel(ath12k_ar_to_hw(ar)->wiphy, &ar->scan.vdev_clean_wk); cancel_work_sync(&ar->regd_update_work); cancel_work_sync(&ar->ab->rfkill_work); + cancel_work_sync(&ar->ab->update_11d_work); + ar->state_11d = ATH12K_11D_IDLE; + complete(&ar->completed_11d_scan); spin_lock_bh(&ar->data_lock); list_for_each_entry_safe(ppdu_stats, tmp, &ar->ppdu_stats_info, list) { @@ -7903,15 +8039,15 @@ static int ath12k_mac_setup_vdev_create_arg(struct ath12k_link_vif *arvif, return ret; } - if (pdev->cap.supported_bands & WMI_HOST_WLAN_2G_CAP) { + if (pdev->cap.supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { arg->chains[NL80211_BAND_2GHZ].tx = ar->num_tx_chains; arg->chains[NL80211_BAND_2GHZ].rx = ar->num_rx_chains; } - if (pdev->cap.supported_bands & WMI_HOST_WLAN_5G_CAP) { + if (pdev->cap.supported_bands & WMI_HOST_WLAN_5GHZ_CAP) { arg->chains[NL80211_BAND_5GHZ].tx = ar->num_tx_chains; arg->chains[NL80211_BAND_5GHZ].rx = ar->num_rx_chains; } - if (pdev->cap.supported_bands & WMI_HOST_WLAN_5G_CAP && + if (pdev->cap.supported_bands & WMI_HOST_WLAN_5GHZ_CAP && ar->supports_6ghz) { arg->chains[NL80211_BAND_6GHZ].tx = ar->num_tx_chains; arg->chains[NL80211_BAND_6GHZ].rx = ar->num_rx_chains; @@ -7940,7 +8076,7 @@ ath12k_mac_prepare_he_mode(struct ath12k_pdev *pdev, u32 viftype) u32 *hecap_phy_ptr = NULL; u32 hemode; - if (pdev->cap.supported_bands & WMI_HOST_WLAN_2G_CAP) + if (pdev->cap.supported_bands & WMI_HOST_WLAN_2GHZ_CAP) cap_band = &pdev_cap->band[NL80211_BAND_2GHZ]; else cap_band = &pdev_cap->band[NL80211_BAND_5GHZ]; @@ -8071,6 +8207,118 @@ static void ath12k_mac_op_update_vif_offload(struct ieee80211_hw *hw, ath12k_mac_update_vif_offload(&ahvif->deflink); } +static bool ath12k_mac_vif_ap_active_any(struct ath12k_base *ab) +{ + struct ath12k *ar; + struct ath12k_pdev *pdev; + struct ath12k_link_vif *arvif; + int i; + + for (i = 0; i < ab->num_radios; i++) { + pdev = &ab->pdevs[i]; + ar = pdev->ar; + list_for_each_entry(arvif, &ar->arvifs, list) { + if (arvif->is_up && + arvif->ahvif->vdev_type == WMI_VDEV_TYPE_AP) + return true; + } + } + return false; +} + +void ath12k_mac_11d_scan_start(struct ath12k *ar, u32 vdev_id) +{ + struct wmi_11d_scan_start_arg arg; + int ret; + + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + + if (ar->regdom_set_by_user) + goto fin; + + if (ar->vdev_id_11d_scan != ATH12K_11D_INVALID_VDEV_ID) + goto fin; + + if (!test_bit(WMI_TLV_SERVICE_11D_OFFLOAD, ar->ab->wmi_ab.svc_map)) + goto fin; + + if (ath12k_mac_vif_ap_active_any(ar->ab)) + goto fin; + + arg.vdev_id = vdev_id; + arg.start_interval_msec = 0; + arg.scan_period_msec = ATH12K_SCAN_11D_INTERVAL; + + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, + "mac start 11d scan for vdev %d\n", vdev_id); + + ret = ath12k_wmi_send_11d_scan_start_cmd(ar, &arg); + if (ret) { + ath12k_warn(ar->ab, "failed to start 11d scan vdev %d ret: %d\n", + vdev_id, ret); + } else { + ar->vdev_id_11d_scan = vdev_id; + if (ar->state_11d == ATH12K_11D_PREPARING) + ar->state_11d = ATH12K_11D_RUNNING; + } + +fin: + if (ar->state_11d == ATH12K_11D_PREPARING) { + ar->state_11d = ATH12K_11D_IDLE; + complete(&ar->completed_11d_scan); + } +} + +void ath12k_mac_11d_scan_stop(struct ath12k *ar) +{ + int ret; + u32 vdev_id; + + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + + if (!test_bit(WMI_TLV_SERVICE_11D_OFFLOAD, ar->ab->wmi_ab.svc_map)) + return; + + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac stop 11d for vdev %d\n", + ar->vdev_id_11d_scan); + + if (ar->state_11d == 
ATH12K_11D_PREPARING) { + ar->state_11d = ATH12K_11D_IDLE; + complete(&ar->completed_11d_scan); + } + + if (ar->vdev_id_11d_scan != ATH12K_11D_INVALID_VDEV_ID) { + vdev_id = ar->vdev_id_11d_scan; + + ret = ath12k_wmi_send_11d_scan_stop_cmd(ar, vdev_id); + if (ret) { + ath12k_warn(ar->ab, + "failed to stop 11d scan vdev %d ret: %d\n", + vdev_id, ret); + } else { + ar->vdev_id_11d_scan = ATH12K_11D_INVALID_VDEV_ID; + ar->state_11d = ATH12K_11D_IDLE; + complete(&ar->completed_11d_scan); + } + } +} + +void ath12k_mac_11d_scan_stop_all(struct ath12k_base *ab) +{ + struct ath12k *ar; + struct ath12k_pdev *pdev; + int i; + + ath12k_dbg(ab, ATH12K_DBG_MAC, "mac stop soc 11d scan\n"); + + for (i = 0; i < ab->num_radios; i++) { + pdev = &ab->pdevs[i]; + ar = pdev->ar; + + ath12k_mac_11d_scan_stop(ar); + } +} + int ath12k_mac_vdev_create(struct ath12k *ar, struct ath12k_link_vif *arvif) { struct ath12k_hw *ah = ar->ah; @@ -8089,6 +8337,12 @@ int ath12k_mac_vdev_create(struct ath12k *ar, struct ath12k_link_vif *arvif) lockdep_assert_wiphy(hw->wiphy); + /* In NO_VIRTUAL_MONITOR, it's necessary to restrict only one monitor + * interface in each radio + */ + if (vif->type == NL80211_IFTYPE_MONITOR && ar->monitor_vdev_created) + return -EINVAL; + /* If no link is active and scan vdev is requested * use a default link conf for scan address purpose. */ @@ -8205,6 +8459,7 @@ int ath12k_mac_vdev_create(struct ath12k *ar, struct ath12k_link_vif *arvif) arvif->vdev_id, ret); goto err_peer_del; } + ath12k_mac_11d_scan_stop_all(ar->ab); break; case WMI_VDEV_TYPE_STA: param_id = WMI_STA_PS_PARAM_RX_WAKE_POLICY; @@ -8243,6 +8498,16 @@ int ath12k_mac_vdev_create(struct ath12k *ar, struct ath12k_link_vif *arvif) arvif->vdev_id, ret); goto err_peer_del; } + + if (test_bit(WMI_TLV_SERVICE_11D_OFFLOAD, ab->wmi_ab.svc_map) && + ahvif->vdev_type == WMI_VDEV_TYPE_STA && + ahvif->vdev_subtype == WMI_VDEV_SUBTYPE_NONE) { + reinit_completion(&ar->completed_11d_scan); + ar->state_11d = ATH12K_11D_PREPARING; + } + break; + case WMI_VDEV_TYPE_MONITOR: + ar->monitor_vdev_created = true; break; default: break; @@ -8263,8 +8528,6 @@ int ath12k_mac_vdev_create(struct ath12k *ar, struct ath12k_link_vif *arvif) } ath12k_dp_vdev_tx_attach(ar, arvif); - if (vif->type != NL80211_IFTYPE_MONITOR && ar->monitor_conf_enabled) - ath12k_mac_monitor_vdev_create(ar); return ret; @@ -8289,6 +8552,11 @@ err_peer_del: } err_vdev_del: + if (ahvif->vdev_type == WMI_VDEV_TYPE_MONITOR) { + ar->monitor_vdev_id = -1; + ar->monitor_vdev_created = false; + } + ath12k_wmi_vdev_delete(ar, arvif->vdev_id); ar->num_created_vdevs--; arvif->is_created = false; @@ -8566,8 +8834,6 @@ static int ath12k_mac_vdev_delete(struct ath12k *ar, struct ath12k_link_vif *arv if (ahvif->vdev_type == WMI_VDEV_TYPE_MONITOR) { ar->monitor_vdev_id = -1; ar->monitor_vdev_created = false; - } else if (ar->monitor_vdev_created && !ar->monitor_started) { - ret = ath12k_mac_monitor_vdev_delete(ar); } ath12k_dbg(ab, ATH12K_DBG_MAC, "vdev %pM deleted, vdev_id %d\n", @@ -8798,6 +9064,7 @@ static int ath12k_mac_op_add_chanctx(struct ieee80211_hw *hw, */ ar->rx_channel = ctx->def.chan; spin_unlock_bh(&ar->data_lock); + ar->chan_tx_pwr = ATH12K_PDEV_TX_POWER_INVALID; return 0; } @@ -8826,6 +9093,7 @@ static void ath12k_mac_op_remove_chanctx(struct ieee80211_hw *hw, */ ar->rx_channel = NULL; spin_unlock_bh(&ar->data_lock); + ar->chan_tx_pwr = ATH12K_PDEV_TX_POWER_INVALID; } static enum wmi_phy_mode @@ -8917,6 +9185,9 @@ ath12k_mac_mlo_get_vdev_args(struct ath12k_link_vif *arvif, * link vdevs 
which are advertised as partners below */ ml_arg->link_add = true; + + ml_arg->assoc_link = arvif->is_sta_assoc_link; + partner_info = ml_arg->partner_info; links = ahvif->links_map; @@ -9257,8 +9528,10 @@ ath12k_mac_update_vif_chan(struct ath12k *ar, arvif = wiphy_dereference(ath12k_ar_to_hw(ar)->wiphy, ahvif->link[link_id]); - if (vif->type == NL80211_IFTYPE_MONITOR) + if (vif->type == NL80211_IFTYPE_MONITOR) { monitor_vif = true; + continue; + } ath12k_dbg(ab, ATH12K_DBG_MAC, "mac chanctx switch vdev_id %i freq %u->%u width %d->%d\n", @@ -9408,16 +9681,26 @@ static int ath12k_start_vdev_delay(struct ath12k *ar, struct ath12k_base *ab = ar->ab; struct ath12k_vif *ahvif = arvif->ahvif; struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); + struct ieee80211_chanctx_conf *chanctx; + struct ieee80211_bss_conf *link_conf; int ret; if (WARN_ON(arvif->is_started)) return -EBUSY; - ret = ath12k_mac_vdev_start(arvif, &arvif->chanctx); + link_conf = ath12k_mac_get_link_bss_conf(arvif); + if (!link_conf) { + ath12k_warn(ab, "failed to get link conf for vdev %u\n", arvif->vdev_id); + return -EINVAL; + } + + chanctx = wiphy_dereference(ath12k_ar_to_hw(arvif->ar)->wiphy, + link_conf->chanctx_conf); + ret = ath12k_mac_vdev_start(arvif, chanctx); if (ret) { ath12k_warn(ab, "failed to start vdev %i addr %pM on freq %d: %d\n", arvif->vdev_id, vif->addr, - arvif->chanctx.def.chan->center_freq, ret); + chanctx->def.chan->center_freq, ret); return ret; } @@ -9462,8 +9745,8 @@ ath12k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, ar = ath12k_mac_assign_vif_to_vdev(hw, arvif, ctx); if (!ar) { - ath12k_warn(arvif->ar->ab, "failed to assign chanctx for vif %pM link id %u link vif is already started", - vif->addr, link_id); + ath12k_hw_warn(ah, "failed to assign chanctx for vif %pM link id %u link vif is already started", + vif->addr, link_id); return -EINVAL; } @@ -9480,7 +9763,6 @@ ath12k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, ahvif->vdev_type != WMI_VDEV_TYPE_AP && ahvif->vdev_type != WMI_VDEV_TYPE_MONITOR && !ath12k_peer_exist_by_vdev_id(ab, arvif->vdev_id)) { - memcpy(&arvif->chanctx, ctx, sizeof(*ctx)); ret = 0; goto out; } @@ -9492,8 +9774,10 @@ ath12k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, if (ahvif->vdev_type == WMI_VDEV_TYPE_MONITOR) { ret = ath12k_mac_monitor_start(ar); - if (ret) + if (ret) { + ath12k_mac_monitor_vdev_delete(ar); goto out; + } arvif->is_started = true; goto out; @@ -9507,9 +9791,6 @@ ath12k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, goto out; } - if (ahvif->vdev_type != WMI_VDEV_TYPE_MONITOR && ar->monitor_vdev_created) - ath12k_mac_monitor_start(ar); - arvif->is_started = true; /* TODO: Setup ps and cts/rts protection */ @@ -9572,9 +9853,13 @@ ath12k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw, } arvif->is_started = false; - if (ahvif->vdev_type != WMI_VDEV_TYPE_MONITOR && - ar->num_started_vdevs == 1 && ar->monitor_vdev_created) - ath12k_mac_monitor_stop(ar); + if (test_bit(WMI_TLV_SERVICE_11D_OFFLOAD, ab->wmi_ab.svc_map) && + ahvif->vdev_type == WMI_VDEV_TYPE_STA && + ahvif->vdev_subtype == WMI_VDEV_SUBTYPE_NONE && + ar->state_11d != ATH12K_11D_PREPARING) { + reinit_completion(&ar->completed_11d_scan); + ar->state_11d = ATH12K_11D_PREPARING; + } } static int @@ -10136,6 +10421,14 @@ ath12k_mac_op_reconfig_complete(struct ieee80211_hw *hw, ath12k_warn(ar->ab, "pdev %d successfully recovered\n", ar->pdev->pdev_id); + if (ar->ab->hw_params->current_cc_support && + ar->alpha2[0] != 0 && ar->alpha2[1] != 0) { + struct 
wmi_set_current_country_arg arg = {}; + + memcpy(&arg.alpha2, ar->alpha2, 2); + ath12k_wmi_send_set_current_country_cmd(ar, &arg); + } + if (ab->is_reset) { recovery_count = atomic_inc_return(&ab->recovery_count); @@ -10267,46 +10560,13 @@ static int ath12k_mac_op_get_survey(struct ieee80211_hw *hw, int idx, return 0; } -static int ath12k_mac_get_fw_stats(struct ath12k *ar, u32 pdev_id, - u32 vdev_id, u32 stats_id) -{ - struct ath12k_base *ab = ar->ab; - struct ath12k_hw *ah = ath12k_ar_to_ah(ar); - unsigned long time_left; - int ret; - - guard(mutex)(&ah->hw_mutex); - - if (ah->state != ATH12K_HW_STATE_ON) - return -ENETDOWN; - - reinit_completion(&ar->fw_stats_complete); - - ret = ath12k_wmi_send_stats_request_cmd(ar, stats_id, vdev_id, pdev_id); - - if (ret) { - ath12k_warn(ab, "failed to request fw stats: %d\n", ret); - return ret; - } - - ath12k_dbg(ab, ATH12K_DBG_WMI, - "get fw stat pdev id %d vdev id %d stats id 0x%x\n", - pdev_id, vdev_id, stats_id); - - time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); - - if (!time_left) - ath12k_warn(ab, "time out while waiting for get fw stats\n"); - - return ret; -} - static void ath12k_mac_op_sta_statistics(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct station_info *sinfo) { struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(sta); + struct ath12k_fw_stats_req_params params = {}; struct ath12k_link_sta *arsta; struct ath12k *ar; s8 signal; @@ -10348,10 +10608,13 @@ static void ath12k_mac_op_sta_statistics(struct ieee80211_hw *hw, /* TODO: Use real NF instead of default one. */ signal = arsta->rssi_comb; + params.pdev_id = ar->pdev->pdev_id; + params.vdev_id = 0; + params.stats_id = WMI_REQUEST_VDEV_STAT; + if (!signal && ahsta->ahvif->vdev_type == WMI_VDEV_TYPE_STA && - !(ath12k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, - WMI_REQUEST_VDEV_STAT))) + !(ath12k_mac_get_fw_stats(ar, ¶ms))) signal = arsta->rssi_beacon; if (signal) { @@ -10411,7 +10674,7 @@ static int ath12k_mac_op_remain_on_channel(struct ieee80211_hw *hw, return -EINVAL; /* check if any of the links of ML VIF is already started on - * radio(ar) correpsondig to given scan frequency and use it, + * radio(ar) corresponding to given scan frequency and use it, * if not use deflink(link 0) for scan purpose. 
*/ @@ -10595,6 +10858,7 @@ static const struct ieee80211_ops ath12k_ops = { .assign_vif_chanctx = ath12k_mac_op_assign_vif_chanctx, .unassign_vif_chanctx = ath12k_mac_op_unassign_vif_chanctx, .switch_vif_chanctx = ath12k_mac_op_switch_vif_chanctx, + .get_txpower = ath12k_mac_op_get_txpower, .set_rts_threshold = ath12k_mac_op_set_rts_threshold, .set_frag_threshold = ath12k_mac_op_set_frag_threshold, .set_bitrate_mask = ath12k_mac_op_set_bitrate_mask, @@ -10610,12 +10874,25 @@ static const struct ieee80211_ops ath12k_ops = { .resume = ath12k_wow_op_resume, .set_wakeup = ath12k_wow_op_set_wakeup, #endif +#ifdef CONFIG_ATH12K_DEBUGFS + .vif_add_debugfs = ath12k_debugfs_op_vif_add, +#endif CFG80211_TESTMODE_CMD(ath12k_tm_cmd) #ifdef CONFIG_ATH12K_DEBUGFS .link_sta_add_debugfs = ath12k_debugfs_link_sta_op_add, #endif }; +void ath12k_mac_update_freq_range(struct ath12k *ar, + u32 freq_low, u32 freq_high) +{ + if (!(freq_low && freq_high)) + return; + + ar->freq_range.start_freq = MHZ_TO_KHZ(freq_low); + ar->freq_range.end_freq = MHZ_TO_KHZ(freq_high); +} + static void ath12k_mac_update_ch_list(struct ath12k *ar, struct ieee80211_supported_band *band, u32 freq_low, u32 freq_high) @@ -10630,9 +10907,6 @@ static void ath12k_mac_update_ch_list(struct ath12k *ar, band->channels[i].center_freq > freq_high) band->channels[i].flags |= IEEE80211_CHAN_DISABLED; } - - ar->freq_range.start_freq = MHZ_TO_KHZ(freq_low); - ar->freq_range.end_freq = MHZ_TO_KHZ(freq_high); } static u32 ath12k_get_phy_id(struct ath12k *ar, u32 band) @@ -10640,10 +10914,10 @@ static u32 ath12k_get_phy_id(struct ath12k *ar, u32 band) struct ath12k_pdev *pdev = ar->pdev; struct ath12k_pdev_cap *pdev_cap = &pdev->cap; - if (band == WMI_HOST_WLAN_2G_CAP) + if (band == WMI_HOST_WLAN_2GHZ_CAP) return pdev_cap->band[NL80211_BAND_2GHZ].phy_id; - if (band == WMI_HOST_WLAN_5G_CAP) + if (band == WMI_HOST_WLAN_5GHZ_CAP) return pdev_cap->band[NL80211_BAND_5GHZ].phy_id; ath12k_warn(ar->ab, "unsupported phy cap:%d\n", band); @@ -10657,18 +10931,19 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar, { struct ieee80211_supported_band *band; struct ath12k_wmi_hal_reg_capabilities_ext_arg *reg_cap; + struct ath12k_base *ab = ar->ab; + u32 phy_id, freq_low, freq_high; struct ath12k_hw *ah = ar->ah; void *channels; - u32 phy_id; BUILD_BUG_ON((ARRAY_SIZE(ath12k_2ghz_channels) + ARRAY_SIZE(ath12k_5ghz_channels) + ARRAY_SIZE(ath12k_6ghz_channels)) != ATH12K_NUM_CHANS); - reg_cap = &ar->ab->hal_reg_cap[ar->pdev_idx]; + reg_cap = &ab->hal_reg_cap[ar->pdev_idx]; - if (supported_bands & WMI_HOST_WLAN_2G_CAP) { + if (supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { channels = kmemdup(ath12k_2ghz_channels, sizeof(ath12k_2ghz_channels), GFP_KERNEL); @@ -10683,17 +10958,25 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar, band->bitrates = ath12k_g_rates; bands[NL80211_BAND_2GHZ] = band; - if (ar->ab->hw_params->single_pdev_only) { - phy_id = ath12k_get_phy_id(ar, WMI_HOST_WLAN_2G_CAP); - reg_cap = &ar->ab->hal_reg_cap[phy_id]; + if (ab->hw_params->single_pdev_only) { + phy_id = ath12k_get_phy_id(ar, WMI_HOST_WLAN_2GHZ_CAP); + reg_cap = &ab->hal_reg_cap[phy_id]; } + + freq_low = max(reg_cap->low_2ghz_chan, + ab->reg_freq_2ghz.start_freq); + freq_high = min(reg_cap->high_2ghz_chan, + ab->reg_freq_2ghz.end_freq); + ath12k_mac_update_ch_list(ar, band, reg_cap->low_2ghz_chan, reg_cap->high_2ghz_chan); + + ath12k_mac_update_freq_range(ar, freq_low, freq_high); } - if (supported_bands & WMI_HOST_WLAN_5G_CAP) { - if (reg_cap->high_5ghz_chan >= 
ATH12K_MIN_6G_FREQ) { + if (supported_bands & WMI_HOST_WLAN_5GHZ_CAP) { + if (reg_cap->high_5ghz_chan >= ATH12K_MIN_6GHZ_FREQ) { channels = kmemdup(ath12k_6ghz_channels, sizeof(ath12k_6ghz_channels), GFP_KERNEL); if (!channels) { @@ -10709,13 +10992,21 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar, band->n_bitrates = ath12k_a_rates_size; band->bitrates = ath12k_a_rates; bands[NL80211_BAND_6GHZ] = band; + + freq_low = max(reg_cap->low_5ghz_chan, + ab->reg_freq_6ghz.start_freq); + freq_high = min(reg_cap->high_5ghz_chan, + ab->reg_freq_6ghz.end_freq); + ath12k_mac_update_ch_list(ar, band, reg_cap->low_5ghz_chan, reg_cap->high_5ghz_chan); + + ath12k_mac_update_freq_range(ar, freq_low, freq_high); ah->use_6ghz_regd = true; } - if (reg_cap->low_5ghz_chan < ATH12K_MIN_6G_FREQ) { + if (reg_cap->low_5ghz_chan < ATH12K_MIN_6GHZ_FREQ) { channels = kmemdup(ath12k_5ghz_channels, sizeof(ath12k_5ghz_channels), GFP_KERNEL); @@ -10733,14 +11024,21 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar, band->bitrates = ath12k_a_rates; bands[NL80211_BAND_5GHZ] = band; - if (ar->ab->hw_params->single_pdev_only) { - phy_id = ath12k_get_phy_id(ar, WMI_HOST_WLAN_5G_CAP); - reg_cap = &ar->ab->hal_reg_cap[phy_id]; + if (ab->hw_params->single_pdev_only) { + phy_id = ath12k_get_phy_id(ar, WMI_HOST_WLAN_5GHZ_CAP); + reg_cap = &ab->hal_reg_cap[phy_id]; } + freq_low = max(reg_cap->low_5ghz_chan, + ab->reg_freq_5ghz.start_freq); + freq_high = min(reg_cap->high_5ghz_chan, + ab->reg_freq_5ghz.end_freq); + ath12k_mac_update_ch_list(ar, band, reg_cap->low_5ghz_chan, reg_cap->high_5ghz_chan); + + ath12k_mac_update_freq_range(ar, freq_low, freq_high); } } @@ -10840,13 +11138,18 @@ ath12k_mac_setup_radio_iface_comb(struct ath12k *ar, comb[0].limits = limits; comb[0].n_limits = n_limits; comb[0].max_interfaces = max_interfaces; - comb[0].num_different_channels = 1; comb[0].beacon_int_infra_match = true; comb[0].beacon_int_min_gcd = 100; - comb[0].radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) | - BIT(NL80211_CHAN_WIDTH_20) | - BIT(NL80211_CHAN_WIDTH_40) | - BIT(NL80211_CHAN_WIDTH_80); + + if (ar->ab->hw_params->single_pdev_only) { + comb[0].num_different_channels = 2; + } else { + comb[0].num_different_channels = 1; + comb[0].radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) | + BIT(NL80211_CHAN_WIDTH_20) | + BIT(NL80211_CHAN_WIDTH_40) | + BIT(NL80211_CHAN_WIDTH_80); + } return 0; } @@ -11219,6 +11522,7 @@ static int ath12k_mac_hw_register(struct ath12k_hw *ah) ieee80211_hw_set(hw, QUEUE_CONTROL); ieee80211_hw_set(hw, SUPPORTS_TX_FRAG); ieee80211_hw_set(hw, REPORTS_LOW_ACK); + ieee80211_hw_set(hw, NO_VIRTUAL_MONITOR); if ((ht_cap & WMI_HT_CAP_ENABLED) || is_6ghz) { ieee80211_hw_set(hw, AMPDU_AGGREGATION); @@ -11347,11 +11651,10 @@ static int ath12k_mac_hw_register(struct ath12k_hw *ah) goto err_unregister_hw; } + ath12k_fw_stats_init(ar); ath12k_debugfs_register(ar); } - init_completion(&ar->fw_stats_complete); - return 0; err_unregister_hw: @@ -11396,6 +11699,7 @@ static void ath12k_mac_setup(struct ath12k *ar) ar->num_tx_chains = hweight32(pdev->cap.tx_chain_mask); ar->num_rx_chains = hweight32(pdev->cap.rx_chain_mask); ar->scan.arvif = NULL; + ar->vdev_id_11d_scan = ATH12K_11D_INVALID_VDEV_ID; spin_lock_init(&ar->data_lock); INIT_LIST_HEAD(&ar->arvifs); @@ -11411,6 +11715,7 @@ static void ath12k_mac_setup(struct ath12k *ar) init_completion(&ar->scan.completed); init_completion(&ar->scan.on_channel); init_completion(&ar->mlo_setup_done); + init_completion(&ar->completed_11d_scan); 
INIT_DELAYED_WORK(&ar->scan.timeout, ath12k_scan_timeout_work); wiphy_work_init(&ar->scan.vdev_clean_wk, ath12k_scan_vdev_clean_work); @@ -11418,6 +11723,10 @@ static void ath12k_mac_setup(struct ath12k *ar) wiphy_work_init(&ar->wmi_mgmt_tx_work, ath12k_mgmt_over_wmi_tx_work); skb_queue_head_init(&ar->wmi_mgmt_tx_queue); + + ar->monitor_vdev_id = -1; + ar->monitor_vdev_created = false; + ar->monitor_started = false; } static int __ath12k_mac_mlo_setup(struct ath12k *ar) @@ -11572,7 +11881,6 @@ void ath12k_mac_mlo_teardown(struct ath12k_hw_group *ag) int ath12k_mac_register(struct ath12k_hw_group *ag) { - struct ath12k_base *ab = ag->ab[0]; struct ath12k_hw *ah; int i; int ret; @@ -11585,8 +11893,6 @@ int ath12k_mac_register(struct ath12k_hw_group *ag) goto err; } - set_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); - return 0; err: @@ -11603,12 +11909,9 @@ err: void ath12k_mac_unregister(struct ath12k_hw_group *ag) { - struct ath12k_base *ab = ag->ab[0]; struct ath12k_hw *ah; int i; - clear_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); - for (i = ag->num_hw - 1; i >= 0; i--) { ah = ath12k_ag_to_ah(ag, i); if (!ah) diff --git a/drivers/net/wireless/ath/ath12k/mac.h b/drivers/net/wireless/ath/ath12k/mac.h index ae35b73312bf..da37332352fe 100644 --- a/drivers/net/wireless/ath/ath12k/mac.h +++ b/drivers/net/wireless/ath/ath12k/mac.h @@ -33,6 +33,9 @@ struct ath12k_generic_iter { #define ATH12K_KEEPALIVE_MAX_IDLE 3895 #define ATH12K_KEEPALIVE_MAX_UNRESPONSIVE 3900 +#define ATH12K_PDEV_TX_POWER_INVALID ((u32)-1) +#define ATH12K_PDEV_TX_POWER_REFRESH_TIME_MSECS 5000 /* msecs */ + /* FIXME: should these be in ieee80211.h? */ #define IEEE80211_VHT_MCS_SUPPORT_0_11_MASK GENMASK(23, 16) #define IEEE80211_DISABLE_VHT_MCS_SUPPORT_0_11 BIT(24) @@ -66,6 +69,13 @@ struct ath12k_mac_get_any_chanctx_conf_arg { extern const struct htt_rx_ring_tlv_filter ath12k_mac_mon_status_filter_default; +#define ATH12K_SCAN_11D_INTERVAL 600000 +#define ATH12K_11D_INVALID_VDEV_ID 0xFFFF + +void ath12k_mac_11d_scan_start(struct ath12k *ar, u32 vdev_id); +void ath12k_mac_11d_scan_stop(struct ath12k *ar); +void ath12k_mac_11d_scan_stop_all(struct ath12k_base *ab); + void ath12k_mac_destroy(struct ath12k_hw_group *ag); void ath12k_mac_unregister(struct ath12k_hw_group *ag); int ath12k_mac_register(struct ath12k_hw_group *ag); @@ -115,4 +125,7 @@ struct ieee80211_bss_conf *ath12k_mac_get_link_bss_conf(struct ath12k_link_vif * struct ath12k *ath12k_get_ar_by_vif(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u8 link_id); +int ath12k_mac_get_fw_stats(struct ath12k *ar, struct ath12k_fw_stats_req_params *param); +void ath12k_mac_update_freq_range(struct ath12k *ar, + u32 freq_low, u32 freq_high); #endif diff --git a/drivers/net/wireless/ath/ath12k/mhi.c b/drivers/net/wireless/ath/ath12k/mhi.c index 2f6d14382ed7..08f44baf182a 100644 --- a/drivers/net/wireless/ath/ath12k/mhi.c +++ b/drivers/net/wireless/ath/ath12k/mhi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2020-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include <linux/msi.h> @@ -285,8 +285,11 @@ static void ath12k_mhi_op_status_cb(struct mhi_controller *mhi_cntrl, break; } - if (!(test_bit(ATH12K_FLAG_UNREGISTERING, &ab->dev_flags))) + if (!(test_bit(ATH12K_FLAG_UNREGISTERING, &ab->dev_flags))) { + set_bit(ATH12K_FLAG_CRASH_FLUSH, &ab->dev_flags); + set_bit(ATH12K_FLAG_RECOVERY, &ab->dev_flags); queue_work(ab->workqueue_aux, &ab->reset_work); + } break; default: break; @@ -379,7 +382,7 @@ int ath12k_mhi_register(struct ath12k_pci *ab_pci) mhi_ctrl->irq_flags = IRQF_SHARED | IRQF_NOBALANCING; mhi_ctrl->iova_start = 0; - mhi_ctrl->iova_stop = 0xffffffff; + mhi_ctrl->iova_stop = ab_pci->dma_mask; mhi_ctrl->sbl_size = SZ_512K; mhi_ctrl->seg_len = SZ_512K; mhi_ctrl->fbc_download = true; diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index b474696ac6d8..528a4a57d136 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -17,7 +17,7 @@ #include "debug.h" #define ATH12K_PCI_BAR_NUM 0 -#define ATH12K_PCI_DMA_MASK 32 +#define ATH12K_PCI_DMA_MASK 36 #define ATH12K_PCI_IRQ_CE0_OFFSET 3 @@ -718,7 +718,7 @@ static void ath12k_pci_init_qmi_ce_config(struct ath12k_base *ab) cfg->svc_to_ce_map_len = ab->hw_params->svc_to_ce_map_len; ab->qmi.service_ins_id = ab->hw_params->qmi_service_ins_id; - if (test_bit(ATH12K_FW_FEATURE_MULTI_QRTR_ID, ab->fw.fw_features)) { + if (ath12k_fw_feature_supported(ab, ATH12K_FW_FEATURE_MULTI_QRTR_ID)) { ab_pci->qmi_instance = u32_encode_bits(pci_domain_nr(bus), DOMAIN_NUMBER_MASK) | u32_encode_bits(bus->number, BUS_NUMBER_MASK); @@ -877,13 +877,9 @@ static int ath12k_pci_claim(struct ath12k_pci *ab_pci, struct pci_dev *pdev) goto disable_device; } - ret = dma_set_mask_and_coherent(&pdev->dev, - DMA_BIT_MASK(ATH12K_PCI_DMA_MASK)); - if (ret) { - ath12k_err(ab, "failed to set pci dma mask to %d: %d\n", - ATH12K_PCI_DMA_MASK, ret); - goto release_region; - } + ab_pci->dma_mask = DMA_BIT_MASK(ATH12K_PCI_DMA_MASK); + dma_set_mask(&pdev->dev, ab_pci->dma_mask); + dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); pci_set_master(pdev); @@ -1472,7 +1468,7 @@ int ath12k_pci_power_up(struct ath12k_base *ab) ath12k_pci_msi_enable(ab_pci); - if (test_bit(ATH12K_FW_FEATURE_MULTI_QRTR_ID, ab->fw.fw_features)) + if (ath12k_fw_feature_supported(ab, ATH12K_FW_FEATURE_MULTI_QRTR_ID)) ath12k_pci_update_qrtr_node_id(ab); ret = ath12k_mhi_start(ab_pci); @@ -1491,6 +1487,9 @@ void ath12k_pci_power_down(struct ath12k_base *ab, bool is_suspend) { struct ath12k_pci *ab_pci = ath12k_pci_priv(ab); + if (!test_bit(ATH12K_PCI_FLAG_INIT_DONE, &ab_pci->flags)) + return; + /* restore aspm in case firmware bootup fails */ ath12k_pci_aspm_restore(ab_pci); @@ -1710,12 +1709,12 @@ err_hal_srng_deinit: err_mhi_unregister: ath12k_mhi_unregister(ab_pci); -err_pci_msi_free: - ath12k_pci_msi_free(ab_pci); - err_irq_affinity_cleanup: ath12k_pci_set_irq_affinity_hint(ab_pci, NULL); +err_pci_msi_free: + ath12k_pci_msi_free(ab_pci); + err_pci_free_region: ath12k_pci_free_region(ab_pci); @@ -1758,13 +1757,34 @@ qmi_fail: ath12k_core_free(ab); } +static void ath12k_pci_hw_group_power_down(struct ath12k_hw_group *ag) +{ + struct ath12k_base *ab; + int i; + + if (!ag) + return; + + mutex_lock(&ag->mutex); + + for (i = 0; i < ag->num_devices; i++) { + ab = ag->ab[i]; + if (!ab) + continue; + + ath12k_pci_power_down(ab, false); + } + + mutex_unlock(&ag->mutex); +} + static void ath12k_pci_shutdown(struct pci_dev *pdev) { struct ath12k_base *ab = pci_get_drvdata(pdev); struct 
ath12k_pci *ab_pci = ath12k_pci_priv(ab); ath12k_pci_set_irq_affinity_hint(ab_pci, NULL); - ath12k_pci_power_down(ab, false); + ath12k_pci_hw_group_power_down(ab->ag); } static __maybe_unused int ath12k_pci_pm_suspend(struct device *dev) @@ -1831,7 +1851,7 @@ static struct pci_driver ath12k_pci_driver = { .driver.pm = &ath12k_pci_pm_ops, }; -static int ath12k_pci_init(void) +int ath12k_pci_init(void) { int ret; @@ -1844,14 +1864,8 @@ static int ath12k_pci_init(void) return 0; } -module_init(ath12k_pci_init); -static void ath12k_pci_exit(void) +void ath12k_pci_exit(void) { pci_unregister_driver(&ath12k_pci_driver); } - -module_exit(ath12k_pci_exit); - -MODULE_DESCRIPTION("Driver support for Qualcomm Technologies PCIe 802.11be WLAN devices"); -MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/ath/ath12k/pci.h b/drivers/net/wireless/ath/ath12k/pci.h index 31584a7ad80e..0b4c459d6d8e 100644 --- a/drivers/net/wireless/ath/ath12k/pci.h +++ b/drivers/net/wireless/ath/ath12k/pci.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2019-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef ATH12K_PCI_H #define ATH12K_PCI_H @@ -116,6 +116,7 @@ struct ath12k_pci { unsigned long irq_flags; const struct ath12k_pci_ops *pci_ops; u32 qmi_instance; + u64 dma_mask; }; static inline struct ath12k_pci *ath12k_pci_priv(struct ath12k_base *ab) @@ -145,4 +146,6 @@ void ath12k_pci_stop(struct ath12k_base *ab); int ath12k_pci_start(struct ath12k_base *ab); int ath12k_pci_power_up(struct ath12k_base *ab); void ath12k_pci_power_down(struct ath12k_base *ab, bool is_suspend); +int ath12k_pci_init(void); +void ath12k_pci_exit(void); #endif /* ATH12K_PCI_H */ diff --git a/drivers/net/wireless/ath/ath12k/peer.c b/drivers/net/wireless/ath/ath12k/peer.c index 792cca8a3fb1..ec7236bbccc0 100644 --- a/drivers/net/wireless/ath/ath12k/peer.c +++ b/drivers/net/wireless/ath/ath12k/peer.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2022, 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "core.h" @@ -383,6 +383,9 @@ int ath12k_peer_create(struct ath12k *ar, struct ath12k_link_vif *arvif, arvif->ast_idx = peer->hw_peer_id; } + if (vif->type == NL80211_IFTYPE_AP) + peer->ucast_ra_only = true; + if (sta) { ahsta = ath12k_sta_to_ahsta(sta); arsta = wiphy_dereference(ath12k_ar_to_hw(ar)->wiphy, diff --git a/drivers/net/wireless/ath/ath12k/peer.h b/drivers/net/wireless/ath/ath12k/peer.h index 5870ee11a8c7..f3a5e054d2b5 100644 --- a/drivers/net/wireless/ath/ath12k/peer.h +++ b/drivers/net/wireless/ath/ath12k/peer.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef ATH12K_PEER_H @@ -62,6 +62,7 @@ struct ath12k_peer { /* for reference to ath12k_link_sta */ u8 link_id; + bool ucast_ra_only; }; struct ath12k_ml_peer { diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index 348dbc81bad8..99e1fb2910d0 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -11,6 +11,8 @@ #include "debug.h" #include <linux/of.h> #include <linux/firmware.h> +#include <linux/of_address.h> +#include <linux/ioport.h> #define SLEEP_CLOCK_SELECT_INTERNAL_BIT 0x02 #define HOST_CSTATE_BIT 0x04 @@ -2168,10 +2170,12 @@ int ath12k_qmi_host_cap_send(struct ath12k_base *ab) req.bdf_support_valid = 1; req.bdf_support = 1; - req.m3_support_valid = 1; - req.m3_support = 1; - req.m3_cache_support_valid = 1; - req.m3_cache_support = 1; + if (ab->hw_params->fw.m3_loader == ath12k_m3_fw_loader_driver) { + req.m3_support_valid = 1; + req.m3_support = 1; + req.m3_cache_support_valid = 1; + req.m3_cache_support = 1; + } req.cal_done_valid = 1; req.cal_done = ab->qmi.cal_done; @@ -2264,6 +2268,9 @@ static void ath12k_qmi_phy_cap_send(struct ath12k_base *ab) goto out; } + if (resp.single_chip_mlo_support_valid && resp.single_chip_mlo_support) + ab->single_chip_mlo_support = true; + if (!resp.num_phy_valid) { ret = -ENODATA; goto out; @@ -2272,7 +2279,8 @@ static void ath12k_qmi_phy_cap_send(struct ath12k_base *ab) ab->qmi.num_radios = resp.num_phy; ath12k_dbg(ab, ATH12K_DBG_QMI, - "phy capability resp valid %d num_phy %d valid %d board_id %d\n", + "phy capability resp valid %d single_chip_mlo_support %d valid %d num_phy %d valid %d board_id %d\n", + resp.single_chip_mlo_support_valid, resp.single_chip_mlo_support, resp.num_phy_valid, resp.num_phy, resp.board_id_valid, resp.board_id); @@ -2376,7 +2384,8 @@ int ath12k_qmi_respond_fw_mem_request(struct ath12k_base *ab) * failure to firmware and firmware then request multiple blocks of * small chunk size memory. */ - if (ab->qmi.target_mem_delayed) { + if (!test_bit(ATH12K_FLAG_FIXED_MEM_REGION, &ab->dev_flags) && + ab->qmi.target_mem_delayed) { delayed = true; ath12k_dbg(ab, ATH12K_DBG_QMI, "qmi delays mem_request %d\n", ab->qmi.mem_seg_count); @@ -2434,12 +2443,35 @@ out: return ret; } +void ath12k_qmi_reset_mlo_mem(struct ath12k_hw_group *ag) +{ + struct target_mem_chunk *mlo_chunk; + int i; + + lockdep_assert_held(&ag->mutex); + + if (!ag->mlo_mem.init_done || ag->num_started) + return; + + for (i = 0; i < ARRAY_SIZE(ag->mlo_mem.chunk); i++) { + mlo_chunk = &ag->mlo_mem.chunk[i]; + + if (mlo_chunk->v.addr) + /* TODO: Mode 0 recovery is the default mode hence resetting the + * whole memory region for now. 
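(resetting here is safe only because ag->num_started is zero at this point, so no running firmware can be using the shared MLO region)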
Once Mode 1 support is added, this + * needs to be handled properly + */ + memset(mlo_chunk->v.addr, 0, mlo_chunk->size); + } +} + static void ath12k_qmi_free_mlo_mem_chunk(struct ath12k_base *ab, struct target_mem_chunk *chunk, int idx) { struct ath12k_hw_group *ag = ab->ag; struct target_mem_chunk *mlo_chunk; + bool fixed_mem; lockdep_assert_held(&ag->mutex); @@ -2451,8 +2483,13 @@ static void ath12k_qmi_free_mlo_mem_chunk(struct ath12k_base *ab, return; } + fixed_mem = test_bit(ATH12K_FLAG_FIXED_MEM_REGION, &ab->dev_flags); mlo_chunk = &ag->mlo_mem.chunk[idx]; - if (mlo_chunk->v.addr) { + + if (fixed_mem && mlo_chunk->v.ioaddr) { + iounmap(mlo_chunk->v.ioaddr); + mlo_chunk->v.ioaddr = NULL; + } else if (mlo_chunk->v.addr) { dma_free_coherent(ab->dev, mlo_chunk->size, mlo_chunk->v.addr, @@ -2462,7 +2499,10 @@ static void ath12k_qmi_free_mlo_mem_chunk(struct ath12k_base *ab, mlo_chunk->paddr = 0; mlo_chunk->size = 0; - chunk->v.addr = NULL; + if (fixed_mem) + chunk->v.ioaddr = NULL; + else + chunk->v.addr = NULL; chunk->paddr = 0; chunk->size = 0; } @@ -2473,19 +2513,24 @@ static void ath12k_qmi_free_target_mem_chunk(struct ath12k_base *ab) int i, mlo_idx; for (i = 0, mlo_idx = 0; i < ab->qmi.mem_seg_count; i++) { - if (!ab->qmi.target_mem[i].v.addr) - continue; - if (ab->qmi.target_mem[i].type == MLO_GLOBAL_MEM_REGION_TYPE) { ath12k_qmi_free_mlo_mem_chunk(ab, &ab->qmi.target_mem[i], mlo_idx++); } else { - dma_free_coherent(ab->dev, - ab->qmi.target_mem[i].prev_size, - ab->qmi.target_mem[i].v.addr, - ab->qmi.target_mem[i].paddr); - ab->qmi.target_mem[i].v.addr = NULL; + if (test_bit(ATH12K_FLAG_FIXED_MEM_REGION, &ab->dev_flags) && + ab->qmi.target_mem[i].v.ioaddr) { + iounmap(ab->qmi.target_mem[i].v.ioaddr); + ab->qmi.target_mem[i].v.ioaddr = NULL; + } else { + if (!ab->qmi.target_mem[i].v.addr) + continue; + dma_free_coherent(ab->dev, + ab->qmi.target_mem[i].prev_size, + ab->qmi.target_mem[i].v.addr, + ab->qmi.target_mem[i].paddr); + ab->qmi.target_mem[i].v.addr = NULL; + } } } @@ -2638,6 +2683,130 @@ err: return ret; } +static int ath12k_qmi_assign_target_mem_chunk(struct ath12k_base *ab) +{ + struct reserved_mem *rmem; + size_t avail_rmem_size; + int i, idx, ret; + + for (i = 0, idx = 0; i < ab->qmi.mem_seg_count; i++) { + switch (ab->qmi.target_mem[i].type) { + case HOST_DDR_REGION_TYPE: + rmem = ath12k_core_get_reserved_mem(ab, 0); + if (!rmem) { + ret = -ENODEV; + goto out; + } + + avail_rmem_size = rmem->size; + if (avail_rmem_size < ab->qmi.target_mem[i].size) { + ath12k_dbg(ab, ATH12K_DBG_QMI, + "failed to assign mem type %u req size %u avail size %zu\n", + ab->qmi.target_mem[i].type, + ab->qmi.target_mem[i].size, + avail_rmem_size); + ret = -EINVAL; + goto out; + } + + ab->qmi.target_mem[idx].paddr = rmem->base; + ab->qmi.target_mem[idx].v.ioaddr = + ioremap(ab->qmi.target_mem[idx].paddr, + ab->qmi.target_mem[i].size); + if (!ab->qmi.target_mem[idx].v.ioaddr) { + ret = -EIO; + goto out; + } + ab->qmi.target_mem[idx].size = ab->qmi.target_mem[i].size; + ab->qmi.target_mem[idx].type = ab->qmi.target_mem[i].type; + idx++; + break; + case BDF_MEM_REGION_TYPE: + rmem = ath12k_core_get_reserved_mem(ab, 0); + if (!rmem) { + ret = -ENODEV; + goto out; + } + + avail_rmem_size = rmem->size - ab->hw_params->bdf_addr_offset; + if (avail_rmem_size < ab->qmi.target_mem[i].size) { + ath12k_dbg(ab, ATH12K_DBG_QMI, + "failed to assign mem type %u req size %u avail size %zu\n", + ab->qmi.target_mem[i].type, + ab->qmi.target_mem[i].size, + avail_rmem_size); + ret = -EINVAL; + goto out; + } + 
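/* The BDF window sits at the SoC-specific bdf_addr_offset inside the same reserved region; only that window is mapped */ +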
ab->qmi.target_mem[idx].paddr = + rmem->base + ab->hw_params->bdf_addr_offset; + ab->qmi.target_mem[idx].v.ioaddr = + ioremap(ab->qmi.target_mem[idx].paddr, + ab->qmi.target_mem[i].size); + if (!ab->qmi.target_mem[idx].v.ioaddr) { + ret = -EIO; + goto out; + } + ab->qmi.target_mem[idx].size = ab->qmi.target_mem[i].size; + ab->qmi.target_mem[idx].type = ab->qmi.target_mem[i].type; + idx++; + break; + case CALDB_MEM_REGION_TYPE: + /* Cold boot calibration is not enabled in Ath12k. Hence, + * assign paddr = 0. + * Once cold boot calibration is enabled add support to + * assign reserved memory from DT. + */ + ab->qmi.target_mem[idx].paddr = 0; + ab->qmi.target_mem[idx].v.ioaddr = NULL; + ab->qmi.target_mem[idx].size = ab->qmi.target_mem[i].size; + ab->qmi.target_mem[idx].type = ab->qmi.target_mem[i].type; + idx++; + break; + case M3_DUMP_REGION_TYPE: + rmem = ath12k_core_get_reserved_mem(ab, 1); + if (!rmem) { + ret = -EINVAL; + goto out; + } + + avail_rmem_size = rmem->size; + if (avail_rmem_size < ab->qmi.target_mem[i].size) { + ath12k_dbg(ab, ATH12K_DBG_QMI, + "failed to assign mem type %u req size %u avail size %zu\n", + ab->qmi.target_mem[i].type, + ab->qmi.target_mem[i].size, + avail_rmem_size); + ret = -EINVAL; + goto out; + } + + ab->qmi.target_mem[idx].paddr = rmem->base; + ab->qmi.target_mem[idx].v.ioaddr = + ioremap(ab->qmi.target_mem[idx].paddr, + ab->qmi.target_mem[i].size); + if (!ab->qmi.target_mem[idx].v.ioaddr) { + ret = -EIO; + goto out; + } + ab->qmi.target_mem[idx].size = ab->qmi.target_mem[i].size; + ab->qmi.target_mem[idx].type = ab->qmi.target_mem[i].type; + idx++; + break; + default: + ath12k_warn(ab, "qmi ignore invalid mem req type %u\n", + ab->qmi.target_mem[i].type); + break; + } + } + ab->qmi.mem_seg_count = idx; + + return 0; +out: + ath12k_qmi_free_target_mem_chunk(ab); + return ret; +} + /* clang stack usage explodes if this is inlined */ static noinline_for_stack int ath12k_qmi_request_target_cap(struct ath12k_base *ab) @@ -2939,6 +3108,9 @@ static void ath12k_qmi_m3_free(struct ath12k_base *ab) { struct m3_mem_region *m3_mem = &ab->qmi.m3_mem; + if (ab->hw_params->fw.m3_loader == ath12k_m3_fw_loader_remoteproc) + return; + if (!m3_mem->vaddr) return; @@ -3019,15 +3191,16 @@ int ath12k_qmi_wlanfw_m3_info_send(struct ath12k_base *ab) struct qmi_txn txn; int ret = 0; - ret = ath12k_qmi_m3_load(ab); - if (ret) { - ath12k_err(ab, "failed to load m3 firmware: %d", ret); - return ret; + if (ab->hw_params->fw.m3_loader == ath12k_m3_fw_loader_driver) { + ret = ath12k_qmi_m3_load(ab); + if (ret) { + ath12k_err(ab, "failed to load m3 firmware: %d", ret); + return ret; + } + req.addr = m3_mem->paddr; + req.size = m3_mem->size; } - req.addr = m3_mem->paddr; - req.size = m3_mem->size; - ret = qmi_txn_init(&ab->qmi.handle, &txn, qmi_wlanfw_m3_info_resp_msg_v01_ei, &resp); if (ret < 0) @@ -3477,11 +3650,20 @@ static void ath12k_qmi_msg_mem_request_cb(struct qmi_handle *qmi_hdl, msg->mem_seg[i].type, msg->mem_seg[i].size); } - ret = ath12k_qmi_alloc_target_mem_chunk(ab); - if (ret) { - ath12k_warn(ab, "qmi failed to alloc target memory: %d\n", - ret); - return; + if (test_bit(ATH12K_FLAG_FIXED_MEM_REGION, &ab->dev_flags)) { + ret = ath12k_qmi_assign_target_mem_chunk(ab); + if (ret) { + ath12k_warn(ab, "failed to assign qmi target memory: %d\n", + ret); + return; + } + } else { + ret = ath12k_qmi_alloc_target_mem_chunk(ab); + if (ret) { + ath12k_warn(ab, "qmi failed to alloc target memory: %d\n", + ret); + return; + } } ath12k_qmi_driver_event_post(qmi, 
ATH12K_QMI_EVENT_REQUEST_MEM, NULL); diff --git a/drivers/net/wireless/ath/ath12k/qmi.h b/drivers/net/wireless/ath/ath12k/qmi.h index 45d7c3fcafdd..96e6c3daecfe 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.h +++ b/drivers/net/wireless/ath/ath12k/qmi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef ATH12K_QMI_H @@ -21,6 +21,7 @@ #define ATH12K_QMI_WLFW_SERVICE_INS_ID_V01_WCN7850 0x1 #define ATH12K_QMI_WLFW_SERVICE_INS_ID_V01_QCN9274 0x07 +#define ATH12K_QMI_WLFW_SERVICE_INS_ID_V01_IPQ5332 0x2 #define ATH12K_QMI_WLANFW_MAX_TIMESTAMP_LEN_V01 32 #define ATH12K_QMI_RESP_LEN_MAX 8192 #define ATH12K_QMI_WLANFW_MAX_NUM_MEM_SEG_V01 52 @@ -41,6 +42,7 @@ #define ATH12K_BOARD_ID_DEFAULT 0xFF struct ath12k_base; +struct ath12k_hw_group; enum ath12k_qmi_file_type { ATH12K_QMI_FILE_TYPE_BDF_GOLDEN = 0, @@ -621,5 +623,6 @@ void ath12k_qmi_deinit_service(struct ath12k_base *ab); int ath12k_qmi_init_service(struct ath12k_base *ab); void ath12k_qmi_free_resource(struct ath12k_base *ab); void ath12k_qmi_trigger_host_cap(struct ath12k_base *ab); +void ath12k_qmi_reset_mlo_mem(struct ath12k_hw_group *ag); #endif diff --git a/drivers/net/wireless/ath/ath12k/reg.c b/drivers/net/wireless/ath/ath12k/reg.c index 439d61f284d8..7048834e0d14 100644 --- a/drivers/net/wireless/ath/ath12k/reg.c +++ b/drivers/net/wireless/ath/ath12k/reg.c @@ -1,11 +1,12 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include <linux/rtnetlink.h> #include "core.h" #include "debug.h" +#include "mac.h" /* World regdom to be used in case default regd from fw is unavailable */ #define ATH12K_2GHZ_CH01_11 REG_RULE(2412 - 10, 2462 + 10, 40, 0, 20, 0) @@ -48,6 +49,7 @@ ath12k_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request) { struct ieee80211_hw *hw = wiphy_to_ieee80211_hw(wiphy); struct ath12k_wmi_init_country_arg arg; + struct wmi_set_current_country_arg current_arg = {}; struct ath12k_hw *ah = ath12k_hw_to_ah(hw); struct ath12k *ar = ath12k_ah_to_ar(ah, 0); int ret, i; @@ -55,6 +57,24 @@ ath12k_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request) ath12k_dbg(ar->ab, ATH12K_DBG_REG, "Regulatory Notification received for %s\n", wiphy_name(wiphy)); + if (request->initiator == NL80211_REGDOM_SET_BY_DRIVER) { + ath12k_dbg(ar->ab, ATH12K_DBG_REG, + "driver initiated regd update\n"); + if (ah->state != ATH12K_HW_STATE_ON) + return; + + for_each_ar(ah, ar, i) { + ret = ath12k_reg_update_chan_list(ar, true); + if (ret) { + ath12k_warn(ar->ab, + "failed to update chan list for pdev %u, ret %d\n", + i, ret); + break; + } + } + return; + } + /* Currently supporting only General User Hints. Cell base user * hints to be handled later. 
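* (a user hint is a regulatory request originating from userspace, e.g. iw reg set)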
* Hints from other sources like Core, Beacons are not expected for @@ -77,27 +97,38 @@ ath12k_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request) return; } - /* Set the country code to the firmware and wait for - * the WMI_REG_CHAN_LIST_CC EVENT for updating the - * reg info - */ - arg.flags = ALPHA_IS_SET; - memcpy(&arg.cc_info.alpha2, request->alpha2, 2); - arg.cc_info.alpha2[2] = 0; - /* Allow fresh updates to wiphy regd */ ah->regd_updated = false; /* Send the reg change request to all the radios */ for_each_ar(ah, ar, i) { - ret = ath12k_wmi_send_init_country_cmd(ar, &arg); - if (ret) - ath12k_warn(ar->ab, - "INIT Country code set to fw failed : %d\n", ret); + if (ar->ab->hw_params->current_cc_support) { + memcpy(&current_arg.alpha2, request->alpha2, 2); + memcpy(&ar->alpha2, &current_arg.alpha2, 2); + ret = ath12k_wmi_send_set_current_country_cmd(ar, &current_arg); + if (ret) + ath12k_warn(ar->ab, + "failed to set current country code: %d\n", ret); + } else { + arg.flags = ALPHA_IS_SET; + memcpy(&arg.cc_info.alpha2, request->alpha2, 2); + arg.cc_info.alpha2[2] = 0; + + ret = ath12k_wmi_send_init_country_cmd(ar, &arg); + if (ret) + ath12k_warn(ar->ab, + "failed to set INIT country code: %d\n", ret); + } + + wiphy_lock(wiphy); + ath12k_mac_11d_scan_stop(ar); + wiphy_unlock(wiphy); + + ar->regdom_set_by_user = true; + } } -int ath12k_reg_update_chan_list(struct ath12k *ar) +int ath12k_reg_update_chan_list(struct ath12k *ar, bool wait) { struct ieee80211_supported_band **bands; struct ath12k_wmi_scan_chan_list_arg *arg; @@ -106,7 +137,35 @@ int ath12k_reg_update_chan_list(struct ath12k *ar) struct ath12k_wmi_channel_arg *ch; enum nl80211_band band; int num_channels = 0; - int i, ret; + int i, ret, left; + + if (wait && ar->state_11d != ATH12K_11D_IDLE) { + left = wait_for_completion_timeout(&ar->completed_11d_scan, + ATH12K_SCAN_TIMEOUT_HZ); + if (!left) { + ath12k_dbg(ar->ab, ATH12K_DBG_REG, + "failed to receive 11d scan complete: timed out\n"); + ar->state_11d = ATH12K_11D_IDLE; + } + ath12k_dbg(ar->ab, ATH12K_DBG_REG, + "reg 11d scan wait left time %d\n", left); + } + + if (wait && + (ar->scan.state == ATH12K_SCAN_STARTING || + ar->scan.state == ATH12K_SCAN_RUNNING)) { + left = wait_for_completion_timeout(&ar->scan.completed, + ATH12K_SCAN_TIMEOUT_HZ); + if (!left) + ath12k_dbg(ar->ab, ATH12K_DBG_REG, + "failed to receive hw scan complete: timed out\n"); + + ath12k_dbg(ar->ab, ATH12K_DBG_REG, + "reg hw scan wait left time %d\n", left); + } + + if (ar->ah->state == ATH12K_HW_STATE_RESTARTING) + return 0; bands = hw->wiphy->bands; for (band = 0; band < NUM_NL80211_BANDS; band++) { @@ -206,15 +265,57 @@ static void ath12k_copy_regd(struct ieee80211_regdomain *regd_orig, int ath12k_regd_update(struct ath12k *ar, bool init) { + u32 phy_id, freq_low = 0, freq_high = 0, supported_bands, band; + struct ath12k_wmi_hal_reg_capabilities_ext_arg *reg_cap; struct ath12k_hw *ah = ath12k_ar_to_ah(ar); struct ieee80211_hw *hw = ah->hw; struct ieee80211_regdomain *regd, *regd_copy = NULL; int ret, regd_len, pdev_id; struct ath12k_base *ab; - int i; ab = ar->ab; + supported_bands = ar->pdev->cap.supported_bands; + if (supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { + band = NL80211_BAND_2GHZ; + } else if (supported_bands & WMI_HOST_WLAN_5GHZ_CAP && !ar->supports_6ghz) { + band = NL80211_BAND_5GHZ; + } else if (supported_bands & WMI_HOST_WLAN_5GHZ_CAP && ar->supports_6ghz) { + band = NL80211_BAND_6GHZ; + } else { + /* This condition is not expected. 
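+ * supported_bands must contain WMI_HOST_WLAN_2GHZ_CAP or WMI_HOST_WLAN_5GHZ_CAP, so reaching this branch means firmware reported no usable band.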
+ */ + WARN_ON(1); + ret = -EINVAL; + goto err; + } + + reg_cap = &ab->hal_reg_cap[ar->pdev_idx]; + + if (ab->hw_params->single_pdev_only && !ar->supports_6ghz) { + phy_id = ar->pdev->cap.band[band].phy_id; + reg_cap = &ab->hal_reg_cap[phy_id]; + } + + /* Possible that due to reg change, current limits for supported + * frequency changed. Update that + */ + if (supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { + freq_low = max(reg_cap->low_2ghz_chan, ab->reg_freq_2ghz.start_freq); + freq_high = min(reg_cap->high_2ghz_chan, ab->reg_freq_2ghz.end_freq); + } else if (supported_bands & WMI_HOST_WLAN_5GHZ_CAP && !ar->supports_6ghz) { + freq_low = max(reg_cap->low_5ghz_chan, ab->reg_freq_5ghz.start_freq); + freq_high = min(reg_cap->high_5ghz_chan, ab->reg_freq_5ghz.end_freq); + } else if (supported_bands & WMI_HOST_WLAN_5GHZ_CAP && ar->supports_6ghz) { + freq_low = max(reg_cap->low_5ghz_chan, ab->reg_freq_6ghz.start_freq); + freq_high = min(reg_cap->high_5ghz_chan, ab->reg_freq_6ghz.end_freq); + } + + ath12k_mac_update_freq_range(ar, freq_low, freq_high); + + ath12k_dbg(ab, ATH12K_DBG_REG, "pdev %u reg updated freq limits %u->%u MHz\n", + ar->pdev->pdev_id, freq_low, freq_high); + /* If one of the radios within ah has already updated the regd for * the wiphy, then avoid setting regd again */ @@ -275,11 +376,7 @@ int ath12k_regd_update(struct ath12k *ar, bool init) goto err; } - rtnl_lock(); - wiphy_lock(hw->wiphy); - ret = regulatory_set_wiphy_regd_sync(hw->wiphy, regd_copy); - wiphy_unlock(hw->wiphy); - rtnl_unlock(); + ret = regulatory_set_wiphy_regd(hw->wiphy, regd_copy); kfree(regd_copy); @@ -290,15 +387,7 @@ int ath12k_regd_update(struct ath12k *ar, bool init) goto skip; ah->regd_updated = true; - /* Apply the new regd to all the radios, this is expected to be received only once - * since we check for ah->regd_updated and allow here only once. - */ - for_each_ar(ah, ar, i) { - ab = ar->ab; - ret = ath12k_reg_update_chan_list(ar); - if (ret) - goto err; - } + skip: return 0; err: @@ -611,6 +700,16 @@ ath12k_reg_update_weather_radar_band(struct ath12k_base *ab, *rule_idx = i; } +static void ath12k_reg_update_freq_range(struct ath12k_reg_freq *reg_freq, + struct ath12k_reg_rule *reg_rule) +{ + if (reg_freq->start_freq > reg_rule->start_freq) + reg_freq->start_freq = reg_rule->start_freq; + + if (reg_freq->end_freq < reg_rule->end_freq) + reg_freq->end_freq = reg_rule->end_freq; +} + struct ieee80211_regdomain * ath12k_reg_build_regd(struct ath12k_base *ab, struct ath12k_reg_info *reg_info, bool intersect) @@ -654,6 +753,16 @@ ath12k_reg_build_regd(struct ath12k_base *ab, "\r\nCountry %s, CFG Regdomain %s FW Regdomain %d, num_reg_rules %d\n", alpha2, ath12k_reg_get_regdom_str(tmp_regd->dfs_region), reg_info->dfs_region, num_rules); + + /* Reset start and end frequency for each band + */ + ab->reg_freq_5ghz.start_freq = INT_MAX; + ab->reg_freq_5ghz.end_freq = 0; + ab->reg_freq_2ghz.start_freq = INT_MAX; + ab->reg_freq_2ghz.end_freq = 0; + ab->reg_freq_6ghz.start_freq = INT_MAX; + ab->reg_freq_6ghz.end_freq = 0; + /* Update reg_rules[] below. 
Firmware is expected to * send these rules in order(2G rules first and then 5G) */ @@ -664,6 +773,7 @@ ath12k_reg_build_regd(struct ath12k_base *ab, max_bw = min_t(u16, reg_rule->max_bw, reg_info->max_bw_2g); flags = 0; + ath12k_reg_update_freq_range(&ab->reg_freq_2ghz, reg_rule); } else if (reg_info->num_5g_reg_rules && (j < reg_info->num_5g_reg_rules)) { reg_rule = reg_info->reg_rules_5g_ptr + j++; @@ -677,6 +787,7 @@ ath12k_reg_build_regd(struct ath12k_base *ab, * per other BW rule flags we pass from here */ flags = NL80211_RRF_AUTO_BW; + ath12k_reg_update_freq_range(&ab->reg_freq_5ghz, reg_rule); } else if (reg_info->is_ext_reg_event && reg_info->num_6g_reg_rules_ap[WMI_REG_INDOOR_AP] && (k < reg_info->num_6g_reg_rules_ap[WMI_REG_INDOOR_AP])) { @@ -684,6 +795,7 @@ ath12k_reg_build_regd(struct ath12k_base *ab, max_bw = min_t(u16, reg_rule->max_bw, reg_info->max_bw_6g_ap[WMI_REG_INDOOR_AP]); flags = NL80211_RRF_AUTO_BW; + ath12k_reg_update_freq_range(&ab->reg_freq_6ghz, reg_rule); } else { break; } @@ -770,6 +882,7 @@ void ath12k_regd_update_work(struct work_struct *work) void ath12k_reg_init(struct ieee80211_hw *hw) { hw->wiphy->regulatory_flags = REGULATORY_WIPHY_SELF_MANAGED; + hw->wiphy->flags |= WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER; hw->wiphy->reg_notifier = ath12k_reg_notifier; } @@ -777,8 +890,12 @@ void ath12k_reg_free(struct ath12k_base *ab) { int i; + mutex_lock(&ab->core_lock); for (i = 0; i < ab->hw_params->max_radios; i++) { kfree(ab->default_regd[i]); kfree(ab->new_regd[i]); + ab->default_regd[i] = NULL; + ab->new_regd[i] = NULL; } + mutex_unlock(&ab->core_lock); } diff --git a/drivers/net/wireless/ath/ath12k/reg.h b/drivers/net/wireless/ath/ath12k/reg.h index 75f80df2aa0c..b1eb584ff34c 100644 --- a/drivers/net/wireless/ath/ath12k/reg.h +++ b/drivers/net/wireless/ath/ath12k/reg.h @@ -99,6 +99,6 @@ struct ieee80211_regdomain *ath12k_reg_build_regd(struct ath12k_base *ab, struct ath12k_reg_info *reg_info, bool intersect); int ath12k_regd_update(struct ath12k *ar, bool init); -int ath12k_reg_update_chan_list(struct ath12k *ar); +int ath12k_reg_update_chan_list(struct ath12k *ar, bool wait); #endif diff --git a/drivers/net/wireless/ath/ath12k/testmode.c b/drivers/net/wireless/ath/ath12k/testmode.c index 18d56a976dc7..fb6af7ccf71f 100644 --- a/drivers/net/wireless/ath/ath12k/testmode.c +++ b/drivers/net/wireless/ath/ath12k/testmode.c @@ -97,7 +97,7 @@ void ath12k_tm_process_event(struct ath12k_base *ab, u32 cmd_id, u8 const *buf_pos; ath12k_dbg(ab, ATH12K_DBG_TESTMODE, - "testmode event wmi cmd_id %d ftm event msg %pK datalen %d\n", + "testmode event wmi cmd_id %d ftm event msg %p datalen %d\n", cmd_id, ftm_msg, length); ath12k_dbg_dump(ab, ATH12K_DBG_TESTMODE, NULL, "", ftm_msg, length); pdev_id = DP_HW2SW_MACID(le32_to_cpu(ftm_msg->seg_hdr.pdev_id)); @@ -227,7 +227,7 @@ static int ath12k_tm_cmd_process_ftm(struct ath12k *ar, struct nlattr *tb[]) buf_len = nla_len(tb[ATH_TM_ATTR_DATA]); cmd_id = WMI_PDEV_UTF_CMDID; ath12k_dbg(ar->ab, ATH12K_DBG_TESTMODE, - "testmode cmd wmi cmd_id %d buf %pK buf_len %d\n", + "testmode cmd wmi cmd_id %d buf %p buf_len %d\n", cmd_id, buf, buf_len); ath12k_dbg_dump(ar->ab, ATH12K_DBG_TESTMODE, NULL, "", buf, buf_len); bufpos = buf; diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 6d1ea5f3a791..ea303dca38b5 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -29,6 +29,7 @@ struct ath12k_wmi_svc_ready_parse { struct wmi_tlv_fw_stats_parse { const struct 
wmi_stats_event *ev; + struct ath12k_fw_stats *stats; }; struct ath12k_wmi_dma_ring_caps_parse { @@ -177,6 +178,8 @@ static const struct ath12k_wmi_tlv_policy ath12k_wmi_tlv_policies[] = { .min_len = sizeof(struct ath12k_wmi_p2p_noa_info) }, [WMI_TAG_P2P_NOA_EVENT] = { .min_len = sizeof(struct wmi_p2p_noa_event) }, + [WMI_TAG_11D_NEW_COUNTRY_EVENT] = { + .min_len = sizeof(struct wmi_11d_new_cc_event) }, }; __le32 ath12k_wmi_tlv_hdr(u32 cmd, u32 len) @@ -520,10 +523,10 @@ ath12k_pull_mac_phy_cap_svc_ready_ext(struct ath12k_wmi_pdev *wmi_handle, * band to band for a single radio, need to see how this should be * handled. */ - if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_2G_CAP) { + if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_2GHZ_CAP) { pdev_cap->tx_chain_mask = le32_to_cpu(mac_caps->tx_chain_mask_2g); pdev_cap->rx_chain_mask = le32_to_cpu(mac_caps->rx_chain_mask_2g); - } else if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_5G_CAP) { + } else if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_5GHZ_CAP) { pdev_cap->vht_cap = le32_to_cpu(mac_caps->vht_cap_info_5g); pdev_cap->vht_mcs = le32_to_cpu(mac_caps->vht_supp_mcs_5g); pdev_cap->he_mcs = le32_to_cpu(mac_caps->he_supp_mcs_5g); @@ -546,7 +549,7 @@ ath12k_pull_mac_phy_cap_svc_ready_ext(struct ath12k_wmi_pdev *wmi_handle, pdev_cap->rx_chain_mask_shift = find_first_bit((unsigned long *)&pdev_cap->rx_chain_mask, 32); - if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_2G_CAP) { + if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_2GHZ_CAP) { cap_band = &pdev_cap->band[NL80211_BAND_2GHZ]; cap_band->phy_id = le32_to_cpu(mac_caps->phy_id); cap_band->max_bw_supported = le32_to_cpu(mac_caps->max_bw_supported_2g); @@ -566,7 +569,7 @@ ath12k_pull_mac_phy_cap_svc_ready_ext(struct ath12k_wmi_pdev *wmi_handle, le32_to_cpu(mac_caps->he_ppet2g.ppet16_ppet8_ru3_ru0[i]); } - if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_5G_CAP) { + if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_5GHZ_CAP) { cap_band = &pdev_cap->band[NL80211_BAND_5GHZ]; cap_band->phy_id = le32_to_cpu(mac_caps->phy_id); cap_band->max_bw_supported = @@ -1037,14 +1040,32 @@ int ath12k_wmi_vdev_down(struct ath12k *ar, u8 vdev_id) static void ath12k_wmi_put_wmi_channel(struct ath12k_wmi_channel_params *chan, struct wmi_vdev_start_req_arg *arg) { + u32 center_freq1 = arg->band_center_freq1; + memset(chan, 0, sizeof(*chan)); chan->mhz = cpu_to_le32(arg->freq); - chan->band_center_freq1 = cpu_to_le32(arg->band_center_freq1); - if (arg->mode == MODE_11AC_VHT80_80) + chan->band_center_freq1 = cpu_to_le32(center_freq1); + if (arg->mode == MODE_11BE_EHT320) { + if (arg->freq > center_freq1) + chan->band_center_freq1 = cpu_to_le32(center_freq1 + 80); + else + chan->band_center_freq1 = cpu_to_le32(center_freq1 - 80); + + chan->band_center_freq2 = cpu_to_le32(center_freq1); + + } else if (arg->mode == MODE_11BE_EHT160) { + if (arg->freq > center_freq1) + chan->band_center_freq1 = cpu_to_le32(center_freq1 + 40); + else + chan->band_center_freq1 = cpu_to_le32(center_freq1 - 40); + + chan->band_center_freq2 = cpu_to_le32(center_freq1); + } else if (arg->mode == MODE_11BE_EHT80_80) { chan->band_center_freq2 = cpu_to_le32(arg->band_center_freq2); - else + } else { chan->band_center_freq2 = 0; + } chan->info |= le32_encode_bits(arg->mode, WMI_CHAN_INFO_MODE); if (arg->passive) @@ -2351,7 +2372,7 @@ skip_ml_params: for (i = 0; i < arg->peer_eht_mcs_count; i++) { eht_mcs = ptr; - eht_mcs->tlv_header = 
ath12k_wmi_tlv_cmd_hdr(WMI_TAG_HE_RATE_SET, + eht_mcs->tlv_header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_EHT_RATE_SET, sizeof(*eht_mcs)); eht_mcs->rx_mcs_set = cpu_to_le32(arg->peer_eht_rx_mcs_set[i]); @@ -2591,7 +2612,12 @@ int ath12k_wmi_send_scan_start_cmd(struct ath12k *ar, cmd->scan_id = cpu_to_le32(arg->scan_id); cmd->scan_req_id = cpu_to_le32(arg->scan_req_id); cmd->vdev_id = cpu_to_le32(arg->vdev_id); - cmd->scan_priority = cpu_to_le32(arg->scan_priority); + if (ar->state_11d == ATH12K_11D_PREPARING) + arg->scan_priority = WMI_SCAN_PRIORITY_MEDIUM; + else + arg->scan_priority = WMI_SCAN_PRIORITY_LOW; + + cmd->scan_priority = cpu_to_le32(arg->scan_priority); cmd->notify_scan_events = cpu_to_le32(arg->notify_scan_events); ath12k_wmi_copy_scan_event_cntrl_flags(cmd, arg); @@ -3313,6 +3337,110 @@ out: return ret; } +int ath12k_wmi_send_set_current_country_cmd(struct ath12k *ar, + struct wmi_set_current_country_arg *arg) +{ + struct ath12k_wmi_pdev *wmi = ar->wmi; + struct wmi_set_current_country_cmd *cmd; + struct sk_buff *skb; + int ret; + + skb = ath12k_wmi_alloc_skb(wmi->wmi_ab, sizeof(*cmd)); + if (!skb) + return -ENOMEM; + + cmd = (struct wmi_set_current_country_cmd *)skb->data; + cmd->tlv_header = + ath12k_wmi_tlv_cmd_hdr(WMI_TAG_SET_CURRENT_COUNTRY_CMD, + sizeof(*cmd)); + + cmd->pdev_id = cpu_to_le32(ar->pdev->pdev_id); + memcpy(&cmd->new_alpha2, &arg->alpha2, sizeof(arg->alpha2)); + ret = ath12k_wmi_cmd_send(wmi, skb, WMI_SET_CURRENT_COUNTRY_CMDID); + + ath12k_dbg(ar->ab, ATH12K_DBG_WMI, + "set current country pdev id %d alpha2 %c%c\n", + ar->pdev->pdev_id, + arg->alpha2[0], + arg->alpha2[1]); + + if (ret) { + ath12k_warn(ar->ab, + "failed to send WMI_SET_CURRENT_COUNTRY_CMDID: %d\n", ret); + dev_kfree_skb(skb); + } + + return ret; +} + +int ath12k_wmi_send_11d_scan_start_cmd(struct ath12k *ar, + struct wmi_11d_scan_start_arg *arg) +{ + struct ath12k_wmi_pdev *wmi = ar->wmi; + struct wmi_11d_scan_start_cmd *cmd; + struct sk_buff *skb; + int ret; + + skb = ath12k_wmi_alloc_skb(wmi->wmi_ab, sizeof(*cmd)); + if (!skb) + return -ENOMEM; + + cmd = (struct wmi_11d_scan_start_cmd *)skb->data; + cmd->tlv_header = + ath12k_wmi_tlv_cmd_hdr(WMI_TAG_11D_SCAN_START_CMD, + sizeof(*cmd)); + + cmd->vdev_id = cpu_to_le32(arg->vdev_id); + cmd->scan_period_msec = cpu_to_le32(arg->scan_period_msec); + cmd->start_interval_msec = cpu_to_le32(arg->start_interval_msec); + ret = ath12k_wmi_cmd_send(wmi, skb, WMI_11D_SCAN_START_CMDID); + + ath12k_dbg(ar->ab, ATH12K_DBG_WMI, + "send 11d scan start vdev id %d period %d ms interval %d ms\n", + arg->vdev_id, arg->scan_period_msec, + arg->start_interval_msec); + + if (ret) { + ath12k_warn(ar->ab, + "failed to send WMI_11D_SCAN_START_CMDID: %d\n", ret); + dev_kfree_skb(skb); + } + + return ret; +} + +int ath12k_wmi_send_11d_scan_stop_cmd(struct ath12k *ar, u32 vdev_id) +{ + struct ath12k_wmi_pdev *wmi = ar->wmi; + struct wmi_11d_scan_stop_cmd *cmd; + struct sk_buff *skb; + int ret; + + skb = ath12k_wmi_alloc_skb(wmi->wmi_ab, sizeof(*cmd)); + if (!skb) + return -ENOMEM; + + cmd = (struct wmi_11d_scan_stop_cmd *)skb->data; + cmd->tlv_header = + ath12k_wmi_tlv_cmd_hdr(WMI_TAG_11D_SCAN_STOP_CMD, + sizeof(*cmd)); + + cmd->vdev_id = cpu_to_le32(vdev_id); + ret = ath12k_wmi_cmd_send(wmi, skb, WMI_11D_SCAN_STOP_CMDID); + + ath12k_dbg(ar->ab, ATH12K_DBG_WMI, + "send 11d scan stop vdev id %d\n", + cmd->vdev_id); + + if (ret) { + ath12k_warn(ar->ab, + "failed to send WMI_11D_SCAN_STOP_CMDID: %d\n", ret); + dev_kfree_skb(skb); + } + + return ret; +} + int ath12k_wmi_send_twt_enable_cmd(struct ath12k *ar, u32 pdev_id) { @@ -3646,15 
+3774,15 @@ ath12k_fill_band_to_mac_param(struct ath12k_base *soc, arg[i].pdev_id = pdev->pdev_id; switch (pdev->cap.supported_bands) { - case WMI_HOST_WLAN_2G_5G_CAP: + case WMI_HOST_WLAN_2GHZ_5GHZ_CAP: arg[i].start_freq = hal_reg_cap->low_2ghz_chan; arg[i].end_freq = hal_reg_cap->high_5ghz_chan; break; - case WMI_HOST_WLAN_2G_CAP: + case WMI_HOST_WLAN_2GHZ_CAP: arg[i].start_freq = hal_reg_cap->low_2ghz_chan; arg[i].end_freq = hal_reg_cap->high_2ghz_chan; break; - case WMI_HOST_WLAN_5G_CAP: + case WMI_HOST_WLAN_5GHZ_CAP: arg[i].start_freq = hal_reg_cap->low_5ghz_chan; arg[i].end_freq = hal_reg_cap->high_5ghz_chan; break; @@ -3665,7 +3793,8 @@ ath12k_fill_band_to_mac_param(struct ath12k_base *soc, } static void -ath12k_wmi_copy_resource_config(struct ath12k_wmi_resource_config_params *wmi_cfg, +ath12k_wmi_copy_resource_config(struct ath12k_base *ab, + struct ath12k_wmi_resource_config_params *wmi_cfg, struct ath12k_wmi_resource_config_arg *tg_cfg) { wmi_cfg->num_vdevs = cpu_to_le32(tg_cfg->num_vdevs); @@ -3732,6 +3861,9 @@ ath12k_wmi_copy_resource_config(struct ath12k_wmi_resource_config_params *wmi_cf WMI_RSRC_CFG_FLAGS2_RX_PEER_METADATA_VERSION); wmi_cfg->host_service_flags = cpu_to_le32(tg_cfg->is_reg_cc_ext_event_supported << WMI_RSRC_CFG_HOST_SVC_FLAG_REG_CC_EXT_SUPPORT_BIT); + if (ab->hw_params->reoq_lut_support) + wmi_cfg->host_service_flags |= + cpu_to_le32(1 << WMI_RSRC_CFG_HOST_SVC_FLAG_REO_QREF_SUPPORT_BIT); wmi_cfg->ema_max_vap_cnt = cpu_to_le32(tg_cfg->ema_max_vap_cnt); wmi_cfg->ema_max_profile_period = cpu_to_le32(tg_cfg->ema_max_profile_period); wmi_cfg->flags2 |= cpu_to_le32(WMI_RSRC_CFG_FLAGS2_CALC_NEXT_DTIM_COUNT_SET); @@ -3772,7 +3904,7 @@ static int ath12k_init_cmd_send(struct ath12k_wmi_pdev *wmi, ptr = skb->data + sizeof(*cmd); cfg = ptr; - ath12k_wmi_copy_resource_config(cfg, &arg->res_cfg); + ath12k_wmi_copy_resource_config(ab, cfg, &arg->res_cfg); cfg->tlv_header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_RESOURCE_CONFIG, sizeof(*cfg)); @@ -4699,7 +4831,7 @@ ath12k_wmi_tlv_mac_phy_caps_ext_parse(struct ath12k_base *ab, bands = pdev->cap.supported_bands; } - if (bands & WMI_HOST_WLAN_2G_CAP) { + if (bands & WMI_HOST_WLAN_2GHZ_CAP) { ath12k_wmi_eht_caps_parse(pdev, NL80211_BAND_2GHZ, caps->eht_cap_mac_info_2ghz, caps->eht_cap_phy_info_2ghz, @@ -4708,7 +4840,7 @@ ath12k_wmi_tlv_mac_phy_caps_ext_parse(struct ath12k_base *ab, caps->eht_cap_info_internal); } - if (bands & WMI_HOST_WLAN_5G_CAP) { + if (bands & WMI_HOST_WLAN_5GHZ_CAP) { ath12k_wmi_eht_caps_parse(pdev, NL80211_BAND_5GHZ, caps->eht_cap_mac_info_5ghz, caps->eht_cap_phy_info_5ghz, @@ -4922,7 +5054,7 @@ static u8 ath12k_wmi_ignore_num_extra_rules(struct ath12k_wmi_reg_rule_ext_param for (count = 0; count < num_reg_rules; count++) { start_freq = le32_get_bits(rule[count].freq_info, REG_RULE_START_FREQ); - if (start_freq >= ATH12K_MIN_6G_FREQ) + if (start_freq >= ATH12K_MIN_6GHZ_FREQ) num_invalid_5ghz_rules++; } @@ -4992,9 +5124,9 @@ static int ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab, for (i = 0; i < WMI_REG_CURRENT_MAX_AP_TYPE; i++) { num_6g_reg_rules_ap[i] = reg_info->num_6g_reg_rules_ap[i]; - if (num_6g_reg_rules_ap[i] > MAX_6G_REG_RULES) { + if (num_6g_reg_rules_ap[i] > MAX_6GHZ_REG_RULES) { ath12k_warn(ab, "Num 6G reg rules for AP mode(%d) exceeds max limit (num_6g_reg_rules_ap: %d, max_rules: %d)\n", - i, num_6g_reg_rules_ap[i], MAX_6G_REG_RULES); + i, num_6g_reg_rules_ap[i], MAX_6GHZ_REG_RULES); kfree(tb); return -EINVAL; } @@ -5015,9 +5147,9 @@ static int 
ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab, reg_info->num_6g_reg_rules_cl[WMI_REG_VLP_AP][i]; total_reg_rules += num_6g_reg_rules_cl[WMI_REG_VLP_AP][i]; - if (num_6g_reg_rules_cl[WMI_REG_INDOOR_AP][i] > MAX_6G_REG_RULES || - num_6g_reg_rules_cl[WMI_REG_STD_POWER_AP][i] > MAX_6G_REG_RULES || - num_6g_reg_rules_cl[WMI_REG_VLP_AP][i] > MAX_6G_REG_RULES) { + if (num_6g_reg_rules_cl[WMI_REG_INDOOR_AP][i] > MAX_6GHZ_REG_RULES || + num_6g_reg_rules_cl[WMI_REG_STD_POWER_AP][i] > MAX_6GHZ_REG_RULES || + num_6g_reg_rules_cl[WMI_REG_VLP_AP][i] > MAX_6GHZ_REG_RULES) { ath12k_warn(ab, "Num 6g client reg rules exceeds max limit, for client(type: %d)\n", i); kfree(tb); @@ -5933,6 +6065,50 @@ static void ath12k_wmi_op_ep_tx_credits(struct ath12k_base *ab) wake_up(&ab->wmi_ab.tx_credits_wq); } +static int ath12k_reg_11d_new_cc_event(struct ath12k_base *ab, struct sk_buff *skb) +{ + const struct wmi_11d_new_cc_event *ev; + struct ath12k *ar; + struct ath12k_pdev *pdev; + const void **tb; + int ret, i; + + tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC); + if (IS_ERR(tb)) { + ret = PTR_ERR(tb); + ath12k_warn(ab, "failed to parse tlv: %d\n", ret); + return ret; + } + + ev = tb[WMI_TAG_11D_NEW_COUNTRY_EVENT]; + if (!ev) { + kfree(tb); + ath12k_warn(ab, "failed to fetch 11d new cc ev"); + return -EPROTO; + } + + spin_lock_bh(&ab->base_lock); + memcpy(&ab->new_alpha2, &ev->new_alpha2, REG_ALPHA2_LEN); + spin_unlock_bh(&ab->base_lock); + + ath12k_dbg(ab, ATH12K_DBG_WMI, "wmi 11d new cc %c%c\n", + ab->new_alpha2[0], + ab->new_alpha2[1]); + + kfree(tb); + + for (i = 0; i < ab->num_radios; i++) { + pdev = &ab->pdevs[i]; + ar = pdev->ar; + ar->state_11d = ATH12K_11D_IDLE; + complete(&ar->completed_11d_scan); + } + + queue_work(ab->workqueue, &ab->update_11d_work); + + return 0; +} + static void ath12k_wmi_htc_tx_complete(struct ath12k_base *ab, struct sk_buff *skb) { @@ -6317,13 +6493,13 @@ static void ath12k_mgmt_rx_event(struct ath12k_base *ab, struct sk_buff *skb) if (rx_ev.status & WMI_RX_STATUS_ERR_MIC) status->flag |= RX_FLAG_MMIC_ERROR; - if (rx_ev.chan_freq >= ATH12K_MIN_6G_FREQ && - rx_ev.chan_freq <= ATH12K_MAX_6G_FREQ) { + if (rx_ev.chan_freq >= ATH12K_MIN_6GHZ_FREQ && + rx_ev.chan_freq <= ATH12K_MAX_6GHZ_FREQ) { status->band = NL80211_BAND_6GHZ; status->freq = rx_ev.chan_freq; } else if (rx_ev.channel >= 1 && rx_ev.channel <= 14) { status->band = NL80211_BAND_2GHZ; - } else if (rx_ev.channel >= 36 && rx_ev.channel <= ATH12K_MAX_5G_CHAN) { + } else if (rx_ev.channel >= 36 && rx_ev.channel <= ATH12K_MAX_5GHZ_CHAN) { status->band = NL80211_BAND_5GHZ; } else { /* Shouldn't happen unless list of advertised channels to @@ -7216,7 +7392,7 @@ void ath12k_wmi_fw_stats_dump(struct ath12k *ar, else buf[len] = 0; - ath12k_debugfs_fw_stats_reset(ar); + ath12k_fw_stats_reset(ar); } static void @@ -7335,7 +7511,7 @@ static int ath12k_wmi_tlv_fw_stats_data_parse(struct ath12k_base *ab, u16 len) { const struct wmi_stats_event *ev = parse->ev; - struct ath12k_fw_stats stats = {0}; + struct ath12k_fw_stats *stats = parse->stats; struct ath12k *ar; struct ath12k_link_vif *arvif; struct ieee80211_sta *sta; @@ -7344,18 +7520,18 @@ static int ath12k_wmi_tlv_fw_stats_data_parse(struct ath12k_base *ab, int i, ret = 0; const void *data = ptr; - INIT_LIST_HEAD(&stats.vdevs); - INIT_LIST_HEAD(&stats.bcn); - INIT_LIST_HEAD(&stats.pdevs); - if (!ev) { ath12k_warn(ab, "failed to fetch update stats ev"); return -EPROTO; } + if (!stats) + return -EINVAL; + rcu_read_lock(); - ar = ath12k_mac_get_ar_by_pdev_id(ab, 
le32_to_cpu(ev->pdev_id)); + stats->pdev_id = le32_to_cpu(ev->pdev_id); + ar = ath12k_mac_get_ar_by_pdev_id(ab, stats->pdev_id); if (!ar) { ath12k_warn(ab, "invalid pdev id %d in update stats event\n", le32_to_cpu(ev->pdev_id)); @@ -7398,8 +7574,8 @@ static int ath12k_wmi_tlv_fw_stats_data_parse(struct ath12k_base *ab, if (!dst) continue; ath12k_wmi_pull_vdev_stats(src, dst); - stats.stats_id = WMI_REQUEST_VDEV_STAT; - list_add_tail(&dst->list, &stats.vdevs); + stats->stats_id = WMI_REQUEST_VDEV_STAT; + list_add_tail(&dst->list, &stats->vdevs); } for (i = 0; i < le32_to_cpu(ev->num_bcn_stats); i++) { const struct ath12k_wmi_bcn_stats_params *src; @@ -7417,8 +7593,8 @@ static int ath12k_wmi_tlv_fw_stats_data_parse(struct ath12k_base *ab, if (!dst) continue; ath12k_wmi_pull_bcn_stats(src, dst); - stats.stats_id = WMI_REQUEST_BCN_STAT; - list_add_tail(&dst->list, &stats.bcn); + stats->stats_id = WMI_REQUEST_BCN_STAT; + list_add_tail(&dst->list, &stats->bcn); } for (i = 0; i < le32_to_cpu(ev->num_pdev_stats); i++) { const struct ath12k_wmi_pdev_stats_params *src; @@ -7430,7 +7606,7 @@ static int ath12k_wmi_tlv_fw_stats_data_parse(struct ath12k_base *ab, goto exit; } - stats.stats_id = WMI_REQUEST_PDEV_STAT; + stats->stats_id = WMI_REQUEST_PDEV_STAT; data += sizeof(*src); len -= sizeof(*src); @@ -7442,11 +7618,9 @@ static int ath12k_wmi_tlv_fw_stats_data_parse(struct ath12k_base *ab, ath12k_wmi_pull_pdev_stats_base(&src->base, dst); ath12k_wmi_pull_pdev_stats_tx(&src->tx, dst); ath12k_wmi_pull_pdev_stats_rx(&src->rx, dst); - list_add_tail(&dst->list, &stats.pdevs); + list_add_tail(&dst->list, &stats->pdevs); } - complete(&ar->fw_stats_complete); - ath12k_debugfs_fw_stats_process(ar, &stats); exit: rcu_read_unlock(); return ret; @@ -7472,16 +7646,74 @@ static int ath12k_wmi_tlv_fw_stats_parse(struct ath12k_base *ab, return ret; } +static int ath12k_wmi_pull_fw_stats(struct ath12k_base *ab, struct sk_buff *skb, + struct ath12k_fw_stats *stats) +{ + struct wmi_tlv_fw_stats_parse parse = {}; + + stats->stats_id = 0; + parse.stats = stats; + + return ath12k_wmi_tlv_iter(ab, skb->data, skb->len, + ath12k_wmi_tlv_fw_stats_parse, + &parse); +} + static void ath12k_update_stats_event(struct ath12k_base *ab, struct sk_buff *skb) { + struct ath12k_fw_stats stats = {}; + struct ath12k *ar; int ret; - struct wmi_tlv_fw_stats_parse parse = {}; - ret = ath12k_wmi_tlv_iter(ab, skb->data, skb->len, - ath12k_wmi_tlv_fw_stats_parse, - &parse); - if (ret) - ath12k_warn(ab, "failed to parse fw stats %d\n", ret); + INIT_LIST_HEAD(&stats.pdevs); + INIT_LIST_HEAD(&stats.vdevs); + INIT_LIST_HEAD(&stats.bcn); + + ret = ath12k_wmi_pull_fw_stats(ab, skb, &stats); + if (ret) { + ath12k_warn(ab, "failed to pull fw stats: %d\n", ret); + goto free; + } + + ath12k_dbg(ab, ATH12K_DBG_WMI, "event update stats"); + + rcu_read_lock(); + ar = ath12k_mac_get_ar_by_pdev_id(ab, stats.pdev_id); + if (!ar) { + rcu_read_unlock(); + ath12k_warn(ab, "failed to get ar for pdev_id %d: %d\n", + stats.pdev_id, ret); + goto free; + } + + spin_lock_bh(&ar->data_lock); + + /* WMI_REQUEST_PDEV_STAT can be requested via .get_txpower mac ops or via + * debugfs fw stats. Therefore, processing it separately. + */ + if (stats.stats_id == WMI_REQUEST_PDEV_STAT) { + list_splice_tail_init(&stats.pdevs, &ar->fw_stats.pdevs); + ar->fw_stats.fw_stats_done = true; + goto complete; + } + + /* WMI_REQUEST_VDEV_STAT and WMI_REQUEST_BCN_STAT are currently requested only + * via debugfs fw stats. Hence, processing these in debugfs context. 
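+ * ath12k_debugfs_fw_stats_process() consumes the spliced lists, so nothing needs to be freed on this path.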
+ */ + ath12k_debugfs_fw_stats_process(ar, &stats); + +complete: + complete(&ar->fw_stats_complete); + spin_unlock_bh(&ar->data_lock); + rcu_read_unlock(); + + /* Since the stats' pdev, vdev and beacon lists are spliced and reinitialised + * at this point, no need to free the individual lists. + */ + return; + +free: + ath12k_fw_stats_free(&stats); } /* PDEV_CTL_FAILSAFE_CHECK_EVENT is received from FW when the frequency scanned @@ -8640,6 +8872,9 @@ static void ath12k_wmi_op_rx(struct ath12k_base *ab, struct sk_buff *skb) case WMI_HALPHY_STATS_CTRL_PATH_EVENTID: ath12k_wmi_process_tpc_stats(ab, skb); break; + case WMI_11D_NEW_COUNTRY_EVENTID: + ath12k_reg_11d_new_cc_event(ab, skb); + break; /* add Unsupported events (rare) here */ case WMI_TBTTOFFSET_EXT_UPDATE_EVENTID: case WMI_PEER_OPER_MODE_CHANGE_EVENTID: diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 1ba33e30ddd2..80fdbc566518 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -216,9 +216,9 @@ enum wmi_host_hw_mode_priority { }; enum WMI_HOST_WLAN_BAND { - WMI_HOST_WLAN_2G_CAP = 1, - WMI_HOST_WLAN_5G_CAP = 2, - WMI_HOST_WLAN_2G_5G_CAP = 3, + WMI_HOST_WLAN_2GHZ_CAP = 1, + WMI_HOST_WLAN_5GHZ_CAP = 2, + WMI_HOST_WLAN_2GHZ_5GHZ_CAP = 3, }; enum wmi_cmd_group { @@ -2461,6 +2461,7 @@ struct wmi_init_cmd { } __packed; #define WMI_RSRC_CFG_HOST_SVC_FLAG_REG_CC_EXT_SUPPORT_BIT 4 +#define WMI_RSRC_CFG_HOST_SVC_FLAG_REO_QREF_SUPPORT_BIT 12 #define WMI_RSRC_CFG_FLAGS2_RX_PEER_METADATA_VERSION GENMASK(5, 4) #define WMI_RSRC_CFG_FLAG1_BSS_CHANNEL_INFO_64 BIT(5) #define WMI_RSRC_CFG_FLAGS2_CALC_NEXT_DTIM_COUNT_SET BIT(9) @@ -2690,8 +2691,8 @@ enum wmi_channel_width { * 2 - index for 160 MHz, first 3 bytes valid * 3 - index for 320 MHz, first 3 bytes valid */ -#define WMI_MAX_EHT_SUPP_MCS_2G_SIZE 2 -#define WMI_MAX_EHT_SUPP_MCS_5G_SIZE 4 +#define WMI_MAX_EHT_SUPP_MCS_2GHZ_SIZE 2 +#define WMI_MAX_EHT_SUPP_MCS_5GHZ_SIZE 4 #define WMI_EHTCAP_TXRX_MCS_NSS_IDX_80 0 #define WMI_EHTCAP_TXRX_MCS_NSS_IDX_160 1 @@ -2730,8 +2731,8 @@ struct ath12k_wmi_caps_ext_params { struct ath12k_wmi_ppe_threshold_params eht_ppet_2ghz; struct ath12k_wmi_ppe_threshold_params eht_ppet_5ghz; __le32 eht_cap_info_internal; - __le32 eht_supp_mcs_ext_2ghz[WMI_MAX_EHT_SUPP_MCS_2G_SIZE]; - __le32 eht_supp_mcs_ext_5ghz[WMI_MAX_EHT_SUPP_MCS_5G_SIZE]; + __le32 eht_supp_mcs_ext_2ghz[WMI_MAX_EHT_SUPP_MCS_2GHZ_SIZE]; + __le32 eht_supp_mcs_ext_5ghz[WMI_MAX_EHT_SUPP_MCS_5GHZ_SIZE]; __le32 eml_capability; __le32 mld_capability; } __packed; @@ -4026,6 +4027,28 @@ struct wmi_init_country_cmd { } cc_info; } __packed; +struct wmi_11d_scan_start_arg { + u32 vdev_id; + u32 scan_period_msec; + u32 start_interval_msec; +}; + +struct wmi_11d_scan_start_cmd { + __le32 tlv_header; + __le32 vdev_id; + __le32 scan_period_msec; + __le32 start_interval_msec; +} __packed; + +struct wmi_11d_scan_stop_cmd { + __le32 tlv_header; + __le32 vdev_id; +} __packed; + +struct wmi_11d_new_cc_event { + __le32 new_alpha2; +} __packed; + struct wmi_delba_send_cmd { __le32 tlv_header; __le32 vdev_id; @@ -4108,7 +4131,17 @@ struct ath12k_wmi_eht_rate_set_params { #define MAX_REG_RULES 10 #define REG_ALPHA2_LEN 2 -#define MAX_6G_REG_RULES 5 +#define MAX_6GHZ_REG_RULES 5 + +struct wmi_set_current_country_arg { + u8 alpha2[REG_ALPHA2_LEN]; +}; + +struct wmi_set_current_country_cmd { + __le32 tlv_header; + __le32 pdev_id; + __le32 new_alpha2; +} __packed; enum wmi_start_event_param { WMI_VDEV_START_RESP_EVENT = 0, @@ -5990,11 
+6023,17 @@ int ath12k_wmi_send_bcn_offload_control_cmd(struct ath12k *ar, u32 vdev_id, u32 bcn_ctrl_op); int ath12k_wmi_send_init_country_cmd(struct ath12k *ar, struct ath12k_wmi_init_country_arg *arg); +int +ath12k_wmi_send_set_current_country_cmd(struct ath12k *ar, + struct wmi_set_current_country_arg *arg); int ath12k_wmi_peer_rx_reorder_queue_setup(struct ath12k *ar, int vdev_id, const u8 *addr, dma_addr_t paddr, u8 tid, u8 ba_window_size_valid, u32 ba_window_size); +int ath12k_wmi_send_11d_scan_start_cmd(struct ath12k *ar, + struct wmi_11d_scan_start_arg *arg); +int ath12k_wmi_send_11d_scan_stop_cmd(struct ath12k *ar, u32 vdev_id); int ath12k_wmi_rx_reord_queue_remove(struct ath12k *ar, struct ath12k_wmi_rx_reorder_queue_remove_arg *arg); diff --git a/drivers/net/wireless/ath/wcn36xx/testmode.c b/drivers/net/wireless/ath/wcn36xx/testmode.c index e5142c052985..d7a2a483cbc4 100644 --- a/drivers/net/wireless/ath/wcn36xx/testmode.c +++ b/drivers/net/wireless/ath/wcn36xx/testmode.c @@ -56,7 +56,7 @@ static int wcn36xx_tm_cmd_ptt(struct wcn36xx *wcn, struct ieee80211_vif *vif, msg = buf; wcn36xx_dbg(WCN36XX_DBG_TESTMODE, - "testmode cmd wmi msg_id 0x%04X msg_len %d buf %pK buf_len %d\n", + "testmode cmd wmi msg_id 0x%04X msg_len %d buf %p buf_len %d\n", msg->msg_id, msg->msg_body_length, buf, buf_len); diff --git a/drivers/net/wireless/ath/wil6210/txrx.h b/drivers/net/wireless/ath/wil6210/txrx.h index 689f68d89a44..33ccd0b248d4 100644 --- a/drivers/net/wireless/ath/wil6210/txrx.h +++ b/drivers/net/wireless/ath/wil6210/txrx.h @@ -7,6 +7,7 @@ #ifndef WIL6210_TXRX_H #define WIL6210_TXRX_H +#include <net/sock.h> #include "wil6210.h" #include "txrx_edma.h" @@ -617,7 +618,7 @@ static inline bool wil_need_txstat(struct sk_buff *skb) const u8 *da = wil_skb_get_da(skb); return is_unicast_ether_addr(da) && skb->sk && - (skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS); + sock_flag(skb->sk, SOCK_WIFI_STATUS); } static inline void wil_consume_skb(struct sk_buff *skb, bool acked) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index d06c724f63d9..b056336d5da6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -741,15 +741,19 @@ static int brcmf_usb_dl_cmd(struct brcmf_usbdev_info *devinfo, u8 cmd, void *buffer, int buflen) { int ret; - char *tmpbuf; + char *tmpbuf = NULL; u16 size; - if ((!devinfo) || (devinfo->ctl_urb == NULL)) - return -EINVAL; + if (!devinfo || !devinfo->ctl_urb) { + ret = -EINVAL; + goto err; + } tmpbuf = kmalloc(buflen, GFP_ATOMIC); - if (!tmpbuf) - return -ENOMEM; + if (!tmpbuf) { + ret = -ENOMEM; + goto err; + } size = buflen; devinfo->ctl_urb->transfer_buffer_length = size; @@ -770,18 +774,23 @@ static int brcmf_usb_dl_cmd(struct brcmf_usbdev_info *devinfo, u8 cmd, ret = usb_submit_urb(devinfo->ctl_urb, GFP_ATOMIC); if (ret < 0) { brcmf_err("usb_submit_urb failed %d\n", ret); - goto finalize; + goto err; } if (!brcmf_usb_ioctl_resp_wait(devinfo)) { usb_kill_urb(devinfo->ctl_urb); ret = -ETIMEDOUT; + goto err; } else { memcpy(buffer, tmpbuf, buflen); } -finalize: kfree(tmpbuf); + return 0; + +err: + kfree(tmpbuf); + brcmf_err("dl cmd %u failed: err=%d\n", cmd, ret); return ret; } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/aiutils.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/aiutils.c index 50d817485cf9..0cb64fc56783 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/aiutils.c +++ 
b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/aiutils.c @@ -198,7 +198,7 @@ #define CST4313_SPROM_OTP_SEL_SHIFT 0 /* 4313 Chip specific ChipControl register bits */ - /* 12 mA drive strengh for later 4313 */ + /* 12 mA drive strength for later 4313 */ #define CCTRL_4313_12MA_LED_DRIVE 0x00000007 /* Manufacturer Ids */ @@ -453,7 +453,7 @@ ai_buscore_setup(struct si_info *sii, struct bcma_device *cc) /* get chipcommon chipstatus */ sii->chipst = bcma_read32(cc, CHIPCREGOFFS(chipstatus)); - /* get chipcommon capabilites */ + /* get chipcommon capabilities */ sii->pub.cccaps = bcma_read32(cc, CHIPCREGOFFS(capabilities)); /* get pmu rev and caps */ @@ -657,7 +657,7 @@ u16 ai_clkctl_fast_pwrup_delay(struct si_pub *sih) } /* - * clock control policy function throught chipcommon + * clock control policy function through chipcommon * * set dynamic clk control mode (forceslow, forcefast, dynamic) * returns true if we are forcing fast clock diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/aiutils.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/aiutils.h index 90b6e3982d2c..e791dd07ca78 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/aiutils.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/aiutils.h @@ -76,7 +76,7 @@ * conventions for the use the flash space: */ -/* Minumum amount of flash we support */ +/* Minimum amount of flash we support */ #define FLASH_MIN 0x00020000 /* Minimum flash size */ #define CC_SROM_OTP 0x800 /* SROM/OTP address space */ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/ampdu.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/ampdu.c index a767cbb79185..e1d707a7c964 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/ampdu.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/ampdu.c @@ -479,7 +479,7 @@ void brcms_c_ampdu_reset_session(struct brcms_ampdu_session *session, /* * Preps the given packet for AMPDU based on the session data. If the - * frame cannot be accomodated in the current session, -ENOSPC is + * frame cannot be accommodated in the current session, -ENOSPC is * returned. */ int brcms_c_ampdu_add_frame(struct brcms_ampdu_session *session, @@ -529,7 +529,7 @@ int brcms_c_ampdu_add_frame(struct brcms_ampdu_session *session, } /* - * Now that we're sure this frame can be accomodated, update the + * Now that we're sure this frame can be accommodated, update the * session information. */ session->ampdu_len += len; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/channel.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/channel.c index d1b9a18d0374..3878c4124e25 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/channel.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/channel.c @@ -445,7 +445,7 @@ brcms_c_channel_reg_limits(struct brcms_cm_info *wlc_cm, u16 chanspec, /* * OFDM 40 MHz SISO has the same power as the corresponding - * MCS0-7 rate unless overriden by the locale specific code. + * MCS0-7 rate unless overridden by the locale specific code. * We set this value to 0 as a flag (presumably 0 dBm isn't * a possibility) and then copy the MCS0-7 value to the 40 MHz * value if it wasn't explicitly set. @@ -479,7 +479,7 @@ brcms_c_channel_reg_limits(struct brcms_cm_info *wlc_cm, u16 chanspec, /* * 20 MHz has the same power as the corresponding OFDM rate - * unless overriden by the locale specific code. + * unless overridden by the locale specific code. 
*/ txpwr->mcs_20_siso[i] = txpwr->ofdm[i]; txpwr->mcs_40_siso[i] = 0; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/dma.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/dma.c index 80c35027787a..c739bf7463b3 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/dma.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/dma.c @@ -1348,7 +1348,7 @@ static void prep_ampdu_frame(struct dma_info *di, struct sk_buff *p) ret = brcms_c_ampdu_add_frame(session, p); if (ret == -ENOSPC) { /* - * AMPDU cannot accomodate this frame. Close out the in- + * AMPDU cannot accommodate this frame. Close out the in- * progress AMPDU session and start a new one. */ ampdu_finalize(di); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c index 5b1d35601bbd..1c3d29dca424 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c @@ -48,9 +48,9 @@ FIF_BCN_PRBRESP_PROMISC | \ FIF_PSPOLL) -#define CHAN2GHZ(channel, freqency, chflags) { \ +#define CHAN2GHZ(channel, frequency, chflags) { \ .band = NL80211_BAND_2GHZ, \ - .center_freq = (freqency), \ + .center_freq = (frequency), \ .hw_value = (channel), \ .flags = chflags, \ .max_antenna_gain = 0, \ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c index 2738d4d6c60a..c1a9c1e442ee 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c @@ -921,7 +921,7 @@ brcms_c_dotxstatus(struct brcms_c_info *wlc, struct tx_status *txs) * The "fallback limit" is the number of tx attempts a given * MPDU is sent at the "primary" rate. Tx attempts beyond that * limit are sent at the "secondary" rate. - * A 'short frame' does not exceed RTS treshold. + * A 'short frame' does not exceed RTS threshold. */ u16 sfbl, /* Short Frame Rate Fallback Limit */ lfbl, /* Long Frame Rate Fallback Limit */ @@ -6259,7 +6259,7 @@ brcms_c_d11hdrs_mac80211(struct brcms_c_info *wlc, struct ieee80211_hw *hw, } /* - * Currently only support same setting for primay and + * Currently only support same setting for primary and * fallback rates. 
Unify flags for each rate into a * single value for the frame */ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.h index 9f76b880814e..b7ca0d9891c4 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.h @@ -603,7 +603,7 @@ enum brcms_bss_type { * cur_etheraddr: h/w address * flags: BSSCFG flags; see below * - * current_bss: BSS parms in ASSOCIATED state + * current_bss: BSS parameters in ASSOCIATED state * * * ID: 'unique' ID of this bsscfg, assigned at bsscfg allocation diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/pmu.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/pmu.c index 71b80381f3ad..c9a29e626daa 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/pmu.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/pmu.c @@ -31,7 +31,7 @@ #define EXT_ILP_HZ 32768 /* - * Duration for ILP clock frequency measurment in milliseconds + * Duration for ILP clock frequency measurement in milliseconds * * remark: 1000 must be an integer multiple of this duration */ diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index 130b9a8aa7eb..67ee3b6e6d85 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -44,6 +44,8 @@ IWL_QU_C_HR_B_FW_PRE "-" __stringify(api) ".ucode" #define IWL_QU_B_JF_B_MODULE_FIRMWARE(api) \ IWL_QU_B_JF_B_FW_PRE "-" __stringify(api) ".ucode" +#define IWL_QU_C_JF_B_MODULE_FIRMWARE(api) \ + IWL_QU_C_JF_B_FW_PRE "-" __stringify(api) ".ucode" #define IWL_CC_A_MODULE_FIRMWARE(api) \ IWL_CC_A_FW_PRE "-" __stringify(api) ".ucode" @@ -422,6 +424,7 @@ const struct iwl_cfg iwl_cfg_quz_a0_hr_b0 = { MODULE_FIRMWARE(IWL_QU_B_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_QU_C_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_QU_B_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); +MODULE_FIRMWARE(IWL_QU_C_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_QUZ_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_QUZ_A_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_CC_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h b/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h index 4fab6c66994e..3173fa96cb48 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h @@ -527,8 +527,8 @@ enum iwl_fw_ini_time_point { * @IWL_FW_INI_APPLY_POLICY_OVERRIDE_DATA: override trigger data. 
* Append otherwise * @IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD: send cmd once dump collected - * @IWL_FW_INI_APPLY_POLICY_RESET_HANDSHAKE: perform reset handshake and - * split dump to before/after with region marking + * @IWL_FW_INI_APPLY_POLICY_SPLIT_DUMP_RESET: split this dump into regions + * before and after the reset handshake */ enum iwl_fw_ini_trigger_apply_policy { IWL_FW_INI_APPLY_POLICY_MATCH_TIME_POINT = BIT(0), @@ -537,7 +537,7 @@ enum iwl_fw_ini_trigger_apply_policy { IWL_FW_INI_APPLY_POLICY_OVERRIDE_CFG = BIT(9), IWL_FW_INI_APPLY_POLICY_OVERRIDE_DATA = BIT(10), IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD = BIT(16), - IWL_FW_INI_APPLY_POLICY_RESET_HANDSHAKE = BIT(17), + IWL_FW_INI_APPLY_POLICY_SPLIT_DUMP_RESET = BIT(17), }; /** @@ -560,7 +560,7 @@ enum iwl_fw_ini_trigger_reset_fw_policy { * @IWL_FW_INI_DEBUG_DUMP_POLICY_MAX_LIMIT_600KB: mini dump only 600KB region dump * @IWL_FW_IWL_DEBUG_DUMP_POLICY_MAX_LIMIT_5MB: mini dump 5MB size dump * @IWL_FW_IWL_DEBUG_DUMP_POLICY_BEFORE_RESET: dump this region before reset - * handshake (if requested by %IWL_FW_INI_APPLY_POLICY_RESET_HANDSHAKE) + * handshake (if requested by %IWL_FW_INI_APPLY_POLICY_SPLIT_DUMP_RESET) */ enum iwl_fw_ini_dump_policy { IWL_FW_INI_DEBUG_DUMP_POLICY_NO_LIMIT = BIT(0), diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h b/drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h index 4d8a12799c4d..4594a7c94bd6 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2012-2014, 2018, 2020-2024 Intel Corporation + * Copyright (C) 2012-2014, 2018, 2020-2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -146,6 +146,7 @@ struct iwl_phy_context_cmd_v1 { * @sbb_ctrl_channel_loc: location of the control channel * @puncture_mask: bitmap of punctured subchannels * @dsp_cfg_flags: set to 0 + * @secondary_ctrl_chnl_loc: location of secondary control channel * @reserved: reserved to align to 64 bit */ struct iwl_phy_context_cmd { @@ -164,11 +165,13 @@ struct iwl_phy_context_cmd { }; }; __le32 dsp_cfg_flags; - __le32 reserved; + u8 secondary_ctrl_chnl_loc; + u8 reserved[3]; } __packed; /* PHY_CONTEXT_CMD_API_VER_3, * PHY_CONTEXT_CMD_API_VER_4, * PHY_CONTEXT_CMD_API_VER_5, - * PHY_CONTEXT_CMD_API_VER_6 + * PHY_CONTEXT_CMD_API_VER_6, + * PHY_CONTEXT_CMD_API_S_VER_7 */ #endif /* __iwl_fw_api_phy_ctxt_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h index 37ec26596ee7..1238c23ac48b 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2012-2014, 2018-2024 Intel Corporation + * Copyright (C) 2012-2014, 2018-2025 Intel Corporation * Copyright (C) 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH */ @@ -54,7 +54,7 @@ struct iwl_ltr_config_cmd_v1 { * @flags: See &enum iwl_ltr_config_flags * @static_long: static LTR Long register value. * @static_short: static LTR Short register value. - * @ltr_cfg_values: LTR parameters table values (in usec) in folowing order: + * @ltr_cfg_values: LTR parameters table values (in usec) in following order: * TX, RX, Short Idle, Long Idle. 
Used only if %LTR_CFG_FLAG_UPDATE_VALUES * is set. * @ltr_short_idle_timeout: LTR Short Idle timeout (in usec). Used only if @@ -89,6 +89,7 @@ struct iwl_ltr_config_cmd { * @POWER_FLAGS_LPRX_ENA_MSK: Low Power RX enable. * @POWER_FLAGS_UAPSD_MISBEHAVING_ENA_MSK: AP/GO's uAPSD misbehaving * detection enablement + * @POWER_FLAGS_ENABLE_SMPS_MSK: SMPS is allowed for this vif */ enum iwl_power_flags { POWER_FLAGS_POWER_SAVE_ENA_MSK = BIT(0), @@ -99,6 +100,7 @@ enum iwl_power_flags { POWER_FLAGS_ADVANCE_PM_ENA_MSK = BIT(9), POWER_FLAGS_LPRX_ENA_MSK = BIT(11), POWER_FLAGS_UAPSD_MISBEHAVING_ENA_MSK = BIT(12), + POWER_FLAGS_ENABLE_SMPS_MSK = BIT(14), }; #define IWL_POWER_VEC_SIZE 5 @@ -216,7 +218,6 @@ struct iwl_mac_power_cmd { /* CONTEXT_DESC_API_T_VER_1 */ __le32 id_and_color; - /* CLIENT_PM_POWER_TABLE_S_VER_1 */ __le16 flags; __le16 keep_alive_seconds; __le32 rx_data_timeout; @@ -237,7 +238,7 @@ struct iwl_mac_power_cmd { u8 heavy_rx_thld_percentage; u8 limited_ps_threshold; u8 reserved; -} __packed; +} __packed; /* CLIENT_PM_POWER_TABLE_S_VER_1, VER_2 */ /* * struct iwl_uapsd_misbehaving_ap_notif - FW sends this notification when @@ -690,7 +691,7 @@ struct iwl_sar_offset_mapping_cmd { * Roaming Energy Delta Threshold, otherwise use normal Energy Delta * Threshold. Typical energy threshold is -72dBm. * @bf_temp_threshold: This threshold determines the type of temperature - * filtering (Slow or Fast) that is selected (Units are in Celsuis): + * filtering (Slow or Fast) that is selected (Units are in Celsius): * If the current temperature is above this threshold - Fast filter * will be used, If the current temperature is below this threshold - * Slow filter will be used. @@ -698,12 +699,12 @@ struct iwl_sar_offset_mapping_cmd { * calculated for this and the last passed beacon is greater than this * threshold. Zero value means that the temperature change is ignored for * beacon filtering; beacons will not be forced to be sent to driver - * regardless of whether its temerature has been changed. + * regardless of whether its temperature has been changed. * @bf_temp_slow_filter: Send Beacon to driver if delta in temperature values * calculated for this and the last passed beacon is greater than this * threshold. Zero value means that the temperature change is ignored for * beacon filtering; beacons will not be forced to be sent to driver - * regardless of whether its temerature has been changed. + * regardless of whether its temperature has been changed. * @bf_enable_beacon_filter: 1, beacon filtering is enabled; 0, disabled. 
* @bf_debug_flag: beacon filtering debug configuration * @bf_escape_timer: Send beacons to to driver if no beacons were passed diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h index 691c879cb90d..42ddd03c1c65 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2012-2014, 2018-2024 Intel Corporation + * Copyright (C) 2012-2014, 2018-2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH */ @@ -193,10 +193,11 @@ enum iwl_rx_mpdu_amsdu_info { IWL_RX_MPDU_AMSDU_LAST_SUBFRAME = 0x80, }; -#define RX_MPDU_BAND_POS 6 -#define RX_MPDU_BAND_MASK 0xC0 -#define BAND_IN_RX_STATUS(_val) \ - (((_val) & RX_MPDU_BAND_MASK) >> RX_MPDU_BAND_POS) +enum iwl_rx_mpdu_mac_phy_band { + IWL_RX_MPDU_MAC_PHY_BAND_MAC_MASK = 0x0f, + IWL_RX_MPDU_MAC_PHY_BAND_PHY_MASK = 0x30, + IWL_RX_MPDU_MAC_PHY_BAND_BAND_MASK = 0xc0, +}; enum iwl_rx_l3_proto_values { IWL_RX_L3_TYPE_NONE, @@ -668,9 +669,10 @@ struct iwl_rx_mpdu_desc { */ __le16 phy_info; /** - * @mac_phy_idx: MAC/PHY index + * @mac_phy_band: MAC ID, PHY ID, band; + * see &enum iwl_rx_mpdu_mac_phy_band */ - u8 mac_phy_idx; + u8 mac_phy_band; /* DW4 */ union { struct { diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h b/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h index 0a9f14fb04be..00713a991879 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2012-2014, 2018, 2020 - 2021, 2023 - 2024 Intel Corporation + * Copyright (C) 2012-2014, 2018, 2020-2021, 2023-2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -584,6 +584,9 @@ struct iwl_stats_ntfy_per_phy { __le32 last_tx_ch_width_indx; } __packed; /* STATISTICS_NTFY_PER_PHY_API_S_VER_1 */ +/* unknown channel load (due to not being active on channel) */ +#define IWL_STATS_UNKNOWN_CHANNEL_LOAD 0xffffffff + /** * struct iwl_stats_ntfy_per_sta * diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 03f639fbf9b6..e70eebf079be 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -2624,6 +2624,12 @@ enum iwl_dump_ini_region_selector { IWL_INI_DUMP_LATE_REGIONS, }; +static bool iwl_dump_due_to_error(enum iwl_fw_ini_time_point tp_id) +{ + return tp_id == IWL_FW_INI_TIME_POINT_FW_ASSERT || + tp_id == IWL_FW_INI_TIME_POINT_FW_HW_ERROR; +} + static u32 iwl_dump_ini_dump_regions(struct iwl_fw_runtime *fwrt, struct iwl_fwrt_dump_data *dump_data, @@ -2689,8 +2695,7 @@ iwl_dump_ini_dump_regions(struct iwl_fw_runtime *fwrt, * debug data which also need to be collected. 
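The rx.h change above retires the open-coded RX_MPDU_BAND_POS/MASK macros in favor of named masks for all three fields packed into the renamed @mac_phy_band byte. A standalone sketch of how such masks are consumed; the kernel side uses u8_get_bits() from <linux/bitfield.h> for the same job:

```c
#include <stdint.h>
#include <stdio.h>

#define MAC_MASK  0x0f	/* IWL_RX_MPDU_MAC_PHY_BAND_MAC_MASK  */
#define PHY_MASK  0x30	/* IWL_RX_MPDU_MAC_PHY_BAND_PHY_MASK  */
#define BAND_MASK 0xc0	/* IWL_RX_MPDU_MAC_PHY_BAND_BAND_MASK */

/* shift the mask down to bit 0, then apply it: what u8_get_bits() does
 * (__builtin_ctz is a GCC/Clang builtin, used here for brevity) */
static uint8_t get_bits(uint8_t val, uint8_t mask)
{
	return (val & mask) >> __builtin_ctz(mask);
}

int main(void)
{
	uint8_t mac_phy_band = 0x96;	/* band 2, phy 1, mac 6 */

	printf("mac %u phy %u band %u\n",
	       get_bits(mac_phy_band, MAC_MASK),
	       get_bits(mac_phy_band, PHY_MASK),
	       get_bits(mac_phy_band, BAND_MASK));
	return 0;
}
```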
*/ if (reg_type == IWL_FW_INI_REGION_DRAM_IMR) { - if (tp_id == IWL_FW_INI_TIME_POINT_FW_ASSERT || - tp_id == IWL_FW_INI_TIME_POINT_FW_HW_ERROR) + if (iwl_dump_due_to_error(tp_id)) imr_reg_data->reg_tlv = fwrt->trans->dbg.active_regions[i]; else @@ -2727,7 +2732,7 @@ static u32 iwl_dump_ini_trigger(struct iwl_fw_runtime *fwrt, ARRAY_SIZE(fwrt->trans->dbg.active_regions)); if (trigger->time_point & - cpu_to_le32(IWL_FW_INI_APPLY_POLICY_RESET_HANDSHAKE)) { + cpu_to_le32(IWL_FW_INI_APPLY_POLICY_SPLIT_DUMP_RESET)) { size += iwl_dump_ini_dump_regions(fwrt, dump_data, list, tp_id, regions_mask, &imr_reg_data, IWL_INI_DUMP_EARLY_REGIONS); @@ -2736,6 +2741,10 @@ static u32 iwl_dump_ini_trigger(struct iwl_fw_runtime *fwrt, regions_mask, &imr_reg_data, IWL_INI_DUMP_LATE_REGIONS); } else { + if (fw_has_capa(&fwrt->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_RESET_DURING_ASSERT) && + iwl_dump_due_to_error(tp_id)) + iwl_trans_pcie_fw_reset_handshake(fwrt->trans); size += iwl_dump_ini_dump_regions(fwrt, dump_data, list, tp_id, regions_mask, &imr_reg_data, IWL_INI_DUMP_ALL_REGIONS); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h index 9860903ecd3f..5a1ec880ed72 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/file.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h @@ -102,6 +102,7 @@ enum iwl_ucode_tlv_type { IWL_UCODE_TLV_SEC_TABLE_ADDR = 66, IWL_UCODE_TLV_D3_KEK_KCK_ADDR = 67, IWL_UCODE_TLV_CURRENT_PC = 68, + IWL_UCODE_TLV_FSEQ_BIN_VERSION = 72, IWL_UCODE_TLV_FW_NUM_STATIONS = IWL_UCODE_TLV_CONST_BASE + 0, IWL_UCODE_TLV_FW_NUM_LINKS = IWL_UCODE_TLV_CONST_BASE + 1, @@ -402,6 +403,7 @@ typedef unsigned int __bitwise iwl_ucode_tlv_capa_t; * @IWL_UCODE_TLV_CAPA_BIOS_OVERRIDE_5G9_FOR_CA: supports (de)activating 5G9 * for CA from BIOS. 
* @IWL_UCODE_TLV_CAPA_UHB_CANADA_TAS_SUPPORT: supports %TAS_UHB_ALLOWED_CANADA + * @IWL_UCODE_TLV_CAPA_EXT_FSEQ_IMAGE_SUPPORT: external FSEQ image support * * @NUM_IWL_UCODE_TLV_CAPA: number of bits used */ @@ -504,6 +506,14 @@ enum iwl_ucode_tlv_capa { IWL_UCODE_TLV_CAPA_MONITOR_PASSIVE_CHANS = (__force iwl_ucode_tlv_capa_t)122, IWL_UCODE_TLV_CAPA_BIOS_OVERRIDE_5G9_FOR_CA = (__force iwl_ucode_tlv_capa_t)123, IWL_UCODE_TLV_CAPA_UHB_CANADA_TAS_SUPPORT = (__force iwl_ucode_tlv_capa_t)124, + IWL_UCODE_TLV_CAPA_EXT_FSEQ_IMAGE_SUPPORT = (__force iwl_ucode_tlv_capa_t)125, + + /* set 4 */ + /** + * @IWL_UCODE_TLV_CAPA_RESET_DURING_ASSERT: FW reset handshake is needed + * during assert handling even if the dump isn't split + */ + IWL_UCODE_TLV_CAPA_RESET_DURING_ASSERT = (__force iwl_ucode_tlv_capa_t)(4 * 32 + 0), NUM_IWL_UCODE_TLV_CAPA /* * This construction make both sparse (which cannot increment the previous @@ -994,6 +1004,10 @@ struct iwl_fw_dump_exclude { __le32 addr, size; }; +struct iwl_fw_fseq_bin_version { + __le32 major, minor; +}; /* FW_TLV_FSEQ_BIN_VERSION_S */ + static inline size_t _iwl_tlv_array_len(const struct iwl_ucode_tlv *tlv, size_t fixed_size, size_t var_size) { @@ -1011,4 +1025,18 @@ static inline size_t _iwl_tlv_array_len(const struct iwl_ucode_tlv *tlv, #define iwl_tlv_array_len_with_size(_tlv_ptr, _struct_ptr, _size) \ _iwl_tlv_array_len((_tlv_ptr), sizeof(*(_struct_ptr)), _size) + +/* external FSEQ file */ +#define IWL_FSEQ_FILE "intel/fseq-%04x-%04x" +#define IWL_FSEQ_MAGIC "INTEL-CNV-FSEQ\n\0" + +struct iwl_fseq_file { + char magic[16]; + char version[16]; + __le32 bt_len; + __le32 wifi_len; + u8 reserved[8]; + u8 data[]; +} __packed; + #endif /* __iwl_fw_file_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h index a9e6bba2419e..5753d95986cd 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright (C) 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation */ #ifndef __iwl_fw_runtime_h__ #define __iwl_fw_runtime_h__ @@ -110,6 +110,7 @@ struct iwl_txf_iter_data { * Only read the UEFI variables if locked. 
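file.h now declares the on-disk format of the external FSEQ image: a fixed magic, a version string, and two length fields framing the payload. A loader would want to validate the header before trusting bt_len/wifi_len. The helper below is hypothetical (the real driver obtains the file via request_firmware() and its checks may differ); it assumes a little-endian host so the __le32 fields can be read directly:

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define IWL_FSEQ_MAGIC "INTEL-CNV-FSEQ\n\0"

struct iwl_fseq_file {
	char magic[16];
	char version[16];
	uint32_t bt_len;	/* __le32 in file.h */
	uint32_t wifi_len;	/* __le32 in file.h */
	uint8_t reserved[8];
	uint8_t data[];		/* BT image followed by WiFi image */
};

/* hypothetical validation helper, not taken from the driver */
static bool fseq_valid(const struct iwl_fseq_file *f, size_t size)
{
	if (size < sizeof(*f))
		return false;
	/* compares all 16 bytes, including the trailing "\n\0" */
	if (memcmp(f->magic, IWL_FSEQ_MAGIC, sizeof(f->magic)) != 0)
		return false;
	/* both images must fit in the bytes after the header */
	return (uint64_t)f->bt_len + f->wifi_len <= size - sizeof(*f);
}
```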
* @sar_profiles: sar profiles as read from WRDS/EWRD BIOS tables * @geo_profiles: geographic profiles as read from WGDS BIOS table + * @phy_filters: specific phy filters as read from WPFC BIOS table */ struct iwl_fw_runtime { struct iwl_trans *trans; @@ -184,6 +185,9 @@ struct iwl_fw_runtime { struct iwl_mcc_allowed_ap_type_cmd uats_table; bool uats_valid; u8 uefi_tables_lock_status; +#ifdef CONFIG_ACPI + struct iwl_phy_specific_cfg phy_filters; +#endif }; void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h b/drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h index 20563a32a21a..6111ca970ed2 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h @@ -78,11 +78,13 @@ enum iwl_prph_scratch_flags { /** * enum iwl_prph_scratch_ext_flags - PRPH scratch control ext flags + * @IWL_PRPH_SCRATCH_EXT_EXT_FSEQ: external FSEQ image provided * @IWL_PRPH_SCRATCH_EXT_URM_FW: switch to URM mode based on fw setting * @IWL_PRPH_SCRATCH_EXT_URM_PERM: switch to permanent URM mode * @IWL_PRPH_SCRATCH_EXT_32KHZ_CLK_VALID: use external 32 KHz clock */ enum iwl_prph_scratch_ext_flags { + IWL_PRPH_SCRATCH_EXT_EXT_FSEQ = BIT(0), IWL_PRPH_SCRATCH_EXT_URM_FW = BIT(4), IWL_PRPH_SCRATCH_EXT_URM_PERM = BIT(5), IWL_PRPH_SCRATCH_EXT_32KHZ_CLK_VALID = BIT(8), @@ -202,6 +204,19 @@ struct iwl_prph_scratch_ctrl_cfg { struct iwl_prph_scratch_step_cfg step_cfg; } __packed; /* PERIPH_SCRATCH_CTRL_CFG_S */ +#define IWL_NUM_DRAM_FSEQ_ENTRIES 8 + +/** + * struct iwl_context_info_dram_fseq - images DRAM map (with fseq) + * each entry in the map represents a DRAM chunk of up to 32 KB + * @common: UMAC/LMAC/virtual images + * @fseq_img: FSEQ image DRAM map + */ +struct iwl_context_info_dram_fseq { + struct iwl_context_info_dram_nonfseq common; + __le64 fseq_img[IWL_NUM_DRAM_FSEQ_ENTRIES]; +} __packed; /* PERIPH_SCRATCH_DRAM_MAP_S */ + /** * struct iwl_prph_scratch - peripheral scratch mapping * @ctrl_cfg: control and configuration of prph scratch @@ -215,7 +230,7 @@ struct iwl_prph_scratch { __le32 fseq_override; __le32 step_analog_params; __le32 reserved[8]; - struct iwl_context_info_dram dram; + struct iwl_context_info_dram_fseq dram; } __packed; /* PERIPH_SCRATCH_S */ /** diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-context-info.h b/drivers/net/wireless/intel/iwlwifi/iwl-context-info.h index dfd44fabf237..b495eb94d126 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-context-info.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-context-info.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright (C) 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020, 2022, 2024 Intel Corporation + * Copyright (C) 2018-2020, 2022, 2024-2025 Intel Corporation */ #ifndef __iwl_context_info_file_h__ #define __iwl_context_info_file_h__ @@ -78,13 +78,13 @@ struct iwl_context_info_control { } __packed; /** - * struct iwl_context_info_dram - images DRAM map + * struct iwl_context_info_dram_nonfseq - images DRAM map * each entry in the map represents a DRAM chunk of up to 32 KB * @umac_img: UMAC image DRAM map * @lmac_img: LMAC image DRAM map * @virtual_img: paged image DRAM map */ -struct iwl_context_info_dram { +struct iwl_context_info_dram_nonfseq { __le64 umac_img[IWL_MAX_DRAM_ENTRY]; __le64 lmac_img[IWL_MAX_DRAM_ENTRY]; __le64 virtual_img[IWL_MAX_DRAM_ENTRY]; @@ -177,7 +177,7 @@ struct iwl_context_info { struct 
iwl_context_info_early_dbg_cfg edbg_cfg; struct iwl_context_info_pnvm_cfg pnvm_cfg; __le32 reserved2[16]; - struct iwl_context_info_dram dram; + struct iwl_context_info_dram_nonfseq dram; __le32 reserved3[16]; } __packed; @@ -186,7 +186,7 @@ void iwl_pcie_ctxt_info_free(struct iwl_trans *trans); void iwl_pcie_ctxt_info_free_paging(struct iwl_trans *trans); int iwl_pcie_init_fw_sec(struct iwl_trans *trans, const struct fw_img *fw, - struct iwl_context_info_dram *ctxt_dram); + struct iwl_context_info_dram_nonfseq *ctxt_dram); void *iwl_pcie_ctxt_info_dma_alloc_coherent(struct iwl_trans *trans, size_t size, dma_addr_t *phys); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index 3ff493e920d2..c09967e2c68c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2005-2014, 2018-2024 Intel Corporation + * Copyright (C) 2005-2014, 2018-2025 Intel Corporation * Copyright (C) 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2016 Intel Deutschland GmbH */ @@ -194,17 +194,19 @@ #define CSR_INT_BIT_RF_KILL (1 << 7) /* HW RFKILL switch GP_CNTRL[27] toggled */ #define CSR_INT_BIT_CT_KILL (1 << 6) /* Critical temp (chip too hot) rfkill */ #define CSR_INT_BIT_SW_RX (1 << 3) /* Rx, command responses */ +#define CSR_INT_BIT_RESET_DONE (1 << 2) /* reset handshake with firmware is done */ #define CSR_INT_BIT_WAKEUP (1 << 1) /* NIC controller waking up (pwr mgmt) */ #define CSR_INT_BIT_ALIVE (1 << 0) /* uCode interrupts once it initializes */ -#define CSR_INI_SET_MASK (CSR_INT_BIT_FH_RX | \ - CSR_INT_BIT_HW_ERR | \ - CSR_INT_BIT_FH_TX | \ - CSR_INT_BIT_SW_ERR | \ - CSR_INT_BIT_RF_KILL | \ - CSR_INT_BIT_SW_RX | \ - CSR_INT_BIT_WAKEUP | \ - CSR_INT_BIT_ALIVE | \ +#define CSR_INI_SET_MASK (CSR_INT_BIT_FH_RX | \ + CSR_INT_BIT_HW_ERR | \ + CSR_INT_BIT_FH_TX | \ + CSR_INT_BIT_SW_ERR | \ + CSR_INT_BIT_RF_KILL | \ + CSR_INT_BIT_SW_RX | \ + CSR_INT_BIT_WAKEUP | \ + CSR_INT_BIT_RESET_DONE | \ + CSR_INT_BIT_ALIVE | \ CSR_INT_BIT_RX_PERIODIC) /* interrupt flags in FH (flow handler) (PCI busmaster DMA) */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index c381511e9ec6..e30e11b987d5 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -143,6 +143,9 @@ static struct ieee80211_rate iwl_cfg80211_rates[] = { * @NVM_CHANNEL_VALID: channel is usable for this SKU/geo * @NVM_CHANNEL_IBSS: usable as an IBSS channel and deprecated * when %IWL_NVM_SBANDS_FLAGS_LAR enabled. + * @NVM_CHANNEL_ALLOW_20MHZ_ACTIVITY: active scanning allowed and + * AP allowed only in 20 MHz. Valid only + * when %IWL_NVM_SBANDS_FLAGS_LAR enabled. * @NVM_CHANNEL_ACTIVE: active scanning allowed and allows IBSS * when %IWL_NVM_SBANDS_FLAGS_LAR enabled. 
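For context on the iwl_context_info_dram_fseq map introduced above: each __le64 entry describes a DRAM chunk of up to 32 KB (per the struct comment), so the eight fseq_img entries bound the FSEQ image size. A back-of-the-envelope sketch of that arithmetic, assuming the full 32 KiB chunk size:

```c
#include <stddef.h>
#include <stdio.h>

#define CHUNK_SZ (32 * 1024)		/* "up to 32 KB" per map entry */
#define IWL_NUM_DRAM_FSEQ_ENTRIES 8

static unsigned int chunks_needed(size_t img_len)
{
	return (img_len + CHUNK_SZ - 1) / CHUNK_SZ;	/* round up */
}

int main(void)
{
	size_t img = 100 * 1024;	/* hypothetical 100 KiB FSEQ image */

	printf("%u of %d entries used; cap is %u KiB\n",
	       chunks_needed(img), IWL_NUM_DRAM_FSEQ_ENTRIES,
	       CHUNK_SZ * IWL_NUM_DRAM_FSEQ_ENTRIES / 1024);
	return 0;
}
```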
* @NVM_CHANNEL_RADAR: radar detection required @@ -159,20 +162,21 @@ static struct ieee80211_rate iwl_cfg80211_rates[] = { * @NVM_CHANNEL_AFC: client support connection to UHB AFC AP */ enum iwl_nvm_channel_flags { - NVM_CHANNEL_VALID = BIT(0), - NVM_CHANNEL_IBSS = BIT(1), - NVM_CHANNEL_ACTIVE = BIT(3), - NVM_CHANNEL_RADAR = BIT(4), - NVM_CHANNEL_INDOOR_ONLY = BIT(5), - NVM_CHANNEL_GO_CONCURRENT = BIT(6), - NVM_CHANNEL_UNIFORM = BIT(7), - NVM_CHANNEL_20MHZ = BIT(8), - NVM_CHANNEL_40MHZ = BIT(9), - NVM_CHANNEL_80MHZ = BIT(10), - NVM_CHANNEL_160MHZ = BIT(11), - NVM_CHANNEL_DC_HIGH = BIT(12), - NVM_CHANNEL_VLP = BIT(13), - NVM_CHANNEL_AFC = BIT(14), + NVM_CHANNEL_VALID = BIT(0), + NVM_CHANNEL_IBSS = BIT(1), + NVM_CHANNEL_ALLOW_20MHZ_ACTIVITY = BIT(2), + NVM_CHANNEL_ACTIVE = BIT(3), + NVM_CHANNEL_RADAR = BIT(4), + NVM_CHANNEL_INDOOR_ONLY = BIT(5), + NVM_CHANNEL_GO_CONCURRENT = BIT(6), + NVM_CHANNEL_UNIFORM = BIT(7), + NVM_CHANNEL_20MHZ = BIT(8), + NVM_CHANNEL_40MHZ = BIT(9), + NVM_CHANNEL_80MHZ = BIT(10), + NVM_CHANNEL_160MHZ = BIT(11), + NVM_CHANNEL_DC_HIGH = BIT(12), + NVM_CHANNEL_VLP = BIT(13), + NVM_CHANNEL_AFC = BIT(14), }; /** @@ -1655,6 +1659,10 @@ static u32 iwl_nvm_get_regdom_bw_flags(const u16 *nvm_chan, if (nvm_flags & NVM_CHANNEL_INDOOR_ONLY) flags |= NL80211_RRF_NO_OUTDOOR; + if (nvm_flags & NVM_CHANNEL_ALLOW_20MHZ_ACTIVITY && + flags & NL80211_RRF_NO_IR) + flags |= NL80211_RRF_ALLOW_20MHZ_ACTIVITY; + /* Set the GO concurrent flag only in case that NO_IR is set. * Otherwise it is meaningless */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index ce4954b0d524..44a249a753ec 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -328,6 +328,7 @@ iwl_trans_get_rb_size_order(enum iwl_amsdu_size rb_size) case IWL_AMSDU_4K: return get_order(4 * 1024); case IWL_AMSDU_8K: + return get_order(8 * 1024); case IWL_AMSDU_12K: return get_order(16 * 1024); default: diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-utils.c b/drivers/net/wireless/intel/iwlwifi/iwl-utils.c index b14ec98e28b6..c5b49851e4b9 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-utils.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-utils.c @@ -4,7 +4,6 @@ */ #include <net/gso.h> #include <linux/ieee80211.h> -#include <net/gso.h> #include <net/ip.h> #include "iwl-drv.h" diff --git a/drivers/net/wireless/intel/iwlwifi/mld/d3.c b/drivers/net/wireless/intel/iwlwifi/mld/d3.c index ee99298eebf5..dc736fdc176d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/d3.c @@ -1347,6 +1347,8 @@ int iwl_mld_no_wowlan_suspend(struct iwl_mld *mld) if (ret) { IWL_ERR(mld, "d3 suspend: trans_d3_suspend failed %d\n", ret); } else { + /* Async notification might send hcmds, which is not allowed in suspend */ + iwl_mld_cancel_async_notifications(mld); mld->trans->system_pm_mode = IWL_PLAT_PM_MODE_D3; mld->fw_status.in_d3 = true; } diff --git a/drivers/net/wireless/intel/iwlwifi/mld/fw.c b/drivers/net/wireless/intel/iwlwifi/mld/fw.c index 4b083d447ee2..6e9af37fb86d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/fw.c @@ -11,7 +11,6 @@ #include "fw/dbg.h" #include "fw/pnvm.h" #include "hcmd.h" -#include "iwl-nvm-parse.h" #include "power.h" #include "mcc.h" #include "led.h" @@ -239,22 +238,17 @@ static int iwl_mld_load_fw_wait_alive(struct iwl_mld *mld) iwl_fw_dbg_error_collect(&mld->fwrt, FW_DBG_TRIGGER_ALIVE_TIMEOUT); 
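The iwl-trans.h hunk above is a subtle one-line bugfix: IWL_AMSDU_8K previously fell through to the IWL_AMSDU_12K case, so an 8 KB A-MSDU setting silently allocated 16 KB receive buffers (12K rounding up to 16K is intentional, since buffer sizes are page orders, but 8 KB is already a power of two). A userspace sketch of get_order() semantics on 4 KiB pages showing what the corrected case returns:

```c
#include <stdio.h>

#define PAGE_SHIFT 12	/* assume 4 KiB pages */

/* smallest order such that (4096 << order) >= size, mirroring the kernel helper */
static int get_order(unsigned long size)
{
	int order = 0;

	size = (size - 1) >> PAGE_SHIFT;
	while (size) {
		order++;
		size >>= 1;
	}
	return order;
}

int main(void)
{
	printf("4K -> order %d, 8K -> order %d, 16K -> order %d\n",
	       get_order(4 * 1024),	/* 0 */
	       get_order(8 * 1024),	/* 1: what IWL_AMSDU_8K now returns */
	       get_order(16 * 1024));	/* 2: what it wrongly got before */
	return 0;
}
```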
iwl_mld_print_alive_notif_timeout(mld); - goto alive_failure; + return ret; } if (!alive_valid) { IWL_ERR(mld, "Loaded firmware is not valid!\n"); - ret = -EIO; - goto alive_failure; + return -EIO; } iwl_trans_fw_alive(mld->trans, 0); return 0; - -alive_failure: - iwl_trans_stop_device(mld->trans); - return ret; } static int iwl_mld_run_fw_init_sequence(struct iwl_mld *mld) @@ -272,15 +266,11 @@ static int iwl_mld_run_fw_init_sequence(struct iwl_mld *mld) if (ret) return ret; - mld->trans->step_urm = - !!(iwl_read_umac_prph(mld->trans, CNVI_PMU_STEP_FLOW) & - CNVI_PMU_STEP_FLOW_FORCE_URM); - ret = iwl_pnvm_load(mld->trans, &mld->notif_wait, &mld->fw->ucode_capa); if (ret) { IWL_ERR(mld, "Timeout waiting for PNVM load %d\n", ret); - goto init_failure; + return ret; } iwl_dbg_tlv_time_point(&mld->fwrt, IWL_FW_INI_TIME_POINT_AFTER_ALIVE, @@ -298,31 +288,17 @@ static int iwl_mld_run_fw_init_sequence(struct iwl_mld *mld) if (ret) { IWL_ERR(mld, "Failed to send init config command: %d\n", ret); iwl_remove_notification(&mld->notif_wait, &init_wait); - goto init_failure; + return ret; } ret = iwl_wait_notification(&mld->notif_wait, &init_wait, MLD_INIT_COMPLETE_TIMEOUT); if (ret) { IWL_ERR(mld, "Failed to get INIT_COMPLETE %d\n", ret); - goto init_failure; - } - - if (!mld->nvm_data) { - mld->nvm_data = iwl_get_nvm(mld->trans, mld->fw, 0, 0); - if (IS_ERR(mld->nvm_data)) { - ret = PTR_ERR(mld->nvm_data); - mld->nvm_data = NULL; - IWL_ERR(mld, "Failed to read NVM: %d\n", ret); - goto init_failure; - } + return ret; } return 0; - -init_failure: - iwl_trans_stop_device(mld->trans); - return ret; } int iwl_mld_load_fw(struct iwl_mld *mld) @@ -333,7 +309,7 @@ int iwl_mld_load_fw(struct iwl_mld *mld) ret = iwl_trans_start_hw(mld->trans); if (ret) - goto err; + return ret; ret = iwl_mld_run_fw_init_sequence(mld); if (ret) @@ -361,9 +337,10 @@ void iwl_mld_stop_fw(struct iwl_mld *mld) iwl_trans_stop_device(mld->trans); - wiphy_work_cancel(mld->wiphy, &mld->async_handlers_wk); - - iwl_mld_purge_async_handlers_list(mld); + /* HW is stopped, no more coming RX. Cancel all notifications in + * case they were sent just before stopping the HW. + */ + iwl_mld_cancel_async_notifications(mld); mld->fw_status.running = false; } @@ -526,7 +503,7 @@ int iwl_mld_start_fw(struct iwl_mld *mld) ret = iwl_mld_load_fw(mld); if (IWL_FW_CHECK(mld, ret, "Failed to start firmware %d\n", ret)) { iwl_fw_dbg_error_collect(&mld->fwrt, FW_DBG_TRIGGER_DRIVER); - goto error; + return ret; } IWL_DEBUG_INFO(mld, "uCode started.\n"); diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.c b/drivers/net/wireless/intel/iwlwifi/mld/iface.c index e49e2260ac05..47b5b31b5b91 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.c @@ -226,11 +226,6 @@ static void iwl_mld_fill_mac_cmd_sta(struct iwl_mld *mld, if (vif->probe_req_reg && vif->cfg.assoc && vif->p2p) cmd->filter_flags |= cpu_to_le32(MAC_CFG_FILTER_ACCEPT_PROBE_REQ); - - if (vif->p2p) - cmd->client.ctwin = - cpu_to_le32(vif->bss_conf.p2p_noa_attr.oppps_ctwindow & - IEEE80211_P2P_OPPPS_CTWINDOW_MASK); } static void iwl_mld_fill_mac_cmd_ap(struct iwl_mld *mld, diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.h b/drivers/net/wireless/intel/iwlwifi/mld/iface.h index ec14d0736cee..1ae522431f3f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.h @@ -59,6 +59,7 @@ enum iwl_mld_emlsr_blocked { * loaded enough to justify EMLSR. 
* @IWL_MLD_EMLSR_EXIT_RFI: Exit EMLSR due to RFI * @IWL_MLD_EMLSR_EXIT_FW_REQUEST: Exit EMLSR because the FW requested it + * @IWL_MLD_EMLSR_EXIT_INVALID: internal exit reason due to invalid data */ enum iwl_mld_emlsr_exit { IWL_MLD_EMLSR_EXIT_BLOCK = 0x1, @@ -72,6 +73,7 @@ enum iwl_mld_emlsr_exit { IWL_MLD_EMLSR_EXIT_CHAN_LOAD = 0x100, IWL_MLD_EMLSR_EXIT_RFI = 0x200, IWL_MLD_EMLSR_EXIT_FW_REQUEST = 0x400, + IWL_MLD_EMLSR_EXIT_INVALID = 0x800, }; /** diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 68d97d3b8f02..1ee5ac9a37ec 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -541,15 +541,6 @@ void iwl_mld_mac80211_stop(struct ieee80211_hw *hw, bool suspend) (IS_ENABLED(CONFIG_PM_SLEEP) && iwl_mld_no_wowlan_suspend(mld))) iwl_mld_stop_fw(mld); - /* HW is stopped, no more coming RX. OTOH, the worker can't run as the - * wiphy lock is held. Cancel it in case it was scheduled just before - * we stopped the HW. - */ - wiphy_work_cancel(mld->wiphy, &mld->async_handlers_wk); - - /* Empty out the list, as the worker won't do that */ - iwl_mld_purge_async_handlers_list(mld); - /* Clear in_hw_restart flag when stopping the hw, as mac80211 won't * execute the restart. */ diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c index 73d2166a4c25..87624730fb50 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c @@ -26,6 +26,8 @@ #include "hcmd.h" #include "fw/api/location.h" +#include "iwl-nvm-parse.h" + #define DRV_DESCRIPTION "Intel(R) MLD wireless driver for Linux" MODULE_DESCRIPTION(DRV_DESCRIPTION); MODULE_LICENSE("GPL"); @@ -75,7 +77,6 @@ void iwl_construct_mld(struct iwl_mld *mld, struct iwl_trans *trans, /* Setup async RX handling */ spin_lock_init(&mld->async_handlers_lock); - INIT_LIST_HEAD(&mld->async_handlers_list); wiphy_work_init(&mld->async_handlers_wk, iwl_mld_async_handlers_wk); @@ -287,7 +288,9 @@ static const struct iwl_hcmd_names iwl_mld_statistics_names[] = { * Access is done through binary search */ static const struct iwl_hcmd_names iwl_mld_prot_offload_names[] = { - HCMD_NAME(STORED_BEACON_NTF), + HCMD_NAME(WOWLAN_WAKE_PKT_NOTIFICATION), + HCMD_NAME(WOWLAN_INFO_NOTIFICATION), + HCMD_NAME(D3_END_NOTIFICATION), }; /* Please keep this array *SORTED* by hex value. 
@@ -411,10 +414,17 @@ iwl_op_mode_mld_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, break; } + if (!ret) { + mld->nvm_data = iwl_get_nvm(mld->trans, mld->fw, 0, 0); + if (IS_ERR(mld->nvm_data)) { + IWL_ERR(mld, "Failed to read NVM: %d\n", ret); + ret = PTR_ERR(mld->nvm_data); + } + } + if (ret) { wiphy_unlock(mld->wiphy); rtnl_unlock(); - iwl_fw_flush_dumps(&mld->fwrt); goto err; } diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c index a870e169e265..da16fff1ce86 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c @@ -52,7 +52,8 @@ static void iwl_mld_print_emlsr_blocked(struct iwl_mld *mld, u32 mask) HOW(BT_COEX) \ HOW(CHAN_LOAD) \ HOW(RFI) \ - HOW(FW_REQUEST) + HOW(FW_REQUEST) \ + HOW(INVALID) static const char * iwl_mld_get_emlsr_exit_string(enum iwl_mld_emlsr_exit exit) @@ -643,11 +644,11 @@ iwl_mld_emlsr_disallowed_with_link(struct iwl_mld *mld, { struct wiphy *wiphy = mld->wiphy; struct ieee80211_bss_conf *conf; - enum iwl_mld_emlsr_exit ret = 0; + u32 ret = 0; conf = wiphy_dereference(wiphy, vif->link_conf[link->link_id]); if (WARN_ON_ONCE(!conf)) - return false; + return IWL_MLD_EMLSR_EXIT_INVALID; if (link->chandef->chan->band == NL80211_BAND_2GHZ && mld->bt_is_active) ret |= IWL_MLD_EMLSR_EXIT_BT_COEX; @@ -772,8 +773,8 @@ iwl_mld_channel_load_allows_emlsr(struct iwl_mld *mld, if (a->chandef->width <= b->chandef->width) return true; - bw_a = nl80211_chan_width_to_mhz(a->chandef->width); - bw_b = nl80211_chan_width_to_mhz(b->chandef->width); + bw_a = cfg80211_chandef_get_width(a->chandef); + bw_b = cfg80211_chandef_get_width(b->chandef); ratio = bw_a / bw_b; switch (ratio) { @@ -804,8 +805,28 @@ iwl_mld_valid_emlsr_pair(struct ieee80211_vif *vif, iwl_mld_emlsr_disallowed_with_link(mld, vif, b, false)) return false; - if (a->chandef->chan->band == b->chandef->chan->band) - reason_mask |= IWL_MLD_EMLSR_EXIT_EQUAL_BAND; + if (a->chandef->chan->band == b->chandef->chan->band) { + const struct cfg80211_chan_def *c_low = a->chandef; + const struct cfg80211_chan_def *c_high = b->chandef; + u32 c_low_upper_edge, c_high_lower_edge; + + if (c_low->chan->center_freq > c_high->chan->center_freq) + swap(c_low, c_high); + + c_low_upper_edge = c_low->chan->center_freq + + cfg80211_chandef_get_width(c_low) / 2; + c_high_lower_edge = c_high->chan->center_freq - + cfg80211_chandef_get_width(c_high) / 2; + + if (a->chandef->chan->band == NL80211_BAND_5GHZ && + c_low_upper_edge <= 5330 && c_high_lower_edge >= 5490) { + /* This case is fine - HW/FW can deal with it, there's + * enough separation between the two channels. 
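Two things happen in the mlo.c hunk shown here. First, iwl_mld_emlsr_disallowed_with_link() now accumulates into a plain u32 instead of the enum type (an enum variable cannot faithfully hold OR-ed flag values, and `return false` from a mask-returning function conflated "invalid data" with "allowed"). Second, the same-band rule is relaxed: two 5 GHz links may coexist when the lower channel's upper edge is at or below 5330 MHz and the higher channel's lower edge is at or above 5490 MHz, i.e. when the gap in the middle of the band separates them. A standalone sketch of the edge computation, with the chandef width reduced to a plain MHz value:

```c
#include <stdbool.h>
#include <stdio.h>

struct chan { unsigned int center_freq, width; };	/* both in MHz */

static bool emlsr_separation_ok(struct chan a, struct chan b)
{
	struct chan lo = a, hi = b, tmp;

	if (lo.center_freq > hi.center_freq) {
		tmp = lo; lo = hi; hi = tmp;		/* the swap() in the hunk */
	}
	/* upper edge of the low link vs. lower edge of the high link */
	return lo.center_freq + lo.width / 2 <= 5330 &&
	       hi.center_freq - hi.width / 2 >= 5490;
}

int main(void)
{
	struct chan a = { 5210, 80 };	/* channel 42, upper edge 5250 MHz */
	struct chan b = { 5775, 80 };	/* channel 155, lower edge 5735 MHz */

	printf("allowed: %d\n", emlsr_separation_ok(a, b));	/* prints 1 */
	return 0;
}
```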
+ */ + } else { + reason_mask |= IWL_MLD_EMLSR_EXIT_EQUAL_BAND; + } + } if (!iwl_mld_channel_load_allows_emlsr(mld, vif, a, b)) reason_mask |= IWL_MLD_EMLSR_EXIT_CHAN_LOAD; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/notif.c b/drivers/net/wireless/intel/iwlwifi/mld/notif.c index fc18cba8aaa8..10f1bee89205 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/notif.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/notif.c @@ -183,47 +183,6 @@ static void iwl_mld_handle_mu_mimo_grp_notif(struct iwl_mld *mld, } static void -iwl_mld_handle_stored_beacon_notif(struct iwl_mld *mld, - struct iwl_rx_packet *pkt) -{ - unsigned int pkt_len = iwl_rx_packet_payload_len(pkt); - struct iwl_stored_beacon_notif *sb = (void *)pkt->data; - struct ieee80211_rx_status rx_status = {}; - struct sk_buff *skb; - u32 size = le32_to_cpu(sb->common.byte_count); - - if (size == 0) - return; - - if (pkt_len < struct_size(sb, data, size)) - return; - - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) { - IWL_ERR(mld, "alloc_skb failed\n"); - return; - } - - /* update rx_status according to the notification's metadata */ - rx_status.mactime = le64_to_cpu(sb->common.tsf); - /* TSF as indicated by the firmware is at INA time */ - rx_status.flag |= RX_FLAG_MACTIME_PLCP_START; - rx_status.device_timestamp = le32_to_cpu(sb->common.system_time); - rx_status.band = - iwl_mld_phy_band_to_nl80211(le16_to_cpu(sb->common.band)); - rx_status.freq = - ieee80211_channel_to_frequency(le16_to_cpu(sb->common.channel), - rx_status.band); - - /* copy the data */ - skb_put_data(skb, sb->data, size); - memcpy(IEEE80211_SKB_RXCB(skb), &rx_status, sizeof(rx_status)); - - /* pass it as regular rx to mac80211 */ - ieee80211_rx_napi(mld->hw, NULL, skb, NULL); -} - -static void iwl_mld_handle_channel_switch_start_notif(struct iwl_mld *mld, struct iwl_rx_packet *pkt) { @@ -361,8 +320,6 @@ CMD_VERSIONS(ct_kill_notif, CMD_VER_ENTRY(2, ct_kill_notif)) CMD_VERSIONS(temp_notif, CMD_VER_ENTRY(2, iwl_dts_measurement_notif)) -CMD_VERSIONS(stored_beacon_notif, - CMD_VER_ENTRY(4, iwl_stored_beacon_notif)) CMD_VERSIONS(roc_notif, CMD_VER_ENTRY(1, iwl_roc_notif)) CMD_VERSIONS(probe_resp_data_notif, @@ -473,8 +430,6 @@ const struct iwl_rx_handler iwl_mld_rx_handlers[] = { RX_HANDLER_OF_ROC(MAC_CONF_GROUP, ROC_NOTIF, roc_notif) RX_HANDLER_NO_OBJECT(DATA_PATH_GROUP, MU_GROUP_MGMT_NOTIF, mu_mimo_grp_notif, RX_HANDLER_SYNC) - RX_HANDLER_NO_OBJECT(PROT_OFFLOAD_GROUP, STORED_BEACON_NTF, - stored_beacon_notif, RX_HANDLER_SYNC) RX_HANDLER_OF_VIF(MAC_CONF_GROUP, PROBE_RESPONSE_DATA_NOTIF, probe_resp_data_notif) RX_HANDLER_NO_OBJECT(PHY_OPS_GROUP, CT_KILL_NOTIFICATION, @@ -707,10 +662,14 @@ void iwl_mld_async_handlers_wk(struct wiphy *wiphy, struct wiphy_work *wk) } } -void iwl_mld_purge_async_handlers_list(struct iwl_mld *mld) +void iwl_mld_cancel_async_notifications(struct iwl_mld *mld) { struct iwl_async_handler_entry *entry, *tmp; + lockdep_assert_wiphy(mld->wiphy); + + wiphy_work_cancel(mld->wiphy, &mld->async_handlers_wk); + spin_lock_bh(&mld->async_handlers_lock); list_for_each_entry_safe(entry, tmp, &mld->async_handlers_list, list) { iwl_mld_log_async_handler_op(mld, "Purged", &entry->rxb); diff --git a/drivers/net/wireless/intel/iwlwifi/mld/notif.h b/drivers/net/wireless/intel/iwlwifi/mld/notif.h index 2eaa1d4e138e..adcdd9dec192 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/notif.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/notif.h @@ -15,7 +15,7 @@ void iwl_mld_rx_rss(struct iwl_op_mode *op_mode, struct napi_struct *napi, void 
iwl_mld_async_handlers_wk(struct wiphy *wiphy, struct wiphy_work *wk); -void iwl_mld_purge_async_handlers_list(struct iwl_mld *mld); +void iwl_mld_cancel_async_notifications(struct iwl_mld *mld); enum iwl_mld_object_type { IWL_MLD_OBJECT_TYPE_NONE, diff --git a/drivers/net/wireless/intel/iwlwifi/mld/rx.c b/drivers/net/wireless/intel/iwlwifi/mld/rx.c index c4f189bcece2..3e69f2a4fa81 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/rx.c @@ -33,7 +33,6 @@ struct iwl_mld_rx_phy_data { u32 gp2_on_air_rise; u16 phy_info; u8 energy_a, energy_b; - u8 channel; }; static void @@ -43,7 +42,6 @@ iwl_mld_fill_phy_data(struct iwl_rx_mpdu_desc *desc, phy_data->phy_info = le16_to_cpu(desc->phy_info); phy_data->rate_n_flags = le32_to_cpu(desc->v3.rate_n_flags); phy_data->gp2_on_air_rise = le32_to_cpu(desc->v3.gp2_on_air_rise); - phy_data->channel = desc->v3.channel; phy_data->energy_a = desc->v3.energy_a; phy_data->energy_b = desc->v3.energy_b; phy_data->data0 = desc->v3.phy_data0; @@ -1162,8 +1160,6 @@ static void iwl_mld_add_rtap_sniffer_config(struct iwl_mld *mld, static void iwl_mld_rx_fill_status(struct iwl_mld *mld, struct sk_buff *skb, struct iwl_mld_rx_phy_data *phy_data, - struct iwl_rx_mpdu_desc *mpdu_desc, - struct ieee80211_hdr *hdr, int queue) { struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); @@ -1172,44 +1168,12 @@ static void iwl_mld_rx_fill_status(struct iwl_mld *mld, struct sk_buff *skb, u8 stbc = u32_get_bits(rate_n_flags, RATE_MCS_STBC_MSK); bool is_sgi = rate_n_flags & RATE_MCS_SGI_MSK; - if (WARN_ON_ONCE(phy_data->with_data && (!mpdu_desc || !hdr))) - return; - - /* Keep packets with CRC errors (and with overrun) for monitor mode - * (otherwise the firmware discards them) but mark them as bad. 
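iwl_mld_fill_rx_status_band_freq() above now takes the band and channel as raw u8s, extracted from @mac_phy_band and the v3 descriptor respectively, instead of reaching through the phy data. For reference, a simplified model of what ieee80211_channel_to_frequency() computes for the two legacy bands (special cases such as 2.4 GHz channel 14 are omitted here):

```c
#include <stdio.h>

enum band { BAND_2GHZ, BAND_5GHZ };

static int chan_to_freq(int chan, enum band band)
{
	if (band == BAND_2GHZ)
		return 2407 + chan * 5;		/* channel 1 -> 2412 MHz */
	return 5000 + chan * 5;			/* channel 36 -> 5180 MHz */
}

int main(void)
{
	printf("ch1/2.4G -> %d MHz, ch36/5G -> %d MHz\n",
	       chan_to_freq(1, BAND_2GHZ), chan_to_freq(36, BAND_5GHZ));
	return 0;
}
```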
- */ - if (phy_data->with_data && - (!(mpdu_desc->status & cpu_to_le32(IWL_RX_MPDU_STATUS_CRC_OK)) || - !(mpdu_desc->status & cpu_to_le32(IWL_RX_MPDU_STATUS_OVERRUN_OK)))) { - IWL_DEBUG_RX(mld, "Bad CRC or FIFO: 0x%08X.\n", - le32_to_cpu(mpdu_desc->status)); - rx_status->flag |= RX_FLAG_FAILED_FCS_CRC; - } - phy_data->info_type = IWL_RX_PHY_INFO_TYPE_NONE; - if (phy_data->with_data && - likely(!(phy_data->phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD))) { - rx_status->mactime = - le64_to_cpu(mpdu_desc->v3.tsf_on_air_rise); - - /* TSF as indicated by the firmware is at INA time */ - rx_status->flag |= RX_FLAG_MACTIME_PLCP_START; - } else { + if (phy_data->phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD) phy_data->info_type = le32_get_bits(phy_data->data1, IWL_RX_PHY_DATA1_INFO_TYPE_MASK); - } - - /* management stuff on default queue */ - if (!queue && phy_data->with_data && - unlikely(ieee80211_is_beacon(hdr->frame_control) || - ieee80211_is_probe_resp(hdr->frame_control))) { - rx_status->boottime_ns = ktime_get_boottime_ns(); - - if (mld->scan.pass_all_sched_res == SCHED_SCAN_PASS_ALL_STATE_ENABLED) - mld->scan.pass_all_sched_res = SCHED_SCAN_PASS_ALL_STATE_FOUND; - } /* set the preamble flag if appropriate */ if (format == RATE_MCS_CCK_MSK && @@ -1735,15 +1699,11 @@ static void iwl_mld_rx_update_ampdu_ref(struct iwl_mld *mld, } static void -iwl_mld_fill_rx_status_band_freq(struct iwl_mld_rx_phy_data *phy_data, - struct iwl_rx_mpdu_desc *mpdu_desc, - struct ieee80211_rx_status *rx_status) +iwl_mld_fill_rx_status_band_freq(struct ieee80211_rx_status *rx_status, + u8 band, u8 channel) { - enum nl80211_band band; - - band = BAND_IN_RX_STATUS(mpdu_desc->mac_phy_idx); rx_status->band = iwl_mld_phy_band_to_nl80211(band); - rx_status->freq = ieee80211_channel_to_frequency(phy_data->channel, + rx_status->freq = ieee80211_channel_to_frequency(channel, rx_status->band); } @@ -1758,7 +1718,7 @@ void iwl_mld_rx_mpdu(struct iwl_mld *mld, struct napi_struct *napi, struct sk_buff *skb; size_t mpdu_desc_size = sizeof(*mpdu_desc); bool drop = false; - u8 crypto_len = 0; + u8 crypto_len = 0, band; u32 pkt_len = iwl_rx_packet_payload_len(pkt); u32 mpdu_len; enum iwl_mld_reorder_result reorder_res; @@ -1802,7 +1762,11 @@ void iwl_mld_rx_mpdu(struct iwl_mld *mld, struct napi_struct *napi, rx_status = IEEE80211_SKB_RXCB(skb); /* this is needed early */ - iwl_mld_fill_rx_status_band_freq(&phy_data, mpdu_desc, rx_status); + band = u8_get_bits(mpdu_desc->mac_phy_band, + IWL_RX_MPDU_MAC_PHY_BAND_BAND_MASK); + iwl_mld_fill_rx_status_band_freq(rx_status, band, + mpdu_desc->v3.channel); + rcu_read_lock(); @@ -1814,7 +1778,36 @@ void iwl_mld_rx_mpdu(struct iwl_mld *mld, struct napi_struct *napi, if (!queue && (phy_data.phy_info & IWL_RX_MPDU_PHY_AMPDU)) iwl_mld_rx_update_ampdu_ref(mld, &phy_data, rx_status); - iwl_mld_rx_fill_status(mld, skb, &phy_data, mpdu_desc, hdr, queue); + /* Keep packets with CRC errors (and with overrun) for monitor mode + * (otherwise the firmware discards them) but mark them as bad. 
+ */ + if (!(mpdu_desc->status & cpu_to_le32(IWL_RX_MPDU_STATUS_CRC_OK)) || + !(mpdu_desc->status & cpu_to_le32(IWL_RX_MPDU_STATUS_OVERRUN_OK))) { + IWL_DEBUG_RX(mld, "Bad CRC or FIFO: 0x%08X.\n", + le32_to_cpu(mpdu_desc->status)); + rx_status->flag |= RX_FLAG_FAILED_FCS_CRC; + } + + if (likely(!(phy_data.phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD))) { + rx_status->mactime = + le64_to_cpu(mpdu_desc->v3.tsf_on_air_rise); + + /* TSF as indicated by the firmware is at INA time */ + rx_status->flag |= RX_FLAG_MACTIME_PLCP_START; + } + + /* management stuff on default queue */ + if (!queue && unlikely(ieee80211_is_beacon(hdr->frame_control) || + ieee80211_is_probe_resp(hdr->frame_control))) { + rx_status->boottime_ns = ktime_get_boottime_ns(); + + if (mld->scan.pass_all_sched_res == + SCHED_SCAN_PASS_ALL_STATE_ENABLED) + mld->scan.pass_all_sched_res = + SCHED_SCAN_PASS_ALL_STATE_FOUND; + } + + iwl_mld_rx_fill_status(mld, skb, &phy_data, queue); if (iwl_mld_rx_crypto(mld, sta, hdr, rx_status, mpdu_desc, queue, le32_to_cpu(pkt->len_n_flags), &crypto_len)) @@ -1956,6 +1949,7 @@ void iwl_mld_rx_monitor_no_data(struct iwl_mld *mld, struct napi_struct *napi, struct ieee80211_rx_status *rx_status; struct sk_buff *skb; u32 format, rssi; + u8 channel; if (unlikely(mld->fw_status.in_hw_restart)) return; @@ -1968,9 +1962,10 @@ void iwl_mld_rx_monitor_no_data(struct iwl_mld *mld, struct napi_struct *napi, desc = (void *)pkt->data; rssi = le32_to_cpu(desc->rssi); + channel = u32_get_bits(rssi, RX_NO_DATA_CHANNEL_MSK); + phy_data.energy_a = u32_get_bits(rssi, RX_NO_DATA_CHAIN_A_MSK); phy_data.energy_b = u32_get_bits(rssi, RX_NO_DATA_CHAIN_B_MSK); - phy_data.channel = u32_get_bits(rssi, RX_NO_DATA_CHANNEL_MSK); phy_data.data0 = desc->phy_info[0]; phy_data.data1 = desc->phy_info[1]; phy_data.phy_info = IWL_RX_MPDU_PHY_TSF_OVERLOAD; @@ -2018,13 +2013,13 @@ void iwl_mld_rx_monitor_no_data(struct iwl_mld *mld, struct napi_struct *napi, break; } - rx_status->band = phy_data.channel > 14 ? NL80211_BAND_5GHZ : + rx_status->band = channel > 14 ? NL80211_BAND_5GHZ : NL80211_BAND_2GHZ; - rx_status->freq = ieee80211_channel_to_frequency(phy_data.channel, + rx_status->freq = ieee80211_channel_to_frequency(channel, rx_status->band); - iwl_mld_rx_fill_status(mld, skb, &phy_data, NULL, NULL, queue); + iwl_mld_rx_fill_status(mld, skb, &phy_data, queue); /* No more radiotap info should be added after this point. 
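The stats.c hunk a little further down teaches the channel-load filter about the firmware's 0xffffffff "unknown" sentinel (reported when the radio was not on the channel): the running average keeps its old value instead of tripping the <= 100 sanity check. A standalone sketch of that filter, including the equal-weight blend done with shifts:

```c
#include <stdint.h>
#include <stdio.h>

#define IWL_STATS_UNKNOWN_CHANNEL_LOAD 0xffffffff

static void update_load(uint32_t *avg, uint32_t new_load)
{
	if (new_load == IWL_STATS_UNKNOWN_CHANNEL_LOAD)
		return;			/* not on channel: keep the old value */
	if (new_load > 100)
		return;			/* invalid percentage: reject */
	/* (old + new) / 2 via shifts, as in the driver */
	*avg = (new_load >> 1) + (*avg >> 1);
}

int main(void)
{
	uint32_t avg = 80;

	update_load(&avg, 40);				/* avg -> 60 */
	update_load(&avg, IWL_STATS_UNKNOWN_CHANNEL_LOAD);	/* no-op */
	printf("avg %u\n", avg);
	return 0;
}
```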
* Mark it as mac header for upper layers to know where diff --git a/drivers/net/wireless/intel/iwlwifi/mld/stats.c b/drivers/net/wireless/intel/iwlwifi/mld/stats.c index 0715bbc31031..360a6bfbbfb2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/stats.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/stats.c @@ -467,12 +467,18 @@ static void iwl_mld_fill_chanctx_stats(struct ieee80211_hw *hw, old_load = phy->avg_channel_load_not_by_us; new_load = le32_to_cpu(per_phy[phy->fw_id].channel_load_not_by_us); - if (IWL_FW_CHECK(phy->mld, new_load > 100, "Invalid channel load %u\n", - new_load)) + + if (IWL_FW_CHECK(phy->mld, + new_load != IWL_STATS_UNKNOWN_CHANNEL_LOAD && + new_load > 100, + "Invalid channel load %u\n", new_load)) return; - /* give a weight of 0.5 for the old value */ - phy->avg_channel_load_not_by_us = (new_load >> 1) + (old_load >> 1); + if (new_load != IWL_STATS_UNKNOWN_CHANNEL_LOAD) { + /* update giving a weight of 0.5 for the old value */ + phy->avg_channel_load_not_by_us = (new_load >> 1) + + (old_load >> 1); + } iwl_mld_emlsr_check_chan_load(hw, phy, old_load); } diff --git a/drivers/net/wireless/intel/iwlwifi/mld/tests/agg.c b/drivers/net/wireless/intel/iwlwifi/mld/tests/agg.c index 1fd664be1a7c..29b0248cec3d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/tests/agg.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/tests/agg.c @@ -2,7 +2,7 @@ /* * KUnit tests for channel helper functions * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation */ #include <kunit/test.h> #include <kunit/static_stub.h> @@ -474,14 +474,14 @@ static struct iwl_rx_mpdu_desc *setup_mpdu_desc(void) KUNIT_ALLOC_AND_ASSERT(test, mpdu_desc); mpdu_desc->reorder_data |= - cpu_to_le32(FIELD_PREP(IWL_RX_MPDU_REORDER_BAID_MASK, - param->rx_pkt.baid)); + le32_encode_bits(param->rx_pkt.baid, + IWL_RX_MPDU_REORDER_BAID_MASK); mpdu_desc->reorder_data |= - cpu_to_le32(FIELD_PREP(IWL_RX_MPDU_REORDER_SN_MASK, - param->rx_pkt.sn)); + le32_encode_bits(param->rx_pkt.sn, + IWL_RX_MPDU_REORDER_SN_MASK); mpdu_desc->reorder_data |= - cpu_to_le32(FIELD_PREP(IWL_RX_MPDU_REORDER_NSSN_MASK, - param->rx_pkt.nssn)); + le32_encode_bits(param->rx_pkt.nssn, + IWL_RX_MPDU_REORDER_NSSN_MASK); if (param->rx_pkt.old_sn) mpdu_desc->reorder_data |= cpu_to_le32(IWL_RX_MPDU_REORDER_BA_OLD_SN); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 2b5a62604fc4..94b08bb6fd4f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -473,7 +473,7 @@ static void iwl_mvm_phy_filter_init(struct iwl_mvm *mvm, struct iwl_phy_specific_cfg *phy_filters) { #ifdef CONFIG_ACPI - *phy_filters = mvm->phy_filters; + *phy_filters = mvm->fwrt.phy_filters; #endif /* CONFIG_ACPI */ } @@ -651,11 +651,6 @@ static int iwl_run_unified_mvm_ucode(struct iwl_mvm *mvm) iwl_dbg_tlv_time_point(&mvm->fwrt, IWL_FW_INI_TIME_POINT_AFTER_ALIVE, NULL); - if (mvm->trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) - mvm->trans->step_urm = !!(iwl_read_umac_prph(mvm->trans, - CNVI_PMU_STEP_FLOW) & - CNVI_PMU_STEP_FLOW_FORCE_URM); - /* Send init config command to mark that we are sending NVM access * commands */ @@ -1270,7 +1265,7 @@ void iwl_mvm_get_bios_tables(struct iwl_mvm *mvm) } } - iwl_acpi_get_phy_filters(&mvm->fwrt, &mvm->phy_filters); + iwl_acpi_get_phy_filters(&mvm->fwrt, &mvm->fwrt.phy_filters); if (iwl_bios_get_eckv(&mvm->fwrt, &mvm->ext_clock_valid)) IWL_DEBUG_RADIO(mvm, "ECKV table doesn't exist in BIOS\n"); diff 
--git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 1e916a0ce082..5d8f50a455d7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -6426,17 +6426,10 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, bool sync, const void *data, u32 size) { - struct { - struct iwl_rxq_sync_cmd cmd; - struct iwl_mvm_internal_rxq_notif notif; - } __packed cmd = { - .cmd.rxq_mask = cpu_to_le32(BIT(mvm->trans->num_rx_queues) - 1), - .cmd.count = - cpu_to_le32(sizeof(struct iwl_mvm_internal_rxq_notif) + - size), - .notif.type = type, - .notif.sync = sync, - }; + DEFINE_RAW_FLEX(struct iwl_rxq_sync_cmd, cmd, payload, + sizeof(struct iwl_mvm_internal_rxq_notif)); + struct iwl_mvm_internal_rxq_notif *notif = + (struct iwl_mvm_internal_rxq_notif *)cmd->payload; struct iwl_host_cmd hcmd = { .id = WIDE_ID(DATA_PATH_GROUP, TRIGGER_RX_QUEUES_NOTIF_CMD), .data[0] = &cmd, @@ -6447,15 +6440,21 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, }; int ret; + cmd->rxq_mask = cpu_to_le32(BIT(mvm->trans->num_rx_queues) - 1); + cmd->count = cpu_to_le32(sizeof(struct iwl_mvm_internal_rxq_notif) + + size); + notif->type = type; + notif->sync = sync; + /* size must be a multiple of DWORD */ - if (WARN_ON(cmd.cmd.count & cpu_to_le32(3))) + if (WARN_ON(cmd->count & cpu_to_le32(3))) return; if (!iwl_mvm_has_new_rx_api(mvm)) return; if (sync) { - cmd.notif.cookie = mvm->queue_sync_cookie; + notif->cookie = mvm->queue_sync_cookie; mvm->queue_sync_state = (1 << mvm->trans->num_rx_queues) - 1; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index f6391c7a3e29..1e3639fa6b27 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1348,10 +1348,6 @@ struct iwl_mvm { __le16 cur_aid; u8 cur_bssid[ETH_ALEN]; -#ifdef CONFIG_ACPI - struct iwl_phy_specific_cfg phy_filters; -#endif - /* report rx timestamp in ptp clock time */ bool rx_ts_ptp; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 14ea89f931bb..358643ae0e42 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2024 Intel Corporation + * Copyright (C) 2012-2014, 2018-2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH */ @@ -2157,7 +2157,8 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, } if (iwl_mvm_is_band_in_rx_supported(mvm)) { - u8 band = BAND_IN_RX_STATUS(desc->mac_phy_idx); + u8 band = u8_get_bits(desc->mac_phy_band, + IWL_RX_MPDU_MAC_PHY_BAND_BAND_MASK); rx_status->band = iwl_mvm_nl80211_band_from_phy(band); } else { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index 19c905b641e2..af62c7f7c834 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2012-2014, 2018-2024 Intel Corporation + * Copyright (C) 2012-2014, 2018-2025 Intel Corporation * Copyright (C) 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2016 Intel Deutschland GmbH */ @@ -225,8 +225,6 @@ struct 
iwl_mvm_vif; * @IWL_AGG_ON: aggregation session is up * @IWL_EMPTYING_HW_QUEUE_ADDBA: establishing a BA session - waiting for the * HW queue to be empty from packets for this RA /TID. - * @IWL_EMPTYING_HW_QUEUE_DELBA: tearing down a BA session - waiting for the - * HW queue to be empty from packets for this RA /TID. */ enum iwl_mvm_agg_state { IWL_AGG_OFF = 0, @@ -234,7 +232,6 @@ enum iwl_mvm_agg_state { IWL_AGG_STARTING, IWL_AGG_ON, IWL_EMPTYING_HW_QUEUE_ADDBA, - IWL_EMPTYING_HW_QUEUE_DELBA, }; /** diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index f67afb66ef2b..91f6945f3f98 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2024 Intel Corporation + * Copyright (C) 2012-2014, 2018-2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -1372,8 +1372,7 @@ static void iwl_mvm_check_ratid_empty(struct iwl_mvm *mvm, lockdep_assert_held(&mvmsta->lock); - if ((tid_data->state == IWL_AGG_ON || - tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA) && + if (tid_data->state == IWL_AGG_ON && iwl_mvm_tid_queued(mvm, tid_data) == 0) { /* * Now that this aggregation or DQA queue is empty tell @@ -1402,15 +1401,6 @@ static void iwl_mvm_check_ratid_empty(struct iwl_mvm *mvm, tid_data->state = IWL_AGG_STARTING; ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); break; - - case IWL_EMPTYING_HW_QUEUE_DELBA: - IWL_DEBUG_TX_QUEUES(mvm, - "Can continue DELBA flow ssn = next_recl = %d\n", - tid_data->next_reclaimed); - tid_data->state = IWL_AGG_OFF; - ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); - break; - default: break; } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c index 8aa7c455bdee..4f367c7fce25 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c @@ -182,7 +182,7 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, prph_sc_ctrl->step_cfg.mbx_addr_1 = cpu_to_le32(trans->mbx_addr_1_step); /* allocate ucode sections in dram and set addresses */ - ret = iwl_pcie_init_fw_sec(trans, fw, &prph_scratch->dram); + ret = iwl_pcie_init_fw_sec(trans, fw, &prph_scratch->dram.common); if (ret) goto err_free_prph_scratch; @@ -219,8 +219,23 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, cpu_to_le64(trans_pcie->prph_info_dma_addr); ctxt_info_gen3->prph_scratch_base_addr = cpu_to_le64(trans_pcie->prph_scratch_dma_addr); - ctxt_info_gen3->prph_scratch_size = - cpu_to_le32(sizeof(*prph_scratch)); + + /* + * This code assumes the FSEQ is last and we can make that + * optional; old devices _should_ be fine with a bigger size, + * but in simulation we check the size more precisely. 
+ */ + BUILD_BUG_ON(offsetofend(typeof(*prph_scratch), dram.common) + + sizeof(prph_scratch->dram.fseq_img) != + sizeof(*prph_scratch)); + if (control_flags_ext & IWL_PRPH_SCRATCH_EXT_EXT_FSEQ) + ctxt_info_gen3->prph_scratch_size = + cpu_to_le32(sizeof(*prph_scratch)); + else + ctxt_info_gen3->prph_scratch_size = + cpu_to_le32(offsetofend(typeof(*prph_scratch), + dram.common)); + ctxt_info_gen3->cr_head_idx_arr_base_addr = cpu_to_le64(trans_pcie->rxq->rb_stts_dma); ctxt_info_gen3->tr_tail_idx_arr_base_addr = diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c index 344e4d5a1c6e..3f0256b3565d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Copyright (C) 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation */ #include "iwl-trans.h" #include "iwl-fh.h" @@ -83,7 +83,7 @@ void iwl_pcie_ctxt_info_free_paging(struct iwl_trans *trans) int iwl_pcie_init_fw_sec(struct iwl_trans *trans, const struct fw_img *fw, - struct iwl_context_info_dram *ctxt_dram) + struct iwl_context_info_dram_nonfseq *ctxt_dram) { struct iwl_self_init_dram *dram = &trans->init_dram; int i, ret, lmac_cnt, umac_cnt, paging_cnt; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 4a4f8de4efe2..337324eea1a1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1947,6 +1947,13 @@ irqreturn_t iwl_pcie_irq_handler(int irq, void *dev_id) handled |= CSR_INT_BIT_ALIVE; } + if (inta & CSR_INT_BIT_RESET_DONE) { + IWL_DEBUG_ISR(trans, "Reset flow completed\n"); + trans_pcie->fw_reset_state = FW_RESET_OK; + handled |= CSR_INT_BIT_RESET_DONE; + wake_up(&trans_pcie->fw_reset_waitq); + } + /* Safely ignore these bits for debug checks below */ inta &= ~(CSR_INT_BIT_SCD | CSR_INT_BIT_ALIVE); @@ -1968,7 +1975,12 @@ irqreturn_t iwl_pcie_irq_handler(int irq, void *dev_id) IWL_ERR(trans, "Microcode SW error detected. 
" " Restarting 0x%X.\n", inta); isr_stats->sw++; - iwl_pcie_irq_handle_error(trans); + if (trans_pcie->fw_reset_state == FW_RESET_REQUESTED) { + trans_pcie->fw_reset_state = FW_RESET_ERROR; + wake_up(&trans_pcie->fw_reset_waitq); + } else { + iwl_pcie_irq_handle_error(trans); + } handled |= CSR_INT_BIT_SW_ERR; } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c index 472f26f83ba8..08409e24aed6 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c @@ -117,13 +117,23 @@ void iwl_trans_pcie_fw_reset_handshake(struct iwl_trans *trans) trans_pcie->fw_reset_state != FW_RESET_REQUESTED, FW_RESET_TIMEOUT); if (!ret || trans_pcie->fw_reset_state == FW_RESET_ERROR) { - u32 inta_hw = iwl_read32(trans, CSR_MSIX_HW_INT_CAUSES_AD); + bool reset_done; + u32 inta_hw; + + if (trans_pcie->msix_enabled) { + inta_hw = iwl_read32(trans, CSR_MSIX_HW_INT_CAUSES_AD); + reset_done = + inta_hw & MSIX_HW_INT_CAUSES_REG_RESET_DONE; + } else { + inta_hw = iwl_read32(trans, CSR_INT_MASK); + reset_done = inta_hw & CSR_INT_BIT_RESET_DONE; + } IWL_ERR(trans, - "timeout waiting for FW reset ACK (inta_hw=0x%x)\n", - inta_hw); + "timeout waiting for FW reset ACK (inta_hw=0x%x, reset_done %d)\n", + inta_hw, reset_done); - if (!(inta_hw & MSIX_HW_INT_CAUSES_REG_RESET_DONE)) { + if (!reset_done) { struct iwl_fw_error_dump_mode mode = { .type = IWL_ERR_TYPE_RESET_HS_TIMEOUT, .context = IWL_ERR_CONTEXT_FROM_OPMODE, @@ -379,6 +389,11 @@ void iwl_trans_pcie_gen2_fw_alive(struct iwl_trans *trans) iwl_pcie_get_rf_name(trans); mutex_unlock(&trans_pcie->mutex); + + if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) + trans->step_urm = !!(iwl_read_umac_prph(trans, + CNVI_PMU_STEP_FLOW) & + CNVI_PMU_STEP_FLOW_FORCE_URM); } static bool iwl_pcie_set_ltr(struct iwl_trans *trans) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 102a6123bba0..4cc7a2e5746d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2942,6 +2942,8 @@ static ssize_t iwl_dbgfs_rx_queue_read(struct file *file, for (i = 0; i < trans->num_rx_queues && pos < bufsz; i++) { struct iwl_rxq *rxq = &trans_pcie->rxq[i]; + spin_lock_bh(&rxq->lock); + pos += scnprintf(buf + pos, bufsz - pos, "queue#: %2d\n", i); pos += scnprintf(buf + pos, bufsz - pos, "\tread: %u\n", @@ -2962,6 +2964,7 @@ static ssize_t iwl_dbgfs_rx_queue_read(struct file *file, pos += scnprintf(buf + pos, bufsz - pos, "\tclosed_rb_num: Not Allocated\n"); } + spin_unlock_bh(&rxq->lock); } ret = simple_read_from_buffer(user_buf, count, ppos, buf, pos); kfree(buf); @@ -3662,8 +3665,11 @@ iwl_trans_pcie_dump_data(struct iwl_trans *trans, u32 dump_mask, /* Dump RBs is supported only for pre-9000 devices (1 queue) */ struct iwl_rxq *rxq = &trans_pcie->rxq[0]; /* RBs */ + spin_lock_bh(&rxq->lock); num_rbs = iwl_get_closed_rb_stts(trans, rxq); num_rbs = (num_rbs - rxq->read) & RX_QUEUE_MASK; + spin_unlock_bh(&rxq->lock); + len += num_rbs * (sizeof(*data) + sizeof(struct iwl_fw_error_dump_rb) + (PAGE_SIZE << trans_pcie->rx_page_order)); diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index a099fdaafa45..60c12328c2f3 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -1126,7 +1126,7 @@ 
mwifiex_change_vif_to_p2p(struct net_device *dev, HostCmd_ACT_GEN_SET, 0, NULL, true)) return -1; - if (mwifiex_sta_init_cmd(priv, false, false)) + if (mwifiex_sta_init_cmd(priv, false)) return -1; return 0; @@ -1167,7 +1167,7 @@ mwifiex_change_vif_to_sta_adhoc(struct net_device *dev, if (mwifiex_send_cmd(priv, HostCmd_CMD_SET_BSS_MODE, HostCmd_ACT_GEN_SET, 0, NULL, true)) return -1; - if (mwifiex_sta_init_cmd(priv, false, false)) + if (mwifiex_sta_init_cmd(priv, false)) return -1; return 0; @@ -1204,7 +1204,7 @@ mwifiex_change_vif_to_ap(struct net_device *dev, if (mwifiex_send_cmd(priv, HostCmd_CMD_SET_BSS_MODE, HostCmd_ACT_GEN_SET, 0, NULL, true)) return -1; - if (mwifiex_sta_init_cmd(priv, false, false)) + if (mwifiex_sta_init_cmd(priv, false)) return -1; return 0; @@ -2906,16 +2906,12 @@ mwifiex_setup_ht_caps(struct ieee80211_sta_ht_cap *ht_info, struct mwifiex_private *priv) { int rx_mcs_supp; - struct ieee80211_mcs_info mcs_set; - u8 *mcs = (u8 *)&mcs_set; struct mwifiex_adapter *adapter = priv->adapter; ht_info->ht_supported = true; ht_info->ampdu_factor = IEEE80211_HT_MAX_AMPDU_64K; ht_info->ampdu_density = IEEE80211_HT_MPDU_DENSITY_NONE; - memset(&ht_info->mcs, 0, sizeof(ht_info->mcs)); - /* Fill HT capability information */ if (ISSUPP_CHANWIDTH40(adapter->hw_dot_11n_dev_cap)) ht_info->cap |= IEEE80211_HT_CAP_SUP_WIDTH_20_40; @@ -2961,17 +2957,15 @@ mwifiex_setup_ht_caps(struct ieee80211_sta_ht_cap *ht_info, ht_info->cap |= IEEE80211_HT_CAP_SM_PS; rx_mcs_supp = GET_RXMCSSUPP(adapter->user_dev_mcs_support); + + memset(&ht_info->mcs, 0, sizeof(ht_info->mcs)); /* Set MCS for 1x1/2x2 */ - memset(mcs, 0xff, rx_mcs_supp); - /* Clear all the other values */ - memset(&mcs[rx_mcs_supp], 0, - sizeof(struct ieee80211_mcs_info) - rx_mcs_supp); + memset(ht_info->mcs.rx_mask, 0xff, rx_mcs_supp); + if (priv->bss_mode == NL80211_IFTYPE_STATION || ISSUPP_CHANWIDTH40(adapter->hw_dot_11n_dev_cap)) /* Set MCS32 for infra mode or ad-hoc mode with 40MHz support */ - SETHT_MCS32(mcs_set.rx_mask); - - memcpy((u8 *) &ht_info->mcs, mcs, sizeof(struct ieee80211_mcs_info)); + SETHT_MCS32(ht_info->mcs.rx_mask); ht_info->mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED; } @@ -3013,7 +3007,6 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, return ERR_PTR(-EFAULT); } - priv->wdev.wiphy = wiphy; priv->wdev.iftype = NL80211_IFTYPE_STATION; if (type == NL80211_IFTYPE_UNSPECIFIED) @@ -3022,8 +3015,6 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, priv->bss_mode = type; priv->bss_type = MWIFIEX_BSS_TYPE_STA; - priv->frame_type = MWIFIEX_DATA_FRAME_TYPE_ETH_II; - priv->bss_priority = 0; priv->bss_role = MWIFIEX_BSS_ROLE_STA; break; @@ -3043,14 +3034,10 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, return ERR_PTR(-EFAULT); } - priv->wdev.wiphy = wiphy; priv->wdev.iftype = NL80211_IFTYPE_AP; priv->bss_type = MWIFIEX_BSS_TYPE_UAP; - priv->frame_type = MWIFIEX_DATA_FRAME_TYPE_ETH_II; - priv->bss_priority = 0; priv->bss_role = MWIFIEX_BSS_ROLE_UAP; - priv->bss_started = 0; priv->bss_mode = type; break; @@ -3070,7 +3057,6 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, return ERR_PTR(-EFAULT); } - priv->wdev.wiphy = wiphy; /* At start-up, wpa_supplicant tries to change the interface * to NL80211_IFTYPE_STATION if it is not managed mode. 
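In the mwifiex_setup_ht_caps() cleanup above, the temporary ieee80211_mcs_info copy and the byte-pointer aliasing go away: zeroing ht_info->mcs once and then setting the first rx_mcs_supp rx_mask bytes to 0xff produces the same result as the old memset/memcpy dance. A trimmed-down sketch of the equivalence, with a 2x2 device as the example:

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct mcs_info {			/* trimmed-down ieee80211_mcs_info */
	uint8_t rx_mask[10];
	uint8_t tx_params;
};

int main(void)
{
	struct mcs_info mcs;
	int rx_mcs_supp = 2;		/* e.g. what GET_RXMCSSUPP() returns for 2x2 */

	memset(&mcs, 0, sizeof(mcs));
	/* one 0xff byte per supported spatial stream: MCS 0-15 here */
	memset(mcs.rx_mask, 0xff, rx_mcs_supp);

	for (int i = 0; i < 3; i++)
		printf("rx_mask[%d] = 0x%02x\n", i, mcs.rx_mask[i]);
	return 0;
}
```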
*/ @@ -3083,10 +3069,7 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, */ priv->bss_type = MWIFIEX_BSS_TYPE_P2P; - priv->frame_type = MWIFIEX_DATA_FRAME_TYPE_ETH_II; - priv->bss_priority = 0; priv->bss_role = MWIFIEX_BSS_ROLE_STA; - priv->bss_started = 0; if (mwifiex_cfg80211_init_p2p_client(priv)) { memset(&priv->wdev, 0, sizeof(priv->wdev)); @@ -3100,6 +3083,11 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, return ERR_PTR(-EINVAL); } + priv->wdev.wiphy = wiphy; + priv->bss_priority = 0; + priv->bss_started = 0; + priv->frame_type = MWIFIEX_DATA_FRAME_TYPE_ETH_II; + dev = alloc_netdev_mqs(sizeof(struct mwifiex_private *), name, name_assign_type, ether_setup, IEEE80211_NUM_ACS, 1); @@ -3122,7 +3110,7 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, if (ret) goto err_set_bss_mode; - ret = mwifiex_sta_init_cmd(priv, false, false); + ret = mwifiex_sta_init_cmd(priv, false); if (ret) goto err_sta_init; } @@ -4703,7 +4691,7 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) void *wdev_priv; struct wiphy *wiphy; struct mwifiex_private *priv = adapter->priv[MWIFIEX_BSS_TYPE_STA]; - u8 *country_code; + const u8 *country_code; u32 thr, retry; struct cfg80211_ops *ops; diff --git a/drivers/net/wireless/marvell/mwifiex/cfp.c b/drivers/net/wireless/marvell/mwifiex/cfp.c index d7fd79214bcf..686bf12b6c26 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfp.c +++ b/drivers/net/wireless/marvell/mwifiex/cfp.c @@ -153,7 +153,7 @@ struct region_code_mapping { u8 region[IEEE80211_COUNTRY_STRING_LEN] __nonstring; }; -static struct region_code_mapping region_code_mapping_t[] = { +static const struct region_code_mapping region_code_mapping_t[] = { { 0x10, "US " }, /* US FCC */ { 0x20, "CA " }, /* IC Canada */ { 0x30, "FR " }, /* France */ @@ -165,7 +165,7 @@ static struct region_code_mapping region_code_mapping_t[] = { }; /* This function converts integer code to region string */ -u8 *mwifiex_11d_code_2_region(u8 code) +const u8 *mwifiex_11d_code_2_region(u8 code) { u8 i; diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c index 5573e2ded72f..3bf27efe4537 100644 --- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c +++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c @@ -159,11 +159,9 @@ static int mwifiex_cmd_host_cmd(struct mwifiex_private *priv, * sending. Afterwards, it logs the command ID and action for debugging * and sets up the command timeout timer. 
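
The timeout mentioned in the comment above is the adapter-wide command timer; mwifiex keeps only one command in flight at a time, so arming it right after the download is enough to catch a lost response. A minimal sketch of that step (treat the exact 10 s constant as an assumption about this driver):

        mod_timer(&adapter->cmd_timer,
                  jiffies + msecs_to_jiffies(MWIFIEX_TIMER_10S));
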
*/ -static int mwifiex_dnld_cmd_to_fw(struct mwifiex_private *priv, +static int mwifiex_dnld_cmd_to_fw(struct mwifiex_adapter *adapter, struct cmd_ctrl_node *cmd_node) { - - struct mwifiex_adapter *adapter = priv->adapter; int ret; struct host_cmd_ds_command *host_cmd; uint16_t cmd_code; @@ -367,8 +365,7 @@ static int mwifiex_dnld_sleep_confirm_cmd(struct mwifiex_adapter *adapter) (test_bit(MWIFIEX_IS_HS_CONFIGURED, &adapter->work_flags) && !adapter->sleep_period.period)) { adapter->pm_wakeup_card_req = true; - mwifiex_hs_activated_event(mwifiex_get_priv - (adapter, MWIFIEX_BSS_ROLE_ANY), true); + mwifiex_hs_activated_event(adapter, true); } return ret; @@ -473,8 +470,7 @@ void mwifiex_free_cmd_buffer(struct mwifiex_adapter *adapter) int mwifiex_process_event(struct mwifiex_adapter *adapter) { int ret, i; - struct mwifiex_private *priv = - mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY); + struct mwifiex_private *priv; struct sk_buff *skb = adapter->event_skb; u32 eventcause; struct mwifiex_rxinfo *rx_info; @@ -744,7 +740,6 @@ mwifiex_insert_cmd_to_pending_q(struct mwifiex_adapter *adapter, */ int mwifiex_exec_next_cmd(struct mwifiex_adapter *adapter) { - struct mwifiex_private *priv; struct cmd_ctrl_node *cmd_node; int ret = 0; struct host_cmd_ds_command *host_cmd; @@ -768,7 +763,6 @@ int mwifiex_exec_next_cmd(struct mwifiex_adapter *adapter) struct cmd_ctrl_node, list); host_cmd = (struct host_cmd_ds_command *) (cmd_node->cmd_skb->data); - priv = cmd_node->priv; if (adapter->ps_state != PS_STATE_AWAKE) { mwifiex_dbg(adapter, ERROR, @@ -783,18 +777,17 @@ int mwifiex_exec_next_cmd(struct mwifiex_adapter *adapter) spin_unlock_bh(&adapter->cmd_pending_q_lock); spin_unlock_bh(&adapter->mwifiex_cmd_lock); - ret = mwifiex_dnld_cmd_to_fw(priv, cmd_node); - priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY); + ret = mwifiex_dnld_cmd_to_fw(adapter, cmd_node); + /* Any command sent to the firmware when host is in sleep * mode should de-configure host sleep. 
We should skip the * host sleep configuration command itself though */ - if (priv && (host_cmd->command != - cpu_to_le16(HostCmd_CMD_802_11_HS_CFG_ENH))) { + if (host_cmd->command != cpu_to_le16(HostCmd_CMD_802_11_HS_CFG_ENH)) { if (adapter->hs_activated) { clear_bit(MWIFIEX_IS_HS_CONFIGURED, &adapter->work_flags); - mwifiex_hs_activated_event(priv, false); + mwifiex_hs_activated_event(adapter, false); } } @@ -810,8 +803,7 @@ int mwifiex_exec_next_cmd(struct mwifiex_adapter *adapter) int mwifiex_process_cmdresp(struct mwifiex_adapter *adapter) { struct host_cmd_ds_command *resp; - struct mwifiex_private *priv = - mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY); + struct mwifiex_private *priv; int ret = 0; uint16_t orig_cmdresp_no; uint16_t cmdresp_no; @@ -900,18 +892,6 @@ int mwifiex_process_cmdresp(struct mwifiex_adapter *adapter) ret = mwifiex_process_sta_cmdresp(priv, cmdresp_no, resp); } - /* Check init command response */ - if (adapter->hw_status == MWIFIEX_HW_STATUS_INITIALIZING) { - if (ret) { - mwifiex_dbg(adapter, ERROR, - "%s: cmd %#x failed during\t" - "initialization\n", __func__, cmdresp_no); - mwifiex_init_fw_complete(adapter); - return -1; - } else if (adapter->last_init_cmd == cmdresp_no) - adapter->hw_status = MWIFIEX_HW_STATUS_INIT_DONE; - } - if (adapter->curr_cmd) { if (adapter->curr_cmd->wait_q_enabled) adapter->cmd_wait_q.status = ret; @@ -1030,10 +1010,6 @@ mwifiex_cmd_timeout_func(struct timer_list *t) mwifiex_cancel_pending_ioctl(adapter); } } - if (adapter->hw_status == MWIFIEX_HW_STATUS_INITIALIZING) { - mwifiex_init_fw_complete(adapter); - return; - } if (adapter->if_ops.device_dump) adapter->if_ops.device_dump(adapter); @@ -1160,27 +1136,27 @@ mwifiex_check_ps_cond(struct mwifiex_adapter *adapter) * This event is generated by the driver, with a blank event body. 
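
The host-sleep state this event reports lives entirely on the adapter, so the signature change in the hunk that follows removes a needless detour through a private interface struct; every call site in this patch updates the same way:

        /* before: fetch any priv just to reach the adapter behind it */
        mwifiex_hs_activated_event(mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY),
                                   true);

        /* after: pass the adapter the event actually belongs to */
        mwifiex_hs_activated_event(adapter, true);
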
*/ void -mwifiex_hs_activated_event(struct mwifiex_private *priv, u8 activated) +mwifiex_hs_activated_event(struct mwifiex_adapter *adapter, u8 activated) { if (activated) { if (test_bit(MWIFIEX_IS_HS_CONFIGURED, - &priv->adapter->work_flags)) { - priv->adapter->hs_activated = true; - mwifiex_update_rxreor_flags(priv->adapter, + &adapter->work_flags)) { + adapter->hs_activated = true; + mwifiex_update_rxreor_flags(adapter, RXREOR_FORCE_NO_DROP); - mwifiex_dbg(priv->adapter, EVENT, + mwifiex_dbg(adapter, EVENT, "event: hs_activated\n"); - priv->adapter->hs_activate_wait_q_woken = true; + adapter->hs_activate_wait_q_woken = true; wake_up_interruptible( - &priv->adapter->hs_activate_wait_q); + &adapter->hs_activate_wait_q); } else { - mwifiex_dbg(priv->adapter, EVENT, + mwifiex_dbg(adapter, EVENT, "event: HS not configured\n"); } } else { - mwifiex_dbg(priv->adapter, EVENT, + mwifiex_dbg(adapter, EVENT, "event: hs_deactivated\n"); - priv->adapter->hs_activated = false; + adapter->hs_activated = false; } } @@ -1204,7 +1180,7 @@ int mwifiex_ret_802_11_hs_cfg(struct mwifiex_private *priv, if (phs_cfg->action == cpu_to_le16(HS_ACTIVATE) && adapter->iface_type != MWIFIEX_USB) { - mwifiex_hs_activated_event(priv, true); + mwifiex_hs_activated_event(adapter, true); return 0; } else { mwifiex_dbg(adapter, CMD, @@ -1217,11 +1193,11 @@ int mwifiex_ret_802_11_hs_cfg(struct mwifiex_private *priv, if (conditions != HS_CFG_CANCEL) { set_bit(MWIFIEX_IS_HS_CONFIGURED, &adapter->work_flags); if (adapter->iface_type == MWIFIEX_USB) - mwifiex_hs_activated_event(priv, true); + mwifiex_hs_activated_event(adapter, true); } else { clear_bit(MWIFIEX_IS_HS_CONFIGURED, &adapter->work_flags); if (adapter->hs_activated) - mwifiex_hs_activated_event(priv, false); + mwifiex_hs_activated_event(adapter, false); } return 0; @@ -1250,9 +1226,7 @@ mwifiex_process_hs_config(struct mwifiex_adapter *adapter) adapter->hs_activated = false; clear_bit(MWIFIEX_IS_HS_CONFIGURED, &adapter->work_flags); clear_bit(MWIFIEX_IS_SUSPENDED, &adapter->work_flags); - mwifiex_hs_activated_event(mwifiex_get_priv(adapter, - MWIFIEX_BSS_ROLE_ANY), - false); + mwifiex_hs_activated_event(adapter, false); } EXPORT_SYMBOL_GPL(mwifiex_process_hs_config); @@ -1302,9 +1276,7 @@ mwifiex_process_sleep_confirm_resp(struct mwifiex_adapter *adapter, } adapter->pm_wakeup_card_req = true; if (test_bit(MWIFIEX_IS_HS_CONFIGURED, &adapter->work_flags)) - mwifiex_hs_activated_event(mwifiex_get_priv - (adapter, MWIFIEX_BSS_ROLE_ANY), - true); + mwifiex_hs_activated_event(adapter, true); adapter->ps_state = PS_STATE_SLEEP; cmd->command = cpu_to_le16(command); cmd->seq_num = cpu_to_le16(seq_num); diff --git a/drivers/net/wireless/marvell/mwifiex/init.c b/drivers/net/wireless/marvell/mwifiex/init.c index ce0d42e72e94..32c374e47794 100644 --- a/drivers/net/wireless/marvell/mwifiex/init.c +++ b/drivers/net/wireless/marvell/mwifiex/init.c @@ -480,14 +480,12 @@ int mwifiex_init_lock_list(struct mwifiex_adapter *adapter) * - Initialize the private structure * - Add BSS priority tables to the adapter structure * - For each interface, send the init commands to firmware - * - Send the first command in command pending queue, if available */ int mwifiex_init_fw(struct mwifiex_adapter *adapter) { int ret; struct mwifiex_private *priv; u8 i, first_sta = true; - int is_cmd_pend_q_empty; adapter->hw_status = MWIFIEX_HW_STATUS_INITIALIZING; @@ -509,11 +507,10 @@ int mwifiex_init_fw(struct mwifiex_adapter *adapter) } if (adapter->mfg_mode) { adapter->hw_status = 
MWIFIEX_HW_STATUS_READY; - ret = -EINPROGRESS; } else { for (i = 0; i < adapter->priv_num; i++) { ret = mwifiex_sta_init_cmd(adapter->priv[i], - first_sta, true); + first_sta); if (ret == -1) return -1; @@ -522,17 +519,15 @@ int mwifiex_init_fw(struct mwifiex_adapter *adapter) } spin_lock_bh(&adapter->cmd_pending_q_lock); - is_cmd_pend_q_empty = list_empty(&adapter->cmd_pending_q); + WARN_ON(!list_empty(&adapter->cmd_pending_q)); spin_unlock_bh(&adapter->cmd_pending_q_lock); - if (!is_cmd_pend_q_empty) { - /* Send the first command in queue and return */ - if (mwifiex_main_process(adapter) != -1) - ret = -EINPROGRESS; - } else { - adapter->hw_status = MWIFIEX_HW_STATUS_READY; - } - return ret; + adapter->hw_status = MWIFIEX_HW_STATUS_READY; + + if (adapter->if_ops.init_fw_port) + adapter->if_ops.init_fw_port(adapter); + + return 0; } /* diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index 0e1f53940401..1485f949ad4e 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -6,6 +6,7 @@ */ #include <linux/suspend.h> +#include <net/sock.h> #include "main.h" #include "wmm.h" @@ -354,13 +355,6 @@ process_start: if (adapter->cmd_resp_received) { adapter->cmd_resp_received = false; mwifiex_process_cmdresp(adapter); - - /* call mwifiex back when init_fw is done */ - if (adapter->hw_status == MWIFIEX_HW_STATUS_INIT_DONE) { - adapter->hw_status = MWIFIEX_HW_STATUS_READY; - mwifiex_init_fw_complete(adapter); - maybe_quirk_fw_disable_ds(adapter); - } } /* Check if we need to confirm Sleep Request @@ -415,10 +409,7 @@ process_start: if (adapter->hs_activated) { clear_bit(MWIFIEX_IS_HS_CONFIGURED, &adapter->work_flags); - mwifiex_hs_activated_event - (mwifiex_get_priv - (adapter, MWIFIEX_BSS_ROLE_ANY), - false); + mwifiex_hs_activated_event(adapter, false); } } @@ -438,10 +429,7 @@ process_start: if (adapter->hs_activated) { clear_bit(MWIFIEX_IS_HS_CONFIGURED, &adapter->work_flags); - mwifiex_hs_activated_event - (mwifiex_get_priv - (adapter, MWIFIEX_BSS_ROLE_ANY), - false); + mwifiex_hs_activated_event(adapter, false); } } @@ -460,10 +448,7 @@ process_start: if (adapter->hs_activated) { clear_bit(MWIFIEX_IS_HS_CONFIGURED, &adapter->work_flags); - mwifiex_hs_activated_event - (mwifiex_get_priv - (adapter, MWIFIEX_BSS_ROLE_ANY), - false); + mwifiex_hs_activated_event(adapter, false); } } @@ -587,21 +572,11 @@ static int _mwifiex_fw_dpc(const struct firmware *firmware, void *context) goto err_dnld_fw; } - adapter->init_wait_q_woken = false; ret = mwifiex_init_fw(adapter); - if (ret == -1) { + if (ret == -1) goto err_init_fw; - } else if (!ret) { - adapter->hw_status = MWIFIEX_HW_STATUS_READY; - goto done; - } - /* Wait for mwifiex_init to complete */ - if (!adapter->mfg_mode) { - wait_event_interruptible(adapter->init_wait_q, - adapter->init_wait_q_woken); - if (adapter->hw_status != MWIFIEX_HW_STATUS_READY) - goto err_init_fw; - } + + maybe_quirk_fw_disable_ds(adapter); if (!adapter->wiphy) { if (mwifiex_register_cfg80211(adapter)) { @@ -939,7 +914,7 @@ mwifiex_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) multicast = is_multicast_ether_addr(skb->data); if (unlikely(!multicast && skb->sk && - skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS && + sock_flag(skb->sk, SOCK_WIFI_STATUS) && priv->adapter->fw_api_ver == MWIFIEX_FW_V15)) skb = mwifiex_clone_skb_for_tx_status(priv, skb, @@ -1558,7 +1533,6 @@ mwifiex_reinit_sw(struct mwifiex_adapter *adapter) adapter->hw_status = 
MWIFIEX_HW_STATUS_INITIALIZING; clear_bit(MWIFIEX_SURPRISE_REMOVED, &adapter->work_flags); - init_waitqueue_head(&adapter->init_wait_q); clear_bit(MWIFIEX_IS_SUSPENDED, &adapter->work_flags); adapter->hs_activated = false; clear_bit(MWIFIEX_IS_CMD_TIMEDOUT, &adapter->work_flags); @@ -1726,7 +1700,6 @@ mwifiex_add_card(void *card, struct completion *fw_done, adapter->hw_status = MWIFIEX_HW_STATUS_INITIALIZING; clear_bit(MWIFIEX_SURPRISE_REMOVED, &adapter->work_flags); - init_waitqueue_head(&adapter->init_wait_q); clear_bit(MWIFIEX_IS_SUSPENDED, &adapter->work_flags); adapter->hs_activated = false; init_waitqueue_head(&adapter->hs_activate_wait_q); diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index 63f1c900e096..9ac36bef980e 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -239,7 +239,6 @@ struct mwifiex_dbg { enum MWIFIEX_HARDWARE_STATUS { MWIFIEX_HW_STATUS_READY, MWIFIEX_HW_STATUS_INITIALIZING, - MWIFIEX_HW_STATUS_INIT_DONE, MWIFIEX_HW_STATUS_RESET, MWIFIEX_HW_STATUS_NOT_READY }; @@ -865,8 +864,6 @@ struct mwifiex_adapter { unsigned long work_flags; u32 fw_release_number; u8 intf_hdr_len; - u16 init_wait_q_woken; - wait_queue_head_t init_wait_q; void *card; struct mwifiex_if_ops if_ops; atomic_t bypass_tx_pending; @@ -919,7 +916,6 @@ struct mwifiex_adapter { struct cmd_ctrl_node *curr_cmd; /* spin lock for command */ spinlock_t mwifiex_cmd_lock; - u16 last_init_cmd; struct timer_list cmd_timer; struct list_head cmd_free_q; /* spin lock for cmd_free_q */ @@ -1060,8 +1056,6 @@ void mwifiex_free_priv(struct mwifiex_private *priv); int mwifiex_init_fw(struct mwifiex_adapter *adapter); -int mwifiex_init_fw_complete(struct mwifiex_adapter *adapter); - void mwifiex_shutdown_drv(struct mwifiex_adapter *adapter); int mwifiex_dnld_fw(struct mwifiex_adapter *, struct mwifiex_fw_image *); @@ -1125,7 +1119,7 @@ int mwifiex_ret_enh_power_mode(struct mwifiex_private *priv, struct host_cmd_ds_command *resp, struct mwifiex_ds_pm_cfg *pm_cfg); void mwifiex_process_hs_config(struct mwifiex_adapter *adapter); -void mwifiex_hs_activated_event(struct mwifiex_private *priv, +void mwifiex_hs_activated_event(struct mwifiex_adapter *adapter, u8 activated); int mwifiex_set_hs_params(struct mwifiex_private *priv, u16 action, int cmd_type, struct mwifiex_ds_hs_cfg *hs_cfg); @@ -1156,7 +1150,7 @@ void mwifiex_process_sta_txpd(struct mwifiex_private *priv, struct sk_buff *skb); void mwifiex_process_uap_txpd(struct mwifiex_private *priv, struct sk_buff *skb); -int mwifiex_sta_init_cmd(struct mwifiex_private *, u8 first_sta, bool init); +int mwifiex_sta_init_cmd(struct mwifiex_private *, u8 first_sta); int mwifiex_cmd_802_11_scan(struct host_cmd_ds_command *cmd, struct mwifiex_scan_cmd_config *scan_cfg); void mwifiex_queue_scan_cmd(struct mwifiex_private *priv, @@ -1565,7 +1559,7 @@ int mwifiex_add_wowlan_magic_pkt_filter(struct mwifiex_adapter *adapter); int mwifiex_set_mgmt_ies(struct mwifiex_private *priv, struct cfg80211_beacon_data *data); int mwifiex_del_mgmt_ies(struct mwifiex_private *priv); -u8 *mwifiex_11d_code_2_region(u8 code); +const u8 *mwifiex_11d_code_2_region(u8 code); void mwifiex_uap_set_channel(struct mwifiex_private *priv, struct mwifiex_uap_bss_param *bss_cfg, struct cfg80211_chan_def chandef); @@ -1598,7 +1592,6 @@ mwifiex_add_sta_entry(struct mwifiex_private *priv, const u8 *mac); struct mwifiex_sta_node * mwifiex_get_sta_entry(struct mwifiex_private *priv, const u8 *mac); u8 
mwifiex_is_tdls_chan_switching(struct mwifiex_private *priv); -u8 mwifiex_is_tdls_off_chan(struct mwifiex_private *priv); u8 mwifiex_is_send_cmd_allowed(struct mwifiex_private *priv); int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv, const u8 *peer, u8 action_code, u8 dialog_token, diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index dd2a42e732f2..a760de191fce 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -2971,7 +2971,7 @@ static int mwifiex_init_pcie(struct mwifiex_adapter *adapter) goto err_iomap2; } - pr_notice("PCI memory map Virt0: %pK PCI memory map Virt2: %pK\n", + pr_notice("PCI memory map Virt0: %p PCI memory map Virt2: %p\n", card->pci_mmap, card->pci_mmap1); ret = mwifiex_pcie_alloc_buffers(adapter); diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c index c4689f5a1acc..c93281f5a47c 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c @@ -157,7 +157,7 @@ mwifiex_cmd_802_11_get_log(struct host_cmd_ds_command *cmd) */ static int mwifiex_cmd_tx_rate_cfg(struct mwifiex_private *priv, struct host_cmd_ds_command *cmd, - u16 cmd_action, u16 *pbitmap_rates) + u16 cmd_action, const u16 *pbitmap_rates) { struct host_cmd_ds_tx_rate_cfg *rate_cfg = &cmd->params.tx_rate_cfg; struct mwifiex_rate_scope *rate_scope; @@ -174,34 +174,19 @@ static int mwifiex_cmd_tx_rate_cfg(struct mwifiex_private *priv, rate_scope->type = cpu_to_le16(TLV_TYPE_RATE_SCOPE); rate_scope->length = cpu_to_le16 (sizeof(*rate_scope) - sizeof(struct mwifiex_ie_types_header)); - if (pbitmap_rates != NULL) { - rate_scope->hr_dsss_rate_bitmap = cpu_to_le16(pbitmap_rates[0]); - rate_scope->ofdm_rate_bitmap = cpu_to_le16(pbitmap_rates[1]); - for (i = 0; i < ARRAY_SIZE(rate_scope->ht_mcs_rate_bitmap); i++) - rate_scope->ht_mcs_rate_bitmap[i] = - cpu_to_le16(pbitmap_rates[2 + i]); - if (priv->adapter->fw_api_ver == MWIFIEX_FW_V15) { - for (i = 0; - i < ARRAY_SIZE(rate_scope->vht_mcs_rate_bitmap); - i++) - rate_scope->vht_mcs_rate_bitmap[i] = - cpu_to_le16(pbitmap_rates[10 + i]); - } - } else { - rate_scope->hr_dsss_rate_bitmap = - cpu_to_le16(priv->bitmap_rates[0]); - rate_scope->ofdm_rate_bitmap = - cpu_to_le16(priv->bitmap_rates[1]); - for (i = 0; i < ARRAY_SIZE(rate_scope->ht_mcs_rate_bitmap); i++) - rate_scope->ht_mcs_rate_bitmap[i] = - cpu_to_le16(priv->bitmap_rates[2 + i]); - if (priv->adapter->fw_api_ver == MWIFIEX_FW_V15) { - for (i = 0; - i < ARRAY_SIZE(rate_scope->vht_mcs_rate_bitmap); - i++) - rate_scope->vht_mcs_rate_bitmap[i] = - cpu_to_le16(priv->bitmap_rates[10 + i]); - } + if (!pbitmap_rates) + pbitmap_rates = priv->bitmap_rates; + + rate_scope->hr_dsss_rate_bitmap = cpu_to_le16(pbitmap_rates[0]); + rate_scope->ofdm_rate_bitmap = cpu_to_le16(pbitmap_rates[1]); + + for (i = 0; i < ARRAY_SIZE(rate_scope->ht_mcs_rate_bitmap); i++) + rate_scope->ht_mcs_rate_bitmap[i] = cpu_to_le16(pbitmap_rates[2 + i]); + + if (priv->adapter->fw_api_ver == MWIFIEX_FW_V15) { + for (i = 0; i < ARRAY_SIZE(rate_scope->vht_mcs_rate_bitmap); i++) + rate_scope->vht_mcs_rate_bitmap[i] = + cpu_to_le16(pbitmap_rates[10 + i]); } rate_drop = (struct mwifiex_rate_drop_pattern *) ((u8 *) rate_scope + @@ -2258,7 +2243,7 @@ int mwifiex_sta_prepare_cmd(struct mwifiex_private *priv, uint16_t cmd_no, * - Set 11d control * - Set MAC control (this must be the last command to initialize firmware) */ -int 
mwifiex_sta_init_cmd(struct mwifiex_private *priv, u8 first_sta, bool init) +int mwifiex_sta_init_cmd(struct mwifiex_private *priv, u8 first_sta) { struct mwifiex_adapter *adapter = priv->adapter; int ret; @@ -2433,11 +2418,5 @@ int mwifiex_sta_init_cmd(struct mwifiex_private *priv, u8 first_sta, bool init) ret = mwifiex_send_cmd(priv, HostCmd_CMD_11N_CFG, HostCmd_ACT_GEN_SET, 0, &tx_cfg, true); - if (init) { - /* set last_init_cmd before sending the command */ - priv->adapter->last_init_cmd = HostCmd_CMD_11N_CFG; - ret = -EINPROGRESS; - } - return ret; } diff --git a/drivers/net/wireless/marvell/mwifiex/txrx.c b/drivers/net/wireless/marvell/mwifiex/txrx.c index bd91678d26b4..f44e22f24511 100644 --- a/drivers/net/wireless/marvell/mwifiex/txrx.c +++ b/drivers/net/wireless/marvell/mwifiex/txrx.c @@ -24,8 +24,7 @@ int mwifiex_handle_rx_packet(struct mwifiex_adapter *adapter, struct sk_buff *skb) { - struct mwifiex_private *priv = - mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY); + struct mwifiex_private *priv; struct rxpd *local_rx_pd; struct mwifiex_rxinfo *rx_info = MWIFIEX_SKB_RXCB(skb); int ret; diff --git a/drivers/net/wireless/marvell/mwifiex/util.c b/drivers/net/wireless/marvell/mwifiex/util.c index 1f1f6280a0f2..4c5b1de0e936 100644 --- a/drivers/net/wireless/marvell/mwifiex/util.c +++ b/drivers/net/wireless/marvell/mwifiex/util.c @@ -116,24 +116,6 @@ static struct mwifiex_debug_data items[] = { static int num_of_items = ARRAY_SIZE(items); /* - * Firmware initialization complete callback handler. - * - * This function wakes up the function waiting on the init - * wait queue for the firmware initialization to complete. - */ -int mwifiex_init_fw_complete(struct mwifiex_adapter *adapter) -{ - - if (adapter->hw_status == MWIFIEX_HW_STATUS_READY) - if (adapter->if_ops.init_fw_port) - adapter->if_ops.init_fw_port(adapter); - - adapter->init_wait_q_woken = true; - wake_up_interruptible(&adapter->init_wait_q); - return 0; -} - -/* * This function sends init/shutdown command * to firmware. 
*/ @@ -663,7 +645,7 @@ u8 mwifiex_is_tdls_chan_switching(struct mwifiex_private *priv) return false; } -u8 mwifiex_is_tdls_off_chan(struct mwifiex_private *priv) +static u8 mwifiex_is_tdls_off_chan(struct mwifiex_private *priv) { struct mwifiex_sta_node *sta_ptr; diff --git a/drivers/net/wireless/marvell/mwifiex/wmm.c b/drivers/net/wireless/marvell/mwifiex/wmm.c index bcb61dab7dc8..1b1222c73728 100644 --- a/drivers/net/wireless/marvell/mwifiex/wmm.c +++ b/drivers/net/wireless/marvell/mwifiex/wmm.c @@ -1428,13 +1428,13 @@ mwifiex_dequeue_tx_packet(struct mwifiex_adapter *adapter) } if (!ptr->is_11n_enabled || - ptr->ba_status || - priv->wps.session_enable) { + ptr->ba_status || + priv->wps.session_enable) { if (ptr->is_11n_enabled && - ptr->ba_status && - ptr->amsdu_in_ampdu && - mwifiex_is_amsdu_allowed(priv, tid) && - mwifiex_is_11n_aggragation_possible(priv, ptr, + ptr->ba_status && + ptr->amsdu_in_ampdu && + mwifiex_is_amsdu_allowed(priv, tid) && + mwifiex_is_11n_aggragation_possible(priv, ptr, adapter->tx_buf_size)) mwifiex_11n_aggregate_pkt(priv, ptr, ptr_index); /* ra_list_spinlock has been freed in diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index e5f553a1ea24..b7ea606bda08 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -9393,7 +9393,7 @@ static void rt2800_loft_search(struct rt2x00_dev *rt2x00dev, u8 ch_idx, p0, p1, pf, idx0, idx1, ibit); if (bidx != 5 && pf <= p0 && pf < p1) { - idxf[iorq] = idxf[iorq]; + /* no need to adjust idxf[] */; } else if (p0 < p1) { pf = p0; idxf[iorq] = idx0 & 0x3F; diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index cf3e976471c6..f6add19d1da1 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -1229,6 +1229,11 @@ static void mac80211_hwsim_set_tsf(struct ieee80211_hw *hw, /* MLD not supported here */ u32 bcn_int = data->link_data[0].beacon_int; u64 delta = abs(tsf - now); + struct ieee80211_bss_conf *conf; + + conf = link_conf_dereference_protected(vif, data->link_data[0].link_id); + if (conf && !conf->enable_beacon) + return; /* adjust after beaconing with new timestamp at old TBTT */ if (tsf > now) { @@ -2273,7 +2278,7 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac, { struct mac80211_hwsim_link_data *link_data = arg; u32 link_id = link_data->link_id; - struct ieee80211_bss_conf *link_conf; + struct ieee80211_bss_conf *link_conf, *tx_bss_conf; struct mac80211_hwsim_data *data = container_of(link_data, struct mac80211_hwsim_data, link_data[link_id]); @@ -2292,10 +2297,11 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac, vif->type != NL80211_IFTYPE_OCB) return; - if (vif->mbssid_tx_vif && vif->mbssid_tx_vif != vif) + tx_bss_conf = rcu_access_pointer(link_conf->tx_bss_conf); + if (tx_bss_conf && tx_bss_conf != link_conf) return; - if (vif->bss_conf.ema_ap) { + if (link_conf->ema_ap) { struct ieee80211_ema_beacons *ema; u8 i = 0; diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 07bf7f9aae01..204278eb215e 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -44,7 +44,7 @@ config PTP_1588_CLOCK_DTE depends on PTP_1588_CLOCK depends on NET && HAS_IOMEM depends on ARCH_BCM_MOBILE || (ARCH_BCM_IPROC && !(ARCH_BCM_NSP || ARCH_BCM_5301X)) || COMPILE_TEST - default y + default y if ARCH_BCM_MOBILE || ARCH_BCM_IPROC help This driver adds support for using the Digital 
timing engine (DTE) in the Broadcom SoC's as a PTP clock. @@ -59,7 +59,7 @@ config PTP_1588_CLOCK_QORIQ tristate "Freescale QorIQ 1588 timer as PTP clock" depends on GIANFAR || FSL_DPAA_ETH || FSL_DPAA2_ETH || FSL_ENETC || FSL_ENETC_VF || COMPILE_TEST depends on PTP_1588_CLOCK - default y + default y if GIANFAR || FSL_DPAA_ETH || FSL_DPAA2_ETH || FSL_ENETC || FSL_ENETC_VF help This driver adds support for using the Freescale QorIQ 1588 timer as a PTP clock. This clock is only useful if your PTP diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 4380e6ddb849..4bf421765d03 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -162,6 +162,7 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, { struct ptp_clock *ptp = container_of(pccontext->clk, struct ptp_clock, clock); + unsigned int i, pin_index, supported_extts_flags; struct ptp_sys_offset_extended *extoff = NULL; struct ptp_sys_offset_precise precise_offset; struct system_device_crosststamp xtstamp; @@ -172,7 +173,6 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, struct ptp_clock_request req; struct ptp_clock_caps caps; struct ptp_clock_time *pct; - unsigned int i, pin_index; struct ptp_pin_desc pd; struct timespec64 ts; int enable, err = 0; @@ -240,6 +240,18 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, err = -EINVAL; break; } + supported_extts_flags = ptp->info->supported_extts_flags; + /* The PTP_ENABLE_FEATURE flag is always supported. */ + supported_extts_flags |= PTP_ENABLE_FEATURE; + /* If the driver does not support strictly checking flags, the + * PTP_RISING_EDGE and PTP_FALLING_EDGE flags are merely + * hints which are not enforced. + */ + if (!(supported_extts_flags & PTP_STRICT_FLAGS)) + supported_extts_flags |= PTP_EXTTS_EDGES; + /* Reject unsupported flags */ + if (req.extts.flags & ~supported_extts_flags) + return -EOPNOTSUPP; req.type = PTP_CLK_REQ_EXTTS; enable = req.extts.flags & PTP_ENABLE_FEATURE ? 
1 : 0; if (mutex_lock_interruptible(&ptp->pincfg_mux)) @@ -312,6 +324,8 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, err = -EINVAL; break; } + if (req.perout.flags & ~ptp->info->supported_perout_flags) + return -EOPNOTSUPP; req.type = PTP_CLK_REQ_PEROUT; enable = req.perout.period.sec || req.perout.period.nsec; if (mutex_lock_interruptible(&ptp->pincfg_mux)) diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c index fbb3fa8fc60b..b8d4df8c6da2 100644 --- a/drivers/ptp/ptp_clockmatrix.c +++ b/drivers/ptp/ptp_clockmatrix.c @@ -283,18 +283,6 @@ static int idtcm_extts_enable(struct idtcm_channel *channel, idtcm = channel->idtcm; old_mask = idtcm->extts_mask; - /* Reject requests with unsupported flags */ - if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_FALLING_EDGE | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - - /* Reject requests to enable time stamping on falling edge */ - if ((rq->extts.flags & PTP_ENABLE_FEATURE) && - (rq->extts.flags & PTP_FALLING_EDGE)) - return -EOPNOTSUPP; - if (index >= MAX_TOD) return -EINVAL; @@ -2043,6 +2031,7 @@ static const struct ptp_clock_info idtcm_caps = { .n_per_out = 12, .n_ext_ts = MAX_TOD, .n_pins = MAX_REF_CLK, + .supported_extts_flags = PTP_RISING_EDGE | PTP_STRICT_FLAGS, .adjphase = &idtcm_adjphase, .getmaxphase = &idtcm_getmaxphase, .adjfine = &idtcm_adjfine, @@ -2060,6 +2049,7 @@ static const struct ptp_clock_info idtcm_caps_deprecated = { .n_per_out = 12, .n_ext_ts = MAX_TOD, .n_pins = MAX_REF_CLK, + .supported_extts_flags = PTP_RISING_EDGE | PTP_STRICT_FLAGS, .adjphase = &idtcm_adjphase, .getmaxphase = &idtcm_getmaxphase, .adjfine = &idtcm_adjfine, diff --git a/drivers/ptp/ptp_fc3.c b/drivers/ptp/ptp_fc3.c index cfced36c70bc..70002500170e 100644 --- a/drivers/ptp/ptp_fc3.c +++ b/drivers/ptp/ptp_fc3.c @@ -592,6 +592,7 @@ static const struct ptp_clock_info idtfc3_caps = { .max_adj = MAX_FFO_PPB, .n_per_out = 1, .n_ext_ts = 1, + .supported_extts_flags = PTP_STRICT_FLAGS | PTP_EXT_OFFSET, .adjphase = &idtfc3_adjphase, .adjfine = &idtfc3_adjfine, .adjtime = &idtfc3_adjtime, diff --git a/drivers/ptp/ptp_idt82p33.c b/drivers/ptp/ptp_idt82p33.c index b2fd94d4f863..f01c50dfa44e 100644 --- a/drivers/ptp/ptp_idt82p33.c +++ b/drivers/ptp/ptp_idt82p33.c @@ -246,18 +246,6 @@ static int idt82p33_extts_enable(struct idt82p33_channel *channel, idt82p33 = channel->idt82p33; old_mask = idt82p33->extts_mask; - /* Reject requests with unsupported flags */ - if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_FALLING_EDGE | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - - /* Reject requests to enable time stamping on falling edge */ - if ((rq->extts.flags & PTP_ENABLE_FEATURE) && - (rq->extts.flags & PTP_FALLING_EDGE)) - return -EOPNOTSUPP; - if (index >= MAX_PHC_PLL) return -EINVAL; @@ -1187,6 +1175,9 @@ static void idt82p33_caps_init(u32 index, struct ptp_clock_info *caps, caps->pin_config = pin_cfg; + caps->supported_extts_flags = PTP_RISING_EDGE | + PTP_STRICT_FLAGS; + for (i = 0; i < max_pins; ++i) { ppd = &pin_cfg[i]; diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 2ccdca4f6960..c273855fbb18 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -2372,7 +2372,7 @@ ptp_ocp_attr_group_add(struct ptp_ocp *bp, if (attr_tbl[i].cap & bp->fw_cap) count++; - bp->attr_group = kcalloc(count + 1, sizeof(struct attribute_group *), + bp->attr_group = kcalloc(count + 1, sizeof(*bp->attr_group), GFP_KERNEL); if (!bp->attr_group) return -ENOMEM; diff --git 
a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 60ed70a39d2c..b7f15f303ea2 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -611,7 +611,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) ism->dev.parent = &pdev->dev; ism->dev.release = ism_dev_release; device_initialize(&ism->dev); - dev_set_name(&ism->dev, dev_name(&pdev->dev)); + dev_set_name(&ism->dev, "%s", dev_name(&pdev->dev)); ret = device_add(&ism->dev); if (ret) goto err_dev; diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c index 897cb8db5084..f9426a586653 100644 --- a/drivers/ssb/driver_gpio.c +++ b/drivers/ssb/driver_gpio.c @@ -148,8 +148,8 @@ static int ssb_gpio_irq_chipco_domain_init(struct ssb_bus *bus) if (bus->bustype != SSB_BUSTYPE_SSB) return 0; - bus->irq_domain = irq_domain_add_linear(NULL, chip->ngpio, - &irq_domain_simple_ops, chipco); + bus->irq_domain = irq_domain_create_linear(NULL, chip->ngpio, &irq_domain_simple_ops, + chipco); if (!bus->irq_domain) { err = -ENODEV; goto err_irq_domain; @@ -347,8 +347,8 @@ static int ssb_gpio_irq_extif_domain_init(struct ssb_bus *bus) if (bus->bustype != SSB_BUSTYPE_SSB) return 0; - bus->irq_domain = irq_domain_add_linear(NULL, chip->ngpio, - &irq_domain_simple_ops, extif); + bus->irq_domain = irq_domain_create_linear(NULL, chip->ngpio, &irq_domain_simple_ops, + extif); if (!bus->irq_domain) { err = -ENODEV; goto err_irq_domain; diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index b9b9e9d40951..7cbfc7d718b3 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -755,10 +755,10 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) int err; int sent_pkts = 0; bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX); + bool busyloop_intr; do { - bool busyloop_intr = false; - + busyloop_intr = false; if (nvq->done_idx == VHOST_NET_BATCH) vhost_tx_batch(net, nvq, sock, &msg); @@ -769,13 +769,10 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) break; /* Nothing new? Wait for eventfd to tell us they refilled. */ if (head == vq->num) { - if (unlikely(busyloop_intr)) { - vhost_poll_queue(&vq->poll); - } else if (unlikely(vhost_enable_notify(&net->dev, - vq))) { - vhost_disable_notify(&net->dev, vq); - continue; - } + /* Kicks are disabled at this point, break loop and + * process any remaining batched packets. Queue will + * be re-enabled afterwards. + */ break; } @@ -825,7 +822,22 @@ done: ++nvq->done_idx; } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); + /* Kicks are still disabled, dispatch any remaining batched msgs. */ vhost_tx_batch(net, nvq, sock, &msg); + + if (unlikely(busyloop_intr)) + /* If interrupted while doing busy polling, requeue the + * handler to be fair to handle_rx as well as other tasks + * waiting on the CPU. + */ + vhost_poll_queue(&vq->poll); + else + /* All of our work has been completed; however, before + * leaving the TX handler, do one last check for work, + * and requeue the handler if necessary. If there is no work, + * the queue will be re-enabled. + */ + vhost_net_busy_poll_try_queue(net, vq); } static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig index fc8ba9142f2f..682bd8ec2c10 100644 --- a/fs/afs/Kconfig +++ b/fs/afs/Kconfig @@ -5,6 +5,7 @@ config AFS_FS select AF_RXRPC select DNS_RESOLVER select NETFS_SUPPORT + select CRYPTO_KRB5 help If you say Y here, you will get an experimental Andrew File System driver.
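
Returning to the ptp_chardev.c hunk above: with extts/perout flag validation centralized in the core ioctl path, a driver only declares its capabilities and no longer open-codes the checks in its .enable handler. A minimal sketch, with hypothetical driver names (supported_extts_flags is the ptp_clock_info field this series adds):

        static const struct ptp_clock_info my_phc_caps = {
                .owner     = THIS_MODULE,
                .name      = "my-phc",          /* hypothetical driver */
                .n_ext_ts  = 1,
                /* the core now answers anything else with -EOPNOTSUPP */
                .supported_extts_flags = PTP_RISING_EDGE | PTP_STRICT_FLAGS,
                .enable    = my_phc_enable,     /* no flag checks needed inside */
        };
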
It currently only supports unsecured read-only AFS access. diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 5efd7e13b304..b49b8fe682f3 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -8,6 +8,7 @@ kafs-y := \ addr_prefs.o \ callback.o \ cell.o \ + cm_security.o \ cmservice.o \ dir.o \ dir_edit.o \ diff --git a/fs/afs/cm_security.c b/fs/afs/cm_security.c new file mode 100644 index 000000000000..edcbd249d202 --- /dev/null +++ b/fs/afs/cm_security.c @@ -0,0 +1,340 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Cache manager security. + * + * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/slab.h> +#include <crypto/krb5.h> +#include "internal.h" +#include "afs_cm.h" +#include "afs_fs.h" +#include "protocol_yfs.h" +#define RXRPC_TRACE_ONLY_DEFINE_ENUMS +#include <trace/events/rxrpc.h> + +#define RXGK_SERVER_ENC_TOKEN 1036U // 0x40c +#define xdr_round_up(x) (round_up((x), sizeof(__be32))) +#define xdr_len_object(x) (4 + round_up((x), sizeof(__be32))) + +#ifdef CONFIG_RXGK +static int afs_create_yfs_cm_token(struct sk_buff *challenge, + struct afs_server *server); +#endif + +/* + * Respond to an RxGK challenge, adding appdata. + */ +static int afs_respond_to_challenge(struct sk_buff *challenge) +{ +#ifdef CONFIG_RXGK + struct krb5_buffer appdata = {}; + struct afs_server *server; +#endif + struct rxrpc_peer *peer; + unsigned long peer_data; + u16 service_id; + u8 security_index; + + rxrpc_kernel_query_challenge(challenge, &peer, &peer_data, + &service_id, &security_index); + + _enter("%u,%u", service_id, security_index); + + switch (service_id) { + /* We don't send CM_SERVICE RPCs, so don't expect a challenge + * therefrom. + */ + case FS_SERVICE: + case VL_SERVICE: + case YFS_FS_SERVICE: + case YFS_VL_SERVICE: + break; + default: + pr_warn("Can't respond to unknown challenge %u:%u", + service_id, security_index); + return rxrpc_kernel_reject_challenge(challenge, RX_USER_ABORT, -EPROTO, + afs_abort_unsupported_sec_class); + } + + switch (security_index) { +#ifdef CONFIG_RXKAD + case RXRPC_SECURITY_RXKAD: + return rxkad_kernel_respond_to_challenge(challenge); +#endif + +#ifdef CONFIG_RXGK + case RXRPC_SECURITY_RXGK: + return rxgk_kernel_respond_to_challenge(challenge, &appdata); + + case RXRPC_SECURITY_YFS_RXGK: + switch (service_id) { + case FS_SERVICE: + case YFS_FS_SERVICE: + server = (struct afs_server *)peer_data; + if (!server->cm_rxgk_appdata.data) { + mutex_lock(&server->cm_token_lock); + if (!server->cm_rxgk_appdata.data) + afs_create_yfs_cm_token(challenge, server); + mutex_unlock(&server->cm_token_lock); + } + if (server->cm_rxgk_appdata.data) + appdata = server->cm_rxgk_appdata; + break; + } + return rxgk_kernel_respond_to_challenge(challenge, &appdata); +#endif + + default: + return rxrpc_kernel_reject_challenge(challenge, RX_USER_ABORT, -EPROTO, + afs_abort_unsupported_sec_class); + } +} + +/* + * Process the OOB message queue, processing challenge packets. 
+ */ +void afs_process_oob_queue(struct work_struct *work) +{ + struct afs_net *net = container_of(work, struct afs_net, rx_oob_work); + struct sk_buff *oob; + enum rxrpc_oob_type type; + + while ((oob = rxrpc_kernel_dequeue_oob(net->socket, &type))) { + switch (type) { + case RXRPC_OOB_CHALLENGE: + afs_respond_to_challenge(oob); + break; + } + rxrpc_kernel_free_oob(oob); + } +} + +#ifdef CONFIG_RXGK +/* + * Create a security keyring for the cache manager and attach a key to it for + * the RxGK tokens we want to use to secure the callback connection back from + * the fileserver. + */ +int afs_create_token_key(struct afs_net *net, struct socket *socket) +{ + const struct krb5_enctype *krb5; + struct key *ring; + key_ref_t key; + char K0[32], *desc; + int ret; + + ring = keyring_alloc("kafs", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(), + KEY_POS_SEARCH | KEY_POS_WRITE | + KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH, + KEY_ALLOC_NOT_IN_QUOTA, + NULL, NULL); + if (IS_ERR(ring)) + return PTR_ERR(ring); + + ret = rxrpc_sock_set_security_keyring(socket->sk, ring); + if (ret < 0) + goto out; + + ret = -ENOPKG; + krb5 = crypto_krb5_find_enctype(KRB5_ENCTYPE_AES128_CTS_HMAC_SHA1_96); + if (!krb5) + goto out; + + if (WARN_ON_ONCE(krb5->key_len > sizeof(K0))) + goto out; + + ret = -ENOMEM; + desc = kasprintf(GFP_KERNEL, "%u:%u:%u:%u", + YFS_CM_SERVICE, RXRPC_SECURITY_YFS_RXGK, 1, krb5->etype); + if (!desc) + goto out; + + wait_for_random_bytes(); + get_random_bytes(K0, krb5->key_len); + + key = key_create(make_key_ref(ring, true), + "rxrpc_s", desc, + K0, krb5->key_len, + KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | KEY_USR_VIEW, + KEY_ALLOC_NOT_IN_QUOTA); + kfree(desc); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + goto out; + } + + net->fs_cm_token_key = key_ref_to_ptr(key); + ret = 0; +out: + key_put(ring); + return ret; +} + +/* + * Create a YFS RxGK GSS token to use as a ticket to the specified fileserver. + */ +static int afs_create_yfs_cm_token(struct sk_buff *challenge, + struct afs_server *server) +{ + const struct krb5_enctype *conn_krb5, *token_krb5; + const struct krb5_buffer *token_key; + struct crypto_aead *aead; + struct scatterlist sg; + struct afs_net *net = server->cell->net; + const struct key *key = net->fs_cm_token_key; + size_t keysize, uuidsize, authsize, toksize, encsize, contsize, adatasize, offset; + __be32 caps[1] = { + [0] = htonl(AFS_CAP_ERROR_TRANSLATION), + }; + __be32 *xdr; + void *appdata, *K0, *encbase; + u32 enctype; + int ret; + + if (!key) + return -ENOKEY; + + /* Assume that the fileserver is happy to use the same encoding type as + * we were told to use by the token obtained by the user.
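
To make the XDR sizing below concrete: xdr_len_object(n) is a 4-byte length word plus n rounded up to a 4-byte boundary. For a hypothetical 16-byte aes128-cts K0 and the 16-byte server UUID, the first two sizes work out as:

        keysize  = 4 + xdr_len_object(16);      /* enctype + key<>  = 4 + (4 + 16) = 24 */
        authsize = 4 + xdr_len_object(16)       /* kind + uuid data = 4 + 20            */
                     + xdr_len_object(0);       /* + empty display  =              + 4 = 28 */
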
+ */ + enctype = rxgk_kernel_query_challenge(challenge); + + conn_krb5 = crypto_krb5_find_enctype(enctype); + if (!conn_krb5) + return -ENOPKG; + token_krb5 = key->payload.data[0]; + token_key = (const struct krb5_buffer *)&key->payload.data[2]; + + /* struct rxgk_key { + * afs_uint32 enctype; + * opaque key<>; + * }; + */ + keysize = 4 + xdr_len_object(conn_krb5->key_len); + + /* struct RXGK_AuthName { + * afs_int32 kind; + * opaque data<AUTHDATAMAX>; + * opaque display<AUTHPRINTABLEMAX>; + * }; + */ + uuidsize = sizeof(server->uuid); + authsize = 4 + xdr_len_object(uuidsize) + xdr_len_object(0); + + /* struct RXGK_Token { + * rxgk_key K0; + * RXGK_Level level; + * rxgkTime starttime; + * afs_int32 lifetime; + * afs_int32 bytelife; + * rxgkTime expirationtime; + * struct RXGK_AuthName identities<>; + * }; + */ + toksize = keysize + 8 + 4 + 4 + 8 + xdr_len_object(authsize); + + offset = 0; + encsize = crypto_krb5_how_much_buffer(token_krb5, KRB5_ENCRYPT_MODE, toksize, &offset); + + /* struct RXGK_TokenContainer { + * afs_int32 kvno; + * afs_int32 enctype; + * opaque encrypted_token<>; + * }; + */ + contsize = 4 + 4 + xdr_len_object(encsize); + + /* struct YFSAppData { + * opr_uuid initiatorUuid; + * opr_uuid acceptorUuid; + * Capabilities caps; + * afs_int32 enctype; + * opaque callbackKey<>; + * opaque callbackToken<>; + * }; + */ + adatasize = 16 + 16 + + xdr_len_object(sizeof(caps)) + + 4 + + xdr_len_object(conn_krb5->key_len) + + xdr_len_object(contsize); + + ret = -ENOMEM; + appdata = kzalloc(adatasize, GFP_KERNEL); + if (!appdata) + goto out; + xdr = appdata; + + memcpy(xdr, &net->uuid, 16); /* appdata.initiatorUuid */ + xdr += 16 / 4; + memcpy(xdr, &server->uuid, 16); /* appdata.acceptorUuid */ + xdr += 16 / 4; + *xdr++ = htonl(ARRAY_SIZE(caps)); /* appdata.caps.len */ + memcpy(xdr, &caps, sizeof(caps)); /* appdata.caps */ + xdr += ARRAY_SIZE(caps); + *xdr++ = htonl(conn_krb5->etype); /* appdata.enctype */ + + *xdr++ = htonl(conn_krb5->key_len); /* appdata.callbackKey.len */ + K0 = xdr; + get_random_bytes(K0, conn_krb5->key_len); /* appdata.callbackKey.data */ + xdr += xdr_round_up(conn_krb5->key_len) / 4; + + *xdr++ = htonl(contsize); /* appdata.callbackToken.len */ + *xdr++ = htonl(1); /* cont.kvno */ + *xdr++ = htonl(token_krb5->etype); /* cont.enctype */ + *xdr++ = htonl(encsize); /* cont.encrypted_token.len */ + + encbase = xdr; + xdr += offset / 4; + *xdr++ = htonl(conn_krb5->etype); /* token.K0.enctype */ + *xdr++ = htonl(conn_krb5->key_len); /* token.K0.key.len */ + memcpy(xdr, K0, conn_krb5->key_len); /* token.K0.key.data */ + xdr += xdr_round_up(conn_krb5->key_len) / 4; + + *xdr++ = htonl(RXRPC_SECURITY_ENCRYPT); /* token.level */ + *xdr++ = htonl(0); /* token.starttime */ + *xdr++ = htonl(0); /* " */ + *xdr++ = htonl(0); /* token.lifetime */ + *xdr++ = htonl(0); /* token.bytelife */ + *xdr++ = htonl(0); /* token.expirationtime */ + *xdr++ = htonl(0); /* " */ + *xdr++ = htonl(1); /* token.identities.count */ + *xdr++ = htonl(0); /* token.identities[0].kind */ + *xdr++ = htonl(uuidsize); /* token.identities[0].data.len */ + memcpy(xdr, &server->uuid, uuidsize); + xdr += xdr_round_up(uuidsize) / 4; + *xdr++ = htonl(0); /* token.identities[0].display.len */ + + xdr = encbase + xdr_round_up(encsize); + + if ((unsigned long)xdr - (unsigned long)appdata != adatasize) + pr_err("Appdata size incorrect %lx != %zx\n", + (unsigned long)xdr - (unsigned long)appdata, adatasize); + + aead = crypto_krb5_prepare_encryption(token_krb5, token_key, RXGK_SERVER_ENC_TOKEN, + GFP_KERNEL); + if 
(IS_ERR(aead)) { + ret = PTR_ERR(aead); + goto out_token; + } + + sg_init_one(&sg, encbase, encsize); + ret = crypto_krb5_encrypt(token_krb5, aead, &sg, 1, encsize, offset, toksize, false); + if (ret < 0) + goto out_aead; + + server->cm_rxgk_appdata.len = adatasize; + server->cm_rxgk_appdata.data = appdata; + appdata = NULL; + +out_aead: + crypto_free_aead(aead); +out_token: + kfree(appdata); +out: + return ret; +} +#endif /* CONFIG_RXGK */ diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 440b0e731093..1124ea4000cb 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -20,6 +20,7 @@ #include <linux/uuid.h> #include <linux/mm_types.h> #include <linux/dns_resolver.h> +#include <crypto/krb5.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/sock.h> @@ -176,8 +177,10 @@ struct afs_call { bool intr; /* T if interruptible */ bool unmarshalling_error; /* T if an unmarshalling error occurred */ bool responded; /* Got a response from the call (may be abort) */ + u8 security_ix; /* Security class */ u16 service_id; /* Actual service ID (after upgrade) */ unsigned int debug_id; /* Trace ID */ + u32 enctype; /* Security encoding type */ u32 operation_ID; /* operation ID for an incoming call */ u32 count; /* count for use in unmarshalling */ union { /* place to extract temporary data */ @@ -281,6 +284,7 @@ struct afs_net { struct socket *socket; struct afs_call *spare_incoming_call; struct work_struct charge_preallocation_work; + struct work_struct rx_oob_work; struct mutex socket_mutex; atomic_t nr_outstanding_calls; atomic_t nr_superblocks; @@ -305,6 +309,7 @@ struct afs_net { struct list_head fs_probe_slow; /* List of afs_server to probe at 5m intervals */ struct hlist_head fs_proc; /* procfs servers list */ + struct key *fs_cm_token_key; /* Key for creating CM tokens */ struct work_struct fs_prober; struct timer_list fs_probe_timer; atomic_t servers_outstanding; @@ -540,6 +545,8 @@ struct afs_server { struct list_head volumes; /* RCU list of afs_server_entry objects */ struct work_struct destroyer; /* Work item to try and destroy a server */ struct timer_list timer; /* Management timer */ + struct mutex cm_token_lock; /* Lock governing creation of appdata */ + struct krb5_buffer cm_rxgk_appdata; /* Appdata to be included in RESPONSE packet */ time64_t unuse_time; /* Time at which last unused */ unsigned long flags; #define AFS_SERVER_FL_RESPONDING 0 /* The server is responding */ @@ -1059,6 +1066,19 @@ extern void __net_exit afs_cell_purge(struct afs_net *); extern bool afs_cm_incoming_call(struct afs_call *); /* + * cm_security.c + */ +void afs_process_oob_queue(struct work_struct *work); +#ifdef CONFIG_RXGK +int afs_create_token_key(struct afs_net *net, struct socket *socket); +#else +static inline int afs_create_token_key(struct afs_net *net, struct socket *socket) +{ + return 0; +} +#endif + +/* * dir.c */ extern const struct file_operations afs_dir_file_operations; diff --git a/fs/afs/main.c b/fs/afs/main.c index c845c5daaeba..02475d415d88 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -73,6 +73,7 @@ static int __net_init afs_net_init(struct net *net_ns) generate_random_uuid((unsigned char *)&net->uuid); INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation); + INIT_WORK(&net->rx_oob_work, afs_process_oob_queue); mutex_init(&net->socket_mutex); net->cells = RB_ROOT; diff --git a/fs/afs/misc.c b/fs/afs/misc.c index b8180bf2281f..8f2b3a177690 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -8,6 +8,7 @@ #include <linux/kernel.h> #include 
<linux/module.h> #include <linux/errno.h> +#include <crypto/krb5.h> #include "internal.h" #include "afs_fs.h" #include "protocol_uae.h" @@ -103,6 +104,32 @@ int afs_abort_to_error(u32 abort_code) case RXKADDATALEN: return -EKEYREJECTED; case RXKADILLEGALLEVEL: return -EKEYREJECTED; + case RXGK_INCONSISTENCY: return -EPROTO; + case RXGK_PACKETSHORT: return -EPROTO; + case RXGK_BADCHALLENGE: return -EPROTO; + case RXGK_SEALEDINCON: return -EKEYREJECTED; + case RXGK_NOTAUTH: return -EKEYREJECTED; + case RXGK_EXPIRED: return -EKEYEXPIRED; + case RXGK_BADLEVEL: return -EKEYREJECTED; + case RXGK_BADKEYNO: return -EKEYREJECTED; + case RXGK_NOTRXGK: return -EKEYREJECTED; + case RXGK_UNSUPPORTED: return -EKEYREJECTED; + case RXGK_GSSERROR: return -EKEYREJECTED; +#ifdef RXGK_BADETYPE + case RXGK_BADETYPE: return -ENOPKG; +#endif +#ifdef RXGK_BADTOKEN + case RXGK_BADTOKEN: return -EKEYREJECTED; +#endif +#ifdef RXGK_DATALEN + case RXGK_DATALEN: return -EPROTO; +#endif +#ifdef RXGK_BADQOP + case RXGK_BADQOP: return -EKEYREJECTED; +#endif + + case KRB5_PROG_KEYTYPE_NOSUPP: return -ENOPKG; + case RXGEN_OPCODE: return -ENOTSUPP; default: return -EREMOTEIO; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index d5e480a33859..c1cadf8fb346 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -24,8 +24,17 @@ static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned static void afs_process_async_call(struct work_struct *); static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long); +static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID); +static void afs_rx_notify_oob(struct sock *sk, struct sk_buff *oob); static int afs_deliver_cm_op_id(struct afs_call *); +static const struct rxrpc_kernel_ops afs_rxrpc_callback_ops = { + .notify_new_call = afs_rx_new_call, + .discard_new_call = afs_rx_discard_new_call, + .user_attach_call = afs_rx_attach, + .notify_oob = afs_rx_notify_oob, +}; + /* asynchronous incoming call initial processing */ static const struct afs_call_type afs_RXCMxxxx = { .name = "CB.xxxx", @@ -49,6 +58,7 @@ int afs_open_socket(struct afs_net *net) goto error_1; socket->sk->sk_allocation = GFP_NOFS; + socket->sk->sk_user_data = net; /* bind the callback manager's address to make this a server socket */ memset(&srx, 0, sizeof(srx)); @@ -64,6 +74,14 @@ int afs_open_socket(struct afs_net *net) if (ret < 0) goto error_2; + ret = rxrpc_sock_set_manage_response(socket->sk, true); + if (ret < 0) + goto error_2; + + ret = afs_create_token_key(net, socket); + if (ret < 0) + pr_err("Couldn't create RxGK CM key: %d\n", ret); + ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); if (ret == -EADDRINUSE) { srx.transport.sin6.sin6_port = 0; @@ -84,8 +102,7 @@ int afs_open_socket(struct afs_net *net) * it sends back to us.
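
The afs_rxrpc_callback_ops table above is handed to rxrpc in the hunk just below, replacing the old pair of per-callback setters; new hooks such as notify_oob can then be added without widening the socket API. Another in-kernel rxrpc user would register the same way (handler names hypothetical):

        static const struct rxrpc_kernel_ops my_rxrpc_ops = {
                .notify_new_call  = my_rx_new_call,
                .discard_new_call = my_rx_discard_new_call,
                .user_attach_call = my_rx_attach,
                .notify_oob       = my_rx_notify_oob,   /* e.g. security challenges */
        };

        rxrpc_kernel_set_notifications(socket, &my_rxrpc_ops);
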
*/ - rxrpc_kernel_new_call_notification(socket, afs_rx_new_call, - afs_rx_discard_new_call); + rxrpc_kernel_set_notifications(socket, &afs_rxrpc_callback_ops); ret = kernel_listen(socket, INT_MAX); if (ret < 0) @@ -125,7 +142,9 @@ void afs_close_socket(struct afs_net *net) kernel_sock_shutdown(net->socket, SHUT_RDWR); flush_workqueue(afs_async_calls); + net->socket->sk->sk_user_data = NULL; sock_release(net->socket); + key_put(net->fs_cm_token_key); _debug("dework"); _leave(""); @@ -738,7 +757,6 @@ void afs_charge_preallocation(struct work_struct *work) if (rxrpc_kernel_charge_accept(net->socket, afs_wake_up_async_call, - afs_rx_attach, (unsigned long)call, GFP_KERNEL, call->debug_id) < 0) @@ -800,10 +818,14 @@ static int afs_deliver_cm_op_id(struct afs_call *call) if (!afs_cm_incoming_call(call)) return -ENOTSUPP; + call->security_ix = rxrpc_kernel_query_call_security(call->rxcall, + &call->service_id, + &call->enctype); + trace_afs_cb_call(call); call->work.func = call->type->work; - /* pass responsibility for the remainer of this message off to the + /* pass responsibility for the remainder of this message off to the * cache manager op */ return call->type->deliver(call); } @@ -952,3 +974,13 @@ noinline int afs_protocol_error(struct afs_call *call, call->unmarshalling_error = true; return -EBADMSG; } + +/* + * Wake up OOB notification processing. + */ +static void afs_rx_notify_oob(struct sock *sk, struct sk_buff *oob) +{ + struct afs_net *net = sk->sk_user_data; + + schedule_work(&net->rx_oob_work); +} diff --git a/fs/afs/server.c b/fs/afs/server.c index 8755f2703815..a97562f831eb 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -131,6 +131,7 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, const uuid_t * timer_setup(&server->timer, afs_server_timer, 0); INIT_LIST_HEAD(&server->volumes); init_waitqueue_head(&server->probe_wq); + mutex_init(&server->cm_token_lock); INIT_LIST_HEAD(&server->probe_link); INIT_HLIST_NODE(&server->proc_link); spin_lock_init(&server->probe_lock); @@ -396,6 +397,7 @@ static void afs_server_rcu(struct rcu_head *rcu) afs_put_endpoint_state(rcu_access_pointer(server->endpoint_state), afs_estate_trace_put_server); afs_put_cell(server->cell, afs_cell_trace_put_server); + kfree(server->cm_rxgk_appdata.data); kfree(server); } diff --git a/include/crypto/krb5.h b/include/crypto/krb5.h index 62d998e62f47..71dd38f59be1 100644 --- a/include/crypto/krb5.h +++ b/include/crypto/krb5.h @@ -64,6 +64,11 @@ struct scatterlist; #define KEY_USAGE_SEED_INTEGRITY (0x55) /* + * Standard Kerberos error codes. + */ +#define KRB5_PROG_KEYTYPE_NOSUPP -1765328233 + +/* * Mode of operation. 
*/ enum krb5_crypto_mode { diff --git a/include/keys/rxrpc-type.h b/include/keys/rxrpc-type.h index 333c0f49a9cd..0ddbe197a261 100644 --- a/include/keys/rxrpc-type.h +++ b/include/keys/rxrpc-type.h @@ -9,6 +9,7 @@ #define _KEYS_RXRPC_TYPE_H #include <linux/key.h> +#include <crypto/krb5.h> /* * key type for AF_RXRPC keys @@ -32,6 +33,21 @@ struct rxkad_key { }; /* + * RxRPC key for YFS-RxGK (type-6 security) + */ +struct rxgk_key { + s64 begintime; /* Time at which the ticket starts */ + s64 endtime; /* Time at which the ticket ends */ + u64 lifetime; /* Maximum lifespan of a connection (seconds) */ + u64 bytelife; /* Maximum number of bytes on a connection */ + unsigned int enctype; /* Encoding type */ + s8 level; /* Negotiated security RXRPC_SECURITY_PLAIN/AUTH/ENCRYPT */ + struct krb5_buffer key; /* Master key, K0 */ + struct krb5_buffer ticket; /* Ticket to be passed to server */ + u8 _key[]; /* Key storage */ +}; + +/* * list of tokens attached to an rxrpc key */ struct rxrpc_key_token { @@ -40,6 +56,7 @@ struct rxrpc_key_token { struct rxrpc_key_token *next; /* the next token in the list */ union { struct rxkad_key *kad; + struct rxgk_key *rxgk; }; }; diff --git a/include/linux/btf.h b/include/linux/btf.h index ebc0c0c9b944..b2983706292f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -522,6 +522,7 @@ bool btf_param_match_suffix(const struct btf *btf, const char *suffix); int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto, u32 arg_no); +u32 btf_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, int off); struct bpf_verifier_log; diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 325af611909f..0b61b8b996d4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -2,79 +2,8 @@ #ifndef _LINUX_DCCP_H #define _LINUX_DCCP_H - -#include <linux/in.h> -#include <linux/interrupt.h> -#include <linux/ktime.h> -#include <linux/list.h> -#include <linux/uio.h> -#include <linux/workqueue.h> - -#include <net/inet_connection_sock.h> -#include <net/inet_sock.h> -#include <net/inet_timewait_sock.h> -#include <net/tcp_states.h> #include <uapi/linux/dccp.h> -enum dccp_state { - DCCP_OPEN = TCP_ESTABLISHED, - DCCP_REQUESTING = TCP_SYN_SENT, - DCCP_LISTEN = TCP_LISTEN, - DCCP_RESPOND = TCP_SYN_RECV, - /* - * States involved in closing a DCCP connection: - * 1) ACTIVE_CLOSEREQ is entered by a server sending a CloseReq. - * - * 2) CLOSING can have three different meanings (RFC 4340, 8.3): - * a. Client has performed active-close, has sent a Close to the server - * from state OPEN or PARTOPEN, and is waiting for the final Reset - * (in this case, SOCK_DONE == 1). - * b. Client is asked to perform passive-close, by receiving a CloseReq - * in (PART)OPEN state. It sends a Close and waits for final Reset - * (in this case, SOCK_DONE == 0). - * c. Server performs an active-close as in (a), keeps TIMEWAIT state. - * - * 3) The following intermediate states are employed to give passively - * closing nodes a chance to process their unread data: - * - PASSIVE_CLOSE (from OPEN => CLOSED) and - * - PASSIVE_CLOSEREQ (from (PART)OPEN to CLOSING; case (b) above). 
- */ - DCCP_ACTIVE_CLOSEREQ = TCP_FIN_WAIT1, - DCCP_PASSIVE_CLOSE = TCP_CLOSE_WAIT, /* any node receiving a Close */ - DCCP_CLOSING = TCP_CLOSING, - DCCP_TIME_WAIT = TCP_TIME_WAIT, - DCCP_CLOSED = TCP_CLOSE, - DCCP_NEW_SYN_RECV = TCP_NEW_SYN_RECV, - DCCP_PARTOPEN = TCP_MAX_STATES, - DCCP_PASSIVE_CLOSEREQ, /* clients receiving CloseReq */ - DCCP_MAX_STATES -}; - -enum { - DCCPF_OPEN = TCPF_ESTABLISHED, - DCCPF_REQUESTING = TCPF_SYN_SENT, - DCCPF_LISTEN = TCPF_LISTEN, - DCCPF_RESPOND = TCPF_SYN_RECV, - DCCPF_ACTIVE_CLOSEREQ = TCPF_FIN_WAIT1, - DCCPF_CLOSING = TCPF_CLOSING, - DCCPF_TIME_WAIT = TCPF_TIME_WAIT, - DCCPF_CLOSED = TCPF_CLOSE, - DCCPF_NEW_SYN_RECV = TCPF_NEW_SYN_RECV, - DCCPF_PARTOPEN = (1 << DCCP_PARTOPEN), -}; - -static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) -{ - return (struct dccp_hdr *)skb_transport_header(skb); -} - -static inline struct dccp_hdr *dccp_zeroed_hdr(struct sk_buff *skb, int headlen) -{ - skb_push(skb, headlen); - skb_reset_transport_header(skb); - return memset(skb_transport_header(skb), 0, headlen); -} - static inline struct dccp_hdr_ext *dccp_hdrx(const struct dccp_hdr *dh) { return (struct dccp_hdr_ext *)((unsigned char *)dh + sizeof(*dh)); @@ -85,12 +14,6 @@ static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh) return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0); } -static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); - return __dccp_basic_hdr_len(dh); -} - static inline __u64 dccp_hdr_seq(const struct dccp_hdr *dh) { __u64 seq_nr = ntohs(dh->dccph_seq); @@ -103,222 +26,10 @@ static inline __u64 dccp_hdr_seq(const struct dccp_hdr *dh) return seq_nr; } -static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) -{ - return (struct dccp_hdr_request *)(skb_transport_header(skb) + - dccp_basic_hdr_len(skb)); -} - -static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) -{ - return (struct dccp_hdr_ack_bits *)(skb_transport_header(skb) + - dccp_basic_hdr_len(skb)); -} - -static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) -{ - const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); - return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) + ntohl(dhack->dccph_ack_nr_low); -} - -static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) -{ - return (struct dccp_hdr_response *)(skb_transport_header(skb) + - dccp_basic_hdr_len(skb)); -} - -static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) -{ - return (struct dccp_hdr_reset *)(skb_transport_header(skb) + - dccp_basic_hdr_len(skb)); -} - static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh) { return __dccp_basic_hdr_len(dh) + dccp_packet_hdr_len(dh->dccph_type); } -static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) -{ - return __dccp_hdr_len(dccp_hdr(skb)); -} - -/** - * struct dccp_request_sock - represent DCCP-specific connection request - * @dreq_inet_rsk: structure inherited from - * @dreq_iss: initial sequence number, sent on the first Response (RFC 4340, 7.1) - * @dreq_gss: greatest sequence number sent (for retransmitted Responses) - * @dreq_isr: initial sequence number received in the first Request - * @dreq_gsr: greatest sequence number received (for retransmitted Request(s)) - * @dreq_service: service code present on the Request (there is just one) - * @dreq_featneg: feature negotiation options for this connection - * The following two fields are 
analogous to the ones in dccp_sock: - * @dreq_timestamp_echo: last received timestamp to echo (13.1) - * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo - */ -struct dccp_request_sock { - struct inet_request_sock dreq_inet_rsk; - __u64 dreq_iss; - __u64 dreq_gss; - __u64 dreq_isr; - __u64 dreq_gsr; - __be32 dreq_service; - spinlock_t dreq_lock; - struct list_head dreq_featneg; - __u32 dreq_timestamp_echo; - __u32 dreq_timestamp_time; -}; - -static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req) -{ - return (struct dccp_request_sock *)req; -} - -extern struct inet_timewait_death_row dccp_death_row; - -extern int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, - struct sk_buff *skb); - -struct dccp_options_received { - u64 dccpor_ndp:48; - u32 dccpor_timestamp; - u32 dccpor_timestamp_echo; - u32 dccpor_elapsed_time; -}; - -struct ccid; - -enum dccp_role { - DCCP_ROLE_UNDEFINED, - DCCP_ROLE_LISTEN, - DCCP_ROLE_CLIENT, - DCCP_ROLE_SERVER, -}; - -struct dccp_service_list { - __u32 dccpsl_nr; - __be32 dccpsl_list[]; -}; - -#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) -#define DCCP_SERVICE_CODE_IS_ABSENT 0 - -static inline bool dccp_list_has_service(const struct dccp_service_list *sl, - const __be32 service) -{ - if (likely(sl != NULL)) { - u32 i = sl->dccpsl_nr; - while (i--) - if (sl->dccpsl_list[i] == service) - return true; - } - return false; -} - -struct dccp_ackvec; - -/** - * struct dccp_sock - DCCP socket state - * - * @dccps_swl - sequence number window low - * @dccps_swh - sequence number window high - * @dccps_awl - acknowledgement number window low - * @dccps_awh - acknowledgement number window high - * @dccps_iss - initial sequence number sent - * @dccps_isr - initial sequence number received - * @dccps_osr - first OPEN sequence number received - * @dccps_gss - greatest sequence number sent - * @dccps_gsr - greatest valid sequence number received - * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss - * @dccps_service - first (passive sock) or unique (active sock) service code - * @dccps_service_list - second .. 
last service code on passive socket - * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option - * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo - * @dccps_l_ack_ratio - feature-local Ack Ratio - * @dccps_r_ack_ratio - feature-remote Ack Ratio - * @dccps_l_seq_win - local Sequence Window (influences ack number validity) - * @dccps_r_seq_win - remote Sequence Window (influences seq number validity) - * @dccps_pcslen - sender partial checksum coverage (via sockopt) - * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) - * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) - * @dccps_ndp_count - number of Non Data Packets since last data packet - * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) - * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) - * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) - * @dccps_hc_rx_ackvec - rx half connection ack vector - * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) - * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) - * @dccps_options_received - parsed set of retrieved options - * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy - * @dccps_tx_qlen - maximum length of the TX queue - * @dccps_role - role of this sock, one of %dccp_role - * @dccps_hc_rx_insert_options - receiver wants to add options when acking - * @dccps_hc_tx_insert_options - sender wants to add options when sending - * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) - * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" - * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets - * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) - * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) - */ -struct dccp_sock { - /* inet_connection_sock has to be the first member of dccp_sock */ - struct inet_connection_sock dccps_inet_connection; -#define dccps_syn_rtt dccps_inet_connection.icsk_ack.lrcvtime - __u64 dccps_swl; - __u64 dccps_swh; - __u64 dccps_awl; - __u64 dccps_awh; - __u64 dccps_iss; - __u64 dccps_isr; - __u64 dccps_osr; - __u64 dccps_gss; - __u64 dccps_gsr; - __u64 dccps_gar; - __be32 dccps_service; - __u32 dccps_mss_cache; - struct dccp_service_list *dccps_service_list; - __u32 dccps_timestamp_echo; - __u32 dccps_timestamp_time; - __u16 dccps_l_ack_ratio; - __u16 dccps_r_ack_ratio; - __u64 dccps_l_seq_win:48; - __u64 dccps_r_seq_win:48; - __u8 dccps_pcslen:4; - __u8 dccps_pcrlen:4; - __u8 dccps_send_ndp_count:1; - __u64 dccps_ndp_count:48; - unsigned long dccps_rate_last; - struct list_head dccps_featneg; - struct dccp_ackvec *dccps_hc_rx_ackvec; - struct ccid *dccps_hc_rx_ccid; - struct ccid *dccps_hc_tx_ccid; - struct dccp_options_received dccps_options_received; - __u8 dccps_qpolicy; - __u32 dccps_tx_qlen; - enum dccp_role dccps_role:2; - __u8 dccps_hc_rx_insert_options:1; - __u8 dccps_hc_tx_insert_options:1; - __u8 dccps_server_timewait:1; - __u8 dccps_sync_scheduled:1; - struct tasklet_struct dccps_xmitlet; - struct timer_list dccps_xmit_timer; -}; - -#define dccp_sk(ptr) container_of_const(ptr, struct dccp_sock, \ - dccps_inet_connection.icsk_inet.sk) - -static inline const char *dccp_role(const struct sock *sk) -{ - switch (dccp_sk(sk)->dccps_role) { - case DCCP_ROLE_UNDEFINED: return "undefined"; - case DCCP_ROLE_LISTEN: return "listen"; 
- case DCCP_ROLE_SERVER: return "server"; - case DCCP_ROLE_CLIENT: return "client"; - } - return NULL; -} - -extern void dccp_syn_ack_timeout(const struct request_sock *req); - #endif /* _LINUX_DCCP_H */ diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 8210ece94fa6..117718c24814 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -17,9 +17,13 @@ #include <linux/compat.h> #include <linux/if_ether.h> #include <linux/netlink.h> +#include <linux/timer_types.h> #include <uapi/linux/ethtool.h> #include <uapi/linux/net_tstamp.h> +#define ETHTOOL_MM_MAX_VERIFY_TIME_MS 128 +#define ETHTOOL_MM_MAX_VERIFY_RETRIES 3 + struct compat_ethtool_rx_flow_spec { u32 flow_type; union ethtool_flow_union h_u; @@ -718,6 +722,75 @@ struct ethtool_mm_stats { u64 MACMergeHoldCount; }; +enum ethtool_mmsv_event { + ETHTOOL_MMSV_LP_SENT_VERIFY_MPACKET, + ETHTOOL_MMSV_LD_SENT_VERIFY_MPACKET, + ETHTOOL_MMSV_LP_SENT_RESPONSE_MPACKET, +}; + +/* MAC Merge verification mPacket type */ +enum ethtool_mpacket { + ETHTOOL_MPACKET_VERIFY, + ETHTOOL_MPACKET_RESPONSE, +}; + +struct ethtool_mmsv; + +/** + * struct ethtool_mmsv_ops - Operations for MAC Merge Software Verification + * @configure_tx: Driver callback for the event where the preemptible TX + * becomes active or inactive. Preemptible traffic + * classes must be committed to hardware only while + * preemptible TX is active. + * @configure_pmac: Driver callback for the event where the pMAC state + * changes as a result of an administrative setting + * (ethtool) or a call to ethtool_mmsv_link_state_handle(). + * @send_mpacket: Driver-provided method for sending a Verify or a Response + * mPacket. + */ +struct ethtool_mmsv_ops { + void (*configure_tx)(struct ethtool_mmsv *mmsv, bool tx_active); + void (*configure_pmac)(struct ethtool_mmsv *mmsv, bool pmac_enabled); + void (*send_mpacket)(struct ethtool_mmsv *mmsv, enum ethtool_mpacket mpacket); +}; + +/** + * struct ethtool_mmsv - MAC Merge Software Verification + * @ops: operations for MAC Merge Software Verification + * @dev: pointer to net_device structure + * @lock: serialize access to MAC Merge state between + * ethtool requests and link state updates. + * @status: current verification FSM state + * @verify_timer: timer for verification in local TX direction + * @verify_enabled: indicates if verification is enabled + * @verify_retries: number of retries for verification + * @pmac_enabled: indicates if the preemptible MAC is enabled + * @verify_time: time for verification in milliseconds + * @tx_enabled: indicates if transmission is enabled + */ +struct ethtool_mmsv { + const struct ethtool_mmsv_ops *ops; + struct net_device *dev; + spinlock_t lock; + enum ethtool_mm_verify_status status; + struct timer_list verify_timer; + bool verify_enabled; + int verify_retries; + bool pmac_enabled; + u32 verify_time; + bool tx_enabled; +}; + +void ethtool_mmsv_stop(struct ethtool_mmsv *mmsv); +void ethtool_mmsv_link_state_handle(struct ethtool_mmsv *mmsv, bool up); +void ethtool_mmsv_event_handle(struct ethtool_mmsv *mmsv, + enum ethtool_mmsv_event event); +void ethtool_mmsv_get_mm(struct ethtool_mmsv *mmsv, + struct ethtool_mm_state *state); +void ethtool_mmsv_set_mm(struct ethtool_mmsv *mmsv, struct ethtool_mm_cfg *cfg); +void ethtool_mmsv_init(struct ethtool_mmsv *mmsv, struct net_device *dev, + const struct ethtool_mmsv_ops *ops); + /** * struct ethtool_rxfh_param - RXFH (RSS) parameters * @hfunc: Defines the current RSS hash function used by HW (or to be set to).
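The MMSV declarations above give drivers a common implementation of the MAC Merge verification state machine. As a rough sketch, not taken from this series (foo_priv and the foo_hw_*()/foo_xmit_smd() helpers are invented names), a driver would embed struct ethtool_mmsv in its private data and supply the three callbacks:

    /* Sketch only: foo_priv, foo_hw_set_preemptible_tcs(),
     * foo_hw_enable_pmac() and foo_xmit_smd() are hypothetical names
     * showing how a driver could hook into the MMSV helpers above.
     */
    struct foo_priv {
            struct net_device *ndev;
            struct ethtool_mmsv mmsv;
    };

    static void foo_mmsv_configure_tx(struct ethtool_mmsv *mmsv, bool tx_active)
    {
            struct foo_priv *priv = container_of(mmsv, struct foo_priv, mmsv);

            /* Commit preemptible traffic classes to hardware only while
             * verification succeeded and preemptible TX is active.
             */
            foo_hw_set_preemptible_tcs(priv, tx_active);
    }

    static void foo_mmsv_configure_pmac(struct ethtool_mmsv *mmsv,
                                        bool pmac_enabled)
    {
            struct foo_priv *priv = container_of(mmsv, struct foo_priv, mmsv);

            foo_hw_enable_pmac(priv, pmac_enabled);
    }

    static void foo_mmsv_send_mpacket(struct ethtool_mmsv *mmsv,
                                      enum ethtool_mpacket mpacket)
    {
            struct foo_priv *priv = container_of(mmsv, struct foo_priv, mmsv);

            /* SMD-V for ETHTOOL_MPACKET_VERIFY, SMD-R for _RESPONSE */
            foo_xmit_smd(priv, mpacket == ETHTOOL_MPACKET_VERIFY);
    }

    static const struct ethtool_mmsv_ops foo_mmsv_ops = {
            .configure_tx   = foo_mmsv_configure_tx,
            .configure_pmac = foo_mmsv_configure_pmac,
            .send_mpacket   = foo_mmsv_send_mpacket,
    };

With that in place, probe would call ethtool_mmsv_init(&priv->mmsv, ndev, &foo_mmsv_ops), link changes would feed ethtool_mmsv_link_state_handle(), received SMD-V/SMD-R frames would feed ethtool_mmsv_event_handle(), and the ethtool ->get_mm()/->set_mm() callbacks can be backed by ethtool_mmsv_get_mm()/ethtool_mmsv_set_mm().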
@@ -926,10 +999,11 @@ struct kernel_ethtool_ts_info { * @get_ts_info: Get the time stamping and PTP hardware clock capabilities. * It may be called with RCU, or rtnl or reference on the device. * Drivers supporting transmit time stamps in software should set this to - * ethtool_op_get_ts_info(). Drivers must not zero statistics which they - * don't report. The stats structure is initialized to ETHTOOL_STAT_NOT_SET - * indicating driver does not report statistics. - * @get_ts_stats: Query the device hardware timestamping statistics. + * ethtool_op_get_ts_info(). + * @get_ts_stats: Query the device hardware timestamping statistics. Drivers + * must not zero statistics which they don't report. The stats structure + * is initialized to ETHTOOL_STAT_NOT_SET indicating the driver does not + * report statistics. * @get_module_info: Get the size and type of the eeprom contained within * a plug-in module. * @get_module_eeprom: Get the eeprom information from the plug-in module @@ -1329,6 +1403,17 @@ extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...); */ extern void ethtool_puts(u8 **data, const char *str); +/** + * ethtool_cpy - Write possibly-not-NUL-terminated string to ethtool string data + * @data: Pointer to a pointer to the start of string to write into + * @str: NUL-byte padded char array of size ETH_GSTRING_LEN to copy from + */ +#define ethtool_cpy(data, str) do { \ + BUILD_BUG_ON(sizeof(str) != ETH_GSTRING_LEN); \ + memcpy(*(data), str, ETH_GSTRING_LEN); \ + *(data) += ETH_GSTRING_LEN; \ +} while (0) + /* Link mode to forced speed capabilities maps */ struct ethtool_forced_speed_map { u32 speed; diff --git a/include/linux/fsl/ntmp.h b/include/linux/fsl/ntmp.h new file mode 100644 index 000000000000..916dc4fe7de3 --- /dev/null +++ b/include/linux/fsl/ntmp.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* Copyright 2025 NXP */ +#ifndef __NETC_NTMP_H +#define __NETC_NTMP_H + +#include <linux/bitops.h> +#include <linux/if_ether.h> + +struct maft_keye_data { + u8 mac_addr[ETH_ALEN]; + __le16 resv; +}; + +struct maft_cfge_data { + __le16 si_bitmap; + __le16 resv; +}; + +struct netc_cbdr_regs { + void __iomem *pir; + void __iomem *cir; + void __iomem *mr; + + void __iomem *bar0; + void __iomem *bar1; + void __iomem *lenr; +}; + +struct netc_tbl_vers { + u8 maft_ver; + u8 rsst_ver; +}; + +struct netc_cbdr { + struct device *dev; + struct netc_cbdr_regs regs; + + int bd_num; + int next_to_use; + int next_to_clean; + + int dma_size; + void *addr_base; + void *addr_base_align; + dma_addr_t dma_base; + dma_addr_t dma_base_align; + + /* Serialize the order of command BD ring */ + spinlock_t ring_lock; +}; + +struct ntmp_user { + int cbdr_num; /* number of control BD rings */ + struct device *dev; + struct netc_cbdr *ring; + struct netc_tbl_vers tbl; +}; + +struct maft_entry_data { + struct maft_keye_data keye; + struct maft_cfge_data cfge; +}; + +#if IS_ENABLED(CONFIG_NXP_NETC_LIB) +int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev, + const struct netc_cbdr_regs *regs); +void ntmp_free_cbdr(struct netc_cbdr *cbdr); + +/* NTMP APIs */ +int ntmp_maft_add_entry(struct ntmp_user *user, u32 entry_id, + struct maft_entry_data *maft); +int ntmp_maft_query_entry(struct ntmp_user *user, u32 entry_id, + struct maft_entry_data *maft); +int ntmp_maft_delete_entry(struct ntmp_user *user, u32 entry_id); +int ntmp_rsst_update_entry(struct ntmp_user *user, const u32 *table, + int count); +int ntmp_rsst_query_entry(struct ntmp_user *user, + u32 *table,
int count); +#else +static inline int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev, + const struct netc_cbdr_regs *regs) +{ + return 0; +} + +static inline void ntmp_free_cbdr(struct netc_cbdr *cbdr) +{ +} + +static inline int ntmp_maft_add_entry(struct ntmp_user *user, u32 entry_id, + struct maft_entry_data *maft) +{ + return 0; +} + +static inline int ntmp_maft_query_entry(struct ntmp_user *user, u32 entry_id, + struct maft_entry_data *maft) +{ + return 0; +} + +static inline int ntmp_maft_delete_entry(struct ntmp_user *user, u32 entry_id) +{ + return 0; +} + +static inline int ntmp_rsst_update_entry(struct ntmp_user *user, + const u32 *table, int count) +{ + return 0; +} + +static inline int ntmp_rsst_query_entry(struct ntmp_user *user, + u32 *table, int count) +{ + return 0; +} + +#endif + +#endif diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 457b4fba88bd..17f917cb4540 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4087,6 +4087,9 @@ enum ieee80211_tdls_actioncode { /* Defines support for enhanced multi-bssid advertisement*/ #define WLAN_EXT_CAPA11_EMA_SUPPORT BIT(3) +/* Enable Beacon Protection */ +#define WLAN_EXT_CAPA11_BCN_PROTECT BIT(4) + /* TDLS specific payload type in the LLC/SNAP header */ #define WLAN_TDLS_SNAP_RFTYPE 0x2 @@ -5615,6 +5618,80 @@ static inline bool ieee80211_tid_to_link_map_size_ok(const u8 *data, size_t len) return len >= fixed + elem_len; } +/** + * ieee80211_emlsr_pad_delay_in_us - Fetch the EMLSR Padding delay + * in microseconds + * @eml_cap: EML capabilities field value from common info field of + * the Multi-link element + * Return: the EMLSR Padding delay (in microseconds) encoded in the + * EML Capabilities field + */ + +static inline u32 ieee80211_emlsr_pad_delay_in_us(u16 eml_cap) +{ + /* IEEE Std 802.11be-2024 Table 9-417i—Encoding of the EMLSR + * Padding Delay subfield. + */ + u32 pad_delay = u16_get_bits(eml_cap, + IEEE80211_EML_CAP_EMLSR_PADDING_DELAY); + + if (!pad_delay || + pad_delay > IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_256US) + return 0; + + return 32 * (1 << (pad_delay - 1)); +} + +/** + * ieee80211_emlsr_trans_delay_in_us - Fetch the EMLSR Transition + * delay in microseconds + * @eml_cap: EML capabilities field value from common info field of + * the Multi-link element + * Return: the EMLSR Transition delay (in microseconds) encoded in the + * EML Capabilities field + */ + +static inline u32 ieee80211_emlsr_trans_delay_in_us(u16 eml_cap) +{ + /* IEEE Std 802.11be-2024 Table 9-417j—Encoding of the EMLSR + * Transition Delay subfield. + */ + u32 trans_delay = + u16_get_bits(eml_cap, + IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY); + + /* invalid values also just use 0 */ + if (!trans_delay || + trans_delay > IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_256US) + return 0; + + return 16 * (1 << (trans_delay - 1)); +} + +/** + * ieee80211_eml_trans_timeout_in_us - Fetch the EMLSR Transition + * timeout value in microseconds + * @eml_cap: EML capabilities field value from common info field of + * the Multi-link element + * Return: the EMLSR Transition timeout (in microseconds) encoded in + * the EML Capabilities field + */ + +static inline u32 ieee80211_eml_trans_timeout_in_us(u16 eml_cap) +{ + /* IEEE Std 802.11be-2024 Table 9-417m—Encoding of the + * Transition Timeout subfield. 
+ */ + u8 timeout = u16_get_bits(eml_cap, + IEEE80211_EML_CAP_TRANSITION_TIMEOUT); + + /* invalid values also just use 0 */ + if (!timeout || timeout > IEEE80211_EML_CAP_TRANSITION_TIMEOUT_128TU) + return 0; + + return 128 * (1 << (timeout - 1)); +} + #define for_each_mle_subelement(_elem, _data, _len) \ if (ieee80211_mle_size_ok(_data, _len)) \ for_each_element(_elem, \ diff --git a/include/linux/mm.h b/include/linux/mm.h index bf55206935c4..8dc012e84033 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4265,4 +4265,62 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); #define VM_SEALED_SYSMAP VM_NONE #endif +/* + * DMA mapping IDs for page_pool + * + * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and + * stashes it in the upper bits of page->pp_magic. We always want to be able to + * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP + * pages can have arbitrary kernel pointers stored in the same field as pp_magic + * (since it overlaps with page->lru.next), so we must ensure that we cannot + * mistake a valid kernel pointer for any of the values we write into this + * field. + * + * On architectures that set POISON_POINTER_DELTA, this is already ensured, + * since this value becomes part of PP_SIGNATURE; meaning we can just use the + * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA) and the + * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is + * 0, we make sure that we leave the two topmost bits empty, as that guarantees + * we won't mistake a valid kernel pointer for a value we set, regardless of the + * VMSPLIT setting. + * + * Altogether, this means that the number of bits available is constrained by + * the size of an unsigned long (at the upper end, subtracting two bits per the + * above), and the definition of PP_SIGNATURE (with or without + * POISON_POINTER_DELTA). + */ +#define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA)) +#if POISON_POINTER_DELTA > 0 +/* PP_SIGNATURE includes POISON_POINTER_DELTA, so limit the size of the DMA + * index to not overlap with that if set + */ +#define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT) +#else +/* Always leave out the topmost two; see above. */ +#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2) +#endif + +#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \ + PP_DMA_INDEX_SHIFT) + +/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is + * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for + * the head page of compound page and bit 1 for pfmemalloc page, as well as the + * bits used for the DMA index. page_is_pfmemalloc() is checked in + * __page_pool_put_page() to avoid recycling the pfmemalloc page. + */ +#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL) + +#ifdef CONFIG_PAGE_POOL +static inline bool page_pool_page_is_pp(struct page *page) +{ + return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE; +} +#else +static inline bool page_pool_page_is_pp(struct page *page) +{ + return false; +} +#endif + #endif /* _LINUX_MM_H */ diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h deleted file mode 100644 index 13274c3def66..000000000000 --- a/include/linux/net/intel/iidc.h +++ /dev/null @@ -1,109 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2021, Intel Corporation.
*/ - -#ifndef _IIDC_H_ -#define _IIDC_H_ - -#include <linux/auxiliary_bus.h> -#include <linux/dcbnl.h> -#include <linux/device.h> -#include <linux/if_ether.h> -#include <linux/kernel.h> -#include <linux/netdevice.h> - -enum iidc_event_type { - IIDC_EVENT_BEFORE_MTU_CHANGE, - IIDC_EVENT_AFTER_MTU_CHANGE, - IIDC_EVENT_BEFORE_TC_CHANGE, - IIDC_EVENT_AFTER_TC_CHANGE, - IIDC_EVENT_CRIT_ERR, - IIDC_EVENT_NBITS /* must be last */ -}; - -enum iidc_reset_type { - IIDC_PFR, - IIDC_CORER, - IIDC_GLOBR, -}; - -enum iidc_rdma_protocol { - IIDC_RDMA_PROTOCOL_IWARP = BIT(0), - IIDC_RDMA_PROTOCOL_ROCEV2 = BIT(1), -}; - -#define IIDC_MAX_USER_PRIORITY 8 -#define IIDC_MAX_DSCP_MAPPING 64 -#define IIDC_DSCP_PFC_MODE 0x1 - -/* Struct to hold per RDMA Qset info */ -struct iidc_rdma_qset_params { - /* Qset TEID returned to the RDMA driver in - * ice_add_rdma_qset and used by RDMA driver - * for calls to ice_del_rdma_qset - */ - u32 teid; /* Qset TEID */ - u16 qs_handle; /* RDMA driver provides this */ - u16 vport_id; /* VSI index */ - u8 tc; /* TC branch the Qset should belong to */ -}; - -struct iidc_qos_info { - u64 tc_ctx; - u8 rel_bw; - u8 prio_type; - u8 egress_virt_up; - u8 ingress_virt_up; -}; - -/* Struct to pass QoS info */ -struct iidc_qos_params { - struct iidc_qos_info tc_info[IEEE_8021QAZ_MAX_TCS]; - u8 up2tc[IIDC_MAX_USER_PRIORITY]; - u8 vport_relative_bw; - u8 vport_priority_type; - u8 num_tc; - u8 pfc_mode; - u8 dscp_map[IIDC_MAX_DSCP_MAPPING]; -}; - -struct iidc_event { - DECLARE_BITMAP(type, IIDC_EVENT_NBITS); - u32 reg; -}; - -struct ice_pf; - -int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); -int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); -int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type); -int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable); -void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos); -int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); -void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); - -/* Structure representing auxiliary driver tailored information about the core - * PCI dev, each auxiliary driver using the IIDC interface will have an - * instance of this struct dedicated to it. - */ - -struct iidc_auxiliary_dev { - struct auxiliary_device adev; - struct ice_pf *pf; -}; - -/* structure representing the auxiliary driver. This struct is to be - * allocated and populated by the auxiliary driver's owner. The core PCI - * driver will access these ops by performing a container_of on the - * auxiliary_device->dev.driver. - */ -struct iidc_auxiliary_drv { - struct auxiliary_driver adrv; - /* This event_handler is meant to be a blocking call. For instance, - * when a BEFORE_MTU_CHANGE event comes in, the event_handler will not - * return until the auxiliary driver is ready for the MTU change to - * happen. - */ - void (*event_handler)(struct ice_pf *pf, struct iidc_event *event); -}; - -#endif /* _IIDC_H_*/ diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h new file mode 100644 index 000000000000..8baad1082042 --- /dev/null +++ b/include/linux/net/intel/iidc_rdma.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2021-2025, Intel Corporation. 
 */ + +#ifndef _IIDC_RDMA_H_ +#define _IIDC_RDMA_H_ + +#include <linux/auxiliary_bus.h> +#include <linux/device.h> +#include <linux/if_ether.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <net/dscp.h> + +enum iidc_rdma_event_type { + IIDC_RDMA_EVENT_BEFORE_MTU_CHANGE, + IIDC_RDMA_EVENT_AFTER_MTU_CHANGE, + IIDC_RDMA_EVENT_BEFORE_TC_CHANGE, + IIDC_RDMA_EVENT_AFTER_TC_CHANGE, + IIDC_RDMA_EVENT_WARN_RESET, + IIDC_RDMA_EVENT_CRIT_ERR, + IIDC_RDMA_EVENT_NBITS /* must be last */ +}; + +struct iidc_rdma_event { + DECLARE_BITMAP(type, IIDC_RDMA_EVENT_NBITS); + u32 reg; +}; + +enum iidc_rdma_reset_type { + IIDC_FUNC_RESET, + IIDC_DEV_RESET, +}; + +enum iidc_rdma_protocol { + IIDC_RDMA_PROTOCOL_IWARP = BIT(0), + IIDC_RDMA_PROTOCOL_ROCEV2 = BIT(1), +}; + +/* Structure to be populated by core LAN PCI driver */ +struct iidc_rdma_core_dev_info { + struct pci_dev *pdev; /* PCI device corresponding to the main function */ + struct auxiliary_device *adev; + /* Current active RDMA protocol */ + enum iidc_rdma_protocol rdma_protocol; + void *iidc_priv; /* elements unique to each driver */ +}; + +/* Structure representing auxiliary driver tailored information about the core + * PCI dev, each auxiliary driver using the IIDC interface will have an + * instance of this struct dedicated to it. + */ +struct iidc_rdma_core_auxiliary_dev { + struct auxiliary_device adev; + struct iidc_rdma_core_dev_info *cdev_info; +}; + +/* Structure representing the auxiliary driver. This struct is to be + * allocated and populated by the auxiliary driver's owner. The core PCI + * driver will access these ops by performing a container_of on the + * auxiliary_device->dev.driver. + */ +struct iidc_rdma_core_auxiliary_drv { + struct auxiliary_driver adrv; + void (*event_handler)(struct iidc_rdma_core_dev_info *cdev, + struct iidc_rdma_event *event); +}; + +#endif /* _IIDC_RDMA_H_*/ diff --git a/include/linux/net/intel/iidc_rdma_ice.h b/include/linux/net/intel/iidc_rdma_ice.h new file mode 100644 index 000000000000..b40eed0e13fe --- /dev/null +++ b/include/linux/net/intel/iidc_rdma_ice.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2021-2025, Intel Corporation.
 */ + +#ifndef _IIDC_RDMA_ICE_H_ +#define _IIDC_RDMA_ICE_H_ + +#include <linux/dcbnl.h> + +#define IIDC_MAX_USER_PRIORITY 8 +#define IIDC_DSCP_PFC_MODE 0x1 + +/** + * struct iidc_rdma_qset_params - Struct to hold per RDMA Qset info + * @teid: TEID of the Qset node + * @qs_handle: SW index of the Qset, RDMA provides this + * @vport_id: VSI index + * @tc: Traffic Class branch the Qset should belong to + */ +struct iidc_rdma_qset_params { + /* Qset TEID returned to the RDMA driver in + * ice_add_rdma_qset and used by RDMA driver + * for calls to ice_del_rdma_qset + */ + u32 teid; + u16 qs_handle; + u16 vport_id; + u8 tc; +}; + +struct iidc_rdma_qos_info { + u64 tc_ctx; + u8 rel_bw; + u8 prio_type; + u8 egress_virt_up; + u8 ingress_virt_up; +}; + +/* Struct to pass QoS info */ +struct iidc_rdma_qos_params { + struct iidc_rdma_qos_info tc_info[IEEE_8021QAZ_MAX_TCS]; + u8 up2tc[IIDC_MAX_USER_PRIORITY]; + u8 vport_relative_bw; + u8 vport_priority_type; + u8 num_tc; + u8 pfc_mode; + u8 dscp_map[DSCP_MAX]; +}; + +struct iidc_rdma_priv_dev_info { + u8 pf_id; + u16 vport_id; + struct net_device *netdev; + struct iidc_rdma_qos_params qos_info; + u8 __iomem *hw_addr; +}; + +int ice_add_rdma_qset(struct iidc_rdma_core_dev_info *cdev, + struct iidc_rdma_qset_params *qset); +int ice_del_rdma_qset(struct iidc_rdma_core_dev_info *cdev, + struct iidc_rdma_qset_params *qset); +int ice_rdma_request_reset(struct iidc_rdma_core_dev_info *cdev, + enum iidc_rdma_reset_type reset_type); +int ice_rdma_update_vsi_filter(struct iidc_rdma_core_dev_info *cdev, u16 vsi_id, + bool enable); +int ice_alloc_rdma_qvector(struct iidc_rdma_core_dev_info *cdev, + struct msix_entry *entry); +void ice_free_rdma_qvector(struct iidc_rdma_core_dev_info *cdev, + struct msix_entry *entry); + +#endif /* _IIDC_RDMA_ICE_H_*/ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ea022750e4e..73a97cf1bbce 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -688,6 +688,7 @@ struct netdev_queue { /* Subordinate device that the queue has been assigned to */ struct net_device *sb_dev; #ifdef CONFIG_XDP_SOCKETS + /* "ops protected", see comment about net_device::lock */ struct xsk_buff_pool *pool; #endif @@ -1771,6 +1772,7 @@ enum netdev_reg_state { * @lltx: device supports lockless Tx. Deprecated for real HW * drivers. Mainly used by logical interfaces, such as * bonding and tunnels + * @netmem_tx: device supports netmem TX. * * @name: This is the first field of the "visible" part of this structure * (i.e. as seen by users in the "Space.c" file). It is the name @@ -1945,13 +1947,11 @@ enum netdev_reg_state { * * @reg_state: Register/unregister state machine * @dismantle: Device is going to be freed - * @rtnl_link_state: This enum represents the phases of creating - * a new link - * * @needs_free_netdev: Should unregister perform free_netdev?
* @priv_destructor: Called from unregister * @npinfo: XXX: need comments on this one * @nd_net: Network namespace this network device is inside + * protected by @lock * * @ml_priv: Mid-layer private * @ml_priv_type: Mid-layer private type @@ -2088,6 +2088,7 @@ struct net_device { struct_group(priv_flags_fast, unsigned long priv_flags:32; unsigned long lltx:1; + unsigned long netmem_tx:1; ); const struct net_device_ops *netdev_ops; const struct header_ops *header_ops; @@ -2359,10 +2360,10 @@ struct net_device { bool dismantle; - enum { - RTNL_LINK_INITIALIZED, - RTNL_LINK_INITIALIZING, - } rtnl_link_state:16; + /** @moving_ns: device is changing netns, protected by @lock */ + bool moving_ns; + /** @rtnl_link_initializing: Device being created, suppress events */ + bool rtnl_link_initializing; bool needs_free_netdev; void (*priv_destructor)(struct net_device *dev); @@ -2521,7 +2522,7 @@ struct net_device { * @net_shaper_hierarchy, @reg_state, @threaded * * Double protects: - * @up + * @up, @moving_ns, @nd_net, @xdp_features * * Double ops protects: * @real_num_rx_queues, @real_num_tx_queues @@ -3267,7 +3268,7 @@ int call_netdevice_notifiers_info(unsigned long val, #define for_each_netdev_continue_rcu(net, d) \ list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_in_bond_rcu(bond, slave) \ - for_each_netdev_rcu(&init_net, slave) \ + for_each_netdev_rcu(dev_net_rcu(bond), slave) \ if (netdev_master_upper_dev_get_rcu(slave) == (bond)) #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) @@ -3502,7 +3503,12 @@ struct softnet_data { }; DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); -DECLARE_PER_CPU(struct page_pool *, system_page_pool); + +struct page_pool_bh { + struct page_pool *pool; + local_lock_t bh_lock; +}; +DECLARE_PER_CPU(struct page_pool_bh, system_page_pool); #ifndef CONFIG_PREEMPT_RT static inline int dev_recursion_level(void) @@ -4689,9 +4695,10 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) /* * txq->trans_start can be read locklessly from dev_watchdog() */ -static inline void txq_trans_update(struct netdev_queue *txq) +static inline void txq_trans_update(const struct net_device *dev, + struct netdev_queue *txq) { - if (txq->xmit_lock_owner != -1) + if (!dev->lltx) WRITE_ONCE(txq->trans_start, jiffies); } @@ -5212,7 +5219,7 @@ static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_devi rc = __netdev_start_xmit(ops, skb, dev, more); if (rc == NETDEV_TX_OK) - txq_trans_update(txq); + txq_trans_update(dev, txq); return rc; } diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h index 38325e070296..848735b3a7c0 100644 --- a/include/linux/netdevice_xmit.h +++ b/include/linux/netdevice_xmit.h @@ -8,6 +8,9 @@ struct netdev_xmit { #ifdef CONFIG_NET_EGRESS u8 skip_txqueue; #endif +#if IS_ENABLED(CONFIG_NET_ACT_MIRRED) + u8 sched_mirred_nest; +#endif }; #endif diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h index ddd111f04ca0..40ff0ec2b879 100644 --- a/include/linux/pds/pds_adminq.h +++ b/include/linux/pds/pds_adminq.h @@ -4,7 +4,7 @@ #ifndef _PDS_CORE_ADMINQ_H_ #define _PDS_CORE_ADMINQ_H_ -#define PDSC_ADMINQ_MAX_POLL_INTERVAL 256 +#define PDSC_ADMINQ_MAX_POLL_INTERVAL 256000 /* usecs */ enum pds_core_adminq_flags { PDS_AQ_FLAG_FASTPOLL = BIT(1), /* completion poll at 1ms */ @@ -463,7 +463,6 @@ struct pds_core_lif_getattr_cmd { * @rsvd: Word boundary padding * @comp_index: Index in the descriptor ring for which this is the 
completion * @state: LIF state (enum pds_core_lif_state) - * @name: LIF name string, 0 terminated * @features: Features (enum pds_core_hw_features) * @rsvd2: Word boundary padding * @color: Color bit diff --git a/include/linux/phy.h b/include/linux/phy.h index a2bfae80c449..7c29d346d4b3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1753,56 +1753,13 @@ int phy_modify_paged(struct phy_device *phydev, int page, u32 regnum, struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, bool is_c45, struct phy_c45_device_ids *c45_ids); -#if IS_ENABLED(CONFIG_PHYLIB) int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id); struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode); struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode); -struct phy_device *device_phy_find_device(struct device *dev); struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode); struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45); int phy_device_register(struct phy_device *phy); void phy_device_free(struct phy_device *phydev); -#else -static inline int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id) -{ - return 0; -} -static inline -struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode) -{ - return 0; -} - -static inline -struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode) -{ - return NULL; -} - -static inline struct phy_device *device_phy_find_device(struct device *dev) -{ - return NULL; -} - -static inline -struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode) -{ - return NULL; -} - -static inline -struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45) -{ - return NULL; -} - -static inline int phy_device_register(struct phy_device *phy) -{ - return 0; -} - -static inline void phy_device_free(struct phy_device *phydev) { } -#endif /* CONFIG_PHYLIB */ void phy_device_remove(struct phy_device *phydev); int phy_get_c45_ids(struct phy_device *phydev); int phy_init_hw(struct phy_device *phydev); @@ -2046,6 +2003,9 @@ int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev, enum ethtool_link_mode_bit_indices linkmode, u32 *val); +int phy_get_mac_termination(struct phy_device *phydev, struct device *dev, + u32 *val); + void phy_resolve_pause(unsigned long *local_adv, unsigned long *partner_adv, bool *tx_pause, bool *rx_pause); @@ -2102,6 +2062,7 @@ int __phy_hwtstamp_set(struct phy_device *phydev, struct netlink_ext_ack *extack); extern const struct bus_type mdio_bus_type; +extern const struct class mdio_bus_class; struct mdio_board_info { const char *bus_id; @@ -2110,17 +2071,8 @@ struct mdio_board_info { const void *platform_data; }; -#if IS_ENABLED(CONFIG_MDIO_DEVICE) int mdiobus_register_board_info(const struct mdio_board_info *info, unsigned int n); -#else -static inline int mdiobus_register_board_info(const struct mdio_board_info *i, - unsigned int n) -{ - return 0; -} -#endif - /** * phy_module_driver() - Helper macro for registering PHY drivers diff --git a/include/linux/poison.h b/include/linux/poison.h index 331a9a996fa8..8ca2235f78d5 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -70,6 +70,10 @@ #define KEY_DESTROY 0xbd /********** net/core/page_pool.c **********/ +/* + * page_pool uses additional free bits within this value to store data, see the + * definition of PP_DMA_INDEX_MASK in mm.h + */ #define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA) 
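The poison.h note above, together with the PP_DMA_INDEX_* definitions added to mm.h earlier, describes how page_pool packs an xarray-allocated DMA index into page->pp_magic next to PP_SIGNATURE. A compact illustration of that layout, with helper names invented for this sketch (the real encode/decode lives in net/core/page_pool.c):

    /* Illustrative only: how a DMA index cohabits with PP_SIGNATURE in
     * page->pp_magic. The pp_example_*() names are hypothetical.
     */
    static inline unsigned long pp_example_encode(unsigned long dma_index)
    {
            /* Bits 0-1 stay clear (reserved for compound-head and
             * pfmemalloc marking); the index lands in the free bits
             * above PP_SIGNATURE.
             */
            return PP_SIGNATURE | (dma_index << PP_DMA_INDEX_SHIFT);
    }

    static inline unsigned long pp_example_decode(unsigned long pp_magic)
    {
            return (pp_magic & PP_DMA_INDEX_MASK) >> PP_DMA_INDEX_SHIFT;
    }

page_pool_page_is_pp() then masks away both the index bits and bits 0-1, so (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE holds for every pool page regardless of the stored index.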
/********** net/core/skbuff.c **********/ diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 0d68d09bedd1..eced7e9bf69a 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -68,6 +68,22 @@ struct ptp_system_timestamp { * @n_per_out: The number of programmable periodic signals. * @n_pins: The number of programmable pins. * @pps: Indicates whether the clock supports a PPS callback. + * + * @supported_perout_flags: The set of flags the driver supports for the + * PTP_PEROUT_REQUEST ioctl. The PTP core will + * reject a request with any flag not specified + * here. + * + * @supported_extts_flags: The set of flags the driver supports for the + * PTP_EXTTS_REQUEST ioctl. The PTP core will use + * this list to reject unsupported requests. + * PTP_ENABLE_FEATURE is assumed and does not need to + * be included. If PTP_STRICT_FLAGS is *not* set, + * then both PTP_RISING_EDGE and PTP_FALLING_EDGE + * will be assumed. Note that PTP_STRICT_FLAGS must + * be set if the driver wants to honor + * PTP_EXTTS_REQUEST2 and any future flags. + * * @pin_config: Array of length 'n_pins'. If the number of * programmable pins is nonzero, then drivers must * allocate and initialize this array. @@ -174,6 +190,8 @@ struct ptp_clock_info { int n_per_out; int n_pins; int pps; + unsigned int supported_perout_flags; + unsigned int supported_extts_flags; struct ptp_pin_desc *pin_config; int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm); int (*adjphase)(struct ptp_clock_info *ptp, s32 phase); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b974a277975a..c7397b17bb08 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -481,9 +481,6 @@ enum { /* generate software time stamp on packet tx completion */ SKBTX_COMPLETION_TSTAMP = 1 << 3, - /* generate wifi status information (where possible) */ - SKBTX_WIFI_STATUS = 1 << 4, - /* determine hardware time stamp based on time or cycles */ SKBTX_HW_TSTAMP_NETDEV = 1 << 5, @@ -1710,13 +1707,16 @@ static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) extern const struct ubuf_info_ops msg_zerocopy_ubuf_ops; struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, - struct ubuf_info *uarg); + struct ubuf_info *uarg, bool devmem); void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); +struct net_devmem_dmabuf_binding; + int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, struct iov_iter *from, - size_t length); + size_t length, + struct net_devmem_dmabuf_binding *binding); int zerocopy_fill_skb_from_iter(struct sk_buff *skb, struct iov_iter *from, size_t length); @@ -1724,12 +1724,14 @@ int zerocopy_fill_skb_from_iter(struct sk_buff *skb, static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) { - return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len); + return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len, + NULL); } int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, - struct ubuf_info *uarg); + struct ubuf_info *uarg, + struct net_devmem_dmabuf_binding *binding); /* Internal */ #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) @@ -3700,6 +3702,10 @@ static inline dma_addr_t __skb_frag_dma_map(struct device *dev, size_t offset, size_t size, enum dma_data_direction dir) { + if (skb_frag_is_net_iov(frag)) { + return
netmem_to_net_iov(frag->netmem)->dma_addr + offset + + frag->offset; + } return dma_map_page(dev, skb_frag_page(frag), skb_frag_off(frag) + offset, size, dir); } @@ -4105,8 +4111,7 @@ static inline void skb_frag_list_init(struct sk_buff *skb) int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue, int *err, long *timeo_p, const struct sk_buff *skb); -struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, - struct sk_buff_head *queue, +struct sk_buff *__skb_try_recv_from_queue(struct sk_buff_head *queue, unsigned int flags, int *off, int *err, struct sk_buff **last); @@ -4146,6 +4151,8 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, unsigned int flags); int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, int len); +int skb_send_sock_locked_with_flags(struct sock *sk, struct sk_buff *skb, + int offset, int len, int flags); int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h index 0f3c58007488..9e49372ef1a0 100644 --- a/include/linux/skbuff_ref.h +++ b/include/linux/skbuff_ref.h @@ -17,7 +17,7 @@ */ static inline void __skb_frag_ref(skb_frag_t *frag) { - get_page(skb_frag_page(frag)); + get_netmem(skb_frag_netmem(frag)); } /** @@ -40,7 +40,7 @@ static inline void skb_page_unref(netmem_ref netmem, bool recycle) if (recycle && napi_pp_put_page(netmem)) return; #endif - put_page(netmem_to_page(netmem)); + put_netmem(netmem); } /** diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index a476648858a6..d8949a4ed0dc 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -192,7 +192,7 @@ struct mtk_wed_device { }; struct mtk_wed_ops { - int (*attach)(struct mtk_wed_device *dev); + int (*attach)(struct mtk_wed_device *dev) __releases(RCU); int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring, void __iomem *regs, bool reset); int (*rx_ring_setup)(struct mtk_wed_device *dev, int ring, diff --git a/include/linux/socket.h b/include/linux/socket.h index c3322eb3d686..3b262487ec06 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -168,7 +168,7 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } -static inline size_t msg_data_left(struct msghdr *msg) +static inline size_t msg_data_left(const struct msghdr *msg) { return iov_iter_count(&msg->msg_iter); } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index c4ec8bb8144e..26ddf95d23f9 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -233,13 +233,14 @@ struct plat_stmmacenet_data { u8 tx_sched_algorithm; struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES]; + void (*get_interfaces)(struct stmmac_priv *priv, void *bsp_priv, + unsigned long *interfaces); int (*set_clk_tx_rate)(void *priv, struct clk *clk_tx_i, phy_interface_t interface, int speed); void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); - void (*speed_mode_2500)(struct net_device *ndev, void *priv); int 
(*mac_finish)(struct net_device *ndev, void *priv, unsigned int mode, @@ -276,7 +277,6 @@ struct plat_stmmacenet_data { int mac_port_sel_speed; int has_xgmac; u8 vlan_fail_q; - unsigned long eee_usecs_rate; struct pci_dev *pdev; int int_snapshot_num; int msi_mac_vec; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1669d95bb0f9..a8af71623ba7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -385,7 +385,8 @@ struct tcp_sock { syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_fastopen_ch:1, /* Active TFO re-enabling probe */ - syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ + syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ + syn_fastopen_child:1; /* created TFO passive child socket */ u8 keepalive_probes; /* num of allowed keep alive probes */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ diff --git a/include/linux/tfrc.h b/include/linux/tfrc.h deleted file mode 100644 index a5acc768085d..000000000000 --- a/include/linux/tfrc.h +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _LINUX_TFRC_H_ -#define _LINUX_TFRC_H_ -/* - * TFRC - Data Structures for the TCP-Friendly Rate Control congestion - * control mechanism as specified in RFC 3448. - * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> - * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> - * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon - */ -#include <linux/types.h> - -/** tfrc_rx_info - TFRC Receiver Data Structure - * - * @tfrcrx_x_recv: receiver estimate of sending rate (3.2.2) - * @tfrcrx_rtt: round-trip-time (communicated by sender) - * @tfrcrx_p: current estimate of loss event rate (3.2.2) - */ -struct tfrc_rx_info { - __u32 tfrcrx_x_recv; - __u32 tfrcrx_rtt; - __u32 tfrcrx_p; -}; - -/** tfrc_tx_info - TFRC Sender Data Structure - * - * @tfrctx_x: computed transmit rate (4.3 (4)) - * @tfrctx_x_recv: receiver estimate of send rate (4.3) - * @tfrctx_x_calc: return value of throughput equation (3.1) - * @tfrctx_rtt: (moving average) estimate of RTT (4.3) - * @tfrctx_p: current loss event rate (5.4) - * @tfrctx_rto: estimate of RTO, equals 4*RTT (4.3) - * @tfrctx_ipi: inter-packet interval (4.6) - * - * Note: X and X_recv are both maintained in units of 64 * bytes/second. This - * enables a finer resolution of sending rates and avoids problems with - * integer arithmetic; u32 is not sufficient as scaling consumes 6 bits. - */ -struct tfrc_tx_info { - __u64 tfrctx_x; - __u64 tfrctx_x_recv; - __u32 tfrctx_x_calc; - __u32 tfrctx_rtt; - __u32 tfrctx_p; - __u32 tfrctx_rto; - __u32 tfrctx_ipi; -}; - -#endif /* _LINUX_TFRC_H_ */ diff --git a/include/linux/udp.h b/include/linux/udp.h index 0807e21cfec9..4e1a672af4c5 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -101,6 +101,13 @@ struct udp_sock { /* Cache friendly copy of sk->sk_peek_off >= 0 */ bool peeking_with_offset; + + /* + * Accounting for the tunnel GRO fastpath. + * Unprotected by compile-time guards, as it uses space available in + * the last UDP socket cacheline.
+ */ + struct hlist_node tunnel_list; }; #define udp_test_bit(nr, sk) \ @@ -209,6 +216,9 @@ static inline void udp_allow_gso(struct sock *sk) #define udp_portaddr_for_each_entry(__sk, list) \ hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node) +#define udp_portaddr_for_each_entry_from(__sk) \ + hlist_for_each_entry_from(__sk, __sk_common.skc_portaddr_node) + #define udp_portaddr_for_each_entry_rcu(__sk, list) \ hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node) @@ -219,4 +229,13 @@ static inline void udp_allow_gso(struct sock *sk) #define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE) +static inline struct sock *udp_tunnel_sk(const struct net *net, bool is_ipv6) +{ +#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) + return rcu_dereference(net->ipv4.udp_tunnel_gro[is_ipv6].sk); +#else + return NULL; +#endif +} + #endif /* _LINUX_UDP_H */ diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index cf793d18e5df..0fb4c41c9bbf 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -16,6 +16,7 @@ struct sock; struct socket; struct rxrpc_call; struct rxrpc_peer; +struct krb5_buffer; enum rxrpc_abort_reason; enum rxrpc_interruptibility { @@ -24,23 +25,33 @@ enum rxrpc_interruptibility { RXRPC_UNINTERRUPTIBLE, /* Call should not be interruptible at all */ }; +enum rxrpc_oob_type { + RXRPC_OOB_CHALLENGE, /* Security challenge for a connection */ +}; + /* * Debug ID counter for tracing. */ extern atomic_t rxrpc_debug_id; +/* + * Operations table for rxrpc to call out to a kernel application (e.g. kAFS). + */ +struct rxrpc_kernel_ops { + void (*notify_new_call)(struct sock *sk, struct rxrpc_call *call, + unsigned long user_call_ID); + void (*discard_new_call)(struct rxrpc_call *call, unsigned long user_call_ID); + void (*user_attach_call)(struct rxrpc_call *call, unsigned long user_call_ID); + void (*notify_oob)(struct sock *sk, struct sk_buff *oob); +}; + typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, unsigned long); typedef void (*rxrpc_notify_end_tx_t)(struct sock *, struct rxrpc_call *, unsigned long); -typedef void (*rxrpc_notify_new_call_t)(struct sock *, struct rxrpc_call *, - unsigned long); -typedef void (*rxrpc_discard_new_call_t)(struct rxrpc_call *, unsigned long); -typedef void (*rxrpc_user_attach_call_t)(struct rxrpc_call *, unsigned long); -void rxrpc_kernel_new_call_notification(struct socket *, - rxrpc_notify_new_call_t, - rxrpc_discard_new_call_t); +void rxrpc_kernel_set_notifications(struct socket *sock, + const struct rxrpc_kernel_ops *app_ops); struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, struct rxrpc_peer *peer, struct key *key, @@ -72,16 +83,33 @@ const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer); unsigned long rxrpc_kernel_set_peer_data(struct rxrpc_peer *peer, unsigned long app_data); unsigned long rxrpc_kernel_get_peer_data(const struct rxrpc_peer *peer); unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *); -int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, - rxrpc_user_attach_call_t, unsigned long, gfp_t, - unsigned int); +int rxrpc_kernel_charge_accept(struct socket *sock, rxrpc_notify_rx_t notify_rx, + unsigned long user_call_ID, gfp_t gfp, + unsigned int debug_id); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *); -u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); -void rxrpc_kernel_set_max_life(struct socket 
*, struct rxrpc_call *, - unsigned long); int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val); int rxrpc_sock_set_security_keyring(struct sock *, struct key *); +int rxrpc_sock_set_manage_response(struct sock *sk, bool set); + +enum rxrpc_oob_type rxrpc_kernel_query_oob(struct sk_buff *oob, + struct rxrpc_peer **_peer, + unsigned long *_peer_appdata); +struct sk_buff *rxrpc_kernel_dequeue_oob(struct socket *sock, + enum rxrpc_oob_type *_type); +void rxrpc_kernel_free_oob(struct sk_buff *oob); +void rxrpc_kernel_query_challenge(struct sk_buff *challenge, + struct rxrpc_peer **_peer, + unsigned long *_peer_appdata, + u16 *_service_id, u8 *_security_index); +int rxrpc_kernel_reject_challenge(struct sk_buff *challenge, u32 abort_code, + int error, enum rxrpc_abort_reason why); +int rxkad_kernel_respond_to_challenge(struct sk_buff *challenge); +u32 rxgk_kernel_query_challenge(struct sk_buff *challenge); +int rxgk_kernel_respond_to_challenge(struct sk_buff *challenge, + struct krb5_buffer *appdata); +u8 rxrpc_kernel_query_call_security(struct rxrpc_call *call, + u16 *_service_id, u32 *_enctype); #endif /* _NET_RXRPC_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index efbd79c67be2..d1848dc8ec99 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1097,43 +1097,6 @@ int cfg80211_chandef_primary(const struct cfg80211_chan_def *chandef, int nl80211_send_chandef(struct sk_buff *msg, const struct cfg80211_chan_def *chandef); /** - * ieee80211_chanwidth_rate_flags - return rate flags for channel width - * @width: the channel width of the channel - * - * In some channel types, not all rates may be used - for example CCK - * rates may not be used in 5/10 MHz channels. - * - * Returns: rate flags which apply for this channel width - */ -static inline enum ieee80211_rate_flags -ieee80211_chanwidth_rate_flags(enum nl80211_chan_width width) -{ - switch (width) { - case NL80211_CHAN_WIDTH_5: - return IEEE80211_RATE_SUPPORTS_5MHZ; - case NL80211_CHAN_WIDTH_10: - return IEEE80211_RATE_SUPPORTS_10MHZ; - default: - break; - } - return 0; -} - -/** - * ieee80211_chandef_rate_flags - returns rate flags for a channel - * @chandef: channel definition for the channel - * - * See ieee80211_chanwidth_rate_flags(). - * - * Returns: rate flags which apply for this channel - */ -static inline enum ieee80211_rate_flags -ieee80211_chandef_rate_flags(struct cfg80211_chan_def *chandef) -{ - return ieee80211_chanwidth_rate_flags(chandef->width); -} - -/** * ieee80211_chandef_max_power - maximum transmission power for the chandef * * In some regulations, the transmit power may depend on the configured channel @@ -1300,11 +1263,13 @@ struct cfg80211_crypto_settings { * struct cfg80211_mbssid_config - AP settings for multi bssid * * @tx_wdev: pointer to the transmitted interface in the MBSSID set + * @tx_link_id: link ID of the transmitted profile in an MLD. * @index: index of this AP in the multi bssid group. * @ema: set to true if the beacons should be sent out in EMA mode. 
*/ struct cfg80211_mbssid_config { struct wireless_dev *tx_wdev; + u8 tx_link_id; u8 index; bool ema; }; @@ -1770,6 +1735,9 @@ struct cfg80211_ttlm_params { * @supported_oper_classes_len: number of supported operating classes * @support_p2p_ps: information if station supports P2P PS mechanism * @airtime_weight: airtime scheduler weight for this station + * @eml_cap_present: Specifies if EML capabilities field (@eml_cap) is + * present/updated + * @eml_cap: EML capabilities of this station * @link_sta_params: link related params. */ struct station_parameters { @@ -1794,6 +1762,8 @@ struct station_parameters { u8 supported_oper_classes_len; int support_p2p_ps; u16 airtime_weight; + bool eml_cap_present; + u16 eml_cap; struct link_station_parameters link_sta_params; }; diff --git a/include/net/devlink.h b/include/net/devlink.h index b8783126c1ed..0091f23a40f7 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -420,11 +420,11 @@ typedef u64 devlink_resource_occ_get_t(void *priv); #define __DEVLINK_PARAM_MAX_STRING_VALUE 32 enum devlink_param_type { - DEVLINK_PARAM_TYPE_U8, - DEVLINK_PARAM_TYPE_U16, - DEVLINK_PARAM_TYPE_U32, - DEVLINK_PARAM_TYPE_STRING, - DEVLINK_PARAM_TYPE_BOOL, + DEVLINK_PARAM_TYPE_U8 = DEVLINK_VAR_ATTR_TYPE_U8, + DEVLINK_PARAM_TYPE_U16 = DEVLINK_VAR_ATTR_TYPE_U16, + DEVLINK_PARAM_TYPE_U32 = DEVLINK_VAR_ATTR_TYPE_U32, + DEVLINK_PARAM_TYPE_STRING = DEVLINK_VAR_ATTR_TYPE_STRING, + DEVLINK_PARAM_TYPE_BOOL = DEVLINK_VAR_ATTR_TYPE_FLAG, }; union devlink_param_value { diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index e4fdc6b54cef..bea77934a235 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -40,6 +40,7 @@ FN(TCP_OFOMERGE) \ FN(TCP_RFC7323_PAWS) \ FN(TCP_RFC7323_PAWS_ACK) \ + FN(TCP_RFC7323_TW_PAWS) \ FN(TCP_RFC7323_TSECR) \ FN(TCP_LISTEN_OVERFLOW) \ FN(TCP_OLD_SEQUENCE) \ @@ -284,6 +285,12 @@ enum skb_drop_reason { */ SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK, /** + * @SKB_DROP_REASON_TCP_RFC7323_TW_PAWS: PAWS check, socket is in + * TIME_WAIT state. + * Corresponds to LINUX_MIB_PAWS_TW_REJECTED. + */ + SKB_DROP_REASON_TCP_RFC7323_TW_PAWS, + /** * @SKB_DROP_REASON_TCP_RFC7323_TSECR: PAWS check, invalid TSEcr. * Corresponds to LINUX_MIB_TSECRREJECTED. 
*/ diff --git a/include/net/dsa.h b/include/net/dsa.h index a0a9481c52c2..55e2d97f247e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1131,9 +1131,10 @@ struct dsa_switch_ops { * PTP functionality */ int (*port_hwtstamp_get)(struct dsa_switch *ds, int port, - struct ifreq *ifr); + struct kernel_hwtstamp_config *config); int (*port_hwtstamp_set)(struct dsa_switch *ds, int port, - struct ifreq *ifr); + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void (*port_txtstamp)(struct dsa_switch *ds, int port, struct sk_buff *skb); bool (*port_rxtstamp)(struct dsa_switch *ds, int port, diff --git a/include/net/flow.h b/include/net/flow.h index 2a3f0c42f092..a1839c278d87 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -39,6 +39,7 @@ struct flowi_common { #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 #define FLOWI_FLAG_L3MDEV_OIF 0x04 +#define FLOWI_FLAG_ANY_SPORT 0x08 __u32 flowic_secid; kuid_t flowic_uid; __u32 flowic_multipath_hash; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 949641e92539..4564b5d348b1 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -175,14 +175,9 @@ struct inet_hashinfo { bool pernet; } ____cacheline_aligned_in_smp; -static inline struct inet_hashinfo *tcp_or_dccp_get_hashinfo(const struct sock *sk) +static inline struct inet_hashinfo *tcp_get_hashinfo(const struct sock *sk) { -#if IS_ENABLED(CONFIG_IP_DCCP) - return sk->sk_prot->h.hashinfo ? : - sock_net(sk)->ipv4.tcp_death_row.hashinfo; -#else return sock_net(sk)->ipv4.tcp_death_row.hashinfo; -#endif } static inline struct inet_listen_hashbucket * diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 7c87873ae211..88b0dd4d8e09 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -198,6 +198,7 @@ struct fib6_info { fib6_destroying:1, unused:4; + struct list_head purge_link; struct rcu_head rcu; struct nexthop *nh; struct fib6_nh fib6_nh[]; diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e3864b74e92a..48bb3cf41469 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -574,7 +574,8 @@ static inline u32 fib_multipath_hash_from_keys(const struct net *net, int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack); -void fib_select_multipath(struct fib_result *res, int hash); +void fib_select_multipath(struct fib_result *res, int hash, + const struct flowi4 *fl4); void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index a36a335cef9f..0c3d571a04a1 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -377,10 +377,9 @@ struct net *ip_tunnel_get_link_net(const struct net_device *dev); int ip_tunnel_get_iflink(const struct net_device *dev); int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); - -void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id, - struct rtnl_link_ops *ops, - struct list_head *dev_to_kill); +void ip_tunnel_delete_net(struct net *net, unsigned int id, + struct rtnl_link_ops *ops, + struct list_head *dev_to_kill); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c498f685d01f..fdafc37d17cc 100644 --- a/include/net/mac80211.h +++ 
b/include/net/mac80211.h @@ -682,6 +682,9 @@ struct ieee80211_parsed_tpe { * responder functionality. * @ftmr_params: configurable lci/civic parameter when enabling FTM responder. * @nontransmitted: this BSS is a nontransmitted BSS profile + * @tx_bss_conf: Pointer to the BSS configuration of transmitting interface + * if MBSSID is enabled. This pointer is RCU-protected due to CSA finish + * and BSS color change flows accessing it. * @transmitter_bssid: the address of transmitter AP * @bssid_index: index inside the multiple BSSID set * @bssid_indicator: 2^bssid_indicator is the maximum number of APs in set @@ -804,6 +807,7 @@ struct ieee80211_bss_conf { struct ieee80211_ftm_responder_params *ftmr_params; /* Multiple BSSID data */ bool nontransmitted; + struct ieee80211_bss_conf __rcu *tx_bss_conf; u8 transmitter_bssid[ETH_ALEN]; u8 bssid_index; u8 bssid_indicator; @@ -2023,7 +2027,6 @@ enum ieee80211_neg_ttlm_res { * @txq: the multicast data TX queue * @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see * &enum ieee80211_offload_flags. - * @mbssid_tx_vif: Pointer to the transmitting interface if MBSSID is enabled. */ struct ieee80211_vif { enum nl80211_iftype type; @@ -2052,8 +2055,6 @@ struct ieee80211_vif { bool probe_req_reg; bool rx_mcast_action_reg; - struct ieee80211_vif *mbssid_tx_vif; - /* must be last */ u8 drv_priv[] __aligned(sizeof(void *)); }; @@ -2488,6 +2489,7 @@ struct ieee80211_link_sta { * @max_amsdu_subframes: indicates the maximal number of MSDUs in a single * A-MSDU. Taken from the Extended Capabilities element. 0 means * unlimited. + * @eml_cap: EML capabilities of this MLO station * @cur: currently valid data as aggregated from the active links * For non MLO STA it will point to the deflink data. For MLO STA * ieee80211_sta_recalc_aggregates() must be called to update it. @@ -2522,6 +2524,7 @@ struct ieee80211_sta { bool mlo; bool spp_amsdu; u8 max_amsdu_subframes; + u16 eml_cap; struct ieee80211_sta_aggregates *cur; @@ -5347,22 +5350,6 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, int max_rates); /** - * ieee80211_sta_set_expected_throughput - set the expected tpt for a station - * - * Call this function to notify mac80211 about a change in expected throughput - * to a station. A driver for a device that does rate control in firmware can - * call this function when the expected throughput estimate towards a station - * changes. The information is used to tune the CoDel AQM applied to traffic - * going towards that station (which can otherwise be too aggressive and cause - * slow stations to starve). - * - * @pubsta: the station to set throughput for. - * @thr: the current expected throughput in kbps. 
- */ -void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta, - u32 thr); - -/** * ieee80211_tx_rate_update - transmit rate update callback * * Drivers should call this functions with a non-NULL pub sta diff --git a/include/net/mptcp.h b/include/net/mptcp.h index bfbad695951c..f7263fe2a2e4 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -101,18 +101,9 @@ struct mptcp_out_options { #define MPTCP_SCHED_MAX 128 #define MPTCP_SCHED_BUF_MAX (MPTCP_SCHED_NAME_MAX * MPTCP_SCHED_MAX) -#define MPTCP_SUBFLOWS_MAX 8 - -struct mptcp_sched_data { - u8 subflows; - struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX]; -}; - struct mptcp_sched_ops { - int (*get_send)(struct mptcp_sock *msk, - struct mptcp_sched_data *data); - int (*get_retrans)(struct mptcp_sock *msk, - struct mptcp_sched_data *data); + int (*get_send)(struct mptcp_sock *msk); + int (*get_retrans)(struct mptcp_sock *msk); char name[MPTCP_SCHED_NAME_MAX]; struct module *owner; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index bd57d8fb54f1..025a7574b275 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -475,8 +475,8 @@ struct pernet_operations { void (*exit)(struct net *net); void (*exit_batch)(struct list_head *net_exit_list); /* Following method is called with RTNL held. */ - void (*exit_batch_rtnl)(struct list_head *net_exit_list, - struct list_head *dev_kill_list); + void (*exit_rtnl)(struct net *net, + struct list_head *dev_kill_list); unsigned int * const id; const size_t size; }; diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index c316b551df8d..2a753813f849 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -48,6 +48,22 @@ static inline void netdev_unlock_ops(struct net_device *dev) netdev_unlock(dev); } +static inline void netdev_lock_ops_to_full(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + netdev_assert_locked(dev); + else + netdev_lock(dev); +} + +static inline void netdev_unlock_full_to_ops(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + netdev_assert_locked(dev); + else + netdev_unlock(dev); +} + static inline void netdev_ops_assert_locked(const struct net_device *dev) { if (netdev_need_ops_lock(dev)) @@ -64,6 +80,22 @@ netdev_ops_assert_locked_or_invisible(const struct net_device *dev) netdev_ops_assert_locked(dev); } +static inline void netdev_lock_ops_compat(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + netdev_lock(dev); + else + rtnl_lock(); +} + +static inline void netdev_unlock_ops_compat(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + netdev_unlock(dev); + else + rtnl_unlock(); +} + static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b) { diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index d01c82983b4d..069ff35a72de 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -85,9 +85,11 @@ struct netdev_queue_stats_tx { * for some of the events is not maintained, and reliable "total" cannot * be provided). * + * Ops are called under the instance lock if netdev_need_ops_lock() + * returns true, otherwise under rtnl_lock. * Device drivers can assume that when collecting total device stats, * the @get_base_stats and subsequent per-queue calls are performed - * "atomically" (without releasing the rtnl_lock). + * "atomically" (without releasing the relevant lock). 
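+ *
+ * A minimal sketch (hypothetical driver-facing code) of honouring that
+ * rule with the compat helpers from net/netdev_lock.h, which take the
+ * instance lock when the device needs it and fall back to rtnl_lock
+ * otherwise:
+ *
+ *	netdev_lock_ops_compat(dev);
+ *	... fill struct netdev_queue_stats_rx / _tx ...
+ *	netdev_unlock_ops_compat(dev);
+ *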
* * Device drivers are encouraged to reset the per-queue statistics when * number of queues change. This is because the primary use case for diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index b2238b551dce..8cdcd138b33f 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -20,12 +20,12 @@ struct netdev_rx_queue { struct net_device *dev; netdevice_tracker dev_tracker; + /* All fields below are "ops protected", + * see comment about net_device::lock + */ #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; #endif - /* NAPI instance for the queue - * "ops protected", see comment about net_device::lock - */ struct napi_struct *napi; struct pp_memory_provider_params mp_params; } ____cacheline_aligned_in_smp; diff --git a/include/net/netlink.h b/include/net/netlink.h index 29e0db940382..90a560dc167a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -321,7 +321,13 @@ enum nla_policy_validation { * All other Unused - but note that it's a union * * Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN: + * NLA_U8, NLA_U16, + * NLA_U32, NLA_U64, + * NLA_S8, NLA_S16, + * NLA_S32, NLA_S64, + * NLA_MSECS, * NLA_BINARY Validation function called for the attribute. + * * All other Unused - but note that it's a union * * Example: @@ -612,6 +618,22 @@ static inline int nlmsg_len(const struct nlmsghdr *nlh) } /** + * nlmsg_payload - message payload if the data fits in the len + * @nlh: netlink message header + * @len: struct length + * + * Returns: The netlink message payload/data if the length is sufficient, + * otherwise NULL. + */ +static inline void *nlmsg_payload(const struct nlmsghdr *nlh, size_t len) +{ + if (nlh->nlmsg_len < nlmsg_msg_size(len)) + return NULL; + + return nlmsg_data(nlh); +} + +/** * nlmsg_attrdata - head of attributes data * @nlh: netlink message header * @hdrlen: length of family specific header diff --git a/include/net/netmem.h b/include/net/netmem.h index c61d5b21e7b4..386164fb9c18 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -8,6 +8,7 @@ #ifndef _NET_NETMEM_H #define _NET_NETMEM_H +#include <linux/dma-mapping.h> #include <linux/mm.h> #include <net/net_debug.h> @@ -20,8 +21,17 @@ DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers); */ #define NET_IOV 0x01UL +enum net_iov_type { + NET_IOV_DMABUF, + NET_IOV_IOURING, + + /* Force size to unsigned long to make the NET_IOV_ASSERTS below pass. 
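+ *
+ * Illustrative aside (not from this patch): a consumer telling the two
+ * providers apart can branch on the new field, e.g.
+ *
+ *	if (niov->type == NET_IOV_DMABUF)
+ *		... dmabuf-backed path ...
+ *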
+ */ + NET_IOV_MAX = ULONG_MAX +}; + struct net_iov { - unsigned long __unused_padding; + enum net_iov_type type; unsigned long pp_magic; struct page_pool *pp; struct net_iov_area *owner; @@ -264,4 +274,26 @@ static inline unsigned long netmem_get_dma_addr(netmem_ref netmem) return __netmem_clear_lsb(netmem)->dma_addr; } +void get_netmem(netmem_ref netmem); +void put_netmem(netmem_ref netmem); + +#define netmem_dma_unmap_addr_set(NETMEM, PTR, ADDR_NAME, VAL) \ + do { \ + if (!netmem_is_net_iov(NETMEM)) \ + dma_unmap_addr_set(PTR, ADDR_NAME, VAL); \ + else \ + dma_unmap_addr_set(PTR, ADDR_NAME, 0); \ + } while (0) + +static inline void netmem_dma_unmap_page_attrs(struct device *dev, + dma_addr_t addr, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + if (!addr) + return; + + dma_unmap_page_attrs(dev, addr, size, dir, attrs); +} + #endif /* _NET_NETMEM_H */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 650b2dc9199f..6373e3f17da8 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -47,6 +47,11 @@ struct sysctl_fib_multipath_hash_seed { }; #endif +struct udp_tunnel_gro { + struct sock __rcu *sk; + struct hlist_head list; +}; + struct netns_ipv4 { /* Cacheline organization can be found documented in * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst. @@ -85,6 +90,11 @@ struct netns_ipv4 { struct inet_timewait_death_row tcp_death_row; struct udp_table *udp_table; +#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) + /* Not in a pernet subsys because need to be available at GRO stage */ + struct udp_tunnel_gro udp_tunnel_gro[2]; +#endif + #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; struct ctl_table_header *frags_hdr; @@ -277,4 +287,5 @@ struct netns_ipv4 { struct hlist_head *inet_addr_lst; struct delayed_work addr_chk_work; }; + #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 5f2cfd84570a..47dc70d8100a 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -72,6 +72,7 @@ struct netns_ipv6 { struct rt6_statistics *rt6_stats; struct timer_list ip6_fib_timer; struct hlist_head *fib_table_hash; + spinlock_t fib_table_hash_lock; struct fib6_table *fib6_main_tbl; struct list_head fib6_walkers; rwlock_t fib6_walker_lock; diff --git a/include/net/nexthop.h b/include/net/nexthop.h index d9fb44e8b321..572e69cda476 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -152,6 +152,8 @@ struct nexthop { u8 protocol; /* app managing this nh */ u8 nh_flags; bool is_group; + bool dead; + spinlock_t lock; /* protect dead and f6i_list */ refcount_t refcnt; struct rcu_head rcu; diff --git a/include/net/p8022.h b/include/net/p8022.h deleted file mode 100644 index a29e224ac498..000000000000 --- a/include/net/p8022.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NET_P8022_H -#define _NET_P8022_H - -struct net_device; -struct packet_type; -struct sk_buff; - -struct datalink_proto * -register_8022_client(unsigned char type, - int (*func)(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev)); -void unregister_8022_client(struct datalink_proto *proto); -#endif diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 582a3d00cbe2..93f2c31baf9b 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -395,6 +395,12 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, page_pool_put_full_page(pool, page, true); } +static inline void 
page_pool_recycle_direct_netmem(struct page_pool *pool, + netmem_ref netmem) +{ + page_pool_put_full_netmem(pool, netmem, true); +} + #define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA \ (sizeof(dma_addr_t) > sizeof(unsigned long)) @@ -492,4 +498,9 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) page_pool_update_nid(pool, new_nid); } +static inline bool page_pool_is_unreadable(struct page_pool *pool) +{ + return !!pool->mp_ops; +} + #endif /* _NET_PAGE_POOL_HELPERS_H */ diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 36eb57d73abc..431b593de709 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -6,6 +6,7 @@ #include <linux/dma-direction.h> #include <linux/ptr_ring.h> #include <linux/types.h> +#include <linux/xarray.h> #include <net/netmem.h> #define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA @@ -33,6 +34,9 @@ #define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \ PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM) +/* Index limit to stay within PP_DMA_INDEX_BITS for DMA indices */ +#define PP_DMA_INDEX_LIMIT XA_LIMIT(1, BIT(PP_DMA_INDEX_BITS) - 1) + /* * Fast allocation side cache array/stack * @@ -221,6 +225,8 @@ struct page_pool { void *mp_priv; const struct memory_provider_ops *mp_ops; + struct xarray dma_mapped; + #ifdef CONFIG_PAGE_POOL_STATS /* recycle stats are per-cpu to avoid locking */ struct page_pool_recycle_stats __percpu *recycle_stats; diff --git a/include/net/route.h b/include/net/route.h index c605fd5ec0c0..8e39aa822cf9 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -326,6 +326,9 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, if (inet_test_bit(TRANSPARENT, sk)) flow_flags |= FLOWI_FLAG_ANYSRC; + if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !sport) + flow_flags |= FLOWI_FLAG_ANY_SPORT; + flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), protocol, flow_flags, dst, src, dport, sport, sk->sk_uid); diff --git a/include/net/rps.h b/include/net/rps.h index e358e9711f27..507f4aa5d39b 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -57,9 +57,10 @@ struct rps_dev_flow_table { * meaning we use 32-6=26 bits for the hash. */ struct rps_sock_flow_table { - u32 mask; + struct rcu_head rcu; + u32 mask; - u32 ents[] ____cacheline_aligned_in_smp; + u32 ents[] ____cacheline_aligned_in_smp; }; #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) diff --git a/include/net/rstreason.h b/include/net/rstreason.h index 69cb2e52b7da..979ac87b5d99 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -36,7 +36,7 @@ /** * enum sk_rst_reason - the reasons of socket reset * - * The reasons of sk reset, which are used in DCCP/TCP/MPTCP protocols. + * The reasons of sk reset, which are used in TCP/MPTCP protocols. 
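+ *
+ * For example (a sketch, not taken from this change), a TCP caller hands
+ * one of these reasons over when actively resetting a connection:
+ *
+ *	tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED);
+ *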
* * There are three parts in order: * 1) skb drop reasons: relying on drop reasons for such as passive reset diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1c05fed05f2b..629368ab2787 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -803,6 +803,14 @@ static inline bool qdisc_tx_changing(const struct net_device *dev) return false; } +/* "noqueue" qdisc identified by not having any enqueue, see noqueue_init() */ +static inline bool qdisc_txq_has_no_queue(const struct netdev_queue *txq) +{ + struct Qdisc *qdisc = rcu_access_pointer(txq->qdisc); + + return qdisc->enqueue == NULL; +} + /* Is the device using the noop qdisc on all queues? */ static inline bool qdisc_tx_is_noop(const struct net_device *dev) { diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 64c42bd56bb2..3bfd261a53cc 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -161,7 +161,6 @@ const struct sctp_sm_table_entry *sctp_sm_lookup_event( enum sctp_event_type event_type, enum sctp_state state, union sctp_subtype event_subtype); -int sctp_chunk_iif(const struct sctp_chunk *); struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *, struct sctp_chunk *, gfp_t gfp); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index dcd288fa1bb6..1ad7ce71d0a7 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -2152,8 +2152,6 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *, const union sctp_addr *address, const gfp_t gfp, const int peer_state); -void sctp_assoc_del_peer(struct sctp_association *asoc, - const union sctp_addr *addr); void sctp_assoc_rm_peer(struct sctp_association *asoc, struct sctp_transport *peer); void sctp_assoc_control_transport(struct sctp_association *asoc, diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index 21e7fa2a1813..cddebafb9f77 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -16,9 +16,5 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport); u32 secure_tcpv6_ts_off(const struct net *net, const __be32 *saddr, const __be32 *daddr); -u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); -u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport); #endif /* _NET_SECURE_SEQ */ diff --git a/include/net/sock.h b/include/net/sock.h index 694f954258d4..3e15d7105ad2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1781,7 +1781,6 @@ void sk_free(struct sock *sk); void sk_net_refcnt_upgrade(struct sock *sk); void sk_destruct(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); -void sk_free_unlock_clone(struct sock *sk); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority); @@ -1852,6 +1851,7 @@ struct sockcm_cookie { u32 tsflags; u32 ts_opt_id; u32 priority; + u32 dmabuf_id; }; static inline void sockcm_init(struct sockcm_cookie *sockc, @@ -2605,8 +2605,8 @@ struct sock_skb_cb { * using skb->cb[] would keep using it directly and utilize its * alignment guarantee. 
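 *
 * A typical (illustrative) access through the accessor defined below:
 *
 *	SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops);
 *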
*/ -#define SOCK_SKB_CB_OFFSET ((sizeof_field(struct sk_buff, cb) - \ - sizeof(struct sock_skb_cb))) +#define SOCK_SKB_CB_OFFSET (sizeof_field(struct sk_buff, cb) - \ + sizeof(struct sock_skb_cb)) #define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \ SOCK_SKB_CB_OFFSET)) @@ -2736,8 +2736,6 @@ static inline void _sock_tx_timestamp(struct sock *sk, *tskey = atomic_inc_return(&sk->sk_tskey) - 1; } } - if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) - *tx_flags |= SKBTX_WIFI_STATUS; } static inline void sock_tx_timestamp(struct sock *sk, diff --git a/include/net/strparser.h b/include/net/strparser.h index 0a83010b3a64..0ed73e364faa 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -114,8 +114,6 @@ static inline void strp_pause(struct strparser *strp) /* May be called without holding lock for attached socket */ void strp_unpause(struct strparser *strp); -/* Must be called with process lock held (lock_sock) */ -void __strp_unpause(struct strparser *strp); static inline void save_strp_stats(struct strparser *strp, struct strp_aggr_stats *agg_stats) diff --git a/include/net/tcp.h b/include/net/tcp.h index 4450c384ef17..5078ad868fee 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -427,7 +427,8 @@ enum tcp_tw_status { enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th, - u32 *tw_isn); + u32 *tw_isn, + enum skb_drop_reason *drop_reason); struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, bool fastopen, bool *lost_race, enum skb_drop_reason *drop_reason); diff --git a/include/net/udp.h b/include/net/udp.h index 6e89520e100d..a772510b2aa5 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -290,6 +290,7 @@ static inline void udp_lib_init_sock(struct sock *sk) struct udp_sock *up = udp_sk(sk); skb_queue_head_init(&up->reader_queue); + INIT_HLIST_NODE(&up->tunnel_list); up->forward_threshold = sk->sk_rcvbuf >> 2; set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); } diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index a93dc51f6323..2df3b8344eb5 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -191,6 +191,21 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) } #endif +#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) +void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add); +void udp_tunnel_update_gro_rcv(struct sock *sk, bool add); +#else +static inline void udp_tunnel_update_gro_lookup(struct net *net, + struct sock *sk, bool add) {} +static inline void udp_tunnel_update_gro_rcv(struct sock *sk, bool add) {} +#endif + +static inline void udp_tunnel_cleanup_gro(struct sock *sk) +{ + udp_tunnel_update_gro_rcv(sk, false); + udp_tunnel_update_gro_lookup(sock_net(sk), sk, false); +} + static inline void udp_tunnel_encap_enable(struct sock *sk) { if (udp_test_and_set_bit(ENCAP_ENABLED, sk)) diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 2dd23ee2bacd..e2f7ca045d3e 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -296,7 +296,7 @@ struct vxlan_dev { struct vxlan_rdst default_dst; /* default destination */ struct timer_list age_timer; - spinlock_t hash_lock[FDB_HASH_SIZE]; + spinlock_t hash_lock; unsigned int addrcnt; struct gro_cells gro_cells; @@ -304,9 +304,10 @@ struct vxlan_dev { struct vxlan_vni_group __rcu *vnigrp; - struct hlist_head fdb_head[FDB_HASH_SIZE]; + struct rhashtable fdb_hash_tbl; struct rhashtable mdb_tbl; + struct hlist_head 
fdb_list; struct hlist_head mdb_list; unsigned int mdb_seq; }; diff --git a/include/net/xdp.h b/include/net/xdp.h index 48efacbaa35d..b40f1f96cb11 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -616,8 +616,12 @@ struct xdp_metadata_ops { u32 bpf_xdp_metadata_kfunc_id(int id); bool bpf_dev_bound_kfunc_id(u32 btf_id); void xdp_set_features_flag(struct net_device *dev, xdp_features_t val); +void xdp_set_features_flag_locked(struct net_device *dev, xdp_features_t val); void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg); +void xdp_features_set_redirect_target_locked(struct net_device *dev, + bool support_sg); void xdp_features_clear_redirect_target(struct net_device *dev); +void xdp_features_clear_redirect_target_locked(struct net_device *dev); #else static inline u32 bpf_xdp_metadata_kfunc_id(int id) { return 0; } static inline bool bpf_dev_bound_kfunc_id(u32 btf_id) { return false; } diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 6db7fc9dbaa4..48d6deb3efd7 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -1073,8 +1073,11 @@ int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid, int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, bool untagged); int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid); -int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr); -int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr); +void ocelot_hwstamp_get(struct ocelot *ocelot, int port, + struct kernel_hwtstamp_config *cfg); +int ocelot_hwstamp_set(struct ocelot *ocelot, int port, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack); int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port, struct sk_buff *skb, struct sk_buff **clone); diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 8857f5ea77d4..7f83d242c8e9 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -663,19 +663,26 @@ TRACE_EVENT(afs_cb_call, __field(unsigned int, call) __field(u32, op) __field(u16, service_id) + __field(u8, security_ix) + __field(u32, enctype) ), TP_fast_assign( __entry->call = call->debug_id; __entry->op = call->operation_ID; __entry->service_id = call->service_id; + __entry->security_ix = call->security_ix; + __entry->enctype = call->enctype; ), - TP_printk("c=%08x %s", + TP_printk("c=%08x %s sv=%u sx=%u en=%u", __entry->call, __entry->service_id == 2501 ? 
__print_symbolic(__entry->op, yfs_cm_operations) : - __print_symbolic(__entry->op, afs_cm_operations)) + __print_symbolic(__entry->op, afs_cm_operations), + __entry->service_id, + __entry->security_ix, + __entry->enctype) ); TRACE_EVENT(afs_call, diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index cad50d91077e..378d2dfc7392 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -25,6 +25,7 @@ EM(afs_abort_probeuuid_negative, "afs-probeuuid-neg") \ EM(afs_abort_send_data_error, "afs-send-data") \ EM(afs_abort_unmarshal_error, "afs-unmarshal") \ + EM(afs_abort_unsupported_sec_class, "afs-unsup-sec-class") \ /* rxperf errors */ \ EM(rxperf_abort_general_error, "rxperf-error") \ EM(rxperf_abort_oom, "rxperf-oom") \ @@ -68,6 +69,38 @@ EM(rxkad_abort_resp_tkt_sname, "rxkad-resp-tk-sname") \ EM(rxkad_abort_resp_unknown_tkt, "rxkad-resp-unknown-tkt") \ EM(rxkad_abort_resp_version, "rxkad-resp-version") \ + /* RxGK security errors */ \ + EM(rxgk_abort_1_verify_mic_eproto, "rxgk1-vfy-mic-eproto") \ + EM(rxgk_abort_2_decrypt_eproto, "rxgk2-dec-eproto") \ + EM(rxgk_abort_2_short_data, "rxgk2-short-data") \ + EM(rxgk_abort_2_short_encdata, "rxgk2-short-encdata") \ + EM(rxgk_abort_2_short_header, "rxgk2-short-hdr") \ + EM(rxgk_abort_bad_key_number, "rxgk-bad-key-num") \ + EM(rxgk_abort_chall_key_expired, "rxgk-chall-key-exp") \ + EM(rxgk_abort_chall_no_key, "rxgk-chall-nokey") \ + EM(rxgk_abort_chall_short, "rxgk-chall-short") \ + EM(rxgk_abort_resp_auth_dec, "rxgk-resp-auth-dec") \ + EM(rxgk_abort_resp_bad_callid, "rxgk-resp-bad-callid") \ + EM(rxgk_abort_resp_bad_nonce, "rxgk-resp-bad-nonce") \ + EM(rxgk_abort_resp_bad_param, "rxgk-resp-bad-param") \ + EM(rxgk_abort_resp_call_ctr, "rxgk-resp-call-ctr") \ + EM(rxgk_abort_resp_call_state, "rxgk-resp-call-state") \ + EM(rxgk_abort_resp_internal_error, "rxgk-resp-int-error") \ + EM(rxgk_abort_resp_nopkg, "rxgk-resp-nopkg") \ + EM(rxgk_abort_resp_short_applen, "rxgk-resp-short-applen") \ + EM(rxgk_abort_resp_short_auth, "rxgk-resp-short-auth") \ + EM(rxgk_abort_resp_short_call_list, "rxgk-resp-short-callls") \ + EM(rxgk_abort_resp_short_packet, "rxgk-resp-short-packet") \ + EM(rxgk_abort_resp_short_yfs_klen, "rxgk-resp-short-yfs-klen") \ + EM(rxgk_abort_resp_short_yfs_key, "rxgk-resp-short-yfs-key") \ + EM(rxgk_abort_resp_short_yfs_tkt, "rxgk-resp-short-yfs-tkt") \ + EM(rxgk_abort_resp_tok_dec, "rxgk-resp-tok-dec") \ + EM(rxgk_abort_resp_tok_internal_error, "rxgk-resp-tok-int-err") \ + EM(rxgk_abort_resp_tok_keyerr, "rxgk-resp-tok-keyerr") \ + EM(rxgk_abort_resp_tok_nokey, "rxgk-resp-tok-nokey") \ + EM(rxgk_abort_resp_tok_nopkg, "rxgk-resp-tok-nopkg") \ + EM(rxgk_abort_resp_tok_short, "rxgk-resp-tok-short") \ + EM(rxgk_abort_resp_xdr_align, "rxgk-resp-xdr-align") \ /* rxrpc errors */ \ EM(rxrpc_abort_call_improper_term, "call-improper-term") \ EM(rxrpc_abort_call_reset, "call-reset") \ @@ -77,6 +110,7 @@ EM(rxrpc_abort_call_timeout, "call-timeout") \ EM(rxrpc_abort_no_service_key, "no-serv-key") \ EM(rxrpc_abort_nomem, "nomem") \ + EM(rxrpc_abort_response_sendmsg, "resp-sendmsg") \ EM(rxrpc_abort_service_not_offered, "serv-not-offered") \ EM(rxrpc_abort_shut_down, "shut-down") \ EM(rxrpc_abort_unsupported_security, "unsup-sec") \ @@ -133,24 +167,33 @@ EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ EM(rxrpc_skb_get_conn_work, "GET conn-work") \ EM(rxrpc_skb_get_local_work, "GET locl-work") \ + EM(rxrpc_skb_get_post_oob, "GET post-oob ") \ EM(rxrpc_skb_get_reject_work, "GET rej-work ") \ 
EM(rxrpc_skb_get_to_recvmsg, "GET to-recv ") \ EM(rxrpc_skb_get_to_recvmsg_oos, "GET to-recv-o") \ EM(rxrpc_skb_new_encap_rcv, "NEW encap-rcv") \ EM(rxrpc_skb_new_error_report, "NEW error-rpt") \ EM(rxrpc_skb_new_jumbo_subpacket, "NEW jumbo-sub") \ + EM(rxrpc_skb_new_response_rxgk, "NEW resp-rxgk") \ + EM(rxrpc_skb_new_response_rxkad, "NEW resp-rxkd") \ EM(rxrpc_skb_new_unshared, "NEW unshared ") \ EM(rxrpc_skb_put_call_rx, "PUT call-rx ") \ + EM(rxrpc_skb_put_challenge, "PUT challenge") \ EM(rxrpc_skb_put_conn_secured, "PUT conn-secd") \ EM(rxrpc_skb_put_conn_work, "PUT conn-work") \ EM(rxrpc_skb_put_error_report, "PUT error-rep") \ EM(rxrpc_skb_put_input, "PUT input ") \ EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \ + EM(rxrpc_skb_put_oob, "PUT oob ") \ EM(rxrpc_skb_put_purge, "PUT purge ") \ + EM(rxrpc_skb_put_purge_oob, "PUT purge-oob") \ + EM(rxrpc_skb_put_response, "PUT response ") \ EM(rxrpc_skb_put_rotate, "PUT rotate ") \ EM(rxrpc_skb_put_unknown, "PUT unknown ") \ EM(rxrpc_skb_see_conn_work, "SEE conn-work") \ + EM(rxrpc_skb_see_oob_challenge, "SEE oob-chall") \ EM(rxrpc_skb_see_recvmsg, "SEE recvmsg ") \ + EM(rxrpc_skb_see_recvmsg_oob, "SEE recvm-oob") \ EM(rxrpc_skb_see_reject, "SEE reject ") \ EM(rxrpc_skb_see_rotate, "SEE rotate ") \ E_(rxrpc_skb_see_version, "SEE version ") @@ -216,9 +259,11 @@ EM(rxrpc_conn_free, "FREE ") \ EM(rxrpc_conn_get_activate_call, "GET act-call") \ EM(rxrpc_conn_get_call_input, "GET inp-call") \ + EM(rxrpc_conn_get_challenge_input, "GET inp-chal") \ EM(rxrpc_conn_get_conn_input, "GET inp-conn") \ EM(rxrpc_conn_get_idle, "GET idle ") \ EM(rxrpc_conn_get_poke_abort, "GET pk-abort") \ + EM(rxrpc_conn_get_poke_response, "GET response") \ EM(rxrpc_conn_get_poke_secured, "GET secured ") \ EM(rxrpc_conn_get_poke_timer, "GET poke ") \ EM(rxrpc_conn_get_service_conn, "GET svc-conn") \ @@ -226,10 +271,12 @@ EM(rxrpc_conn_new_service, "NEW service ") \ EM(rxrpc_conn_put_call, "PUT call ") \ EM(rxrpc_conn_put_call_input, "PUT inp-call") \ + EM(rxrpc_conn_put_challenge_input, "PUT inp-chal") \ EM(rxrpc_conn_put_conn_input, "PUT inp-conn") \ EM(rxrpc_conn_put_discard_idle, "PUT disc-idl") \ EM(rxrpc_conn_put_local_dead, "PUT loc-dead") \ EM(rxrpc_conn_put_noreuse, "PUT noreuse ") \ + EM(rxrpc_conn_put_oob, "PUT oob ") \ EM(rxrpc_conn_put_poke, "PUT poke ") \ EM(rxrpc_conn_put_service_reaped, "PUT svc-reap") \ EM(rxrpc_conn_put_unbundle, "PUT unbundle") \ @@ -331,6 +378,7 @@ EM(rxrpc_recvmsg_full, "FULL") \ EM(rxrpc_recvmsg_hole, "HOLE") \ EM(rxrpc_recvmsg_next, "NEXT") \ + EM(rxrpc_recvmsg_oobq, "OOBQ") \ EM(rxrpc_recvmsg_requeue, "REQU") \ EM(rxrpc_recvmsg_return, "RETN") \ EM(rxrpc_recvmsg_terminal, "TERM") \ @@ -455,8 +503,9 @@ EM(rxrpc_tx_point_call_final_resend, "CallFinalResend") \ EM(rxrpc_tx_point_conn_abort, "ConnAbort") \ EM(rxrpc_tx_point_reject, "Reject") \ + EM(rxrpc_tx_point_rxgk_challenge, "RxGKChall") \ EM(rxrpc_tx_point_rxkad_challenge, "RxkadChall") \ - EM(rxrpc_tx_point_rxkad_response, "RxkadResp") \ + EM(rxrpc_tx_point_response, "Response") \ EM(rxrpc_tx_point_version_keepalive, "VerKeepalive") \ E_(rxrpc_tx_point_version_reply, "VerReply") @@ -473,6 +522,7 @@ #define rxrpc_txbuf_traces \ EM(rxrpc_txbuf_alloc_data, "ALLOC DATA ") \ + EM(rxrpc_txbuf_alloc_response, "ALLOC RESP ") \ EM(rxrpc_txbuf_free, "FREE ") \ EM(rxrpc_txbuf_get_buffer, "GET BUFFER ") \ EM(rxrpc_txbuf_get_trans, "GET TRANS ") \ @@ -480,6 +530,7 @@ EM(rxrpc_txbuf_put_cleaned, "PUT CLEANED") \ EM(rxrpc_txbuf_put_nomem, "PUT NOMEM ") \ EM(rxrpc_txbuf_put_rotated, "PUT 
ROTATED") \ + EM(rxrpc_txbuf_put_response_tx, "PUT RESP TX") \ EM(rxrpc_txbuf_put_send_aborted, "PUT SEND-X ") \ EM(rxrpc_txbuf_put_trans, "PUT TRANS ") \ EM(rxrpc_txbuf_see_lost, "SEE LOST ") \ @@ -1150,6 +1201,39 @@ TRACE_EVENT(rxrpc_rx_conn_abort, __entry->abort_code) ); +TRACE_EVENT(rxrpc_tx_challenge, + TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial, + u32 version, u32 nonce), + + TP_ARGS(conn, serial, version, nonce), + + TP_STRUCT__entry( + __field(unsigned int, conn) + __field(rxrpc_serial_t, serial) + __field(u32, version) + __field(u32, nonce) + __field(u16, service_id) + __field(u8, security_ix) + ), + + TP_fast_assign( + __entry->conn = conn->debug_id; + __entry->serial = serial; + __entry->version = version; + __entry->nonce = nonce; + __entry->service_id = conn->service_id; + __entry->security_ix = conn->security_ix; + ), + + TP_printk("C=%08x CHALLENGE r=%08x sv=%u+%u v=%x n=%x", + __entry->conn, + __entry->serial, + __entry->service_id, + __entry->security_ix, + __entry->version, + __entry->nonce) + ); + TRACE_EVENT(rxrpc_rx_challenge, TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial, u32 version, u32 nonce, u32 min_level), @@ -1162,6 +1246,8 @@ TRACE_EVENT(rxrpc_rx_challenge, __field(u32, version) __field(u32, nonce) __field(u32, min_level) + __field(u16, service_id) + __field(u8, security_ix) ), TP_fast_assign( @@ -1170,16 +1256,60 @@ TRACE_EVENT(rxrpc_rx_challenge, __entry->version = version; __entry->nonce = nonce; __entry->min_level = min_level; + __entry->service_id = conn->service_id; + __entry->security_ix = conn->security_ix; ), - TP_printk("C=%08x CHALLENGE %08x v=%x n=%x ml=%x", + TP_printk("C=%08x CHALLENGE r=%08x sv=%u+%u v=%x n=%x ml=%x", __entry->conn, __entry->serial, + __entry->service_id, + __entry->security_ix, __entry->version, __entry->nonce, __entry->min_level) ); +TRACE_EVENT(rxrpc_tx_response, + TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial, + struct rxrpc_skb_priv *rsp), + + TP_ARGS(conn, serial, rsp), + + TP_STRUCT__entry( + __field(unsigned int, conn) + __field(rxrpc_serial_t, serial) + __field(rxrpc_serial_t, challenge) + __field(u32, version) + __field(u32, kvno) + __field(u16, ticket_len) + __field(u16, appdata_len) + __field(u16, service_id) + __field(u8, security_ix) + ), + + TP_fast_assign( + __entry->conn = conn->debug_id; + __entry->serial = serial; + __entry->challenge = rsp->resp.challenge_serial; + __entry->version = rsp->resp.version; + __entry->kvno = rsp->resp.kvno; + __entry->ticket_len = rsp->resp.ticket_len; + __entry->service_id = conn->service_id; + __entry->security_ix = conn->security_ix; + ), + + TP_printk("C=%08x RESPONSE r=%08x cr=%08x sv=%u+%u v=%x kv=%x tl=%u", + __entry->conn, + __entry->serial, + __entry->challenge, + __entry->service_id, + __entry->security_ix, + __entry->version, + __entry->kvno, + __entry->ticket_len) + ); + TRACE_EVENT(rxrpc_rx_response, TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial, u32 version, u32 kvno, u32 ticket_len), @@ -1192,6 +1322,7 @@ TRACE_EVENT(rxrpc_rx_response, __field(u32, version) __field(u32, kvno) __field(u32, ticket_len) + __field(u8, security_ix) ), TP_fast_assign( @@ -1200,11 +1331,13 @@ TRACE_EVENT(rxrpc_rx_response, __entry->version = version; __entry->kvno = kvno; __entry->ticket_len = ticket_len; + __entry->security_ix = conn->security_ix; ), - TP_printk("C=%08x RESPONSE %08x v=%x kvno=%x tl=%x", + TP_printk("C=%08x RESPONSE r=%08x sx=%u v=%x kvno=%x tl=%x", __entry->conn, __entry->serial, + __entry->security_ix, 
__entry->version, __entry->kvno, __entry->ticket_len) @@ -2668,6 +2801,30 @@ TRACE_EVENT(rxrpc_rack_timer, ktime_to_us(__entry->delay)) ); +TRACE_EVENT(rxrpc_rxgk_rekey, + TP_PROTO(struct rxrpc_connection *conn, + unsigned int current_key, unsigned int requested_key), + + TP_ARGS(conn, current_key, requested_key), + + TP_STRUCT__entry( + __field(unsigned int, conn) + __field(unsigned int, current_key) + __field(unsigned int, requested_key) + ), + + TP_fast_assign( + __entry->conn = conn->debug_id; + __entry->current_key = current_key; + __entry->requested_key = requested_key; + ), + + TP_printk("C=%08x cur=%x req=%x", + __entry->conn, + __entry->current_key, + __entry->requested_key) + ); + #undef EM #undef E_ diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index 3836de435d9d..b5310439536e 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -19,7 +19,6 @@ /* The protocol traced by inet_sock_set_state */ #define inet_protocol_names \ EM(IPPROTO_TCP) \ - EM(IPPROTO_DCCP) \ EM(IPPROTO_SCTP) \ EMe(IPPROTO_MPTCP) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 5d331383047b..de214f1dea58 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -21,7 +21,6 @@ TRACE_DEFINE_ENUM(SOCK_DGRAM); TRACE_DEFINE_ENUM(SOCK_RAW); TRACE_DEFINE_ENUM(SOCK_RDM); TRACE_DEFINE_ENUM(SOCK_SEQPACKET); -TRACE_DEFINE_ENUM(SOCK_DCCP); TRACE_DEFINE_ENUM(SOCK_PACKET); #define show_socket_type(type) \ @@ -31,7 +30,6 @@ TRACE_DEFINE_ENUM(SOCK_PACKET); { SOCK_RAW, "RAW" }, \ { SOCK_RDM, "RDM" }, \ { SOCK_SEQPACKET, "SEQPACKET" }, \ - { SOCK_DCCP, "DCCP" }, \ { SOCK_PACKET, "PACKET" }) /* This list is known to be incomplete, add new enums as needed. */ diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 1a40c41ff8c3..53e878fa14d1 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -259,6 +259,30 @@ TRACE_EVENT(tcp_retransmit_synack, __entry->saddr_v6, __entry->daddr_v6) ); +TRACE_EVENT(tcp_sendmsg_locked, + TP_PROTO(const struct sock *sk, const struct msghdr *msg, + const struct sk_buff *skb, int size_goal), + + TP_ARGS(sk, msg, skb, size_goal), + + TP_STRUCT__entry( + __field(const void *, skb_addr) + __field(int, skb_len) + __field(int, msg_left) + __field(int, size_goal) + ), + + TP_fast_assign( + __entry->skb_addr = skb; + __entry->skb_len = skb ? skb->len : 0; + __entry->msg_left = msg_data_left(msg); + __entry->size_goal = size_goal; + ), + + TP_printk("skb_addr %p skb_len %d msg_left %d size_goal %d", + __entry->skb_addr, __entry->skb_len, __entry->msg_left, + __entry->size_goal)); + DECLARE_TRACE(tcp_cwnd_reduction_tp, TP_PROTO(const struct sock *sk, int newly_acked_sacked, int newly_lost, int flag), @@ -269,7 +293,7 @@ DECLARE_TRACE(tcp_cwnd_reduction_tp, TRACE_EVENT(tcp_probe, - TP_PROTO(struct sock *sk, struct sk_buff *skb), + TP_PROTO(struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb), diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 9401aa343673..a5ee0f13740a 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -385,6 +385,21 @@ enum devlink_linecard_state { DEVLINK_LINECARD_STATE_MAX = __DEVLINK_LINECARD_STATE_MAX - 1 }; +/* Variable attribute type. 
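+ *
+ * The leading values line up with the kernel's internal NLA_* constants
+ * (e.g. DEVLINK_VAR_ATTR_TYPE_STRING == 5 == NLA_STRING; illustrative,
+ * NLA_* itself is not uapi), which is what lets the in-kernel
+ * devlink_param_type enum alias these values directly, as seen earlier
+ * in this series.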
*/ +enum devlink_var_attr_type { + /* Following values relate to the internal NLA_* values */ + DEVLINK_VAR_ATTR_TYPE_U8 = 1, + DEVLINK_VAR_ATTR_TYPE_U16, + DEVLINK_VAR_ATTR_TYPE_U32, + DEVLINK_VAR_ATTR_TYPE_U64, + DEVLINK_VAR_ATTR_TYPE_STRING, + DEVLINK_VAR_ATTR_TYPE_FLAG, + DEVLINK_VAR_ATTR_TYPE_NUL_STRING = 10, + DEVLINK_VAR_ATTR_TYPE_BINARY, + __DEVLINK_VAR_ATTR_TYPE_CUSTOM_BASE = 0x80, + /* Any possible custom types, unrelated to NLA_* values go below */ +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 84833cca29fe..707c1844010c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2295,71 +2295,75 @@ static inline int ethtool_validate_duplex(__u8 duplex) #define RXH_XFRM_SYM_OR_XOR (1 << 1) #define RXH_XFRM_NO_CHANGE 0xff -/* L2-L4 network traffic flow types */ -#define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */ -#define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */ -#define SCTP_V4_FLOW 0x03 /* hash or spec (sctp_ip4_spec) */ -#define AH_ESP_V4_FLOW 0x04 /* hash only */ -#define TCP_V6_FLOW 0x05 /* hash or spec (tcp_ip6_spec; nfc only) */ -#define UDP_V6_FLOW 0x06 /* hash or spec (udp_ip6_spec; nfc only) */ -#define SCTP_V6_FLOW 0x07 /* hash or spec (sctp_ip6_spec; nfc only) */ -#define AH_ESP_V6_FLOW 0x08 /* hash only */ -#define AH_V4_FLOW 0x09 /* hash or spec (ah_ip4_spec) */ -#define ESP_V4_FLOW 0x0a /* hash or spec (esp_ip4_spec) */ -#define AH_V6_FLOW 0x0b /* hash or spec (ah_ip6_spec; nfc only) */ -#define ESP_V6_FLOW 0x0c /* hash or spec (esp_ip6_spec; nfc only) */ -#define IPV4_USER_FLOW 0x0d /* spec only (usr_ip4_spec) */ -#define IP_USER_FLOW IPV4_USER_FLOW -#define IPV6_USER_FLOW 0x0e /* spec only (usr_ip6_spec; nfc only) */ -#define IPV4_FLOW 0x10 /* hash only */ -#define IPV6_FLOW 0x11 /* hash only */ -#define ETHER_FLOW 0x12 /* spec only (ether_spec) */ - -/* Used for GTP-U IPv4 and IPv6. - * The format of GTP packets only includes - * elements such as TEID and GTP version. - * It is primarily intended for data communication of the UE. - */ -#define GTPU_V4_FLOW 0x13 /* hash only */ -#define GTPU_V6_FLOW 0x14 /* hash only */ - -/* Use for GTP-C IPv4 and v6. - * The format of these GTP packets does not include TEID. - * Primarily expected to be used for communication - * to create sessions for UE data communication, - * commonly referred to as CSR (Create Session Request). - */ -#define GTPC_V4_FLOW 0x15 /* hash only */ -#define GTPC_V6_FLOW 0x16 /* hash only */ - -/* Use for GTP-C IPv4 and v6. - * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. - * After session creation, it becomes this packet. - * This is mainly used for requests to realize UE handover. - */ -#define GTPC_TEID_V4_FLOW 0x17 /* hash only */ -#define GTPC_TEID_V6_FLOW 0x18 /* hash only */ - -/* Use for GTP-U and extended headers for the PSC (PDU Session Container). - * The format of these GTP packets includes TEID and QFI. - * In 5G communication using UPF (User Plane Function), - * data communication with this extended header is performed. - */ -#define GTPU_EH_V4_FLOW 0x19 /* hash only */ -#define GTPU_EH_V6_FLOW 0x1a /* hash only */ - -/* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. - * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by - * UL/DL included in the PSC. 
- * There are differences in the data included based on Downlink/Uplink, - * and can be used to distinguish packets. - * The functions described so far are useful when you want to - * handle communication from the mobile network in UPF, PGW, etc. - */ -#define GTPU_UL_V4_FLOW 0x1b /* hash only */ -#define GTPU_UL_V6_FLOW 0x1c /* hash only */ -#define GTPU_DL_V4_FLOW 0x1d /* hash only */ -#define GTPU_DL_V6_FLOW 0x1e /* hash only */ +enum { + /* L2-L4 network traffic flow types */ + TCP_V4_FLOW = 0x01, /* hash or spec (tcp_ip4_spec) */ + UDP_V4_FLOW = 0x02, /* hash or spec (udp_ip4_spec) */ + SCTP_V4_FLOW = 0x03, /* hash or spec (sctp_ip4_spec) */ + AH_ESP_V4_FLOW = 0x04, /* hash only */ + TCP_V6_FLOW = 0x05, /* hash or spec (tcp_ip6_spec; nfc only) */ + UDP_V6_FLOW = 0x06, /* hash or spec (udp_ip6_spec; nfc only) */ + SCTP_V6_FLOW = 0x07, /* hash or spec (sctp_ip6_spec; nfc only) */ + AH_ESP_V6_FLOW = 0x08, /* hash only */ + AH_V4_FLOW = 0x09, /* hash or spec (ah_ip4_spec) */ + ESP_V4_FLOW = 0x0a, /* hash or spec (esp_ip4_spec) */ + AH_V6_FLOW = 0x0b, /* hash or spec (ah_ip6_spec; nfc only) */ + ESP_V6_FLOW = 0x0c, /* hash or spec (esp_ip6_spec; nfc only) */ + IPV4_USER_FLOW = 0x0d, /* spec only (usr_ip4_spec) */ + IP_USER_FLOW = IPV4_USER_FLOW, + IPV6_USER_FLOW = 0x0e, /* spec only (usr_ip6_spec; nfc only) */ + IPV4_FLOW = 0x10, /* hash only */ + IPV6_FLOW = 0x11, /* hash only */ + ETHER_FLOW = 0x12, /* spec only (ether_spec) */ + + /* Used for GTP-U IPv4 and IPv6. + * The format of GTP packets only includes + * elements such as TEID and GTP version. + * It is primarily intended for data communication of the UE. + */ + GTPU_V4_FLOW = 0x13, /* hash only */ + GTPU_V6_FLOW = 0x14, /* hash only */ + + /* Use for GTP-C IPv4 and v6. + * The format of these GTP packets does not include TEID. + * Primarily expected to be used for communication + * to create sessions for UE data communication, + * commonly referred to as CSR (Create Session Request). + */ + GTPC_V4_FLOW = 0x15, /* hash only */ + GTPC_V6_FLOW = 0x16, /* hash only */ + + /* Use for GTP-C IPv4 and v6. + * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. + * After session creation, it becomes this packet. + * This is mainly used for requests to realize UE handover. + */ + GTPC_TEID_V4_FLOW = 0x17, /* hash only */ + GTPC_TEID_V6_FLOW = 0x18, /* hash only */ + + /* Use for GTP-U and extended headers for the PSC (PDU Session Container). + * The format of these GTP packets includes TEID and QFI. + * In 5G communication using UPF (User Plane Function), + * data communication with this extended header is performed. + */ + GTPU_EH_V4_FLOW = 0x19, /* hash only */ + GTPU_EH_V6_FLOW = 0x1a, /* hash only */ + + /* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. + * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by + * UL/DL included in the PSC. + * There are differences in the data included based on Downlink/Uplink, + * and can be used to distinguish packets. + * The functions described so far are useful when you want to + * handle communication from the mobile network in UPF, PGW, etc. 
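+ *
+ * A hypothetical ioctl-side selection of one of these values:
+ *
+ *	struct ethtool_rxnfc nfc = {
+ *		.cmd		= ETHTOOL_GRXFH,
+ *		.flow_type	= GTPU_UL_V4_FLOW,
+ *	};
+ *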
+ */ + GTPU_UL_V4_FLOW = 0x1b, /* hash only */ + GTPU_UL_V6_FLOW = 0x1c, /* hash only */ + GTPU_DL_V4_FLOW = 0x1d, /* hash only */ + GTPU_DL_V6_FLOW = 0x1e, /* hash only */ + + __FLOW_TYPE_COUNT, +}; /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ #define FLOW_EXT 0x80000000 diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 2df6e4035d50..418c4be697ad 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_FIB_RULES_H -#define __LINUX_FIB_RULES_H +#ifndef _UAPI__LINUX_FIB_RULES_H +#define _UAPI__LINUX_FIB_RULES_H #include <linux/types.h> #include <linux/rtnetlink.h> diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h index 1c392dd95a5e..aa7958b4e41d 100644 --- a/include/uapi/linux/if_addr.h +++ b/include/uapi/linux/if_addr.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_IF_ADDR_H -#define __LINUX_IF_ADDR_H +#ifndef _UAPI__LINUX_IF_ADDR_H +#define _UAPI__LINUX_IF_ADDR_H #include <linux/types.h> #include <linux/netlink.h> diff --git a/include/uapi/linux/if_addrlabel.h b/include/uapi/linux/if_addrlabel.h index d1f5974c76e1..e69db764fbba 100644 --- a/include/uapi/linux/if_addrlabel.h +++ b/include/uapi/linux/if_addrlabel.h @@ -8,8 +8,8 @@ * YOSHIFUJI Hideaki @ USAGI/WIDE <yoshfuji@linux-ipv6.org> */ -#ifndef __LINUX_IF_ADDRLABEL_H -#define __LINUX_IF_ADDRLABEL_H +#ifndef _UAPI__LINUX_IF_ADDRLABEL_H +#define _UAPI__LINUX_IF_ADDRLABEL_H #include <linux/types.h> diff --git a/include/uapi/linux/if_alg.h b/include/uapi/linux/if_alg.h index 0824fbc026a1..b35871cbeed7 100644 --- a/include/uapi/linux/if_alg.h +++ b/include/uapi/linux/if_alg.h @@ -11,8 +11,8 @@ * */ -#ifndef _LINUX_IF_ALG_H -#define _LINUX_IF_ALG_H +#ifndef _UAPI_LINUX_IF_ALG_H +#define _UAPI_LINUX_IF_ALG_H #include <linux/types.h> @@ -58,4 +58,4 @@ struct af_alg_iv { #define ALG_OP_DECRYPT 0 #define ALG_OP_ENCRYPT 1 -#endif /* _LINUX_IF_ALG_H */ +#endif /* _UAPI_LINUX_IF_ALG_H */ diff --git a/include/uapi/linux/if_arcnet.h b/include/uapi/linux/if_arcnet.h index b122cfac7128..473569eaf692 100644 --- a/include/uapi/linux/if_arcnet.h +++ b/include/uapi/linux/if_arcnet.h @@ -14,8 +14,8 @@ * 2 of the License, or (at your option) any later version. 
*/ -#ifndef _LINUX_IF_ARCNET_H -#define _LINUX_IF_ARCNET_H +#ifndef _UAPI_LINUX_IF_ARCNET_H +#define _UAPI_LINUX_IF_ARCNET_H #include <linux/types.h> #include <linux/if_ether.h> @@ -127,4 +127,4 @@ struct archdr { } soft; }; -#endif /* _LINUX_IF_ARCNET_H */ +#endif /* _UAPI_LINUX_IF_ARCNET_H */ diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h index d174914a837d..3bcc03f3aa4f 100644 --- a/include/uapi/linux/if_bonding.h +++ b/include/uapi/linux/if_bonding.h @@ -41,8 +41,8 @@ * - added definitions for various XOR hashing policies */ -#ifndef _LINUX_IF_BONDING_H -#define _LINUX_IF_BONDING_H +#ifndef _UAPI_LINUX_IF_BONDING_H +#define _UAPI_LINUX_IF_BONDING_H #include <linux/if.h> #include <linux/types.h> @@ -152,4 +152,4 @@ enum { }; #define BOND_3AD_STAT_MAX (__BOND_3AD_STAT_MAX - 1) -#endif /* _LINUX_IF_BONDING_H */ +#endif /* _UAPI_LINUX_IF_BONDING_H */ diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index a5b743a2f775..73876c0e2bba 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -699,10 +699,11 @@ struct br_mdb_entry { #define MDB_TEMPORARY 0 #define MDB_PERMANENT 1 __u8 state; -#define MDB_FLAGS_OFFLOAD (1 << 0) -#define MDB_FLAGS_FAST_LEAVE (1 << 1) -#define MDB_FLAGS_STAR_EXCL (1 << 2) -#define MDB_FLAGS_BLOCKED (1 << 3) +#define MDB_FLAGS_OFFLOAD (1 << 0) +#define MDB_FLAGS_FAST_LEAVE (1 << 1) +#define MDB_FLAGS_STAR_EXCL (1 << 2) +#define MDB_FLAGS_BLOCKED (1 << 3) +#define MDB_FLAGS_OFFLOAD_FAILED (1 << 4) __u8 flags; __u16 vid; struct { @@ -830,6 +831,7 @@ enum br_boolopt_id { BR_BOOLOPT_NO_LL_LEARN, BR_BOOLOPT_MCAST_VLAN_SNOOPING, BR_BOOLOPT_MST_ENABLE, + BR_BOOLOPT_MDB_OFFLOAD_FAIL_NOTIFICATION, BR_BOOLOPT_MAX }; diff --git a/include/uapi/linux/if_fc.h b/include/uapi/linux/if_fc.h index 3e3173282cc3..ff5ab92d16c2 100644 --- a/include/uapi/linux/if_fc.h +++ b/include/uapi/linux/if_fc.h @@ -18,8 +18,8 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#ifndef _LINUX_IF_FC_H -#define _LINUX_IF_FC_H +#ifndef _UAPI_LINUX_IF_FC_H +#define _UAPI_LINUX_IF_FC_H #include <linux/types.h> @@ -49,4 +49,4 @@ struct fcllc { __be16 ethertype; /* ether type field */ }; -#endif /* _LINUX_IF_FC_H */ +#endif /* _UAPI_LINUX_IF_FC_H */ diff --git a/include/uapi/linux/if_hippi.h b/include/uapi/linux/if_hippi.h index 785a1452a66c..42c4ffd11dae 100644 --- a/include/uapi/linux/if_hippi.h +++ b/include/uapi/linux/if_hippi.h @@ -20,8 +20,8 @@ * 2 of the License, or (at your option) any later version. 
*/ -#ifndef _LINUX_IF_HIPPI_H -#define _LINUX_IF_HIPPI_H +#ifndef _UAPI_LINUX_IF_HIPPI_H +#define _UAPI_LINUX_IF_HIPPI_H #include <linux/types.h> #include <asm/byteorder.h> @@ -151,4 +151,4 @@ struct hippi_hdr { struct hippi_snap_hdr snap; } __attribute__((packed)); -#endif /* _LINUX_IF_HIPPI_H */ +#endif /* _UAPI_LINUX_IF_HIPPI_H */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 318386cc5b0d..3ad2d5d98034 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1986,4 +1986,19 @@ enum { #define IFLA_DSA_MAX (__IFLA_DSA_MAX - 1) +/* OVPN section */ + +enum ovpn_mode { + OVPN_MODE_P2P, + OVPN_MODE_MP, +}; + +enum { + IFLA_OVPN_UNSPEC, + IFLA_OVPN_MODE, + __IFLA_OVPN_MAX, +}; + +#define IFLA_OVPN_MAX (__IFLA_OVPN_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 1d2718dd9647..6cd1d7a41dfb 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_IF_PACKET_H -#define __LINUX_IF_PACKET_H +#ifndef _UAPI__LINUX_IF_PACKET_H +#define _UAPI__LINUX_IF_PACKET_H #include <asm/byteorder.h> #include <linux/types.h> diff --git a/include/uapi/linux/if_plip.h b/include/uapi/linux/if_plip.h index 495a366112f2..054d86a9c6e6 100644 --- a/include/uapi/linux/if_plip.h +++ b/include/uapi/linux/if_plip.h @@ -9,8 +9,8 @@ * */ -#ifndef _LINUX_IF_PLIP_H -#define _LINUX_IF_PLIP_H +#ifndef _UAPI_LINUX_IF_PLIP_H +#define _UAPI_LINUX_IF_PLIP_H #include <linux/sockios.h> diff --git a/include/uapi/linux/if_slip.h b/include/uapi/linux/if_slip.h index 65937be53103..299bf7adc862 100644 --- a/include/uapi/linux/if_slip.h +++ b/include/uapi/linux/if_slip.h @@ -6,8 +6,8 @@ * KISS TNC driver. */ -#ifndef __LINUX_SLIP_H -#define __LINUX_SLIP_H +#ifndef _UAPI__LINUX_SLIP_H +#define _UAPI__LINUX_SLIP_H #define SL_MODE_SLIP 0 #define SL_MODE_CSLIP 1 diff --git a/include/uapi/linux/if_x25.h b/include/uapi/linux/if_x25.h index 3a5938e38370..861cfa983db4 100644 --- a/include/uapi/linux/if_x25.h +++ b/include/uapi/linux/if_x25.h @@ -13,8 +13,8 @@ * GNU General Public License for more details. */ -#ifndef _IF_X25_H -#define _IF_X25_H +#ifndef _UAPI_IF_X25_H +#define _UAPI_IF_X25_H #include <linux/types.h> @@ -24,4 +24,4 @@ #define X25_IFACE_DISCONNECT 0x02 #define X25_IFACE_PARAMS 0x03 -#endif /* _IF_X25_H */ +#endif /* _UAPI_IF_X25_H */ diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 42869770776e..44f2bb93e7e6 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -7,8 +7,8 @@ * Magnus Karlsson <magnus.karlsson@intel.com> */ -#ifndef _LINUX_IF_XDP_H -#define _LINUX_IF_XDP_H +#ifndef _UAPI_LINUX_IF_XDP_H +#define _UAPI_LINUX_IF_XDP_H #include <linux/types.h> @@ -180,4 +180,4 @@ struct xdp_desc { /* TX packet carries valid metadata. */ #define XDP_TX_METADATA (1 << 1) -#endif /* _LINUX_IF_XDP_H */ +#endif /* _UAPI_LINUX_IF_XDP_H */ diff --git a/include/uapi/linux/ip6_tunnel.h b/include/uapi/linux/ip6_tunnel.h index 0245269b037c..85182a839d42 100644 --- a/include/uapi/linux/ip6_tunnel.h +++ b/include/uapi/linux/ip6_tunnel.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _IP6_TUNNEL_H -#define _IP6_TUNNEL_H +#ifndef _UAPI_IP6_TUNNEL_H +#define _UAPI_IP6_TUNNEL_H #include <linux/types.h> #include <linux/if.h> /* For IFNAMSIZ. 
*/ diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h index 84f622a66a7a..9dd41c2f58a6 100644 --- a/include/uapi/linux/net_dropmon.h +++ b/include/uapi/linux/net_dropmon.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __NET_DROPMON_H -#define __NET_DROPMON_H +#ifndef _UAPI__NET_DROPMON_H +#define _UAPI__NET_DROPMON_H #include <linux/types.h> #include <linux/netlink.h> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 383213de612a..a93e6ea37fb3 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -7,8 +7,8 @@ * */ -#ifndef _NET_TIMESTAMPING_H -#define _NET_TIMESTAMPING_H +#ifndef _UAPI_NET_TIMESTAMPING_H +#define _UAPI_NET_TIMESTAMPING_H #include <linux/types.h> #include <linux/socket.h> /* for SO_TIMESTAMPING */ @@ -216,4 +216,4 @@ struct sock_txtime { __u32 flags; /* as defined by enum txtime_flags */ }; -#endif /* _NET_TIMESTAMPING_H */ +#endif /* _UAPI_NET_TIMESTAMPING_H */ diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 7600bf62dbdf..7eb9571786b8 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -219,6 +219,7 @@ enum { NETDEV_CMD_QSTATS_GET, NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, + NETDEV_CMD_BIND_TX, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 49c944e78463..7d6bc19a0153 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -394,6 +394,8 @@ enum nft_set_field_attributes { * @NFTA_SET_HANDLE: set handle (NLA_U64) * @NFTA_SET_EXPR: set expression (NLA_NESTED: nft_expr_attributes) * @NFTA_SET_EXPRESSIONS: list of expressions (NLA_NESTED: nft_list_attributes) + * @NFTA_SET_TYPE: set backend type (NLA_STRING) + * @NFTA_SET_COUNT: number of set elements (NLA_U32) */ enum nft_set_attributes { NFTA_SET_UNSPEC, @@ -415,6 +417,8 @@ enum nft_set_attributes { NFTA_SET_HANDLE, NFTA_SET_EXPR, NFTA_SET_EXPRESSIONS, + NFTA_SET_TYPE, + NFTA_SET_COUNT, __NFTA_SET_MAX }; #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h index dfa61be43d2f..ff28200204bb 100644 --- a/include/uapi/linux/netlink_diag.h +++ b/include/uapi/linux/netlink_diag.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __NETLINK_DIAG_H__ -#define __NETLINK_DIAG_H__ +#ifndef _UAPI__NETLINK_DIAG_H__ +#define _UAPI__NETLINK_DIAG_H__ #include <linux/types.h> diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ddcc4cda74af..e9ccf43fe3c6 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -8036,6 +8036,11 @@ enum nl80211_sar_specs_attrs { * Setting this flag is permitted only if the driver advertises EMA support * by setting wiphy->ema_max_profile_periodicity to non-zero. * + * @NL80211_MBSSID_CONFIG_ATTR_TX_LINK_ID: Link ID of the transmitted profile. + * This parameter is mandatory when NL80211_ATTR_MBSSID_CONFIG attributes + * are sent for a non-transmitted profile and if the transmitted profile + * is part of an MLD. For all other cases this parameter is unnecessary. 
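+ * For instance (hypothetical), userspace configuring a non-transmitted
+ * profile whose transmitter is link 1 of an MLD AP would add:
+ *
+ *	nla_put_u8(msg, NL80211_MBSSID_CONFIG_ATTR_TX_LINK_ID, 1);
+ *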
+ * * @__NL80211_MBSSID_CONFIG_ATTR_LAST: Internal * @NL80211_MBSSID_CONFIG_ATTR_MAX: highest attribute */ @@ -8047,6 +8052,7 @@ enum nl80211_mbssid_config_attributes { NL80211_MBSSID_CONFIG_ATTR_INDEX, NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX, NL80211_MBSSID_CONFIG_ATTR_EMA, + NL80211_MBSSID_CONFIG_ATTR_TX_LINK_ID, /* keep last */ __NL80211_MBSSID_CONFIG_ATTR_LAST, diff --git a/include/uapi/linux/ovpn.h b/include/uapi/linux/ovpn.h new file mode 100644 index 000000000000..680d1522dc87 --- /dev/null +++ b/include/uapi/linux/ovpn.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/ovpn.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_OVPN_H +#define _UAPI_LINUX_OVPN_H + +#define OVPN_FAMILY_NAME "ovpn" +#define OVPN_FAMILY_VERSION 1 + +#define OVPN_NONCE_TAIL_SIZE 8 + +enum ovpn_cipher_alg { + OVPN_CIPHER_ALG_NONE, + OVPN_CIPHER_ALG_AES_GCM, + OVPN_CIPHER_ALG_CHACHA20_POLY1305, +}; + +enum ovpn_del_peer_reason { + OVPN_DEL_PEER_REASON_TEARDOWN, + OVPN_DEL_PEER_REASON_USERSPACE, + OVPN_DEL_PEER_REASON_EXPIRED, + OVPN_DEL_PEER_REASON_TRANSPORT_ERROR, + OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT, +}; + +enum ovpn_key_slot { + OVPN_KEY_SLOT_PRIMARY, + OVPN_KEY_SLOT_SECONDARY, +}; + +enum { + OVPN_A_PEER_ID = 1, + OVPN_A_PEER_REMOTE_IPV4, + OVPN_A_PEER_REMOTE_IPV6, + OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID, + OVPN_A_PEER_REMOTE_PORT, + OVPN_A_PEER_SOCKET, + OVPN_A_PEER_SOCKET_NETNSID, + OVPN_A_PEER_VPN_IPV4, + OVPN_A_PEER_VPN_IPV6, + OVPN_A_PEER_LOCAL_IPV4, + OVPN_A_PEER_LOCAL_IPV6, + OVPN_A_PEER_LOCAL_PORT, + OVPN_A_PEER_KEEPALIVE_INTERVAL, + OVPN_A_PEER_KEEPALIVE_TIMEOUT, + OVPN_A_PEER_DEL_REASON, + OVPN_A_PEER_VPN_RX_BYTES, + OVPN_A_PEER_VPN_TX_BYTES, + OVPN_A_PEER_VPN_RX_PACKETS, + OVPN_A_PEER_VPN_TX_PACKETS, + OVPN_A_PEER_LINK_RX_BYTES, + OVPN_A_PEER_LINK_TX_BYTES, + OVPN_A_PEER_LINK_RX_PACKETS, + OVPN_A_PEER_LINK_TX_PACKETS, + + __OVPN_A_PEER_MAX, + OVPN_A_PEER_MAX = (__OVPN_A_PEER_MAX - 1) +}; + +enum { + OVPN_A_KEYCONF_PEER_ID = 1, + OVPN_A_KEYCONF_SLOT, + OVPN_A_KEYCONF_KEY_ID, + OVPN_A_KEYCONF_CIPHER_ALG, + OVPN_A_KEYCONF_ENCRYPT_DIR, + OVPN_A_KEYCONF_DECRYPT_DIR, + + __OVPN_A_KEYCONF_MAX, + OVPN_A_KEYCONF_MAX = (__OVPN_A_KEYCONF_MAX - 1) +}; + +enum { + OVPN_A_KEYDIR_CIPHER_KEY = 1, + OVPN_A_KEYDIR_NONCE_TAIL, + + __OVPN_A_KEYDIR_MAX, + OVPN_A_KEYDIR_MAX = (__OVPN_A_KEYDIR_MAX - 1) +}; + +enum { + OVPN_A_IFINDEX = 1, + OVPN_A_PEER, + OVPN_A_KEYCONF, + + __OVPN_A_MAX, + OVPN_A_MAX = (__OVPN_A_MAX - 1) +}; + +enum { + OVPN_CMD_PEER_NEW = 1, + OVPN_CMD_PEER_SET, + OVPN_CMD_PEER_GET, + OVPN_CMD_PEER_DEL, + OVPN_CMD_PEER_DEL_NTF, + OVPN_CMD_KEY_NEW, + OVPN_CMD_KEY_GET, + OVPN_CMD_KEY_SWAP, + OVPN_CMD_KEY_SWAP_NTF, + OVPN_CMD_KEY_DEL, + + __OVPN_CMD_MAX, + OVPN_CMD_MAX = (__OVPN_CMD_MAX - 1) +}; + +#define OVPN_MCGRP_PEERS "peers" + +#endif /* _UAPI_LINUX_OVPN_H */ diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 2c32080416b5..490821364165 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_PKT_CLS_H -#define __LINUX_PKT_CLS_H +#ifndef _UAPI__LINUX_PKT_CLS_H +#define _UAPI__LINUX_PKT_CLS_H #include <linux/types.h> #include <linux/pkt_sched.h> diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 25a9a47001cd..9ea874395717 100644 --- a/include/uapi/linux/pkt_sched.h +++ 
b/include/uapi/linux/pkt_sched.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_PKT_SCHED_H -#define __LINUX_PKT_SCHED_H +#ifndef _UAPI__LINUX_PKT_SCHED_H +#define _UAPI__LINUX_PKT_SCHED_H #include <linux/const.h> #include <linux/types.h> diff --git a/include/uapi/linux/rxrpc.h b/include/uapi/linux/rxrpc.h index 8f8dc7a937a4..d9735abd4c79 100644 --- a/include/uapi/linux/rxrpc.h +++ b/include/uapi/linux/rxrpc.h @@ -36,26 +36,33 @@ struct sockaddr_rxrpc { #define RXRPC_MIN_SECURITY_LEVEL 4 /* minimum security level */ #define RXRPC_UPGRADEABLE_SERVICE 5 /* Upgrade service[0] -> service[1] */ #define RXRPC_SUPPORTED_CMSG 6 /* Get highest supported control message type */ +#define RXRPC_MANAGE_RESPONSE 7 /* [clnt] Want to manage RESPONSE packets */ /* * RxRPC control messages * - If neither abort or accept are specified, the message is a data message. * - terminal messages mean that a user call ID tag can be recycled + * - C/S/- indicate whether these are applicable to client, server or both * - s/r/- indicate whether these are applicable to sendmsg() and/or recvmsg() */ enum rxrpc_cmsg_type { - RXRPC_USER_CALL_ID = 1, /* sr: user call ID specifier */ - RXRPC_ABORT = 2, /* sr: abort request / notification [terminal] */ - RXRPC_ACK = 3, /* -r: [Service] RPC op final ACK received [terminal] */ - RXRPC_NET_ERROR = 5, /* -r: network error received [terminal] */ - RXRPC_BUSY = 6, /* -r: server busy received [terminal] */ - RXRPC_LOCAL_ERROR = 7, /* -r: local error generated [terminal] */ - RXRPC_NEW_CALL = 8, /* -r: [Service] new incoming call notification */ - RXRPC_EXCLUSIVE_CALL = 10, /* s-: Call should be on exclusive connection */ - RXRPC_UPGRADE_SERVICE = 11, /* s-: Request service upgrade for client call */ - RXRPC_TX_LENGTH = 12, /* s-: Total length of Tx data */ - RXRPC_SET_CALL_TIMEOUT = 13, /* s-: Set one or more call timeouts */ - RXRPC_CHARGE_ACCEPT = 14, /* s-: Charge the accept pool with a user call ID */ + RXRPC_USER_CALL_ID = 1, /* -sr: User call ID specifier */ + RXRPC_ABORT = 2, /* -sr: Abort request / notification [terminal] */ + RXRPC_ACK = 3, /* S-r: RPC op final ACK received [terminal] */ + RXRPC_NET_ERROR = 5, /* --r: Network error received [terminal] */ + RXRPC_BUSY = 6, /* C-r: Server busy received [terminal] */ + RXRPC_LOCAL_ERROR = 7, /* --r: Local error generated [terminal] */ + RXRPC_NEW_CALL = 8, /* S-r: New incoming call notification */ + RXRPC_EXCLUSIVE_CALL = 10, /* Cs-: Call should be on exclusive connection */ + RXRPC_UPGRADE_SERVICE = 11, /* Cs-: Request service upgrade for client call */ + RXRPC_TX_LENGTH = 12, /* -s-: Total length of Tx data */ + RXRPC_SET_CALL_TIMEOUT = 13, /* -s-: Set one or more call timeouts */ + RXRPC_CHARGE_ACCEPT = 14, /* Ss-: Charge the accept pool with a user call ID */ + RXRPC_OOB_ID = 15, /* -sr: OOB message ID */ + RXRPC_CHALLENGED = 16, /* C-r: Info on a received CHALLENGE */ + RXRPC_RESPOND = 17, /* Cs-: Respond to a challenge */ + RXRPC_RESPONDED = 18, /* S-r: Data received in RESPONSE */ + RXRPC_RESP_RXGK_APPDATA = 19, /* Cs-: RESPONSE: RxGK app data to include */ RXRPC__SUPPORTED }; @@ -73,6 +80,7 @@ enum rxrpc_cmsg_type { #define RXRPC_SECURITY_RXKAD 2 /* kaserver or kerberos 4 */ #define RXRPC_SECURITY_RXGK 4 /* gssapi-based */ #define RXRPC_SECURITY_RXK5 5 /* kerberos 5 */ +#define RXRPC_SECURITY_YFS_RXGK 6 /* YFS gssapi-based */ /* * RxRPC-level abort codes @@ -118,4 +126,49 @@ enum rxrpc_cmsg_type { #define RXKADDATALEN 19270411 /* user data too long */ #define 
RXKADILLEGALLEVEL 19270412 /* caller not authorised to use encrypted conns */ +/* + * RxGK GSSAPI security abort codes. + */ +#if 0 /* Original standard abort codes (used by OpenAFS) */ +#define RXGK_INCONSISTENCY 1233242880 /* Security module structure inconsistent */ +#define RXGK_PACKETSHORT 1233242881 /* Packet too short for security challenge */ +#define RXGK_BADCHALLENGE 1233242882 /* Invalid security challenge */ +#define RXGK_BADETYPE 1233242883 /* Invalid or impermissible encryption type */ +#define RXGK_BADLEVEL 1233242884 /* Invalid or impermissible security level */ +#define RXGK_BADKEYNO 1233242885 /* Key version number not found */ +#define RXGK_EXPIRED 1233242886 /* Token has expired */ +#define RXGK_NOTAUTH 1233242887 /* Caller not authorized */ +#define RXGK_BAD_TOKEN 1233242888 /* Security object was passed a bad token */ +#define RXGK_SEALED_INCON 1233242889 /* Sealed data inconsistent */ +#define RXGK_DATA_LEN 1233242890 /* User data too long */ +#define RXGK_BAD_QOP 1233242891 /* Inadequate quality of protection available */ +#else /* Revised standard abort codes (used by YFS) */ +#define RXGK_INCONSISTENCY 1233242880 /* Security module structure inconsistent */ +#define RXGK_PACKETSHORT 1233242881 /* Packet too short for security challenge */ +#define RXGK_BADCHALLENGE 1233242882 /* Security challenge/response failed */ +#define RXGK_SEALEDINCON 1233242883 /* Sealed data is inconsistent */ +#define RXGK_NOTAUTH 1233242884 /* Caller not authorised */ +#define RXGK_EXPIRED 1233242885 /* Authentication expired */ +#define RXGK_BADLEVEL 1233242886 /* Unsupported or not permitted security level */ +#define RXGK_BADKEYNO 1233242887 /* Bad transport key number */ +#define RXGK_NOTRXGK 1233242888 /* Security layer is not rxgk */ +#define RXGK_UNSUPPORTED 1233242889 /* Endpoint does not support rxgk */ +#define RXGK_GSSERROR 1233242890 /* GSSAPI mechanism error */ +#endif + +/* + * Challenge information in the RXRPC_CHALLENGED control message. + */ +struct rxrpc_challenge { + __u16 service_id; /* The service ID of the connection (may be upgraded) */ + __u8 security_index; /* The security index of the connection */ + __u8 pad; /* Round out to a multiple of 4 bytes. */ + /* ... The security class gets to append extra information ... 
*/ +}; + +struct rxgk_challenge { + struct rxrpc_challenge base; + __u32 enctype; /* Krb5 encoding type */ +}; + #endif /* _UAPI_LINUX_RXRPC_H */ diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index ec47f9b68a1b..1d234d7e1892 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -188,6 +188,7 @@ enum LINUX_MIB_PAWSESTABREJECTED, /* PAWSEstabRejected */ LINUX_MIB_TSECRREJECTED, /* TSEcrRejected */ LINUX_MIB_PAWS_OLD_ACK, /* PAWSOldAck */ + LINUX_MIB_PAWS_TW_REJECTED, /* PAWSTimewait */ LINUX_MIB_DELAYEDACKS, /* DelayedACKs */ LINUX_MIB_DELAYEDACKLOCKED, /* DelayedACKLocked */ LINUX_MIB_DELAYEDACKLOST, /* DelayedACKLost */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index dc8fdc80e16b..bdac8c42fa82 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -184,6 +184,7 @@ enum tcp_fastopen_client_fail { #define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ #define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ #define TCPI_OPT_USEC_TS 64 /* usec timestamps */ +#define TCPI_OPT_TFO_CHILD 128 /* child from a Fast Open option on SYN */ /* * Sender's congestion state indicating normal or abnormal situations diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index d85d671deed3..edca3e430305 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -43,5 +43,6 @@ struct udphdr { #define UDP_ENCAP_GTP1U 5 /* 3GPP TS 29.060 */ #define UDP_ENCAP_RXRPC 6 #define TCP_ENCAP_ESPINTCP 7 /* Yikes, this is really xfrm encap types. */ +#define UDP_ENCAP_OVPNINUDP 8 /* OpenVPN traffic */ #endif /* _UAPI_LINUX_UDP_H */ diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index fe86606b9f30..0c64129f5d50 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -259,6 +259,7 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq, niov->owner = &area->nia; area->freelist[i] = i; atomic_set(&area->user_refs[i], 0); + niov->type = NET_IOV_IOURING; } area->free_count = nr_iovs; @@ -809,7 +810,7 @@ static int io_zcrx_recv_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq, return io_zcrx_copy_frag(req, ifq, frag, off, len); niov = netmem_to_net_iov(frag->netmem); - if (niov->pp->mp_ops != &io_uring_pp_zc_ops || + if (!niov->pp || niov->pp->mp_ops != &io_uring_pp_zc_ops || io_pp_to_ifq(niov->pp) != ifq) return -EFAULT; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 16ba36f34dfa..324c47ab377a 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6391,8 +6391,8 @@ static bool is_int_ptr(struct btf *btf, const struct btf_type *t) return btf_type_is_int(t); } -static u32 get_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, - int off) +u32 btf_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, + int off) { const struct btf_param *args; const struct btf_type *t; @@ -6541,6 +6541,7 @@ static const struct bpf_raw_tp_null_args raw_tp_null_args[] = { { "xprt_put_cong", 0x10 }, /* tcp */ { "tcp_send_reset", 0x11 }, + { "tcp_sendmsg_locked", 0x100 }, /* tegra_apb_dma */ { "tegra_dma_tx_status", 0x100 }, /* timer_migration */ @@ -6671,7 +6672,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, tname, off); return false; } - arg = get_ctx_arg_idx(btf, t, off); + arg = btf_ctx_arg_idx(btf, t, off); args = (const struct btf_param *)(t + 1); /* if (t == NULL) Fall back to default BPF prog with * MAX_BPF_FUNC_REG_ARGS u64 arguments. 
diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config index 8aafd050b754..e81327d2cd63 100644 --- a/kernel/configs/debug.config +++ b/kernel/configs/debug.config @@ -112,3 +112,8 @@ CONFIG_BRANCH_PROFILE_NONE=y CONFIG_DYNAMIC_FTRACE=y CONFIG_FTRACE=y CONFIG_FUNCTION_TRACER=y +# +# Preemption +# +CONFIG_DEBUG_PREEMPT=y +CONFIG_PREEMPT=y diff --git a/lib/pldmfw/pldmfw.c b/lib/pldmfw/pldmfw.c index 6264e2013f25..b45ceb725780 100644 --- a/lib/pldmfw/pldmfw.c +++ b/lib/pldmfw/pldmfw.c @@ -728,6 +728,9 @@ pldm_send_package_data(struct pldmfw_priv *data) struct pldmfw_record *record = data->matching_record; const struct pldmfw_ops *ops = data->context->ops; + if (!ops->send_package_data) + return 0; + return ops->send_package_data(data->context, record->package_data, record->package_data_len); } @@ -755,6 +758,9 @@ pldm_send_component_tables(struct pldmfw_priv *data) if (!test_bit(index, bitmap)) continue; + if (!data->context->ops->send_component_table) + continue; + /* determine whether this is the start, middle, end, or both * the start and end of the component tables */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5669baf2a6fe..2073c3fdf9c4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -897,9 +897,7 @@ static inline bool page_expected_state(struct page *page, #ifdef CONFIG_MEMCG page->memcg_data | #endif -#ifdef CONFIG_PAGE_POOL - ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) | -#endif + page_pool_page_is_pp(page) | (page->flags & check_flags))) return false; @@ -926,10 +924,8 @@ static const char *page_bad_reason(struct page *page, unsigned long flags) if (unlikely(page->memcg_data)) bad_reason = "page still charged to cgroup"; #endif -#ifdef CONFIG_PAGE_POOL - if (unlikely((page->pp_magic & ~0x3UL) == PP_SIGNATURE)) + if (unlikely(page_pool_page_is_pp(page))) bad_reason = "page_pool leak"; -#endif return bad_reason; } diff --git a/net/802/Makefile b/net/802/Makefile index bfed80221b8b..99abc29d537c 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -3,12 +3,11 @@ # Makefile for the Linux 802.x protocol layers. # -# Check the p8022 selections against net/core/Makefile. -obj-$(CONFIG_LLC) += p8022.o psnap.o +obj-$(CONFIG_LLC) += psnap.o obj-$(CONFIG_NET_FC) += fc.o obj-$(CONFIG_FDDI) += fddi.o obj-$(CONFIG_HIPPI) += hippi.o -obj-$(CONFIG_ATALK) += p8022.o psnap.o +obj-$(CONFIG_ATALK) += psnap.o obj-$(CONFIG_STP) += stp.o obj-$(CONFIG_GARP) += garp.o obj-$(CONFIG_MRP) += mrp.o diff --git a/net/802/p8022.c b/net/802/p8022.c deleted file mode 100644 index 78c25168d7c9..000000000000 --- a/net/802/p8022.c +++ /dev/null @@ -1,64 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * NET3: Support for 802.2 demultiplexing off Ethernet - * - * Demultiplex 802.2 encoded protocols. We match the entry by the - * SSAP/DSAP pair and then deliver to the registered datalink that - * matches. The control byte is ignored and handling of such items - * is up to the routine passed the frame. - * - * Unlike the 802.3 datalink we have a list of 802.2 entries as - * there are multiple protocols to demux. The list is currently - * short (3 or 4 entries at most). The current demux assumes this. 
- */ -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <linux/slab.h> -#include <net/datalink.h> -#include <linux/mm.h> -#include <linux/in.h> -#include <linux/init.h> -#include <net/llc.h> -#include <net/p8022.h> - -static int p8022_request(struct datalink_proto *dl, struct sk_buff *skb, - const unsigned char *dest) -{ - llc_build_and_send_ui_pkt(dl->sap, skb, dest, dl->sap->laddr.lsap); - return 0; -} - -struct datalink_proto *register_8022_client(unsigned char type, - int (*func)(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev)) -{ - struct datalink_proto *proto; - - proto = kmalloc(sizeof(*proto), GFP_ATOMIC); - if (proto) { - proto->type[0] = type; - proto->header_length = 3; - proto->request = p8022_request; - proto->sap = llc_sap_open(type, func); - if (!proto->sap) { - kfree(proto); - proto = NULL; - } - } - return proto; -} - -void unregister_8022_client(struct datalink_proto *proto) -{ - llc_sap_put(proto->sap); - kfree(proto); -} - -EXPORT_SYMBOL(register_8022_client); -EXPORT_SYMBOL(unregister_8022_client); - -MODULE_DESCRIPTION("Support for 802.2 demultiplexing off Ethernet"); -MODULE_LICENSE("GPL"); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 41be38264493..06908e37c3d9 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -23,7 +23,6 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/rculist.h> -#include <net/p8022.h> #include <net/arp.h> #include <linux/rtnetlink.h> #include <linux/notifier.h> diff --git a/net/Kconfig b/net/Kconfig index c3fca69a7c83..202cc595e5e6 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -245,7 +245,6 @@ source "net/bridge/netfilter/Kconfig" endif -source "net/dccp/Kconfig" source "net/sctp/Kconfig" source "net/rds/Kconfig" source "net/tipc/Kconfig" diff --git a/net/Makefile b/net/Makefile index 60ed5190eda8..aac960c41db6 100644 --- a/net/Makefile +++ b/net/Makefile @@ -42,7 +42,6 @@ obj-$(CONFIG_PHONET) += phonet/ ifneq ($(CONFIG_VLAN_8021Q),) obj-y += 8021q/ endif -obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ obj-$(CONFIG_RDS) += rds/ obj-$(CONFIG_WIRELESS) += wireless/ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index a08132888a3d..c0bc75513355 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -11,7 +11,7 @@ #include <linux/build_bug.h> #include <linux/byteorder/generic.h> #include <linux/container_of.h> -#include <linux/crc32c.h> +#include <linux/crc32.h> #include <linux/device.h> #include <linux/errno.h> #include <linux/gfp.h> @@ -69,8 +69,6 @@ unsigned int batadv_hardif_generation; static int (*batadv_rx_handler[256])(struct sk_buff *skb, struct batadv_hard_iface *recv_if); -unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - struct workqueue_struct *batadv_event_workqueue; static void batadv_recv_handler_init(void); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 67af435ee04e..692109be2210 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2025.1" +#define BATADV_SOURCE_VERSION "2025.2" #endif /* B.A.T.M.A.N. 
parameters */ @@ -235,7 +235,6 @@ static inline int batadv_print_vid(unsigned short vid) extern struct list_head batadv_hardif_list; extern unsigned int batadv_hardif_generation; -extern unsigned char batadv_broadcast_addr[]; extern struct workqueue_struct *batadv_event_workqueue; int batadv_mesh_init(struct net_device *mesh_iface); diff --git a/net/batman-adv/mesh-interface.c b/net/batman-adv/mesh-interface.c index 59e7b5aacbc9..5bbc366f974d 100644 --- a/net/batman-adv/mesh-interface.c +++ b/net/batman-adv/mesh-interface.c @@ -36,7 +36,6 @@ #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> -#include <net/net_namespace.h> #include <net/netlink.h> #include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> @@ -77,18 +76,6 @@ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len) return 0; } -static int batadv_interface_open(struct net_device *dev) -{ - netif_start_queue(dev); - return 0; -} - -static int batadv_interface_release(struct net_device *dev) -{ - netif_stop_queue(dev); - return 0; -} - /** * batadv_sum_counter() - Sum the cpu-local counters for index 'idx' * @bat_priv: the bat priv with all the mesh interface information @@ -890,8 +877,6 @@ out: static const struct net_device_ops batadv_netdev_ops = { .ndo_init = batadv_meshif_init_late, - .ndo_open = batadv_interface_open, - .ndo_stop = batadv_interface_release, .ndo_get_stats = batadv_interface_stats, .ndo_vlan_rx_add_vid = batadv_interface_add_vid, .ndo_vlan_rx_kill_vid = batadv_interface_kill_vid, diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 735ac8077821..9d72f4f15b3d 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -124,7 +124,9 @@ send_skb_err: int batadv_send_broadcast_skb(struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { - return batadv_send_skb_packet(skb, hard_iface, batadv_broadcast_addr); + static const u8 broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + + return batadv_send_skb_packet(skb, hard_iface, broadcast_addr); } /** diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 4a3165920de1..8d0e04e770cb 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -14,7 +14,7 @@ #include <linux/cache.h> #include <linux/compiler.h> #include <linux/container_of.h> -#include <linux/crc32c.h> +#include <linux/crc32.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/etherdevice.h> diff --git a/net/bridge/br.c b/net/bridge/br.c index 183fcb362f9e..0adeafe11a36 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -284,6 +284,9 @@ int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on, case BR_BOOLOPT_MST_ENABLE: err = br_mst_set_enabled(br, on, extack); break; + case BR_BOOLOPT_MDB_OFFLOAD_FAIL_NOTIFICATION: + br_opt_toggle(br, BROPT_MDB_OFFLOAD_FAIL_NOTIFICATION, on); + break; default: /* shouldn't be called with unsupported options */ WARN_ON(1); @@ -302,6 +305,8 @@ int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt) return br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED); case BR_BOOLOPT_MST_ENABLE: return br_opt_get(br, BROPT_MST_ENABLED); + case BR_BOOLOPT_MDB_OFFLOAD_FAIL_NOTIFICATION: + return br_opt_get(br, BROPT_MDB_OFFLOAD_FAIL_NOTIFICATION); default: /* shouldn't be called with unsupported options */ WARN_ON(1); @@ -363,21 +368,20 @@ void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on) clear_bit(opt, &br->options); } -static void __net_exit 
br_net_exit_batch_rtnl(struct list_head *net_list, - struct list_head *dev_to_kill) +static void __net_exit br_net_exit_rtnl(struct net *net, + struct list_head *dev_to_kill) { struct net_device *dev; - struct net *net; - ASSERT_RTNL(); - list_for_each_entry(net, net_list, exit_list) - for_each_netdev(net, dev) - if (netif_is_bridge_master(dev)) - br_dev_delete(dev, dev_to_kill); + ASSERT_RTNL_NET(net); + + for_each_netdev(net, dev) + if (netif_is_bridge_master(dev)) + br_dev_delete(dev, dev_to_kill); } static struct pernet_operations br_net_ops = { - .exit_batch_rtnl = br_net_exit_batch_rtnl, + .exit_rtnl = br_net_exit_rtnl, }; static const struct stp_proto br_stp_proto = { diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index 115a23054a58..1e2b51769eec 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -160,6 +160,9 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, if (br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) { if (br_is_neigh_suppress_enabled(p, vid)) return; + if (is_unicast_ether_addr(eth_hdr(skb)->h_dest) && + parp->ar_op == htons(ARPOP_REQUEST)) + return; if (parp->ar_op != htons(ARPOP_RREQUEST) && parp->ar_op != htons(ARPOP_RREPLY) && (ipv4_is_zeronet(sip) || sip == tip)) { @@ -410,6 +413,10 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, if (br_is_neigh_suppress_enabled(p, vid)) return; + if (is_unicast_ether_addr(eth_hdr(skb)->h_dest) && + msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) + return; + if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT && !msg->icmph.icmp6_solicited) { /* prevent flooding to neigh suppress ports */ diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 232133a0fd21..5f6ac9bf1527 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -189,7 +189,8 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst)) { if ((mdst && mdst->host_joined) || - br_multicast_is_router(brmctx, skb)) { + br_multicast_is_router(brmctx, skb) || + br->dev->flags & IFF_ALLMULTI) { local_rcv = true; DEV_STATS_INC(br->dev, multicast); } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 722203b98ff7..400eb872b403 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -144,6 +144,8 @@ static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags) e->flags |= MDB_FLAGS_STAR_EXCL; if (flags & MDB_PG_FLAGS_BLOCKED) e->flags |= MDB_FLAGS_BLOCKED; + if (flags & MDB_PG_FLAGS_OFFLOAD_FAILED) + e->flags |= MDB_FLAGS_OFFLOAD_FAILED; } static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip, @@ -517,16 +519,17 @@ static size_t rtnl_mdb_nlmsg_size(const struct net_bridge_port_group *pg) rtnl_mdb_nlmsg_pg_size(pg); } -void br_mdb_notify(struct net_device *dev, - struct net_bridge_mdb_entry *mp, - struct net_bridge_port_group *pg, - int type) +static void __br_mdb_notify(struct net_device *dev, + struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, + int type, bool notify_switchdev) { struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; - br_switchdev_mdb_notify(dev, mp, pg, type); + if (notify_switchdev) + br_switchdev_mdb_notify(dev, mp, pg, type); skb = nlmsg_new(rtnl_mdb_nlmsg_size(pg), GFP_ATOMIC); if (!skb) @@ -544,6 +547,21 @@ errout: rtnl_set_sk_err(net, RTNLGRP_MDB, err); } +void br_mdb_notify(struct net_device *dev, + 
struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, + int type) +{ + __br_mdb_notify(dev, mp, pg, type, true); +} + +void br_mdb_flag_change_notify(struct net_device *dev, + struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg) +{ + __br_mdb_notify(dev, mp, pg, RTM_NEWMDB, false); +} + static int nlmsg_populate_rtr_fill(struct sk_buff *skb, struct net_device *dev, int ifindex, u16 vid, u32 pid, diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c index 1820f09ff59c..3f24b4ee49c2 100644 --- a/net/bridge/br_mst.c +++ b/net/bridge/br_mst.c @@ -80,10 +80,10 @@ static void br_mst_vlan_set_state(struct net_bridge_vlan_group *vg, if (br_vlan_get_state(v) == state) return; - br_vlan_set_state(v, state); - if (v->vid == vg->pvid) br_vlan_set_pvid_state(vg, state); + + br_vlan_set_state(v, state); } int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index dcbf058de1e3..7e0b2362b9ee 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2105,12 +2105,17 @@ static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx) } } -void br_multicast_enable_port(struct net_bridge_port *port) +static void br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx) { - struct net_bridge *br = port->br; + struct net_bridge *br = pmctx->port->br; spin_lock_bh(&br->multicast_lock); - __br_multicast_enable_port_ctx(&port->multicast_ctx); + if (br_multicast_port_ctx_is_vlan(pmctx) && + !(pmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED)) { + spin_unlock_bh(&br->multicast_lock); + return; + } + __br_multicast_enable_port_ctx(pmctx); spin_unlock_bh(&br->multicast_lock); } @@ -2137,11 +2142,67 @@ static void __br_multicast_disable_port_ctx(struct net_bridge_mcast_port *pmctx) br_multicast_rport_del_notify(pmctx, del); } +static void br_multicast_disable_port_ctx(struct net_bridge_mcast_port *pmctx) +{ + struct net_bridge *br = pmctx->port->br; + + spin_lock_bh(&br->multicast_lock); + if (br_multicast_port_ctx_is_vlan(pmctx) && + !(pmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED)) { + spin_unlock_bh(&br->multicast_lock); + return; + } + + __br_multicast_disable_port_ctx(pmctx); + spin_unlock_bh(&br->multicast_lock); +} + +static void br_multicast_toggle_port(struct net_bridge_port *port, bool on) +{ +#if IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING) + if (br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) { + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *vlan; + + rcu_read_lock(); + vg = nbp_vlan_group_rcu(port); + if (!vg) { + rcu_read_unlock(); + return; + } + + /* iterate each vlan, toggle vlan multicast context */ + list_for_each_entry_rcu(vlan, &vg->vlan_list, vlist) { + struct net_bridge_mcast_port *pmctx = + &vlan->port_mcast_ctx; + u8 state = br_vlan_get_state(vlan); + /* enable vlan multicast context when state is + * LEARNING or FORWARDING + */ + if (on && br_vlan_state_allowed(state, true)) + br_multicast_enable_port_ctx(pmctx); + else + br_multicast_disable_port_ctx(pmctx); + } + rcu_read_unlock(); + return; + } +#endif + /* toggle port multicast context when vlan snooping is disabled */ + if (on) + br_multicast_enable_port_ctx(&port->multicast_ctx); + else + br_multicast_disable_port_ctx(&port->multicast_ctx); +} + +void br_multicast_enable_port(struct net_bridge_port *port) +{ + br_multicast_toggle_port(port, true); +} + void br_multicast_disable_port(struct net_bridge_port *port) { - spin_lock_bh(&port->br->multicast_lock); - 
__br_multicast_disable_port_ctx(&port->multicast_ctx); - spin_unlock_bh(&port->br->multicast_lock); + br_multicast_toggle_port(port, false); } static int __grp_src_delete_marked(struct net_bridge_port_group *pg) @@ -4211,6 +4272,32 @@ static void __br_multicast_stop(struct net_bridge_mcast *brmctx) #endif } +void br_multicast_update_vlan_mcast_ctx(struct net_bridge_vlan *v, u8 state) +{ +#if IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING) + struct net_bridge *br; + + if (!br_vlan_should_use(v)) + return; + + if (br_vlan_is_master(v)) + return; + + br = v->port->br; + + if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) + return; + + if (br_vlan_state_allowed(state, true)) + br_multicast_enable_port_ctx(&v->port_mcast_ctx); + + /* Multicast is not disabled for the vlan when it goes in + * blocking state because the timers will expire and stop by + * themselves without sending more queries. + */ +#endif +} + void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on) { struct net_bridge *br; @@ -4304,9 +4391,9 @@ int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, __br_multicast_open(&br->multicast_ctx); list_for_each_entry(p, &br->port_list, list) { if (on) - br_multicast_disable_port(p); + br_multicast_disable_port_ctx(&p->multicast_ctx); else - br_multicast_enable_port(p); + br_multicast_enable_port_ctx(&p->multicast_ctx); } list_for_each_entry(vlan, &vg->vlan_list, vlist) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d5b3c5936a79..db1bddb330ff 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -306,11 +306,12 @@ struct net_bridge_fdb_flush_desc { u16 vlan_id; }; -#define MDB_PG_FLAGS_PERMANENT BIT(0) -#define MDB_PG_FLAGS_OFFLOAD BIT(1) -#define MDB_PG_FLAGS_FAST_LEAVE BIT(2) -#define MDB_PG_FLAGS_STAR_EXCL BIT(3) -#define MDB_PG_FLAGS_BLOCKED BIT(4) +#define MDB_PG_FLAGS_PERMANENT BIT(0) +#define MDB_PG_FLAGS_OFFLOAD BIT(1) +#define MDB_PG_FLAGS_FAST_LEAVE BIT(2) +#define MDB_PG_FLAGS_STAR_EXCL BIT(3) +#define MDB_PG_FLAGS_BLOCKED BIT(4) +#define MDB_PG_FLAGS_OFFLOAD_FAILED BIT(5) #define PG_SRC_ENT_LIMIT 32 @@ -483,6 +484,7 @@ enum net_bridge_opts { BROPT_VLAN_BRIDGE_BINDING, BROPT_MCAST_VLAN_SNOOPING_ENABLED, BROPT_MST_ENABLED, + BROPT_MDB_OFFLOAD_FAIL_NOTIFICATION, }; struct net_bridge { @@ -1002,6 +1004,8 @@ int br_mdb_hash_init(struct net_bridge *br); void br_mdb_hash_fini(struct net_bridge *br); void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, int type); +void br_mdb_flag_change_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg); void br_rtr_notify(struct net_device *dev, struct net_bridge_mcast_port *pmctx, int type); void br_multicast_del_pg(struct net_bridge_mdb_entry *mp, @@ -1051,6 +1055,7 @@ void br_multicast_port_ctx_init(struct net_bridge_port *port, struct net_bridge_vlan *vlan, struct net_bridge_mcast_port *pmctx); void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx); +void br_multicast_update_vlan_mcast_ctx(struct net_bridge_vlan *v, u8 state); void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on); int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, struct netlink_ext_ack *extack); @@ -1342,6 +1347,22 @@ br_multicast_ctx_matches_vlan_snooping(const struct net_bridge_mcast *brmctx) return !!(vlan_snooping_enabled == br_multicast_ctx_is_vlan(brmctx)); } + +static inline void +br_multicast_set_pg_offload_flags(struct net_bridge_port_group 
*p, + bool offloaded) +{ + p->flags &= ~(MDB_PG_FLAGS_OFFLOAD | MDB_PG_FLAGS_OFFLOAD_FAILED); + p->flags |= (offloaded ? MDB_PG_FLAGS_OFFLOAD : + MDB_PG_FLAGS_OFFLOAD_FAILED); +} + +static inline bool +br_mdb_should_notify(const struct net_bridge *br, u8 changed_flags) +{ + return br_opt_get(br, BROPT_MDB_OFFLOAD_FAIL_NOTIFICATION) && + (changed_flags & MDB_PG_FLAGS_OFFLOAD_FAILED); +} #else static inline int br_multicast_rcv(struct net_bridge_mcast **brmctx, struct net_bridge_mcast_port **pmctx, @@ -1501,6 +1522,11 @@ static inline void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pm { } +static inline void br_multicast_update_vlan_mcast_ctx(struct net_bridge_vlan *v, + u8 state) +{ +} + static inline void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on) { @@ -1861,7 +1887,9 @@ bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr, bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range, const struct net_bridge_vlan *v_opts); -/* vlan state manipulation helpers using *_ONCE to annotate lock-free access */ +/* vlan state manipulation helpers using *_ONCE to annotate lock-free access, + * while br_vlan_set_state() may access data protected by multicast_lock. + */ static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v) { return READ_ONCE(v->state); @@ -1870,6 +1898,7 @@ static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v) static inline void br_vlan_set_state(struct net_bridge_vlan *v, u8 state) { WRITE_ONCE(v->state, state); + br_multicast_update_vlan_mcast_ctx(v, state); } static inline u8 br_vlan_get_pvid_state(const struct net_bridge_vlan_group *vg) diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 7b41ee8740cb..95d7355a0407 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -504,9 +504,10 @@ static void br_switchdev_mdb_complete(struct net_device *dev, int err, void *pri struct net_bridge_mdb_entry *mp; struct net_bridge_port *port = data->port; struct net_bridge *br = port->br; + u8 old_flags; - if (err) - goto err; + if (err == -EOPNOTSUPP) + goto out_free; spin_lock_bh(&br->multicast_lock); mp = br_mdb_ip_get(br, &data->ip); @@ -516,11 +517,15 @@ static void br_switchdev_mdb_complete(struct net_device *dev, int err, void *pri pp = &p->next) { if (p->key.port != port) continue; - p->flags |= MDB_PG_FLAGS_OFFLOAD; + + old_flags = p->flags; + br_multicast_set_pg_offload_flags(p, !err); + if (br_mdb_should_notify(br, old_flags ^ p->flags)) + br_mdb_flag_change_notify(br->dev, mp, p); } out: spin_unlock_bh(&br->multicast_lock); -err: +out_free: kfree(priv); } diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 816bb0fde718..6482de4d8750 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -60,19 +60,19 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, struct ip_fraglist_iter iter; struct sk_buff *frag; - if (first_len - hlen > mtu || - skb_headroom(skb) < ll_rs) + if (first_len - hlen > mtu) goto blackhole; - if (skb_cloned(skb)) + if (skb_cloned(skb) || + skb_headroom(skb) < ll_rs) goto slow_path; skb_walk_frags(skb, frag) { - if (frag->len > mtu || - skb_headroom(frag) < hlen + ll_rs) + if (frag->len > mtu) goto blackhole; - if (skb_shared(frag)) + if (skb_shared(frag) || + skb_headroom(frag) < hlen + ll_rs) goto slow_path; } diff --git a/net/core/datagram.c b/net/core/datagram.c index f0693707aece..9ef5442536f5 100644 
--- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -63,6 +63,8 @@ #include <net/busy_poll.h> #include <crypto/hash.h> +#include "devmem.h" + /* * Is a socket 'connection oriented' ? */ @@ -163,8 +165,7 @@ done: return skb; } -struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, - struct sk_buff_head *queue, +struct sk_buff *__skb_try_recv_from_queue(struct sk_buff_head *queue, unsigned int flags, int *off, int *err, struct sk_buff **last) @@ -261,7 +262,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, * However, this function was correct in any case. 8) */ spin_lock_irqsave(&queue->lock, cpu_flags); - skb = __skb_try_recv_from_queue(sk, queue, flags, off, &error, + skb = __skb_try_recv_from_queue(queue, flags, off, &error, last); spin_unlock_irqrestore(&queue->lock, cpu_flags); if (error) @@ -692,9 +693,49 @@ int zerocopy_fill_skb_from_iter(struct sk_buff *skb, return 0; } +static int +zerocopy_fill_skb_from_devmem(struct sk_buff *skb, struct iov_iter *from, + int length, + struct net_devmem_dmabuf_binding *binding) +{ + int i = skb_shinfo(skb)->nr_frags; + size_t virt_addr, size, off; + struct net_iov *niov; + + /* Devmem filling works by taking an IOVEC from the user where the + * iov_addrs are interpreted as an offset in bytes into the dma-buf to + * send from. We do not support other iter types. + */ + if (iov_iter_type(from) != ITER_IOVEC) + return -EFAULT; + + while (length && iov_iter_count(from)) { + if (i == MAX_SKB_FRAGS) + return -EMSGSIZE; + + virt_addr = (size_t)iter_iov_addr(from); + niov = net_devmem_get_niov_at(binding, virt_addr, &off, &size); + if (!niov) + return -EFAULT; + + size = min_t(size_t, size, length); + size = min_t(size_t, size, iter_iov_len(from)); + + get_netmem(net_iov_to_netmem(niov)); + skb_add_rx_frag_netmem(skb, i, net_iov_to_netmem(niov), off, + size, PAGE_SIZE); + iov_iter_advance(from, size); + length -= size; + i++; + } + + return 0; +} + int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, struct iov_iter *from, - size_t length) + size_t length, + struct net_devmem_dmabuf_binding *binding) { unsigned long orig_size = skb->truesize; unsigned long truesize; @@ -702,6 +743,8 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, if (msg && msg->msg_ubuf && msg->sg_from_iter) ret = msg->sg_from_iter(skb, from, length); + else if (binding) + ret = zerocopy_fill_skb_from_devmem(skb, from, length, binding); else ret = zerocopy_fill_skb_from_iter(skb, from, length); @@ -735,7 +778,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) if (skb_copy_datagram_from_iter(skb, 0, from, copy)) return -EFAULT; - return __zerocopy_sg_from_iter(NULL, NULL, skb, from, ~0U); + return __zerocopy_sg_from_iter(NULL, NULL, skb, from, ~0U, NULL); } EXPORT_SYMBOL(zerocopy_sg_from_iter); diff --git a/net/core/dev.c b/net/core/dev.c index 0d891634c692..fccf2167b235 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -462,7 +462,9 @@ EXPORT_PER_CPU_SYMBOL(softnet_data); * PP consumers must pay attention to run APIs in the appropriate context * (e.g. NAPI context). 
*/ -DEFINE_PER_CPU(struct page_pool *, system_page_pool); +DEFINE_PER_CPU(struct page_pool_bh, system_page_pool) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; #ifdef CONFIG_LOCKDEP /* @@ -828,7 +830,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id) dev_hold(dev); rcu_read_unlock(); - dev = __netdev_put_lock(dev); + dev = __netdev_put_lock(dev, net); if (!dev) return NULL; @@ -1039,10 +1041,11 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id) * This helper is intended for locking net_device after it has been looked up * using a lockless lookup helper. Lock prevents the instance from going away. */ -struct net_device *__netdev_put_lock(struct net_device *dev) +struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net) { netdev_lock(dev); - if (dev->reg_state > NETREG_REGISTERED) { + if (dev->reg_state > NETREG_REGISTERED || + dev->moving_ns || !net_eq(dev_net(dev), net)) { netdev_unlock(dev); dev_put(dev); return NULL; @@ -1051,6 +1054,20 @@ struct net_device *__netdev_put_lock(struct net_device *dev) return dev; } +static struct net_device * +__netdev_put_lock_ops_compat(struct net_device *dev, struct net *net) +{ + netdev_lock_ops_compat(dev); + if (dev->reg_state > NETREG_REGISTERED || + dev->moving_ns || !net_eq(dev_net(dev), net)) { + netdev_unlock_ops_compat(dev); + dev_put(dev); + return NULL; + } + dev_put(dev); + return dev; +} + /** * netdev_get_by_index_lock() - find a device by its ifindex * @net: the applicable net namespace @@ -1070,7 +1087,19 @@ struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex) if (!dev) return NULL; - return __netdev_put_lock(dev); + return __netdev_put_lock(dev, net); +} + +struct net_device * +netdev_get_by_index_lock_ops_compat(struct net *net, int ifindex) +{ + struct net_device *dev; + + dev = dev_get_by_index(net, ifindex); + if (!dev) + return NULL; + + return __netdev_put_lock_ops_compat(dev, net); } struct net_device * @@ -1090,7 +1119,32 @@ netdev_xa_find_lock(struct net *net, struct net_device *dev, dev_hold(dev); rcu_read_unlock(); - dev = __netdev_put_lock(dev); + dev = __netdev_put_lock(dev, net); + if (dev) + return dev; + + (*index)++; + } while (true); +} + +struct net_device * +netdev_xa_find_lock_ops_compat(struct net *net, struct net_device *dev, + unsigned long *index) +{ + if (dev) + netdev_unlock_ops_compat(dev); + + do { + rcu_read_lock(); + dev = xa_find(&net->dev_by_index, index, ULONG_MAX, XA_PRESENT); + if (!dev) { + rcu_read_unlock(); + return NULL; + } + dev_hold(dev); + rcu_read_unlock(); + + dev = __netdev_put_lock_ops_compat(dev, net); if (dev) return dev; @@ -3844,12 +3898,42 @@ sw_checksum: } EXPORT_SYMBOL(skb_csum_hwoffload_help); +static struct sk_buff *validate_xmit_unreadable_skb(struct sk_buff *skb, + struct net_device *dev) +{ + struct skb_shared_info *shinfo; + struct net_iov *niov; + + if (likely(skb_frags_readable(skb))) + goto out; + + if (!dev->netmem_tx) + goto out_free; + + shinfo = skb_shinfo(skb); + + if (shinfo->nr_frags > 0) { + niov = netmem_to_net_iov(skb_frag_netmem(&shinfo->frags[0])); + if (net_is_devmem_iov(niov) && + net_devmem_iov_binding(niov)->dev != dev) + goto out_free; + } + +out: + return skb; + +out_free: + kfree_skb(skb); + return NULL; +} + static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool *again) { netdev_features_t features; - if (!skb_frags_readable(skb)) - goto out_kfree_skb; + skb = validate_xmit_unreadable_skb(skb, dev); + if (unlikely(!skb)) + goto out_null; features = 
netif_skb_features(skb); skb = validate_xmit_vlan(skb, features); @@ -4946,7 +5030,8 @@ static void rps_trigger_softirq(void *data) struct softnet_data *sd = data; ____napi_schedule(sd, &sd->backlog); - sd->received_rps++; + /* Pairs with READ_ONCE() in softnet_seq_show() */ + WRITE_ONCE(sd->received_rps, sd->received_rps + 1); } #endif /* CONFIG_RPS */ @@ -5031,7 +5116,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) rcu_read_lock(); fl = rcu_dereference(sd->flow_limit); if (fl) { - new_flow = skb_get_hash(skb) & (fl->num_buckets - 1); + new_flow = hash_32(skb_get_hash(skb), fl->log_buckets); old_flow = fl->history[fl->history_head]; fl->history[fl->history_head] = new_flow; @@ -5042,7 +5127,8 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) fl->buckets[old_flow]--; if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) { - fl->count++; + /* Pairs with READ_ONCE() in softnet_seq_show() */ + WRITE_ONCE(fl->count, fl->count + 1); rcu_read_unlock(); return true; } @@ -5238,7 +5324,10 @@ netif_skb_check_for_xdp(struct sk_buff **pskb, const struct bpf_prog *prog) struct sk_buff *skb = *pskb; int err, hroom, troom; - if (!skb_cow_data_for_xdp(this_cpu_read(system_page_pool), pskb, prog)) + local_lock_nested_bh(&system_page_pool.bh_lock); + err = skb_cow_data_for_xdp(this_cpu_read(system_page_pool.pool), pskb, prog); + local_unlock_nested_bh(&system_page_pool.bh_lock); + if (!err) return 0; /* In case we have to go down the path and also linearize, @@ -7515,7 +7604,8 @@ start: */ if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) { - sd->time_squeeze++; + /* Pairs with READ_ONCE() in softnet_seq_show() */ + WRITE_ONCE(sd->time_squeeze, sd->time_squeeze + 1); break; } } @@ -11047,8 +11137,7 @@ int register_netdevice(struct net_device *dev) * Prevent userspace races by waiting until the network * device is fully setup before sending notifications. */ - if (!dev->rtnl_link_ops || - dev->rtnl_link_state == RTNL_LINK_INITIALIZED) + if (!(dev->rtnl_link_ops && dev->rtnl_link_initializing)) rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL, 0, NULL); out: @@ -11971,8 +12060,7 @@ void unregister_netdevice_many_notify(struct list_head *head, */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - if (!dev->rtnl_link_ops || - dev->rtnl_link_state == RTNL_LINK_INITIALIZED) + if (!(dev->rtnl_link_ops && dev->rtnl_link_initializing)) skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, GFP_KERNEL, NULL, 0, portid, nlh); @@ -12146,7 +12234,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, netif_close(dev); /* And unlink it from device chain */ unlist_netdevice(dev); - netdev_unlock_ops(dev); + + if (!netdev_need_ops_lock(dev)) + netdev_lock(dev); + dev->moving_ns = true; + netdev_unlock(dev); synchronize_net(); @@ -12184,7 +12276,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, move_netdevice_notifiers_dev_net(dev, net); /* Actually switch the network namespace */ + netdev_lock(dev); dev_net_set(dev, net); + netdev_unlock(dev); dev->ifindex = new_ifindex; if (new_name[0]) { @@ -12210,7 +12304,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, err = netdev_change_owner(dev, net_old, net); WARN_ON(err); - netdev_lock_ops(dev); + netdev_lock(dev); + dev->moving_ns = false; + if (!netdev_need_ops_lock(dev)) + netdev_unlock(dev); + /* Add the device back in the hashes */ list_netdevice(dev); /* Notify protocols, that a new device appeared. 
*/ @@ -12621,7 +12719,7 @@ static int net_page_pool_create(int cpuid) return err; } - per_cpu(system_page_pool, cpuid) = pp_ptr; + per_cpu(system_page_pool.pool, cpuid) = pp_ptr; #endif return 0; } @@ -12751,13 +12849,13 @@ out: for_each_possible_cpu(i) { struct page_pool *pp_ptr; - pp_ptr = per_cpu(system_page_pool, i); + pp_ptr = per_cpu(system_page_pool.pool, i); if (!pp_ptr) continue; xdp_unreg_page_pool(pp_ptr); page_pool_destroy(pp_ptr); - per_cpu(system_page_pool, i) = NULL; + per_cpu(system_page_pool.pool, i) = NULL; } } diff --git a/net/core/dev.h b/net/core/dev.h index 7ee203395d8e..e93f36b7ddf3 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -15,8 +15,9 @@ struct cpumask; /* Random bits of netdevice that don't need to be exposed */ #define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */ struct sd_flow_limit { - u64 count; - unsigned int num_buckets; + struct rcu_head rcu; + unsigned int count; + u8 log_buckets; unsigned int history_head; u16 history[FLOW_LIMIT_HISTORY]; u8 buckets[]; @@ -29,7 +30,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id); struct net_device *dev_get_by_napi_id(unsigned int napi_id); struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex); -struct net_device *__netdev_put_lock(struct net_device *dev); +struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net); struct net_device * netdev_xa_find_lock(struct net *net, struct net_device *dev, unsigned long *index); @@ -41,6 +42,21 @@ DEFINE_FREE(netdev_unlock, struct net_device *, if (_T) netdev_unlock(_T)); (var_name = netdev_xa_find_lock(net, var_name, &ifindex)); \ ifindex++) +struct net_device * +netdev_get_by_index_lock_ops_compat(struct net *net, int ifindex); +struct net_device * +netdev_xa_find_lock_ops_compat(struct net *net, struct net_device *dev, + unsigned long *index); + +DEFINE_FREE(netdev_unlock_ops_compat, struct net_device *, + if (_T) netdev_unlock_ops_compat(_T)); + +#define for_each_netdev_lock_ops_compat_scoped(net, var_name, ifindex) \ + for (struct net_device *var_name __free(netdev_unlock_ops_compat) = NULL; \ + (var_name = netdev_xa_find_lock_ops_compat(net, var_name, \ + &ifindex)); \ + ifindex++) + #ifdef CONFIG_PROC_FS int __init dev_proc_init(void); #else diff --git a/net/core/devmem.c b/net/core/devmem.c index 2db428ab6b8b..0dba26baae18 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -16,6 +16,7 @@ #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> #include <net/page_pool/memory_provider.h> +#include <net/sock.h> #include <trace/events/page_pool.h> #include "devmem.h" @@ -30,7 +31,7 @@ static const struct memory_provider_ops dmabuf_devmem_ops; bool net_is_devmem_iov(struct net_iov *niov) { - return niov->pp->mp_ops == &dmabuf_devmem_ops; + return niov->type == NET_IOV_DMABUF; } static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool, @@ -52,8 +53,10 @@ static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov) ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT); } -void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding) +void __net_devmem_dmabuf_binding_free(struct work_struct *wq) { + struct net_devmem_dmabuf_binding *binding = container_of(wq, typeof(*binding), unbind_w); + size_t size, avail; gen_pool_for_each_chunk(binding->chunk_pool, @@ -71,8 +74,10 @@ void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding) dma_buf_detach(binding->dmabuf, binding->attachment); dma_buf_put(binding->dmabuf); 
xa_destroy(&binding->bound_rxqs); + kvfree(binding->tx_vec); kfree(binding); } +EXPORT_SYMBOL(__net_devmem_dmabuf_binding_free); struct net_iov * net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding) @@ -117,6 +122,13 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) unsigned long xa_idx; unsigned int rxq_idx; + xa_erase(&net_devmem_dmabuf_bindings, binding->id); + + /* Ensure no tx net_devmem_lookup_dmabuf() are in flight after the + * erase. + */ + synchronize_net(); + if (binding->list.next) list_del(&binding->list); @@ -131,8 +143,6 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) __net_mp_close_rxq(binding->dev, rxq_idx, &mp_params); } - xa_erase(&net_devmem_dmabuf_bindings, binding->id); - net_devmem_dmabuf_binding_put(binding); } @@ -166,8 +176,9 @@ err_close_rxq: } struct net_devmem_dmabuf_binding * -net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, - struct netlink_ext_ack *extack) +net_devmem_bind_dmabuf(struct net_device *dev, + enum dma_data_direction direction, + unsigned int dmabuf_fd, struct netlink_ext_ack *extack) { struct net_devmem_dmabuf_binding *binding; static u32 id_alloc_next; @@ -189,13 +200,6 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, } binding->dev = dev; - - err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id, - binding, xa_limit_32b, &id_alloc_next, - GFP_KERNEL); - if (err < 0) - goto err_free_binding; - xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC); refcount_set(&binding->ref, 1); @@ -208,26 +212,36 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, if (IS_ERR(binding->attachment)) { err = PTR_ERR(binding->attachment); NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device"); - goto err_free_id; + goto err_free_binding; } binding->sgt = dma_buf_map_attachment_unlocked(binding->attachment, - DMA_FROM_DEVICE); + direction); if (IS_ERR(binding->sgt)) { err = PTR_ERR(binding->sgt); NL_SET_ERR_MSG(extack, "Failed to map dmabuf attachment"); goto err_detach; } + if (direction == DMA_TO_DEVICE) { + binding->tx_vec = kvmalloc_array(dmabuf->size / PAGE_SIZE, + sizeof(struct net_iov *), + GFP_KERNEL); + if (!binding->tx_vec) { + err = -ENOMEM; + goto err_unmap; + } + } + /* For simplicity we expect to make PAGE_SIZE allocations, but the * binding can be much more flexible than that. We may be able to * allocate MTU sized chunks here. Leave that for future work... 
*/ - binding->chunk_pool = - gen_pool_create(PAGE_SHIFT, dev_to_node(&dev->dev)); + binding->chunk_pool = gen_pool_create(PAGE_SHIFT, + dev_to_node(&dev->dev)); if (!binding->chunk_pool) { err = -ENOMEM; - goto err_unmap; + goto err_tx_vec; } virtual = 0; @@ -268,27 +282,36 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, for (i = 0; i < owner->area.num_niovs; i++) { niov = &owner->area.niovs[i]; + niov->type = NET_IOV_DMABUF; niov->owner = &owner->area; page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), net_devmem_get_dma_addr(niov)); + if (direction == DMA_TO_DEVICE) + binding->tx_vec[owner->area.base_virtual / PAGE_SIZE + i] = niov; } virtual += len; } + err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id, + binding, xa_limit_32b, &id_alloc_next, + GFP_KERNEL); + if (err < 0) + goto err_free_chunks; + return binding; err_free_chunks: gen_pool_for_each_chunk(binding->chunk_pool, net_devmem_dmabuf_free_chunk_owner, NULL); gen_pool_destroy(binding->chunk_pool); +err_tx_vec: + kvfree(binding->tx_vec); err_unmap: dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt, DMA_FROM_DEVICE); err_detach: dma_buf_detach(dmabuf, binding->attachment); -err_free_id: - xa_erase(&net_devmem_dmabuf_bindings, binding->id); err_free_binding: kfree(binding); err_put_dmabuf: @@ -296,6 +319,74 @@ err_put_dmabuf: return ERR_PTR(err); } +struct net_devmem_dmabuf_binding *net_devmem_lookup_dmabuf(u32 id) +{ + struct net_devmem_dmabuf_binding *binding; + + rcu_read_lock(); + binding = xa_load(&net_devmem_dmabuf_bindings, id); + if (binding) { + if (!net_devmem_dmabuf_binding_get(binding)) + binding = NULL; + } + rcu_read_unlock(); + + return binding; +} + +void net_devmem_get_net_iov(struct net_iov *niov) +{ + net_devmem_dmabuf_binding_get(net_devmem_iov_binding(niov)); +} + +void net_devmem_put_net_iov(struct net_iov *niov) +{ + net_devmem_dmabuf_binding_put(net_devmem_iov_binding(niov)); +} + +struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk, + unsigned int dmabuf_id) +{ + struct net_devmem_dmabuf_binding *binding; + struct dst_entry *dst = __sk_dst_get(sk); + int err = 0; + + binding = net_devmem_lookup_dmabuf(dmabuf_id); + if (!binding || !binding->tx_vec) { + err = -EINVAL; + goto out_err; + } + + /* The dma-addrs in this binding are only reachable to the corresponding + * net_device. + */ + if (!dst || !dst->dev || dst->dev->ifindex != binding->dev->ifindex) { + err = -ENODEV; + goto out_err; + } + + return binding; + +out_err: + if (binding) + net_devmem_dmabuf_binding_put(binding); + + return ERR_PTR(err); +} + +struct net_iov * +net_devmem_get_niov_at(struct net_devmem_dmabuf_binding *binding, + size_t virt_addr, size_t *off, size_t *size) +{ + if (virt_addr >= binding->dmabuf->size) + return NULL; + + *off = virt_addr % PAGE_SIZE; + *size = PAGE_SIZE - *off; + + return binding->tx_vec[virt_addr / PAGE_SIZE]; +} + /*** "Dmabuf devmem memory provider" ***/ int mp_dmabuf_devmem_init(struct page_pool *pool) diff --git a/net/core/devmem.h b/net/core/devmem.h index a1aabc9685cc..58d8d3c1b945 100644 --- a/net/core/devmem.h +++ b/net/core/devmem.h @@ -25,12 +25,20 @@ struct net_devmem_dmabuf_binding { /* The user holds a ref (via the netlink API) for as long as they want * the binding to remain alive. Each page pool using this binding holds - * a ref to keep the binding alive. Each allocated net_iov holds a - * ref. + * a ref to keep the binding alive. 
The page_pool does not release the + * ref until all the net_iovs allocated from this binding are released + * back to the page_pool. * * The binding undos itself and unmaps the underlying dmabuf once all * those refs are dropped and the binding is no longer desired or in * use. + * + * net_devmem_get_net_iov() on dmabuf net_iovs will increment this + * reference, making sure that the binding remains alive until all the + * net_iovs are no longer used. net_iovs allocated from this binding + * that are stuck in the TX path for any reason (such as awaiting + * retransmits) hold a reference to the binding until the skb holding + * them is freed. */ refcount_t ref; @@ -46,6 +54,14 @@ struct net_devmem_dmabuf_binding { * active. */ u32 id; + + /* Array of net_iov pointers for this binding, sorted by virtual + * address. This array is convenient to map the virtual addresses to + * net_iovs in the TX path. + */ + struct net_iov **tx_vec; + + struct work_struct unbind_w; }; #if defined(CONFIG_NET_DEVMEM) @@ -62,14 +78,17 @@ struct dmabuf_genpool_chunk_owner { dma_addr_t base_dma_addr; }; -void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding); +void __net_devmem_dmabuf_binding_free(struct work_struct *wq); struct net_devmem_dmabuf_binding * -net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, - struct netlink_ext_ack *extack); +net_devmem_bind_dmabuf(struct net_device *dev, + enum dma_data_direction direction, + unsigned int dmabuf_fd, struct netlink_ext_ack *extack); +struct net_devmem_dmabuf_binding *net_devmem_lookup_dmabuf(u32 id); void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding); int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, struct net_devmem_dmabuf_binding *binding, struct netlink_ext_ack *extack); +void net_devmem_bind_tx_release(struct sock *sk); static inline struct dmabuf_genpool_chunk_owner * net_devmem_iov_to_chunk_owner(const struct net_iov *niov) @@ -98,10 +117,10 @@ static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov) ((unsigned long)net_iov_idx(niov) << PAGE_SHIFT); } -static inline void +static inline bool net_devmem_dmabuf_binding_get(struct net_devmem_dmabuf_binding *binding) { - refcount_inc(&binding->ref); + return refcount_inc_not_zero(&binding->ref); } static inline void @@ -110,30 +129,57 @@ net_devmem_dmabuf_binding_put(struct net_devmem_dmabuf_binding *binding) if (!refcount_dec_and_test(&binding->ref)) return; - __net_devmem_dmabuf_binding_free(binding); + INIT_WORK(&binding->unbind_w, __net_devmem_dmabuf_binding_free); + schedule_work(&binding->unbind_w); } +void net_devmem_get_net_iov(struct net_iov *niov); +void net_devmem_put_net_iov(struct net_iov *niov); + struct net_iov * net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding); void net_devmem_free_dmabuf(struct net_iov *ppiov); bool net_is_devmem_iov(struct net_iov *niov); +struct net_devmem_dmabuf_binding * +net_devmem_get_binding(struct sock *sk, unsigned int dmabuf_id); +struct net_iov * +net_devmem_get_niov_at(struct net_devmem_dmabuf_binding *binding, size_t addr, + size_t *off, size_t *size); #else struct net_devmem_dmabuf_binding; static inline void -__net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding) +net_devmem_dmabuf_binding_put(struct net_devmem_dmabuf_binding *binding) +{ +} + +static inline void net_devmem_get_net_iov(struct net_iov *niov) +{ +} + +static inline void net_devmem_put_net_iov(struct net_iov *niov) +{ +} + +static inline void 
__net_devmem_dmabuf_binding_free(struct work_struct *wq) { } static inline struct net_devmem_dmabuf_binding * net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, + enum dma_data_direction direction, struct netlink_ext_ack *extack) { return ERR_PTR(-EOPNOTSUPP); } +static inline struct net_devmem_dmabuf_binding *net_devmem_lookup_dmabuf(u32 id) +{ + return NULL; +} + static inline void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) { @@ -172,6 +218,25 @@ static inline bool net_is_devmem_iov(struct net_iov *niov) { return false; } + +static inline struct net_devmem_dmabuf_binding * +net_devmem_get_binding(struct sock *sk, unsigned int dmabuf_id) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline struct net_iov * +net_devmem_get_niov_at(struct net_devmem_dmabuf_binding *binding, size_t addr, + size_t *off, size_t *size) +{ + return NULL; +} + +static inline struct net_devmem_dmabuf_binding * +net_devmem_iov_binding(const struct net_iov *niov) +{ + return NULL; +} #endif #endif /* _NET_DEVMEM_H */ diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c index 70c634b9e7b0..93a04d18e505 100644 --- a/net/core/dst_cache.c +++ b/net/core/dst_cache.c @@ -17,6 +17,7 @@ struct dst_cache_pcpu { unsigned long refresh_ts; struct dst_entry *dst; + local_lock_t bh_lock; u32 cookie; union { struct in_addr in_saddr; @@ -65,10 +66,15 @@ fail: struct dst_entry *dst_cache_get(struct dst_cache *dst_cache) { + struct dst_entry *dst; + if (!dst_cache->cache) return NULL; - return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache)); + local_lock_nested_bh(&dst_cache->cache->bh_lock); + dst = dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache)); + local_unlock_nested_bh(&dst_cache->cache->bh_lock); + return dst; } EXPORT_SYMBOL_GPL(dst_cache_get); @@ -80,12 +86,16 @@ struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr) if (!dst_cache->cache) return NULL; + local_lock_nested_bh(&dst_cache->cache->bh_lock); idst = this_cpu_ptr(dst_cache->cache); dst = dst_cache_per_cpu_get(dst_cache, idst); - if (!dst) + if (!dst) { + local_unlock_nested_bh(&dst_cache->cache->bh_lock); return NULL; + } *saddr = idst->in_saddr.s_addr; + local_unlock_nested_bh(&dst_cache->cache->bh_lock); return dst_rtable(dst); } EXPORT_SYMBOL_GPL(dst_cache_get_ip4); @@ -98,9 +108,11 @@ void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst, if (!dst_cache->cache) return; + local_lock_nested_bh(&dst_cache->cache->bh_lock); idst = this_cpu_ptr(dst_cache->cache); dst_cache_per_cpu_dst_set(idst, dst, 0); idst->in_saddr.s_addr = saddr; + local_unlock_nested_bh(&dst_cache->cache->bh_lock); } EXPORT_SYMBOL_GPL(dst_cache_set_ip4); @@ -113,10 +125,13 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst, if (!dst_cache->cache) return; + local_lock_nested_bh(&dst_cache->cache->bh_lock); + idst = this_cpu_ptr(dst_cache->cache); dst_cache_per_cpu_dst_set(idst, dst, rt6_get_cookie(dst_rt6_info(dst))); idst->in6_saddr = *saddr; + local_unlock_nested_bh(&dst_cache->cache->bh_lock); } EXPORT_SYMBOL_GPL(dst_cache_set_ip6); @@ -129,12 +144,17 @@ struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache, if (!dst_cache->cache) return NULL; + local_lock_nested_bh(&dst_cache->cache->bh_lock); + idst = this_cpu_ptr(dst_cache->cache); dst = dst_cache_per_cpu_get(dst_cache, idst); - if (!dst) + if (!dst) { + local_unlock_nested_bh(&dst_cache->cache->bh_lock); return NULL; + } *saddr = idst->in6_saddr; + 
local_unlock_nested_bh(&dst_cache->cache->bh_lock); return dst; } EXPORT_SYMBOL_GPL(dst_cache_get_ip6); @@ -142,10 +162,14 @@ EXPORT_SYMBOL_GPL(dst_cache_get_ip6); int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp) { + unsigned int i; + dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu, gfp | __GFP_ZERO); if (!dst_cache->cache) return -ENOMEM; + for_each_possible_cpu(i) + local_lock_init(&per_cpu_ptr(dst_cache->cache, i)->bh_lock); dst_cache_reset(dst_cache); return 0; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 7af302080a66..8ca634964e36 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -874,13 +874,14 @@ int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, bool rtnl_held) { struct fib_rule *rule = NULL, *r, *last = NULL; - struct fib_rule_hdr *frh = nlmsg_data(nlh); int err = -EINVAL, unresolved = 0; struct fib_rules_ops *ops = NULL; struct nlattr *tb[FRA_MAX + 1]; bool user_priority = false; + struct fib_rule_hdr *frh; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { + frh = nlmsg_payload(nlh, sizeof(*frh)); + if (!frh) { NL_SET_ERR_MSG(extack, "Invalid msg length"); goto errout; } @@ -1002,13 +1003,14 @@ int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, bool rtnl_held) { struct fib_rule *rule = NULL, *nlrule = NULL; - struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct nlattr *tb[FRA_MAX+1]; bool user_priority = false; + struct fib_rule_hdr *frh; int err = -EINVAL; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { + frh = nlmsg_payload(nlh, sizeof(*frh)); + if (!frh) { NL_SET_ERR_MSG(extack, "Invalid msg length"); goto errout; } @@ -1260,12 +1262,12 @@ static int fib_valid_dumprule_req(const struct nlmsghdr *nlh, { struct fib_rule_hdr *frh; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { + frh = nlmsg_payload(nlh, sizeof(*frh)); + if (!frh) { NL_SET_ERR_MSG(extack, "Invalid header for fib rule dump request"); return -EINVAL; } - frh = nlmsg_data(nlh); if (frh->dst_len || frh->src_len || frh->tos || frh->table || frh->res1 || frh->res2 || frh->action || frh->flags) { NL_SET_ERR_MSG(extack, diff --git a/net/core/lock_debug.c b/net/core/lock_debug.c index 941e26c1343d..9e9fb25314b9 100644 --- a/net/core/lock_debug.c +++ b/net/core/lock_debug.c @@ -18,9 +18,12 @@ int netdev_debug_event(struct notifier_block *nb, unsigned long event, /* Keep enum and don't add default to trigger -Werror=switch */ switch (cmd) { + case NETDEV_XDP_FEAT_CHANGE: + netdev_assert_locked(dev); + fallthrough; + case NETDEV_CHANGE: case NETDEV_REGISTER: case NETDEV_UP: - case NETDEV_CHANGE: netdev_ops_assert_locked(dev); fallthrough; case NETDEV_DOWN: @@ -58,7 +61,6 @@ int netdev_debug_event(struct notifier_block *nb, unsigned long event, case NETDEV_OFFLOAD_XSTATS_DISABLE: case NETDEV_OFFLOAD_XSTATS_REPORT_USED: case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: - case NETDEV_XDP_FEAT_CHANGE: ASSERT_RTNL(); break; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a07249b59ae1..254067b719da 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2430,12 +2430,12 @@ static int neightbl_valid_dump_info(const struct nlmsghdr *nlh, { struct ndtmsg *ndtm; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) { + ndtm = nlmsg_payload(nlh, sizeof(*ndtm)); + if (!ndtm) { NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request"); return -EINVAL; } - ndtm = nlmsg_data(nlh); if 
(ndtm->ndtm_pad1 || ndtm->ndtm_pad2) { NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request"); return -EINVAL; @@ -2747,12 +2747,12 @@ static int neigh_valid_dump_req(const struct nlmsghdr *nlh, if (strict_check) { struct ndmsg *ndm; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { + ndm = nlmsg_payload(nlh, sizeof(*ndm)); + if (!ndm) { NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request"); return -EINVAL; } - ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex || ndm->ndm_state || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request"); @@ -2855,12 +2855,12 @@ static int neigh_valid_get_req(const struct nlmsghdr *nlh, struct ndmsg *ndm; int err, i; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { + ndm = nlmsg_payload(nlh, sizeof(*ndm)); + if (!ndm) { NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request"); return -EINVAL; } - ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request"); diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 3e92bf0f9060..4f0f0709a1cb 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -132,8 +132,9 @@ static int softnet_seq_show(struct seq_file *seq, void *v) rcu_read_lock(); fl = rcu_dereference(sd->flow_limit); + /* Pairs with WRITE_ONCE() in skb_flow_limit() */ if (fl) - flow_limit_count = fl->count; + flow_limit_count = READ_ONCE(fl->count); rcu_read_unlock(); #endif @@ -144,11 +145,11 @@ static int softnet_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x " "%08x %08x\n", - sd->processed, atomic_read(&sd->dropped), - sd->time_squeeze, 0, + READ_ONCE(sd->processed), atomic_read(&sd->dropped), + READ_ONCE(sd->time_squeeze), 0, 0, 0, 0, 0, /* was fastroute */ 0, /* was cpu_collision */ - sd->received_rps, flow_limit_count, + READ_ONCE(sd->received_rps), flow_limit_count, input_qlen + process_qlen, (int)seq->index, input_qlen, process_qlen); return 0; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b0dfdf791ece..42ee7fce3d95 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -163,16 +163,45 @@ static void ops_pre_exit_list(const struct pernet_operations *ops, } } +static void ops_exit_rtnl_list(const struct list_head *ops_list, + const struct pernet_operations *ops, + struct list_head *net_exit_list) +{ + const struct pernet_operations *saved_ops = ops; + LIST_HEAD(dev_kill_list); + struct net *net; + + rtnl_lock(); + + list_for_each_entry(net, net_exit_list, exit_list) { + __rtnl_net_lock(net); + + ops = saved_ops; + list_for_each_entry_continue_reverse(ops, ops_list, list) { + if (ops->exit_rtnl) + ops->exit_rtnl(net, &dev_kill_list); + } + + __rtnl_net_unlock(net); + } + + unregister_netdevice_many(&dev_kill_list); + + rtnl_unlock(); +} + static void ops_exit_list(const struct pernet_operations *ops, struct list_head *net_exit_list) { - struct net *net; if (ops->exit) { + struct net *net; + list_for_each_entry(net, net_exit_list, exit_list) { ops->exit(net); cond_resched(); } } + if (ops->exit_batch) ops->exit_batch(net_exit_list); } @@ -188,6 +217,56 @@ static void ops_free_list(const struct pernet_operations *ops, } } +static void ops_undo_list(const struct list_head *ops_list, + const struct pernet_operations *ops, + struct list_head *net_exit_list, + bool expedite_rcu) +{ + 
const struct pernet_operations *saved_ops; + bool hold_rtnl = false; + + if (!ops) + ops = list_entry(ops_list, typeof(*ops), list); + + saved_ops = ops; + + list_for_each_entry_continue_reverse(ops, ops_list, list) { + hold_rtnl |= !!ops->exit_rtnl; + ops_pre_exit_list(ops, net_exit_list); + } + + /* Another CPU might be rcu-iterating the list, wait for it. + * This needs to be before calling the exit() notifiers, so the + * rcu_barrier() after ops_undo_list() isn't sufficient alone. + * Also the pre_exit() and exit() methods need this barrier. + */ + if (expedite_rcu) + synchronize_rcu_expedited(); + else + synchronize_rcu(); + + if (hold_rtnl) + ops_exit_rtnl_list(ops_list, saved_ops, net_exit_list); + + ops = saved_ops; + list_for_each_entry_continue_reverse(ops, ops_list, list) + ops_exit_list(ops, net_exit_list); + + ops = saved_ops; + list_for_each_entry_continue_reverse(ops, ops_list, list) + ops_free_list(ops, net_exit_list); +} + +static void ops_undo_single(struct pernet_operations *ops, + struct list_head *net_exit_list) +{ + LIST_HEAD(ops_list); + + list_add(&ops->list, &ops_list); + ops_undo_list(&ops_list, NULL, net_exit_list, false); + list_del(&ops->list); +} + /* should be called with nsid_lock held */ static int alloc_netid(struct net *net, struct net *peer, int reqid) { @@ -351,9 +430,8 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_ static __net_init int setup_net(struct net *net) { /* Must be called with pernet_ops_rwsem held */ - const struct pernet_operations *ops, *saved_ops; + const struct pernet_operations *ops; LIST_HEAD(net_exit_list); - LIST_HEAD(dev_kill_list); int error = 0; preempt_disable(); @@ -376,29 +454,7 @@ out_undo: * for the pernet modules whose init functions did not fail. */ list_add(&net->exit_list, &net_exit_list); - saved_ops = ops; - list_for_each_entry_continue_reverse(ops, &pernet_list, list) - ops_pre_exit_list(ops, &net_exit_list); - - synchronize_rcu(); - - ops = saved_ops; - rtnl_lock(); - list_for_each_entry_continue_reverse(ops, &pernet_list, list) { - if (ops->exit_batch_rtnl) - ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list); - } - unregister_netdevice_many(&dev_kill_list); - rtnl_unlock(); - - ops = saved_ops; - list_for_each_entry_continue_reverse(ops, &pernet_list, list) - ops_exit_list(ops, &net_exit_list); - - ops = saved_ops; - list_for_each_entry_continue_reverse(ops, &pernet_list, list) - ops_free_list(ops, &net_exit_list); - + ops_undo_list(&pernet_list, ops, &net_exit_list, false); rcu_barrier(); goto out; } @@ -594,11 +650,9 @@ struct task_struct *cleanup_net_task; static void cleanup_net(struct work_struct *work) { - const struct pernet_operations *ops; - struct net *net, *tmp, *last; struct llist_node *net_kill_list; + struct net *net, *tmp, *last; LIST_HEAD(net_exit_list); - LIST_HEAD(dev_kill_list); cleanup_net_task = current; @@ -629,33 +683,7 @@ static void cleanup_net(struct work_struct *work) list_add_tail(&net->exit_list, &net_exit_list); } - /* Run all of the network namespace pre_exit methods */ - list_for_each_entry_reverse(ops, &pernet_list, list) - ops_pre_exit_list(ops, &net_exit_list); - - /* - * Another CPU might be rcu-iterating the list, wait for it. - * This needs to be before calling the exit() notifiers, so - * the rcu_barrier() below isn't sufficient alone. - * Also the pre_exit() and exit() methods need this barrier. 
- */ - synchronize_rcu_expedited(); - - rtnl_lock(); - list_for_each_entry_reverse(ops, &pernet_list, list) { - if (ops->exit_batch_rtnl) - ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list); - } - unregister_netdevice_many(&dev_kill_list); - rtnl_unlock(); - - /* Run all of the network namespace exit methods */ - list_for_each_entry_reverse(ops, &pernet_list, list) - ops_exit_list(ops, &net_exit_list); - - /* Free the net generic variables */ - list_for_each_entry_reverse(ops, &pernet_list, list) - ops_free_list(ops, &net_exit_list); + ops_undo_list(&pernet_list, NULL, &net_exit_list, true); up_read(&pernet_ops_rwsem); @@ -1239,31 +1267,13 @@ void __init net_ns_init(void) rtnl_register_many(net_ns_rtnl_msg_handlers); } -static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list) -{ - ops_pre_exit_list(ops, net_exit_list); - synchronize_rcu(); - - if (ops->exit_batch_rtnl) { - LIST_HEAD(dev_kill_list); - - rtnl_lock(); - ops->exit_batch_rtnl(net_exit_list, &dev_kill_list); - unregister_netdevice_many(&dev_kill_list); - rtnl_unlock(); - } - ops_exit_list(ops, net_exit_list); - - ops_free_list(ops, net_exit_list); -} - #ifdef CONFIG_NET_NS static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { + LIST_HEAD(net_exit_list); struct net *net; int error; - LIST_HEAD(net_exit_list); list_add_tail(&ops->list, list); if (ops->init || ops->id) { @@ -1282,21 +1292,21 @@ static int __register_pernet_operations(struct list_head *list, out_undo: /* If I have an error cleanup all namespaces I initialized */ list_del(&ops->list); - free_exit_list(ops, &net_exit_list); + ops_undo_single(ops, &net_exit_list); return error; } static void __unregister_pernet_operations(struct pernet_operations *ops) { - struct net *net; LIST_HEAD(net_exit_list); + struct net *net; - list_del(&ops->list); /* See comment in __register_pernet_operations() */ for_each_net(net) list_add_tail(&net->exit_list, &net_exit_list); - free_exit_list(ops, &net_exit_list); + list_del(&ops->list); + ops_undo_single(ops, &net_exit_list); } #else @@ -1318,8 +1328,9 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) list_del(&ops->list); } else { LIST_HEAD(net_exit_list); + list_add(&init_net.exit_list, &net_exit_list); - free_exit_list(ops, &net_exit_list); + ops_undo_single(ops, &net_exit_list); } } diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 739f7b6506a6..4fc44587f493 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -99,6 +99,12 @@ static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_IRQ_SUSPE [NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, }, }; +/* NETDEV_CMD_BIND_TX - do */ +static const struct nla_policy netdev_bind_tx_nl_policy[NETDEV_A_DMABUF_FD + 1] = { + [NETDEV_A_DMABUF_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), + [NETDEV_A_DMABUF_FD] = { .type = NLA_U32, }, +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -190,6 +196,13 @@ static const struct genl_split_ops netdev_nl_ops[] = { .maxattr = NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, + { + .cmd = NETDEV_CMD_BIND_TX, + .doit = netdev_nl_bind_tx_doit, + .policy = netdev_bind_tx_nl_policy, + .maxattr = NETDEV_A_DMABUF_FD, + .flags = GENL_CMD_CAP_DO, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 17d39fd64c94..cf3fad74511f 100644 
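(The netdev_bind_tx_nl_policy and split-op entries above are generated from the netdev family spec in Documentation/netlink/specs/netdev.yaml. For illustration, the new op can be driven from userspace with the YNL CLI — the ifindex and fd values below are made up, and the fd must refer to an already-created dmabuf, e.g. one exported via udmabuf:

	./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \
		--do bind-tx --json '{"ifindex": 3, "fd": 10}'

On success the reply carries the binding id, which a sender then references per-sendmsg via the SCM_DEVMEM_DMABUF cmsg handled in net/core/sock.c further down.)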
--- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -34,6 +34,7 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index a877693fecd6..762570dcda61 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -38,6 +38,8 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, u64 xdp_rx_meta = 0; void *hdr; + netdev_assert_locked(netdev); /* note: rtnl_lock may not be held! */ + hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; @@ -122,15 +124,14 @@ int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info) if (!rsp) return -ENOMEM; - rtnl_lock(); - - netdev = __dev_get_by_index(genl_info_net(info), ifindex); - if (netdev) - err = netdev_nl_dev_fill(netdev, rsp, info); - else + netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); + if (!netdev) { err = -ENODEV; + goto err_free_msg; + } - rtnl_unlock(); + err = netdev_nl_dev_fill(netdev, rsp, info); + netdev_unlock(netdev); if (err) goto err_free_msg; @@ -146,18 +147,15 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); struct net *net = sock_net(skb->sk); - struct net_device *netdev; - int err = 0; + int err; - rtnl_lock(); - for_each_netdev_dump(net, netdev, ctx->ifindex) { + for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) { err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb)); if (err < 0) - break; + return err; } - rtnl_unlock(); - return err; + return 0; } static int @@ -481,18 +479,15 @@ int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info) if (!rsp) return -ENOMEM; - rtnl_lock(); - - netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); + netdev = netdev_get_by_index_lock_ops_compat(genl_info_net(info), + ifindex); if (netdev) { err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info); - netdev_unlock(netdev); + netdev_unlock_ops_compat(netdev); } else { err = -ENODEV; } - rtnl_unlock(); - if (err) goto err_free_msg; @@ -541,17 +536,17 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (info->attrs[NETDEV_A_QUEUE_IFINDEX]) ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); - rtnl_lock(); if (ifindex) { - netdev = netdev_get_by_index_lock(net, ifindex); + netdev = netdev_get_by_index_lock_ops_compat(net, ifindex); if (netdev) { err = netdev_nl_queue_dump_one(netdev, skb, info, ctx); - netdev_unlock(netdev); + netdev_unlock_ops_compat(netdev); } else { err = -ENODEV; } } else { - for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) { + for_each_netdev_lock_ops_compat_scoped(net, netdev, + ctx->ifindex) { err = netdev_nl_queue_dump_one(netdev, skb, info, ctx); if (err < 0) break; @@ -559,7 +554,6 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) ctx->txq_idx = 0; } } - rtnl_unlock(); return err; } @@ -832,26 +826,31 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, if (info->attrs[NETDEV_A_QSTATS_IFINDEX]) ifindex = nla_get_u32(info->attrs[NETDEV_A_QSTATS_IFINDEX]); - rtnl_lock(); if (ifindex) { - netdev = __dev_get_by_index(net, ifindex); - if (netdev && netdev->stat_ops) { - err = 
netdev_nl_qstats_get_dump_one(netdev, scope, skb, - info, ctx); - } else { + netdev = netdev_get_by_index_lock_ops_compat(net, ifindex); + if (!netdev) { NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QSTATS_IFINDEX]); - err = netdev ? -EOPNOTSUPP : -ENODEV; + return -ENODEV; } - } else { - for_each_netdev_dump(net, netdev, ctx->ifindex) { + if (netdev->stat_ops) { err = netdev_nl_qstats_get_dump_one(netdev, scope, skb, info, ctx); - if (err < 0) - break; + } else { + NL_SET_BAD_ATTR(info->extack, + info->attrs[NETDEV_A_QSTATS_IFINDEX]); + err = -EOPNOTSUPP; } + netdev_unlock_ops_compat(netdev); + return err; + } + + for_each_netdev_lock_ops_compat_scoped(net, netdev, ctx->ifindex) { + err = netdev_nl_qstats_get_dump_one(netdev, scope, skb, + info, ctx); + if (err < 0) + break; } - rtnl_unlock(); return err; } @@ -908,7 +907,8 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) goto err_unlock; } - binding = net_devmem_bind_dmabuf(netdev, dmabuf_fd, info->extack); + binding = net_devmem_bind_dmabuf(netdev, DMA_FROM_DEVICE, dmabuf_fd, + info->extack); if (IS_ERR(binding)) { err = PTR_ERR(binding); goto err_unlock; @@ -969,6 +969,83 @@ err_genlmsg_free: return err; } +int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net_devmem_dmabuf_binding *binding; + struct netdev_nl_sock *priv; + struct net_device *netdev; + u32 ifindex, dmabuf_fd; + struct sk_buff *rsp; + int err = 0; + void *hdr; + + if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) || + GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD)) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); + dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]); + + priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk); + if (IS_ERR(priv)) + return PTR_ERR(priv); + + rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!rsp) + return -ENOMEM; + + hdr = genlmsg_iput(rsp, info); + if (!hdr) { + err = -EMSGSIZE; + goto err_genlmsg_free; + } + + mutex_lock(&priv->lock); + + netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); + if (!netdev) { + err = -ENODEV; + goto err_unlock_sock; + } + + if (!netif_device_present(netdev)) { + err = -ENODEV; + goto err_unlock_netdev; + } + + if (!netdev->netmem_tx) { + err = -EOPNOTSUPP; + NL_SET_ERR_MSG(info->extack, + "Driver does not support netmem TX"); + goto err_unlock_netdev; + } + + binding = net_devmem_bind_dmabuf(netdev, DMA_TO_DEVICE, dmabuf_fd, + info->extack); + if (IS_ERR(binding)) { + err = PTR_ERR(binding); + goto err_unlock_netdev; + } + + list_add(&binding->list, &priv->bindings); + + nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id); + genlmsg_end(rsp, hdr); + + netdev_unlock(netdev); + mutex_unlock(&priv->lock); + + return genlmsg_reply(rsp, info); + +err_unlock_netdev: + netdev_unlock(netdev); +err_unlock_sock: + mutex_unlock(&priv->lock); +err_genlmsg_free: + nlmsg_free(rsp); + return err; +} + void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv) { INIT_LIST_HEAD(&priv->bindings); @@ -1009,10 +1086,14 @@ static int netdev_genl_netdevice_event(struct notifier_block *nb, switch (event) { case NETDEV_REGISTER: + netdev_lock_ops_to_full(netdev); netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_ADD_NTF); + netdev_unlock_full_to_ops(netdev); break; case NETDEV_UNREGISTER: + netdev_lock(netdev); netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_DEL_NTF); + netdev_unlock(netdev); break; case NETDEV_XDP_FEAT_CHANGE: netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_CHANGE_NTF); diff --git 
a/net/core/netmem_priv.h b/net/core/netmem_priv.h index 7eadb8393e00..cd95394399b4 100644 --- a/net/core/netmem_priv.h +++ b/net/core/netmem_priv.h @@ -5,7 +5,7 @@ static inline unsigned long netmem_get_pp_magic(netmem_ref netmem) { - return __netmem_clear_lsb(netmem)->pp_magic; + return __netmem_clear_lsb(netmem)->pp_magic & ~PP_DMA_INDEX_MASK; } static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic) @@ -15,9 +15,16 @@ static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic) static inline void netmem_clear_pp_magic(netmem_ref netmem) { + WARN_ON_ONCE(__netmem_clear_lsb(netmem)->pp_magic & PP_DMA_INDEX_MASK); + __netmem_clear_lsb(netmem)->pp_magic = 0; } +static inline bool netmem_is_pp(netmem_ref netmem) +{ + return (netmem_get_pp_magic(netmem) & PP_MAGIC_MASK) == PP_SIGNATURE; +} + static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool) { __netmem_clear_lsb(netmem)->pp = pool; @@ -28,4 +35,28 @@ static inline void netmem_set_dma_addr(netmem_ref netmem, { __netmem_clear_lsb(netmem)->dma_addr = dma_addr; } + +static inline unsigned long netmem_get_dma_index(netmem_ref netmem) +{ + unsigned long magic; + + if (WARN_ON_ONCE(netmem_is_net_iov(netmem))) + return 0; + + magic = __netmem_clear_lsb(netmem)->pp_magic; + + return (magic & PP_DMA_INDEX_MASK) >> PP_DMA_INDEX_SHIFT; +} + +static inline void netmem_set_dma_index(netmem_ref netmem, + unsigned long id) +{ + unsigned long magic; + + if (WARN_ON_ONCE(netmem_is_net_iov(netmem))) + return; + + magic = netmem_get_pp_magic(netmem) | (id << PP_DMA_INDEX_SHIFT); + __netmem_clear_lsb(netmem)->pp_magic = magic; +} #endif diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 7745ad924ae2..974f3eef2efa 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -276,8 +276,7 @@ static int page_pool_init(struct page_pool *pool, /* Driver calling page_pool_create() also call page_pool_destroy() */ refcount_set(&pool->user_cnt, 1); - if (pool->dma_map) - get_device(pool->p.dev); + xa_init_flags(&pool->dma_mapped, XA_FLAGS_ALLOC1); if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) { netdev_assert_locked(pool->slow.netdev); @@ -320,9 +319,7 @@ free_ptr_ring: static void page_pool_uninit(struct page_pool *pool) { ptr_ring_cleanup(&pool->ring, NULL); - - if (pool->dma_map) - put_device(pool->p.dev); + xa_destroy(&pool->dma_mapped); #ifdef CONFIG_PAGE_POOL_STATS if (!pool->system) @@ -463,13 +460,21 @@ page_pool_dma_sync_for_device(const struct page_pool *pool, netmem_ref netmem, u32 dma_sync_size) { - if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) - __page_pool_dma_sync_for_device(pool, netmem, dma_sync_size); + if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) { + rcu_read_lock(); + /* re-check under rcu_read_lock() to sync with page_pool_scrub() */ + if (pool->dma_sync) + __page_pool_dma_sync_for_device(pool, netmem, + dma_sync_size); + rcu_read_unlock(); + } } -static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem) +static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t gfp) { dma_addr_t dma; + int err; + u32 id; /* Setup DMA mapping: use 'struct page' area for storing DMA-addr * since dma_addr_t can be either 32 or 64 bits and does not always fit @@ -483,15 +488,30 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem) if (dma_mapping_error(pool->p.dev, dma)) return false; - if (page_pool_set_dma_addr_netmem(netmem, dma)) + if (page_pool_set_dma_addr_netmem(netmem, dma)) { + WARN_ONCE(1, 
"unexpected DMA address, please report to netdev@"); goto unmap_failed; + } + if (in_softirq()) + err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem), + PP_DMA_INDEX_LIMIT, gfp); + else + err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem), + PP_DMA_INDEX_LIMIT, gfp); + if (err) { + WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@"); + goto unset_failed; + } + + netmem_set_dma_index(netmem, id); page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len); return true; +unset_failed: + page_pool_set_dma_addr_netmem(netmem, 0); unmap_failed: - WARN_ONCE(1, "unexpected DMA address, please report to netdev@"); dma_unmap_page_attrs(pool->p.dev, dma, PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); @@ -508,7 +528,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, if (unlikely(!page)) return NULL; - if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page)))) { + if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page), gfp))) { put_page(page); return NULL; } @@ -554,7 +574,7 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, */ for (i = 0; i < nr_pages; i++) { netmem = pool->alloc.cache[i]; - if (dma_map && unlikely(!page_pool_dma_map(pool, netmem))) { + if (dma_map && unlikely(!page_pool_dma_map(pool, netmem, gfp))) { put_page(netmem_to_page(netmem)); continue; } @@ -656,6 +676,8 @@ void page_pool_clear_pp_info(netmem_ref netmem) static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, netmem_ref netmem) { + struct page *old, *page = netmem_to_page(netmem); + unsigned long id; dma_addr_t dma; if (!pool->dma_map) @@ -664,6 +686,17 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, */ return; + id = netmem_get_dma_index(netmem); + if (!id) + return; + + if (in_softirq()) + old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0); + else + old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0); + if (old != page) + return; + dma = page_pool_get_dma_addr_netmem(netmem); /* When page is unmapped, it cannot be returned to our pool */ @@ -671,6 +704,7 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); page_pool_set_dma_addr_netmem(netmem, 0); + netmem_set_dma_index(netmem, 0); } /* Disconnects a page (from a page_pool). API users can have a need @@ -805,6 +839,10 @@ static bool page_pool_napi_local(const struct page_pool *pool) const struct napi_struct *napi; u32 cpuid; + /* On PREEMPT_RT the softirq can be preempted by the consumer */ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + return false; + if (unlikely(!in_softirq())) return false; @@ -1080,8 +1118,29 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool) static void page_pool_scrub(struct page_pool *pool) { + unsigned long id; + void *ptr; + page_pool_empty_alloc_cache_once(pool); - pool->destroy_cnt++; + if (!pool->destroy_cnt++ && pool->dma_map) { + if (pool->dma_sync) { + /* Disable page_pool_dma_sync_for_device() */ + pool->dma_sync = false; + + /* Make sure all concurrent returns that may see the old + * value of dma_sync (and thus perform a sync) have + * finished before doing the unmapping below. Skip the + * wait if the device doesn't actually need syncing, or + * if there are no outstanding mapped pages. 
+ */ + if (dma_dev_need_sync(pool->p.dev) && + !xa_empty(&pool->dma_mapped)) + synchronize_net(); + } + + xa_for_each(&pool->dma_mapped, id, ptr) + __page_pool_release_page_dma(pool, page_to_netmem(ptr)); + } /* No more consumers should exist, but producers could still * be in-flight. diff --git a/net/core/pktgen.c b/net/core/pktgen.c index fe7fdefab994..0ebe5461d4d9 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -177,7 +177,7 @@ #define MAX_IMIX_ENTRIES 20 #define IMIX_PRECISION 100 /* Precision of IMIX distribution */ -#define func_enter() pr_debug("entering %s\n", __func__); +#define func_enter() pr_debug("entering %s\n", __func__) #define PKT_FLAGS \ pf(IPV6) /* Interface in IPV6 Mode */ \ @@ -227,12 +227,12 @@ static char *pkt_flag_names[] = { /* Xmit modes */ #define M_START_XMIT 0 /* Default normal TX */ -#define M_NETIF_RECEIVE 1 /* Inject packets into stack */ +#define M_NETIF_RECEIVE 1 /* Inject packets into stack */ #define M_QUEUE_XMIT 2 /* Inject packet into qdisc */ /* If lock -- protects updating of if_list */ -#define if_lock(t) mutex_lock(&(t->if_lock)); -#define if_unlock(t) mutex_unlock(&(t->if_lock)); +#define if_lock(t) mutex_lock(&(t->if_lock)) +#define if_unlock(t) mutex_unlock(&(t->if_lock)) /* Used to help with determining the pkts on receive */ #define PKTGEN_MAGIC 0xbe9be955 @@ -283,7 +283,8 @@ struct pktgen_dev { int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ int nfrags; int removal_mark; /* non-zero => the device is marked for - * removal by worker thread */ + * removal by worker thread + */ struct page *page; u64 delay; /* nano-seconds */ @@ -346,10 +347,12 @@ struct pktgen_dev { __u16 udp_dst_max; /* exclusive, dest UDP port */ /* DSCP + ECN */ - __u8 tos; /* six MSB of (former) IPv4 TOS - are for dscp codepoint */ - __u8 traffic_class; /* ditto for the (former) Traffic Class in IPv6 - (see RFC 3260, sec. 4) */ + __u8 tos; /* six MSB of (former) IPv4 TOS + * are for dscp codepoint + */ + __u8 traffic_class; /* ditto for the (former) Traffic Class in IPv6 + * (see RFC 3260, sec. 4) + */ /* IMIX */ unsigned int n_imix_entries; @@ -389,12 +392,12 @@ struct pktgen_dev { __u8 hh[14]; /* = { - 0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB, - - We fill in SRC address later - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x08, 0x00 - }; + * 0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB, + * + * We fill in SRC address later + * 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + * 0x08, 0x00 + * }; */ __u16 pad; /* pad out the hh struct to an even 16 bytes */ @@ -458,7 +461,8 @@ struct pktgen_thread { char result[512]; /* Field for thread to receive "posted" events terminate, - stop ifs etc. */ + * stop ifs etc. + */ u32 control; int cpu; @@ -472,8 +476,7 @@ struct pktgen_thread { #define FIND 0 static const char version[] = - "Packet Generator for packet performance testing. " - "Version: " VERSION "\n"; + "Packet Generator for packet performance testing. 
Version: " VERSION "\n"; static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i); static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); @@ -624,8 +627,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_printf(seq, "%pM\n", pkt_dev->dst_mac); seq_printf(seq, - " udp_src_min: %d udp_src_max: %d" - " udp_dst_min: %d udp_dst_max: %d\n", + " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", pkt_dev->udp_src_min, pkt_dev->udp_src_max, pkt_dev->udp_dst_min, pkt_dev->udp_dst_max); @@ -754,6 +756,7 @@ static ssize_t hex32_arg(const char __user *user_buffer, size_t maxlen, for (; i < maxlen; i++) { int value; char c; + if (get_user(c, &user_buffer[i])) return -EFAULT; value = hex_to_bin(c); @@ -773,6 +776,7 @@ static ssize_t count_trail_chars(const char __user *user_buffer, size_t maxlen) for (i = 0; i < maxlen; i++) { char c; + if (get_user(c, &user_buffer[i])) return -EFAULT; switch (c) { @@ -799,6 +803,7 @@ static ssize_t num_arg(const char __user *user_buffer, size_t maxlen, for (i = 0; i < maxlen; i++) { char c; + if (get_user(c, &user_buffer[i])) return -EFAULT; if ((c >= '0') && (c <= '9')) { @@ -816,6 +821,7 @@ static ssize_t strn_len(const char __user *user_buffer, size_t maxlen) for (i = 0; i < maxlen; i++) { char c; + if (get_user(c, &user_buffer[i])) return -EFAULT; switch (c) { @@ -974,8 +980,8 @@ static __u32 pktgen_read_flag(const char *f, bool *disable) } static ssize_t pktgen_if_write(struct file *file, - const char __user * user_buffer, size_t count, - loff_t * offset) + const char __user *user_buffer, size_t count, + loff_t *offset) { struct seq_file *seq = file->private_data; struct pktgen_dev *pkt_dev = seq->private; @@ -1307,9 +1313,9 @@ static ssize_t pktgen_if_write(struct file *file, put_page(pkt_dev->page); pkt_dev->page = NULL; } - } - else + } else { sprintf(pg_result, "ERROR: node not possible"); + } return count; } if (!strcmp(name, "xmit_mode")) { @@ -1413,8 +1419,7 @@ static ssize_t pktgen_if_write(struct file *file, return -EFAULT; buf[len] = 0; if (strcmp(buf, pkt_dev->dst_min) != 0) { - memset(pkt_dev->dst_min, 0, sizeof(pkt_dev->dst_min)); - strcpy(pkt_dev->dst_min, buf); + strscpy_pad(pkt_dev->dst_min, buf); pkt_dev->daddr_min = in_aton(pkt_dev->dst_min); pkt_dev->cur_daddr = pkt_dev->daddr_min; } @@ -1434,8 +1439,7 @@ static ssize_t pktgen_if_write(struct file *file, return -EFAULT; buf[len] = 0; if (strcmp(buf, pkt_dev->dst_max) != 0) { - memset(pkt_dev->dst_max, 0, sizeof(pkt_dev->dst_max)); - strcpy(pkt_dev->dst_max, buf); + strscpy_pad(pkt_dev->dst_max, buf); pkt_dev->daddr_max = in_aton(pkt_dev->dst_max); pkt_dev->cur_daddr = pkt_dev->daddr_max; } @@ -1544,8 +1548,7 @@ static ssize_t pktgen_if_write(struct file *file, return -EFAULT; buf[len] = 0; if (strcmp(buf, pkt_dev->src_min) != 0) { - memset(pkt_dev->src_min, 0, sizeof(pkt_dev->src_min)); - strcpy(pkt_dev->src_min, buf); + strscpy_pad(pkt_dev->src_min, buf); pkt_dev->saddr_min = in_aton(pkt_dev->src_min); pkt_dev->cur_saddr = pkt_dev->saddr_min; } @@ -1565,8 +1568,7 @@ static ssize_t pktgen_if_write(struct file *file, return -EFAULT; buf[len] = 0; if (strcmp(buf, pkt_dev->src_max) != 0) { - memset(pkt_dev->src_max, 0, sizeof(pkt_dev->src_max)); - strcpy(pkt_dev->src_max, buf); + strscpy_pad(pkt_dev->src_max, buf); pkt_dev->saddr_max = in_aton(pkt_dev->src_max); pkt_dev->cur_saddr = pkt_dev->saddr_max; } @@ -1909,8 +1911,8 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) } static ssize_t 
pktgen_thread_write(struct file *file, - const char __user * user_buffer, - size_t count, loff_t * offset) + const char __user *user_buffer, + size_t count, loff_t *offset) { struct seq_file *seq = file->private_data; struct pktgen_thread *t = seq->private; @@ -1962,6 +1964,7 @@ static ssize_t pktgen_thread_write(struct file *file, if (!strcmp(name, "add_device")) { char f[32]; + memset(f, 0, 32); max = min(sizeof(f) - 1, count - i); len = strn_len(&user_buffer[i], max); @@ -2397,13 +2400,14 @@ static inline int f_pick(struct pktgen_dev *pkt_dev) /* If there was already an IPSEC SA, we keep it as is, else * we go look for it ... -*/ + */ #define DUMMY_MARK 0 static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) { #ifdef CONFIG_XFRM struct xfrm_state *x = pkt_dev->flows[flow].x; struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id); + if (!x) { if (pkt_dev->spi) { @@ -2436,6 +2440,7 @@ static void set_cur_queue_map(struct pktgen_dev *pkt_dev) else if (pkt_dev->queue_map_min <= pkt_dev->queue_map_max) { __u16 t; + if (pkt_dev->flags & F_QUEUE_MAP_RND) { t = get_random_u32_inclusive(pkt_dev->queue_map_min, pkt_dev->queue_map_max); @@ -2517,6 +2522,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (pkt_dev->flags & F_MPLS_RND) { unsigned int i; + for (i = 0; i < pkt_dev->nr_labels; i++) if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM) pkt_dev->labels[i] = MPLS_STACK_BOTTOM | @@ -2561,6 +2567,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) imx = ntohl(pkt_dev->saddr_max); if (imn < imx) { __u32 t; + if (pkt_dev->flags & F_IPSRC_RND) t = get_random_u32_inclusive(imn, imx - 1); else { @@ -2581,6 +2588,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (imn < imx) { __u32 t; __be32 s; + if (pkt_dev->flags & F_IPDST_RND) { do { @@ -2628,6 +2636,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) { __u32 t; + if (pkt_dev->flags & F_TXSIZE_RND) { t = get_random_u32_inclusive(pkt_dev->min_pkt_size, pkt_dev->max_pkt_size - 1); @@ -2694,7 +2703,8 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) if (!x) return 0; /* XXX: we dont support tunnel mode for now until - * we resolve the dst issue */ + * we resolve the dst issue + */ if ((x->props.mode != XFRM_MODE_TRANSPORT) && (pkt_dev->spi == 0)) return 0; @@ -2729,8 +2739,10 @@ static void free_SAs(struct pktgen_dev *pkt_dev) if (pkt_dev->cflows) { /* let go of the SAs if we have them */ int i; + for (i = 0; i < pkt_dev->cflows; i++) { struct xfrm_state *x = pkt_dev->flows[i].x; + if (x) { xfrm_state_put(x); pkt_dev->flows[i].x = NULL; @@ -2745,6 +2757,7 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, if (pkt_dev->flags & F_IPSEC) { struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; int nhead = 0; + if (x) { struct ethhdr *eth; struct iphdr *iph; @@ -2788,6 +2801,7 @@ err: static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev) { unsigned int i; + for (i = 0; i < pkt_dev->nr_labels; i++) *mpls++ = pkt_dev->labels[i] & ~MPLS_STACK_BOTTOM; @@ -2900,7 +2914,7 @@ static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, skb->dev = dev; } } else { - skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT); + skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT); } /* the caller pre-fetches from skb->data and reserves for the mac hdr */ @@ -2981,7 +2995,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->priority = pkt_dev->skb_priority; memcpy(eth, pkt_dev->hh, 12); - 
*(__be16 *) & eth[12] = protocol; + *(__be16 *)ð[12] = protocol; /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 - @@ -3210,11 +3224,11 @@ static void pktgen_run(struct pktgen_thread *t) set_pkt_overhead(pkt_dev); - strcpy(pkt_dev->result, "Starting"); + strscpy(pkt_dev->result, "Starting"); pkt_dev->running = 1; /* Cranke yeself! */ started++; } else - strcpy(pkt_dev->result, "Error starting"); + strscpy(pkt_dev->result, "Error starting"); } rcu_read_unlock(); if (started) @@ -3473,6 +3487,7 @@ static void pktgen_rem_thread(struct pktgen_thread *t) static void pktgen_resched(struct pktgen_dev *pkt_dev) { ktime_t idle_start = ktime_get(); + schedule(); pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start)); } @@ -3788,7 +3803,8 @@ static int add_dev_to_thread(struct pktgen_thread *t, * userspace on another CPU than the kthread. The if_lock() * is used here to sync with concurrent instances of * _rem_dev_from_if_list() invoked via kthread, which is also - * updating the if_list */ + * updating the if_list + */ if_lock(t); if (pkt_dev->pg_thread) { @@ -3826,7 +3842,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) if (!pkt_dev) return -ENOMEM; - strcpy(pkt_dev->odevname, ifname); + strscpy(pkt_dev->odevname, ifname); pkt_dev->flows = vzalloc_node(array_size(MAX_CFLOWS, sizeof(struct flow_state)), node); @@ -3983,7 +3999,8 @@ static int pktgen_remove_device(struct pktgen_thread *t, /* Remove proc before if_list entry, because add_device uses * list to determine if interface already exist, avoid race - * with proc_create_data() */ + * with proc_create_data() + */ proc_remove(pkt_dev->entry); /* And update the thread if_list */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c5a7f41982a5..8a914b37ef6e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2390,12 +2390,12 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh, if (strict_check) { struct ifinfomsg *ifm; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + ifm = nlmsg_payload(nlh, sizeof(*ifm)); + if (!ifm) { NL_SET_ERR_MSG(extack, "Invalid header for link dump"); return -EINVAL; } - ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change) { NL_SET_ERR_MSG(extack, "Invalid values in header for link dump request"); @@ -3580,7 +3580,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm, u32 portid, const struct nlmsghdr *nlh) { - unsigned int old_flags; + unsigned int old_flags, changed; int err; old_flags = dev->flags; @@ -3591,12 +3591,13 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm, return err; } - if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) { - __dev_notify_flags(dev, old_flags, (old_flags ^ dev->flags), portid, nlh); - } else { - dev->rtnl_link_state = RTNL_LINK_INITIALIZED; - __dev_notify_flags(dev, old_flags, ~0U, portid, nlh); + changed = old_flags ^ dev->flags; + if (dev->rtnl_link_initializing) { + dev->rtnl_link_initializing = false; + changed = ~0U; } + + __dev_notify_flags(dev, old_flags, changed, portid, nlh); return 0; } EXPORT_SYMBOL(rtnl_configure_link); @@ -3654,7 +3655,7 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, dev_net_set(dev, net); dev->rtnl_link_ops = ops; - dev->rtnl_link_state = RTNL_LINK_INITIALIZING; + dev->rtnl_link_initializing = true; if (tb[IFLA_MTU]) { u32 mtu = 
nla_get_u32(tb[IFLA_MTU]); @@ -4083,7 +4084,8 @@ static int rtnl_valid_getlink_req(struct sk_buff *skb, struct ifinfomsg *ifm; int i, err; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + ifm = nlmsg_payload(nlh, sizeof(*ifm)); + if (!ifm) { NL_SET_ERR_MSG(extack, "Invalid header for get link"); return -EINVAL; } @@ -4092,7 +4094,6 @@ static int rtnl_valid_getlink_req(struct sk_buff *skb, return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); - ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change) { NL_SET_ERR_MSG(extack, "Invalid values in header for get link request"); @@ -4883,12 +4884,12 @@ static int valid_fdb_dump_strict(const struct nlmsghdr *nlh, struct ndmsg *ndm; int err, i; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { + ndm = nlmsg_payload(nlh, sizeof(*ndm)); + if (!ndm) { NL_SET_ERR_MSG(extack, "Invalid header for fdb dump request"); return -EINVAL; } - ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || ndm->ndm_flags || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for fdb dump request"); @@ -5051,12 +5052,12 @@ static int valid_fdb_get_strict(const struct nlmsghdr *nlh, struct ndmsg *ndm; int err, i; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { + ndm = nlmsg_payload(nlh, sizeof(*ndm)); + if (!ndm) { NL_SET_ERR_MSG(extack, "Invalid header for fdb get request"); return -EINVAL; } - ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for fdb get request"); @@ -5323,12 +5324,12 @@ static int valid_bridge_getlink_req(const struct nlmsghdr *nlh, if (strict_check) { struct ifinfomsg *ifm; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + ifm = nlmsg_payload(nlh, sizeof(*ifm)); + if (!ifm) { NL_SET_ERR_MSG(extack, "Invalid header for bridge link dump"); return -EINVAL; } - ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change || ifm->ifi_index) { NL_SET_ERR_MSG(extack, "Invalid values in header for bridge link dump request"); @@ -6220,7 +6221,8 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, { struct if_stats_msg *ifsm; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifsm))) { + ifsm = nlmsg_payload(nlh, sizeof(*ifsm)); + if (!ifsm) { NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); return -EINVAL; } @@ -6228,8 +6230,6 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, if (!strict_check) return 0; - ifsm = nlmsg_data(nlh); - /* only requests using strict checks can pass data to influence * the dump. The legacy exception is filter_mask. 
*/ @@ -6457,12 +6457,12 @@ static int rtnl_mdb_valid_dump_req(const struct nlmsghdr *nlh, { struct br_port_msg *bpm; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*bpm))) { + bpm = nlmsg_payload(nlh, sizeof(*bpm)); + if (!bpm) { NL_SET_ERR_MSG(extack, "Invalid header for mdb dump request"); return -EINVAL; } - bpm = nlmsg_data(nlh); if (bpm->ifindex) { NL_SET_ERR_MSG(extack, "Filtering by device index is not supported for mdb dump request"); return -EINVAL; diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 568779d5a0ef..9a3965680451 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -156,45 +156,3 @@ u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) } EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); #endif - -#if IS_ENABLED(CONFIG_IP_DCCP) -u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport) -{ - u64 seq; - net_secret_init(); - seq = siphash_3u32((__force u32)saddr, (__force u32)daddr, - (__force u32)sport << 16 | (__force u32)dport, - &net_secret); - seq += ktime_get_real_ns(); - seq &= (1ull << 48) - 1; - return seq; -} -EXPORT_SYMBOL(secure_dccp_sequence_number); - -#if IS_ENABLED(CONFIG_IPV6) -u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport) -{ - const struct { - struct in6_addr saddr; - struct in6_addr daddr; - __be16 sport; - __be16 dport; - } __aligned(SIPHASH_ALIGNMENT) combined = { - .saddr = *(struct in6_addr *)saddr, - .daddr = *(struct in6_addr *)daddr, - .sport = sport, - .dport = dport - }; - u64 seq; - net_secret_init(); - seq = siphash(&combined, offsetofend(typeof(combined), dport), - &net_secret); - seq += ktime_get_real_ns(); - seq &= (1ull << 48) - 1; - return seq; -} -EXPORT_SYMBOL(secure_dccpv6_sequence_number); -#endif -#endif diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6cbf77bc61fc..4159107f1666 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -89,6 +89,7 @@ #include <linux/textsearch.h> #include "dev.h" +#include "devmem.h" #include "netmem_priv.h" #include "sock_destructor.h" @@ -893,11 +894,6 @@ static void skb_clone_fraglist(struct sk_buff *skb) skb_get(list); } -static bool is_pp_netmem(netmem_ref netmem) -{ - return (netmem_get_pp_magic(netmem) & ~0x3UL) == PP_SIGNATURE; -} - int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom) { @@ -995,14 +991,7 @@ bool napi_pp_put_page(netmem_ref netmem) { netmem = netmem_compound_head(netmem); - /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation - * in order to preserve any existing bits, such as bit 0 for the - * head page of compound page and bit 1 for pfmemalloc page, so - * mask those bits for freeing side when doing below checking, - * and page_is_pfmemalloc() is checked in __page_pool_put_page() - * to avoid recycling the pfmemalloc page. 
- */ - if (unlikely(!is_pp_netmem(netmem))) + if (unlikely(!netmem_is_pp(netmem))) return false; page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false); @@ -1042,7 +1031,7 @@ static int skb_pp_frag_ref(struct sk_buff *skb) for (i = 0; i < shinfo->nr_frags; i++) { head_netmem = netmem_compound_head(shinfo->frags[i].netmem); - if (likely(is_pp_netmem(head_netmem))) + if (likely(netmem_is_pp(head_netmem))) page_pool_ref_netmem(head_netmem); else page_ref_inc(netmem_to_page(head_netmem)); @@ -1666,7 +1655,8 @@ void mm_unaccount_pinned_pages(struct mmpin *mmp) } EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages); -static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) +static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size, + bool devmem) { struct ubuf_info_msgzc *uarg; struct sk_buff *skb; @@ -1681,7 +1671,7 @@ static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) uarg = (void *)skb->cb; uarg->mmp.user = NULL; - if (mm_account_pinned_pages(&uarg->mmp, size)) { + if (likely(!devmem) && mm_account_pinned_pages(&uarg->mmp, size)) { kfree_skb(skb); return NULL; } @@ -1704,7 +1694,7 @@ static inline struct sk_buff *skb_from_uarg(struct ubuf_info_msgzc *uarg) } struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, - struct ubuf_info *uarg) + struct ubuf_info *uarg, bool devmem) { if (uarg) { struct ubuf_info_msgzc *uarg_zc; @@ -1734,7 +1724,8 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, next = (u32)atomic_read(&sk->sk_zckey); if ((u32)(uarg_zc->id + uarg_zc->len) == next) { - if (mm_account_pinned_pages(&uarg_zc->mmp, size)) + if (likely(!devmem) && + mm_account_pinned_pages(&uarg_zc->mmp, size)) return NULL; uarg_zc->len++; uarg_zc->bytelen = bytelen; @@ -1749,7 +1740,7 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, } new_alloc: - return msg_zerocopy_alloc(sk, size); + return msg_zerocopy_alloc(sk, size, devmem); } EXPORT_SYMBOL_GPL(msg_zerocopy_realloc); @@ -1853,7 +1844,8 @@ EXPORT_SYMBOL_GPL(msg_zerocopy_ubuf_ops); int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, - struct ubuf_info *uarg) + struct ubuf_info *uarg, + struct net_devmem_dmabuf_binding *binding) { int err, orig_len = skb->len; @@ -1872,7 +1864,8 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, return -EEXIST; } - err = __zerocopy_sg_from_iter(msg, sk, skb, &msg->msg_iter, len); + err = __zerocopy_sg_from_iter(msg, sk, skb, &msg->msg_iter, len, + binding); if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) { struct sock *save_sk = skb->sk; @@ -3239,7 +3232,7 @@ static int sendmsg_unlocked(struct sock *sk, struct msghdr *msg) typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg); static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, - int len, sendmsg_func sendmsg) + int len, sendmsg_func sendmsg, int flags) { unsigned int orig_len = len; struct sk_buff *head = skb; @@ -3257,7 +3250,7 @@ do_frag_list: kv.iov_base = skb->data + offset; kv.iov_len = slen; memset(&msg, 0, sizeof(msg)); - msg.msg_flags = MSG_DONTWAIT; + msg.msg_flags = MSG_DONTWAIT | flags; iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen); ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked, @@ -3294,7 +3287,8 @@ do_frag_list: while (slen) { struct bio_vec bvec; struct msghdr msg = { - .msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT, + .msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT | + flags, }; bvec_set_page(&bvec, skb_frag_page(frag), 
slen, @@ -3340,14 +3334,21 @@ error: int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, int len) { - return __skb_send_sock(sk, skb, offset, len, sendmsg_locked); + return __skb_send_sock(sk, skb, offset, len, sendmsg_locked, 0); } EXPORT_SYMBOL_GPL(skb_send_sock_locked); +int skb_send_sock_locked_with_flags(struct sock *sk, struct sk_buff *skb, + int offset, int len, int flags) +{ + return __skb_send_sock(sk, skb, offset, len, sendmsg_locked, flags); +} +EXPORT_SYMBOL_GPL(skb_send_sock_locked_with_flags); + /* Send skb data on a socket. Socket must be unlocked. */ int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len) { - return __skb_send_sock(sk, skb, offset, len, sendmsg_unlocked); + return __skb_send_sock(sk, skb, offset, len, sendmsg_unlocked, 0); } /** @@ -7317,3 +7318,32 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, return false; } EXPORT_SYMBOL(csum_and_copy_from_iter_full); + +void get_netmem(netmem_ref netmem) +{ + struct net_iov *niov; + + if (netmem_is_net_iov(netmem)) { + niov = netmem_to_net_iov(netmem); + if (net_is_devmem_iov(niov)) + net_devmem_get_net_iov(netmem_to_net_iov(netmem)); + return; + } + get_page(netmem_to_page(netmem)); +} +EXPORT_SYMBOL(get_netmem); + +void put_netmem(netmem_ref netmem) +{ + struct net_iov *niov; + + if (netmem_is_net_iov(netmem)) { + niov = netmem_to_net_iov(netmem); + if (net_is_devmem_iov(niov)) + net_devmem_put_net_iov(netmem_to_net_iov(netmem)); + return; + } + + put_page(netmem_to_page(netmem)); +} +EXPORT_SYMBOL(put_netmem); diff --git a/net/core/sock.c b/net/core/sock.c index e54449c9ab0b..347ce75482f5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2494,17 +2494,14 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) */ if (!is_charged) RCU_INIT_POINTER(newsk->sk_filter, NULL); - sk_free_unlock_clone(newsk); - newsk = NULL; - goto out; + + goto free; } + RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); - if (bpf_sk_storage_clone(sk, newsk)) { - sk_free_unlock_clone(newsk); - newsk = NULL; - goto out; - } + if (bpf_sk_storage_clone(sk, newsk)) + goto free; /* Clear sk_user_data if parent had the pointer tagged * as not suitable for copying when cloning. 
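(A note on the get_netmem()/put_netmem() helpers exported in the skbuff.c hunk above: they give callers a single refcounting entry point that works for both real pages and devmem net_iovs. A minimal sketch of the intended usage pattern — tx_hold_frags() is a hypothetical caller, not part of this series:

	static void tx_hold_frags(struct sk_buff *skb)
	{
		struct skb_shared_info *shinfo = skb_shinfo(skb);
		int i;

		for (i = 0; i < shinfo->nr_frags; i++)
			/* A page frag takes a page reference; a devmem
			 * net_iov instead pins its dmabuf binding until
			 * the matching put_netmem().
			 */
			get_netmem(skb_frag_netmem(&shinfo->frags[i]));
	}

This is what lets TX skbs carrying net_iov frags keep the binding alive until the skb is freed, as described in the devmem.h comment at the top of this section.)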
@@ -2534,18 +2531,17 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) net_enable_timestamp(); out: return newsk; -} -EXPORT_SYMBOL_GPL(sk_clone_lock); - -void sk_free_unlock_clone(struct sock *sk) -{ +free: /* It is still raw copy of parent, so invalidate - * destructor and make plain sk_free() */ - sk->sk_destruct = NULL; - bh_unlock_sock(sk); - sk_free(sk); + * destructor and make plain sk_free() + */ + newsk->sk_destruct = NULL; + bh_unlock_sock(newsk); + sk_free(newsk); + newsk = NULL; + goto out; } -EXPORT_SYMBOL_GPL(sk_free_unlock_clone); +EXPORT_SYMBOL_GPL(sk_clone_lock); static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst) { @@ -3022,6 +3018,11 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, return -EPERM; sockc->priority = *(u32 *)CMSG_DATA(cmsg); break; + case SCM_DEVMEM_DMABUF: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + sockc->dmabuf_id = *(u32 *)CMSG_DATA(cmsg); + break; default: return -EINVAL; } @@ -4004,7 +4005,7 @@ static int assign_proto_idx(struct proto *prot) { prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); - if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { + if (unlikely(prot->inuse_idx == PROTO_INUSE_NR)) { pr_err("PROTO_INUSE_NR exhausted\n"); return -ENOSPC; } @@ -4015,7 +4016,7 @@ static int assign_proto_idx(struct proto *prot) static void release_proto_idx(struct proto *prot) { - if (prot->inuse_idx != PROTO_INUSE_NR - 1) + if (prot->inuse_idx != PROTO_INUSE_NR) clear_bit(prot->inuse_idx, proto_inuse_idx); } #else diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index a08eed9b9142..b23594c767f2 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -264,8 +264,6 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, switch (nlh->nlmsg_type) { case TCPDIAG_GETSOCK: - case DCCPDIAG_GETSOCK: - if (!rcu_access_pointer(inet_rcv_compat)) sock_load_diag_module(AF_INET, 0); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index c7769ee0d9c5..5dbb2c6f371d 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -201,7 +201,7 @@ static int rps_sock_flow_sysctl(const struct ctl_table *table, int write, if (orig_sock_table) { static_branch_dec(&rps_needed); static_branch_dec(&rfs_needed); - kvfree_rcu_mightsleep(orig_sock_table); + kvfree_rcu(orig_sock_table, rcu); } } } @@ -239,7 +239,7 @@ static int flow_limit_cpu_sysctl(const struct ctl_table *table, int write, lockdep_is_held(&flow_limit_update_mutex)); if (cur && !cpumask_test_cpu(i, mask)) { RCU_INIT_POINTER(sd->flow_limit, NULL); - kfree_rcu_mightsleep(cur); + kfree_rcu(cur, rcu); } else if (!cur && cpumask_test_cpu(i, mask)) { cur = kzalloc_node(len, GFP_KERNEL, cpu_to_node(i)); @@ -248,7 +248,7 @@ static int flow_limit_cpu_sysctl(const struct ctl_table *table, int write, ret = -ENOMEM; goto write_unlock; } - cur->num_buckets = netdev_flow_limit_table_len; + cur->log_buckets = ilog2(netdev_flow_limit_table_len); rcu_assign_pointer(sd->flow_limit, cur); } } diff --git a/net/core/xdp.c b/net/core/xdp.c index f86eedad586a..e6f22ba61c1e 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -17,6 +17,7 @@ #include <net/page_pool/helpers.h> #include <net/hotdata.h> +#include <net/netdev_lock.h> #include <net/xdp.h> #include <net/xdp_priv.h> /* struct xdp_mem_allocator */ #include <trace/events/xdp.h> @@ -437,8 +438,8 @@ void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type, netmem = netmem_compound_head(netmem); if (napi_direct && 
xdp_return_frame_no_direct()) napi_direct = false; - /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) - * as mem->type knows this a page_pool page + /* No need to check netmem_is_pp() as mem->type knows this is a + * page_pool page */ page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, napi_direct); @@ -697,23 +698,24 @@ static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb, nr_frags = xinfo->nr_frags; for (u32 i = 0; i < nr_frags; i++) { - u32 len = skb_frag_size(&xinfo->frags[i]); + const skb_frag_t *frag = &xinfo->frags[i]; + u32 len = skb_frag_size(frag); u32 offset, truesize = len; - netmem_ref netmem; + struct page *page; - netmem = page_pool_dev_alloc_netmem(pp, &offset, &truesize); - if (unlikely(!netmem)) { + page = page_pool_dev_alloc(pp, &offset, &truesize); + if (unlikely(!page)) { sinfo->nr_frags = i; return false; } - memcpy(__netmem_address(netmem), - __netmem_address(xinfo->frags[i].netmem), + memcpy(page_address(page) + offset, + skb_frag_address(frag), LARGEST_ALIGN(len)); - __skb_fill_netmem_desc_noacc(sinfo, i, netmem, offset, len); + __skb_fill_page_desc_noacc(sinfo, i, page, offset, len); tsize += truesize; - pfmemalloc |= netmem_is_pfmemalloc(netmem); + pfmemalloc |= page_is_pfmemalloc(page); } xdp_update_skb_shared_info(skb, nr_frags, xinfo->xdp_frags_size, @@ -737,25 +739,27 @@ */ struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp) { - struct page_pool *pp = this_cpu_read(system_page_pool); const struct xdp_rxq_info *rxq = xdp->rxq; u32 len = xdp->data_end - xdp->data_meta; u32 truesize = xdp->frame_sz; - struct sk_buff *skb; + struct sk_buff *skb = NULL; + struct page_pool *pp; int metalen; void *data; if (!IS_ENABLED(CONFIG_PAGE_POOL)) return NULL; + local_lock_nested_bh(&system_page_pool.bh_lock); + pp = this_cpu_read(system_page_pool.pool); data = page_pool_dev_alloc_va(pp, &truesize); if (unlikely(!data)) - return NULL; + goto out; skb = napi_build_skb(data, truesize); if (unlikely(!skb)) { page_pool_free_va(pp, data, true); - return NULL; + goto out; } skb_mark_for_recycle(skb); @@ -774,13 +778,16 @@ struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp) if (unlikely(xdp_buff_has_frags(xdp)) && unlikely(!xdp_copy_frags_from_zc(skb, xdp, pp))) { napi_consume_skb(skb, true); - return NULL; + skb = NULL; + goto out; } xsk_buff_free(xdp); skb->protocol = eth_type_trans(skb, rxq->dev); +out: + local_unlock_nested_bh(&system_page_pool.bh_lock); return skb; } EXPORT_SYMBOL_GPL(xdp_build_skb_from_zc); @@ -991,34 +998,60 @@ static int __init xdp_metadata_init(void) } late_initcall(xdp_metadata_init); -void xdp_set_features_flag(struct net_device *dev, xdp_features_t val) +void xdp_set_features_flag_locked(struct net_device *dev, xdp_features_t val) { val &= NETDEV_XDP_ACT_MASK; if (dev->xdp_features == val) return; + netdev_assert_locked_or_invisible(dev); dev->xdp_features = val; if (dev->reg_state == NETREG_REGISTERED) call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev); } +EXPORT_SYMBOL_GPL(xdp_set_features_flag_locked); + +void xdp_set_features_flag(struct net_device *dev, xdp_features_t val) +{ + netdev_lock(dev); + xdp_set_features_flag_locked(dev, val); + netdev_unlock(dev); +} EXPORT_SYMBOL_GPL(xdp_set_features_flag); -void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg) +void xdp_features_set_redirect_target_locked(struct net_device *dev, + bool support_sg) { xdp_features_t val = (dev->xdp_features |
NETDEV_XDP_ACT_NDO_XMIT); if (support_sg) val |= NETDEV_XDP_ACT_NDO_XMIT_SG; - xdp_set_features_flag(dev, val); + xdp_set_features_flag_locked(dev, val); +} +EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target_locked); + +void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg) +{ + netdev_lock(dev); + xdp_features_set_redirect_target_locked(dev, support_sg); + netdev_unlock(dev); } EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target); -void xdp_features_clear_redirect_target(struct net_device *dev) +void xdp_features_clear_redirect_target_locked(struct net_device *dev) { xdp_features_t val = dev->xdp_features; val &= ~(NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_NDO_XMIT_SG); - xdp_set_features_flag(dev, val); + xdp_set_features_flag_locked(dev, val); +} +EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target_locked); + +void xdp_features_clear_redirect_target(struct net_device *dev) +{ + netdev_lock(dev); + xdp_features_clear_redirect_target_locked(dev); + netdev_unlock(dev); } EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target); diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig deleted file mode 100644 index 0c7d2f66ba27..000000000000 --- a/net/dccp/Kconfig +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -menuconfig IP_DCCP - tristate "The DCCP Protocol" - depends on INET - help - Datagram Congestion Control Protocol (RFC 4340) - - From https://www.ietf.org/rfc/rfc4340.txt: - - The Datagram Congestion Control Protocol (DCCP) is a transport - protocol that implements bidirectional, unicast connections of - congestion-controlled, unreliable datagrams. It should be suitable - for use by applications such as streaming media, Internet telephony, - and on-line games. - - To compile this protocol support as a module, choose M here: the - module will be called dccp. - - If in doubt, say N. - -if IP_DCCP - -config INET_DCCP_DIAG - depends on INET_DIAG - def_tristate y if (IP_DCCP = y && INET_DIAG = y) - def_tristate m - -source "net/dccp/ccids/Kconfig" - -menu "DCCP Kernel Hacking" - depends on DEBUG_KERNEL=y - -config IP_DCCP_DEBUG - bool "DCCP debug messages" - help - Only use this if you're hacking DCCP. - - When compiling DCCP as a module, this debugging output can be toggled - by setting the parameter dccp_debug of the `dccp' module to 0 or 1. - - Just say N. - - -endmenu - -endif # IP_DDCP diff --git a/net/dccp/Makefile b/net/dccp/Makefile deleted file mode 100644 index 5b4ff37bc806..000000000000 --- a/net/dccp/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o - -dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \ - qpolicy.o -# -# CCID algorithms to be used by dccp.ko -# -# CCID-2 is default (RFC 4340, p. 
77) and has Ack Vectors as dependency -dccp-y += ccids/ccid2.o ackvec.o -dccp-$(CONFIG_IP_DCCP_CCID3) += ccids/ccid3.o -dccp-$(CONFIG_IP_DCCP_TFRC_LIB) += ccids/lib/tfrc.o \ - ccids/lib/tfrc_equation.o \ - ccids/lib/packet_history.o \ - ccids/lib/loss_interval.o - -dccp_ipv4-y := ipv4.o - -# build dccp_ipv6 as module whenever either IPv6 or DCCP is a module -obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o -dccp_ipv6-y := ipv6.o - -obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o - -dccp-$(CONFIG_SYSCTL) += sysctl.o - -dccp_diag-y := diag.o - -# build with local directory for trace.h -CFLAGS_proto.o := -I$(src) diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c deleted file mode 100644 index 1cba001bb4c8..000000000000 --- a/net/dccp/ackvec.c +++ /dev/null @@ -1,403 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * net/dccp/ackvec.c - * - * An implementation of Ack Vectors for the DCCP protocol - * Copyright (c) 2007 University of Aberdeen, Scotland, UK - * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net> - */ -#include "dccp.h" -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/export.h> - -static struct kmem_cache *dccp_ackvec_slab; -static struct kmem_cache *dccp_ackvec_record_slab; - -struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) -{ - struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority); - - if (av != NULL) { - av->av_buf_head = av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1; - INIT_LIST_HEAD(&av->av_records); - } - return av; -} - -static void dccp_ackvec_purge_records(struct dccp_ackvec *av) -{ - struct dccp_ackvec_record *cur, *next; - - list_for_each_entry_safe(cur, next, &av->av_records, avr_node) - kmem_cache_free(dccp_ackvec_record_slab, cur); - INIT_LIST_HEAD(&av->av_records); -} - -void dccp_ackvec_free(struct dccp_ackvec *av) -{ - if (likely(av != NULL)) { - dccp_ackvec_purge_records(av); - kmem_cache_free(dccp_ackvec_slab, av); - } -} - -/** - * dccp_ackvec_update_records - Record information about sent Ack Vectors - * @av: Ack Vector records to update - * @seqno: Sequence number of the packet carrying the Ack Vector just sent - * @nonce_sum: The sum of all buffer nonces contained in the Ack Vector - */ -int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum) -{ - struct dccp_ackvec_record *avr; - - avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC); - if (avr == NULL) - return -ENOBUFS; - - avr->avr_ack_seqno = seqno; - avr->avr_ack_ptr = av->av_buf_head; - avr->avr_ack_ackno = av->av_buf_ackno; - avr->avr_ack_nonce = nonce_sum; - avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head); - /* - * When the buffer overflows, we keep no more than one record. This is - * the simplest way of disambiguating sender-Acks dating from before the - * overflow from sender-Acks which refer to after the overflow; a simple - * solution is preferable here since we are handling an exception. - */ - if (av->av_overflow) - dccp_ackvec_purge_records(av); - /* - * Since GSS is incremented for each packet, the list is automatically - * arranged in descending order of @ack_seqno. 
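An aside on the bookkeeping above: because GSS grows with every packet sent, and dccp_ackvec_update_records() prepends each new record with list_add(), the av_records list stays sorted in descending order of avr_ack_seqno. That invariant is what lets dccp_ackvec_lookup(), just below, walk from the tail and bail out early. A minimal userspace sketch of the invariant (stand-in types, not the kernel list API):

#include <stdio.h>
#include <stdlib.h>

struct rec {
        unsigned long long seqno;
        struct rec *next;               /* towards older records */
};

static struct rec *head;                /* newest record first */

static void record_add(unsigned long long seqno)
{
        struct rec *r = malloc(sizeof(*r));

        if (!r)
                return;
        r->seqno = seqno;               /* key only ever grows (GSS) */
        r->next = head;                 /* prepend, like list_add() */
        head = r;
}

int main(void)
{
        for (unsigned long long s = 1; s <= 5; s++)
                record_add(s);
        /* prints 5 4 3 2 1: descending order, no sorting ever needed */
        for (struct rec *r = head; r; r = r->next)
                printf("%llu ", r->seqno);
        printf("\n");
        return 0;
}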
- */ - list_add(&avr->avr_node, &av->av_records); - - dccp_pr_debug("Added Vector, ack_seqno=%llu, ack_ackno=%llu (rl=%u)\n", - (unsigned long long)avr->avr_ack_seqno, - (unsigned long long)avr->avr_ack_ackno, - avr->avr_ack_runlen); - return 0; -} - -static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list, - const u64 ackno) -{ - struct dccp_ackvec_record *avr; - /* - * Exploit that records are inserted in descending order of sequence - * number, start with the oldest record first. If @ackno is `before' - * the earliest ack_ackno, the packet is too old to be considered. - */ - list_for_each_entry_reverse(avr, av_list, avr_node) { - if (avr->avr_ack_seqno == ackno) - return avr; - if (before48(ackno, avr->avr_ack_seqno)) - break; - } - return NULL; -} - -/* - * Buffer index and length computation using modulo-buffersize arithmetic. - * Note that, as pointers move from right to left, head is `before' tail. - */ -static inline u16 __ackvec_idx_add(const u16 a, const u16 b) -{ - return (a + b) % DCCPAV_MAX_ACKVEC_LEN; -} - -static inline u16 __ackvec_idx_sub(const u16 a, const u16 b) -{ - return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b); -} - -u16 dccp_ackvec_buflen(const struct dccp_ackvec *av) -{ - if (unlikely(av->av_overflow)) - return DCCPAV_MAX_ACKVEC_LEN; - return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head); -} - -/** - * dccp_ackvec_update_old - Update previous state as per RFC 4340, 11.4.1 - * @av: non-empty buffer to update - * @distance: negative or zero distance of @seqno from buf_ackno downward - * @seqno: the (old) sequence number whose record is to be updated - * @state: state in which packet carrying @seqno was received - */ -static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance, - u64 seqno, enum dccp_ackvec_states state) -{ - u16 ptr = av->av_buf_head; - - BUG_ON(distance > 0); - if (unlikely(dccp_ackvec_is_empty(av))) - return; - - do { - u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr); - - if (distance + runlen >= 0) { - /* - * Only update the state if packet has not been received - * yet. This is OK as per the second table in RFC 4340, - * 11.4.1; i.e. here we are using the following table: - * RECEIVED - * 0 1 3 - * S +---+---+---+ - * T 0 | 0 | 0 | 0 | - * O +---+---+---+ - * R 1 | 1 | 1 | 1 | - * E +---+---+---+ - * D 3 | 0 | 1 | 3 | - * +---+---+---+ - * The "Not Received" state was set by reserve_seats(). - */ - if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED) - av->av_buf[ptr] = state; - else - dccp_pr_debug("Not changing %llu state to %u\n", - (unsigned long long)seqno, state); - break; - } - - distance += runlen + 1; - ptr = __ackvec_idx_add(ptr, 1); - - } while (ptr != av->av_buf_tail); -} - -/* Mark @num entries after buf_head as "Not yet received". 
*/ -static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num) -{ - u16 start = __ackvec_idx_add(av->av_buf_head, 1), - len = DCCPAV_MAX_ACKVEC_LEN - start; - - /* check for buffer wrap-around */ - if (num > len) { - memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len); - start = 0; - num -= len; - } - if (num) - memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num); -} - -/** - * dccp_ackvec_add_new - Record one or more new entries in Ack Vector buffer - * @av: container of buffer to update (can be empty or non-empty) - * @num_packets: number of packets to register (must be >= 1) - * @seqno: sequence number of the first packet in @num_packets - * @state: state in which packet carrying @seqno was received - */ -static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets, - u64 seqno, enum dccp_ackvec_states state) -{ - u32 num_cells = num_packets; - - if (num_packets > DCCPAV_BURST_THRESH) { - u32 lost_packets = num_packets - 1; - - DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets); - /* - * We received 1 packet and have a loss of size "num_packets-1" - * which we squeeze into num_cells-1 rather than reserving an - * entire byte for each lost packet. - * The reason is that the vector grows in O(burst_length); when - * it grows too large there will no room left for the payload. - * This is a trade-off: if a few packets out of the burst show - * up later, their state will not be changed; it is simply too - * costly to reshuffle/reallocate/copy the buffer each time. - * Should such problems persist, we will need to switch to a - * different underlying data structure. - */ - for (num_packets = num_cells = 1; lost_packets; ++num_cells) { - u8 len = min_t(u32, lost_packets, DCCPAV_MAX_RUNLEN); - - av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1); - av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len; - - lost_packets -= len; - } - } - - if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) { - DCCP_CRIT("Ack Vector buffer overflow: dropping old entries"); - av->av_overflow = true; - } - - av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets); - if (av->av_overflow) - av->av_buf_tail = av->av_buf_head; - - av->av_buf[av->av_buf_head] = state; - av->av_buf_ackno = seqno; - - if (num_packets > 1) - dccp_ackvec_reserve_seats(av, num_packets - 1); -} - -/** - * dccp_ackvec_input - Register incoming packet in the buffer - * @av: Ack Vector to register packet to - * @skb: Packet to register - */ -void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb) -{ - u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq; - enum dccp_ackvec_states state = DCCPAV_RECEIVED; - - if (dccp_ackvec_is_empty(av)) { - dccp_ackvec_add_new(av, 1, seqno, state); - av->av_tail_ackno = seqno; - - } else { - s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno); - u8 *current_head = av->av_buf + av->av_buf_head; - - if (num_packets == 1 && - dccp_ackvec_state(current_head) == state && - dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) { - - *current_head += 1; - av->av_buf_ackno = seqno; - - } else if (num_packets > 0) { - dccp_ackvec_add_new(av, num_packets, seqno, state); - } else { - dccp_ackvec_update_old(av, num_packets, seqno, state); - } - } -} - -/** - * dccp_ackvec_clear_state - Perform house-keeping / garbage-collection - * @av: Ack Vector record to clean - * @ackno: last Ack Vector which has been acknowledged - * - * This routine is called when the peer acknowledges the receipt of Ack Vectors - * up to and including @ackno. 
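A note on dccp_ackvec_add_new() above: a burst of losses is compressed into run-length cells, each byte carrying a 2-bit state plus a 6-bit run length, rather than one byte per missing packet. The sketch below replays just that encoding step in userspace, reusing the state/run-length constants from ackvec.h; the real function additionally keeps the head cell for the one packet that actually arrived.

#include <stdio.h>

#define AV_MAX_RUNLEN   0x3F            /* 6-bit run length */
#define AV_NOT_RECEIVED 0xC0            /* state in the two top bits */

int main(void)
{
        unsigned int lost = 150, cells = 0;

        while (lost) {
                unsigned int len = lost > AV_MAX_RUNLEN ? AV_MAX_RUNLEN : lost;
                unsigned char cell = AV_NOT_RECEIVED | len;

                printf("cell %u: state=0x%02x runlen=%u\n", cells,
                       cell & ~AV_MAX_RUNLEN, cell & AV_MAX_RUNLEN);
                lost -= len;
                cells++;
        }
        /* 150 missing packets collapse into DIV_ROUND_UP(150, 63) = 3 cells */
        printf("-> %u cells\n", cells);
        return 0;
}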
While based on section A.3 of RFC 4340, here - * are additional precautions to prevent corrupted buffer state. In particular, - * we use tail_ackno to identify outdated records; it always marks the earliest - * packet of group (2) in 11.4.2. - */ -void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno) -{ - struct dccp_ackvec_record *avr, *next; - u8 runlen_now, eff_runlen; - s64 delta; - - avr = dccp_ackvec_lookup(&av->av_records, ackno); - if (avr == NULL) - return; - /* - * Deal with outdated acknowledgments: this arises when e.g. there are - * several old records and the acks from the peer come in slowly. In - * that case we may still have records that pre-date tail_ackno. - */ - delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno); - if (delta < 0) - goto free_records; - /* - * Deal with overlapping Ack Vectors: don't subtract more than the - * number of packets between tail_ackno and ack_ackno. - */ - eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen; - - runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr); - /* - * The run length of Ack Vector cells does not decrease over time. If - * the run length is the same as at the time the Ack Vector was sent, we - * free the ack_ptr cell. That cell can however not be freed if the run - * length has increased: in this case we need to move the tail pointer - * backwards (towards higher indices), to its next-oldest neighbour. - */ - if (runlen_now > eff_runlen) { - - av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1; - av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1); - - /* This move may not have cleared the overflow flag. */ - if (av->av_overflow) - av->av_overflow = (av->av_buf_head == av->av_buf_tail); - } else { - av->av_buf_tail = avr->avr_ack_ptr; - /* - * We have made sure that avr points to a valid cell within the - * buffer. This cell is either older than head, or equals head - * (empty buffer): in both cases we no longer have any overflow. - */ - av->av_overflow = 0; - } - - /* - * The peer has acknowledged up to and including ack_ackno. Hence the - * first packet in group (2) of 11.4.2 is the successor of ack_ackno. 
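dccp_ackvec_clear_state() is built on the mod-2^48 helpers (dccp_delta_seqno(), before48(), and the ADD48() step that follows). As a self-contained illustration of how such 48-bit circular comparisons are commonly implemented; the authoritative definitions lived in the deleted dccp.h, so treat the exact spelling here as an assumption, not a copy:

#include <stdio.h>
#include <stdint.h>

#define SEQ48_MASK ((1ULL << 48) - 1)

/* signed distance from a to b, wrapping mod 2^48 */
static int64_t delta_seqno48(uint64_t a, uint64_t b)
{
        return (int64_t)((b - a) << 16) >> 16;  /* sign-extend bit 47 */
}

static int after48(uint64_t a, uint64_t b)      /* is a later than b? */
{
        return delta_seqno48(b, a) > 0;
}

int main(void)
{
        uint64_t near_wrap = SEQ48_MASK;        /* 2^48 - 1 */

        /* two steps forward across the wrap point */
        printf("%lld\n", (long long)delta_seqno48(near_wrap, 1)); /* 2 */
        printf("%d\n", after48(1, near_wrap));                    /* 1 */
        return 0;
}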
- */ - av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1); - -free_records: - list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) { - list_del(&avr->avr_node); - kmem_cache_free(dccp_ackvec_record_slab, avr); - } -} - -/* - * Routines to keep track of Ack Vectors received in an skb - */ -int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce) -{ - struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC); - - if (new == NULL) - return -ENOBUFS; - new->vec = vec; - new->len = len; - new->nonce = nonce; - - list_add_tail(&new->node, head); - return 0; -} -EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add); - -void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks) -{ - struct dccp_ackvec_parsed *cur, *next; - - list_for_each_entry_safe(cur, next, parsed_chunks, node) - kfree(cur); - INIT_LIST_HEAD(parsed_chunks); -} -EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup); - -int __init dccp_ackvec_init(void) -{ - dccp_ackvec_slab = KMEM_CACHE(dccp_ackvec, SLAB_HWCACHE_ALIGN); - if (dccp_ackvec_slab == NULL) - goto out_err; - - dccp_ackvec_record_slab = KMEM_CACHE(dccp_ackvec_record, SLAB_HWCACHE_ALIGN); - if (dccp_ackvec_record_slab == NULL) - goto out_destroy_slab; - - return 0; - -out_destroy_slab: - kmem_cache_destroy(dccp_ackvec_slab); - dccp_ackvec_slab = NULL; -out_err: - DCCP_CRIT("Unable to create Ack Vector slab cache"); - return -ENOBUFS; -} - -void dccp_ackvec_exit(void) -{ - kmem_cache_destroy(dccp_ackvec_slab); - dccp_ackvec_slab = NULL; - kmem_cache_destroy(dccp_ackvec_record_slab); - dccp_ackvec_record_slab = NULL; -} diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h deleted file mode 100644 index d2c4220fb377..000000000000 --- a/net/dccp/ackvec.h +++ /dev/null @@ -1,136 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _ACKVEC_H -#define _ACKVEC_H -/* - * net/dccp/ackvec.h - * - * An implementation of Ack Vectors for the DCCP protocol - * Copyright (c) 2007 University of Aberdeen, Scotland, UK - * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com> - */ - -#include <linux/dccp.h> -#include <linux/compiler.h> -#include <linux/list.h> -#include <linux/types.h> - -/* - * Ack Vector buffer space is static, in multiples of %DCCP_SINGLE_OPT_MAXLEN, - * the maximum size of a single Ack Vector. Setting %DCCPAV_NUM_ACKVECS to 1 - * will be sufficient for most cases of low Ack Ratios, using a value of 2 gives - * more headroom if Ack Ratio is higher or when the sender acknowledges slowly. - * The maximum value is bounded by the u16 types for indices and functions. - */ -#define DCCPAV_NUM_ACKVECS 2 -#define DCCPAV_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS) - -/* Estimated minimum average Ack Vector length - used for updating MPS */ -#define DCCPAV_MIN_OPTLEN 16 - -/* Threshold for coping with large bursts of losses */ -#define DCCPAV_BURST_THRESH (DCCPAV_MAX_ACKVEC_LEN / 8) - -enum dccp_ackvec_states { - DCCPAV_RECEIVED = 0x00, - DCCPAV_ECN_MARKED = 0x40, - DCCPAV_RESERVED = 0x80, - DCCPAV_NOT_RECEIVED = 0xC0 -}; -#define DCCPAV_MAX_RUNLEN 0x3F - -static inline u8 dccp_ackvec_runlen(const u8 *cell) -{ - return *cell & DCCPAV_MAX_RUNLEN; -} - -static inline u8 dccp_ackvec_state(const u8 *cell) -{ - return *cell & ~DCCPAV_MAX_RUNLEN; -} - -/** - * struct dccp_ackvec - Ack Vector main data structure - * - * This implements a fixed-size circular buffer within an array and is largely - * based on Appendix A of RFC 4340. 
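dccp_ackvec_init() above is a textbook instance of the kernel's goto-based unwind idiom: acquire resources in order and, on failure, release only what already exists, in reverse. A userspace sketch of the shape of that idiom; cache_create()/cache_destroy() are invented stand-ins for the kmem_cache calls:

#include <stdio.h>
#include <stdlib.h>

static void *cache_create(const char *name) { (void)name; return malloc(1); }
static void cache_destroy(void *cache) { free(cache); }

static void *ackvec_slab, *record_slab;

static int ackvec_like_init(void)
{
        ackvec_slab = cache_create("ackvec");
        if (!ackvec_slab)
                goto out_err;

        record_slab = cache_create("record");
        if (!record_slab)
                goto out_destroy_ackvec;        /* unwind in reverse order */

        return 0;

out_destroy_ackvec:
        cache_destroy(ackvec_slab);
        ackvec_slab = NULL;
out_err:
        fprintf(stderr, "cache setup failed\n");
        return -1;
}

int main(void) { return ackvec_like_init() ? 1 : 0; }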
- * - * @av_buf: circular buffer storage area - * @av_buf_head: head index; begin of live portion in @av_buf - * @av_buf_tail: tail index; first index _after_ the live portion in @av_buf - * @av_buf_ackno: highest seqno of acknowledgeable packet recorded in @av_buf - * @av_tail_ackno: lowest seqno of acknowledgeable packet recorded in @av_buf - * @av_buf_nonce: ECN nonce sums, each covering subsequent segments of up to - * %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf - * @av_overflow: if 1 then buf_head == buf_tail indicates buffer wraparound - * @av_records: list of %dccp_ackvec_record (Ack Vectors sent previously) - */ -struct dccp_ackvec { - u8 av_buf[DCCPAV_MAX_ACKVEC_LEN]; - u16 av_buf_head; - u16 av_buf_tail; - u64 av_buf_ackno:48; - u64 av_tail_ackno:48; - bool av_buf_nonce[DCCPAV_NUM_ACKVECS]; - u8 av_overflow:1; - struct list_head av_records; -}; - -/** - * struct dccp_ackvec_record - Records information about sent Ack Vectors - * - * These list entries define the additional information which the HC-Receiver - * keeps about recently-sent Ack Vectors; again refer to RFC 4340, Appendix A. - * - * @avr_node: the list node in @av_records - * @avr_ack_seqno: sequence number of the packet the Ack Vector was sent on - * @avr_ack_ackno: the Ack number that this record/Ack Vector refers to - * @avr_ack_ptr: pointer into @av_buf where this record starts - * @avr_ack_runlen: run length of @avr_ack_ptr at the time of sending - * @avr_ack_nonce: the sum of @av_buf_nonce's at the time this record was sent - * - * The list as a whole is sorted in descending order by @avr_ack_seqno. - */ -struct dccp_ackvec_record { - struct list_head avr_node; - u64 avr_ack_seqno:48; - u64 avr_ack_ackno:48; - u16 avr_ack_ptr; - u8 avr_ack_runlen; - u8 avr_ack_nonce:1; -}; - -int dccp_ackvec_init(void); -void dccp_ackvec_exit(void); - -struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority); -void dccp_ackvec_free(struct dccp_ackvec *av); - -void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb); -int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum); -void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno); -u16 dccp_ackvec_buflen(const struct dccp_ackvec *av); - -static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av) -{ - return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail; -} - -/** - * struct dccp_ackvec_parsed - Record offsets of Ack Vectors in skb - * @vec: start of vector (offset into skb) - * @len: length of @vec - * @nonce: whether @vec had an ECN nonce of 0 or 1 - * @node: FIFO - arranged in descending order of ack_ackno - * - * This structure is used by CCIDs to access Ack Vectors in a received skb. 
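The @av_overflow bit documented here resolves the classic circular-buffer ambiguity: with head == tail the buffer is empty while the flag is clear, and completely full once it is set, which is precisely what dccp_ackvec_is_empty() tests. A compact sketch; the 48-bit member mirrors the struct's u64 field:48 layout (a widely supported compiler extension):

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

struct av_like {
        uint16_t head, tail;
        uint8_t  overflow:1;
        uint64_t buf_ackno:48;          /* 48-bit seqno held in a bitfield */
};

static bool av_is_empty(const struct av_like *av)
{
        return !av->overflow && av->head == av->tail;
}

int main(void)
{
        struct av_like av = { .head = 10, .tail = 10 };

        av.buf_ackno = (1ULL << 48) - 1;        /* largest 48-bit seqno fits */
        printf("empty=%d\n", av_is_empty(&av)); /* 1: equal and no overflow */
        av.overflow = 1;                        /* same indices now mean full */
        printf("empty=%d\n", av_is_empty(&av)); /* 0 */
        return 0;
}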
- */ -struct dccp_ackvec_parsed { - u8 *vec, - len, - nonce:1; - struct list_head node; -}; - -int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce); -void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks); -#endif /* _ACKVEC_H */ diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c deleted file mode 100644 index 6beac5d348e2..000000000000 --- a/net/dccp/ccid.c +++ /dev/null @@ -1,219 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * net/dccp/ccid.c - * - * An implementation of the DCCP protocol - * Arnaldo Carvalho de Melo <acme@conectiva.com.br> - * - * CCID infrastructure - */ - -#include <linux/slab.h> - -#include "ccid.h" -#include "ccids/lib/tfrc.h" - -static struct ccid_operations *ccids[] = { - &ccid2_ops, -#ifdef CONFIG_IP_DCCP_CCID3 - &ccid3_ops, -#endif -}; - -static struct ccid_operations *ccid_by_number(const u8 id) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(ccids); i++) - if (ccids[i]->ccid_id == id) - return ccids[i]; - return NULL; -} - -/* check that up to @array_len members in @ccid_array are supported */ -bool ccid_support_check(u8 const *ccid_array, u8 array_len) -{ - while (array_len > 0) - if (ccid_by_number(ccid_array[--array_len]) == NULL) - return false; - return true; -} - -/** - * ccid_get_builtin_ccids - Populate a list of built-in CCIDs - * @ccid_array: pointer to copy into - * @array_len: value to return length into - * - * This function allocates memory - caller must see that it is freed after use. - */ -int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len) -{ - *ccid_array = kmalloc(ARRAY_SIZE(ccids), gfp_any()); - if (*ccid_array == NULL) - return -ENOBUFS; - - for (*array_len = 0; *array_len < ARRAY_SIZE(ccids); *array_len += 1) - (*ccid_array)[*array_len] = ccids[*array_len]->ccid_id; - return 0; -} - -int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, - char __user *optval, int __user *optlen) -{ - u8 *ccid_array, array_len; - int err = 0; - - if (ccid_get_builtin_ccids(&ccid_array, &array_len)) - return -ENOBUFS; - - if (put_user(array_len, optlen)) - err = -EFAULT; - else if (len > 0 && copy_to_user(optval, ccid_array, - len > array_len ? array_len : len)) - err = -EFAULT; - - kfree(ccid_array); - return err; -} - -static __printf(3, 4) struct kmem_cache *ccid_kmem_cache_create(int obj_size, char *slab_name_fmt, const char *fmt,...) 
-{ - struct kmem_cache *slab; - va_list args; - - va_start(args, fmt); - vsnprintf(slab_name_fmt, CCID_SLAB_NAME_LENGTH, fmt, args); - va_end(args); - - slab = kmem_cache_create(slab_name_fmt, sizeof(struct ccid) + obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL); - return slab; -} - -static void ccid_kmem_cache_destroy(struct kmem_cache *slab) -{ - kmem_cache_destroy(slab); -} - -static int __init ccid_activate(struct ccid_operations *ccid_ops) -{ - int err = -ENOBUFS; - - ccid_ops->ccid_hc_rx_slab = - ccid_kmem_cache_create(ccid_ops->ccid_hc_rx_obj_size, - ccid_ops->ccid_hc_rx_slab_name, - "ccid%u_hc_rx_sock", - ccid_ops->ccid_id); - if (ccid_ops->ccid_hc_rx_slab == NULL) - goto out; - - ccid_ops->ccid_hc_tx_slab = - ccid_kmem_cache_create(ccid_ops->ccid_hc_tx_obj_size, - ccid_ops->ccid_hc_tx_slab_name, - "ccid%u_hc_tx_sock", - ccid_ops->ccid_id); - if (ccid_ops->ccid_hc_tx_slab == NULL) - goto out_free_rx_slab; - - pr_info("DCCP: Activated CCID %d (%s)\n", - ccid_ops->ccid_id, ccid_ops->ccid_name); - err = 0; -out: - return err; -out_free_rx_slab: - ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab); - ccid_ops->ccid_hc_rx_slab = NULL; - goto out; -} - -static void ccid_deactivate(struct ccid_operations *ccid_ops) -{ - ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab); - ccid_ops->ccid_hc_tx_slab = NULL; - ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab); - ccid_ops->ccid_hc_rx_slab = NULL; - - pr_info("DCCP: Deactivated CCID %d (%s)\n", - ccid_ops->ccid_id, ccid_ops->ccid_name); -} - -struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx) -{ - struct ccid_operations *ccid_ops = ccid_by_number(id); - struct ccid *ccid = NULL; - - if (ccid_ops == NULL) - goto out; - - ccid = kmem_cache_alloc(rx ? ccid_ops->ccid_hc_rx_slab : - ccid_ops->ccid_hc_tx_slab, gfp_any()); - if (ccid == NULL) - goto out; - ccid->ccid_ops = ccid_ops; - if (rx) { - memset(ccid + 1, 0, ccid_ops->ccid_hc_rx_obj_size); - if (ccid->ccid_ops->ccid_hc_rx_init != NULL && - ccid->ccid_ops->ccid_hc_rx_init(ccid, sk) != 0) - goto out_free_ccid; - } else { - memset(ccid + 1, 0, ccid_ops->ccid_hc_tx_obj_size); - if (ccid->ccid_ops->ccid_hc_tx_init != NULL && - ccid->ccid_ops->ccid_hc_tx_init(ccid, sk) != 0) - goto out_free_ccid; - } -out: - return ccid; -out_free_ccid: - kmem_cache_free(rx ? 
ccid_ops->ccid_hc_rx_slab : - ccid_ops->ccid_hc_tx_slab, ccid); - ccid = NULL; - goto out; -} - -void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk) -{ - if (ccid != NULL) { - if (ccid->ccid_ops->ccid_hc_rx_exit != NULL) - ccid->ccid_ops->ccid_hc_rx_exit(sk); - kmem_cache_free(ccid->ccid_ops->ccid_hc_rx_slab, ccid); - } -} - -void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk) -{ - if (ccid != NULL) { - if (ccid->ccid_ops->ccid_hc_tx_exit != NULL) - ccid->ccid_ops->ccid_hc_tx_exit(sk); - kmem_cache_free(ccid->ccid_ops->ccid_hc_tx_slab, ccid); - } -} - -int __init ccid_initialize_builtins(void) -{ - int i, err = tfrc_lib_init(); - - if (err) - return err; - - for (i = 0; i < ARRAY_SIZE(ccids); i++) { - err = ccid_activate(ccids[i]); - if (err) - goto unwind_registrations; - } - return 0; - -unwind_registrations: - while(--i >= 0) - ccid_deactivate(ccids[i]); - tfrc_lib_exit(); - return err; -} - -void ccid_cleanup_builtins(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(ccids); i++) - ccid_deactivate(ccids[i]); - tfrc_lib_exit(); -} diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h deleted file mode 100644 index 105f3734dadb..000000000000 --- a/net/dccp/ccid.h +++ /dev/null @@ -1,262 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _CCID_H -#define _CCID_H -/* - * net/dccp/ccid.h - * - * An implementation of the DCCP protocol - * Arnaldo Carvalho de Melo <acme@conectiva.com.br> - * - * CCID infrastructure - */ - -#include <net/sock.h> -#include <linux/compiler.h> -#include <linux/dccp.h> -#include <linux/list.h> -#include <linux/module.h> - -/* maximum value for a CCID (RFC 4340, 19.5) */ -#define CCID_MAX 255 -#define CCID_SLAB_NAME_LENGTH 32 - -struct tcp_info; - -/** - * struct ccid_operations - Interface to Congestion-Control Infrastructure - * - * @ccid_id: numerical CCID ID (up to %CCID_MAX, cf. table 5 in RFC 4340, 10.) 
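struct ccid_operations, whose fields are being documented here, is a plain operations vtable, and the inline wrappers later in this header all share one convention: a hook may be NULL, and the wrapper substitutes a neutral default so call sites never test the pointer themselves. A minimal sketch of that dispatch pattern with invented names:

#include <stdio.h>

struct ops {
        const char *name;
        int (*send_packet)(int len);    /* optional hook, may be NULL */
};

static int dispatch_send(const struct ops *o, int len)
{
        if (o->send_packet)
                return o->send_packet(len);
        return 0;       /* default: "send at once", like CCID_PACKET_SEND_AT_ONCE */
}

static int paced_send(int len) { return len > 1000 ? 10 : 0; }

int main(void)
{
        const struct ops plain = { .name = "no-op" };
        const struct ops paced = { .name = "paced", .send_packet = paced_send };

        /* prints "0 10": a missing hook falls back to the default */
        printf("%d %d\n", dispatch_send(&plain, 1500),
               dispatch_send(&paced, 1500));
        return 0;
}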
- * @ccid_ccmps: the CCMPS including network/transport headers (0 when disabled) - * @ccid_name: alphabetical identifier string for @ccid_id - * @ccid_hc_{r,t}x_slab: memory pool for the receiver/sender half-connection - * @ccid_hc_{r,t}x_obj_size: size of the receiver/sender half-connection socket - * - * @ccid_hc_{r,t}x_init: CCID-specific initialisation routine (before startup) - * @ccid_hc_{r,t}x_exit: CCID-specific cleanup routine (before destruction) - * @ccid_hc_rx_packet_recv: implements the HC-receiver side - * @ccid_hc_{r,t}x_parse_options: parsing routine for CCID/HC-specific options - * @ccid_hc_{r,t}x_insert_options: insert routine for CCID/HC-specific options - * @ccid_hc_tx_packet_recv: implements feedback processing for the HC-sender - * @ccid_hc_tx_send_packet: implements the sending part of the HC-sender - * @ccid_hc_tx_packet_sent: does accounting for packets in flight by HC-sender - * @ccid_hc_{r,t}x_get_info: INET_DIAG information for HC-receiver/sender - * @ccid_hc_{r,t}x_getsockopt: socket options specific to HC-receiver/sender - */ -struct ccid_operations { - unsigned char ccid_id; - __u32 ccid_ccmps; - const char *ccid_name; - struct kmem_cache *ccid_hc_rx_slab, - *ccid_hc_tx_slab; - char ccid_hc_rx_slab_name[CCID_SLAB_NAME_LENGTH]; - char ccid_hc_tx_slab_name[CCID_SLAB_NAME_LENGTH]; - __u32 ccid_hc_rx_obj_size, - ccid_hc_tx_obj_size; - /* Interface Routines */ - int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk); - int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk); - void (*ccid_hc_rx_exit)(struct sock *sk); - void (*ccid_hc_tx_exit)(struct sock *sk); - void (*ccid_hc_rx_packet_recv)(struct sock *sk, - struct sk_buff *skb); - int (*ccid_hc_rx_parse_options)(struct sock *sk, u8 pkt, - u8 opt, u8 *val, u8 len); - int (*ccid_hc_rx_insert_options)(struct sock *sk, - struct sk_buff *skb); - void (*ccid_hc_tx_packet_recv)(struct sock *sk, - struct sk_buff *skb); - int (*ccid_hc_tx_parse_options)(struct sock *sk, u8 pkt, - u8 opt, u8 *val, u8 len); - int (*ccid_hc_tx_send_packet)(struct sock *sk, - struct sk_buff *skb); - void (*ccid_hc_tx_packet_sent)(struct sock *sk, - unsigned int len); - void (*ccid_hc_rx_get_info)(struct sock *sk, - struct tcp_info *info); - void (*ccid_hc_tx_get_info)(struct sock *sk, - struct tcp_info *info); - int (*ccid_hc_rx_getsockopt)(struct sock *sk, - const int optname, int len, - u32 __user *optval, - int __user *optlen); - int (*ccid_hc_tx_getsockopt)(struct sock *sk, - const int optname, int len, - u32 __user *optval, - int __user *optlen); -}; - -extern struct ccid_operations ccid2_ops; -#ifdef CONFIG_IP_DCCP_CCID3 -extern struct ccid_operations ccid3_ops; -#endif - -int ccid_initialize_builtins(void); -void ccid_cleanup_builtins(void); - -struct ccid { - struct ccid_operations *ccid_ops; - char ccid_priv[]; -}; - -static inline void *ccid_priv(const struct ccid *ccid) -{ - return (void *)ccid->ccid_priv; -} - -bool ccid_support_check(u8 const *ccid_array, u8 array_len); -int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len); -int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, - char __user *, int __user *); - -struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx); - -static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp) -{ - struct ccid *ccid = dp->dccps_hc_rx_ccid; - - if (ccid == NULL || ccid->ccid_ops == NULL) - return -1; - return ccid->ccid_ops->ccid_id; -} - -static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp) -{ - struct ccid *ccid = dp->dccps_hc_tx_ccid; - - if (ccid 
== NULL || ccid->ccid_ops == NULL) - return -1; - return ccid->ccid_ops->ccid_id; -} - -void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk); -void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk); - -/* - * Congestion control of queued data packets via CCID decision. - * - * The TX CCID performs its congestion-control by indicating whether and when a - * queued packet may be sent, using the return code of ccid_hc_tx_send_packet(). - * The following modes are supported via the symbolic constants below: - * - timer-based pacing (CCID returns a delay value in milliseconds); - * - autonomous dequeueing (CCID internally schedules dccps_xmitlet). - */ - -enum ccid_dequeueing_decision { - CCID_PACKET_SEND_AT_ONCE = 0x00000, /* "green light": no delay */ - CCID_PACKET_DELAY_MAX = 0x0FFFF, /* maximum delay in msecs */ - CCID_PACKET_DELAY = 0x10000, /* CCID msec-delay mode */ - CCID_PACKET_WILL_DEQUEUE_LATER = 0x20000, /* CCID autonomous mode */ - CCID_PACKET_ERR = 0xF0000, /* error condition */ -}; - -static inline int ccid_packet_dequeue_eval(const int return_code) -{ - if (return_code < 0) - return CCID_PACKET_ERR; - if (return_code == 0) - return CCID_PACKET_SEND_AT_ONCE; - if (return_code <= CCID_PACKET_DELAY_MAX) - return CCID_PACKET_DELAY; - return return_code; -} - -static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, - struct sk_buff *skb) -{ - if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL) - return ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb); - return CCID_PACKET_SEND_AT_ONCE; -} - -static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk, - unsigned int len) -{ - if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL) - ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, len); -} - -static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk, - struct sk_buff *skb) -{ - if (ccid->ccid_ops->ccid_hc_rx_packet_recv != NULL) - ccid->ccid_ops->ccid_hc_rx_packet_recv(sk, skb); -} - -static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, - struct sk_buff *skb) -{ - if (ccid->ccid_ops->ccid_hc_tx_packet_recv != NULL) - ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb); -} - -/** - * ccid_hc_tx_parse_options - Parse CCID-specific options sent by the receiver - * @pkt: type of packet that @opt appears on (RFC 4340, 5.1) - * @opt: the CCID-specific option type (RFC 4340, 5.8 and 10.3) - * @val: value of @opt - * @len: length of @val in bytes - */ -static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, - u8 pkt, u8 opt, u8 *val, u8 len) -{ - if (!ccid || !ccid->ccid_ops->ccid_hc_tx_parse_options) - return 0; - return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len); -} - -/** - * ccid_hc_rx_parse_options - Parse CCID-specific options sent by the sender - * Arguments are analogous to ccid_hc_tx_parse_options() - */ -static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, - u8 pkt, u8 opt, u8 *val, u8 len) -{ - if (!ccid || !ccid->ccid_ops->ccid_hc_rx_parse_options) - return 0; - return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len); -} - -static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, - struct sk_buff *skb) -{ - if (ccid->ccid_ops->ccid_hc_rx_insert_options != NULL) - return ccid->ccid_ops->ccid_hc_rx_insert_options(sk, skb); - return 0; -} - -static inline void ccid_hc_rx_get_info(struct ccid *ccid, struct sock *sk, - struct tcp_info *info) -{ - if (ccid->ccid_ops->ccid_hc_rx_get_info != NULL) - 
ccid->ccid_ops->ccid_hc_rx_get_info(sk, info); -} - -static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk, - struct tcp_info *info) -{ - if (ccid->ccid_ops->ccid_hc_tx_get_info != NULL) - ccid->ccid_ops->ccid_hc_tx_get_info(sk, info); -} - -static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, - const int optname, int len, - u32 __user *optval, int __user *optlen) -{ - int rc = -ENOPROTOOPT; - if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) - rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len, - optval, optlen); - return rc; -} - -static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, - const int optname, int len, - u32 __user *optval, int __user *optlen) -{ - int rc = -ENOPROTOOPT; - if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) - rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len, - optval, optlen); - return rc; -} -#endif /* _CCID_H */ diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig deleted file mode 100644 index e3d388c33d25..000000000000 --- a/net/dccp/ccids/Kconfig +++ /dev/null @@ -1,55 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -menu "DCCP CCIDs Configuration" - -config IP_DCCP_CCID2_DEBUG - bool "CCID-2 debugging messages" - help - Enable CCID-2 specific debugging messages. - - The debugging output can additionally be toggled by setting the - ccid2_debug parameter to 0 or 1. - - If in doubt, say N. - -config IP_DCCP_CCID3 - bool "CCID-3 (TCP-Friendly)" - default IP_DCCP = y || IP_DCCP = m - help - CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based - rate-controlled congestion control mechanism. TFRC is designed to - be reasonably fair when competing for bandwidth with TCP-like flows, - where a flow is "reasonably fair" if its sending rate is generally - within a factor of two of the sending rate of a TCP flow under the - same conditions. However, TFRC has a much lower variation of - throughput over time compared with TCP, which makes CCID-3 more - suitable than CCID-2 for applications such streaming media where a - relatively smooth sending rate is of importance. - - CCID-3 is further described in RFC 4342, - https://www.ietf.org/rfc/rfc4342.txt - - The TFRC congestion control algorithms were initially described in - RFC 5348. - - This text was extracted from RFC 4340 (sec. 10.2), - https://www.ietf.org/rfc/rfc4340.txt - - If in doubt, say N. - -config IP_DCCP_CCID3_DEBUG - bool "CCID-3 debugging messages" - depends on IP_DCCP_CCID3 - help - Enable CCID-3 specific debugging messages. - - The debugging output can additionally be toggled by setting the - ccid3_debug parameter to 0 or 1. - - If in doubt, say N. - -config IP_DCCP_TFRC_LIB - def_bool y if IP_DCCP_CCID3 - -config IP_DCCP_TFRC_DEBUG - def_bool y if IP_DCCP_CCID3_DEBUG -endmenu diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c deleted file mode 100644 index d6b30700af67..000000000000 --- a/net/dccp/ccids/ccid2.c +++ /dev/null @@ -1,794 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2005, 2006 Andrea Bittau <a.bittau@cs.ucl.ac.uk> - * - * Changes to meet Linux coding standards, and DCCP infrastructure fixes. - * - * Copyright (c) 2006 Arnaldo Carvalho de Melo <acme@conectiva.com.br> - */ - -/* - * This implementation should follow RFC 4341 - */ -#include <linux/slab.h> -#include "../feat.h" -#include "ccid2.h" - - -#ifdef CONFIG_IP_DCCP_CCID2_DEBUG -static bool ccid2_debug; -#define ccid2_pr_debug(format, a...) 
DCCP_PR_DEBUG(ccid2_debug, format, ##a) -#else -#define ccid2_pr_debug(format, a...) -#endif - -static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc) -{ - struct ccid2_seq *seqp; - int i; - - /* check if we have space to preserve the pointer to the buffer */ - if (hc->tx_seqbufc >= (sizeof(hc->tx_seqbuf) / - sizeof(struct ccid2_seq *))) - return -ENOMEM; - - /* allocate buffer and initialize linked list */ - seqp = kmalloc_array(CCID2_SEQBUF_LEN, sizeof(struct ccid2_seq), - gfp_any()); - if (seqp == NULL) - return -ENOMEM; - - for (i = 0; i < (CCID2_SEQBUF_LEN - 1); i++) { - seqp[i].ccid2s_next = &seqp[i + 1]; - seqp[i + 1].ccid2s_prev = &seqp[i]; - } - seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = seqp; - seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; - - /* This is the first allocation. Initiate the head and tail. */ - if (hc->tx_seqbufc == 0) - hc->tx_seqh = hc->tx_seqt = seqp; - else { - /* link the existing list with the one we just created */ - hc->tx_seqh->ccid2s_next = seqp; - seqp->ccid2s_prev = hc->tx_seqh; - - hc->tx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; - seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hc->tx_seqt; - } - - /* store the original pointer to the buffer so we can free it */ - hc->tx_seqbuf[hc->tx_seqbufc] = seqp; - hc->tx_seqbufc++; - - return 0; -} - -static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) -{ - if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk))) - return CCID_PACKET_WILL_DEQUEUE_LATER; - return CCID_PACKET_SEND_AT_ONCE; -} - -static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) -{ - u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2); - - /* - * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from - * RFC 4341, 6.1.2. We ignore the statement that Ack Ratio 2 is always - * acceptable since this causes starvation/deadlock whenever cwnd < 2. - * The same problem arises when Ack Ratio is 0 (ie. Ack Ratio disabled). - */ - if (val == 0 || val > max_ratio) { - DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio); - val = max_ratio; - } - dccp_feat_signal_nn_change(sk, DCCPF_ACK_RATIO, - min_t(u32, val, DCCPF_ACK_RATIO_MAX)); -} - -static void ccid2_check_l_ack_ratio(struct sock *sk) -{ - struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - - /* - * After a loss, idle period, application limited period, or RTO we - * need to check that the ack ratio is still less than the congestion - * window. Otherwise, we will send an entire congestion window of - * packets and got no response because we haven't sent ack ratio - * packets yet. - * If the ack ratio does need to be reduced, we reduce it to half of - * the congestion window (or 1 if that's zero) instead of to the - * congestion window. This prevents problems if one ack is lost. - */ - if (dccp_feat_nn_get(sk, DCCPF_ACK_RATIO) > hc->tx_cwnd) - ccid2_change_l_ack_ratio(sk, hc->tx_cwnd/2 ? 
: 1U); -} - -static void ccid2_change_l_seq_window(struct sock *sk, u64 val) -{ - dccp_feat_signal_nn_change(sk, DCCPF_SEQUENCE_WINDOW, - clamp_val(val, DCCPF_SEQ_WMIN, - DCCPF_SEQ_WMAX)); -} - -static void dccp_tasklet_schedule(struct sock *sk) -{ - struct tasklet_struct *t = &dccp_sk(sk)->dccps_xmitlet; - - if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { - sock_hold(sk); - __tasklet_schedule(t); - } -} - -static void ccid2_hc_tx_rto_expire(struct timer_list *t) -{ - struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer); - struct sock *sk = hc->sk; - const bool sender_was_blocked = ccid2_cwnd_network_limited(hc); - - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + HZ / 5); - goto out; - } - - ccid2_pr_debug("RTO_EXPIRE\n"); - - if (sk->sk_state == DCCP_CLOSED) - goto out; - - /* back-off timer */ - hc->tx_rto <<= 1; - if (hc->tx_rto > DCCP_RTO_MAX) - hc->tx_rto = DCCP_RTO_MAX; - - /* adjust pipe, cwnd etc */ - hc->tx_ssthresh = hc->tx_cwnd / 2; - if (hc->tx_ssthresh < 2) - hc->tx_ssthresh = 2; - hc->tx_cwnd = 1; - hc->tx_pipe = 0; - - /* clear state about stuff we sent */ - hc->tx_seqt = hc->tx_seqh; - hc->tx_packets_acked = 0; - - /* clear ack ratio state. */ - hc->tx_rpseq = 0; - hc->tx_rpdupack = -1; - ccid2_change_l_ack_ratio(sk, 1); - - /* if we were blocked before, we may now send cwnd=1 packet */ - if (sender_was_blocked) - dccp_tasklet_schedule(sk); - /* restart backed-off timer */ - sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); -out: - bh_unlock_sock(sk); - sock_put(sk); -} - -/* - * Congestion window validation (RFC 2861). - */ -static bool ccid2_do_cwv = true; -module_param(ccid2_do_cwv, bool, 0644); -MODULE_PARM_DESC(ccid2_do_cwv, "Perform RFC2861 Congestion Window Validation"); - -/** - * ccid2_update_used_window - Track how much of cwnd is actually used - * @hc: socket to update window - * @new_wnd: new window values to add into the filter - * - * This is done in addition to CWV. The sender needs to have an idea of how many - * packets may be in flight, to set the local Sequence Window value accordingly - * (RFC 4340, 7.5.2). The CWV mechanism is exploited to keep track of the - * maximum-used window. We use an EWMA low-pass filter to filter out noise. 
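The EWMA this comment refers to is implemented just below as avg = (3*avg + sample) / 4, an integer low-pass filter with gain 1/4. A short worked example showing how a one-off spike is absorbed rather than tracked:

#include <stdio.h>

int main(void)
{
        unsigned int avg = 0;
        const unsigned int samples[] = { 8, 8, 8, 40, 8, 8 };

        for (unsigned int i = 0; i < 6; i++) {
                avg = (3 * avg + samples[i]) / 4;       /* same update rule */
                printf("sample=%2u avg=%2u\n", samples[i], avg);
        }
        /* the spike to 40 lifts the average only to 13, then it decays */
        return 0;
}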
- */ -static void ccid2_update_used_window(struct ccid2_hc_tx_sock *hc, u32 new_wnd) -{ - hc->tx_expected_wnd = (3 * hc->tx_expected_wnd + new_wnd) / 4; -} - -/* This borrows the code of tcp_cwnd_application_limited() */ -static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now) -{ - struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - /* don't reduce cwnd below the initial window (IW) */ - u32 init_win = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache), - win_used = max(hc->tx_cwnd_used, init_win); - - if (win_used < hc->tx_cwnd) { - hc->tx_ssthresh = max(hc->tx_ssthresh, - (hc->tx_cwnd >> 1) + (hc->tx_cwnd >> 2)); - hc->tx_cwnd = (hc->tx_cwnd + win_used) >> 1; - } - hc->tx_cwnd_used = 0; - hc->tx_cwnd_stamp = now; - - ccid2_check_l_ack_ratio(sk); -} - -/* This borrows the code of tcp_cwnd_restart() */ -static void ccid2_cwnd_restart(struct sock *sk, const u32 now) -{ - struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - u32 cwnd = hc->tx_cwnd, restart_cwnd, - iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache); - s32 delta = now - hc->tx_lsndtime; - - hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2)); - - /* don't reduce cwnd below the initial window (IW) */ - restart_cwnd = min(cwnd, iwnd); - - while ((delta -= hc->tx_rto) >= 0 && cwnd > restart_cwnd) - cwnd >>= 1; - hc->tx_cwnd = max(cwnd, restart_cwnd); - hc->tx_cwnd_stamp = now; - hc->tx_cwnd_used = 0; - - ccid2_check_l_ack_ratio(sk); -} - -static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - const u32 now = ccid2_jiffies32; - struct ccid2_seq *next; - - /* slow-start after idle periods (RFC 2581, RFC 2861) */ - if (ccid2_do_cwv && !hc->tx_pipe && - (s32)(now - hc->tx_lsndtime) >= hc->tx_rto) - ccid2_cwnd_restart(sk, now); - - hc->tx_lsndtime = now; - hc->tx_pipe += 1; - - /* see whether cwnd was fully used (RFC 2861), update expected window */ - if (ccid2_cwnd_network_limited(hc)) { - ccid2_update_used_window(hc, hc->tx_cwnd); - hc->tx_cwnd_used = 0; - hc->tx_cwnd_stamp = now; - } else { - if (hc->tx_pipe > hc->tx_cwnd_used) - hc->tx_cwnd_used = hc->tx_pipe; - - ccid2_update_used_window(hc, hc->tx_cwnd_used); - - if (ccid2_do_cwv && (s32)(now - hc->tx_cwnd_stamp) >= hc->tx_rto) - ccid2_cwnd_application_limited(sk, now); - } - - hc->tx_seqh->ccid2s_seq = dp->dccps_gss; - hc->tx_seqh->ccid2s_acked = 0; - hc->tx_seqh->ccid2s_sent = now; - - next = hc->tx_seqh->ccid2s_next; - /* check if we need to alloc more space */ - if (next == hc->tx_seqt) { - if (ccid2_hc_tx_alloc_seq(hc)) { - DCCP_CRIT("packet history - out of memory!"); - /* FIXME: find a more graceful way to bail out */ - return; - } - next = hc->tx_seqh->ccid2s_next; - BUG_ON(next == hc->tx_seqt); - } - hc->tx_seqh = next; - - ccid2_pr_debug("cwnd=%d pipe=%d\n", hc->tx_cwnd, hc->tx_pipe); - - /* - * FIXME: The code below is broken and the variables have been removed - * from the socket struct. The `ackloss' variable was always set to 0, - * and with arsent there are several problems: - * (i) it doesn't just count the number of Acks, but all sent packets; - * (ii) it is expressed in # of packets, not # of windows, so the - * comparison below uses the wrong formula: Appendix A of RFC 4341 - * comes up with the number K = cwnd / (R^2 - R) of consecutive windows - * of data with no lost or marked Ack packets. 
If arsent were the # of - * consecutive Acks received without loss, then Ack Ratio needs to be - * decreased by 1 when - * arsent >= K * cwnd / R = cwnd^2 / (R^3 - R^2) - * where cwnd / R is the number of Acks received per window of data - * (cf. RFC 4341, App. A). The problems are that - * - arsent counts other packets as well; - * - the comparison uses a formula different from RFC 4341; - * - computing a cubic/quadratic equation each time is too complicated. - * Hence a different algorithm is needed. - */ -#if 0 - /* Ack Ratio. Need to maintain a concept of how many windows we sent */ - hc->tx_arsent++; - /* We had an ack loss in this window... */ - if (hc->tx_ackloss) { - if (hc->tx_arsent >= hc->tx_cwnd) { - hc->tx_arsent = 0; - hc->tx_ackloss = 0; - } - } else { - /* No acks lost up to now... */ - /* decrease ack ratio if enough packets were sent */ - if (dp->dccps_l_ack_ratio > 1) { - /* XXX don't calculate denominator each time */ - int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio - - dp->dccps_l_ack_ratio; - - denom = hc->tx_cwnd * hc->tx_cwnd / denom; - - if (hc->tx_arsent >= denom) { - ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1); - hc->tx_arsent = 0; - } - } else { - /* we can't increase ack ratio further [1] */ - hc->tx_arsent = 0; /* or maybe set it to cwnd*/ - } - } -#endif - - sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); - -#ifdef CONFIG_IP_DCCP_CCID2_DEBUG - do { - struct ccid2_seq *seqp = hc->tx_seqt; - - while (seqp != hc->tx_seqh) { - ccid2_pr_debug("out seq=%llu acked=%d time=%u\n", - (unsigned long long)seqp->ccid2s_seq, - seqp->ccid2s_acked, seqp->ccid2s_sent); - seqp = seqp->ccid2s_next; - } - } while (0); - ccid2_pr_debug("=========\n"); -#endif -} - -/** - * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm - * @sk: socket to perform estimator on - * @mrtt: measured RTT - * - * This code is almost identical with TCP's tcp_rtt_estimator(), since - * - it has a higher sampling frequency (recommended by RFC 1323), - * - the RTO does not collapse into RTT due to RTTVAR going towards zero, - * - it is simple (cf. more complex proposals such as Eifel timer or research - * which suggests that the gain should be set according to window size), - * - in tests it was found to work well with CCID2 [gerrit]. - */ -static void ccid2_rtt_estimator(struct sock *sk, const long mrtt) -{ - struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - long m = mrtt ? : 1; - - if (hc->tx_srtt == 0) { - /* First measurement m */ - hc->tx_srtt = m << 3; - hc->tx_mdev = m << 1; - - hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk)); - hc->tx_rttvar = hc->tx_mdev_max; - - hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss; - } else { - /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */ - m -= (hc->tx_srtt >> 3); - hc->tx_srtt += m; - - /* Similarly, update scaled mdev with regard to |m| */ - if (m < 0) { - m = -m; - m -= (hc->tx_mdev >> 2); - /* - * This neutralises RTO increase when RTT < SRTT - mdev - * (see P. Sarolahti, A. Kuznetsov,"Congestion Control - * in Linux TCP", USENIX 2002, pp. 49-62). 
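For reference, the estimator here is the scaled-integer scheme of RFC 2988/RFC 6298: SRTT is kept shifted left by 3 and RTTVAR by 2, so the 1/8 and 1/4 gains become shifts and RTO = SRTT + 4*RTTVAR falls out as (srtt >> 3) + rttvar in scaled units, exactly as computed at the end of this function. A stripped-down sketch that keeps the scaling but omits the mdev_max/per-flight-decay machinery:

#include <stdio.h>

static long srtt8, rttvar4;     /* scaled state: srtt << 3, rttvar << 2 */

static void rtt_sample(long m)  /* m: measured RTT, e.g. in ms */
{
        if (!srtt8) {
                srtt8 = m << 3;         /* first sample: SRTT = m */
                rttvar4 = m << 1;       /* RTTVAR = m/2, scaled by 4 */
                return;
        }

        long err = m - (srtt8 >> 3);

        srtt8 += err;                    /* SRTT += (m - SRTT) / 8 */
        if (err < 0)
                err = -err;
        rttvar4 += err - (rttvar4 >> 2); /* RTTVAR += (|err| - RTTVAR) / 4 */
}

int main(void)
{
        const long samples[] = { 100, 110, 90, 300, 100 };

        for (int i = 0; i < 5; i++) {
                rtt_sample(samples[i]);
                printf("m=%3ld srtt=%3ld rto=%3ld\n", samples[i],
                       srtt8 >> 3, (srtt8 >> 3) + rttvar4);
        }
        return 0;
}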
- */ - if (m > 0) - m >>= 3; - } else { - m -= (hc->tx_mdev >> 2); - } - hc->tx_mdev += m; - - if (hc->tx_mdev > hc->tx_mdev_max) { - hc->tx_mdev_max = hc->tx_mdev; - if (hc->tx_mdev_max > hc->tx_rttvar) - hc->tx_rttvar = hc->tx_mdev_max; - } - - /* - * Decay RTTVAR at most once per flight, exploiting that - * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2) - * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1) - * GAR is a useful bound for FlightSize = pipe. - * AWL is probably too low here, as it over-estimates pipe. - */ - if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) { - if (hc->tx_mdev_max < hc->tx_rttvar) - hc->tx_rttvar -= (hc->tx_rttvar - - hc->tx_mdev_max) >> 2; - hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss; - hc->tx_mdev_max = tcp_rto_min(sk); - } - } - - /* - * Set RTO from SRTT and RTTVAR - * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms. - * This agrees with RFC 4341, 5: - * "Because DCCP does not retransmit data, DCCP does not require - * TCP's recommended minimum timeout of one second". - */ - hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar; - - if (hc->tx_rto > DCCP_RTO_MAX) - hc->tx_rto = DCCP_RTO_MAX; -} - -static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, - unsigned int *maxincr) -{ - struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - struct dccp_sock *dp = dccp_sk(sk); - int r_seq_used = hc->tx_cwnd / dp->dccps_l_ack_ratio; - - if (hc->tx_cwnd < dp->dccps_l_seq_win && - r_seq_used < dp->dccps_r_seq_win) { - if (hc->tx_cwnd < hc->tx_ssthresh) { - if (*maxincr > 0 && ++hc->tx_packets_acked >= 2) { - hc->tx_cwnd += 1; - *maxincr -= 1; - hc->tx_packets_acked = 0; - } - } else if (++hc->tx_packets_acked >= hc->tx_cwnd) { - hc->tx_cwnd += 1; - hc->tx_packets_acked = 0; - } - } - - /* - * Adjust the local sequence window and the ack ratio to allow about - * 5 times the number of packets in the network (RFC 4340 7.5.2) - */ - if (r_seq_used * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_r_seq_win) - ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio * 2); - else if (r_seq_used * CCID2_WIN_CHANGE_FACTOR < dp->dccps_r_seq_win/2) - ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio / 2 ? : 1U); - - if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_l_seq_win) - ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win * 2); - else if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR < dp->dccps_l_seq_win/2) - ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win / 2); - - /* - * FIXME: RTT is sampled several times per acknowledgment (for each - * entry in the Ack Vector), instead of once per Ack (as in TCP SACK). - * This causes the RTT to be over-estimated, since the older entries - * in the Ack Vector have earlier sending times. - * The cleanest solution is to not use the ccid2s_sent field at all - * and instead use DCCP timestamps: requires changes in other places. - */ - ccid2_rtt_estimator(sk, ccid2_jiffies32 - seqp->ccid2s_sent); -} - -static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) -{ - struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - - if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) { - ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); - return; - } - - hc->tx_last_cong = ccid2_jiffies32; - - hc->tx_cwnd = hc->tx_cwnd / 2 ? 
: 1U; - hc->tx_ssthresh = max(hc->tx_cwnd, 2U); - - ccid2_check_l_ack_ratio(sk); -} - -static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, - u8 option, u8 *optval, u8 optlen) -{ - struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - - switch (option) { - case DCCPO_ACK_VECTOR_0: - case DCCPO_ACK_VECTOR_1: - return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen, - option - DCCPO_ACK_VECTOR_0); - } - return 0; -} - -static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - const bool sender_was_blocked = ccid2_cwnd_network_limited(hc); - struct dccp_ackvec_parsed *avp; - u64 ackno, seqno; - struct ccid2_seq *seqp; - int done = 0; - unsigned int maxincr = 0; - - /* check reverse path congestion */ - seqno = DCCP_SKB_CB(skb)->dccpd_seq; - - /* XXX this whole "algorithm" is broken. Need to fix it to keep track - * of the seqnos of the dupacks so that rpseq and rpdupack are correct - * -sorbo. - */ - /* need to bootstrap */ - if (hc->tx_rpdupack == -1) { - hc->tx_rpdupack = 0; - hc->tx_rpseq = seqno; - } else { - /* check if packet is consecutive */ - if (dccp_delta_seqno(hc->tx_rpseq, seqno) == 1) - hc->tx_rpseq = seqno; - /* it's a later packet */ - else if (after48(seqno, hc->tx_rpseq)) { - hc->tx_rpdupack++; - - /* check if we got enough dupacks */ - if (hc->tx_rpdupack >= NUMDUPACK) { - hc->tx_rpdupack = -1; /* XXX lame */ - hc->tx_rpseq = 0; -#ifdef __CCID2_COPES_GRACEFULLY_WITH_ACK_CONGESTION_CONTROL__ - /* - * FIXME: Ack Congestion Control is broken; in - * the current state instabilities occurred with - * Ack Ratios greater than 1; causing hang-ups - * and long RTO timeouts. This needs to be fixed - * before opening up dynamic changes. -- gerrit - */ - ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio); -#endif - } - } - } - - /* check forward path congestion */ - if (dccp_packet_without_ack(skb)) - return; - - /* still didn't send out new data packets */ - if (hc->tx_seqh == hc->tx_seqt) - goto done; - - ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; - if (after48(ackno, hc->tx_high_ack)) - hc->tx_high_ack = ackno; - - seqp = hc->tx_seqt; - while (before48(seqp->ccid2s_seq, ackno)) { - seqp = seqp->ccid2s_next; - if (seqp == hc->tx_seqh) { - seqp = hc->tx_seqh->ccid2s_prev; - break; - } - } - - /* - * In slow-start, cwnd can increase up to a maximum of Ack Ratio/2 - * packets per acknowledgement. Rounding up avoids that cwnd is not - * advanced when Ack Ratio is 1 and gives a slight edge otherwise. - */ - if (hc->tx_cwnd < hc->tx_ssthresh) - maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); - - /* go through all ack vectors */ - list_for_each_entry(avp, &hc->tx_av_chunks, node) { - /* go through this ack vector */ - for (; avp->len--; avp->vec++) { - u64 ackno_end_rl = SUB48(ackno, - dccp_ackvec_runlen(avp->vec)); - - ccid2_pr_debug("ackvec %llu |%u,%u|\n", - (unsigned long long)ackno, - dccp_ackvec_state(avp->vec) >> 6, - dccp_ackvec_runlen(avp->vec)); - /* if the seqno we are analyzing is larger than the - * current ackno, then move towards the tail of our - * seqnos. 
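The walk being described decodes one Ack Vector cell at a time: a cell with run length R covers the R+1 consecutive seqnos ending at the current ackno, and the next cell resumes one seqno below that range (the SUB48(ackno_end_rl, 1) step). A toy decode of the same layout, with ordinary 64-bit arithmetic standing in for the mod-2^48 helpers:

#include <stdio.h>

#define RUNLEN(cell)    ((cell) & 0x3F)
#define STATE(cell)     ((cell) & 0xC0)

int main(void)
{
        /* 0x02: 3 seqnos received; 0xC1: 2 seqnos not received */
        const unsigned char vec[] = { 0x02, 0xC1 };
        unsigned long long ackno = 1000;

        for (unsigned int i = 0; i < 2; i++) {
                unsigned long long end_rl = ackno - RUNLEN(vec[i]);

                printf("state=0x%02x covers %llu..%llu\n",
                       STATE(vec[i]), end_rl, ackno);
                ackno = end_rl - 1;     /* continue just below the range */
        }
        /* prints 998..1000 (received), then 996..997 (not received) */
        return 0;
}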
- */ - while (after48(seqp->ccid2s_seq, ackno)) { - if (seqp == hc->tx_seqt) { - done = 1; - break; - } - seqp = seqp->ccid2s_prev; - } - if (done) - break; - - /* check all seqnos in the range of the vector - * run length - */ - while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { - const u8 state = dccp_ackvec_state(avp->vec); - - /* new packet received or marked */ - if (state != DCCPAV_NOT_RECEIVED && - !seqp->ccid2s_acked) { - if (state == DCCPAV_ECN_MARKED) - ccid2_congestion_event(sk, - seqp); - else - ccid2_new_ack(sk, seqp, - &maxincr); - - seqp->ccid2s_acked = 1; - ccid2_pr_debug("Got ack for %llu\n", - (unsigned long long)seqp->ccid2s_seq); - hc->tx_pipe--; - } - if (seqp == hc->tx_seqt) { - done = 1; - break; - } - seqp = seqp->ccid2s_prev; - } - if (done) - break; - - ackno = SUB48(ackno_end_rl, 1); - } - if (done) - break; - } - - /* The state about what is acked should be correct now - * Check for NUMDUPACK - */ - seqp = hc->tx_seqt; - while (before48(seqp->ccid2s_seq, hc->tx_high_ack)) { - seqp = seqp->ccid2s_next; - if (seqp == hc->tx_seqh) { - seqp = hc->tx_seqh->ccid2s_prev; - break; - } - } - done = 0; - while (1) { - if (seqp->ccid2s_acked) { - done++; - if (done == NUMDUPACK) - break; - } - if (seqp == hc->tx_seqt) - break; - seqp = seqp->ccid2s_prev; - } - - /* If there are at least 3 acknowledgements, anything unacknowledged - * below the last sequence number is considered lost - */ - if (done == NUMDUPACK) { - struct ccid2_seq *last_acked = seqp; - - /* check for lost packets */ - while (1) { - if (!seqp->ccid2s_acked) { - ccid2_pr_debug("Packet lost: %llu\n", - (unsigned long long)seqp->ccid2s_seq); - /* XXX need to traverse from tail -> head in - * order to detect multiple congestion events in - * one ack vector. - */ - ccid2_congestion_event(sk, seqp); - hc->tx_pipe--; - } - if (seqp == hc->tx_seqt) - break; - seqp = seqp->ccid2s_prev; - } - - hc->tx_seqt = last_acked; - } - - /* trim acked packets in tail */ - while (hc->tx_seqt != hc->tx_seqh) { - if (!hc->tx_seqt->ccid2s_acked) - break; - - hc->tx_seqt = hc->tx_seqt->ccid2s_next; - } - - /* restart RTO timer if not all outstanding data has been acked */ - if (hc->tx_pipe == 0) - sk_stop_timer(sk, &hc->tx_rtotimer); - else - sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); -done: - /* check if incoming Acks allow pending packets to be sent */ - if (sender_was_blocked && !ccid2_cwnd_network_limited(hc)) - dccp_tasklet_schedule(sk); - dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); -} - -static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) -{ - struct ccid2_hc_tx_sock *hc = ccid_priv(ccid); - struct dccp_sock *dp = dccp_sk(sk); - u32 max_ratio; - - /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ - hc->tx_ssthresh = ~0U; - - /* Use larger initial windows (RFC 4341, section 5). */ - hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); - hc->tx_expected_wnd = hc->tx_cwnd; - - /* Make sure that Ack Ratio is enabled and within bounds. */ - max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); - if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio) - dp->dccps_l_ack_ratio = max_ratio; - - /* XXX init ~ to window size... 
*/ - if (ccid2_hc_tx_alloc_seq(hc)) - return -ENOMEM; - - hc->tx_rto = DCCP_TIMEOUT_INIT; - hc->tx_rpdupack = -1; - hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32; - hc->tx_cwnd_used = 0; - hc->sk = sk; - timer_setup(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 0); - INIT_LIST_HEAD(&hc->tx_av_chunks); - return 0; -} - -static void ccid2_hc_tx_exit(struct sock *sk) -{ - struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - int i; - - sk_stop_timer(sk, &hc->tx_rtotimer); - - for (i = 0; i < hc->tx_seqbufc; i++) - kfree(hc->tx_seqbuf[i]); - hc->tx_seqbufc = 0; - dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); -} - -static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) -{ - struct ccid2_hc_rx_sock *hc = ccid2_hc_rx_sk(sk); - - if (!dccp_data_packet(skb)) - return; - - if (++hc->rx_num_data_pkts >= dccp_sk(sk)->dccps_r_ack_ratio) { - dccp_send_ack(sk); - hc->rx_num_data_pkts = 0; - } -} - -struct ccid_operations ccid2_ops = { - .ccid_id = DCCPC_CCID2, - .ccid_name = "TCP-like", - .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), - .ccid_hc_tx_init = ccid2_hc_tx_init, - .ccid_hc_tx_exit = ccid2_hc_tx_exit, - .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, - .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, - .ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options, - .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, - .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), - .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, -}; - -#ifdef CONFIG_IP_DCCP_CCID2_DEBUG -module_param(ccid2_debug, bool, 0644); -MODULE_PARM_DESC(ccid2_debug, "Enable CCID-2 debug messages"); -#endif diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h deleted file mode 100644 index 330c7b4ec001..000000000000 --- a/net/dccp/ccids/ccid2.h +++ /dev/null @@ -1,121 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk> - */ -#ifndef _DCCP_CCID2_H_ -#define _DCCP_CCID2_H_ - -#include <linux/timer.h> -#include <linux/types.h> -#include "../ccid.h" -#include "../dccp.h" - -/* - * CCID-2 timestamping faces the same issues as TCP timestamping. - * Hence we reuse/share as much of the code as possible. - */ -#define ccid2_jiffies32 ((u32)jiffies) - -/* NUMDUPACK parameter from RFC 4341, p. 
6 */ -#define NUMDUPACK 3 - -struct ccid2_seq { - u64 ccid2s_seq; - u32 ccid2s_sent; - int ccid2s_acked; - struct ccid2_seq *ccid2s_prev; - struct ccid2_seq *ccid2s_next; -}; - -#define CCID2_SEQBUF_LEN 1024 -#define CCID2_SEQBUF_MAX 128 - -/* - * Multiple of congestion window to keep the sequence window at - * (RFC 4340 7.5.2) - */ -#define CCID2_WIN_CHANGE_FACTOR 5 - -/** - * struct ccid2_hc_tx_sock - CCID2 TX half connection - * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 - * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465) - * @tx_srtt: smoothed RTT estimate, scaled by 2^3 - * @tx_mdev: smoothed RTT variation, scaled by 2^2 - * @tx_mdev_max: maximum of @mdev during one flight - * @tx_rttvar: moving average/maximum of @mdev_max - * @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988) - * @tx_rtt_seq: to decay RTTVAR at most once per flight - * @tx_cwnd_used: actually used cwnd, W_used of RFC 2861 - * @tx_expected_wnd: moving average of @tx_cwnd_used - * @tx_cwnd_stamp: to track idle periods in CWV - * @tx_lsndtime: last time (in jiffies) a data packet was sent - * @tx_rpseq: last consecutive seqno - * @tx_rpdupack: dupacks since rpseq - * @tx_av_chunks: list of Ack Vectors received on current skb - */ -struct ccid2_hc_tx_sock { - u32 tx_cwnd; - u32 tx_ssthresh; - u32 tx_pipe; - u32 tx_packets_acked; - struct ccid2_seq *tx_seqbuf[CCID2_SEQBUF_MAX]; - int tx_seqbufc; - struct ccid2_seq *tx_seqh; - struct ccid2_seq *tx_seqt; - - /* RTT measurement: variables/principles are the same as in TCP */ - u32 tx_srtt, - tx_mdev, - tx_mdev_max, - tx_rttvar, - tx_rto; - u64 tx_rtt_seq:48; - struct timer_list tx_rtotimer; - struct sock *sk; - - /* Congestion Window validation (optional, RFC 2861) */ - u32 tx_cwnd_used, - tx_expected_wnd, - tx_cwnd_stamp, - tx_lsndtime; - - u64 tx_rpseq; - int tx_rpdupack; - u32 tx_last_cong; - u64 tx_high_ack; - struct list_head tx_av_chunks; -}; - -static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc) -{ - return hc->tx_pipe >= hc->tx_cwnd; -} - -/* - * Convert RFC 3390 larger initial window into an equivalent number of packets. - * This is based on the numbers specified in RFC 5681, 3.1. - */ -static inline u32 rfc3390_bytes_to_packets(const u32 smss) -{ - return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); -} - -/** - * struct ccid2_hc_rx_sock - Receiving end of CCID-2 half-connection - * @rx_num_data_pkts: number of data packets received since last feedback - */ -struct ccid2_hc_rx_sock { - u32 rx_num_data_pkts; -}; - -static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk) -{ - return ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid); -} - -static inline struct ccid2_hc_rx_sock *ccid2_hc_rx_sk(const struct sock *sk) -{ - return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid); -} -#endif /* _DCCP_CCID2_H_ */ diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c deleted file mode 100644 index f349d16dd8f6..000000000000 --- a/net/dccp/ccids/ccid3.c +++ /dev/null @@ -1,866 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2007 The University of Aberdeen, Scotland, UK - * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> - * - * An implementation of the DCCP protocol - * - * This code has been developed by the University of Waikato WAND - * research group. 
For further information please see https://www.wand.net.nz/ - * - * This code also uses code from Lulea University, rereleased as GPL by its - * authors: - * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon - * - * Changes to meet Linux coding standards, to make it meet latest ccid3 draft - * and to make it work as a loadable module in the DCCP stack written by - * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. - * - * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> - */ -#include "../dccp.h" -#include "ccid3.h" - -#include <linux/unaligned.h> - -#ifdef CONFIG_IP_DCCP_CCID3_DEBUG -static bool ccid3_debug; -#define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a) -#else -#define ccid3_pr_debug(format, a...) -#endif - -/* - * Transmitter Half-Connection Routines - */ -#ifdef CONFIG_IP_DCCP_CCID3_DEBUG -static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) -{ - static const char *const ccid3_state_names[] = { - [TFRC_SSTATE_NO_SENT] = "NO_SENT", - [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", - [TFRC_SSTATE_FBACK] = "FBACK", - }; - - return ccid3_state_names[state]; -} -#endif - -static void ccid3_hc_tx_set_state(struct sock *sk, - enum ccid3_hc_tx_states state) -{ - struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); - enum ccid3_hc_tx_states oldstate = hc->tx_state; - - ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", - dccp_role(sk), sk, ccid3_tx_state_name(oldstate), - ccid3_tx_state_name(state)); - WARN_ON(state == oldstate); - hc->tx_state = state; -} - -/* - * Compute the initial sending rate X_init in the manner of RFC 3390: - * - * X_init = min(4 * s, max(2 * s, 4380 bytes)) / RTT - * - * Note that RFC 3390 uses MSS, RFC 4342 refers to RFC 3390, and rfc3448bis - * (rev-02) clarifies the use of RFC 3390 with regard to the above formula. - * For consistency with other parts of the code, X_init is scaled by 2^6. - */ -static inline u64 rfc3390_initial_rate(struct sock *sk) -{ - const struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); - const __u32 w_init = clamp_t(__u32, 4380U, 2 * hc->tx_s, 4 * hc->tx_s); - - return scaled_div(w_init << 6, hc->tx_rtt); -} - -/** - * ccid3_update_send_interval - Calculate new t_ipi = s / X_inst - * @hc: socket to have the send interval updated - * - * This respects the granularity of X_inst (64 * bytes/second). - */ -static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) -{ - hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x); - - DCCP_BUG_ON(hc->tx_t_ipi == 0); - ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hc->tx_t_ipi, - hc->tx_s, (unsigned int)(hc->tx_x >> 6)); -} - -static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) -{ - u32 delta = ktime_us_delta(now, hc->tx_t_last_win_count); - - return delta / hc->tx_rtt; -} - -/** - * ccid3_hc_tx_update_x - Update allowed sending rate X - * @sk: socket to be updated - * @stamp: most recent time if available - can be left NULL. - * - * This function tracks draft rfc3448bis, check there for latest details. - * - * Note: X and X_recv are both stored in units of 64 * bytes/second, to support - * fine-grained resolution of sending rates. This requires scaling by 2^6 - * throughout the code. Only X_calc is unscaled (in bytes/second). - * - */ -static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) -{ - struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); - __u64 min_rate = 2 * hc->tx_x_recv; - const __u64 old_x = hc->tx_x; - ktime_t now = stamp ? 
*stamp : ktime_get_real(); - - /* - * Handle IDLE periods: do not reduce below RFC3390 initial sending rate - * when idling [RFC 4342, 5.1]. Definition of idling is from rfc3448bis: - * a sender is idle if it has not sent anything over a 2-RTT-period. - * For consistency with X and X_recv, min_rate is also scaled by 2^6. - */ - if (ccid3_hc_tx_idle_rtt(hc, now) >= 2) { - min_rate = rfc3390_initial_rate(sk); - min_rate = max(min_rate, 2 * hc->tx_x_recv); - } - - if (hc->tx_p > 0) { - - hc->tx_x = min(((__u64)hc->tx_x_calc) << 6, min_rate); - hc->tx_x = max(hc->tx_x, (((__u64)hc->tx_s) << 6) / TFRC_T_MBI); - - } else if (ktime_us_delta(now, hc->tx_t_ld) - (s64)hc->tx_rtt >= 0) { - - hc->tx_x = min(2 * hc->tx_x, min_rate); - hc->tx_x = max(hc->tx_x, - scaled_div(((__u64)hc->tx_s) << 6, hc->tx_rtt)); - hc->tx_t_ld = now; - } - - if (hc->tx_x != old_x) { - ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, " - "X_recv=%u\n", (unsigned int)(old_x >> 6), - (unsigned int)(hc->tx_x >> 6), hc->tx_x_calc, - (unsigned int)(hc->tx_x_recv >> 6)); - - ccid3_update_send_interval(hc); - } -} - -/** - * ccid3_hc_tx_update_s - Track the mean packet size `s' - * @hc: socket to be updated - * @len: DCCP packet payload size in bytes - * - * cf. RFC 4342, 5.3 and RFC 3448, 4.1 - */ -static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hc, int len) -{ - const u16 old_s = hc->tx_s; - - hc->tx_s = tfrc_ewma(hc->tx_s, len, 9); - - if (hc->tx_s != old_s) - ccid3_update_send_interval(hc); -} - -/* - * Update Window Counter using the algorithm from [RFC 4342, 8.1]. - * As elsewhere, RTT > 0 is assumed by using dccp_sample_rtt(). - */ -static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hc, - ktime_t now) -{ - u32 delta = ktime_us_delta(now, hc->tx_t_last_win_count), - quarter_rtts = (4 * delta) / hc->tx_rtt; - - if (quarter_rtts > 0) { - hc->tx_t_last_win_count = now; - hc->tx_last_win_count += min(quarter_rtts, 5U); - hc->tx_last_win_count &= 0xF; /* mod 16 */ - } -} - -static void ccid3_hc_tx_no_feedback_timer(struct timer_list *t) -{ - struct ccid3_hc_tx_sock *hc = from_timer(hc, t, tx_no_feedback_timer); - struct sock *sk = hc->sk; - unsigned long t_nfb = USEC_PER_SEC / 5; - - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - /* Try again later. */ - /* XXX: set some sensible MIB */ - goto restart_timer; - } - - ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk, - ccid3_tx_state_name(hc->tx_state)); - - /* Ignore and do not restart after leaving the established state */ - if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN)) - goto out; - - /* Reset feedback state to "no feedback received" */ - if (hc->tx_state == TFRC_SSTATE_FBACK) - ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); - - /* - * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 - * RTO is 0 if and only if no feedback has been received yet. 
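- * A rough worked example for the first branch below, with made-up
- * numbers: while p == 0, an allowed rate of X = 8000 bytes/s is simply
- * halved to 4000 bytes/s, but never reduced below one average packet
- * per t_mbi, i.e. s/64 bytes/s.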
- */ - if (hc->tx_t_rto == 0 || hc->tx_p == 0) { - - /* halve send rate directly */ - hc->tx_x = max(hc->tx_x / 2, - (((__u64)hc->tx_s) << 6) / TFRC_T_MBI); - ccid3_update_send_interval(hc); - } else { - /* - * Modify the cached value of X_recv - * - * If (X_calc > 2 * X_recv) - * X_recv = max(X_recv / 2, s / (2 * t_mbi)); - * Else - * X_recv = X_calc / 4; - * - * Note that X_recv is scaled by 2^6 while X_calc is not - */ - if (hc->tx_x_calc > (hc->tx_x_recv >> 5)) - hc->tx_x_recv = - max(hc->tx_x_recv / 2, - (((__u64)hc->tx_s) << 6) / (2*TFRC_T_MBI)); - else { - hc->tx_x_recv = hc->tx_x_calc; - hc->tx_x_recv <<= 4; - } - ccid3_hc_tx_update_x(sk, NULL); - } - ccid3_pr_debug("Reduced X to %llu/64 bytes/sec\n", - (unsigned long long)hc->tx_x); - - /* - * Set new timeout for the nofeedback timer. - * See comments in packet_recv() regarding the value of t_RTO. - */ - if (unlikely(hc->tx_t_rto == 0)) /* no feedback received yet */ - t_nfb = TFRC_INITIAL_TIMEOUT; - else - t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi); - -restart_timer: - sk_reset_timer(sk, &hc->tx_no_feedback_timer, - jiffies + usecs_to_jiffies(t_nfb)); -out: - bh_unlock_sock(sk); - sock_put(sk); -} - -/** - * ccid3_hc_tx_send_packet - Delay-based dequeueing of TX packets - * @sk: socket to send packet from - * @skb: next packet candidate to send on @sk - * - * This function uses the convention of ccid_packet_dequeue_eval() and - * returns a millisecond-delay value between 0 and t_mbi = 64000 msec. - */ -static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); - ktime_t now = ktime_get_real(); - s64 delay; - - /* - * This function is called only for Data and DataAck packets. Sending - * zero-sized Data(Ack)s is theoretically possible, but for congestion - * control this case is pathological - ignore it. - */ - if (unlikely(skb->len == 0)) - return -EBADMSG; - - if (hc->tx_state == TFRC_SSTATE_NO_SENT) { - sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies + - usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); - hc->tx_last_win_count = 0; - hc->tx_t_last_win_count = now; - - /* Set t_0 for initial packet */ - hc->tx_t_nom = now; - - hc->tx_s = skb->len; - - /* - * Use initial RTT sample when available: recommended by erratum - * to RFC 4342. This implements the initialisation procedure of - * draft rfc3448bis, section 4.2. Remember, X is scaled by 2^6. - */ - if (dp->dccps_syn_rtt) { - ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt); - hc->tx_rtt = dp->dccps_syn_rtt; - hc->tx_x = rfc3390_initial_rate(sk); - hc->tx_t_ld = now; - } else { - /* - * Sender does not have RTT sample: - * - set fallback RTT (RFC 4340, 3.4) since a RTT value - * is needed in several parts (e.g. window counter); - * - set sending rate X_pps = 1pps as per RFC 3448, 4.2. - */ - hc->tx_rtt = DCCP_FALLBACK_RTT; - hc->tx_x = hc->tx_s; - hc->tx_x <<= 6; - } - ccid3_update_send_interval(hc); - - ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); - - } else { - delay = ktime_us_delta(hc->tx_t_nom, now); - ccid3_pr_debug("delay=%ld\n", (long)delay); - /* - * Scheduling of packet transmissions (RFC 5348, 8.3) - * - * if (t_now > t_nom - delta) - * // send the packet now - * else - * // send the packet in (t_nom - t_now) milliseconds. - */ - if (delay >= TFRC_T_DELTA) - return (u32)delay / USEC_PER_MSEC; - - ccid3_hc_tx_update_win_count(hc, now); - } - - /* prepare to send now (add options etc.) 
*/ - dp->dccps_hc_tx_insert_options = 1; - DCCP_SKB_CB(skb)->dccpd_ccval = hc->tx_last_win_count; - - /* set the nominal send time for the next following packet */ - hc->tx_t_nom = ktime_add_us(hc->tx_t_nom, hc->tx_t_ipi); - return CCID_PACKET_SEND_AT_ONCE; -} - -static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len) -{ - struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); - - ccid3_hc_tx_update_s(hc, len); - - if (tfrc_tx_hist_add(&hc->tx_hist, dccp_sk(sk)->dccps_gss)) - DCCP_CRIT("packet history - out of memory!"); -} - -static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) -{ - struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); - struct tfrc_tx_hist_entry *acked; - ktime_t now; - unsigned long t_nfb; - u32 r_sample; - - /* we are only interested in ACKs */ - if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || - DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) - return; - /* - * Locate the acknowledged packet in the TX history. - * - * Returning "entry not found" here can for instance happen when - * - the host has not sent out anything (e.g. a passive server), - * - the Ack is outdated (packet with higher Ack number was received), - * - it is a bogus Ack (for a packet not sent on this connection). - */ - acked = tfrc_tx_hist_find_entry(hc->tx_hist, dccp_hdr_ack_seq(skb)); - if (acked == NULL) - return; - /* For the sake of RTT sampling, ignore/remove all older entries */ - tfrc_tx_hist_purge(&acked->next); - - /* Update the moving average for the RTT estimate (RFC 3448, 4.3) */ - now = ktime_get_real(); - r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp)); - hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9); - - /* - * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 - */ - if (hc->tx_state == TFRC_SSTATE_NO_FBACK) { - ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); - - if (hc->tx_t_rto == 0) { - /* - * Initial feedback packet: Larger Initial Windows (4.2) - */ - hc->tx_x = rfc3390_initial_rate(sk); - hc->tx_t_ld = now; - - ccid3_update_send_interval(hc); - - goto done_computing_x; - } else if (hc->tx_p == 0) { - /* - * First feedback after nofeedback timer expiry (4.3) - */ - goto done_computing_x; - } - } - - /* Update sending rate (step 4 of [RFC 3448, 4.3]) */ - if (hc->tx_p > 0) - hc->tx_x_calc = tfrc_calc_x(hc->tx_s, hc->tx_rtt, hc->tx_p); - ccid3_hc_tx_update_x(sk, &now); - -done_computing_x: - ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, " - "p=%u, X_calc=%u, X_recv=%u, X=%u\n", - dccp_role(sk), sk, hc->tx_rtt, r_sample, - hc->tx_s, hc->tx_p, hc->tx_x_calc, - (unsigned int)(hc->tx_x_recv >> 6), - (unsigned int)(hc->tx_x >> 6)); - - /* unschedule no feedback timer */ - sk_stop_timer(sk, &hc->tx_no_feedback_timer); - - /* - * As we have calculated new ipi, delta, t_nom it is possible - * that we now can send a packet, so wake up dccp_wait_for_ccid - */ - sk->sk_write_space(sk); - - /* - * Update timeout interval for the nofeedback timer. In order to control - * rate halving on networks with very low RTTs (<= 1 ms), use per-route - * tunable RTAX_RTO_MIN value as the lower bound. 
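- * Sketch with assumed numbers: for SRTT = 1 ms, 4 * R is only 4 ms, so
- * on such paths the per-route minimum is what effectively bounds t_RTO
- * (tcp_rto_min() is in jiffies, hence the USEC_PER_SEC/HZ conversion
- * below).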
- */ - hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt, - USEC_PER_SEC/HZ * tcp_rto_min(sk)); - /* - * Schedule no feedback timer to expire in - * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) - */ - t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi); - - ccid3_pr_debug("%s(%p), Scheduled no feedback timer to " - "expire in %lu jiffies (%luus)\n", - dccp_role(sk), sk, usecs_to_jiffies(t_nfb), t_nfb); - - sk_reset_timer(sk, &hc->tx_no_feedback_timer, - jiffies + usecs_to_jiffies(t_nfb)); -} - -static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type, - u8 option, u8 *optval, u8 optlen) -{ - struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); - __be32 opt_val; - - switch (option) { - case TFRC_OPT_RECEIVE_RATE: - case TFRC_OPT_LOSS_EVENT_RATE: - /* Must be ignored on Data packets, cf. RFC 4342 8.3 and 8.5 */ - if (packet_type == DCCP_PKT_DATA) - break; - if (unlikely(optlen != 4)) { - DCCP_WARN("%s(%p), invalid len %d for %u\n", - dccp_role(sk), sk, optlen, option); - return -EINVAL; - } - opt_val = ntohl(get_unaligned((__be32 *)optval)); - - if (option == TFRC_OPT_RECEIVE_RATE) { - /* Receive Rate is kept in units of 64 bytes/second */ - hc->tx_x_recv = opt_val; - hc->tx_x_recv <<= 6; - - ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", - dccp_role(sk), sk, opt_val); - } else { - /* Update the fixpoint Loss Event Rate fraction */ - hc->tx_p = tfrc_invert_loss_event_rate(opt_val); - - ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n", - dccp_role(sk), sk, opt_val); - } - } - return 0; -} - -static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) -{ - struct ccid3_hc_tx_sock *hc = ccid_priv(ccid); - - hc->tx_state = TFRC_SSTATE_NO_SENT; - hc->tx_hist = NULL; - hc->sk = sk; - timer_setup(&hc->tx_no_feedback_timer, - ccid3_hc_tx_no_feedback_timer, 0); - return 0; -} - -static void ccid3_hc_tx_exit(struct sock *sk) -{ - struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); - - sk_stop_timer(sk, &hc->tx_no_feedback_timer); - tfrc_tx_hist_purge(&hc->tx_hist); -} - -static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) -{ - info->tcpi_rto = ccid3_hc_tx_sk(sk)->tx_t_rto; - info->tcpi_rtt = ccid3_hc_tx_sk(sk)->tx_rtt; -} - -static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, - u32 __user *optval, int __user *optlen) -{ - const struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); - struct tfrc_tx_info tfrc; - const void *val; - - switch (optname) { - case DCCP_SOCKOPT_CCID_TX_INFO: - if (len < sizeof(tfrc)) - return -EINVAL; - memset(&tfrc, 0, sizeof(tfrc)); - tfrc.tfrctx_x = hc->tx_x; - tfrc.tfrctx_x_recv = hc->tx_x_recv; - tfrc.tfrctx_x_calc = hc->tx_x_calc; - tfrc.tfrctx_rtt = hc->tx_rtt; - tfrc.tfrctx_p = hc->tx_p; - tfrc.tfrctx_rto = hc->tx_t_rto; - tfrc.tfrctx_ipi = hc->tx_t_ipi; - len = sizeof(tfrc); - val = &tfrc; - break; - default: - return -ENOPROTOOPT; - } - - if (put_user(len, optlen) || copy_to_user(optval, val, len)) - return -EFAULT; - - return 0; -} - -/* - * Receiver Half-Connection Routines - */ - -/* CCID3 feedback types */ -enum ccid3_fback_type { - CCID3_FBACK_NONE = 0, - CCID3_FBACK_INITIAL, - CCID3_FBACK_PERIODIC, - CCID3_FBACK_PARAM_CHANGE -}; - -#ifdef CONFIG_IP_DCCP_CCID3_DEBUG -static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) -{ - static const char *const ccid3_rx_state_names[] = { - [TFRC_RSTATE_NO_DATA] = "NO_DATA", - [TFRC_RSTATE_DATA] = "DATA", - }; - - return ccid3_rx_state_names[state]; -} -#endif - -static void ccid3_hc_rx_set_state(struct sock *sk, - enum ccid3_hc_rx_states state) -{ - struct ccid3_hc_rx_sock *hc 
= ccid3_hc_rx_sk(sk); - enum ccid3_hc_rx_states oldstate = hc->rx_state; - - ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", - dccp_role(sk), sk, ccid3_rx_state_name(oldstate), - ccid3_rx_state_name(state)); - WARN_ON(state == oldstate); - hc->rx_state = state; -} - -static void ccid3_hc_rx_send_feedback(struct sock *sk, - const struct sk_buff *skb, - enum ccid3_fback_type fbtype) -{ - struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); - struct dccp_sock *dp = dccp_sk(sk); - ktime_t now = ktime_get(); - s64 delta = 0; - - switch (fbtype) { - case CCID3_FBACK_INITIAL: - hc->rx_x_recv = 0; - hc->rx_pinv = ~0U; /* see RFC 4342, 8.5 */ - break; - case CCID3_FBACK_PARAM_CHANGE: - /* - * When parameters change (new loss or p > p_prev), we do not - * have a reliable estimate for R_m of [RFC 3448, 6.2] and so - * need to reuse the previous value of X_recv. However, when - * X_recv was 0 (due to early loss), this would kill X down to - * s/t_mbi (i.e. one packet in 64 seconds). - * To avoid such drastic reduction, we approximate X_recv as - * the number of bytes since last feedback. - * This is a safe fallback, since X is bounded above by X_calc. - */ - if (hc->rx_x_recv > 0) - break; - fallthrough; - case CCID3_FBACK_PERIODIC: - delta = ktime_us_delta(now, hc->rx_tstamp_last_feedback); - if (delta <= 0) - delta = 1; - hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta); - break; - default: - return; - } - - ccid3_pr_debug("Interval %lldusec, X_recv=%u, 1/p=%u\n", delta, - hc->rx_x_recv, hc->rx_pinv); - - hc->rx_tstamp_last_feedback = now; - hc->rx_last_counter = dccp_hdr(skb)->dccph_ccval; - hc->rx_bytes_recv = 0; - - dp->dccps_hc_rx_insert_options = 1; - dccp_send_ack(sk); -} - -static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) -{ - const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); - __be32 x_recv, pinv; - - if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) - return 0; - - if (dccp_packet_without_ack(skb)) - return 0; - - x_recv = htonl(hc->rx_x_recv); - pinv = htonl(hc->rx_pinv); - - if (dccp_insert_option(skb, TFRC_OPT_LOSS_EVENT_RATE, - &pinv, sizeof(pinv)) || - dccp_insert_option(skb, TFRC_OPT_RECEIVE_RATE, - &x_recv, sizeof(x_recv))) - return -1; - - return 0; -} - -/** - * ccid3_first_li - Implements [RFC 5348, 6.3.1] - * @sk: socket to calculate loss interval for - * - * Determine the length of the first loss interval via inverse lookup. - * Assume that X_recv can be computed by the throughput equation - * s - * X_recv = -------- - * R * fval - * Find some p such that f(p) = fval; return 1/p (scaled). - */ -static u32 ccid3_first_li(struct sock *sk) -{ - struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); - u32 x_recv, p; - s64 delta; - u64 fval; - - if (hc->rx_rtt == 0) { - DCCP_WARN("No RTT estimate available, using fallback RTT\n"); - hc->rx_rtt = DCCP_FALLBACK_RTT; - } - - delta = ktime_us_delta(ktime_get(), hc->rx_tstamp_last_feedback); - if (delta <= 0) - delta = 1; - x_recv = scaled_div32(hc->rx_bytes_recv, delta); - if (x_recv == 0) { /* would also trigger divide-by-zero */ - DCCP_WARN("X_recv==0\n"); - if (hc->rx_x_recv == 0) { - DCCP_BUG("stored value of X_recv is zero"); - return ~0U; - } - x_recv = hc->rx_x_recv; - } - - fval = scaled_div(hc->rx_s, hc->rx_rtt); - fval = scaled_div32(fval, x_recv); - p = tfrc_calc_x_reverse_lookup(fval); - - ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied " - "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); - - return p == 0 ? 
~0U : scaled_div(1, p); -} - -static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) -{ - struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); - enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE; - const u64 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; - const bool is_data_packet = dccp_data_packet(skb); - - if (unlikely(hc->rx_state == TFRC_RSTATE_NO_DATA)) { - if (is_data_packet) { - const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; - do_feedback = CCID3_FBACK_INITIAL; - ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); - hc->rx_s = payload; - /* - * Not necessary to update rx_bytes_recv here, - * since X_recv = 0 for the first feedback packet (cf. - * RFC 3448, 6.3) -- gerrit - */ - } - goto update_records; - } - - if (tfrc_rx_hist_duplicate(&hc->rx_hist, skb)) - return; /* done receiving */ - - if (is_data_packet) { - const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; - /* - * Update moving-average of s and the sum of received payload bytes - */ - hc->rx_s = tfrc_ewma(hc->rx_s, payload, 9); - hc->rx_bytes_recv += payload; - } - - /* - * Perform loss detection and handle pending losses - */ - if (tfrc_rx_handle_loss(&hc->rx_hist, &hc->rx_li_hist, - skb, ndp, ccid3_first_li, sk)) { - do_feedback = CCID3_FBACK_PARAM_CHANGE; - goto done_receiving; - } - - if (tfrc_rx_hist_loss_pending(&hc->rx_hist)) - return; /* done receiving */ - - /* - * Handle data packets: RTT sampling and monitoring p - */ - if (unlikely(!is_data_packet)) - goto update_records; - - if (!tfrc_lh_is_initialised(&hc->rx_li_hist)) { - const u32 sample = tfrc_rx_hist_sample_rtt(&hc->rx_hist, skb); - /* - * Empty loss history: no loss so far, hence p stays 0. - * Sample RTT values, since an RTT estimate is required for the - * computation of p when the first loss occurs; RFC 3448, 6.3.1. - */ - if (sample != 0) - hc->rx_rtt = tfrc_ewma(hc->rx_rtt, sample, 9); - - } else if (tfrc_lh_update_i_mean(&hc->rx_li_hist, skb)) { - /* - * Step (3) of [RFC 3448, 6.1]: Recompute I_mean and, if I_mean - * has decreased (resp. p has increased), send feedback now. 
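- * (The equivalence holds because the loss event rate is stored as its
- *  inverse: rx_pinv = 1/p = I_mean, cf. ccid3.h, so a smaller I_mean
- *  returned by tfrc_lh_update_i_mean() is exactly a larger p.)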
- */ - do_feedback = CCID3_FBACK_PARAM_CHANGE; - } - - /* - * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3 - */ - if (SUB16(dccp_hdr(skb)->dccph_ccval, hc->rx_last_counter) > 3) - do_feedback = CCID3_FBACK_PERIODIC; - -update_records: - tfrc_rx_hist_add_packet(&hc->rx_hist, skb, ndp); - -done_receiving: - if (do_feedback) - ccid3_hc_rx_send_feedback(sk, skb, do_feedback); -} - -static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) -{ - struct ccid3_hc_rx_sock *hc = ccid_priv(ccid); - - hc->rx_state = TFRC_RSTATE_NO_DATA; - tfrc_lh_init(&hc->rx_li_hist); - return tfrc_rx_hist_alloc(&hc->rx_hist); -} - -static void ccid3_hc_rx_exit(struct sock *sk) -{ - struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); - - tfrc_rx_hist_purge(&hc->rx_hist); - tfrc_lh_cleanup(&hc->rx_li_hist); -} - -static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) -{ - info->tcpi_ca_state = ccid3_hc_rx_sk(sk)->rx_state; - info->tcpi_options |= TCPI_OPT_TIMESTAMPS; - info->tcpi_rcv_rtt = ccid3_hc_rx_sk(sk)->rx_rtt; -} - -static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, - u32 __user *optval, int __user *optlen) -{ - const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); - struct tfrc_rx_info rx_info; - const void *val; - - switch (optname) { - case DCCP_SOCKOPT_CCID_RX_INFO: - if (len < sizeof(rx_info)) - return -EINVAL; - rx_info.tfrcrx_x_recv = hc->rx_x_recv; - rx_info.tfrcrx_rtt = hc->rx_rtt; - rx_info.tfrcrx_p = tfrc_invert_loss_event_rate(hc->rx_pinv); - len = sizeof(rx_info); - val = &rx_info; - break; - default: - return -ENOPROTOOPT; - } - - if (put_user(len, optlen) || copy_to_user(optval, val, len)) - return -EFAULT; - - return 0; -} - -struct ccid_operations ccid3_ops = { - .ccid_id = DCCPC_CCID3, - .ccid_name = "TCP-Friendly Rate Control", - .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock), - .ccid_hc_tx_init = ccid3_hc_tx_init, - .ccid_hc_tx_exit = ccid3_hc_tx_exit, - .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet, - .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent, - .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv, - .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options, - .ccid_hc_rx_obj_size = sizeof(struct ccid3_hc_rx_sock), - .ccid_hc_rx_init = ccid3_hc_rx_init, - .ccid_hc_rx_exit = ccid3_hc_rx_exit, - .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options, - .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, - .ccid_hc_rx_get_info = ccid3_hc_rx_get_info, - .ccid_hc_tx_get_info = ccid3_hc_tx_get_info, - .ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt, - .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, -}; - -#ifdef CONFIG_IP_DCCP_CCID3_DEBUG -module_param(ccid3_debug, bool, 0644); -MODULE_PARM_DESC(ccid3_debug, "Enable CCID-3 debug messages"); -#endif diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h deleted file mode 100644 index 02e0fc9f6334..000000000000 --- a/net/dccp/ccids/ccid3.h +++ /dev/null @@ -1,148 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2007 The University of Aberdeen, Scotland, UK - * - * An implementation of the DCCP protocol - * - * This code has been developed by the University of Waikato WAND - * research group. 
For further information please see https://www.wand.net.nz/ - * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz - * - * This code also uses code from Lulea University, rereleased as GPL by its - * authors: - * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon - * - * Changes to meet Linux coding standards, to make it meet latest ccid3 draft - * and to make it work as a loadable module in the DCCP stack written by - * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. - * - * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> - */ -#ifndef _DCCP_CCID3_H_ -#define _DCCP_CCID3_H_ - -#include <linux/ktime.h> -#include <linux/list.h> -#include <linux/types.h> -#include <linux/tfrc.h> -#include "lib/tfrc.h" -#include "../ccid.h" - -/* Two seconds as per RFC 5348, 4.2 */ -#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) - -/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ -#define TFRC_T_MBI 64 - -/* - * The t_delta parameter (RFC 5348, 8.3): delays of less than %USEC_PER_MSEC are - * rounded down to 0, since sk_reset_timer() here uses millisecond granularity. - * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse - * resolution of HZ < 500 means that the error is below one timer tick (t_gran) - * when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ). - */ -#if (HZ >= 500) -# define TFRC_T_DELTA USEC_PER_MSEC -#else -# define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ)) -#endif - -enum ccid3_options { - TFRC_OPT_LOSS_EVENT_RATE = 192, - TFRC_OPT_LOSS_INTERVALS = 193, - TFRC_OPT_RECEIVE_RATE = 194, -}; - -/* TFRC sender states */ -enum ccid3_hc_tx_states { - TFRC_SSTATE_NO_SENT = 1, - TFRC_SSTATE_NO_FBACK, - TFRC_SSTATE_FBACK, -}; - -/** - * struct ccid3_hc_tx_sock - CCID3 sender half-connection socket - * @tx_x: Current sending rate in 64 * bytes per second - * @tx_x_recv: Receive rate in 64 * bytes per second - * @tx_x_calc: Calculated rate in bytes per second - * @tx_rtt: Estimate of current round trip time in usecs - * @tx_p: Current loss event rate (0-1) scaled by 1000000 - * @tx_s: Packet size in bytes - * @tx_t_rto: Nofeedback Timer setting in usecs - * @tx_t_ipi: Interpacket (send) interval (RFC 3448, 4.6) in usecs - * @tx_state: Sender state, one of %ccid3_hc_tx_states - * @tx_last_win_count: Last window counter sent - * @tx_t_last_win_count: Timestamp of earliest packet - * with last_win_count value sent - * @tx_no_feedback_timer: Handle to no feedback timer - * @tx_t_ld: Time last doubled during slow start - * @tx_t_nom: Nominal send time of next packet - * @tx_hist: Packet history - */ -struct ccid3_hc_tx_sock { - u64 tx_x; - u64 tx_x_recv; - u32 tx_x_calc; - u32 tx_rtt; - u32 tx_p; - u32 tx_t_rto; - u32 tx_t_ipi; - u16 tx_s; - enum ccid3_hc_tx_states tx_state:8; - u8 tx_last_win_count; - ktime_t tx_t_last_win_count; - struct timer_list tx_no_feedback_timer; - struct sock *sk; - ktime_t tx_t_ld; - ktime_t tx_t_nom; - struct tfrc_tx_hist_entry *tx_hist; -}; - -static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) -{ - struct ccid3_hc_tx_sock *hctx = ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid); - BUG_ON(hctx == NULL); - return hctx; -} - -/* TFRC receiver states */ -enum ccid3_hc_rx_states { - TFRC_RSTATE_NO_DATA = 1, - TFRC_RSTATE_DATA, -}; - -/** - * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket - * @rx_last_counter: Tracks window counter (RFC 4342, 8.1) - * @rx_state: Receiver state, one of %ccid3_hc_rx_states - * @rx_bytes_recv: Total sum of DCCP 
payload bytes - * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3) - * @rx_rtt: Receiver estimate of RTT - * @rx_tstamp_last_feedback: Time at which last feedback was sent - * @rx_hist: Packet history (loss detection + RTT sampling) - * @rx_li_hist: Loss Interval database - * @rx_s: Received packet size in bytes - * @rx_pinv: Inverse of Loss Event Rate (RFC 4342, sec. 8.5) - */ -struct ccid3_hc_rx_sock { - u8 rx_last_counter:4; - enum ccid3_hc_rx_states rx_state:8; - u32 rx_bytes_recv; - u32 rx_x_recv; - u32 rx_rtt; - ktime_t rx_tstamp_last_feedback; - struct tfrc_rx_hist rx_hist; - struct tfrc_loss_hist rx_li_hist; - u16 rx_s; -#define rx_pinv rx_li_hist.i_mean -}; - -static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) -{ - struct ccid3_hc_rx_sock *hcrx = ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid); - BUG_ON(hcrx == NULL); - return hcrx; -} - -#endif /* _DCCP_CCID3_H_ */ diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c deleted file mode 100644 index da95319842bb..000000000000 --- a/net/dccp/ccids/lib/loss_interval.c +++ /dev/null @@ -1,184 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2007 The University of Aberdeen, Scotland, UK - * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> - * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> - */ -#include <net/sock.h> -#include "tfrc.h" - -static struct kmem_cache *tfrc_lh_slab __read_mostly; -/* Loss Interval weights from [RFC 3448, 5.4], scaled by 10 */ -static const int tfrc_lh_weights[NINTERVAL] = { 10, 10, 10, 10, 8, 6, 4, 2 }; - -/* implements LIFO semantics on the array */ -static inline u8 LIH_INDEX(const u8 ctr) -{ - return LIH_SIZE - 1 - (ctr % LIH_SIZE); -} - -/* the `counter' index always points at the next entry to be populated */ -static inline struct tfrc_loss_interval *tfrc_lh_peek(struct tfrc_loss_hist *lh) -{ - return lh->counter ? 
lh->ring[LIH_INDEX(lh->counter - 1)] : NULL; -} - -/* given i with 0 <= i <= k, return I_i as per the rfc3448bis notation */ -static inline u32 tfrc_lh_get_interval(struct tfrc_loss_hist *lh, const u8 i) -{ - BUG_ON(i >= lh->counter); - return lh->ring[LIH_INDEX(lh->counter - i - 1)]->li_length; -} - -/* - * On-demand allocation and de-allocation of entries - */ -static struct tfrc_loss_interval *tfrc_lh_demand_next(struct tfrc_loss_hist *lh) -{ - if (lh->ring[LIH_INDEX(lh->counter)] == NULL) - lh->ring[LIH_INDEX(lh->counter)] = kmem_cache_alloc(tfrc_lh_slab, - GFP_ATOMIC); - return lh->ring[LIH_INDEX(lh->counter)]; -} - -void tfrc_lh_cleanup(struct tfrc_loss_hist *lh) -{ - if (!tfrc_lh_is_initialised(lh)) - return; - - for (lh->counter = 0; lh->counter < LIH_SIZE; lh->counter++) - if (lh->ring[LIH_INDEX(lh->counter)] != NULL) { - kmem_cache_free(tfrc_lh_slab, - lh->ring[LIH_INDEX(lh->counter)]); - lh->ring[LIH_INDEX(lh->counter)] = NULL; - } -} - -static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) -{ - u32 i_i, i_tot0 = 0, i_tot1 = 0, w_tot = 0; - int i, k = tfrc_lh_length(lh) - 1; /* k is as in rfc3448bis, 5.4 */ - - if (k <= 0) - return; - - for (i = 0; i <= k; i++) { - i_i = tfrc_lh_get_interval(lh, i); - - if (i < k) { - i_tot0 += i_i * tfrc_lh_weights[i]; - w_tot += tfrc_lh_weights[i]; - } - if (i > 0) - i_tot1 += i_i * tfrc_lh_weights[i-1]; - } - - lh->i_mean = max(i_tot0, i_tot1) / w_tot; -} - -/** - * tfrc_lh_update_i_mean - Update the `open' loss interval I_0 - * @lh: histogram to update - * @skb: received socket triggering loss interval update - * - * For recomputing p: returns `true' if p > p_prev <=> 1/p < 1/p_prev - */ -u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) -{ - struct tfrc_loss_interval *cur = tfrc_lh_peek(lh); - u32 old_i_mean = lh->i_mean; - s64 len; - - if (cur == NULL) /* not initialised */ - return 0; - - len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1; - - if (len - (s64)cur->li_length <= 0) /* duplicate or reordered */ - return 0; - - if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4) - /* - * Implements RFC 4342, 10.2: - * If a packet S (skb) exists whose seqno comes `after' the one - * starting the current loss interval (cur) and if the modulo-16 - * distance from C(cur) to C(S) is greater than 4, consider all - * subsequent packets as belonging to a new loss interval. This - * test is necessary since CCVal may wrap between intervals. - */ - cur->li_is_closed = 1; - - if (tfrc_lh_length(lh) == 1) /* due to RFC 3448, 6.3.1 */ - return 0; - - cur->li_length = len; - tfrc_lh_calc_i_mean(lh); - - return lh->i_mean < old_i_mean; -} - -/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ -static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur, - struct tfrc_rx_hist_entry *new_loss) -{ - return dccp_delta_seqno(cur->li_seqno, new_loss->tfrchrx_seqno) > 0 && - (cur->li_is_closed || SUB16(new_loss->tfrchrx_ccval, cur->li_ccval) > 4); -} - -/** - * tfrc_lh_interval_add - Insert new record into the Loss Interval database - * @lh: Loss Interval database - * @rh: Receive history containing a fresh loss event - * @calc_first_li: Caller-dependent routine to compute length of first interval - * @sk: Used by @calc_first_li in caller-specific way (subtyping) - * - * Updates I_mean and returns 1 if a new interval has in fact been added to @lh. 
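- * A hand-worked sketch of the weighting in tfrc_lh_calc_i_mean() above,
- * with made-up interval lengths: for two intervals I_0 = 90, I_1 = 110
- * (weights 10 and 10), i_tot0 = 10 * 90, i_tot1 = 10 * 110, w_tot = 10,
- * hence I_mean = max(900, 1100) / 10 = 110 - the estimate errs towards
- * the longer, i.e. less lossy, interval.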
- */ -int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, - u32 (*calc_first_li)(struct sock *), struct sock *sk) -{ - struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new; - - if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh))) - return 0; - - new = tfrc_lh_demand_next(lh); - if (unlikely(new == NULL)) { - DCCP_CRIT("Cannot allocate/add loss record."); - return 0; - } - - new->li_seqno = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno; - new->li_ccval = tfrc_rx_hist_loss_prev(rh)->tfrchrx_ccval; - new->li_is_closed = 0; - - if (++lh->counter == 1) - lh->i_mean = new->li_length = (*calc_first_li)(sk); - else { - cur->li_length = dccp_delta_seqno(cur->li_seqno, new->li_seqno); - new->li_length = dccp_delta_seqno(new->li_seqno, - tfrc_rx_hist_last_rcv(rh)->tfrchrx_seqno) + 1; - if (lh->counter > (2*LIH_SIZE)) - lh->counter -= LIH_SIZE; - - tfrc_lh_calc_i_mean(lh); - } - return 1; -} - -int __init tfrc_li_init(void) -{ - tfrc_lh_slab = kmem_cache_create("tfrc_li_hist", - sizeof(struct tfrc_loss_interval), 0, - SLAB_HWCACHE_ALIGN, NULL); - return tfrc_lh_slab == NULL ? -ENOBUFS : 0; -} - -void tfrc_li_exit(void) -{ - if (tfrc_lh_slab != NULL) { - kmem_cache_destroy(tfrc_lh_slab); - tfrc_lh_slab = NULL; - } -} diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h deleted file mode 100644 index c3d95f85e43b..000000000000 --- a/net/dccp/ccids/lib/loss_interval.h +++ /dev/null @@ -1,69 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _DCCP_LI_HIST_ -#define _DCCP_LI_HIST_ -/* - * Copyright (c) 2007 The University of Aberdeen, Scotland, UK - * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> - * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> - */ -#include <linux/ktime.h> -#include <linux/list.h> -#include <linux/slab.h> - -/* - * Number of loss intervals (RFC 4342, 8.6.1). The history size is one more than - * NINTERVAL, since the `open' interval I_0 is always stored as the first entry. 
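- * (in other words, LIH_SIZE = 9 ring slots: the open I_0 plus up to
- *  eight closed intervals I_1 ... I_8 feeding the weighted mean)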
- */ -#define NINTERVAL 8 -#define LIH_SIZE (NINTERVAL + 1) - -/** - * tfrc_loss_interval - Loss history record for TFRC-based protocols - * @li_seqno: Highest received seqno before the start of loss - * @li_ccval: The CCVal belonging to @li_seqno - * @li_is_closed: Whether @li_seqno is older than 1 RTT - * @li_length: Loss interval sequence length - */ -struct tfrc_loss_interval { - u64 li_seqno:48, - li_ccval:4, - li_is_closed:1; - u32 li_length; -}; - -/** - * tfrc_loss_hist - Loss record database - * @ring: Circular queue managed in LIFO manner - * @counter: Current count of entries (can be more than %LIH_SIZE) - * @i_mean: Current Average Loss Interval [RFC 3448, 5.4] - */ -struct tfrc_loss_hist { - struct tfrc_loss_interval *ring[LIH_SIZE]; - u8 counter; - u32 i_mean; -}; - -static inline void tfrc_lh_init(struct tfrc_loss_hist *lh) -{ - memset(lh, 0, sizeof(struct tfrc_loss_hist)); -} - -static inline u8 tfrc_lh_is_initialised(struct tfrc_loss_hist *lh) -{ - return lh->counter > 0; -} - -static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh) -{ - return min(lh->counter, (u8)LIH_SIZE); -} - -struct tfrc_rx_hist; - -int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, - u32 (*first_li)(struct sock *), struct sock *); -u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); -void tfrc_lh_cleanup(struct tfrc_loss_hist *lh); - -#endif /* _DCCP_LI_HIST_ */ diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c deleted file mode 100644 index 0cdda3c66fb5..000000000000 --- a/net/dccp/ccids/lib/packet_history.c +++ /dev/null @@ -1,439 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2007 The University of Aberdeen, Scotland, UK - * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. - * - * An implementation of the DCCP protocol - * - * This code has been developed by the University of Waikato WAND - * research group. For further information please see https://www.wand.net.nz/ - * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz - * - * This code also uses code from Lulea University, rereleased as GPL by its - * authors: - * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon - * - * Changes to meet Linux coding standards, to make it meet latest ccid3 draft - * and to make it work as a loadable module in the DCCP stack written by - * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. - * - * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> - */ - -#include <linux/string.h> -#include <linux/slab.h> -#include "packet_history.h" -#include "../../dccp.h" - -/* - * Transmitter History Routines - */ -static struct kmem_cache *tfrc_tx_hist_slab; - -int __init tfrc_tx_packet_history_init(void) -{ - tfrc_tx_hist_slab = kmem_cache_create("tfrc_tx_hist", - sizeof(struct tfrc_tx_hist_entry), - 0, SLAB_HWCACHE_ALIGN, NULL); - return tfrc_tx_hist_slab == NULL ? 
-ENOBUFS : 0; -} - -void tfrc_tx_packet_history_exit(void) -{ - if (tfrc_tx_hist_slab != NULL) { - kmem_cache_destroy(tfrc_tx_hist_slab); - tfrc_tx_hist_slab = NULL; - } -} - -int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) -{ - struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); - - if (entry == NULL) - return -ENOBUFS; - entry->seqno = seqno; - entry->stamp = ktime_get_real(); - entry->next = *headp; - *headp = entry; - return 0; -} - -void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp) -{ - struct tfrc_tx_hist_entry *head = *headp; - - while (head != NULL) { - struct tfrc_tx_hist_entry *next = head->next; - - kmem_cache_free(tfrc_tx_hist_slab, head); - head = next; - } - - *headp = NULL; -} - -/* - * Receiver History Routines - */ -static struct kmem_cache *tfrc_rx_hist_slab; - -int __init tfrc_rx_packet_history_init(void) -{ - tfrc_rx_hist_slab = kmem_cache_create("tfrc_rxh_cache", - sizeof(struct tfrc_rx_hist_entry), - 0, SLAB_HWCACHE_ALIGN, NULL); - return tfrc_rx_hist_slab == NULL ? -ENOBUFS : 0; -} - -void tfrc_rx_packet_history_exit(void) -{ - if (tfrc_rx_hist_slab != NULL) { - kmem_cache_destroy(tfrc_rx_hist_slab); - tfrc_rx_hist_slab = NULL; - } -} - -static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry, - const struct sk_buff *skb, - const u64 ndp) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); - - entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; - entry->tfrchrx_ccval = dh->dccph_ccval; - entry->tfrchrx_type = dh->dccph_type; - entry->tfrchrx_ndp = ndp; - entry->tfrchrx_tstamp = ktime_get_real(); -} - -void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, - const struct sk_buff *skb, - const u64 ndp) -{ - struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h); - - tfrc_rx_hist_entry_from_skb(entry, skb, ndp); -} - -/* has the packet contained in skb been seen before? */ -int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb) -{ - const u64 seq = DCCP_SKB_CB(skb)->dccpd_seq; - int i; - - if (dccp_delta_seqno(tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, seq) <= 0) - return 1; - - for (i = 1; i <= h->loss_count; i++) - if (tfrc_rx_hist_entry(h, i)->tfrchrx_seqno == seq) - return 1; - - return 0; -} - -static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) -{ - const u8 idx_a = tfrc_rx_hist_index(h, a), - idx_b = tfrc_rx_hist_index(h, b); - - swap(h->ring[idx_a], h->ring[idx_b]); -} - -/* - * Private helper functions for loss detection. - * - * In the descriptions, `Si' refers to the sequence number of entry number i, - * whose NDP count is `Ni' (lower case is used for variables). - * Note: All __xxx_loss functions expect that a test against duplicates has been - * performed already: the seqno of the skb must not be less than the seqno - * of loss_prev; and it must not equal that of any valid history entry. 
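- * Illustrative walk-through with assumed numbers: after S0 = 10, a new
- * packet S1 = 12 with NDP count n1 = 0 leaves seqno 11 unaccounted for,
- * so dccp_loss_free(10, 12, 0) fails and __do_track_loss() stores S1 as
- * entry 1 with loss_count = 1; the loss itself is only declared once
- * enough subsequent packets (TFRC_NDUPACK = 3, RFC 4342, 6.1) confirm
- * the gap.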
- */ -static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1) -{ - u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, - s1 = DCCP_SKB_CB(skb)->dccpd_seq; - - if (!dccp_loss_free(s0, s1, n1)) { /* gap between S0 and S1 */ - h->loss_count = 1; - tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1); - } -} - -static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2) -{ - u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, - s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, - s2 = DCCP_SKB_CB(skb)->dccpd_seq; - - if (likely(dccp_delta_seqno(s1, s2) > 0)) { /* S1 < S2 */ - h->loss_count = 2; - tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n2); - return; - } - - /* S0 < S2 < S1 */ - - if (dccp_loss_free(s0, s2, n2)) { - u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp; - - if (dccp_loss_free(s2, s1, n1)) { - /* hole is filled: S0, S2, and S1 are consecutive */ - h->loss_count = 0; - h->loss_start = tfrc_rx_hist_index(h, 1); - } else - /* gap between S2 and S1: just update loss_prev */ - tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2); - - } else { /* gap between S0 and S2 */ - /* - * Reorder history to insert S2 between S0 and S1 - */ - tfrc_rx_hist_swap(h, 0, 3); - h->loss_start = tfrc_rx_hist_index(h, 3); - tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n2); - h->loss_count = 2; - } -} - -/* return 1 if a new loss event has been identified */ -static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3) -{ - u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, - s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, - s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno, - s3 = DCCP_SKB_CB(skb)->dccpd_seq; - - if (likely(dccp_delta_seqno(s2, s3) > 0)) { /* S2 < S3 */ - h->loss_count = 3; - tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 3), skb, n3); - return 1; - } - - /* S3 < S2 */ - - if (dccp_delta_seqno(s1, s3) > 0) { /* S1 < S3 < S2 */ - /* - * Reorder history to insert S3 between S1 and S2 - */ - tfrc_rx_hist_swap(h, 2, 3); - tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n3); - h->loss_count = 3; - return 1; - } - - /* S0 < S3 < S1 */ - - if (dccp_loss_free(s0, s3, n3)) { - u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp; - - if (dccp_loss_free(s3, s1, n1)) { - /* hole between S0 and S1 filled by S3 */ - u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp; - - if (dccp_loss_free(s1, s2, n2)) { - /* entire hole filled by S0, S3, S1, S2 */ - h->loss_start = tfrc_rx_hist_index(h, 2); - h->loss_count = 0; - } else { - /* gap remains between S1 and S2 */ - h->loss_start = tfrc_rx_hist_index(h, 1); - h->loss_count = 1; - } - - } else /* gap exists between S3 and S1, loss_count stays at 2 */ - tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n3); - - return 0; - } - - /* - * The remaining case: S0 < S3 < S1 < S2; gap between S0 and S3 - * Reorder history to insert S3 between S0 and S1. - */ - tfrc_rx_hist_swap(h, 0, 3); - h->loss_start = tfrc_rx_hist_index(h, 3); - tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n3); - h->loss_count = 3; - - return 1; -} - -/* recycle RX history records to continue loss detection if necessary */ -static void __three_after_loss(struct tfrc_rx_hist *h) -{ - /* - * At this stage we know already that there is a gap between S0 and S1 - * (since S0 was the highest sequence number received before detecting - * the loss). To recycle the loss record, it is thus only necessary to - * check for other possible gaps between S1/S2 and between S2/S3. 
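- * Summarised, the three cases handled below are:
- *	gap between S1 and S2	-> loss_start = index(1), loss_count = 2
- *	gap only at S2/S3	-> loss_start = index(2), loss_count = 1
- *	no gap remains		-> loss_start = index(3), loss_count = 0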
- */ - u64 s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, - s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno, - s3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_seqno; - u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp, - n3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_ndp; - - if (dccp_loss_free(s1, s2, n2)) { - - if (dccp_loss_free(s2, s3, n3)) { - /* no gap between S2 and S3: entire hole is filled */ - h->loss_start = tfrc_rx_hist_index(h, 3); - h->loss_count = 0; - } else { - /* gap between S2 and S3 */ - h->loss_start = tfrc_rx_hist_index(h, 2); - h->loss_count = 1; - } - - } else { /* gap between S1 and S2 */ - h->loss_start = tfrc_rx_hist_index(h, 1); - h->loss_count = 2; - } -} - -/** - * tfrc_rx_handle_loss - Loss detection and further processing - * @h: The non-empty RX history object - * @lh: Loss Intervals database to update - * @skb: Currently received packet - * @ndp: The NDP count belonging to @skb - * @calc_first_li: Caller-dependent computation of first loss interval in @lh - * @sk: Used by @calc_first_li (see tfrc_lh_interval_add) - * - * Chooses action according to pending loss, updates LI database when a new - * loss was detected, and does required post-processing. Returns 1 when caller - * should send feedback, 0 otherwise. - * Since it also takes care of reordering during loss detection and updates the - * records accordingly, the caller should not perform any more RX history - * operations when loss_count is greater than 0 after calling this function. - */ -int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, - struct tfrc_loss_hist *lh, - struct sk_buff *skb, const u64 ndp, - u32 (*calc_first_li)(struct sock *), struct sock *sk) -{ - int is_new_loss = 0; - - if (h->loss_count == 0) { - __do_track_loss(h, skb, ndp); - } else if (h->loss_count == 1) { - __one_after_loss(h, skb, ndp); - } else if (h->loss_count != 2) { - DCCP_BUG("invalid loss_count %d", h->loss_count); - } else if (__two_after_loss(h, skb, ndp)) { - /* - * Update Loss Interval database and recycle RX records - */ - is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk); - __three_after_loss(h); - } - return is_new_loss; -} - -int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h) -{ - int i; - - for (i = 0; i <= TFRC_NDUPACK; i++) { - h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC); - if (h->ring[i] == NULL) - goto out_free; - } - - h->loss_count = h->loss_start = 0; - return 0; - -out_free: - while (i-- != 0) { - kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]); - h->ring[i] = NULL; - } - return -ENOBUFS; -} - -void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) -{ - int i; - - for (i = 0; i <= TFRC_NDUPACK; ++i) - if (h->ring[i] != NULL) { - kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]); - h->ring[i] = NULL; - } -} - -/** - * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against - * @h: The non-empty RX history object - */ -static inline struct tfrc_rx_hist_entry * - tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h) -{ - return h->ring[0]; -} - -/** - * tfrc_rx_hist_rtt_prev_s - previously suitable (wrt rtt_last_s) RTT-sampling entry - * @h: The non-empty RX history object - */ -static inline struct tfrc_rx_hist_entry * - tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h) -{ - return h->ring[h->rtt_sample_prev]; -} - -/** - * tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal - * @h: receive histogram - * @skb: packet containing timestamp. - * - * Based on ideas presented in RFC 4342, 8.1. 
Returns 0 if it was not able - * to compute a sample with given data - calling function should check this. - */ -u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb) -{ - u32 sample = 0, - delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, - tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); - - if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */ - if (h->rtt_sample_prev == 2) { /* previous candidate stored */ - sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval, - tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); - if (sample) - sample = 4 / sample * - ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp, - tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp); - else /* - * FIXME: This condition is in principle not - * possible but occurs when CCID is used for - * two-way data traffic. I have tried to trace - * it, but the cause does not seem to be here. - */ - DCCP_BUG("please report to dccp@vger.kernel.org" - " => prev = %u, last = %u", - tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval, - tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); - } else if (delta_v < 1) { - h->rtt_sample_prev = 1; - goto keep_ref_for_next_time; - } - - } else if (delta_v == 4) /* optimal match */ - sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp)); - else { /* suboptimal match */ - h->rtt_sample_prev = 2; - goto keep_ref_for_next_time; - } - - if (unlikely(sample > DCCP_SANE_RTT_MAX)) { - DCCP_WARN("RTT sample %u too large, using max\n", sample); - sample = DCCP_SANE_RTT_MAX; - } - - h->rtt_sample_prev = 0; /* use current entry as next reference */ -keep_ref_for_next_time: - - return sample; -} diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h deleted file mode 100644 index 159cc9326eab..000000000000 --- a/net/dccp/ccids/lib/packet_history.h +++ /dev/null @@ -1,142 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Packet RX/TX history data structures and routines for TFRC-based protocols. - * - * Copyright (c) 2007 The University of Aberdeen, Scotland, UK - * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. - * - * This code has been developed by the University of Waikato WAND - * research group. For further information please see https://www.wand.net.nz/ - * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz - * - * This code also uses code from Lulea University, rereleased as GPL by its - * authors: - * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon - * - * Changes to meet Linux coding standards, to make it meet latest ccid3 draft - * and to make it work as a loadable module in the DCCP stack written by - * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. 
- * - * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> - */ - -#ifndef _DCCP_PKT_HIST_ -#define _DCCP_PKT_HIST_ - -#include <linux/list.h> -#include <linux/slab.h> -#include "tfrc.h" - -/** - * tfrc_tx_hist_entry - Simple singly-linked TX history list - * @next: next oldest entry (LIFO order) - * @seqno: sequence number of this entry - * @stamp: send time of packet with sequence number @seqno - */ -struct tfrc_tx_hist_entry { - struct tfrc_tx_hist_entry *next; - u64 seqno; - ktime_t stamp; -}; - -static inline struct tfrc_tx_hist_entry * - tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) -{ - while (head != NULL && head->seqno != seqno) - head = head->next; - return head; -} - -int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); -void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); - -/* Subtraction a-b modulo-16, respects circular wrap-around */ -#define SUB16(a, b) (((a) + 16 - (b)) & 0xF) - -/* Number of packets to wait after a missing packet (RFC 4342, 6.1) */ -#define TFRC_NDUPACK 3 - -/** - * tfrc_rx_hist_entry - Store information about a single received packet - * @tfrchrx_seqno: DCCP packet sequence number - * @tfrchrx_ccval: window counter value of packet (RFC 4342, 8.1) - * @tfrchrx_ndp: the NDP count (if any) of the packet - * @tfrchrx_tstamp: actual receive time of packet - */ -struct tfrc_rx_hist_entry { - u64 tfrchrx_seqno:48, - tfrchrx_ccval:4, - tfrchrx_type:4; - u64 tfrchrx_ndp:48; - ktime_t tfrchrx_tstamp; -}; - -/** - * tfrc_rx_hist - RX history structure for TFRC-based protocols - * @ring: Packet history for RTT sampling and loss detection - * @loss_count: Number of entries in circular history - * @loss_start: Movable index (for loss detection) - * @rtt_sample_prev: Used during RTT sampling, points to candidate entry - */ -struct tfrc_rx_hist { - struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1]; - u8 loss_count:2, - loss_start:2; -#define rtt_sample_prev loss_start -}; - -/** - * tfrc_rx_hist_index - index to reach n-th entry after loss_start - */ -static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n) -{ - return (h->loss_start + n) & TFRC_NDUPACK; -} - -/** - * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far - */ -static inline struct tfrc_rx_hist_entry * - tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h) -{ - return h->ring[tfrc_rx_hist_index(h, h->loss_count)]; -} - -/** - * tfrc_rx_hist_entry - return the n-th history entry after loss_start - */ -static inline struct tfrc_rx_hist_entry * - tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const u8 n) -{ - return h->ring[tfrc_rx_hist_index(h, n)]; -} - -/** - * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was detected - */ -static inline struct tfrc_rx_hist_entry * - tfrc_rx_hist_loss_prev(const struct tfrc_rx_hist *h) -{ - return h->ring[h->loss_start]; -} - -/* indicate whether previously a packet was detected missing */ -static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) -{ - return h->loss_count > 0; -} - -void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, - const u64 ndp); - -int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); - -struct tfrc_loss_hist; -int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct tfrc_loss_hist *lh, - struct sk_buff *skb, const u64 ndp, - u32 (*first_li)(struct sock *sk), struct sock *sk); -u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb); -int 
tfrc_rx_hist_alloc(struct tfrc_rx_hist *h); -void tfrc_rx_hist_purge(struct tfrc_rx_hist *h); - -#endif /* _DCCP_PKT_HIST_ */ diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c deleted file mode 100644 index d7f265e1f50c..000000000000 --- a/net/dccp/ccids/lib/tfrc.c +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * TFRC library initialisation - * - * Copyright (c) 2007 The University of Aberdeen, Scotland, UK - * Copyright (c) 2007 Arnaldo Carvalho de Melo <acme@redhat.com> - */ -#include <linux/moduleparam.h> -#include "tfrc.h" - -#ifdef CONFIG_IP_DCCP_TFRC_DEBUG -bool tfrc_debug; -module_param(tfrc_debug, bool, 0644); -MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages"); -#endif - -int __init tfrc_lib_init(void) -{ - int rc = tfrc_li_init(); - - if (rc) - goto out; - - rc = tfrc_tx_packet_history_init(); - if (rc) - goto out_free_loss_intervals; - - rc = tfrc_rx_packet_history_init(); - if (rc) - goto out_free_tx_history; - return 0; - -out_free_tx_history: - tfrc_tx_packet_history_exit(); -out_free_loss_intervals: - tfrc_li_exit(); -out: - return rc; -} - -void tfrc_lib_exit(void) -{ - tfrc_rx_packet_history_exit(); - tfrc_tx_packet_history_exit(); - tfrc_li_exit(); -} diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h deleted file mode 100644 index 0a63e8750cc5..000000000000 --- a/net/dccp/ccids/lib/tfrc.h +++ /dev/null @@ -1,73 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _TFRC_H_ -#define _TFRC_H_ -/* - * Copyright (c) 2007 The University of Aberdeen, Scotland, UK - * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz> - * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> - * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon - */ -#include <linux/types.h> -#include <linux/math64.h> -#include "../../dccp.h" - -/* internal includes that this library exports: */ -#include "loss_interval.h" -#include "packet_history.h" - -#ifdef CONFIG_IP_DCCP_TFRC_DEBUG -extern bool tfrc_debug; -#define tfrc_pr_debug(format, a...) DCCP_PR_DEBUG(tfrc_debug, format, ##a) -#else -#define tfrc_pr_debug(format, a...) -#endif - -/* integer-arithmetic divisions of type (a * 1000000)/b */ -static inline u64 scaled_div(u64 a, u64 b) -{ - BUG_ON(b == 0); - return div64_u64(a * 1000000, b); -} - -static inline u32 scaled_div32(u64 a, u64 b) -{ - u64 result = scaled_div(a, b); - - if (result > UINT_MAX) { - DCCP_CRIT("Overflow: %llu/%llu > UINT_MAX", - (unsigned long long)a, (unsigned long long)b); - return UINT_MAX; - } - return result; -} - -/** - * tfrc_ewma - Exponentially weighted moving average - * @weight: Weight to be used as damping factor, in units of 1/10 - */ -static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight) -{ - return avg ? 
(weight * avg + (10 - weight) * newval) / 10 : newval; -} - -u32 tfrc_calc_x(u16 s, u32 R, u32 p); -u32 tfrc_calc_x_reverse_lookup(u32 fvalue); -u32 tfrc_invert_loss_event_rate(u32 loss_event_rate); - -int tfrc_tx_packet_history_init(void); -void tfrc_tx_packet_history_exit(void); -int tfrc_rx_packet_history_init(void); -void tfrc_rx_packet_history_exit(void); - -int tfrc_li_init(void); -void tfrc_li_exit(void); - -#ifdef CONFIG_IP_DCCP_TFRC_LIB -int tfrc_lib_init(void); -void tfrc_lib_exit(void); -#else -#define tfrc_lib_init() (0) -#define tfrc_lib_exit() -#endif -#endif /* _TFRC_H_ */ diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c deleted file mode 100644 index 92a8c6bea316..000000000000 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ /dev/null @@ -1,702 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz> - * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> - * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon - */ - -#include <linux/module.h> -#include "../../dccp.h" -#include "tfrc.h" - -#define TFRC_CALC_X_ARRSIZE 500 -#define TFRC_CALC_X_SPLIT 50000 /* 0.05 * 1000000, details below */ -#define TFRC_SMALLEST_P (TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE) - -/* - TFRC TCP Reno Throughput Equation Lookup Table for f(p) - - The following two-column lookup table implements a part of the TCP throughput - equation from [RFC 3448, sec. 3.1]: - - s - X_calc = -------------------------------------------------------------- - R * sqrt(2*b*p/3) + (3 * t_RTO * sqrt(3*b*p/8) * (p + 32*p^3)) - - Where: - X is the transmit rate in bytes/second - s is the packet size in bytes - R is the round trip time in seconds - p is the loss event rate, between 0 and 1.0, of the number of loss - events as a fraction of the number of packets transmitted - t_RTO is the TCP retransmission timeout value in seconds - b is the number of packets acknowledged by a single TCP ACK - - We can assume that b = 1 and t_RTO is 4 * R. The equation now becomes: - - s - X_calc = ------------------------------------------------------- - R * sqrt(p*2/3) + (12 * R * sqrt(p*3/8) * (p + 32*p^3)) - - which we can break down into: - - s - X_calc = --------- - R * f(p) - - where f(p) is given for 0 < p <= 1 by: - - f(p) = sqrt(2*p/3) + 12 * sqrt(3*p/8) * (p + 32*p^3) - - Since this is kernel code, floating-point arithmetic is avoided in favour of - integer arithmetic. This means that nearly all fractional parameters are - scaled by 1000000: - * the parameters p and R - * the return result f(p) - The lookup table therefore actually tabulates the following function g(q): - - g(q) = 1000000 * f(q/1000000) - - Hence, when p <= 1, q must be less than or equal to 1000000. To achieve finer - granularity for the practically more relevant case of small values of p (up to - 5%), the second column is used; the first one ranges up to 100%. This split - corresponds to the value of q = TFRC_CALC_X_SPLIT. 
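The scaling is easy to sanity-check: the corner entries of the lookup table below can be regenerated with a few lines of floating-point C (standalone userspace sketch, not part of this patch; compile with -lm):

	#include <math.h>
	#include <stdio.h>

	/* f(p) as above: RFC 3448, 3.1 with b = 1 and t_RTO = 4*R */
	static double f(double p)
	{
		return sqrt(2.0 * p / 3.0) +
		       12.0 * sqrt(3.0 * p / 8.0) * (p + 32.0 * p * p * p);
	}

	int main(void)
	{
		/* g(q) = 1000000 * f(q/1000000), evaluated at the table corners */
		printf("%.0f\n", 1e6 * f(0.002));	/* lookup[0][0]   =     37172 */
		printf("%.0f\n", 1e6 * f(0.0001));	/* lookup[0][1]   =      8172 */
		printf("%.0f\n", 1e6 * f(1.0));		/* lookup[499][0] = 243315981 */
		printf("%.0f\n", 1e6 * f(0.05));	/* lookup[499][1] =    271305 */
		return 0;
	}

The four printed values match the first and last rows of the table that follows.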
At the same time this also - determines the smallest resolution possible with this lookup table: - - TFRC_SMALLEST_P = TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE - - The entire table is generated by: - for(i=0; i < TFRC_CALC_X_ARRSIZE; i++) { - lookup[i][0] = g((i+1) * 1000000/TFRC_CALC_X_ARRSIZE); - lookup[i][1] = g((i+1) * TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE); - } - - With the given configuration, we have, with M = TFRC_CALC_X_ARRSIZE-1, - lookup[0][0] = g(1000000/(M+1)) = 1000000 * f(0.2%) - lookup[M][0] = g(1000000) = 1000000 * f(100%) - lookup[0][1] = g(TFRC_SMALLEST_P) = 1000000 * f(0.01%) - lookup[M][1] = g(TFRC_CALC_X_SPLIT) = 1000000 * f(5%) - - In summary, the two columns represent f(p) for the following ranges: - * The first column is for 0.002 <= p <= 1.0 - * The second column is for 0.0001 <= p <= 0.05 - Where the columns overlap, the second (finer-grained) is given preference, - i.e. the first column is used only for p >= 0.05. - */ -static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = { - { 37172, 8172 }, - { 53499, 11567 }, - { 66664, 14180 }, - { 78298, 16388 }, - { 89021, 18339 }, - { 99147, 20108 }, - { 108858, 21738 }, - { 118273, 23260 }, - { 127474, 24693 }, - { 136520, 26052 }, - { 145456, 27348 }, - { 154316, 28589 }, - { 163130, 29783 }, - { 171919, 30935 }, - { 180704, 32049 }, - { 189502, 33130 }, - { 198328, 34180 }, - { 207194, 35202 }, - { 216114, 36198 }, - { 225097, 37172 }, - { 234153, 38123 }, - { 243294, 39055 }, - { 252527, 39968 }, - { 261861, 40864 }, - { 271305, 41743 }, - { 280866, 42607 }, - { 290553, 43457 }, - { 300372, 44293 }, - { 310333, 45117 }, - { 320441, 45929 }, - { 330705, 46729 }, - { 341131, 47518 }, - { 351728, 48297 }, - { 362501, 49066 }, - { 373460, 49826 }, - { 384609, 50577 }, - { 395958, 51320 }, - { 407513, 52054 }, - { 419281, 52780 }, - { 431270, 53499 }, - { 443487, 54211 }, - { 455940, 54916 }, - { 468635, 55614 }, - { 481581, 56306 }, - { 494785, 56991 }, - { 508254, 57671 }, - { 521996, 58345 }, - { 536019, 59014 }, - { 550331, 59677 }, - { 564939, 60335 }, - { 579851, 60988 }, - { 595075, 61636 }, - { 610619, 62279 }, - { 626491, 62918 }, - { 642700, 63553 }, - { 659253, 64183 }, - { 676158, 64809 }, - { 693424, 65431 }, - { 711060, 66050 }, - { 729073, 66664 }, - { 747472, 67275 }, - { 766266, 67882 }, - { 785464, 68486 }, - { 805073, 69087 }, - { 825103, 69684 }, - { 845562, 70278 }, - { 866460, 70868 }, - { 887805, 71456 }, - { 909606, 72041 }, - { 931873, 72623 }, - { 954614, 73202 }, - { 977839, 73778 }, - { 1001557, 74352 }, - { 1025777, 74923 }, - { 1050508, 75492 }, - { 1075761, 76058 }, - { 1101544, 76621 }, - { 1127867, 77183 }, - { 1154739, 77741 }, - { 1182172, 78298 }, - { 1210173, 78852 }, - { 1238753, 79405 }, - { 1267922, 79955 }, - { 1297689, 80503 }, - { 1328066, 81049 }, - { 1359060, 81593 }, - { 1390684, 82135 }, - { 1422947, 82675 }, - { 1455859, 83213 }, - { 1489430, 83750 }, - { 1523671, 84284 }, - { 1558593, 84817 }, - { 1594205, 85348 }, - { 1630518, 85878 }, - { 1667543, 86406 }, - { 1705290, 86932 }, - { 1743770, 87457 }, - { 1782994, 87980 }, - { 1822973, 88501 }, - { 1863717, 89021 }, - { 1905237, 89540 }, - { 1947545, 90057 }, - { 1990650, 90573 }, - { 2034566, 91087 }, - { 2079301, 91600 }, - { 2124869, 92111 }, - { 2171279, 92622 }, - { 2218543, 93131 }, - { 2266673, 93639 }, - { 2315680, 94145 }, - { 2365575, 94650 }, - { 2416371, 95154 }, - { 2468077, 95657 }, - { 2520707, 96159 }, - { 2574271, 96660 }, - { 2628782, 97159 }, - { 2684250, 97658 }, - { 2740689, 98155 }, - { 2798110, 
98651 }, - { 2856524, 99147 }, - { 2915944, 99641 }, - { 2976382, 100134 }, - { 3037850, 100626 }, - { 3100360, 101117 }, - { 3163924, 101608 }, - { 3228554, 102097 }, - { 3294263, 102586 }, - { 3361063, 103073 }, - { 3428966, 103560 }, - { 3497984, 104045 }, - { 3568131, 104530 }, - { 3639419, 105014 }, - { 3711860, 105498 }, - { 3785467, 105980 }, - { 3860253, 106462 }, - { 3936229, 106942 }, - { 4013410, 107422 }, - { 4091808, 107902 }, - { 4171435, 108380 }, - { 4252306, 108858 }, - { 4334431, 109335 }, - { 4417825, 109811 }, - { 4502501, 110287 }, - { 4588472, 110762 }, - { 4675750, 111236 }, - { 4764349, 111709 }, - { 4854283, 112182 }, - { 4945564, 112654 }, - { 5038206, 113126 }, - { 5132223, 113597 }, - { 5227627, 114067 }, - { 5324432, 114537 }, - { 5422652, 115006 }, - { 5522299, 115474 }, - { 5623389, 115942 }, - { 5725934, 116409 }, - { 5829948, 116876 }, - { 5935446, 117342 }, - { 6042439, 117808 }, - { 6150943, 118273 }, - { 6260972, 118738 }, - { 6372538, 119202 }, - { 6485657, 119665 }, - { 6600342, 120128 }, - { 6716607, 120591 }, - { 6834467, 121053 }, - { 6953935, 121514 }, - { 7075025, 121976 }, - { 7197752, 122436 }, - { 7322131, 122896 }, - { 7448175, 123356 }, - { 7575898, 123815 }, - { 7705316, 124274 }, - { 7836442, 124733 }, - { 7969291, 125191 }, - { 8103877, 125648 }, - { 8240216, 126105 }, - { 8378321, 126562 }, - { 8518208, 127018 }, - { 8659890, 127474 }, - { 8803384, 127930 }, - { 8948702, 128385 }, - { 9095861, 128840 }, - { 9244875, 129294 }, - { 9395760, 129748 }, - { 9548529, 130202 }, - { 9703198, 130655 }, - { 9859782, 131108 }, - { 10018296, 131561 }, - { 10178755, 132014 }, - { 10341174, 132466 }, - { 10505569, 132917 }, - { 10671954, 133369 }, - { 10840345, 133820 }, - { 11010757, 134271 }, - { 11183206, 134721 }, - { 11357706, 135171 }, - { 11534274, 135621 }, - { 11712924, 136071 }, - { 11893673, 136520 }, - { 12076536, 136969 }, - { 12261527, 137418 }, - { 12448664, 137867 }, - { 12637961, 138315 }, - { 12829435, 138763 }, - { 13023101, 139211 }, - { 13218974, 139658 }, - { 13417071, 140106 }, - { 13617407, 140553 }, - { 13819999, 140999 }, - { 14024862, 141446 }, - { 14232012, 141892 }, - { 14441465, 142339 }, - { 14653238, 142785 }, - { 14867346, 143230 }, - { 15083805, 143676 }, - { 15302632, 144121 }, - { 15523842, 144566 }, - { 15747453, 145011 }, - { 15973479, 145456 }, - { 16201939, 145900 }, - { 16432847, 146345 }, - { 16666221, 146789 }, - { 16902076, 147233 }, - { 17140429, 147677 }, - { 17381297, 148121 }, - { 17624696, 148564 }, - { 17870643, 149007 }, - { 18119154, 149451 }, - { 18370247, 149894 }, - { 18623936, 150336 }, - { 18880241, 150779 }, - { 19139176, 151222 }, - { 19400759, 151664 }, - { 19665007, 152107 }, - { 19931936, 152549 }, - { 20201564, 152991 }, - { 20473907, 153433 }, - { 20748982, 153875 }, - { 21026807, 154316 }, - { 21307399, 154758 }, - { 21590773, 155199 }, - { 21876949, 155641 }, - { 22165941, 156082 }, - { 22457769, 156523 }, - { 22752449, 156964 }, - { 23049999, 157405 }, - { 23350435, 157846 }, - { 23653774, 158287 }, - { 23960036, 158727 }, - { 24269236, 159168 }, - { 24581392, 159608 }, - { 24896521, 160049 }, - { 25214642, 160489 }, - { 25535772, 160929 }, - { 25859927, 161370 }, - { 26187127, 161810 }, - { 26517388, 162250 }, - { 26850728, 162690 }, - { 27187165, 163130 }, - { 27526716, 163569 }, - { 27869400, 164009 }, - { 28215234, 164449 }, - { 28564236, 164889 }, - { 28916423, 165328 }, - { 29271815, 165768 }, - { 29630428, 166208 }, - { 29992281, 166647 }, - { 30357392, 167087 }, - { 30725779, 
167526 }, - { 31097459, 167965 }, - { 31472452, 168405 }, - { 31850774, 168844 }, - { 32232445, 169283 }, - { 32617482, 169723 }, - { 33005904, 170162 }, - { 33397730, 170601 }, - { 33792976, 171041 }, - { 34191663, 171480 }, - { 34593807, 171919 }, - { 34999428, 172358 }, - { 35408544, 172797 }, - { 35821174, 173237 }, - { 36237335, 173676 }, - { 36657047, 174115 }, - { 37080329, 174554 }, - { 37507197, 174993 }, - { 37937673, 175433 }, - { 38371773, 175872 }, - { 38809517, 176311 }, - { 39250924, 176750 }, - { 39696012, 177190 }, - { 40144800, 177629 }, - { 40597308, 178068 }, - { 41053553, 178507 }, - { 41513554, 178947 }, - { 41977332, 179386 }, - { 42444904, 179825 }, - { 42916290, 180265 }, - { 43391509, 180704 }, - { 43870579, 181144 }, - { 44353520, 181583 }, - { 44840352, 182023 }, - { 45331092, 182462 }, - { 45825761, 182902 }, - { 46324378, 183342 }, - { 46826961, 183781 }, - { 47333531, 184221 }, - { 47844106, 184661 }, - { 48358706, 185101 }, - { 48877350, 185541 }, - { 49400058, 185981 }, - { 49926849, 186421 }, - { 50457743, 186861 }, - { 50992759, 187301 }, - { 51531916, 187741 }, - { 52075235, 188181 }, - { 52622735, 188622 }, - { 53174435, 189062 }, - { 53730355, 189502 }, - { 54290515, 189943 }, - { 54854935, 190383 }, - { 55423634, 190824 }, - { 55996633, 191265 }, - { 56573950, 191706 }, - { 57155606, 192146 }, - { 57741621, 192587 }, - { 58332014, 193028 }, - { 58926806, 193470 }, - { 59526017, 193911 }, - { 60129666, 194352 }, - { 60737774, 194793 }, - { 61350361, 195235 }, - { 61967446, 195677 }, - { 62589050, 196118 }, - { 63215194, 196560 }, - { 63845897, 197002 }, - { 64481179, 197444 }, - { 65121061, 197886 }, - { 65765563, 198328 }, - { 66414705, 198770 }, - { 67068508, 199213 }, - { 67726992, 199655 }, - { 68390177, 200098 }, - { 69058085, 200540 }, - { 69730735, 200983 }, - { 70408147, 201426 }, - { 71090343, 201869 }, - { 71777343, 202312 }, - { 72469168, 202755 }, - { 73165837, 203199 }, - { 73867373, 203642 }, - { 74573795, 204086 }, - { 75285124, 204529 }, - { 76001380, 204973 }, - { 76722586, 205417 }, - { 77448761, 205861 }, - { 78179926, 206306 }, - { 78916102, 206750 }, - { 79657310, 207194 }, - { 80403571, 207639 }, - { 81154906, 208084 }, - { 81911335, 208529 }, - { 82672880, 208974 }, - { 83439562, 209419 }, - { 84211402, 209864 }, - { 84988421, 210309 }, - { 85770640, 210755 }, - { 86558080, 211201 }, - { 87350762, 211647 }, - { 88148708, 212093 }, - { 88951938, 212539 }, - { 89760475, 212985 }, - { 90574339, 213432 }, - { 91393551, 213878 }, - { 92218133, 214325 }, - { 93048107, 214772 }, - { 93883493, 215219 }, - { 94724314, 215666 }, - { 95570590, 216114 }, - { 96422343, 216561 }, - { 97279594, 217009 }, - { 98142366, 217457 }, - { 99010679, 217905 }, - { 99884556, 218353 }, - { 100764018, 218801 }, - { 101649086, 219250 }, - { 102539782, 219698 }, - { 103436128, 220147 }, - { 104338146, 220596 }, - { 105245857, 221046 }, - { 106159284, 221495 }, - { 107078448, 221945 }, - { 108003370, 222394 }, - { 108934074, 222844 }, - { 109870580, 223294 }, - { 110812910, 223745 }, - { 111761087, 224195 }, - { 112715133, 224646 }, - { 113675069, 225097 }, - { 114640918, 225548 }, - { 115612702, 225999 }, - { 116590442, 226450 }, - { 117574162, 226902 }, - { 118563882, 227353 }, - { 119559626, 227805 }, - { 120561415, 228258 }, - { 121569272, 228710 }, - { 122583219, 229162 }, - { 123603278, 229615 }, - { 124629471, 230068 }, - { 125661822, 230521 }, - { 126700352, 230974 }, - { 127745083, 231428 }, - { 128796039, 231882 }, - { 129853241, 232336 }, - { 
130916713, 232790 }, - { 131986475, 233244 }, - { 133062553, 233699 }, - { 134144966, 234153 }, - { 135233739, 234608 }, - { 136328894, 235064 }, - { 137430453, 235519 }, - { 138538440, 235975 }, - { 139652876, 236430 }, - { 140773786, 236886 }, - { 141901190, 237343 }, - { 143035113, 237799 }, - { 144175576, 238256 }, - { 145322604, 238713 }, - { 146476218, 239170 }, - { 147636442, 239627 }, - { 148803298, 240085 }, - { 149976809, 240542 }, - { 151156999, 241000 }, - { 152343890, 241459 }, - { 153537506, 241917 }, - { 154737869, 242376 }, - { 155945002, 242835 }, - { 157158929, 243294 }, - { 158379673, 243753 }, - { 159607257, 244213 }, - { 160841704, 244673 }, - { 162083037, 245133 }, - { 163331279, 245593 }, - { 164586455, 246054 }, - { 165848586, 246514 }, - { 167117696, 246975 }, - { 168393810, 247437 }, - { 169676949, 247898 }, - { 170967138, 248360 }, - { 172264399, 248822 }, - { 173568757, 249284 }, - { 174880235, 249747 }, - { 176198856, 250209 }, - { 177524643, 250672 }, - { 178857621, 251136 }, - { 180197813, 251599 }, - { 181545242, 252063 }, - { 182899933, 252527 }, - { 184261908, 252991 }, - { 185631191, 253456 }, - { 187007807, 253920 }, - { 188391778, 254385 }, - { 189783129, 254851 }, - { 191181884, 255316 }, - { 192588065, 255782 }, - { 194001698, 256248 }, - { 195422805, 256714 }, - { 196851411, 257181 }, - { 198287540, 257648 }, - { 199731215, 258115 }, - { 201182461, 258582 }, - { 202641302, 259050 }, - { 204107760, 259518 }, - { 205581862, 259986 }, - { 207063630, 260454 }, - { 208553088, 260923 }, - { 210050262, 261392 }, - { 211555174, 261861 }, - { 213067849, 262331 }, - { 214588312, 262800 }, - { 216116586, 263270 }, - { 217652696, 263741 }, - { 219196666, 264211 }, - { 220748520, 264682 }, - { 222308282, 265153 }, - { 223875978, 265625 }, - { 225451630, 266097 }, - { 227035265, 266569 }, - { 228626905, 267041 }, - { 230226576, 267514 }, - { 231834302, 267986 }, - { 233450107, 268460 }, - { 235074016, 268933 }, - { 236706054, 269407 }, - { 238346244, 269881 }, - { 239994613, 270355 }, - { 241651183, 270830 }, - { 243315981, 271305 } -}; - -/* return largest index i such that fval <= lookup[i][small] */ -static inline u32 tfrc_binsearch(u32 fval, u8 small) -{ - u32 try, low = 0, high = TFRC_CALC_X_ARRSIZE - 1; - - while (low < high) { - try = (low + high) / 2; - if (fval <= tfrc_calc_x_lookup[try][small]) - high = try; - else - low = try + 1; - } - return high; -} - -/** - * tfrc_calc_x - Calculate the send rate as per section 3.1 of RFC3448 - * @s: packet size in bytes - * @R: RTT scaled by 1000000 (i.e., microseconds) - * @p: loss ratio estimate scaled by 1000000 - * - * Returns X_calc in bytes per second (not scaled). - */ -u32 tfrc_calc_x(u16 s, u32 R, u32 p) -{ - u16 index; - u32 f; - u64 result; - - /* check against invalid parameters and divide-by-zero */ - BUG_ON(p > 1000000); /* p must not exceed 100% */ - BUG_ON(p == 0); /* f(0) = 0, divide by zero */ - if (R == 0) { /* possible divide by zero */ - DCCP_CRIT("WARNING: RTT is 0, returning maximum X_calc."); - return ~0U; - } - - if (p <= TFRC_CALC_X_SPLIT) { /* 0.0000 < p <= 0.05 */ - if (p < TFRC_SMALLEST_P) { /* 0.0000 < p < 0.0001 */ - DCCP_WARN("Value of p (%d) below resolution. 
" - "Substituting %d\n", p, TFRC_SMALLEST_P); - index = 0; - } else /* 0.0001 <= p <= 0.05 */ - index = p/TFRC_SMALLEST_P - 1; - - f = tfrc_calc_x_lookup[index][1]; - - } else { /* 0.05 < p <= 1.00 */ - index = p/(1000000/TFRC_CALC_X_ARRSIZE) - 1; - - f = tfrc_calc_x_lookup[index][0]; - } - - /* - * Compute X = s/(R*f(p)) in bytes per second. - * Since f(p) and R are both scaled by 1000000, we need to multiply by - * 1000000^2. To avoid overflow, the result is computed in two stages. - * This works under almost all reasonable operational conditions, for a - * wide range of parameters. Yet, should some strange combination of - * parameters result in overflow, the use of scaled_div32 will catch - * this and return UINT_MAX - which is a logically adequate consequence. - */ - result = scaled_div(s, R); - return scaled_div32(result, f); -} - -/** - * tfrc_calc_x_reverse_lookup - try to find p given f(p) - * @fvalue: function value to match, scaled by 1000000 - * - * Returns closest match for p, also scaled by 1000000 - */ -u32 tfrc_calc_x_reverse_lookup(u32 fvalue) -{ - int index; - - if (fvalue == 0) /* f(p) = 0 whenever p = 0 */ - return 0; - - /* Error cases. */ - if (fvalue < tfrc_calc_x_lookup[0][1]) { - DCCP_WARN("fvalue %u smaller than resolution\n", fvalue); - return TFRC_SMALLEST_P; - } - if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) { - DCCP_WARN("fvalue %u exceeds bounds!\n", fvalue); - return 1000000; - } - - if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1]) { - index = tfrc_binsearch(fvalue, 1); - return (index + 1) * TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE; - } - - /* else ... it must be in the coarse-grained column */ - index = tfrc_binsearch(fvalue, 0); - return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; -} - -/** - * tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100% - * @loss_event_rate: loss event rate to invert - * When @loss_event_rate is large, there is a chance that p is truncated to 0. - * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0. - */ -u32 tfrc_invert_loss_event_rate(u32 loss_event_rate) -{ - if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */ - return 0; - if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */ - return 1000000; - return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P); -} diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h deleted file mode 100644 index 1f748ed1279d..000000000000 --- a/net/dccp/dccp.h +++ /dev/null @@ -1,483 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _DCCP_H -#define _DCCP_H -/* - * net/dccp/dccp.h - * - * An implementation of the DCCP protocol - * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> - * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz> - */ - -#include <linux/dccp.h> -#include <linux/ktime.h> -#include <net/snmp.h> -#include <net/sock.h> -#include <net/tcp.h> -#include "ackvec.h" - -/* - * DCCP - specific warning and debugging macros. - */ -#define DCCP_WARN(fmt, ...) \ - net_warn_ratelimited("%s: " fmt, __func__, ##__VA_ARGS__) -#define DCCP_CRIT(fmt, a...) printk(KERN_CRIT fmt " at %s:%d/%s()\n", ##a, \ - __FILE__, __LINE__, __func__) -#define DCCP_BUG(a...) do { DCCP_CRIT("BUG: " a); dump_stack(); } while(0) -#define DCCP_BUG_ON(cond) do { if (unlikely((cond) != 0)) \ - DCCP_BUG("\"%s\" holds (exception!)", \ - __stringify(cond)); \ - } while (0) - -#define DCCP_PRINTK(enable, fmt, args...) 
do { if (enable) \ - printk(fmt, ##args); \ - } while(0) -#define DCCP_PR_DEBUG(enable, fmt, a...) DCCP_PRINTK(enable, KERN_DEBUG \ - "%s: " fmt, __func__, ##a) - -#ifdef CONFIG_IP_DCCP_DEBUG -extern bool dccp_debug; -#define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a) -#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a) -#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a) -#else -#define dccp_pr_debug(format, a...) do {} while (0) -#define dccp_pr_debug_cat(format, a...) do {} while (0) -#define dccp_debug(format, a...) do {} while (0) -#endif - -extern struct inet_hashinfo dccp_hashinfo; - -DECLARE_PER_CPU(unsigned int, dccp_orphan_count); - -void dccp_time_wait(struct sock *sk, int state, int timeo); - -/* - * Set safe upper bounds for header and option length. Since Data Offset is 8 - * bits (RFC 4340, sec. 5.1), the total header length can never be more than - * 4 * 255 = 1020 bytes. The largest possible header length is 28 bytes (X=1): - * - DCCP-Response with ACK Subheader and 4 bytes of Service code OR - * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields - * Hence a safe upper bound for the maximum option length is 1020-28 = 992 - */ -#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(uint32_t)) -#define DCCP_MAX_PACKET_HDR 28 -#define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR) -#define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER) - -/* Upper bound for initial feature-negotiation overhead (padded to 32 bits) */ -#define DCCP_FEATNEG_OVERHEAD (32 * sizeof(uint32_t)) - -#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT - * state, about 60 seconds */ - -/* RFC 1122, 4.2.3.1 initial RTO value */ -#define DCCP_TIMEOUT_INIT ((unsigned int)(3 * HZ)) - -/* - * The maximum back-off value for retransmissions. This is needed for - * - retransmitting client-Requests (sec. 8.1.1), - * - retransmitting Close/CloseReq when closing (sec. 8.3), - * - feature-negotiation retransmission (sec. 6.6.3), - * - Acks in client-PARTOPEN state (sec. 8.1.5). - */ -#define DCCP_RTO_MAX ((unsigned int)(64 * HZ)) - -/* - * RTT sampling: sanity bounds and fallback RTT value from RFC 4340, section 3.4 - */ -#define DCCP_SANE_RTT_MIN 100 -#define DCCP_FALLBACK_RTT (USEC_PER_SEC / 5) -#define DCCP_SANE_RTT_MAX (3 * USEC_PER_SEC) - -/* sysctl variables for DCCP */ -extern int sysctl_dccp_request_retries; -extern int sysctl_dccp_retries1; -extern int sysctl_dccp_retries2; -extern int sysctl_dccp_tx_qlen; -extern int sysctl_dccp_sync_ratelimit; - -/* - * 48-bit sequence number arithmetic (signed and unsigned) - */ -#define INT48_MIN 0x800000000000LL /* 2^47 */ -#define UINT48_MAX 0xFFFFFFFFFFFFLL /* 2^48 - 1 */ -#define COMPLEMENT48(x) (0x1000000000000LL - (x)) /* 2^48 - x */ -#define TO_SIGNED48(x) (((x) < INT48_MIN)? (x) : -COMPLEMENT48( (x))) -#define TO_UNSIGNED48(x) (((x) >= 0)? (x) : COMPLEMENT48(-(x))) -#define ADD48(a, b) (((a) + (b)) & UINT48_MAX) -#define SUB48(a, b) ADD48((a), COMPLEMENT48(b)) - -static inline void dccp_inc_seqno(u64 *seqno) -{ - *seqno = ADD48(*seqno, 1); -} - -/* signed mod-2^48 distance: pos. if seqno1 < seqno2, neg. if seqno1 > seqno2 */ -static inline s64 dccp_delta_seqno(const u64 seqno1, const u64 seqno2) -{ - u64 delta = SUB48(seqno2, seqno1); - - return TO_SIGNED48(delta); -} - -/* is seq1 < seq2 ? */ -static inline int before48(const u64 seq1, const u64 seq2) -{ - return (s64)((seq2 << 16) - (seq1 << 16)) > 0; -} - -/* is seq1 > seq2 ? 
*/ -#define after48(seq1, seq2) before48(seq2, seq1) - -/* is seq2 <= seq1 <= seq3 ? */ -static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3) -{ - return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16); -} - -/** - * dccp_loss_count - Approximate the number of lost data packets in a burst loss - * @s1: last known sequence number before the loss ('hole') - * @s2: first sequence number seen after the 'hole' - * @ndp: NDP count on packet with sequence number @s2 - */ -static inline u64 dccp_loss_count(const u64 s1, const u64 s2, const u64 ndp) -{ - s64 delta = dccp_delta_seqno(s1, s2); - - WARN_ON(delta < 0); - delta -= ndp + 1; - - return delta > 0 ? delta : 0; -} - -/** - * dccp_loss_free - Evaluate condition for data loss from RFC 4340, 7.7.1 - */ -static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp) -{ - return dccp_loss_count(s1, s2, ndp) == 0; -} - -enum { - DCCP_MIB_NUM = 0, - DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */ - DCCP_MIB_ESTABRESETS, /* EstabResets */ - DCCP_MIB_CURRESTAB, /* CurrEstab */ - DCCP_MIB_OUTSEGS, /* OutSegs */ - DCCP_MIB_OUTRSTS, - DCCP_MIB_ABORTONTIMEOUT, - DCCP_MIB_TIMEOUTS, - DCCP_MIB_ABORTFAILED, - DCCP_MIB_PASSIVEOPENS, - DCCP_MIB_ATTEMPTFAILS, - DCCP_MIB_OUTDATAGRAMS, - DCCP_MIB_INERRS, - DCCP_MIB_OPTMANDATORYERROR, - DCCP_MIB_INVALIDOPT, - __DCCP_MIB_MAX -}; - -#define DCCP_MIB_MAX __DCCP_MIB_MAX -struct dccp_mib { - unsigned long mibs[DCCP_MIB_MAX]; -}; - -DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); -#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) -#define __DCCP_INC_STATS(field) __SNMP_INC_STATS(dccp_statistics, field) -#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) - -/* - * Checksumming routines - */ -static inline unsigned int dccp_csum_coverage(const struct sk_buff *skb) -{ - const struct dccp_hdr* dh = dccp_hdr(skb); - - if (dh->dccph_cscov == 0) - return skb->len; - return (dh->dccph_doff + dh->dccph_cscov - 1) * sizeof(u32); -} - -static inline void dccp_csum_outgoing(struct sk_buff *skb) -{ - unsigned int cov = dccp_csum_coverage(skb); - - if (cov >= skb->len) - dccp_hdr(skb)->dccph_cscov = 0; - - skb->csum = skb_checksum(skb, 0, (cov > skb->len)? 
skb->len : cov, 0); -} - -void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb); - -int dccp_retransmit_skb(struct sock *sk); - -void dccp_send_ack(struct sock *sk); -void dccp_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, - struct request_sock *rsk); - -void dccp_send_sync(struct sock *sk, const u64 seq, - const enum dccp_pkt_type pkt_type); - -/* - * TX Packet Dequeueing Interface - */ -void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb); -bool dccp_qpolicy_full(struct sock *sk); -void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb); -struct sk_buff *dccp_qpolicy_top(struct sock *sk); -struct sk_buff *dccp_qpolicy_pop(struct sock *sk); -bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param); - -/* - * TX Packet Output and TX Timers - */ -void dccp_write_xmit(struct sock *sk); -void dccp_write_space(struct sock *sk); -void dccp_flush_write_queue(struct sock *sk, long *time_budget); - -void dccp_init_xmit_timers(struct sock *sk); -static inline void dccp_clear_xmit_timers(struct sock *sk) -{ - inet_csk_clear_xmit_timers(sk); -} - -unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); - -const char *dccp_packet_name(const int type); - -void dccp_set_state(struct sock *sk, const int state); -void dccp_done(struct sock *sk); - -int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp, - struct sk_buff const *skb); - -int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); - -struct sock *dccp_create_openreq_child(const struct sock *sk, - const struct request_sock *req, - const struct sk_buff *skb); - -int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); - -struct sock *dccp_v4_request_recv_sock(const struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst, - struct request_sock *req_unhash, - bool *own_req); -struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, - struct request_sock *req); - -int dccp_child_process(struct sock *parent, struct sock *child, - struct sk_buff *skb); -int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - struct dccp_hdr *dh, unsigned int len); -int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct dccp_hdr *dh, const unsigned int len); - -void dccp_destruct_common(struct sock *sk); -int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized); -void dccp_destroy_sock(struct sock *sk); - -void dccp_close(struct sock *sk, long timeout); -struct sk_buff *dccp_make_response(const struct sock *sk, struct dst_entry *dst, - struct request_sock *req); - -int dccp_connect(struct sock *sk); -int dccp_disconnect(struct sock *sk, int flags); -int dccp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); -int dccp_setsockopt(struct sock *sk, int level, int optname, - sockptr_t optval, unsigned int optlen); -int dccp_ioctl(struct sock *sk, int cmd, int *karg); -int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); -int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, - int *addr_len); -void dccp_shutdown(struct sock *sk, int how); -int inet_dccp_listen(struct socket *sock, int backlog); -__poll_t dccp_poll(struct file *file, struct socket *sock, - poll_table *wait); -int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); -void dccp_req_err(struct sock *sk, u64 seq); - -struct sk_buff *dccp_ctl_make_reset(struct sock *sk, struct sk_buff *skb); -int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); -void 
dccp_send_close(struct sock *sk, const int active); -int dccp_invalid_packet(struct sk_buff *skb); -u32 dccp_sample_rtt(struct sock *sk, long delta); - -static inline bool dccp_bad_service_code(const struct sock *sk, - const __be32 service) -{ - const struct dccp_sock *dp = dccp_sk(sk); - - if (dp->dccps_service == service) - return false; - return !dccp_list_has_service(dp->dccps_service_list, service); -} - -/** - * dccp_skb_cb - DCCP per-packet control information - * @dccpd_type: one of %dccp_pkt_type (or unknown) - * @dccpd_ccval: CCVal field (5.1), see e.g. RFC 4342, 8.1 - * @dccpd_reset_code: one of %dccp_reset_codes - * @dccpd_reset_data: Data1..3 fields (depend on @dccpd_reset_code) - * @dccpd_opt_len: total length of all options (5.8) in the packet - * @dccpd_seq: sequence number - * @dccpd_ack_seq: acknowledgment number subheader field value - * - * This is used for transmission as well as for reception. - */ -struct dccp_skb_cb { - union { - struct inet_skb_parm h4; -#if IS_ENABLED(CONFIG_IPV6) - struct inet6_skb_parm h6; -#endif - } header; - __u8 dccpd_type:4; - __u8 dccpd_ccval:4; - __u8 dccpd_reset_code, - dccpd_reset_data[3]; - __u16 dccpd_opt_len; - __u64 dccpd_seq; - __u64 dccpd_ack_seq; -}; - -#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) - -/* RFC 4340, sec. 7.7 */ -static inline int dccp_non_data_packet(const struct sk_buff *skb) -{ - const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; - - return type == DCCP_PKT_ACK || - type == DCCP_PKT_CLOSE || - type == DCCP_PKT_CLOSEREQ || - type == DCCP_PKT_RESET || - type == DCCP_PKT_SYNC || - type == DCCP_PKT_SYNCACK; -} - -/* RFC 4340, sec. 7.7 */ -static inline int dccp_data_packet(const struct sk_buff *skb) -{ - const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; - - return type == DCCP_PKT_DATA || - type == DCCP_PKT_DATAACK || - type == DCCP_PKT_REQUEST || - type == DCCP_PKT_RESPONSE; -} - -static inline int dccp_packet_without_ack(const struct sk_buff *skb) -{ - const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; - - return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST; -} - -#define DCCP_PKT_WITHOUT_ACK_SEQ (UINT48_MAX << 2) - -static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss) -{ - struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + - sizeof(*dh)); - dh->dccph_seq2 = 0; - dh->dccph_seq = htons((gss >> 32) & 0xfffff); - dhx->dccph_seq_low = htonl(gss & 0xffffffff); -} - -static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, - const u64 gsr) -{ - dhack->dccph_reserved1 = 0; - dhack->dccph_ack_nr_high = htons(gsr >> 32); - dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff); -} - -static inline void dccp_update_gsr(struct sock *sk, u64 seq) -{ - struct dccp_sock *dp = dccp_sk(sk); - - if (after48(seq, dp->dccps_gsr)) - dp->dccps_gsr = seq; - /* Sequence validity window depends on remote Sequence Window (7.5.1) */ - dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4); - /* - * Adjust SWL so that it is not below ISR. In contrast to RFC 4340, - * 7.5.1 we perform this check beyond the initial handshake: W/W' are - * always > 32, so for the first W/W' packets in the lifetime of a - * connection we always have to adjust SWL. - * A second reason why we are doing this is that the window depends on - * the feature-remote value of Sequence Window: nothing stops the peer - * from updating this value while we are busy adjusting SWL for the - * first W packets (we would have to count from scratch again then). 
- * Therefore it is safer to always make sure that the Sequence Window - * is not artificially extended by a peer who grows SWL downwards by - * continually updating the feature-remote Sequence-Window. - * If sequence numbers wrap it is bad luck. But that will take a while - * (48 bit), and this measure prevents Sequence-number attacks. - */ - if (before48(dp->dccps_swl, dp->dccps_isr)) - dp->dccps_swl = dp->dccps_isr; - dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4); -} - -static inline void dccp_update_gss(struct sock *sk, u64 seq) -{ - struct dccp_sock *dp = dccp_sk(sk); - - dp->dccps_gss = seq; - /* Ack validity window depends on local Sequence Window value (7.5.1) */ - dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win); - /* Adjust AWL so that it is not below ISS - see comment above for SWL */ - if (before48(dp->dccps_awl, dp->dccps_iss)) - dp->dccps_awl = dp->dccps_iss; - dp->dccps_awh = dp->dccps_gss; -} - -static inline int dccp_ackvec_pending(const struct sock *sk) -{ - return dccp_sk(sk)->dccps_hc_rx_ackvec != NULL && - !dccp_ackvec_is_empty(dccp_sk(sk)->dccps_hc_rx_ackvec); -} - -static inline int dccp_ack_pending(const struct sock *sk) -{ - return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk); -} - -int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val); -int dccp_feat_finalise_settings(struct dccp_sock *dp); -int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq); -int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*, - struct sk_buff *skb); -int dccp_feat_activate_values(struct sock *sk, struct list_head *fn); -void dccp_feat_list_purge(struct list_head *fn_list); - -int dccp_insert_options(struct sock *sk, struct sk_buff *skb); -int dccp_insert_options_rsk(struct dccp_request_sock *, struct sk_buff *); -u32 dccp_timestamp(void); -void dccp_timestamping_init(void); -int dccp_insert_option(struct sk_buff *skb, unsigned char option, - const void *value, unsigned char len); - -#ifdef CONFIG_SYSCTL -int dccp_sysctl_init(void); -void dccp_sysctl_exit(void); -#else -static inline int dccp_sysctl_init(void) -{ - return 0; -} - -static inline void dccp_sysctl_exit(void) -{ -} -#endif - -#endif /* _DCCP_H */ diff --git a/net/dccp/diag.c b/net/dccp/diag.c deleted file mode 100644 index f5019d95c3ae..000000000000 --- a/net/dccp/diag.c +++ /dev/null @@ -1,85 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * net/dccp/diag.c - * - * An implementation of the DCCP protocol - * Arnaldo Carvalho de Melo <acme@mandriva.com> - */ - - -#include <linux/module.h> -#include <linux/inet_diag.h> - -#include "ccid.h" -#include "dccp.h" - -static void dccp_get_info(struct sock *sk, struct tcp_info *info) -{ - struct dccp_sock *dp = dccp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); - - memset(info, 0, sizeof(*info)); - - info->tcpi_state = sk->sk_state; - info->tcpi_retransmits = icsk->icsk_retransmits; - info->tcpi_probes = icsk->icsk_probes_out; - info->tcpi_backoff = icsk->icsk_backoff; - info->tcpi_pmtu = icsk->icsk_pmtu_cookie; - - if (dp->dccps_hc_rx_ackvec != NULL) - info->tcpi_options |= TCPI_OPT_SACK; - - if (dp->dccps_hc_rx_ccid != NULL) - ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info); - - if (dp->dccps_hc_tx_ccid != NULL) - ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info); -} - -static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, - void *_info) -{ - r->idiag_rqueue = r->idiag_wqueue = 0; - - if (_info != NULL) - dccp_get_info(sk, _info); -} - 
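The dump callbacks below are what NETLINK_SOCK_DIAG requests for IPPROTO_DCCP were routed to; iproute2's ss queried DCCP sockets through this path (ss -d). A minimal standalone sketch of such a query (userspace C; error handling trimmed, assumes a kernel with the DCCP diag module loaded):

	#include <linux/inet_diag.h>
	#include <linux/netlink.h>
	#include <linux/sock_diag.h>
	#include <netinet/in.h>
	#include <stdio.h>
	#include <sys/socket.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
		struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
		struct {
			struct nlmsghdr nlh;
			struct inet_diag_req_v2 req;
		} msg = {
			.nlh = {
				.nlmsg_len   = sizeof(msg),
				.nlmsg_type  = SOCK_DIAG_BY_FAMILY,
				.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
			},
			.req = {
				.sdiag_family   = AF_INET,
				.sdiag_protocol = IPPROTO_DCCP, /* routed to dccp_diag_handler */
				.idiag_states   = ~0U,          /* all socket states */
			},
		};
		char buf[8192];
		ssize_t len;

		sendto(fd, &msg, sizeof(msg), 0,
		       (struct sockaddr *)&kernel, sizeof(kernel));
		len = recv(fd, buf, sizeof(buf), 0); /* inet_diag_msg records */
		printf("got %zd bytes of DCCP socket dumps\n", len);
		close(fd);
		return 0;
	}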
-static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r) -{ - inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r); -} - -static int dccp_diag_dump_one(struct netlink_callback *cb, - const struct inet_diag_req_v2 *req) -{ - return inet_diag_dump_one_icsk(&dccp_hashinfo, cb, req); -} - -static const struct inet_diag_handler dccp_diag_handler = { - .owner = THIS_MODULE, - .dump = dccp_diag_dump, - .dump_one = dccp_diag_dump_one, - .idiag_get_info = dccp_diag_get_info, - .idiag_type = IPPROTO_DCCP, - .idiag_info_size = sizeof(struct tcp_info), -}; - -static int __init dccp_diag_init(void) -{ - return inet_diag_register(&dccp_diag_handler); -} - -static void __exit dccp_diag_fini(void) -{ - inet_diag_unregister(&dccp_diag_handler); -} - -module_init(dccp_diag_init); -module_exit(dccp_diag_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); -MODULE_DESCRIPTION("DCCP inet_diag handler"); -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-33 /* AF_INET - IPPROTO_DCCP */); diff --git a/net/dccp/feat.c b/net/dccp/feat.c deleted file mode 100644 index f7554dcdaaba..000000000000 --- a/net/dccp/feat.c +++ /dev/null @@ -1,1581 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/dccp/feat.c - * - * Feature negotiation for the DCCP protocol (RFC 4340, section 6) - * - * Copyright (c) 2008 Gerrit Renker <gerrit@erg.abdn.ac.uk> - * Rewrote from scratch, some bits from earlier code by - * Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk> - * - * ASSUMPTIONS - * ----------- - * o Feature negotiation is coordinated with connection setup (as in TCP), wild - * changes of parameters of an established connection are not supported. - * o Changing non-negotiable (NN) values is supported in state OPEN/PARTOPEN. - * o All currently known SP features have 1-byte quantities. If in the future - * extensions of RFCs 4340..42 define features with item lengths larger than - * one byte, a feature-specific extension of the code will be required. - */ -#include <linux/module.h> -#include <linux/slab.h> -#include "ccid.h" -#include "feat.h" - -/* feature-specific sysctls - initialised to the defaults from RFC 4340, 6.4 */ -unsigned long sysctl_dccp_sequence_window __read_mostly = 100; -int sysctl_dccp_rx_ccid __read_mostly = 2, - sysctl_dccp_tx_ccid __read_mostly = 2; - -/* - * Feature activation handlers. - * - * These all use an u64 argument, to provide enough room for NN/SP features. At - * this stage the negotiated values have been checked to be within their range. 
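For context, negotiation was typically triggered from userspace via socket options before connect(). A hypothetical minimal example (standalone C; SOL_DCCP and DCCP_SOCKOPT_CCID are the UAPI constants, and CCID value 2 selects TCP-like congestion control per RFC 4341):

	#include <linux/dccp.h>		/* DCCP_SOCKOPT_CCID */
	#include <netinet/in.h>		/* IPPROTO_DCCP */
	#include <stdio.h>
	#include <sys/socket.h>
	#include <unistd.h>

	#ifndef SOL_DCCP
	#define SOL_DCCP 269		/* from include/linux/socket.h */
	#endif

	int main(void)
	{
		int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
		unsigned char ccid = 2;	/* TCP-like congestion control, RFC 4341 */

		/* Sets both the TX and RX CCID preference lists; feature
		 * negotiation then advertises them as Change options
		 * during the handshake. */
		if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CCID,
			       &ccid, sizeof(ccid)) < 0)
			perror("DCCP_SOCKOPT_CCID");
		close(fd);
		return 0;
	}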
- */ -static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct ccid *new_ccid = ccid_new(ccid, sk, rx); - - if (new_ccid == NULL) - return -ENOMEM; - - if (rx) { - ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); - dp->dccps_hc_rx_ccid = new_ccid; - } else { - ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); - dp->dccps_hc_tx_ccid = new_ccid; - } - return 0; -} - -static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx) -{ - struct dccp_sock *dp = dccp_sk(sk); - - if (rx) { - dp->dccps_r_seq_win = seq_win; - /* propagate changes to update SWL/SWH */ - dccp_update_gsr(sk, dp->dccps_gsr); - } else { - dp->dccps_l_seq_win = seq_win; - /* propagate changes to update AWL */ - dccp_update_gss(sk, dp->dccps_gss); - } - return 0; -} - -static int dccp_hdlr_ack_ratio(struct sock *sk, u64 ratio, bool rx) -{ - if (rx) - dccp_sk(sk)->dccps_r_ack_ratio = ratio; - else - dccp_sk(sk)->dccps_l_ack_ratio = ratio; - return 0; -} - -static int dccp_hdlr_ackvec(struct sock *sk, u64 enable, bool rx) -{ - struct dccp_sock *dp = dccp_sk(sk); - - if (rx) { - if (enable && dp->dccps_hc_rx_ackvec == NULL) { - dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(gfp_any()); - if (dp->dccps_hc_rx_ackvec == NULL) - return -ENOMEM; - } else if (!enable) { - dccp_ackvec_free(dp->dccps_hc_rx_ackvec); - dp->dccps_hc_rx_ackvec = NULL; - } - } - return 0; -} - -static int dccp_hdlr_ndp(struct sock *sk, u64 enable, bool rx) -{ - if (!rx) - dccp_sk(sk)->dccps_send_ndp_count = (enable > 0); - return 0; -} - -/* - * Minimum Checksum Coverage is located at the RX side (9.2.1). This means that - * `rx' holds when the sending peer informs about his partial coverage via a - * ChangeR() option. In the other case, we are the sender and the receiver - * announces its coverage via ChangeL() options. The policy here is to honour - * such communication by enabling the corresponding partial coverage - but only - * if it has not been set manually before; the warning here means that all - * packets will be dropped. - */ -static int dccp_hdlr_min_cscov(struct sock *sk, u64 cscov, bool rx) -{ - struct dccp_sock *dp = dccp_sk(sk); - - if (rx) - dp->dccps_pcrlen = cscov; - else { - if (dp->dccps_pcslen == 0) - dp->dccps_pcslen = cscov; - else if (cscov > dp->dccps_pcslen) - DCCP_WARN("CsCov %u too small, peer requires >= %u\n", - dp->dccps_pcslen, (u8)cscov); - } - return 0; -} - -static const struct { - u8 feat_num; /* DCCPF_xxx */ - enum dccp_feat_type rxtx; /* RX or TX */ - enum dccp_feat_type reconciliation; /* SP or NN */ - u8 default_value; /* as in 6.4 */ - int (*activation_hdlr)(struct sock *sk, u64 val, bool rx); -/* - * Lookup table for location and type of features (from RFC 4340/4342) - * +--------------------------+----+-----+----+----+---------+-----------+ - * | Feature | Location | Reconc. 
| Initial | Section | - * | | RX | TX | SP | NN | Value | Reference | - * +--------------------------+----+-----+----+----+---------+-----------+ - * | DCCPF_CCID | | X | X | | 2 | 10 | - * | DCCPF_SHORT_SEQNOS | | X | X | | 0 | 7.6.1 | - * | DCCPF_SEQUENCE_WINDOW | | X | | X | 100 | 7.5.2 | - * | DCCPF_ECN_INCAPABLE | X | | X | | 0 | 12.1 | - * | DCCPF_ACK_RATIO | | X | | X | 2 | 11.3 | - * | DCCPF_SEND_ACK_VECTOR | X | | X | | 0 | 11.5 | - * | DCCPF_SEND_NDP_COUNT | | X | X | | 0 | 7.7.2 | - * | DCCPF_MIN_CSUM_COVER | X | | X | | 0 | 9.2.1 | - * | DCCPF_DATA_CHECKSUM | X | | X | | 0 | 9.3.1 | - * | DCCPF_SEND_LEV_RATE | X | | X | | 0 | 4342/8.4 | - * +--------------------------+----+-----+----+----+---------+-----------+ - */ -} dccp_feat_table[] = { - { DCCPF_CCID, FEAT_AT_TX, FEAT_SP, 2, dccp_hdlr_ccid }, - { DCCPF_SHORT_SEQNOS, FEAT_AT_TX, FEAT_SP, 0, NULL }, - { DCCPF_SEQUENCE_WINDOW, FEAT_AT_TX, FEAT_NN, 100, dccp_hdlr_seq_win }, - { DCCPF_ECN_INCAPABLE, FEAT_AT_RX, FEAT_SP, 0, NULL }, - { DCCPF_ACK_RATIO, FEAT_AT_TX, FEAT_NN, 2, dccp_hdlr_ack_ratio}, - { DCCPF_SEND_ACK_VECTOR, FEAT_AT_RX, FEAT_SP, 0, dccp_hdlr_ackvec }, - { DCCPF_SEND_NDP_COUNT, FEAT_AT_TX, FEAT_SP, 0, dccp_hdlr_ndp }, - { DCCPF_MIN_CSUM_COVER, FEAT_AT_RX, FEAT_SP, 0, dccp_hdlr_min_cscov}, - { DCCPF_DATA_CHECKSUM, FEAT_AT_RX, FEAT_SP, 0, NULL }, - { DCCPF_SEND_LEV_RATE, FEAT_AT_RX, FEAT_SP, 0, NULL }, -}; -#define DCCP_FEAT_SUPPORTED_MAX ARRAY_SIZE(dccp_feat_table) - -/** - * dccp_feat_index - Hash function to map feature number into array position - * @feat_num: feature to hash, one of %dccp_feature_numbers - * - * Returns consecutive array index or -1 if the feature is not understood. - */ -static int dccp_feat_index(u8 feat_num) -{ - /* The first 9 entries are occupied by the types from RFC 4340, 6.4 */ - if (feat_num > DCCPF_RESERVED && feat_num <= DCCPF_DATA_CHECKSUM) - return feat_num - 1; - - /* - * Other features: add cases for new feature types here after adding - * them to the above table. - */ - switch (feat_num) { - case DCCPF_SEND_LEV_RATE: - return DCCP_FEAT_SUPPORTED_MAX - 1; - } - return -1; -} - -static u8 dccp_feat_type(u8 feat_num) -{ - int idx = dccp_feat_index(feat_num); - - if (idx < 0) - return FEAT_UNKNOWN; - return dccp_feat_table[idx].reconciliation; -} - -static int dccp_feat_default_value(u8 feat_num) -{ - int idx = dccp_feat_index(feat_num); - /* - * There are no default values for unknown features, so encountering a - * negative index here indicates a serious problem somewhere else. - */ - DCCP_BUG_ON(idx < 0); - - return idx < 0 ? 0 : dccp_feat_table[idx].default_value; -} - -/* - * Debugging and verbose-printing section - */ -static const char *dccp_feat_fname(const u8 feat) -{ - static const char *const feature_names[] = { - [DCCPF_RESERVED] = "Reserved", - [DCCPF_CCID] = "CCID", - [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos", - [DCCPF_SEQUENCE_WINDOW] = "Sequence Window", - [DCCPF_ECN_INCAPABLE] = "ECN Incapable", - [DCCPF_ACK_RATIO] = "Ack Ratio", - [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector", - [DCCPF_SEND_NDP_COUNT] = "Send NDP Count", - [DCCPF_MIN_CSUM_COVER] = "Min. 
Csum Coverage", - [DCCPF_DATA_CHECKSUM] = "Send Data Checksum", - }; - if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC) - return feature_names[DCCPF_RESERVED]; - - if (feat == DCCPF_SEND_LEV_RATE) - return "Send Loss Event Rate"; - if (feat >= DCCPF_MIN_CCID_SPECIFIC) - return "CCID-specific"; - - return feature_names[feat]; -} - -static const char *const dccp_feat_sname[] = { - "DEFAULT", "INITIALISING", "CHANGING", "UNSTABLE", "STABLE", -}; - -#ifdef CONFIG_IP_DCCP_DEBUG -static const char *dccp_feat_oname(const u8 opt) -{ - switch (opt) { - case DCCPO_CHANGE_L: return "Change_L"; - case DCCPO_CONFIRM_L: return "Confirm_L"; - case DCCPO_CHANGE_R: return "Change_R"; - case DCCPO_CONFIRM_R: return "Confirm_R"; - } - return NULL; -} - -static void dccp_feat_printval(u8 feat_num, dccp_feat_val const *val) -{ - u8 i, type = dccp_feat_type(feat_num); - - if (val == NULL || (type == FEAT_SP && val->sp.vec == NULL)) - dccp_pr_debug_cat("(NULL)"); - else if (type == FEAT_SP) - for (i = 0; i < val->sp.len; i++) - dccp_pr_debug_cat("%s%u", i ? " " : "", val->sp.vec[i]); - else if (type == FEAT_NN) - dccp_pr_debug_cat("%llu", (unsigned long long)val->nn); - else - dccp_pr_debug_cat("unknown type %u", type); -} - -static void dccp_feat_printvals(u8 feat_num, u8 *list, u8 len) -{ - u8 type = dccp_feat_type(feat_num); - dccp_feat_val fval = { .sp.vec = list, .sp.len = len }; - - if (type == FEAT_NN) - fval.nn = dccp_decode_value_var(list, len); - dccp_feat_printval(feat_num, &fval); -} - -static void dccp_feat_print_entry(struct dccp_feat_entry const *entry) -{ - dccp_debug(" * %s %s = ", entry->is_local ? "local" : "remote", - dccp_feat_fname(entry->feat_num)); - dccp_feat_printval(entry->feat_num, &entry->val); - dccp_pr_debug_cat(", state=%s %s\n", dccp_feat_sname[entry->state], - entry->needs_confirm ? "(Confirm pending)" : ""); -} - -#define dccp_feat_print_opt(opt, feat, val, len, mandatory) do { \ - dccp_pr_debug("%s(%s, ", dccp_feat_oname(opt), dccp_feat_fname(feat));\ - dccp_feat_printvals(feat, val, len); \ - dccp_pr_debug_cat(") %s\n", mandatory ? "!" : ""); } while (0) - -#define dccp_feat_print_fnlist(fn_list) { \ - const struct dccp_feat_entry *___entry; \ - \ - dccp_pr_debug("List Dump:\n"); \ - list_for_each_entry(___entry, fn_list, node) \ - dccp_feat_print_entry(___entry); \ -} -#else /* ! CONFIG_IP_DCCP_DEBUG */ -#define dccp_feat_print_opt(opt, feat, val, len, mandatory) -#define dccp_feat_print_fnlist(fn_list) -#endif - -static int __dccp_feat_activate(struct sock *sk, const int idx, - const bool is_local, dccp_feat_val const *fval) -{ - bool rx; - u64 val; - - if (idx < 0 || idx >= DCCP_FEAT_SUPPORTED_MAX) - return -1; - if (dccp_feat_table[idx].activation_hdlr == NULL) - return 0; - - if (fval == NULL) { - val = dccp_feat_table[idx].default_value; - } else if (dccp_feat_table[idx].reconciliation == FEAT_SP) { - if (fval->sp.vec == NULL) { - /* - * This can happen when an empty Confirm is sent - * for an SP (i.e. known) feature. In this case - * we would be using the default anyway. - */ - DCCP_CRIT("Feature #%d undefined: using default", idx); - val = dccp_feat_table[idx].default_value; - } else { - val = fval->sp.vec[0]; - } - } else { - val = fval->nn; - } - - /* Location is RX if this is a local-RX or remote-TX feature */ - rx = (is_local == (dccp_feat_table[idx].rxtx == FEAT_AT_RX)); - - dccp_debug(" -> activating %s %s, %sval=%llu\n", rx ? "RX" : "TX", - dccp_feat_fname(dccp_feat_table[idx].feat_num), - fval ? 
"" : "default ", (unsigned long long)val); - - return dccp_feat_table[idx].activation_hdlr(sk, val, rx); -} - -/** - * dccp_feat_activate - Activate feature value on socket - * @sk: fully connected DCCP socket (after handshake is complete) - * @feat_num: feature to activate, one of %dccp_feature_numbers - * @local: whether local (1) or remote (0) @feat_num is meant - * @fval: the value (SP or NN) to activate, or NULL to use the default value - * - * For general use this function is preferable over __dccp_feat_activate(). - */ -static int dccp_feat_activate(struct sock *sk, u8 feat_num, bool local, - dccp_feat_val const *fval) -{ - return __dccp_feat_activate(sk, dccp_feat_index(feat_num), local, fval); -} - -/* Test for "Req'd" feature (RFC 4340, 6.4) */ -static inline int dccp_feat_must_be_understood(u8 feat_num) -{ - return feat_num == DCCPF_CCID || feat_num == DCCPF_SHORT_SEQNOS || - feat_num == DCCPF_SEQUENCE_WINDOW; -} - -/* copy constructor, fval must not already contain allocated memory */ -static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len) -{ - fval->sp.len = len; - if (fval->sp.len > 0) { - fval->sp.vec = kmemdup(val, len, gfp_any()); - if (fval->sp.vec == NULL) { - fval->sp.len = 0; - return -ENOMEM; - } - } - return 0; -} - -static void dccp_feat_val_destructor(u8 feat_num, dccp_feat_val *val) -{ - if (unlikely(val == NULL)) - return; - if (dccp_feat_type(feat_num) == FEAT_SP) - kfree(val->sp.vec); - memset(val, 0, sizeof(*val)); -} - -static struct dccp_feat_entry * - dccp_feat_clone_entry(struct dccp_feat_entry const *original) -{ - struct dccp_feat_entry *new; - u8 type = dccp_feat_type(original->feat_num); - - if (type == FEAT_UNKNOWN) - return NULL; - - new = kmemdup(original, sizeof(struct dccp_feat_entry), gfp_any()); - if (new == NULL) - return NULL; - - if (type == FEAT_SP && dccp_feat_clone_sp_val(&new->val, - original->val.sp.vec, - original->val.sp.len)) { - kfree(new); - return NULL; - } - return new; -} - -static void dccp_feat_entry_destructor(struct dccp_feat_entry *entry) -{ - if (entry != NULL) { - dccp_feat_val_destructor(entry->feat_num, &entry->val); - kfree(entry); - } -} - -/* - * List management functions - * - * Feature negotiation lists rely on and maintain the following invariants: - * - each feat_num in the list is known, i.e. we know its type and default value - * - each feat_num/is_local combination is unique (old entries are overwritten) - * - SP values are always freshly allocated - * - list is sorted in increasing order of feature number (faster lookup) - */ -static struct dccp_feat_entry *dccp_feat_list_lookup(struct list_head *fn_list, - u8 feat_num, bool is_local) -{ - struct dccp_feat_entry *entry; - - list_for_each_entry(entry, fn_list, node) { - if (entry->feat_num == feat_num && entry->is_local == is_local) - return entry; - else if (entry->feat_num > feat_num) - break; - } - return NULL; -} - -/** - * dccp_feat_entry_new - Central list update routine (called by all others) - * @head: list to add to - * @feat: feature number - * @local: whether the local (1) or remote feature with number @feat is meant - * - * This is the only constructor and serves to ensure the above invariants. 
- */ -static struct dccp_feat_entry * - dccp_feat_entry_new(struct list_head *head, u8 feat, bool local) -{ - struct dccp_feat_entry *entry; - - list_for_each_entry(entry, head, node) - if (entry->feat_num == feat && entry->is_local == local) { - dccp_feat_val_destructor(entry->feat_num, &entry->val); - return entry; - } else if (entry->feat_num > feat) { - head = &entry->node; - break; - } - - entry = kmalloc(sizeof(*entry), gfp_any()); - if (entry != NULL) { - entry->feat_num = feat; - entry->is_local = local; - list_add_tail(&entry->node, head); - } - return entry; -} - -/** - * dccp_feat_push_change - Add/overwrite a Change option in the list - * @fn_list: feature-negotiation list to update - * @feat: one of %dccp_feature_numbers - * @local: whether local (1) or remote (0) @feat_num is meant - * @mandatory: whether to use Mandatory feature negotiation options - * @fval: pointer to NN/SP value to be inserted (will be copied) - */ -static int dccp_feat_push_change(struct list_head *fn_list, u8 feat, u8 local, - u8 mandatory, dccp_feat_val *fval) -{ - struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local); - - if (new == NULL) - return -ENOMEM; - - new->feat_num = feat; - new->is_local = local; - new->state = FEAT_INITIALISING; - new->needs_confirm = false; - new->empty_confirm = false; - new->val = *fval; - new->needs_mandatory = mandatory; - - return 0; -} - -/** - * dccp_feat_push_confirm - Add a Confirm entry to the FN list - * @fn_list: feature-negotiation list to add to - * @feat: one of %dccp_feature_numbers - * @local: whether local (1) or remote (0) @feat_num is being confirmed - * @fval: pointer to NN/SP value to be inserted or NULL - * - * Returns 0 on success, a Reset code for further processing otherwise. - */ -static int dccp_feat_push_confirm(struct list_head *fn_list, u8 feat, u8 local, - dccp_feat_val *fval) -{ - struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local); - - if (new == NULL) - return DCCP_RESET_CODE_TOO_BUSY; - - new->feat_num = feat; - new->is_local = local; - new->state = FEAT_STABLE; /* transition in 6.6.2 */ - new->needs_confirm = true; - new->empty_confirm = (fval == NULL); - new->val.nn = 0; /* zeroes the whole structure */ - if (!new->empty_confirm) - new->val = *fval; - new->needs_mandatory = false; - - return 0; -} - -static int dccp_push_empty_confirm(struct list_head *fn_list, u8 feat, u8 local) -{ - return dccp_feat_push_confirm(fn_list, feat, local, NULL); -} - -static inline void dccp_feat_list_pop(struct dccp_feat_entry *entry) -{ - list_del(&entry->node); - dccp_feat_entry_destructor(entry); -} - -void dccp_feat_list_purge(struct list_head *fn_list) -{ - struct dccp_feat_entry *entry, *next; - - list_for_each_entry_safe(entry, next, fn_list, node) - dccp_feat_entry_destructor(entry); - INIT_LIST_HEAD(fn_list); -} -EXPORT_SYMBOL_GPL(dccp_feat_list_purge); - -/* generate @to as full clone of @from - @to must not contain any nodes */ -int dccp_feat_clone_list(struct list_head const *from, struct list_head *to) -{ - struct dccp_feat_entry *entry, *new; - - INIT_LIST_HEAD(to); - list_for_each_entry(entry, from, node) { - new = dccp_feat_clone_entry(entry); - if (new == NULL) - goto cloning_failed; - list_add_tail(&new->node, to); - } - return 0; - -cloning_failed: - dccp_feat_list_purge(to); - return -ENOMEM; -} - -/** - * dccp_feat_valid_nn_length - Enforce length constraints on NN options - * @feat_num: feature to return length of, one of %dccp_feature_numbers - * - * Length is between 0 and %DCCP_OPTVAL_MAXLEN. 
Used for outgoing packets only, - * incoming options are accepted as long as their values are valid. - */ -static u8 dccp_feat_valid_nn_length(u8 feat_num) -{ - if (feat_num == DCCPF_ACK_RATIO) /* RFC 4340, 11.3 and 6.6.8 */ - return 2; - if (feat_num == DCCPF_SEQUENCE_WINDOW) /* RFC 4340, 7.5.2 and 6.5 */ - return 6; - return 0; -} - -static u8 dccp_feat_is_valid_nn_val(u8 feat_num, u64 val) -{ - switch (feat_num) { - case DCCPF_ACK_RATIO: - return val <= DCCPF_ACK_RATIO_MAX; - case DCCPF_SEQUENCE_WINDOW: - return val >= DCCPF_SEQ_WMIN && val <= DCCPF_SEQ_WMAX; - } - return 0; /* feature unknown - so we can't tell */ -} - -/* check that SP values are within the ranges defined in RFC 4340 */ -static u8 dccp_feat_is_valid_sp_val(u8 feat_num, u8 val) -{ - switch (feat_num) { - case DCCPF_CCID: - return val == DCCPC_CCID2 || val == DCCPC_CCID3; - /* Type-check Boolean feature values: */ - case DCCPF_SHORT_SEQNOS: - case DCCPF_ECN_INCAPABLE: - case DCCPF_SEND_ACK_VECTOR: - case DCCPF_SEND_NDP_COUNT: - case DCCPF_DATA_CHECKSUM: - case DCCPF_SEND_LEV_RATE: - return val < 2; - case DCCPF_MIN_CSUM_COVER: - return val < 16; - } - return 0; /* feature unknown */ -} - -static u8 dccp_feat_sp_list_ok(u8 feat_num, u8 const *sp_list, u8 sp_len) -{ - if (sp_list == NULL || sp_len < 1) - return 0; - while (sp_len--) - if (!dccp_feat_is_valid_sp_val(feat_num, *sp_list++)) - return 0; - return 1; -} - -/** - * dccp_feat_insert_opts - Generate FN options from current list state - * @skb: next sk_buff to be sent to the peer - * @dp: for client during handshake and general negotiation - * @dreq: used by the server only (all Changes/Confirms in LISTEN/RESPOND) - */ -int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq, - struct sk_buff *skb) -{ - struct list_head *fn = dreq ? &dreq->dreq_featneg : &dp->dccps_featneg; - struct dccp_feat_entry *pos, *next; - u8 opt, type, len, *ptr, nn_in_nbo[DCCP_OPTVAL_MAXLEN]; - bool rpt; - - /* put entries into @skb in the order they appear in the list */ - list_for_each_entry_safe_reverse(pos, next, fn, node) { - opt = dccp_feat_genopt(pos); - type = dccp_feat_type(pos->feat_num); - rpt = false; - - if (pos->empty_confirm) { - len = 0; - ptr = NULL; - } else { - if (type == FEAT_SP) { - len = pos->val.sp.len; - ptr = pos->val.sp.vec; - rpt = pos->needs_confirm; - } else if (type == FEAT_NN) { - len = dccp_feat_valid_nn_length(pos->feat_num); - ptr = nn_in_nbo; - dccp_encode_value_var(pos->val.nn, ptr, len); - } else { - DCCP_BUG("unknown feature %u", pos->feat_num); - return -1; - } - } - dccp_feat_print_opt(opt, pos->feat_num, ptr, len, 0); - - if (dccp_insert_fn_opt(skb, opt, pos->feat_num, ptr, len, rpt)) - return -1; - if (pos->needs_mandatory && dccp_insert_option_mandatory(skb)) - return -1; - - if (skb->sk->sk_state == DCCP_OPEN && - (opt == DCCPO_CONFIRM_R || opt == DCCPO_CONFIRM_L)) { - /* - * Confirms don't get retransmitted (6.6.3) once the - * connection is in state OPEN - */ - dccp_feat_list_pop(pos); - } else { - /* - * Enter CHANGING after transmitting the Change - * option (6.6.2). - */ - if (pos->state == FEAT_INITIALISING) - pos->state = FEAT_CHANGING; - } - } - return 0; -} - -/** - * __feat_register_nn - Register new NN value on socket - * @fn: feature-negotiation list to register with - * @feat: an NN feature from %dccp_feature_numbers - * @mandatory: use Mandatory option if 1 - * @nn_val: value to register (restricted to 4 bytes) - * - * Note that NN features are local by definition (RFC 4340, 6.3.2). 
- */ -static int __feat_register_nn(struct list_head *fn, u8 feat, - u8 mandatory, u64 nn_val) -{ - dccp_feat_val fval = { .nn = nn_val }; - - if (dccp_feat_type(feat) != FEAT_NN || - !dccp_feat_is_valid_nn_val(feat, nn_val)) - return -EINVAL; - - /* Don't bother with default values, they will be activated anyway. */ - if (nn_val - (u64)dccp_feat_default_value(feat) == 0) - return 0; - - return dccp_feat_push_change(fn, feat, 1, mandatory, &fval); -} - -/** - * __feat_register_sp - Register new SP value/list on socket - * @fn: feature-negotiation list to register with - * @feat: an SP feature from %dccp_feature_numbers - * @is_local: whether the local (1) or the remote (0) @feat is meant - * @mandatory: use Mandatory option if 1 - * @sp_val: SP value followed by optional preference list - * @sp_len: length of @sp_val in bytes - */ -static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local, - u8 mandatory, u8 const *sp_val, u8 sp_len) -{ - dccp_feat_val fval; - - if (dccp_feat_type(feat) != FEAT_SP || - !dccp_feat_sp_list_ok(feat, sp_val, sp_len)) - return -EINVAL; - - /* Avoid negotiating alien CCIDs by only advertising supported ones */ - if (feat == DCCPF_CCID && !ccid_support_check(sp_val, sp_len)) - return -EOPNOTSUPP; - - if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len)) - return -ENOMEM; - - if (dccp_feat_push_change(fn, feat, is_local, mandatory, &fval)) { - kfree(fval.sp.vec); - return -ENOMEM; - } - - return 0; -} - -/** - * dccp_feat_register_sp - Register requests to change SP feature values - * @sk: client or listening socket - * @feat: one of %dccp_feature_numbers - * @is_local: whether the local (1) or remote (0) @feat is meant - * @list: array of preferred values, in descending order of preference - * @len: length of @list in bytes - */ -int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, - u8 const *list, u8 len) -{ /* any changes must be registered before establishing the connection */ - if (sk->sk_state != DCCP_CLOSED) - return -EISCONN; - if (dccp_feat_type(feat) != FEAT_SP) - return -EINVAL; - return __feat_register_sp(&dccp_sk(sk)->dccps_featneg, feat, is_local, - 0, list, len); -} - -/** - * dccp_feat_nn_get - Query current/pending value of NN feature - * @sk: DCCP socket of an established connection - * @feat: NN feature number from %dccp_feature_numbers - * - * For a known NN feature, returns value currently being negotiated, or - * current (confirmed) value if no negotiation is going on. - */ -u64 dccp_feat_nn_get(struct sock *sk, u8 feat) -{ - if (dccp_feat_type(feat) == FEAT_NN) { - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_feat_entry *entry; - - entry = dccp_feat_list_lookup(&dp->dccps_featneg, feat, 1); - if (entry != NULL) - return entry->val.nn; - - switch (feat) { - case DCCPF_ACK_RATIO: - return dp->dccps_l_ack_ratio; - case DCCPF_SEQUENCE_WINDOW: - return dp->dccps_l_seq_win; - } - } - DCCP_BUG("attempt to look up unsupported feature %u", feat); - return 0; -} -EXPORT_SYMBOL_GPL(dccp_feat_nn_get); - -/** - * dccp_feat_signal_nn_change - Update NN values for an established connection - * @sk: DCCP socket of an established connection - * @feat: NN feature number from %dccp_feature_numbers - * @nn_val: the new value to use - * - * This function is used to communicate NN updates out-of-band. 
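For context, the in-tree user of this out-of-band hook was the CCID-2 code, which invoked it to renegotiate the peer's Ack Ratio mid-connection. A hedged sketch of such a call site (reconstructed from memory, not copied from ccid2.c):

    /* Sketch: ask the peer for a new Ack Ratio on a live connection.
     * dccp_feat_signal_nn_change() queues a Change L option; the value
     * only takes effect once the peer's Confirm R is processed by the
     * fast path in dccp_feat_handle_nn_established() below.
     */
    static void example_set_ack_ratio(struct sock *sk, u32 val)
    {
    	if (val > DCCPF_ACK_RATIO_MAX)		/* 2-byte field, 11.3 */
    		val = DCCPF_ACK_RATIO_MAX;
    	dccp_feat_signal_nn_change(sk, DCCPF_ACK_RATIO, val);
    }
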
- */ -int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val) -{ - struct list_head *fn = &dccp_sk(sk)->dccps_featneg; - dccp_feat_val fval = { .nn = nn_val }; - struct dccp_feat_entry *entry; - - if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN) - return 0; - - if (dccp_feat_type(feat) != FEAT_NN || - !dccp_feat_is_valid_nn_val(feat, nn_val)) - return -EINVAL; - - if (nn_val == dccp_feat_nn_get(sk, feat)) - return 0; /* already set or negotiation under way */ - - entry = dccp_feat_list_lookup(fn, feat, 1); - if (entry != NULL) { - dccp_pr_debug("Clobbering existing NN entry %llu -> %llu\n", - (unsigned long long)entry->val.nn, - (unsigned long long)nn_val); - dccp_feat_list_pop(entry); - } - - inet_csk_schedule_ack(sk); - return dccp_feat_push_change(fn, feat, 1, 0, &fval); -} -EXPORT_SYMBOL_GPL(dccp_feat_signal_nn_change); - -/* - * Tracking features whose value depends on the choice of CCID - * - * This is designed with an extension in mind so that a list walk could be done - * before activating any features. However, the existing framework was found to - * work satisfactorily up until now, so the automatic verification is left open. - * When adding new CCIDs, add a corresponding dependency table here. - */ -static const struct ccid_dependency *dccp_feat_ccid_deps(u8 ccid, bool is_local) -{ - static const struct ccid_dependency ccid2_dependencies[2][2] = { - /* - * CCID2 mandates Ack Vectors (RFC 4341, 4.): as CCID is a TX - * feature and Send Ack Vector is an RX feature, `is_local' - * needs to be reversed. - */ - { /* Dependencies of the receiver-side (remote) CCID2 */ - { - .dependent_feat = DCCPF_SEND_ACK_VECTOR, - .is_local = true, - .is_mandatory = true, - .val = 1 - }, - { 0, 0, 0, 0 } - }, - { /* Dependencies of the sender-side (local) CCID2 */ - { - .dependent_feat = DCCPF_SEND_ACK_VECTOR, - .is_local = false, - .is_mandatory = true, - .val = 1 - }, - { 0, 0, 0, 0 } - } - }; - static const struct ccid_dependency ccid3_dependencies[2][5] = { - { /* - * Dependencies of the receiver-side CCID3 - */ - { /* locally disable Ack Vectors */ - .dependent_feat = DCCPF_SEND_ACK_VECTOR, - .is_local = true, - .is_mandatory = false, - .val = 0 - }, - { /* see below why Send Loss Event Rate is on */ - .dependent_feat = DCCPF_SEND_LEV_RATE, - .is_local = true, - .is_mandatory = true, - .val = 1 - }, - { /* NDP Count is needed as per RFC 4342, 6.1.1 */ - .dependent_feat = DCCPF_SEND_NDP_COUNT, - .is_local = false, - .is_mandatory = true, - .val = 1 - }, - { 0, 0, 0, 0 }, - }, - { /* - * CCID3 at the TX side: we request that the HC-receiver - * will not send Ack Vectors (they will be ignored, so - * Mandatory is not set); we enable Send Loss Event Rate - * (Mandatory since the implementation does not support - * the Loss Intervals option of RFC 4342, 8.6). - * The last two options are for peer's information only. 
- */ - { - .dependent_feat = DCCPF_SEND_ACK_VECTOR, - .is_local = false, - .is_mandatory = false, - .val = 0 - }, - { - .dependent_feat = DCCPF_SEND_LEV_RATE, - .is_local = false, - .is_mandatory = true, - .val = 1 - }, - { /* this CCID does not support Ack Ratio */ - .dependent_feat = DCCPF_ACK_RATIO, - .is_local = true, - .is_mandatory = false, - .val = 0 - }, - { /* tell receiver we are sending NDP counts */ - .dependent_feat = DCCPF_SEND_NDP_COUNT, - .is_local = true, - .is_mandatory = false, - .val = 1 - }, - { 0, 0, 0, 0 } - } - }; - switch (ccid) { - case DCCPC_CCID2: - return ccid2_dependencies[is_local]; - case DCCPC_CCID3: - return ccid3_dependencies[is_local]; - default: - return NULL; - } -} - -/** - * dccp_feat_propagate_ccid - Resolve dependencies of features on choice of CCID - * @fn: feature-negotiation list to update - * @id: CCID number to track - * @is_local: whether TX CCID (1) or RX CCID (0) is meant - * - * This function needs to be called after registering all other features. - */ -static int dccp_feat_propagate_ccid(struct list_head *fn, u8 id, bool is_local) -{ - const struct ccid_dependency *table = dccp_feat_ccid_deps(id, is_local); - int i, rc = (table == NULL); - - for (i = 0; rc == 0 && table[i].dependent_feat != DCCPF_RESERVED; i++) - if (dccp_feat_type(table[i].dependent_feat) == FEAT_SP) - rc = __feat_register_sp(fn, table[i].dependent_feat, - table[i].is_local, - table[i].is_mandatory, - &table[i].val, 1); - else - rc = __feat_register_nn(fn, table[i].dependent_feat, - table[i].is_mandatory, - table[i].val); - return rc; -} - -/** - * dccp_feat_finalise_settings - Finalise settings before starting negotiation - * @dp: client or listening socket (settings will be inherited) - * - * This is called after all registrations (socket initialisation, sysctls, and - * sockopt calls), and before sending the first packet containing Change options - * (i.e. client-Request or server-Response), to ensure internal consistency. - */ -int dccp_feat_finalise_settings(struct dccp_sock *dp) -{ - struct list_head *fn = &dp->dccps_featneg; - struct dccp_feat_entry *entry; - int i = 2, ccids[2] = { -1, -1 }; - - /* - * Propagating CCIDs: - * 1) not useful to propagate CCID settings if this host advertises more - * than one CCID: the choice of CCID may still change - if this is - * the client, or if this is the server and the client sends - * singleton CCID values. - * 2) since propagate_ccid() changes the list, we defer changing - * the sorted list until after the traversal. - */ - list_for_each_entry(entry, fn, node) - if (entry->feat_num == DCCPF_CCID && entry->val.sp.len == 1) - ccids[entry->is_local] = entry->val.sp.vec[0]; - while (i--) - if (ccids[i] > 0 && dccp_feat_propagate_ccid(fn, ccids[i], i)) - return -1; - dccp_feat_print_fnlist(fn); - return 0; -} - -/** - * dccp_feat_server_ccid_dependencies - Resolve CCID-dependent features - * @dreq: server socket to resolve - * - * It is the server which resolves the dependencies once the CCID has been - * fully negotiated. If no CCID has been negotiated, it uses the default CCID. 
- */ -int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq) -{ - struct list_head *fn = &dreq->dreq_featneg; - struct dccp_feat_entry *entry; - u8 is_local, ccid; - - for (is_local = 0; is_local <= 1; is_local++) { - entry = dccp_feat_list_lookup(fn, DCCPF_CCID, is_local); - - if (entry != NULL && !entry->empty_confirm) - ccid = entry->val.sp.vec[0]; - else - ccid = dccp_feat_default_value(DCCPF_CCID); - - if (dccp_feat_propagate_ccid(fn, ccid, is_local)) - return -1; - } - return 0; -} - -/* Select the first entry in @servlist that also occurs in @clilist (6.3.1) */ -static int dccp_feat_preflist_match(u8 *servlist, u8 slen, u8 *clilist, u8 clen) -{ - u8 c, s; - - for (s = 0; s < slen; s++) - for (c = 0; c < clen; c++) - if (servlist[s] == clilist[c]) - return servlist[s]; - return -1; -} - -/** - * dccp_feat_prefer - Move preferred entry to the start of array - * @preferred_value: entry to move to start of array - * @array: array of preferred entries - * @array_len: size of the array - * - * Reorder the @array_len elements in @array so that @preferred_value comes - * first. Returns >0 to indicate that @preferred_value does occur in @array. - */ -static u8 dccp_feat_prefer(u8 preferred_value, u8 *array, u8 array_len) -{ - u8 i, does_occur = 0; - - if (array != NULL) { - for (i = 0; i < array_len; i++) - if (array[i] == preferred_value) { - array[i] = array[0]; - does_occur++; - } - if (does_occur) - array[0] = preferred_value; - } - return does_occur; -} - -/** - * dccp_feat_reconcile - Reconcile SP preference lists - * @fv: SP list to reconcile into - * @arr: received SP preference list - * @len: length of @arr in bytes - * @is_server: whether this side is the server (and @fv is the server's list) - * @reorder: whether to reorder the list in @fv after reconciling with @arr - * When successful, > 0 is returned and the reconciled list is in @fval. - * A value of 0 means that negotiation failed (no shared entry). - */ -static int dccp_feat_reconcile(dccp_feat_val *fv, u8 *arr, u8 len, - bool is_server, bool reorder) -{ - int rc; - - if (!fv->sp.vec || !arr) { - DCCP_CRIT("NULL feature value or array"); - return 0; - } - - if (is_server) - rc = dccp_feat_preflist_match(fv->sp.vec, fv->sp.len, arr, len); - else - rc = dccp_feat_preflist_match(arr, len, fv->sp.vec, fv->sp.len); - - if (!reorder) - return rc; - if (rc < 0) - return 0; - - /* - * Reorder list: used for activating features and in dccp_insert_fn_opt. - */ - return dccp_feat_prefer(rc, fv->sp.vec, fv->sp.len); -} - -/** - * dccp_feat_change_recv - Process incoming ChangeL/R options - * @fn: feature-negotiation list to update - * @is_mandatory: whether the Change was preceded by a Mandatory option - * @opt: %DCCPO_CHANGE_L or %DCCPO_CHANGE_R - * @feat: one of %dccp_feature_numbers - * @val: NN value or SP value/preference list - * @len: length of @val in bytes - * @server: whether this node is the server (1) or the client (0) - */ -static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt, - u8 feat, u8 *val, u8 len, const bool server) -{ - u8 defval, type = dccp_feat_type(feat); - const bool local = (opt == DCCPO_CHANGE_R); - struct dccp_feat_entry *entry; - dccp_feat_val fval; - - if (len == 0 || type == FEAT_UNKNOWN) /* 6.1 and 6.6.8 */ - goto unknown_feature_or_value; - - dccp_feat_print_opt(opt, feat, val, len, is_mandatory); - - /* - * Negotiation of NN features: Change R is invalid, so there is no - * simultaneous negotiation; hence we do not look up in the list. 
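To make the server-priority rule implemented by dccp_feat_preflist_match() and dccp_feat_prefer() above concrete, here is a self-contained userspace restatement with a worked example (my own harness, not kernel code): the server prefers {CCID-3, CCID-2}, the client announces only {CCID-2}, so the first server entry that also occurs in the client's list wins and is then rotated to the front of the server's list.

    #include <stdio.h>
    #include <stdint.h>

    /* First entry in servlist that also occurs in clilist (RFC 4340, 6.3.1). */
    static int preflist_match(const uint8_t *servlist, uint8_t slen,
    			  const uint8_t *clilist, uint8_t clen)
    {
    	for (uint8_t s = 0; s < slen; s++)
    		for (uint8_t c = 0; c < clen; c++)
    			if (servlist[s] == clilist[c])
    				return servlist[s];
    	return -1;
    }

    /* Move the agreed value to the front, as dccp_feat_prefer() does. */
    static int prefer(uint8_t value, uint8_t *array, uint8_t len)
    {
    	int found = 0;

    	for (uint8_t i = 0; i < len; i++)
    		if (array[i] == value) {
    			array[i] = array[0];
    			found = 1;
    		}
    	if (found)
    		array[0] = value;
    	return found;
    }

    int main(void)
    {
    	uint8_t server[] = { 3, 2 };	/* prefers CCID-3, then CCID-2 */
    	uint8_t client[] = { 2 };	/* supports only CCID-2 */
    	int win = preflist_match(server, 2, client, 1);

    	if (win >= 0 && prefer((uint8_t)win, server, 2))
    		printf("agreed: CCID-%d, server list now {%u, %u}\n",
    		       win, server[0], server[1]);	/* CCID-2, {2, 3} */
    	return 0;
    }
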
- */ - if (type == FEAT_NN) { - if (local || len > sizeof(fval.nn)) - goto unknown_feature_or_value; - - /* 6.3.2: "The feature remote MUST accept any valid value..." */ - fval.nn = dccp_decode_value_var(val, len); - if (!dccp_feat_is_valid_nn_val(feat, fval.nn)) - goto unknown_feature_or_value; - - return dccp_feat_push_confirm(fn, feat, local, &fval); - } - - /* - * Unidirectional/simultaneous negotiation of SP features (6.3.1) - */ - entry = dccp_feat_list_lookup(fn, feat, local); - if (entry == NULL) { - /* - * No particular preferences have been registered. We deal with - * this situation by assuming that all valid values are equally - * acceptable, and apply the following checks: - * - if the peer's list is a singleton, we accept a valid value; - * - if we are the server, we first try to see if the peer (the - * client) advertises the default value. If yes, we use it, - * otherwise we accept the preferred value; - * - else if we are the client, we use the first list element. - */ - if (dccp_feat_clone_sp_val(&fval, val, 1)) - return DCCP_RESET_CODE_TOO_BUSY; - - if (len > 1 && server) { - defval = dccp_feat_default_value(feat); - if (dccp_feat_preflist_match(&defval, 1, val, len) > -1) - fval.sp.vec[0] = defval; - } else if (!dccp_feat_is_valid_sp_val(feat, fval.sp.vec[0])) { - kfree(fval.sp.vec); - goto unknown_feature_or_value; - } - - /* Treat unsupported CCIDs like invalid values */ - if (feat == DCCPF_CCID && !ccid_support_check(fval.sp.vec, 1)) { - kfree(fval.sp.vec); - goto not_valid_or_not_known; - } - - if (dccp_feat_push_confirm(fn, feat, local, &fval)) { - kfree(fval.sp.vec); - return DCCP_RESET_CODE_TOO_BUSY; - } - - return 0; - } else if (entry->state == FEAT_UNSTABLE) { /* 6.6.2 */ - return 0; - } - - if (dccp_feat_reconcile(&entry->val, val, len, server, true)) { - entry->empty_confirm = false; - } else if (is_mandatory) { - return DCCP_RESET_CODE_MANDATORY_ERROR; - } else if (entry->state == FEAT_INITIALISING) { - /* - * Failed simultaneous negotiation (server only): try to `save' - * the connection by checking whether entry contains the default - * value for @feat. If yes, send an empty Confirm to signal that - * the received Change was not understood - which implies using - * the default value. - * If this also fails, we use Reset as the last resort. - */ - WARN_ON(!server); - defval = dccp_feat_default_value(feat); - if (!dccp_feat_reconcile(&entry->val, &defval, 1, server, true)) - return DCCP_RESET_CODE_OPTION_ERROR; - entry->empty_confirm = true; - } - entry->needs_confirm = true; - entry->needs_mandatory = false; - entry->state = FEAT_STABLE; - return 0; - -unknown_feature_or_value: - if (!is_mandatory) - return dccp_push_empty_confirm(fn, feat, local); - -not_valid_or_not_known: - return is_mandatory ? 
DCCP_RESET_CODE_MANDATORY_ERROR - : DCCP_RESET_CODE_OPTION_ERROR; -} - -/** - * dccp_feat_confirm_recv - Process received Confirm options - * @fn: feature-negotiation list to update - * @is_mandatory: whether @opt was preceded by a Mandatory option - * @opt: %DCCPO_CONFIRM_L or %DCCPO_CONFIRM_R - * @feat: one of %dccp_feature_numbers - * @val: NN value or SP value/preference list - * @len: length of @val in bytes - * @server: whether this node is server (1) or client (0) - */ -static u8 dccp_feat_confirm_recv(struct list_head *fn, u8 is_mandatory, u8 opt, - u8 feat, u8 *val, u8 len, const bool server) -{ - u8 *plist, plen, type = dccp_feat_type(feat); - const bool local = (opt == DCCPO_CONFIRM_R); - struct dccp_feat_entry *entry = dccp_feat_list_lookup(fn, feat, local); - - dccp_feat_print_opt(opt, feat, val, len, is_mandatory); - - if (entry == NULL) { /* nothing queued: ignore or handle error */ - if (is_mandatory && type == FEAT_UNKNOWN) - return DCCP_RESET_CODE_MANDATORY_ERROR; - - if (!local && type == FEAT_NN) /* 6.3.2 */ - goto confirmation_failed; - return 0; - } - - if (entry->state != FEAT_CHANGING) /* 6.6.2 */ - return 0; - - if (len == 0) { - if (dccp_feat_must_be_understood(feat)) /* 6.6.7 */ - goto confirmation_failed; - /* - * Empty Confirm during connection setup: this means reverting - * to the `old' value, which in this case is the default. Since - * we handle default values automatically when no other values - * have been set, we revert to the old value by removing this - * entry from the list. - */ - dccp_feat_list_pop(entry); - return 0; - } - - if (type == FEAT_NN) { - if (len > sizeof(entry->val.nn)) - goto confirmation_failed; - - if (entry->val.nn == dccp_decode_value_var(val, len)) - goto confirmation_succeeded; - - DCCP_WARN("Bogus Confirm for non-existing value\n"); - goto confirmation_failed; - } - - /* - * Parsing SP Confirms: the first element of @val is the preferred - * SP value which the peer confirms, the remainder depends on @len. - * Note that only the confirmed value needs to be a valid SP value. - */ - if (!dccp_feat_is_valid_sp_val(feat, *val)) - goto confirmation_failed; - - if (len == 1) { /* peer didn't supply a preference list */ - plist = val; - plen = len; - } else { /* preferred value + preference list */ - plist = val + 1; - plen = len - 1; - } - - /* Check whether the peer got the reconciliation right (6.6.8) */ - if (dccp_feat_reconcile(&entry->val, plist, plen, server, 0) != *val) { - DCCP_WARN("Confirm selected the wrong value %u\n", *val); - return DCCP_RESET_CODE_OPTION_ERROR; - } - entry->val.sp.vec[0] = *val; - -confirmation_succeeded: - entry->state = FEAT_STABLE; - return 0; - -confirmation_failed: - DCCP_WARN("Confirmation failed\n"); - return is_mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR - : DCCP_RESET_CODE_OPTION_ERROR; -} - -/** - * dccp_feat_handle_nn_established - Fast-path reception of NN options - * @sk: socket of an established DCCP connection - * @mandatory: whether @opt was preceded by a Mandatory option - * @opt: %DCCPO_CHANGE_L | %DCCPO_CONFIRM_R (NN only) - * @feat: NN number, one of %dccp_feature_numbers - * @val: NN value - * @len: length of @val in bytes - * - * This function combines the functionality of change_recv/confirm_recv, with - * the following differences (reset codes are the same): - * - cleanup after receiving the Confirm; - * - values are directly activated after successful parsing; - * - deliberately restricted to NN features. 
- * The restriction to NN features is essential since SP features can have non- - * predictable outcomes (depending on the remote configuration), and are inter- - * dependent (CCIDs for instance cause further dependencies). - */ -static u8 dccp_feat_handle_nn_established(struct sock *sk, u8 mandatory, u8 opt, - u8 feat, u8 *val, u8 len) -{ - struct list_head *fn = &dccp_sk(sk)->dccps_featneg; - const bool local = (opt == DCCPO_CONFIRM_R); - struct dccp_feat_entry *entry; - u8 type = dccp_feat_type(feat); - dccp_feat_val fval; - - dccp_feat_print_opt(opt, feat, val, len, mandatory); - - /* Ignore non-mandatory unknown and non-NN features */ - if (type == FEAT_UNKNOWN) { - if (local && !mandatory) - return 0; - goto fast_path_unknown; - } else if (type != FEAT_NN) { - return 0; - } - - /* - * We don't accept empty Confirms, since in fast-path feature - * negotiation the values are enabled immediately after sending - * the Change option. - * Empty Changes on the other hand are invalid (RFC 4340, 6.1). - */ - if (len == 0 || len > sizeof(fval.nn)) - goto fast_path_unknown; - - if (opt == DCCPO_CHANGE_L) { - fval.nn = dccp_decode_value_var(val, len); - if (!dccp_feat_is_valid_nn_val(feat, fval.nn)) - goto fast_path_unknown; - - if (dccp_feat_push_confirm(fn, feat, local, &fval) || - dccp_feat_activate(sk, feat, local, &fval)) - return DCCP_RESET_CODE_TOO_BUSY; - - /* set the `Ack Pending' flag to piggyback a Confirm */ - inet_csk_schedule_ack(sk); - - } else if (opt == DCCPO_CONFIRM_R) { - entry = dccp_feat_list_lookup(fn, feat, local); - if (entry == NULL || entry->state != FEAT_CHANGING) - return 0; - - fval.nn = dccp_decode_value_var(val, len); - /* - * Just ignore a value that doesn't match our current value. - * If the option changes twice within two RTTs, then at least - * one CONFIRM will be received for the old value after a - * new CHANGE was sent. - */ - if (fval.nn != entry->val.nn) - return 0; - - /* Only activate after receiving the Confirm option (6.6.1). */ - dccp_feat_activate(sk, feat, local, &fval); - - /* It has been confirmed - so remove the entry */ - dccp_feat_list_pop(entry); - - } else { - DCCP_WARN("Received illegal option %u\n", opt); - goto fast_path_failed; - } - return 0; - -fast_path_unknown: - if (!mandatory) - return dccp_push_empty_confirm(fn, feat, local); - -fast_path_failed: - return mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR - : DCCP_RESET_CODE_OPTION_ERROR; -} - -/** - * dccp_feat_parse_options - Process Feature-Negotiation Options - * @sk: for general use and used by the client during connection setup - * @dreq: used by the server during connection setup - * @mandatory: whether @opt was preceded by a Mandatory option - * @opt: %DCCPO_CHANGE_L | %DCCPO_CHANGE_R | %DCCPO_CONFIRM_L | %DCCPO_CONFIRM_R - * @feat: one of %dccp_feature_numbers - * @val: value contents of @opt - * @len: length of @val in bytes - * - * Returns 0 on success, a Reset code for ending the connection otherwise. - */ -int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq, - u8 mandatory, u8 opt, u8 feat, u8 *val, u8 len) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct list_head *fn = dreq ? 
&dreq->dreq_featneg : &dp->dccps_featneg; - bool server = false; - - switch (sk->sk_state) { - /* - * Negotiation during connection setup - */ - case DCCP_LISTEN: - server = true; - fallthrough; - case DCCP_REQUESTING: - switch (opt) { - case DCCPO_CHANGE_L: - case DCCPO_CHANGE_R: - return dccp_feat_change_recv(fn, mandatory, opt, feat, - val, len, server); - case DCCPO_CONFIRM_R: - case DCCPO_CONFIRM_L: - return dccp_feat_confirm_recv(fn, mandatory, opt, feat, - val, len, server); - } - break; - /* - * Support for exchanging NN options on an established connection. - */ - case DCCP_OPEN: - case DCCP_PARTOPEN: - return dccp_feat_handle_nn_established(sk, mandatory, opt, feat, - val, len); - } - return 0; /* ignore FN options in all other states */ -} - -/** - * dccp_feat_init - Seed feature negotiation with host-specific defaults - * @sk: Socket to initialize. - * - * This initialises global defaults, depending on the value of the sysctls. - * These can later be overridden by registering changes via setsockopt calls. - * The last link in the chain is finalise_settings, to make sure that between - * here and the start of actual feature negotiation no inconsistencies enter. - * - * All features not appearing below use either defaults or are otherwise - * later adjusted through dccp_feat_finalise_settings(). - */ -int dccp_feat_init(struct sock *sk) -{ - struct list_head *fn = &dccp_sk(sk)->dccps_featneg; - u8 on = 1, off = 0; - int rc; - struct { - u8 *val; - u8 len; - } tx, rx; - - /* Non-negotiable (NN) features */ - rc = __feat_register_nn(fn, DCCPF_SEQUENCE_WINDOW, 0, - sysctl_dccp_sequence_window); - if (rc) - return rc; - - /* Server-priority (SP) features */ - - /* Advertise that short seqnos are not supported (7.6.1) */ - rc = __feat_register_sp(fn, DCCPF_SHORT_SEQNOS, true, true, &off, 1); - if (rc) - return rc; - - /* RFC 4340 12.1: "If a DCCP is not ECN capable, ..." */ - rc = __feat_register_sp(fn, DCCPF_ECN_INCAPABLE, true, true, &on, 1); - if (rc) - return rc; - - /* - * We advertise the available list of CCIDs and reorder according to - * preferences, to avoid failure resulting from negotiating different - * singleton values (which always leads to failure). - * These settings can still (later) be overridden via sockopts. - */ - if (ccid_get_builtin_ccids(&tx.val, &tx.len)) - return -ENOBUFS; - if (ccid_get_builtin_ccids(&rx.val, &rx.len)) { - kfree(tx.val); - return -ENOBUFS; - } - - if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) || - !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len)) - goto free_ccid_lists; - - rc = __feat_register_sp(fn, DCCPF_CCID, true, false, tx.val, tx.len); - if (rc) - goto free_ccid_lists; - - rc = __feat_register_sp(fn, DCCPF_CCID, false, false, rx.val, rx.len); - -free_ccid_lists: - kfree(tx.val); - kfree(rx.val); - return rc; -} - -int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_feat_entry *cur, *next; - int idx; - dccp_feat_val *fvals[DCCP_FEAT_SUPPORTED_MAX][2] = { - [0 ... DCCP_FEAT_SUPPORTED_MAX-1] = { NULL, NULL } - }; - - list_for_each_entry(cur, fn_list, node) { - /* - * An empty Confirm means that either an unknown feature type - * or an invalid value was present. In the first case there is - * nothing to activate, in the other the default value is used. 
- */ - if (cur->empty_confirm) - continue; - - idx = dccp_feat_index(cur->feat_num); - if (idx < 0) { - DCCP_BUG("Unknown feature %u", cur->feat_num); - goto activation_failed; - } - if (cur->state != FEAT_STABLE) { - DCCP_CRIT("Negotiation of %s %s failed in state %s", - cur->is_local ? "local" : "remote", - dccp_feat_fname(cur->feat_num), - dccp_feat_sname[cur->state]); - goto activation_failed; - } - fvals[idx][cur->is_local] = &cur->val; - } - - /* - * Activate in decreasing order of index, so that the CCIDs are always - * activated as the last feature. This avoids the case where a CCID - * relies on the initialisation of one or more features that it depends - * on (e.g. Send NDP Count, Send Ack Vector, and Ack Ratio features). - */ - for (idx = DCCP_FEAT_SUPPORTED_MAX; --idx >= 0;) - if (__dccp_feat_activate(sk, idx, 0, fvals[idx][0]) || - __dccp_feat_activate(sk, idx, 1, fvals[idx][1])) { - DCCP_CRIT("Could not activate %d", idx); - goto activation_failed; - } - - /* Clean up Change options which have been confirmed already */ - list_for_each_entry_safe(cur, next, fn_list, node) - if (!cur->needs_confirm) - dccp_feat_list_pop(cur); - - dccp_pr_debug("Activation OK\n"); - return 0; - -activation_failed: - /* - * We clean up everything that may have been allocated, since - * it is difficult to track at which stage negotiation failed. - * This is ok, since all allocation functions below are robust - * against NULL arguments. - */ - ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); - ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); - dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; - dccp_ackvec_free(dp->dccps_hc_rx_ackvec); - dp->dccps_hc_rx_ackvec = NULL; - return -1; -} diff --git a/net/dccp/feat.h b/net/dccp/feat.h deleted file mode 100644 index 57d9c026aa3f..000000000000 --- a/net/dccp/feat.h +++ /dev/null @@ -1,133 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _DCCP_FEAT_H -#define _DCCP_FEAT_H -/* - * net/dccp/feat.h - * - * Feature negotiation for the DCCP protocol (RFC 4340, section 6) - * Copyright (c) 2008 Gerrit Renker <gerrit@erg.abdn.ac.uk> - * Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk> - */ -#include <linux/types.h> -#include "dccp.h" - -/* - * Known limit values - */ -/* Ack Ratio takes 2-byte integer values (11.3) */ -#define DCCPF_ACK_RATIO_MAX 0xFFFF -/* Wmin=32 and Wmax=2^46-1 from 7.5.2 */ -#define DCCPF_SEQ_WMIN 32 -#define DCCPF_SEQ_WMAX 0x3FFFFFFFFFFFull -/* Maximum number of SP values that fit in a single (Confirm) option */ -#define DCCP_FEAT_MAX_SP_VALS (DCCP_SINGLE_OPT_MAXLEN - 2) - -enum dccp_feat_type { - FEAT_AT_RX = 1, /* located at RX side of half-connection */ - FEAT_AT_TX = 2, /* located at TX side of half-connection */ - FEAT_SP = 4, /* server-priority reconciliation (6.3.1) */ - FEAT_NN = 8, /* non-negotiable reconciliation (6.3.2) */ - FEAT_UNKNOWN = 0xFF /* not understood or invalid feature */ -}; - -enum dccp_feat_state { - FEAT_DEFAULT = 0, /* using default values from 6.4 */ - FEAT_INITIALISING, /* feature is being initialised */ - FEAT_CHANGING, /* Change sent but not confirmed yet */ - FEAT_UNSTABLE, /* local modification in state CHANGING */ - FEAT_STABLE /* both ends (think they) agree */ -}; - -/** - * dccp_feat_val - Container for SP or NN feature values - * @nn: single NN value - * @sp.vec: single SP value plus optional preference list - * @sp.len: length of @sp.vec in bytes - */ -typedef union { - u64 nn; - struct { - u8 *vec; - u8 len; - } sp; -} dccp_feat_val; - -/** - * struct feat_entry - Data structure to 
perform feature negotiation - * @val: feature's current value (SP features may have preference list) - * @state: feature's current state - * @feat_num: one of %dccp_feature_numbers - * @needs_mandatory: whether Mandatory options should be sent - * @needs_confirm: whether to send a Confirm instead of a Change - * @empty_confirm: whether to send an empty Confirm (depends on @needs_confirm) - * @is_local: feature location (1) or feature-remote (0) - * @node: list pointers, entries arranged in FIFO order - */ -struct dccp_feat_entry { - dccp_feat_val val; - enum dccp_feat_state state:8; - u8 feat_num; - - bool needs_mandatory, - needs_confirm, - empty_confirm, - is_local; - - struct list_head node; -}; - -static inline u8 dccp_feat_genopt(struct dccp_feat_entry *entry) -{ - if (entry->needs_confirm) - return entry->is_local ? DCCPO_CONFIRM_L : DCCPO_CONFIRM_R; - return entry->is_local ? DCCPO_CHANGE_L : DCCPO_CHANGE_R; -} - -/** - * struct ccid_dependency - Track changes resulting from choosing a CCID - * @dependent_feat: one of %dccp_feature_numbers - * @is_local: local (1) or remote (0) @dependent_feat - * @is_mandatory: whether presence of @dependent_feat is mission-critical or not - * @val: corresponding default value for @dependent_feat (u8 is sufficient here) - */ -struct ccid_dependency { - u8 dependent_feat; - bool is_local:1, - is_mandatory:1; - u8 val; -}; - -/* - * Sysctls to seed defaults for feature negotiation - */ -extern unsigned long sysctl_dccp_sequence_window; -extern int sysctl_dccp_rx_ccid; -extern int sysctl_dccp_tx_ccid; - -int dccp_feat_init(struct sock *sk); -int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, - u8 const *list, u8 len); -int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, - u8 mand, u8 opt, u8 feat, u8 *val, u8 len); -int dccp_feat_clone_list(struct list_head const *, struct list_head *); - -/* - * Encoding variable-length options and their maximum length. - * - * This affects NN options (SP options are all u8) and other variable-length - * options (see table 3 in RFC 4340). The limit is currently given by the Sequence - * Window NN value (sec. 7.5.2) and the NDP count (sec. 7.7) option; all other - * options consume less than 6 bytes (timestamps are 4 bytes). - * When updating this constant (e.g. due to new internet drafts / RFCs), make - * sure that you also update all code which refers to it. 
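The two helpers declared at the end of this header, dccp_encode_value_var() and dccp_decode_value_var(), are defined in net/dccp/options.c and so do not appear in this hunk. A self-contained sketch of the big-endian variable-length scheme they implement, reconstructed from how the callers above use them on Sequence Window (6-byte) and Ack Ratio (2-byte) values; treat it as illustrative rather than the kernel's exact code:

    #include <assert.h>
    #include <stdint.h>

    /* Write the low 'len' bytes of 'value' in network (big-endian) order. */
    static void encode_value_var(uint64_t value, uint8_t *to, uint8_t len)
    {
    	while (len--)
    		*to++ = (uint8_t)(value >> (8 * len));
    }

    /* Inverse: fold 'len' big-endian bytes back into a 64-bit value. */
    static uint64_t decode_value_var(const uint8_t *bf, uint8_t len)
    {
    	uint64_t value = 0;

    	while (len--)
    		value = (value << 8) | *bf++;
    	return value;
    }

    int main(void)
    {
    	uint8_t buf[6];		/* DCCP_OPTVAL_MAXLEN */

    	encode_value_var(0x3FFFFFFFFFFFULL, buf, 6);	/* Wmax, 7.5.2 */
    	assert(decode_value_var(buf, 6) == 0x3FFFFFFFFFFFULL);

    	encode_value_var(2, buf, 2);			/* an Ack Ratio */
    	assert(decode_value_var(buf, 2) == 2);
    	return 0;
    }
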
- */ -#define DCCP_OPTVAL_MAXLEN 6 - -void dccp_encode_value_var(const u64 value, u8 *to, const u8 len); -u64 dccp_decode_value_var(const u8 *bf, const u8 len); -u64 dccp_feat_nn_get(struct sock *sk, u8 feat); - -int dccp_insert_option_mandatory(struct sk_buff *skb); -int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, u8 *val, u8 len, - bool repeat_first); -#endif /* _DCCP_FEAT_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c deleted file mode 100644 index 2cbb757a894f..000000000000 --- a/net/dccp/input.c +++ /dev/null @@ -1,739 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/dccp/input.c - * - * An implementation of the DCCP protocol - * Arnaldo Carvalho de Melo <acme@conectiva.com.br> - */ - -#include <linux/dccp.h> -#include <linux/skbuff.h> -#include <linux/slab.h> - -#include <net/sock.h> - -#include "ackvec.h" -#include "ccid.h" -#include "dccp.h" - -/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */ -int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8; - -static void dccp_enqueue_skb(struct sock *sk, struct sk_buff *skb) -{ - __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4); - __skb_queue_tail(&sk->sk_receive_queue, skb); - skb_set_owner_r(skb, sk); - sk->sk_data_ready(sk); -} - -static void dccp_fin(struct sock *sk, struct sk_buff *skb) -{ - /* - * On receiving Close/CloseReq, both RD/WR shutdown are performed. - * RFC 4340, 8.3 says that we MAY send further Data/DataAcks after - * receiving the closing segment, but there is no guarantee that such - * data will be processed at all. - */ - sk->sk_shutdown = SHUTDOWN_MASK; - sock_set_flag(sk, SOCK_DONE); - dccp_enqueue_skb(sk, skb); -} - -static int dccp_rcv_close(struct sock *sk, struct sk_buff *skb) -{ - int queued = 0; - - switch (sk->sk_state) { - /* - * We ignore Close when received in one of the following states: - * - CLOSED (may be a late or duplicate packet) - * - PASSIVE_CLOSEREQ (the peer has sent a CloseReq earlier) - * - RESPOND (already handled by dccp_check_req) - */ - case DCCP_CLOSING: - /* - * Simultaneous-close: receiving a Close after sending one. This - * can happen if both client and server perform active-close and - * will result in an endless ping-pong of crossing and retrans- - * mitted Close packets, which only terminates when one of the - * nodes times out (min. 64 seconds). Quicker convergence can be - * achieved when one of the nodes acts as tie-breaker. - * This is ok as both ends are done with data transfer and each - * end is just waiting for the other to acknowledge termination. - */ - if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) - break; - fallthrough; - case DCCP_REQUESTING: - case DCCP_ACTIVE_CLOSEREQ: - dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); - dccp_done(sk); - break; - case DCCP_OPEN: - case DCCP_PARTOPEN: - /* Give waiting application a chance to read pending data */ - queued = 1; - dccp_fin(sk, skb); - dccp_set_state(sk, DCCP_PASSIVE_CLOSE); - fallthrough; - case DCCP_PASSIVE_CLOSE: - /* - * Retransmitted Close: we have already enqueued the first one. 
- */ - sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); - } - return queued; -} - -static int dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) -{ - int queued = 0; - - /* - * Step 7: Check for unexpected packet types - * If (S.is_server and P.type == CloseReq) - * Send Sync packet acknowledging P.seqno - * Drop packet and return - */ - if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); - return queued; - } - - /* Step 13: process relevant Client states < CLOSEREQ */ - switch (sk->sk_state) { - case DCCP_REQUESTING: - dccp_send_close(sk, 0); - dccp_set_state(sk, DCCP_CLOSING); - break; - case DCCP_OPEN: - case DCCP_PARTOPEN: - /* Give waiting application a chance to read pending data */ - queued = 1; - dccp_fin(sk, skb); - dccp_set_state(sk, DCCP_PASSIVE_CLOSEREQ); - fallthrough; - case DCCP_PASSIVE_CLOSEREQ: - sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); - } - return queued; -} - -static u16 dccp_reset_code_convert(const u8 code) -{ - static const u16 error_code[] = { - [DCCP_RESET_CODE_CLOSED] = 0, /* normal termination */ - [DCCP_RESET_CODE_UNSPECIFIED] = 0, /* nothing known */ - [DCCP_RESET_CODE_ABORTED] = ECONNRESET, - - [DCCP_RESET_CODE_NO_CONNECTION] = ECONNREFUSED, - [DCCP_RESET_CODE_CONNECTION_REFUSED] = ECONNREFUSED, - [DCCP_RESET_CODE_TOO_BUSY] = EUSERS, - [DCCP_RESET_CODE_AGGRESSION_PENALTY] = EDQUOT, - - [DCCP_RESET_CODE_PACKET_ERROR] = ENOMSG, - [DCCP_RESET_CODE_BAD_INIT_COOKIE] = EBADR, - [DCCP_RESET_CODE_BAD_SERVICE_CODE] = EBADRQC, - [DCCP_RESET_CODE_OPTION_ERROR] = EILSEQ, - [DCCP_RESET_CODE_MANDATORY_ERROR] = EOPNOTSUPP, - }; - - return code >= DCCP_MAX_RESET_CODES ? 0 : error_code[code]; -} - -static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) -{ - u16 err = dccp_reset_code_convert(dccp_hdr_reset(skb)->dccph_reset_code); - - sk->sk_err = err; - - /* Queue the equivalent of TCP fin so that dccp_recvmsg exits the loop */ - dccp_fin(sk, skb); - - if (err && !sock_flag(sk, SOCK_DEAD)) - sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); - dccp_time_wait(sk, DCCP_TIME_WAIT, 0); -} - -static void dccp_handle_ackvec_processing(struct sock *sk, struct sk_buff *skb) -{ - struct dccp_ackvec *av = dccp_sk(sk)->dccps_hc_rx_ackvec; - - if (av == NULL) - return; - if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) - dccp_ackvec_clear_state(av, DCCP_SKB_CB(skb)->dccpd_ack_seq); - dccp_ackvec_input(av, skb); -} - -static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb) -{ - const struct dccp_sock *dp = dccp_sk(sk); - - /* Don't deliver to RX CCID when node has shut down read end. */ - if (!(sk->sk_shutdown & RCV_SHUTDOWN)) - ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); - /* - * Until the TX queue has been drained, we can not honour SHUT_WR, since - * we need received feedback as input to adjust congestion control. - */ - if (sk->sk_write_queue.qlen > 0 || !(sk->sk_shutdown & SEND_SHUTDOWN)) - ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); -} - -static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); - struct dccp_sock *dp = dccp_sk(sk); - u64 lswl, lawl, seqno = DCCP_SKB_CB(skb)->dccpd_seq, - ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; - - /* - * Step 5: Prepare sequence numbers for Sync - * If P.type == Sync or P.type == SyncAck, - * If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL, - * / * P is valid, so update sequence number variables - * accordingly. 
After this update, P will pass the tests - * in Step 6. A SyncAck is generated if necessary in - * Step 15 * / - * Update S.GSR, S.SWL, S.SWH - * Otherwise, - * Drop packet and return - */ - if (dh->dccph_type == DCCP_PKT_SYNC || - dh->dccph_type == DCCP_PKT_SYNCACK) { - if (between48(ackno, dp->dccps_awl, dp->dccps_awh) && - dccp_delta_seqno(dp->dccps_swl, seqno) >= 0) - dccp_update_gsr(sk, seqno); - else - return -1; - } - - /* - * Step 6: Check sequence numbers - * Let LSWL = S.SWL and LAWL = S.AWL - * If P.type == CloseReq or P.type == Close or P.type == Reset, - * LSWL := S.GSR + 1, LAWL := S.GAR - * If LSWL <= P.seqno <= S.SWH - * and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH), - * Update S.GSR, S.SWL, S.SWH - * If P.type != Sync, - * Update S.GAR - */ - lswl = dp->dccps_swl; - lawl = dp->dccps_awl; - - if (dh->dccph_type == DCCP_PKT_CLOSEREQ || - dh->dccph_type == DCCP_PKT_CLOSE || - dh->dccph_type == DCCP_PKT_RESET) { - lswl = ADD48(dp->dccps_gsr, 1); - lawl = dp->dccps_gar; - } - - if (between48(seqno, lswl, dp->dccps_swh) && - (ackno == DCCP_PKT_WITHOUT_ACK_SEQ || - between48(ackno, lawl, dp->dccps_awh))) { - dccp_update_gsr(sk, seqno); - - if (dh->dccph_type != DCCP_PKT_SYNC && - ackno != DCCP_PKT_WITHOUT_ACK_SEQ && - after48(ackno, dp->dccps_gar)) - dp->dccps_gar = ackno; - } else { - unsigned long now = jiffies; - /* - * Step 6: Check sequence numbers - * Otherwise, - * If P.type == Reset, - * Send Sync packet acknowledging S.GSR - * Otherwise, - * Send Sync packet acknowledging P.seqno - * Drop packet and return - * - * These Syncs are rate-limited as per RFC 4340, 7.5.4: - * at most 1 / (dccp_sync_rate_limit * HZ) Syncs per second. - */ - if (time_before(now, (dp->dccps_rate_last + - sysctl_dccp_sync_ratelimit))) - return -1; - - DCCP_WARN("Step 6 failed for %s packet, " - "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " - "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " - "sending SYNC...\n", dccp_packet_name(dh->dccph_type), - (unsigned long long) lswl, (unsigned long long) seqno, - (unsigned long long) dp->dccps_swh, - (ackno == DCCP_PKT_WITHOUT_ACK_SEQ) ? 
"doesn't exist" - : "exists", - (unsigned long long) lawl, (unsigned long long) ackno, - (unsigned long long) dp->dccps_awh); - - dp->dccps_rate_last = now; - - if (dh->dccph_type == DCCP_PKT_RESET) - seqno = dp->dccps_gsr; - dccp_send_sync(sk, seqno, DCCP_PKT_SYNC); - return -1; - } - - return 0; -} - -static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct dccp_hdr *dh, const unsigned int len) -{ - struct dccp_sock *dp = dccp_sk(sk); - - switch (dccp_hdr(skb)->dccph_type) { - case DCCP_PKT_DATAACK: - case DCCP_PKT_DATA: - /* - * FIXME: schedule DATA_DROPPED (RFC 4340, 11.7.2) if and when - * - sk_shutdown == RCV_SHUTDOWN, use Code 1, "Not Listening" - * - sk_receive_queue is full, use Code 2, "Receive Buffer" - */ - dccp_enqueue_skb(sk, skb); - return 0; - case DCCP_PKT_ACK: - goto discard; - case DCCP_PKT_RESET: - /* - * Step 9: Process Reset - * If P.type == Reset, - * Tear down connection - * S.state := TIMEWAIT - * Set TIMEWAIT timer - * Drop packet and return - */ - dccp_rcv_reset(sk, skb); - return 0; - case DCCP_PKT_CLOSEREQ: - if (dccp_rcv_closereq(sk, skb)) - return 0; - goto discard; - case DCCP_PKT_CLOSE: - if (dccp_rcv_close(sk, skb)) - return 0; - goto discard; - case DCCP_PKT_REQUEST: - /* Step 7 - * or (S.is_server and P.type == Response) - * or (S.is_client and P.type == Request) - * or (S.state >= OPEN and P.type == Request - * and P.seqno >= S.OSR) - * or (S.state >= OPEN and P.type == Response - * and P.seqno >= S.OSR) - * or (S.state == RESPOND and P.type == Data), - * Send Sync packet acknowledging P.seqno - * Drop packet and return - */ - if (dp->dccps_role != DCCP_ROLE_LISTEN) - goto send_sync; - goto check_seq; - case DCCP_PKT_RESPONSE: - if (dp->dccps_role != DCCP_ROLE_CLIENT) - goto send_sync; -check_seq: - if (dccp_delta_seqno(dp->dccps_osr, - DCCP_SKB_CB(skb)->dccpd_seq) >= 0) { -send_sync: - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_PKT_SYNC); - } - break; - case DCCP_PKT_SYNC: - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_PKT_SYNCACK); - /* - * From RFC 4340, sec. 5.7 - * - * As with DCCP-Ack packets, DCCP-Sync and DCCP-SyncAck packets - * MAY have non-zero-length application data areas, whose - * contents receivers MUST ignore. 
- */ - goto discard; - } - - DCCP_INC_STATS(DCCP_MIB_INERRS); -discard: - __kfree_skb(skb); - return 0; -} - -int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct dccp_hdr *dh, const unsigned int len) -{ - if (dccp_check_seqno(sk, skb)) - goto discard; - - if (dccp_parse_options(sk, NULL, skb)) - return 1; - - dccp_handle_ackvec_processing(sk, skb); - dccp_deliver_input_to_ccids(sk, skb); - - return __dccp_rcv_established(sk, skb, dh, len); -discard: - __kfree_skb(skb); - return 0; -} - -EXPORT_SYMBOL_GPL(dccp_rcv_established); - -static int dccp_rcv_request_sent_state_process(struct sock *sk, - struct sk_buff *skb, - const struct dccp_hdr *dh, - const unsigned int len) -{ - /* - * Step 4: Prepare sequence numbers in REQUEST - * If S.state == REQUEST, - * If (P.type == Response or P.type == Reset) - * and S.AWL <= P.ackno <= S.AWH, - * / * Set sequence number variables corresponding to the - * other endpoint, so P will pass the tests in Step 6 * / - * Set S.GSR, S.ISR, S.SWL, S.SWH - * / * Response processing continues in Step 10; Reset - * processing continues in Step 9 * / - */ - if (dh->dccph_type == DCCP_PKT_RESPONSE) { - const struct inet_connection_sock *icsk = inet_csk(sk); - struct dccp_sock *dp = dccp_sk(sk); - long tstamp = dccp_timestamp(); - - if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, - dp->dccps_awl, dp->dccps_awh)) { - dccp_pr_debug("invalid ackno: S.AWL=%llu, " - "P.ackno=%llu, S.AWH=%llu\n", - (unsigned long long)dp->dccps_awl, - (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, - (unsigned long long)dp->dccps_awh); - goto out_invalid_packet; - } - - /* - * If option processing (Step 8) failed, return 1 here so that - * dccp_v4_do_rcv() sends a Reset. The Reset code depends on - * the option type and is set in dccp_parse_options(). - */ - if (dccp_parse_options(sk, NULL, skb)) - return 1; - - /* Obtain usec RTT sample from SYN exchange (used by TFRC). */ - if (likely(dp->dccps_options_received.dccpor_timestamp_echo)) - dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp - - dp->dccps_options_received.dccpor_timestamp_echo)); - - /* Stop the REQUEST timer */ - inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); - WARN_ON(sk->sk_send_head == NULL); - kfree_skb(sk->sk_send_head); - sk->sk_send_head = NULL; - - /* - * Set ISR, GSR from packet. ISS was set in dccp_v{4,6}_connect - * and GSS in dccp_transmit_skb(). Setting AWL/AWH and SWL/SWH - * is done as part of activating the feature values below, since - * these settings depend on the local/remote Sequence Window - * features, which were undefined or not confirmed until now. - */ - dp->dccps_gsr = dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; - - dccp_sync_mss(sk, icsk->icsk_pmtu_cookie); - - /* - * Step 10: Process REQUEST state (second part) - * If S.state == REQUEST, - * / * If we get here, P is a valid Response from the - * server (see Step 4), and we should move to - * PARTOPEN state. PARTOPEN means send an Ack, - * don't send Data packets, retransmit Acks - * periodically, and always include any Init Cookie - * from the Response * / - * S.state := PARTOPEN - * Set PARTOPEN timer - * Continue with S.state == PARTOPEN - * / * Step 12 will send the Ack completing the - * three-way handshake * / - */ - dccp_set_state(sk, DCCP_PARTOPEN); - - /* - * If feature negotiation was successful, activate features now; - * an activation failure means that this host could not activate - * one or more features (e.g. insufficient memory), which would - * leave at least one feature in an undefined state. 
- */ - if (dccp_feat_activate_values(sk, &dp->dccps_featneg)) - goto unable_to_proceed; - - /* Make sure socket is routed, for correct metrics. */ - icsk->icsk_af_ops->rebuild_header(sk); - - if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_state_change(sk); - sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); - } - - if (sk->sk_write_pending || inet_csk_in_pingpong_mode(sk) || - icsk->icsk_accept_queue.rskq_defer_accept) { - /* Save one ACK. Data will be ready after - * several ticks, if write_pending is set. - * - * It may be deleted, but with this feature tcpdumps - * look so _wonderfully_ clever, that I was not able - * to stand against the temptation 8) --ANK - */ - /* - * OK, in DCCP we can as well do a similar trick, it's - * even in the draft, but there is no need for us to - * schedule an ack here, as dccp_sendmsg does this for - * us, also stated in the draft. -acme - */ - __kfree_skb(skb); - return 0; - } - dccp_send_ack(sk); - return -1; - } - -out_invalid_packet: - /* dccp_v4_do_rcv will send a reset */ - DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; - return 1; - -unable_to_proceed: - DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_ABORTED; - /* - * We mark this socket as no longer usable, so that the loop in - * dccp_sendmsg() terminates and the application gets notified. - */ - dccp_set_state(sk, DCCP_CLOSED); - sk->sk_err = ECOMM; - return 1; -} - -static int dccp_rcv_respond_partopen_state_process(struct sock *sk, - struct sk_buff *skb, - const struct dccp_hdr *dh, - const unsigned int len) -{ - struct dccp_sock *dp = dccp_sk(sk); - u32 sample = dp->dccps_options_received.dccpor_timestamp_echo; - int queued = 0; - - switch (dh->dccph_type) { - case DCCP_PKT_RESET: - inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); - break; - case DCCP_PKT_DATA: - if (sk->sk_state == DCCP_RESPOND) - break; - fallthrough; - case DCCP_PKT_DATAACK: - case DCCP_PKT_ACK: - /* - * FIXME: we should be resetting the PARTOPEN (DELACK) timer - * here but only if we haven't used the DELACK timer for - * something else, like sending a delayed ack for a TIMESTAMP - * echo, etc.; for now we're not clearing it: sending an extra - * ACK when there is nothing else to do in DELACK is not a big - * deal after all. - */ - - /* Stop the PARTOPEN timer */ - if (sk->sk_state == DCCP_PARTOPEN) - inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); - - /* Obtain usec RTT sample from SYN exchange (used by TFRC). */ - if (likely(sample)) { - long delta = dccp_timestamp() - sample; - - dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * delta); - } - - dp->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq; - dccp_set_state(sk, DCCP_OPEN); - - if (dh->dccph_type == DCCP_PKT_DATAACK || - dh->dccph_type == DCCP_PKT_DATA) { - __dccp_rcv_established(sk, skb, dh, len); - queued = 1; /* packet was queued - (by __dccp_rcv_established) */ - } - break; - } - - return queued; -} - -int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - struct dccp_hdr *dh, unsigned int len) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - const int old_state = sk->sk_state; - bool acceptable; - int queued = 0; - - /* - * Step 3: Process LISTEN state - * - * If S.state == LISTEN, - * If P.type == Request or P contains a valid Init Cookie option, - * (* Must scan the packet's options to check for Init - * Cookies. Only Init Cookies are processed here, - * however; other options are processed in Step 8. 
This - * scan need only be performed if the endpoint uses Init - * Cookies *) - * (* Generate a new socket and switch to that socket *) - * Set S := new socket for this port pair - * S.state = RESPOND - * Choose S.ISS (initial seqno) or set from Init Cookies - * Initialize S.GAR := S.ISS - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init - * Cookies Continue with S.state == RESPOND - * (* A Response packet will be generated in Step 11 *) - * Otherwise, - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return - */ - if (sk->sk_state == DCCP_LISTEN) { - if (dh->dccph_type == DCCP_PKT_REQUEST) { - /* It is possible that we process SYN packets from backlog, - * so we need to make sure to disable BH and RCU right there. - */ - rcu_read_lock(); - local_bh_disable(); - acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0; - local_bh_enable(); - rcu_read_unlock(); - if (!acceptable) - return 1; - consume_skb(skb); - return 0; - } - if (dh->dccph_type == DCCP_PKT_RESET) - goto discard; - - /* Caller (dccp_v4_do_rcv) will send Reset */ - dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - return 1; - } else if (sk->sk_state == DCCP_CLOSED) { - dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - return 1; - } - - /* Step 6: Check sequence numbers (omitted in LISTEN/REQUEST state) */ - if (sk->sk_state != DCCP_REQUESTING && dccp_check_seqno(sk, skb)) - goto discard; - - /* - * Step 7: Check for unexpected packet types - * If (S.is_server and P.type == Response) - * or (S.is_client and P.type == Request) - * or (S.state == RESPOND and P.type == Data), - * Send Sync packet acknowledging P.seqno - * Drop packet and return - */ - if ((dp->dccps_role != DCCP_ROLE_CLIENT && - dh->dccph_type == DCCP_PKT_RESPONSE) || - (dp->dccps_role == DCCP_ROLE_CLIENT && - dh->dccph_type == DCCP_PKT_REQUEST) || - (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { - dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); - goto discard; - } - - /* Step 8: Process options */ - if (dccp_parse_options(sk, NULL, skb)) - return 1; - - /* - * Step 9: Process Reset - * If P.type == Reset, - * Tear down connection - * S.state := TIMEWAIT - * Set TIMEWAIT timer - * Drop packet and return - */ - if (dh->dccph_type == DCCP_PKT_RESET) { - dccp_rcv_reset(sk, skb); - return 0; - } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { /* Step 13 */ - if (dccp_rcv_closereq(sk, skb)) - return 0; - goto discard; - } else if (dh->dccph_type == DCCP_PKT_CLOSE) { /* Step 14 */ - if (dccp_rcv_close(sk, skb)) - return 0; - goto discard; - } - - switch (sk->sk_state) { - case DCCP_REQUESTING: - queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); - if (queued >= 0) - return queued; - - __kfree_skb(skb); - return 0; - - case DCCP_PARTOPEN: - /* Step 8: if using Ack Vectors, mark packet acknowledgeable */ - dccp_handle_ackvec_processing(sk, skb); - dccp_deliver_input_to_ccids(sk, skb); - fallthrough; - case DCCP_RESPOND: - queued = dccp_rcv_respond_partopen_state_process(sk, skb, - dh, len); - break; - } - - if (dh->dccph_type == DCCP_PKT_ACK || - dh->dccph_type == DCCP_PKT_DATAACK) { - switch (old_state) { - case DCCP_PARTOPEN: - sk->sk_state_change(sk); - sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); - break; - } - } else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { - dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); - goto discard; - } - - if (!queued) { -discard: - __kfree_skb(skb); - } - return 0; -} - -EXPORT_SYMBOL_GPL(dccp_rcv_state_process); - -/** - * 
dccp_sample_rtt - Validate and finalise computation of RTT sample
- * @sk:    socket structure
- * @delta: number of microseconds between packet and acknowledgment
- *
- * The routine is kept generic to work in different contexts. It should be
- * called immediately when the ACK used for the RTT sample arrives.
- */
-u32 dccp_sample_rtt(struct sock *sk, long delta)
-{
-        /* dccpor_elapsed_time is either zeroed out or set and > 0 */
-        delta -= dccp_sk(sk)->dccps_options_received.dccpor_elapsed_time * 10;
-
-        if (unlikely(delta <= 0)) {
-                DCCP_WARN("unusable RTT sample %ld, using min\n", delta);
-                return DCCP_SANE_RTT_MIN;
-        }
-        if (unlikely(delta > DCCP_SANE_RTT_MAX)) {
-                DCCP_WARN("RTT sample %ld too large, using max\n", delta);
-                return DCCP_SANE_RTT_MAX;
-        }
-
-        return delta;
-}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
deleted file mode 100644
index 2045ddac0fe9..000000000000
--- a/net/dccp/ipv4.c
+++ /dev/null
@@ -1,1101 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *  net/dccp/ipv4.c
- *
- *  An implementation of the DCCP protocol
- *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- */
-
-#include <linux/dccp.h>
-#include <linux/icmp.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/random.h>
-
-#include <net/icmp.h>
-#include <net/inet_common.h>
-#include <net/inet_dscp.h>
-#include <net/inet_hashtables.h>
-#include <net/inet_sock.h>
-#include <net/protocol.h>
-#include <net/sock.h>
-#include <net/timewait_sock.h>
-#include <net/tcp_states.h>
-#include <net/xfrm.h>
-#include <net/secure_seq.h>
-#include <net/netns/generic.h>
-#include <net/rstreason.h>
-
-#include "ackvec.h"
-#include "ccid.h"
-#include "dccp.h"
-#include "feat.h"
-
-struct dccp_v4_pernet {
-        struct sock *v4_ctl_sk;
-};
-
-static unsigned int dccp_v4_pernet_id __read_mostly;
-
-/*
- * The per-net v4_ctl_sk socket is used for responding to
- * Out-of-the-Blue (OOTB) packets. A control socket is created
- * for this purpose at initialization time.
- */
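Before the connect path that follows, it may help to recall the (former) userspace view of this code. A minimal sketch of a DCCP client, assuming the numeric constants from linux/dccp.h and linux/socket.h (SOCK_DCCP = 6, IPPROTO_DCCP = 33, SOL_DCCP = 269, DCCP_SOCKOPT_SERVICE = 2) for systems whose headers no longer expose them; the address, port, and service code are placeholders, not anything mandated by the protocol:

    #include <stdint.h>
    #include <unistd.h>
    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <sys/socket.h>

    #ifndef SOCK_DCCP
    #define SOCK_DCCP 6
    #endif
    #ifndef IPPROTO_DCCP
    #define IPPROTO_DCCP 33
    #endif
    #ifndef SOL_DCCP
    #define SOL_DCCP 269
    #endif
    #ifndef DCCP_SOCKOPT_SERVICE
    #define DCCP_SOCKOPT_SERVICE 2
    #endif

    int main(void)
    {
            struct sockaddr_in sin = { .sin_family = AF_INET,
                                       .sin_port   = htons(5001) };
            uint32_t service = htonl(42);   /* placeholder service code */
            int fd;

            inet_pton(AF_INET, "192.0.2.1", &sin.sin_addr);

            fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
            if (fd < 0)
                    return 1;
            /* The service code must be set before connect(). */
            if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
                           &service, sizeof(service)) < 0)
                    return 1;
            if (connect(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0)
                    return 1;
            write(fd, "hello", 5);          /* one datagram per write */
            close(fd);
            return 0;
    }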
- */ - -int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) -{ - const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; - struct inet_sock *inet = inet_sk(sk); - struct dccp_sock *dp = dccp_sk(sk); - __be16 orig_sport, orig_dport; - __be32 daddr, nexthop; - struct flowi4 *fl4; - struct rtable *rt; - int err; - struct ip_options_rcu *inet_opt; - - dp->dccps_role = DCCP_ROLE_CLIENT; - - if (addr_len < sizeof(struct sockaddr_in)) - return -EINVAL; - - if (usin->sin_family != AF_INET) - return -EAFNOSUPPORT; - - nexthop = daddr = usin->sin_addr.s_addr; - - inet_opt = rcu_dereference_protected(inet->inet_opt, - lockdep_sock_is_held(sk)); - if (inet_opt != NULL && inet_opt->opt.srr) { - if (daddr == 0) - return -EINVAL; - nexthop = inet_opt->opt.faddr; - } - - orig_sport = inet->inet_sport; - orig_dport = usin->sin_port; - fl4 = &inet->cork.fl.u.ip4; - rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, - sk->sk_bound_dev_if, IPPROTO_DCCP, orig_sport, - orig_dport, sk); - if (IS_ERR(rt)) - return PTR_ERR(rt); - - if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { - ip_rt_put(rt); - return -ENETUNREACH; - } - - if (inet_opt == NULL || !inet_opt->opt.srr) - daddr = fl4->daddr; - - if (inet->inet_saddr == 0) { - err = inet_bhash2_update_saddr(sk, &fl4->saddr, AF_INET); - if (err) { - ip_rt_put(rt); - return err; - } - } else { - sk_rcv_saddr_set(sk, inet->inet_saddr); - } - - inet->inet_dport = usin->sin_port; - sk_daddr_set(sk, daddr); - - inet_csk(sk)->icsk_ext_hdr_len = 0; - if (inet_opt) - inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; - /* - * Socket identity is still unknown (sport may be zero). - * However we set state to DCCP_REQUESTING and not releasing socket - * lock select source port, enter ourselves into the hash tables and - * complete initialization after this. - */ - dccp_set_state(sk, DCCP_REQUESTING); - err = inet_hash_connect(&dccp_death_row, sk); - if (err != 0) - goto failure; - - rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, - inet->inet_sport, inet->inet_dport, sk); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); - rt = NULL; - goto failure; - } - /* OK, now commit destination to socket. */ - sk_setup_caps(sk, &rt->dst); - - dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr, - inet->inet_daddr, - inet->inet_sport, - inet->inet_dport); - atomic_set(&inet->inet_id, get_random_u16()); - - err = dccp_connect(sk); - rt = NULL; - if (err != 0) - goto failure; -out: - return err; -failure: - /* - * This unhashes the socket and releases the local port, if necessary. - */ - dccp_set_state(sk, DCCP_CLOSED); - inet_bhash2_reset_saddr(sk); - ip_rt_put(rt); - sk->sk_route_caps = 0; - inet->inet_dport = 0; - goto out; -} -EXPORT_SYMBOL_GPL(dccp_v4_connect); - -/* - * This routine does path mtu discovery as defined in RFC1191. - */ -static inline void dccp_do_pmtu_discovery(struct sock *sk, - const struct iphdr *iph, - u32 mtu) -{ - struct dst_entry *dst; - const struct inet_sock *inet = inet_sk(sk); - const struct dccp_sock *dp = dccp_sk(sk); - - /* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs - * send out by Linux are always < 576bytes so they should go through - * unfragmented). - */ - if (sk->sk_state == DCCP_LISTEN) - return; - - dst = inet_csk_update_pmtu(sk, mtu); - if (!dst) - return; - - /* Something is about to be wrong... Remember soft error - * for the case, if this connection will not able to recover. 
- */ - if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) - WRITE_ONCE(sk->sk_err_soft, EMSGSIZE); - - mtu = dst_mtu(dst); - - if (inet->pmtudisc != IP_PMTUDISC_DONT && - ip_sk_accept_pmtu(sk) && - inet_csk(sk)->icsk_pmtu_cookie > mtu) { - dccp_sync_mss(sk, mtu); - - /* - * From RFC 4340, sec. 14.1: - * - * DCCP-Sync packets are the best choice for upward - * probing, since DCCP-Sync probes do not risk application - * data loss. - */ - dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); - } /* else let the usual retransmit timer handle it */ -} - -static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk) -{ - struct dst_entry *dst = __sk_dst_check(sk, 0); - - if (dst) - dst->ops->redirect(dst, sk, skb); -} - -void dccp_req_err(struct sock *sk, u64 seq) - { - struct request_sock *req = inet_reqsk(sk); - struct net *net = sock_net(sk); - - /* - * ICMPs are not backlogged, hence we cannot get an established - * socket here. - */ - if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { - __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); - } else { - /* - * Still in RESPOND, just remove it silently. - * There is no good way to pass the error to the newly - * created socket, and POSIX does not want network - * errors returned from accept(). - */ - inet_csk_reqsk_queue_drop(req->rsk_listener, req); - } - reqsk_put(req); -} -EXPORT_SYMBOL(dccp_req_err); - -/* - * This routine is called by the ICMP module when it gets some sort of error - * condition. If err < 0 then the socket should be closed and the error - * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code. - * After adjustment header points to the first 8 bytes of the tcp header. We - * need to find the appropriate port. - * - * The locking strategy used here is very "optimistic". When someone else - * accesses the socket the ICMP is just dropped and for some paths there is no - * check at all. A more general error queue to queue errors for later handling - * is probably better. - */ -static int dccp_v4_err(struct sk_buff *skb, u32 info) -{ - const struct iphdr *iph = (struct iphdr *)skb->data; - const u8 offset = iph->ihl << 2; - const struct dccp_hdr *dh; - struct dccp_sock *dp; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - struct sock *sk; - __u64 seq; - int err; - struct net *net = dev_net(skb->dev); - - if (!pskb_may_pull(skb, offset + sizeof(*dh))) - return -EINVAL; - dh = (struct dccp_hdr *)(skb->data + offset); - if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh))) - return -EINVAL; - iph = (struct iphdr *)skb->data; - dh = (struct dccp_hdr *)(skb->data + offset); - - sk = __inet_lookup_established(net, &dccp_hashinfo, - iph->daddr, dh->dccph_dport, - iph->saddr, ntohs(dh->dccph_sport), - inet_iif(skb), 0); - if (!sk) { - __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); - return -ENOENT; - } - - if (sk->sk_state == DCCP_TIME_WAIT) { - inet_twsk_put(inet_twsk(sk)); - return 0; - } - seq = dccp_hdr_seq(dh); - if (sk->sk_state == DCCP_NEW_SYN_RECV) { - dccp_req_err(sk, seq); - return 0; - } - - bh_lock_sock(sk); - /* If too many ICMPs get dropped on busy - * servers this needs to be solved differently. 
- */ - if (sock_owned_by_user(sk)) - __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); - - if (sk->sk_state == DCCP_CLOSED) - goto out; - - dp = dccp_sk(sk); - if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && - !between48(seq, dp->dccps_awl, dp->dccps_awh)) { - __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); - goto out; - } - - switch (type) { - case ICMP_REDIRECT: - if (!sock_owned_by_user(sk)) - dccp_do_redirect(skb, sk); - goto out; - case ICMP_SOURCE_QUENCH: - /* Just silently ignore these. */ - goto out; - case ICMP_PARAMETERPROB: - err = EPROTO; - break; - case ICMP_DEST_UNREACH: - if (code > NR_ICMP_UNREACH) - goto out; - - if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ - if (!sock_owned_by_user(sk)) - dccp_do_pmtu_discovery(sk, iph, info); - goto out; - } - - err = icmp_err_convert[code].errno; - break; - case ICMP_TIME_EXCEEDED: - err = EHOSTUNREACH; - break; - default: - goto out; - } - - switch (sk->sk_state) { - case DCCP_REQUESTING: - case DCCP_RESPOND: - if (!sock_owned_by_user(sk)) { - __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); - sk->sk_err = err; - - sk_error_report(sk); - - dccp_done(sk); - } else { - WRITE_ONCE(sk->sk_err_soft, err); - } - goto out; - } - - /* If we've already connected we will keep trying - * until we time out, or the user gives up. - * - * rfc1122 4.2.3.9 allows to consider as hard errors - * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, - * but it is obsoleted by pmtu discovery). - * - * Note, that in modern internet, where routing is unreliable - * and in each dark corner broken firewalls sit, sending random - * errors ordered by their masters even this two messages finally lose - * their original sense (even Linux sends invalid PORT_UNREACHs) - * - * Now we are in compliance with RFCs. - * --ANK (980905) - */ - - if (!sock_owned_by_user(sk) && inet_test_bit(RECVERR, sk)) { - sk->sk_err = err; - sk_error_report(sk); - } else { /* Only an error on timeout */ - WRITE_ONCE(sk->sk_err_soft, err); - } -out: - bh_unlock_sock(sk); - sock_put(sk); - return 0; -} - -static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb, - __be32 src, __be32 dst) -{ - return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum); -} - -void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb) -{ - const struct inet_sock *inet = inet_sk(sk); - struct dccp_hdr *dh = dccp_hdr(skb); - - dccp_csum_outgoing(skb); - dh->dccph_checksum = dccp_v4_csum_finish(skb, - inet->inet_saddr, - inet->inet_daddr); -} -EXPORT_SYMBOL_GPL(dccp_v4_send_check); - -static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb) -{ - return secure_dccp_sequence_number(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - dccp_hdr(skb)->dccph_dport, - dccp_hdr(skb)->dccph_sport); -} - -/* - * The three way handshake has completed - we got a valid ACK or DATAACK - - * now create the new socket. 
- * - * This is the equivalent of TCP's tcp_v4_syn_recv_sock - */ -struct sock *dccp_v4_request_recv_sock(const struct sock *sk, - struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst, - struct request_sock *req_unhash, - bool *own_req) -{ - struct inet_request_sock *ireq; - struct inet_sock *newinet; - struct sock *newsk; - - if (sk_acceptq_is_full(sk)) - goto exit_overflow; - - newsk = dccp_create_openreq_child(sk, req, skb); - if (newsk == NULL) - goto exit_nonewsk; - - newinet = inet_sk(newsk); - ireq = inet_rsk(req); - RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); - newinet->mc_index = inet_iif(skb); - newinet->mc_ttl = ip_hdr(skb)->ttl; - atomic_set(&newinet->inet_id, get_random_u16()); - - if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) - goto put_and_exit; - - sk_setup_caps(newsk, dst); - - dccp_sync_mss(newsk, dst_mtu(dst)); - - if (__inet_inherit_port(sk, newsk) < 0) - goto put_and_exit; - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); - if (*own_req) - ireq->ireq_opt = NULL; - else - newinet->inet_opt = NULL; - return newsk; - -exit_overflow: - __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); -exit_nonewsk: - dst_release(dst); -exit: - __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); - return NULL; -put_and_exit: - newinet->inet_opt = NULL; - inet_csk_prepare_forced_close(newsk); - dccp_done(newsk); - goto exit; -} -EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); - -static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, - struct sk_buff *skb) -{ - struct rtable *rt; - const struct iphdr *iph = ip_hdr(skb); - struct flowi4 fl4 = { - .flowi4_oif = inet_iif(skb), - .daddr = iph->saddr, - .saddr = iph->daddr, - .flowi4_tos = inet_dscp_to_dsfield(inet_sk_dscp(inet_sk(sk))), - .flowi4_scope = ip_sock_rt_scope(sk), - .flowi4_proto = sk->sk_protocol, - .fl4_sport = dccp_hdr(skb)->dccph_dport, - .fl4_dport = dccp_hdr(skb)->dccph_sport, - }; - - security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4)); - rt = ip_route_output_flow(net, &fl4, sk); - if (IS_ERR(rt)) { - IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); - return NULL; - } - - return &rt->dst; -} - -static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req) -{ - int err = -1; - struct sk_buff *skb; - struct dst_entry *dst; - struct flowi4 fl4; - - dst = inet_csk_route_req(sk, &fl4, req); - if (dst == NULL) - goto out; - - skb = dccp_make_response(sk, dst, req); - if (skb != NULL) { - const struct inet_request_sock *ireq = inet_rsk(req); - struct dccp_hdr *dh = dccp_hdr(skb); - - dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr, - ireq->ir_rmt_addr); - rcu_read_lock(); - err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, - ireq->ir_rmt_addr, - rcu_dereference(ireq->ireq_opt), - READ_ONCE(inet_sk(sk)->tos)); - rcu_read_unlock(); - err = net_xmit_eval(err); - } - -out: - dst_release(dst); - return err; -} - -static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb, - enum sk_rst_reason reason) -{ - int err; - const struct iphdr *rxiph; - struct sk_buff *skb; - struct dst_entry *dst; - struct net *net = dev_net(skb_dst(rxskb)->dev); - struct dccp_v4_pernet *pn; - struct sock *ctl_sk; - - /* Never send a reset in response to a reset. 
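dccp_v4_route_skb() above derives the reply route by mirroring the received flow. The swap itself is simple enough to show in isolation (toy types, not the kernel's flowi4):

    #include <stdint.h>

    /* Toy mirror of the flow setup in dccp_v4_route_skb(): a reply flow
     * is the received flow with source and destination swapped. */
    struct toy_flow {
            uint32_t saddr, daddr;
            uint16_t sport, dport;
    };

    static struct toy_flow reply_flow(const struct toy_flow *rx)
    {
            struct toy_flow tx = {
                    .saddr = rx->daddr,  /* reply leaves from where the packet arrived */
                    .daddr = rx->saddr,
                    .sport = rx->dport,
                    .dport = rx->sport,
            };
            return tx;
    }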
- */
-        if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET)
-                return;
-
-        if (skb_rtable(rxskb)->rt_type != RTN_LOCAL)
-                return;
-
-        pn = net_generic(net, dccp_v4_pernet_id);
-        ctl_sk = pn->v4_ctl_sk;
-        dst = dccp_v4_route_skb(net, ctl_sk, rxskb);
-        if (dst == NULL)
-                return;
-
-        skb = dccp_ctl_make_reset(ctl_sk, rxskb);
-        if (skb == NULL)
-                goto out;
-
-        rxiph = ip_hdr(rxskb);
-        dccp_hdr(skb)->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr,
-                                                            rxiph->daddr);
-        skb_dst_set(skb, dst_clone(dst));
-
-        local_bh_disable();
-        bh_lock_sock(ctl_sk);
-        err = ip_build_and_send_pkt(skb, ctl_sk,
-                                    rxiph->daddr, rxiph->saddr, NULL,
-                                    inet_sk(ctl_sk)->tos);
-        bh_unlock_sock(ctl_sk);
-
-        if (net_xmit_eval(err) == 0) {
-                __DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
-                __DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
-        }
-        local_bh_enable();
-out:
-        dst_release(dst);
-}
-
-static void dccp_v4_reqsk_destructor(struct request_sock *req)
-{
-        dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
-        kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
-}
-
-void dccp_syn_ack_timeout(const struct request_sock *req)
-{
-}
-EXPORT_SYMBOL(dccp_syn_ack_timeout);
-
-static struct request_sock_ops dccp_request_sock_ops __read_mostly = {
-        .family         = PF_INET,
-        .obj_size       = sizeof(struct dccp_request_sock),
-        .rtx_syn_ack    = dccp_v4_send_response,
-        .send_ack       = dccp_reqsk_send_ack,
-        .destructor     = dccp_v4_reqsk_destructor,
-        .send_reset     = dccp_v4_ctl_send_reset,
-        .syn_ack_timeout = dccp_syn_ack_timeout,
-};
-
-int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
-{
-        struct inet_request_sock *ireq;
-        struct request_sock *req;
-        struct dccp_request_sock *dreq;
-        const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
-        struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
-
-        /* Never answer DCCP_PKT_REQUESTs sent to broadcast or multicast */
-        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
-                return 0;       /* discard, don't send a reset here */
-
-        if (dccp_bad_service_code(sk, service)) {
-                dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
-                goto drop;
-        }
-        /*
-         * TW buckets are converted to open requests without
-         * limitations, they conserve resources and the peer is
-         * evidently a real one.
-         */
-        dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
-        if (inet_csk_reqsk_queue_is_full(sk))
-                goto drop;
-
-        if (sk_acceptq_is_full(sk))
-                goto drop;
-
-        req = inet_reqsk_alloc(&dccp_request_sock_ops, sk, true);
-        if (req == NULL)
-                goto drop;
-
-        if (dccp_reqsk_init(req, dccp_sk(sk), skb))
-                goto drop;
-
-        dreq = dccp_rsk(req);
-        if (dccp_parse_options(sk, dreq, skb))
-                goto drop_and_free;
-
-        ireq = inet_rsk(req);
-        sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
-        sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
-        ireq->ir_mark = inet_request_mark(sk, skb);
-        ireq->ireq_family = AF_INET;
-        ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if);
-
-        if (security_inet_conn_request(sk, skb, req))
-                goto drop_and_free;
-
-        /*
-         * Step 3: Process LISTEN state
-         *
-         * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
-         *
-         * Setting S.SWL/S.SWH is deferred to dccp_create_openreq_child().
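The four assignments that follow pair the request's receive-side variables (ISR/GSR) with locally chosen send-side ones (ISS/GSS). The ISS comes from secure_dccp_sequence_number(); a toy stand-in keyed on the connection 4-tuple might look like the sketch below, where mix64() is an arbitrary integer mixer, not the kernel's actual siphash-based implementation:

    #include <stdint.h>

    static uint64_t mix64(uint64_t x)
    {
            x ^= x >> 33; x *= 0xff51afd7ed558ccdULL;
            x ^= x >> 33; x *= 0xc4ceb9fe1a85ec53ULL;
            return x ^ (x >> 33);
    }

    /* Hash the 4-tuple with a boot-time secret so the initial sequence
     * number is unpredictable to off-path attackers yet stable per
     * connection attempt. Illustrative only. */
    static uint64_t toy_dccp_iss(uint32_t saddr, uint32_t daddr,
                                 uint16_t sport, uint16_t dport,
                                 uint64_t secret)
    {
            uint64_t h = secret;

            h = mix64(h ^ ((uint64_t)saddr << 32 | daddr));
            h = mix64(h ^ ((uint64_t)sport << 16 | dport));
            return h & 0xFFFFFFFFFFFFULL;  /* DCCP seqnos are 48 bits */
    }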
- */ - dreq->dreq_isr = dcb->dccpd_seq; - dreq->dreq_gsr = dreq->dreq_isr; - dreq->dreq_iss = dccp_v4_init_sequence(skb); - dreq->dreq_gss = dreq->dreq_iss; - dreq->dreq_service = service; - - if (dccp_v4_send_response(sk, req)) - goto drop_and_free; - - if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT))) - reqsk_free(req); - else - reqsk_put(req); - - return 0; - -drop_and_free: - reqsk_free(req); -drop: - __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); - return -1; -} -EXPORT_SYMBOL_GPL(dccp_v4_conn_request); - -int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) -{ - struct dccp_hdr *dh = dccp_hdr(skb); - - if (sk->sk_state == DCCP_OPEN) { /* Fast path */ - if (dccp_rcv_established(sk, skb, dh, skb->len)) - goto reset; - return 0; - } - - /* - * Step 3: Process LISTEN state - * If P.type == Request or P contains a valid Init Cookie option, - * (* Must scan the packet's options to check for Init - * Cookies. Only Init Cookies are processed here, - * however; other options are processed in Step 8. This - * scan need only be performed if the endpoint uses Init - * Cookies *) - * (* Generate a new socket and switch to that socket *) - * Set S := new socket for this port pair - * S.state = RESPOND - * Choose S.ISS (initial seqno) or set from Init Cookies - * Initialize S.GAR := S.ISS - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies - * Continue with S.state == RESPOND - * (* A Response packet will be generated in Step 11 *) - * Otherwise, - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return - * - * NOTE: the check for the packet types is done in - * dccp_rcv_state_process - */ - - if (dccp_rcv_state_process(sk, skb, dh, skb->len)) - goto reset; - return 0; - -reset: - dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); - kfree_skb(skb); - return 0; -} -EXPORT_SYMBOL_GPL(dccp_v4_do_rcv); - -/** - * dccp_invalid_packet - check for malformed packets - * @skb: Packet to validate - * - * Implements RFC 4340, 8.5: Step 1: Check header basics - * Packets that fail these checks are ignored and do not receive Resets. 
- */ -int dccp_invalid_packet(struct sk_buff *skb) -{ - const struct dccp_hdr *dh; - unsigned int cscov; - u8 dccph_doff; - - if (skb->pkt_type != PACKET_HOST) - return 1; - - /* If the packet is shorter than 12 bytes, drop packet and return */ - if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { - DCCP_WARN("pskb_may_pull failed\n"); - return 1; - } - - dh = dccp_hdr(skb); - - /* If P.type is not understood, drop packet and return */ - if (dh->dccph_type >= DCCP_PKT_INVALID) { - DCCP_WARN("invalid packet type\n"); - return 1; - } - - /* - * If P.Data Offset is too small for packet type, drop packet and return - */ - dccph_doff = dh->dccph_doff; - if (dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { - DCCP_WARN("P.Data Offset(%u) too small\n", dccph_doff); - return 1; - } - /* - * If P.Data Offset is too large for packet, drop packet and return - */ - if (!pskb_may_pull(skb, dccph_doff * sizeof(u32))) { - DCCP_WARN("P.Data Offset(%u) too large\n", dccph_doff); - return 1; - } - dh = dccp_hdr(skb); - /* - * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet - * has short sequence numbers), drop packet and return - */ - if ((dh->dccph_type < DCCP_PKT_DATA || - dh->dccph_type > DCCP_PKT_DATAACK) && dh->dccph_x == 0) { - DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n", - dccp_packet_name(dh->dccph_type)); - return 1; - } - - /* - * If P.CsCov is too large for the packet size, drop packet and return. - * This must come _before_ checksumming (not as RFC 4340 suggests). - */ - cscov = dccp_csum_coverage(skb); - if (cscov > skb->len) { - DCCP_WARN("P.CsCov %u exceeds packet length %d\n", - dh->dccph_cscov, skb->len); - return 1; - } - - /* If header checksum is incorrect, drop packet and return. - * (This step is completed in the AF-dependent functions.) */ - skb->csum = skb_checksum(skb, 0, cscov, 0); - - return 0; -} -EXPORT_SYMBOL_GPL(dccp_invalid_packet); - -/* this is called when real data arrives */ -static int dccp_v4_rcv(struct sk_buff *skb) -{ - const struct dccp_hdr *dh; - const struct iphdr *iph; - bool refcounted; - struct sock *sk; - int min_cov; - - /* Step 1: Check header basics */ - - if (dccp_invalid_packet(skb)) - goto discard_it; - - iph = ip_hdr(skb); - /* Step 1: If header checksum is incorrect, drop packet and return */ - if (dccp_v4_csum_finish(skb, iph->saddr, iph->daddr)) { - DCCP_WARN("dropped packet with invalid checksum\n"); - goto discard_it; - } - - dh = dccp_hdr(skb); - - DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(dh); - DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; - - dccp_pr_debug("%8.8s src=%pI4@%-5d dst=%pI4@%-5d seq=%llu", - dccp_packet_name(dh->dccph_type), - &iph->saddr, ntohs(dh->dccph_sport), - &iph->daddr, ntohs(dh->dccph_dport), - (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); - - if (dccp_packet_without_ack(skb)) { - DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; - dccp_pr_debug_cat("\n"); - } else { - DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); - dccp_pr_debug_cat(", ack=%llu\n", (unsigned long long) - DCCP_SKB_CB(skb)->dccpd_ack_seq); - } - -lookup: - sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), - dh->dccph_sport, dh->dccph_dport, 0, &refcounted); - if (!sk) { - dccp_pr_debug("failed to look up flow ID in table and " - "get corresponding socket\n"); - goto no_dccp_socket; - } - - /* - * Step 2: - * ... 
or S.state == TIMEWAIT, - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return - */ - if (sk->sk_state == DCCP_TIME_WAIT) { - dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n"); - inet_twsk_put(inet_twsk(sk)); - goto no_dccp_socket; - } - - if (sk->sk_state == DCCP_NEW_SYN_RECV) { - struct request_sock *req = inet_reqsk(sk); - struct sock *nsk; - - sk = req->rsk_listener; - if (unlikely(sk->sk_state != DCCP_LISTEN)) { - inet_csk_reqsk_queue_drop_and_put(sk, req); - goto lookup; - } - sock_hold(sk); - refcounted = true; - nsk = dccp_check_req(sk, skb, req); - if (!nsk) { - reqsk_put(req); - goto discard_and_relse; - } - if (nsk == sk) { - reqsk_put(req); - } else if (dccp_child_process(sk, nsk, skb)) { - dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); - goto discard_and_relse; - } else { - sock_put(sk); - return 0; - } - } - /* - * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage - * o if MinCsCov = 0, only packets with CsCov = 0 are accepted - * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov - */ - min_cov = dccp_sk(sk)->dccps_pcrlen; - if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) { - dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n", - dh->dccph_cscov, min_cov); - /* FIXME: "Such packets SHOULD be reported using Data Dropped - * options (Section 11.7) with Drop Code 0, Protocol - * Constraints." */ - goto discard_and_relse; - } - - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) - goto discard_and_relse; - nf_reset_ct(skb); - - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted); - -no_dccp_socket: - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto discard_it; - /* - * Step 2: - * If no socket ... - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return - */ - if (dh->dccph_type != DCCP_PKT_RESET) { - DCCP_SKB_CB(skb)->dccpd_reset_code = - DCCP_RESET_CODE_NO_CONNECTION; - dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); - } - -discard_it: - kfree_skb(skb); - return 0; - -discard_and_relse: - if (refcounted) - sock_put(sk); - goto discard_it; -} - -static const struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { - .queue_xmit = ip_queue_xmit, - .send_check = dccp_v4_send_check, - .rebuild_header = inet_sk_rebuild_header, - .conn_request = dccp_v4_conn_request, - .syn_recv_sock = dccp_v4_request_recv_sock, - .net_header_len = sizeof(struct iphdr), - .setsockopt = ip_setsockopt, - .getsockopt = ip_getsockopt, -}; - -static int dccp_v4_init_sock(struct sock *sk) -{ - static __u8 dccp_v4_ctl_sock_initialized; - int err = dccp_init_sock(sk, dccp_v4_ctl_sock_initialized); - - if (err == 0) { - if (unlikely(!dccp_v4_ctl_sock_initialized)) - dccp_v4_ctl_sock_initialized = 1; - inet_csk(sk)->icsk_af_ops = &dccp_ipv4_af_ops; - } - - return err; -} - -static struct timewait_sock_ops dccp_timewait_sock_ops = { - .twsk_obj_size = sizeof(struct inet_timewait_sock), -}; - -static struct proto dccp_v4_prot = { - .name = "DCCP", - .owner = THIS_MODULE, - .close = dccp_close, - .connect = dccp_v4_connect, - .disconnect = dccp_disconnect, - .ioctl = dccp_ioctl, - .init = dccp_v4_init_sock, - .setsockopt = dccp_setsockopt, - .getsockopt = dccp_getsockopt, - .sendmsg = dccp_sendmsg, - .recvmsg = dccp_recvmsg, - .backlog_rcv = dccp_v4_do_rcv, - .hash = inet_hash, - .unhash = inet_unhash, - .accept = inet_csk_accept, - .get_port = inet_csk_get_port, - .shutdown = dccp_shutdown, - .destroy = dccp_destroy_sock, - .orphan_count = 
&dccp_orphan_count,
-        .max_header     = MAX_DCCP_HEADER,
-        .obj_size       = sizeof(struct dccp_sock),
-        .slab_flags     = SLAB_TYPESAFE_BY_RCU,
-        .rsk_prot       = &dccp_request_sock_ops,
-        .twsk_prot      = &dccp_timewait_sock_ops,
-        .h.hashinfo     = &dccp_hashinfo,
-};
-
-static const struct net_protocol dccp_v4_protocol = {
-        .handler        = dccp_v4_rcv,
-        .err_handler    = dccp_v4_err,
-        .no_policy      = 1,
-        .icmp_strict_tag_validation = 1,
-};
-
-static const struct proto_ops inet_dccp_ops = {
-        .family         = PF_INET,
-        .owner          = THIS_MODULE,
-        .release        = inet_release,
-        .bind           = inet_bind,
-        .connect        = inet_stream_connect,
-        .socketpair     = sock_no_socketpair,
-        .accept         = inet_accept,
-        .getname        = inet_getname,
-        /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
-        .poll           = dccp_poll,
-        .ioctl          = inet_ioctl,
-        .gettstamp      = sock_gettstamp,
-        /* FIXME: work on inet_listen to rename it to sock_common_listen */
-        .listen         = inet_dccp_listen,
-        .shutdown       = inet_shutdown,
-        .setsockopt     = sock_common_setsockopt,
-        .getsockopt     = sock_common_getsockopt,
-        .sendmsg        = inet_sendmsg,
-        .recvmsg        = sock_common_recvmsg,
-        .mmap           = sock_no_mmap,
-};
-
-static struct inet_protosw dccp_v4_protosw = {
-        .type           = SOCK_DCCP,
-        .protocol       = IPPROTO_DCCP,
-        .prot           = &dccp_v4_prot,
-        .ops            = &inet_dccp_ops,
-        .flags          = INET_PROTOSW_ICSK,
-};
-
-static int __net_init dccp_v4_init_net(struct net *net)
-{
-        struct dccp_v4_pernet *pn = net_generic(net, dccp_v4_pernet_id);
-
-        if (dccp_hashinfo.bhash == NULL)
-                return -ESOCKTNOSUPPORT;
-
-        return inet_ctl_sock_create(&pn->v4_ctl_sk, PF_INET,
-                                    SOCK_DCCP, IPPROTO_DCCP, net);
-}
-
-static void __net_exit dccp_v4_exit_net(struct net *net)
-{
-        struct dccp_v4_pernet *pn = net_generic(net, dccp_v4_pernet_id);
-
-        inet_ctl_sock_destroy(pn->v4_ctl_sk);
-}
-
-static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
-{
-        inet_twsk_purge(&dccp_hashinfo);
-}
-
-static struct pernet_operations dccp_v4_ops = {
-        .init   = dccp_v4_init_net,
-        .exit   = dccp_v4_exit_net,
-        .exit_batch = dccp_v4_exit_batch,
-        .id     = &dccp_v4_pernet_id,
-        .size   = sizeof(struct dccp_v4_pernet),
-};
-
-static int __init dccp_v4_init(void)
-{
-        int err = proto_register(&dccp_v4_prot, 1);
-
-        if (err)
-                goto out;
-
-        inet_register_protosw(&dccp_v4_protosw);
-
-        err = register_pernet_subsys(&dccp_v4_ops);
-        if (err)
-                goto out_destroy_ctl_sock;
-
-        err = inet_add_protocol(&dccp_v4_protocol, IPPROTO_DCCP);
-        if (err)
-                goto out_proto_unregister;
-
-out:
-        return err;
-out_proto_unregister:
-        unregister_pernet_subsys(&dccp_v4_ops);
-out_destroy_ctl_sock:
-        inet_unregister_protosw(&dccp_v4_protosw);
-        proto_unregister(&dccp_v4_prot);
-        goto out;
-}
-
-static void __exit dccp_v4_exit(void)
-{
-        inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP);
-        unregister_pernet_subsys(&dccp_v4_ops);
-        inet_unregister_protosw(&dccp_v4_protosw);
-        proto_unregister(&dccp_v4_prot);
-}
-
-module_init(dccp_v4_init);
-module_exit(dccp_v4_exit);
-
-/*
- * __stringify doesn't like enums, so use the SOCK_DCCP (6) and
- * IPPROTO_DCCP (33) values directly. Also cover the case where the
- * protocol is not specified, i.e.
net-pf-PF_INET-proto-0-type-SOCK_DCCP - */ -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 33, 6); -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 0, 6); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); -MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c deleted file mode 100644 index e24dbffabfc1..000000000000 --- a/net/dccp/ipv6.c +++ /dev/null @@ -1,1174 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * DCCP over IPv6 - * Linux INET6 implementation - * - * Based on net/dccp6/ipv6.c - * - * Arnaldo Carvalho de Melo <acme@ghostprotocols.net> - */ - -#include <linux/module.h> -#include <linux/random.h> -#include <linux/slab.h> -#include <linux/xfrm.h> -#include <linux/string.h> - -#include <net/addrconf.h> -#include <net/inet_common.h> -#include <net/inet_hashtables.h> -#include <net/inet_sock.h> -#include <net/inet6_connection_sock.h> -#include <net/inet6_hashtables.h> -#include <net/ip6_route.h> -#include <net/ipv6.h> -#include <net/protocol.h> -#include <net/transp_v6.h> -#include <net/ip6_checksum.h> -#include <net/xfrm.h> -#include <net/secure_seq.h> -#include <net/netns/generic.h> -#include <net/sock.h> -#include <net/rstreason.h> - -#include "dccp.h" -#include "ipv6.h" -#include "feat.h" - -struct dccp_v6_pernet { - struct sock *v6_ctl_sk; -}; - -static unsigned int dccp_v6_pernet_id __read_mostly; - -/* The per-net v6_ctl_sk is used for sending RSTs and ACKs */ - -static const struct inet_connection_sock_af_ops dccp_ipv6_mapped; -static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops; - -/* add pseudo-header to DCCP checksum stored in skb->csum */ -static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb, - const struct in6_addr *saddr, - const struct in6_addr *daddr) -{ - return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum); -} - -static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct dccp_hdr *dh = dccp_hdr(skb); - - dccp_csum_outgoing(skb); - dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr); -} - -static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb) -{ - return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, - ipv6_hdr(skb)->saddr.s6_addr32, - dccp_hdr(skb)->dccph_dport, - dccp_hdr(skb)->dccph_sport ); - -} - -static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) -{ - const struct ipv6hdr *hdr; - const struct dccp_hdr *dh; - struct dccp_sock *dp; - struct ipv6_pinfo *np; - struct sock *sk; - int err; - __u64 seq; - struct net *net = dev_net(skb->dev); - - if (!pskb_may_pull(skb, offset + sizeof(*dh))) - return -EINVAL; - dh = (struct dccp_hdr *)(skb->data + offset); - if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh))) - return -EINVAL; - hdr = (const struct ipv6hdr *)skb->data; - dh = (struct dccp_hdr *)(skb->data + offset); - - sk = __inet6_lookup_established(net, &dccp_hashinfo, - &hdr->daddr, dh->dccph_dport, - &hdr->saddr, ntohs(dh->dccph_sport), - inet6_iif(skb), 0); - - if (!sk) { - __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); - return -ENOENT; - } - - if (sk->sk_state == DCCP_TIME_WAIT) { - inet_twsk_put(inet_twsk(sk)); - return 0; - } - seq = dccp_hdr_seq(dh); - if (sk->sk_state == DCCP_NEW_SYN_RECV) { - dccp_req_err(sk, seq); - return 0; - } - - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) - __NET_INC_STATS(net, 
LINUX_MIB_LOCKDROPPEDICMPS);
-
-        if (sk->sk_state == DCCP_CLOSED)
-                goto out;
-
-        dp = dccp_sk(sk);
-        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
-            !between48(seq, dp->dccps_awl, dp->dccps_awh)) {
-                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
-                goto out;
-        }
-
-        np = inet6_sk(sk);
-
-        if (type == NDISC_REDIRECT) {
-                if (!sock_owned_by_user(sk)) {
-                        struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
-
-                        if (dst)
-                                dst->ops->redirect(dst, sk, skb);
-                }
-                goto out;
-        }
-
-        if (type == ICMPV6_PKT_TOOBIG) {
-                struct dst_entry *dst = NULL;
-
-                if (!ip6_sk_accept_pmtu(sk))
-                        goto out;
-
-                if (sock_owned_by_user(sk))
-                        goto out;
-                if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED))
-                        goto out;
-
-                dst = inet6_csk_update_pmtu(sk, ntohl(info));
-                if (!dst)
-                        goto out;
-
-                if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst))
-                        dccp_sync_mss(sk, dst_mtu(dst));
-                goto out;
-        }
-
-        icmpv6_err_convert(type, code, &err);
-
-        /* Might be for a request_sock */
-        switch (sk->sk_state) {
-        case DCCP_REQUESTING:
-        case DCCP_RESPOND:  /* Cannot happen.
-                               It can, if SYNs are crossed. --ANK */
-                if (!sock_owned_by_user(sk)) {
-                        __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
-                        sk->sk_err = err;
-                        /*
-                         * Wake people up to see the error
-                         * (see connect in sock.c)
-                         */
-                        sk_error_report(sk);
-                        dccp_done(sk);
-                } else {
-                        WRITE_ONCE(sk->sk_err_soft, err);
-                }
-                goto out;
-        }
-
-        if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
-                sk->sk_err = err;
-                sk_error_report(sk);
-        } else {
-                WRITE_ONCE(sk->sk_err_soft, err);
-        }
-out:
-        bh_unlock_sock(sk);
-        sock_put(sk);
-        return 0;
-}
-
-
-static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req)
-{
-        struct inet_request_sock *ireq = inet_rsk(req);
-        struct ipv6_pinfo *np = inet6_sk(sk);
-        struct sk_buff *skb;
-        struct in6_addr *final_p, final;
-        struct flowi6 fl6;
-        int err = -1;
-        struct dst_entry *dst;
-
-        memset(&fl6, 0, sizeof(fl6));
-        fl6.flowi6_proto = IPPROTO_DCCP;
-        fl6.daddr = ireq->ir_v6_rmt_addr;
-        fl6.saddr = ireq->ir_v6_loc_addr;
-        fl6.flowlabel = 0;
-        fl6.flowi6_oif = ireq->ir_iif;
-        fl6.fl6_dport = ireq->ir_rmt_port;
-        fl6.fl6_sport = htons(ireq->ir_num);
-        security_req_classify_flow(req, flowi6_to_flowi_common(&fl6));
-
-
-        rcu_read_lock();
-        final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
-        rcu_read_unlock();
-
-        dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
-        if (IS_ERR(dst)) {
-                err = PTR_ERR(dst);
-                dst = NULL;
-                goto done;
-        }
-
-        skb = dccp_make_response(sk, dst, req);
-        if (skb != NULL) {
-                struct dccp_hdr *dh = dccp_hdr(skb);
-                struct ipv6_txoptions *opt;
-
-                dh->dccph_checksum = dccp_v6_csum_finish(skb,
-                                                         &ireq->ir_v6_loc_addr,
-                                                         &ireq->ir_v6_rmt_addr);
-                fl6.daddr = ireq->ir_v6_rmt_addr;
-                rcu_read_lock();
-                opt = ireq->ipv6_opt;
-                if (!opt)
-                        opt = rcu_dereference(np->opt);
-                err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt,
-                               np->tclass, READ_ONCE(sk->sk_priority));
-                rcu_read_unlock();
-                err = net_xmit_eval(err);
-        }
-
-done:
-        dst_release(dst);
-        return err;
-}
-
-static void dccp_v6_reqsk_destructor(struct request_sock *req)
-{
-        dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
-        kfree(inet_rsk(req)->ipv6_opt);
-        kfree_skb(inet_rsk(req)->pktopts);
-}
-
-static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb,
-                                   enum sk_rst_reason reason)
-{
-        const struct ipv6hdr *rxip6h;
-        struct sk_buff *skb;
-        struct flowi6 fl6;
-        struct net *net = dev_net(skb_dst(rxskb)->dev);
-        struct dccp_v6_pernet *pn;
-        struct sock
*ctl_sk; - struct dst_entry *dst; - - if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) - return; - - if (!ipv6_unicast_destination(rxskb)) - return; - - pn = net_generic(net, dccp_v6_pernet_id); - ctl_sk = pn->v6_ctl_sk; - skb = dccp_ctl_make_reset(ctl_sk, rxskb); - if (skb == NULL) - return; - - rxip6h = ipv6_hdr(rxskb); - dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, - &rxip6h->daddr); - - memset(&fl6, 0, sizeof(fl6)); - fl6.daddr = rxip6h->saddr; - fl6.saddr = rxip6h->daddr; - - fl6.flowi6_proto = IPPROTO_DCCP; - fl6.flowi6_oif = inet6_iif(rxskb); - fl6.fl6_dport = dccp_hdr(skb)->dccph_dport; - fl6.fl6_sport = dccp_hdr(skb)->dccph_sport; - security_skb_classify_flow(rxskb, flowi6_to_flowi_common(&fl6)); - - /* sk = NULL, but it is safe for now. RST socket required. */ - dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL); - if (!IS_ERR(dst)) { - skb_dst_set(skb, dst); - ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0, 0); - DCCP_INC_STATS(DCCP_MIB_OUTSEGS); - DCCP_INC_STATS(DCCP_MIB_OUTRSTS); - return; - } - - kfree_skb(skb); -} - -static struct request_sock_ops dccp6_request_sock_ops = { - .family = AF_INET6, - .obj_size = sizeof(struct dccp6_request_sock), - .rtx_syn_ack = dccp_v6_send_response, - .send_ack = dccp_reqsk_send_ack, - .destructor = dccp_v6_reqsk_destructor, - .send_reset = dccp_v6_ctl_send_reset, - .syn_ack_timeout = dccp_syn_ack_timeout, -}; - -static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) -{ - struct request_sock *req; - struct dccp_request_sock *dreq; - struct inet_request_sock *ireq; - struct ipv6_pinfo *np = inet6_sk(sk); - const __be32 service = dccp_hdr_request(skb)->dccph_req_service; - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - - if (skb->protocol == htons(ETH_P_IP)) - return dccp_v4_conn_request(sk, skb); - - if (!ipv6_unicast_destination(skb)) - return 0; /* discard, don't send a reset here */ - - if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { - __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); - return 0; - } - - if (dccp_bad_service_code(sk, service)) { - dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; - goto drop; - } - /* - * There are no SYN attacks on IPv6, yet... 
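The listen-side counterpart of the earlier v4 client, as a hedged userspace sketch (same assumed constants as before; the service code must match the client's):

    #include <stdint.h>
    #include <unistd.h>
    #include <netinet/in.h>
    #include <sys/socket.h>

    #ifndef SOCK_DCCP
    #define SOCK_DCCP 6
    #endif
    #ifndef IPPROTO_DCCP
    #define IPPROTO_DCCP 33
    #endif
    #ifndef SOL_DCCP
    #define SOL_DCCP 269
    #endif
    #ifndef DCCP_SOCKOPT_SERVICE
    #define DCCP_SOCKOPT_SERVICE 2
    #endif

    int main(void)
    {
            struct sockaddr_in6 sin6 = { .sin6_family = AF_INET6,
                                         .sin6_port   = htons(5001),
                                         .sin6_addr   = IN6ADDR_ANY_INIT };
            uint32_t service = htonl(42);   /* must match the client */
            int fd, cfd;

            fd = socket(AF_INET6, SOCK_DCCP, IPPROTO_DCCP);
            if (fd < 0)
                    return 1;
            setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
                       &service, sizeof(service));
            if (bind(fd, (struct sockaddr *)&sin6, sizeof(sin6)) < 0 ||
                listen(fd, 8) < 0)
                    return 1;
            cfd = accept(fd, NULL, NULL);   /* one fd per connection */
            if (cfd >= 0)
                    close(cfd);
            close(fd);
            return 0;
    }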
- */ - dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; - if (inet_csk_reqsk_queue_is_full(sk)) - goto drop; - - if (sk_acceptq_is_full(sk)) - goto drop; - - req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true); - if (req == NULL) - goto drop; - - if (dccp_reqsk_init(req, dccp_sk(sk), skb)) - goto drop_and_free; - - dreq = dccp_rsk(req); - if (dccp_parse_options(sk, dreq, skb)) - goto drop_and_free; - - ireq = inet_rsk(req); - ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; - ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; - ireq->ir_rmt_addr = LOOPBACK4_IPV6; - ireq->ir_loc_addr = LOOPBACK4_IPV6; - - ireq->ireq_family = AF_INET6; - ireq->ir_mark = inet_request_mark(sk, skb); - - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - - if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) || - np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || - np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { - refcount_inc(&skb->users); - ireq->pktopts = skb; - } - ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if); - - /* So that link locals have meaning */ - if (!ireq->ir_iif && - ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq->ir_iif = inet6_iif(skb); - - /* - * Step 3: Process LISTEN state - * - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie - * - * Setting S.SWL/S.SWH to is deferred to dccp_create_openreq_child(). - */ - dreq->dreq_isr = dcb->dccpd_seq; - dreq->dreq_gsr = dreq->dreq_isr; - dreq->dreq_iss = dccp_v6_init_sequence(skb); - dreq->dreq_gss = dreq->dreq_iss; - dreq->dreq_service = service; - - if (dccp_v6_send_response(sk, req)) - goto drop_and_free; - - if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT))) - reqsk_free(req); - else - reqsk_put(req); - - return 0; - -drop_and_free: - reqsk_free(req); -drop: - __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); - return -1; -} - -static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, - struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst, - struct request_sock *req_unhash, - bool *own_req) -{ - struct inet_request_sock *ireq = inet_rsk(req); - struct ipv6_pinfo *newnp; - const struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6_txoptions *opt; - struct inet_sock *newinet; - struct dccp6_sock *newdp6; - struct sock *newsk; - - if (skb->protocol == htons(ETH_P_IP)) { - /* - * v6 mapped - */ - newsk = dccp_v4_request_recv_sock(sk, skb, req, dst, - req_unhash, own_req); - if (newsk == NULL) - return NULL; - - newdp6 = (struct dccp6_sock *)newsk; - newinet = inet_sk(newsk); - newinet->pinet6 = &newdp6->inet6; - newnp = inet6_sk(newsk); - - memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - - newnp->saddr = newsk->sk_v6_rcv_saddr; - - inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; - newsk->sk_backlog_rcv = dccp_v4_do_rcv; - newnp->pktoptions = NULL; - newnp->opt = NULL; - newnp->ipv6_mc_list = NULL; - newnp->ipv6_ac_list = NULL; - newnp->ipv6_fl_list = NULL; - newnp->mcast_oif = inet_iif(skb); - newnp->mcast_hops = ip_hdr(skb)->ttl; - - /* - * No need to charge this sock to the relevant IPv6 refcnt debug socks count - * here, dccp_create_openreq_child now does this for us, see the comment in - * that function for the gory details. -acme - */ - - /* It is tricky place. Until this moment IPv4 tcp - worked with IPv6 icsk.icsk_af_ops. - Sync it now. 
- */ - dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); - - return newsk; - } - - - if (sk_acceptq_is_full(sk)) - goto out_overflow; - - if (!dst) { - struct flowi6 fl6; - - dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_DCCP); - if (!dst) - goto out; - } - - newsk = dccp_create_openreq_child(sk, req, skb); - if (newsk == NULL) - goto out_nonewsk; - - /* - * No need to charge this sock to the relevant IPv6 refcnt debug socks - * count here, dccp_create_openreq_child now does this for us, see the - * comment in that function for the gory details. -acme - */ - - ip6_dst_store(newsk, dst, NULL, NULL); - newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | - NETIF_F_TSO); - newdp6 = (struct dccp6_sock *)newsk; - newinet = inet_sk(newsk); - newinet->pinet6 = &newdp6->inet6; - newnp = inet6_sk(newsk); - - memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - - newnp->saddr = ireq->ir_v6_loc_addr; - - /* Now IPv6 options... - - First: no IPv4 options. - */ - newinet->inet_opt = NULL; - - /* Clone RX bits */ - newnp->rxopt.all = np->rxopt.all; - - newnp->ipv6_mc_list = NULL; - newnp->ipv6_ac_list = NULL; - newnp->ipv6_fl_list = NULL; - newnp->pktoptions = NULL; - newnp->opt = NULL; - newnp->mcast_oif = inet6_iif(skb); - newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - - /* - * Clone native IPv6 options from listening socket (if any) - * - * Yes, keeping reference count would be much more clever, but we make - * one more one thing there: reattach optmem to newsk. - */ - opt = ireq->ipv6_opt; - if (!opt) - opt = rcu_dereference(np->opt); - if (opt) { - opt = ipv6_dup_options(newsk, opt); - RCU_INIT_POINTER(newnp->opt, opt); - } - inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (opt) - inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + - opt->opt_flen; - - dccp_sync_mss(newsk, dst_mtu(dst)); - - if (__inet_inherit_port(sk, newsk) < 0) { - inet_csk_prepare_forced_close(newsk); - dccp_done(newsk); - goto out; - } - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); - /* Clone pktoptions received with SYN, if we own the req */ - if (*own_req && ireq->pktopts) { - newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); - consume_skb(ireq->pktopts); - ireq->pktopts = NULL; - } - - return newsk; - -out_overflow: - __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); -out_nonewsk: - dst_release(dst); -out: - __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); - return NULL; -} - -/* The socket must have it's spinlock held when we get - * here. - * - * We have a potential double-lock case here, so even when - * doing backlog processing we use the BH locking scheme. - * This is because we cannot sleep with the original spinlock - * held. - */ -static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct sk_buff *opt_skb = NULL; - - /* Imagine: socket is IPv6. IPv4 packet arrives, - goes to IPv4 receive handler and backlogged. - From backlog it always goes here. Kerboom... - Fortunately, dccp_rcv_established and rcv_established - handle them correctly, but it is not case with - dccp_v6_hnd_req and dccp_v6_ctl_send_reset(). --ANK - */ - - if (skb->protocol == htons(ETH_P_IP)) - return dccp_v4_do_rcv(sk, skb); - - if (sk_filter(sk, skb)) - goto discard; - - /* - * socket locking is here for SMP purposes as backlog rcv is currently - * called with bh processing disabled. - */ - - /* Do Stevens' IPV6_PKTOPTIONS. - - Yes, guys, it is the only place in our code, where we - may make it not affecting IPv4. 
-           The rest of the code is protocol independent,
-           and I do not like the idea of uglifying IPv4.
-
-           Actually, the whole idea behind IPV6_PKTOPTIONS
-           does not look very well thought out. For now we latch
-           the options received in the last packet, as enqueued
-           by tcp. Feel free to propose a better solution.
-                                               --ANK (980728)
-         */
-        if (np->rxopt.all && sk->sk_state != DCCP_LISTEN)
-                opt_skb = skb_clone_and_charge_r(skb, sk);
-
-        if (sk->sk_state == DCCP_OPEN) { /* Fast path */
-                if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
-                        goto reset;
-                if (opt_skb)
-                        goto ipv6_pktoptions;
-                return 0;
-        }
-
-        /*
-         *  Step 3: Process LISTEN state
-         *     If S.state == LISTEN,
-         *       If P.type == Request or P contains a valid Init Cookie option,
-         *            (* Must scan the packet's options to check for Init
-         *               Cookies.  Only Init Cookies are processed here,
-         *               however; other options are processed in Step 8.  This
-         *               scan need only be performed if the endpoint uses Init
-         *               Cookies *)
-         *            (* Generate a new socket and switch to that socket *)
-         *            Set S := new socket for this port pair
-         *            S.state = RESPOND
-         *            Choose S.ISS (initial seqno) or set from Init Cookies
-         *            Initialize S.GAR := S.ISS
-         *            Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
-         *            Continue with S.state == RESPOND
-         *            (* A Response packet will be generated in Step 11 *)
-         *       Otherwise,
-         *            Generate Reset(No Connection) unless P.type == Reset
-         *            Drop packet and return
-         *
-         * NOTE: the check for the packet types is done in
-         *       dccp_rcv_state_process
-         */
-
-        if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
-                goto reset;
-        if (opt_skb)
-                goto ipv6_pktoptions;
-        return 0;
-
-reset:
-        dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
-discard:
-        if (opt_skb != NULL)
-                __kfree_skb(opt_skb);
-        kfree_skb(skb);
-        return 0;
-
-/* Handle the IPV6_PKTOPTIONS skb similarly to the way it is done in
- * net/ipv6/tcp_ipv6.c
- */
-ipv6_pktoptions:
-        if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) {
-                if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
-                        WRITE_ONCE(np->mcast_oif, inet6_iif(opt_skb));
-                if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
-                        WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit);
-                if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
-                        np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
-                if (inet6_test_bit(REPFLOW, sk))
-                        np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
-                if (ipv6_opt_accepted(sk, opt_skb,
-                                      &DCCP_SKB_CB(opt_skb)->header.h6)) {
-                        memmove(IP6CB(opt_skb),
-                                &DCCP_SKB_CB(opt_skb)->header.h6,
-                                sizeof(struct inet6_skb_parm));
-                        opt_skb = xchg(&np->pktoptions, opt_skb);
-                } else {
-                        __kfree_skb(opt_skb);
-                        opt_skb = xchg(&np->pktoptions, NULL);
-                }
-        }
-
-        kfree_skb(opt_skb);
-        return 0;
-}
-
-static int dccp_v6_rcv(struct sk_buff *skb)
-{
-        const struct dccp_hdr *dh;
-        bool refcounted;
-        struct sock *sk;
-        int min_cov;
-
-        /* Step 1: Check header basics */
-
-        if (dccp_invalid_packet(skb))
-                goto discard_it;
-
-        /* Step 1: If header checksum is incorrect, drop packet and return.
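The checksum verified here may, by design, cover only part of the packet: CsCov = 0 means full coverage, otherwise coverage is the header plus (CsCov - 1) * 4 bytes of payload (RFC 4340, sec. 9.2). An illustrative ones'-complement checksum over a byte-level coverage limit, with the pseudo-header folding omitted:

    #include <stddef.h>
    #include <stdint.h>

    static uint16_t csum_partial_coverage(const uint8_t *data, size_t len,
                                          size_t cscov_bytes)
    {
            uint32_t sum = 0;
            size_t i, n = (cscov_bytes && cscov_bytes < len) ? cscov_bytes
                                                             : len;

            for (i = 0; i + 1 < n; i += 2)          /* 16-bit words */
                    sum += (uint32_t)data[i] << 8 | data[i + 1];
            if (i < n)                              /* odd trailing byte */
                    sum += (uint32_t)data[i] << 8;
            while (sum >> 16)                       /* fold carries */
                    sum = (sum & 0xffff) + (sum >> 16);
            return (uint16_t)~sum;
    }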
*/ - if (dccp_v6_csum_finish(skb, &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr)) { - DCCP_WARN("dropped packet with invalid checksum\n"); - goto discard_it; - } - - dh = dccp_hdr(skb); - - DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(dh); - DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; - - if (dccp_packet_without_ack(skb)) - DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; - else - DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); - -lookup: - sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), - dh->dccph_sport, dh->dccph_dport, - inet6_iif(skb), 0, &refcounted); - if (!sk) { - dccp_pr_debug("failed to look up flow ID in table and " - "get corresponding socket\n"); - goto no_dccp_socket; - } - - /* - * Step 2: - * ... or S.state == TIMEWAIT, - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return - */ - if (sk->sk_state == DCCP_TIME_WAIT) { - dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n"); - inet_twsk_put(inet_twsk(sk)); - goto no_dccp_socket; - } - - if (sk->sk_state == DCCP_NEW_SYN_RECV) { - struct request_sock *req = inet_reqsk(sk); - struct sock *nsk; - - sk = req->rsk_listener; - if (unlikely(sk->sk_state != DCCP_LISTEN)) { - inet_csk_reqsk_queue_drop_and_put(sk, req); - goto lookup; - } - sock_hold(sk); - refcounted = true; - nsk = dccp_check_req(sk, skb, req); - if (!nsk) { - reqsk_put(req); - goto discard_and_relse; - } - if (nsk == sk) { - reqsk_put(req); - } else if (dccp_child_process(sk, nsk, skb)) { - dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); - goto discard_and_relse; - } else { - sock_put(sk); - return 0; - } - } - /* - * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage - * o if MinCsCov = 0, only packets with CsCov = 0 are accepted - * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov - */ - min_cov = dccp_sk(sk)->dccps_pcrlen; - if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) { - dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n", - dh->dccph_cscov, min_cov); - /* FIXME: send Data Dropped option (see also dccp_v4_rcv) */ - goto discard_and_relse; - } - - if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) - goto discard_and_relse; - nf_reset_ct(skb); - - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, - refcounted) ? -1 : 0; - -no_dccp_socket: - if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto discard_it; - /* - * Step 2: - * If no socket ... 
- * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return - */ - if (dh->dccph_type != DCCP_PKT_RESET) { - DCCP_SKB_CB(skb)->dccpd_reset_code = - DCCP_RESET_CODE_NO_CONNECTION; - dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); - } - -discard_it: - kfree_skb(skb); - return 0; - -discard_and_relse: - if (refcounted) - sock_put(sk); - goto discard_it; -} - -static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, - int addr_len) -{ - struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr; - struct inet_connection_sock *icsk = inet_csk(sk); - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct dccp_sock *dp = dccp_sk(sk); - struct in6_addr *saddr = NULL, *final_p, final; - struct ipv6_txoptions *opt; - struct flowi6 fl6; - struct dst_entry *dst; - int addr_type; - int err; - - dp->dccps_role = DCCP_ROLE_CLIENT; - - if (addr_len < SIN6_LEN_RFC2133) - return -EINVAL; - - if (usin->sin6_family != AF_INET6) - return -EAFNOSUPPORT; - - memset(&fl6, 0, sizeof(fl6)); - - if (inet6_test_bit(SNDFLOW, sk)) { - fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; - IP6_ECN_flow_init(fl6.flowlabel); - if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { - struct ip6_flowlabel *flowlabel; - flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (IS_ERR(flowlabel)) - return -EINVAL; - fl6_sock_release(flowlabel); - } - } - /* - * connect() to INADDR_ANY means loopback (BSD'ism). - */ - if (ipv6_addr_any(&usin->sin6_addr)) - usin->sin6_addr.s6_addr[15] = 1; - - addr_type = ipv6_addr_type(&usin->sin6_addr); - - if (addr_type & IPV6_ADDR_MULTICAST) - return -ENETUNREACH; - - if (addr_type & IPV6_ADDR_LINKLOCAL) { - if (addr_len >= sizeof(struct sockaddr_in6) && - usin->sin6_scope_id) { - /* If interface is set while binding, indices - * must coincide. - */ - if (sk->sk_bound_dev_if && - sk->sk_bound_dev_if != usin->sin6_scope_id) - return -EINVAL; - - sk->sk_bound_dev_if = usin->sin6_scope_id; - } - - /* Connect to link-local address requires an interface */ - if (!sk->sk_bound_dev_if) - return -EINVAL; - } - - sk->sk_v6_daddr = usin->sin6_addr; - np->flow_label = fl6.flowlabel; - - /* - * DCCP over IPv4 - */ - if (addr_type == IPV6_ADDR_MAPPED) { - u32 exthdrlen = icsk->icsk_ext_hdr_len; - struct sockaddr_in sin; - - net_dbg_ratelimited("connect: ipv4 mapped\n"); - - if (ipv6_only_sock(sk)) - return -ENETUNREACH; - - sin.sin_family = AF_INET; - sin.sin_port = usin->sin6_port; - sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; - - icsk->icsk_af_ops = &dccp_ipv6_mapped; - sk->sk_backlog_rcv = dccp_v4_do_rcv; - - err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); - if (err) { - icsk->icsk_ext_hdr_len = exthdrlen; - icsk->icsk_af_ops = &dccp_ipv6_af_ops; - sk->sk_backlog_rcv = dccp_v6_do_rcv; - goto failure; - } - np->saddr = sk->sk_v6_rcv_saddr; - return err; - } - - if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) - saddr = &sk->sk_v6_rcv_saddr; - - fl6.flowi6_proto = IPPROTO_DCCP; - fl6.daddr = sk->sk_v6_daddr; - fl6.saddr = saddr ? 
*saddr : np->saddr; - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.fl6_dport = usin->sin6_port; - fl6.fl6_sport = inet->inet_sport; - security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); - - opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); - final_p = fl6_update_dst(&fl6, opt, &final); - - dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); - goto failure; - } - - if (saddr == NULL) { - saddr = &fl6.saddr; - - err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); - if (err) - goto failure; - } - - /* set the source address */ - np->saddr = *saddr; - inet->inet_rcv_saddr = LOOPBACK4_IPV6; - - ip6_dst_store(sk, dst, NULL, NULL); - - icsk->icsk_ext_hdr_len = 0; - if (opt) - icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; - - inet->inet_dport = usin->sin6_port; - - dccp_set_state(sk, DCCP_REQUESTING); - err = inet6_hash_connect(&dccp_death_row, sk); - if (err) - goto late_failure; - - dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32, - sk->sk_v6_daddr.s6_addr32, - inet->inet_sport, - inet->inet_dport); - err = dccp_connect(sk); - if (err) - goto late_failure; - - return 0; - -late_failure: - dccp_set_state(sk, DCCP_CLOSED); - inet_bhash2_reset_saddr(sk); - __sk_dst_reset(sk); -failure: - inet->inet_dport = 0; - sk->sk_route_caps = 0; - return err; -} - -static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { - .queue_xmit = inet6_csk_xmit, - .send_check = dccp_v6_send_check, - .rebuild_header = inet6_sk_rebuild_header, - .conn_request = dccp_v6_conn_request, - .syn_recv_sock = dccp_v6_request_recv_sock, - .net_header_len = sizeof(struct ipv6hdr), - .setsockopt = ipv6_setsockopt, - .getsockopt = ipv6_getsockopt, -}; - -/* - * DCCP over IPv4 via INET6 API - */ -static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { - .queue_xmit = ip_queue_xmit, - .send_check = dccp_v4_send_check, - .rebuild_header = inet_sk_rebuild_header, - .conn_request = dccp_v6_conn_request, - .syn_recv_sock = dccp_v6_request_recv_sock, - .net_header_len = sizeof(struct iphdr), - .setsockopt = ipv6_setsockopt, - .getsockopt = ipv6_getsockopt, -}; - -static void dccp_v6_sk_destruct(struct sock *sk) -{ - dccp_destruct_common(sk); - inet6_sock_destruct(sk); -} - -/* NOTE: A lot of things set to zero explicitly by call to - * sk_alloc() so need not be done here. 
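dccp_v6_init_sock() below wires up the per-socket defaults whose negotiable parts userspace could override before connecting or listening, for instance the congestion-control ID via DCCP_SOCKOPT_CCID (value 13 per linux/dccp.h — an assumption worth checking against the headers of the era). A hedged sketch on an already-created DCCP socket fd:

    #ifndef DCCP_SOCKOPT_CCID
    #define DCCP_SOCKOPT_CCID 13  /* assumed: sets both TX and RX CCID */
    #endif

            uint8_t ccid = 3;     /* request CCID-3 (TFRC) both ways */

            if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CCID,
                           &ccid, sizeof(ccid)) < 0)
                    perror("DCCP_SOCKOPT_CCID");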
- */ -static int dccp_v6_init_sock(struct sock *sk) -{ - static __u8 dccp_v6_ctl_sock_initialized; - int err = dccp_init_sock(sk, dccp_v6_ctl_sock_initialized); - - if (err == 0) { - if (unlikely(!dccp_v6_ctl_sock_initialized)) - dccp_v6_ctl_sock_initialized = 1; - inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops; - sk->sk_destruct = dccp_v6_sk_destruct; - } - - return err; -} - -static struct timewait_sock_ops dccp6_timewait_sock_ops = { - .twsk_obj_size = sizeof(struct dccp6_timewait_sock), -}; - -static struct proto dccp_v6_prot = { - .name = "DCCPv6", - .owner = THIS_MODULE, - .close = dccp_close, - .connect = dccp_v6_connect, - .disconnect = dccp_disconnect, - .ioctl = dccp_ioctl, - .init = dccp_v6_init_sock, - .setsockopt = dccp_setsockopt, - .getsockopt = dccp_getsockopt, - .sendmsg = dccp_sendmsg, - .recvmsg = dccp_recvmsg, - .backlog_rcv = dccp_v6_do_rcv, - .hash = inet6_hash, - .unhash = inet_unhash, - .accept = inet_csk_accept, - .get_port = inet_csk_get_port, - .shutdown = dccp_shutdown, - .destroy = dccp_destroy_sock, - .orphan_count = &dccp_orphan_count, - .max_header = MAX_DCCP_HEADER, - .obj_size = sizeof(struct dccp6_sock), - .ipv6_pinfo_offset = offsetof(struct dccp6_sock, inet6), - .slab_flags = SLAB_TYPESAFE_BY_RCU, - .rsk_prot = &dccp6_request_sock_ops, - .twsk_prot = &dccp6_timewait_sock_ops, - .h.hashinfo = &dccp_hashinfo, -}; - -static const struct inet6_protocol dccp_v6_protocol = { - .handler = dccp_v6_rcv, - .err_handler = dccp_v6_err, - .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, -}; - -static const struct proto_ops inet6_dccp_ops = { - .family = PF_INET6, - .owner = THIS_MODULE, - .release = inet6_release, - .bind = inet6_bind, - .connect = inet_stream_connect, - .socketpair = sock_no_socketpair, - .accept = inet_accept, - .getname = inet6_getname, - .poll = dccp_poll, - .ioctl = inet6_ioctl, - .gettstamp = sock_gettstamp, - .listen = inet_dccp_listen, - .shutdown = inet_shutdown, - .setsockopt = sock_common_setsockopt, - .getsockopt = sock_common_getsockopt, - .sendmsg = inet_sendmsg, - .recvmsg = sock_common_recvmsg, - .mmap = sock_no_mmap, -#ifdef CONFIG_COMPAT - .compat_ioctl = inet6_compat_ioctl, -#endif -}; - -static struct inet_protosw dccp_v6_protosw = { - .type = SOCK_DCCP, - .protocol = IPPROTO_DCCP, - .prot = &dccp_v6_prot, - .ops = &inet6_dccp_ops, - .flags = INET_PROTOSW_ICSK, -}; - -static int __net_init dccp_v6_init_net(struct net *net) -{ - struct dccp_v6_pernet *pn = net_generic(net, dccp_v6_pernet_id); - - if (dccp_hashinfo.bhash == NULL) - return -ESOCKTNOSUPPORT; - - return inet_ctl_sock_create(&pn->v6_ctl_sk, PF_INET6, - SOCK_DCCP, IPPROTO_DCCP, net); -} - -static void __net_exit dccp_v6_exit_net(struct net *net) -{ - struct dccp_v6_pernet *pn = net_generic(net, dccp_v6_pernet_id); - - inet_ctl_sock_destroy(pn->v6_ctl_sk); -} - -static struct pernet_operations dccp_v6_ops = { - .init = dccp_v6_init_net, - .exit = dccp_v6_exit_net, - .id = &dccp_v6_pernet_id, - .size = sizeof(struct dccp_v6_pernet), -}; - -static int __init dccp_v6_init(void) -{ - int err = proto_register(&dccp_v6_prot, 1); - - if (err) - goto out; - - inet6_register_protosw(&dccp_v6_protosw); - - err = register_pernet_subsys(&dccp_v6_ops); - if (err) - goto out_destroy_ctl_sock; - - err = inet6_add_protocol(&dccp_v6_protocol, IPPROTO_DCCP); - if (err) - goto out_unregister_proto; - -out: - return err; -out_unregister_proto: - unregister_pernet_subsys(&dccp_v6_ops); -out_destroy_ctl_sock: - inet6_unregister_protosw(&dccp_v6_protosw); - proto_unregister(&dccp_v6_prot); - 
goto out; -} - -static void __exit dccp_v6_exit(void) -{ - inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP); - unregister_pernet_subsys(&dccp_v6_ops); - inet6_unregister_protosw(&dccp_v6_protosw); - proto_unregister(&dccp_v6_prot); -} - -module_init(dccp_v6_init); -module_exit(dccp_v6_exit); - -/* - * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33) - * values directly. Also cover the case where the protocol is not specified, - * i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP - */ -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 33, 6); -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 0, 6); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); -MODULE_DESCRIPTION("DCCPv6 - Datagram Congestion Controlled Protocol"); diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h deleted file mode 100644 index c5d14c48def1..000000000000 --- a/net/dccp/ipv6.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _DCCP_IPV6_H -#define _DCCP_IPV6_H -/* - * net/dccp/ipv6.h - * - * An implementation of the DCCP protocol - * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net> - */ - -#include <linux/dccp.h> -#include <linux/ipv6.h> - -struct dccp6_sock { - struct dccp_sock dccp; - struct ipv6_pinfo inet6; -}; - -struct dccp6_request_sock { - struct dccp_request_sock dccp; -}; - -struct dccp6_timewait_sock { - struct inet_timewait_sock inet; -}; - -#endif /* _DCCP_IPV6_H */ diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c deleted file mode 100644 index fecc8190064f..000000000000 --- a/net/dccp/minisocks.c +++ /dev/null @@ -1,266 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/dccp/minisocks.c - * - * An implementation of the DCCP protocol - * Arnaldo Carvalho de Melo <acme@conectiva.com.br> - */ - -#include <linux/dccp.h> -#include <linux/gfp.h> -#include <linux/kernel.h> -#include <linux/skbuff.h> -#include <linux/timer.h> - -#include <net/sock.h> -#include <net/xfrm.h> -#include <net/inet_timewait_sock.h> -#include <net/rstreason.h> - -#include "ackvec.h" -#include "ccid.h" -#include "dccp.h" -#include "feat.h" - -struct inet_timewait_death_row dccp_death_row = { - .tw_refcount = REFCOUNT_INIT(1), - .sysctl_max_tw_buckets = NR_FILE * 2, - .hashinfo = &dccp_hashinfo, -}; - -EXPORT_SYMBOL_GPL(dccp_death_row); - -void dccp_time_wait(struct sock *sk, int state, int timeo) -{ - struct inet_timewait_sock *tw; - - tw = inet_twsk_alloc(sk, &dccp_death_row, state); - - if (tw != NULL) { - const struct inet_connection_sock *icsk = inet_csk(sk); - const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); -#if IS_ENABLED(CONFIG_IPV6) - if (tw->tw_family == PF_INET6) { - tw->tw_v6_daddr = sk->sk_v6_daddr; - tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; - tw->tw_ipv6only = sk->sk_ipv6only; - } -#endif - - /* Get the TIME_WAIT timeout firing. */ - if (timeo < rto) - timeo = rto; - - if (state == DCCP_TIME_WAIT) - timeo = DCCP_TIMEWAIT_LEN; - - /* Linkage updates. - * Note that access to tw after this point is illegal. - */ - inet_twsk_hashdance_schedule(tw, sk, &dccp_hashinfo, timeo); - } else { - /* Sorry, if we're out of memory, just CLOSE this - * socket up. We've got bigger problems than - * non-graceful socket closings. 
- */ - DCCP_WARN("time wait bucket table overflow\n"); - } - - dccp_done(sk); -} - -struct sock *dccp_create_openreq_child(const struct sock *sk, - const struct request_sock *req, - const struct sk_buff *skb) -{ - /* - * Step 3: Process LISTEN state - * - * (* Generate a new socket and switch to that socket *) - * Set S := new socket for this port pair - */ - struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); - - if (newsk != NULL) { - struct dccp_request_sock *dreq = dccp_rsk(req); - struct inet_connection_sock *newicsk = inet_csk(newsk); - struct dccp_sock *newdp = dccp_sk(newsk); - - newdp->dccps_role = DCCP_ROLE_SERVER; - newdp->dccps_hc_rx_ackvec = NULL; - newdp->dccps_service_list = NULL; - newdp->dccps_hc_rx_ccid = NULL; - newdp->dccps_hc_tx_ccid = NULL; - newdp->dccps_service = dreq->dreq_service; - newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo; - newdp->dccps_timestamp_time = dreq->dreq_timestamp_time; - newicsk->icsk_rto = DCCP_TIMEOUT_INIT; - - INIT_LIST_HEAD(&newdp->dccps_featneg); - /* - * Step 3: Process LISTEN state - * - * Choose S.ISS (initial seqno) or set from Init Cookies - * Initialize S.GAR := S.ISS - * Set S.ISR, S.GSR from packet (or Init Cookies) - * - * Setting AWL/AWH and SWL/SWH happens as part of the feature - * activation below, as these windows all depend on the local - * and remote Sequence Window feature values (7.5.2). - */ - newdp->dccps_iss = dreq->dreq_iss; - newdp->dccps_gss = dreq->dreq_gss; - newdp->dccps_gar = newdp->dccps_iss; - newdp->dccps_isr = dreq->dreq_isr; - newdp->dccps_gsr = dreq->dreq_gsr; - - /* - * Activate features: initialise CCIDs, sequence windows etc. - */ - if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) { - sk_free_unlock_clone(newsk); - return NULL; - } - dccp_init_xmit_timers(newsk); - - __DCCP_INC_STATS(DCCP_MIB_PASSIVEOPENS); - } - return newsk; -} - -EXPORT_SYMBOL_GPL(dccp_create_openreq_child); - -/* - * Process an incoming packet for RESPOND sockets represented - * as a request_sock. - */ -struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, - struct request_sock *req) -{ - struct sock *child = NULL; - struct dccp_request_sock *dreq = dccp_rsk(req); - bool own_req; - - /* TCP/DCCP listeners became lockless. - * DCCP stores complex state in its request_sock, so we need - * protection for it, now that this code runs without being protected - * by the parent (listener) lock. - */ - spin_lock_bh(&dreq->dreq_lock); - - /* Check for retransmitted REQUEST */ - if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { - - if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_gsr)) { - dccp_pr_debug("Retransmitted REQUEST\n"); - dreq->dreq_gsr = DCCP_SKB_CB(skb)->dccpd_seq; - /* - * Send another RESPONSE packet. - * To protect against Request floods, increment retrans - * counter (backoff, monitored by dccp_response_timer). 
- */ - inet_rtx_syn_ack(sk, req); - } - /* Network Duplicate, discard packet */ - goto out; - } - - DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; - - if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK && - dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK) - goto drop; - - /* Invalid ACK */ - if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, - dreq->dreq_iss, dreq->dreq_gss)) { - dccp_pr_debug("Invalid ACK number: ack_seq=%llu, " - "dreq_iss=%llu, dreq_gss=%llu\n", - (unsigned long long) - DCCP_SKB_CB(skb)->dccpd_ack_seq, - (unsigned long long) dreq->dreq_iss, - (unsigned long long) dreq->dreq_gss); - goto drop; - } - - if (dccp_parse_options(sk, dreq, skb)) - goto drop; - - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, - req, &own_req); - if (child) { - child = inet_csk_complete_hashdance(sk, child, req, own_req); - goto out; - } - - DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; -drop: - if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) - req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); - - inet_csk_reqsk_queue_drop(sk, req); -out: - spin_unlock_bh(&dreq->dreq_lock); - return child; -} - -EXPORT_SYMBOL_GPL(dccp_check_req); - -/* - * Queue segment on the new socket if the new socket is active, - * otherwise we just short-circuit this and continue with - * the new socket. - */ -int dccp_child_process(struct sock *parent, struct sock *child, - struct sk_buff *skb) - __releases(child) -{ - int ret = 0; - const int state = child->sk_state; - - if (!sock_owned_by_user(child)) { - ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), - skb->len); - - /* Wakeup parent, send SIGIO */ - if (state == DCCP_RESPOND && child->sk_state != state) - parent->sk_data_ready(parent); - } else { - /* Alas, it is possible again, because we do a lookup - * in the main socket hash table, and the lock on the listening - * socket does not protect us any more. 
- */ - __sk_add_backlog(child, skb); - } - - bh_unlock_sock(child); - sock_put(child); - return ret; -} - -EXPORT_SYMBOL_GPL(dccp_child_process); - -void dccp_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, - struct request_sock *rsk) -{ - DCCP_BUG("DCCP-ACK packets are never sent in LISTEN/RESPOND state"); -} - -EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack); - -int dccp_reqsk_init(struct request_sock *req, - struct dccp_sock const *dp, struct sk_buff const *skb) -{ - struct dccp_request_sock *dreq = dccp_rsk(req); - - spin_lock_init(&dreq->dreq_lock); - inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport; - inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport); - inet_rsk(req)->acked = 0; - dreq->dreq_timestamp_echo = 0; - - /* inherit feature negotiation options from listening socket */ - return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg); -} - -EXPORT_SYMBOL_GPL(dccp_reqsk_init); diff --git a/net/dccp/options.c b/net/dccp/options.c deleted file mode 100644 index db62d4767024..000000000000 --- a/net/dccp/options.c +++ /dev/null @@ -1,609 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/dccp/options.c - * - * An implementation of the DCCP protocol - * Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org> - * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net> - * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz> - */ -#include <linux/dccp.h> -#include <linux/module.h> -#include <linux/types.h> -#include <linux/unaligned.h> -#include <linux/kernel.h> -#include <linux/skbuff.h> - -#include "ackvec.h" -#include "ccid.h" -#include "dccp.h" -#include "feat.h" - -u64 dccp_decode_value_var(const u8 *bf, const u8 len) -{ - u64 value = 0; - - if (len >= DCCP_OPTVAL_MAXLEN) - value += ((u64)*bf++) << 40; - if (len > 4) - value += ((u64)*bf++) << 32; - if (len > 3) - value += ((u64)*bf++) << 24; - if (len > 2) - value += ((u64)*bf++) << 16; - if (len > 1) - value += ((u64)*bf++) << 8; - if (len > 0) - value += *bf; - - return value; -} - -/** - * dccp_parse_options - Parse DCCP options present in @skb - * @sk: client|server|listening dccp socket (when @dreq != NULL) - * @dreq: request socket to use during connection setup, or NULL - * @skb: frame to parse - */ -int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, - struct sk_buff *skb) -{ - struct dccp_sock *dp = dccp_sk(sk); - const struct dccp_hdr *dh = dccp_hdr(skb); - const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; - unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); - unsigned char *opt_ptr = options; - const unsigned char *opt_end = (unsigned char *)dh + - (dh->dccph_doff * 4); - struct dccp_options_received *opt_recv = &dp->dccps_options_received; - unsigned char opt, len; - unsigned char *value; - u32 elapsed_time; - __be32 opt_val; - int rc; - int mandatory = 0; - - memset(opt_recv, 0, sizeof(*opt_recv)); - - opt = len = 0; - while (opt_ptr != opt_end) { - opt = *opt_ptr++; - len = 0; - value = NULL; - - /* Check if this isn't a single byte option */ - if (opt > DCCPO_MAX_RESERVED) { - if (opt_ptr == opt_end) - goto out_nonsensical_length; - - len = *opt_ptr++; - if (len < 2) - goto out_nonsensical_length; - /* - * Remove the type and len fields, leaving - * just the value size - */ - len -= 2; - value = opt_ptr; - opt_ptr += len; - - if (opt_ptr > opt_end) - goto out_nonsensical_length; - } - - /* - * CCID-specific options are ignored during connection setup, as - * negotiation may still be in progress (see RFC 
4340, 10.3). - * The same applies to Ack Vectors, as these depend on the CCID. - */ - if (dreq != NULL && (opt >= DCCPO_MIN_RX_CCID_SPECIFIC || - opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1)) - goto ignore_option; - - switch (opt) { - case DCCPO_PADDING: - break; - case DCCPO_MANDATORY: - if (mandatory) - goto out_invalid_option; - if (pkt_type != DCCP_PKT_DATA) - mandatory = 1; - break; - case DCCPO_NDP_COUNT: - if (len > 6) - goto out_invalid_option; - - opt_recv->dccpor_ndp = dccp_decode_value_var(value, len); - dccp_pr_debug("%s opt: NDP count=%llu\n", dccp_role(sk), - (unsigned long long)opt_recv->dccpor_ndp); - break; - case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R: - if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */ - break; - if (len == 0) - goto out_invalid_option; - rc = dccp_feat_parse_options(sk, dreq, mandatory, opt, - *value, value + 1, len - 1); - if (rc) - goto out_featneg_failed; - break; - case DCCPO_TIMESTAMP: - if (len != 4) - goto out_invalid_option; - /* - * RFC 4340 13.1: "The precise time corresponding to - * Timestamp Value zero is not specified". We use - * zero to indicate absence of a meaningful timestamp. - */ - opt_val = get_unaligned((__be32 *)value); - if (unlikely(opt_val == 0)) { - DCCP_WARN("Timestamp with zero value\n"); - break; - } - - if (dreq != NULL) { - dreq->dreq_timestamp_echo = ntohl(opt_val); - dreq->dreq_timestamp_time = dccp_timestamp(); - } else { - opt_recv->dccpor_timestamp = - dp->dccps_timestamp_echo = ntohl(opt_val); - dp->dccps_timestamp_time = dccp_timestamp(); - } - dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n", - dccp_role(sk), ntohl(opt_val), - (unsigned long long) - DCCP_SKB_CB(skb)->dccpd_ack_seq); - /* schedule an Ack in case this sender is quiescent */ - inet_csk_schedule_ack(sk); - break; - case DCCPO_TIMESTAMP_ECHO: - if (len != 4 && len != 6 && len != 8) - goto out_invalid_option; - - opt_val = get_unaligned((__be32 *)value); - opt_recv->dccpor_timestamp_echo = ntohl(opt_val); - - dccp_pr_debug("%s rx opt: TIMESTAMP_ECHO=%u, len=%d, " - "ackno=%llu", dccp_role(sk), - opt_recv->dccpor_timestamp_echo, - len + 2, - (unsigned long long) - DCCP_SKB_CB(skb)->dccpd_ack_seq); - - value += 4; - - if (len == 4) { /* no elapsed time included */ - dccp_pr_debug_cat("\n"); - break; - } - - if (len == 6) { /* 2-byte elapsed time */ - __be16 opt_val2 = get_unaligned((__be16 *)value); - elapsed_time = ntohs(opt_val2); - } else { /* 4-byte elapsed time */ - opt_val = get_unaligned((__be32 *)value); - elapsed_time = ntohl(opt_val); - } - - dccp_pr_debug_cat(", ELAPSED_TIME=%u\n", elapsed_time); - - /* Give precedence to the biggest ELAPSED_TIME */ - if (elapsed_time > opt_recv->dccpor_elapsed_time) - opt_recv->dccpor_elapsed_time = elapsed_time; - break; - case DCCPO_ELAPSED_TIME: - if (dccp_packet_without_ack(skb)) /* RFC 4340, 13.2 */ - break; - - if (len == 2) { - __be16 opt_val2 = get_unaligned((__be16 *)value); - elapsed_time = ntohs(opt_val2); - } else if (len == 4) { - opt_val = get_unaligned((__be32 *)value); - elapsed_time = ntohl(opt_val); - } else { - goto out_invalid_option; - } - - if (elapsed_time > opt_recv->dccpor_elapsed_time) - opt_recv->dccpor_elapsed_time = elapsed_time; - - dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", - dccp_role(sk), elapsed_time); - break; - case DCCPO_MIN_RX_CCID_SPECIFIC ... 
DCCPO_MAX_RX_CCID_SPECIFIC: - if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, - pkt_type, opt, value, len)) - goto out_invalid_option; - break; - case DCCPO_ACK_VECTOR_0: - case DCCPO_ACK_VECTOR_1: - if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ - break; - /* - * Ack vectors are processed by the TX CCID if it is - * interested. The RX CCID need not parse Ack Vectors, - * since it is only interested in clearing old state. - */ - fallthrough; - case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: - if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, - pkt_type, opt, value, len)) - goto out_invalid_option; - break; - default: - DCCP_CRIT("DCCP(%p): option %d(len=%d) not " - "implemented, ignoring", sk, opt, len); - break; - } -ignore_option: - if (opt != DCCPO_MANDATORY) - mandatory = 0; - } - - /* mandatory was the last byte in option list -> reset connection */ - if (mandatory) - goto out_invalid_option; - -out_nonsensical_length: - /* RFC 4340, 5.8: ignore option and all remaining option space */ - return 0; - -out_invalid_option: - DCCP_INC_STATS(DCCP_MIB_INVALIDOPT); - rc = DCCP_RESET_CODE_OPTION_ERROR; -out_featneg_failed: - DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc); - DCCP_SKB_CB(skb)->dccpd_reset_code = rc; - DCCP_SKB_CB(skb)->dccpd_reset_data[0] = opt; - DCCP_SKB_CB(skb)->dccpd_reset_data[1] = len > 0 ? value[0] : 0; - DCCP_SKB_CB(skb)->dccpd_reset_data[2] = len > 1 ? value[1] : 0; - return -1; -} - -EXPORT_SYMBOL_GPL(dccp_parse_options); - -void dccp_encode_value_var(const u64 value, u8 *to, const u8 len) -{ - if (len >= DCCP_OPTVAL_MAXLEN) - *to++ = (value & 0xFF0000000000ull) >> 40; - if (len > 4) - *to++ = (value & 0xFF00000000ull) >> 32; - if (len > 3) - *to++ = (value & 0xFF000000) >> 24; - if (len > 2) - *to++ = (value & 0xFF0000) >> 16; - if (len > 1) - *to++ = (value & 0xFF00) >> 8; - if (len > 0) - *to++ = (value & 0xFF); -} - -static inline u8 dccp_ndp_len(const u64 ndp) -{ - if (likely(ndp <= 0xFF)) - return 1; - return likely(ndp <= USHRT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6); -} - -int dccp_insert_option(struct sk_buff *skb, const unsigned char option, - const void *value, const unsigned char len) -{ - unsigned char *to; - - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) - return -1; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2; - - to = skb_push(skb, len + 2); - *to++ = option; - *to++ = len + 2; - - memcpy(to, value, len); - return 0; -} - -EXPORT_SYMBOL_GPL(dccp_insert_option); - -static int dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb) -{ - struct dccp_sock *dp = dccp_sk(sk); - u64 ndp = dp->dccps_ndp_count; - - if (dccp_non_data_packet(skb)) - ++dp->dccps_ndp_count; - else - dp->dccps_ndp_count = 0; - - if (ndp > 0) { - unsigned char *ptr; - const int ndp_len = dccp_ndp_len(ndp); - const int len = ndp_len + 2; - - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) - return -1; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len; - - ptr = skb_push(skb, len); - *ptr++ = DCCPO_NDP_COUNT; - *ptr++ = len; - dccp_encode_value_var(ndp, ptr, ndp_len); - } - - return 0; -} - -static inline int dccp_elapsed_time_len(const u32 elapsed_time) -{ - return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 
2 : 4; -} - -static int dccp_insert_option_timestamp(struct sk_buff *skb) -{ - __be32 now = htonl(dccp_timestamp()); - /* yes, this will overflow, but that is the point: we want a - * 10 usec 32 bit timer, which means it wraps every 11.9 hours */ - - return dccp_insert_option(skb, DCCPO_TIMESTAMP, &now, sizeof(now)); -} - -static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp, - struct dccp_request_sock *dreq, - struct sk_buff *skb) -{ - __be32 tstamp_echo; - unsigned char *to; - u32 elapsed_time, elapsed_time_len, len; - - if (dreq != NULL) { - elapsed_time = dccp_timestamp() - dreq->dreq_timestamp_time; - tstamp_echo = htonl(dreq->dreq_timestamp_echo); - dreq->dreq_timestamp_echo = 0; - } else { - elapsed_time = dccp_timestamp() - dp->dccps_timestamp_time; - tstamp_echo = htonl(dp->dccps_timestamp_echo); - dp->dccps_timestamp_echo = 0; - } - - elapsed_time_len = dccp_elapsed_time_len(elapsed_time); - len = 6 + elapsed_time_len; - - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) - return -1; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len; - - to = skb_push(skb, len); - *to++ = DCCPO_TIMESTAMP_ECHO; - *to++ = len; - - memcpy(to, &tstamp_echo, 4); - to += 4; - - if (elapsed_time_len == 2) { - const __be16 var16 = htons((u16)elapsed_time); - memcpy(to, &var16, 2); - } else if (elapsed_time_len == 4) { - const __be32 var32 = htonl(elapsed_time); - memcpy(to, &var32, 4); - } - - return 0; -} - -static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - const u16 buflen = dccp_ackvec_buflen(av); - /* Figure out how many options we need to represent the ackvec */ - const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN); - u16 len = buflen + 2 * nr_opts; - u8 i, nonce = 0; - const unsigned char *tail, *from; - unsigned char *to; - - if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - DCCP_WARN("Lacking space for %u bytes on %s packet\n", len, - dccp_packet_name(dcb->dccpd_type)); - return -1; - } - /* - * Since Ack Vectors are variable-length, we cannot always predict - * their size. To catch exception cases where the space is running out - * on the skb, a separate Sync is scheduled to carry the Ack Vector. - */ - if (len > DCCPAV_MIN_OPTLEN && - len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) { - DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), " - "MPS=%u ==> reduce payload size?\n", len, skb->len, - dcb->dccpd_opt_len, dp->dccps_mss_cache); - dp->dccps_sync_scheduled = 1; - return 0; - } - dcb->dccpd_opt_len += len; - - to = skb_push(skb, len); - len = buflen; - from = av->av_buf + av->av_buf_head; - tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN; - - for (i = 0; i < nr_opts; ++i) { - int copylen = len; - - if (len > DCCP_SINGLE_OPT_MAXLEN) - copylen = DCCP_SINGLE_OPT_MAXLEN; - - /* - * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via - * its type; ack_nonce is the sum of all individual buf_nonce's. 
- */ - nonce ^= av->av_buf_nonce[i]; - - *to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i]; - *to++ = copylen + 2; - - /* Check if buf_head wraps */ - if (from + copylen > tail) { - const u16 tailsize = tail - from; - - memcpy(to, from, tailsize); - to += tailsize; - len -= tailsize; - copylen -= tailsize; - from = av->av_buf; - } - - memcpy(to, from, copylen); - from += copylen; - to += copylen; - len -= copylen; - } - /* - * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340. - */ - if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce)) - return -ENOBUFS; - return 0; -} - -/** - * dccp_insert_option_mandatory - Mandatory option (5.8.2) - * @skb: frame into which to insert option - * - * Note that since we are using skb_push, this function needs to be called - * _after_ inserting the option it is supposed to influence (stack order). - */ -int dccp_insert_option_mandatory(struct sk_buff *skb) -{ - if (DCCP_SKB_CB(skb)->dccpd_opt_len >= DCCP_MAX_OPT_LEN) - return -1; - - DCCP_SKB_CB(skb)->dccpd_opt_len++; - *(u8 *)skb_push(skb, 1) = DCCPO_MANDATORY; - return 0; -} - -/** - * dccp_insert_fn_opt - Insert single Feature-Negotiation option into @skb - * @skb: frame to insert feature negotiation option into - * @type: %DCCPO_CHANGE_L, %DCCPO_CHANGE_R, %DCCPO_CONFIRM_L, %DCCPO_CONFIRM_R - * @feat: one out of %dccp_feature_numbers - * @val: NN value or SP array (preferred element first) to copy - * @len: true length of @val in bytes (excluding first element repetition) - * @repeat_first: whether to copy the first element of @val twice - * - * The last argument is used to construct Confirm options, where the preferred - * value and the preference list appear separately (RFC 4340, 6.3.1). Preference - * lists are kept such that the preferred entry is always first, so we only need - * to copy twice, and avoid the overhead of cloning into a bigger array. - */ -int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, - u8 *val, u8 len, bool repeat_first) -{ - u8 tot_len, *to; - - /* take the `Feature' field and possible repetition into account */ - if (len > (DCCP_SINGLE_OPT_MAXLEN - 2)) { - DCCP_WARN("length %u for feature %u too large\n", len, feat); - return -1; - } - - if (unlikely(val == NULL || len == 0)) - len = repeat_first = false; - tot_len = 3 + repeat_first + len; - - if (DCCP_SKB_CB(skb)->dccpd_opt_len + tot_len > DCCP_MAX_OPT_LEN) { - DCCP_WARN("packet too small for feature %d option!\n", feat); - return -1; - } - DCCP_SKB_CB(skb)->dccpd_opt_len += tot_len; - - to = skb_push(skb, tot_len); - *to++ = type; - *to++ = tot_len; - *to++ = feat; - - if (repeat_first) - *to++ = *val; - if (len) - memcpy(to, val, len); - return 0; -} - -/* The length of all options needs to be a multiple of 4 (5.8) */ -static void dccp_insert_option_padding(struct sk_buff *skb) -{ - int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4; - - if (padding != 0) { - padding = 4 - padding; - memset(skb_push(skb, padding), 0, padding); - DCCP_SKB_CB(skb)->dccpd_opt_len += padding; - } -} - -int dccp_insert_options(struct sock *sk, struct sk_buff *skb) -{ - struct dccp_sock *dp = dccp_sk(sk); - - DCCP_SKB_CB(skb)->dccpd_opt_len = 0; - - if (dp->dccps_send_ndp_count && dccp_insert_option_ndp(sk, skb)) - return -1; - - if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) { - - /* Feature Negotiation */ - if (dccp_feat_insert_opts(dp, NULL, skb)) - return -1; - - if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) { - /* - * Obtain RTT sample from Request/Response exchange. 
- * This is currently used for TFRC initialisation. - */ - if (dccp_insert_option_timestamp(skb)) - return -1; - - } else if (dccp_ackvec_pending(sk) && - dccp_insert_option_ackvec(sk, skb)) { - return -1; - } - } - - if (dp->dccps_hc_rx_insert_options) { - if (ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb)) - return -1; - dp->dccps_hc_rx_insert_options = 0; - } - - if (dp->dccps_timestamp_echo != 0 && - dccp_insert_option_timestamp_echo(dp, NULL, skb)) - return -1; - - dccp_insert_option_padding(skb); - return 0; -} - -int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb) -{ - DCCP_SKB_CB(skb)->dccpd_opt_len = 0; - - if (dccp_feat_insert_opts(NULL, dreq, skb)) - return -1; - - /* Obtain RTT sample from Response/Ack exchange (used by TFRC). */ - if (dccp_insert_option_timestamp(skb)) - return -1; - - if (dreq->dreq_timestamp_echo != 0 && - dccp_insert_option_timestamp_echo(NULL, dreq, skb)) - return -1; - - dccp_insert_option_padding(skb); - return 0; -} diff --git a/net/dccp/output.c b/net/dccp/output.c deleted file mode 100644 index 39cf3430177a..000000000000 --- a/net/dccp/output.c +++ /dev/null @@ -1,708 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/dccp/output.c - * - * An implementation of the DCCP protocol - * Arnaldo Carvalho de Melo <acme@conectiva.com.br> - */ - -#include <linux/dccp.h> -#include <linux/kernel.h> -#include <linux/skbuff.h> -#include <linux/slab.h> -#include <linux/sched/signal.h> - -#include <net/inet_sock.h> -#include <net/sock.h> - -#include "ackvec.h" -#include "ccid.h" -#include "dccp.h" - -static inline void dccp_event_ack_sent(struct sock *sk) -{ - inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); -} - -/* enqueue @skb on sk_send_head for retransmission, return clone to send now */ -static struct sk_buff *dccp_skb_entail(struct sock *sk, struct sk_buff *skb) -{ - skb_set_owner_w(skb, sk); - WARN_ON(sk->sk_send_head); - sk->sk_send_head = skb; - return skb_clone(sk->sk_send_head, gfp_any()); -} - -/* - * All SKB's seen here are completely headerless. It is our - * job to build the DCCP header, and pass the packet down to - * IP so it can do the same plus pass the packet off to the - * device. - */ -static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) -{ - if (likely(skb != NULL)) { - struct inet_sock *inet = inet_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - struct dccp_hdr *dh; - /* XXX For now we're using only 48 bits sequence numbers */ - const u32 dccp_header_size = sizeof(*dh) + - sizeof(struct dccp_hdr_ext) + - dccp_packet_hdr_len(dcb->dccpd_type); - int err, set_ack = 1; - u64 ackno = dp->dccps_gsr; - /* - * Increment GSS here already in case the option code needs it. - * Update GSS for real only if option processing below succeeds. - */ - dcb->dccpd_seq = ADD48(dp->dccps_gss, 1); - - switch (dcb->dccpd_type) { - case DCCP_PKT_DATA: - set_ack = 0; - fallthrough; - case DCCP_PKT_DATAACK: - case DCCP_PKT_RESET: - break; - - case DCCP_PKT_REQUEST: - set_ack = 0; - /* Use ISS on the first (non-retransmitted) Request. */ - if (icsk->icsk_retransmits == 0) - dcb->dccpd_seq = dp->dccps_iss; - fallthrough; - - case DCCP_PKT_SYNC: - case DCCP_PKT_SYNCACK: - ackno = dcb->dccpd_ack_seq; - fallthrough; - default: - /* - * Set owner/destructor: some skbs are allocated via - * alloc_skb (e.g. when retransmission may happen). 
- * Only Data, DataAck, and Reset packets should come - * through here with skb->sk set. - */ - WARN_ON(skb->sk); - skb_set_owner_w(skb, sk); - break; - } - - if (dccp_insert_options(sk, skb)) { - kfree_skb(skb); - return -EPROTO; - } - - - /* Build DCCP header and checksum it. */ - dh = dccp_zeroed_hdr(skb, dccp_header_size); - dh->dccph_type = dcb->dccpd_type; - dh->dccph_sport = inet->inet_sport; - dh->dccph_dport = inet->inet_dport; - dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4; - dh->dccph_ccval = dcb->dccpd_ccval; - dh->dccph_cscov = dp->dccps_pcslen; - /* XXX For now we're using only 48 bits sequence numbers */ - dh->dccph_x = 1; - - dccp_update_gss(sk, dcb->dccpd_seq); - dccp_hdr_set_seq(dh, dp->dccps_gss); - if (set_ack) - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno); - - switch (dcb->dccpd_type) { - case DCCP_PKT_REQUEST: - dccp_hdr_request(skb)->dccph_req_service = - dp->dccps_service; - /* - * Limit Ack window to ISS <= P.ackno <= GSS, so that - * only Responses to Requests we sent are considered. - */ - dp->dccps_awl = dp->dccps_iss; - break; - case DCCP_PKT_RESET: - dccp_hdr_reset(skb)->dccph_reset_code = - dcb->dccpd_reset_code; - break; - } - - icsk->icsk_af_ops->send_check(sk, skb); - - if (set_ack) - dccp_event_ack_sent(sk); - - DCCP_INC_STATS(DCCP_MIB_OUTSEGS); - - err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); - return net_xmit_eval(err); - } - return -ENOBUFS; -} - -/** - * dccp_determine_ccmps - Find out about CCID-specific packet-size limits - * @dp: socket to find packet size limits of - * - * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.), - * since the RX CCID is restricted to feedback packets (Acks), which are small - * in comparison with the data traffic. A value of 0 means "no current CCMPS". - */ -static u32 dccp_determine_ccmps(const struct dccp_sock *dp) -{ - const struct ccid *tx_ccid = dp->dccps_hc_tx_ccid; - - if (tx_ccid == NULL || tx_ccid->ccid_ops == NULL) - return 0; - return tx_ccid->ccid_ops->ccid_ccmps; -} - -unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct dccp_sock *dp = dccp_sk(sk); - u32 ccmps = dccp_determine_ccmps(dp); - u32 cur_mps = ccmps ? min(pmtu, ccmps) : pmtu; - - /* Account for header lengths and IPv4/v6 option overhead */ - cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len + - sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext)); - - /* - * Leave enough headroom for common DCCP header options. - * This only considers options which may appear on DCCP-Data packets, as - * per table 3 in RFC 4340, 5.8. When running out of space for other - * options (eg. Ack Vector which can take up to 255 bytes), it is better - * to schedule a separate Ack. Thus we leave headroom for the following: - * - 1 byte for Slow Receiver (11.6) - * - 6 bytes for Timestamp (13.1) - * - 10 bytes for Timestamp Echo (13.3) - * - 8 bytes for NDP count (7.7, when activated) - * - 6 bytes for Data Checksum (9.3) - * - %DCCPAV_MIN_OPTLEN bytes for Ack Vector size (11.4, when enabled) - */ - cur_mps -= roundup(1 + 6 + 10 + dp->dccps_send_ndp_count * 8 + 6 + - (dp->dccps_hc_rx_ackvec ? 
DCCPAV_MIN_OPTLEN : 0), 4); - - /* And store cached results */ - icsk->icsk_pmtu_cookie = pmtu; - WRITE_ONCE(dp->dccps_mss_cache, cur_mps); - - return cur_mps; -} - -EXPORT_SYMBOL_GPL(dccp_sync_mss); - -void dccp_write_space(struct sock *sk) -{ - struct socket_wq *wq; - - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - if (skwq_has_sleeper(wq)) - wake_up_interruptible(&wq->wait); - /* Should agree with poll, otherwise some programs break */ - if (sock_writeable(sk)) - sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT); - - rcu_read_unlock(); -} - -/** - * dccp_wait_for_ccid - Await CCID send permission - * @sk: socket to wait for - * @delay: timeout in jiffies - * - * This is used by CCIDs which need to delay the send time in process context. - */ -static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay) -{ - DEFINE_WAIT(wait); - long remaining; - - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - sk->sk_write_pending++; - release_sock(sk); - - remaining = schedule_timeout(delay); - - lock_sock(sk); - sk->sk_write_pending--; - finish_wait(sk_sleep(sk), &wait); - - if (signal_pending(current) || sk->sk_err) - return -1; - return remaining; -} - -/** - * dccp_xmit_packet - Send data packet under control of CCID - * @sk: socket to send data packet on - * - * Transmits next-queued payload and informs CCID to account for the packet. - */ -static void dccp_xmit_packet(struct sock *sk) -{ - int err, len; - struct dccp_sock *dp = dccp_sk(sk); - struct sk_buff *skb = dccp_qpolicy_pop(sk); - - if (unlikely(skb == NULL)) - return; - len = skb->len; - - if (sk->sk_state == DCCP_PARTOPEN) { - const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; - /* - * See 8.1.5 - Handshake Completion. - * - * For robustness we resend Confirm options until the client has - * entered OPEN. During the initial feature negotiation, the MPS - * is smaller than usual, reduced by the Change/Confirm options. - */ - if (!list_empty(&dp->dccps_featneg) && len > cur_mps) { - DCCP_WARN("Payload too large (%d) for featneg.\n", len); - dccp_send_ack(sk); - dccp_feat_list_purge(&dp->dccps_featneg); - } - - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - inet_csk(sk)->icsk_rto, - DCCP_RTO_MAX); - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK; - } else if (dccp_ack_pending(sk)) { - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK; - } else { - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA; - } - - err = dccp_transmit_skb(sk, skb); - if (err) - dccp_pr_debug("transmit_skb() returned err=%d\n", err); - /* - * Register this one as sent even if an error occurred. To the remote - * end a local packet drop is indistinguishable from network loss, i.e. - * any local drop will eventually be reported via receiver feedback. - */ - ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); - - /* - * If the CCID needs to transfer additional header options out-of-band - * (e.g. Ack Vectors or feature-negotiation options), it activates this - * flag to schedule a Sync. The Sync will automatically incorporate all - * currently pending header options, thus clearing the backlog. - */ - if (dp->dccps_sync_scheduled) - dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); -} - -/** - * dccp_flush_write_queue - Drain queue at end of connection - * @sk: socket to be drained - * @time_budget: time allowed to drain the queue - * - * Since dccp_sendmsg queues packets without waiting for them to be sent, it may - * happen that the TX queue is not empty at the end of a connection. 
We give the - * HC-sender CCID a grace period of up to @time_budget jiffies. If this function - * returns with a non-empty write queue, it will be purged later. - */ -void dccp_flush_write_queue(struct sock *sk, long *time_budget) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct sk_buff *skb; - long delay, rc; - - while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) { - rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); - - switch (ccid_packet_dequeue_eval(rc)) { - case CCID_PACKET_WILL_DEQUEUE_LATER: - /* - * If the CCID determines when to send, the next sending - * time is unknown or the CCID may not even send again - * (e.g. remote host crashes or lost Ack packets). - */ - DCCP_WARN("CCID did not manage to send all packets\n"); - return; - case CCID_PACKET_DELAY: - delay = msecs_to_jiffies(rc); - if (delay > *time_budget) - return; - rc = dccp_wait_for_ccid(sk, delay); - if (rc < 0) - return; - *time_budget -= (delay - rc); - /* check again if we can send now */ - break; - case CCID_PACKET_SEND_AT_ONCE: - dccp_xmit_packet(sk); - break; - case CCID_PACKET_ERR: - skb_dequeue(&sk->sk_write_queue); - kfree_skb(skb); - dccp_pr_debug("packet discarded due to err=%ld\n", rc); - } - } -} - -void dccp_write_xmit(struct sock *sk) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct sk_buff *skb; - - while ((skb = dccp_qpolicy_top(sk))) { - int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); - - switch (ccid_packet_dequeue_eval(rc)) { - case CCID_PACKET_WILL_DEQUEUE_LATER: - return; - case CCID_PACKET_DELAY: - sk_reset_timer(sk, &dp->dccps_xmit_timer, - jiffies + msecs_to_jiffies(rc)); - return; - case CCID_PACKET_SEND_AT_ONCE: - dccp_xmit_packet(sk); - break; - case CCID_PACKET_ERR: - dccp_qpolicy_drop(sk, skb); - dccp_pr_debug("packet discarded due to err=%d\n", rc); - } - } -} - -/** - * dccp_retransmit_skb - Retransmit Request, Close, or CloseReq packets - * @sk: socket to perform retransmit on - * - * There are only four retransmittable packet types in DCCP: - * - Request in client-REQUEST state (sec. 8.1.1), - * - CloseReq in server-CLOSEREQ state (sec. 8.3), - * - Close in node-CLOSING state (sec. 8.3), - * - Acks in client-PARTOPEN state (sec. 8.1.5, handled by dccp_delack_timer()). - * This function expects sk->sk_send_head to contain the original skb. - */ -int dccp_retransmit_skb(struct sock *sk) -{ - WARN_ON(sk->sk_send_head == NULL); - - if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk) != 0) - return -EHOSTUNREACH; /* Routing failure or similar. */ - - /* this count is used to distinguish original and retransmitted skb */ - inet_csk(sk)->icsk_retransmits++; - - return dccp_transmit_skb(sk, skb_clone(sk->sk_send_head, GFP_ATOMIC)); -} - -struct sk_buff *dccp_make_response(const struct sock *sk, struct dst_entry *dst, - struct request_sock *req) -{ - struct dccp_hdr *dh; - struct dccp_request_sock *dreq; - const u32 dccp_header_size = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_response); - struct sk_buff *skb; - - /* sk is marked const to clearly express that we don't hold the socket lock. - * sock_wmalloc() will atomically change sk->sk_wmem_alloc, - * so it is safe to promote sk to non-const. 
- */ - skb = sock_wmalloc((struct sock *)sk, MAX_DCCP_HEADER, 1, - GFP_ATOMIC); - if (!skb) - return NULL; - - skb_reserve(skb, MAX_DCCP_HEADER); - - skb_dst_set(skb, dst_clone(dst)); - - dreq = dccp_rsk(req); - if (inet_rsk(req)->acked) /* increase GSS upon retransmission */ - dccp_inc_seqno(&dreq->dreq_gss); - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; - DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_gss; - - /* Resolve feature dependencies resulting from choice of CCID */ - if (dccp_feat_server_ccid_dependencies(dreq)) - goto response_failed; - - if (dccp_insert_options_rsk(dreq, skb)) - goto response_failed; - - /* Build and checksum header */ - dh = dccp_zeroed_hdr(skb, dccp_header_size); - - dh->dccph_sport = htons(inet_rsk(req)->ir_num); - dh->dccph_dport = inet_rsk(req)->ir_rmt_port; - dh->dccph_doff = (dccp_header_size + - DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; - dh->dccph_type = DCCP_PKT_RESPONSE; - dh->dccph_x = 1; - dccp_hdr_set_seq(dh, dreq->dreq_gss); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_gsr); - dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service; - - dccp_csum_outgoing(skb); - - /* We use `acked' to remember that a Response was already sent. */ - inet_rsk(req)->acked = 1; - DCCP_INC_STATS(DCCP_MIB_OUTSEGS); - return skb; -response_failed: - kfree_skb(skb); - return NULL; -} - -EXPORT_SYMBOL_GPL(dccp_make_response); - -/* answer offending packet in @rcv_skb with Reset from control socket @ctl */ -struct sk_buff *dccp_ctl_make_reset(struct sock *sk, struct sk_buff *rcv_skb) -{ - struct dccp_hdr *rxdh = dccp_hdr(rcv_skb), *dh; - struct dccp_skb_cb *dcb = DCCP_SKB_CB(rcv_skb); - const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_reset); - struct dccp_hdr_reset *dhr; - struct sk_buff *skb; - - skb = alloc_skb(sk->sk_prot->max_header, GFP_ATOMIC); - if (skb == NULL) - return NULL; - - skb_reserve(skb, sk->sk_prot->max_header); - - /* Swap the send and the receive. */ - dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len); - dh->dccph_type = DCCP_PKT_RESET; - dh->dccph_sport = rxdh->dccph_dport; - dh->dccph_dport = rxdh->dccph_sport; - dh->dccph_doff = dccp_hdr_reset_len / 4; - dh->dccph_x = 1; - - dhr = dccp_hdr_reset(skb); - dhr->dccph_reset_code = dcb->dccpd_reset_code; - - switch (dcb->dccpd_reset_code) { - case DCCP_RESET_CODE_PACKET_ERROR: - dhr->dccph_reset_data[0] = rxdh->dccph_type; - break; - case DCCP_RESET_CODE_OPTION_ERROR: - case DCCP_RESET_CODE_MANDATORY_ERROR: - memcpy(dhr->dccph_reset_data, dcb->dccpd_reset_data, 3); - break; - } - /* - * From RFC 4340, 8.3.1: - * If P.ackno exists, set R.seqno := P.ackno + 1. - * Else set R.seqno := 0. - */ - if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) - dccp_hdr_set_seq(dh, ADD48(dcb->dccpd_ack_seq, 1)); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dcb->dccpd_seq); - - dccp_csum_outgoing(skb); - return skb; -} - -EXPORT_SYMBOL_GPL(dccp_ctl_make_reset); - -/* send Reset on established socket, to close or abort the connection */ -int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) -{ - struct sk_buff *skb; - /* - * FIXME: what if rebuild_header fails? - * Should we be doing a rebuild_header here? - */ - int err = inet_csk(sk)->icsk_af_ops->rebuild_header(sk); - - if (err != 0) - return err; - - skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1, GFP_ATOMIC); - if (skb == NULL) - return -ENOBUFS; - - /* Reserve space for headers and prepare control bits. 
*/ - skb_reserve(skb, sk->sk_prot->max_header); - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET; - DCCP_SKB_CB(skb)->dccpd_reset_code = code; - - return dccp_transmit_skb(sk, skb); -} - -/* - * Do all connect socket setups that can be done AF-independently. - */ -int dccp_connect(struct sock *sk) -{ - struct sk_buff *skb; - struct dccp_sock *dp = dccp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); - struct inet_connection_sock *icsk = inet_csk(sk); - - sk->sk_err = 0; - sock_reset_flag(sk, SOCK_DONE); - - dccp_sync_mss(sk, dst_mtu(dst)); - - /* do not connect if feature negotiation setup fails */ - if (dccp_feat_finalise_settings(dccp_sk(sk))) - return -EPROTO; - - /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */ - dp->dccps_gar = dp->dccps_iss; - - skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation); - if (unlikely(skb == NULL)) - return -ENOBUFS; - - /* Reserve space for headers. */ - skb_reserve(skb, sk->sk_prot->max_header); - - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; - - dccp_transmit_skb(sk, dccp_skb_entail(sk, skb)); - DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); - - /* Timer for repeating the REQUEST until an answer arrives. */ - icsk->icsk_retransmits = 0; - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - icsk->icsk_rto, DCCP_RTO_MAX); - return 0; -} - -EXPORT_SYMBOL_GPL(dccp_connect); - -void dccp_send_ack(struct sock *sk) -{ - /* If we have been reset, we may not send again. */ - if (sk->sk_state != DCCP_CLOSED) { - struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header, - GFP_ATOMIC); - - if (skb == NULL) { - inet_csk_schedule_ack(sk); - inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - TCP_DELACK_MAX, - DCCP_RTO_MAX); - return; - } - - /* Reserve space for headers */ - skb_reserve(skb, sk->sk_prot->max_header); - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK; - dccp_transmit_skb(sk, skb); - } -} - -EXPORT_SYMBOL_GPL(dccp_send_ack); - -#if 0 -/* FIXME: Is this still necessary (11.3) - currently nowhere used by DCCP. */ -void dccp_send_delayed_ack(struct sock *sk) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - /* - * FIXME: tune this timer. elapsed time fixes the skew, so no problem - * with using 2s, and active senders also piggyback the ACK into a - * DATAACK packet, so this is really for quiescent senders. - */ - unsigned long timeout = jiffies + 2 * HZ; - - /* Use the new timeout only if there wasn't an older one earlier. */ - if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) { - /* If delack timer was blocked or is about to expire, - * send ACK now. - * - * FIXME: check the "about to expire" part - */ - if (icsk->icsk_ack.blocked) { - dccp_send_ack(sk); - return; - } - - if (!time_before(timeout, icsk_delack_timeout(icsk))) - timeout = icsk_delack_timeout(icsk); - } - icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; - sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); -} -#endif - -void dccp_send_sync(struct sock *sk, const u64 ackno, - const enum dccp_pkt_type pkt_type) -{ - /* - * We are not putting this on the write queue, so - * dccp_transmit_skb() will set the ownership to this - * sock. - */ - struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header, GFP_ATOMIC); - - if (skb == NULL) { - /* FIXME: how to make sure the sync is sent? */ - DCCP_CRIT("could not send %s", dccp_packet_name(pkt_type)); - return; - } - - /* Reserve space for headers and prepare control bits. 
*/ - skb_reserve(skb, sk->sk_prot->max_header); - DCCP_SKB_CB(skb)->dccpd_type = pkt_type; - DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; - - /* - * Clear the flag in case the Sync was scheduled for out-of-band data, - * such as carrying a long Ack Vector. - */ - dccp_sk(sk)->dccps_sync_scheduled = 0; - - dccp_transmit_skb(sk, skb); -} - -EXPORT_SYMBOL_GPL(dccp_send_sync); - -/* - * Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This - * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under - * any circumstances. - */ -void dccp_send_close(struct sock *sk, const int active) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct sk_buff *skb; - const gfp_t prio = active ? GFP_KERNEL : GFP_ATOMIC; - - skb = alloc_skb(sk->sk_prot->max_header, prio); - if (skb == NULL) - return; - - /* Reserve space for headers and prepare control bits. */ - skb_reserve(skb, sk->sk_prot->max_header); - if (dp->dccps_role == DCCP_ROLE_SERVER && !dp->dccps_server_timewait) - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSEREQ; - else - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE; - - if (active) { - skb = dccp_skb_entail(sk, skb); - /* - * Retransmission timer for active-close: RFC 4340, 8.3 requires - * retransmitting the Close/CloseReq until the CLOSING/CLOSEREQ - * state can be left. The initial timeout is 2 RTTs. - * Since RTT measurement is done by the CCIDs, there is no easy - * way to get an RTT sample. The fallback RTT from RFC 4340, 3.4 - * is too low (200ms); we use a high value to avoid unnecessary - * retransmissions when the link RTT is > 0.2 seconds. - * FIXME: Let main module sample RTTs and use that instead. - */ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - DCCP_TIMEOUT_INIT, DCCP_RTO_MAX); - } - dccp_transmit_skb(sk, skb); -} diff --git a/net/dccp/proto.c b/net/dccp/proto.c deleted file mode 100644 index fcc5c9d64f46..000000000000 --- a/net/dccp/proto.c +++ /dev/null @@ -1,1293 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * net/dccp/proto.c - * - * An implementation of the DCCP protocol - * Arnaldo Carvalho de Melo <acme@conectiva.com.br> - */ - -#include <linux/dccp.h> -#include <linux/module.h> -#include <linux/types.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <linux/in.h> -#include <linux/if_arp.h> -#include <linux/init.h> -#include <linux/random.h> -#include <linux/slab.h> -#include <net/checksum.h> - -#include <net/inet_sock.h> -#include <net/inet_common.h> -#include <net/sock.h> -#include <net/xfrm.h> - -#include <asm/ioctls.h> -#include <linux/spinlock.h> -#include <linux/timer.h> -#include <linux/delay.h> -#include <linux/poll.h> - -#include "ccid.h" -#include "dccp.h" -#include "feat.h" - -#define CREATE_TRACE_POINTS -#include "trace.h" - -DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; - -EXPORT_SYMBOL_GPL(dccp_statistics); - -DEFINE_PER_CPU(unsigned int, dccp_orphan_count); -EXPORT_PER_CPU_SYMBOL_GPL(dccp_orphan_count); - -struct inet_hashinfo dccp_hashinfo; -EXPORT_SYMBOL_GPL(dccp_hashinfo); - -/* the maximum queue length for tx in packets. 
0 is no limit */ -int sysctl_dccp_tx_qlen __read_mostly = 5; - -#ifdef CONFIG_IP_DCCP_DEBUG -static const char *dccp_state_name(const int state) -{ - static const char *const dccp_state_names[] = { - [DCCP_OPEN] = "OPEN", - [DCCP_REQUESTING] = "REQUESTING", - [DCCP_PARTOPEN] = "PARTOPEN", - [DCCP_LISTEN] = "LISTEN", - [DCCP_RESPOND] = "RESPOND", - [DCCP_CLOSING] = "CLOSING", - [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ", - [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE", - [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ", - [DCCP_TIME_WAIT] = "TIME_WAIT", - [DCCP_CLOSED] = "CLOSED", - }; - - if (state >= DCCP_MAX_STATES) - return "INVALID STATE!"; - else - return dccp_state_names[state]; -} -#endif - -void dccp_set_state(struct sock *sk, const int state) -{ - const int oldstate = sk->sk_state; - - dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk, - dccp_state_name(oldstate), dccp_state_name(state)); - WARN_ON(state == oldstate); - - switch (state) { - case DCCP_OPEN: - if (oldstate != DCCP_OPEN) - DCCP_INC_STATS(DCCP_MIB_CURRESTAB); - /* Client retransmits all Confirm options until entering OPEN */ - if (oldstate == DCCP_PARTOPEN) - dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg); - break; - - case DCCP_CLOSED: - if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ || - oldstate == DCCP_CLOSING) - DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); - - sk->sk_prot->unhash(sk); - if (inet_csk(sk)->icsk_bind_hash != NULL && - !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) - inet_put_port(sk); - fallthrough; - default: - if (oldstate == DCCP_OPEN) - DCCP_DEC_STATS(DCCP_MIB_CURRESTAB); - } - - /* Change state AFTER socket is unhashed to avoid closed - * socket sitting in hash tables. - */ - inet_sk_set_state(sk, state); -} - -EXPORT_SYMBOL_GPL(dccp_set_state); - -static void dccp_finish_passive_close(struct sock *sk) -{ - switch (sk->sk_state) { - case DCCP_PASSIVE_CLOSE: - /* Node (client or server) has received Close packet. */ - dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); - dccp_set_state(sk, DCCP_CLOSED); - break; - case DCCP_PASSIVE_CLOSEREQ: - /* - * Client received CloseReq. We set the `active' flag so that - * dccp_send_close() retransmits the Close as per RFC 4340, 8.3. 
- */ - dccp_send_close(sk, 1); - dccp_set_state(sk, DCCP_CLOSING); - } -} - -void dccp_done(struct sock *sk) -{ - dccp_set_state(sk, DCCP_CLOSED); - dccp_clear_xmit_timers(sk); - - sk->sk_shutdown = SHUTDOWN_MASK; - - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_state_change(sk); - else - inet_csk_destroy_sock(sk); -} - -EXPORT_SYMBOL_GPL(dccp_done); - -const char *dccp_packet_name(const int type) -{ - static const char *const dccp_packet_names[] = { - [DCCP_PKT_REQUEST] = "REQUEST", - [DCCP_PKT_RESPONSE] = "RESPONSE", - [DCCP_PKT_DATA] = "DATA", - [DCCP_PKT_ACK] = "ACK", - [DCCP_PKT_DATAACK] = "DATAACK", - [DCCP_PKT_CLOSEREQ] = "CLOSEREQ", - [DCCP_PKT_CLOSE] = "CLOSE", - [DCCP_PKT_RESET] = "RESET", - [DCCP_PKT_SYNC] = "SYNC", - [DCCP_PKT_SYNCACK] = "SYNCACK", - }; - - if (type >= DCCP_NR_PKT_TYPES) - return "INVALID"; - else - return dccp_packet_names[type]; -} - -EXPORT_SYMBOL_GPL(dccp_packet_name); - -void dccp_destruct_common(struct sock *sk) -{ - struct dccp_sock *dp = dccp_sk(sk); - - ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); - dp->dccps_hc_tx_ccid = NULL; -} -EXPORT_SYMBOL_GPL(dccp_destruct_common); - -static void dccp_sk_destruct(struct sock *sk) -{ - dccp_destruct_common(sk); - inet_sock_destruct(sk); -} - -int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct inet_connection_sock *icsk = inet_csk(sk); - - pr_warn_once("DCCP is deprecated and scheduled to be removed in 2025, " - "please contact the netdev mailing list\n"); - - icsk->icsk_rto = DCCP_TIMEOUT_INIT; - icsk->icsk_syn_retries = sysctl_dccp_request_retries; - sk->sk_state = DCCP_CLOSED; - sk->sk_write_space = dccp_write_space; - sk->sk_destruct = dccp_sk_destruct; - icsk->icsk_sync_mss = dccp_sync_mss; - dp->dccps_mss_cache = 536; - dp->dccps_rate_last = jiffies; - dp->dccps_role = DCCP_ROLE_UNDEFINED; - dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; - dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; - - dccp_init_xmit_timers(sk); - - INIT_LIST_HEAD(&dp->dccps_featneg); - /* control socket doesn't need feat nego */ - if (likely(ctl_sock_initialized)) - return dccp_feat_init(sk); - return 0; -} - -EXPORT_SYMBOL_GPL(dccp_init_sock); - -void dccp_destroy_sock(struct sock *sk) -{ - struct dccp_sock *dp = dccp_sk(sk); - - __skb_queue_purge(&sk->sk_write_queue); - if (sk->sk_send_head != NULL) { - kfree_skb(sk->sk_send_head); - sk->sk_send_head = NULL; - } - - /* Clean up a referenced DCCP bind bucket. */ - if (inet_csk(sk)->icsk_bind_hash != NULL) - inet_put_port(sk); - - kfree(dp->dccps_service_list); - dp->dccps_service_list = NULL; - - if (dp->dccps_hc_rx_ackvec != NULL) { - dccp_ackvec_free(dp->dccps_hc_rx_ackvec); - dp->dccps_hc_rx_ackvec = NULL; - } - ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); - dp->dccps_hc_rx_ccid = NULL; - - /* clean up feature negotiation state */ - dccp_feat_list_purge(&dp->dccps_featneg); -} - -EXPORT_SYMBOL_GPL(dccp_destroy_sock); - -static inline int dccp_need_reset(int state) -{ - return state != DCCP_CLOSED && state != DCCP_LISTEN && - state != DCCP_REQUESTING; -} - -int dccp_disconnect(struct sock *sk, int flags) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct inet_sock *inet = inet_sk(sk); - struct dccp_sock *dp = dccp_sk(sk); - const int old_state = sk->sk_state; - - if (old_state != DCCP_CLOSED) - dccp_set_state(sk, DCCP_CLOSED); - - /* - * This corresponds to the ABORT function of RFC793, sec. 3.8 - * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted". 
- */ - if (old_state == DCCP_LISTEN) { - inet_csk_listen_stop(sk); - } else if (dccp_need_reset(old_state)) { - dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); - sk->sk_err = ECONNRESET; - } else if (old_state == DCCP_REQUESTING) - sk->sk_err = ECONNRESET; - - dccp_clear_xmit_timers(sk); - ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); - dp->dccps_hc_rx_ccid = NULL; - - __skb_queue_purge(&sk->sk_receive_queue); - __skb_queue_purge(&sk->sk_write_queue); - if (sk->sk_send_head != NULL) { - __kfree_skb(sk->sk_send_head); - sk->sk_send_head = NULL; - } - - inet->inet_dport = 0; - - inet_bhash2_reset_saddr(sk); - - sk->sk_shutdown = 0; - sock_reset_flag(sk, SOCK_DONE); - - icsk->icsk_backoff = 0; - inet_csk_delack_init(sk); - __sk_dst_reset(sk); - - WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); - - sk_error_report(sk); - return 0; -} - -EXPORT_SYMBOL_GPL(dccp_disconnect); - -/* - * Wait for a DCCP event. - * - * Note that we don't need to lock the socket, as the upper poll layers - * take care of normal races (between the test and the event) and we don't - * go look at any of the socket buffers directly. - */ -__poll_t dccp_poll(struct file *file, struct socket *sock, - poll_table *wait) -{ - struct sock *sk = sock->sk; - __poll_t mask; - u8 shutdown; - int state; - - sock_poll_wait(file, sock, wait); - - state = inet_sk_state_load(sk); - if (state == DCCP_LISTEN) - return inet_csk_listen_poll(sk); - - /* Socket is not locked. We are protected from async events - by poll logic and correct handling of state changes - made by another threads is impossible in any case. - */ - - mask = 0; - if (READ_ONCE(sk->sk_err)) - mask = EPOLLERR; - shutdown = READ_ONCE(sk->sk_shutdown); - - if (shutdown == SHUTDOWN_MASK || state == DCCP_CLOSED) - mask |= EPOLLHUP; - if (shutdown & RCV_SHUTDOWN) - mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; - - /* Connected? */ - if ((1 << state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { - if (atomic_read(&sk->sk_rmem_alloc) > 0) - mask |= EPOLLIN | EPOLLRDNORM; - - if (!(shutdown & SEND_SHUTDOWN)) { - if (sk_stream_is_writeable(sk)) { - mask |= EPOLLOUT | EPOLLWRNORM; - } else { /* send SIGIO later */ - sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - - /* Race breaker. If space is freed after - * wspace test but before the flags are set, - * IO signal will be lost. - */ - if (sk_stream_is_writeable(sk)) - mask |= EPOLLOUT | EPOLLWRNORM; - } - } - } - return mask; -} -EXPORT_SYMBOL_GPL(dccp_poll); - -int dccp_ioctl(struct sock *sk, int cmd, int *karg) -{ - int rc = -ENOTCONN; - - lock_sock(sk); - - if (sk->sk_state == DCCP_LISTEN) - goto out; - - switch (cmd) { - case SIOCOUTQ: { - *karg = sk_wmem_alloc_get(sk); - /* Using sk_wmem_alloc here because sk_wmem_queued is not used by DCCP and - * always 0, comparably to UDP. - */ - - rc = 0; - } - break; - case SIOCINQ: { - struct sk_buff *skb; - *karg = 0; - - skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) { - /* - * We will only return the amount of this packet since - * that is all that will be read. 
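/*
 * [Editorial sketch, not part of the patch] Userspace view of the
 * SIOCINQ semantics implemented here: on a DCCP socket it reports only
 * the length of the datagram at the head of the receive queue, since a
 * single read() returns at most that much. "fd" is a hypothetical
 * SOCK_DCCP descriptor; error handling is omitted.
 */
#include <sys/ioctl.h>
#include <linux/sockios.h>

static int dccp_next_dgram_len(int fd)
{
	int len = 0;

	/* SIOCINQ is the datagram-socket spelling of FIONREAD */
	if (ioctl(fd, SIOCINQ, &len) < 0)
		return -1;
	return len;
}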
- */ - *karg = skb->len; - } - rc = 0; - } - break; - default: - rc = -ENOIOCTLCMD; - break; - } -out: - release_sock(sk); - return rc; -} - -EXPORT_SYMBOL_GPL(dccp_ioctl); - -static int dccp_setsockopt_service(struct sock *sk, const __be32 service, - sockptr_t optval, unsigned int optlen) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_service_list *sl = NULL; - - if (service == DCCP_SERVICE_INVALID_VALUE || - optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32)) - return -EINVAL; - - if (optlen > sizeof(service)) { - sl = kmalloc(optlen, GFP_KERNEL); - if (sl == NULL) - return -ENOMEM; - - sl->dccpsl_nr = optlen / sizeof(u32) - 1; - if (copy_from_sockptr_offset(sl->dccpsl_list, optval, - sizeof(service), optlen - sizeof(service)) || - dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) { - kfree(sl); - return -EFAULT; - } - } - - lock_sock(sk); - dp->dccps_service = service; - - kfree(dp->dccps_service_list); - - dp->dccps_service_list = sl; - release_sock(sk); - return 0; -} - -static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx) -{ - u8 *list, len; - int i, rc; - - if (cscov < 0 || cscov > 15) - return -EINVAL; - /* - * Populate a list of permissible values, in the range cscov...15. This - * is necessary since feature negotiation of single values only works if - * both sides incidentally choose the same value. Since the list starts - * lowest-value first, negotiation will pick the smallest shared value. - */ - if (cscov == 0) - return 0; - len = 16 - cscov; - - list = kmalloc(len, GFP_KERNEL); - if (list == NULL) - return -ENOBUFS; - - for (i = 0; i < len; i++) - list[i] = cscov++; - - rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len); - - if (rc == 0) { - if (rx) - dccp_sk(sk)->dccps_pcrlen = cscov; - else - dccp_sk(sk)->dccps_pcslen = cscov; - } - kfree(list); - return rc; -} - -static int dccp_setsockopt_ccid(struct sock *sk, int type, - sockptr_t optval, unsigned int optlen) -{ - u8 *val; - int rc = 0; - - if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS) - return -EINVAL; - - val = memdup_sockptr(optval, optlen); - if (IS_ERR(val)) - return PTR_ERR(val); - - lock_sock(sk); - if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID) - rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen); - - if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID)) - rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen); - release_sock(sk); - - kfree(val); - return rc; -} - -static int do_dccp_setsockopt(struct sock *sk, int level, int optname, - sockptr_t optval, unsigned int optlen) -{ - struct dccp_sock *dp = dccp_sk(sk); - int val, err = 0; - - switch (optname) { - case DCCP_SOCKOPT_PACKET_SIZE: - DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); - return 0; - case DCCP_SOCKOPT_CHANGE_L: - case DCCP_SOCKOPT_CHANGE_R: - DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n"); - return 0; - case DCCP_SOCKOPT_CCID: - case DCCP_SOCKOPT_RX_CCID: - case DCCP_SOCKOPT_TX_CCID: - return dccp_setsockopt_ccid(sk, optname, optval, optlen); - } - - if (optlen < (int)sizeof(int)) - return -EINVAL; - - if (copy_from_sockptr(&val, optval, sizeof(int))) - return -EFAULT; - - if (optname == DCCP_SOCKOPT_SERVICE) - return dccp_setsockopt_service(sk, val, optval, optlen); - - lock_sock(sk); - switch (optname) { - case DCCP_SOCKOPT_SERVER_TIMEWAIT: - if (dp->dccps_role != DCCP_ROLE_SERVER) - err = -EOPNOTSUPP; - else - dp->dccps_server_timewait = (val != 0); - break; - case DCCP_SOCKOPT_SEND_CSCOV: - err = 
dccp_setsockopt_cscov(sk, val, false); - break; - case DCCP_SOCKOPT_RECV_CSCOV: - err = dccp_setsockopt_cscov(sk, val, true); - break; - case DCCP_SOCKOPT_QPOLICY_ID: - if (sk->sk_state != DCCP_CLOSED) - err = -EISCONN; - else if (val < 0 || val >= DCCPQ_POLICY_MAX) - err = -EINVAL; - else - dp->dccps_qpolicy = val; - break; - case DCCP_SOCKOPT_QPOLICY_TXQLEN: - if (val < 0) - err = -EINVAL; - else - dp->dccps_tx_qlen = val; - break; - default: - err = -ENOPROTOOPT; - break; - } - release_sock(sk); - - return err; -} - -int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, - unsigned int optlen) -{ - if (level != SOL_DCCP) - return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level, - optname, optval, - optlen); - return do_dccp_setsockopt(sk, level, optname, optval, optlen); -} - -EXPORT_SYMBOL_GPL(dccp_setsockopt); - -static int dccp_getsockopt_service(struct sock *sk, int len, - __be32 __user *optval, - int __user *optlen) -{ - const struct dccp_sock *dp = dccp_sk(sk); - const struct dccp_service_list *sl; - int err = -ENOENT, slen = 0, total_len = sizeof(u32); - - lock_sock(sk); - if ((sl = dp->dccps_service_list) != NULL) { - slen = sl->dccpsl_nr * sizeof(u32); - total_len += slen; - } - - err = -EINVAL; - if (total_len > len) - goto out; - - err = 0; - if (put_user(total_len, optlen) || - put_user(dp->dccps_service, optval) || - (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen))) - err = -EFAULT; -out: - release_sock(sk); - return err; -} - -static int do_dccp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct dccp_sock *dp; - int val, len; - - if (get_user(len, optlen)) - return -EFAULT; - - if (len < (int)sizeof(int)) - return -EINVAL; - - dp = dccp_sk(sk); - - switch (optname) { - case DCCP_SOCKOPT_PACKET_SIZE: - DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); - return 0; - case DCCP_SOCKOPT_SERVICE: - return dccp_getsockopt_service(sk, len, - (__be32 __user *)optval, optlen); - case DCCP_SOCKOPT_GET_CUR_MPS: - val = READ_ONCE(dp->dccps_mss_cache); - break; - case DCCP_SOCKOPT_AVAILABLE_CCIDS: - return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen); - case DCCP_SOCKOPT_TX_CCID: - val = ccid_get_current_tx_ccid(dp); - if (val < 0) - return -ENOPROTOOPT; - break; - case DCCP_SOCKOPT_RX_CCID: - val = ccid_get_current_rx_ccid(dp); - if (val < 0) - return -ENOPROTOOPT; - break; - case DCCP_SOCKOPT_SERVER_TIMEWAIT: - val = dp->dccps_server_timewait; - break; - case DCCP_SOCKOPT_SEND_CSCOV: - val = dp->dccps_pcslen; - break; - case DCCP_SOCKOPT_RECV_CSCOV: - val = dp->dccps_pcrlen; - break; - case DCCP_SOCKOPT_QPOLICY_ID: - val = dp->dccps_qpolicy; - break; - case DCCP_SOCKOPT_QPOLICY_TXQLEN: - val = dp->dccps_tx_qlen; - break; - case 128 ... 191: - return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, - len, (u32 __user *)optval, optlen); - case 192 ... 
255: - return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname, - len, (u32 __user *)optval, optlen); - default: - return -ENOPROTOOPT; - } - - len = sizeof(val); - if (put_user(len, optlen) || copy_to_user(optval, &val, len)) - return -EFAULT; - - return 0; -} - -int dccp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - if (level != SOL_DCCP) - return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level, - optname, optval, - optlen); - return do_dccp_getsockopt(sk, level, optname, optval, optlen); -} - -EXPORT_SYMBOL_GPL(dccp_getsockopt); - -static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) -{ - struct cmsghdr *cmsg; - - /* - * Assign an (opaque) qpolicy priority value to skb->priority. - * - * We are overloading this skb field for use with the qpolicy subystem. - * The skb->priority is normally used for the SO_PRIORITY option, which - * is initialised from sk_priority. Since the assignment of sk_priority - * to skb->priority happens later (on layer 3), we overload this field - * for use with queueing priorities as long as the skb is on layer 4. - * The default priority value (if nothing is set) is 0. - */ - skb->priority = 0; - - for_each_cmsghdr(cmsg, msg) { - if (!CMSG_OK(msg, cmsg)) - return -EINVAL; - - if (cmsg->cmsg_level != SOL_DCCP) - continue; - - if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX && - !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type)) - return -EINVAL; - - switch (cmsg->cmsg_type) { - case DCCP_SCM_PRIORITY: - if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32))) - return -EINVAL; - skb->priority = *(__u32 *)CMSG_DATA(cmsg); - break; - default: - return -EINVAL; - } - } - return 0; -} - -int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) -{ - const struct dccp_sock *dp = dccp_sk(sk); - const int flags = msg->msg_flags; - const int noblock = flags & MSG_DONTWAIT; - struct sk_buff *skb; - int rc, size; - long timeo; - - trace_dccp_probe(sk, len); - - if (len > READ_ONCE(dp->dccps_mss_cache)) - return -EMSGSIZE; - - lock_sock(sk); - - timeo = sock_sndtimeo(sk, noblock); - - /* - * We have to use sk_stream_wait_connect here to set sk_write_pending, - * so that the trick in dccp_rcv_request_sent_state_process. - */ - /* Wait for a connection to finish. */ - if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN)) - if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) - goto out_release; - - size = sk->sk_prot->max_header + len; - release_sock(sk); - skb = sock_alloc_send_skb(sk, size, noblock, &rc); - lock_sock(sk); - if (skb == NULL) - goto out_release; - - if (dccp_qpolicy_full(sk)) { - rc = -EAGAIN; - goto out_discard; - } - - if (sk->sk_state == DCCP_CLOSED) { - rc = -ENOTCONN; - goto out_discard; - } - - /* We need to check dccps_mss_cache after socket is locked. */ - if (len > dp->dccps_mss_cache) { - rc = -EMSGSIZE; - goto out_discard; - } - - skb_reserve(skb, sk->sk_prot->max_header); - rc = memcpy_from_msg(skb_put(skb, len), msg, len); - if (rc != 0) - goto out_discard; - - rc = dccp_msghdr_parse(msg, skb); - if (rc != 0) - goto out_discard; - - dccp_qpolicy_push(sk, skb); - /* - * The xmit_timer is set if the TX CCID is rate-based and will expire - * when congestion control permits to release further packets into the - * network. Window-based CCIDs do not use this timer. - */ - if (!timer_pending(&dp->dccps_xmit_timer)) - dccp_write_xmit(sk); -out_release: - release_sock(sk); - return rc ? 
: len; -out_discard: - kfree_skb(skb); - goto out_release; -} - -EXPORT_SYMBOL_GPL(dccp_sendmsg); - -int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, - int *addr_len) -{ - const struct dccp_hdr *dh; - long timeo; - - lock_sock(sk); - - if (sk->sk_state == DCCP_LISTEN) { - len = -ENOTCONN; - goto out; - } - - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - - do { - struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); - - if (skb == NULL) - goto verify_sock_status; - - dh = dccp_hdr(skb); - - switch (dh->dccph_type) { - case DCCP_PKT_DATA: - case DCCP_PKT_DATAACK: - goto found_ok_skb; - - case DCCP_PKT_CLOSE: - case DCCP_PKT_CLOSEREQ: - if (!(flags & MSG_PEEK)) - dccp_finish_passive_close(sk); - fallthrough; - case DCCP_PKT_RESET: - dccp_pr_debug("found fin (%s) ok!\n", - dccp_packet_name(dh->dccph_type)); - len = 0; - goto found_fin_ok; - default: - dccp_pr_debug("packet_type=%s\n", - dccp_packet_name(dh->dccph_type)); - sk_eat_skb(sk, skb); - } -verify_sock_status: - if (sock_flag(sk, SOCK_DONE)) { - len = 0; - break; - } - - if (sk->sk_err) { - len = sock_error(sk); - break; - } - - if (sk->sk_shutdown & RCV_SHUTDOWN) { - len = 0; - break; - } - - if (sk->sk_state == DCCP_CLOSED) { - if (!sock_flag(sk, SOCK_DONE)) { - /* This occurs when user tries to read - * from never connected socket. - */ - len = -ENOTCONN; - break; - } - len = 0; - break; - } - - if (!timeo) { - len = -EAGAIN; - break; - } - - if (signal_pending(current)) { - len = sock_intr_errno(timeo); - break; - } - - sk_wait_data(sk, &timeo, NULL); - continue; - found_ok_skb: - if (len > skb->len) - len = skb->len; - else if (len < skb->len) - msg->msg_flags |= MSG_TRUNC; - - if (skb_copy_datagram_msg(skb, 0, msg, len)) { - /* Exception. Bailout! */ - len = -EFAULT; - break; - } - if (flags & MSG_TRUNC) - len = skb->len; - found_fin_ok: - if (!(flags & MSG_PEEK)) - sk_eat_skb(sk, skb); - break; - } while (1); -out: - release_sock(sk); - return len; -} - -EXPORT_SYMBOL_GPL(dccp_recvmsg); - -int inet_dccp_listen(struct socket *sock, int backlog) -{ - struct sock *sk = sock->sk; - unsigned char old_state; - int err; - - lock_sock(sk); - - err = -EINVAL; - if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP) - goto out; - - old_state = sk->sk_state; - if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) - goto out; - - WRITE_ONCE(sk->sk_max_ack_backlog, backlog); - /* Really, if the socket is already in listen state - * we can only allow the backlog to be adjusted. 
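/*
 * [Editorial sketch, not part of the patch] Userspace view of the
 * dccp_msghdr_parse()/dccp_sendmsg() path above: after selecting
 * DCCPQ_POLICY_PRIO via DCCP_SOCKOPT_QPOLICY_ID (which, per
 * do_dccp_setsockopt(), must happen while the socket is still
 * DCCP_CLOSED), each datagram may carry an opaque priority in a
 * DCCP_SCM_PRIORITY control message. "fd" is assumed to be a connected
 * SOCK_DCCP socket; error handling is omitted.
 */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/dccp.h>

#ifndef SOL_DCCP
#define SOL_DCCP 269	/* value from the kernel's <linux/socket.h> */
#endif

static ssize_t dccp_send_prio(int fd, const void *buf, size_t len, __u32 prio)
{
	char cbuf[CMSG_SPACE(sizeof(prio))] = {};
	struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_DCCP;
	cmsg->cmsg_type  = DCCP_SCM_PRIORITY;
	cmsg->cmsg_len   = CMSG_LEN(sizeof(prio));
	memcpy(CMSG_DATA(cmsg), &prio, sizeof(prio));

	return sendmsg(fd, &msg, 0);
}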
- */ - if (old_state != DCCP_LISTEN) { - struct dccp_sock *dp = dccp_sk(sk); - - dp->dccps_role = DCCP_ROLE_LISTEN; - - /* do not start to listen if feature negotiation setup fails */ - if (dccp_feat_finalise_settings(dp)) { - err = -EPROTO; - goto out; - } - - err = inet_csk_listen_start(sk); - if (err) - goto out; - } - err = 0; - -out: - release_sock(sk); - return err; -} - -EXPORT_SYMBOL_GPL(inet_dccp_listen); - -static void dccp_terminate_connection(struct sock *sk) -{ - u8 next_state = DCCP_CLOSED; - - switch (sk->sk_state) { - case DCCP_PASSIVE_CLOSE: - case DCCP_PASSIVE_CLOSEREQ: - dccp_finish_passive_close(sk); - break; - case DCCP_PARTOPEN: - dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk); - inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); - fallthrough; - case DCCP_OPEN: - dccp_send_close(sk, 1); - - if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER && - !dccp_sk(sk)->dccps_server_timewait) - next_state = DCCP_ACTIVE_CLOSEREQ; - else - next_state = DCCP_CLOSING; - fallthrough; - default: - dccp_set_state(sk, next_state); - } -} - -void dccp_close(struct sock *sk, long timeout) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct sk_buff *skb; - u32 data_was_unread = 0; - int state; - - lock_sock(sk); - - sk->sk_shutdown = SHUTDOWN_MASK; - - if (sk->sk_state == DCCP_LISTEN) { - dccp_set_state(sk, DCCP_CLOSED); - - /* Special case. */ - inet_csk_listen_stop(sk); - - goto adjudge_to_death; - } - - sk_stop_timer(sk, &dp->dccps_xmit_timer); - - /* - * We need to flush the recv. buffs. We do this only on the - * descriptor close, not protocol-sourced closes, because the - *reader process may not have drained the data yet! - */ - while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { - data_was_unread += skb->len; - __kfree_skb(skb); - } - - /* If socket has been already reset kill it. */ - if (sk->sk_state == DCCP_CLOSED) - goto adjudge_to_death; - - if (data_was_unread) { - /* Unread data was tossed, send an appropriate Reset Code */ - DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread); - dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); - dccp_set_state(sk, DCCP_CLOSED); - } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { - /* Check zero linger _after_ checking for unread data. */ - sk->sk_prot->disconnect(sk, 0); - } else if (sk->sk_state != DCCP_CLOSED) { - /* - * Normal connection termination. May need to wait if there are - * still packets in the TX queue that are delayed by the CCID. - */ - dccp_flush_write_queue(sk, &timeout); - dccp_terminate_connection(sk); - } - - /* - * Flush write queue. This may be necessary in several cases: - * - we have been closed by the peer but still have application data; - * - abortive termination (unread data or zero linger time), - * - normal termination but queue could not be flushed within time limit - */ - __skb_queue_purge(&sk->sk_write_queue); - - sk_stream_wait_close(sk, timeout); - -adjudge_to_death: - state = sk->sk_state; - sock_hold(sk); - sock_orphan(sk); - - /* - * It is the last release_sock in its life. It will remove backlog. - */ - release_sock(sk); - /* - * Now socket is owned by kernel and we acquire BH lock - * to finish close. No need to check for user refs. - */ - local_bh_disable(); - bh_lock_sock(sk); - WARN_ON(sock_owned_by_user(sk)); - - this_cpu_inc(dccp_orphan_count); - - /* Have we already been destroyed by a softirq or backlog? 
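/*
 * [Editorial sketch, not part of the patch] The zero-linger branch
 * above as seen from userspace: closing with SO_LINGER {on, 0} takes
 * the disconnect() path, i.e. an abortive termination that sends
 * Reset (Code 2, "Aborted") instead of the normal Close handshake.
 */
#include <sys/socket.h>
#include <unistd.h>

static void dccp_abortive_close(int fd)
{
	struct linger lg = { .l_onoff = 1, .l_linger = 0 };

	setsockopt(fd, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg));
	close(fd);	/* Reset(Aborted) rather than Close */
}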
*/ - if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) - goto out; - - if (sk->sk_state == DCCP_CLOSED) - inet_csk_destroy_sock(sk); - - /* Otherwise, socket is reprieved until protocol close. */ - -out: - bh_unlock_sock(sk); - local_bh_enable(); - sock_put(sk); -} - -EXPORT_SYMBOL_GPL(dccp_close); - -void dccp_shutdown(struct sock *sk, int how) -{ - dccp_pr_debug("called shutdown(%x)\n", how); -} - -EXPORT_SYMBOL_GPL(dccp_shutdown); - -static inline int __init dccp_mib_init(void) -{ - dccp_statistics = alloc_percpu(struct dccp_mib); - if (!dccp_statistics) - return -ENOMEM; - return 0; -} - -static inline void dccp_mib_exit(void) -{ - free_percpu(dccp_statistics); -} - -static int thash_entries; -module_param(thash_entries, int, 0444); -MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); - -#ifdef CONFIG_IP_DCCP_DEBUG -bool dccp_debug; -module_param(dccp_debug, bool, 0644); -MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); - -EXPORT_SYMBOL_GPL(dccp_debug); -#endif - -static int __init dccp_init(void) -{ - unsigned long goal; - unsigned long nr_pages = totalram_pages(); - int ehash_order, bhash_order, i; - int rc; - - BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > - sizeof_field(struct sk_buff, cb)); - rc = inet_hashinfo2_init_mod(&dccp_hashinfo); - if (rc) - goto out_fail; - rc = -ENOBUFS; - dccp_hashinfo.bind_bucket_cachep = - kmem_cache_create("dccp_bind_bucket", - sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); - if (!dccp_hashinfo.bind_bucket_cachep) - goto out_free_hashinfo2; - dccp_hashinfo.bind2_bucket_cachep = - kmem_cache_create("dccp_bind2_bucket", - sizeof(struct inet_bind2_bucket), 0, - SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); - if (!dccp_hashinfo.bind2_bucket_cachep) - goto out_free_bind_bucket_cachep; - - /* - * Size and allocate the main established and bind bucket - * hash tables. - * - * The methodology is similar to that of the buffer cache. 
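/*
 * [Editorial worked example, assuming 4 KiB pages (PAGE_SHIFT = 12) and
 * an 8-byte struct inet_ehash_bucket, i.e. one hlist_nulls_head on a
 * 64-bit build] With 1 GiB of RAM, nr_pages = 262144 >= 128 * 1024, so
 *
 *	goal = 262144 >> (21 - 12) = 512 pages (2 MiB) of ehash;
 *
 * the loop below then picks ehash_order = 9 (1 << 9 == 512), and 2 MiB
 * holds 262144 buckets -- already a power of two -- giving
 * ehash_mask = 262143.
 */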
- */ - if (nr_pages >= (128 * 1024)) - goal = nr_pages >> (21 - PAGE_SHIFT); - else - goal = nr_pages >> (23 - PAGE_SHIFT); - - if (thash_entries) - goal = (thash_entries * - sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; - for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) - ; - do { - unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE / - sizeof(struct inet_ehash_bucket); - - while (hash_size & (hash_size - 1)) - hash_size--; - dccp_hashinfo.ehash_mask = hash_size - 1; - dccp_hashinfo.ehash = (struct inet_ehash_bucket *) - __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order); - } while (!dccp_hashinfo.ehash && --ehash_order > 0); - - if (!dccp_hashinfo.ehash) { - DCCP_CRIT("Failed to allocate DCCP established hash table"); - goto out_free_bind2_bucket_cachep; - } - - for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) - INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i); - - if (inet_ehash_locks_alloc(&dccp_hashinfo)) - goto out_free_dccp_ehash; - - bhash_order = ehash_order; - - do { - dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE / - sizeof(struct inet_bind_hashbucket); - if ((dccp_hashinfo.bhash_size > (64 * 1024)) && - bhash_order > 0) - continue; - dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) - __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order); - } while (!dccp_hashinfo.bhash && --bhash_order >= 0); - - if (!dccp_hashinfo.bhash) { - DCCP_CRIT("Failed to allocate DCCP bind hash table"); - goto out_free_dccp_locks; - } - - dccp_hashinfo.bhash2 = (struct inet_bind_hashbucket *) - __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order); - - if (!dccp_hashinfo.bhash2) { - DCCP_CRIT("Failed to allocate DCCP bind2 hash table"); - goto out_free_dccp_bhash; - } - - for (i = 0; i < dccp_hashinfo.bhash_size; i++) { - spin_lock_init(&dccp_hashinfo.bhash[i].lock); - INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); - spin_lock_init(&dccp_hashinfo.bhash2[i].lock); - INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain); - } - - dccp_hashinfo.pernet = false; - - rc = dccp_mib_init(); - if (rc) - goto out_free_dccp_bhash2; - - rc = dccp_ackvec_init(); - if (rc) - goto out_free_dccp_mib; - - rc = dccp_sysctl_init(); - if (rc) - goto out_ackvec_exit; - - rc = ccid_initialize_builtins(); - if (rc) - goto out_sysctl_exit; - - dccp_timestamping_init(); - - return 0; - -out_sysctl_exit: - dccp_sysctl_exit(); -out_ackvec_exit: - dccp_ackvec_exit(); -out_free_dccp_mib: - dccp_mib_exit(); -out_free_dccp_bhash2: - free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order); -out_free_dccp_bhash: - free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); -out_free_dccp_locks: - inet_ehash_locks_free(&dccp_hashinfo); -out_free_dccp_ehash: - free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); -out_free_bind2_bucket_cachep: - kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep); -out_free_bind_bucket_cachep: - kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); -out_free_hashinfo2: - inet_hashinfo2_free_mod(&dccp_hashinfo); -out_fail: - dccp_hashinfo.bhash = NULL; - dccp_hashinfo.bhash2 = NULL; - dccp_hashinfo.ehash = NULL; - dccp_hashinfo.bind_bucket_cachep = NULL; - dccp_hashinfo.bind2_bucket_cachep = NULL; - return rc; -} - -static void __exit dccp_fini(void) -{ - int bhash_order = get_order(dccp_hashinfo.bhash_size * - sizeof(struct inet_bind_hashbucket)); - - ccid_cleanup_builtins(); - dccp_mib_exit(); - free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); - free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order); - 
free_pages((unsigned long)dccp_hashinfo.ehash, - get_order((dccp_hashinfo.ehash_mask + 1) * - sizeof(struct inet_ehash_bucket))); - inet_ehash_locks_free(&dccp_hashinfo); - kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); - dccp_ackvec_exit(); - dccp_sysctl_exit(); - inet_hashinfo2_free_mod(&dccp_hashinfo); -} - -module_init(dccp_init); -module_exit(dccp_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>"); -MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c deleted file mode 100644 index 5ba204ec0aca..000000000000 --- a/net/dccp/qpolicy.c +++ /dev/null @@ -1,136 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * net/dccp/qpolicy.c - * - * Policy-based packet dequeueing interface for DCCP. - * - * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> - */ -#include "dccp.h" - -/* - * Simple Dequeueing Policy: - * If tx_qlen is different from 0, enqueue up to tx_qlen elements. - */ -static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb) -{ - skb_queue_tail(&sk->sk_write_queue, skb); -} - -static bool qpolicy_simple_full(struct sock *sk) -{ - return dccp_sk(sk)->dccps_tx_qlen && - sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen; -} - -static struct sk_buff *qpolicy_simple_top(struct sock *sk) -{ - return skb_peek(&sk->sk_write_queue); -} - -/* - * Priority-based Dequeueing Policy: - * If tx_qlen is different from 0 and the queue has reached its upper bound - * of tx_qlen elements, replace older packets lowest-priority-first. - */ -static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk) -{ - struct sk_buff *skb, *best = NULL; - - skb_queue_walk(&sk->sk_write_queue, skb) - if (best == NULL || skb->priority > best->priority) - best = skb; - return best; -} - -static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk) -{ - struct sk_buff *skb, *worst = NULL; - - skb_queue_walk(&sk->sk_write_queue, skb) - if (worst == NULL || skb->priority < worst->priority) - worst = skb; - return worst; -} - -static bool qpolicy_prio_full(struct sock *sk) -{ - if (qpolicy_simple_full(sk)) - dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk)); - return false; -} - -/** - * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface - * @push: add a new @skb to the write queue - * @full: indicates that no more packets will be admitted - * @top: peeks at whatever the queueing policy defines as its `top' - * @params: parameter passed to policy operation - */ -struct dccp_qpolicy_operations { - void (*push) (struct sock *sk, struct sk_buff *skb); - bool (*full) (struct sock *sk); - struct sk_buff* (*top) (struct sock *sk); - __be32 params; -}; - -static struct dccp_qpolicy_operations qpol_table[DCCPQ_POLICY_MAX] = { - [DCCPQ_POLICY_SIMPLE] = { - .push = qpolicy_simple_push, - .full = qpolicy_simple_full, - .top = qpolicy_simple_top, - .params = 0, - }, - [DCCPQ_POLICY_PRIO] = { - .push = qpolicy_simple_push, - .full = qpolicy_prio_full, - .top = qpolicy_prio_best_skb, - .params = DCCP_SCM_PRIORITY, - }, -}; - -/* - * Externally visible interface - */ -void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb) -{ - qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb); -} - -bool dccp_qpolicy_full(struct sock *sk) -{ - return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk); -} - -void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb) -{ - if (skb != NULL) { - skb_unlink(skb, &sk->sk_write_queue); - kfree_skb(skb); - } -} - -struct 
sk_buff *dccp_qpolicy_top(struct sock *sk) -{ - return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk); -} - -struct sk_buff *dccp_qpolicy_pop(struct sock *sk) -{ - struct sk_buff *skb = dccp_qpolicy_top(sk); - - if (skb != NULL) { - /* Clear any skb fields that we used internally */ - skb->priority = 0; - skb_unlink(skb, &sk->sk_write_queue); - } - return skb; -} - -bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param) -{ - /* check if exactly one bit is set */ - if (!param || (param & (param - 1))) - return false; - return (qpol_table[dccp_sk(sk)->dccps_qpolicy].params & param) == param; -} diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c deleted file mode 100644 index b15845fd6300..000000000000 --- a/net/dccp/sysctl.c +++ /dev/null @@ -1,107 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * net/dccp/sysctl.c - * - * An implementation of the DCCP protocol - * Arnaldo Carvalho de Melo <acme@mandriva.com> - */ - -#include <linux/mm.h> -#include <linux/sysctl.h> -#include "dccp.h" -#include "feat.h" - -/* Boundary values */ -static int u8_max = 0xFF; -static unsigned long seqw_min = DCCPF_SEQ_WMIN, - seqw_max = 0xFFFFFFFF; /* maximum on 32 bit */ - -static struct ctl_table dccp_default_table[] = { - { - .procname = "seq_window", - .data = &sysctl_dccp_sequence_window, - .maxlen = sizeof(sysctl_dccp_sequence_window), - .mode = 0644, - .proc_handler = proc_doulongvec_minmax, - .extra1 = &seqw_min, /* RFC 4340, 7.5.2 */ - .extra2 = &seqw_max, - }, - { - .procname = "rx_ccid", - .data = &sysctl_dccp_rx_ccid, - .maxlen = sizeof(sysctl_dccp_rx_ccid), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &u8_max, /* RFC 4340, 10. */ - }, - { - .procname = "tx_ccid", - .data = &sysctl_dccp_tx_ccid, - .maxlen = sizeof(sysctl_dccp_tx_ccid), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &u8_max, /* RFC 4340, 10. */ - }, - { - .procname = "request_retries", - .data = &sysctl_dccp_request_retries, - .maxlen = sizeof(sysctl_dccp_request_retries), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ONE, - .extra2 = &u8_max, - }, - { - .procname = "retries1", - .data = &sysctl_dccp_retries1, - .maxlen = sizeof(sysctl_dccp_retries1), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &u8_max, - }, - { - .procname = "retries2", - .data = &sysctl_dccp_retries2, - .maxlen = sizeof(sysctl_dccp_retries2), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &u8_max, - }, - { - .procname = "tx_qlen", - .data = &sysctl_dccp_tx_qlen, - .maxlen = sizeof(sysctl_dccp_tx_qlen), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - }, - { - .procname = "sync_ratelimit", - .data = &sysctl_dccp_sync_ratelimit, - .maxlen = sizeof(sysctl_dccp_sync_ratelimit), - .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - }, -}; - -static struct ctl_table_header *dccp_table_header; - -int __init dccp_sysctl_init(void) -{ - dccp_table_header = register_net_sysctl(&init_net, "net/dccp/default", - dccp_default_table); - - return dccp_table_header != NULL ? 
0 : -ENOMEM; -} - -void dccp_sysctl_exit(void) -{ - if (dccp_table_header != NULL) { - unregister_net_sysctl_table(dccp_table_header); - dccp_table_header = NULL; - } -} diff --git a/net/dccp/timer.c b/net/dccp/timer.c deleted file mode 100644 index 232ac4ae0a73..000000000000 --- a/net/dccp/timer.c +++ /dev/null @@ -1,272 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/dccp/timer.c - * - * An implementation of the DCCP protocol - * Arnaldo Carvalho de Melo <acme@conectiva.com.br> - */ - -#include <linux/dccp.h> -#include <linux/skbuff.h> -#include <linux/export.h> - -#include "dccp.h" - -/* sysctl variables governing numbers of retransmission attempts */ -int sysctl_dccp_request_retries __read_mostly = TCP_SYN_RETRIES; -int sysctl_dccp_retries1 __read_mostly = TCP_RETR1; -int sysctl_dccp_retries2 __read_mostly = TCP_RETR2; - -static void dccp_write_err(struct sock *sk) -{ - sk->sk_err = READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT; - sk_error_report(sk); - - dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); - dccp_done(sk); - __DCCP_INC_STATS(DCCP_MIB_ABORTONTIMEOUT); -} - -/* A write timeout has occurred. Process the after effects. */ -static int dccp_write_timeout(struct sock *sk) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - int retry_until; - - if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) { - if (icsk->icsk_retransmits != 0) - dst_negative_advice(sk); - retry_until = icsk->icsk_syn_retries ? - : sysctl_dccp_request_retries; - } else { - if (icsk->icsk_retransmits >= sysctl_dccp_retries1) { - /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu - black hole detection. :-( - - It is place to make it. It is not made. I do not want - to make it. It is disguisting. It does not work in any - case. Let me to cite the same draft, which requires for - us to implement this: - - "The one security concern raised by this memo is that ICMP black holes - are often caused by over-zealous security administrators who block - all ICMP messages. It is vitally important that those who design and - deploy security systems understand the impact of strict filtering on - upper-layer protocols. The safest web site in the world is worthless - if most TCP implementations cannot transfer data from it. It would - be far nicer to have all of the black holes fixed rather than fixing - all of the TCP implementations." - - Golden words :-). - */ - - dst_negative_advice(sk); - } - - retry_until = sysctl_dccp_retries2; - /* - * FIXME: see tcp_write_timout and tcp_out_of_resources - */ - } - - if (icsk->icsk_retransmits >= retry_until) { - /* Has it gone just too far? */ - dccp_write_err(sk); - return 1; - } - return 0; -} - -/* - * The DCCP retransmit timer. - */ -static void dccp_retransmit_timer(struct sock *sk) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - /* - * More than 4MSL (8 minutes) has passed, a RESET(aborted) was - * sent, no need to retransmit, this sock is dead. - */ - if (dccp_write_timeout(sk)) - return; - - /* - * We want to know the number of packets retransmitted, not the - * total number of retransmissions of clones of original packets. - */ - if (icsk->icsk_retransmits == 0) - __DCCP_INC_STATS(DCCP_MIB_TIMEOUTS); - - if (dccp_retransmit_skb(sk) != 0) { - /* - * Retransmission failed because of local congestion, - * do not backoff. 
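/*
 * [Editorial worked example; DCCP_TIMEOUT_INIT being 3 * HZ is taken
 * from net/dccp/dccp.h, outside the quoted hunks] Each timeout below
 * doubles icsk_rto and clamps it at DCCP_RTO_MAX (64 s), so the
 * retransmit intervals run 3, 6, 12, 24, 48, 64, 64, ... seconds until
 * icsk_retransmits crosses the retries limit and dccp_write_err()
 * aborts the connection.
 */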
- */ - if (--icsk->icsk_retransmits == 0) - icsk->icsk_retransmits = 1; - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - min(icsk->icsk_rto, - TCP_RESOURCE_PROBE_INTERVAL), - DCCP_RTO_MAX); - return; - } - - icsk->icsk_backoff++; - - icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, - DCCP_RTO_MAX); - if (icsk->icsk_retransmits > sysctl_dccp_retries1) - __sk_dst_reset(sk); -} - -static void dccp_write_timer(struct timer_list *t) -{ - struct inet_connection_sock *icsk = - from_timer(icsk, t, icsk_retransmit_timer); - struct sock *sk = &icsk->icsk_inet.sk; - int event = 0; - - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - /* Try again later */ - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, - jiffies + (HZ / 20)); - goto out; - } - - if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending) - goto out; - - if (time_after(icsk_timeout(icsk), jiffies)) { - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, - icsk_timeout(icsk)); - goto out; - } - - event = icsk->icsk_pending; - icsk->icsk_pending = 0; - - switch (event) { - case ICSK_TIME_RETRANS: - dccp_retransmit_timer(sk); - break; - } -out: - bh_unlock_sock(sk); - sock_put(sk); -} - -static void dccp_keepalive_timer(struct timer_list *t) -{ - struct sock *sk = from_timer(sk, t, sk_timer); - - pr_err("dccp should not use a keepalive timer !\n"); - sock_put(sk); -} - -/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */ -static void dccp_delack_timer(struct timer_list *t) -{ - struct inet_connection_sock *icsk = - from_timer(icsk, t, icsk_delack_timer); - struct sock *sk = &icsk->icsk_inet.sk; - - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - /* Try again later. */ - __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); - sk_reset_timer(sk, &icsk->icsk_delack_timer, - jiffies + TCP_DELACK_MIN); - goto out; - } - - if (sk->sk_state == DCCP_CLOSED || - !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) - goto out; - if (time_after(icsk_delack_timeout(icsk), jiffies)) { - sk_reset_timer(sk, &icsk->icsk_delack_timer, - icsk_delack_timeout(icsk)); - goto out; - } - - icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; - - if (inet_csk_ack_scheduled(sk)) { - if (!inet_csk_in_pingpong_mode(sk)) { - /* Delayed ACK missed: inflate ATO. */ - icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, - icsk->icsk_rto); - } else { - /* Delayed ACK missed: leave pingpong mode and - * deflate ATO. - */ - inet_csk_exit_pingpong_mode(sk); - icsk->icsk_ack.ato = TCP_ATO_MIN; - } - dccp_send_ack(sk); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS); - } -out: - bh_unlock_sock(sk); - sock_put(sk); -} - -/** - * dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface - * @t: pointer to the tasklet associated with this handler - * - * See the comments above %ccid_dequeueing_decision for supported modes. 
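/*
 * [Editorial worked example] In dccp_delack_timer() above, a missed
 * delayed ACK outside pingpong mode doubles the ACK timeout,
 * ato = min(ato << 1, icsk_rto): e.g. 40 ms, 80 ms, 160 ms, ...,
 * capped by the current RTO; leaving pingpong mode resets it to
 * TCP_ATO_MIN (HZ/25, i.e. 40 ms).
 */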
- */ -static void dccp_write_xmitlet(struct tasklet_struct *t) -{ - struct dccp_sock *dp = from_tasklet(dp, t, dccps_xmitlet); - struct sock *sk = &dp->dccps_inet_connection.icsk_inet.sk; - - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) - sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1); - else - dccp_write_xmit(sk); - bh_unlock_sock(sk); - sock_put(sk); -} - -static void dccp_write_xmit_timer(struct timer_list *t) -{ - struct dccp_sock *dp = from_timer(dp, t, dccps_xmit_timer); - - dccp_write_xmitlet(&dp->dccps_xmitlet); -} - -void dccp_init_xmit_timers(struct sock *sk) -{ - struct dccp_sock *dp = dccp_sk(sk); - - tasklet_setup(&dp->dccps_xmitlet, dccp_write_xmitlet); - timer_setup(&dp->dccps_xmit_timer, dccp_write_xmit_timer, 0); - inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, - &dccp_keepalive_timer); -} - -static ktime_t dccp_timestamp_seed; -/** - * dccp_timestamp - 10s of microseconds time source - * Returns the number of 10s of microseconds since loading DCCP. This is native - * DCCP time difference format (RFC 4340, sec. 13). - * Please note: This will wrap around about circa every 11.9 hours. - */ -u32 dccp_timestamp(void) -{ - u64 delta = (u64)ktime_us_delta(ktime_get_real(), dccp_timestamp_seed); - - do_div(delta, 10); - return delta; -} -EXPORT_SYMBOL_GPL(dccp_timestamp); - -void __init dccp_timestamping_init(void) -{ - dccp_timestamp_seed = ktime_get_real(); -} diff --git a/net/dccp/trace.h b/net/dccp/trace.h deleted file mode 100644 index 5a43b3508c7f..000000000000 --- a/net/dccp/trace.h +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM dccp - -#if !defined(_TRACE_DCCP_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_DCCP_H - -#include <net/sock.h> -#include "dccp.h" -#include "ccids/ccid3.h" -#include <linux/tracepoint.h> -#include <trace/events/net_probe_common.h> - -TRACE_EVENT(dccp_probe, - - TP_PROTO(struct sock *sk, size_t size), - - TP_ARGS(sk, size), - - TP_STRUCT__entry( - /* sockaddr_in6 is always bigger than sockaddr_in */ - __array(__u8, saddr, sizeof(struct sockaddr_in6)) - __array(__u8, daddr, sizeof(struct sockaddr_in6)) - __field(__u16, sport) - __field(__u16, dport) - __field(__u16, size) - __field(__u16, tx_s) - __field(__u32, tx_rtt) - __field(__u32, tx_p) - __field(__u32, tx_x_calc) - __field(__u64, tx_x_recv) - __field(__u64, tx_x) - __field(__u32, tx_t_ipi) - ), - - TP_fast_assign( - const struct inet_sock *inet = inet_sk(sk); - struct ccid3_hc_tx_sock *hc = NULL; - - if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3) - hc = ccid3_hc_tx_sk(sk); - - memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); - memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); - - TP_STORE_ADDR_PORTS(__entry, inet, sk); - - /* For filtering use */ - __entry->sport = ntohs(inet->inet_sport); - __entry->dport = ntohs(inet->inet_dport); - - __entry->size = size; - if (hc) { - __entry->tx_s = hc->tx_s; - __entry->tx_rtt = hc->tx_rtt; - __entry->tx_p = hc->tx_p; - __entry->tx_x_calc = hc->tx_x_calc; - __entry->tx_x_recv = hc->tx_x_recv >> 6; - __entry->tx_x = hc->tx_x >> 6; - __entry->tx_t_ipi = hc->tx_t_ipi; - } else { - __entry->tx_s = 0; - memset_startat(__entry, 0, tx_rtt); - } - ), - - TP_printk("src=%pISpc dest=%pISpc size=%d tx_s=%d tx_rtt=%d " - "tx_p=%d tx_x_calc=%u tx_x_recv=%llu tx_x=%llu tx_t_ipi=%d", - __entry->saddr, __entry->daddr, __entry->size, - __entry->tx_s, __entry->tx_rtt, __entry->tx_p, - __entry->tx_x_calc, __entry->tx_x_recv, 
__entry->tx_x, - __entry->tx_t_ipi) -); - -#endif /* _TRACE_TCP_H */ - -/* This part must be outside protection */ -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE trace -#include <trace/define_trace.h> diff --git a/net/devlink/dev.c b/net/devlink/dev.c index d6e3db300acb..02602704bdea 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -775,7 +775,7 @@ static int devlink_info_version_put(struct devlink_info_req *req, int attr, req->version_cb(version_name, version_type, req->version_cb_priv); - if (!req->msg) + if (!req->msg || !*version_value) return 0; nest = nla_nest_start_noflag(req->msg, attr); diff --git a/net/devlink/health.c b/net/devlink/health.c index 57db6799722a..b3ce8ecbb7fb 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -735,7 +735,7 @@ static void devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name) return; } - item->nla_type = NLA_NUL_STRING; + item->nla_type = DEVLINK_VAR_ATTR_TYPE_NUL_STRING; item->len = strlen(name) + 1; item->attrtype = DEVLINK_ATTR_FMSG_OBJ_NAME; memcpy(&item->value, name, item->len); @@ -822,32 +822,37 @@ static void devlink_fmsg_put_value(struct devlink_fmsg *fmsg, static void devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) { devlink_fmsg_err_if_binary(fmsg); - devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG); + devlink_fmsg_put_value(fmsg, &value, sizeof(value), + DEVLINK_VAR_ATTR_TYPE_FLAG); } static void devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) { devlink_fmsg_err_if_binary(fmsg); - devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8); + devlink_fmsg_put_value(fmsg, &value, sizeof(value), + DEVLINK_VAR_ATTR_TYPE_U8); } void devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) { devlink_fmsg_err_if_binary(fmsg); - devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32); + devlink_fmsg_put_value(fmsg, &value, sizeof(value), + DEVLINK_VAR_ATTR_TYPE_U32); } EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put); static void devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) { devlink_fmsg_err_if_binary(fmsg); - devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64); + devlink_fmsg_put_value(fmsg, &value, sizeof(value), + DEVLINK_VAR_ATTR_TYPE_U64); } void devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) { devlink_fmsg_err_if_binary(fmsg); - devlink_fmsg_put_value(fmsg, value, strlen(value) + 1, NLA_NUL_STRING); + devlink_fmsg_put_value(fmsg, value, strlen(value) + 1, + DEVLINK_VAR_ATTR_TYPE_NUL_STRING); } EXPORT_SYMBOL_GPL(devlink_fmsg_string_put); @@ -857,7 +862,8 @@ void devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, if (!fmsg->err && !fmsg->putting_binary) fmsg->err = -EINVAL; - devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY); + devlink_fmsg_put_value(fmsg, value, value_len, + DEVLINK_VAR_ATTR_TYPE_BINARY); } EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put); @@ -928,43 +934,26 @@ void devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put); static int -devlink_fmsg_item_fill_type(struct devlink_fmsg_item *msg, struct sk_buff *skb) -{ - switch (msg->nla_type) { - case NLA_FLAG: - case NLA_U8: - case NLA_U32: - case NLA_U64: - case NLA_NUL_STRING: - case NLA_BINARY: - return nla_put_u8(skb, DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE, - msg->nla_type); - default: - return -EINVAL; - } -} - -static int devlink_fmsg_item_fill_data(struct devlink_fmsg_item *msg, struct sk_buff *skb) { int 
attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA; u8 tmp; switch (msg->nla_type) { - case NLA_FLAG: + case DEVLINK_VAR_ATTR_TYPE_FLAG: /* Always provide flag data, regardless of its value */ tmp = *(bool *)msg->value; return nla_put_u8(skb, attrtype, tmp); - case NLA_U8: + case DEVLINK_VAR_ATTR_TYPE_U8: return nla_put_u8(skb, attrtype, *(u8 *)msg->value); - case NLA_U32: + case DEVLINK_VAR_ATTR_TYPE_U32: return nla_put_u32(skb, attrtype, *(u32 *)msg->value); - case NLA_U64: + case DEVLINK_VAR_ATTR_TYPE_U64: return devlink_nl_put_u64(skb, attrtype, *(u64 *)msg->value); - case NLA_NUL_STRING: + case DEVLINK_VAR_ATTR_TYPE_NUL_STRING: return nla_put_string(skb, attrtype, (char *)&msg->value); - case NLA_BINARY: + case DEVLINK_VAR_ATTR_TYPE_BINARY: return nla_put(skb, attrtype, msg->len, (void *)&msg->value); default: return -EINVAL; @@ -998,7 +987,8 @@ devlink_fmsg_prepare_skb(struct devlink_fmsg *fmsg, struct sk_buff *skb, err = nla_put_flag(skb, item->attrtype); break; case DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA: - err = devlink_fmsg_item_fill_type(item, skb); + err = nla_put_u8(skb, DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE, + item->nla_type); if (err) break; err = devlink_fmsg_item_fill_data(item, skb); diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index f9786d51f68f..e340d955cf3b 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -10,6 +10,33 @@ #include <uapi/linux/devlink.h> +/* Sparse enums validation callbacks */ +static int +devlink_attr_param_type_validate(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + switch (nla_get_u8(attr)) { + case DEVLINK_VAR_ATTR_TYPE_U8: + fallthrough; + case DEVLINK_VAR_ATTR_TYPE_U16: + fallthrough; + case DEVLINK_VAR_ATTR_TYPE_U32: + fallthrough; + case DEVLINK_VAR_ATTR_TYPE_U64: + fallthrough; + case DEVLINK_VAR_ATTR_TYPE_STRING: + fallthrough; + case DEVLINK_VAR_ATTR_TYPE_FLAG: + fallthrough; + case DEVLINK_VAR_ATTR_TYPE_NUL_STRING: + fallthrough; + case DEVLINK_VAR_ATTR_TYPE_BINARY: + return 0; + } + NL_SET_ERR_MSG_ATTR(extack, attr, "invalid enum value"); + return -EINVAL; +} + /* Common nested types */ const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_CAPS + 1] = { [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY, }, @@ -273,7 +300,7 @@ static const struct nla_policy devlink_param_set_nl_policy[DEVLINK_ATTR_PARAM_VA [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING, }, - [DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8, }, + [DEVLINK_ATTR_PARAM_TYPE] = NLA_POLICY_VALIDATE_FN(NLA_U8, &devlink_attr_param_type_validate), [DEVLINK_ATTR_PARAM_VALUE_CMODE] = NLA_POLICY_MAX(NLA_U8, 2), }; diff --git a/net/devlink/param.c b/net/devlink/param.c index dcf0d1ccebba..b29abf8d3ed4 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -167,25 +167,6 @@ static int devlink_param_set(struct devlink *devlink, } static int -devlink_param_type_to_nla_type(enum devlink_param_type param_type) -{ - switch (param_type) { - case DEVLINK_PARAM_TYPE_U8: - return NLA_U8; - case DEVLINK_PARAM_TYPE_U16: - return NLA_U16; - case DEVLINK_PARAM_TYPE_U32: - return NLA_U32; - case DEVLINK_PARAM_TYPE_STRING: - return NLA_STRING; - case DEVLINK_PARAM_TYPE_BOOL: - return NLA_FLAG; - default: - return -EINVAL; - } -} - -static int devlink_nl_param_value_fill_one(struct sk_buff *msg, enum devlink_param_type type, enum devlink_param_cmode cmode, @@ -247,7 +228,6 @@ static int 
devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_param_gset_ctx ctx; struct nlattr *param_values_list; struct nlattr *param_attr; - int nla_type; void *hdr; int err; int i; @@ -293,11 +273,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, goto param_nest_cancel; if (param->generic && nla_put_flag(msg, DEVLINK_ATTR_PARAM_GENERIC)) goto param_nest_cancel; - - nla_type = devlink_param_type_to_nla_type(param->type); - if (nla_type < 0) - goto param_nest_cancel; - if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_TYPE, nla_type)) + if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_TYPE, param->type)) goto param_nest_cancel; param_values_list = nla_nest_start_noflag(msg, @@ -419,25 +395,7 @@ devlink_param_type_get_from_info(struct genl_info *info, if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_TYPE)) return -EINVAL; - switch (nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_TYPE])) { - case NLA_U8: - *param_type = DEVLINK_PARAM_TYPE_U8; - break; - case NLA_U16: - *param_type = DEVLINK_PARAM_TYPE_U16; - break; - case NLA_U32: - *param_type = DEVLINK_PARAM_TYPE_U32; - break; - case NLA_STRING: - *param_type = DEVLINK_PARAM_TYPE_STRING; - break; - case NLA_FLAG: - *param_type = DEVLINK_PARAM_TYPE_BOOL; - break; - default: - return -EINVAL; - } + *param_type = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_TYPE]); return 0; } diff --git a/net/dsa/port.c b/net/dsa/port.c index 5c9d1798e830..082573ae6864 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -116,19 +116,15 @@ static bool dsa_port_can_configure_learning(struct dsa_port *dp) bool dsa_port_supports_hwtstamp(struct dsa_port *dp) { + struct kernel_hwtstamp_config config = {}; struct dsa_switch *ds = dp->ds; - struct ifreq ifr = {}; int err; if (!ds->ops->port_hwtstamp_get || !ds->ops->port_hwtstamp_set) return false; - /* "See through" shim implementations of the "get" method. - * Since we can't cook up a complete ioctl request structure, this will - * fail in copy_to_user() with -EFAULT, which hopefully is enough to - * detect a valid implementation. - */ - err = ds->ops->port_hwtstamp_get(ds, dp->index, &ifr); + /* "See through" shim implementations of the "get" method. 
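/*
 * [Editorial sketch, not part of the patch] Driver-side view of the new
 * DSA signature: port_hwtstamp_get()/set() now receive a
 * struct kernel_hwtstamp_config (plus an extack for "set") instead of a
 * struct ifreq, so a shim "get" simply fills kernel memory with no
 * copy_to_user() round trip. "foo_priv" and its fields are
 * hypothetical.
 */
#include <linux/net_tstamp.h>
#include <net/dsa.h>

struct foo_priv {
	bool tx_ts_en[8];
	bool rx_ts_en[8];
};

static int foo_port_hwtstamp_get(struct dsa_switch *ds, int port,
				 struct kernel_hwtstamp_config *config)
{
	struct foo_priv *priv = ds->priv;

	config->tx_type = priv->tx_ts_en[port] ? HWTSTAMP_TX_ON
					       : HWTSTAMP_TX_OFF;
	config->rx_filter = priv->rx_ts_en[port] ? HWTSTAMP_FILTER_ALL
						 : HWTSTAMP_FILTER_NONE;
	return 0;
}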
*/ + err = ds->ops->port_hwtstamp_get(ds, dp->index, &config); return err != -EOPNOTSUPP; } diff --git a/net/dsa/user.c b/net/dsa/user.c index 804dc7dac4f2..e9334520c54a 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -578,20 +578,6 @@ dsa_user_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, static int dsa_user_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_user_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - int port = p->dp->index; - - /* Pass through to switch driver if it supports timestamping */ - switch (cmd) { - case SIOCGHWTSTAMP: - if (ds->ops->port_hwtstamp_get) - return ds->ops->port_hwtstamp_get(ds, port, ifr); - break; - case SIOCSHWTSTAMP: - if (ds->ops->port_hwtstamp_set) - return ds->ops->port_hwtstamp_set(ds, port, ifr); - break; - } return phylink_mii_ioctl(p->dp->pl, ifr, cmd); } @@ -2574,6 +2560,31 @@ static int dsa_user_fill_forward_path(struct net_device_path_ctx *ctx, return 0; } +static int dsa_user_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *cfg) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->port_hwtstamp_get) + return -EOPNOTSUPP; + + return ds->ops->port_hwtstamp_get(ds, dp->index, cfg); +} + +static int dsa_user_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->port_hwtstamp_set) + return -EOPNOTSUPP; + + return ds->ops->port_hwtstamp_set(ds, dp->index, cfg, extack); +} + static const struct net_device_ops dsa_user_netdev_ops = { .ndo_open = dsa_user_open, .ndo_stop = dsa_user_close, @@ -2595,6 +2606,8 @@ static const struct net_device_ops dsa_user_netdev_ops = { .ndo_vlan_rx_kill_vid = dsa_user_vlan_rx_kill_vid, .ndo_change_mtu = dsa_user_change_mtu, .ndo_fill_forward_path = dsa_user_fill_forward_path, + .ndo_hwtstamp_get = dsa_user_hwtstamp_get, + .ndo_hwtstamp_set = dsa_user_hwtstamp_set, }; static const struct device_type dsa_type = { diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 8262cc10f98d..39ec920f5de7 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -978,6 +978,88 @@ static int ethtool_rxnfc_copy_to_user(void __user *useraddr, return 0; } +static bool flow_type_hashable(u32 flow_type) +{ + switch (flow_type) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + case AH_ESP_V4_FLOW: + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + case AH_ESP_V6_FLOW: + case AH_V4_FLOW: + case ESP_V4_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case IPV4_FLOW: + case IPV6_FLOW: + case GTPU_V4_FLOW: + case GTPU_V6_FLOW: + case GTPC_V4_FLOW: + case GTPC_V6_FLOW: + case GTPC_TEID_V4_FLOW: + case GTPC_TEID_V6_FLOW: + case GTPU_EH_V4_FLOW: + case GTPU_EH_V6_FLOW: + case GTPU_UL_V4_FLOW: + case GTPU_UL_V6_FLOW: + case GTPU_DL_V4_FLOW: + case GTPU_DL_V6_FLOW: + return true; + } + + return false; +} + +/* When adding a new type, update the assert and, if it's hashable, add it to + * the flow_type_hashable switch case. 
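/*
 * [Editorial worked example] For the ethtool_check_xfrm_rxfh() helper
 * added just below, with input_xfrm = RXH_XFRM_SYM_XOR:
 *
 *   rxfh = RXH_IP_SRC | RXH_IP_DST                    -> OK
 *   rxfh = RXH_IP_SRC | RXH_IP_DST |
 *          RXH_L4_B_0_1 | RXH_L4_B_2_3                -> OK
 *   rxfh = RXH_IP_SRC                                 -> -EINVAL (src without dst)
 *   rxfh = RXH_IP_SRC | RXH_IP_DST | RXH_VLAN         -> -EINVAL (extra field)
 *
 * A symmetric hash must see the same tuple for both flow directions,
 * hence the pairing requirement.
 */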
+ */ +static_assert(GTPU_DL_V6_FLOW + 1 == __FLOW_TYPE_COUNT); + +static int ethtool_check_xfrm_rxfh(u32 input_xfrm, u64 rxfh) +{ + /* Sanity check: if symmetric-xor/symmetric-or-xor is set, then: + * 1 - no other fields besides IP src/dst and/or L4 src/dst are set + * 2 - If src is set, dst must also be set + */ + if ((input_xfrm != RXH_XFRM_NO_CHANGE && + input_xfrm & (RXH_XFRM_SYM_XOR | RXH_XFRM_SYM_OR_XOR)) && + ((rxfh & ~(RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3)) || + (!!(rxfh & RXH_IP_SRC) ^ !!(rxfh & RXH_IP_DST)) || + (!!(rxfh & RXH_L4_B_0_1) ^ !!(rxfh & RXH_L4_B_2_3)))) + return -EINVAL; + + return 0; +} + +static int ethtool_check_flow_types(struct net_device *dev, u32 input_xfrm) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_rxnfc info = { + .cmd = ETHTOOL_GRXFH, + }; + int err; + u32 i; + + for (i = 0; i < __FLOW_TYPE_COUNT; i++) { + if (!flow_type_hashable(i)) + continue; + + info.flow_type = i; + err = ops->get_rxnfc(dev, &info, NULL); + if (err) + continue; + + err = ethtool_check_xfrm_rxfh(input_xfrm, info.data); + if (err) + return err; + } + + return 0; +} + static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, u32 cmd, void __user *useraddr) { @@ -1012,16 +1094,9 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, if (rc) return rc; - /* Sanity check: if symmetric-xor/symmetric-or-xor is set, then: - * 1 - no other fields besides IP src/dst and/or L4 src/dst - * 2 - If src is set, dst must also be set - */ - if ((rxfh.input_xfrm & (RXH_XFRM_SYM_XOR | RXH_XFRM_SYM_OR_XOR)) && - ((info.data & ~(RXH_IP_SRC | RXH_IP_DST | - RXH_L4_B_0_1 | RXH_L4_B_2_3)) || - (!!(info.data & RXH_IP_SRC) ^ !!(info.data & RXH_IP_DST)) || - (!!(info.data & RXH_L4_B_0_1) ^ !!(info.data & RXH_L4_B_2_3)))) - return -EINVAL; + rc = ethtool_check_xfrm_rxfh(rxfh.input_xfrm, info.data); + if (rc) + return rc; } rc = ops->set_rxnfc(dev, &info); @@ -1413,6 +1488,10 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh.input_xfrm == RXH_XFRM_NO_CHANGE)) return -EINVAL; + ret = ethtool_check_flow_types(dev, rxfh.input_xfrm); + if (ret) + return ret; + indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]); /* Check settings which may be global rather than per RSS-context */ diff --git a/net/ethtool/mm.c b/net/ethtool/mm.c index 2816bb23c3ad..ad9b40034003 100644 --- a/net/ethtool/mm.c +++ b/net/ethtool/mm.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright 2022-2023 NXP + * Copyright 2022-2025 NXP + * Copyright 2024 Furong Xu <0x1207@gmail.com> */ #include "common.h" #include "netlink.h" @@ -282,3 +283,279 @@ bool ethtool_dev_mm_supported(struct net_device *dev) return supported; } EXPORT_SYMBOL_GPL(ethtool_dev_mm_supported); + +static void ethtool_mmsv_configure_tx(struct ethtool_mmsv *mmsv, + bool tx_active) +{ + if (mmsv->ops->configure_tx) + mmsv->ops->configure_tx(mmsv, tx_active); +} + +static void ethtool_mmsv_configure_pmac(struct ethtool_mmsv *mmsv, + bool pmac_enabled) +{ + if (mmsv->ops->configure_pmac) + mmsv->ops->configure_pmac(mmsv, pmac_enabled); +} + +static void ethtool_mmsv_send_mpacket(struct ethtool_mmsv *mmsv, + enum ethtool_mpacket mpacket) +{ + if (mmsv->ops->send_mpacket) + mmsv->ops->send_mpacket(mmsv, mpacket); +} + +/** + * ethtool_mmsv_verify_timer - Timer for MAC Merge verification + * @t: timer_list struct containing private info + * + * Verify the MAC Merge capability in the local TX direction, by + * transmitting Verify mPackets up to 3 times. 
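/*
 * [Editorial illustration] Typical successful run of the verification
 * state machine driven by this timer together with
 * ethtool_mmsv_event_handle() further down:
 *
 *   INITIAL   --send Verify mPacket-------> VERIFYING
 *   VERIFYING --link partner's Response---> SUCCEEDED
 *   SUCCEEDED --next timer expiry---------> preemptible TX enabled
 *
 * If no Response arrives after ETHTOOL_MM_MAX_VERIFY_RETRIES (3)
 * transmissions, the status degrades to FAILED and transmission stays
 * on the express MAC only.
 */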
Wait until link + * partner responds with a Response mPacket, otherwise fail. + */ +static void ethtool_mmsv_verify_timer(struct timer_list *t) +{ + struct ethtool_mmsv *mmsv = from_timer(mmsv, t, verify_timer); + unsigned long flags; + bool rearm = false; + + spin_lock_irqsave(&mmsv->lock, flags); + + switch (mmsv->status) { + case ETHTOOL_MM_VERIFY_STATUS_INITIAL: + case ETHTOOL_MM_VERIFY_STATUS_VERIFYING: + if (mmsv->verify_retries != 0) { + ethtool_mmsv_send_mpacket(mmsv, ETHTOOL_MPACKET_VERIFY); + rearm = true; + } else { + mmsv->status = ETHTOOL_MM_VERIFY_STATUS_FAILED; + } + + mmsv->verify_retries--; + break; + + case ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED: + ethtool_mmsv_configure_tx(mmsv, true); + break; + + default: + break; + } + + if (rearm) { + mod_timer(&mmsv->verify_timer, + jiffies + msecs_to_jiffies(mmsv->verify_time)); + } + + spin_unlock_irqrestore(&mmsv->lock, flags); +} + +static void ethtool_mmsv_verify_timer_arm(struct ethtool_mmsv *mmsv) +{ + if (mmsv->pmac_enabled && mmsv->tx_enabled && mmsv->verify_enabled && + mmsv->status != ETHTOOL_MM_VERIFY_STATUS_FAILED && + mmsv->status != ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED) { + timer_setup(&mmsv->verify_timer, ethtool_mmsv_verify_timer, 0); + mod_timer(&mmsv->verify_timer, jiffies); + } +} + +static void ethtool_mmsv_apply(struct ethtool_mmsv *mmsv) +{ + /* If verification is disabled, configure FPE right away. + * Otherwise let the timer code do it. + */ + if (!mmsv->verify_enabled) { + ethtool_mmsv_configure_pmac(mmsv, mmsv->pmac_enabled); + ethtool_mmsv_configure_tx(mmsv, mmsv->tx_enabled); + } else { + mmsv->status = ETHTOOL_MM_VERIFY_STATUS_INITIAL; + mmsv->verify_retries = ETHTOOL_MM_MAX_VERIFY_RETRIES; + + if (netif_running(mmsv->dev)) + ethtool_mmsv_verify_timer_arm(mmsv); + } +} + +/** + * ethtool_mmsv_stop() - Stop MAC Merge Software Verification + * @mmsv: MAC Merge Software Verification state + * + * Drivers should call this method in a state where the hardware is + * about to lose state, like ndo_stop() or suspend(), and turning off + * MAC Merge features would be superfluous. Otherwise, prefer + * ethtool_mmsv_link_state_handle() with up=false. + */ +void ethtool_mmsv_stop(struct ethtool_mmsv *mmsv) +{ + timer_shutdown_sync(&mmsv->verify_timer); +} +EXPORT_SYMBOL_GPL(ethtool_mmsv_stop); + +/** + * ethtool_mmsv_link_state_handle() - Inform MAC Merge Software Verification + * of link state changes + * @mmsv: MAC Merge Software Verification state + * @up: True if device carrier is up and able to pass verification packets + * + * Calling context is expected to be from a task, interrupts enabled. 
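+ *
+ * A minimal sketch of a call site, assuming a hypothetical driver that
+ * embeds the verification state in its private structure as priv->mmsv:
+ *
+ *	static void foo_mac_link_changed(struct foo_priv *priv, bool up)
+ *	{
+ *		ethtool_mmsv_link_state_handle(&priv->mmsv, up);
+ *	}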
+ */ +void ethtool_mmsv_link_state_handle(struct ethtool_mmsv *mmsv, bool up) +{ + unsigned long flags; + + ethtool_mmsv_stop(mmsv); + + spin_lock_irqsave(&mmsv->lock, flags); + + if (up && mmsv->pmac_enabled) { + /* VERIFY process requires pMAC enabled when NIC comes up */ + ethtool_mmsv_configure_pmac(mmsv, true); + + /* New link => maybe new partner => new verification process */ + ethtool_mmsv_apply(mmsv); + } else { + /* Reset the reported verification state while the link is down */ + if (mmsv->verify_enabled) + mmsv->status = ETHTOOL_MM_VERIFY_STATUS_INITIAL; + + /* No link or pMAC not enabled */ + ethtool_mmsv_configure_pmac(mmsv, false); + ethtool_mmsv_configure_tx(mmsv, false); + } + + spin_unlock_irqrestore(&mmsv->lock, flags); +} +EXPORT_SYMBOL_GPL(ethtool_mmsv_link_state_handle); + +/** + * ethtool_mmsv_event_handle() - Inform MAC Merge Software Verification + * of interrupt-based events + * @mmsv: MAC Merge Software Verification state + * @event: Event which took place (packet transmission or reception) + * + * Calling context expects to have interrupts disabled. + */ +void ethtool_mmsv_event_handle(struct ethtool_mmsv *mmsv, + enum ethtool_mmsv_event event) +{ + /* This is interrupt context, just spin_lock() */ + spin_lock(&mmsv->lock); + + if (!mmsv->pmac_enabled) + goto unlock; + + switch (event) { + case ETHTOOL_MMSV_LP_SENT_VERIFY_MPACKET: + /* Link partner has sent verify mPacket */ + ethtool_mmsv_send_mpacket(mmsv, ETHTOOL_MPACKET_RESPONSE); + break; + case ETHTOOL_MMSV_LD_SENT_VERIFY_MPACKET: + /* Local device has sent verify mPacket */ + if (mmsv->status != ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED) + mmsv->status = ETHTOOL_MM_VERIFY_STATUS_VERIFYING; + break; + case ETHTOOL_MMSV_LP_SENT_RESPONSE_MPACKET: + /* Link partner has sent response mPacket */ + if (mmsv->status == ETHTOOL_MM_VERIFY_STATUS_VERIFYING) + mmsv->status = ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED; + break; + } + +unlock: + spin_unlock(&mmsv->lock); +} +EXPORT_SYMBOL_GPL(ethtool_mmsv_event_handle); + +static bool ethtool_mmsv_is_tx_active(struct ethtool_mmsv *mmsv) +{ + /* TX is active if administratively enabled, and verification either + * succeeded, or was administratively disabled. + */ + return mmsv->tx_enabled && + (mmsv->status == ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED || + mmsv->status == ETHTOOL_MM_VERIFY_STATUS_DISABLED); +} + +/** + * ethtool_mmsv_get_mm() - get_mm() hook for MAC Merge Software Verification + * @mmsv: MAC Merge Software Verification state + * @state: see struct ethtool_mm_state + * + * Drivers are expected to call this from their ethtool_ops :: get_mm() + * method. + */ +void ethtool_mmsv_get_mm(struct ethtool_mmsv *mmsv, + struct ethtool_mm_state *state) +{ + unsigned long flags; + + spin_lock_irqsave(&mmsv->lock, flags); + + state->max_verify_time = ETHTOOL_MM_MAX_VERIFY_TIME_MS; + state->verify_enabled = mmsv->verify_enabled; + state->pmac_enabled = mmsv->pmac_enabled; + state->verify_time = mmsv->verify_time; + state->tx_enabled = mmsv->tx_enabled; + state->verify_status = mmsv->status; + state->tx_active = ethtool_mmsv_is_tx_active(mmsv); + + spin_unlock_irqrestore(&mmsv->lock, flags); +} +EXPORT_SYMBOL_GPL(ethtool_mmsv_get_mm); + +/** + * ethtool_mmsv_set_mm() - set_mm() hook for MAC Merge Software Verification + * @mmsv: MAC Merge Software Verification state + * @cfg: see struct ethtool_mm_cfg + * + * Drivers are expected to call this from their ethtool_ops :: set_mm() + * method. 
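+ *
+ * A minimal sketch of the expected wiring, using hypothetical driver names:
+ *
+ *	static int foo_set_mm(struct net_device *dev, struct ethtool_mm_cfg *cfg,
+ *			      struct netlink_ext_ack *extack)
+ *	{
+ *		struct foo_priv *priv = netdev_priv(dev);
+ *
+ *		ethtool_mmsv_set_mm(&priv->mmsv, cfg);
+ *		return 0;
+ *	}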
+ */
+void ethtool_mmsv_set_mm(struct ethtool_mmsv *mmsv, struct ethtool_mm_cfg *cfg)
+{
+	unsigned long flags;
+
+	/* Wait for the verification that's currently in progress to finish */
+	ethtool_mmsv_stop(mmsv);
+
+	spin_lock_irqsave(&mmsv->lock, flags);
+
+	mmsv->verify_enabled = cfg->verify_enabled;
+	mmsv->pmac_enabled = cfg->pmac_enabled;
+	mmsv->verify_time = cfg->verify_time;
+	mmsv->tx_enabled = cfg->tx_enabled;
+
+	if (!cfg->verify_enabled)
+		mmsv->status = ETHTOOL_MM_VERIFY_STATUS_DISABLED;
+
+	ethtool_mmsv_apply(mmsv);
+
+	spin_unlock_irqrestore(&mmsv->lock, flags);
+}
+EXPORT_SYMBOL_GPL(ethtool_mmsv_set_mm);
+
+/**
+ * ethtool_mmsv_init() - Initialize MAC Merge Software Verification state
+ * @mmsv: MAC Merge Software Verification state
+ * @dev: Pointer to network interface
+ * @ops: Methods for implementing the generic functionality
+ *
+ * The MAC Merge Software Verification is a timer- and event-based state
+ * machine intended for network interfaces which lack a hardware-based
+ * TX verification process (as per IEEE 802.3 clause 99.4.3). The timer
+ * is managed by the core code, whereas events are supplied by the
+ * driver explicitly calling one of the other API functions.
+ */
+void ethtool_mmsv_init(struct ethtool_mmsv *mmsv, struct net_device *dev,
+		       const struct ethtool_mmsv_ops *ops)
+{
+	mmsv->ops = ops;
+	mmsv->dev = dev;
+	mmsv->verify_retries = ETHTOOL_MM_MAX_VERIFY_RETRIES;
+	mmsv->verify_time = ETHTOOL_MM_MAX_VERIFY_TIME_MS;
+	mmsv->status = ETHTOOL_MM_VERIFY_STATUS_DISABLED;
+	timer_setup(&mmsv->verify_timer, ethtool_mmsv_verify_timer, 0);
+	spin_lock_init(&mmsv->lock);
+}
+EXPORT_SYMBOL_GPL(ethtool_mmsv_init);
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 977beeaaa2f9..9de828df46cd 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -357,6 +357,18 @@ struct ethnl_dump_ctx {
 	unsigned long		pos_ifindex;
 };
 
+/**
+ * struct ethnl_perphy_dump_ctx - context for dumpit() PHY-aware callbacks
+ * @ethnl_ctx: generic ethnl context
+ * @ifindex: For Filtered DUMP requests, the ifindex of the targeted netdev
+ * @pos_phyindex: iterator position for multi-msg DUMP
+ */
+struct ethnl_perphy_dump_ctx {
+	struct ethnl_dump_ctx	ethnl_ctx;
+	unsigned int		ifindex;
+	unsigned long		pos_phyindex;
+};
+
 static const struct ethnl_request_ops *
 ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
 	[ETHTOOL_MSG_STRSET_GET]	= &ethnl_strset_request_ops,
@@ -400,6 +412,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
 	[ETHTOOL_MSG_MM_SET]		= &ethnl_mm_request_ops,
 	[ETHTOOL_MSG_TSCONFIG_GET]	= &ethnl_tsconfig_request_ops,
 	[ETHTOOL_MSG_TSCONFIG_SET]	= &ethnl_tsconfig_request_ops,
+	[ETHTOOL_MSG_PHY_GET]		= &ethnl_phy_request_ops,
 };
 
 static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -407,6 +420,12 @@ static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
 	return (struct ethnl_dump_ctx *)cb->ctx;
 }
 
+static struct ethnl_perphy_dump_ctx *
+ethnl_perphy_dump_context(struct netlink_callback *cb)
+{
+	return (struct ethnl_perphy_dump_ctx *)cb->ctx;
+}
+
 /**
  * ethnl_default_parse() - Parse request message
  * @req_info: pointer to structure to put data into
@@ -584,18 +603,19 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
 {
 	struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb);
 	struct net *net = sock_net(skb->sk);
+	netdevice_tracker dev_tracker;
 	struct net_device *dev;
 	int ret = 0;
 
 	rcu_read_lock();
 	for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
-		dev_hold(dev);
+		netdev_hold(dev, &dev_tracker, GFP_ATOMIC);
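+		/* The tracked reference keeps dev alive while the RCU read
+		 * lock is dropped so that the dump callback may sleep.
+		 */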
 		rcu_read_unlock();
 		ret = ethnl_default_dump_one(skb, dev, ctx,
 					     genl_info_dump(cb));
 		rcu_read_lock();
-		dev_put(dev);
+		netdev_put(dev, &dev_tracker);
 
 		if (ret < 0 && ret != -EOPNOTSUPP) {
 			if (likely(skb->len))
@@ -662,6 +682,173 @@ free_req_info:
 	return ret;
 }
 
+/* per-PHY ->start() handler for GET requests */
+static int ethnl_perphy_start(struct netlink_callback *cb)
+{
+	struct ethnl_perphy_dump_ctx *phy_ctx = ethnl_perphy_dump_context(cb);
+	const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+	struct ethnl_dump_ctx *ctx = &phy_ctx->ethnl_ctx;
+	struct ethnl_reply_data *reply_data;
+	const struct ethnl_request_ops *ops;
+	struct ethnl_req_info *req_info;
+	struct genlmsghdr *ghdr;
+	int ret;
+
+	BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
+
+	ghdr = nlmsg_data(cb->nlh);
+	ops = ethnl_default_requests[ghdr->cmd];
+	if (WARN_ONCE(!ops, "cmd %u has no ethnl_request_ops\n", ghdr->cmd))
+		return -EOPNOTSUPP;
+	req_info = kzalloc(ops->req_info_size, GFP_KERNEL);
+	if (!req_info)
+		return -ENOMEM;
+	reply_data = kmalloc(ops->reply_data_size, GFP_KERNEL);
+	if (!reply_data) {
+		ret = -ENOMEM;
+		goto free_req_info;
+	}
+
+	/* Unlike the per-dev dump, don't ignore dev. The dump handler will
+	 * notice it and dump PHYs from the given dev. We only keep track of
+	 * the dev's ifindex; .dumpit() will grab and release the netdev
+	 * itself.
+	 */
+	ret = ethnl_default_parse(req_info, &info->info, ops, false);
+	if (req_info->dev) {
+		phy_ctx->ifindex = req_info->dev->ifindex;
+		netdev_put(req_info->dev, &req_info->dev_tracker);
+		req_info->dev = NULL;
+	}
+	if (ret < 0)
+		goto free_reply_data;
+
+	ctx->ops = ops;
+	ctx->req_info = req_info;
+	ctx->reply_data = reply_data;
+	ctx->pos_ifindex = 0;
+
+	return 0;
+
+free_reply_data:
+	kfree(reply_data);
+free_req_info:
+	kfree(req_info);
+
+	return ret;
+}
+
+static int ethnl_perphy_dump_one_dev(struct sk_buff *skb,
+				     struct ethnl_perphy_dump_ctx *ctx,
+				     const struct genl_info *info)
+{
+	struct ethnl_dump_ctx *ethnl_ctx = &ctx->ethnl_ctx;
+	struct net_device *dev = ethnl_ctx->req_info->dev;
+	struct phy_device_node *pdn;
+	int ret;
+
+	if (!dev->link_topo)
+		return 0;
+
+	xa_for_each_start(&dev->link_topo->phys, ctx->pos_phyindex, pdn,
+			  ctx->pos_phyindex) {
+		ethnl_ctx->req_info->phy_index = ctx->pos_phyindex;
+
+		/* We can re-use the original dump_one, as ->prepare_data in
+		 * per-PHY commands uses ethnl_req_get_phydev(), which gets
+		 * the PHY from req_info->phy_index.
+		 */
+		ret = ethnl_default_dump_one(skb, dev, ethnl_ctx, info);
+		if (ret)
+			return ret;
+	}
+
+	ctx->pos_phyindex = 0;
+
+	return 0;
+}
+
+static int ethnl_perphy_dump_all_dev(struct sk_buff *skb,
+				     struct ethnl_perphy_dump_ctx *ctx,
+				     const struct genl_info *info)
+{
+	struct ethnl_dump_ctx *ethnl_ctx = &ctx->ethnl_ctx;
+	struct net *net = sock_net(skb->sk);
+	netdevice_tracker dev_tracker;
+	struct net_device *dev;
+	int ret = 0;
+
+	rcu_read_lock();
+	for_each_netdev_dump(net, dev, ethnl_ctx->pos_ifindex) {
+		netdev_hold(dev, &dev_tracker, GFP_ATOMIC);
+		rcu_read_unlock();
+
+		/* per-PHY commands use ethnl_req_get_phydev(), which needs
+		 * the net_device in the req_info
+		 */
+		ethnl_ctx->req_info->dev = dev;
+		ret = ethnl_perphy_dump_one_dev(skb, ctx, info);
+
+		rcu_read_lock();
+		netdev_put(dev, &dev_tracker);
+		ethnl_ctx->req_info->dev = NULL;
+
+		if (ret < 0 && ret != -EOPNOTSUPP) {
+			if (likely(skb->len))
+				ret = skb->len;
+			break;
+		}
+		ret = 0;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/* per-PHY ->dumpit() handler for GET requests. 
*/ +static int ethnl_perphy_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct ethnl_perphy_dump_ctx *ctx = ethnl_perphy_dump_context(cb); + const struct genl_dumpit_info *info = genl_dumpit_info(cb); + struct ethnl_dump_ctx *ethnl_ctx = &ctx->ethnl_ctx; + int ret = 0; + + if (ctx->ifindex) { + netdevice_tracker dev_tracker; + struct net_device *dev; + + dev = netdev_get_by_index(genl_info_net(&info->info), + ctx->ifindex, &dev_tracker, + GFP_KERNEL); + if (!dev) + return -ENODEV; + + ethnl_ctx->req_info->dev = dev; + ret = ethnl_perphy_dump_one_dev(skb, ctx, genl_info_dump(cb)); + + if (ret < 0 && ret != -EOPNOTSUPP && likely(skb->len)) + ret = skb->len; + + netdev_put(dev, &dev_tracker); + } else { + ret = ethnl_perphy_dump_all_dev(skb, ctx, genl_info_dump(cb)); + } + + return ret; +} + +/* per-PHY ->done() handler for GET requests */ +static int ethnl_perphy_done(struct netlink_callback *cb) +{ + struct ethnl_perphy_dump_ctx *ctx = ethnl_perphy_dump_context(cb); + struct ethnl_dump_ctx *ethnl_ctx = &ctx->ethnl_ctx; + + kfree(ethnl_ctx->reply_data); + kfree(ethnl_ctx->req_info); + + return 0; +} + /* default ->done() handler for GET requests */ static int ethnl_default_done(struct netlink_callback *cb) { @@ -1200,9 +1387,9 @@ static const struct genl_ops ethtool_genl_ops[] = { { .cmd = ETHTOOL_MSG_PSE_GET, .doit = ethnl_default_doit, - .start = ethnl_default_start, - .dumpit = ethnl_default_dumpit, - .done = ethnl_default_done, + .start = ethnl_perphy_start, + .dumpit = ethnl_perphy_dumpit, + .done = ethnl_perphy_done, .policy = ethnl_pse_get_policy, .maxattr = ARRAY_SIZE(ethnl_pse_get_policy) - 1, }, @@ -1224,9 +1411,9 @@ static const struct genl_ops ethtool_genl_ops[] = { { .cmd = ETHTOOL_MSG_PLCA_GET_CFG, .doit = ethnl_default_doit, - .start = ethnl_default_start, - .dumpit = ethnl_default_dumpit, - .done = ethnl_default_done, + .start = ethnl_perphy_start, + .dumpit = ethnl_perphy_dumpit, + .done = ethnl_perphy_done, .policy = ethnl_plca_get_cfg_policy, .maxattr = ARRAY_SIZE(ethnl_plca_get_cfg_policy) - 1, }, @@ -1240,9 +1427,9 @@ static const struct genl_ops ethtool_genl_ops[] = { { .cmd = ETHTOOL_MSG_PLCA_GET_STATUS, .doit = ethnl_default_doit, - .start = ethnl_default_start, - .dumpit = ethnl_default_dumpit, - .done = ethnl_default_done, + .start = ethnl_perphy_start, + .dumpit = ethnl_perphy_dumpit, + .done = ethnl_perphy_done, .policy = ethnl_plca_get_status_policy, .maxattr = ARRAY_SIZE(ethnl_plca_get_status_policy) - 1, }, @@ -1271,10 +1458,10 @@ static const struct genl_ops ethtool_genl_ops[] = { }, { .cmd = ETHTOOL_MSG_PHY_GET, - .doit = ethnl_phy_doit, - .start = ethnl_phy_start, - .dumpit = ethnl_phy_dumpit, - .done = ethnl_phy_done, + .doit = ethnl_default_doit, + .start = ethnl_perphy_start, + .dumpit = ethnl_perphy_dumpit, + .done = ethnl_perphy_done, .policy = ethnl_phy_get_policy, .maxattr = ARRAY_SIZE(ethnl_phy_get_policy) - 1, }, diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index ec6ab5443a6f..91b953924af3 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -499,10 +499,6 @@ int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info); int ethnl_rss_dump_start(struct netlink_callback *cb); int ethnl_rss_dumpit(struct sk_buff *skb, struct netlink_callback *cb); -int ethnl_phy_start(struct netlink_callback *cb); -int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info); -int ethnl_phy_dumpit(struct sk_buff *skb, struct 
netlink_callback *cb); -int ethnl_phy_done(struct netlink_callback *cb); int ethnl_tsinfo_start(struct netlink_callback *cb); int ethnl_tsinfo_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int ethnl_tsinfo_done(struct netlink_callback *cb); diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c index 1f590e8d75ed..68372bef4b2f 100644 --- a/net/ethtool/phy.c +++ b/net/ethtool/phy.c @@ -12,304 +12,154 @@ #include <net/netdev_lock.h> struct phy_req_info { - struct ethnl_req_info base; - struct phy_device_node *pdn; + struct ethnl_req_info base; }; -#define PHY_REQINFO(__req_base) \ - container_of(__req_base, struct phy_req_info, base) +struct phy_reply_data { + struct ethnl_reply_data base; + u32 phyindex; + char *drvname; + char *name; + unsigned int upstream_type; + char *upstream_sfp_name; + unsigned int upstream_index; + char *downstream_sfp_name; +}; + +#define PHY_REPDATA(__reply_base) \ + container_of(__reply_base, struct phy_reply_data, base) const struct nla_policy ethnl_phy_get_policy[ETHTOOL_A_PHY_HEADER + 1] = { [ETHTOOL_A_PHY_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), }; -/* Caller holds rtnl */ -static ssize_t -ethnl_phy_reply_size(const struct ethnl_req_info *req_base, - struct netlink_ext_ack *extack) +static int phy_reply_size(const struct ethnl_req_info *req_info, + const struct ethnl_reply_data *reply_data) { - struct phy_req_info *req_info = PHY_REQINFO(req_base); - struct phy_device_node *pdn = req_info->pdn; - struct phy_device *phydev = pdn->phy; + struct phy_reply_data *rep_data = PHY_REPDATA(reply_data); size_t size = 0; - ASSERT_RTNL(); - /* ETHTOOL_A_PHY_INDEX */ size += nla_total_size(sizeof(u32)); /* ETHTOOL_A_DRVNAME */ - if (phydev->drv) - size += nla_total_size(strlen(phydev->drv->name) + 1); + if (rep_data->drvname) + size += nla_total_size(strlen(rep_data->drvname) + 1); /* ETHTOOL_A_NAME */ - size += nla_total_size(strlen(dev_name(&phydev->mdio.dev)) + 1); + size += nla_total_size(strlen(rep_data->name) + 1); /* ETHTOOL_A_PHY_UPSTREAM_TYPE */ size += nla_total_size(sizeof(u32)); - if (phy_on_sfp(phydev)) { - const char *upstream_sfp_name = sfp_get_name(pdn->parent_sfp_bus); - - /* ETHTOOL_A_PHY_UPSTREAM_SFP_NAME */ - if (upstream_sfp_name) - size += nla_total_size(strlen(upstream_sfp_name) + 1); + /* ETHTOOL_A_PHY_UPSTREAM_SFP_NAME */ + if (rep_data->upstream_sfp_name) + size += nla_total_size(strlen(rep_data->upstream_sfp_name) + 1); - /* ETHTOOL_A_PHY_UPSTREAM_INDEX */ + /* ETHTOOL_A_PHY_UPSTREAM_INDEX */ + if (rep_data->upstream_index) size += nla_total_size(sizeof(u32)); - } /* ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME */ - if (phydev->sfp_bus) { - const char *sfp_name = sfp_get_name(phydev->sfp_bus); - - if (sfp_name) - size += nla_total_size(strlen(sfp_name) + 1); - } + if (rep_data->downstream_sfp_name) + size += nla_total_size(strlen(rep_data->downstream_sfp_name) + 1); return size; } -static int -ethnl_phy_fill_reply(const struct ethnl_req_info *req_base, struct sk_buff *skb) +static int phy_prepare_data(const struct ethnl_req_info *req_info, + struct ethnl_reply_data *reply_data, + const struct genl_info *info) { - struct phy_req_info *req_info = PHY_REQINFO(req_base); - struct phy_device_node *pdn = req_info->pdn; - struct phy_device *phydev = pdn->phy; - enum phy_upstream ptype; + struct phy_link_topology *topo = reply_data->dev->link_topo; + struct phy_reply_data *rep_data = PHY_REPDATA(reply_data); + struct nlattr **tb = info->attrs; + struct phy_device_node *pdn; + struct phy_device *phydev; - ptype = pdn->upstream_type; + /* RTNL is 
held by the caller */ + phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PHY_HEADER, + info->extack); + if (IS_ERR_OR_NULL(phydev)) + return -EOPNOTSUPP; - if (nla_put_u32(skb, ETHTOOL_A_PHY_INDEX, phydev->phyindex) || - nla_put_string(skb, ETHTOOL_A_PHY_NAME, dev_name(&phydev->mdio.dev)) || - nla_put_u32(skb, ETHTOOL_A_PHY_UPSTREAM_TYPE, ptype)) - return -EMSGSIZE; + pdn = xa_load(&topo->phys, phydev->phyindex); + if (!pdn) + return -EOPNOTSUPP; - if (phydev->drv && - nla_put_string(skb, ETHTOOL_A_PHY_DRVNAME, phydev->drv->name)) - return -EMSGSIZE; + rep_data->phyindex = phydev->phyindex; + rep_data->name = kstrdup(dev_name(&phydev->mdio.dev), GFP_KERNEL); + rep_data->drvname = kstrdup(phydev->drv->name, GFP_KERNEL); + rep_data->upstream_type = pdn->upstream_type; - if (ptype == PHY_UPSTREAM_PHY) { + if (pdn->upstream_type == PHY_UPSTREAM_PHY) { struct phy_device *upstream = pdn->upstream.phydev; - const char *sfp_upstream_name; - - /* Parent index */ - if (nla_put_u32(skb, ETHTOOL_A_PHY_UPSTREAM_INDEX, upstream->phyindex)) - return -EMSGSIZE; - - if (pdn->parent_sfp_bus) { - sfp_upstream_name = sfp_get_name(pdn->parent_sfp_bus); - if (sfp_upstream_name && - nla_put_string(skb, ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, - sfp_upstream_name)) - return -EMSGSIZE; - } - } - - if (phydev->sfp_bus) { - const char *sfp_name = sfp_get_name(phydev->sfp_bus); - - if (sfp_name && - nla_put_string(skb, ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, - sfp_name)) - return -EMSGSIZE; + rep_data->upstream_index = upstream->phyindex; } - return 0; -} - -static int ethnl_phy_parse_request(struct ethnl_req_info *req_base, - struct nlattr **tb, - struct netlink_ext_ack *extack) -{ - struct phy_link_topology *topo = req_base->dev->link_topo; - struct phy_req_info *req_info = PHY_REQINFO(req_base); - struct phy_device *phydev; + if (pdn->parent_sfp_bus) + rep_data->upstream_sfp_name = kstrdup(sfp_get_name(pdn->parent_sfp_bus), + GFP_KERNEL); - phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PHY_HEADER, - extack); - if (!phydev) - return 0; - - if (IS_ERR(phydev)) - return PTR_ERR(phydev); - - if (!topo) - return 0; - - req_info->pdn = xa_load(&topo->phys, phydev->phyindex); + if (phydev->sfp_bus) + rep_data->downstream_sfp_name = kstrdup(sfp_get_name(phydev->sfp_bus), + GFP_KERNEL); return 0; } -int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info) +static int phy_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_info, + const struct ethnl_reply_data *reply_data) { - struct phy_req_info req_info = {}; - struct nlattr **tb = info->attrs; - struct sk_buff *rskb; - void *reply_payload; - int reply_len; - int ret; - - ret = ethnl_parse_header_dev_get(&req_info.base, - tb[ETHTOOL_A_PHY_HEADER], - genl_info_net(info), info->extack, - true); - if (ret < 0) - return ret; - - rtnl_lock(); - netdev_lock_ops(req_info.base.dev); - - ret = ethnl_phy_parse_request(&req_info.base, tb, info->extack); - if (ret < 0) - goto err_unlock; - - /* No PHY, return early */ - if (!req_info.pdn) - goto err_unlock; - - ret = ethnl_phy_reply_size(&req_info.base, info->extack); - if (ret < 0) - goto err_unlock; - reply_len = ret + ethnl_reply_header_size(); - - rskb = ethnl_reply_init(reply_len, req_info.base.dev, - ETHTOOL_MSG_PHY_GET_REPLY, - ETHTOOL_A_PHY_HEADER, - info, &reply_payload); - if (!rskb) { - ret = -ENOMEM; - goto err_unlock; - } - - ret = ethnl_phy_fill_reply(&req_info.base, rskb); - if (ret) - goto err_free_msg; + struct phy_reply_data *rep_data = PHY_REPDATA(reply_data); - 
netdev_unlock_ops(req_info.base.dev); - rtnl_unlock(); - ethnl_parse_header_dev_put(&req_info.base); - genlmsg_end(rskb, reply_payload); - - return genlmsg_reply(rskb, info); - -err_free_msg: - nlmsg_free(rskb); -err_unlock: - netdev_unlock_ops(req_info.base.dev); - rtnl_unlock(); - ethnl_parse_header_dev_put(&req_info.base); - return ret; -} - -struct ethnl_phy_dump_ctx { - struct phy_req_info *phy_req_info; - unsigned long ifindex; - unsigned long phy_index; -}; - -int ethnl_phy_start(struct netlink_callback *cb) -{ - const struct genl_info *info = genl_info_dump(cb); - struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx; - int ret; - - BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); - - ctx->phy_req_info = kzalloc(sizeof(*ctx->phy_req_info), GFP_KERNEL); - if (!ctx->phy_req_info) - return -ENOMEM; - - ret = ethnl_parse_header_dev_get(&ctx->phy_req_info->base, - info->attrs[ETHTOOL_A_PHY_HEADER], - sock_net(cb->skb->sk), cb->extack, - false); - ctx->ifindex = 0; - ctx->phy_index = 0; - - if (ret) - kfree(ctx->phy_req_info); + if (nla_put_u32(skb, ETHTOOL_A_PHY_INDEX, rep_data->phyindex) || + nla_put_string(skb, ETHTOOL_A_PHY_NAME, rep_data->name) || + nla_put_u32(skb, ETHTOOL_A_PHY_UPSTREAM_TYPE, rep_data->upstream_type)) + return -EMSGSIZE; - return ret; -} + if (rep_data->drvname && + nla_put_string(skb, ETHTOOL_A_PHY_DRVNAME, rep_data->drvname)) + return -EMSGSIZE; -int ethnl_phy_done(struct netlink_callback *cb) -{ - struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx; + if (rep_data->upstream_index && + nla_put_u32(skb, ETHTOOL_A_PHY_UPSTREAM_INDEX, + rep_data->upstream_index)) + return -EMSGSIZE; - if (ctx->phy_req_info->base.dev) - ethnl_parse_header_dev_put(&ctx->phy_req_info->base); + if (rep_data->upstream_sfp_name && + nla_put_string(skb, ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, + rep_data->upstream_sfp_name)) + return -EMSGSIZE; - kfree(ctx->phy_req_info); + if (rep_data->downstream_sfp_name && + nla_put_string(skb, ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, + rep_data->downstream_sfp_name)) + return -EMSGSIZE; return 0; } -static int ethnl_phy_dump_one_dev(struct sk_buff *skb, struct net_device *dev, - struct netlink_callback *cb) +static void phy_cleanup_data(struct ethnl_reply_data *reply_data) { - struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx; - struct phy_req_info *pri = ctx->phy_req_info; - struct phy_device_node *pdn; - int ret = 0; - void *ehdr; - - if (!dev->link_topo) - return 0; - - xa_for_each_start(&dev->link_topo->phys, ctx->phy_index, pdn, ctx->phy_index) { - ehdr = ethnl_dump_put(skb, cb, ETHTOOL_MSG_PHY_GET_REPLY); - if (!ehdr) { - ret = -EMSGSIZE; - break; - } - - ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_PHY_HEADER); - if (ret < 0) { - genlmsg_cancel(skb, ehdr); - break; - } - - pri->pdn = pdn; - ret = ethnl_phy_fill_reply(&pri->base, skb); - if (ret < 0) { - genlmsg_cancel(skb, ehdr); - break; - } - - genlmsg_end(skb, ehdr); - } + struct phy_reply_data *rep_data = PHY_REPDATA(reply_data); - return ret; + kfree(rep_data->drvname); + kfree(rep_data->name); + kfree(rep_data->upstream_sfp_name); + kfree(rep_data->downstream_sfp_name); } -int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx; - struct net *net = sock_net(skb->sk); - struct net_device *dev; - int ret = 0; - - rtnl_lock(); - - if (ctx->phy_req_info->base.dev) { - dev = ctx->phy_req_info->base.dev; - netdev_lock_ops(dev); - ret = ethnl_phy_dump_one_dev(skb, dev, cb); - netdev_unlock_ops(dev); - } else { - for_each_netdev_dump(net, dev, 
ctx->ifindex) { - netdev_lock_ops(dev); - ret = ethnl_phy_dump_one_dev(skb, dev, cb); - netdev_unlock_ops(dev); - if (ret) - break; - - ctx->phy_index = 0; - } - } - rtnl_unlock(); - - return ret; -} +const struct ethnl_request_ops ethnl_phy_request_ops = { + .request_cmd = ETHTOOL_MSG_PHY_GET, + .reply_cmd = ETHTOOL_MSG_PHY_GET_REPLY, + .hdr_attr = ETHTOOL_A_PHY_HEADER, + .req_info_size = sizeof(struct phy_req_info), + .reply_data_size = sizeof(struct phy_reply_data), + + .prepare_data = phy_prepare_data, + .reply_size = phy_reply_size, + .fill_reply = phy_fill_reply, + .cleanup_data = phy_cleanup_data, +}; diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 1b1b700ec05e..0d1e56965af0 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -761,6 +761,11 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], if (res) goto err_unregister; + if (protocol_version == PRP_V1) { + eth_hw_addr_set(slave[1], slave[0]->dev_addr); + call_netdevice_notifiers(NETDEV_CHANGEADDR, slave[1]); + } + if (interlink) { res = hsr_add_port(hsr, interlink, HSR_PT_INTERLINK, extack); if (res) diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index d7ae32473c41..192893c3f2ec 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -78,6 +78,15 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, eth_hw_addr_set(master->dev, dev->dev_addr); call_netdevice_notifiers(NETDEV_CHANGEADDR, master->dev); + + if (hsr->prot_version == PRP_V1) { + port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); + if (port) { + eth_hw_addr_set(port->dev, dev->dev_addr); + call_netdevice_notifiers(NETDEV_CHANGEADDR, + port->dev); + } + } } /* Make sure we recognize frames from ourselves in hsr_rcv() */ diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 1bc47b17a296..135ec5fce019 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -155,6 +155,7 @@ struct hsr_port { struct hsr_priv *hsr; enum hsr_port_type type; struct rcu_head rcu; + unsigned char original_macaddress[ETH_ALEN]; }; struct hsr_frame_info; diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 2a802a5de2ac..b87b6a6fe070 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -196,6 +196,7 @@ int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev, port->hsr = hsr; port->dev = dev; port->type = type; + ether_addr_copy(port->original_macaddress, dev->dev_addr); if (type != HSR_PT_MASTER) { res = hsr_portdev_setup(hsr, dev, port, extack); @@ -232,6 +233,7 @@ void hsr_del_port(struct hsr_port *port) if (!port->hsr->fwd_offloaded) dev_set_promiscuity(port->dev, -1); netdev_upper_dev_unlink(port->dev, master->dev); + eth_hw_addr_set(port->dev, port->original_macaddress); } kfree_rcu(port, rcu); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 6d2c97f8e9ef..12850a277251 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -425,7 +425,7 @@ config INET_DIAG tristate "INET: socket monitoring interface" default y help - Support for INET (TCP, DCCP, etc) socket monitoring interface used by + Support for INET (TCP, UDP, etc) socket monitoring interface used by native Linux tools such as ss. ss is included in iproute2, currently downloadable at: diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5df1f1325259..76e38092cd8a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1328,10 +1328,7 @@ int inet_sk_rebuild_header(struct sock *sk) /* Routing failed... */ sk->sk_route_caps = 0; - /* - * Other protocols have to map its equivalent state to TCP_SYN_SENT. 
- * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme - */ + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) || sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 77e5705ac799..c47d3828d4f6 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1792,12 +1792,12 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, struct ifaddrmsg *ifm; int err, i; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + ifm = nlmsg_payload(nlh, sizeof(*ifm)); + if (!ifm) { NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request"); return -EINVAL; } - ifm = nlmsg_data(nlh); if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) { NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request"); return -EINVAL; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3f4e629998fa..57f088e5540e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -948,12 +948,12 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, if (filter->rtnl_held) ASSERT_RTNL(); - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + rtm = nlmsg_payload(nlh, sizeof(*rtm)); + if (!rtm) { NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request"); return -EINVAL; } - rtm = nlmsg_data(nlh); if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos || rtm->rtm_scope) { NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request"); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f68bb9e34c34..dabe2b7044ab 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -365,7 +365,7 @@ static struct hlist_head *fib_info_laddrhash_bucket(const struct net *net, static struct hlist_head *fib_info_hash_alloc(unsigned int hash_bits) { /* The second half is used for prefsrc */ - return kvcalloc((1 << hash_bits) * 2, sizeof(struct hlist_head *), + return kvcalloc((1 << hash_bits) * 2, sizeof(struct hlist_head), GFP_KERNEL); } @@ -617,10 +617,12 @@ int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc, { int err; - nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *, - gfp_flags); - if (!nhc->nhc_pcpu_rth_output) - return -ENOMEM; + if (!nhc->nhc_pcpu_rth_output) { + nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *, + gfp_flags); + if (!nhc->nhc_pcpu_rth_output) + return -ENOMEM; + } if (encap) { struct lwtunnel_state *lwtstate; @@ -2168,34 +2170,52 @@ static bool fib_good_nh(const struct fib_nh *nh) return !!(state & NUD_VALID); } -void fib_select_multipath(struct fib_result *res, int hash) +void fib_select_multipath(struct fib_result *res, int hash, + const struct flowi4 *fl4) { struct fib_info *fi = res->fi; struct net *net = fi->fib_net; - bool first = false; + bool found = false; + bool use_neigh; + __be32 saddr; if (unlikely(res->fi->nh)) { nexthop_path_fib_result(res, hash); return; } + use_neigh = READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh); + saddr = fl4 ? fl4->saddr : 0; + change_nexthops(fi) { - if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) { - if (!fib_good_nh(nexthop_nh)) - continue; - if (!first) { - res->nh_sel = nhsel; - res->nhc = &nexthop_nh->nh_common; - first = true; - } + int nh_upper_bound; + + /* Nexthops without a carrier are assigned an upper bound of + * minus one when "ignore_routes_with_linkdown" is set. 
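+		 * Checking for that sentinel here skips dead nexthops without
+		 * having to consult the neighbour state.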
+ */ + nh_upper_bound = atomic_read(&nexthop_nh->fib_nh_upper_bound); + if (nh_upper_bound == -1 || + (use_neigh && !fib_good_nh(nexthop_nh))) + continue; + + if (!found) { + res->nh_sel = nhsel; + res->nhc = &nexthop_nh->nh_common; + found = !saddr || nexthop_nh->nh_saddr == saddr; } - if (hash > atomic_read(&nexthop_nh->fib_nh_upper_bound)) + if (hash > nh_upper_bound) continue; - res->nh_sel = nhsel; - res->nhc = &nexthop_nh->nh_common; - return; + if (!saddr || nexthop_nh->nh_saddr == saddr) { + res->nh_sel = nhsel; + res->nhc = &nexthop_nh->nh_common; + return; + } + + if (found) + return; + } endfor_nexthops(fi); } #endif @@ -2210,7 +2230,7 @@ void fib_select_path(struct net *net, struct fib_result *res, if (fib_info_num_path(res->fi) > 1) { int h = fib_multipath_hash(net, fl4, skb, NULL); - fib_select_multipath(res, h); + fib_select_multipath(res, h, fl4); } else #endif diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 6701a98d9a9f..dafd68f3436a 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -199,7 +199,7 @@ static const struct net_protocol net_gre_protocol = { static int __init gre_init(void) { - pr_info("GRE over IPv4 demultiplexor driver\n"); + pr_info("GRE over IPv4 demultiplexer driver\n"); if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { pr_err("can't add protocol\n"); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index dd5cf8914a28..20915895bdaa 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -330,7 +330,7 @@ inet_csk_find_open_port(const struct sock *sk, struct inet_bind_bucket **tb_ret, struct inet_bind2_bucket **tb2_ret, struct inet_bind_hashbucket **head2_ret, int *port_ret) { - struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); + struct inet_hashinfo *hinfo = tcp_get_hashinfo(sk); int i, low, high, attempt_half, port, l3mdev; struct inet_bind_hashbucket *head, *head2; struct net *net = sock_net(sk); @@ -512,10 +512,10 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, */ int inet_csk_get_port(struct sock *sk, unsigned short snum) { - struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; bool found_port = false, check_bind_conflict = true; bool bhash_created = false, bhash2_created = false; + struct inet_hashinfo *hinfo = tcp_get_hashinfo(sk); int ret = -EADDRINUSE, port = snum, l3mdev; struct inet_bind_hashbucket *head, *head2; struct inet_bind2_bucket *tb2 = NULL; @@ -767,7 +767,6 @@ void inet_csk_init_xmit_timers(struct sock *sk, timer_setup(&sk->sk_timer, keepalive_handler, 0); icsk->icsk_pending = icsk->icsk_ack.pending = 0; } -EXPORT_SYMBOL(inet_csk_init_xmit_timers); void inet_csk_clear_xmit_timers(struct sock *sk) { @@ -780,7 +779,6 @@ void inet_csk_clear_xmit_timers(struct sock *sk) sk_stop_timer(sk, &icsk->icsk_delack_timer); sk_stop_timer(sk, &sk->sk_timer); } -EXPORT_SYMBOL(inet_csk_clear_xmit_timers); void inet_csk_clear_xmit_timers_sync(struct sock *sk) { @@ -831,7 +829,6 @@ no_route: __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } -EXPORT_SYMBOL_GPL(inet_csk_route_req); struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct sock *newsk, @@ -898,7 +895,6 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req) req->num_retrans++; return err; } -EXPORT_SYMBOL(inet_rtx_syn_ack); static struct request_sock * reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener, @@ -1026,9 +1022,10 @@ static 
bool reqsk_queue_unlink(struct request_sock *req) bool found = false; if (sk_hashed(sk)) { - struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); - spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash); + struct inet_hashinfo *hashinfo = tcp_get_hashinfo(sk); + spinlock_t *lock; + lock = inet_ehash_lockp(hashinfo, req->rsk_hash); spin_lock(lock); found = __sk_nulls_del_node_init_rcu(sk); spin_unlock(lock); @@ -1058,14 +1055,13 @@ bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) { return __inet_csk_reqsk_queue_drop(sk, req, false); } -EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req) { inet_csk_reqsk_queue_drop(sk, req); reqsk_put(req); } -EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put); +EXPORT_IPV6_MOD(inet_csk_reqsk_queue_drop_and_put); static void reqsk_timer_handler(struct timer_list *t) { @@ -1209,7 +1205,6 @@ bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, inet_csk_reqsk_queue_added(sk); return true; } -EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); static void inet_clone_ulp(const struct request_sock *req, struct sock *newsk, const gfp_t priority) @@ -1290,7 +1285,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, return newsk; } -EXPORT_SYMBOL_GPL(inet_csk_clone_lock); /* * At this point, there should be no process reference to this @@ -1322,7 +1316,7 @@ void inet_csk_destroy_sock(struct sock *sk) EXPORT_SYMBOL(inet_csk_destroy_sock); /* This function allows to force a closure of a socket after the call to - * tcp/dccp_create_openreq_child(). + * tcp_create_openreq_child(). */ void inet_csk_prepare_forced_close(struct sock *sk) __releases(&sk->sk_lock.slock) @@ -1380,7 +1374,6 @@ int inet_csk_listen_start(struct sock *sk) inet_sk_set_state(sk, TCP_CLOSE); return err; } -EXPORT_SYMBOL_GPL(inet_csk_listen_start); static void inet_child_forget(struct sock *sk, struct request_sock *req, struct sock *child) @@ -1475,7 +1468,6 @@ child_put: sock_put(child); return NULL; } -EXPORT_SYMBOL(inet_csk_complete_hashdance); /* * This routine closes sockets which have been at least partially @@ -1590,4 +1582,3 @@ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) out: return dst; } -EXPORT_SYMBOL_GPL(inet_csk_update_pmtu); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index c2bb91d9e9ff..1d1d6ad53f4c 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -160,7 +160,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, ext & (1 << (INET_DIAG_TCLASS - 1))) { u32 classid = 0; -#ifdef CONFIG_SOCK_CGROUP_DATA +#ifdef CONFIG_CGROUP_NET_CLASSID classid = sock_cgroup_classid(&sk->sk_cgrp_data); #endif /* Fallback to socket priority if class id isn't set. 
@@ -1369,8 +1369,6 @@ static int inet_diag_type2proto(int type) switch (type) { case TCPDIAG_GETSOCK: return IPPROTO_TCP; - case DCCPDIAG_GETSOCK: - return IPPROTO_DCCP; default: return 0; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 5bf163f756e9..da85cc30e382 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -176,7 +176,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, */ static void __inet_put_port(struct sock *sk) { - struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); + struct inet_hashinfo *hashinfo = tcp_get_hashinfo(sk); struct inet_bind_hashbucket *head, *head2; struct net *net = sock_net(sk); struct inet_bind_bucket *tb; @@ -215,7 +215,7 @@ EXPORT_SYMBOL(inet_put_port); int __inet_inherit_port(const struct sock *sk, struct sock *child) { - struct inet_hashinfo *table = tcp_or_dccp_get_hashinfo(sk); + struct inet_hashinfo *table = tcp_get_hashinfo(sk); unsigned short port = inet_sk(child)->inet_num; struct inet_bind_hashbucket *head, *head2; bool created_inet_bind_bucket = false; @@ -668,7 +668,7 @@ static bool inet_ehash_lookup_by_sk(struct sock *sk, */ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) { - struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); + struct inet_hashinfo *hashinfo = tcp_get_hashinfo(sk); struct inet_ehash_bucket *head; struct hlist_nulls_head *list; spinlock_t *lock; @@ -713,7 +713,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) } return ok; } -EXPORT_SYMBOL_GPL(inet_ehash_nolisten); +EXPORT_IPV6_MOD(inet_ehash_nolisten); static int inet_reuseport_add_sock(struct sock *sk, struct inet_listen_hashbucket *ilb) @@ -740,7 +740,7 @@ static int inet_reuseport_add_sock(struct sock *sk, int __inet_hash(struct sock *sk, struct sock *osk) { - struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); + struct inet_hashinfo *hashinfo = tcp_get_hashinfo(sk); struct inet_listen_hashbucket *ilb2; int err = 0; @@ -771,7 +771,7 @@ unlock: return err; } -EXPORT_SYMBOL(__inet_hash); +EXPORT_IPV6_MOD(__inet_hash); int inet_hash(struct sock *sk) { @@ -782,11 +782,10 @@ int inet_hash(struct sock *sk) return err; } -EXPORT_SYMBOL_GPL(inet_hash); void inet_unhash(struct sock *sk) { - struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); + struct inet_hashinfo *hashinfo = tcp_get_hashinfo(sk); if (sk_unhashed(sk)) return; @@ -823,7 +822,7 @@ void inet_unhash(struct sock *sk) spin_unlock_bh(lock); } } -EXPORT_SYMBOL_GPL(inet_unhash); +EXPORT_IPV6_MOD(inet_unhash); static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb, const struct net *net, unsigned short port, @@ -874,7 +873,7 @@ inet_bind2_bucket_find(const struct inet_bind_hashbucket *head, const struct net struct inet_bind_hashbucket * inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port) { - struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); + struct inet_hashinfo *hinfo = tcp_get_hashinfo(sk); u32 hash; #if IS_ENABLED(CONFIG_IPV6) @@ -902,7 +901,7 @@ static void inet_update_saddr(struct sock *sk, void *saddr, int family) static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family, bool reset) { - struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); + struct inet_hashinfo *hinfo = tcp_get_hashinfo(sk); struct inet_bind_hashbucket *head, *head2; struct inet_bind2_bucket *tb2, *new_tb2; int l3mdev = inet_sk_bound_l3mdev(sk); @@ -982,14 +981,14 @@ int 
inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family) { return __inet_bhash2_update_saddr(sk, saddr, family, false); } -EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr); +EXPORT_IPV6_MOD(inet_bhash2_update_saddr); void inet_bhash2_reset_saddr(struct sock *sk) { if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) __inet_bhash2_update_saddr(sk, NULL, 0, true); } -EXPORT_SYMBOL_GPL(inet_bhash2_reset_saddr); +EXPORT_IPV6_MOD(inet_bhash2_reset_saddr); /* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm * Note that we use 32bit integers (vs RFC 'short integers') @@ -1214,7 +1213,6 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, return __inet_hash_connect(death_row, sk, port_offset, hash_port0, __inet_check_established); } -EXPORT_SYMBOL_GPL(inet_hash_connect); static void init_hashinfo_lhash2(struct inet_hashinfo *h) { @@ -1265,7 +1263,6 @@ int inet_hashinfo2_init_mod(struct inet_hashinfo *h) init_hashinfo_lhash2(h); return 0; } -EXPORT_SYMBOL_GPL(inet_hashinfo2_init_mod); int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) { @@ -1305,7 +1302,6 @@ set_mask: hashinfo->ehash_locks_mask = nblocks - 1; return 0; } -EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc); struct inet_hashinfo *inet_pernet_hashinfo_alloc(struct inet_hashinfo *hashinfo, unsigned int ehash_entries) @@ -1341,7 +1337,6 @@ free_hashinfo: err: return NULL; } -EXPORT_SYMBOL_GPL(inet_pernet_hashinfo_alloc); void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo) { @@ -1352,4 +1347,3 @@ void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo) vfree(hashinfo->ehash); kfree(hashinfo); } -EXPORT_SYMBOL_GPL(inet_pernet_hashinfo_free); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index aded4bf1bc16..67efe9501581 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -166,7 +166,6 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, spin_unlock(lock); local_bh_enable(); } -EXPORT_SYMBOL_GPL(inet_twsk_hashdance_schedule); static void tw_timer_handler(struct timer_list *t) { @@ -223,7 +222,6 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, return tw; } -EXPORT_SYMBOL_GPL(inet_twsk_alloc); /* These are always called from BH context. See callers in * tcp_input.c to verify this. 
@@ -306,7 +304,6 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) mod_timer_pending(&tw->tw_timer, jiffies + timeo); } } -EXPORT_SYMBOL_GPL(__inet_twsk_schedule); /* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */ void inet_twsk_purge(struct inet_hashinfo *hashinfo) @@ -365,4 +362,3 @@ restart: rcu_read_unlock(); } } -EXPORT_SYMBOL_GPL(inet_twsk_purge); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 26d15f907551..f5b9004d6938 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1066,16 +1066,15 @@ static int __net_init ipgre_init_net(struct net *net) return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); } -static void __net_exit ipgre_exit_batch_rtnl(struct list_head *list_net, - struct list_head *dev_to_kill) +static void __net_exit ipgre_exit_rtnl(struct net *net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops, - dev_to_kill); + ip_tunnel_delete_net(net, ipgre_net_id, &ipgre_link_ops, dev_to_kill); } static struct pernet_operations ipgre_net_ops = { .init = ipgre_init_net, - .exit_batch_rtnl = ipgre_exit_batch_rtnl, + .exit_rtnl = ipgre_exit_rtnl, .id = &ipgre_net_id, .size = sizeof(struct ip_tunnel_net), }; @@ -1752,16 +1751,15 @@ static int __net_init ipgre_tap_init_net(struct net *net) return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0"); } -static void __net_exit ipgre_tap_exit_batch_rtnl(struct list_head *list_net, - struct list_head *dev_to_kill) +static void __net_exit ipgre_tap_exit_rtnl(struct net *net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops, - dev_to_kill); + ip_tunnel_delete_net(net, gre_tap_net_id, &ipgre_tap_ops, dev_to_kill); } static struct pernet_operations ipgre_tap_net_ops = { .init = ipgre_tap_init_net, - .exit_batch_rtnl = ipgre_tap_exit_batch_rtnl, + .exit_rtnl = ipgre_tap_exit_rtnl, .id = &gre_tap_net_id, .size = sizeof(struct ip_tunnel_net), }; @@ -1772,16 +1770,15 @@ static int __net_init erspan_init_net(struct net *net) &erspan_link_ops, "erspan0"); } -static void __net_exit erspan_exit_batch_rtnl(struct list_head *net_list, - struct list_head *dev_to_kill) +static void __net_exit erspan_exit_rtnl(struct net *net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops, - dev_to_kill); + ip_tunnel_delete_net(net, erspan_net_id, &erspan_link_ops, dev_to_kill); } static struct pernet_operations erspan_net_ops = { .init = erspan_init_net, - .exit_batch_rtnl = erspan_exit_batch_rtnl, + .exit_rtnl = erspan_exit_rtnl, .id = &erspan_net_id, .size = sizeof(struct ip_tunnel_net), }; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6e18d7ec5062..a2705d454fd6 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1014,7 +1014,8 @@ static int __ip_append_data(struct sock *sk, uarg = msg->msg_ubuf; } } else if (sock_flag(sk, SOCK_ZEROCOPY)) { - uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb)); + uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb), + false); if (!uarg) return -ENOBUFS; extra_uref = !skb_zcopy(skb); /* only ref on new uarg */ diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 1024f961ec9a..678b8f96e3e9 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -243,11 +243,11 @@ static struct net_device *__ip_tunnel_create(struct net *net, if (parms->name[0]) { if (!dev_valid_name(parms->name)) goto failed; - strscpy(name, parms->name, 
IFNAMSIZ); + strscpy(name, parms->name); } else { if (strlen(ops->kind) > (IFNAMSIZ - 3)) goto failed; - strcpy(name, ops->kind); + strscpy(name, ops->kind); strcat(name, "%d"); } @@ -1174,13 +1174,16 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, } EXPORT_SYMBOL_GPL(ip_tunnel_init_net); -static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn, - struct list_head *head, - struct rtnl_link_ops *ops) +void ip_tunnel_delete_net(struct net *net, unsigned int id, + struct rtnl_link_ops *ops, + struct list_head *head) { + struct ip_tunnel_net *itn = net_generic(net, id); struct net_device *dev, *aux; int h; + ASSERT_RTNL_NET(net); + for_each_netdev_safe(net, dev, aux) if (dev->rtnl_link_ops == ops) unregister_netdevice_queue(dev, head); @@ -1198,21 +1201,7 @@ static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn, unregister_netdevice_queue(t->dev, head); } } - -void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id, - struct rtnl_link_ops *ops, - struct list_head *dev_to_kill) -{ - struct ip_tunnel_net *itn; - struct net *net; - - ASSERT_RTNL(); - list_for_each_entry(net, net_list, exit_list) { - itn = net_generic(net, id); - ip_tunnel_destroy(net, itn, dev_to_kill, ops); - } -} -EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets); +EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); int ip_tunnel_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm_kern *p, diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 159b4473290e..686e4f3d83aa 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -523,16 +523,15 @@ static int __net_init vti_init_net(struct net *net) return 0; } -static void __net_exit vti_exit_batch_rtnl(struct list_head *list_net, - struct list_head *dev_to_kill) +static void __net_exit vti_exit_rtnl(struct net *net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops, - dev_to_kill); + ip_tunnel_delete_net(net, vti_net_id, &vti_link_ops, dev_to_kill); } static struct pernet_operations vti_net_ops = { .init = vti_init_net, - .exit_batch_rtnl = vti_exit_batch_rtnl, + .exit_rtnl = vti_exit_rtnl, .id = &vti_net_id, .size = sizeof(struct ip_tunnel_net), }; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index bab0bf90c908..3e03af073a1c 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -604,16 +604,15 @@ static int __net_init ipip_init_net(struct net *net) return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0"); } -static void __net_exit ipip_exit_batch_rtnl(struct list_head *list_net, - struct list_head *dev_to_kill) +static void __net_exit ipip_exit_rtnl(struct net *net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops, - dev_to_kill); + ip_tunnel_delete_net(net, ipip_net_id, &ipip_link_ops, dev_to_kill); } static struct pernet_operations ipip_net_ops = { .init = ipip_init_net, - .exit_batch_rtnl = ipip_exit_batch_rtnl, + .exit_rtnl = ipip_exit_rtnl, .id = &ipip_net_id, .size = sizeof(struct ip_tunnel_net), }; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a8b04d4abcaa..3b5677d8467e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2511,7 +2511,8 @@ static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb, struct rtmsg *rtm; int i, err; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + rtm = nlmsg_payload(nlh, sizeof(*rtm)); + if (!rtm) { NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request"); return -EINVAL; } @@ -2520,7 +2521,6 @@ static 
int ipmr_rtm_valid_getroute_req(struct sk_buff *skb, return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, extack); - rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol || @@ -2836,7 +2836,8 @@ static int ipmr_valid_dumplink(const struct nlmsghdr *nlh, { struct ifinfomsg *ifm; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + ifm = nlmsg_payload(nlh, sizeof(*ifm)); + if (!ifm) { NL_SET_ERR_MSG(extack, "ipv4: Invalid header for ipmr link dump"); return -EINVAL; } @@ -2846,7 +2847,6 @@ static int ipmr_valid_dumplink(const struct nlmsghdr *nlh, return -EINVAL; } - ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change || ifm->ifi_index) { NL_SET_ERR_MSG(extack, "Invalid values in header for ipmr link dump request"); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 467151517023..823e4a783d2b 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -541,6 +541,7 @@ static struct nexthop *nexthop_alloc(void) INIT_LIST_HEAD(&nh->f6i_list); INIT_LIST_HEAD(&nh->grp_list); INIT_LIST_HEAD(&nh->fdb_list); + spin_lock_init(&nh->lock); } return nh; } @@ -1555,12 +1556,12 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, if (nh->is_group) { struct nh_group *nhg; - nhg = rtnl_dereference(nh->nh_grp); + nhg = rcu_dereference_rtnl(nh->nh_grp); if (nhg->has_v4) goto no_v4_nh; is_fdb_nh = nhg->fdb_nh; } else { - nhi = rtnl_dereference(nh->nh_info); + nhi = rcu_dereference_rtnl(nh->nh_info); if (nhi->family == AF_INET) goto no_v4_nh; is_fdb_nh = nhi->fdb_nh; @@ -2118,7 +2119,7 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) /* not called for nexthop replace */ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) { - struct fib6_info *f6i, *tmp; + struct fib6_info *f6i; bool do_flush = false; struct fib_info *fi; @@ -2129,13 +2130,24 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) if (do_flush) fib_flush(net); - /* ip6_del_rt removes the entry from this list hence the _safe */ - list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) { + spin_lock_bh(&nh->lock); + + nh->dead = true; + + while (!list_empty(&nh->f6i_list)) { + f6i = list_first_entry(&nh->f6i_list, typeof(*f6i), nh_list); + /* __ip6_del_rt does a release, so do a hold here */ fib6_info_hold(f6i); + + spin_unlock_bh(&nh->lock); ipv6_stub->ip6_del_rt(net, f6i, !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)); + + spin_lock_bh(&nh->lock); } + + spin_unlock_bh(&nh->lock); } static void __remove_nexthop(struct net *net, struct nexthop *nh, @@ -4040,14 +4052,11 @@ out: } EXPORT_SYMBOL(nexthop_res_grp_activity_update); -static void __net_exit nexthop_net_exit_batch_rtnl(struct list_head *net_list, - struct list_head *dev_to_kill) +static void __net_exit nexthop_net_exit_rtnl(struct net *net, + struct list_head *dev_to_kill) { - struct net *net; - - ASSERT_RTNL(); - list_for_each_entry(net, net_list, exit_list) - flush_all_nexthops(net); + ASSERT_RTNL_NET(net); + flush_all_nexthops(net); } static void __net_exit nexthop_net_exit(struct net *net) @@ -4072,7 +4081,7 @@ static int __net_init nexthop_net_init(struct net *net) static struct pernet_operations nexthop_net_ops = { .init = nexthop_net_init, .exit = nexthop_net_exit, - .exit_batch_rtnl = nexthop_net_exit_batch_rtnl, + .exit_rtnl = nexthop_net_exit_rtnl, }; static const struct rtnl_msg_handler 
nexthop_rtnl_msg_handlers[] __initconst = { diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 10cbeb76c274..ea2f01584379 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -191,6 +191,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED), SNMP_MIB_ITEM("TSEcrRejected", LINUX_MIB_TSECRREJECTED), SNMP_MIB_ITEM("PAWSOldAck", LINUX_MIB_PAWS_OLD_ACK), + SNMP_MIB_ITEM("PAWSTimewait", LINUX_MIB_PAWS_TW_REJECTED), SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS), SNMP_MIB_ITEM("DelayedACKLocked", LINUX_MIB_DELAYEDACKLOCKED), SNMP_MIB_ITEM("DelayedACKLost", LINUX_MIB_DELAYEDACKLOST), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 753704f75b2c..fccb05fb3a79 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -189,7 +189,11 @@ const __u8 ip_tos2prio[16] = { EXPORT_SYMBOL(ip_tos2prio); static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); +#ifndef CONFIG_PREEMPT_RT #define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field) +#else +#define RT_CACHE_STAT_INC(field) this_cpu_inc(rt_cache_stat.field) +#endif #ifdef CONFIG_PROC_FS static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) @@ -2037,8 +2041,12 @@ static u32 fib_multipath_custom_hash_fl4(const struct net *net, hash_keys.addrs.v4addrs.dst = fl4->daddr; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) hash_keys.basic.ip_proto = fl4->flowi4_proto; - if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) - hash_keys.ports.src = fl4->fl4_sport; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) { + if (fl4->flowi4_flags & FLOWI_FLAG_ANY_SPORT) + hash_keys.ports.src = (__force __be16)get_random_u16(); + else + hash_keys.ports.src = fl4->fl4_sport; + } if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) hash_keys.ports.dst = fl4->fl4_dport; @@ -2093,7 +2101,10 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; - hash_keys.ports.src = fl4->fl4_sport; + if (fl4->flowi4_flags & FLOWI_FLAG_ANY_SPORT) + hash_keys.ports.src = (__force __be16)get_random_u16(); + else + hash_keys.ports.src = fl4->fl4_sport; hash_keys.ports.dst = fl4->fl4_dport; hash_keys.basic.ip_proto = fl4->flowi4_proto; } @@ -2154,7 +2165,7 @@ ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, if (res->fi && fib_info_num_path(res->fi) > 1) { int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys); - fib_select_multipath(res, h); + fib_select_multipath(res, h, NULL); IPCB(skb)->flags |= IPSKB_MULTIPATH; } #endif @@ -2699,8 +2710,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, if (fl4->saddr) { if (ipv4_is_multicast(fl4->saddr) || - ipv4_is_lbcast(fl4->saddr) || - ipv4_is_zeronet(fl4->saddr)) { + ipv4_is_lbcast(fl4->saddr)) { rth = ERR_PTR(-EINVAL); goto out; } @@ -3206,7 +3216,8 @@ static int inet_rtm_valid_getroute_req(struct sk_buff *skb, struct rtmsg *rtm; int i, err; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + rtm = nlmsg_payload(nlh, sizeof(*rtm)); + if (!rtm) { NL_SET_ERR_MSG(extack, "ipv4: Invalid header for route get request"); return -EINVAL; @@ -3216,7 +3227,6 @@ static int inet_rtm_valid_getroute_req(struct sk_buff *skb, return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, extack); - rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || 
rtm->rtm_table || rtm->rtm_protocol || diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6edc441b3702..0ae265d39184 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1059,6 +1059,7 @@ int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied, int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) { + struct net_devmem_dmabuf_binding *binding = NULL; struct tcp_sock *tp = tcp_sk(sk); struct ubuf_info *uarg = NULL; struct sk_buff *skb; @@ -1066,11 +1067,23 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; int process_backlog = 0; + bool sockc_valid = true; int zc = 0; long timeo; flags = msg->msg_flags; + sockc = (struct sockcm_cookie){ .tsflags = READ_ONCE(sk->sk_tsflags) }; + if (msg->msg_controllen) { + err = sock_cmsg_send(sk, msg, &sockc); + if (unlikely(err)) + /* Don't return error until MSG_FASTOPEN has been + * processed; that may succeed even if the cmsg is + * invalid. + */ + sockc_valid = false; + } + if ((flags & MSG_ZEROCOPY) && size) { if (msg->msg_ubuf) { uarg = msg->msg_ubuf; @@ -1078,7 +1091,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) zc = MSG_ZEROCOPY; } else if (sock_flag(sk, SOCK_ZEROCOPY)) { skb = tcp_write_queue_tail(sk); - uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb)); + uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb), + sockc_valid && !!sockc.dmabuf_id); if (!uarg) { err = -ENOBUFS; goto out_err; @@ -1087,12 +1101,27 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) zc = MSG_ZEROCOPY; else uarg_to_msgzc(uarg)->zerocopy = 0; + + if (sockc_valid && sockc.dmabuf_id) { + binding = net_devmem_get_binding(sk, sockc.dmabuf_id); + if (IS_ERR(binding)) { + err = PTR_ERR(binding); + binding = NULL; + goto out_err; + } + } } } else if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES) && size) { if (sk->sk_route_caps & NETIF_F_SG) zc = MSG_SPLICE_PAGES; } + if (sockc_valid && sockc.dmabuf_id && + (!(flags & MSG_ZEROCOPY) || !sock_flag(sk, SOCK_ZEROCOPY))) { + err = -EINVAL; + goto out_err; + } + if (unlikely(flags & MSG_FASTOPEN || inet_test_bit(DEFER_CONNECT, sk)) && !tp->repair) { @@ -1131,13 +1160,10 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) /* 'common' sending to sendq */ } - sockc = (struct sockcm_cookie) { .tsflags = READ_ONCE(sk->sk_tsflags)}; - if (msg->msg_controllen) { - err = sock_cmsg_send(sk, msg, &sockc); - if (unlikely(err)) { + if (!sockc_valid) { + if (!err) err = -EINVAL; - goto out_err; - } + goto out_err; } /* This should be in poll */ @@ -1160,6 +1186,8 @@ restart: if (skb) copy = size_goal - skb->len; + trace_tcp_sendmsg_locked(sk, msg, skb, size_goal); + if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) { bool first_skb; @@ -1256,7 +1284,8 @@ new_segment: goto wait_for_space; } - err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg); + err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg, + binding); if (err == -EMSGSIZE || err == -EEXIST) { tcp_mark_push(tp, skb); goto new_segment; @@ -1337,6 +1366,8 @@ out_nopush: /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */ if (uarg && !msg->msg_ubuf) net_zcopy_put(uarg); + if (binding) + net_devmem_dmabuf_binding_put(binding); return copied + copied_syn; do_error: @@ -1354,6 +1385,9 @@ out_err: sk->sk_write_space(sk); tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); } + if (binding) + net_devmem_dmabuf_binding_put(binding); + return err; } 
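/*
 * A minimal userspace sketch of driving the devmem TX plumbing added to
 * tcp_sendmsg_locked() above: the dmabuf binding id rides in a cmsg and is
 * only honoured when both MSG_ZEROCOPY and SOCK_ZEROCOPY are set, otherwise
 * the kernel fails the send with -EINVAL (see the hunk above). The
 * SCM_DEVMEM_DMABUF cmsg type and the send_devmem() helper are assumptions
 * for illustration, not part of this patch.
 */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/types.h>

static ssize_t send_devmem(int fd, struct iovec *iov, __u32 dmabuf_id)
{
	char control[CMSG_SPACE(sizeof(dmabuf_id))] = {};
	struct msghdr msg = {
		.msg_iov = iov,
		.msg_iovlen = 1,
		.msg_control = control,
		.msg_controllen = sizeof(control),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
	int one = 1;

	/* Without SOCK_ZEROCOPY, a nonzero dmabuf id is rejected. */
	if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)) < 0)
		return -1;

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_DEVMEM_DMABUF;	/* assumed uapi name */
	cmsg->cmsg_len = CMSG_LEN(sizeof(dmabuf_id));
	memcpy(CMSG_DATA(cmsg), &dmabuf_id, sizeof(dmabuf_id));

	return sendmsg(fd, &msg, MSG_ZEROCOPY);
}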
EXPORT_SYMBOL_GPL(tcp_sendmsg_locked); @@ -3407,6 +3441,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->rack.reo_wnd_persist = 0; tp->rack.dsack_seen = 0; tp->syn_data_acked = 0; + tp->syn_fastopen_child = 0; tp->rx_opt.saw_tstamp = 0; tp->rx_opt.dsack = 0; tp->rx_opt.num_sacks = 0; @@ -4162,6 +4197,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_options |= TCPI_OPT_SYN_DATA; if (tp->tcp_usec_ts) info->tcpi_options |= TCPI_OPT_USEC_TS; + if (tp->syn_fastopen_child) + info->tcpi_options |= TCPI_OPT_TFO_CHILD; info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato, diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 1a6b1bc54245..9b83d639b5ac 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -401,6 +401,7 @@ fastopen: } NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); + tcp_sk(child)->syn_fastopen_child = 1; return child; } NET_INC_STATS(sock_net(sk), diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8cce0d5489da..d5b5c32115d2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2417,7 +2417,8 @@ do_time_wait: goto csum_error; } - tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn); + tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn, + &drop_reason); switch (tw_status) { case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(net, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index fb9349be36b8..43d7852ce07e 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -97,7 +97,8 @@ static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq, */ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, - const struct tcphdr *th, u32 *tw_isn) + const struct tcphdr *th, u32 *tw_isn, + enum skb_drop_reason *drop_reason) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); u32 rcv_nxt = READ_ONCE(tcptw->tw_rcv_nxt); @@ -245,8 +246,10 @@ kill: return TCP_TW_SYN; } - if (paws_reject) - __NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED); + if (paws_reject) { + *drop_reason = SKB_DROP_REASON_TCP_RFC7323_TW_PAWS; + __NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWS_TW_REJECTED); + } if (!th->rst) { /* In this case we must reset the TIMEWAIT timer. 
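/*
 * A short sketch of consuming the TCPI_OPT_TFO_CHILD bit that tcp_get_info()
 * now sets (see the tcp.c and tcp_fastopen.c hunks above): after accept() on
 * a TCP_FASTOPEN listener, TCP_INFO reports whether the socket came from a
 * SYN that carried data. The fallback define mirrors the value added by this
 * series' uapi change; verify it against the tree before relying on it.
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>

#ifndef TCPI_OPT_TFO_CHILD
#define TCPI_OPT_TFO_CHILD 128	/* assumed value from the uapi patch */
#endif

static int accepted_via_fastopen(int fd)
{
	struct tcp_info info;
	socklen_t len = sizeof(info);

	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &info, &len) < 0)
		return -1;

	return !!(info.tcpi_options & TCPI_OPT_TFO_CHILD);
}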
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2742cc7602bb..358b49caa7b9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -93,6 +93,7 @@ #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/slab.h> +#include <linux/sock_diag.h> #include <net/tcp_states.h> #include <linux/skbuff.h> #include <linux/proc_fs.h> @@ -1942,8 +1943,8 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, error = -EAGAIN; do { spin_lock_bh(&queue->lock); - skb = __skb_try_recv_from_queue(sk, queue, flags, off, - err, &last); + skb = __skb_try_recv_from_queue(queue, flags, off, err, + &last); if (skb) { if (!(flags & MSG_PEEK)) udp_skb_destructor(sk, skb); @@ -1964,8 +1965,8 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, spin_lock(&sk_queue->lock); skb_queue_splice_tail_init(sk_queue, queue); - skb = __skb_try_recv_from_queue(sk, queue, flags, off, - err, &last); + skb = __skb_try_recv_from_queue(queue, flags, off, err, + &last); if (skb && !(flags & MSG_PEEK)) udp_skb_dtor_locked(sk, skb); spin_unlock(&sk_queue->lock); @@ -2897,20 +2898,40 @@ void udp_destroy_sock(struct sock *sk) if (encap_destroy) encap_destroy(sk); } - if (udp_test_bit(ENCAP_ENABLED, sk)) + if (udp_test_bit(ENCAP_ENABLED, sk)) { static_branch_dec(&udp_encap_needed_key); + udp_tunnel_cleanup_gro(sk); + } } } +typedef struct sk_buff *(*udp_gro_receive_t)(struct sock *sk, + struct list_head *head, + struct sk_buff *skb); + static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family, struct sock *sk) { #ifdef CONFIG_XFRM + udp_gro_receive_t new_gro_receive; + if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) { - if (family == AF_INET) - WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv); - else if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) - WRITE_ONCE(udp_sk(sk)->gro_receive, ipv6_stub->xfrm6_gro_udp_encap_rcv); + if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) + new_gro_receive = ipv6_stub->xfrm6_gro_udp_encap_rcv; + else + new_gro_receive = xfrm4_gro_udp_encap_rcv; + + if (udp_sk(sk)->gro_receive != new_gro_receive) { + /* + * With IPV6_ADDRFORM the gro callback could change + * after being set, unregister the old one, if valid. 
+ */ + if (udp_sk(sk)->gro_receive) + udp_tunnel_update_gro_rcv(sk, false); + + WRITE_ONCE(udp_sk(sk)->gro_receive, new_gro_receive); + udp_tunnel_update_gro_rcv(sk, true); + } } #endif } @@ -2960,6 +2981,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, break; case UDP_ENCAP: + sockopt_lock_sock(sk); switch (val) { case 0: #ifdef CONFIG_XFRM @@ -2983,6 +3005,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, err = -ENOPROTOOPT; break; } + sockopt_release_sock(sk); break; case UDP_NO_CHECK6_TX: @@ -3000,13 +3023,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, break; case UDP_GRO: - + sockopt_lock_sock(sk); /* when enabling GRO, accept the related GSO packet type */ if (valbool) udp_tunnel_encap_enable(sk); udp_assign_bit(GRO_ENABLED, sk, valbool); udp_assign_bit(ACCEPT_L4, sk, valbool); set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, sk); + sockopt_release_sock(sk); break; /* @@ -3390,34 +3414,55 @@ struct bpf_iter__udp { int bucket __aligned(8); }; +union bpf_udp_iter_batch_item { + struct sock *sk; + __u64 cookie; +}; + struct bpf_udp_iter_state { struct udp_iter_state state; unsigned int cur_sk; unsigned int end_sk; unsigned int max_sk; - int offset; - struct sock **batch; - bool st_bucket_done; + union bpf_udp_iter_batch_item *batch; }; static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter, - unsigned int new_batch_sz); + unsigned int new_batch_sz, gfp_t flags); +static struct sock *bpf_iter_udp_resume(struct sock *first_sk, + union bpf_udp_iter_batch_item *cookies, + int n_cookies) +{ + struct sock *sk = NULL; + int i; + + for (i = 0; i < n_cookies; i++) { + sk = first_sk; + udp_portaddr_for_each_entry_from(sk) + if (cookies[i].cookie == atomic64_read(&sk->sk_cookie)) + goto done; + } +done: + return sk; +} + static struct sock *bpf_iter_udp_batch(struct seq_file *seq) { struct bpf_udp_iter_state *iter = seq->private; struct udp_iter_state *state = &iter->state; + unsigned int find_cookie, end_cookie; struct net *net = seq_file_net(seq); - int resume_bucket, resume_offset; struct udp_table *udptable; unsigned int batch_sks = 0; - bool resized = false; + int resume_bucket; + int resizes = 0; struct sock *sk; + int err = 0; resume_bucket = state->bucket; - resume_offset = iter->offset; /* The current batch is done, so advance the bucket. */ - if (iter->st_bucket_done) + if (iter->cur_sk == iter->end_sk) state->bucket++; udptable = udp_get_table_seq(seq, net); @@ -3430,62 +3475,89 @@ again: * before releasing the bucket lock. This allows BPF programs that are * called in seq_show to acquire the bucket lock if needed. */ + find_cookie = iter->cur_sk; + end_cookie = iter->end_sk; iter->cur_sk = 0; iter->end_sk = 0; - iter->st_bucket_done = false; batch_sks = 0; for (; state->bucket <= udptable->mask; state->bucket++) { struct udp_hslot *hslot2 = &udptable->hash2[state->bucket].hslot; if (hlist_empty(&hslot2->head)) - continue; + goto next_bucket; - iter->offset = 0; spin_lock_bh(&hslot2->lock); - udp_portaddr_for_each_entry(sk, &hslot2->head) { + sk = hlist_entry_safe(hslot2->head.first, struct sock, + __sk_common.skc_portaddr_node); + /* Resume from the first (in iteration order) unseen socket from + * the last batch that still exists in resume_bucket. Most of + * the time this will just be where the last iteration left off + * in resume_bucket unless that socket disappeared between + * reads. 
+ */ + if (state->bucket == resume_bucket) + sk = bpf_iter_udp_resume(sk, &iter->batch[find_cookie], + end_cookie - find_cookie); +fill_batch: + udp_portaddr_for_each_entry_from(sk) { if (seq_sk_match(seq, sk)) { - /* Resume from the last iterated socket at the - * offset in the bucket before iterator was stopped. - */ - if (state->bucket == resume_bucket && - iter->offset < resume_offset) { - ++iter->offset; - continue; - } if (iter->end_sk < iter->max_sk) { sock_hold(sk); - iter->batch[iter->end_sk++] = sk; + iter->batch[iter->end_sk++].sk = sk; } batch_sks++; } } + + /* Allocate a larger batch and try again. */ + if (unlikely(resizes <= 1 && iter->end_sk && + iter->end_sk != batch_sks)) { + resizes++; + + /* First, try with GFP_USER to maximize the chances of + * grabbing more memory. + */ + if (resizes == 1) { + spin_unlock_bh(&hslot2->lock); + err = bpf_iter_udp_realloc_batch(iter, + batch_sks * 3 / 2, + GFP_USER); + if (err) + return ERR_PTR(err); + /* Start over. */ + goto again; + } + + /* Next, hold onto the lock, so the bucket doesn't + * change while we get the rest of the sockets. + */ + err = bpf_iter_udp_realloc_batch(iter, batch_sks, + GFP_NOWAIT); + if (err) { + spin_unlock_bh(&hslot2->lock); + return ERR_PTR(err); + } + + /* Pick up where we left off. */ + sk = iter->batch[iter->end_sk - 1].sk; + sk = hlist_entry_safe(sk->__sk_common.skc_portaddr_node.next, + struct sock, + __sk_common.skc_portaddr_node); + batch_sks = iter->end_sk; + goto fill_batch; + } + spin_unlock_bh(&hslot2->lock); if (iter->end_sk) break; +next_bucket: + resizes = 0; } - /* All done: no batch made. */ - if (!iter->end_sk) - return NULL; - - if (iter->end_sk == batch_sks) { - /* Batching is done for the current bucket; return the first - * socket to be iterated from the batch. - */ - iter->st_bucket_done = true; - goto done; - } - if (!resized && !bpf_iter_udp_realloc_batch(iter, batch_sks * 3 / 2)) { - resized = true; - /* After allocating a larger batch, retry one more time to grab - * the whole bucket. - */ - goto again; - } -done: - return iter->batch[0]; + WARN_ON_ONCE(iter->end_sk != batch_sks); + return iter->end_sk ? iter->batch[0].sk : NULL; } static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) @@ -3496,16 +3568,14 @@ static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* Whenever seq_next() is called, the iter->cur_sk is * done with seq_show(), so unref the iter->cur_sk. */ - if (iter->cur_sk < iter->end_sk) { - sock_put(iter->batch[iter->cur_sk++]); - ++iter->offset; - } + if (iter->cur_sk < iter->end_sk) + sock_put(iter->batch[iter->cur_sk++].sk); /* After updating iter->cur_sk, check if there are more sockets * available in the current bucket batch. */ if (iter->cur_sk < iter->end_sk) - sk = iter->batch[iter->cur_sk]; + sk = iter->batch[iter->cur_sk].sk; else /* Prepare a new batch. */ sk = bpf_iter_udp_batch(seq); @@ -3569,8 +3639,19 @@ unlock: static void bpf_iter_udp_put_batch(struct bpf_udp_iter_state *iter) { - while (iter->cur_sk < iter->end_sk) - sock_put(iter->batch[iter->cur_sk++]); + union bpf_udp_iter_batch_item *item; + unsigned int cur_sk = iter->cur_sk; + __u64 cookie; + + /* Remember the cookies of the sockets we haven't seen yet, so we can + * pick up where we left off next time around. 
+ */ + while (cur_sk < iter->end_sk) { + item = &iter->batch[cur_sk++]; + cookie = sock_gen_cookie(item->sk); + sock_put(item->sk); + item->cookie = cookie; + } } static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v) @@ -3586,10 +3667,8 @@ static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v) (void)udp_prog_seq_show(prog, &meta, v, 0, 0); } - if (iter->cur_sk < iter->end_sk) { + if (iter->cur_sk < iter->end_sk) bpf_iter_udp_put_batch(iter); - iter->st_bucket_done = false; - } } static const struct seq_operations bpf_iter_udp_seq_ops = { @@ -3810,6 +3889,15 @@ fallback: static int __net_init udp_pernet_init(struct net *net) { +#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) + int i; + + /* No tunnel is configured */ + for (i = 0; i < ARRAY_SIZE(net->ipv4.udp_tunnel_gro); ++i) { + INIT_HLIST_HEAD(&net->ipv4.udp_tunnel_gro[i].list); + RCU_INIT_POINTER(net->ipv4.udp_tunnel_gro[i].sk, NULL); + } +#endif udp_sysctl_init(net); udp_set_table(net); @@ -3831,16 +3919,19 @@ DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta, struct udp_sock *udp_sk, uid_t uid, int bucket) static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter, - unsigned int new_batch_sz) + unsigned int new_batch_sz, gfp_t flags) { - struct sock **new_batch; + union bpf_udp_iter_batch_item *new_batch; new_batch = kvmalloc_array(new_batch_sz, sizeof(*new_batch), - GFP_USER | __GFP_NOWARN); + flags | __GFP_NOWARN); if (!new_batch) return -ENOMEM; - bpf_iter_udp_put_batch(iter); + if (flags != GFP_NOWAIT) + bpf_iter_udp_put_batch(iter); + + memcpy(new_batch, iter->batch, sizeof(*iter->batch) * iter->end_sk); kvfree(iter->batch); iter->batch = new_batch; iter->max_sk = new_batch_sz; @@ -3859,10 +3950,12 @@ static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux) if (ret) return ret; - ret = bpf_iter_udp_realloc_batch(iter, INIT_BATCH_SZ); + ret = bpf_iter_udp_realloc_batch(iter, INIT_BATCH_SZ, GFP_USER); if (ret) bpf_iter_fini_seq_net(priv_data); + iter->state.bucket = -1; + return ret; } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 9a8142ccbabe..9c775f8aa438 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -12,6 +12,169 @@ #include <net/udp.h> #include <net/protocol.h> #include <net/inet_common.h> +#include <net/udp_tunnel.h> + +#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) + +/* + * Dummy GRO tunnel callback, exists mainly to avoid dangling/NULL + * values for the udp tunnel static call. 
+ */ +static struct sk_buff *dummy_gro_rcv(struct sock *sk, + struct list_head *head, + struct sk_buff *skb) +{ + NAPI_GRO_CB(skb)->flush = 1; + return NULL; +} + +typedef struct sk_buff *(*udp_tunnel_gro_rcv_t)(struct sock *sk, + struct list_head *head, + struct sk_buff *skb); + +struct udp_tunnel_type_entry { + udp_tunnel_gro_rcv_t gro_receive; + refcount_t count; +}; + +#define UDP_MAX_TUNNEL_TYPES (IS_ENABLED(CONFIG_GENEVE) + \ + IS_ENABLED(CONFIG_VXLAN) * 2 + \ + IS_ENABLED(CONFIG_NET_FOU) * 2 + \ + IS_ENABLED(CONFIG_XFRM) * 2) + +DEFINE_STATIC_CALL(udp_tunnel_gro_rcv, dummy_gro_rcv); +static DEFINE_STATIC_KEY_FALSE(udp_tunnel_static_call); +static struct mutex udp_tunnel_gro_type_lock; +static struct udp_tunnel_type_entry udp_tunnel_gro_types[UDP_MAX_TUNNEL_TYPES]; +static unsigned int udp_tunnel_gro_type_nr; +static DEFINE_SPINLOCK(udp_tunnel_gro_lock); + +void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add) +{ + bool is_ipv6 = sk->sk_family == AF_INET6; + struct udp_sock *tup, *up = udp_sk(sk); + struct udp_tunnel_gro *udp_tunnel_gro; + + spin_lock(&udp_tunnel_gro_lock); + udp_tunnel_gro = &net->ipv4.udp_tunnel_gro[is_ipv6]; + if (add) + hlist_add_head(&up->tunnel_list, &udp_tunnel_gro->list); + else if (up->tunnel_list.pprev) + hlist_del_init(&up->tunnel_list); + + if (udp_tunnel_gro->list.first && + !udp_tunnel_gro->list.first->next) { + tup = hlist_entry(udp_tunnel_gro->list.first, struct udp_sock, + tunnel_list); + + rcu_assign_pointer(udp_tunnel_gro->sk, (struct sock *)tup); + } else { + RCU_INIT_POINTER(udp_tunnel_gro->sk, NULL); + } + + spin_unlock(&udp_tunnel_gro_lock); +} +EXPORT_SYMBOL_GPL(udp_tunnel_update_gro_lookup); + +void udp_tunnel_update_gro_rcv(struct sock *sk, bool add) +{ + struct udp_tunnel_type_entry *cur = NULL; + struct udp_sock *up = udp_sk(sk); + int i, old_gro_type_nr; + + if (!UDP_MAX_TUNNEL_TYPES || !up->gro_receive) + return; + + mutex_lock(&udp_tunnel_gro_type_lock); + + /* Check if the static call is permanently disabled. */ + if (udp_tunnel_gro_type_nr > UDP_MAX_TUNNEL_TYPES) + goto out; + + for (i = 0; i < udp_tunnel_gro_type_nr; i++) + if (udp_tunnel_gro_types[i].gro_receive == up->gro_receive) + cur = &udp_tunnel_gro_types[i]; + + old_gro_type_nr = udp_tunnel_gro_type_nr; + if (add) { + /* + * Update the matching entry, if found, or add a new one + * if needed. + */ + if (cur) { + refcount_inc(&cur->count); + goto out; + } + + if (unlikely(udp_tunnel_gro_type_nr == UDP_MAX_TUNNEL_TYPES)) { + pr_err_once("Too many UDP tunnel types, please increase UDP_MAX_TUNNEL_TYPES\n"); + /* Ensure static call will never be enabled */ + udp_tunnel_gro_type_nr = UDP_MAX_TUNNEL_TYPES + 1; + } else { + cur = &udp_tunnel_gro_types[udp_tunnel_gro_type_nr++]; + refcount_set(&cur->count, 1); + cur->gro_receive = up->gro_receive; + } + } else { + /* + * The stack only cleans up tunnels it successfully added, so + * the lookup on removal should never fail.
+ */ + if (WARN_ON_ONCE(!cur)) + goto out; + + if (!refcount_dec_and_test(&cur->count)) + goto out; + + /* Avoid gaps, so that the enabled tunnel always has id 0 */ + *cur = udp_tunnel_gro_types[--udp_tunnel_gro_type_nr]; + } + + if (udp_tunnel_gro_type_nr == 1) { + static_call_update(udp_tunnel_gro_rcv, + udp_tunnel_gro_types[0].gro_receive); + static_branch_enable(&udp_tunnel_static_call); + } else if (old_gro_type_nr == 1) { + static_branch_disable(&udp_tunnel_static_call); + static_call_update(udp_tunnel_gro_rcv, dummy_gro_rcv); + } + +out: + mutex_unlock(&udp_tunnel_gro_type_lock); +} +EXPORT_SYMBOL_GPL(udp_tunnel_update_gro_rcv); + +static void udp_tunnel_gro_init(void) +{ + mutex_init(&udp_tunnel_gro_type_lock); +} + +static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk, + struct list_head *head, + struct sk_buff *skb) +{ + if (static_branch_likely(&udp_tunnel_static_call)) { + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + return static_call(udp_tunnel_gro_rcv)(sk, head, skb); + } + return call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); +} + +#else + +static void udp_tunnel_gro_init(void) {} + +static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk, + struct list_head *head, + struct sk_buff *skb) +{ + return call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); +} + +#endif static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, @@ -681,7 +844,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); - pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); + pp = udp_tunnel_gro_rcv(sk, head, skb); out: skb_gro_flush_final(skb, pp, flush); @@ -694,8 +857,13 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport, { const struct iphdr *iph = skb_gro_network_header(skb); struct net *net = dev_net_rcu(skb->dev); + struct sock *sk; int iif, sdif; + sk = udp_tunnel_sk(net, false); + if (sk && dport == htons(sk->sk_num)) + return sk; + inet_get_iif_sdif(skb, &iif, &sdif); return __udp4_lib_lookup(net, iph->saddr, sport, @@ -826,5 +994,7 @@ int __init udpv4_offload_init(void) .gro_complete = udp4_gro_complete, }, }; + + udp_tunnel_gro_init(); return inet_add_offload(&net_hotdata.udpv4_offload, IPPROTO_UDP); } diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 619a53eb672d..2326548997d3 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -58,6 +58,15 @@ error: } EXPORT_SYMBOL(udp_sock_create4); +static bool sk_saddr_any(struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + return ipv6_addr_any(&sk->sk_v6_rcv_saddr); +#else + return !sk->sk_rcv_saddr; +#endif +} + void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *cfg) { @@ -80,6 +89,12 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, udp_sk(sk)->gro_complete = cfg->gro_complete; udp_tunnel_encap_enable(sk); + + udp_tunnel_update_gro_rcv(sk, true); + + if (!sk->sk_dport && !sk->sk_bound_dev_if && sk_saddr_any(sk) && + sk->sk_kern_sock) + udp_tunnel_update_gro_lookup(net, sk, true); } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c6b22170dc49..43b19adfbf88 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5349,12 +5349,12 @@ static int
inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, struct ifaddrmsg *ifm; int err, i; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + ifm = nlmsg_payload(nlh, sizeof(*ifm)); + if (!ifm) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for address dump request"); return -EINVAL; } - ifm = nlmsg_data(nlh); if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) { NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address dump request"); return -EINVAL; @@ -5487,7 +5487,8 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb, struct ifaddrmsg *ifm; int i, err; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + ifm = nlmsg_payload(nlh, sizeof(*ifm)); + if (!ifm) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for get address request"); return -EINVAL; } @@ -5496,7 +5497,6 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb, return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, extack); - ifm = nlmsg_data(nlh); if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) { NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request"); return -EINVAL; @@ -6115,7 +6115,8 @@ static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh, { struct ifinfomsg *ifm; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + ifm = nlmsg_payload(nlh, sizeof(*ifm)); + if (!ifm) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for link dump request"); return -EINVAL; } @@ -6125,7 +6126,6 @@ static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh, return -EINVAL; } - ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change || ifm->ifi_index) { NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for dump request"); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index ab054f329e12..fb63ffbcfc64 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -473,12 +473,12 @@ static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh, { struct ifaddrlblmsg *ifal; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) { + ifal = nlmsg_payload(nlh, sizeof(*ifal)); + if (!ifal) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for address label dump request"); return -EINVAL; } - ifal = nlmsg_data(nlh); if (ifal->__ifal_reserved || ifal->ifal_prefixlen || ifal->ifal_flags || ifal->ifal_index || ifal->ifal_seq) { NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address label dump request"); @@ -543,7 +543,8 @@ static int ip6addrlbl_valid_get_req(struct sk_buff *skb, struct ifaddrlblmsg *ifal; int i, err; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) { + ifal = nlmsg_payload(nlh, sizeof(*ifal)); + if (!ifal) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for addrlabel get request"); return -EINVAL; } @@ -552,7 +553,6 @@ static int ip6addrlbl_valid_get_req(struct sk_buff *skb, return nlmsg_parse_deprecated(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, extack); - ifal = nlmsg_data(nlh); if (ifal->__ifal_reserved || ifal->ifal_flags || ifal->ifal_seq) { NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for addrlabel get request"); return -EINVAL; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index f60ec8b0f8ea..acaff1296783 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -715,6 +715,7 @@ const struct proto_ops inet6_stream_ops = { #endif .set_rcvlowat = tcp_set_rcvlowat, }; +EXPORT_SYMBOL_GPL(inet6_stream_ops); const struct proto_ops inet6_dgram_ops = { .family = PF_INET6, @@ -881,7 +882,6 @@ bool ipv6_opt_accepted(const struct sock *sk, const 
struct sk_buff *skb, } return false; } -EXPORT_SYMBOL_GPL(ipv6_opt_accepted); static struct packet_type ipv6_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index dbcf556a35bb..8f500eaf33cf 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -54,7 +54,6 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, return dst; } -EXPORT_SYMBOL(inet6_csk_route_req); static inline struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) @@ -137,4 +136,3 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) dst = inet6_csk_route_socket(sk, &fl6); return IS_ERR(dst) ? NULL : dst; } -EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu); diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 09065187378e..40df8bdfaacd 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -38,6 +38,7 @@ struct ioam6_lwt_freq { }; struct ioam6_lwt { + struct dst_entry null_dst; struct dst_cache cache; struct ioam6_lwt_freq freq; atomic_t pkt_cnt; @@ -177,6 +178,14 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, if (err) goto free_lwt; + /* This "fake" dst_entry will be stored in a dst_cache, which will call + * dst_hold() and dst_release() on it. We must ensure that dst_destroy() + * will never be called. For that, its initial refcount is 1 and +1 when + * it is stored in the cache. Then, +1/-1 each time we read the cache + * and release it. Long story short, we're fine. + */ + dst_init(&ilwt->null_dst, NULL, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); + atomic_set(&ilwt->pkt_cnt, 0); ilwt->freq.k = freq_k; ilwt->freq.n = freq_n; @@ -336,7 +345,8 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL; + struct dst_entry *orig_dst = skb_dst(skb); + struct dst_entry *dst = NULL; struct ioam6_lwt *ilwt; int err = -EINVAL; u32 pkt_cnt; @@ -344,7 +354,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - ilwt = ioam6_lwt_state(dst->lwtstate); + ilwt = ioam6_lwt_state(orig_dst->lwtstate); /* Check for insertion frequency (i.e., "k over n" insertions) */ pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt); @@ -352,9 +362,20 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) goto out; local_bh_disable(); - cache_dst = dst_cache_get(&ilwt->cache); + dst = dst_cache_get(&ilwt->cache); local_bh_enable(); + /* This is how we notify that the destination does not change after + * transformation and that we need to use orig_dst instead of the cache + */ + if (dst == &ilwt->null_dst) { + dst_release(dst); + + dst = orig_dst; + /* keep refcount balance: dst_release() is called at the end */ + dst_hold(dst); + } + switch (ilwt->mode) { case IOAM6_IPTUNNEL_MODE_INLINE: do_inline: @@ -362,7 +383,7 @@ do_inline: if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) goto out; - err = ioam6_do_inline(net, skb, &ilwt->tuninfo, cache_dst); + err = ioam6_do_inline(net, skb, &ilwt->tuninfo, dst); if (unlikely(err)) goto drop; @@ -372,7 +393,7 @@ do_encap: /* Encapsulation (ip6ip6) */ err = ioam6_do_encap(net, skb, &ilwt->tuninfo, ilwt->has_tunsrc, &ilwt->tunsrc, - &ilwt->tundst, cache_dst); + &ilwt->tundst, dst); if (unlikely(err)) goto drop; @@ -390,7 +411,7 @@ do_encap: goto drop; } - if 
(unlikely(!cache_dst)) { + if (unlikely(!dst)) { struct ipv6hdr *hdr = ipv6_hdr(skb); struct flowi6 fl6; @@ -401,20 +422,27 @@ do_encap: fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = hdr->nexthdr; - cache_dst = ip6_route_output(net, NULL, &fl6); - if (cache_dst->error) { - err = cache_dst->error; + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + err = dst->error; goto drop; } - /* cache only if we don't create a dst reference loop */ - if (dst->lwtstate != cache_dst->lwtstate) { - local_bh_disable(); - dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); - local_bh_enable(); - } - - err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev)); + /* If the destination is the same after transformation (which is + * a valid use case for IOAM), then we don't want to add it to + * the cache in order to avoid a reference loop. Instead, we add + * our fake dst_entry to the cache as a way to detect this case. + * Otherwise, we add the resolved destination to the cache. + */ + local_bh_disable(); + if (orig_dst->lwtstate == dst->lwtstate) + dst_cache_set_ip6(&ilwt->cache, + &ilwt->null_dst, &fl6.saddr); + else + dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr); + local_bh_enable(); + + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) goto drop; } @@ -422,22 +450,26 @@ do_encap: /* avoid lwtunnel_output() reentry loop when destination is the same * after transformation (e.g., with the inline mode) */ - if (dst->lwtstate != cache_dst->lwtstate) { + if (orig_dst->lwtstate != dst->lwtstate) { skb_dst_drop(skb); - skb_dst_set(skb, cache_dst); + skb_dst_set(skb, dst); return dst_output(net, sk, skb); } out: - dst_release(cache_dst); - return dst->lwtstate->orig_output(net, sk, skb); + dst_release(dst); + return orig_dst->lwtstate->orig_output(net, sk, skb); drop: - dst_release(cache_dst); + dst_release(dst); kfree_skb(skb); return err; } static void ioam6_destroy_state(struct lwtunnel_state *lwt) { + /* Since the refcount of per-cpu dst_entry caches will never be 0 (see + * why above) when our "fake" dst_entry is used, it is not necessary to + * remove them before calling dst_cache_destroy() + */ dst_cache_destroy(&ioam6_lwt_state(lwt)->cache); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index bf727149fdec..1f860340690c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -249,19 +249,33 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) struct fib6_table *fib6_new_table(struct net *net, u32 id) { - struct fib6_table *tb; + struct fib6_table *tb, *new_tb; if (id == 0) id = RT6_TABLE_MAIN; + tb = fib6_get_table(net, id); if (tb) return tb; - tb = fib6_alloc_table(net, id); - if (tb) - fib6_link_table(net, tb); + new_tb = fib6_alloc_table(net, id); + if (!new_tb) + return NULL; + + spin_lock_bh(&net->ipv6.fib_table_hash_lock); + + tb = fib6_get_table(net, id); + if (unlikely(tb)) { + spin_unlock_bh(&net->ipv6.fib_table_hash_lock); + kfree(new_tb); + return tb; + } - return tb; + fib6_link_table(net, new_tb); + + spin_unlock_bh(&net->ipv6.fib_table_hash_lock); + + return new_tb; } EXPORT_SYMBOL_GPL(fib6_new_table); @@ -1034,8 +1048,14 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, rt6_flush_exceptions(rt); fib6_drop_pcpu_from(rt, table); - if (rt->nh && !list_empty(&rt->nh_list)) - list_del_init(&rt->nh_list); + if (rt->nh) { + spin_lock(&rt->nh->lock); + + if (!list_empty(&rt->nh_list)) + list_del_init(&rt->nh_list); + + spin_unlock(&rt->nh->lock); + } if (refcount_read(&rt->fib6_ref) != 1) { /* This 
route is used as dummy address holder in some split @@ -1069,8 +1089,8 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, */ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, - struct nl_info *info, - struct netlink_ext_ack *extack) + struct nl_info *info, struct netlink_ext_ack *extack, + struct list_head *purge_list) { struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&rt->fib6_table->tb6_lock)); @@ -1294,10 +1314,9 @@ add: } nsiblings = iter->fib6_nsiblings; iter->fib6_node = NULL; - fib6_purge_rt(iter, fn, info->nl_net); + list_add(&iter->purge_link, purge_list); if (rcu_access_pointer(fn->rr_ptr) == iter) fn->rr_ptr = NULL; - fib6_info_release(iter); if (nsiblings) { /* Replacing an ECMP route, remove all siblings */ @@ -1310,10 +1329,9 @@ add: if (rt6_qualify_for_ecmp(iter)) { *ins = iter->fib6_next; iter->fib6_node = NULL; - fib6_purge_rt(iter, fn, info->nl_net); + list_add(&iter->purge_link, purge_list); if (rcu_access_pointer(fn->rr_ptr) == iter) fn->rr_ptr = NULL; - fib6_info_release(iter); nsiblings--; info->nl_net->ipv6.rt6_stats->fib_rt_entries--; } else { @@ -1329,6 +1347,28 @@ add: return 0; } +static int fib6_add_rt2node_nh(struct fib6_node *fn, struct fib6_info *rt, + struct nl_info *info, struct netlink_ext_ack *extack, + struct list_head *purge_list) +{ + int err; + + spin_lock(&rt->nh->lock); + + if (rt->nh->dead) { + NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); + err = -EINVAL; + } else { + err = fib6_add_rt2node(fn, rt, info, extack, purge_list); + if (!err) + list_add(&rt->nh_list, &rt->nh->f6i_list); + } + + spin_unlock(&rt->nh->lock); + + return err; +} + static void fib6_start_gc(struct net *net, struct fib6_info *rt) { if (!timer_pending(&net->ipv6.ip6_fib_timer) && @@ -1383,6 +1423,7 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack) { struct fib6_table *table = rt->fib6_table; + LIST_HEAD(purge_list); struct fib6_node *fn; #ifdef CONFIG_IPV6_SUBTREES struct fib6_node *pn = NULL; @@ -1485,10 +1526,19 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, } #endif - err = fib6_add_rt2node(fn, rt, info, extack); + if (rt->nh) + err = fib6_add_rt2node_nh(fn, rt, info, extack, &purge_list); + else + err = fib6_add_rt2node(fn, rt, info, extack, &purge_list); if (!err) { - if (rt->nh) - list_add(&rt->nh_list, &rt->nh->f6i_list); + struct fib6_info *iter, *next; + + list_for_each_entry_safe(iter, next, &purge_list, purge_link) { + list_del(&iter->purge_link); + fib6_purge_rt(iter, fn, info->nl_net); + fib6_info_release(iter); + } + __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net)); if (rt->fib6_flags & RTF_EXPIRES) @@ -2423,6 +2473,8 @@ static int __net_init fib6_net_init(struct net *net) if (!net->ipv6.fib_table_hash) goto out_rt6_stats; + spin_lock_init(&net->ipv6.fib_table_hash_lock); + net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl), GFP_KERNEL); if (!net->ipv6.fib6_main_tbl) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 957ca98fa70f..2dc9dcffe2ca 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1570,7 +1570,7 @@ static struct inet6_protocol ip6gre_protocol __read_mostly = { .flags = INET6_PROTO_FINAL, }; -static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) +static void __net_exit ip6gre_exit_rtnl_net(struct net *net, struct list_head *head) { struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); struct net_device *dev, *aux; @@ 
-1587,16 +1587,16 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) for (h = 0; h < IP6_GRE_HASH_SIZE; h++) { struct ip6_tnl *t; - t = rtnl_dereference(ign->tunnels[prio][h]); + t = rtnl_net_dereference(net, ign->tunnels[prio][h]); while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. */ if (!net_eq(dev_net(t->dev), net)) - unregister_netdevice_queue(t->dev, - head); - t = rtnl_dereference(t->next); + unregister_netdevice_queue(t->dev, head); + + t = rtnl_net_dereference(net, t->next); } } } @@ -1640,19 +1640,9 @@ err_alloc_dev: return err; } -static void __net_exit ip6gre_exit_batch_rtnl(struct list_head *net_list, - struct list_head *dev_to_kill) -{ - struct net *net; - - ASSERT_RTNL(); - list_for_each_entry(net, net_list, exit_list) - ip6gre_destroy_tunnels(net, dev_to_kill); -} - static struct pernet_operations ip6gre_net_ops = { .init = ip6gre_init_net, - .exit_batch_rtnl = ip6gre_exit_batch_rtnl, + .exit_rtnl = ip6gre_exit_rtnl_net, .id = &ip6gre_net_id, .size = sizeof(struct ip6gre_net), }; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 581bc6289081..7bd29a9ff0db 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -259,7 +259,7 @@ bool ip6_autoflowlabel(struct net *net, const struct sock *sk) } /* - * xmit an sk_buff (used by TCP, SCTP and DCCP) + * xmit an sk_buff (used by TCP and SCTP) * Note : socket lock is not held for SYNACK packets, but might be modified * by calls to skb_set_owner_w() and ipv6_local_error(), * which are using proper atomic operations or spinlocks. @@ -1524,7 +1524,8 @@ emsgsize: uarg = msg->msg_ubuf; } } else if (sock_flag(sk, SOCK_ZEROCOPY)) { - uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb)); + uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb), + false); if (!uarg) return -ENOBUFS; extra_uref = !skb_zcopy(skb); /* only ref on new uarg */ diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a04dd1bb4b19..894d3158a6f0 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -2210,7 +2210,7 @@ static struct xfrm6_tunnel mplsip6_handler __read_mostly = { .priority = 1, }; -static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list) +static void __net_exit ip6_tnl_exit_rtnl_net(struct net *net, struct list_head *list) { struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct net_device *dev, *aux; @@ -2222,25 +2222,27 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head unregister_netdevice_queue(dev, list); for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) { - t = rtnl_dereference(ip6n->tnls_r_l[h]); + t = rtnl_net_dereference(net, ip6n->tnls_r_l[h]); while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. */ if (!net_eq(dev_net(t->dev), net)) unregister_netdevice_queue(t->dev, list); - t = rtnl_dereference(t->next); + + t = rtnl_net_dereference(net, t->next); } } - t = rtnl_dereference(ip6n->tnls_wc[0]); + t = rtnl_net_dereference(net, ip6n->tnls_wc[0]); while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. 
*/ if (!net_eq(dev_net(t->dev), net)) unregister_netdevice_queue(t->dev, list); - t = rtnl_dereference(t->next); + + t = rtnl_net_dereference(net, t->next); } } @@ -2287,19 +2289,9 @@ err_alloc_dev: return err; } -static void __net_exit ip6_tnl_exit_batch_rtnl(struct list_head *net_list, - struct list_head *dev_to_kill) -{ - struct net *net; - - ASSERT_RTNL(); - list_for_each_entry(net, net_list, exit_list) - ip6_tnl_destroy_tunnels(net, dev_to_kill); -} - static struct pernet_operations ip6_tnl_net_ops = { .init = ip6_tnl_init_net, - .exit_batch_rtnl = ip6_tnl_exit_batch_rtnl, + .exit_rtnl = ip6_tnl_exit_rtnl_net, .id = &ip6_tnl_net_id, .size = sizeof(struct ip6_tnl_net), }; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 09ec4b0ad7dc..40464a88bca6 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1112,21 +1112,21 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = { .get_link_net = ip6_tnl_get_link_net, }; -static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n, - struct list_head *list) +static void __net_exit vti6_exit_rtnl_net(struct net *net, struct list_head *list) { - int h; + struct vti6_net *ip6n = net_generic(net, vti6_net_id); struct ip6_tnl *t; + int h; for (h = 0; h < IP6_VTI_HASH_SIZE; h++) { - t = rtnl_dereference(ip6n->tnls_r_l[h]); + t = rtnl_net_dereference(net, ip6n->tnls_r_l[h]); while (t) { unregister_netdevice_queue(t->dev, list); - t = rtnl_dereference(t->next); + t = rtnl_net_dereference(net, t->next); } } - t = rtnl_dereference(ip6n->tnls_wc[0]); + t = rtnl_net_dereference(net, ip6n->tnls_wc[0]); if (t) unregister_netdevice_queue(t->dev, list); } @@ -1170,22 +1170,9 @@ err_alloc_dev: return err; } -static void __net_exit vti6_exit_batch_rtnl(struct list_head *net_list, - struct list_head *dev_to_kill) -{ - struct vti6_net *ip6n; - struct net *net; - - ASSERT_RTNL(); - list_for_each_entry(net, net_list, exit_list) { - ip6n = net_generic(net, vti6_net_id); - vti6_destroy_tunnels(ip6n, dev_to_kill); - } -} - static struct pernet_operations vti6_net_ops = { .init = vti6_init_net, - .exit_batch_rtnl = vti6_exit_batch_rtnl, + .exit_rtnl = vti6_exit_rtnl_net, .id = &vti6_net_id, .size = sizeof(struct vti6_net), }; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 581ce055bf52..4541836ee3da 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -164,20 +164,20 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct ip6_fraglist_iter iter; struct sk_buff *frag2; - if (first_len - hlen > mtu || - skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) + if (first_len - hlen > mtu) goto blackhole; - if (skb_cloned(skb)) + if (skb_cloned(skb) || + skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) goto slow_path; skb_walk_frags(skb, frag2) { - if (frag2->len > mtu || - skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr))) + if (frag2->len > mtu) goto blackhole; /* Partially cloned skb? 
*/ - if (skb_shared(frag2)) + if (skb_shared(frag2) || + skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr))) goto slow_path; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 96f1621e2381..44300962230b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2492,8 +2492,12 @@ static u32 rt6_multipath_custom_hash_fl6(const struct net *net, hash_keys.basic.ip_proto = fl6->flowi6_proto; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL) hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); - if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) - hash_keys.ports.src = fl6->fl6_sport; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) { + if (fl6->flowi6_flags & FLOWI_FLAG_ANY_SPORT) + hash_keys.ports.src = (__force __be16)get_random_u16(); + else + hash_keys.ports.src = fl6->fl6_sport; + } if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) hash_keys.ports.dst = fl6->fl6_dport; @@ -2547,7 +2551,10 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; hash_keys.addrs.v6addrs.src = fl6->saddr; hash_keys.addrs.v6addrs.dst = fl6->daddr; - hash_keys.ports.src = fl6->fl6_sport; + if (fl6->flowi6_flags & FLOWI_FLAG_ANY_SPORT) + hash_keys.ports.src = (__force __be16)get_random_u16(); + else + hash_keys.ports.src = fl6->fl6_sport; hash_keys.ports.dst = fl6->fl6_dport; hash_keys.basic.ip_proto = fl6->flowi6_proto; } @@ -3665,10 +3672,12 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, goto out; pcpu_alloc: - fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags); if (!fib6_nh->rt6i_pcpu) { - err = -ENOMEM; - goto out; + fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags); + if (!fib6_nh->rt6i_pcpu) { + err = -ENOMEM; + goto out; + } } fib6_nh->fib_nh_dev = dev; @@ -3728,62 +3737,33 @@ void fib6_nh_release_dsts(struct fib6_nh *fib6_nh) } } -static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, - gfp_t gfp_flags, - struct netlink_ext_ack *extack) +static int fib6_nh_prealloc_percpu(struct fib6_nh *fib6_nh, gfp_t gfp_flags) { - struct net *net = cfg->fc_nlinfo.nl_net; - struct fib6_info *rt = NULL; - struct nexthop *nh = NULL; - struct fib6_table *table; - struct fib6_nh *fib6_nh; - int err = -EINVAL; - int addr_type; + struct fib_nh_common *nhc = &fib6_nh->nh_common; - /* RTF_PCPU is an internal flag; can not be set by userspace */ - if (cfg->fc_flags & RTF_PCPU) { - NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU"); - goto out; - } + fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags); + if (!fib6_nh->rt6i_pcpu) + return -ENOMEM; - /* RTF_CACHE is an internal flag; can not be set by userspace */ - if (cfg->fc_flags & RTF_CACHE) { - NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE"); - goto out; + nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *, + gfp_flags); + if (!nhc->nhc_pcpu_rth_output) { + free_percpu(fib6_nh->rt6i_pcpu); + return -ENOMEM; } - if (cfg->fc_type > RTN_MAX) { - NL_SET_ERR_MSG(extack, "Invalid route type"); - goto out; - } + return 0; +} - if (cfg->fc_dst_len > 128) { - NL_SET_ERR_MSG(extack, "Invalid prefix length"); - goto out; - } - if (cfg->fc_src_len > 128) { - NL_SET_ERR_MSG(extack, "Invalid source address length"); - goto out; - } -#ifndef CONFIG_IPV6_SUBTREES - if (cfg->fc_src_len) { - NL_SET_ERR_MSG(extack, - "Specifying source address requires IPV6_SUBTREES to be enabled"); - goto out; - } -#endif - if (cfg->fc_nh_id) { - nh = 
nexthop_find_by_id(net, cfg->fc_nh_id); - if (!nh) { - NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); - goto out; - } - err = fib6_check_nexthop(nh, cfg, extack); - if (err) - goto out; - } +static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, + gfp_t gfp_flags, + struct netlink_ext_ack *extack) +{ + struct net *net = cfg->fc_nlinfo.nl_net; + struct fib6_table *table; + struct fib6_info *rt; + int err; - err = -ENOBUFS; if (cfg->fc_nlinfo.nlh && !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { table = fib6_get_table(net, cfg->fc_table); @@ -3794,22 +3774,28 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, } else { table = fib6_new_table(net, cfg->fc_table); } + if (!table) { + err = -ENOBUFS; + goto err; + } - if (!table) - goto out; - - err = -ENOMEM; - rt = fib6_info_alloc(gfp_flags, !nh); - if (!rt) - goto out; + rt = fib6_info_alloc(gfp_flags, !cfg->fc_nh_id); + if (!rt) { + err = -ENOMEM; + goto err; + } rt->fib6_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len, extack); if (IS_ERR(rt->fib6_metrics)) { err = PTR_ERR(rt->fib6_metrics); - /* Do not leave garbage there. */ - rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; - goto out_free; + goto free; + } + + if (!cfg->fc_nh_id) { + err = fib6_nh_prealloc_percpu(&rt->fib6_nh[0], gfp_flags); + if (err) + goto free_metrics; } if (cfg->fc_flags & RTF_ADDRCONF) @@ -3817,12 +3803,12 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, if (cfg->fc_flags & RTF_EXPIRES) fib6_set_expires(rt, jiffies + - clock_t_to_jiffies(cfg->fc_expires)); + clock_t_to_jiffies(cfg->fc_expires)); if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; - rt->fib6_protocol = cfg->fc_protocol; + rt->fib6_protocol = cfg->fc_protocol; rt->fib6_table = table; rt->fib6_metric = cfg->fc_metric; rt->fib6_type = cfg->fc_type ? 
: RTN_UNICAST; @@ -3835,23 +3821,51 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len); rt->fib6_src.plen = cfg->fc_src_len; #endif - if (nh) { - if (rt->fib6_src.plen) { - NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); + return rt; +free_metrics: + ip_fib_metrics_put(rt->fib6_metrics); +free: + kfree(rt); +err: + return ERR_PTR(err); +} + +static int ip6_route_info_create_nh(struct fib6_info *rt, + struct fib6_config *cfg, + struct netlink_ext_ack *extack) +{ + struct net *net = cfg->fc_nlinfo.nl_net; + struct fib6_nh *fib6_nh; + int err; + + if (cfg->fc_nh_id) { + struct nexthop *nh; + + nh = nexthop_find_by_id(net, cfg->fc_nh_id); + if (!nh) { err = -EINVAL; + NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); goto out_free; } + + err = fib6_check_nexthop(nh, cfg, extack); + if (err) + goto out_free; + if (!nexthop_get(nh)) { NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); err = -ENOENT; goto out_free; } + rt->nh = nh; fib6_nh = nexthop_fib6_nh(rt->nh); } else { - err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack); + int addr_type; + + err = fib6_nh_init(net, rt->fib6_nh, cfg, GFP_ATOMIC, extack); if (err) - goto out; + goto out_release; fib6_nh = rt->fib6_nh; @@ -3870,21 +3884,20 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { NL_SET_ERR_MSG(extack, "Invalid source address"); err = -EINVAL; - goto out; + goto out_release; } rt->fib6_prefsrc.addr = cfg->fc_prefsrc; rt->fib6_prefsrc.plen = 128; - } else - rt->fib6_prefsrc.plen = 0; + } - return rt; -out: + return 0; +out_release: fib6_info_release(rt); - return ERR_PTR(err); + return err; out_free: ip_fib_metrics_put(rt->fib6_metrics); kfree(rt); - return ERR_PTR(err); + return err; } int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, @@ -3897,8 +3910,16 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, if (IS_ERR(rt)) return PTR_ERR(rt); + rcu_read_lock(); + + err = ip6_route_info_create_nh(rt, cfg, extack); + if (err) + goto unlock; + err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack); fib6_info_release(rt); +unlock: + rcu_read_unlock(); return err; } @@ -4125,9 +4146,9 @@ static int ip6_route_del(struct fib6_config *cfg, if (rt->nh) { if (!fib6_info_hold_safe(rt)) continue; - rcu_read_unlock(); - return __ip6_del_rt(rt, &cfg->fc_nlinfo); + err = __ip6_del_rt(rt, &cfg->fc_nlinfo); + break; } if (cfg->fc_nh_id) continue; @@ -4142,13 +4163,13 @@ static int ip6_route_del(struct fib6_config *cfg, continue; if (!fib6_info_hold_safe(rt)) continue; - rcu_read_unlock(); /* if gateway was specified only delete the one hop */ if (cfg->fc_flags & RTF_GATEWAY) - return __ip6_del_rt(rt, &cfg->fc_nlinfo); - - return __ip6_del_rt_siblings(rt, cfg); + err = __ip6_del_rt(rt, &cfg->fc_nlinfo); + else + err = __ip6_del_rt_siblings(rt, cfg); + break; } } rcu_read_unlock(); @@ -4482,6 +4503,53 @@ void rt6_purge_dflt_routers(struct net *net) rcu_read_unlock(); } +static int fib6_config_validate(struct fib6_config *cfg, + struct netlink_ext_ack *extack) +{ + /* RTF_PCPU is an internal flag; can not be set by userspace */ + if (cfg->fc_flags & RTF_PCPU) { + NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU"); + goto errout; + } + + /* RTF_CACHE is an internal flag; can not be set by userspace */ + if (cfg->fc_flags & RTF_CACHE) { + NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE"); + goto errout; + } + + if 
(cfg->fc_type > RTN_MAX) { + NL_SET_ERR_MSG(extack, "Invalid route type"); + goto errout; + } + + if (cfg->fc_dst_len > 128) { + NL_SET_ERR_MSG(extack, "Invalid prefix length"); + goto errout; + } + +#ifdef CONFIG_IPV6_SUBTREES + if (cfg->fc_src_len > 128) { + NL_SET_ERR_MSG(extack, "Invalid source address length"); + goto errout; + } + + if (cfg->fc_nh_id && cfg->fc_src_len) { + NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); + goto errout; + } +#else + if (cfg->fc_src_len) { + NL_SET_ERR_MSG(extack, + "Specifying source address requires IPV6_SUBTREES to be enabled"); + goto errout; + } +#endif + return 0; +errout: + return -EINVAL; +} + static void rtmsg_to_fib6_config(struct net *net, struct in6_rtmsg *rtmsg, struct fib6_config *cfg) @@ -4517,9 +4585,12 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg) rtmsg_to_fib6_config(net, rtmsg, &cfg); - rtnl_lock(); switch (cmd) { case SIOCADDRT: + err = fib6_config_validate(&cfg, NULL); + if (err) + break; + /* Only do the default setting of fc_metric in route adding */ if (cfg.fc_metric == 0) cfg.fc_metric = IP6_RT_PRIO_USER; @@ -4529,7 +4600,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg) err = ip6_route_del(&cfg, NULL); break; } - rtnl_unlock(); + return err; } @@ -4619,6 +4690,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, .fc_ignore_dev_down = true, }; struct fib6_info *f6i; + int err; if (anycast) { cfg.fc_type = RTN_ANYCAST; @@ -4629,14 +4701,19 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, } f6i = ip6_route_info_create(&cfg, gfp_flags, extack); - if (!IS_ERR(f6i)) { - f6i->dst_nocount = true; + if (IS_ERR(f6i)) + return f6i; - if (!anycast && - (READ_ONCE(net->ipv6.devconf_all->disable_policy) || - READ_ONCE(idev->cnf.disable_policy))) - f6i->dst_nopolicy = true; - } + err = ip6_route_info_create_nh(f6i, &cfg, extack); + if (err) + return ERR_PTR(err); + + f6i->dst_nocount = true; + + if (!anycast && + (READ_ONCE(net->ipv6.devconf_all->disable_policy) || + READ_ONCE(idev->cnf.disable_policy))) + f6i->dst_nopolicy = true; return f6i; } @@ -5051,12 +5128,61 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_FLOWLABEL] = { .type = NLA_BE32 }, }; +static int rtm_to_fib6_multipath_config(struct fib6_config *cfg, + struct netlink_ext_ack *extack, + bool newroute) +{ + struct rtnexthop *rtnh; + int remaining; + + remaining = cfg->fc_mp_len; + rtnh = (struct rtnexthop *)cfg->fc_mp; + + if (!rtnh_ok(rtnh, remaining)) { + NL_SET_ERR_MSG(extack, "Invalid nexthop configuration - no valid nexthops"); + return -EINVAL; + } + + do { + bool has_gateway = cfg->fc_flags & RTF_GATEWAY; + int attrlen = rtnh_attrlen(rtnh); + + if (attrlen > 0) { + struct nlattr *nla, *attrs; + + attrs = rtnh_attrs(rtnh); + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + if (nla) { + if (nla_len(nla) < sizeof(cfg->fc_gateway)) { + NL_SET_ERR_MSG(extack, + "Invalid IPv6 address in RTA_GATEWAY"); + return -EINVAL; + } + + has_gateway = true; + } + } + + if (newroute && (cfg->fc_nh_id || !has_gateway)) { + NL_SET_ERR_MSG(extack, + "Device only routes can not be added for IPv6 using the multipath API."); + return -EINVAL; + } + + rtnh = rtnh_next(rtnh, &remaining); + } while (rtnh_ok(rtnh, remaining)); + + return lwtunnel_valid_encap_type_attr(cfg->fc_mp, cfg->fc_mp_len, + extack, false); +} + static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, struct fib6_config *cfg, struct netlink_ext_ack *extack) { - struct rtmsg *rtm; + 
bool newroute = nlh->nlmsg_type == RTM_NEWROUTE; struct nlattr *tb[RTA_MAX+1]; + struct rtmsg *rtm; unsigned int pref; int err; @@ -5165,9 +5291,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); - err = lwtunnel_valid_encap_type_attr(cfg->fc_mp, - cfg->fc_mp_len, - extack, true); + err = rtm_to_fib6_multipath_config(cfg, extack, newroute); if (err < 0) goto errout; } @@ -5187,7 +5311,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); err = lwtunnel_valid_encap_type(cfg->fc_encap_type, - extack, true); + extack, false); if (err < 0) goto errout; } @@ -5209,31 +5333,132 @@ errout: struct rt6_nh { struct fib6_info *fib6_info; struct fib6_config r_cfg; - struct list_head next; + struct list_head list; + int weight; }; -static int ip6_route_info_append(struct net *net, - struct list_head *rt6_nh_list, - struct fib6_info *rt, - struct fib6_config *r_cfg) +static void ip6_route_mpath_info_cleanup(struct list_head *rt6_nh_list) { - struct rt6_nh *nh; - int err = -EEXIST; + struct rt6_nh *nh, *nh_next; - list_for_each_entry(nh, rt6_nh_list, next) { - /* check if fib6_info already exists */ - if (rt6_duplicate_nexthop(nh->fib6_info, rt)) - return err; + list_for_each_entry_safe(nh, nh_next, rt6_nh_list, list) { + struct fib6_info *rt = nh->fib6_info; + + if (rt) { + free_percpu(rt->fib6_nh->nh_common.nhc_pcpu_rth_output); + free_percpu(rt->fib6_nh->rt6i_pcpu); + ip_fib_metrics_put(rt->fib6_metrics); + kfree(rt); + } + + list_del(&nh->list); + kfree(nh); } +} - nh = kzalloc(sizeof(*nh), GFP_KERNEL); - if (!nh) - return -ENOMEM; - nh->fib6_info = rt; - memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); - list_add_tail(&nh->next, rt6_nh_list); +static int ip6_route_mpath_info_create(struct list_head *rt6_nh_list, + struct fib6_config *cfg, + struct netlink_ext_ack *extack) +{ + struct rtnexthop *rtnh; + int remaining; + int err; + + remaining = cfg->fc_mp_len; + rtnh = (struct rtnexthop *)cfg->fc_mp; + + /* Parse a Multipath Entry and build a list (rt6_nh_list) of + * fib6_info structs per nexthop + */ + while (rtnh_ok(rtnh, remaining)) { + struct fib6_config r_cfg; + struct fib6_info *rt; + struct rt6_nh *nh; + int attrlen; + + nh = kzalloc(sizeof(*nh), GFP_KERNEL); + if (!nh) { + err = -ENOMEM; + goto err; + } + + list_add_tail(&nh->list, rt6_nh_list); + + memcpy(&r_cfg, cfg, sizeof(*cfg)); + if (rtnh->rtnh_ifindex) + r_cfg.fc_ifindex = rtnh->rtnh_ifindex; + + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + if (nla) { + r_cfg.fc_gateway = nla_get_in6_addr(nla); + r_cfg.fc_flags |= RTF_GATEWAY; + } + + r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); + nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); + if (nla) + r_cfg.fc_encap_type = nla_get_u16(nla); + } + + r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK); + + rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto err; + } + + nh->fib6_info = rt; + nh->weight = rtnh->rtnh_hops + 1; + memcpy(&nh->r_cfg, &r_cfg, sizeof(r_cfg)); + + rtnh = rtnh_next(rtnh, &remaining); + } return 0; +err: + ip6_route_mpath_info_cleanup(rt6_nh_list); + return err; +} + +static int ip6_route_mpath_info_create_nh(struct list_head *rt6_nh_list, + struct netlink_ext_ack *extack) +{ + struct rt6_nh *nh, *nh_next, *nh_tmp; + 
LIST_HEAD(tmp); + int err; + + list_for_each_entry_safe(nh, nh_next, rt6_nh_list, list) { + struct fib6_info *rt = nh->fib6_info; + + err = ip6_route_info_create_nh(rt, &nh->r_cfg, extack); + if (err) { + nh->fib6_info = NULL; + goto err; + } + + rt->fib6_nh->fib_nh_weight = nh->weight; + + list_move_tail(&nh->list, &tmp); + + list_for_each_entry(nh_tmp, rt6_nh_list, list) { + /* check if fib6_info already exists */ + if (rt6_duplicate_nexthop(nh_tmp->fib6_info, rt)) { + err = -EEXIST; + goto err; + } + } + } +out: + list_splice(&tmp, rt6_nh_list); + return err; +err: + ip6_route_mpath_info_cleanup(rt6_nh_list); + goto out; } static void ip6_route_mpath_notify(struct fib6_info *rt, @@ -5287,108 +5512,35 @@ out: return should_notify; } -static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla, - struct netlink_ext_ack *extack) -{ - if (nla_len(nla) < sizeof(*gw)) { - NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY"); - return -EINVAL; - } - - *gw = nla_get_in6_addr(nla); - - return 0; -} - static int ip6_route_multipath_add(struct fib6_config *cfg, struct netlink_ext_ack *extack) { struct fib6_info *rt_notif = NULL, *rt_last = NULL; struct nl_info *info = &cfg->fc_nlinfo; - struct fib6_config r_cfg; - struct rtnexthop *rtnh; - struct fib6_info *rt; - struct rt6_nh *err_nh; struct rt6_nh *nh, *nh_safe; + LIST_HEAD(rt6_nh_list); + struct rt6_nh *err_nh; __u16 nlflags; - int remaining; - int attrlen; - int err = 1; int nhn = 0; - int replace = (cfg->fc_nlinfo.nlh && - (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); - LIST_HEAD(rt6_nh_list); + int replace; + int err; + + replace = (cfg->fc_nlinfo.nlh && + (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE; if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND) nlflags |= NLM_F_APPEND; - remaining = cfg->fc_mp_len; - rtnh = (struct rtnexthop *)cfg->fc_mp; - - /* Parse a Multipath Entry and build a list (rt6_nh_list) of - * fib6_info structs per nexthop - */ - while (rtnh_ok(rtnh, remaining)) { - memcpy(&r_cfg, cfg, sizeof(*cfg)); - if (rtnh->rtnh_ifindex) - r_cfg.fc_ifindex = rtnh->rtnh_ifindex; - - attrlen = rtnh_attrlen(rtnh); - if (attrlen > 0) { - struct nlattr *nla, *attrs = rtnh_attrs(rtnh); - - nla = nla_find(attrs, attrlen, RTA_GATEWAY); - if (nla) { - err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, - extack); - if (err) - goto cleanup; - - r_cfg.fc_flags |= RTF_GATEWAY; - } - r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); - - /* RTA_ENCAP_TYPE length checked in - * lwtunnel_valid_encap_type_attr - */ - nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); - if (nla) - r_cfg.fc_encap_type = nla_get_u16(nla); - } - - r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK); - rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); - rt = NULL; - goto cleanup; - } - if (!rt6_qualify_for_ecmp(rt)) { - err = -EINVAL; - NL_SET_ERR_MSG(extack, - "Device only routes can not be added for IPv6 using the multipath API."); - fib6_info_release(rt); - goto cleanup; - } - - rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1; - - err = ip6_route_info_append(info->nl_net, &rt6_nh_list, - rt, &r_cfg); - if (err) { - fib6_info_release(rt); - goto cleanup; - } + err = ip6_route_mpath_info_create(&rt6_nh_list, cfg, extack); + if (err) + return err; - rtnh = rtnh_next(rtnh, &remaining); - } + rcu_read_lock(); - if (list_empty(&rt6_nh_list)) { - NL_SET_ERR_MSG(extack, - "Invalid nexthop configuration - no valid nexthops"); - return -EINVAL; - } + 
err = ip6_route_mpath_info_create_nh(&rt6_nh_list, extack); + if (err) + goto cleanup; /* for add and replace send one notification with all nexthops. * Skip the notification in fib6_add_rt2node and send one with @@ -5402,7 +5554,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, info->skip_notify_kernel = 1; err_nh = NULL; - list_for_each_entry(nh, &rt6_nh_list, next) { + list_for_each_entry(nh, &rt6_nh_list, list) { err = __ip6_ins_rt(nh->fib6_info, info, extack); if (err) { @@ -5470,16 +5622,18 @@ add_errout: ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); /* Delete routes that were already added */ - list_for_each_entry(nh, &rt6_nh_list, next) { + list_for_each_entry(nh, &rt6_nh_list, list) { if (err_nh == nh) break; ip6_route_del(&nh->r_cfg, extack); } cleanup: - list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { + rcu_read_unlock(); + + list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, list) { fib6_info_release(nh->fib6_info); - list_del(&nh->next); + list_del(&nh->list); kfree(nh); } @@ -5511,21 +5665,15 @@ static int ip6_route_multipath_del(struct fib6_config *cfg, nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { - err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, - extack); - if (err) { - last_err = err; - goto next_rtnh; - } - + r_cfg.fc_gateway = nla_get_in6_addr(nla); r_cfg.fc_flags |= RTF_GATEWAY; } } + err = ip6_route_del(&r_cfg, extack); if (err) last_err = err; -next_rtnh: rtnh = rtnh_next(rtnh, &remaining); } @@ -5542,15 +5690,20 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; - if (cfg.fc_nh_id && - !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) { - NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); - return -EINVAL; + if (cfg.fc_nh_id) { + rcu_read_lock(); + err = !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id); + rcu_read_unlock(); + + if (err) { + NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); + return -EINVAL; + } } - if (cfg.fc_mp) + if (cfg.fc_mp) { return ip6_route_multipath_del(&cfg, extack); - else { + } else { cfg.fc_delete_all_nh = 1; return ip6_route_del(&cfg, extack); } @@ -5566,6 +5719,10 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; + err = fib6_config_validate(&cfg, extack); + if (err) + return err; + if (cfg.fc_metric == 0) cfg.fc_metric = IP6_RT_PRIO_USER; @@ -6030,7 +6187,8 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, struct rtmsg *rtm; int i, err; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + rtm = nlmsg_payload(nlh, sizeof(*rtm)); + if (!rtm) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for get route request"); return -EINVAL; @@ -6040,7 +6198,6 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, extack); - rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) || rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || @@ -6760,9 +6917,9 @@ static void bpf_iter_unregister(void) static const struct rtnl_msg_handler ip6_route_rtnl_msg_handlers[] __initconst_or_module = { {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWROUTE, - .doit = inet6_rtm_newroute}, + .doit = inet6_rtm_newroute, .flags = RTNL_FLAG_DOIT_UNLOCKED}, {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELROUTE, - .doit = inet6_rtm_delroute}, + .doit = inet6_rtm_delroute, .flags = RTNL_FLAG_DOIT_UNLOCKED}, 
{.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE, .doit = inet6_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED}, }; diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index bbf5b84a70fc..f78ecb6ad838 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -40,7 +40,14 @@ #include <net/seg6_hmac.h> #include <linux/random.h> -static DEFINE_PER_CPU(char [SEG6_HMAC_RING_SIZE], hmac_ring); +struct hmac_storage { + local_lock_t bh_lock; + char hmac_ring[SEG6_HMAC_RING_SIZE]; +}; + +static DEFINE_PER_CPU(struct hmac_storage, hmac_storage) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; static int seg6_hmac_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) { @@ -187,7 +194,8 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, */ local_bh_disable(); - ring = this_cpu_ptr(hmac_ring); + local_lock_nested_bh(&hmac_storage.bh_lock); + ring = this_cpu_ptr(hmac_storage.hmac_ring); off = ring; /* source address */ @@ -212,6 +220,7 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, dgsize = __do_hmac(hinfo, ring, plen, tmp_out, SEG6_HMAC_MAX_DIGESTSIZE); + local_unlock_nested_bh(&hmac_storage.bh_lock); local_bh_enable(); if (dgsize < 0) diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index ac1dbd492c22..ee5e448cc7a8 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -1671,7 +1671,7 @@ static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt, if (!seg6_validate_srh(srh, len, false)) return -EINVAL; - slwt->srh = kmemdup(srh, len, GFP_KERNEL); + slwt->srh = kmemdup(srh, len, GFP_ATOMIC); if (!slwt->srh) return -ENOMEM; @@ -1911,7 +1911,7 @@ static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt, if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME]) return -EINVAL; - slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL); + slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_ATOMIC); if (!slwt->bpf.name) return -ENOMEM; @@ -1994,7 +1994,7 @@ static int parse_nla_counters(struct nlattr **attrs, return -EINVAL; /* counters are always zero initialized */ - pcounters = seg6_local_alloc_pcpu_counters(GFP_KERNEL); + pcounters = seg6_local_alloc_pcpu_counters(GFP_ATOMIC); if (!pcounters) return -ENOMEM; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 9a0f32acb750..a72dbca9e8fc 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1804,8 +1804,7 @@ static struct xfrm_tunnel mplsip_handler __read_mostly = { }; #endif -static void __net_exit sit_destroy_tunnels(struct net *net, - struct list_head *head) +static void __net_exit sit_exit_rtnl_net(struct net *net, struct list_head *head) { struct sit_net *sitn = net_generic(net, sit_net_id); struct net_device *dev, *aux; @@ -1820,15 +1819,15 @@ static void __net_exit sit_destroy_tunnels(struct net *net, for (h = 0; h < (prio ? IP6_SIT_HASH_SIZE : 1); h++) { struct ip_tunnel *t; - t = rtnl_dereference(sitn->tunnels[prio][h]); + t = rtnl_net_dereference(net, sitn->tunnels[prio][h]); while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. 
*/ if (!net_eq(dev_net(t->dev), net)) - unregister_netdevice_queue(t->dev, - head); - t = rtnl_dereference(t->next); + unregister_netdevice_queue(t->dev, head); + + t = rtnl_net_dereference(net, t->next); } } } @@ -1881,19 +1880,9 @@ err_alloc_dev: return err; } -static void __net_exit sit_exit_batch_rtnl(struct list_head *net_list, - struct list_head *dev_to_kill) -{ - struct net *net; - - ASSERT_RTNL(); - list_for_each_entry(net, net_list, exit_list) - sit_destroy_tunnels(net, dev_to_kill); -} - static struct pernet_operations sit_net_ops = { .init = sit_init_net, - .exit_batch_rtnl = sit_exit_batch_rtnl, + .exit_rtnl = sit_exit_rtnl_net, .id = &sit_net_id, .size = sizeof(struct sit_net), }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b03c223eda4f..e8e68a142649 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -267,6 +267,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; + if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6.fl6_sport) + fl6.flowi6_flags = FLOWI_FLAG_ANY_SPORT; fl6.flowi6_uid = sk->sk_uid; opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); @@ -1970,7 +1972,8 @@ do_time_wait: goto csum_error; } - tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn); + tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn, + &drop_reason); switch (tw_status) { case TCP_TW_SYN: { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 024458ef163c..7317f8e053f1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -46,6 +46,7 @@ #include <net/tcp_states.h> #include <net/ip6_checksum.h> #include <net/ip6_tunnel.h> +#include <net/udp_tunnel.h> #include <net/xfrm.h> #include <net/inet_hashtables.h> #include <net/inet6_hashtables.h> @@ -1825,6 +1826,7 @@ void udpv6_destroy_sock(struct sock *sk) if (udp_test_bit(ENCAP_ENABLED, sk)) { static_branch_dec(&udpv6_encap_needed_key); udp_encap_disable(); + udp_tunnel_cleanup_gro(sk); } } } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 404212dfc99a..d8445ac1b2e4 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -118,8 +118,13 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport, { const struct ipv6hdr *iph = skb_gro_network_header(skb); struct net *net = dev_net_rcu(skb->dev); + struct sock *sk; int iif, sdif; + sk = udp_tunnel_sk(net, true); + if (sk && dport == htons(sk->sk_num)) + return sk; + inet6_get_iif_sdif(skb, &iif, &sdif); return __udp6_lib_lookup(net, &iph->saddr, sport, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9f683f838431..9017b98fea04 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -146,8 +146,8 @@ static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *link_conf) { struct ieee80211_sub_if_data *tx_sdata; + struct ieee80211_bss_conf *old; - sdata->vif.mbssid_tx_vif = NULL; link_conf->bssid_index = 0; link_conf->nontransmitted = false; link_conf->ema_ap = false; @@ -156,14 +156,26 @@ static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata, if (sdata->vif.type != NL80211_IFTYPE_AP || !params->tx_wdev) return -EINVAL; + old = sdata_dereference(link_conf->tx_bss_conf, sdata); + if (old) + return -EALREADY; + tx_sdata = IEEE80211_WDEV_TO_SUB_IF(params->tx_wdev); if (!tx_sdata) return -EINVAL; if (tx_sdata == sdata) { - sdata->vif.mbssid_tx_vif = &sdata->vif; + 
rcu_assign_pointer(link_conf->tx_bss_conf, link_conf); } else { - sdata->vif.mbssid_tx_vif = &tx_sdata->vif; + struct ieee80211_bss_conf *tx_bss_conf; + + tx_bss_conf = sdata_dereference(tx_sdata->vif.link_conf[params->tx_link_id], + sdata); + if (rcu_access_pointer(tx_bss_conf->tx_bss_conf) != tx_bss_conf) + return -EINVAL; + + rcu_assign_pointer(link_conf->tx_bss_conf, tx_bss_conf); + link_conf->nontransmitted = true; link_conf->bssid_index = params->index; } @@ -1669,7 +1681,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, kfree(link_conf->ftmr_params); link_conf->ftmr_params = NULL; - sdata->vif.mbssid_tx_vif = NULL; link_conf->bssid_index = 0; link_conf->nontransmitted = false; link_conf->ema_ap = false; @@ -1683,6 +1694,9 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, ieee80211_free_key_list(local, &keys); } + ieee80211_stop_mbssid(sdata); + RCU_INIT_POINTER(link_conf->tx_bss_conf, NULL); + link_conf->enable_beacon = false; sdata->beacon_rate_set = false; sdata->vif.cfg.ssid_len = 0; @@ -2066,6 +2080,9 @@ static int sta_apply_parameters(struct ieee80211_local *local, if (params->listen_interval >= 0) sta->listen_interval = params->listen_interval; + if (params->eml_cap_present) + sta->sta.eml_cap = params->eml_cap; + ret = sta_link_apply_parameters(local, sta, STA_LINK_MODE_STA_MODIFY, ¶ms->link_sta_params); if (ret) @@ -3700,6 +3717,7 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; + struct ieee80211_bss_conf *tx_bss_conf; struct ieee80211_link_data *link_data; if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) @@ -3713,25 +3731,24 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id) return; } - /* TODO: MBSSID with MLO changes */ - if (vif->mbssid_tx_vif == vif) { + tx_bss_conf = rcu_dereference(link_data->conf->tx_bss_conf); + if (tx_bss_conf == link_data->conf) { /* Trigger ieee80211_csa_finish() on the non-transmitting * interfaces when channel switch is received on * transmitting interface */ - struct ieee80211_sub_if_data *iter; - - list_for_each_entry_rcu(iter, &local->interfaces, list) { - if (!ieee80211_sdata_running(iter)) - continue; + struct ieee80211_link_data *iter; - if (iter == sdata || iter->vif.mbssid_tx_vif != vif) + for_each_sdata_link(local, iter) { + if (iter->sdata == sdata || + rcu_access_pointer(iter->conf->tx_bss_conf) != tx_bss_conf) continue; - wiphy_work_queue(iter->local->hw.wiphy, - &iter->deflink.csa.finalize_work); + wiphy_work_queue(iter->sdata->local->hw.wiphy, + &iter->csa.finalize_work); } } + wiphy_work_queue(local->hw.wiphy, &link_data->csa.finalize_work); rcu_read_unlock(); @@ -4833,17 +4850,19 @@ ieee80211_color_change_bss_config_notify(struct ieee80211_link_data *link, ieee80211_link_info_change_notify(sdata, link, changed); - if (!sdata->vif.bss_conf.nontransmitted && sdata->vif.mbssid_tx_vif) { - struct ieee80211_sub_if_data *child; + if (!link->conf->nontransmitted && + rcu_access_pointer(link->conf->tx_bss_conf)) { + struct ieee80211_link_data *tmp; - list_for_each_entry(child, &sdata->local->interfaces, list) { - if (child != sdata && child->vif.mbssid_tx_vif == &sdata->vif) { - child->vif.bss_conf.he_bss_color.color = color; - child->vif.bss_conf.he_bss_color.enabled = enable; - ieee80211_link_info_change_notify(child, - &child->deflink, - BSS_CHANGED_HE_BSS_COLOR); - } + for_each_sdata_link(sdata->local, tmp) { + if 
(tmp->sdata == sdata || + rcu_access_pointer(tmp->conf->tx_bss_conf) != link->conf) + continue; + + tmp->conf->he_bss_color.color = color; + tmp->conf->he_bss_color.enabled = enable; + ieee80211_link_info_change_notify(tmp->sdata, tmp, + BSS_CHANGED_HE_BSS_COLOR); } } } diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index c3bfac58151f..3aaf5abf1acc 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -2131,6 +2131,9 @@ void ieee80211_link_release_channel(struct ieee80211_link_data *link) { struct ieee80211_sub_if_data *sdata = link->sdata; + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + return; + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (rcu_access_pointer(link->conf->chanctx_conf)) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index a8948f4d983e..49061bd4151b 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -152,12 +152,6 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf, p += scnprintf(p, bufsz + buf - p, - "target %uus interval %uus ecn %s\n", - codel_time_to_us(sta->cparams.target), - codel_time_to_us(sta->cparams.interval), - sta->cparams.ecn ? "yes" : "no"); - p += scnprintf(p, - bufsz + buf - p, "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n"); for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 4246d168374f..a6e7b7ba6a01 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -48,7 +48,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; u8 *pos; struct ieee80211_supported_band *sband; - u32 rate_flags, rates = 0, rates_added = 0; + u32 rates = 0, rates_added = 0; struct beacon_data *presp; int frame_len; @@ -90,14 +90,11 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, pos += ifibss->ssid_len; sband = local->hw.wiphy->bands[chandef->chan->band]; - rate_flags = ieee80211_chandef_rate_flags(chandef); rates_n = 0; if (have_higher_than_11mbit) *have_higher_than_11mbit = false; for (i = 0; i < sband->n_bitrates; i++) { - if ((rate_flags & sband->bitrates[i].flags) != rate_flags) - continue; if (sband->bitrates[i].bitrate > 110 && have_higher_than_11mbit) *have_higher_than_11mbit = true; @@ -395,7 +392,6 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, const struct cfg80211_bss_ies *ies; enum nl80211_channel_type chan_type; u64 tsf; - u32 rate_flags; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -429,7 +425,6 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, } sband = sdata->local->hw.wiphy->bands[cbss->channel->band]; - rate_flags = ieee80211_chandef_rate_flags(&sdata->u.ibss.chandef); basic_rates = 0; @@ -439,9 +434,6 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, for (j = 0; j < sband->n_bitrates; j++) { int brate; - if ((rate_flags & sband->bitrates[j].flags) - != rate_flags) - continue; brate = DIV_ROUND_UP(sband->bitrates[j].bitrate, 5); if (brate == rate) { @@ -1717,12 +1709,9 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, struct cfg80211_ibss_params *params) { u64 changed = 0; - u32 rate_flags; - struct ieee80211_supported_band *sband; enum ieee80211_chanctx_mode chanmode; struct ieee80211_local *local = sdata->local; int radar_detect_width = 0; - int i; int ret; lockdep_assert_wiphy(local->hw.wiphy); @@ -1765,12 +1754,6 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, 
sdata->u.ibss.last_scan_completed = jiffies; /* fix basic_rates if channel does not support these rates */ - rate_flags = ieee80211_chandef_rate_flags(¶ms->chandef); - sband = local->hw.wiphy->bands[params->chandef.chan->band]; - for (i = 0; i < sband->n_bitrates; i++) { - if ((rate_flags & sband->bitrates[i].flags) != rate_flags) - sdata->u.ibss.basic_rates &= ~BIT(i); - } memcpy(sdata->vif.bss_conf.mcast_rate, params->mcast_rate, sizeof(params->mcast_rate)); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index fb05f3cd37ec..30809f0b35f7 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1226,6 +1226,15 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) if ((_link = wiphy_dereference((_local)->hw.wiphy, \ ___sdata->link[___link_id]))) +#define for_each_link_data(sdata, __link) \ + struct ieee80211_sub_if_data *__sdata = sdata; \ + for (int __link_id = 0; \ + __link_id < ARRAY_SIZE((__sdata)->link); __link_id++) \ + if ((!(__sdata)->vif.valid_links || \ + (__sdata)->vif.valid_links & BIT(__link_id)) && \ + ((__link) = sdata_dereference((__sdata)->link[__link_id], \ + (__sdata)))) + static inline int ieee80211_get_mbssid_beacon_len(struct cfg80211_mbssid_elems *elems, struct cfg80211_rnr_elems *rnr_elems, @@ -2078,6 +2087,9 @@ static inline void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata ieee80211_vif_set_links(sdata, 0, 0); } +void ieee80211_apvlan_link_setup(struct ieee80211_sub_if_data *sdata); +void ieee80211_apvlan_link_clear(struct ieee80211_sub_if_data *sdata); + /* tx handling */ void ieee80211_clear_tx_pending(struct ieee80211_local *local); void ieee80211_tx_pending(struct tasklet_struct *t); @@ -2613,7 +2625,7 @@ void ieee80211_add_aid_request_ie(struct ieee80211_sub_if_data *sdata, /* element building in SKBs */ int ieee80211_put_srates_elem(struct sk_buff *skb, const struct ieee80211_supported_band *sband, - u32 basic_rates, u32 rate_flags, u32 masked_rates, + u32 basic_rates, u32 masked_rates, u8 element_id); int ieee80211_put_he_cap(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata, @@ -2795,6 +2807,8 @@ int ieee80211_mgd_assoc_ml_reconf(struct ieee80211_sub_if_data *sdata, void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len); +void ieee80211_stop_mbssid(struct ieee80211_sub_if_data *sdata); + #if IS_ENABLED(CONFIG_MAC80211_KUNIT_TEST) #define EXPORT_SYMBOL_IF_MAC80211_KUNIT(sym) EXPORT_SYMBOL_IF_KUNIT(sym) #define VISIBLE_IF_MAC80211_KUNIT diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 969b3e2c496a..7c27f3cd841c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -485,6 +485,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do case NL80211_IFTYPE_MONITOR: list_del_rcu(&sdata->u.mntr.list); break; + case NL80211_IFTYPE_AP_VLAN: + ieee80211_apvlan_link_clear(sdata); + break; default: break; } @@ -729,30 +732,59 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do ieee80211_add_virtual_monitor(local); } -static void ieee80211_stop_mbssid(struct ieee80211_sub_if_data *sdata) +void ieee80211_stop_mbssid(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *tx_sdata, *non_tx_sdata, *tmp_sdata; - struct ieee80211_vif *tx_vif = sdata->vif.mbssid_tx_vif; + struct ieee80211_sub_if_data *tx_sdata; + struct ieee80211_bss_conf *link_conf, *tx_bss_conf; + struct ieee80211_link_data *tx_link, *link; + unsigned int link_id; - if 
(!tx_vif) - return; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + + /* Check if any of the links of the current sdata is part of an MBSSID set. */ + for_each_vif_active_link(&sdata->vif, link_conf, link_id) { + tx_bss_conf = sdata_dereference(link_conf->tx_bss_conf, sdata); + if (!tx_bss_conf) + continue; + + tx_sdata = vif_to_sdata(tx_bss_conf->vif); + RCU_INIT_POINTER(link_conf->tx_bss_conf, NULL); - tx_sdata = vif_to_sdata(tx_vif); - sdata->vif.mbssid_tx_vif = NULL; + /* If we are not the tx sdata, reset the tx sdata's tx_bss_conf to avoid recursion + * while closing the tx sdata at the end of the outer loop below. + */ + if (sdata != tx_sdata) { + tx_link = sdata_dereference(tx_sdata->link[tx_bss_conf->link_id], + tx_sdata); + if (!tx_link) + continue; - list_for_each_entry_safe(non_tx_sdata, tmp_sdata, - &tx_sdata->local->interfaces, list) { - if (non_tx_sdata != sdata && non_tx_sdata != tx_sdata && - non_tx_sdata->vif.mbssid_tx_vif == tx_vif && - ieee80211_sdata_running(non_tx_sdata)) { - non_tx_sdata->vif.mbssid_tx_vif = NULL; - dev_close(non_tx_sdata->wdev.netdev); + RCU_INIT_POINTER(tx_link->conf->tx_bss_conf, NULL); } - } - if (sdata != tx_sdata && ieee80211_sdata_running(tx_sdata)) { - tx_sdata->vif.mbssid_tx_vif = NULL; - dev_close(tx_sdata->wdev.netdev); + /* Loop through sdatas to find whether any of their links + * belong to the same MBSSID set as the one getting deleted. + */ + for_each_sdata_link(tx_sdata->local, link) { + struct ieee80211_sub_if_data *link_sdata = link->sdata; + + if (link_sdata == sdata || link_sdata == tx_sdata || + rcu_access_pointer(link->conf->tx_bss_conf) != tx_bss_conf) + continue; + + RCU_INIT_POINTER(link->conf->tx_bss_conf, NULL); + + /* Remove all links of the matching MLD until dynamic link + * removal can be supported. + */ + cfg80211_stop_iface(link_sdata->wdev.wiphy, &link_sdata->wdev, + GFP_KERNEL); + } + + /* If we are not the tx sdata, remove the tx sdata's links and proceed */ + if (sdata != tx_sdata && ieee80211_sdata_running(tx_sdata)) + cfg80211_stop_iface(tx_sdata->wdev.wiphy, + &tx_sdata->wdev, GFP_KERNEL); } } @@ -760,21 +792,25 @@ static int ieee80211_stop(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - /* close dependent VLAN and MBSSID interfaces before locking wiphy */ + /* close dependent VLAN interfaces before locking wiphy */ if (sdata->vif.type == NL80211_IFTYPE_AP) { struct ieee80211_sub_if_data *vlan, *tmpsdata; list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, u.vlan.list) dev_close(vlan->dev); - - ieee80211_stop_mbssid(sdata); } guard(wiphy)(sdata->local->hw.wiphy); wiphy_work_cancel(sdata->local->hw.wiphy, &sdata->activate_links_work); + /* Close the dependent MBSSID interfaces with the wiphy lock held, as we may be + * terminating their partner links too in case of MLD.
+ */ + if (sdata->vif.type == NL80211_IFTYPE_AP) + ieee80211_stop_mbssid(sdata); + ieee80211_do_stop(sdata, true); return 0; @@ -1268,6 +1304,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) sdata->crypto_tx_tailroom_needed_cnt += master->crypto_tx_tailroom_needed_cnt; + ieee80211_apvlan_link_setup(sdata); + break; } case NL80211_IFTYPE_AP: @@ -1324,7 +1362,12 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) case NL80211_IFTYPE_AP_VLAN: /* no need to tell driver, but set carrier and chanctx */ if (sdata->bss->active) { - ieee80211_link_vlan_copy_chanctx(&sdata->deflink); + struct ieee80211_link_data *link; + + for_each_link_data(sdata, link) { + ieee80211_link_vlan_copy_chanctx(link); + } + netif_carrier_on(dev); ieee80211_set_vif_encap_ops(sdata); } else { diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 58a76bcd6ae6..d40c2bd3b50b 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -12,6 +12,71 @@ #include "key.h" #include "debugfs_netdev.h" +static void ieee80211_update_apvlan_links(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_sub_if_data *vlan; + struct ieee80211_link_data *link; + u16 ap_bss_links = sdata->vif.valid_links; + u16 new_links, vlan_links; + unsigned long add; + + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { + int link_id; + + if (!vlan) + continue; + + /* No support for 4addr with MLO yet */ + if (vlan->wdev.use_4addr) + return; + + vlan_links = vlan->vif.valid_links; + + new_links = ap_bss_links; + + add = new_links & ~vlan_links; + if (!add) + continue; + + ieee80211_vif_set_links(vlan, add, 0); + + for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { + link = sdata_dereference(vlan->link[link_id], vlan); + ieee80211_link_vlan_copy_chanctx(link); + } + } +} + +void ieee80211_apvlan_link_setup(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_sub_if_data *ap_bss = container_of(sdata->bss, + struct ieee80211_sub_if_data, u.ap); + u16 new_links = ap_bss->vif.valid_links; + unsigned long add; + int link_id; + + if (!ap_bss->vif.valid_links) + return; + + add = new_links; + for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { + sdata->wdev.valid_links |= BIT(link_id); + ether_addr_copy(sdata->wdev.links[link_id].addr, + ap_bss->wdev.links[link_id].addr); + } + + ieee80211_vif_set_links(sdata, new_links, 0); +} + +void ieee80211_apvlan_link_clear(struct ieee80211_sub_if_data *sdata) +{ + if (!sdata->wdev.valid_links) + return; + + sdata->wdev.valid_links = 0; + ieee80211_vif_clear_links(sdata); +} + void ieee80211_link_setup(struct ieee80211_link_data *link) { if (link->sdata->vif.type == NL80211_IFTYPE_STATION) @@ -31,6 +96,17 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, rcu_assign_pointer(sdata->vif.link_conf[link_id], link_conf); rcu_assign_pointer(sdata->link[link_id], link); + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + struct ieee80211_sub_if_data *ap_bss; + struct ieee80211_bss_conf *ap_bss_conf; + + ap_bss = container_of(sdata->bss, + struct ieee80211_sub_if_data, u.ap); + ap_bss_conf = sdata_dereference(ap_bss->vif.link_conf[link_id], + ap_bss); + memcpy(link_conf, ap_bss_conf, sizeof(*link_conf)); + } + link->sdata = sdata; link->link_id = link_id; link->conf = link_conf; @@ -54,6 +130,7 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, if (!deflink) { switch (sdata->vif.type) { case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: ether_addr_copy(link_conf->addr, sdata->wdev.links[link_id].addr); 
link_conf->bssid = link_conf->addr; @@ -177,6 +254,7 @@ static void ieee80211_set_vif_links_bitmaps(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: /* in an AP all links are always active */ sdata->vif.active_links = valid_links; @@ -278,12 +356,16 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, ieee80211_set_vif_links_bitmaps(sdata, new_links, dormant_links); /* tell the driver */ - ret = drv_change_vif_links(sdata->local, sdata, - old_links & old_active, - new_links & sdata->vif.active_links, - old); + if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN) + ret = drv_change_vif_links(sdata->local, sdata, + old_links & old_active, + new_links & sdata->vif.active_links, + old); if (!new_links) ieee80211_debugfs_recreate_netdev(sdata, false); + + if (sdata->vif.type == NL80211_IFTYPE_AP) + ieee80211_update_apvlan_links(sdata); } if (ret) { diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 7257f5610af5..a381b4b756ea 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -8,6 +8,7 @@ #include <linux/slab.h> #include <linux/unaligned.h> +#include <net/sock.h> #include "ieee80211_i.h" #include "mesh.h" #include "wme.h" @@ -776,7 +777,7 @@ bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata, if (ethertype < ETH_P_802_3_MIN) return false; - if (skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) + if (skb->sk && sock_flag(skb->sk, SOCK_WIFI_STATUS)) return false; if (skb->ip_summed == CHECKSUM_PARTIAL) { @@ -956,13 +957,10 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) u8 *pos; struct ieee80211_sub_if_data *sdata; int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon); - u32 rate_flags; sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); sband = ieee80211_get_sband(sdata); - rate_flags = - ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper); ie_len_he_cap = ieee80211_ie_len_he_cap(sdata); ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata); @@ -1091,7 +1089,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) if (ieee80211_put_srates_elem(skb, sband, sdata->vif.bss_conf.basic_rates, - rate_flags, 0, WLAN_EID_SUPP_RATES) || + 0, WLAN_EID_SUPP_RATES) || mesh_add_ds_params_ie(sdata, skb)) goto out_free; @@ -1104,7 +1102,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) if (ieee80211_put_srates_elem(skb, sband, sdata->vif.bss_conf.basic_rates, - rate_flags, 0, WLAN_EID_EXT_SUPP_RATES) || + 0, WLAN_EID_EXT_SUPP_RATES) || mesh_add_rsn_ie(sdata, skb) || mesh_add_ht_cap_ie(sdata, skb) || mesh_add_ht_oper_ie(sdata, skb) || diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 9f9cb5af0a97..0319674be832 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -22,7 +22,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath); static u32 mesh_table_hash(const void *addr, u32 len, u32 seed) { /* Use last four bytes of hw addr as hash index */ - return jhash_1word(__get_unaligned_cpu32((u8 *)addr + 2), seed); + return jhash_1word(get_unaligned((u32 *)((u8 *)addr + 2)), seed); } static const struct rhashtable_params mesh_rht_params = { diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 96e0a861886a..9c6a2b342170 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -264,7 +264,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, if (action != WLAN_SP_MESH_PEERING_CLOSE) { 
struct ieee80211_supported_band *sband; - u32 rate_flags, basic_rates; + u32 basic_rates; sband = ieee80211_get_sband(sdata); if (!sband) { @@ -280,16 +280,12 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, put_unaligned_le16(sta->sta.aid, pos); } - rate_flags = - ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper); basic_rates = sdata->vif.bss_conf.basic_rates; if (ieee80211_put_srates_elem(skb, sband, basic_rates, - rate_flags, 0, - WLAN_EID_SUPP_RATES) || + 0, WLAN_EID_SUPP_RATES) || ieee80211_put_srates_elem(skb, sband, basic_rates, - rate_flags, 0, - WLAN_EID_EXT_SUPP_RATES) || + 0, WLAN_EID_EXT_SUPP_RATES) || mesh_add_rsn_ie(sdata, skb) || mesh_add_meshid_ie(sdata, skb) || mesh_add_meshconf_ie(sdata, skb)) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 35eaf0812c5b..b84150dbfe8c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1525,9 +1525,9 @@ static void ieee80211_assoc_add_rates(struct ieee80211_local *local, rates = ~0; } - ieee80211_put_srates_elem(skb, sband, 0, 0, ~rates, + ieee80211_put_srates_elem(skb, sband, 0, ~rates, WLAN_EID_SUPP_RATES); - ieee80211_put_srates_elem(skb, sband, 0, 0, ~rates, + ieee80211_put_srates_elem(skb, sband, 0, ~rates, WLAN_EID_EXT_SUPP_RATES); } diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 6da39c864f45..96584b39215e 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -1101,7 +1101,6 @@ int ieee80211_parse_bitrates(enum nl80211_chan_width width, const struct ieee80211_supported_band *sband, const u8 *srates, int srates_len, u32 *rates) { - u32 rate_flags = ieee80211_chanwidth_rate_flags(width); struct ieee80211_rate *br; int brate, rate, i, j, count = 0; @@ -1112,8 +1111,6 @@ int ieee80211_parse_bitrates(enum nl80211_chan_width width, for (j = 0; j < sband->n_bitrates; j++) { br = &sband->bitrates[j]; - if ((rate_flags & br->flags) != rate_flags) - continue; brate = DIV_ROUND_UP(br->bitrate, 5); if (brate == rate) { diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 0d056db9f81e..3cb2ad6d0b28 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -368,9 +368,8 @@ static void __rate_control_send_low(struct ieee80211_hw *hw, struct ieee80211_tx_info *info, u32 rate_mask) { + u32 rate_flags = 0; int i; - u32 rate_flags = - ieee80211_chandef_rate_flags(&hw->conf.chandef); if (sband->band == NL80211_BAND_S1GHZ) { info->control.rates[0].flags |= IEEE80211_TX_RC_S1G_MCS; @@ -778,14 +777,9 @@ static bool rate_control_cap_mask(struct ieee80211_sub_if_data *sdata, u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN], u16 vht_mask[NL80211_VHT_NSS_MAX]) { - u32 i, flags; + u32 i; *mask = sdata->rc_rateidx_mask[sband->band]; - flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper); - for (i = 0; i < sband->n_bitrates; i++) { - if ((flags & sband->bitrates[i].flags) != flags) - *mask &= ~BIT(i); - } if (*mask == (1 << sband->n_bitrates) - 1 && !sdata->rc_has_mcs_mask[sband->band] && @@ -990,8 +984,6 @@ int rate_control_set_rates(struct ieee80211_hw *hw, if (sta->uploaded) drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta); - ieee80211_sta_set_expected_throughput(pubsta, sta_get_expected_throughput(sta)); - return 0; } EXPORT_SYMBOL(rate_control_set_rates); diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 706cbc99f718..f66910013218 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1873,16 +1873,13 @@ minstrel_ht_free_sta(void *priv, struct ieee80211_sta *sta, 
void *priv_sta) static void minstrel_ht_fill_rate_array(u8 *dest, struct ieee80211_supported_band *sband, - const s16 *bitrates, int n_rates, u32 rate_flags) + const s16 *bitrates, int n_rates) { int i, j; for (i = 0; i < sband->n_bitrates; i++) { struct ieee80211_rate *rate = &sband->bitrates[i]; - if ((rate_flags & sband->bitrates[i].flags) != rate_flags) - continue; - for (j = 0; j < n_rates; j++) { if (rate->bitrate != bitrates[j]) continue; @@ -1898,7 +1895,6 @@ minstrel_ht_init_cck_rates(struct minstrel_priv *mp) { static const s16 bitrates[4] = { 10, 20, 55, 110 }; struct ieee80211_supported_band *sband; - u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef); memset(mp->cck_rates, 0xff, sizeof(mp->cck_rates)); sband = mp->hw->wiphy->bands[NL80211_BAND_2GHZ]; @@ -1908,8 +1904,7 @@ minstrel_ht_init_cck_rates(struct minstrel_priv *mp) BUILD_BUG_ON(ARRAY_SIZE(mp->cck_rates) != ARRAY_SIZE(bitrates)); minstrel_ht_fill_rate_array(mp->cck_rates, sband, minstrel_cck_bitrates, - ARRAY_SIZE(minstrel_cck_bitrates), - rate_flags); + ARRAY_SIZE(minstrel_cck_bitrates)); } static void @@ -1917,7 +1912,6 @@ minstrel_ht_init_ofdm_rates(struct minstrel_priv *mp, enum nl80211_band band) { static const s16 bitrates[8] = { 60, 90, 120, 180, 240, 360, 480, 540 }; struct ieee80211_supported_band *sband; - u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef); memset(mp->ofdm_rates[band], 0xff, sizeof(mp->ofdm_rates[band])); sband = mp->hw->wiphy->bands[band]; @@ -1927,8 +1921,7 @@ minstrel_ht_init_ofdm_rates(struct minstrel_priv *mp, enum nl80211_band band) BUILD_BUG_ON(ARRAY_SIZE(mp->ofdm_rates[band]) != ARRAY_SIZE(bitrates)); minstrel_ht_fill_rate_array(mp->ofdm_rates[band], sband, minstrel_ofdm_bitrates, - ARRAY_SIZE(minstrel_ofdm_bitrates), - rate_flags); + ARRAY_SIZE(minstrel_ofdm_bitrates)); } static void * diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index c6015cd00372..7422888d3640 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -147,14 +147,14 @@ validate_chandef_by_6ghz_he_eht_oper(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; u32 control_freq, center_freq1, center_freq2; enum nl80211_chan_width chan_width; - struct { - struct ieee80211_he_operation _oper; - struct ieee80211_he_6ghz_oper _6ghz_oper; - } __packed he; - struct { - struct ieee80211_eht_operation _oper; - struct ieee80211_eht_operation_info _oper_info; - } __packed eht; + DEFINE_RAW_FLEX(struct ieee80211_he_operation, he, optional, + sizeof(struct ieee80211_he_6ghz_oper)); + struct ieee80211_he_6ghz_oper *_6ghz_oper = + (struct ieee80211_he_6ghz_oper *)he->optional; + DEFINE_RAW_FLEX(struct ieee80211_eht_operation, eht, optional, + sizeof(struct ieee80211_eht_operation_info)); + struct ieee80211_eht_operation_info *_oper_info = + (struct ieee80211_eht_operation_info *)eht->optional; const struct ieee80211_eht_operation *eht_oper; if (conn->mode < IEEE80211_CONN_MODE_HE) { @@ -167,38 +167,38 @@ validate_chandef_by_6ghz_he_eht_oper(struct ieee80211_sub_if_data *sdata, center_freq2 = chandef->center_freq2; chan_width = chandef->width; - he._oper.he_oper_params = + he->he_oper_params = le32_encode_bits(1, IEEE80211_HE_OPERATION_6GHZ_OP_INFO); - he._6ghz_oper.primary = + _6ghz_oper->primary = ieee80211_frequency_to_channel(control_freq); - he._6ghz_oper.ccfs0 = ieee80211_frequency_to_channel(center_freq1); - he._6ghz_oper.ccfs1 = center_freq2 ? 
+ _6ghz_oper->ccfs0 = ieee80211_frequency_to_channel(center_freq1); + _6ghz_oper->ccfs1 = center_freq2 ? ieee80211_frequency_to_channel(center_freq2) : 0; switch (chan_width) { case NL80211_CHAN_WIDTH_320: - he._6ghz_oper.ccfs1 = he._6ghz_oper.ccfs0; - he._6ghz_oper.ccfs0 += control_freq < center_freq1 ? -16 : 16; - he._6ghz_oper.control = IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ; + _6ghz_oper->ccfs1 = _6ghz_oper->ccfs0; + _6ghz_oper->ccfs0 += control_freq < center_freq1 ? -16 : 16; + _6ghz_oper->control = IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ; break; case NL80211_CHAN_WIDTH_160: - he._6ghz_oper.ccfs1 = he._6ghz_oper.ccfs0; - he._6ghz_oper.ccfs0 += control_freq < center_freq1 ? -8 : 8; + _6ghz_oper->ccfs1 = _6ghz_oper->ccfs0; + _6ghz_oper->ccfs0 += control_freq < center_freq1 ? -8 : 8; fallthrough; case NL80211_CHAN_WIDTH_80P80: - he._6ghz_oper.control = + _6ghz_oper->control = IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ; break; case NL80211_CHAN_WIDTH_80: - he._6ghz_oper.control = + _6ghz_oper->control = IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ; break; case NL80211_CHAN_WIDTH_40: - he._6ghz_oper.control = + _6ghz_oper->control = IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ; break; default: - he._6ghz_oper.control = + _6ghz_oper->control = IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ; break; } @@ -206,15 +206,14 @@ validate_chandef_by_6ghz_he_eht_oper(struct ieee80211_sub_if_data *sdata, if (conn->mode < IEEE80211_CONN_MODE_EHT) { eht_oper = NULL; } else { - eht._oper.params = IEEE80211_EHT_OPER_INFO_PRESENT; - eht._oper_info.control = he._6ghz_oper.control; - eht._oper_info.ccfs0 = he._6ghz_oper.ccfs0; - eht._oper_info.ccfs1 = he._6ghz_oper.ccfs1; - eht_oper = &eht._oper; + eht->params = IEEE80211_EHT_OPER_INFO_PRESENT; + _oper_info->control = _6ghz_oper->control; + _oper_info->ccfs0 = _6ghz_oper->ccfs0; + _oper_info->ccfs1 = _6ghz_oper->ccfs1; + eht_oper = eht; } - if (!ieee80211_chandef_he_6ghz_oper(local, &he._oper, - eht_oper, chandef)) + if (!ieee80211_chandef_he_6ghz_oper(local, he, eht_oper, chandef)) chandef->chan = NULL; } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 248e1f63bf73..84b18be1f0b1 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -18,7 +18,6 @@ #include <linux/timer.h> #include <linux/rtnetlink.h> -#include <net/codel.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-ops.h" @@ -701,12 +700,6 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, } } - sta->cparams.ce_threshold = CODEL_DISABLED_THRESHOLD; - sta->cparams.target = MS2TIME(20); - sta->cparams.interval = MS2TIME(100); - sta->cparams.ecn = true; - sta->cparams.ce_threshold_selector = 0; - sta->cparams.ce_threshold_mask = 0; sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); @@ -2905,27 +2898,6 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta) return sta->deflink.status_stats.last_ack; } -static void sta_update_codel_params(struct sta_info *sta, u32 thr) -{ - if (thr && thr < STA_SLOW_THRESHOLD * sta->local->num_sta) { - sta->cparams.target = MS2TIME(50); - sta->cparams.interval = MS2TIME(300); - sta->cparams.ecn = false; - } else { - sta->cparams.target = MS2TIME(20); - sta->cparams.interval = MS2TIME(100); - sta->cparams.ecn = true; - } -} - -void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta, - u32 thr) -{ - struct sta_info *sta = container_of(pubsta, struct sta_info, sta); - - sta_update_codel_params(sta, thr); -} - int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id) { struct 
ieee80211_sub_if_data *sdata = sta->sdata; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 07b7ec39a52f..7a95d8d34fca 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -466,14 +466,6 @@ struct ieee80211_fragment_cache { unsigned int next; }; -/* - * The bandwidth threshold below which the per-station CoDel parameters will be - * scaled to be more lenient (to prevent starvation of slow stations). This - * value will be scaled by the number of active stations when it is being - * applied. - */ -#define STA_SLOW_THRESHOLD 6000 /* 6 Mbps */ - /** * struct link_sta_info - Link STA information * All link specific sta info are stored here for reference. This can be @@ -626,7 +618,6 @@ struct link_sta_info { * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) * @rcu_head: RCU head used for freeing this station struct - * @cparams: CoDel parameters for this station. * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) * @amsdu_mesh_control: track the mesh A-MSDU format used by the peer: * @@ -717,8 +708,6 @@ struct sta_info { struct dentry *debugfs_dir; #endif - struct codel_params cparams; - u8 reserved_tid; s8 amsdu_mesh_control; diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 2f92e7c7f203..94714f8ffd22 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -382,8 +382,8 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link, if (WARN_ON_ONCE(!sband)) return; - ieee80211_put_srates_elem(skb, sband, 0, 0, 0, WLAN_EID_SUPP_RATES); - ieee80211_put_srates_elem(skb, sband, 0, 0, 0, WLAN_EID_EXT_SUPP_RATES); + ieee80211_put_srates_elem(skb, sband, 0, 0, WLAN_EID_SUPP_RATES); + ieee80211_put_srates_elem(skb, sband, 0, 0, WLAN_EID_EXT_SUPP_RATES); ieee80211_tdls_add_supp_channels(sdata, skb); /* add any custom IEs that go before Extended Capabilities */ diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 20179db88c4a..3b9392a6ddb2 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -26,6 +26,7 @@ #include <net/codel_impl.h> #include <linux/unaligned.h> #include <net/fq_impl.h> +#include <net/sock.h> #include <net/gso.h> #include "ieee80211_i.h" @@ -49,19 +50,11 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, struct ieee80211_supported_band *sband; struct ieee80211_hdr *hdr; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_chanctx_conf *chanctx_conf; - u32 rate_flags = 0; /* assume HW handles this */ if (tx->rate.flags & (IEEE80211_TX_RC_MCS | IEEE80211_TX_RC_VHT_MCS)) return 0; - rcu_read_lock(); - chanctx_conf = rcu_dereference(tx->sdata->vif.bss_conf.chanctx_conf); - if (chanctx_conf) - rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def); - rcu_read_unlock(); - /* uh huh? 
*/ if (WARN_ON_ONCE(tx->rate.idx < 0)) return 0; @@ -138,9 +131,6 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, if (r->bitrate > txrate->bitrate) break; - if ((rate_flags & r->flags) != rate_flags) - continue; - if (tx->sdata->vif.bss_conf.basic_rates & BIT(i)) rate = r->bitrate; @@ -1402,16 +1392,9 @@ static struct sk_buff *fq_tin_dequeue_func(struct fq *fq, local = container_of(fq, struct ieee80211_local, fq); txqi = container_of(tin, struct txq_info, tin); + cparams = &local->cparams; cstats = &txqi->cstats; - if (txqi->txq.sta) { - struct sta_info *sta = container_of(txqi->txq.sta, - struct sta_info, sta); - cparams = &sta->cparams; - } else { - cparams = &local->cparams; - } - if (flow == &tin->default_flow) cvars = &txqi->def_cvars; else @@ -2876,8 +2859,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, } if (unlikely(!multicast && - ((skb->sk && - skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) || + ((skb->sk && sock_flag(skb->sk, SOCK_WIFI_STATUS)) || ctrl_flags & IEEE80211_TX_CTL_REQ_TX_STATUS))) info_id = ieee80211_store_ack_skb(local, skb, &info_flags, cookie); @@ -3774,7 +3756,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, return false; /* don't handle TX status request here either */ - if (skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) + if (skb->sk && sock_flag(skb->sk, SOCK_WIFI_STATUS)) return false; if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { @@ -4526,8 +4508,10 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, IEEE80211_TX_CTRL_MLO_LINK_UNSPEC, NULL); } else if (ieee80211_vif_is_mld(&sdata->vif) && - sdata->vif.type == NL80211_IFTYPE_AP && - !ieee80211_hw_check(&sdata->local->hw, MLO_MCAST_MULTI_LINK_TX)) { + ((sdata->vif.type == NL80211_IFTYPE_AP && + !ieee80211_hw_check(&sdata->local->hw, MLO_MCAST_MULTI_LINK_TX)) || + (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && + !sdata->wdev.use_4addr))) { ieee80211_mlo_multicast_tx(dev, skb); } else { normal: @@ -4664,8 +4648,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, memcpy(IEEE80211_SKB_CB(seg), info, sizeof(*info)); } - if (unlikely(skb->sk && - skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) { + if (unlikely(skb->sk && sock_flag(skb->sk, SOCK_WIFI_STATUS))) { info->status_data = ieee80211_store_ack_skb(local, skb, &info->flags, NULL); if (info->status_data) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index dec6e16b8c7d..27d414efa3fd 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1204,7 +1204,6 @@ static int ieee80211_put_preq_ies_band(struct sk_buff *skb, struct ieee80211_supported_band *sband; int i, err; size_t noffset; - u32 rate_flags; bool have_80mhz = false; *offset = 0; @@ -1213,13 +1212,11 @@ static int ieee80211_put_preq_ies_band(struct sk_buff *skb, if (WARN_ON_ONCE(!sband)) return 0; - rate_flags = ieee80211_chandef_rate_flags(chandef); - /* For direct scan add S1G IE and consider its override bits */ if (band == NL80211_BAND_S1GHZ) return ieee80211_put_s1g_cap(skb, &sband->s1g_cap); - err = ieee80211_put_srates_elem(skb, sband, 0, rate_flags, + err = ieee80211_put_srates_elem(skb, sband, 0, ~rate_mask, WLAN_EID_SUPP_RATES); if (err) return err; @@ -1241,7 +1238,7 @@ static int ieee80211_put_preq_ies_band(struct sk_buff *skb, *offset = noffset; } - err = ieee80211_put_srates_elem(skb, sband, 0, rate_flags, + err = ieee80211_put_srates_elem(skb, sband, 0, ~rate_mask, WLAN_EID_EXT_SUPP_RATES); if (err) return err; @@ -1522,16 +1519,13 @@ 
u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, { struct ieee80211_supported_band *sband; size_t num_rates; - u32 supp_rates, rate_flags; + u32 supp_rates; int i, j; sband = sdata->local->hw.wiphy->bands[band]; if (WARN_ON(!sband)) return 1; - rate_flags = - ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper); - num_rates = sband->n_bitrates; supp_rates = 0; for (i = 0; i < elems->supp_rates_len + @@ -1551,12 +1545,7 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, continue; for (j = 0; j < num_rates; j++) { - int brate; - if ((rate_flags & sband->bitrates[j].flags) - != rate_flags) - continue; - - brate = sband->bitrates[j].bitrate; + int brate = sband->bitrates[j].bitrate; if (brate == own_rate) { supp_rates |= BIT(j); @@ -3223,15 +3212,13 @@ bool ieee80211_chandef_s1g_oper(const struct ieee80211_s1g_oper_ie *oper, int ieee80211_put_srates_elem(struct sk_buff *skb, const struct ieee80211_supported_band *sband, - u32 basic_rates, u32 rate_flags, u32 masked_rates, + u32 basic_rates, u32 masked_rates, u8 element_id) { u8 i, rates, skip; rates = 0; for (i = 0; i < sband->n_bitrates; i++) { - if ((rate_flags & sband->bitrates[i].flags) != rate_flags) - continue; if (masked_rates & BIT(i)) continue; rates++; @@ -3257,8 +3244,6 @@ int ieee80211_put_srates_elem(struct sk_buff *skb, int rate; u8 basic; - if ((rate_flags & sband->bitrates[i].flags) != rate_flags) - continue; if (masked_rates & BIT(i)) continue; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 1f63b32d76d6..d536c97144e9 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2095,12 +2095,12 @@ static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct rtmsg *rtm; int err, i; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + rtm = nlmsg_payload(nlh, sizeof(*rtm)); + if (!rtm) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for FIB dump request"); return -EINVAL; } - rtm = nlmsg_data(nlh); if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos || rtm->rtm_table || rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) { @@ -2288,7 +2288,8 @@ static int mpls_valid_getroute_req(struct sk_buff *skb, struct rtmsg *rtm; int i, err; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + rtm = nlmsg_payload(nlh, sizeof(*rtm)); + if (!rtm) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for get route request"); return -EINVAL; @@ -2298,7 +2299,6 @@ static int mpls_valid_getroute_req(struct sk_buff *skb, return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy, extack); - rtm = nlmsg_data(nlh); if ((rtm->rtm_dst_len && rtm->rtm_dst_len != 20) || rtm->rtm_src_len || rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type) { diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 19eb9292bd60..0c24545f0e8d 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -28,6 +28,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC), SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), + SNMP_MIB_ITEM("MPJoinRejected", MPTCP_MIB_JOINREJECTED), SNMP_MIB_ITEM("MPJoinSynTx", MPTCP_MIB_JOINSYNTX), SNMP_MIB_ITEM("MPJoinSynTxCreatSkErr", MPTCP_MIB_JOINSYNTXCREATSKERR), SNMP_MIB_ITEM("MPJoinSynTxBindErr", MPTCP_MIB_JOINSYNTXBINDERR), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 128282982843..250c6b77977e 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -23,6 +23,7 @@ enum 
linux_mptcp_mib_field { MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */ MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */ MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */ + MPTCP_MIB_JOINREJECTED, /* The PM rejected the JOIN request */ MPTCP_MIB_JOINSYNTX, /* Sending a SYN + MP_JOIN */ MPTCP_MIB_JOINSYNTXCREATSKERR, /* Not able to create a socket when sending a SYN + MP_JOIN */ MPTCP_MIB_JOINSYNTXBINDERR, /* Not able to bind() the address when sending a SYN + MP_JOIN */ diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 31747f974941..1306d4dc287b 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -151,10 +151,13 @@ bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr) { struct mptcp_pm_add_entry *entry; + bool ret; entry = mptcp_pm_del_add_timer(msk, addr, false); + ret = entry; kfree(entry); - return entry; + + return ret; } bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 44f7ab463d75..0749733ea897 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -46,7 +46,9 @@ static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_sm static void __mptcp_destroy_sock(struct sock *sk); static void mptcp_check_send_data_fin(struct sock *sk); -DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); +DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; static struct net_device *mptcp_napi_dev; /* Returns end sequence number of the receiver's advertised window */ @@ -3142,9 +3144,9 @@ static int mptcp_disconnect(struct sock *sk, int flags) #if IS_ENABLED(CONFIG_MPTCP_IPV6) static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) { - unsigned int offset = sizeof(struct mptcp6_sock) - sizeof(struct ipv6_pinfo); + struct mptcp6_sock *msk6 = container_of(mptcp_sk(sk), struct mptcp6_sock, msk); - return (struct ipv6_pinfo *)(((u8 *)sk) + offset); + return &msk6->np; } static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk) @@ -3527,8 +3529,10 @@ bool mptcp_finish_join(struct sock *ssk) return true; } - if (!mptcp_pm_allow_new_subflow(msk)) + if (!mptcp_pm_allow_new_subflow(msk)) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_JOINREJECTED); goto err_prohibited; + } /* If we can't acquire msk socket lock here, let the release callback * handle it diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d409586b5977..3dd11dd3ba16 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -479,6 +479,7 @@ mptcp_subflow_rsk(const struct request_sock *rsk) struct mptcp_delegated_action { struct napi_struct napi; + local_lock_t bh_lock; struct list_head head; }; @@ -670,9 +671,11 @@ static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, if (WARN_ON_ONCE(!list_empty(&subflow->delegated_node))) return; + local_lock_nested_bh(&mptcp_delegated_actions.bh_lock); delegated = this_cpu_ptr(&mptcp_delegated_actions); schedule = list_empty(&delegated->head); list_add_tail(&subflow->delegated_node, &delegated->head); + local_unlock_nested_bh(&mptcp_delegated_actions.bh_lock); sock_hold(mptcp_subflow_tcp_sock(subflow)); if (schedule) napi_schedule(&delegated->napi); @@ -684,11 +687,15 @@ mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated) { struct mptcp_subflow_context *ret; - if (list_empty(&delegated->head)) + local_lock_nested_bh(&mptcp_delegated_actions.bh_lock); 
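+	/* The delegated list is manipulated with bottom halves disabled;
+	 * the nested-BH local lock adds the per-CPU exclusion that
+	 * disabling bottom halves alone no longer guarantees on
+	 * PREEMPT_RT.
+	 */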
+ if (list_empty(&delegated->head)) { + local_unlock_nested_bh(&mptcp_delegated_actions.bh_lock); return NULL; + } ret = list_first_entry(&delegated->head, struct mptcp_subflow_context, delegated_node); list_del_init(&ret->delegated_node); + local_unlock_nested_bh(&mptcp_delegated_actions.bh_lock); return ret; } @@ -744,6 +751,7 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct sockaddr_storage *addr, unsigned short family); struct mptcp_sched_ops *mptcp_sched_find(const char *name); +int mptcp_validate_scheduler(struct mptcp_sched_ops *sched); int mptcp_register_scheduler(struct mptcp_sched_ops *sched); void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched); void mptcp_sched_init(void); diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index c16c6fbd4ba2..1e59072d478c 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -16,8 +16,7 @@ static DEFINE_SPINLOCK(mptcp_sched_list_lock); static LIST_HEAD(mptcp_sched_list); -static int mptcp_sched_default_get_send(struct mptcp_sock *msk, - struct mptcp_sched_data *data) +static int mptcp_sched_default_get_send(struct mptcp_sock *msk) { struct sock *ssk; @@ -29,8 +28,7 @@ static int mptcp_sched_default_get_send(struct mptcp_sock *msk, return 0; } -static int mptcp_sched_default_get_retrans(struct mptcp_sock *msk, - struct mptcp_sched_data *data) +static int mptcp_sched_default_get_retrans(struct mptcp_sock *msk) { struct sock *ssk; @@ -84,10 +82,23 @@ void mptcp_get_available_schedulers(char *buf, size_t maxlen) rcu_read_unlock(); } -int mptcp_register_scheduler(struct mptcp_sched_ops *sched) +int mptcp_validate_scheduler(struct mptcp_sched_ops *sched) { - if (!sched->get_send) + if (!sched->get_send) { + pr_err("%s does not implement required ops\n", sched->name); return -EINVAL; + } + + return 0; +} + +int mptcp_register_scheduler(struct mptcp_sched_ops *sched) +{ + int ret; + + ret = mptcp_validate_scheduler(sched); + if (ret) + return ret; spin_lock(&mptcp_sched_list_lock); if (mptcp_sched_find(sched->name)) { @@ -157,7 +168,6 @@ void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, int mptcp_sched_get_send(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; - struct mptcp_sched_data *data = NULL; msk_owned_by_me(msk); @@ -178,14 +188,13 @@ int mptcp_sched_get_send(struct mptcp_sock *msk) } if (msk->sched == &mptcp_sched_default || !msk->sched) - return mptcp_sched_default_get_send(msk, data); - return msk->sched->get_send(msk, data); + return mptcp_sched_default_get_send(msk); + return msk->sched->get_send(msk); } int mptcp_sched_get_retrans(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; - struct mptcp_sched_data *data = NULL; msk_owned_by_me(msk); @@ -199,8 +208,8 @@ int mptcp_sched_get_retrans(struct mptcp_sock *msk) } if (msk->sched == &mptcp_sched_default || !msk->sched) - return mptcp_sched_default_get_retrans(msk, data); + return mptcp_sched_default_get_retrans(msk); if (msk->sched->get_retrans) - return msk->sched->get_retrans(msk, data); - return msk->sched->get_send(msk, data); + return msk->sched->get_retrans(msk); + return msk->sched->get_send(msk); } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 24c2de1891bd..15613d691bfe 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -247,6 +247,7 @@ again: if (unlikely(req->syncookie)) { if (!mptcp_can_accept_new_subflow(subflow_req->msk)) { + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINREJECTED); subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); return -EPERM; } @@ -745,15 +746,11 @@ 
struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *op EXPORT_SYMBOL(mptcp_subflow_reqsk_alloc); /* validate hmac received in third ACK */ -static bool subflow_hmac_valid(const struct request_sock *req, +static bool subflow_hmac_valid(const struct mptcp_subflow_request_sock *subflow_req, const struct mptcp_options_received *mp_opt) { - const struct mptcp_subflow_request_sock *subflow_req; + struct mptcp_sock *msk = subflow_req->msk; u8 hmac[SHA256_DIGEST_SIZE]; - struct mptcp_sock *msk; - - subflow_req = mptcp_subflow_rsk(req); - msk = subflow_req->msk; subflow_generate_hmac(READ_ONCE(msk->remote_key), READ_ONCE(msk->local_key), @@ -899,13 +896,14 @@ create_child: goto dispose_child; } - if (!subflow_hmac_valid(req, &mp_opt)) { + if (!subflow_hmac_valid(subflow_req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); goto dispose_child; } if (!mptcp_can_accept_new_subflow(owner)) { + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINREJECTED); subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); goto dispose_child; } diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 4e0842df5234..2c260f33b55c 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -143,16 +143,15 @@ struct ncsi_channel_vlan_filter { }; struct ncsi_channel_stats { - u32 hnc_cnt_hi; /* Counter cleared */ - u32 hnc_cnt_lo; /* Counter cleared */ - u32 hnc_rx_bytes; /* Rx bytes */ - u32 hnc_tx_bytes; /* Tx bytes */ - u32 hnc_rx_uc_pkts; /* Rx UC packets */ - u32 hnc_rx_mc_pkts; /* Rx MC packets */ - u32 hnc_rx_bc_pkts; /* Rx BC packets */ - u32 hnc_tx_uc_pkts; /* Tx UC packets */ - u32 hnc_tx_mc_pkts; /* Tx MC packets */ - u32 hnc_tx_bc_pkts; /* Tx BC packets */ + u64 hnc_cnt; /* Counter cleared */ + u64 hnc_rx_bytes; /* Rx bytes */ + u64 hnc_tx_bytes; /* Tx bytes */ + u64 hnc_rx_uc_pkts; /* Rx UC packets */ + u64 hnc_rx_mc_pkts; /* Rx MC packets */ + u64 hnc_rx_bc_pkts; /* Rx BC packets */ + u64 hnc_tx_uc_pkts; /* Tx UC packets */ + u64 hnc_tx_mc_pkts; /* Tx MC packets */ + u64 hnc_tx_bc_pkts; /* Tx BC packets */ u32 hnc_fcs_err; /* FCS errors */ u32 hnc_align_err; /* Alignment errors */ u32 hnc_false_carrier; /* False carrier detection */ @@ -181,7 +180,7 @@ struct ncsi_channel_stats { u32 hnc_tx_1023_frames; /* Tx 512-1023 bytes frames */ u32 hnc_tx_1522_frames; /* Tx 1024-1522 bytes frames */ u32 hnc_tx_9022_frames; /* Tx 1523-9022 bytes frames */ - u32 hnc_rx_valid_bytes; /* Rx valid bytes */ + u64 hnc_rx_valid_bytes; /* Rx valid bytes */ u32 hnc_rx_runt_pkts; /* Rx error runt packets */ u32 hnc_rx_jabber_pkts; /* Rx error jabber packets */ u32 ncsi_rx_cmds; /* Rx NCSI commands */ diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h index f2f3b5c1b941..24edb2737972 100644 --- a/net/ncsi/ncsi-pkt.h +++ b/net/ncsi/ncsi-pkt.h @@ -252,16 +252,15 @@ struct ncsi_rsp_gp_pkt { /* Get Controller Packet Statistics */ struct ncsi_rsp_gcps_pkt { struct ncsi_rsp_pkt_hdr rsp; /* Response header */ - __be32 cnt_hi; /* Counter cleared */ - __be32 cnt_lo; /* Counter cleared */ - __be32 rx_bytes; /* Rx bytes */ - __be32 tx_bytes; /* Tx bytes */ - __be32 rx_uc_pkts; /* Rx UC packets */ - __be32 rx_mc_pkts; /* Rx MC packets */ - __be32 rx_bc_pkts; /* Rx BC packets */ - __be32 tx_uc_pkts; /* Tx UC packets */ - __be32 tx_mc_pkts; /* Tx MC packets */ - __be32 tx_bc_pkts; /* Tx BC packets */ + __be64 cnt; /* Counter cleared */ + __be64 rx_bytes; /* Rx bytes */ + __be64 tx_bytes; /* Tx bytes */ + __be64 rx_uc_pkts; /* Rx UC packets */ + __be64 
rx_mc_pkts; /* Rx MC packets */ + __be64 rx_bc_pkts; /* Rx BC packets */ + __be64 tx_uc_pkts; /* Tx UC packets */ + __be64 tx_mc_pkts; /* Tx MC packets */ + __be64 tx_bc_pkts; /* Tx BC packets */ __be32 fcs_err; /* FCS errors */ __be32 align_err; /* Alignment errors */ __be32 false_carrier; /* False carrier detection */ @@ -290,11 +289,11 @@ struct ncsi_rsp_gcps_pkt { __be32 tx_1023_frames; /* Tx 512-1023 bytes frames */ __be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */ __be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */ - __be32 rx_valid_bytes; /* Rx valid bytes */ + __be64 rx_valid_bytes; /* Rx valid bytes */ __be32 rx_runt_pkts; /* Rx error runt packets */ __be32 rx_jabber_pkts; /* Rx error jabber packets */ __be32 checksum; /* Checksum */ -}; +} __packed __aligned(4); /* Get NCSI Statistics */ struct ncsi_rsp_gns_pkt { diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 4a8ce2949fae..8668888c5a2f 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -926,16 +926,15 @@ static int ncsi_rsp_handler_gcps(struct ncsi_request *nr) /* Update HNC's statistics */ ncs = &nc->stats; - ncs->hnc_cnt_hi = ntohl(rsp->cnt_hi); - ncs->hnc_cnt_lo = ntohl(rsp->cnt_lo); - ncs->hnc_rx_bytes = ntohl(rsp->rx_bytes); - ncs->hnc_tx_bytes = ntohl(rsp->tx_bytes); - ncs->hnc_rx_uc_pkts = ntohl(rsp->rx_uc_pkts); - ncs->hnc_rx_mc_pkts = ntohl(rsp->rx_mc_pkts); - ncs->hnc_rx_bc_pkts = ntohl(rsp->rx_bc_pkts); - ncs->hnc_tx_uc_pkts = ntohl(rsp->tx_uc_pkts); - ncs->hnc_tx_mc_pkts = ntohl(rsp->tx_mc_pkts); - ncs->hnc_tx_bc_pkts = ntohl(rsp->tx_bc_pkts); + ncs->hnc_cnt = be64_to_cpu(rsp->cnt); + ncs->hnc_rx_bytes = be64_to_cpu(rsp->rx_bytes); + ncs->hnc_tx_bytes = be64_to_cpu(rsp->tx_bytes); + ncs->hnc_rx_uc_pkts = be64_to_cpu(rsp->rx_uc_pkts); + ncs->hnc_rx_mc_pkts = be64_to_cpu(rsp->rx_mc_pkts); + ncs->hnc_rx_bc_pkts = be64_to_cpu(rsp->rx_bc_pkts); + ncs->hnc_tx_uc_pkts = be64_to_cpu(rsp->tx_uc_pkts); + ncs->hnc_tx_mc_pkts = be64_to_cpu(rsp->tx_mc_pkts); + ncs->hnc_tx_bc_pkts = be64_to_cpu(rsp->tx_bc_pkts); ncs->hnc_fcs_err = ntohl(rsp->fcs_err); ncs->hnc_align_err = ntohl(rsp->align_err); ncs->hnc_false_carrier = ntohl(rsp->false_carrier); @@ -964,7 +963,7 @@ static int ncsi_rsp_handler_gcps(struct ncsi_request *nr) ncs->hnc_tx_1023_frames = ntohl(rsp->tx_1023_frames); ncs->hnc_tx_1522_frames = ntohl(rsp->tx_1522_frames); ncs->hnc_tx_9022_frames = ntohl(rsp->tx_9022_frames); - ncs->hnc_rx_valid_bytes = ntohl(rsp->rx_valid_bytes); + ncs->hnc_rx_valid_bytes = be64_to_cpu(rsp->rx_valid_bytes); ncs->hnc_rx_runt_pkts = ntohl(rsp->rx_runt_pkts); ncs->hnc_rx_jabber_pkts = ntohl(rsp->rx_jabber_pkts); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 047ba81865ed..3b2183fc7e56 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1180,7 +1180,7 @@ config NETFILTER_XT_MATCH_CGROUP tristate '"control group" match support' depends on NETFILTER_ADVANCED depends on CGROUPS - select CGROUP_NET_CLASSID + select SOCK_CGROUP_DATA help Socket/process control group matching allows you to match locally generated packets based on which net_cls control group processes diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7f8b245e287a..de8d50af9b5b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -531,10 +531,8 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, p = tmpl; tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p); - if (tmpl != p) { - tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p); + if (tmpl != 
p) tmpl->proto.tmpl_padto = (char *)tmpl - (char *)p; - } } else { tmpl = kzalloc(sizeof(*tmpl), flags); if (!tmpl) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 2f666751c7e7..6c4cff10357d 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -98,69 +98,87 @@ struct ct_iter_state { struct seq_net_private p; struct hlist_nulls_head *hash; unsigned int htable_size; + unsigned int skip_elems; unsigned int bucket; u_int64_t time_now; }; -static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) +static struct nf_conntrack_tuple_hash *ct_get_next(const struct net *net, + struct ct_iter_state *st) { - struct ct_iter_state *st = seq->private; + struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; + unsigned int i; - for (st->bucket = 0; - st->bucket < st->htable_size; - st->bucket++) { - n = rcu_dereference( - hlist_nulls_first_rcu(&st->hash[st->bucket])); - if (!is_a_nulls(n)) - return n; - } - return NULL; -} + for (i = st->bucket; i < st->htable_size; i++) { + unsigned int skip = 0; -static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, - struct hlist_nulls_node *head) -{ - struct ct_iter_state *st = seq->private; +restart: + hlist_nulls_for_each_entry_rcu(h, n, &st->hash[i], hnnode) { + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + struct hlist_nulls_node *tmp = n; - head = rcu_dereference(hlist_nulls_next_rcu(head)); - while (is_a_nulls(head)) { - if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= st->htable_size) - return NULL; + if (!net_eq(net, nf_ct_net(ct))) + continue; + + if (++skip <= st->skip_elems) + continue; + + /* h should be returned, skip to nulls marker. */ + while (!is_a_nulls(tmp)) + tmp = rcu_dereference(hlist_nulls_next_rcu(tmp)); + + /* check if h is still linked to hash[i] */ + if (get_nulls_value(tmp) != i) { + skip = 0; + goto restart; + } + + st->skip_elems = skip; + st->bucket = i; + return h; } - head = rcu_dereference( - hlist_nulls_first_rcu(&st->hash[st->bucket])); - } - return head; -} -static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) -{ - struct hlist_nulls_node *head = ct_get_first(seq); + skip = 0; + if (get_nulls_value(n) != i) + goto restart; + + st->skip_elems = 0; + } - if (head) - while (pos && (head = ct_get_next(seq, head))) - pos--; - return pos ? 
NULL : head; + st->bucket = i; + return NULL; } static void *ct_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct ct_iter_state *st = seq->private; + struct net *net = seq_file_net(seq); st->time_now = ktime_get_real_ns(); rcu_read_lock(); nf_conntrack_get_ht(&st->hash, &st->htable_size); - return ct_get_idx(seq, *pos); + + if (*pos == 0) { + st->skip_elems = 0; + st->bucket = 0; + } else if (st->skip_elems) { + /* resume from last dumped entry */ + st->skip_elems--; + } + + return ct_get_next(net, st); } static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) { + struct ct_iter_state *st = s->private; + struct net *net = seq_file_net(s); + (*pos)++; - return ct_get_next(s, v); + return ct_get_next(net, st); } static void ct_seq_stop(struct seq_file *s, void *v) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a133e1c175ce..b28f6730e26d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4569,6 +4569,8 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_HANDLE] = { .type = NLA_U64 }, [NFTA_SET_EXPR] = { .type = NLA_NESTED }, [NFTA_SET_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy), + [NFTA_SET_TYPE] = { .type = NLA_REJECT }, + [NFTA_SET_COUNT] = { .type = NLA_REJECT }, }; static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = { @@ -4763,6 +4765,27 @@ static u32 nft_set_userspace_size(const struct nft_set_ops *ops, u32 size) return size; } +static noinline_for_stack int +nf_tables_fill_set_info(struct sk_buff *skb, const struct nft_set *set) +{ + unsigned int nelems; + char str[40]; + int ret; + + ret = snprintf(str, sizeof(str), "%ps", set->ops); + + /* Not expected to happen and harmless: NFTA_SET_TYPE is dumped + * to userspace purely for informational/debug purposes. 
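+	 * snprintf() NUL-terminates even when it truncates, so the type
+	 * name is always safe to hand to nla_put_string().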
+ */ + DEBUG_NET_WARN_ON_ONCE(ret >= sizeof(str)); + + if (nla_put_string(skb, NFTA_SET_TYPE, str)) + return -EMSGSIZE; + + nelems = nft_set_userspace_size(set->ops, atomic_read(&set->nelems)); + return nla_put_be32(skb, NFTA_SET_COUNT, htonl(nelems)); +} + static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, const struct nft_set *set, u16 event, u16 flags) { @@ -4843,6 +4866,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, nla_nest_end(skb, nest); + if (nf_tables_fill_set_info(skb, set)) + goto nla_put_failure; + if (set->num_exprs == 1) { nest = nla_nest_start_noflag(skb, NFTA_SET_EXPR); if (nf_tables_fill_expr_info(skb, set->exprs[0], false) < 0) diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 9b2d7463d3d3..df0798da2329 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -19,10 +19,16 @@ struct nft_quota { }; static inline bool nft_overquota(struct nft_quota *priv, - const struct sk_buff *skb) + const struct sk_buff *skb, + bool *report) { - return atomic64_add_return(skb->len, priv->consumed) >= - atomic64_read(&priv->quota); + u64 consumed = atomic64_add_return(skb->len, priv->consumed); + u64 quota = atomic64_read(&priv->quota); + + if (report) + *report = consumed >= quota; + + return consumed > quota; } static inline bool nft_quota_invert(struct nft_quota *priv) @@ -34,7 +40,7 @@ static inline void nft_quota_do_eval(struct nft_quota *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt) { - if (nft_overquota(priv, pkt->skb) ^ nft_quota_invert(priv)) + if (nft_overquota(priv, pkt->skb, NULL) ^ nft_quota_invert(priv)) regs->verdict.code = NFT_BREAK; } @@ -51,13 +57,13 @@ static void nft_quota_obj_eval(struct nft_object *obj, const struct nft_pktinfo *pkt) { struct nft_quota *priv = nft_obj_data(obj); - bool overquota; + bool overquota, report; - overquota = nft_overquota(priv, pkt->skb); + overquota = nft_overquota(priv, pkt->skb, &report); if (overquota ^ nft_quota_invert(priv)) regs->verdict.code = NFT_BREAK; - if (overquota && + if (report && !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags)) nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0, NFT_MSG_NEWOBJ, 0, nft_pf(pkt), 0, GFP_ATOMIC); diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 7be342b495f5..c5855069bdab 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -663,6 +663,9 @@ static int pipapo_realloc_mt(struct nft_pipapo_field *f, check_add_overflow(rules, extra, &rules_alloc)) return -EOVERFLOW; + if (rules_alloc > (INT_MAX / sizeof(*new_mt))) + return -ENOMEM; + new_mt = kvmalloc_array(rules_alloc, sizeof(*new_mt), GFP_KERNEL_ACCOUNT); if (!new_mt) return -ENOMEM; @@ -683,6 +686,30 @@ out_free: return 0; } + +/** + * lt_calculate_size() - Get storage size for lookup table with overflow check + * @groups: Amount of bit groups + * @bb: Number of bits grouped together in lookup table buckets + * @bsize: Size of each bucket in lookup table, in longs + * + * Return: allocation size including alignment overhead, negative on overflow + */ +static ssize_t lt_calculate_size(unsigned int groups, unsigned int bb, + unsigned int bsize) +{ + ssize_t ret = groups * NFT_PIPAPO_BUCKETS(bb) * sizeof(long); + + if (check_mul_overflow(ret, bsize, &ret)) + return -1; + if (check_add_overflow(ret, NFT_PIPAPO_ALIGN_HEADROOM, &ret)) + return -1; + if (ret > INT_MAX) + return -1; + + return ret; +} + /** * pipapo_resize() - Resize lookup or mapping table, or 
both * @f: Field containing lookup and mapping tables @@ -701,6 +728,7 @@ static int pipapo_resize(struct nft_pipapo_field *f, long *new_lt = NULL, *new_p, *old_lt = f->lt, *old_p; unsigned int new_bucket_size, copy; int group, bucket, err; + ssize_t lt_size; if (rules >= NFT_PIPAPO_RULE0_MAX) return -ENOSPC; @@ -719,10 +747,11 @@ static int pipapo_resize(struct nft_pipapo_field *f, else copy = new_bucket_size; - new_lt = kvzalloc(f->groups * NFT_PIPAPO_BUCKETS(f->bb) * - new_bucket_size * sizeof(*new_lt) + - NFT_PIPAPO_ALIGN_HEADROOM, - GFP_KERNEL); + lt_size = lt_calculate_size(f->groups, f->bb, new_bucket_size); + if (lt_size < 0) + return -ENOMEM; + + new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT); if (!new_lt) return -ENOMEM; @@ -907,7 +936,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) { unsigned int groups, bb; unsigned long *new_lt; - size_t lt_size; + ssize_t lt_size; lt_size = f->groups * NFT_PIPAPO_BUCKETS(f->bb) * f->bsize * sizeof(*f->lt); @@ -917,15 +946,17 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) groups = f->groups * 2; bb = NFT_PIPAPO_GROUP_BITS_LARGE_SET; - lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize * - sizeof(*f->lt); + lt_size = lt_calculate_size(groups, bb, f->bsize); + if (lt_size < 0) + return; } else if (f->bb == NFT_PIPAPO_GROUP_BITS_LARGE_SET && lt_size < NFT_PIPAPO_LT_SIZE_LOW) { groups = f->groups / 2; bb = NFT_PIPAPO_GROUP_BITS_SMALL_SET; - lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize * - sizeof(*f->lt); + lt_size = lt_calculate_size(groups, bb, f->bsize); + if (lt_size < 0) + return; /* Don't increase group width if the resulting lookup table size * would exceed the upper size threshold for a "small" set. @@ -936,7 +967,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) return; } - new_lt = kvzalloc(lt_size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL_ACCOUNT); + new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT); if (!new_lt) return; @@ -1451,13 +1482,15 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) for (i = 0; i < old->field_count; i++) { unsigned long *new_lt; + ssize_t lt_size; memcpy(dst, src, offsetof(struct nft_pipapo_field, lt)); - new_lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS(src->bb) * - src->bsize * sizeof(*dst->lt) + - NFT_PIPAPO_ALIGN_HEADROOM, - GFP_KERNEL_ACCOUNT); + lt_size = lt_calculate_size(src->groups, src->bb, src->bsize); + if (lt_size < 0) + goto out_lt; + + new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT); if (!new_lt) goto out_lt; @@ -1469,6 +1502,9 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) src->groups * NFT_PIPAPO_BUCKETS(src->bb)); if (src->rules > 0) { + if (src->rules_alloc > (INT_MAX / sizeof(*src->mt))) + goto out_mt; + dst->mt = kvmalloc_array(src->rules_alloc, sizeof(*src->mt), GFP_KERNEL_ACCOUNT); diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 9f54819eb52c..9082155ee558 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -168,7 +168,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) INIT_WORK(&info->timer->work, idletimer_tg_work); mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + jiffies); + secs_to_jiffies(info->timeout) + jiffies); return 0; @@ -229,7 +229,7 @@ static int idletimer_tg_create_v1(struct idletimer_tg_info_v1 *info) } else { timer_setup(&info->timer->timer, idletimer_tg_expired, 0); mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + jiffies); + 
secs_to_jiffies(info->timeout) + jiffies); } return 0; @@ -254,7 +254,7 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb, info->label, info->timeout); mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + jiffies); + secs_to_jiffies(info->timeout) + jiffies); return XT_CONTINUE; } @@ -275,7 +275,7 @@ static unsigned int idletimer_tg_target_v1(struct sk_buff *skb, alarm_start_relative(&info->timer->alarm, tout); } else { mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + jiffies); + secs_to_jiffies(info->timeout) + jiffies); } return XT_CONTINUE; @@ -320,7 +320,7 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) if (info->timer) { info->timer->refcnt++; mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + jiffies); + secs_to_jiffies(info->timeout) + jiffies); pr_debug("increased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); @@ -382,7 +382,7 @@ static int idletimer_tg_checkentry_v1(const struct xt_tgchk_param *par) } } else { mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + jiffies); + secs_to_jiffies(info->timeout) + jiffies); } pr_debug("increased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index c0f5e9a4f3c6..c437fbd59ec1 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -23,6 +23,8 @@ MODULE_DESCRIPTION("Xtables: process control group matching"); MODULE_ALIAS("ipt_cgroup"); MODULE_ALIAS("ip6t_cgroup"); +#define NET_CLS_CLASSID_INVALID_MSG "xt_cgroup: classid invalid without net_cls cgroups\n" + static int cgroup_mt_check_v0(const struct xt_mtchk_param *par) { struct xt_cgroup_info_v0 *info = par->matchinfo; @@ -30,6 +32,11 @@ static int cgroup_mt_check_v0(const struct xt_mtchk_param *par) if (info->invert & ~1) return -EINVAL; + if (!IS_ENABLED(CONFIG_CGROUP_NET_CLASSID)) { + pr_info(NET_CLS_CLASSID_INVALID_MSG); + return -EINVAL; + } + return 0; } @@ -51,6 +58,11 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) return -EINVAL; } + if (info->has_classid && !IS_ENABLED(CONFIG_CGROUP_NET_CLASSID)) { + pr_info(NET_CLS_CLASSID_INVALID_MSG); + return -EINVAL; + } + info->priv = NULL; if (info->has_path) { cgrp = cgroup_get_from_path(info->path); @@ -83,6 +95,11 @@ static int cgroup_mt_check_v2(const struct xt_mtchk_param *par) return -EINVAL; } + if (info->has_classid && !IS_ENABLED(CONFIG_CGROUP_NET_CLASSID)) { + pr_info(NET_CLS_CLASSID_INVALID_MSG); + return -EINVAL; + } + info->priv = NULL; if (info->has_path) { cgrp = cgroup_get_from_path(info->path); @@ -100,6 +117,7 @@ static int cgroup_mt_check_v2(const struct xt_mtchk_param *par) static bool cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { +#ifdef CONFIG_CGROUP_NET_CLASSID const struct xt_cgroup_info_v0 *info = par->matchinfo; struct sock *sk = skb->sk; @@ -108,6 +126,8 @@ cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) return (info->id == sock_cgroup_classid(&skb->sk->sk_cgrp_data)) ^ info->invert; +#endif + return false; } static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) @@ -123,9 +143,12 @@ static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) if (ancestor) return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^ info->invert_path; +#ifdef CONFIG_CGROUP_NET_CLASSID else return (info->classid == sock_cgroup_classid(skcd)) ^ info->invert_classid; +#endif + 
return false; } static bool cgroup_mt_v2(const struct sk_buff *skb, struct xt_action_param *par) @@ -141,9 +164,12 @@ static bool cgroup_mt_v2(const struct sk_buff *skb, struct xt_action_param *par) if (ancestor) return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^ info->invert_path; +#ifdef CONFIG_CGROUP_NET_CLASSID else return (info->classid == sock_cgroup_classid(skcd)) ^ info->invert_classid; +#endif + return false; } static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par) diff --git a/net/netlink/policy.c b/net/netlink/policy.c index 1f8909c16f14..99458da6be32 100644 --- a/net/netlink/policy.c +++ b/net/netlink/policy.c @@ -311,6 +311,8 @@ __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state, NL_POLICY_TYPE_ATTR_PAD)) goto nla_put_failure; break; + } else if (pt->validation_type == NLA_VALIDATE_FUNCTION) { + break; } nla_get_range_unsigned(pt, &range); @@ -340,6 +342,9 @@ __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state, else type = NL_ATTR_TYPE_SINT; + if (pt->validation_type == NLA_VALIDATE_FUNCTION) + break; + nla_get_range_signed(pt, &range); if (nla_put_s64(skb, NL_POLICY_TYPE_ATTR_MIN_VALUE_S, diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 2f22ca59586f..e7269a3eec79 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -39,56 +39,18 @@ #include "flow_netlink.h" #include "openvswitch_trace.h" -struct deferred_action { - struct sk_buff *skb; - const struct nlattr *actions; - int actions_len; - - /* Store pkt_key clone when creating deferred action. */ - struct sw_flow_key pkt_key; -}; - -#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN) -struct ovs_frag_data { - unsigned long dst; - struct vport *vport; - struct ovs_skb_cb cb; - __be16 inner_protocol; - u16 network_offset; /* valid only for MPLS */ - u16 vlan_tci; - __be16 vlan_proto; - unsigned int l2_len; - u8 mac_proto; - u8 l2_data[MAX_L2_LEN]; -}; - -static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage); - -#define DEFERRED_ACTION_FIFO_SIZE 10 -#define OVS_RECURSION_LIMIT 5 -#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2) -struct action_fifo { - int head; - int tail; - /* Deferred action fifo queue storage. */ - struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE]; +DEFINE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; -struct action_flow_keys { - struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD]; -}; - -static struct action_fifo __percpu *action_fifos; -static struct action_flow_keys __percpu *flow_keys; -static DEFINE_PER_CPU(int, exec_actions_level); - /* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys' * space. Return NULL if out of key spaces. 
*/ static struct sw_flow_key *clone_key(const struct sw_flow_key *key_) { - struct action_flow_keys *keys = this_cpu_ptr(flow_keys); - int level = this_cpu_read(exec_actions_level); + struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage); + struct action_flow_keys *keys = &ovs_pcpu->flow_keys; + int level = ovs_pcpu->exec_level; struct sw_flow_key *key = NULL; if (level <= OVS_DEFERRED_ACTION_THRESHOLD) { @@ -132,10 +94,9 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb, const struct nlattr *actions, const int actions_len) { - struct action_fifo *fifo; + struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos); struct deferred_action *da; - fifo = this_cpu_ptr(action_fifos); da = action_fifo_put(fifo); if (da) { da->skb = skb; @@ -794,7 +755,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage); + struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage.frag_data); struct vport *vport = data->vport; if (skb_cow_head(skb, data->l2_len) < 0) { @@ -846,7 +807,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb, unsigned int hlen = skb_network_offset(skb); struct ovs_frag_data *data; - data = this_cpu_ptr(&ovs_frag_data_storage); + data = this_cpu_ptr(&ovs_pcpu_storage.frag_data); data->dst = skb->_skb_refdst; data->vport = vport; data->cb = *OVS_CB(skb); @@ -1608,13 +1569,13 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb, if (actions) { /* Sample action */ if (clone_flow_key) - __this_cpu_inc(exec_actions_level); + __this_cpu_inc(ovs_pcpu_storage.exec_level); err = do_execute_actions(dp, skb, clone, actions, len); if (clone_flow_key) - __this_cpu_dec(exec_actions_level); + __this_cpu_dec(ovs_pcpu_storage.exec_level); } else { /* Recirc action */ clone->recirc_id = recirc_id; ovs_dp_process_packet(skb, clone); @@ -1650,7 +1611,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb, static void process_deferred_actions(struct datapath *dp) { - struct action_fifo *fifo = this_cpu_ptr(action_fifos); + struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos); /* Do not touch the FIFO in case there is no deferred actions. 
*/ if (action_fifo_is_empty(fifo)) @@ -1681,7 +1642,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, { int err, level; - level = __this_cpu_inc_return(exec_actions_level); + level = __this_cpu_inc_return(ovs_pcpu_storage.exec_level); if (unlikely(level > OVS_RECURSION_LIMIT)) { net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n", ovs_dp_name(dp)); @@ -1698,27 +1659,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, process_deferred_actions(dp); out: - __this_cpu_dec(exec_actions_level); + __this_cpu_dec(ovs_pcpu_storage.exec_level); return err; } - -int action_fifos_init(void) -{ - action_fifos = alloc_percpu(struct action_fifo); - if (!action_fifos) - return -ENOMEM; - - flow_keys = alloc_percpu(struct action_flow_keys); - if (!flow_keys) { - free_percpu(action_fifos); - return -ENOMEM; - } - - return 0; -} - -void action_fifos_exit(void) -{ - free_percpu(action_fifos); - free_percpu(flow_keys); -} diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 5d548eda742d..6a304ae2d959 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -244,11 +244,13 @@ void ovs_dp_detach_port(struct vport *p) /* Must be called with rcu_read_lock. */ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) { + struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage); const struct vport *p = OVS_CB(skb)->input_vport; struct datapath *dp = p->dp; struct sw_flow *flow; struct sw_flow_actions *sf_acts; struct dp_stats_percpu *stats; + bool ovs_pcpu_locked = false; u64 *stats_counter; u32 n_mask_hit; u32 n_cache_hit; @@ -290,10 +292,26 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) ovs_flow_stats_update(flow, key->tp.flags, skb); sf_acts = rcu_dereference(flow->sf_acts); + /* This path can be invoked recursively: Use the current task to + * identify recursive invocation - the lock must be acquired only once. + * Even with disabled bottom halves this can be preempted on PREEMPT_RT. + * Limit the locking to RT to avoid assigning `owner' if it can be + * avoided. 
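+	 * Recursion occurs when an action such as recirc re-enters
+	 * ovs_dp_process_packet() via clone_execute().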
+ */ + if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) { + local_lock_nested_bh(&ovs_pcpu_storage.bh_lock); + ovs_pcpu->owner = current; + ovs_pcpu_locked = true; + } + error = ovs_execute_actions(dp, skb, sf_acts, key); if (unlikely(error)) net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n", ovs_dp_name(dp), error); + if (ovs_pcpu_locked) { + ovs_pcpu->owner = NULL; + local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock); + } stats_counter = &stats->n_hit; @@ -671,7 +689,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) sf_acts = rcu_dereference(flow->sf_acts); local_bh_disable(); + local_lock_nested_bh(&ovs_pcpu_storage.bh_lock); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + this_cpu_write(ovs_pcpu_storage.owner, current); err = ovs_execute_actions(dp, packet, sf_acts, &flow->key); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + this_cpu_write(ovs_pcpu_storage.owner, NULL); + local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock); local_bh_enable(); rcu_read_unlock(); @@ -2729,13 +2753,9 @@ static int __init dp_init(void) pr_info("Open vSwitch switching datapath\n"); - err = action_fifos_init(); - if (err) - goto error; - err = ovs_internal_dev_rtnl_link_register(); if (err) - goto error_action_fifos_exit; + goto error; err = ovs_flow_init(); if (err) @@ -2778,8 +2798,6 @@ error_flow_exit: ovs_flow_exit(); error_unreg_rtnl_link: ovs_internal_dev_rtnl_link_unregister(); -error_action_fifos_exit: - action_fifos_exit(); error: return err; } @@ -2795,7 +2813,6 @@ static void dp_cleanup(void) ovs_vport_exit(); ovs_flow_exit(); ovs_internal_dev_rtnl_link_unregister(); - action_fifos_exit(); } module_init(dp_init); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 384ca77f4e79..1b5348b0f559 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -13,6 +13,7 @@ #include <linux/skbuff.h> #include <linux/u64_stats_sync.h> #include <net/ip_tunnels.h> +#include <net/mpls.h> #include "conntrack.h" #include "flow.h" @@ -173,6 +174,54 @@ struct ovs_net { bool xt_label; }; +#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN) +struct ovs_frag_data { + unsigned long dst; + struct vport *vport; + struct ovs_skb_cb cb; + __be16 inner_protocol; + u16 network_offset; /* valid only for MPLS */ + u16 vlan_tci; + __be16 vlan_proto; + unsigned int l2_len; + u8 mac_proto; + u8 l2_data[MAX_L2_LEN]; +}; + +struct deferred_action { + struct sk_buff *skb; + const struct nlattr *actions; + int actions_len; + + /* Store pkt_key clone when creating deferred action. */ + struct sw_flow_key pkt_key; +}; + +#define DEFERRED_ACTION_FIFO_SIZE 10 +#define OVS_RECURSION_LIMIT 5 +#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2) + +struct action_fifo { + int head; + int tail; + /* Deferred action fifo queue storage. */ + struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE]; +}; + +struct action_flow_keys { + struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD]; +}; + +struct ovs_pcpu_storage { + struct action_fifo action_fifos; + struct action_flow_keys flow_keys; + struct ovs_frag_data frag_data; + int exec_level; + struct task_struct *owner; + local_lock_t bh_lock; +}; +DECLARE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage); + /** * enum ovs_pkt_hash_types - hash info to include with a packet * to send to userspace. 
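The ovs_pcpu_storage object introduced above folds what used to be three separate per-CPU allocations (the deferred-action FIFO, the cloned flow keys and the fragment data) plus the recursion counter into one statically initialised per-CPU structure guarded by a nested-BH local lock. A minimal sketch of the access pattern, using hypothetical demo_* names rather than the OVS symbols:

	#include <linux/local_lock.h>
	#include <linux/percpu.h>

	struct demo_pcpu_storage {
		int exec_level;		/* per-CPU recursion depth */
		local_lock_t bh_lock;	/* per-CPU exclusion on PREEMPT_RT */
	};

	static DEFINE_PER_CPU(struct demo_pcpu_storage, demo_pcpu_storage) = {
		.bh_lock = INIT_LOCAL_LOCK(bh_lock),
	};

	static void demo_use(void)
	{
		struct demo_pcpu_storage *s;

		/* Caller runs in BH context; on PREEMPT_RT the local lock
		 * supplies the per-CPU exclusion that BH disabling alone
		 * no longer implies.
		 */
		local_lock_nested_bh(&demo_pcpu_storage.bh_lock);
		s = this_cpu_ptr(&demo_pcpu_storage);
		s->exec_level++;
		/* ... use the per-CPU scratch state ... */
		s->exec_level--;
		local_unlock_nested_bh(&demo_pcpu_storage.bh_lock);
	}

On kernels without PREEMPT_RT, local_lock_nested_bh() reduces to a lockdep annotation, so the consolidation should cost nothing on the fast path.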
@@ -281,9 +330,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, void ovs_dp_notify_wq(struct work_struct *work); -int action_fifos_init(void); -void action_fifos_exit(void); - /* 'KEY' must not have any bits set outside of the 'MASK' */ #define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK))) #define OVS_SET_MASKED(OLD, KEY, MASK) ((OLD) = OVS_MASKED(OLD, KEY, MASK)) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 518be23e48ea..ad64bb9ab5e2 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -3049,7 +3049,8 @@ static int validate_userspace(const struct nlattr *attr) struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; int error; - error = nla_parse_nested_deprecated(a, OVS_USERSPACE_ATTR_MAX, attr, + error = nla_parse_deprecated_strict(a, OVS_USERSPACE_ATTR_MAX, + nla_data(attr), nla_len(attr), userspace_policy, NULL); if (error) return error; diff --git a/net/rds/page.c b/net/rds/page.c index 7cc57e098ddb..afb151eac271 100644 --- a/net/rds/page.c +++ b/net/rds/page.c @@ -40,10 +40,12 @@ struct rds_page_remainder { struct page *r_page; unsigned long r_offset; + local_lock_t bh_lock; }; -static -DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders); +static DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; /** * rds_page_remainder_alloc - build up regions of a message. @@ -69,7 +71,6 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, gfp_t gfp) { struct rds_page_remainder *rem; - unsigned long flags; struct page *page; int ret; @@ -87,8 +88,9 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, goto out; } - rem = &per_cpu(rds_page_remainders, get_cpu()); - local_irq_save(flags); + local_bh_disable(); + local_lock_nested_bh(&rds_page_remainders.bh_lock); + rem = this_cpu_ptr(&rds_page_remainders); while (1) { /* avoid a tiny region getting stuck by tossing it */ @@ -116,13 +118,14 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, } /* alloc if there is nothing for us to use */ - local_irq_restore(flags); - put_cpu(); + local_unlock_nested_bh(&rds_page_remainders.bh_lock); + local_bh_enable(); page = alloc_page(gfp); - rem = &per_cpu(rds_page_remainders, get_cpu()); - local_irq_save(flags); + local_bh_disable(); + local_lock_nested_bh(&rds_page_remainders.bh_lock); + rem = this_cpu_ptr(&rds_page_remainders); if (!page) { ret = -ENOMEM; @@ -140,8 +143,8 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, rem->r_offset = 0; } - local_irq_restore(flags); - put_cpu(); + local_unlock_nested_bh(&rds_page_remainders.bh_lock); + local_bh_enable(); out: rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret, ret ? NULL : sg_page(scat), ret ? 0 : scat->offset, diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index a20986806fea..f60b81c66078 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -67,6 +67,29 @@ config RXKAD See Documentation/networking/rxrpc.rst. +config RXGK + bool "RxRPC GSSAPI security" + select CRYPTO_KRB5 + select CRYPTO_MANAGER + select CRYPTO_KRB5ENC + select CRYPTO_AUTHENC + select CRYPTO_SKCIPHER + select CRYPTO_HASH_INFO + select CRYPTO_HMAC + select CRYPTO_CMAC + select CRYPTO_SHA1 + select CRYPTO_SHA256 + select CRYPTO_SHA512 + select CRYPTO_CBC + select CRYPTO_CTS + select CRYPTO_AES + select CRYPTO_CAMELLIA + help + Provide the GSSAPI-based RxGK security class for AFS. 
Keys are added + with add_key(). + + See Documentation/networking/rxrpc.rst. + config RXPERF tristate "RxRPC test service" help diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index 210b75e3179e..c0542bae719e 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -24,6 +24,7 @@ rxrpc-y := \ local_object.o \ misc.o \ net_ns.o \ + oob.o \ output.o \ peer_event.o \ peer_object.o \ @@ -39,6 +40,9 @@ rxrpc-y := \ rxrpc-$(CONFIG_PROC_FS) += proc.o rxrpc-$(CONFIG_RXKAD) += rxkad.o rxrpc-$(CONFIG_SYSCTL) += sysctl.o - +rxrpc-$(CONFIG_RXGK) += \ + rxgk.o \ + rxgk_app.o \ + rxgk_kdf.o obj-$(CONFIG_RXPERF) += rxperf.o diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 86873399f7d5..36df0274d7b7 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -265,7 +265,10 @@ static int rxrpc_listen(struct socket *sock, int backlog) * @gfp: Allocation flags * * Lookup or create a remote transport endpoint record for the specified - * address and return it with a ref held. + * address. + * + * Return: The peer record found with a reference, %NULL if no record is found + * or a negative error code if the address is invalid or unsupported. */ struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock, struct sockaddr_rxrpc *srx, gfp_t gfp) @@ -283,9 +286,11 @@ EXPORT_SYMBOL(rxrpc_kernel_lookup_peer); /** * rxrpc_kernel_get_peer - Get a reference on a peer - * @peer: The peer to get a reference on. + * @peer: The peer to get a reference on (may be NULL). + * + * Get a reference for a remote peer record (if not NULL). * - * Get a record for the remote peer in a call. + * Return: The @peer argument. */ struct rxrpc_peer *rxrpc_kernel_get_peer(struct rxrpc_peer *peer) { @@ -296,6 +301,8 @@ EXPORT_SYMBOL(rxrpc_kernel_get_peer); /** * rxrpc_kernel_put_peer - Allow a kernel app to drop a peer reference * @peer: The peer to drop a ref on + * + * Drop a reference on a peer record. */ void rxrpc_kernel_put_peer(struct rxrpc_peer *peer) { @@ -320,10 +327,12 @@ EXPORT_SYMBOL(rxrpc_kernel_put_peer); * * Allow a kernel service to begin a call on the nominated socket. This just * sets up all the internal tracking structures and allocates connection and - * call IDs as appropriate. The call to be used is returned. + * call IDs as appropriate. * * The default socket destination address and security may be overridden by * supplying @srx and @key. + * + * Return: The new call or an error code. */ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, struct rxrpc_peer *peer, @@ -437,6 +446,8 @@ EXPORT_SYMBOL(rxrpc_kernel_put_call); * * Allow a kernel service to find out whether a call is still alive - whether * it has completed successfully and all received data has been consumed. + * + * Return: %true if the call is still ongoing and %false if it has completed. */ bool rxrpc_kernel_check_life(const struct socket *sock, const struct rxrpc_call *call) @@ -450,63 +461,20 @@ bool rxrpc_kernel_check_life(const struct socket *sock, EXPORT_SYMBOL(rxrpc_kernel_check_life); /** - * rxrpc_kernel_get_epoch - Retrieve the epoch value from a call. - * @sock: The socket the call is on - * @call: The call to query - * - * Allow a kernel service to retrieve the epoch value from a service call to - * see if the client at the other end rebooted. 
- */ -u32 rxrpc_kernel_get_epoch(struct socket *sock, struct rxrpc_call *call) -{ - return call->conn->proto.epoch; -} -EXPORT_SYMBOL(rxrpc_kernel_get_epoch); - -/** - * rxrpc_kernel_new_call_notification - Get notifications of new calls - * @sock: The socket to intercept received messages on - * @notify_new_call: Function to be called when new calls appear - * @discard_new_call: Function to discard preallocated calls + * rxrpc_kernel_set_notifications - Set table of callback operations + * @sock: The socket to install table upon + * @app_ops: Callback operation table to set * - * Allow a kernel service to be given notifications about new calls. + * Allow a kernel service to set a table of event notifications on a socket. */ -void rxrpc_kernel_new_call_notification( - struct socket *sock, - rxrpc_notify_new_call_t notify_new_call, - rxrpc_discard_new_call_t discard_new_call) +void rxrpc_kernel_set_notifications(struct socket *sock, + const struct rxrpc_kernel_ops *app_ops) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - rx->notify_new_call = notify_new_call; - rx->discard_new_call = discard_new_call; + rx->app_ops = app_ops; } -EXPORT_SYMBOL(rxrpc_kernel_new_call_notification); - -/** - * rxrpc_kernel_set_max_life - Set maximum lifespan on a call - * @sock: The socket the call is on - * @call: The call to configure - * @hard_timeout: The maximum lifespan of the call in ms - * - * Set the maximum lifespan of a call. The call will end with ETIME or - * ETIMEDOUT if it takes longer than this. - */ -void rxrpc_kernel_set_max_life(struct socket *sock, struct rxrpc_call *call, - unsigned long hard_timeout) -{ - ktime_t delay = ms_to_ktime(hard_timeout), expect_term_by; - - mutex_lock(&call->user_mutex); - - expect_term_by = ktime_add(ktime_get_real(), delay); - WRITE_ONCE(call->expect_term_by, expect_term_by); - trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_hard); - rxrpc_poke_call(call, rxrpc_call_poke_set_timeout); - - mutex_unlock(&call->user_mutex); -} -EXPORT_SYMBOL(rxrpc_kernel_set_max_life); +EXPORT_SYMBOL(rxrpc_kernel_set_notifications); /* * connect an RxRPC socket @@ -624,7 +592,10 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) fallthrough; case RXRPC_SERVER_BOUND: case RXRPC_SERVER_LISTENING: - ret = rxrpc_do_sendmsg(rx, m, len); + if (m->msg_flags & MSG_OOB) + ret = rxrpc_sendmsg_oob(rx, m, len); + else + ret = rxrpc_do_sendmsg(rx, m, len); /* The socket has been unlocked */ goto out; default: @@ -659,7 +630,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - unsigned int min_sec_level; + unsigned int min_sec_level, val; u16 service_upgrade[2]; int ret; @@ -740,6 +711,26 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname, rx->service_upgrade.to = service_upgrade[1]; goto success; + case RXRPC_MANAGE_RESPONSE: + ret = -EINVAL; + if (optlen != sizeof(unsigned int)) + goto error; + ret = -EISCONN; + if (rx->sk.sk_state != RXRPC_UNBOUND) + goto error; + ret = copy_safe_from_sockptr(&val, sizeof(val), + optval, optlen); + if (ret) + goto error; + ret = -EINVAL; + if (val > 1) + goto error; + if (val) + set_bit(RXRPC_SOCK_MANAGE_RESPONSE, &rx->flags); + else + clear_bit(RXRPC_SOCK_MANAGE_RESPONSE, &rx->flags); + goto success; + default: break; } @@ -846,6 +837,8 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, rx->calls = RB_ROOT; spin_lock_init(&rx->incoming_lock); + 
skb_queue_head_init(&rx->recvmsg_oobq); + rx->pending_oobq = RB_ROOT; INIT_LIST_HEAD(&rx->sock_calls); INIT_LIST_HEAD(&rx->to_be_accepted); INIT_LIST_HEAD(&rx->recvmsg_q); @@ -879,8 +872,10 @@ static int rxrpc_shutdown(struct socket *sock, int flags) lock_sock(sk); if (sk->sk_state < RXRPC_CLOSE) { + spin_lock_irq(&rx->recvmsg_lock); sk->sk_state = RXRPC_CLOSE; sk->sk_shutdown = SHUTDOWN_MASK; + spin_unlock_irq(&rx->recvmsg_lock); } else { ret = -ESHUTDOWN; } @@ -892,12 +887,30 @@ static int rxrpc_shutdown(struct socket *sock, int flags) } /* + * Purge the out-of-band queue. + */ +static void rxrpc_purge_oob_queue(struct sock *sk) +{ + struct rxrpc_sock *rx = rxrpc_sk(sk); + struct sk_buff *skb; + + while ((skb = skb_dequeue(&rx->recvmsg_oobq))) + rxrpc_kernel_free_oob(skb); + while (!RB_EMPTY_ROOT(&rx->pending_oobq)) { + skb = rb_entry(rx->pending_oobq.rb_node, struct sk_buff, rbnode); + rb_erase(&skb->rbnode, &rx->pending_oobq); + rxrpc_kernel_free_oob(skb); + } +} + +/* * RxRPC socket destructor */ static void rxrpc_sock_destructor(struct sock *sk) { _enter("%p", sk); + rxrpc_purge_oob_queue(sk); rxrpc_purge_queue(&sk->sk_receive_queue); WARN_ON(refcount_read(&sk->sk_wmem_alloc)); @@ -936,7 +949,9 @@ static int rxrpc_release_sock(struct sock *sk) break; } + spin_lock_irq(&rx->recvmsg_lock); sk->sk_state = RXRPC_CLOSE; + spin_unlock_irq(&rx->recvmsg_lock); if (rx->local && rx->local->service == rx) { write_lock(&rx->local->services_lock); @@ -948,6 +963,7 @@ static int rxrpc_release_sock(struct sock *sk) rxrpc_discard_prealloc(rx); rxrpc_release_calls_on_socket(rx); flush_workqueue(rxrpc_workqueue); + rxrpc_purge_oob_queue(sk); rxrpc_purge_queue(&sk->sk_receive_queue); rxrpc_unuse_local(rx->local, rxrpc_local_unuse_release_sock); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 3cc3af15086f..5bd3922c310d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -31,6 +31,7 @@ struct key_preparsed_payload; struct rxrpc_connection; struct rxrpc_txbuf; struct rxrpc_txqueue; +struct rxgk_context; /* * Mark applied to socket buffers in skb->mark. 
skb->priority is used @@ -39,6 +40,7 @@ struct rxrpc_txqueue; enum rxrpc_skb_mark { RXRPC_SKB_MARK_PACKET, /* Received packet */ RXRPC_SKB_MARK_ERROR, /* Error notification */ + RXRPC_SKB_MARK_CHALLENGE, /* Challenge notification */ RXRPC_SKB_MARK_SERVICE_CONN_SECURED, /* Service connection response has been verified */ RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */ RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */ @@ -146,10 +148,12 @@ struct rxrpc_backlog { struct rxrpc_sock { /* WARNING: sk has to be the first member */ struct sock sk; - rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */ - rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */ + const struct rxrpc_kernel_ops *app_ops; /* Table of kernel app notification funcs */ struct rxrpc_local *local; /* local endpoint */ struct rxrpc_backlog *backlog; /* Preallocation for services */ + struct sk_buff_head recvmsg_oobq; /* OOB messages for recvmsg to pick up */ + struct rb_root pending_oobq; /* OOB messages awaiting userspace to respond to */ + u64 oob_id_counter; /* OOB message ID counter */ spinlock_t incoming_lock; /* Incoming call vs service shutdown lock */ struct list_head sock_calls; /* List of calls owned by this socket */ struct list_head to_be_accepted; /* calls awaiting acceptance */ @@ -160,6 +164,7 @@ struct rxrpc_sock { struct rb_root calls; /* User ID -> call mapping */ unsigned long flags; #define RXRPC_SOCK_CONNECTED 0 /* connect_srx is set */ +#define RXRPC_SOCK_MANAGE_RESPONSE 1 /* User wants to manage RESPONSE packets */ rwlock_t call_lock; /* lock for calls */ u32 min_sec_level; /* minimum security level */ #define RXRPC_SECURITY_MAX RXRPC_SECURITY_ENCRYPT @@ -203,7 +208,7 @@ struct rxrpc_host_header { */ struct rxrpc_skb_priv { union { - struct rxrpc_connection *conn; /* Connection referred to (poke packet) */ + struct rxrpc_connection *poke_conn; /* Conn referred to (poke packet) */ struct { u16 offset; /* Offset of data */ u16 len; /* Length of data */ @@ -217,6 +222,19 @@ struct rxrpc_skb_priv { u16 nr_acks; /* Number of acks+nacks */ u8 reason; /* Reason for ack */ } ack; + struct { + struct rxrpc_connection *conn; /* Connection referred to */ + union { + u32 rxkad_nonce; + }; + } chall; + struct { + rxrpc_serial_t challenge_serial; + u32 kvno; + u32 version; + u16 len; + u16 ticket_len; + } resp; }; struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ }; @@ -270,9 +288,24 @@ struct rxrpc_security { /* issue a challenge */ int (*issue_challenge)(struct rxrpc_connection *); + /* Validate a challenge packet */ + bool (*validate_challenge)(struct rxrpc_connection *conn, + struct sk_buff *skb); + + /* Fill out the cmsg for recvmsg() to pass on a challenge to userspace. + * The security class gets to add additional information. + */ + int (*challenge_to_recvmsg)(struct rxrpc_connection *conn, + struct sk_buff *challenge, + struct msghdr *msg); + + /* Parse sendmsg() control message and respond to challenge. 
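+	 * Only used when userspace has elected to manage RESPONSE
+	 * packets itself (see RXRPC_SOCK_MANAGE_RESPONSE).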
*/ + int (*sendmsg_respond_to_challenge)(struct sk_buff *challenge, + struct msghdr *msg); + /* respond to a challenge */ - int (*respond_to_challenge)(struct rxrpc_connection *, - struct sk_buff *); + int (*respond_to_challenge)(struct rxrpc_connection *conn, + struct sk_buff *challenge); /* verify a response */ int (*verify_response)(struct rxrpc_connection *, @@ -280,6 +313,11 @@ struct rxrpc_security { /* clear connection security */ void (*clear)(struct rxrpc_connection *); + + /* Default ticket -> key decoder */ + int (*default_decode_ticket)(struct rxrpc_connection *conn, struct sk_buff *skb, + unsigned int ticket_offset, unsigned int ticket_len, + struct key **_key); }; /* @@ -526,7 +564,17 @@ struct rxrpc_connection { struct rxrpc_crypt csum_iv; /* packet checksum base */ u32 nonce; /* response re-use preventer */ } rxkad; + struct { + struct rxgk_context *keys[4]; /* (Re-)keying buffer */ + u64 start_time; /* The start time for TK derivation */ + u8 nonce[20]; /* Response re-use preventer */ + u32 enctype; /* Kerberos 5 encoding type */ + u32 key_number; /* Current key number */ + } rxgk; }; + rwlock_t security_use_lock; /* Security use/modification lock */ + struct sk_buff *tx_response; /* Response packet to be transmitted */ + unsigned long flags; unsigned long events; unsigned long idle_timestamp; /* Time at which last became idle */ @@ -692,6 +740,7 @@ struct rxrpc_call { u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ u32 security_level; /* Security level selected */ + u32 security_enctype; /* Security-specific encoding type (or 0) */ int debug_id; /* debug ID for printks */ unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ unsigned short rx_pkt_len; /* Current recvmsg packet len */ @@ -867,6 +916,8 @@ struct rxrpc_txbuf { unsigned short len; /* Amount of data in buffer */ unsigned short space; /* Remaining data space */ unsigned short offset; /* Offset of fill point */ + unsigned short crypto_header; /* Size of crypto header */ + unsigned short sec_header; /* Size of security header */ unsigned short pkt_len; /* Size of packet content */ unsigned short alloc_size; /* Amount of bufferage allocated */ unsigned int flags; @@ -1001,7 +1052,9 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct rxrpc_call_params *, gfp_t, - unsigned int); + unsigned int) + __releases(&rx->sk.sk_lock) + __acquires(&call->user_mutex); void rxrpc_start_call_timer(struct rxrpc_call *call); void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, struct sk_buff *); @@ -1198,8 +1251,11 @@ void rxrpc_error_report(struct sock *); bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why, s32 abort_code, int err); int rxrpc_io_thread(void *data); +void rxrpc_post_response(struct rxrpc_connection *conn, struct sk_buff *skb); static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local) { + if (!local->io_thread) + return; wake_up_process(READ_ONCE(local->io_thread)); } @@ -1289,8 +1345,16 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net) } /* + * out_of_band.c + */ +void rxrpc_notify_socket_oob(struct rxrpc_call *call, struct sk_buff *skb); +void rxrpc_add_pending_oob(struct rxrpc_sock *rx, struct sk_buff *skb); +int rxrpc_sendmsg_oob(struct rxrpc_sock *rx, struct msghdr *msg, size_t len); + +/* * output.c */ +ssize_t do_udp_sendmsg(struct socket *socket, struct msghdr *msg, 
size_t len); void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why); void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call); @@ -1299,6 +1363,7 @@ void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req void rxrpc_send_conn_abort(struct rxrpc_connection *conn); void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb); void rxrpc_send_keepalive(struct rxrpc_peer *); +void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *skb); /* * peer_event.c @@ -1363,6 +1428,11 @@ ktime_t rxrpc_get_rto_backoff(struct rxrpc_call *call, bool retrans); void rxrpc_call_init_rtt(struct rxrpc_call *call); /* + * rxgk.c + */ +extern const struct rxrpc_security rxgk_yfs; + +/* * rxkad.c */ #ifdef CONFIG_RXKAD @@ -1433,7 +1503,6 @@ static inline void rxrpc_sysctl_exit(void) {} extern atomic_t rxrpc_nr_txbuf; struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size, size_t data_align, gfp_t gfp); -void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index e685034ce4f7..a4b363b47cca 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -34,7 +34,6 @@ static void rxrpc_dummy_notify(struct sock *sk, struct rxrpc_call *call, static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, struct rxrpc_backlog *b, rxrpc_notify_rx_t notify_rx, - rxrpc_user_attach_call_t user_attach_call, unsigned long user_call_ID, gfp_t gfp, unsigned int debug_id) { @@ -123,9 +122,10 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, call->user_call_ID = user_call_ID; call->notify_rx = notify_rx; - if (user_attach_call) { + if (rx->app_ops && + rx->app_ops->user_attach_call) { rxrpc_get_call(call, rxrpc_call_get_kernel_service); - user_attach_call(call, user_call_ID); + rx->app_ops->user_attach_call(call, user_call_ID); } rxrpc_get_call(call, rxrpc_call_get_userid); @@ -219,9 +219,10 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_call *call = b->call_backlog[tail]; rcu_assign_pointer(call->socket, rx); - if (rx->discard_new_call) { + if (rx->app_ops && + rx->app_ops->discard_new_call) { _debug("discard %lx", call->user_call_ID); - rx->discard_new_call(call, call->user_call_ID); + rx->app_ops->discard_new_call(call, call->user_call_ID); if (call->notify_rx) call->notify_rx = rxrpc_dummy_notify; rxrpc_put_call(call, rxrpc_call_put_kernel); @@ -387,8 +388,9 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local, rxrpc_incoming_call(rx, call, skb); conn = call->conn; - if (rx->notify_new_call) - rx->notify_new_call(&rx->sk, call, call->user_call_ID); + if (rx->app_ops && + rx->app_ops->notify_new_call) + rx->app_ops->notify_new_call(&rx->sk, call, call->user_call_ID); spin_lock(&conn->state_lock); if (conn->state == RXRPC_CONN_SERVICE_UNSECURED) { @@ -440,8 +442,7 @@ int rxrpc_user_charge_accept(struct rxrpc_sock *rx, unsigned long user_call_ID) if (rx->sk.sk_state == RXRPC_CLOSE) return -ESHUTDOWN; - return rxrpc_service_prealloc_one(rx, b, NULL, NULL, user_call_ID, - GFP_KERNEL, + return rxrpc_service_prealloc_one(rx, b, NULL, user_call_ID, GFP_KERNEL, atomic_inc_return(&rxrpc_debug_id)); } @@ -449,20 +450,18 @@ int rxrpc_user_charge_accept(struct rxrpc_sock *rx, 
unsigned long user_call_ID) * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls * @sock: The socket on which to preallocate * @notify_rx: Event notification function for the call - @user_attach_call: Func to attach call to user_call_ID * @user_call_ID: The tag to attach to the preallocated call * @gfp: The allocation conditions. * @debug_id: The tracing debug ID. * - * Charge up the socket with preallocated calls, each with a user ID. A - * function should be provided to effect the attachment from the user's side. - * The user is given a ref to hold on the call. + * Charge up the socket with preallocated calls, each with a user ID. The + * ->user_attach_call() callback function should be provided to effect the + * attachment from the user's side. The user is given a ref to hold on the + * call. * * Note that the call may become connected before this function returns. */ -int rxrpc_kernel_charge_accept(struct socket *sock, - rxrpc_notify_rx_t notify_rx, - rxrpc_user_attach_call_t user_attach_call, +int rxrpc_kernel_charge_accept(struct socket *sock, rxrpc_notify_rx_t notify_rx, unsigned long user_call_ID, gfp_t gfp, unsigned int debug_id) { @@ -472,8 +471,7 @@ int rxrpc_kernel_charge_accept(struct socket *sock, if (sock->sk->sk_state == RXRPC_CLOSE) return -ESHUTDOWN; - return rxrpc_service_prealloc_one(rx, b, notify_rx, - user_attach_call, user_call_ID, + return rxrpc_service_prealloc_one(rx, b, notify_rx, user_call_ID, gfp, debug_id); } EXPORT_SYMBOL(rxrpc_kernel_charge_accept); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index fce58be65e7c..e9e8f0ef3fd5 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -145,8 +145,8 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, INIT_LIST_HEAD(&call->recvmsg_link); INIT_LIST_HEAD(&call->sock_link); INIT_LIST_HEAD(&call->attend_link); - skb_queue_head_init(&call->rx_queue); skb_queue_head_init(&call->recvmsg_queue); + skb_queue_head_init(&call->rx_queue); skb_queue_head_init(&call->rx_oos_queue); init_waitqueue_head(&call->waitq); spin_lock_init(&call->notify_lock); @@ -322,7 +322,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_call_params *p, gfp_t gfp, unsigned int debug_id) - __releases(&rx->sk.sk_lock.slock) + __releases(&rx->sk.sk_lock) __acquires(&call->user_mutex) { struct rxrpc_call *call, *xcall; @@ -760,3 +760,23 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) atomic_dec(&rxnet->nr_calls); wait_var_event(&rxnet->nr_calls, !atomic_read(&rxnet->nr_calls)); } + +/** + * rxrpc_kernel_query_call_security - Query call's security parameters + * @call: The call to query + * @_service_id: Where to return the service ID + * @_enctype: Where to return the "encoding type" + * + * This queries the security parameters of a call, setting *@_service_id and + * *@_enctype and returning the security class. + * + * Return: The security class protocol number. + */ +u8 rxrpc_kernel_query_call_security(struct rxrpc_call *call, + u16 *_service_id, u32 *_enctype) +{ + *_service_id = call->dest_srx.srx_service; + *_enctype = call->security_enctype; + return call->security_ix; +} +EXPORT_SYMBOL(rxrpc_kernel_query_call_security); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 4d9c5e21ba78..232b6986da83 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -19,7 +19,7 @@ /* * Set the completion state on an aborted connection.
*/ -static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff *skb, +static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, s32 abort_code, int err, enum rxrpc_call_completion compl) { @@ -49,12 +49,20 @@ static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb, s32 abort_code, int err, enum rxrpc_abort_reason why) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - if (rxrpc_set_conn_aborted(conn, skb, abort_code, err, + u32 cid = conn->proto.cid, call = 0, seq = 0; + + if (skb) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + cid = sp->hdr.cid; + call = sp->hdr.callNumber; + seq = sp->hdr.seq; + } + + if (rxrpc_set_conn_aborted(conn, abort_code, err, RXRPC_CALL_LOCALLY_ABORTED)) { - trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber, - sp->hdr.seq, abort_code, err); + trace_rxrpc_abort(0, why, cid, call, seq, abort_code, err); rxrpc_poke_conn(conn, rxrpc_conn_get_poke_abort); } return -EPROTO; @@ -67,7 +75,7 @@ static void rxrpc_input_conn_abort(struct rxrpc_connection *conn, struct sk_buff *skb) { trace_rxrpc_rx_conn_abort(conn, skb); - rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED, + rxrpc_set_conn_aborted(conn, skb->priority, -ECONNABORTED, RXRPC_CALL_REMOTELY_ABORTED); } @@ -248,7 +256,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_CHALLENGE: - return conn->security->respond_to_challenge(conn, skb); + ret = conn->security->respond_to_challenge(conn, skb); + sp->chall.conn = NULL; + rxrpc_put_connection(conn, rxrpc_conn_put_challenge_input); + return ret; case RXRPC_PACKET_TYPE_RESPONSE: ret = conn->security->verify_response(conn, skb); @@ -270,7 +281,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, * we've already received the packet, put it on the * front of the queue. */ - sp->conn = rxrpc_get_connection(conn, rxrpc_conn_get_poke_secured); + sp->poke_conn = rxrpc_get_connection( + conn, rxrpc_conn_get_poke_secured); skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED; rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured); skb_queue_head(&conn->local->rx_queue, skb); @@ -392,6 +404,61 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, } /* + * Post a CHALLENGE packet to the socket of one of a connection's calls so that + * it can get application data to include in the packet, possibly querying + * userspace. 
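+ *
+ * For illustration, the userspace side of the resulting exchange might look
+ * roughly like this (error handling elided): recvmsg() delivers the
+ * challenge with RXRPC_OOB_ID and RXRPC_CHALLENGED control messages, and
+ * the answer is a sendmsg() quoting the same OOB ID:
+ *
+ *	char buf[256];
+ *	struct msghdr msg = { .msg_control = buf,
+ *			      .msg_controllen = sizeof(buf) };
+ *	struct cmsghdr *c;
+ *	uint64_t oob_id = 0;
+ *
+ *	recvmsg(fd, &msg, 0);
+ *	for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c))
+ *		if (c->cmsg_level == SOL_RXRPC && c->cmsg_type == RXRPC_OOB_ID)
+ *			memcpy(&oob_id, CMSG_DATA(c), sizeof(oob_id));
+ *
+ * after which the reply is a sendmsg() whose control buffer carries an
+ * RXRPC_OOB_ID cmsg quoting oob_id plus an RXRPC_RESPOND cmsg (and any
+ * security-specific cmsgs such as RXRPC_RESP_RXGK_APPDATA).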
+ */ +static bool rxrpc_post_challenge(struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_call *call = NULL; + struct rxrpc_sock *rx; + bool respond = false; + + sp->chall.conn = + rxrpc_get_connection(conn, rxrpc_conn_get_challenge_input); + + if (!conn->security->challenge_to_recvmsg) { + rxrpc_post_packet_to_conn(conn, skb); + return true; + } + + rcu_read_lock(); + + for (int i = 0; i < ARRAY_SIZE(conn->channels); i++) { + if (conn->channels[i].call) { + call = conn->channels[i].call; + rx = rcu_dereference(call->socket); + if (!rx) { + call = NULL; + continue; + } + + respond = true; + if (test_bit(RXRPC_SOCK_MANAGE_RESPONSE, &rx->flags)) + break; + call = NULL; + } + } + + if (!respond) { + rcu_read_unlock(); + rxrpc_put_connection(conn, rxrpc_conn_put_challenge_input); + sp->chall.conn = NULL; + return false; + } + + if (call) + rxrpc_notify_socket_oob(call, skb); + rcu_read_unlock(); + + if (!call) + rxrpc_post_packet_to_conn(conn, skb); + return true; +} + +/* * Input a connection-level packet. */ bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) @@ -411,6 +478,16 @@ bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) return true; case RXRPC_PACKET_TYPE_CHALLENGE: + rxrpc_see_skb(skb, rxrpc_skb_see_oob_challenge); + if (rxrpc_is_conn_aborted(conn)) { + if (conn->completion == RXRPC_CALL_LOCALLY_ABORTED) + rxrpc_send_conn_abort(conn); + return true; + } + if (!conn->security->validate_challenge(conn, skb)) + return false; + return rxrpc_post_challenge(conn, skb); + case RXRPC_PACKET_TYPE_RESPONSE: if (rxrpc_is_conn_aborted(conn)) { if (conn->completion == RXRPC_CALL_LOCALLY_ABORTED) @@ -436,6 +513,19 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb) if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events)) rxrpc_abort_calls(conn); + if (conn->tx_response) { + struct sk_buff *skb; + + spin_lock_irq(&conn->local->lock); + skb = conn->tx_response; + conn->tx_response = NULL; + spin_unlock_irq(&conn->local->lock); + + if (conn->state != RXRPC_CONN_ABORTED) + rxrpc_send_response(conn, skb); + rxrpc_free_skb(skb, rxrpc_skb_put_response); + } + if (skb) { switch (skb->mark) { case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: @@ -452,3 +542,31 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb) if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) rxrpc_process_delayed_final_acks(conn, false); } + +/* + * Post a RESPONSE message to the I/O thread for transmission. + */ +void rxrpc_post_response(struct rxrpc_connection *conn, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_local *local = conn->local; + struct sk_buff *old; + + _enter("%x", sp->resp.challenge_serial); + + spin_lock_irq(&local->lock); + old = conn->tx_response; + if (old) { + struct rxrpc_skb_priv *osp = rxrpc_skb(old); + + /* Always go with the response to the most recent challenge.
*/ + if (after(sp->resp.challenge_serial, osp->resp.challenge_serial)) + conn->tx_response = skb; + else + old = skb; + } else { + conn->tx_response = skb; + } + spin_unlock_irq(&local->lock); + rxrpc_free_skb(old, rxrpc_skb_put_response); + rxrpc_poke_conn(conn, rxrpc_conn_get_poke_response); +} diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 8ac22dde8b39..37340becb224 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -73,6 +73,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet, skb_queue_head_init(&conn->rx_queue); conn->rxnet = rxnet; conn->security = &rxrpc_no_security; + rwlock_init(&conn->security_use_lock); spin_lock_init(&conn->state_lock); conn->debug_id = atomic_inc_return(&rxrpc_debug_id); conn->idle_timestamp = jiffies; @@ -329,6 +330,7 @@ static void rxrpc_clean_up_connection(struct work_struct *work) } rxrpc_purge_queue(&conn->rx_queue); + rxrpc_free_skb(conn->tx_response, rxrpc_skb_put_response); rxrpc_kill_client_conn(conn); diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index e068f9b79d02..1f7c136d6d0e 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -42,13 +42,19 @@ static void none_free_call_crypto(struct rxrpc_call *call) { } -static int none_respond_to_challenge(struct rxrpc_connection *conn, - struct sk_buff *skb) +static bool none_validate_challenge(struct rxrpc_connection *conn, + struct sk_buff *skb) { return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, rxrpc_eproto_rxnull_challenge); } +static int none_sendmsg_respond_to_challenge(struct sk_buff *challenge, + struct msghdr *msg) +{ + return -EINVAL; +} + static int none_verify_response(struct rxrpc_connection *conn, struct sk_buff *skb) { @@ -82,7 +88,8 @@ const struct rxrpc_security rxrpc_no_security = { .alloc_txbuf = none_alloc_txbuf, .secure_packet = none_secure_packet, .verify_packet = none_verify_packet, - .respond_to_challenge = none_respond_to_challenge, + .validate_challenge = none_validate_challenge, + .sendmsg_respond_to_challenge = none_sendmsg_respond_to_challenge, .verify_response = none_verify_response, .clear = none_clear, }; diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 64f8d77b8731..27b650d30f4d 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -489,8 +489,8 @@ int rxrpc_io_thread(void *data) rxrpc_free_skb(skb, rxrpc_skb_put_error_report); break; case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: - rxrpc_input_conn_event(sp->conn, skb); - rxrpc_put_connection(sp->conn, rxrpc_conn_put_poke); + rxrpc_input_conn_event(sp->poke_conn, skb); + rxrpc_put_connection(sp->poke_conn, rxrpc_conn_put_poke); rxrpc_free_skb(skb, rxrpc_skb_put_conn_secured); break; default: @@ -501,9 +501,11 @@ } /* Deal with connections that want immediate attention.
*/ - spin_lock_irq(&local->lock); - list_splice_tail_init(&local->conn_attend_q, &conn_attend_q); - spin_unlock_irq(&local->lock); + if (!list_empty_careful(&local->conn_attend_q)) { + spin_lock_irq(&local->lock); + list_splice_tail_init(&local->conn_attend_q, &conn_attend_q); + spin_unlock_irq(&local->lock); + } while ((conn = list_first_entry_or_null(&conn_attend_q, struct rxrpc_connection, diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 33e8302a79e3..9fdc1f031c9d 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -129,6 +129,160 @@ static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep, return 0; } +static u64 xdr_dec64(const __be32 *xdr) +{ + return (u64)ntohl(xdr[0]) << 32 | (u64)ntohl(xdr[1]); +} + +static time64_t rxrpc_s64_to_time64(s64 time_in_100ns) +{ + bool neg = false; + u64 tmp = time_in_100ns; + + if (time_in_100ns < 0) { + tmp = -time_in_100ns; + neg = true; + } + do_div(tmp, 10000000); + return neg ? -tmp : tmp; +} + +/* + * Parse a YFS-RxGK type XDR format token + * - the caller guarantees we have at least 4 words + * + * struct token_rxgk { + * opr_time begintime; + * opr_time endtime; + * afs_int64 level; + * afs_int64 lifetime; + * afs_int64 bytelife; + * afs_int64 enctype; + * opaque key<>; + * opaque ticket<>; + * }; + */ +static int rxrpc_preparse_xdr_yfs_rxgk(struct key_preparsed_payload *prep, + size_t datalen, + const __be32 *xdr, unsigned int toklen) +{ + struct rxrpc_key_token *token, **pptoken; + time64_t expiry; + size_t plen; + const __be32 *ticket, *key; + s64 tmp; + u32 tktlen, keylen; + + _enter(",{%x,%x,%x,%x},%x", + ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), ntohl(xdr[3]), + toklen); + + if (6 * 2 + 2 > toklen / 4) + goto reject; + + key = xdr + (6 * 2 + 1); + keylen = ntohl(key[-1]); + _debug("keylen: %x", keylen); + keylen = round_up(keylen, 4); + if ((6 * 2 + 2) * 4 + keylen > toklen) + goto reject; + + ticket = xdr + (6 * 2 + 1 + (keylen / 4) + 1); + tktlen = ntohl(ticket[-1]); + _debug("tktlen: %x", tktlen); + tktlen = round_up(tktlen, 4); + if ((6 * 2 + 2) * 4 + keylen + tktlen != toklen) { + kleave(" = -EKEYREJECTED [%x!=%x, %x,%x]", + (6 * 2 + 2) * 4 + keylen + tktlen, toklen, + keylen, tktlen); + goto reject; + } + + plen = sizeof(*token) + sizeof(*token->rxgk) + tktlen + keylen; + prep->quotalen = datalen + plen; + + plen -= sizeof(*token); + token = kzalloc(sizeof(*token), GFP_KERNEL); + if (!token) + goto nomem; + + token->rxgk = kzalloc(sizeof(*token->rxgk) + keylen, GFP_KERNEL); + if (!token->rxgk) + goto nomem_token; + + token->security_index = RXRPC_SECURITY_YFS_RXGK; + token->rxgk->begintime = xdr_dec64(xdr + 0 * 2); + token->rxgk->endtime = xdr_dec64(xdr + 1 * 2); + token->rxgk->level = tmp = xdr_dec64(xdr + 2 * 2); + if (tmp < -1LL || tmp > RXRPC_SECURITY_ENCRYPT) + goto reject_token; + token->rxgk->lifetime = xdr_dec64(xdr + 3 * 2); + token->rxgk->bytelife = xdr_dec64(xdr + 4 * 2); + token->rxgk->enctype = tmp = xdr_dec64(xdr + 5 * 2); + if (tmp < 0 || tmp > UINT_MAX) + goto reject_token; + token->rxgk->key.len = ntohl(key[-1]); + token->rxgk->key.data = token->rxgk->_key; + token->rxgk->ticket.len = ntohl(ticket[-1]); + + if (token->rxgk->endtime != 0) { + expiry = rxrpc_s64_to_time64(token->rxgk->endtime); + if (expiry < 0) + goto expired; + if (expiry < prep->expiry) + prep->expiry = expiry; + } + + memcpy(token->rxgk->key.data, key, token->rxgk->key.len); + + /* Pad the ticket so that we can use it directly in XDR */ + token->rxgk->ticket.data = kzalloc(round_up(token->rxgk->ticket.len, 4), + 
GFP_KERNEL); + if (!token->rxgk->ticket.data) + goto nomem_yrxgk; + memcpy(token->rxgk->ticket.data, ticket, token->rxgk->ticket.len); + + _debug("SCIX: %u", token->security_index); + _debug("EXPY: %llx", token->rxgk->endtime); + _debug("LIFE: %llx", token->rxgk->lifetime); + _debug("BYTE: %llx", token->rxgk->bytelife); + _debug("ENC : %u", token->rxgk->enctype); + _debug("LEVL: %u", token->rxgk->level); + _debug("KLEN: %u", token->rxgk->key.len); + _debug("TLEN: %u", token->rxgk->ticket.len); + _debug("KEY0: %*phN", token->rxgk->key.len, token->rxgk->key.data); + _debug("TICK: %*phN", + min_t(u32, token->rxgk->ticket.len, 32), token->rxgk->ticket.data); + + /* count the number of tokens attached */ + prep->payload.data[1] = (void *)((unsigned long)prep->payload.data[1] + 1); + + /* attach the data */ + for (pptoken = (struct rxrpc_key_token **)&prep->payload.data[0]; + *pptoken; + pptoken = &(*pptoken)->next) + continue; + *pptoken = token; + + _leave(" = 0"); + return 0; + +nomem_yrxgk: + kfree(token->rxgk); +nomem_token: + kfree(token); +nomem: + return -ENOMEM; +reject_token: + kfree(token); +reject: + return -EKEYREJECTED; +expired: + kfree(token->rxgk); + kfree(token); + return -EKEYEXPIRED; +} + /* * attempt to parse the data as the XDR format * - the caller guarantees we have more than 7 words @@ -228,6 +382,9 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep) case RXRPC_SECURITY_RXKAD: ret2 = rxrpc_preparse_xdr_rxkad(prep, datalen, token, toklen); break; + case RXRPC_SECURITY_YFS_RXGK: + ret2 = rxrpc_preparse_xdr_yfs_rxgk(prep, datalen, token, toklen); + break; default: ret2 = -EPROTONOSUPPORT; break; @@ -390,6 +547,10 @@ static void rxrpc_free_token_list(struct rxrpc_key_token *token) case RXRPC_SECURITY_RXKAD: kfree(token->kad); break; + case RXRPC_SECURITY_YFS_RXGK: + kfree(token->rxgk->ticket.data); + kfree(token->rxgk); + break; default: pr_err("Unknown token type %x on rxrpc key\n", token->security_index); @@ -433,6 +594,9 @@ static void rxrpc_describe(const struct key *key, struct seq_file *m) case RXRPC_SECURITY_RXKAD: seq_puts(m, "ka"); break; + case RXRPC_SECURITY_YFS_RXGK: + seq_puts(m, "ygk"); + break; default: /* we have a ticket we can't encode */ seq_printf(m, "%u", token->security_index); break; @@ -531,6 +695,8 @@ EXPORT_SYMBOL(rxrpc_get_server_data_key); * * Generate a null RxRPC key that can be used to indicate anonymous security is * required for a particular domain. + * + * Return: The new key or a negative error code. 
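+ *
+ * For example, a kernel service wanting to make anonymous calls might do
+ * something like this (the key description is illustrative only):
+ *
+ *	key = rxrpc_get_null_key("afs@example-cell");
+ *	if (IS_ERR(key))
+ *		return PTR_ERR(key);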
*/ struct key *rxrpc_get_null_key(const char *keyname) { @@ -595,6 +761,13 @@ static long rxrpc_read(const struct key *key, toksize += RND(token->kad->ticket_len); break; + case RXRPC_SECURITY_YFS_RXGK: + toksize += 6 * 8 + 2 * 4; + if (!token->no_leak_key) + toksize += RND(token->rxgk->key.len); + toksize += RND(token->rxgk->ticket.len); + break; + default: /* we have a ticket we can't encode */ pr_err("Unsupported key token type (%u)\n", token->security_index); @@ -674,6 +847,20 @@ static long rxrpc_read(const struct key *key, ENCODE_DATA(token->kad->ticket_len, token->kad->ticket); break; + case RXRPC_SECURITY_YFS_RXGK: + ENCODE64(token->rxgk->begintime); + ENCODE64(token->rxgk->endtime); + ENCODE64(token->rxgk->level); + ENCODE64(token->rxgk->lifetime); + ENCODE64(token->rxgk->bytelife); + ENCODE64(token->rxgk->enctype); + if (token->no_leak_key) + ENCODE(0); + else + ENCODE_DATA(token->rxgk->key.len, token->rxgk->key.data); + ENCODE_DATA(token->rxgk->ticket.len, token->rxgk->ticket.data); + break; + default: pr_err("Unsupported key token type (%u)\n", token->security_index); diff --git a/net/rxrpc/oob.c b/net/rxrpc/oob.c new file mode 100644 index 000000000000..05ca9c1faa57 --- /dev/null +++ b/net/rxrpc/oob.c @@ -0,0 +1,379 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Out of band message handling (e.g. challenge-response) + * + * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/net.h> +#include <linux/gfp.h> +#include <linux/skbuff.h> +#include <linux/export.h> +#include <linux/sched/signal.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +enum rxrpc_oob_command { + RXRPC_OOB_CMD_UNSET, + RXRPC_OOB_CMD_RESPOND, +} __mode(byte); + +struct rxrpc_oob_params { + u64 oob_id; /* ID number of message if reply */ + s32 abort_code; + enum rxrpc_oob_command command; + bool have_oob_id:1; +}; + +/* + * Post an out-of-band message for attention by the socket or kernel service + * associated with a reference call. + */ +void rxrpc_notify_socket_oob(struct rxrpc_call *call, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_sock *rx; + struct sock *sk; + + rcu_read_lock(); + + rx = rcu_dereference(call->socket); + if (rx) { + sk = &rx->sk; + spin_lock_irq(&rx->recvmsg_lock); + + if (sk->sk_state < RXRPC_CLOSE) { + skb->skb_mstamp_ns = rx->oob_id_counter++; + rxrpc_get_skb(skb, rxrpc_skb_get_post_oob); + skb_queue_tail(&rx->recvmsg_oobq, skb); + + trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial); + if (rx->app_ops) + rx->app_ops->notify_oob(sk, skb); + } + + spin_unlock_irq(&rx->recvmsg_lock); + if (!rx->app_ops && !sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk); + } + + rcu_read_unlock(); +} + +/* + * Locate the OOB message to respond to by its ID. + */ +static struct sk_buff *rxrpc_find_pending_oob(struct rxrpc_sock *rx, u64 oob_id) +{ + struct rb_node *p; + struct sk_buff *skb; + + p = rx->pending_oobq.rb_node; + while (p) { + skb = rb_entry(p, struct sk_buff, rbnode); + + if (oob_id < skb->skb_mstamp_ns) + p = p->rb_left; + else if (oob_id > skb->skb_mstamp_ns) + p = p->rb_right; + else + return skb; + } + + return NULL; +} + +/* + * Add an OOB message into the pending-response set. We always assign the next + * value from a 64-bit counter to the oob_id, so just assume we're always going + * to be on the right-hand edge of the tree and that the counter won't wrap. 
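+ * (Since the IDs are handed out in strictly increasing order, each new
+ * node is inserted by walking ->rb_right from the root; the insertion
+ * loop below therefore never needs to compare keys.)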
+ * The tree is also given a ref to the message. + */ +void rxrpc_add_pending_oob(struct rxrpc_sock *rx, struct sk_buff *skb) +{ + struct rb_node **pp = &rx->pending_oobq.rb_node, *p = NULL; + + while (*pp) { + p = *pp; + pp = &(*pp)->rb_right; + } + + rb_link_node(&skb->rbnode, p, pp); + rb_insert_color(&skb->rbnode, &rx->pending_oobq); +} + +/* + * Extract control messages from the sendmsg() control buffer. + */ +static int rxrpc_sendmsg_oob_cmsg(struct msghdr *msg, struct rxrpc_oob_params *p) +{ + struct cmsghdr *cmsg; + int len; + + if (msg->msg_controllen == 0) + return -EINVAL; + + for_each_cmsghdr(cmsg, msg) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + + len = cmsg->cmsg_len - sizeof(struct cmsghdr); + _debug("CMSG %d, %d, %d", + cmsg->cmsg_level, cmsg->cmsg_type, len); + + if (cmsg->cmsg_level != SOL_RXRPC) + continue; + + switch (cmsg->cmsg_type) { + case RXRPC_OOB_ID: + if (len != sizeof(p->oob_id) || p->have_oob_id) + return -EINVAL; + memcpy(&p->oob_id, CMSG_DATA(cmsg), sizeof(p->oob_id)); + p->have_oob_id = true; + break; + case RXRPC_RESPOND: + if (p->command != RXRPC_OOB_CMD_UNSET) + return -EINVAL; + p->command = RXRPC_OOB_CMD_RESPOND; + break; + case RXRPC_ABORT: + if (len != sizeof(p->abort_code) || p->abort_code) + return -EINVAL; + memcpy(&p->abort_code, CMSG_DATA(cmsg), sizeof(p->abort_code)); + if (p->abort_code == 0) + return -EINVAL; + break; + case RXRPC_RESP_RXGK_APPDATA: + if (p->command != RXRPC_OOB_CMD_RESPOND) + return -EINVAL; + break; + default: + return -EINVAL; + } + } + + switch (p->command) { + case RXRPC_OOB_CMD_RESPOND: + if (!p->have_oob_id) + return -EBADSLT; + break; + default: + return -EINVAL; + } + + return 0; +} + +/* + * Allow userspace to respond to an OOB using sendmsg(). + */ +static int rxrpc_respond_to_oob(struct rxrpc_sock *rx, + struct rxrpc_oob_params *p, + struct msghdr *msg) +{ + struct rxrpc_connection *conn; + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + int ret; + + skb = rxrpc_find_pending_oob(rx, p->oob_id); + if (skb) + rb_erase(&skb->rbnode, &rx->pending_oobq); + release_sock(&rx->sk); + if (!skb) + return -EBADSLT; + + sp = rxrpc_skb(skb); + + switch (p->command) { + case RXRPC_OOB_CMD_RESPOND: + ret = -EPROTO; + if (skb->mark != RXRPC_OOB_CHALLENGE) + break; + conn = sp->chall.conn; + ret = -EOPNOTSUPP; + if (!conn->security->sendmsg_respond_to_challenge) + break; + if (p->abort_code) { + rxrpc_abort_conn(conn, NULL, p->abort_code, -ECONNABORTED, + rxrpc_abort_response_sendmsg); + ret = 0; + } else { + ret = conn->security->sendmsg_respond_to_challenge(skb, msg); + } + break; + default: + ret = -EINVAL; + break; + } + + rxrpc_free_skb(skb, rxrpc_skb_put_oob); + return ret; +} + +/* + * Send an out-of-band message or respond to a received out-of-band message. 
+ * - caller gives us the socket lock + * - the socket may be either a client socket or a server socket + */ +int rxrpc_sendmsg_oob(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) +{ + struct rxrpc_oob_params p = {}; + int ret; + + _enter(""); + + ret = rxrpc_sendmsg_oob_cmsg(msg, &p); + if (ret < 0) + goto error_release_sock; + + if (p.have_oob_id) + return rxrpc_respond_to_oob(rx, &p, msg); + + release_sock(&rx->sk); + + switch (p.command) { + default: + ret = -EINVAL; + break; + } + + _leave(" = %d", ret); + return ret; + +error_release_sock: + release_sock(&rx->sk); + return ret; +} + +/** + * rxrpc_kernel_query_oob - Query the parameters of an out-of-band message + * @oob: The message to query + * @_peer: Where to return the peer record + * @_peer_appdata: The application data attached to a peer record + * + * Extract useful parameters from an out-of-band message. The source peer + * parameters are returned through the argument list and the message type is + * returned. + * + * Return: + * * %RXRPC_OOB_CHALLENGE - Challenge wanting a response. + */ +enum rxrpc_oob_type rxrpc_kernel_query_oob(struct sk_buff *oob, + struct rxrpc_peer **_peer, + unsigned long *_peer_appdata) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(oob); + enum rxrpc_oob_type type = oob->mark; + + switch (type) { + case RXRPC_OOB_CHALLENGE: + *_peer = sp->chall.conn->peer; + *_peer_appdata = sp->chall.conn->peer->app_data; + break; + default: + WARN_ON_ONCE(1); + *_peer = NULL; + *_peer_appdata = 0; + break; + } + + return type; +} +EXPORT_SYMBOL(rxrpc_kernel_query_oob); + +/** + * rxrpc_kernel_dequeue_oob - Dequeue and return the front OOB message + * @sock: The socket to query + * @_type: Where to return the message type + * + * Dequeue the front OOB message, if there is one, and return it and + * its type. + * + * Return: The sk_buff representing the OOB message or %NULL if the queue was + * empty. + */ +struct sk_buff *rxrpc_kernel_dequeue_oob(struct socket *sock, + enum rxrpc_oob_type *_type) +{ + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + struct sk_buff *oob; + + oob = skb_dequeue(&rx->recvmsg_oobq); + if (oob) + *_type = oob->mark; + return oob; +} +EXPORT_SYMBOL(rxrpc_kernel_dequeue_oob); + +/** + * rxrpc_kernel_free_oob - Free an out-of-band message + * @oob: The OOB message to free + * + * Free an OOB message along with any resources it holds. + */ +void rxrpc_kernel_free_oob(struct sk_buff *oob) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(oob); + + switch (oob->mark) { + case RXRPC_OOB_CHALLENGE: + rxrpc_put_connection(sp->chall.conn, rxrpc_conn_put_oob); + break; + } + + rxrpc_free_skb(oob, rxrpc_skb_put_purge_oob); +} +EXPORT_SYMBOL(rxrpc_kernel_free_oob); + +/** + * rxrpc_kernel_query_challenge - Query the parameters of a challenge + * @challenge: The challenge to query + * @_peer: Where to return the peer record + * @_peer_appdata: The application data attached to a peer record + * @_service_id: Where to return the connection service ID + * @_security_index: Where to return the connection security index + * + * Extract useful parameters from a CHALLENGE message. 
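+ *
+ * A kernel service might use this from its OOB notification handling
+ * roughly as follows (sketch only; how the response is constructed
+ * depends on the security class in use):
+ *
+ *	oob = rxrpc_kernel_dequeue_oob(sock, &type);
+ *	if (oob) {
+ *		if (type == RXRPC_OOB_CHALLENGE) {
+ *			rxrpc_kernel_query_challenge(oob, &peer, &appdata,
+ *						     &service_id, &security_index);
+ *			(respond here, or call rxrpc_kernel_reject_challenge())
+ *		}
+ *		rxrpc_kernel_free_oob(oob);
+ *	}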
+ */ +void rxrpc_kernel_query_challenge(struct sk_buff *challenge, + struct rxrpc_peer **_peer, + unsigned long *_peer_appdata, + u16 *_service_id, u8 *_security_index) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(challenge); + + *_peer = sp->chall.conn->peer; + *_peer_appdata = sp->chall.conn->peer->app_data; + *_service_id = sp->hdr.serviceId; + *_security_index = sp->hdr.securityIndex; +} +EXPORT_SYMBOL(rxrpc_kernel_query_challenge); + +/** + * rxrpc_kernel_reject_challenge - Allow a kernel service to reject a challenge + * @challenge: The challenge to be rejected + * @abort_code: The abort code to stick into the ABORT packet + * @error: Local error value + * @why: Indication as to why. + * + * Allow a kernel service to reject a challenge by aborting the connection if + * it's still in an abortable state. The error is returned so this function + * can be used with a return statement. + * + * Return: The %error parameter. + */ +int rxrpc_kernel_reject_challenge(struct sk_buff *challenge, u32 abort_code, + int error, enum rxrpc_abort_reason why) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(challenge); + + _enter("{%x},%d,%d,%u", sp->hdr.serial, abort_code, error, why); + + rxrpc_abort_conn(sp->chall.conn, NULL, abort_code, error, why); + return error; +} +EXPORT_SYMBOL(rxrpc_kernel_reject_challenge); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 95905b85a8d7..0af19bcdc80a 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -18,7 +18,7 @@ extern int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); -static ssize_t do_udp_sendmsg(struct socket *socket, struct msghdr *msg, size_t len) +ssize_t do_udp_sendmsg(struct socket *socket, struct msghdr *msg, size_t len) { struct sockaddr *sa = msg->msg_name; struct sock *sk = socket->sk; @@ -916,3 +916,61 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer) peer->last_tx_at = ktime_get_seconds(); _leave(""); } + +/* + * Send a RESPONSE message. 
+ */ +void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(response); + struct scatterlist sg[16]; + struct bio_vec bvec[16]; + struct msghdr msg; + size_t len = sp->resp.len; + __be32 wserial; + u32 serial = 0; + int ret, nr_sg; + + _enter("C=%x,%x", conn->debug_id, sp->resp.challenge_serial); + + sg_init_table(sg, ARRAY_SIZE(sg)); + ret = skb_to_sgvec(response, sg, 0, len); + if (ret < 0) + goto fail; + nr_sg = ret; + + for (int i = 0; i < nr_sg; i++) + bvec_set_page(&bvec[i], sg_page(&sg[i]), sg[i].length, sg[i].offset); + + iov_iter_bvec(&msg.msg_iter, WRITE, bvec, nr_sg, len); + + msg.msg_name = &conn->peer->srx.transport; + msg.msg_namelen = conn->peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = MSG_SPLICE_PAGES; + + serial = rxrpc_get_next_serials(conn, 1); + wserial = htonl(serial); + + trace_rxrpc_tx_response(conn, serial, sp); + + ret = skb_store_bits(response, offsetof(struct rxrpc_wire_header, serial), + &wserial, sizeof(wserial)); + if (ret < 0) + goto fail; + + rxrpc_local_dont_fragment(conn->local, false); + + ret = do_udp_sendmsg(conn->local->socket, &msg, len); + if (ret < 0) + goto fail; + + conn->peer->last_tx_at = ktime_get_seconds(); + return; + +fail: + trace_rxrpc_tx_fail(conn->debug_id, serial, ret, + rxrpc_tx_point_response); + kleave(" = %d", ret); +} diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 71b6e07bf161..e2f35e6c04d6 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -475,6 +475,8 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet) * @call: The call to query * * Get a record for the remote peer in a call. + * + * Return: The call's peer record. */ struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call) { @@ -486,7 +488,9 @@ EXPORT_SYMBOL(rxrpc_kernel_get_call_peer); * rxrpc_kernel_get_srtt - Get a call's peer smoothed RTT * @peer: The peer to query * - * Get the call's peer smoothed RTT in uS or UINT_MAX if we have no samples. + * Get the call's peer smoothed RTT. + * + * Return: The RTT in uS or %UINT_MAX if we have no samples. */ unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *peer) { @@ -499,7 +503,10 @@ EXPORT_SYMBOL(rxrpc_kernel_get_srtt); * @peer: The peer to query * * Get a pointer to the address from a peer record. The caller is responsible - * for making sure that the address is not deallocated. + * for making sure that the address is not deallocated. A fake address will be + * substituted if %peer is NULL. + * + * Return: The rxrpc address record or a fake record. */ const struct sockaddr_rxrpc *rxrpc_kernel_remote_srx(const struct rxrpc_peer *peer) { @@ -512,7 +519,10 @@ EXPORT_SYMBOL(rxrpc_kernel_remote_srx); * @peer: The peer to query * * Get a pointer to the transport address from a peer record. The caller is - * responsible for making sure that the address is not deallocated. + * responsible for making sure that the address is not deallocated. A fake + * address will be substituted if %peer is NULL. + * + * Return: The transport address record or a fake record. */ const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer) { @@ -527,7 +537,9 @@ EXPORT_SYMBOL(rxrpc_kernel_remote_addr); * @app_data: The data to set * * Set the app-specific data on a peer. AF_RXRPC makes no effort to retain - * anything the data might refer to. The previous app_data is returned. + * anything the data might refer to.
+ * + * Return: The previous app_data. */ unsigned long rxrpc_kernel_set_peer_data(struct rxrpc_peer *peer, unsigned long app_data) { @@ -540,6 +552,8 @@ EXPORT_SYMBOL(rxrpc_kernel_set_peer_data); * @peer: The peer to query * * Retrieve the app-specific data from a peer. + * + * Return: The peer's app data. */ unsigned long rxrpc_kernel_get_peer_data(const struct rxrpc_peer *peer) { diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h index 42f70e4636f8..f8bfec12bc7e 100644 --- a/net/rxrpc/protocol.h +++ b/net/rxrpc/protocol.h @@ -181,4 +181,24 @@ struct rxkad_response { __be32 ticket_len; /* Kerberos ticket length */ } __packed; +/* + * GSSAPI security type-4 and type-6 data header. + */ +struct rxgk_header { + __be32 epoch; + __be32 cid; + __be32 call_number; + __be32 seq; + __be32 sec_index; + __be32 data_len; +} __packed; + +/* + * GSSAPI security type-4 and type-6 response packet header. + */ +struct rxgk_response { + __be64 start_time; + __be32 token_len; +} __packed; + #endif /* _LINUX_RXRPC_PACKET_H */ diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 32cd5f1d541d..86a27fb55a1c 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -155,6 +155,82 @@ static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb) } /* + * Transcribe a call's user ID to a control message. + */ +static int rxrpc_recvmsg_user_id(struct rxrpc_call *call, struct msghdr *msg, + int flags) +{ + if (!test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) + return 0; + + if (flags & MSG_CMSG_COMPAT) { + unsigned int id32 = call->user_call_ID; + + return put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, + sizeof(unsigned int), &id32); + } else { + unsigned long idl = call->user_call_ID; + + return put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, + sizeof(unsigned long), &idl); + } +} + +/* + * Deal with a CHALLENGE packet. + */ +static int rxrpc_recvmsg_challenge(struct socket *sock, struct msghdr *msg, + struct sk_buff *challenge, unsigned int flags) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(challenge); + struct rxrpc_connection *conn = sp->chall.conn; + + return conn->security->challenge_to_recvmsg(conn, challenge, msg); +} + +/* + * Process OOB packets. Called with the socket locked. + */ +static int rxrpc_recvmsg_oob(struct socket *sock, struct msghdr *msg, + unsigned int flags) +{ + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + struct sk_buff *skb; + bool need_response = false; + int ret; + + skb = skb_peek(&rx->recvmsg_oobq); + if (!skb) + return -EAGAIN; + rxrpc_see_skb(skb, rxrpc_skb_see_recvmsg); + + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_OOB_ID, sizeof(u64), + &skb->skb_mstamp_ns); + if (ret < 0) + return ret; + + switch ((enum rxrpc_oob_type)skb->mark) { + case RXRPC_OOB_CHALLENGE: + need_response = true; + ret = rxrpc_recvmsg_challenge(sock, msg, skb, flags); + break; + default: + WARN_ONCE(1, "recvmsg() can't process unknown OOB type %u\n", + skb->mark); + ret = -EIO; + break; + } + + if (!(flags & MSG_PEEK)) + skb_unlink(skb, &rx->recvmsg_oobq); + if (need_response) + rxrpc_add_pending_oob(rx, skb); + else + rxrpc_free_skb(skb, rxrpc_skb_put_oob); + return ret; +} + +/* * Deliver messages to a call. This keeps processing packets until the buffer * is filled and we find either more DATA (returns 0) or the end of the DATA * (returns 1). 
If more packets are required, it returns -EAGAIN and if the @@ -165,6 +241,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, size_t len, int flags, size_t *_offset) { struct rxrpc_skb_priv *sp; + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); struct sk_buff *skb; rxrpc_seq_t seq = 0; size_t remain; @@ -207,7 +284,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq, sp->offset, sp->len, ret2); if (ret2 < 0) { - kdebug("verify = %d", ret2); ret = ret2; goto out; } @@ -255,6 +331,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (!(flags & MSG_PEEK)) rxrpc_rotate_rx_window(call); + + if (!rx->app_ops && + !skb_queue_empty_lockless(&rx->recvmsg_oobq)) { + trace_rxrpc_recvdata(call, rxrpc_recvmsg_oobq, seq, + rx_pkt_offset, rx_pkt_len, ret); + break; + } } out: @@ -262,6 +345,7 @@ out: call->rx_pkt_offset = rx_pkt_offset; call->rx_pkt_len = rx_pkt_len; } + done: trace_rxrpc_recvdata(call, rxrpc_recvmsg_data_return, seq, rx_pkt_offset, rx_pkt_len, ret); @@ -301,6 +385,7 @@ try_again: /* Return immediately if a client socket has no outstanding calls */ if (RB_EMPTY_ROOT(&rx->calls) && list_empty(&rx->recvmsg_q) && + skb_queue_empty_lockless(&rx->recvmsg_oobq) && rx->sk.sk_state != RXRPC_SERVER_LISTENING) { release_sock(&rx->sk); return -EAGAIN; @@ -322,7 +407,8 @@ try_again: if (ret) goto wait_error; - if (list_empty(&rx->recvmsg_q)) { + if (list_empty(&rx->recvmsg_q) && + skb_queue_empty_lockless(&rx->recvmsg_oobq)) { if (signal_pending(current)) goto wait_interrupted; trace_rxrpc_recvmsg(0, rxrpc_recvmsg_wait, 0); @@ -332,6 +418,15 @@ try_again: goto try_again; } + /* Deal with OOB messages before we consider getting normal data. */ + if (!skb_queue_empty_lockless(&rx->recvmsg_oobq)) { + ret = rxrpc_recvmsg_oob(sock, msg, flags); + release_sock(&rx->sk); + if (ret == -EAGAIN) + goto try_again; + goto error_no_call; + } + /* Find the next call and dequeue it if we're not just peeking. If we * do dequeue it, that comes with a ref that we will need to release. * We also want to weed out calls that got requeued whilst we were @@ -342,7 +437,8 @@ try_again: call = list_entry(l, struct rxrpc_call, recvmsg_link); if (!rxrpc_call_is_complete(call) && - skb_queue_empty(&call->recvmsg_queue)) { + skb_queue_empty(&call->recvmsg_queue) && + skb_queue_empty(&rx->recvmsg_oobq)) { list_del_init(&call->recvmsg_link); spin_unlock_irq(&rx->recvmsg_lock); release_sock(&rx->sk); @@ -377,21 +473,9 @@ try_again: if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); - if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { - if (flags & MSG_CMSG_COMPAT) { - unsigned int id32 = call->user_call_ID; - - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, - sizeof(unsigned int), &id32); - } else { - unsigned long idl = call->user_call_ID; - - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, - sizeof(unsigned long), &idl); - } - if (ret < 0) - goto error_unlock_call; - } + ret = rxrpc_recvmsg_user_id(call, msg, flags); + if (ret < 0) + goto error_unlock_call; if (msg->msg_name && call->peer) { size_t len = sizeof(call->dest_srx); @@ -477,14 +561,14 @@ wait_error: * @_service: Where to store the actual service ID (may be upgraded) * * Allow a kernel service to receive data and pick up information about the - * state of a call. 
Returns 0 if got what was asked for and there's more - * available, 1 if we got what was asked for and we're at the end of the data - * and -EAGAIN if we need more data. + * state of a call. Note that *@_abort should also be initialised to %0. * - * Note that we may return -EAGAIN to drain empty packets at the end of the - * data, even if we've already copied over the requested data. + * Note that we may return %-EAGAIN to drain empty packets at the end + * of the data, even if we've already copied over the requested data. * - * *_abort should also be initialised to 0. + * Return: %0 if got what was asked for and there's more available, %1 + * if we got what was asked for and we're at the end of the data and + * %-EAGAIN if we need more data. */ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, struct iov_iter *iter, size_t *_len, diff --git a/net/rxrpc/rxgk.c b/net/rxrpc/rxgk.c new file mode 100644 index 000000000000..1e19c605bcc8 --- /dev/null +++ b/net/rxrpc/rxgk.c @@ -0,0 +1,1371 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* GSSAPI-based RxRPC security + * + * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/key-type.h> +#include "ar-internal.h" +#include "rxgk_common.h" + +/* + * Parse the information from a server key + */ +static int rxgk_preparse_server_key(struct key_preparsed_payload *prep) +{ + const struct krb5_enctype *krb5; + struct krb5_buffer *server_key = (void *)&prep->payload.data[2]; + unsigned int service, sec_class, kvno, enctype; + int n = 0; + + _enter("%zu", prep->datalen); + + if (sscanf(prep->orig_description, "%u:%u:%u:%u%n", + &service, &sec_class, &kvno, &enctype, &n) != 4) + return -EINVAL; + + if (prep->orig_description[n]) + return -EINVAL; + + krb5 = crypto_krb5_find_enctype(enctype); + if (!krb5) + return -ENOPKG; + + prep->payload.data[0] = (struct krb5_enctype *)krb5; + + if (prep->datalen != krb5->key_len) + return -EKEYREJECTED; + + server_key->len = prep->datalen; + server_key->data = kmemdup(prep->data, prep->datalen, GFP_KERNEL); + if (!server_key->data) + return -ENOMEM; + + _leave(" = 0"); + return 0; +} + +static void rxgk_free_server_key(union key_payload *payload) +{ + struct krb5_buffer *server_key = (void *)&payload->data[2]; + + kfree_sensitive(server_key->data); +} + +static void rxgk_free_preparse_server_key(struct key_preparsed_payload *prep) +{ + rxgk_free_server_key(&prep->payload); +} + +static void rxgk_destroy_server_key(struct key *key) +{ + rxgk_free_server_key(&key->payload); +} + +static void rxgk_describe_server_key(const struct key *key, struct seq_file *m) +{ + const struct krb5_enctype *krb5 = key->payload.data[0]; + + if (krb5) + seq_printf(m, ": %s", krb5->name); +} + +/* + * Handle rekeying the connection when we see our limits overrun or when the + * far side decided to rekey. + * + * Returns a ref on the context if successful or -ESTALE if the key is out of + * date. + */ +static struct rxgk_context *rxgk_rekey(struct rxrpc_connection *conn, + const u16 *specific_key_number) +{ + struct rxgk_context *gk, *dead = NULL; + unsigned int key_number, current_key, mask = ARRAY_SIZE(conn->rxgk.keys) - 1; + bool crank = false; + + _enter("%d", specific_key_number ? 
*specific_key_number : -1); + + mutex_lock(&conn->security_lock); + + current_key = conn->rxgk.key_number; + if (!specific_key_number) { + key_number = current_key; + } else { + if (*specific_key_number == (u16)current_key) + key_number = current_key; + else if (*specific_key_number == (u16)(current_key - 1)) + key_number = current_key - 1; + else if (*specific_key_number == (u16)(current_key + 1)) + goto crank_window; + else + goto bad_key; + } + + gk = conn->rxgk.keys[key_number & mask]; + if (!gk) + goto generate_key; + if (!specific_key_number && + test_bit(RXGK_TK_NEEDS_REKEY, &gk->flags)) + goto crank_window; + +grab: + refcount_inc(&gk->usage); + mutex_unlock(&conn->security_lock); + rxgk_put(dead); + return gk; + +crank_window: + trace_rxrpc_rxgk_rekey(conn, current_key, + specific_key_number ? *specific_key_number : -1); + if (current_key == UINT_MAX) + goto bad_key; + if (current_key + 1 == UINT_MAX) + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + + key_number = current_key + 1; + if (WARN_ON(conn->rxgk.keys[key_number & mask])) + goto bad_key; + crank = true; + +generate_key: + gk = conn->rxgk.keys[current_key & mask]; + gk = rxgk_generate_transport_key(conn, gk->key, key_number, GFP_NOFS); + if (IS_ERR(gk)) { + mutex_unlock(&conn->security_lock); + return gk; + } + + write_lock(&conn->security_use_lock); + if (crank) { + current_key++; + conn->rxgk.key_number = current_key; + dead = conn->rxgk.keys[(current_key - 2) & mask]; + conn->rxgk.keys[(current_key - 2) & mask] = NULL; + } + conn->rxgk.keys[current_key & mask] = gk; + write_unlock(&conn->security_use_lock); + goto grab; + +bad_key: + mutex_unlock(&conn->security_lock); + return ERR_PTR(-ESTALE); +} + +/* + * Get the specified keying context. + * + * Returns a ref on the context if successful or -ESTALE if the key is out of + * date. + */ +static struct rxgk_context *rxgk_get_key(struct rxrpc_connection *conn, + const u16 *specific_key_number) +{ + struct rxgk_context *gk; + unsigned int key_number, current_key, mask = ARRAY_SIZE(conn->rxgk.keys) - 1; + + _enter("{%u},%d", + conn->rxgk.key_number, specific_key_number ? *specific_key_number : -1); + + read_lock(&conn->security_use_lock); + + current_key = conn->rxgk.key_number; + if (!specific_key_number) { + key_number = current_key; + } else { + /* Only the bottom 16 bits of the key number are exposed in the + * header, so we try and keep the upper 16 bits in step. The + * whole 32 bits are used to generate the TK. 
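+ *
+ * For example, if the full key number is currently 0x10002, the packet
+ * header can only quote 0x0002: a quoted 0x0001 is taken to mean key
+ * 0x10001, a quoted 0x0003 asks us to advance to key 0x10003 (forcing a
+ * rekey), and anything else is rejected with -ESTALE.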
+ */ + if (*specific_key_number == (u16)current_key) + key_number = current_key; + else if (*specific_key_number == (u16)(current_key - 1)) + key_number = current_key - 1; + else if (*specific_key_number == (u16)(current_key + 1)) + goto rekey; + else + goto bad_key; + } + + gk = conn->rxgk.keys[key_number & mask]; + if (!gk) + goto slow_path; + if (!specific_key_number && + key_number < UINT_MAX) { + if (time_after(jiffies, gk->expiry) || + gk->bytes_remaining < 0) { + set_bit(RXGK_TK_NEEDS_REKEY, &gk->flags); + goto slow_path; + } + + if (test_bit(RXGK_TK_NEEDS_REKEY, &gk->flags)) + goto slow_path; + } + + refcount_inc(&gk->usage); + read_unlock(&conn->security_use_lock); + return gk; + +rekey: + _debug("rekey"); + if (current_key == UINT_MAX) + goto bad_key; + gk = conn->rxgk.keys[current_key & mask]; + if (gk) + set_bit(RXGK_TK_NEEDS_REKEY, &gk->flags); +slow_path: + read_unlock(&conn->security_use_lock); + return rxgk_rekey(conn, specific_key_number); +bad_key: + read_unlock(&conn->security_use_lock); + return ERR_PTR(-ESTALE); +} + +/* + * initialise connection security + */ +static int rxgk_init_connection_security(struct rxrpc_connection *conn, + struct rxrpc_key_token *token) +{ + struct rxgk_context *gk; + int ret; + + _enter("{%d,%u},{%x}", + conn->debug_id, conn->rxgk.key_number, key_serial(conn->key)); + + conn->security_ix = token->security_index; + conn->security_level = token->rxgk->level; + + if (rxrpc_conn_is_client(conn)) { + conn->rxgk.start_time = ktime_get(); + do_div(conn->rxgk.start_time, 100); + } + + gk = rxgk_generate_transport_key(conn, token->rxgk, conn->rxgk.key_number, + GFP_NOFS); + if (IS_ERR(gk)) + return PTR_ERR(gk); + conn->rxgk.enctype = gk->krb5->etype; + conn->rxgk.keys[gk->key_number & 3] = gk; + + switch (conn->security_level) { + case RXRPC_SECURITY_PLAIN: + case RXRPC_SECURITY_AUTH: + case RXRPC_SECURITY_ENCRYPT: + break; + default: + ret = -EKEYREJECTED; + goto error; + } + + ret = 0; +error: + _leave(" = %d", ret); + return ret; +} + +/* + * Clean up the crypto on a call. + */ +static void rxgk_free_call_crypto(struct rxrpc_call *call) +{ +} + +/* + * Work out how much data we can put in a packet. + */ +static struct rxrpc_txbuf *rxgk_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp) +{ + enum krb5_crypto_mode mode; + struct rxgk_context *gk; + struct rxrpc_txbuf *txb; + size_t shdr, alloc, limit, part, offset, gap; + + switch (call->conn->security_level) { + default: + alloc = umin(remain, RXRPC_JUMBO_DATALEN); + return rxrpc_alloc_data_txbuf(call, alloc, 1, gfp); + case RXRPC_SECURITY_AUTH: + shdr = 0; + mode = KRB5_CHECKSUM_MODE; + break; + case RXRPC_SECURITY_ENCRYPT: + shdr = sizeof(struct rxgk_header); + mode = KRB5_ENCRYPT_MODE; + break; + } + + gk = rxgk_get_key(call->conn, NULL); + if (IS_ERR(gk)) + return NULL; + + /* Work out the maximum amount of data that will fit. 
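+ *
+ * For example, at the ENCRYPT level a buffer of RXRPC_JUMBO_DATALEN
+ * bytes yields crypto_krb5_how_much_data() worth of payload once the
+ * enctype's confounder and checksum overhead is deducted, and we then
+ * reserve sizeof(struct rxgk_header) of that for the security header;
+ * the exact figures depend on the Kerberos enctype in use.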
*/ + alloc = RXRPC_JUMBO_DATALEN; + limit = crypto_krb5_how_much_data(gk->krb5, mode, &alloc, &offset); + + if (remain < limit - shdr) { + part = remain; + alloc = crypto_krb5_how_much_buffer(gk->krb5, mode, + shdr + part, &offset); + gap = 0; + } else { + part = limit - shdr; + gap = RXRPC_JUMBO_DATALEN - alloc; + alloc = RXRPC_JUMBO_DATALEN; + } + + rxgk_put(gk); + + txb = rxrpc_alloc_data_txbuf(call, alloc, 16, gfp); + if (!txb) + return NULL; + + txb->crypto_header = offset; + txb->sec_header = shdr; + txb->offset += offset + shdr; + txb->space = part; + + /* Clear excess space in the packet */ + if (gap) + memset(txb->data + alloc - gap, 0, gap); + return txb; +} + +/* + * Integrity mode (sign a packet - level 1 security) + */ +static int rxgk_secure_packet_integrity(const struct rxrpc_call *call, + struct rxgk_context *gk, + struct rxrpc_txbuf *txb) +{ + struct rxgk_header *hdr; + struct scatterlist sg[1]; + struct krb5_buffer metadata; + int ret = -ENOMEM; + + _enter(""); + + hdr = kzalloc(sizeof(*hdr), GFP_NOFS); + if (!hdr) + goto error_gk; + + hdr->epoch = htonl(call->conn->proto.epoch); + hdr->cid = htonl(call->cid); + hdr->call_number = htonl(call->call_id); + hdr->seq = htonl(txb->seq); + hdr->sec_index = htonl(call->security_ix); + hdr->data_len = htonl(txb->len); + metadata.len = sizeof(*hdr); + metadata.data = hdr; + + sg_init_table(sg, 1); + sg_set_buf(&sg[0], txb->data, txb->alloc_size); + + ret = crypto_krb5_get_mic(gk->krb5, gk->tx_Kc, &metadata, + sg, 1, txb->alloc_size, + txb->crypto_header, txb->sec_header + txb->len); + if (ret >= 0) { + txb->pkt_len = ret; + if (txb->alloc_size == RXRPC_JUMBO_DATALEN) + txb->jumboable = true; + gk->bytes_remaining -= ret; + } + kfree(hdr); +error_gk: + rxgk_put(gk); + _leave(" = %d", ret); + return ret; +} + +/* + * wholly encrypt a packet (level 2 security) + */ +static int rxgk_secure_packet_encrypted(const struct rxrpc_call *call, + struct rxgk_context *gk, + struct rxrpc_txbuf *txb) +{ + struct rxgk_header *hdr; + struct scatterlist sg[1]; + int ret; + + _enter("%x", txb->len); + + /* Insert the header into the buffer. */ + hdr = txb->data + txb->crypto_header; + hdr->epoch = htonl(call->conn->proto.epoch); + hdr->cid = htonl(call->cid); + hdr->call_number = htonl(call->call_id); + hdr->seq = htonl(txb->seq); + hdr->sec_index = htonl(call->security_ix); + hdr->data_len = htonl(txb->len); + + sg_init_table(sg, 1); + sg_set_buf(&sg[0], txb->data, txb->alloc_size); + + ret = crypto_krb5_encrypt(gk->krb5, gk->tx_enc, + sg, 1, txb->alloc_size, + txb->crypto_header, txb->sec_header + txb->len, + false); + if (ret >= 0) { + txb->pkt_len = ret; + if (txb->alloc_size == RXRPC_JUMBO_DATALEN) + txb->jumboable = true; + gk->bytes_remaining -= ret; + } + + rxgk_put(gk); + _leave(" = %d", ret); + return ret; +} + +/* + * checksum an RxRPC packet header + */ +static int rxgk_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) +{ + struct rxgk_context *gk; + int ret; + + _enter("{%d{%x}},{#%u},%u,", + call->debug_id, key_serial(call->conn->key), txb->seq, txb->len); + + gk = rxgk_get_key(call->conn, NULL); + if (IS_ERR(gk)) + return PTR_ERR(gk) == -ESTALE ? 
-EKEYREJECTED : PTR_ERR(gk); + + ret = key_validate(call->conn->key); + if (ret < 0) { + rxgk_put(gk); + return ret; + } + + call->security_enctype = gk->krb5->etype; + txb->cksum = htons(gk->key_number); + + switch (call->conn->security_level) { + case RXRPC_SECURITY_PLAIN: + rxgk_put(gk); + txb->pkt_len = txb->len; + return 0; + case RXRPC_SECURITY_AUTH: + return rxgk_secure_packet_integrity(call, gk, txb); + case RXRPC_SECURITY_ENCRYPT: + return rxgk_secure_packet_encrypted(call, gk, txb); + default: + rxgk_put(gk); + return -EPERM; + } +} + +/* + * Integrity mode (check the signature on a packet - level 1 security) + */ +static int rxgk_verify_packet_integrity(struct rxrpc_call *call, + struct rxgk_context *gk, + struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxgk_header *hdr; + struct krb5_buffer metadata; + unsigned int offset = sp->offset, len = sp->len; + size_t data_offset = 0, data_len = len; + u32 ac; + int ret = -ENOMEM; + + _enter(""); + + crypto_krb5_where_is_the_data(gk->krb5, KRB5_CHECKSUM_MODE, + &data_offset, &data_len); + + hdr = kzalloc(sizeof(*hdr), GFP_NOFS); + if (!hdr) + goto put_gk; + + hdr->epoch = htonl(call->conn->proto.epoch); + hdr->cid = htonl(call->cid); + hdr->call_number = htonl(call->call_id); + hdr->seq = htonl(sp->hdr.seq); + hdr->sec_index = htonl(call->security_ix); + hdr->data_len = htonl(data_len); + + metadata.len = sizeof(*hdr); + metadata.data = hdr; + ret = rxgk_verify_mic_skb(gk->krb5, gk->rx_Kc, &metadata, + skb, &offset, &len, &ac); + kfree(hdr); + if (ret == -EPROTO) { + rxrpc_abort_eproto(call, skb, ac, + rxgk_abort_1_verify_mic_eproto); + } else { + sp->offset = offset; + sp->len = len; + } + +put_gk: + rxgk_put(gk); + _leave(" = %d", ret); + return ret; +} + +/* + * Decrypt an encrypted packet (level 2 security). + */ +static int rxgk_verify_packet_encrypted(struct rxrpc_call *call, + struct rxgk_context *gk, + struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxgk_header hdr; + unsigned int offset = sp->offset, len = sp->len; + int ret; + u32 ac; + + _enter(""); + + ret = rxgk_decrypt_skb(gk->krb5, gk->rx_enc, skb, &offset, &len, &ac); + if (ret == -EPROTO) + rxrpc_abort_eproto(call, skb, ac, rxgk_abort_2_decrypt_eproto); + if (ret < 0) + goto error; + + if (len < sizeof(hdr)) { + ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT, + rxgk_abort_2_short_header); + goto error; + } + + /* Extract the header from the skb */ + ret = skb_copy_bits(skb, offset, &hdr, sizeof(hdr)); + if (ret < 0) { + ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT, + rxgk_abort_2_short_encdata); + goto error; + } + offset += sizeof(hdr); + len -= sizeof(hdr); + + if (ntohl(hdr.epoch) != call->conn->proto.epoch || + ntohl(hdr.cid) != call->cid || + ntohl(hdr.call_number) != call->call_id || + ntohl(hdr.seq) != sp->hdr.seq || + ntohl(hdr.sec_index) != call->security_ix || + ntohl(hdr.data_len) > len) { + ret = rxrpc_abort_eproto(call, skb, RXGK_SEALEDINCON, + rxgk_abort_2_short_data); + goto error; + } + + sp->offset = offset; + sp->len = ntohl(hdr.data_len); + ret = 0; +error: + rxgk_put(gk); + _leave(" = %d", ret); + return ret; +} + +/* + * Verify the security on a received packet or subpacket (if part of a + * jumbo packet). 
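+ *
+ * The transport key generation to check against is quoted in the packet
+ * header's cksum field: an unknown generation either triggers a rekey
+ * (if it is one ahead of the current key) or gets the call aborted with
+ * RXGK_BADKEYNO.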
+ */ +static int rxgk_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxgk_context *gk; + u16 key_number = sp->hdr.cksum; + + _enter("{%d{%x}},{#%u}", + call->debug_id, key_serial(call->conn->key), sp->hdr.seq); + + gk = rxgk_get_key(call->conn, &key_number); + if (IS_ERR(gk)) { + switch (PTR_ERR(gk)) { + case -ESTALE: + return rxrpc_abort_eproto(call, skb, RXGK_BADKEYNO, + rxgk_abort_bad_key_number); + default: + return PTR_ERR(gk); + } + } + + call->security_enctype = gk->krb5->etype; + switch (call->conn->security_level) { + case RXRPC_SECURITY_PLAIN: + rxgk_put(gk); + return 0; + case RXRPC_SECURITY_AUTH: + return rxgk_verify_packet_integrity(call, gk, skb); + case RXRPC_SECURITY_ENCRYPT: + return rxgk_verify_packet_encrypted(call, gk, skb); + default: + rxgk_put(gk); + return -ENOANO; + } +} + +/* + * Allocate memory to hold a challenge or a response packet. We're not running + * in the io_thread, so we can't use ->tx_alloc. + */ +static struct page *rxgk_alloc_packet(size_t total_len) +{ + gfp_t gfp = GFP_NOFS; + int order; + + order = get_order(total_len); + if (order > 0) + gfp |= __GFP_COMP; + return alloc_pages(gfp, order); +} + +/* + * Issue a challenge. + */ +static int rxgk_issue_challenge(struct rxrpc_connection *conn) +{ + struct rxrpc_wire_header *whdr; + struct bio_vec bvec[1]; + struct msghdr msg; + struct page *page; + size_t len = sizeof(*whdr) + sizeof(conn->rxgk.nonce); + u32 serial; + int ret; + + _enter("{%d}", conn->debug_id); + + get_random_bytes(&conn->rxgk.nonce, sizeof(conn->rxgk.nonce)); + + /* We can't use conn->tx_alloc without a lock */ + page = rxgk_alloc_packet(sizeof(*whdr) + sizeof(conn->rxgk.nonce)); + if (!page) + return -ENOMEM; + + bvec_set_page(&bvec[0], page, len, 0); + iov_iter_bvec(&msg.msg_iter, WRITE, bvec, 1, len); + + msg.msg_name = &conn->peer->srx.transport; + msg.msg_namelen = conn->peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = MSG_SPLICE_PAGES; + + whdr = page_address(page); + whdr->epoch = htonl(conn->proto.epoch); + whdr->cid = htonl(conn->proto.cid); + whdr->callNumber = 0; + whdr->seq = 0; + whdr->type = RXRPC_PACKET_TYPE_CHALLENGE; + whdr->flags = conn->out_clientflag; + whdr->userStatus = 0; + whdr->securityIndex = conn->security_ix; + whdr->_rsvd = 0; + whdr->serviceId = htons(conn->service_id); + + memcpy(whdr + 1, conn->rxgk.nonce, sizeof(conn->rxgk.nonce)); + + serial = rxrpc_get_next_serials(conn, 1); + whdr->serial = htonl(serial); + + trace_rxrpc_tx_challenge(conn, serial, 0, *(u32 *)&conn->rxgk.nonce); + + ret = do_udp_sendmsg(conn->local->socket, &msg, len); + if (ret > 0) + conn->peer->last_tx_at = ktime_get_seconds(); + __free_page(page); + + if (ret < 0) { + trace_rxrpc_tx_fail(conn->debug_id, serial, ret, + rxrpc_tx_point_rxgk_challenge); + return -EAGAIN; + } + + trace_rxrpc_tx_packet(conn->debug_id, whdr, + rxrpc_tx_point_rxgk_challenge); + _leave(" = 0"); + return 0; +} + +/* + * Validate a challenge packet. 
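+ *
+ * This only checks that the connection has a usable key and that the packet
+ * is large enough to carry the 20-byte nonce; the response itself is
+ * constructed later, once any application data is available.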
+ */ +static bool rxgk_validate_challenge(struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + u8 nonce[20]; + + if (!conn->key) { + rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, + rxgk_abort_chall_no_key); + return false; + } + + if (key_validate(conn->key) < 0) { + rxrpc_abort_conn(conn, skb, RXGK_EXPIRED, -EPROTO, + rxgk_abort_chall_key_expired); + return false; + } + + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + nonce, sizeof(nonce)) < 0) { + rxrpc_abort_conn(conn, skb, RXGK_PACKETSHORT, -EPROTO, + rxgk_abort_chall_short); + return false; + } + + trace_rxrpc_rx_challenge(conn, sp->hdr.serial, 0, *(u32 *)nonce, 0); + return true; +} + +/** + * rxgk_kernel_query_challenge - Query RxGK-specific challenge parameters + * @challenge: The challenge packet to query + * + * Return: The Kerberos 5 encoding type for the challenged connection. + */ +u32 rxgk_kernel_query_challenge(struct sk_buff *challenge) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(challenge); + + return sp->chall.conn->rxgk.enctype; +} +EXPORT_SYMBOL(rxgk_kernel_query_challenge); + +/* + * Fill out the control message to pass to userspace to inform about the + * challenge. + */ +static int rxgk_challenge_to_recvmsg(struct rxrpc_connection *conn, + struct sk_buff *challenge, + struct msghdr *msg) +{ + struct rxgk_challenge chall; + + chall.base.service_id = conn->service_id; + chall.base.security_index = conn->security_ix; + chall.enctype = conn->rxgk.enctype; + + return put_cmsg(msg, SOL_RXRPC, RXRPC_CHALLENGED, sizeof(chall), &chall); +} + +/* + * Insert the requisite amount of XDR padding for the length given. + */ +static int rxgk_pad_out(struct sk_buff *response, size_t len, size_t offset) +{ + __be32 zero = 0; + size_t pad = xdr_round_up(len) - len; + int ret; + + if (!pad) + return 0; + + ret = skb_store_bits(response, offset, &zero, pad); + if (ret < 0) + return ret; + return pad; +} + +/* + * Insert the header into the response. + */ +static noinline ssize_t rxgk_insert_response_header(struct rxrpc_connection *conn, + struct rxgk_context *gk, + struct sk_buff *response, + size_t offset) +{ + struct rxrpc_skb_priv *rsp = rxrpc_skb(response); + + struct { + struct rxrpc_wire_header whdr; + __be32 start_time_msw; + __be32 start_time_lsw; + __be32 ticket_len; + } h; + int ret; + + rsp->resp.kvno = gk->key_number; + rsp->resp.version = gk->krb5->etype; + + h.whdr.epoch = htonl(conn->proto.epoch); + h.whdr.cid = htonl(conn->proto.cid); + h.whdr.callNumber = 0; + h.whdr.serial = 0; + h.whdr.seq = 0; + h.whdr.type = RXRPC_PACKET_TYPE_RESPONSE; + h.whdr.flags = conn->out_clientflag; + h.whdr.userStatus = 0; + h.whdr.securityIndex = conn->security_ix; + h.whdr.cksum = htons(gk->key_number); + h.whdr.serviceId = htons(conn->service_id); + h.start_time_msw = htonl(upper_32_bits(conn->rxgk.start_time)); + h.start_time_lsw = htonl(lower_32_bits(conn->rxgk.start_time)); + h.ticket_len = htonl(gk->key->ticket.len); + + ret = skb_store_bits(response, offset, &h, sizeof(h)); + return ret < 0 ? 
ret : sizeof(h); +} + +/* + * Construct the authenticator to go in the response packet + * + * struct RXGK_Authenticator { + * opaque nonce[20]; + * opaque appdata<>; + * RXGK_Level level; + * unsigned int epoch; + * unsigned int cid; + * unsigned int call_numbers<>; + * }; + */ +static ssize_t rxgk_construct_authenticator(struct rxrpc_connection *conn, + struct sk_buff *challenge, + const struct krb5_buffer *appdata, + struct sk_buff *response, + size_t offset) +{ + struct { + u8 nonce[20]; + __be32 appdata_len; + } a; + struct { + __be32 level; + __be32 epoch; + __be32 cid; + __be32 call_numbers_count; + __be32 call_numbers[4]; + } b; + int ret; + + ret = skb_copy_bits(challenge, sizeof(struct rxrpc_wire_header), + a.nonce, sizeof(a.nonce)); + if (ret < 0) + return -EPROTO; + + a.appdata_len = htonl(appdata->len); + + ret = skb_store_bits(response, offset, &a, sizeof(a)); + if (ret < 0) + return ret; + offset += sizeof(a); + + if (appdata->len) { + ret = skb_store_bits(response, offset, appdata->data, appdata->len); + if (ret < 0) + return ret; + offset += appdata->len; + + ret = rxgk_pad_out(response, appdata->len, offset); + if (ret < 0) + return ret; + offset += ret; + } + + b.level = htonl(conn->security_level); + b.epoch = htonl(conn->proto.epoch); + b.cid = htonl(conn->proto.cid); + b.call_numbers_count = htonl(4); + b.call_numbers[0] = htonl(conn->channels[0].call_counter); + b.call_numbers[1] = htonl(conn->channels[1].call_counter); + b.call_numbers[2] = htonl(conn->channels[2].call_counter); + b.call_numbers[3] = htonl(conn->channels[3].call_counter); + + ret = skb_store_bits(response, offset, &b, sizeof(b)); + if (ret < 0) + return ret; + return sizeof(a) + xdr_round_up(appdata->len) + sizeof(b); +} + +static ssize_t rxgk_encrypt_authenticator(struct rxrpc_connection *conn, + struct rxgk_context *gk, + struct sk_buff *response, + size_t offset, + size_t alloc_len, + size_t auth_offset, + size_t auth_len) +{ + struct scatterlist sg[16]; + int nr_sg; + + sg_init_table(sg, ARRAY_SIZE(sg)); + nr_sg = skb_to_sgvec(response, sg, offset, alloc_len); + if (unlikely(nr_sg < 0)) + return nr_sg; + return crypto_krb5_encrypt(gk->krb5, gk->resp_enc, sg, nr_sg, alloc_len, + auth_offset, auth_len, false); +} + +/* + * Construct the response. 
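+ *
+ * The entire response is assembled in a single sk_buff: the header, ticket
+ * and authenticator are written at precomputed offsets and the
+ * authenticator is then encrypted in place.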
+ * + * struct RXGK_Response { + * rxgkTime start_time; + * RXGK_Data token; + * opaque authenticator<RXGK_MAXAUTHENTICATOR> + * }; + */ +static int rxgk_construct_response(struct rxrpc_connection *conn, + struct sk_buff *challenge, + struct krb5_buffer *appdata) +{ + struct rxrpc_skb_priv *csp, *rsp; + struct rxgk_context *gk; + struct sk_buff *response; + size_t len, auth_len, authx_len, offset, auth_offset, authx_offset; + __be32 tmp; + int ret; + + gk = rxgk_get_key(conn, NULL); + if (IS_ERR(gk)) + return PTR_ERR(gk); + + auth_len = 20 + (4 + appdata->len) + 12 + (1 + 4) * 4; + authx_len = crypto_krb5_how_much_buffer(gk->krb5, KRB5_ENCRYPT_MODE, + auth_len, &auth_offset); + len = sizeof(struct rxrpc_wire_header) + + 8 + (4 + xdr_round_up(gk->key->ticket.len)) + (4 + authx_len); + + response = alloc_skb_with_frags(0, len, 0, &ret, GFP_NOFS); + if (!response) + goto error; + rxrpc_new_skb(response, rxrpc_skb_new_response_rxgk); + response->len = len; + response->data_len = len; + + ret = rxgk_insert_response_header(conn, gk, response, 0); + if (ret < 0) + goto error; + offset = ret; + + ret = skb_store_bits(response, offset, gk->key->ticket.data, gk->key->ticket.len); + if (ret < 0) + goto error; + offset += gk->key->ticket.len; + ret = rxgk_pad_out(response, gk->key->ticket.len, offset); + if (ret < 0) + goto error; + + authx_offset = offset + ret + 4; /* Leave a gap for the length. */ + + ret = rxgk_construct_authenticator(conn, challenge, appdata, response, + authx_offset + auth_offset); + if (ret < 0) + goto error; + auth_len = ret; + + ret = rxgk_encrypt_authenticator(conn, gk, response, + authx_offset, authx_len, + auth_offset, auth_len); + if (ret < 0) + goto error; + authx_len = ret; + + tmp = htonl(authx_len); + ret = skb_store_bits(response, authx_offset - 4, &tmp, 4); + if (ret < 0) + goto error; + + ret = rxgk_pad_out(response, authx_len, authx_offset + authx_len); + if (ret < 0) + goto error; + len = authx_offset + authx_len + ret; + + if (len != response->len) { + response->len = len; + response->data_len = len; + } + + csp = rxrpc_skb(challenge); + rsp = rxrpc_skb(response); + rsp->resp.len = len; + rsp->resp.challenge_serial = csp->hdr.serial; + rxrpc_post_response(conn, response); + response = NULL; + ret = 0; + +error: + rxrpc_free_skb(response, rxrpc_skb_put_response); + rxgk_put(gk); + _leave(" = %d", ret); + return ret; +} + +/* + * Respond to a challenge packet. + */ +static int rxgk_respond_to_challenge(struct rxrpc_connection *conn, + struct sk_buff *challenge, + struct krb5_buffer *appdata) +{ + _enter("{%d,%x}", conn->debug_id, key_serial(conn->key)); + + if (key_validate(conn->key) < 0) + return rxrpc_abort_conn(conn, NULL, RXGK_EXPIRED, -EPROTO, + rxgk_abort_chall_key_expired); + + return rxgk_construct_response(conn, challenge, appdata); +} + +static int rxgk_respond_to_challenge_no_appdata(struct rxrpc_connection *conn, + struct sk_buff *challenge) +{ + struct krb5_buffer appdata = {}; + + return rxgk_respond_to_challenge(conn, challenge, &appdata); +} + +/** + * rxgk_kernel_respond_to_challenge - Respond to a challenge with appdata + * @challenge: The challenge to respond to + * @appdata: The application data to include in the RESPONSE authenticator + * + * Allow a kernel application to respond to a CHALLENGE with application data + * to be included in the RxGK RESPONSE Authenticator. + * + * Return: %0 if successful and a negative error code otherwise. 
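+ *
+ * A minimal usage sketch (the appdata buffer here is hypothetical and its
+ * contents are entirely application-defined):
+ *
+ *	static u8 my_appdata[4];
+ *	struct krb5_buffer appdata = {
+ *		.len	= sizeof(my_appdata),
+ *		.data	= my_appdata,
+ *	};
+ *	ret = rxgk_kernel_respond_to_challenge(challenge, &appdata);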
+ */ +int rxgk_kernel_respond_to_challenge(struct sk_buff *challenge, + struct krb5_buffer *appdata) +{ + struct rxrpc_skb_priv *csp = rxrpc_skb(challenge); + + return rxgk_respond_to_challenge(csp->chall.conn, challenge, appdata); +} +EXPORT_SYMBOL(rxgk_kernel_respond_to_challenge); + +/* + * Parse sendmsg() control message and respond to challenge. We need to see if + * there's an appdata to fish out. + */ +static int rxgk_sendmsg_respond_to_challenge(struct sk_buff *challenge, + struct msghdr *msg) +{ + struct krb5_buffer appdata = {}; + struct cmsghdr *cmsg; + + for_each_cmsghdr(cmsg, msg) { + if (cmsg->cmsg_level != SOL_RXRPC || + cmsg->cmsg_type != RXRPC_RESP_RXGK_APPDATA) + continue; + if (appdata.data) + return -EINVAL; + appdata.data = CMSG_DATA(cmsg); + appdata.len = cmsg->cmsg_len - sizeof(struct cmsghdr); + } + + return rxgk_kernel_respond_to_challenge(challenge, &appdata); +} + +/* + * Verify the authenticator. + * + * struct RXGK_Authenticator { + * opaque nonce[20]; + * opaque appdata<>; + * RXGK_Level level; + * unsigned int epoch; + * unsigned int cid; + * unsigned int call_numbers<>; + * }; + */ +static int rxgk_do_verify_authenticator(struct rxrpc_connection *conn, + const struct krb5_enctype *krb5, + struct sk_buff *skb, + __be32 *p, __be32 *end) +{ + u32 app_len, call_count, level, epoch, cid, i; + + _enter(""); + + if (memcmp(p, conn->rxgk.nonce, 20) != 0) + return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO, + rxgk_abort_resp_bad_nonce); + p += 20 / sizeof(__be32); + + app_len = ntohl(*p++); + if (app_len > (end - p) * sizeof(__be32)) + return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO, + rxgk_abort_resp_short_applen); + + p += xdr_round_up(app_len) / sizeof(__be32); + if (end - p < 4) + return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO, + rxgk_abort_resp_short_applen); + + level = ntohl(*p++); + epoch = ntohl(*p++); + cid = ntohl(*p++); + call_count = ntohl(*p++); + + if (level != conn->security_level || + epoch != conn->proto.epoch || + cid != conn->proto.cid || + call_count > 4) + return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO, + rxgk_abort_resp_bad_param); + + if (end - p < call_count) + return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO, + rxgk_abort_resp_short_call_list); + + for (i = 0; i < call_count; i++) { + u32 call_id = ntohl(*p++); + + if (call_id > INT_MAX) + return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO, + rxgk_abort_resp_bad_callid); + + if (call_id < conn->channels[i].call_counter) + return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO, + rxgk_abort_resp_call_ctr); + + if (call_id > conn->channels[i].call_counter) { + if (conn->channels[i].call) + return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO, + rxgk_abort_resp_call_state); + + conn->channels[i].call_counter = call_id; + } + } + + _leave(" = 0"); + return 0; +} + +/* + * Extract the authenticator and verify it. + */ +static int rxgk_verify_authenticator(struct rxrpc_connection *conn, + const struct krb5_enctype *krb5, + struct sk_buff *skb, + unsigned int auth_offset, unsigned int auth_len) +{ + void *auth; + __be32 *p; + int ret; + + auth = kmalloc(auth_len, GFP_NOFS); + if (!auth) + return -ENOMEM; + + ret = skb_copy_bits(skb, auth_offset, auth, auth_len); + if (ret < 0) { + ret = rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO, + rxgk_abort_resp_short_auth); + goto error; + } + + p = auth; + ret = rxgk_do_verify_authenticator(conn, krb5, skb, p, p + auth_len); +error: + kfree(auth); + return ret; +} + +/* + * Verify a response. 
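+ *
+ * Verification proceeds in stages: parse the XDR framing, extract and
+ * decrypt the token to recover K0, derive the transport key from that and
+ * only then decrypt and check the authenticator.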
+ * + * struct RXGK_Response { + * rxgkTime start_time; + * RXGK_Data token; + * opaque authenticator<RXGK_MAXAUTHENTICATOR> + * }; + */ +static int rxgk_verify_response(struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + const struct krb5_enctype *krb5; + struct rxrpc_key_token *token; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxgk_response rhdr; + struct rxgk_context *gk; + struct key *key = NULL; + unsigned int offset = sizeof(struct rxrpc_wire_header); + unsigned int len = skb->len - sizeof(struct rxrpc_wire_header); + unsigned int token_offset, token_len; + unsigned int auth_offset, auth_len; + __be32 xauth_len; + int ret, ec; + + _enter("{%d}", conn->debug_id); + + /* Parse the RXGK_Response object */ + if (sizeof(rhdr) + sizeof(__be32) > len) + goto short_packet; + + if (skb_copy_bits(skb, offset, &rhdr, sizeof(rhdr)) < 0) + goto short_packet; + offset += sizeof(rhdr); + len -= sizeof(rhdr); + + token_offset = offset; + token_len = ntohl(rhdr.token_len); + if (xdr_round_up(token_len) + sizeof(__be32) > len) + goto short_packet; + + trace_rxrpc_rx_response(conn, sp->hdr.serial, 0, sp->hdr.cksum, token_len); + + offset += xdr_round_up(token_len); + len -= xdr_round_up(token_len); + + if (skb_copy_bits(skb, offset, &xauth_len, sizeof(xauth_len)) < 0) + goto short_packet; + offset += sizeof(xauth_len); + len -= sizeof(xauth_len); + + auth_offset = offset; + auth_len = ntohl(xauth_len); + if (auth_len < len) + goto short_packet; + if (auth_len & 3) + goto inconsistent; + if (auth_len < 20 + 9 * 4) + goto auth_too_short; + + /* We need to extract and decrypt the token and instantiate a session + * key for it. This bit, however, is application-specific. If + * possible, we use a default parser, but we might end up bumping this + * to the app to deal with - which might mean a round trip to + * userspace. + */ + ret = rxgk_extract_token(conn, skb, token_offset, token_len, &key); + if (ret < 0) + goto out; + + /* We now have a key instantiated from the decrypted ticket. We can + * pass this to the application so that they can parse the ticket + * content and we can use the session key it contains to derive the + * keys we need. + * + * Note that we have to switch enctype at this point as the enctype of + * the ticket doesn't necessarily match that of the transport. + */ + token = key->payload.data[0]; + conn->security_level = token->rxgk->level; + conn->rxgk.start_time = __be64_to_cpu(rhdr.start_time); + + gk = rxgk_generate_transport_key(conn, token->rxgk, sp->hdr.cksum, GFP_NOFS); + if (IS_ERR(gk)) { + ret = PTR_ERR(gk); + goto cant_get_token; + } + + krb5 = gk->krb5; + + trace_rxrpc_rx_response(conn, sp->hdr.serial, krb5->etype, sp->hdr.cksum, token_len); + + /* Decrypt, parse and verify the authenticator. 
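This uses the response key derived from the ticket we just extracted, so a failure here indicates an inconsistent token.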
*/ + ret = rxgk_decrypt_skb(krb5, gk->resp_enc, skb, + &auth_offset, &auth_len, &ec); + if (ret < 0) { + rxrpc_abort_conn(conn, skb, RXGK_SEALEDINCON, ret, + rxgk_abort_resp_auth_dec); + goto out; + } + + ret = rxgk_verify_authenticator(conn, krb5, skb, auth_offset, auth_len); + if (ret < 0) + goto out; + + conn->key = key; + key = NULL; + ret = 0; +out: + key_put(key); + _leave(" = %d", ret); + return ret; + +inconsistent: + ret = rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO, + rxgk_abort_resp_xdr_align); + goto out; +auth_too_short: + ret = rxrpc_abort_conn(conn, skb, RXGK_PACKETSHORT, -EPROTO, + rxgk_abort_resp_short_auth); + goto out; +short_packet: + ret = rxrpc_abort_conn(conn, skb, RXGK_PACKETSHORT, -EPROTO, + rxgk_abort_resp_short_packet); + goto out; + +cant_get_token: + switch (ret) { + case -ENOMEM: + goto temporary_error; + case -EINVAL: + ret = rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EKEYREJECTED, + rxgk_abort_resp_internal_error); + goto out; + case -ENOPKG: + ret = rxrpc_abort_conn(conn, skb, KRB5_PROG_KEYTYPE_NOSUPP, + -EKEYREJECTED, rxgk_abort_resp_nopkg); + goto out; + } + +temporary_error: + /* Ignore the response packet if we got a temporary error such as + * ENOMEM. We just want to send the challenge again. Note that we + * also come out this way if the ticket decryption fails. + */ + goto out; +} + +/* + * clear the connection security + */ +static void rxgk_clear(struct rxrpc_connection *conn) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(conn->rxgk.keys); i++) + rxgk_put(conn->rxgk.keys[i]); +} + +/* + * Initialise the RxGK security service. + */ +static int rxgk_init(void) +{ + return 0; +} + +/* + * Clean up the RxGK security service. + */ +static void rxgk_exit(void) +{ +} + +/* + * RxRPC YFS GSSAPI-based security + */ +const struct rxrpc_security rxgk_yfs = { + .name = "yfs-rxgk", + .security_index = RXRPC_SECURITY_YFS_RXGK, + .no_key_abort = RXGK_NOTAUTH, + .init = rxgk_init, + .exit = rxgk_exit, + .preparse_server_key = rxgk_preparse_server_key, + .free_preparse_server_key = rxgk_free_preparse_server_key, + .destroy_server_key = rxgk_destroy_server_key, + .describe_server_key = rxgk_describe_server_key, + .init_connection_security = rxgk_init_connection_security, + .alloc_txbuf = rxgk_alloc_txbuf, + .secure_packet = rxgk_secure_packet, + .verify_packet = rxgk_verify_packet, + .free_call_crypto = rxgk_free_call_crypto, + .issue_challenge = rxgk_issue_challenge, + .validate_challenge = rxgk_validate_challenge, + .challenge_to_recvmsg = rxgk_challenge_to_recvmsg, + .sendmsg_respond_to_challenge = rxgk_sendmsg_respond_to_challenge, + .respond_to_challenge = rxgk_respond_to_challenge_no_appdata, + .verify_response = rxgk_verify_response, + .clear = rxgk_clear, + .default_decode_ticket = rxgk_yfs_decode_ticket, +}; diff --git a/net/rxrpc/rxgk_app.c b/net/rxrpc/rxgk_app.c new file mode 100644 index 000000000000..b94b77a1c317 --- /dev/null +++ b/net/rxrpc/rxgk_app.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Application-specific bits for GSSAPI-based RxRPC security + * + * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/key-type.h> +#include "ar-internal.h" +#include "rxgk_common.h" + +/* + * Decode a default-style YFS ticket in a response and turn it into an + * rxrpc-type key. 
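+ *
+ * The ticket is rewrapped into the XDR payload format understood by the
+ * rxrpc key type so that it can be instantiated as a key and parsed by the
+ * normal payload parser.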
+ * + * struct rxgk_key { + * afs_uint32 enctype; + * opaque key<>; + * }; + * + * struct RXGK_AuthName { + * afs_int32 kind; + * opaque data<AUTHDATAMAX>; + * opaque display<AUTHPRINTABLEMAX>; + * }; + * + * struct RXGK_Token { + * rxgk_key K0; + * RXGK_Level level; + * rxgkTime starttime; + * afs_int32 lifetime; + * afs_int32 bytelife; + * rxgkTime expirationtime; + * struct RXGK_AuthName identities<>; + * }; + */ +int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb, + unsigned int ticket_offset, unsigned int ticket_len, + struct key **_key) +{ + struct rxrpc_key_token *token; + const struct cred *cred = current_cred(); // TODO - use socket creds + struct key *key; + size_t pre_ticket_len, payload_len; + unsigned int klen, enctype; + void *payload, *ticket; + __be32 *t, *p, *q, tmp[2]; + int ret; + + _enter(""); + + /* Get the session key length */ + ret = skb_copy_bits(skb, ticket_offset, tmp, sizeof(tmp)); + if (ret < 0) + return rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO, + rxgk_abort_resp_short_yfs_klen); + enctype = ntohl(tmp[0]); + klen = ntohl(tmp[1]); + + if (klen > ticket_len - 10 * sizeof(__be32)) + return rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO, + rxgk_abort_resp_short_yfs_key); + + pre_ticket_len = ((5 + 14) * sizeof(__be32) + + xdr_round_up(klen) + + sizeof(__be32)); + payload_len = pre_ticket_len + xdr_round_up(ticket_len); + + payload = kzalloc(payload_len, GFP_NOFS); + if (!payload) + return -ENOMEM; + + /* We need to fill out the XDR form for a key payload that we can pass + * to add_key(). Start by copying in the ticket so that we can parse + * it. + */ + ticket = payload + pre_ticket_len; + ret = skb_copy_bits(skb, ticket_offset, ticket, ticket_len); + if (ret < 0) { + ret = rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO, + rxgk_abort_resp_short_yfs_tkt); + goto error; + } + + /* Fill out the form header. */ + p = payload; + p[0] = htonl(0); /* Flags */ + p[1] = htonl(1); /* len(cellname) */ + p[2] = htonl(0x20000000); /* Cellname " " */ + p[3] = htonl(1); /* #tokens */ + p[4] = htonl(15 * sizeof(__be32) + xdr_round_up(klen) + + xdr_round_up(ticket_len)); /* Token len */ + + /* Now fill in the body. Most of this we can just scrape directly from + * the ticket. + */ + t = ticket + sizeof(__be32) * 2 + xdr_round_up(klen); + q = payload + 5 * sizeof(__be32); + q[0] = htonl(RXRPC_SECURITY_YFS_RXGK); + q[1] = t[1]; /* begintime - msw */ + q[2] = t[2]; /* - lsw */ + q[3] = t[5]; /* endtime - msw */ + q[4] = t[6]; /* - lsw */ + q[5] = 0; /* level - msw */ + q[6] = t[0]; /* - lsw */ + q[7] = 0; /* lifetime - msw */ + q[8] = t[3]; /* - lsw */ + q[9] = 0; /* bytelife - msw */ + q[10] = t[4]; /* - lsw */ + q[11] = 0; /* enctype - msw */ + q[12] = htonl(enctype); /* - lsw */ + q[13] = htonl(klen); /* Key length */ + + q += 14; + + memcpy(q, ticket + sizeof(__be32) * 2, klen); + q += xdr_round_up(klen) / 4; + q[0] = htonl(ticket_len); + q++; + if (WARN_ON((unsigned long)q != (unsigned long)ticket)) { + ret = -EIO; + goto error; + } + + /* Ticket read in with skb_copy_bits above */ + q += xdr_round_up(ticket_len) / 4; + if (WARN_ON((unsigned long)q - (unsigned long)payload != payload_len)) { + ret = -EIO; + goto error; + } + + /* Now turn that into a key. 
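The key is left unlinked from any keyring and is marked no_leak_key so that the session key material cannot be read back out of it.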
*/ + key = key_alloc(&key_type_rxrpc, "x", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, // TODO: Use socket owner + KEY_USR_VIEW, + KEY_ALLOC_NOT_IN_QUOTA, NULL); + if (IS_ERR(key)) { + _leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key)); + ret = PTR_ERR(key); + goto error; + } + + _debug("key %d", key_serial(key)); + + ret = key_instantiate_and_link(key, payload, payload_len, NULL, NULL); + if (ret < 0) + goto error_key; + + token = key->payload.data[0]; + token->no_leak_key = true; + *_key = key; + key = NULL; + ret = 0; + goto error; + +error_key: + key_put(key); +error: + kfree_sensitive(payload); + _leave(" = %d", ret); + return ret; +} + +/* + * Extract the token and set up a session key from the details. + * + * struct RXGK_TokenContainer { + * afs_int32 kvno; + * afs_int32 enctype; + * opaque encrypted_token<>; + * }; + * + * [tools.ietf.org/html/draft-wilkinson-afs3-rxgk-afs-08 sec 6.1] + */ +int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb, + unsigned int token_offset, unsigned int token_len, + struct key **_key) +{ + const struct krb5_enctype *krb5; + const struct krb5_buffer *server_secret; + struct crypto_aead *token_enc = NULL; + struct key *server_key; + unsigned int ticket_offset, ticket_len; + u32 kvno, enctype; + int ret, ec; + + struct { + __be32 kvno; + __be32 enctype; + __be32 token_len; + } container; + + /* Decode the RXGK_TokenContainer object. This tells us which server + * key we should be using. We can then fetch the key, get the secret + * and set up the crypto to extract the token. + */ + if (skb_copy_bits(skb, token_offset, &container, sizeof(container)) < 0) + return rxrpc_abort_conn(conn, skb, RXGK_PACKETSHORT, -EPROTO, + rxgk_abort_resp_tok_short); + + kvno = ntohl(container.kvno); + enctype = ntohl(container.enctype); + ticket_len = ntohl(container.token_len); + ticket_offset = token_offset + sizeof(container); + + if (xdr_round_up(ticket_len) > token_len - 3 * 4) + return rxrpc_abort_conn(conn, skb, RXGK_PACKETSHORT, -EPROTO, + rxgk_abort_resp_tok_short); + + _debug("KVNO %u", kvno); + _debug("ENC %u", enctype); + _debug("TLEN %u", ticket_len); + + server_key = rxrpc_look_up_server_security(conn, skb, kvno, enctype); + if (IS_ERR(server_key)) + goto cant_get_server_key; + + down_read(&server_key->sem); + server_secret = (const void *)&server_key->payload.data[2]; + ret = rxgk_set_up_token_cipher(server_secret, &token_enc, enctype, &krb5, GFP_NOFS); + up_read(&server_key->sem); + key_put(server_key); + if (ret < 0) + goto cant_get_token; + + /* We can now decrypt and parse the token/ticket. This allows us to + * gain access to K0, from which we can derive the transport key and + * thence decode the authenticator. 
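+	 *
+	 * Note that the server key itself is only held for as long as it
+	 * takes to set up the token cipher.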
+ */ + ret = rxgk_decrypt_skb(krb5, token_enc, skb, + &ticket_offset, &ticket_len, &ec); + crypto_free_aead(token_enc); + token_enc = NULL; + if (ret < 0) + return rxrpc_abort_conn(conn, skb, ec, ret, + rxgk_abort_resp_tok_dec); + + ret = conn->security->default_decode_ticket(conn, skb, ticket_offset, + ticket_len, _key); + if (ret < 0) + goto cant_get_token; + + _leave(" = 0"); + return ret; + +cant_get_server_key: + ret = PTR_ERR(server_key); + switch (ret) { + case -ENOMEM: + goto temporary_error; + case -ENOKEY: + case -EKEYREJECTED: + case -EKEYEXPIRED: + case -EKEYREVOKED: + case -EPERM: + return rxrpc_abort_conn(conn, skb, RXGK_BADKEYNO, -EKEYREJECTED, + rxgk_abort_resp_tok_nokey); + default: + return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EKEYREJECTED, + rxgk_abort_resp_tok_keyerr); + } + +cant_get_token: + switch (ret) { + case -ENOMEM: + goto temporary_error; + case -EINVAL: + return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EKEYREJECTED, + rxgk_abort_resp_tok_internal_error); + case -ENOPKG: + return rxrpc_abort_conn(conn, skb, KRB5_PROG_KEYTYPE_NOSUPP, + -EKEYREJECTED, rxgk_abort_resp_tok_nopkg); + } + +temporary_error: + /* Ignore the response packet if we got a temporary error such as + * ENOMEM. We just want to send the challenge again. Note that we + * also come out this way if the ticket decryption fails. + */ + return ret; +} diff --git a/net/rxrpc/rxgk_common.h b/net/rxrpc/rxgk_common.h new file mode 100644 index 000000000000..7370a5655985 --- /dev/null +++ b/net/rxrpc/rxgk_common.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Common bits for GSSAPI-based RxRPC security. + * + * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <crypto/krb5.h> +#include <crypto/skcipher.h> +#include <crypto/hash.h> + +/* + * Per-key number context. This is replaced when the connection is rekeyed. + */ +struct rxgk_context { + refcount_t usage; + unsigned int key_number; /* Rekeying number (goes in the rx header) */ + unsigned long flags; +#define RXGK_TK_NEEDS_REKEY 0 /* Set if this needs rekeying */ + unsigned long expiry; /* Expiration time of this key */ + long long bytes_remaining; /* Remaining Tx lifetime of this key */ + const struct krb5_enctype *krb5; /* RxGK encryption type */ + const struct rxgk_key *key; + + /* We need up to 7 keys derived from the transport key, but we don't + * actually need the transport key. Each key is derived by + * DK(TK,constant). 
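+	 * The constants are the RXGK_CLIENT_* and RXGK_SERVER_* usage values
+	 * defined in rxgk_kdf.c; the client and server ends derive mirrored
+	 * Tx/Rx sets from them.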
+ */ + struct crypto_aead *tx_enc; /* Transmission key */ + struct crypto_aead *rx_enc; /* Reception key */ + struct crypto_shash *tx_Kc; /* Transmission checksum key */ + struct crypto_shash *rx_Kc; /* Reception checksum key */ + struct crypto_aead *resp_enc; /* Response packet enc key */ +}; + +#define xdr_round_up(x) (round_up((x), sizeof(__be32))) +#define xdr_object_len(x) (4 + xdr_round_up(x)) + +/* + * rxgk_app.c + */ +int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb, + unsigned int ticket_offset, unsigned int ticket_len, + struct key **_key); +int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb, + unsigned int token_offset, unsigned int token_len, + struct key **_key); + +/* + * rxgk_kdf.c + */ +void rxgk_put(struct rxgk_context *gk); +struct rxgk_context *rxgk_generate_transport_key(struct rxrpc_connection *conn, + const struct rxgk_key *key, + unsigned int key_number, + gfp_t gfp); +int rxgk_set_up_token_cipher(const struct krb5_buffer *server_key, + struct crypto_aead **token_key, + unsigned int enctype, + const struct krb5_enctype **_krb5, + gfp_t gfp); + +/* + * Apply decryption and checksumming functions to part of an skbuff. The + * offset and length are updated to reflect the actual content of the encrypted + * region. + */ +static inline +int rxgk_decrypt_skb(const struct krb5_enctype *krb5, + struct crypto_aead *aead, + struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len, + int *_error_code) +{ + struct scatterlist sg[16]; + size_t offset = 0, len = *_len; + int nr_sg, ret; + + sg_init_table(sg, ARRAY_SIZE(sg)); + nr_sg = skb_to_sgvec(skb, sg, *_offset, len); + if (unlikely(nr_sg < 0)) + return nr_sg; + + ret = crypto_krb5_decrypt(krb5, aead, sg, nr_sg, + &offset, &len); + switch (ret) { + case 0: + *_offset += offset; + *_len = len; + break; + case -EPROTO: + case -EBADMSG: + *_error_code = RXGK_SEALEDINCON; + break; + default: + break; + } + + return ret; +} + +/* + * Check the MIC on a region of an skbuff. The offset and length are updated + * to reflect the actual content of the secure region. + */ +static inline +int rxgk_verify_mic_skb(const struct krb5_enctype *krb5, + struct crypto_shash *shash, + const struct krb5_buffer *metadata, + struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len, + u32 *_error_code) +{ + struct scatterlist sg[16]; + size_t offset = 0, len = *_len; + int nr_sg, ret; + + sg_init_table(sg, ARRAY_SIZE(sg)); + nr_sg = skb_to_sgvec(skb, sg, *_offset, len); + if (unlikely(nr_sg < 0)) + return nr_sg; + + ret = crypto_krb5_verify_mic(krb5, shash, metadata, sg, nr_sg, + &offset, &len); + switch (ret) { + case 0: + *_offset += offset; + *_len = len; + break; + case -EPROTO: + case -EBADMSG: + *_error_code = RXGK_SEALEDINCON; + break; + default: + break; + } + + return ret; +} diff --git a/net/rxrpc/rxgk_kdf.c b/net/rxrpc/rxgk_kdf.c new file mode 100644 index 000000000000..b4db5aa30e5b --- /dev/null +++ b/net/rxrpc/rxgk_kdf.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* RxGK transport key derivation. + * + * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/key-type.h> +#include <linux/slab.h> +#include <keys/rxrpc-type.h> +#include "ar-internal.h" +#include "rxgk_common.h" + +#define round16(x) (((x) + 15) & ~15) + +/* + * Constants used to derive the keys and hmacs actually used for doing stuff. 
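+ * These are the usage values fed into the DK(TK,constant) derivation; the
+ * numbers follow the rxgk draft specification.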
+ */ +#define RXGK_CLIENT_ENC_PACKET 1026U // 0x402 +#define RXGK_CLIENT_MIC_PACKET 1027U // 0x403 +#define RXGK_SERVER_ENC_PACKET 1028U // 0x404 +#define RXGK_SERVER_MIC_PACKET 1029U // 0x405 +#define RXGK_CLIENT_ENC_RESPONSE 1030U // 0x406 +#define RXGK_SERVER_ENC_TOKEN 1036U // 0x40c + +static void rxgk_free(struct rxgk_context *gk) +{ + if (gk->tx_Kc) + crypto_free_shash(gk->tx_Kc); + if (gk->rx_Kc) + crypto_free_shash(gk->rx_Kc); + if (gk->tx_enc) + crypto_free_aead(gk->tx_enc); + if (gk->rx_enc) + crypto_free_aead(gk->rx_enc); + if (gk->resp_enc) + crypto_free_aead(gk->resp_enc); + kfree(gk); +} + +void rxgk_put(struct rxgk_context *gk) +{ + if (gk && refcount_dec_and_test(&gk->usage)) + rxgk_free(gk); +} + +/* + * Transport key derivation function. + * + * TK = random-to-key(PRF+(K0, L, + * epoch || cid || start_time || key_number)) + * [tools.ietf.org/html/draft-wilkinson-afs3-rxgk-11 sec 8.3] + */ +static int rxgk_derive_transport_key(struct rxrpc_connection *conn, + struct rxgk_context *gk, + const struct rxgk_key *rxgk, + struct krb5_buffer *TK, + gfp_t gfp) +{ + const struct krb5_enctype *krb5 = gk->krb5; + struct krb5_buffer conn_info; + unsigned int L = krb5->key_bytes; + __be32 *info; + u8 *buffer; + int ret; + + _enter(""); + + conn_info.len = sizeof(__be32) * 5; + + buffer = kzalloc(round16(conn_info.len), gfp); + if (!buffer) + return -ENOMEM; + + conn_info.data = buffer; + + info = (__be32 *)conn_info.data; + info[0] = htonl(conn->proto.epoch); + info[1] = htonl(conn->proto.cid); + info[2] = htonl(conn->rxgk.start_time >> 32); + info[3] = htonl(conn->rxgk.start_time >> 0); + info[4] = htonl(gk->key_number); + + ret = crypto_krb5_calc_PRFplus(krb5, &rxgk->key, L, &conn_info, TK, gfp); + kfree_sensitive(buffer); + _leave(" = %d", ret); + return ret; +} + +/* + * Set up the ciphers for the usage keys. 
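+ *
+ * Which usage constant feeds which cipher depends on whether this is the
+ * client or the service side of the connection, so that one end's Tx keys
+ * match the other end's Rx keys.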
+ */ +static int rxgk_set_up_ciphers(struct rxrpc_connection *conn, + struct rxgk_context *gk, + const struct rxgk_key *rxgk, + gfp_t gfp) +{ + const struct krb5_enctype *krb5 = gk->krb5; + struct crypto_shash *shash; + struct crypto_aead *aead; + struct krb5_buffer TK; + bool service = rxrpc_conn_is_service(conn); + int ret; + u8 *buffer; + + buffer = kzalloc(krb5->key_bytes, gfp); + if (!buffer) + return -ENOMEM; + + TK.len = krb5->key_bytes; + TK.data = buffer; + + ret = rxgk_derive_transport_key(conn, gk, rxgk, &TK, gfp); + if (ret < 0) + goto out; + + aead = crypto_krb5_prepare_encryption(krb5, &TK, RXGK_CLIENT_ENC_RESPONSE, gfp); + if (IS_ERR(aead)) + goto aead_error; + gk->resp_enc = aead; + + if (crypto_aead_blocksize(gk->resp_enc) != krb5->block_len || + crypto_aead_authsize(gk->resp_enc) != krb5->cksum_len) { + pr_notice("algo inconsistent with krb5 table %u!=%u or %u!=%u\n", + crypto_aead_blocksize(gk->resp_enc), krb5->block_len, + crypto_aead_authsize(gk->resp_enc), krb5->cksum_len); + ret = -EINVAL; + goto out; + } + + if (service) { + switch (conn->security_level) { + case RXRPC_SECURITY_AUTH: + shash = crypto_krb5_prepare_checksum( + krb5, &TK, RXGK_SERVER_MIC_PACKET, gfp); + if (IS_ERR(shash)) + goto hash_error; + gk->tx_Kc = shash; + shash = crypto_krb5_prepare_checksum( + krb5, &TK, RXGK_CLIENT_MIC_PACKET, gfp); + if (IS_ERR(shash)) + goto hash_error; + gk->rx_Kc = shash; + break; + case RXRPC_SECURITY_ENCRYPT: + aead = crypto_krb5_prepare_encryption( + krb5, &TK, RXGK_SERVER_ENC_PACKET, gfp); + if (IS_ERR(aead)) + goto aead_error; + gk->tx_enc = aead; + aead = crypto_krb5_prepare_encryption( + krb5, &TK, RXGK_CLIENT_ENC_PACKET, gfp); + if (IS_ERR(aead)) + goto aead_error; + gk->rx_enc = aead; + break; + } + } else { + switch (conn->security_level) { + case RXRPC_SECURITY_AUTH: + shash = crypto_krb5_prepare_checksum( + krb5, &TK, RXGK_CLIENT_MIC_PACKET, gfp); + if (IS_ERR(shash)) + goto hash_error; + gk->tx_Kc = shash; + shash = crypto_krb5_prepare_checksum( + krb5, &TK, RXGK_SERVER_MIC_PACKET, gfp); + if (IS_ERR(shash)) + goto hash_error; + gk->rx_Kc = shash; + break; + case RXRPC_SECURITY_ENCRYPT: + aead = crypto_krb5_prepare_encryption( + krb5, &TK, RXGK_CLIENT_ENC_PACKET, gfp); + if (IS_ERR(aead)) + goto aead_error; + gk->tx_enc = aead; + aead = crypto_krb5_prepare_encryption( + krb5, &TK, RXGK_SERVER_ENC_PACKET, gfp); + if (IS_ERR(aead)) + goto aead_error; + gk->rx_enc = aead; + break; + } + } + + ret = 0; +out: + kfree_sensitive(buffer); + return ret; +aead_error: + ret = PTR_ERR(aead); + goto out; +hash_error: + ret = PTR_ERR(shash); + goto out; +} + +/* + * Derive a transport key for a connection and then derive a bunch of usage + * keys from it and set up ciphers using them. + */ +struct rxgk_context *rxgk_generate_transport_key(struct rxrpc_connection *conn, + const struct rxgk_key *key, + unsigned int key_number, + gfp_t gfp) +{ + struct rxgk_context *gk; + unsigned long lifetime; + int ret = -ENOPKG; + + _enter(""); + + gk = kzalloc(sizeof(*gk), GFP_KERNEL); + if (!gk) + return ERR_PTR(-ENOMEM); + refcount_set(&gk->usage, 1); + gk->key = key; + gk->key_number = key_number; + + gk->krb5 = crypto_krb5_find_enctype(key->enctype); + if (!gk->krb5) + goto err_tk; + + ret = rxgk_set_up_ciphers(conn, gk, key, gfp); + if (ret) + goto err_tk; + + /* Set the remaining number of bytes encrypted with this key that may + * be transmitted before rekeying. Note that the spec has been + * interpreted differently on this point... 
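+	 * here 0 and 63 are taken to mean no limit, 1-62 is taken as the
+	 * log2 of the byte count and anything else as a straight byte count.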
+ */ + switch (key->bytelife) { + case 0: + case 63: + gk->bytes_remaining = LLONG_MAX; + break; + case 1 ... 62: + gk->bytes_remaining = 1LL << key->bytelife; + break; + default: + gk->bytes_remaining = key->bytelife; + break; + } + + /* Set the time after which rekeying must occur */ + if (key->lifetime) { + lifetime = min_t(u64, key->lifetime, INT_MAX / HZ); + lifetime *= HZ; + } else { + lifetime = MAX_JIFFY_OFFSET; + } + gk->expiry = jiffies + lifetime; + return gk; + +err_tk: + rxgk_put(gk); + _leave(" = %d", ret); + return ERR_PTR(ret); +} + +/* + * Use the server secret key to set up the ciphers that will be used to extract + * the token from a response packet. + */ +int rxgk_set_up_token_cipher(const struct krb5_buffer *server_key, + struct crypto_aead **token_aead, + unsigned int enctype, + const struct krb5_enctype **_krb5, + gfp_t gfp) +{ + const struct krb5_enctype *krb5; + struct crypto_aead *aead; + + krb5 = crypto_krb5_find_enctype(enctype); + if (!krb5) + return -ENOPKG; + + aead = crypto_krb5_prepare_encryption(krb5, server_key, RXGK_SERVER_ENC_TOKEN, gfp); + if (IS_ERR(aead)) + return PTR_ERR(aead); + + *_krb5 = krb5; + *token_aead = aead; + return 0; +} diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 6cb37b0eb77f..3657c0661cdc 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -177,8 +177,10 @@ static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size_t rem if (!txb) return NULL; - txb->offset += shdr; - txb->space = part; + txb->crypto_header = 0; + txb->sec_header = shdr; + txb->offset += shdr; + txb->space = part; return txb; } @@ -683,6 +685,8 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) serial = rxrpc_get_next_serial(conn); whdr.serial = htonl(serial); + trace_rxrpc_tx_challenge(conn, serial, 0, conn->rxkad.nonce); + ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len); if (ret < 0) { trace_rxrpc_tx_fail(conn->debug_id, serial, ret, @@ -698,62 +702,6 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) } /* - * send a Kerberos security response - */ -static int rxkad_send_response(struct rxrpc_connection *conn, - struct rxrpc_host_header *hdr, - struct rxkad_response *resp, - const struct rxkad_key *s2) -{ - struct rxrpc_wire_header whdr; - struct msghdr msg; - struct kvec iov[3]; - size_t len; - u32 serial; - int ret; - - _enter(""); - - msg.msg_name = &conn->peer->srx.transport; - msg.msg_namelen = conn->peer->srx.transport_len; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - memset(&whdr, 0, sizeof(whdr)); - whdr.epoch = htonl(hdr->epoch); - whdr.cid = htonl(hdr->cid); - whdr.type = RXRPC_PACKET_TYPE_RESPONSE; - whdr.flags = conn->out_clientflag; - whdr.securityIndex = hdr->securityIndex; - whdr.serviceId = htons(hdr->serviceId); - - iov[0].iov_base = &whdr; - iov[0].iov_len = sizeof(whdr); - iov[1].iov_base = resp; - iov[1].iov_len = sizeof(*resp); - iov[2].iov_base = (void *)s2->ticket; - iov[2].iov_len = s2->ticket_len; - - len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len; - - serial = rxrpc_get_next_serial(conn); - whdr.serial = htonl(serial); - - rxrpc_local_dont_fragment(conn->local, false); - ret = kernel_sendmsg(conn->local->socket, &msg, iov, 3, len); - if (ret < 0) { - trace_rxrpc_tx_fail(conn->debug_id, serial, ret, - rxrpc_tx_point_rxkad_response); - return -EAGAIN; - } - - conn->peer->last_tx_at = ktime_get_seconds(); - _leave(" = 0"); - return 0; -} - -/* * calculate the response checksum */ static void rxkad_calc_response_checksum(struct 
rxkad_response *response) @@ -772,12 +720,21 @@ static void rxkad_calc_response_checksum(struct rxkad_response *response) * encrypt the response packet */ static int rxkad_encrypt_response(struct rxrpc_connection *conn, - struct rxkad_response *resp, + struct sk_buff *response, const struct rxkad_key *s2) { struct skcipher_request *req; struct rxrpc_crypt iv; struct scatterlist sg[1]; + size_t encsize = sizeof(((struct rxkad_response *)0)->encrypted); + int ret; + + sg_init_table(sg, ARRAY_SIZE(sg)); + ret = skb_to_sgvec(response, sg, + sizeof(struct rxrpc_wire_header) + + offsetof(struct rxkad_response, encrypted), encsize); + if (ret < 0) + return ret; req = skcipher_request_alloc(&conn->rxkad.cipher->base, GFP_NOFS); if (!req) @@ -786,89 +743,206 @@ static int rxkad_encrypt_response(struct rxrpc_connection *conn, /* continue encrypting from where we left off */ memcpy(&iv, s2->session_key, sizeof(iv)); - sg_init_table(sg, 1); - sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted)); skcipher_request_set_sync_tfm(req, conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x); - crypto_skcipher_encrypt(req); + skcipher_request_set_crypt(req, sg, sg, encsize, iv.x); + ret = crypto_skcipher_encrypt(req); skcipher_request_free(req); - return 0; + return ret; } /* - * respond to a challenge packet + * Validate a challenge packet. */ -static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, - struct sk_buff *skb) +static bool rxkad_validate_challenge(struct rxrpc_connection *conn, + struct sk_buff *skb) { - const struct rxrpc_key_token *token; struct rxkad_challenge challenge; - struct rxkad_response *resp; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - u32 version, nonce, min_level; - int ret = -EPROTO; + u32 version, min_level; + int ret; _enter("{%d,%x}", conn->debug_id, key_serial(conn->key)); - if (!conn->key) - return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, - rxkad_abort_chall_no_key); + if (!conn->key) { + rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, + rxkad_abort_chall_no_key); + return false; + } ret = key_validate(conn->key); - if (ret < 0) - return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret, - rxkad_abort_chall_key_expired); + if (ret < 0) { + rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret, + rxkad_abort_chall_key_expired); + return false; + } if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), - &challenge, sizeof(challenge)) < 0) - return rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, - rxkad_abort_chall_short); + &challenge, sizeof(challenge)) < 0) { + rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_chall_short); + return false; + } version = ntohl(challenge.version); - nonce = ntohl(challenge.nonce); + sp->chall.rxkad_nonce = ntohl(challenge.nonce); min_level = ntohl(challenge.min_level); - trace_rxrpc_rx_challenge(conn, sp->hdr.serial, version, nonce, min_level); + trace_rxrpc_rx_challenge(conn, sp->hdr.serial, version, + sp->chall.rxkad_nonce, min_level); + + if (version != RXKAD_VERSION) { + rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, + rxkad_abort_chall_version); + return false; + } - if (version != RXKAD_VERSION) - return rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, - rxkad_abort_chall_version); + if (conn->security_level < min_level) { + rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EACCES, + rxkad_abort_chall_level); + return false; + } + return true; +} + +/* + * Insert the header into 
the response. + */ +static noinline +int rxkad_insert_response_header(struct rxrpc_connection *conn, + const struct rxrpc_key_token *token, + struct sk_buff *challenge, + struct sk_buff *response, + size_t *offset) +{ + struct rxrpc_skb_priv *csp = rxrpc_skb(challenge); + struct { + struct rxrpc_wire_header whdr; + struct rxkad_response resp; + } h; + int ret; - if (conn->security_level < min_level) - return rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EACCES, - rxkad_abort_chall_level); + h.whdr.epoch = htonl(conn->proto.epoch); + h.whdr.cid = htonl(conn->proto.cid); + h.whdr.callNumber = 0; + h.whdr.serial = 0; + h.whdr.seq = 0; + h.whdr.type = RXRPC_PACKET_TYPE_RESPONSE; + h.whdr.flags = conn->out_clientflag; + h.whdr.userStatus = 0; + h.whdr.securityIndex = conn->security_ix; + h.whdr.cksum = 0; + h.whdr.serviceId = htons(conn->service_id); + h.resp.version = htonl(RXKAD_VERSION); + h.resp.__pad = 0; + h.resp.encrypted.epoch = htonl(conn->proto.epoch); + h.resp.encrypted.cid = htonl(conn->proto.cid); + h.resp.encrypted.checksum = 0; + h.resp.encrypted.securityIndex = htonl(conn->security_ix); + h.resp.encrypted.call_id[0] = htonl(conn->channels[0].call_counter); + h.resp.encrypted.call_id[1] = htonl(conn->channels[1].call_counter); + h.resp.encrypted.call_id[2] = htonl(conn->channels[2].call_counter); + h.resp.encrypted.call_id[3] = htonl(conn->channels[3].call_counter); + h.resp.encrypted.inc_nonce = htonl(csp->chall.rxkad_nonce + 1); + h.resp.encrypted.level = htonl(conn->security_level); + h.resp.kvno = htonl(token->kad->kvno); + h.resp.ticket_len = htonl(token->kad->ticket_len); + + rxkad_calc_response_checksum(&h.resp); + + ret = skb_store_bits(response, *offset, &h, sizeof(h)); + *offset += sizeof(h); + return ret; +} + +/* + * respond to a challenge packet + */ +static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, + struct sk_buff *challenge) +{ + const struct rxrpc_key_token *token; + struct rxrpc_skb_priv *csp, *rsp; + struct sk_buff *response; + size_t len, offset = 0; + int ret = -EPROTO; + + _enter("{%d,%x}", conn->debug_id, key_serial(conn->key)); + + ret = key_validate(conn->key); + if (ret < 0) + return rxrpc_abort_conn(conn, challenge, RXKADEXPIRED, ret, + rxkad_abort_chall_key_expired); token = conn->key->payload.data[0]; /* build the response packet */ - resp = kzalloc(sizeof(struct rxkad_response), GFP_NOFS); - if (!resp) - return -ENOMEM; + len = sizeof(struct rxrpc_wire_header) + + sizeof(struct rxkad_response) + + token->kad->ticket_len; + + response = alloc_skb_with_frags(0, len, 0, &ret, GFP_NOFS); + if (!response) + goto error; + rxrpc_new_skb(response, rxrpc_skb_new_response_rxkad); + response->len = len; + response->data_len = len; + + offset = 0; + ret = rxkad_insert_response_header(conn, token, challenge, response, + &offset); + if (ret < 0) + goto error; + + ret = rxkad_encrypt_response(conn, response, token->kad); + if (ret < 0) + goto error; + + ret = skb_store_bits(response, offset, token->kad->ticket, + token->kad->ticket_len); + if (ret < 0) + goto error; - resp->version = htonl(RXKAD_VERSION); - resp->encrypted.epoch = htonl(conn->proto.epoch); - resp->encrypted.cid = htonl(conn->proto.cid); - resp->encrypted.securityIndex = htonl(conn->security_ix); - resp->encrypted.inc_nonce = htonl(nonce + 1); - resp->encrypted.level = htonl(conn->security_level); - resp->kvno = htonl(token->kad->kvno); - resp->ticket_len = htonl(token->kad->ticket_len); - resp->encrypted.call_id[0] = htonl(conn->channels[0].call_counter); - 
resp->encrypted.call_id[1] = htonl(conn->channels[1].call_counter); - resp->encrypted.call_id[2] = htonl(conn->channels[2].call_counter); - resp->encrypted.call_id[3] = htonl(conn->channels[3].call_counter); - - /* calculate the response checksum and then do the encryption */ - rxkad_calc_response_checksum(resp); - ret = rxkad_encrypt_response(conn, resp, token->kad); - if (ret == 0) - ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad); - kfree(resp); + csp = rxrpc_skb(challenge); + rsp = rxrpc_skb(response); + rsp->resp.len = len; + rsp->resp.challenge_serial = csp->hdr.serial; + rxrpc_post_response(conn, response); + response = NULL; + ret = 0; + +error: + rxrpc_free_skb(response, rxrpc_skb_put_response); return ret; } /* + * RxKAD does automatic response only as there's nothing to manage that isn't + * already in the key. + */ +static int rxkad_sendmsg_respond_to_challenge(struct sk_buff *challenge, + struct msghdr *msg) +{ + return -EINVAL; +} + +/** + * rxkad_kernel_respond_to_challenge - Respond to a challenge with appdata + * @challenge: The challenge to respond to + * + * Allow a kernel application to respond to a CHALLENGE. + * + * Return: %0 if successful and a negative error code otherwise. + */ +int rxkad_kernel_respond_to_challenge(struct sk_buff *challenge) +{ + struct rxrpc_skb_priv *csp = rxrpc_skb(challenge); + + return rxkad_respond_to_challenge(csp->chall.conn, challenge); +} +EXPORT_SYMBOL(rxkad_kernel_respond_to_challenge); + +/* * decrypt the kerberos IV ticket in the response */ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, @@ -1276,6 +1350,8 @@ const struct rxrpc_security rxkad = { .verify_packet = rxkad_verify_packet, .free_call_crypto = rxkad_free_call_crypto, .issue_challenge = rxkad_issue_challenge, + .validate_challenge = rxkad_validate_challenge, + .sendmsg_respond_to_challenge = rxkad_sendmsg_respond_to_challenge, .respond_to_challenge = rxkad_respond_to_challenge, .verify_response = rxkad_verify_response, .clear = rxkad_clear, diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c index e848a4777b8c..0377301156b0 100644 --- a/net/rxrpc/rxperf.c +++ b/net/rxrpc/rxperf.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) "rxperf: " fmt #include <linux/module.h> #include <linux/slab.h> +#include <crypto/krb5.h> #include <net/sock.h> #include <net/af_rxrpc.h> #define RXRPC_TRACE_ONLY_DEFINE_ENUMS @@ -136,6 +137,12 @@ static void rxperf_notify_end_reply_tx(struct sock *sock, RXPERF_CALL_SV_AWAIT_ACK); } +static const struct rxrpc_kernel_ops rxperf_rxrpc_callback_ops = { + .notify_new_call = rxperf_rx_new_call, + .discard_new_call = rxperf_rx_discard_new_call, + .user_attach_call = rxperf_rx_attach, +}; + /* * Charge the incoming call preallocation. */ @@ -161,7 +168,6 @@ static void rxperf_charge_preallocation(struct work_struct *work) if (rxrpc_kernel_charge_accept(rxperf_socket, rxperf_notify_rx, - rxperf_rx_attach, (unsigned long)call, GFP_KERNEL, call->debug_id) < 0) @@ -209,8 +215,7 @@ static int rxperf_open_socket(void) if (ret < 0) goto error_2; - rxrpc_kernel_new_call_notification(socket, rxperf_rx_new_call, - rxperf_rx_discard_new_call); + rxrpc_kernel_set_notifications(socket, &rxperf_rxrpc_callback_ops); ret = kernel_listen(socket, INT_MAX); if (ret < 0) @@ -546,9 +551,9 @@ static int rxperf_process_call(struct rxperf_call *call) } /* - * Add a key to the security keyring. + * Add an rxkad key to the security keyring. 
*/ -static int rxperf_add_key(struct key *keyring) +static int rxperf_add_rxkad_key(struct key *keyring) { key_ref_t kref; int ret; @@ -574,6 +579,47 @@ static int rxperf_add_key(struct key *keyring) return ret; } +#ifdef CONFIG_RXGK +/* + * Add a yfs-rxgk key to the security keyring. + */ +static int rxperf_add_yfs_rxgk_key(struct key *keyring, u32 enctype) +{ + const struct krb5_enctype *krb5 = crypto_krb5_find_enctype(enctype); + key_ref_t kref; + char name[64]; + int ret; + u8 key[32]; + + if (!krb5 || krb5->key_len > sizeof(key)) + return 0; + + /* The key is just { 0, 1, 2, 3, 4, ... } */ + for (int i = 0; i < krb5->key_len; i++) + key[i] = i; + + sprintf(name, "%u:6:1:%u", RX_PERF_SERVICE, enctype); + + kref = key_create_or_update(make_key_ref(keyring, true), + "rxrpc_s", name, + key, krb5->key_len, + KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | + KEY_USR_VIEW, + KEY_ALLOC_NOT_IN_QUOTA); + + if (IS_ERR(kref)) { + pr_err("Can't allocate rxperf server key: %ld\n", PTR_ERR(kref)); + return PTR_ERR(kref); + } + + ret = key_link(keyring, key_ref_to_ptr(kref)); + if (ret < 0) + pr_err("Can't link rxperf server key: %d\n", ret); + key_ref_put(kref); + return ret; +} +#endif + /* * Initialise the rxperf server. */ @@ -603,9 +649,29 @@ static int __init rxperf_init(void) goto error_keyring; } rxperf_sec_keyring = keyring; - ret = rxperf_add_key(keyring); + ret = rxperf_add_rxkad_key(keyring); + if (ret < 0) + goto error_key; +#ifdef CONFIG_RXGK + ret = rxperf_add_yfs_rxgk_key(keyring, KRB5_ENCTYPE_AES128_CTS_HMAC_SHA1_96); + if (ret < 0) + goto error_key; + ret = rxperf_add_yfs_rxgk_key(keyring, KRB5_ENCTYPE_AES256_CTS_HMAC_SHA1_96); + if (ret < 0) + goto error_key; + ret = rxperf_add_yfs_rxgk_key(keyring, KRB5_ENCTYPE_AES128_CTS_HMAC_SHA256_128); + if (ret < 0) + goto error_key; + ret = rxperf_add_yfs_rxgk_key(keyring, KRB5_ENCTYPE_AES256_CTS_HMAC_SHA384_192); + if (ret < 0) + goto error_key; + ret = rxperf_add_yfs_rxgk_key(keyring, KRB5_ENCTYPE_CAMELLIA128_CTS_CMAC); + if (ret < 0) + goto error_key; + ret = rxperf_add_yfs_rxgk_key(keyring, KRB5_ENCTYPE_CAMELLIA256_CTS_CMAC); if (ret < 0) goto error_key; +#endif ret = rxperf_open_socket(); if (ret < 0) diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 9784adc8f275..078d91a6b77f 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -20,6 +20,9 @@ static const struct rxrpc_security *rxrpc_security_types[] = { #ifdef CONFIG_RXKAD [RXRPC_SECURITY_RXKAD] = &rxkad, #endif +#ifdef CONFIG_RXGK + [RXRPC_SECURITY_YFS_RXGK] = &rxgk_yfs, +#endif }; int __init rxrpc_init_security(void) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 84dc6c94f23b..ebbb78b842de 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -607,7 +607,7 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) static struct rxrpc_call * rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, struct rxrpc_send_params *p) - __releases(&rx->sk.sk_lock.slock) + __releases(&rx->sk.sk_lock) __acquires(&call->user_mutex) { struct rxrpc_conn_parameters cp; @@ -657,7 +657,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, * - the socket may be either a client socket or a server socket */ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) - __releases(&rx->sk.sk_lock.slock) { struct rxrpc_call *call; bool dropped_lock = false; @@ -759,14 +758,21 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if 
(rxrpc_call_is_complete(call)) { /* it's too late for this call */ ret = -ESHUTDOWN; - } else if (p.command == RXRPC_CMD_SEND_ABORT) { + goto out_put_unlock; + } + + switch (p.command) { + case RXRPC_CMD_SEND_ABORT: rxrpc_propose_abort(call, p.abort_code, -ECONNABORTED, rxrpc_abort_call_sendmsg); ret = 0; - } else if (p.command != RXRPC_CMD_SEND_DATA) { - ret = -EINVAL; - } else { + break; + case RXRPC_CMD_SEND_DATA: ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock); + break; + default: + ret = -EINVAL; + break; } out_put_unlock: @@ -794,6 +800,8 @@ error_release_sock: * appropriate to sending data. No control data should be supplied in @msg, * nor should an address be supplied. MSG_MORE should be flagged if there's * more data to come, otherwise this data will end the transmission phase. + * + * Return: %0 if successful and a negative error code otherwise. */ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, struct msghdr *msg, size_t len, @@ -829,8 +837,9 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data); * @error: Local error value * @why: Indication as to why. * - * Allow a kernel service to abort a call, if it's still in an abortable state - * and return true if the call was aborted, false if it was already complete. + * Allow a kernel service to abort a call if it's still in an abortable state. + * + * Return: %true if the call was aborted, %false if it was already complete. */ bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, u32 abort_code, int error, enum rxrpc_abort_reason why) diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c index e51940589ee5..36b05fd842a7 100644 --- a/net/rxrpc/server_key.c +++ b/net/rxrpc/server_key.c @@ -152,6 +152,8 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen) * * Set the server security keyring on an rxrpc socket. This is used to provide * the encryption keys for a kernel service. + * + * Return: %0 if successful and a negative error code otherwise. */ int rxrpc_sock_set_security_keyring(struct sock *sk, struct key *keyring) { @@ -169,3 +171,43 @@ int rxrpc_sock_set_security_keyring(struct sock *sk, struct key *keyring) return ret; } EXPORT_SYMBOL(rxrpc_sock_set_security_keyring); + +/** + * rxrpc_sock_set_manage_response - Set the manage-response flag for a kernel service + * @sk: The socket to set the keyring on + * @set: True to set, false to clear the flag + * + * Set the flag on an rxrpc socket to say that the caller wants to manage the + * RESPONSE packet and the user-defined data it may contain. Setting this + * means that recvmsg() will return messages with RXRPC_CHALLENGED in the + * control message buffer containing information about the challenge. + * + * The user should respond to the challenge by passing RXRPC_RESPOND or + * RXRPC_RESPOND_ABORT control messages with sendmsg() to the same call. + * Supplementary control messages, such as RXRPC_RESP_RXGK_APPDATA, may be + * included to indicate the parts the user wants to supply. + * + * The server will be passed the response data with a RXRPC_RESPONDED control + * message when it gets the first data from each call. + * + * Note that this is only honoured by security classes that need auxiliary data + * (e.g. RxGK). Those that don't offer the facility (e.g. RxKAD) respond + * without consulting userspace. + * + * Return: The previous setting. 
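+ *
+ * A minimal usage sketch for a kernel service (hypothetical setup code;
+ * error handling elided):
+ *
+ *	int old = rxrpc_sock_set_manage_response(socket->sk, true);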
+ */ +int rxrpc_sock_set_manage_response(struct sock *sk, bool set) +{ + struct rxrpc_sock *rx = rxrpc_sk(sk); + int ret; + + lock_sock(sk); + ret = !!test_bit(RXRPC_SOCK_MANAGE_RESPONSE, &rx->flags); + if (set) + set_bit(RXRPC_SOCK_MANAGE_RESPONSE, &rx->flags); + else + clear_bit(RXRPC_SOCK_MANAGE_RESPONSE, &rx->flags); + release_sock(sk); + return ret; +} +EXPORT_SYMBOL(rxrpc_sock_set_manage_response); diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index c550991d48fa..29767038691a 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -60,14 +60,6 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_ return txb; } -void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) -{ - int r; - - __refcount_inc(&txb->ref, &r); - trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, r + 1, what); -} - void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) { int r = refcount_read(&txb->ref); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index a800127effcd..9f0b3f943fca 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -403,6 +403,18 @@ config NET_SCH_ETS If unsure, say N. +config NET_SCH_BPF + bool "BPF-based Qdisc" + depends on BPF_SYSCALL && BPF_JIT && DEBUG_INFO_BTF + help + This option allows BPF-based queueing disciplines. With BPF struct_ops, + users can implement supported operators in Qdisc_ops using BPF programs. + The queue holding skbs can be built with BPF maps or graphs. + + Say Y here if you want to use BPF-based Qdisc. + + If unsure, say N. + menuconfig NET_SCH_DEFAULT bool "Allow override default queue discipline" help diff --git a/net/sched/Makefile b/net/sched/Makefile index 82c3f78ca486..904d784902d1 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_NET_SCH_FQ_PIE) += sch_fq_pie.o obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o obj-$(CONFIG_NET_SCH_ETF) += sch_etf.o obj-$(CONFIG_NET_SCH_TAPRIO) += sch_taprio.o +obj-$(CONFIG_NET_SCH_BPF) += bpf_qdisc.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 839790043256..057e20cef375 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1461,17 +1461,29 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct netlink_ext_ack *extack) { struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {}; - struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; + struct nlattr *tb[TCA_ACT_MAX_PRIO + 2]; struct tc_action *act; size_t sz = 0; int err; int i; - err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL, + err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO + 1, nla, NULL, extack); if (err < 0) return err; + /* The nested attributes are parsed as types, but they are really an + * array of actions. So we parse one more than we can handle, and return + * an error if the last one is set (as that indicates that the request + * contained more than the maximum number of actions).
+ */ + if (tb[TCA_ACT_MAX_PRIO + 1]) { + NL_SET_ERR_MSG_FMT(extack, + "Only %d actions supported per filter", + TCA_ACT_MAX_PRIO); + return -EINVAL; + } + for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { struct tc_action_ops *a_o; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 5b3814365924..5f01f567c934 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -30,7 +30,29 @@ static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); #define MIRRED_NEST_LIMIT 4 -static DEFINE_PER_CPU(unsigned int, mirred_nest_level); + +#ifndef CONFIG_PREEMPT_RT +static u8 tcf_mirred_nest_level_inc_return(void) +{ + return __this_cpu_inc_return(softnet_data.xmit.sched_mirred_nest); +} + +static void tcf_mirred_nest_level_dec(void) +{ + __this_cpu_dec(softnet_data.xmit.sched_mirred_nest); +} + +#else +static u8 tcf_mirred_nest_level_inc_return(void) +{ + return current->net_xmit.sched_mirred_nest++; +} + +static void tcf_mirred_nest_level_dec(void) +{ + current->net_xmit.sched_mirred_nest--; +} +#endif static bool tcf_mirred_is_act_redirect(int action) { @@ -423,7 +445,7 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, int m_eaction; u32 blockid; - nest_level = __this_cpu_inc_return(mirred_nest_level); + nest_level = tcf_mirred_nest_level_inc_return(); if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", netdev_name(skb->dev)); @@ -454,7 +476,7 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, retval); dec_nest_level: - __this_cpu_dec(mirred_nest_level); + tcf_mirred_nest_level_dec(); return retval; } diff --git a/net/sched/bpf_qdisc.c b/net/sched/bpf_qdisc.c new file mode 100644 index 000000000000..7ea8b54b2ab1 --- /dev/null +++ b/net/sched/bpf_qdisc.c @@ -0,0 +1,475 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/types.h> +#include <linux/bpf_verifier.h> +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/filter.h> +#include <net/pkt_sched.h> +#include <net/pkt_cls.h> + +#define QDISC_OP_IDX(op) (offsetof(struct Qdisc_ops, op) / sizeof(void (*)(void))) +#define QDISC_MOFF_IDX(moff) (moff / sizeof(void (*)(void))) + +static struct bpf_struct_ops bpf_Qdisc_ops; + +struct bpf_sched_data { + struct qdisc_watchdog watchdog; +}; + +struct bpf_sk_buff_ptr { + struct sk_buff *skb; +}; + +static int bpf_qdisc_init(struct btf *btf) +{ + return 0; +} + +BTF_ID_LIST_SINGLE(bpf_qdisc_ids, struct, Qdisc) +BTF_ID_LIST_SINGLE(bpf_sk_buff_ids, struct, sk_buff) +BTF_ID_LIST_SINGLE(bpf_sk_buff_ptr_ids, struct, bpf_sk_buff_ptr) + +static bool bpf_qdisc_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + struct btf *btf = prog->aux->attach_btf; + u32 arg; + + arg = btf_ctx_arg_idx(btf, prog->aux->attach_func_proto, off); + if (prog->aux->attach_st_ops_member_off == offsetof(struct Qdisc_ops, enqueue)) { + if (arg == 2 && type == BPF_READ) { + info->reg_type = PTR_TO_BTF_ID | PTR_TRUSTED; + info->btf = btf; + info->btf_id = bpf_sk_buff_ptr_ids[0]; + return true; + } + } + + return bpf_tracing_btf_ctx_access(off, size, type, prog, info); +} + +static int bpf_qdisc_qdisc_access(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, size_t *end) +{ + switch (off) { + case offsetof(struct Qdisc, limit): + *end = offsetofend(struct Qdisc, limit); + break; + case offsetof(struct Qdisc, q) + offsetof(struct qdisc_skb_head, qlen): + *end = offsetof(struct Qdisc, q) + 
offsetofend(struct qdisc_skb_head, qlen); + break; + case offsetof(struct Qdisc, qstats) ... offsetofend(struct Qdisc, qstats) - 1: + *end = offsetofend(struct Qdisc, qstats); + break; + default: + return -EACCES; + } + + return 0; +} + +static int bpf_qdisc_sk_buff_access(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, size_t *end) +{ + switch (off) { + case offsetof(struct sk_buff, tstamp): + *end = offsetofend(struct sk_buff, tstamp); + break; + case offsetof(struct sk_buff, cb) + offsetof(struct qdisc_skb_cb, data[0]) ... + offsetof(struct sk_buff, cb) + offsetof(struct qdisc_skb_cb, + data[QDISC_CB_PRIV_LEN - 1]): + *end = offsetof(struct sk_buff, cb) + + offsetofend(struct qdisc_skb_cb, data[QDISC_CB_PRIV_LEN - 1]); + break; + default: + return -EACCES; + } + + return 0; +} + +static int bpf_qdisc_btf_struct_access(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, int size) +{ + const struct btf_type *t, *skbt, *qdisct; + size_t end; + int err; + + skbt = btf_type_by_id(reg->btf, bpf_sk_buff_ids[0]); + qdisct = btf_type_by_id(reg->btf, bpf_qdisc_ids[0]); + t = btf_type_by_id(reg->btf, reg->btf_id); + + if (t == skbt) { + err = bpf_qdisc_sk_buff_access(log, reg, off, &end); + } else if (t == qdisct) { + err = bpf_qdisc_qdisc_access(log, reg, off, &end); + } else { + bpf_log(log, "only read is supported\n"); + return -EACCES; + } + + if (err) { + bpf_log(log, "no write support to %s at off %d\n", + btf_name_by_offset(reg->btf, t->name_off), off); + return -EACCES; + } + + if (off + size > end) { + bpf_log(log, + "write access at off %d with size %d beyond the member of %s ended at %zu\n", + off, size, btf_name_by_offset(reg->btf, t->name_off), end); + return -EACCES; + } + + return 0; +} + +BTF_ID_LIST(bpf_qdisc_init_prologue_ids) +BTF_ID(func, bpf_qdisc_init_prologue) + +static int bpf_qdisc_gen_prologue(struct bpf_insn *insn_buf, bool direct_write, + const struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + if (prog->aux->attach_st_ops_member_off != offsetof(struct Qdisc_ops, init)) + return 0; + + /* r6 = r1; // r6 will be "u64 *ctx". r1 is "u64 *ctx". + * r2 = r1[16]; // r2 will be "struct netlink_ext_ack *extack" + * r1 = r1[0]; // r1 will be "struct Qdisc *sch" + * r0 = bpf_qdisc_init_prologue(r1, r2); + * if r0 == 0 goto pc+1; + * BPF_EXIT; + * r1 = r6; // r1 will be "u64 *ctx". 
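+	 *
+	 * (Each struct_ops argument occupies one u64 slot in "ctx", so for
+	 * .init(sch, arg, extack): ctx[0] is "sch", ctx[8] is the netlink
+	 * "arg" (not needed by the prologue) and ctx[16] is "extack".)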
+ */ + *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 16); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0); + *insn++ = BPF_CALL_KFUNC(0, bpf_qdisc_init_prologue_ids[0]); + *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1); + *insn++ = BPF_EXIT_INSN(); + *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); + *insn++ = prog->insnsi[0]; + + return insn - insn_buf; +} + +BTF_ID_LIST(bpf_qdisc_reset_destroy_epilogue_ids) +BTF_ID(func, bpf_qdisc_reset_destroy_epilogue) + +static int bpf_qdisc_gen_epilogue(struct bpf_insn *insn_buf, const struct bpf_prog *prog, + s16 ctx_stack_off) +{ + struct bpf_insn *insn = insn_buf; + + if (prog->aux->attach_st_ops_member_off != offsetof(struct Qdisc_ops, reset) && + prog->aux->attach_st_ops_member_off != offsetof(struct Qdisc_ops, destroy)) + return 0; + + /* r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx" + * r1 = r1[0]; // r1 will be "struct Qdisc *sch" + * r0 = bpf_qdisc_reset_destroy_epilogue(r1); + * BPF_EXIT; + */ + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0); + *insn++ = BPF_CALL_KFUNC(0, bpf_qdisc_reset_destroy_epilogue_ids[0]); + *insn++ = BPF_EXIT_INSN(); + + return insn - insn_buf; +} + +__bpf_kfunc_start_defs(); + +/* bpf_skb_get_hash - Get the flow hash of an skb. + * @skb: The skb to get the flow hash from. + */ +__bpf_kfunc u32 bpf_skb_get_hash(struct sk_buff *skb) +{ + return skb_get_hash(skb); +} + +/* bpf_kfree_skb - Release an skb's reference and drop it immediately. + * @skb: The skb whose reference is to be released and dropped. + */ +__bpf_kfunc void bpf_kfree_skb(struct sk_buff *skb) +{ + kfree_skb(skb); +} + +/* bpf_qdisc_skb_drop - Drop an skb by adding it to a deferred free list. + * @skb: The skb whose reference is to be released and dropped. + * @to_free_list: The list of skbs to be dropped. + */ +__bpf_kfunc void bpf_qdisc_skb_drop(struct sk_buff *skb, + struct bpf_sk_buff_ptr *to_free_list) +{ + __qdisc_drop(skb, (struct sk_buff **)to_free_list); +} + +/* bpf_qdisc_watchdog_schedule - Schedule a qdisc to a later time using a timer. + * @sch: The qdisc to be scheduled. + * @expire: The expiry time of the timer. + * @delta_ns: The slack range of the timer. + */ +__bpf_kfunc void bpf_qdisc_watchdog_schedule(struct Qdisc *sch, u64 expire, u64 delta_ns) +{ + struct bpf_sched_data *q = qdisc_priv(sch); + + qdisc_watchdog_schedule_range_ns(&q->watchdog, expire, delta_ns); +} + +/* bpf_qdisc_init_prologue - Hidden kfunc called in prologue of .init. */ +__bpf_kfunc int bpf_qdisc_init_prologue(struct Qdisc *sch, + struct netlink_ext_ack *extack) +{ + struct bpf_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *p; + + qdisc_watchdog_init(&q->watchdog, sch); + + if (sch->parent != TC_H_ROOT) { + /* If qdisc_lookup() returns NULL, it means .init is called by + * qdisc_create_dflt() in mq/mqprio_init and the parent qdisc + * has not been added to qdisc_hash yet.
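+		 * A NULL lookup therefore falls through without an error;
+		 * only a parent that is found and is not an mq root
+		 * (TCQ_F_MQROOT) is rejected below.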
+ */ + p = qdisc_lookup(dev, TC_H_MAJ(sch->parent)); + if (p && !(p->flags & TCQ_F_MQROOT)) { + NL_SET_ERR_MSG(extack, "BPF qdisc only supported on root or mq"); + return -EINVAL; + } + } + + return 0; +} + +/* bpf_qdisc_reset_destroy_epilogue - Hidden kfunc called in epilogue of .reset + * and .destroy + */ +__bpf_kfunc void bpf_qdisc_reset_destroy_epilogue(struct Qdisc *sch) +{ + struct bpf_sched_data *q = qdisc_priv(sch); + + qdisc_watchdog_cancel(&q->watchdog); +} + +/* bpf_qdisc_bstats_update - Update Qdisc basic statistics + * @sch: The qdisc from which an skb is dequeued. + * @skb: The skb to be dequeued. + */ +__bpf_kfunc void bpf_qdisc_bstats_update(struct Qdisc *sch, const struct sk_buff *skb) +{ + bstats_update(&sch->bstats, skb); +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(qdisc_kfunc_ids) +BTF_ID_FLAGS(func, bpf_skb_get_hash, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_kfree_skb, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_qdisc_skb_drop, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_qdisc_watchdog_schedule, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_qdisc_init_prologue, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_qdisc_reset_destroy_epilogue, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_qdisc_bstats_update, KF_TRUSTED_ARGS) +BTF_KFUNCS_END(qdisc_kfunc_ids) + +BTF_SET_START(qdisc_common_kfunc_set) +BTF_ID(func, bpf_skb_get_hash) +BTF_ID(func, bpf_kfree_skb) +BTF_ID(func, bpf_dynptr_from_skb) +BTF_SET_END(qdisc_common_kfunc_set) + +BTF_SET_START(qdisc_enqueue_kfunc_set) +BTF_ID(func, bpf_qdisc_skb_drop) +BTF_ID(func, bpf_qdisc_watchdog_schedule) +BTF_SET_END(qdisc_enqueue_kfunc_set) + +BTF_SET_START(qdisc_dequeue_kfunc_set) +BTF_ID(func, bpf_qdisc_watchdog_schedule) +BTF_ID(func, bpf_qdisc_bstats_update) +BTF_SET_END(qdisc_dequeue_kfunc_set) + +enum qdisc_ops_kf_flags { + QDISC_OPS_KF_COMMON = 0, + QDISC_OPS_KF_ENQUEUE = 1 << 0, + QDISC_OPS_KF_DEQUEUE = 1 << 1, +}; + +static const u32 qdisc_ops_context_flags[] = { + [QDISC_OP_IDX(enqueue)] = QDISC_OPS_KF_ENQUEUE, + [QDISC_OP_IDX(dequeue)] = QDISC_OPS_KF_DEQUEUE, + [QDISC_OP_IDX(init)] = QDISC_OPS_KF_COMMON, + [QDISC_OP_IDX(reset)] = QDISC_OPS_KF_COMMON, + [QDISC_OP_IDX(destroy)] = QDISC_OPS_KF_COMMON, +}; + +static int bpf_qdisc_kfunc_filter(const struct bpf_prog *prog, u32 kfunc_id) +{ + u32 moff, flags; + + if (!btf_id_set8_contains(&qdisc_kfunc_ids, kfunc_id)) + return 0; + + if (prog->aux->st_ops != &bpf_Qdisc_ops) + return -EACCES; + + moff = prog->aux->attach_st_ops_member_off; + flags = qdisc_ops_context_flags[QDISC_MOFF_IDX(moff)]; + + if ((flags & QDISC_OPS_KF_ENQUEUE) && + btf_id_set_contains(&qdisc_enqueue_kfunc_set, kfunc_id)) + return 0; + + if ((flags & QDISC_OPS_KF_DEQUEUE) && + btf_id_set_contains(&qdisc_dequeue_kfunc_set, kfunc_id)) + return 0; + + if (btf_id_set_contains(&qdisc_common_kfunc_set, kfunc_id)) + return 0; + + return -EACCES; +} + +static const struct btf_kfunc_id_set bpf_qdisc_kfunc_set = { + .owner = THIS_MODULE, + .set = &qdisc_kfunc_ids, + .filter = bpf_qdisc_kfunc_filter, +}; + +static const struct bpf_verifier_ops bpf_qdisc_verifier_ops = { + .get_func_proto = bpf_base_func_proto, + .is_valid_access = bpf_qdisc_is_valid_access, + .btf_struct_access = bpf_qdisc_btf_struct_access, + .gen_prologue = bpf_qdisc_gen_prologue, + .gen_epilogue = bpf_qdisc_gen_epilogue, +}; + +static int bpf_qdisc_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + const struct Qdisc_ops *uqdisc_ops; + struct Qdisc_ops 
*qdisc_ops; + u32 moff; + + uqdisc_ops = (const struct Qdisc_ops *)udata; + qdisc_ops = (struct Qdisc_ops *)kdata; + + moff = __btf_member_bit_offset(t, member) / 8; + switch (moff) { + case offsetof(struct Qdisc_ops, priv_size): + if (uqdisc_ops->priv_size) + return -EINVAL; + qdisc_ops->priv_size = sizeof(struct bpf_sched_data); + return 1; + case offsetof(struct Qdisc_ops, peek): + qdisc_ops->peek = qdisc_peek_dequeued; + return 0; + case offsetof(struct Qdisc_ops, id): + if (bpf_obj_name_cpy(qdisc_ops->id, uqdisc_ops->id, + sizeof(qdisc_ops->id)) <= 0) + return -EINVAL; + return 1; + } + + return 0; +} + +static int bpf_qdisc_reg(void *kdata, struct bpf_link *link) +{ + return register_qdisc(kdata); +} + +static void bpf_qdisc_unreg(void *kdata, struct bpf_link *link) +{ + return unregister_qdisc(kdata); +} + +static int bpf_qdisc_validate(void *kdata) +{ + struct Qdisc_ops *ops = (struct Qdisc_ops *)kdata; + + if (!ops->enqueue || !ops->dequeue || !ops->init || + !ops->reset || !ops->destroy) + return -EINVAL; + + return 0; +} + +static int Qdisc_ops__enqueue(struct sk_buff *skb__ref, struct Qdisc *sch, + struct sk_buff **to_free) +{ + return 0; +} + +static struct sk_buff *Qdisc_ops__dequeue(struct Qdisc *sch) +{ + return NULL; +} + +static int Qdisc_ops__init(struct Qdisc *sch, struct nlattr *arg, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static void Qdisc_ops__reset(struct Qdisc *sch) +{ +} + +static void Qdisc_ops__destroy(struct Qdisc *sch) +{ +} + +static struct Qdisc_ops __bpf_ops_qdisc_ops = { + .enqueue = Qdisc_ops__enqueue, + .dequeue = Qdisc_ops__dequeue, + .init = Qdisc_ops__init, + .reset = Qdisc_ops__reset, + .destroy = Qdisc_ops__destroy, +}; + +static struct bpf_struct_ops bpf_Qdisc_ops = { + .verifier_ops = &bpf_qdisc_verifier_ops, + .reg = bpf_qdisc_reg, + .unreg = bpf_qdisc_unreg, + .validate = bpf_qdisc_validate, + .init_member = bpf_qdisc_init_member, + .init = bpf_qdisc_init, + .name = "Qdisc_ops", + .cfi_stubs = &__bpf_ops_qdisc_ops, + .owner = THIS_MODULE, +}; + +BTF_ID_LIST(bpf_sk_buff_dtor_ids) +BTF_ID(func, bpf_kfree_skb) + +static int __init bpf_qdisc_kfunc_init(void) +{ + int ret; + const struct btf_id_dtor_kfunc skb_kfunc_dtors[] = { + { + .btf_id = bpf_sk_buff_ids[0], + .kfunc_btf_id = bpf_sk_buff_dtor_ids[0] + }, + }; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_qdisc_kfunc_set); + ret = ret ?: register_btf_id_dtor_kfuncs(skb_kfunc_dtors, + ARRAY_SIZE(skb_kfunc_dtors), + THIS_MODULE); + ret = ret ?: register_bpf_struct_ops(&bpf_Qdisc_ops, Qdisc_ops); + + return ret; +} +late_initcall(bpf_qdisc_kfunc_init); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f74a097f54ae..c5e3673aadbe 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -25,6 +25,7 @@ #include <linux/hrtimer.h> #include <linux/slab.h> #include <linux/hashtable.h> +#include <linux/bpf.h> #include <net/netdev_lock.h> #include <net/net_namespace.h> @@ -207,7 +208,7 @@ static struct Qdisc_ops *qdisc_lookup_default(const char *name) for (q = qdisc_base; q; q = q->next) { if (!strcmp(name, q->id)) { - if (!try_module_get(q->owner)) + if (!bpf_try_module_get(q, q->owner)) q = NULL; break; } @@ -237,7 +238,7 @@ int qdisc_set_default(const char *name) if (ops) { /* Set new default */ - module_put(default_qdisc_ops->owner); + bpf_module_put(default_qdisc_ops, default_qdisc_ops->owner); default_qdisc_ops = ops; } write_unlock(&qdisc_mod_lock); @@ -359,7 +360,7 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) 
read_lock(&qdisc_mod_lock); for (q = qdisc_base; q; q = q->next) { if (nla_strcmp(kind, q->id) == 0) { - if (!try_module_get(q->owner)) + if (!bpf_try_module_get(q, q->owner)) q = NULL; break; } @@ -1370,7 +1371,7 @@ err_out3: netdev_put(dev, &sch->dev_tracker); qdisc_free(sch); err_out2: - module_put(ops->owner); + bpf_module_put(ops, ops->owner); err_out: *errp = err; return NULL; @@ -1782,7 +1783,7 @@ static void request_qdisc_module(struct nlattr *kind) ops = qdisc_lookup_ops(kind); if (ops) { - module_put(ops->owner); + bpf_module_put(ops, ops->owner); return; } diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c index ce63414185fd..d1d87dce7f3f 100644 --- a/net/sched/sch_frag.c +++ b/net/sched/sch_frag.c @@ -16,14 +16,18 @@ struct sch_frag_data { unsigned int l2_len; u8 l2_data[VLAN_ETH_HLEN]; int (*xmit)(struct sk_buff *skb); + local_lock_t bh_lock; }; -static DEFINE_PER_CPU(struct sch_frag_data, sch_frag_data_storage); +static DEFINE_PER_CPU(struct sch_frag_data, sch_frag_data_storage) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; static int sch_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { struct sch_frag_data *data = this_cpu_ptr(&sch_frag_data_storage); + lockdep_assert_held(&data->bh_lock); if (skb_cow_head(skb, data->l2_len) < 0) { kfree_skb(skb); return -ENOMEM; @@ -95,6 +99,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb, struct rtable sch_frag_rt = { 0 }; unsigned long orig_dst; + local_lock_nested_bh(&sch_frag_data_storage.bh_lock); sch_frag_prepare_frag(skb, xmit); dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); @@ -105,11 +110,13 @@ static int sch_fragment(struct net *net, struct sk_buff *skb, IPCB(skb)->frag_max_size = mru; ret = ip_do_fragment(net, skb->sk, skb, sch_frag_xmit); + local_unlock_nested_bh(&sch_frag_data_storage.bh_lock); refdst_drop(orig_dst); } else if (skb_protocol(skb, true) == htons(ETH_P_IPV6)) { unsigned long orig_dst; struct rt6_info sch_frag_rt; + local_lock_nested_bh(&sch_frag_data_storage.bh_lock); sch_frag_prepare_frag(skb, xmit); memset(&sch_frag_rt, 0, sizeof(sch_frag_rt)); dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, @@ -122,6 +129,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb, ret = ipv6_stub->ipv6_fragment(net, skb->sk, skb, sch_frag_xmit); + local_unlock_nested_bh(&sch_frag_data_storage.bh_lock); refdst_drop(orig_dst); } else { net_warn_ratelimited("Fail frag %s: eth=%x, MRU=%d, MTU=%d\n", diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 514b1b6ac681..08e0e3aff976 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -24,6 +24,7 @@ #include <linux/if_vlan.h> #include <linux/skb_array.h> #include <linux/if_macvlan.h> +#include <linux/bpf.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/dst.h> @@ -1001,14 +1002,14 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, { struct Qdisc *sch; - if (!try_module_get(ops->owner)) { + if (!bpf_try_module_get(ops, ops->owner)) { NL_SET_ERR_MSG(extack, "Failed to increase module reference counter"); return NULL; } sch = qdisc_alloc(dev_queue, ops, extack); if (IS_ERR(sch)) { - module_put(ops->owner); + bpf_module_put(ops, ops->owner); return NULL; } sch->parent = parentid; @@ -1078,7 +1079,7 @@ static void __qdisc_destroy(struct Qdisc *qdisc) ops->destroy(qdisc); lockdep_unregister_key(&qdisc->root_lock_key); - module_put(ops->owner); + bpf_module_put(ops, ops->owner); netdev_put(dev, &qdisc->dev_tracker); 
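The struct_ops plumbing above (net/sched/bpf_qdisc.c plus the bpf_try_module_get()/bpf_module_put() conversions in sch_api.c and sch_generic.c) is consumed from the BPF side roughly as follows. This is a minimal sketch of a "drop everything" qdisc, modelled on the kfuncs and on the five mandatory ops that bpf_qdisc_validate() checks; all program, struct and id names below are illustrative, not part of this series:

	// SPDX-License-Identifier: GPL-2.0
	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	#define NET_XMIT_DROP 0x01	// value from linux/netdevice.h

	// kfunc added above; KF_RELEASE, so it releases the skb reference
	void bpf_qdisc_skb_drop(struct sk_buff *skb,
				struct bpf_sk_buff_ptr *to_free) __ksym;

	SEC("struct_ops/drop_enqueue")
	int BPF_PROG(drop_enqueue, struct sk_buff *skb, struct Qdisc *sch,
		     struct bpf_sk_buff_ptr *to_free)
	{
		bpf_qdisc_skb_drop(skb, to_free);	// releases skb__ref
		return NET_XMIT_DROP;
	}

	SEC("struct_ops/drop_dequeue")
	struct sk_buff *BPF_PROG(drop_dequeue, struct Qdisc *sch)
	{
		return NULL;	// nothing is ever queued
	}

	SEC("struct_ops/drop_init")
	int BPF_PROG(drop_init, struct Qdisc *sch, struct nlattr *opt,
		     struct netlink_ext_ack *extack)
	{
		return 0;
	}

	SEC("struct_ops/drop_reset")
	void BPF_PROG(drop_reset, struct Qdisc *sch)
	{
	}

	SEC("struct_ops/drop_destroy")
	void BPF_PROG(drop_destroy, struct Qdisc *sch)
	{
	}

	SEC(".struct_ops.link")
	struct Qdisc_ops drop_qdisc = {
		.enqueue = (void *)drop_enqueue,
		.dequeue = (void *)drop_dequeue,
		.init    = (void *)drop_init,
		.reset   = (void *)drop_reset,
		.destroy = (void *)drop_destroy,
		.id      = "bpf_drop",
	};

	char _license[] SEC("license") = "GPL";

Once the struct_ops map is attached, the qdisc can be instantiated via tc(8) or through the BPF_TC_QDISC hook this series adds to libbpf (see the netlink.c changes further below); a sketch, assuming "ifindex" is already known:

	DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
			    .attach_point = BPF_TC_QDISC,
			    .parent = TC_H_ROOT,
			    .handle = 0x8000000,
			    .qdisc = "bpf_drop");
	err = bpf_tc_hook_create(&hook);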
trace_qdisc_destroy(qdisc); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 760152e751c7..5793d71852b8 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -736,24 +736,6 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, return peer; } -/* Delete a transport address from an association. */ -void sctp_assoc_del_peer(struct sctp_association *asoc, - const union sctp_addr *addr) -{ - struct list_head *pos; - struct list_head *temp; - struct sctp_transport *transport; - - list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { - transport = list_entry(pos, struct sctp_transport, transports); - if (sctp_cmp_addr_exact(addr, &transport->ipaddr)) { - /* Do book keeping for removing the peer and free it. */ - sctp_assoc_rm_peer(asoc, transport); - break; - } - } -} - /* Lookup a transport by address. */ struct sctp_transport *sctp_assoc_lookup_paddr( const struct sctp_association *asoc, diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index f80208edd6a5..3ead591c72fd 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -115,14 +115,6 @@ static void sctp_control_set_owner_w(struct sctp_chunk *chunk) skb->destructor = sctp_control_release_owner; } -/* What was the inbound interface for this chunk? */ -int sctp_chunk_iif(const struct sctp_chunk *chunk) -{ - struct sk_buff *skb = chunk->skb; - - return SCTP_INPUT_CB(skb)->af->skb_iif(skb); -} - /* RFC 2960 3.3.2 Initiation (INIT) (1) * * Note 2: The ECN capable field is reserved for future use of diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 95696f42647e..d946bfb424c7 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -485,19 +485,6 @@ int strp_init(struct strparser *strp, struct sock *sk, } EXPORT_SYMBOL_GPL(strp_init); -/* Sock process lock held (lock_sock) */ -void __strp_unpause(struct strparser *strp) -{ - strp->paused = 0; - - if (strp->need_bytes) { - if (strp_peek_len(strp) < strp->need_bytes) - return; - } - strp_read_sock(strp); -} -EXPORT_SYMBOL_GPL(__strp_unpause); - void strp_unpause(struct strparser *strp) { strp->paused = 0; diff --git a/net/tipc/link.c b/net/tipc/link.c index 18be6ff4c3db..3ee44d731700 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2228,7 +2228,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, break; if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME) break; - strncpy(if_name, data, TIPC_MAX_IF_NAME); + strscpy(if_name, data, TIPC_MAX_IF_NAME); /* Update own tolerance if peer indicates a non-zero value */ if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { diff --git a/net/tipc/node.c b/net/tipc/node.c index ccf5e427f43e..cb43f2016a70 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1581,7 +1581,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, tipc_node_read_lock(node); link = node->links[bearer_id].link; if (link) { - strncpy(linkname, tipc_link_name(link), len); + strscpy(linkname, tipc_link_name(link), len); err = 0; } tipc_node_read_unlock(node); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f78a2492826f..2ab20821d6bb 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -950,13 +950,6 @@ static void unix_close(struct sock *sk, long timeout) */ } -static void unix_unhash(struct sock *sk) -{ - /* Nothing to do here, unix socket does not need a ->unhash(). - * This is merely for sockmap. 
- */ -} - static bool unix_bpf_bypass_getsockopt(int level, int optname) { if (level == SOL_SOCKET) { @@ -987,7 +980,6 @@ struct proto unix_stream_proto = { .owner = THIS_MODULE, .obj_size = sizeof(struct unix_sock), .close = unix_close, - .unhash = unix_unhash, .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = unix_stream_bpf_update_proto, diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 7f7de6d88096..6e7b727c781c 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -87,7 +87,7 @@ static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk, uarg = msg_zerocopy_realloc(sk_vsock(vsk), iter->count, - NULL); + NULL, false); if (!uarg) return -1; @@ -107,8 +107,7 @@ static int virtio_transport_fill_skb(struct sk_buff *skb, { if (zcopy) return __zerocopy_sg_from_iter(info->msg, NULL, skb, - &info->msg->msg_iter, - len); + &info->msg->msg_iter, len, NULL); return memcpy_from_msg(skb_put(skb, len), info->msg, len); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f039a7d0d6f7..fd5f79266471 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -469,6 +469,8 @@ nl80211_mbssid_config_policy[NL80211_MBSSID_CONFIG_ATTR_MAX + 1] = { [NL80211_MBSSID_CONFIG_ATTR_INDEX] = { .type = NLA_U8 }, [NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX] = { .type = NLA_U32 }, [NL80211_MBSSID_CONFIG_ATTR_EMA] = { .type = NLA_FLAG }, + [NL80211_MBSSID_CONFIG_ATTR_TX_LINK_ID] = + NLA_POLICY_MAX(NLA_U8, IEEE80211_MLD_MAX_NUM_LINKS), }; static const struct nla_policy @@ -833,6 +835,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MLD_ADDR] = NLA_POLICY_EXACT_LEN(ETH_ALEN), [NL80211_ATTR_MLO_SUPPORT] = { .type = NLA_FLAG }, [NL80211_ATTR_MAX_NUM_AKM_SUITES] = { .type = NLA_REJECT }, + [NL80211_ATTR_EML_CAPABILITY] = { .type = NLA_U16 }, [NL80211_ATTR_PUNCT_BITMAP] = NLA_POLICY_FULL_RANGE(NLA_U32, &nl80211_punct_bitmap_range), @@ -5523,11 +5526,13 @@ static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev, static int nl80211_parse_mbssid_config(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id, struct nlattr *attrs, struct cfg80211_mbssid_config *config, u8 num_elems) { struct nlattr *tb[NL80211_MBSSID_CONFIG_ATTR_MAX + 1]; + int tx_link_id = -1; if (!wiphy->mbssid_max_interfaces) return -EOPNOTSUPP; @@ -5551,6 +5556,9 @@ static int nl80211_parse_mbssid_config(struct wiphy *wiphy, (!config->index && !num_elems)) return -EINVAL; + if (tb[NL80211_MBSSID_CONFIG_ATTR_TX_LINK_ID]) + tx_link_id = nla_get_u8(tb[NL80211_MBSSID_CONFIG_ATTR_TX_LINK_ID]); + if (tb[NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX]) { u32 tx_ifindex = nla_get_u32(tb[NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX]); @@ -5572,10 +5580,25 @@ static int nl80211_parse_mbssid_config(struct wiphy *wiphy, } config->tx_wdev = tx_netdev->ieee80211_ptr; + /* Caller should call dev_put(config->tx_wdev) from this point */ + + if (config->tx_wdev->valid_links) { + if (tx_link_id == -1 || + !(config->tx_wdev->valid_links & BIT(tx_link_id))) + return -ENOLINK; + + config->tx_link_id = tx_link_id; + } } else { + if (tx_link_id >= 0 && tx_link_id != link_id) + return -EINVAL; + config->tx_wdev = dev->ieee80211_ptr; } } else if (!config->index) { + if (tx_link_id >= 0 && tx_link_id != link_id) + return -EINVAL; + config->tx_wdev = dev->ieee80211_ptr; } else { return -EINVAL; @@ -6325,7 +6348,7 @@ static int nl80211_start_ap(struct sk_buff 
*skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_MBSSID_CONFIG]) { - err = nl80211_parse_mbssid_config(&rdev->wiphy, dev, + err = nl80211_parse_mbssid_config(&rdev->wiphy, dev, link_id, info->attrs[NL80211_ATTR_MBSSID_CONFIG], ¶ms->mbssid_config, params->beacon.mbssid_ies ? @@ -7118,6 +7141,11 @@ int cfg80211_check_station_change(struct wiphy *wiphy, return -EINVAL; } + /* Accept EMLSR capabilities only for AP client before association */ + if (statype != CFG80211_STA_AP_CLIENT_UNASSOC && + params->eml_cap_present) + return -EINVAL; + switch (statype) { case CFG80211_STA_AP_MLME_CLIENT: /* Use this only for authorizing/unauthorizing a station */ @@ -7473,6 +7501,12 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.link_sta_params.he_6ghz_capa = nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]); + if (info->attrs[NL80211_ATTR_EML_CAPABILITY]) { + params.eml_cap_present = true; + params.eml_cap = + nla_get_u16(info->attrs[NL80211_ATTR_EML_CAPABILITY]); + } + if (info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]) params.airtime_weight = nla_get_u16(info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]); @@ -7631,6 +7665,12 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) } } + if (info->attrs[NL80211_ATTR_EML_CAPABILITY]) { + params.eml_cap_present = true; + params.eml_cap = + nla_get_u16(info->attrs[NL80211_ATTR_EML_CAPABILITY]); + } + if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]) params.link_sta_params.he_6ghz_capa = nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index c5181a9044ad..aa9788f20d0d 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -267,13 +267,17 @@ int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs, void xp_clear_dev(struct xsk_buff_pool *pool) { + struct net_device *netdev = pool->netdev; + if (!pool->netdev) return; + netdev_lock_ops(netdev); xp_disable_drv_zc(pool); xsk_clear_pool_at_qid(pool->netdev, pool->queue_id); - dev_put(pool->netdev); pool->netdev = NULL; + netdev_unlock_ops(netdev); + dev_put(netdev); } static void xp_release_deferred(struct work_struct *work) diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 622445f041d3..cb1e12740c87 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -952,32 +952,28 @@ static struct rtnl_link_ops xfrmi_link_ops __read_mostly = { .get_link_net = xfrmi_get_link_net, }; -static void __net_exit xfrmi_exit_batch_rtnl(struct list_head *net_exit_list, - struct list_head *dev_to_kill) +static void __net_exit xfrmi_exit_rtnl(struct net *net, + struct list_head *dev_to_kill) { - struct net *net; + struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); + struct xfrm_if __rcu **xip; + struct xfrm_if *xi; + int i; - ASSERT_RTNL(); - list_for_each_entry(net, net_exit_list, exit_list) { - struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); - struct xfrm_if __rcu **xip; - struct xfrm_if *xi; - int i; - - for (i = 0; i < XFRMI_HASH_SIZE; i++) { - for (xip = &xfrmn->xfrmi[i]; - (xi = rtnl_dereference(*xip)) != NULL; - xip = &xi->next) - unregister_netdevice_queue(xi->dev, dev_to_kill); - } - xi = rtnl_dereference(xfrmn->collect_md_xfrmi); - if (xi) + for (i = 0; i < XFRMI_HASH_SIZE; i++) { + for (xip = &xfrmn->xfrmi[i]; + (xi = rtnl_net_dereference(net, *xip)) != NULL; + xip = &xi->next) unregister_netdevice_queue(xi->dev, dev_to_kill); } + + xi = rtnl_net_dereference(net, 
xfrmn->collect_md_xfrmi); + if (xi) + unregister_netdevice_queue(xi->dev, dev_to_kill); } static struct pernet_operations xfrmi_net_ops = { - .exit_batch_rtnl = xfrmi_exit_batch_rtnl, + .exit_rtnl = xfrmi_exit_rtnl, .id = &xfrmi_net_id, .size = sizeof(struct xfrmi_net), }; diff --git a/net/xfrm/xfrm_nat_keepalive.c b/net/xfrm/xfrm_nat_keepalive.c index 82f0a301683f..ebf95d48e86c 100644 --- a/net/xfrm/xfrm_nat_keepalive.c +++ b/net/xfrm/xfrm_nat_keepalive.c @@ -9,9 +9,13 @@ #include <net/ip6_checksum.h> #include <net/xfrm.h> -static DEFINE_PER_CPU(struct sock *, nat_keepalive_sk_ipv4); +static DEFINE_PER_CPU(struct sock_bh_locked, nat_keepalive_sk_ipv4) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; #if IS_ENABLED(CONFIG_IPV6) -static DEFINE_PER_CPU(struct sock *, nat_keepalive_sk_ipv6); +static DEFINE_PER_CPU(struct sock_bh_locked, nat_keepalive_sk_ipv6) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; #endif struct nat_keepalive { @@ -56,10 +60,12 @@ static int nat_keepalive_send_ipv4(struct sk_buff *skb, skb_dst_set(skb, &rt->dst); - sk = *this_cpu_ptr(&nat_keepalive_sk_ipv4); + local_lock_nested_bh(&nat_keepalive_sk_ipv4.bh_lock); + sk = this_cpu_read(nat_keepalive_sk_ipv4.sock); sock_net_set(sk, net); err = ip_build_and_send_pkt(skb, sk, fl4.saddr, fl4.daddr, NULL, tos); sock_net_set(sk, &init_net); + local_unlock_nested_bh(&nat_keepalive_sk_ipv4.bh_lock); return err; } @@ -89,15 +95,19 @@ static int nat_keepalive_send_ipv6(struct sk_buff *skb, fl6.fl6_sport = ka->encap_sport; fl6.fl6_dport = ka->encap_dport; - sk = *this_cpu_ptr(&nat_keepalive_sk_ipv6); + local_lock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock); + sk = this_cpu_read(nat_keepalive_sk_ipv6.sock); sock_net_set(sk, net); dst = ipv6_stub->ipv6_dst_lookup_flow(net, sk, &fl6, NULL); - if (IS_ERR(dst)) + if (IS_ERR(dst)) { + local_unlock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock); return PTR_ERR(dst); + } skb_dst_set(skb, dst); err = ipv6_stub->ip6_xmit(sk, skb, &fl6, skb->mark, NULL, 0, 0); sock_net_set(sk, &init_net); + local_unlock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock); return err; } #endif @@ -202,7 +212,7 @@ static void nat_keepalive_work(struct work_struct *work) (ctx.next_run - ctx.now) * HZ); } -static int nat_keepalive_sk_init(struct sock * __percpu *socks, +static int nat_keepalive_sk_init(struct sock_bh_locked __percpu *socks, unsigned short family) { struct sock *sk; @@ -214,22 +224,22 @@ static int nat_keepalive_sk_init(struct sock * __percpu *socks, if (err < 0) goto err; - *per_cpu_ptr(socks, i) = sk; + per_cpu_ptr(socks, i)->sock = sk; } return 0; err: for_each_possible_cpu(i) - inet_ctl_sock_destroy(*per_cpu_ptr(socks, i)); + inet_ctl_sock_destroy(per_cpu_ptr(socks, i)->sock); return err; } -static void nat_keepalive_sk_fini(struct sock * __percpu *socks) +static void nat_keepalive_sk_fini(struct sock_bh_locked __percpu *socks) { int i; for_each_possible_cpu(i) - inet_ctl_sock_destroy(*per_cpu_ptr(socks, i)); + inet_ctl_sock_destroy(per_cpu_ptr(socks, i)->sock); } void xfrm_nat_keepalive_state_updated(struct xfrm_state *x) diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c index b7997541f7ee..f93d9145ab8a 100644 --- a/samples/bpf/sockex2_kern.c +++ b/samples/bpf/sockex2_kern.c @@ -31,7 +31,6 @@ static inline int proto_ports_offset(__u64 proto) switch (proto) { case IPPROTO_TCP: case IPPROTO_UDP: - case IPPROTO_DCCP: case IPPROTO_ESP: case IPPROTO_SCTP: case IPPROTO_UDPLITE: diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 3d22bf863eec..298c8f8d7719 100755 --- 
a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -4804,7 +4804,7 @@ sub process { } # do not use BUG() or variants - if ($line =~ /\b(?!AA_|BUILD_|DCCP_|IDA_|KVM_|RWLOCK_|snd_|SPIN_)(?:[a-zA-Z_]*_)?BUG(?:_ON)?(?:_[A-Z_]+)?\s*\(/) { + if ($line =~ /\b(?!AA_|BUILD_|IDA_|KVM_|RWLOCK_|snd_|SPIN_)(?:[a-zA-Z_]*_)?BUG(?:_ON)?(?:_[A-Z_]+)?\s*\(/) { my $msg_level = \&WARN; $msg_level = \&CHK if ($file); &{$msg_level}("AVOID_BUG", diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 1b942b4908a2..7d623b00495c 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -24,7 +24,6 @@ #include <net/ipv6.h> #include <linux/tcp.h> #include <linux/udp.h> -#include <linux/dccp.h> #include <linux/sctp.h> #include <linux/lsm_audit.h> #include <linux/security.h> @@ -68,13 +67,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb, ad->u.net->dport = uh->dest; break; } - case IPPROTO_DCCP: { - struct dccp_hdr *dh = dccp_hdr(skb); - - ad->u.net->sport = dh->dccph_sport; - ad->u.net->dport = dh->dccph_dport; - break; - } case IPPROTO_SCTP: { struct sctphdr *sh = sctp_hdr(skb); @@ -140,17 +132,6 @@ int ipv6_skb_to_auditdata(struct sk_buff *skb, ad->u.net->dport = uh->dest; break; } - case IPPROTO_DCCP: { - struct dccp_hdr _dccph, *dh; - - dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph); - if (dh == NULL) - break; - - ad->u.net->sport = dh->dccph_sport; - ad->u.net->dport = dh->dccph_dport; - break; - } case IPPROTO_SCTP: { struct sctphdr _sctph, *sh; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index e7a7dcab81db..b2695785610a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -65,7 +65,6 @@ #include <net/netlink.h> #include <linux/tcp.h> #include <linux/udp.h> -#include <linux/dccp.h> #include <linux/sctp.h> #include <net/sctp/structs.h> #include <linux/quota.h> @@ -1191,8 +1190,6 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc return SECCLASS_ICMP_SOCKET; else return SECCLASS_RAWIP_SOCKET; - case SOCK_DCCP: - return SECCLASS_DCCP_SOCKET; default: return SECCLASS_RAWIP_SOCKET; } @@ -4392,22 +4389,6 @@ static int selinux_parse_skb_ipv4(struct sk_buff *skb, break; } - case IPPROTO_DCCP: { - struct dccp_hdr _dccph, *dh; - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - offset += ihlen; - dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph); - if (dh == NULL) - break; - - ad->u.net->sport = dh->dccph_sport; - ad->u.net->dport = dh->dccph_dport; - break; - } - #if IS_ENABLED(CONFIG_IP_SCTP) case IPPROTO_SCTP: { struct sctphdr _sctph, *sh; @@ -4486,18 +4467,6 @@ static int selinux_parse_skb_ipv6(struct sk_buff *skb, break; } - case IPPROTO_DCCP: { - struct dccp_hdr _dccph, *dh; - - dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph); - if (dh == NULL) - break; - - ad->u.net->sport = dh->dccph_sport; - ad->u.net->dport = dh->dccph_dport; - break; - } - #if IS_ENABLED(CONFIG_IP_SCTP) case IPPROTO_SCTP: { struct sctphdr _sctph, *sh; @@ -4849,10 +4818,6 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in node_perm = UDP_SOCKET__NODE_BIND; break; - case SECCLASS_DCCP_SOCKET: - node_perm = DCCP_SOCKET__NODE_BIND; - break; - case SECCLASS_SCTP_SOCKET: node_perm = SCTP_SOCKET__NODE_BIND; break; @@ -4908,11 +4873,10 @@ static int selinux_socket_connect_helper(struct socket *sock, return 0; /* - * If a TCP, DCCP or SCTP socket, check name_connect permission + * If a TCP or SCTP socket, check name_connect permission * for the port. 
*/ if (sksec->sclass == SECCLASS_TCP_SOCKET || - sksec->sclass == SECCLASS_DCCP_SOCKET || sksec->sclass == SECCLASS_SCTP_SOCKET) { struct common_audit_data ad; struct lsm_network_audit net = {0,}; @@ -4957,9 +4921,6 @@ static int selinux_socket_connect_helper(struct socket *sock, case SECCLASS_TCP_SOCKET: perm = TCP_SOCKET__NAME_CONNECT; break; - case SECCLASS_DCCP_SOCKET: - perm = DCCP_SOCKET__NAME_CONNECT; - break; case SECCLASS_SCTP_SOCKET: perm = SCTP_SOCKET__NAME_CONNECT; break; diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 04a9b480885e..5665aa5e7853 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -127,8 +127,6 @@ const struct security_class_mapping secclass_map[] = { { "key", { "view", "read", "write", "search", "link", "setattr", "create", NULL } }, - { "dccp_socket", - { COMMON_SOCK_PERMS, "node_bind", "name_connect", NULL } }, { "memprotect", { "mmap_zero", NULL } }, { "peer", { "recv", NULL } }, { "capability2", { COMMON_CAP2_PERMS, NULL } }, diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 3a95986b134f..2c0b07f9fbbd 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -98,7 +98,6 @@ static const struct nlmsg_perm nlmsg_route_perms[] = { static const struct nlmsg_perm nlmsg_tcpdiag_perms[] = { { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, - { DCCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, { SOCK_DIAG_BY_FAMILY, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, { SOCK_DESTROY, NETLINK_TCPDIAG_SOCKET__NLMSG_WRITE }, }; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 99833168604e..fc340a6f0dde 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -24,7 +24,6 @@ #include <linux/ip.h> #include <linux/tcp.h> #include <linux/udp.h> -#include <linux/dccp.h> #include <linux/icmpv6.h> #include <linux/slab.h> #include <linux/mutex.h> @@ -4061,7 +4060,6 @@ static int smk_skb_to_addr_ipv6(struct sk_buff *skb, struct sockaddr_in6 *sip) __be16 frag_off; struct tcphdr _tcph, *th; struct udphdr _udph, *uh; - struct dccp_hdr _dccph, *dh; sip->sin6_port = 0; @@ -4090,11 +4088,6 @@ static int smk_skb_to_addr_ipv6(struct sk_buff *skb, struct sockaddr_in6 *sip) if (uh != NULL) sip->sin6_port = uh->source; break; - case IPPROTO_DCCP: - dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph); - if (dh != NULL) - sip->sin6_port = dh->dccph_sport; - break; } return proto; } @@ -4216,7 +4209,7 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) case PF_INET6: proto = smk_skb_to_addr_ipv6(skb, &sadd); if (proto != IPPROTO_UDP && proto != IPPROTO_UDPLITE && - proto != IPPROTO_TCP && proto != IPPROTO_DCCP) + proto != IPPROTO_TCP) break; #ifdef SMACK_IPV6_SECMARK_LABELING skp = smack_from_skb(skb); diff --git a/tools/Makefile b/tools/Makefile index 5e1254eb66de..c31cbbd12c45 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -41,6 +41,7 @@ help: @echo ' mm - misc mm tools' @echo ' wmi - WMI interface examples' @echo ' x86_energy_perf_policy - Intel energy policy tool' + @echo ' ynl - ynl headers, library, and python tool' @echo '' @echo 'You can do:' @echo ' $$ make -C tools/ <tool>_install' @@ -118,11 +119,14 @@ freefall: FORCE kvm_stat: FORCE $(call descend,kvm/$@) +ynl: FORCE + $(call descend,net/ynl) + all: acpi counter cpupower gpio hv firewire \ perf selftests bootconfig spi turbostat usb \ virtio mm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat wmi \ - 
debugging tracing thermal thermometer thermal-engine + debugging tracing thermal thermometer thermal-engine ynl acpi_install: $(call descend,power/$(@:_install=),install) @@ -157,13 +161,16 @@ freefall_install: kvm_stat_install: $(call descend,kvm/$(@:_install=),install) +ynl_install: + $(call descend,net/$(@:_install=),install) + install: acpi_install counter_install cpupower_install gpio_install \ hv_install firewire_install iio_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install mm_install bpf_install x86_energy_perf_policy_install \ tmon_install freefall_install objtool_install kvm_stat_install \ wmi_install debugging_install intel-speed-select_install \ - tracing_install thermometer_install thermal-engine_install + tracing_install thermometer_install thermal-engine_install ynl_install acpi_clean: $(call descend,power/acpi,clean) @@ -214,12 +221,15 @@ freefall_clean: build_clean: $(call descend,build,clean) +ynl_clean: + $(call descend,net/$(@:_clean=),clean) + clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \ perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \ mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean \ gpio_clean objtool_clean leds_clean wmi_clean firmware_clean debugging_clean \ intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean \ - sched_ext_clean + sched_ext_clean ynl_clean .PHONY: FORCE diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 7600bf62dbdf..7eb9571786b8 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -219,6 +219,7 @@ enum { NETDEV_CMD_QSTATS_GET, NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, + NETDEV_CMD_BIND_TX, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index e0605403f977..fdcee6a71e0f 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -1283,6 +1283,7 @@ enum bpf_tc_attach_point { BPF_TC_INGRESS = 1 << 0, BPF_TC_EGRESS = 1 << 1, BPF_TC_CUSTOM = 1 << 2, + BPF_TC_QDISC = 1 << 3, }; #define BPF_TC_PARENT(a, b) \ @@ -1297,9 +1298,11 @@ struct bpf_tc_hook { int ifindex; enum bpf_tc_attach_point attach_point; __u32 parent; + __u32 handle; + const char *qdisc; size_t :0; }; -#define bpf_tc_hook__last_field parent +#define bpf_tc_hook__last_field qdisc struct bpf_tc_opts { size_t sz; diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 68a2def17175..c997e69d507f 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -529,9 +529,9 @@ int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id) } -typedef int (*qdisc_config_t)(struct libbpf_nla_req *req); +typedef int (*qdisc_config_t)(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook); -static int clsact_config(struct libbpf_nla_req *req) +static int clsact_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook) { req->tc.tcm_parent = TC_H_CLSACT; req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0); @@ -539,6 +539,16 @@ static int clsact_config(struct libbpf_nla_req *req) return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact")); } +static int qdisc_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook) +{ + const char *qdisc = OPTS_GET(hook, qdisc, NULL); + + req->tc.tcm_parent = OPTS_GET(hook, parent, TC_H_ROOT); + req->tc.tcm_handle = OPTS_GET(hook, handle, 0); + + return 
nlattr_add(req, TCA_KIND, qdisc, strlen(qdisc) + 1); +} + static int attach_point_to_config(struct bpf_tc_hook *hook, qdisc_config_t *config) { @@ -552,6 +562,9 @@ static int attach_point_to_config(struct bpf_tc_hook *hook, return 0; case BPF_TC_CUSTOM: return -EOPNOTSUPP; + case BPF_TC_QDISC: + *config = &qdisc_config; + return 0; default: return -EINVAL; } @@ -596,7 +609,7 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags) req.tc.tcm_family = AF_UNSPEC; req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0); - ret = config(&req); + ret = config(&req, hook); if (ret < 0) return ret; @@ -639,6 +652,7 @@ int bpf_tc_hook_destroy(struct bpf_tc_hook *hook) case BPF_TC_INGRESS: case BPF_TC_EGRESS: return libbpf_err(__bpf_tc_detach(hook, NULL, true)); + case BPF_TC_QDISC: case BPF_TC_INGRESS | BPF_TC_EGRESS: return libbpf_err(tc_qdisc_delete(hook)); case BPF_TC_CUSTOM: diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index f3269ce39e5b..a5e6093903fb 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -20,13 +20,18 @@ CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \ $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) \ $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_GENERATED_H,ethtool_netlink_generated.h) CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h) +CFLAGS_lockd_netlink:=$(call get_hdr_inc,_LINUX_LOCKD_NETLINK_H,lockd_netlink.h) CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h) CFLAGS_net_shaper:=$(call get_hdr_inc,_LINUX_NET_SHAPER_H,net_shaper.h) CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h) CFLAGS_nl80211:=$(call get_hdr_inc,__LINUX_NL802121_H,nl80211.h) CFLAGS_nlctrl:=$(call get_hdr_inc,__LINUX_GENERIC_NETLINK_H,genetlink.h) CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h) +CFLAGS_ovpn:=$(call get_hdr_inc,_LINUX_OVPN_H,ovpn.h) CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) +CFLAGS_rt-addr:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \ + $(call get_hdr_inc,__LINUX_IF_ADDR_H,if_addr.h) +CFLAGS_rt-route:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h) diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile index 21f9e299dc75..6603ad8d4ce1 100644 --- a/tools/net/ynl/generated/Makefile +++ b/tools/net/ynl/generated/Makefile @@ -25,7 +25,7 @@ SPECS_DIR:=../../../../Documentation/netlink/specs GENS_PATHS=$(shell grep -nrI --files-without-match \ 'protocol: netlink' \ $(SPECS_DIR)) -GENS=$(patsubst $(SPECS_DIR)/%.yaml,%,${GENS_PATHS}) +GENS=$(patsubst $(SPECS_DIR)/%.yaml,%,${GENS_PATHS}) rt-addr rt-route SRCS=$(patsubst %,%-user.c,${GENS}) HDRS=$(patsubst %,%-user.h,${GENS}) OBJS=$(patsubst %,%-user.o,${GENS}) diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index 3c09a7bbfba5..5debb09491e7 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -94,6 +94,9 @@ struct ynl_ntf_base_type { unsigned char data[] __attribute__((aligned(8))); }; +struct nlmsghdr *ynl_msg_start_req(struct ynl_sock *ys, __u32 id, __u16 flags); +struct nlmsghdr *ynl_msg_start_dump(struct ynl_sock *ys, __u32 id); + struct nlmsghdr * ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version); struct nlmsghdr * diff --git 
a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index c4da34048ef8..a0b54ad4c073 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -191,12 +191,12 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, n = snprintf(bad_attr, sizeof(bad_attr), "%sbad attribute: ", str ? " (" : ""); - start = ynl_nlmsg_data_offset(ys->nlh, ys->family->hdr_len); + start = ynl_nlmsg_data_offset(ys->nlh, ys->req_hdr_len); end = ynl_nlmsg_end_addr(ys->nlh); off = ys->err.attr_offs; off -= sizeof(struct nlmsghdr); - off -= ys->family->hdr_len; + off -= ys->req_hdr_len; n += ynl_err_walk(ys, start, end, off, ys->req_policy, &bad_attr[n], sizeof(bad_attr) - n, NULL); @@ -216,14 +216,14 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, n = snprintf(miss_attr, sizeof(miss_attr), "%smissing attribute: ", bad_attr[0] ? ", " : (str ? " (" : "")); - start = ynl_nlmsg_data_offset(ys->nlh, ys->family->hdr_len); + start = ynl_nlmsg_data_offset(ys->nlh, ys->req_hdr_len); end = ynl_nlmsg_end_addr(ys->nlh); nest_pol = ys->req_policy; if (tb[NLMSGERR_ATTR_MISS_NEST]) { off = ynl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_NEST]); off -= sizeof(struct nlmsghdr); - off -= ys->family->hdr_len; + off -= ys->req_hdr_len; n += ynl_err_walk(ys, start, end, off, ys->req_policy, &miss_attr[n], sizeof(miss_attr) - n, @@ -451,14 +451,14 @@ ynl_gemsg_start(struct ynl_sock *ys, __u32 id, __u16 flags, return nlh; } -void ynl_msg_start_req(struct ynl_sock *ys, __u32 id) +struct nlmsghdr *ynl_msg_start_req(struct ynl_sock *ys, __u32 id, __u16 flags) { - ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK); + return ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | flags); } -void ynl_msg_start_dump(struct ynl_sock *ys, __u32 id) +struct nlmsghdr *ynl_msg_start_dump(struct ynl_sock *ys, __u32 id) { - ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP); + return ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP); } struct nlmsghdr * @@ -663,6 +663,7 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse) struct sockaddr_nl addr; struct ynl_sock *ys; socklen_t addrlen; + int sock_type; int one = 1; ys = malloc(sizeof(*ys) + 2 * YNL_SOCKET_BUFFER_SIZE); @@ -675,7 +676,9 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse) ys->rx_buf = &ys->raw_buf[YNL_SOCKET_BUFFER_SIZE]; ys->ntf_last_next = &ys->ntf_first; - ys->socket = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + sock_type = yf->is_classic ? 
yf->classic_id : NETLINK_GENERIC; + + ys->socket = socket(AF_NETLINK, SOCK_RAW, sock_type); if (ys->socket < 0) { __perr(yse, "failed to create a netlink socket"); goto err_free_sock; @@ -708,8 +711,9 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse) ys->portid = addr.nl_pid; ys->seq = random(); - - if (ynl_sock_read_family(ys, yf->name)) { + if (yf->is_classic) { + ys->family_id = yf->classic_id; + } else if (ynl_sock_read_family(ys, yf->name)) { if (yse) memcpy(yse, &ys->err, sizeof(*yse)); goto err_close_sock; @@ -791,13 +795,21 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh) struct ynl_parse_arg yarg = { .ys = ys, }; const struct ynl_ntf_info *info; struct ynl_ntf_base_type *rsp; - struct genlmsghdr *gehdr; + __u32 cmd; int ret; - gehdr = ynl_nlmsg_data(nlh); - if (gehdr->cmd >= ys->family->ntf_info_size) + if (ys->family->is_classic) { + cmd = nlh->nlmsg_type; + } else { + struct genlmsghdr *gehdr; + + gehdr = ynl_nlmsg_data(nlh); + cmd = gehdr->cmd; + } + + if (cmd >= ys->family->ntf_info_size) return YNL_PARSE_CB_ERROR; - info = &ys->family->ntf_info[gehdr->cmd]; + info = &ys->family->ntf_info[cmd]; if (!info->cb) return YNL_PARSE_CB_ERROR; @@ -811,7 +823,7 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh) goto err_free; rsp->family = nlh->nlmsg_type; - rsp->cmd = gehdr->cmd; + rsp->cmd = cmd; *ys->ntf_last_next = rsp; ys->ntf_last_next = &rsp->next; @@ -863,17 +875,22 @@ int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg) static int ynl_check_alien(struct ynl_sock *ys, const struct nlmsghdr *nlh, __u32 rsp_cmd) { - struct genlmsghdr *gehdr; + if (ys->family->is_classic) { + if (nlh->nlmsg_type != rsp_cmd) + return ynl_ntf_parse(ys, nlh); + } else { + struct genlmsghdr *gehdr; - if (ynl_nlmsg_data_len(nlh) < sizeof(*gehdr)) { - yerr(ys, YNL_ERROR_INV_RESP, - "Kernel responded with truncated message"); - return -1; - } + if (ynl_nlmsg_data_len(nlh) < sizeof(*gehdr)) { + yerr(ys, YNL_ERROR_INV_RESP, + "Kernel responded with truncated message"); + return -1; + } - gehdr = ynl_nlmsg_data(nlh); - if (gehdr->cmd != rsp_cmd) - return ynl_ntf_parse(ys, nlh); + gehdr = ynl_nlmsg_data(nlh); + if (gehdr->cmd != rsp_cmd) + return ynl_ntf_parse(ys, nlh); + } return 0; } diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h index 6cd570b283ea..32efeb224829 100644 --- a/tools/net/ynl/lib/ynl.h +++ b/tools/net/ynl/lib/ynl.h @@ -2,6 +2,7 @@ #ifndef __YNL_C_H #define __YNL_C_H 1 +#include <stdbool.h> #include <stddef.h> #include <linux/genetlink.h> #include <linux/types.h> @@ -48,6 +49,8 @@ struct ynl_family { /* private: */ const char *name; size_t hdr_len; + bool is_classic; + __u16 classic_id; const struct ynl_ntf_info *ntf_info; unsigned int ntf_info_size; }; @@ -77,11 +80,25 @@ struct ynl_sock { struct nlmsghdr *nlh; const struct ynl_policy_nest *req_policy; + size_t req_hdr_len; unsigned char *tx_buf; unsigned char *rx_buf; unsigned char raw_buf[]; }; +/** + * struct ynl_string - parsed individual string + * @len: length of the string (excluding terminating character) + * @str: value of the string + * + * Parsed and nul-terminated string. This struct is only used for arrays of + * strings. Non-array string members are placed directly in respective types. 
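+ *
+ * A hypothetical walk over a parsed array-of-strings member ("rsp" and
+ * "names" are illustrative, not from a real family spec):
+ *
+ *	for (i = 0; i < rsp->_count.names; i++)
+ *		printf("%s\n", rsp->names[i]->str);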
+ */ +struct ynl_string { + unsigned int len; + char str[]; +}; + struct ynl_sock * ynl_sock_create(const struct ynl_family *yf, struct ynl_error *e); void ynl_sock_destroy(struct ynl_sock *ys); diff --git a/tools/net/ynl/pyynl/cli.py b/tools/net/ynl/pyynl/cli.py index 794e3c7dcc65..33ccc5c1843b 100755 --- a/tools/net/ynl/pyynl/cli.py +++ b/tools/net/ynl/pyynl/cli.py @@ -144,16 +144,17 @@ def main(): ops = [ (item[0], json.loads(item[1]), args.flags or []) for item in args.multi ] reply = ynl.do_multi(ops) output(reply) - except NlError as e: - print(e) - exit(1) - if args.ntf: - try: + if args.ntf: for msg in ynl.poll_ntf(duration=args.duration): output(msg) - except KeyboardInterrupt: - pass + except NlError as e: + print(e) + exit(1) + except KeyboardInterrupt: + pass + except BrokenPipeError: + pass if __name__ == "__main__": diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index a7f08edbc235..68dfb2cd13af 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -142,19 +142,19 @@ class Type(SpecAttr): return self.is_recursive() and not ri.op def presence_type(self): - return 'bit' + return 'present' def presence_member(self, space, type_filter): if self.presence_type() != type_filter: return - if self.presence_type() == 'bit': + if self.presence_type() == 'present': pfx = '__' if space == 'user' else '' return f"{pfx}u32 {self.c_name}:1;" - if self.presence_type() == 'len': + if self.presence_type() in {'len', 'count'}: pfx = '__' if space == 'user' else '' - return f"{pfx}u32 {self.c_name}_len;" + return f"{pfx}u32 {self.c_name};" def _complex_member_type(self, ri): return None @@ -163,7 +163,7 @@ class Type(SpecAttr): return False def _free_lines(self, ri, var, ref): - if self.is_multi_val() or self.presence_type() == 'len': + if self.is_multi_val() or self.presence_type() in {'count', 'len'}: return [f'free({var}->{ref}{self.c_name});'] return [] @@ -175,21 +175,21 @@ class Type(SpecAttr): def arg_member(self, ri): member = self._complex_member_type(ri) if member: - arg = [member + ' *' + self.c_name] + spc = ' ' if member[-1] != '*' else '' + arg = [member + spc + '*' + self.c_name] if self.presence_type() == 'count': arg += ['unsigned int n_' + self.c_name] return arg raise Exception(f"Struct member not implemented for class type {self.type}") def struct_member(self, ri): - if self.is_multi_val(): - ri.cw.p(f"unsigned int n_{self.c_name};") member = self._complex_member_type(ri) if member: ptr = '*' if self.is_multi_val() else '' if self.is_recursive_for_op(ri): ptr = '*' - ri.cw.p(f"{member} {ptr}{self.c_name};") + spc = ' ' if member[-1] != '*' else '' + ri.cw.p(f"{member}{spc}{ptr}{self.c_name};") return members = self.arg_member(ri) for one in members: @@ -215,10 +215,9 @@ class Type(SpecAttr): cw.p(f'[{self.enum_name}] = {"{"} .name = "{self.name}", {typol}{"}"},') def _attr_put_line(self, ri, var, line): - if self.presence_type() == 'bit': - ri.cw.p(f"if ({var}->_present.{self.c_name})") - elif self.presence_type() == 'len': - ri.cw.p(f"if ({var}->_present.{self.c_name}_len)") + presence = self.presence_type() + if presence in {'present', 'len'}: + ri.cw.p(f"if ({var}->_{presence}.{self.c_name})") ri.cw.p(f"{line};") def _attr_put_simple(self, ri, var, put_type): @@ -248,7 +247,7 @@ class Type(SpecAttr): if not self.is_multi_val(): ri.cw.p("if (ynl_attr_validate(yarg, attr))") ri.cw.p("return YNL_PARSE_CB_ERROR;") - if self.presence_type() == 'bit': + if self.presence_type() == 'present': 
ri.cw.p(f"{var}->_present.{self.c_name} = 1;") if init_lines: @@ -279,7 +278,8 @@ class Type(SpecAttr): presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}" # Every layer below last is a nest, so we know it uses bit presence # last layer is "self" and may be a complex type - if i == len(ref) - 1 and self.presence_type() != 'bit': + if i == len(ref) - 1 and self.presence_type() != 'present': + presence = f"{var}->{'.'.join(ref[:i] + [''])}_{self.presence_type()}.{ref[i]}" continue code.append(presence + ' = 1;') ref_path = '.'.join(ref[:-1]) @@ -355,26 +355,10 @@ class TypeScalar(Type): if 'byte-order' in attr: self.byte_order_comment = f" /* {attr['byte-order']} */" - if 'enum' in self.attr: - enum = self.family.consts[self.attr['enum']] - low, high = enum.value_range() - if 'min' not in self.checks: - if low != 0 or self.type[0] == 's': - self.checks['min'] = low - if 'max' not in self.checks: - self.checks['max'] = high - - if 'min' in self.checks and 'max' in self.checks: - if self.get_limit('min') > self.get_limit('max'): - raise Exception(f'Invalid limit for "{self.name}" min: {self.get_limit("min")} max: {self.get_limit("max")}') - self.checks['range'] = True - - low = min(self.get_limit('min', 0), self.get_limit('max', 0)) - high = max(self.get_limit('min', 0), self.get_limit('max', 0)) - if low < 0 and self.type[0] == 'u': - raise Exception(f'Invalid limit for "{self.name}" negative limit for unsigned type') - if low < -32768 or high > 32767: - self.checks['full-range'] = True + # Classic families have some funny enums, don't bother + # computing checks, since we only need them for kernel policies + if not family.is_classic(): + self._init_checks() # Added by resolve(): self.is_bitfield = None @@ -399,6 +383,31 @@ class TypeScalar(Type): else: self.type_name = '__' + self.type + def _init_checks(self): + if 'enum' in self.attr: + enum = self.family.consts[self.attr['enum']] + low, high = enum.value_range() + if low == None and high == None: + self.checks['sparse'] = True + else: + if 'min' not in self.checks: + if low != 0 or self.type[0] == 's': + self.checks['min'] = low + if 'max' not in self.checks: + self.checks['max'] = high + + if 'min' in self.checks and 'max' in self.checks: + if self.get_limit('min') > self.get_limit('max'): + raise Exception(f'Invalid limit for "{self.name}" min: {self.get_limit("min")} max: {self.get_limit("max")}') + self.checks['range'] = True + + low = min(self.get_limit('min', 0), self.get_limit('max', 0)) + high = max(self.get_limit('min', 0), self.get_limit('max', 0)) + if low < 0 and self.type[0] == 'u': + raise Exception(f'Invalid limit for "{self.name}" negative limit for unsigned type') + if low < -32768 or high > 32767: + self.checks['full-range'] = True + def _attr_policy(self, policy): if 'flags-mask' in self.checks or self.is_bitfield: if self.is_bitfield: @@ -417,6 +426,8 @@ class TypeScalar(Type): return f"NLA_POLICY_MIN({policy}, {self.get_limit_str('min')})" elif 'max' in self.checks: return f"NLA_POLICY_MAX({policy}, {self.get_limit_str('max')})" + elif 'sparse' in self.checks: + return f"NLA_POLICY_VALIDATE_FN({policy}, &{c_lower(self.enum_name)}_validate)" return super()._attr_policy(policy) def _attr_typol(self): @@ -488,7 +499,7 @@ class TypeString(Type): self._attr_put_simple(ri, var, 'str') def _attr_get(self, ri, var): - len_mem = var + '->_present.' + self.c_name + '_len' + len_mem = var + '->_len.' 
+ self.c_name return [f"{len_mem} = len;", f"{var}->{self.c_name} = malloc(len + 1);", f"memcpy({var}->{self.c_name}, ynl_attr_get_str(attr), len);", @@ -497,10 +508,10 @@ class TypeString(Type): ['unsigned int len;'] def _setter_lines(self, ri, member, presence): - return [f"{presence}_len = strlen({self.c_name});", - f"{member} = malloc({presence}_len + 1);", - f'memcpy({member}, {self.c_name}, {presence}_len);', - f'{member}[{presence}_len] = 0;'] + return [f"{presence} = strlen({self.c_name});", + f"{member} = malloc({presence} + 1);", + f'memcpy({member}, {self.c_name}, {presence});', + f'{member}[{presence}] = 0;'] class TypeBinary(Type): @@ -539,20 +550,71 @@ class TypeBinary(Type): def attr_put(self, ri, var): self._attr_put_line(ri, var, f"ynl_attr_put(nlh, {self.enum_name}, " + - f"{var}->{self.c_name}, {var}->_present.{self.c_name}_len)") + f"{var}->{self.c_name}, {var}->_len.{self.c_name})") + + def _attr_get(self, ri, var): + len_mem = var + '->_len.' + self.c_name + return [f"{len_mem} = len;", + f"{var}->{self.c_name} = malloc(len);", + f"memcpy({var}->{self.c_name}, ynl_attr_data(attr), len);"], \ + ['len = ynl_attr_data_len(attr);'], \ + ['unsigned int len;'] + + def _setter_lines(self, ri, member, presence): + return [f"{presence} = len;", + f"{member} = malloc({presence});", + f'memcpy({member}, {self.c_name}, {presence});'] + + +class TypeBinaryStruct(TypeBinary): + def struct_member(self, ri): + ri.cw.p(f'struct {c_lower(self.get("struct"))} *{self.c_name};') def _attr_get(self, ri, var): - len_mem = var + '->_present.' + self.c_name + '_len' + struct_sz = 'sizeof(struct ' + c_lower(self.get("struct")) + ')' + len_mem = var + '->_' + self.presence_type() + '.' + self.c_name return [f"{len_mem} = len;", + f"if (len < {struct_sz})", + f"{var}->{self.c_name} = calloc(1, {struct_sz});", + "else", + f"{var}->{self.c_name} = malloc(len);", + f"memcpy({var}->{self.c_name}, ynl_attr_data(attr), len);"], \ + ['len = ynl_attr_data_len(attr);'], \ + ['unsigned int len;'] + + +class TypeBinaryScalarArray(TypeBinary): + def arg_member(self, ri): + return [f'__{self.get("sub-type")} *{self.c_name}', 'size_t count'] + + def presence_type(self): + return 'count' + + def struct_member(self, ri): + ri.cw.p(f'__{self.get("sub-type")} *{self.c_name};') + + def attr_put(self, ri, var): + presence = self.presence_type() + ri.cw.block_start(line=f"if ({var}->_{presence}.{self.c_name})") + ri.cw.p(f"i = {var}->_{presence}.{self.c_name} * sizeof(__{self.get('sub-type')});") + ri.cw.p(f"ynl_attr_put(nlh, {self.enum_name}, " + + f"{var}->{self.c_name}, i);") + ri.cw.block_end() + + def _attr_get(self, ri, var): + len_mem = var + '->_count.' 
+ self.c_name + return [f"{len_mem} = len / sizeof(__{self.get('sub-type')});", + f"len = {len_mem} * sizeof(__{self.get('sub-type')});", f"{var}->{self.c_name} = malloc(len);", f"memcpy({var}->{self.c_name}, ynl_attr_data(attr), len);"], \ ['len = ynl_attr_data_len(attr);'], \ ['unsigned int len;'] def _setter_lines(self, ri, member, presence): - return [f"{presence}_len = len;", - f"{member} = malloc({presence}_len);", - f'memcpy({member}, {self.c_name}, {presence}_len);'] + return [f"{presence} = count;", + f"count *= sizeof(__{self.get('sub-type')});", + f"{member} = malloc(count);", + f'memcpy({member}, {self.c_name}, count);'] class TypeBitfield32(Type): @@ -638,22 +700,40 @@ class TypeMultiAttr(Type): def _complex_member_type(self, ri): if 'type' not in self.attr or self.attr['type'] == 'nest': return self.nested_struct_type + elif self.attr['type'] == 'binary' and 'struct' in self.attr: + return None # use arg_member() + elif self.attr['type'] == 'string': + return 'struct ynl_string *' elif self.attr['type'] in scalars: scalar_pfx = '__' if ri.ku_space == 'user' else '' return scalar_pfx + self.attr['type'] else: raise Exception(f"Sub-type {self.attr['type']} not supported yet") + def arg_member(self, ri): + if self.type == 'binary' and 'struct' in self.attr: + return [f'struct {c_lower(self.attr["struct"])} *{self.c_name}', + f'unsigned int n_{self.c_name}'] + return super().arg_member(ri) + def free_needs_iter(self): - return 'type' not in self.attr or self.attr['type'] == 'nest' + return self.attr['type'] in {'nest', 'string'} def _free_lines(self, ri, var, ref): lines = [] if self.attr['type'] in scalars: lines += [f"free({var}->{ref}{self.c_name});"] + elif self.attr['type'] == 'binary': + lines += [f"free({var}->{ref}{self.c_name});"] + elif self.attr['type'] == 'string': + lines += [ + f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)", + f"free({var}->{ref}{self.c_name}[i]);", + f"free({var}->{ref}{self.c_name});", + ] elif 'type' not in self.attr or self.attr['type'] == 'nest': lines += [ - f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)", + f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)", f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);', f"free({var}->{ref}{self.c_name});", ] @@ -673,18 +753,22 @@ class TypeMultiAttr(Type): def attr_put(self, ri, var): if self.attr['type'] in scalars: put_type = self.type - ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)") + ri.cw.p(f"for (i = 0; i < {var}->_count.{self.c_name}; i++)") ri.cw.p(f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);") + elif self.attr['type'] == 'binary' and 'struct' in self.attr: + ri.cw.p(f"for (i = 0; i < {var}->_count.{self.c_name}; i++)") + ri.cw.p(f"ynl_attr_put(nlh, {self.enum_name}, &{var}->{self.c_name}[i], sizeof(struct {c_lower(self.attr['struct'])}));") + elif self.attr['type'] == 'string': + ri.cw.p(f"for (i = 0; i < {var}->_count.{self.c_name}; i++)") + ri.cw.p(f"ynl_attr_put_str(nlh, {self.enum_name}, {var}->{self.c_name}[i]->str);") elif 'type' not in self.attr or self.attr['type'] == 'nest': - ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)") + ri.cw.p(f"for (i = 0; i < {var}->_count.{self.c_name}; i++)") self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " + f"{self.enum_name}, &{var}->{self.c_name}[i])") else: raise Exception(f"Put of MultiAttr sub-type {self.attr['type']} not supported yet") def _setter_lines(self, ri, member, presence): - # For multi-attr we have a count, not presence, hack up the 
presence - presence = presence[:-(len('_present.') + len(self.c_name))] + "n_" + self.c_name return [f"{member} = {self.c_name};", f"{presence} = n_{self.c_name};"] @@ -702,12 +786,22 @@ class TypeArrayNest(Type): elif self.attr['sub-type'] in scalars: scalar_pfx = '__' if ri.ku_space == 'user' else '' return scalar_pfx + self.attr['sub-type'] + elif self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks: + return None # use arg_member() else: raise Exception(f"Sub-type {self.attr['sub-type']} not supported yet") + def arg_member(self, ri): + if self.sub_type == 'binary' and 'exact-len' in self.checks: + return [f'unsigned char (*{self.c_name})[{self.checks["exact-len"]}]', + f'unsigned int n_{self.c_name}'] + return super().arg_member(ri) + def _attr_typol(self): if self.attr['sub-type'] in scalars: return f'.type = YNL_PT_U{c_upper(self.sub_type[1:])}, ' + elif self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks: + return f'.type = YNL_PT_BINARY, .len = {self.checks["exact-len"]}, ' else: return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' @@ -717,10 +811,28 @@ class TypeArrayNest(Type): 'ynl_attr_for_each_nested(attr2, attr) {', '\tif (ynl_attr_validate(yarg, attr2))', '\t\treturn YNL_PARSE_CB_ERROR;', - f'\t{var}->n_{self.c_name}++;', + f'\t{var}->_count.{self.c_name}++;', '}'] return get_lines, None, local_vars + def attr_put(self, ri, var): + ri.cw.p(f'array = ynl_attr_nest_start(nlh, {self.enum_name});') + if self.sub_type in scalars: + put_type = self.sub_type + ri.cw.block_start(line=f'for (i = 0; i < {var}->_count.{self.c_name}; i++)') + ri.cw.p(f"ynl_attr_put_{put_type}(nlh, i, {var}->{self.c_name}[i]);") + ri.cw.block_end() + elif self.sub_type == 'binary' and 'exact-len' in self.checks: + ri.cw.p(f'for (i = 0; i < {var}->_count.{self.c_name}; i++)') + ri.cw.p(f"ynl_attr_put(nlh, i, {var}->{self.c_name}[i], {self.checks['exact-len']});") + else: + raise Exception(f"Put for ArrayNest sub-type {self.attr['sub-type']} not supported, yet") + ri.cw.p('ynl_attr_nest_end(nlh, array);') + + def _setter_lines(self, ri, member, presence): + return [f"{member} = {self.c_name};", + f"{presence} = n_{self.c_name};"] + class TypeNestTypeValue(Type): def _complex_member_type(self, ri): @@ -809,6 +921,12 @@ class Struct: raise Exception("Inheriting different members not supported") self.inherited = [c_lower(x) for x in sorted(self._inherited)] + def free_needs_iter(self): + for _, attr in self.attr_list: + if attr.free_needs_iter(): + return True + return False + class EnumEntry(SpecEnumEntry): def __init__(self, enum_set, yaml, prev, value_start): @@ -862,7 +980,7 @@ class EnumSet(SpecEnumSet): high = max([x.value for x in self.entries.values()]) if high - low + 1 != len(self.entries): - raise Exception("Can't get value range for a noncontiguous enum") + return None, None return low, high @@ -909,13 +1027,18 @@ class AttrSet(SpecAttrSet): elif elem['type'] == 'string': t = TypeString(self.family, self, elem, value) elif elem['type'] == 'binary': - t = TypeBinary(self.family, self, elem, value) + if 'struct' in elem: + t = TypeBinaryStruct(self.family, self, elem, value) + elif elem.get('sub-type') in scalars: + t = TypeBinaryScalarArray(self.family, self, elem, value) + else: + t = TypeBinary(self.family, self, elem, value) elif elem['type'] == 'bitfield32': t = TypeBitfield32(self.family, self, elem, value) elif elem['type'] == 'nest': t = TypeNest(self.family, self, elem, value) elif elem['type'] == 'indexed-array' and 'sub-type' in elem: - if 
elem["sub-type"] in ['nest', 'u32']: + if elem["sub-type"] in ['binary', 'nest', 'u32']: t = TypeArrayNest(self.family, self, elem, value) else: raise Exception(f'new_attr: unsupported sub-type {elem["sub-type"]}') @@ -932,6 +1055,14 @@ class AttrSet(SpecAttrSet): class Operation(SpecOperation): def __init__(self, family, yaml, req_value, rsp_value): + # Fill in missing operation properties (for fixed hdr-only msgs) + for mode in ['do', 'dump', 'event']: + for direction in ['request', 'reply']: + try: + yaml[mode][direction].setdefault('attributes', []) + except KeyError: + pass + super().__init__(family, yaml, req_value, rsp_value) self.render_name = c_lower(family.ident_name + '_' + self.name) @@ -993,9 +1124,6 @@ class Family(SpecFamily): def resolve(self): self.resolve_up(super()) - if self.yaml.get('protocol', 'genetlink') not in {'genetlink', 'genetlink-c', 'genetlink-legacy'}: - raise Exception("Codegen only supported for genetlink") - self.c_name = c_lower(self.ident_name) if 'name-prefix' in self.yaml['operations']: self.op_prefix = c_upper(self.yaml['operations']['name-prefix']) @@ -1018,7 +1146,7 @@ class Family(SpecFamily): # dict space-name -> 'request': set(attrs), 'reply': set(attrs) self.root_sets = dict() - # dict space-name -> set('request', 'reply') + # dict space-name -> Struct self.pure_nested_structs = dict() self._mark_notify() @@ -1042,6 +1170,9 @@ class Family(SpecFamily): def new_operation(self, elem, req_value, rsp_value): return Operation(self, elem, req_value, rsp_value) + def is_classic(self): + return self.proto == 'netlink-raw' + def _mark_notify(self): for op in self.msgs.values(): if 'notify' in op: @@ -1234,11 +1365,19 @@ class RenderInfo: self.op = op self.fixed_hdr = None + self.fixed_hdr_len = 'ys->family->hdr_len' if op and op.fixed_header: self.fixed_hdr = 'struct ' + c_lower(op.fixed_header) + if op.fixed_header != family.fixed_header: + if family.is_classic(): + self.fixed_hdr_len = f"sizeof({self.fixed_hdr})" + else: + raise Exception(f"Per-op fixed header not supported, yet") + # 'do' and 'dump' response parsing is identical self.type_consistent = True + self.type_oneside = False if op_mode != 'do' and 'dump' in op: if 'do' in op: if ('reply' in op['do']) != ('reply' in op["dump"]): @@ -1246,7 +1385,8 @@ class RenderInfo: elif 'reply' in op['do'] and op["do"]["reply"] != op["dump"]["reply"]: self.type_consistent = False else: - self.type_consistent = False + self.type_consistent = True + self.type_oneside = True self.attr_set = attr_set if not self.attr_set: @@ -1264,7 +1404,7 @@ class RenderInfo: self.struct = dict() if op_mode == 'notify': - op_mode = 'do' + op_mode = 'do' if 'do' in op else 'dump' for op_dir in ['request', 'reply']: if op: type_list = [] @@ -1274,6 +1414,12 @@ class RenderInfo: if op_mode == 'event': self.struct['reply'] = Struct(family, self.attr_set, type_list=op['event']['attributes']) + def type_empty(self, key): + return len(self.struct[key].attr_list) == 0 and self.fixed_hdr is None + + def needs_nlflags(self, direction): + return self.op_mode == 'do' and direction == 'request' and self.family.is_classic() + class CodeWriter: def __init__(self, nlib, out_file=None, overwrite=True): @@ -1330,6 +1476,7 @@ class CodeWriter: if self._silent_block: ind += 1 self._silent_block = line.endswith(')') and CodeWriter._is_cond(line) + self._silent_block |= line.strip() == 'else' if line[0] == '#': ind = 0 if add_ind: @@ -1540,7 +1687,9 @@ def op_prefix(ri, direction, deref=False): suffix += f"{direction_to_suffix[direction]}" else: 
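+ # A dump-only op (no 'do' form) is "one-sided": its request struct
+ # keeps the plain _req suffix instead of _req_dump.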
if direction == 'request': - suffix += '_req_dump' + suffix += '_req' + if not ri.type_oneside: + suffix += '_dump' else: if ri.type_consistent: if deref: @@ -1677,10 +1826,15 @@ def put_req_nested(ri, struct): local_vars.append('struct nlattr *nest;') init_lines.append("nest = ynl_attr_nest_start(nlh, attr_type);") + has_anest = False + has_count = False for _, arg in struct.member_list(): - if arg.presence_type() == 'count': - local_vars.append('unsigned int i;') - break + has_anest |= arg.type == 'indexed-array' + has_count |= arg.presence_type() == 'count' + if has_anest: + local_vars.append('struct nlattr *array;') + if has_count: + local_vars.append('unsigned int i;') put_req_nested_prototype(ri, struct, suffix='') ri.cw.block_start() @@ -1707,13 +1861,18 @@ def _multi_parse(ri, struct, init_lines, local_vars): if ri.fixed_hdr: local_vars += ['void *hdr;'] iter_line = "ynl_attr_for_each(attr, nlh, yarg->ys->family->hdr_len)" + if ri.op.fixed_header != ri.family.fixed_header: + if ri.family.is_classic(): + iter_line = f"ynl_attr_for_each(attr, nlh, sizeof({ri.fixed_hdr}))" + else: + raise Exception(f"Per-op fixed header not supported, yet") array_nests = set() multi_attrs = set() needs_parg = False for arg, aspec in struct.member_list(): if aspec['type'] == 'indexed-array' and 'sub-type' in aspec: - if aspec["sub-type"] == 'nest': + if aspec["sub-type"] in {'binary', 'nest'}: local_vars.append(f'const struct nlattr *attr_{aspec.c_name};') array_nests.add(arg) elif aspec['sub-type'] in scalars: @@ -1746,7 +1905,10 @@ def _multi_parse(ri, struct, init_lines, local_vars): ri.cw.p(f'dst->{arg} = {arg};') if ri.fixed_hdr: - ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));') + if ri.family.is_classic(): + ri.cw.p('hdr = ynl_nlmsg_data(nlh);') + else: + ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));') ri.cw.p(f"memcpy(&dst->_hdr, hdr, sizeof({ri.fixed_hdr}));") for anest in sorted(all_multi): aspec = struct[anest] @@ -1772,7 +1934,7 @@ def _multi_parse(ri, struct, init_lines, local_vars): ri.cw.block_start(line=f"if (n_{aspec.c_name})") ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));") - ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};") + ri.cw.p(f"dst->_count.{aspec.c_name} = n_{aspec.c_name};") ri.cw.p('i = 0;') if 'nested-attributes' in aspec: ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;") @@ -1783,6 +1945,9 @@ def _multi_parse(ri, struct, init_lines, local_vars): ri.cw.p('return YNL_PARSE_CB_ERROR;') elif aspec.sub_type in scalars: ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.sub_type}(attr);") + elif aspec.sub_type == 'binary' and 'exact-len' in aspec.checks: + # Length is validated by typol + ri.cw.p(f'memcpy(dst->{aspec.c_name}[i], ynl_attr_data(attr), {aspec.checks["exact-len"]});') else: raise Exception(f"Nest parsing type not supported in {aspec['name']}") ri.cw.p('i++;') @@ -1794,7 +1959,7 @@ def _multi_parse(ri, struct, init_lines, local_vars): aspec = struct[anest] ri.cw.block_start(line=f"if (n_{aspec.c_name})") ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));") - ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};") + ri.cw.p(f"dst->_count.{aspec.c_name} = n_{aspec.c_name};") ri.cw.p('i = 0;') if 'nested-attributes' in aspec: ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;") @@ -1806,8 +1971,22 @@ def _multi_parse(ri, struct, init_lines, local_vars): ri.cw.p('return YNL_PARSE_CB_ERROR;') elif aspec.type in 
scalars: ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.type}(attr);") + elif aspec.type == 'binary' and 'struct' in aspec: + ri.cw.p('size_t len = ynl_attr_data_len(attr);') + ri.cw.nl() + ri.cw.p(f'if (len > sizeof(dst->{aspec.c_name}[0]))') + ri.cw.p(f'len = sizeof(dst->{aspec.c_name}[0]);') + ri.cw.p(f"memcpy(&dst->{aspec.c_name}[i], ynl_attr_data(attr), len);") + elif aspec.type == 'string': + ri.cw.p('unsigned int len;') + ri.cw.nl() + ri.cw.p('len = strnlen(ynl_attr_get_str(attr), ynl_attr_data_len(attr));') + ri.cw.p(f'dst->{aspec.c_name}[i] = malloc(sizeof(struct ynl_string) + len + 1);') + ri.cw.p(f"dst->{aspec.c_name}[i]->len = len;") + ri.cw.p(f"memcpy(dst->{aspec.c_name}[i]->str, ynl_attr_get_str(attr), len);") + ri.cw.p(f"dst->{aspec.c_name}[i]->str[len] = 0;") else: - raise Exception('Nest parsing type not supported yet') + raise Exception(f'Nest parsing of type {aspec.type} not supported yet') ri.cw.p('i++;') ri.cw.block_end() ri.cw.block_end() @@ -1898,9 +2077,13 @@ def print_req(ri): ri.cw.block_start() ri.cw.write_func_lvar(local_vars) - ri.cw.p(f"nlh = ynl_gemsg_start_req(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);") + if ri.family.is_classic(): + ri.cw.p(f"nlh = ynl_msg_start_req(ys, {ri.op.enum_name}, req->_nlmsg_flags);") + else: + ri.cw.p(f"nlh = ynl_gemsg_start_req(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);") ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;") + ri.cw.p(f"ys->req_hdr_len = {ri.fixed_hdr_len};") if 'reply' in ri.op[ri.op_mode]: ri.cw.p(f"yrs.yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;") ri.cw.nl() @@ -1967,7 +2150,10 @@ def print_dump(ri): else: ri.cw.p(f'yds.rsp_cmd = {ri.op.rsp_value};') ri.cw.nl() - ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);") + if ri.family.is_classic(): + ri.cw.p(f"nlh = ynl_msg_start_dump(ys, {ri.op.enum_name});") + else: + ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);") if ri.fixed_hdr: ri.cw.p("hdr_len = sizeof(req->_hdr);") @@ -1977,6 +2163,7 @@ def print_dump(ri): if "request" in ri.op[ri.op_mode]: ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;") + ri.cw.p(f"ys->req_hdr_len = {ri.fixed_hdr_len};") ri.cw.nl() for _, attr in ri.struct["request"].member_list(): attr.attr_put(ri, "req") @@ -2022,32 +2209,45 @@ def print_free_prototype(ri, direction, suffix=';'): ri.cw.write_func_prot('void', f"{name}_free", [f"struct {struct_name} *{arg}"], suffix=suffix) +def print_nlflags_set(ri, direction): + name = op_prefix(ri, direction) + ri.cw.write_func_prot(f'static inline void', f"{name}_set_nlflags", + [f"struct {name} *req", "__u16 nl_flags"]) + ri.cw.block_start() + ri.cw.p('req->_nlmsg_flags = nl_flags;') + ri.cw.block_end() + ri.cw.nl() + + def _print_type(ri, direction, struct): suffix = f'_{ri.type_name}{direction_to_suffix[direction]}' if not direction and ri.type_name_conflict: suffix += '_' - if ri.op_mode == 'dump': + if ri.op_mode == 'dump' and not ri.type_oneside: suffix += '_dump' ri.cw.block_start(line=f"struct {ri.family.c_name}{suffix}") + if ri.needs_nlflags(direction): + ri.cw.p('__u16 _nlmsg_flags;') + ri.cw.nl() if ri.fixed_hdr: ri.cw.p(ri.fixed_hdr + ' _hdr;') ri.cw.nl() - meta_started = False - for _, attr in struct.member_list(): - for type_filter in ['len', 'bit']: + for type_filter in ['present', 'len', 'count']: + meta_started = False + for _, attr in struct.member_list(): line = attr.presence_member(ri.ku_space, type_filter) if line: if 
not meta_started: ri.cw.block_start(line=f"struct") meta_started = True ri.cw.p(line) - if meta_started: - ri.cw.block_end(line='_present;') - ri.cw.nl() + if meta_started: + ri.cw.block_end(line=f'_{type_filter};') + ri.cw.nl() for arg in struct.inherited: ri.cw.p(f"__u32 {arg};") @@ -2071,6 +2271,9 @@ def print_type_helpers(ri, direction, deref=False): print_free_prototype(ri, direction) ri.cw.nl() + if ri.needs_nlflags(direction): + print_nlflags_set(ri, direction) + if ri.ku_space == 'user' and direction == 'request': for _, attr in ri.struct[direction].member_list(): attr.setter(ri, ri.attr_set, direction, deref=deref) @@ -2078,7 +2281,7 @@ def print_type_helpers(ri, direction, deref=False): def print_req_type_helpers(ri): - if len(ri.struct["request"].attr_list) == 0: + if ri.type_empty("request"): return print_alloc_wrapper(ri, "request") print_type_helpers(ri, "request") @@ -2101,7 +2304,7 @@ def print_parse_prototype(ri, direction, terminate=True): def print_req_type(ri): - if len(ri.struct["request"].attr_list) == 0: + if ri.type_empty("request"): return print_type(ri, "request") @@ -2139,11 +2342,9 @@ def print_wrapped_type(ri): def _free_type_members_iter(ri, struct): - for _, attr in struct.member_list(): - if attr.free_needs_iter(): - ri.cw.p('unsigned int i;') - ri.cw.nl() - break + if struct.free_needs_iter(): + ri.cw.p('unsigned int i;') + ri.cw.nl() def _free_type_members(ri, var, struct, ref=''): @@ -2280,6 +2481,46 @@ def print_kernel_policy_ranges(family, cw): cw.nl() +def print_kernel_policy_sparse_enum_validates(family, cw): + first = True + for _, attr_set in family.attr_sets.items(): + if attr_set.subset_of: + continue + + for _, attr in attr_set.items(): + if not attr.request: + continue + if not attr.enum_name: + continue + if 'sparse' not in attr.checks: + continue + + if first: + cw.p('/* Sparse enums validation callbacks */') + first = False + + sign = '' if attr.type[0] == 'u' else '_signed' + suffix = 'ULL' if attr.type[0] == 'u' else 'LL' + cw.write_func_prot('static int', f'{c_lower(attr.enum_name)}_validate', + ['const struct nlattr *attr', 'struct netlink_ext_ack *extack']) + cw.block_start() + cw.block_start(line=f'switch (nla_get_{attr["type"]}(attr))') + enum = family.consts[attr['enum']] + first_entry = True + for entry in enum.entries.values(): + if first_entry: + first_entry = False + else: + cw.p('fallthrough;') + cw.p(f'case {entry.c_name}:') + cw.p('return 0;') + cw.block_end() + cw.p('NL_SET_ERR_MSG_ATTR(extack, attr, "invalid enum value");') + cw.p('return -EINVAL;') + cw.block_end() + cw.nl() + + def print_kernel_op_table_fwd(family, cw, terminate): exported = not kernel_can_gen_family_struct(family) @@ -2739,7 +2980,11 @@ def render_uapi(family, cw): def _render_user_ntf_entry(ri, op): - ri.cw.block_start(line=f"[{op.enum_name}] = ") + if not ri.family.is_classic(): + ri.cw.block_start(line=f"[{op.enum_name}] = ") + else: + crud_op = ri.family.req_by_value[op.rsp_value] + ri.cw.block_start(line=f"[{crud_op.enum_name}] = ") ri.cw.p(f".alloc_sz\t= sizeof({type_name(ri, 'event')}),") ri.cw.p(f".cb\t\t= {op_prefix(ri, 'reply', deref=True)}_parse,") ri.cw.p(f".policy\t\t= &{ri.struct['reply'].render_name}_nest,") @@ -2754,7 +2999,7 @@ def render_user_family(family, cw, prototype): return if family.ntfs: - cw.block_start(line=f"static const struct ynl_ntf_info {family['name']}_ntf_info[] = ") + cw.block_start(line=f"static const struct ynl_ntf_info {family.c_name}_ntf_info[] = ") for ntf_op_name, ntf_op in family.ntfs.items(): if 'notify' in 
ntf_op: op = family.ops[ntf_op['notify']] @@ -2774,13 +3019,19 @@ def render_user_family(family, cw, prototype): cw.block_start(f'{symbol} = ') cw.p(f'.name\t\t= "{family.c_name}",') - if family.fixed_header: + if family.is_classic(): + cw.p(f'.is_classic\t= true,') + cw.p(f'.classic_id\t= {family.get("protonum")},') + if family.is_classic(): + if family.fixed_header: + cw.p(f'.hdr_len\t= sizeof(struct {c_lower(family.fixed_header)}),') + elif family.fixed_header: cw.p(f'.hdr_len\t= sizeof(struct genlmsghdr) + sizeof(struct {c_lower(family.fixed_header)}),') else: cw.p('.hdr_len\t= sizeof(struct genlmsghdr),') if family.ntfs: - cw.p(f".ntf_info\t= {family['name']}_ntf_info,") - cw.p(f".ntf_info_size\t= YNL_ARRAY_SIZE({family['name']}_ntf_info),") + cw.p(f".ntf_info\t= {family.c_name}_ntf_info,") + cw.p(f".ntf_info_size\t= YNL_ARRAY_SIZE({family.c_name}_ntf_info),") cw.block_end(line=';') @@ -2887,7 +3138,7 @@ def main(): cw.p(f'#include "{hdr_file}"') cw.p('#include "ynl.h"') headers = [] - for definition in parsed['definitions']: + for definition in parsed['definitions'] + parsed['attribute-sets']: if 'header' in definition: headers.append(definition['header']) if args.mode == 'user': @@ -2941,6 +3192,7 @@ def main(): print_kernel_family_struct_hdr(parsed, cw) else: print_kernel_policy_ranges(parsed, cw) + print_kernel_policy_sparse_enum_validates(parsed, cw) for _, struct in sorted(parsed.pure_nested_structs.items()): if struct.request: @@ -3009,7 +3261,7 @@ def main(): ri = RenderInfo(cw, parsed, args.mode, op, 'dump') print_req_type(ri) print_req_type_helpers(ri) - if not ri.type_consistent: + if not ri.type_consistent or ri.type_oneside: print_rsp_type(ri) print_wrapped_type(ri) print_dump_prototype(ri) @@ -3087,7 +3339,7 @@ def main(): if 'dump' in op: cw.p(f"/* {op.enum_name} - dump */") ri = RenderInfo(cw, parsed, args.mode, op, "dump") - if not ri.type_consistent: + if not ri.type_consistent or ri.type_oneside: parse_rsp_msg(ri, deref=True) print_req_free(ri) print_dump_type_free(ri) diff --git a/tools/net/ynl/pyynl/ynl_gen_rst.py b/tools/net/ynl/pyynl/ynl_gen_rst.py index 6c56d0d726b4..0cb6348e28d3 100755 --- a/tools/net/ynl/pyynl/ynl_gen_rst.py +++ b/tools/net/ynl/pyynl/ynl_gen_rst.py @@ -392,7 +392,7 @@ def parse_arguments() -> argparse.Namespace: def parse_yaml_file(filename: str) -> str: - """Transform the YAML specified by filename into a rst-formmated string""" + """Transform the YAML specified by filename into an RST-formatted string""" with open(filename, "r", encoding="utf-8") as spec_file: yaml_data = yaml.safe_load(spec_file) content = parse_yaml(yaml_data) diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore index dda6686257a7..7f9781cf532f 100644 --- a/tools/net/ynl/samples/.gitignore +++ b/tools/net/ynl/samples/.gitignore @@ -2,4 +2,6 @@ ethtool devlink netdev ovs -page-pool
\ No newline at end of file +page-pool +rt-addr +rt-route diff --git a/tools/net/ynl/samples/devlink.c b/tools/net/ynl/samples/devlink.c index d2611d7ebab4..ac9dfb01f280 100644 --- a/tools/net/ynl/samples/devlink.c +++ b/tools/net/ynl/samples/devlink.c @@ -22,6 +22,7 @@ int main(int argc, char **argv) ynl_dump_foreach(devs, d) { struct devlink_info_get_req *info_req; struct devlink_info_get_rsp *info_rsp; + unsigned i; printf("%s/%s:\n", d->bus_name, d->dev_name); @@ -34,11 +35,11 @@ int main(int argc, char **argv) if (!info_rsp) goto err_free_devs; - if (info_rsp->_present.info_driver_name_len) + if (info_rsp->_len.info_driver_name) printf(" driver: %s\n", info_rsp->info_driver_name); - if (info_rsp->n_info_version_running) + if (info_rsp->_count.info_version_running) printf(" running fw:\n"); - for (unsigned i = 0; i < info_rsp->n_info_version_running; i++) + for (i = 0; i < info_rsp->_count.info_version_running; i++) printf(" %s: %s\n", info_rsp->info_version_running[i].info_version_name, info_rsp->info_version_running[i].info_version_value); diff --git a/tools/net/ynl/samples/rt-addr.c b/tools/net/ynl/samples/rt-addr.c new file mode 100644 index 000000000000..2edde5c36b18 --- /dev/null +++ b/tools/net/ynl/samples/rt-addr.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <string.h> + +#include <ynl.h> + +#include <arpa/inet.h> +#include <net/if.h> + +#include "rt-addr-user.h" + +static void rt_addr_print(struct rt_addr_getaddr_rsp *a) +{ + char ifname[IF_NAMESIZE]; + char addr_str[64]; + const char *addr; + const char *name; + + name = if_indextoname(a->_hdr.ifa_index, ifname); + if (name) + printf("%16s: ", name); + + switch (a->_len.address) { + case 4: + addr = inet_ntop(AF_INET, a->address, + addr_str, sizeof(addr_str)); + break; + case 16: + addr = inet_ntop(AF_INET6, a->address, + addr_str, sizeof(addr_str)); + break; + default: + addr = NULL; + break; + } + if (addr) + printf("%s", addr); + else + printf("[%d]", a->_len.address); + + printf("\n"); +} + +int main(int argc, char **argv) +{ + struct rt_addr_getaddr_list *rsp; + struct rt_addr_getaddr_req *req; + struct ynl_error yerr; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_rt_addr_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return 1; + } + + req = rt_addr_getaddr_req_alloc(); + if (!req) + goto err_destroy; + + rsp = rt_addr_getaddr_dump(ys, req); + rt_addr_getaddr_req_free(req); + if (!rsp) + goto err_close; + + if (ynl_dump_empty(rsp)) + fprintf(stderr, "Error: no addresses reported\n"); + ynl_dump_foreach(rsp, addr) + rt_addr_print(addr); + rt_addr_getaddr_list_free(rsp); + + ynl_sock_destroy(ys); + return 0; + +err_close: + fprintf(stderr, "YNL: %s\n", ys->err.msg); +err_destroy: + ynl_sock_destroy(ys); + return 2; +} diff --git a/tools/net/ynl/samples/rt-route.c b/tools/net/ynl/samples/rt-route.c new file mode 100644 index 000000000000..7427104a96df --- /dev/null +++ b/tools/net/ynl/samples/rt-route.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <string.h> + +#include <ynl.h> + +#include <arpa/inet.h> +#include <net/if.h> + +#include "rt-route-user.h" + +static void rt_route_print(struct rt_route_getroute_rsp *r) +{ + char ifname[IF_NAMESIZE]; + char route_str[64]; + const char *route; + const char *name; + + /* Ignore local */ + if (r->_hdr.rtm_table == RT_TABLE_LOCAL) + return; + + if (r->_present.oif) { + name = if_indextoname(r->oif, ifname); + if (name) + printf("oif: %-16s ", name); + } + + if 
(r->_len.dst) { + route = inet_ntop(r->_hdr.rtm_family, r->dst, + route_str, sizeof(route_str)); + printf("dst: %s/%d ", route, r->_hdr.rtm_dst_len); + } + + if (r->_len.gateway) { + route = inet_ntop(r->_hdr.rtm_family, r->gateway, + route_str, sizeof(route_str)); + printf("gateway: %s ", route); + } + + printf("\n"); +} + +int main(int argc, char **argv) +{ + struct rt_route_getroute_req_dump *req; + struct rt_route_getroute_list *rsp; + struct ynl_error yerr; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_rt_route_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return 1; + } + + req = rt_route_getroute_req_dump_alloc(); + if (!req) + goto err_destroy; + + rsp = rt_route_getroute_dump(ys, req); + rt_route_getroute_req_dump_free(req); + if (!rsp) + goto err_close; + + if (ynl_dump_empty(rsp)) + fprintf(stderr, "Error: no routes reported\n"); + ynl_dump_foreach(rsp, route) + rt_route_print(route); + rt_route_getroute_list_free(rsp); + + ynl_sock_destroy(ys); + return 0; + +err_close: + fprintf(stderr, "YNL: %s\n", ys->err.msg); +err_destroy: + ynl_sock_destroy(ys); + return 2; +} diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 80fb84fa3cfc..a0a6ba47d600 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -71,6 +71,7 @@ TARGETS += net/hsr TARGETS += net/mptcp TARGETS += net/netfilter TARGETS += net/openvswitch +TARGETS += net/ovpn TARGETS += net/packetdrill TARGETS += net/rds TARGETS += net/tcp_ao diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index c378d5d07e02..3201a962b3dc 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -71,8 +71,10 @@ CONFIG_NET_IPGRE=y CONFIG_NET_IPGRE_DEMUX=y CONFIG_NET_IPIP=y CONFIG_NET_MPLS_GSO=y +CONFIG_NET_SCH_BPF=y CONFIG_NET_SCH_FQ=y CONFIG_NET_SCH_INGRESS=y +CONFIG_NET_SCH_HTB=y CONFIG_NET_SCHED=y CONFIG_NETDEVSIM=y CONFIG_NETFILTER=y diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c new file mode 100644 index 000000000000..730357cd0c9a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/pkt_sched.h> +#include <linux/rtnetlink.h> +#include <test_progs.h> + +#include "network_helpers.h" +#include "bpf_qdisc_fifo.skel.h" +#include "bpf_qdisc_fq.skel.h" +#include "bpf_qdisc_fail__incompl_ops.skel.h" + +#define LO_IFINDEX 1 + +static const unsigned int total_bytes = 10 * 1024 * 1024; + +static void do_test(char *qdisc) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX, + .attach_point = BPF_TC_QDISC, + .parent = TC_H_ROOT, + .handle = 0x8000000, + .qdisc = qdisc); + int srv_fd = -1, cli_fd = -1; + int err; + + err = bpf_tc_hook_create(&hook); + if (!ASSERT_OK(err, "attach qdisc")) + return;
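+
+ /* With the BPF qdisc installed as the root qdisc on loopback, push
+ * total_bytes through a TCP socket pair so that every segment
+ * traverses the struct_ops enqueue/dequeue callbacks.
+ */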
+ + if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach")) + goto out; + + do_test("bpf_fifo"); +out: + bpf_qdisc_fifo__destroy(fifo_skel); +} + +static void test_fq(void) +{ + struct bpf_qdisc_fq *fq_skel; + + fq_skel = bpf_qdisc_fq__open_and_load(); + if (!ASSERT_OK_PTR(fq_skel, "bpf_qdisc_fq__open_and_load")) + return; + + if (!ASSERT_OK(bpf_qdisc_fq__attach(fq_skel), "bpf_qdisc_fq__attach")) + goto out; + + do_test("bpf_fq"); +out: + bpf_qdisc_fq__destroy(fq_skel); +} + +static void test_qdisc_attach_to_mq(void) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, + .attach_point = BPF_TC_QDISC, + .parent = TC_H_MAKE(1 << 16, 1), + .handle = 0x11 << 16, + .qdisc = "bpf_fifo"); + struct bpf_qdisc_fifo *fifo_skel; + int err; + + fifo_skel = bpf_qdisc_fifo__open_and_load(); + if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load")) + return; + + if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach")) + goto out; + + SYS(out, "ip link add veth0 type veth peer veth1"); + hook.ifindex = if_nametoindex("veth0"); + SYS(out, "tc qdisc add dev veth0 root handle 1: mq"); + + err = bpf_tc_hook_create(&hook); + ASSERT_OK(err, "attach qdisc"); + + bpf_tc_hook_destroy(&hook); + + SYS(out, "tc qdisc delete dev veth0 root mq"); +out: + bpf_qdisc_fifo__destroy(fifo_skel); +} + +static void test_qdisc_attach_to_non_root(void) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX, + .attach_point = BPF_TC_QDISC, + .parent = TC_H_MAKE(1 << 16, 1), + .handle = 0x11 << 16, + .qdisc = "bpf_fifo"); + struct bpf_qdisc_fifo *fifo_skel; + int err; + + fifo_skel = bpf_qdisc_fifo__open_and_load(); + if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load")) + return; + + if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach")) + goto out; + + SYS(out, "tc qdisc add dev lo root handle 1: htb"); + SYS(out_del_htb, "tc class add dev lo parent 1: classid 1:1 htb rate 75Kbit"); + + err = bpf_tc_hook_create(&hook); + if (!ASSERT_ERR(err, "attach qdisc")) + bpf_tc_hook_destroy(&hook); + +out_del_htb: + SYS(out, "tc qdisc delete dev lo root htb"); +out: + bpf_qdisc_fifo__destroy(fifo_skel); +} + +static void test_incompl_ops(void) +{ + struct bpf_qdisc_fail__incompl_ops *skel; + struct bpf_link *link; + + skel = bpf_qdisc_fail__incompl_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_qdisc_fifo__open_and_load")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.test); + if (!ASSERT_ERR_PTR(link, "bpf_map__attach_struct_ops")) + bpf_link__destroy(link); + + bpf_qdisc_fail__incompl_ops__destroy(skel); +} + +static int get_default_qdisc(char *qdisc_name) +{ + FILE *f; + int num; + + f = fopen("/proc/sys/net/core/default_qdisc", "r"); + if (!f) + return -errno; + + num = fscanf(f, "%s", qdisc_name); + fclose(f); + + return num == 1 ? 
0 : -EFAULT; +} + +static void test_default_qdisc_attach_to_mq(void) +{ + char default_qdisc[IFNAMSIZ] = {}; + struct bpf_qdisc_fifo *fifo_skel; + struct netns_obj *netns = NULL; + int err; + + fifo_skel = bpf_qdisc_fifo__open_and_load(); + if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load")) + return; + + if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach")) + goto out; + + err = get_default_qdisc(default_qdisc); + if (!ASSERT_OK(err, "read sysctl net.core.default_qdisc")) + goto out; + + err = write_sysctl("/proc/sys/net/core/default_qdisc", "bpf_fifo"); + if (!ASSERT_OK(err, "write sysctl net.core.default_qdisc")) + goto out; + + netns = netns_new("bpf_qdisc_ns", true); + if (!ASSERT_OK_PTR(netns, "netns_new")) + goto out; + + SYS(out, "ip link add veth0 type veth peer veth1"); + SYS(out, "tc qdisc add dev veth0 root handle 1: mq"); + + ASSERT_EQ(fifo_skel->bss->init_called, true, "init_called"); + + SYS(out, "tc qdisc delete dev veth0 root mq"); +out: + netns_free(netns); + if (default_qdisc[0]) + write_sysctl("/proc/sys/net/core/default_qdisc", default_qdisc); + + bpf_qdisc_fifo__destroy(fifo_skel); +} + +void test_ns_bpf_qdisc(void) +{ + if (test__start_subtest("fifo")) + test_fifo(); + if (test__start_subtest("fq")) + test_fq(); + if (test__start_subtest("attach to mq")) + test_qdisc_attach_to_mq(); + if (test__start_subtest("attach to non root")) + test_qdisc_attach_to_non_root(); + if (test__start_subtest("incompl_ops")) + test_incompl_ops(); +} + +void serial_test_bpf_qdisc_default(void) +{ + test_default_qdisc_attach_to_mq(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c index d56e18b25528..a4517bee34d5 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c @@ -7,14 +7,433 @@ #define TEST_NS "sock_iter_batch_netns" +static const int init_batch_size = 16; static const int nr_soreuse = 4; +struct iter_out { + int idx; + __u64 cookie; +} __packed; + +struct sock_count { + __u64 cookie; + int count; +}; + +static int insert(__u64 cookie, struct sock_count counts[], int counts_len) +{ + int insert = -1; + int i = 0; + + for (; i < counts_len; i++) { + if (!counts[i].cookie) { + insert = i; + } else if (counts[i].cookie == cookie) { + insert = i; + break; + } + } + if (insert < 0) + return insert; + + counts[insert].cookie = cookie; + counts[insert].count++; + + return counts[insert].count; +} + +static int read_n(int iter_fd, int n, struct sock_count counts[], + int counts_len) +{ + struct iter_out out; + int nread = 1; + int i = 0; + + for (; nread > 0 && (n < 0 || i < n); i++) { + nread = read(iter_fd, &out, sizeof(out)); + if (!nread || !ASSERT_EQ(nread, sizeof(out), "nread")) + break; + ASSERT_GE(insert(out.cookie, counts, counts_len), 0, "insert"); + } + + ASSERT_TRUE(n < 0 || i == n, "n < 0 || i == n"); + + return i; +} + +static __u64 socket_cookie(int fd) +{ + __u64 cookie; + socklen_t cookie_len = sizeof(cookie); + + if (!ASSERT_OK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, + &cookie_len), "getsockopt(SO_COOKIE)")) + return 0; + return cookie; +} + +static bool was_seen(int fd, struct sock_count counts[], int counts_len) +{ + __u64 cookie = socket_cookie(fd); + int i = 0; + + for (; cookie && i < counts_len; i++) + if (cookie == counts[i].cookie) + return true; + + return false; +} + +static int get_seen_socket(int *fds, struct sock_count counts[], int n) +{ + int i = 0; + + 
for (; i < n; i++) + if (was_seen(fds[i], counts, n)) + return i; + return -1; +} + +static int get_nth_socket(int *fds, int fds_len, struct bpf_link *link, int n) +{ + int i, nread, iter_fd; + int nth_sock_idx = -1; + struct iter_out out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create")) + return -1; + + for (; n >= 0; n--) { + nread = read(iter_fd, &out, sizeof(out)); + if (!nread || !ASSERT_GE(nread, 1, "nread")) + goto done; + } + + for (i = 0; i < fds_len && nth_sock_idx < 0; i++) + if (fds[i] >= 0 && socket_cookie(fds[i]) == out.cookie) + nth_sock_idx = i; +done: + close(iter_fd); + return nth_sock_idx; +} + +static int get_seen_count(int fd, struct sock_count counts[], int n) +{ + __u64 cookie = socket_cookie(fd); + int count = 0; + int i = 0; + + for (; cookie && !count && i < n; i++) + if (cookie == counts[i].cookie) + count = counts[i].count; + + return count; +} + +static void check_n_were_seen_once(int *fds, int fds_len, int n, + struct sock_count counts[], int counts_len) +{ + int seen_once = 0; + int seen_cnt; + int i = 0; + + for (; i < fds_len; i++) { + /* Skip any sockets that were closed or that weren't seen + * exactly once. + */ + if (fds[i] < 0) + continue; + seen_cnt = get_seen_count(fds[i], counts, counts_len); + if (seen_cnt && ASSERT_EQ(seen_cnt, 1, "seen_cnt")) + seen_once++; + } + + ASSERT_EQ(seen_once, n, "seen_once"); +} + +static void remove_seen(int family, int sock_type, const char *addr, __u16 port, + int *socks, int socks_len, struct sock_count *counts, + int counts_len, struct bpf_link *link, int iter_fd) +{ + int close_idx; + + /* Iterate through the first socks_len - 1 sockets. */ + read_n(iter_fd, socks_len - 1, counts, counts_len); + + /* Make sure we saw socks_len - 1 sockets exactly once. */ + check_n_were_seen_once(socks, socks_len, socks_len - 1, counts, + counts_len); + + /* Close a socket we've already seen to remove it from the bucket. */ + close_idx = get_seen_socket(socks, counts, counts_len); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + close(socks[close_idx]); + socks[close_idx] = -1; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure the last socket wasn't skipped and that there were no + * repeats. + */ + check_n_were_seen_once(socks, socks_len, socks_len - 1, counts, + counts_len); +} + +static void remove_unseen(int family, int sock_type, const char *addr, + __u16 port, int *socks, int socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int close_idx; + + /* Iterate through the first socket. */ + read_n(iter_fd, 1, counts, counts_len); + + /* Make sure we saw a socket from fds. */ + check_n_were_seen_once(socks, socks_len, 1, counts, counts_len); + + /* Close what would be the next socket in the bucket to exercise the + * condition where we need to skip past the first cookie we remembered. + */ + close_idx = get_nth_socket(socks, socks_len, link, 1); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + close(socks[close_idx]); + socks[close_idx] = -1; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure the remaining sockets were seen exactly once and that we + * didn't repeat the socket that was already seen. 
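+ * The cookies recorded before the removal let the iterator resume
+ * cleanly even though the socket it would have visited next is gone.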
+ */ + check_n_were_seen_once(socks, socks_len, socks_len - 1, counts, + counts_len); +} + +static void remove_all(int family, int sock_type, const char *addr, + __u16 port, int *socks, int socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int close_idx, i; + + /* Iterate through the first socket. */ + read_n(iter_fd, 1, counts, counts_len); + + /* Make sure we saw a socket from fds. */ + check_n_were_seen_once(socks, socks_len, 1, counts, counts_len); + + /* Close all remaining sockets to exhaust the list of saved cookies and + * exit without putting any sockets into the batch on the next read. + */ + for (i = 0; i < socks_len - 1; i++) { + close_idx = get_nth_socket(socks, socks_len, link, 1); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + close(socks[close_idx]); + socks[close_idx] = -1; + } + + /* Make sure there are no more sockets returned */ + ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n"); +} + +static void add_some(int family, int sock_type, const char *addr, __u16 port, + int *socks, int socks_len, struct sock_count *counts, + int counts_len, struct bpf_link *link, int iter_fd) +{ + int *new_socks = NULL; + + /* Iterate through the first socks_len - 1 sockets. */ + read_n(iter_fd, socks_len - 1, counts, counts_len); + + /* Make sure we saw socks_len - 1 sockets exactly once. */ + check_n_were_seen_once(socks, socks_len, socks_len - 1, counts, + counts_len); + + /* Double the number of sockets in the bucket. */ + new_socks = start_reuseport_server(family, sock_type, addr, port, 0, + socks_len); + if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server")) + goto done; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure each of the original sockets was seen exactly once. */ + check_n_were_seen_once(socks, socks_len, socks_len, counts, + counts_len); +done: + free_fds(new_socks, socks_len); +} + +static void force_realloc(int family, int sock_type, const char *addr, + __u16 port, int *socks, int socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int *new_socks = NULL; + + /* Iterate through the first socket just to initialize the batch. */ + read_n(iter_fd, 1, counts, counts_len); + + /* Double the number of sockets in the bucket to force a realloc on the + * next read. + */ + new_socks = start_reuseport_server(family, sock_type, addr, port, 0, + socks_len); + if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server")) + goto done; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure each socket from the first set was seen exactly once. 
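+ * A mid-iteration realloc of the batch must not cause repeats or
+ * skips of the sockets that were present when iteration began.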
*/ + check_n_were_seen_once(socks, socks_len, socks_len, counts, + counts_len); +done: + free_fds(new_socks, socks_len); +} + +struct test_case { + void (*test)(int family, int sock_type, const char *addr, __u16 port, + int *socks, int socks_len, struct sock_count *counts, + int counts_len, struct bpf_link *link, int iter_fd); + const char *description; + int init_socks; + int max_socks; + int sock_type; + int family; +}; + +static struct test_case resume_tests[] = { + { + .description = "udp: resume after removing a seen socket", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_DGRAM, + .family = AF_INET6, + .test = remove_seen, + }, + { + .description = "udp: resume after removing one unseen socket", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_DGRAM, + .family = AF_INET6, + .test = remove_unseen, + }, + { + .description = "udp: resume after removing all unseen sockets", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_DGRAM, + .family = AF_INET6, + .test = remove_all, + }, + { + .description = "udp: resume after adding a few sockets", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_DGRAM, + /* Use AF_INET so that new sockets are added to the head of the + * bucket's list. + */ + .family = AF_INET, + .test = add_some, + }, + { + .description = "udp: force a realloc to occur", + .init_socks = init_batch_size, + .max_socks = init_batch_size * 2, + .sock_type = SOCK_DGRAM, + /* Use AF_INET6 so that new sockets are added to the tail of the + * bucket's list, needing to be added to the next batch to force + * a realloc. + */ + .family = AF_INET6, + .test = force_realloc, + }, +}; + +static void do_resume_test(struct test_case *tc) +{ + struct sock_iter_batch *skel = NULL; + static const __u16 port = 10001; + struct bpf_link *link = NULL; + struct sock_count *counts; + int err, iter_fd = -1; + const char *addr; + int *fds = NULL; + int local_port; + + counts = calloc(tc->max_socks, sizeof(*counts)); + if (!ASSERT_OK_PTR(counts, "counts")) + goto done; + skel = sock_iter_batch__open(); + if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open")) + goto done; + + /* Prepare a bucket of sockets in the kernel hashtable */ + addr = tc->family == AF_INET6 ? "::1" : "127.0.0.1"; + fds = start_reuseport_server(tc->family, tc->sock_type, addr, port, 0, + tc->init_socks); + if (!ASSERT_OK_PTR(fds, "start_reuseport_server")) + goto done; + local_port = get_socket_local_port(*fds); + if (!ASSERT_GE(local_port, 0, "get_socket_local_port")) + goto done; + skel->rodata->ports[0] = ntohs(local_port); + skel->rodata->sf = tc->family; + + err = sock_iter_batch__load(skel); + if (!ASSERT_OK(err, "sock_iter_batch__load")) + goto done; + + link = bpf_program__attach_iter(tc->sock_type == SOCK_STREAM ? 
+ skel->progs.iter_tcp_soreuse : + skel->progs.iter_udp_soreuse, + NULL); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter")) + goto done; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create")) + goto done; + + tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks, + counts, tc->max_socks, link, iter_fd); +done: + free(counts); + free_fds(fds, tc->init_socks); + if (iter_fd >= 0) + close(iter_fd); + bpf_link__destroy(link); + sock_iter_batch__destroy(skel); +} + +static void do_resume_tests(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(resume_tests); i++) { + if (test__start_subtest(resume_tests[i].description)) { + do_resume_test(&resume_tests[i]); + } + } +} + static void do_test(int sock_type, bool onebyone) { int err, i, nread, to_read, total_read, iter_fd = -1; - int first_idx, second_idx, indices[nr_soreuse]; + struct iter_out outputs[nr_soreuse]; struct bpf_link *link = NULL; struct sock_iter_batch *skel; + int first_idx, second_idx; int *fds[2] = {}; skel = sock_iter_batch__open(); @@ -34,6 +453,7 @@ static void do_test(int sock_type, bool onebyone) goto done; skel->rodata->ports[i] = ntohs(local_port); } + skel->rodata->sf = AF_INET6; err = sock_iter_batch__load(skel); if (!ASSERT_OK(err, "sock_iter_batch__load")) @@ -55,38 +475,38 @@ static void do_test(int sock_type, bool onebyone) * from a bucket and leave one socket out from * that bucket on purpose. */ - to_read = (nr_soreuse - 1) * sizeof(*indices); + to_read = (nr_soreuse - 1) * sizeof(*outputs); total_read = 0; first_idx = -1; do { - nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read); - if (nread <= 0 || nread % sizeof(*indices)) + nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read); + if (nread <= 0 || nread % sizeof(*outputs)) break; total_read += nread; if (first_idx == -1) - first_idx = indices[0]; - for (i = 0; i < nread / sizeof(*indices); i++) - ASSERT_EQ(indices[i], first_idx, "first_idx"); + first_idx = outputs[0].idx; + for (i = 0; i < nread / sizeof(*outputs); i++) + ASSERT_EQ(outputs[i].idx, first_idx, "first_idx"); } while (total_read < to_read); - ASSERT_EQ(nread, onebyone ? sizeof(*indices) : to_read, "nread"); + ASSERT_EQ(nread, onebyone ? sizeof(*outputs) : to_read, "nread"); ASSERT_EQ(total_read, to_read, "total_read"); free_fds(fds[first_idx], nr_soreuse); fds[first_idx] = NULL; /* Read the "whole" second bucket */ - to_read = nr_soreuse * sizeof(*indices); + to_read = nr_soreuse * sizeof(*outputs); total_read = 0; second_idx = !first_idx; do { - nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read); - if (nread <= 0 || nread % sizeof(*indices)) + nread = read(iter_fd, outputs, onebyone ? 
sizeof(*outputs) : to_read); + if (nread <= 0 || nread % sizeof(*outputs)) break; total_read += nread; - for (i = 0; i < nread / sizeof(*indices); i++) - ASSERT_EQ(indices[i], second_idx, "second_idx"); + for (i = 0; i < nread / sizeof(*outputs); i++) + ASSERT_EQ(outputs[i].idx, second_idx, "second_idx"); } while (total_read <= to_read); ASSERT_EQ(nread, 0, "nread"); /* Both so_reuseport ports should be in different buckets, so @@ -128,6 +548,7 @@ void test_sock_iter_batch(void) do_test(SOCK_DGRAM, true); do_test(SOCK_DGRAM, false); } + do_resume_tests(); close_netns(nstoken); done: diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h new file mode 100644 index 000000000000..3754f581b328 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _BPF_QDISC_COMMON_H +#define _BPF_QDISC_COMMON_H + +#define NET_XMIT_SUCCESS 0x00 +#define NET_XMIT_DROP 0x01 /* skb dropped */ +#define NET_XMIT_CN 0x02 /* congestion notification */ + +#define TC_PRIO_CONTROL 7 +#define TC_PRIO_MAX 15 + +#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) + +struct bpf_sk_buff_ptr; + +static struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb) +{ + return (struct qdisc_skb_cb *)skb->cb; +} + +static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb) +{ + return qdisc_skb_cb(skb)->pkt_len; +} + +#endif diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c new file mode 100644 index 000000000000..f188062ed730 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include "bpf_experimental.h" +#include "bpf_qdisc_common.h" + +char _license[] SEC("license") = "GPL"; + +SEC("struct_ops") +int BPF_PROG(bpf_qdisc_test_enqueue, struct sk_buff *skb, struct Qdisc *sch, + struct bpf_sk_buff_ptr *to_free) +{ + bpf_qdisc_skb_drop(skb, to_free); + return NET_XMIT_DROP; +} + +SEC("struct_ops") +struct sk_buff *BPF_PROG(bpf_qdisc_test_dequeue, struct Qdisc *sch) +{ + return NULL; +} + +SEC("struct_ops") +void BPF_PROG(bpf_qdisc_test_reset, struct Qdisc *sch) +{ +} + +SEC("struct_ops") +void BPF_PROG(bpf_qdisc_test_destroy, struct Qdisc *sch) +{ +} + +SEC(".struct_ops") +struct Qdisc_ops test = { + .enqueue = (void *)bpf_qdisc_test_enqueue, + .dequeue = (void *)bpf_qdisc_test_dequeue, + .reset = (void *)bpf_qdisc_test_reset, + .destroy = (void *)bpf_qdisc_test_destroy, + .id = "bpf_qdisc_test", +}; + diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c new file mode 100644 index 000000000000..1de2be3e370b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include "bpf_experimental.h" +#include "bpf_qdisc_common.h" + +char _license[] SEC("license") = "GPL"; + +struct skb_node { + struct sk_buff __kptr * skb; + struct bpf_list_node node; +}; + +private(A) struct bpf_spin_lock q_fifo_lock; +private(A) struct bpf_list_head q_fifo __contains(skb_node, node); + +bool init_called; + +SEC("struct_ops/bpf_fifo_enqueue") +int BPF_PROG(bpf_fifo_enqueue, struct sk_buff *skb, struct Qdisc *sch, + struct bpf_sk_buff_ptr *to_free) +{ + struct skb_node *skbn; + u32 pkt_len; + + if (sch->q.qlen == 
sch->limit) + goto drop; + + skbn = bpf_obj_new(typeof(*skbn)); + if (!skbn) + goto drop; + + pkt_len = qdisc_pkt_len(skb); + + sch->q.qlen++; + skb = bpf_kptr_xchg(&skbn->skb, skb); + if (skb) + bpf_qdisc_skb_drop(skb, to_free); + + bpf_spin_lock(&q_fifo_lock); + bpf_list_push_back(&q_fifo, &skbn->node); + bpf_spin_unlock(&q_fifo_lock); + + sch->qstats.backlog += pkt_len; + return NET_XMIT_SUCCESS; +drop: + bpf_qdisc_skb_drop(skb, to_free); + return NET_XMIT_DROP; +} + +SEC("struct_ops/bpf_fifo_dequeue") +struct sk_buff *BPF_PROG(bpf_fifo_dequeue, struct Qdisc *sch) +{ + struct bpf_list_node *node; + struct sk_buff *skb = NULL; + struct skb_node *skbn; + + bpf_spin_lock(&q_fifo_lock); + node = bpf_list_pop_front(&q_fifo); + bpf_spin_unlock(&q_fifo_lock); + if (!node) + return NULL; + + skbn = container_of(node, struct skb_node, node); + skb = bpf_kptr_xchg(&skbn->skb, skb); + bpf_obj_drop(skbn); + if (!skb) + return NULL; + + sch->qstats.backlog -= qdisc_pkt_len(skb); + bpf_qdisc_bstats_update(sch, skb); + sch->q.qlen--; + + return skb; +} + +SEC("struct_ops/bpf_fifo_init") +int BPF_PROG(bpf_fifo_init, struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + sch->limit = 1000; + init_called = true; + return 0; +} + +SEC("struct_ops/bpf_fifo_reset") +void BPF_PROG(bpf_fifo_reset, struct Qdisc *sch) +{ + struct bpf_list_node *node; + struct skb_node *skbn; + int i; + + bpf_for(i, 0, sch->q.qlen) { + struct sk_buff *skb = NULL; + + bpf_spin_lock(&q_fifo_lock); + node = bpf_list_pop_front(&q_fifo); + bpf_spin_unlock(&q_fifo_lock); + + if (!node) + break; + + skbn = container_of(node, struct skb_node, node); + skb = bpf_kptr_xchg(&skbn->skb, skb); + if (skb) + bpf_kfree_skb(skb); + bpf_obj_drop(skbn); + } + sch->q.qlen = 0; +} + +SEC("struct_ops") +void BPF_PROG(bpf_fifo_destroy, struct Qdisc *sch) +{ +} + +SEC(".struct_ops") +struct Qdisc_ops fifo = { + .enqueue = (void *)bpf_fifo_enqueue, + .dequeue = (void *)bpf_fifo_dequeue, + .init = (void *)bpf_fifo_init, + .reset = (void *)bpf_fifo_reset, + .destroy = (void *)bpf_fifo_destroy, + .id = "bpf_fifo", +}; + diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c new file mode 100644 index 000000000000..1a3233a275c7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c @@ -0,0 +1,756 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* bpf_fq is intended for testing the bpf qdisc infrastructure and not a direct + * copy of sch_fq. bpf_fq implements the scheduling algorithm of sch_fq before + * 29f834aa326e ("net_sched: sch_fq: add 3 bands and WRR scheduling") was + * introduced. It gives each flow a fair chance to transmit packets in a + * round-robin fashion. Note that for flow pacing, bpf_fq currently only + * respects skb->tstamp but not skb->sk->sk_pacing_rate. In addition, if there + * are multiple bpf_fq instances, they will have a shared view of flows and + * configuration since some key data structure such as fq_prio_flows, + * fq_nonprio_flows, and fq_bpf_data are global. + * + * To use bpf_fq alone without running selftests, use the following commands. + * + * 1. Register bpf_fq to the kernel + * bpftool struct_ops register bpf_qdisc_fq.bpf.o /sys/fs/bpf + * 2. Add bpf_fq to an interface + * tc qdisc add dev <interface name> root handle <handle> bpf_fq + * 3. Delete bpf_fq attached to the interface + * tc qdisc delete dev <interface name> root + * 4. 
Unregister bpf_fq + * bpftool struct_ops unregister name fq + * + * The qdisc name, bpf_fq, used in tc commands is defined by Qdisc_ops.id. + * The struct_ops_map_name, fq, used in the bpftool command is the name of the + * Qdisc_ops. + * + * SEC(".struct_ops") + * struct Qdisc_ops fq = { + * ... + * .id = "bpf_fq", + * }; + */ + +#include <vmlinux.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include "bpf_experimental.h" +#include "bpf_qdisc_common.h" + +char _license[] SEC("license") = "GPL"; + +#define NSEC_PER_USEC 1000L +#define NSEC_PER_SEC 1000000000L + +#define NUM_QUEUE (1 << 20) + +struct fq_bpf_data { + u32 quantum; + u32 initial_quantum; + u32 flow_refill_delay; + u32 flow_plimit; + u64 horizon; + u32 orphan_mask; + u32 timer_slack; + u64 time_next_delayed_flow; + u64 unthrottle_latency_ns; + u8 horizon_drop; + u32 new_flow_cnt; + u32 old_flow_cnt; + u64 ktime_cache; +}; + +enum { + CLS_RET_PRIO = 0, + CLS_RET_NONPRIO = 1, + CLS_RET_ERR = 2, +}; + +struct skb_node { + u64 tstamp; + struct sk_buff __kptr * skb; + struct bpf_rb_node node; +}; + +struct fq_flow_node { + int credit; + u32 qlen; + u64 age; + u64 time_next_packet; + struct bpf_list_node list_node; + struct bpf_rb_node rb_node; + struct bpf_rb_root queue __contains(skb_node, node); + struct bpf_spin_lock lock; + struct bpf_refcount refcount; +}; + +struct dequeue_nonprio_ctx { + bool stop_iter; + u64 expire; + u64 now; +}; + +struct remove_flows_ctx { + bool gc_only; + u32 reset_cnt; + u32 reset_max; +}; + +struct unset_throttled_flows_ctx { + bool unset_all; + u64 now; +}; + +struct fq_stashed_flow { + struct fq_flow_node __kptr * flow; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u64); + __type(value, struct fq_stashed_flow); + __uint(max_entries, NUM_QUEUE); +} fq_nonprio_flows SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u64); + __type(value, struct fq_stashed_flow); + __uint(max_entries, 1); +} fq_prio_flows SEC(".maps"); + +private(A) struct bpf_spin_lock fq_delayed_lock; +private(A) struct bpf_rb_root fq_delayed __contains(fq_flow_node, rb_node); + +private(B) struct bpf_spin_lock fq_new_flows_lock; +private(B) struct bpf_list_head fq_new_flows __contains(fq_flow_node, list_node); + +private(C) struct bpf_spin_lock fq_old_flows_lock; +private(C) struct bpf_list_head fq_old_flows __contains(fq_flow_node, list_node); + +private(D) struct fq_bpf_data q; + +/* Wrapper for bpf_kptr_xchg that expects NULL dst */ +static void bpf_kptr_xchg_back(void *map_val, void *ptr) +{ + void *ret; + + ret = bpf_kptr_xchg(map_val, ptr); + if (ret) + bpf_obj_drop(ret); +} + +static bool skbn_tstamp_less(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct skb_node *skbn_a; + struct skb_node *skbn_b; + + skbn_a = container_of(a, struct skb_node, node); + skbn_b = container_of(b, struct skb_node, node); + + return skbn_a->tstamp < skbn_b->tstamp; +} + +static bool fn_time_next_packet_less(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct fq_flow_node *flow_a; + struct fq_flow_node *flow_b; + + flow_a = container_of(a, struct fq_flow_node, rb_node); + flow_b = container_of(b, struct fq_flow_node, rb_node); + + return flow_a->time_next_packet < flow_b->time_next_packet; +} + +static void +fq_flows_add_head(struct bpf_list_head *head, struct bpf_spin_lock *lock, + struct fq_flow_node *flow, u32 *flow_cnt) +{ + bpf_spin_lock(lock); + bpf_list_push_front(head, &flow->list_node); + bpf_spin_unlock(lock); + *flow_cnt += 1; +} + +static void 
+fq_flows_add_tail(struct bpf_list_head *head, struct bpf_spin_lock *lock, + struct fq_flow_node *flow, u32 *flow_cnt) +{ + bpf_spin_lock(lock); + bpf_list_push_back(head, &flow->list_node); + bpf_spin_unlock(lock); + *flow_cnt += 1; +} + +static void +fq_flows_remove_front(struct bpf_list_head *head, struct bpf_spin_lock *lock, + struct bpf_list_node **node, u32 *flow_cnt) +{ + bpf_spin_lock(lock); + *node = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + *flow_cnt -= 1; +} + +static bool +fq_flows_is_empty(struct bpf_list_head *head, struct bpf_spin_lock *lock) +{ + struct bpf_list_node *node; + + bpf_spin_lock(lock); + node = bpf_list_pop_front(head); + if (node) { + bpf_list_push_front(head, node); + bpf_spin_unlock(lock); + return false; + } + bpf_spin_unlock(lock); + + return true; +} + +/* flow->age is used to denote the state of the flow (not-detached, detached, throttled) + * as well as the timestamp when the flow is detached. + * + * 0: not-detached + * 1 - (~0ULL-1): detached + * ~0ULL: throttled + */ +static void fq_flow_set_detached(struct fq_flow_node *flow) +{ + flow->age = bpf_jiffies64(); +} + +static bool fq_flow_is_detached(struct fq_flow_node *flow) +{ + return flow->age != 0 && flow->age != ~0ULL; +} + +static bool sk_listener(struct sock *sk) +{ + return (1 << sk->__sk_common.skc_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); +} + +static void fq_gc(void); + +static int fq_new_flow(void *flow_map, struct fq_stashed_flow **sflow, u64 hash) +{ + struct fq_stashed_flow tmp = {}; + struct fq_flow_node *flow; + int ret; + + flow = bpf_obj_new(typeof(*flow)); + if (!flow) + return -ENOMEM; + + flow->credit = q.initial_quantum, + flow->qlen = 0, + flow->age = 1, + flow->time_next_packet = 0, + + ret = bpf_map_update_elem(flow_map, &hash, &tmp, 0); + if (ret == -ENOMEM || ret == -E2BIG) { + fq_gc(); + bpf_map_update_elem(&fq_nonprio_flows, &hash, &tmp, 0); + } + + *sflow = bpf_map_lookup_elem(flow_map, &hash); + if (!*sflow) { + bpf_obj_drop(flow); + return -ENOMEM; + } + + bpf_kptr_xchg_back(&(*sflow)->flow, flow); + return 0; +} + +static int +fq_classify(struct sk_buff *skb, struct fq_stashed_flow **sflow) +{ + struct sock *sk = skb->sk; + int ret = CLS_RET_NONPRIO; + u64 hash = 0; + + if ((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL) { + *sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash); + ret = CLS_RET_PRIO; + } else { + if (!sk || sk_listener(sk)) { + hash = bpf_skb_get_hash(skb) & q.orphan_mask; + /* Avoid collision with an existing flow hash, which + * only uses the lower 32 bits of hash, by setting the + * upper half of hash to 1. + */ + hash |= (1ULL << 32); + } else if (sk->__sk_common.skc_state == TCP_CLOSE) { + hash = bpf_skb_get_hash(skb) & q.orphan_mask; + hash |= (1ULL << 32); + } else { + hash = sk->__sk_common.skc_hash; + } + *sflow = bpf_map_lookup_elem(&fq_nonprio_flows, &hash); + } + + if (!*sflow) + ret = fq_new_flow(&fq_nonprio_flows, sflow, hash) < 0 ? 
+ CLS_RET_ERR : CLS_RET_NONPRIO; + + return ret; +} + +static bool fq_packet_beyond_horizon(struct sk_buff *skb) +{ + return (s64)skb->tstamp > (s64)(q.ktime_cache + q.horizon); +} + +SEC("struct_ops/bpf_fq_enqueue") +int BPF_PROG(bpf_fq_enqueue, struct sk_buff *skb, struct Qdisc *sch, + struct bpf_sk_buff_ptr *to_free) +{ + struct fq_flow_node *flow = NULL, *flow_copy; + struct fq_stashed_flow *sflow; + u64 time_to_send, jiffies; + struct skb_node *skbn; + int ret; + + if (sch->q.qlen >= sch->limit) + goto drop; + + if (!skb->tstamp) { + time_to_send = q.ktime_cache = bpf_ktime_get_ns(); + } else { + if (fq_packet_beyond_horizon(skb)) { + q.ktime_cache = bpf_ktime_get_ns(); + if (fq_packet_beyond_horizon(skb)) { + if (q.horizon_drop) + goto drop; + + skb->tstamp = q.ktime_cache + q.horizon; + } + } + time_to_send = skb->tstamp; + } + + ret = fq_classify(skb, &sflow); + if (ret == CLS_RET_ERR) + goto drop; + + flow = bpf_kptr_xchg(&sflow->flow, flow); + if (!flow) + goto drop; + + if (ret == CLS_RET_NONPRIO) { + if (flow->qlen >= q.flow_plimit) { + bpf_kptr_xchg_back(&sflow->flow, flow); + goto drop; + } + + if (fq_flow_is_detached(flow)) { + flow_copy = bpf_refcount_acquire(flow); + + jiffies = bpf_jiffies64(); + if ((s64)(jiffies - (flow_copy->age + q.flow_refill_delay)) > 0) { + if (flow_copy->credit < q.quantum) + flow_copy->credit = q.quantum; + } + flow_copy->age = 0; + fq_flows_add_tail(&fq_new_flows, &fq_new_flows_lock, flow_copy, + &q.new_flow_cnt); + } + } + + skbn = bpf_obj_new(typeof(*skbn)); + if (!skbn) { + bpf_kptr_xchg_back(&sflow->flow, flow); + goto drop; + } + + skbn->tstamp = skb->tstamp = time_to_send; + + sch->qstats.backlog += qdisc_pkt_len(skb); + + skb = bpf_kptr_xchg(&skbn->skb, skb); + if (skb) + bpf_qdisc_skb_drop(skb, to_free); + + bpf_spin_lock(&flow->lock); + bpf_rbtree_add(&flow->queue, &skbn->node, skbn_tstamp_less); + bpf_spin_unlock(&flow->lock); + + flow->qlen++; + bpf_kptr_xchg_back(&sflow->flow, flow); + + sch->q.qlen++; + return NET_XMIT_SUCCESS; + +drop: + bpf_qdisc_skb_drop(skb, to_free); + sch->qstats.drops++; + return NET_XMIT_DROP; +} + +static int fq_unset_throttled_flows(u32 index, struct unset_throttled_flows_ctx *ctx) +{ + struct bpf_rb_node *node = NULL; + struct fq_flow_node *flow; + + bpf_spin_lock(&fq_delayed_lock); + + node = bpf_rbtree_first(&fq_delayed); + if (!node) { + bpf_spin_unlock(&fq_delayed_lock); + return 1; + } + + flow = container_of(node, struct fq_flow_node, rb_node); + if (!ctx->unset_all && flow->time_next_packet > ctx->now) { + q.time_next_delayed_flow = flow->time_next_packet; + bpf_spin_unlock(&fq_delayed_lock); + return 1; + } + + node = bpf_rbtree_remove(&fq_delayed, &flow->rb_node); + + bpf_spin_unlock(&fq_delayed_lock); + + if (!node) + return 1; + + flow = container_of(node, struct fq_flow_node, rb_node); + flow->age = 0; + fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt); + + return 0; +} + +static void fq_flow_set_throttled(struct fq_flow_node *flow) +{ + flow->age = ~0ULL; + + if (q.time_next_delayed_flow > flow->time_next_packet) + q.time_next_delayed_flow = flow->time_next_packet; + + bpf_spin_lock(&fq_delayed_lock); + bpf_rbtree_add(&fq_delayed, &flow->rb_node, fn_time_next_packet_less); + bpf_spin_unlock(&fq_delayed_lock); +} + +static void fq_check_throttled(u64 now) +{ + struct unset_throttled_flows_ctx ctx = { + .unset_all = false, + .now = now, + }; + unsigned long sample; + + if (q.time_next_delayed_flow > now) + return; + + sample = (unsigned long)(now - 
q.time_next_delayed_flow); + q.unthrottle_latency_ns -= q.unthrottle_latency_ns >> 3; + q.unthrottle_latency_ns += sample >> 3; + + q.time_next_delayed_flow = ~0ULL; + bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &ctx, 0); +} + +static struct sk_buff* +fq_dequeue_nonprio_flows(u32 index, struct dequeue_nonprio_ctx *ctx) +{ + u64 time_next_packet, time_to_send; + struct bpf_rb_node *rb_node; + struct sk_buff *skb = NULL; + struct bpf_list_head *head; + struct bpf_list_node *node; + struct bpf_spin_lock *lock; + struct fq_flow_node *flow; + struct skb_node *skbn; + bool is_empty; + u32 *cnt; + + if (q.new_flow_cnt) { + head = &fq_new_flows; + lock = &fq_new_flows_lock; + cnt = &q.new_flow_cnt; + } else if (q.old_flow_cnt) { + head = &fq_old_flows; + lock = &fq_old_flows_lock; + cnt = &q.old_flow_cnt; + } else { + if (q.time_next_delayed_flow != ~0ULL) + ctx->expire = q.time_next_delayed_flow; + goto break_loop; + } + + fq_flows_remove_front(head, lock, &node, cnt); + if (!node) + goto break_loop; + + flow = container_of(node, struct fq_flow_node, list_node); + if (flow->credit <= 0) { + flow->credit += q.quantum; + fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt); + return NULL; + } + + bpf_spin_lock(&flow->lock); + rb_node = bpf_rbtree_first(&flow->queue); + if (!rb_node) { + bpf_spin_unlock(&flow->lock); + is_empty = fq_flows_is_empty(&fq_old_flows, &fq_old_flows_lock); + if (head == &fq_new_flows && !is_empty) { + fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt); + } else { + fq_flow_set_detached(flow); + bpf_obj_drop(flow); + } + return NULL; + } + + skbn = container_of(rb_node, struct skb_node, node); + time_to_send = skbn->tstamp; + + time_next_packet = (time_to_send > flow->time_next_packet) ? 
+ time_to_send : flow->time_next_packet; + if (ctx->now < time_next_packet) { + bpf_spin_unlock(&flow->lock); + flow->time_next_packet = time_next_packet; + fq_flow_set_throttled(flow); + return NULL; + } + + rb_node = bpf_rbtree_remove(&flow->queue, rb_node); + bpf_spin_unlock(&flow->lock); + + if (!rb_node) + goto add_flow_and_break; + + skbn = container_of(rb_node, struct skb_node, node); + skb = bpf_kptr_xchg(&skbn->skb, skb); + bpf_obj_drop(skbn); + + if (!skb) + goto add_flow_and_break; + + flow->credit -= qdisc_skb_cb(skb)->pkt_len; + flow->qlen--; + +add_flow_and_break: + fq_flows_add_head(head, lock, flow, cnt); + +break_loop: + ctx->stop_iter = true; + return skb; +} + +static struct sk_buff *fq_dequeue_prio(void) +{ + struct fq_flow_node *flow = NULL; + struct fq_stashed_flow *sflow; + struct bpf_rb_node *rb_node; + struct sk_buff *skb = NULL; + struct skb_node *skbn; + u64 hash = 0; + + sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash); + if (!sflow) + return NULL; + + flow = bpf_kptr_xchg(&sflow->flow, flow); + if (!flow) + return NULL; + + bpf_spin_lock(&flow->lock); + rb_node = bpf_rbtree_first(&flow->queue); + if (!rb_node) { + bpf_spin_unlock(&flow->lock); + goto out; + } + + skbn = container_of(rb_node, struct skb_node, node); + rb_node = bpf_rbtree_remove(&flow->queue, &skbn->node); + bpf_spin_unlock(&flow->lock); + + if (!rb_node) + goto out; + + skbn = container_of(rb_node, struct skb_node, node); + skb = bpf_kptr_xchg(&skbn->skb, skb); + bpf_obj_drop(skbn); + +out: + bpf_kptr_xchg_back(&sflow->flow, flow); + + return skb; +} + +SEC("struct_ops/bpf_fq_dequeue") +struct sk_buff *BPF_PROG(bpf_fq_dequeue, struct Qdisc *sch) +{ + struct dequeue_nonprio_ctx cb_ctx = {}; + struct sk_buff *skb = NULL; + int i; + + if (!sch->q.qlen) + goto out; + + skb = fq_dequeue_prio(); + if (skb) + goto dequeue; + + q.ktime_cache = cb_ctx.now = bpf_ktime_get_ns(); + fq_check_throttled(q.ktime_cache); + bpf_for(i, 0, sch->limit) { + skb = fq_dequeue_nonprio_flows(i, &cb_ctx); + if (cb_ctx.stop_iter) + break; + }; + + if (skb) { +dequeue: + sch->q.qlen--; + sch->qstats.backlog -= qdisc_pkt_len(skb); + bpf_qdisc_bstats_update(sch, skb); + return skb; + } + + if (cb_ctx.expire) + bpf_qdisc_watchdog_schedule(sch, cb_ctx.expire, q.timer_slack); +out: + return NULL; +} + +static int fq_remove_flows_in_list(u32 index, void *ctx) +{ + struct bpf_list_node *node; + struct fq_flow_node *flow; + + bpf_spin_lock(&fq_new_flows_lock); + node = bpf_list_pop_front(&fq_new_flows); + bpf_spin_unlock(&fq_new_flows_lock); + if (!node) { + bpf_spin_lock(&fq_old_flows_lock); + node = bpf_list_pop_front(&fq_old_flows); + bpf_spin_unlock(&fq_old_flows_lock); + if (!node) + return 1; + } + + flow = container_of(node, struct fq_flow_node, list_node); + bpf_obj_drop(flow); + + return 0; +} + +extern unsigned CONFIG_HZ __kconfig; + +/* limit number of collected flows per round */ +#define FQ_GC_MAX 8 +#define FQ_GC_AGE (3*CONFIG_HZ) + +static bool fq_gc_candidate(struct fq_flow_node *flow) +{ + u64 jiffies = bpf_jiffies64(); + + return fq_flow_is_detached(flow) && + ((s64)(jiffies - (flow->age + FQ_GC_AGE)) > 0); +} + +static int +fq_remove_flows(struct bpf_map *flow_map, u64 *hash, + struct fq_stashed_flow *sflow, struct remove_flows_ctx *ctx) +{ + if (sflow->flow && + (!ctx->gc_only || fq_gc_candidate(sflow->flow))) { + bpf_map_delete_elem(flow_map, hash); + ctx->reset_cnt++; + } + + return ctx->reset_cnt < ctx->reset_max ? 
0 : 1; +} + +static void fq_gc(void) +{ + struct remove_flows_ctx cb_ctx = { + .gc_only = true, + .reset_cnt = 0, + .reset_max = FQ_GC_MAX, + }; + + bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &cb_ctx, 0); +} + +SEC("struct_ops/bpf_fq_reset") +void BPF_PROG(bpf_fq_reset, struct Qdisc *sch) +{ + struct unset_throttled_flows_ctx utf_ctx = { + .unset_all = true, + }; + struct remove_flows_ctx rf_ctx = { + .gc_only = false, + .reset_cnt = 0, + .reset_max = NUM_QUEUE, + }; + struct fq_stashed_flow *sflow; + u64 hash = 0; + + sch->q.qlen = 0; + sch->qstats.backlog = 0; + + bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &rf_ctx, 0); + + rf_ctx.reset_cnt = 0; + bpf_for_each_map_elem(&fq_prio_flows, fq_remove_flows, &rf_ctx, 0); + fq_new_flow(&fq_prio_flows, &sflow, hash); + + bpf_loop(NUM_QUEUE, fq_remove_flows_in_list, NULL, 0); + q.new_flow_cnt = 0; + q.old_flow_cnt = 0; + + bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &utf_ctx, 0); +} + +SEC("struct_ops/bpf_fq_init") +int BPF_PROG(bpf_fq_init, struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = sch->dev_queue->dev; + u32 psched_mtu = dev->mtu + dev->hard_header_len; + struct fq_stashed_flow *sflow; + u64 hash = 0; + + if (fq_new_flow(&fq_prio_flows, &sflow, hash) < 0) + return -ENOMEM; + + sch->limit = 10000; + q.initial_quantum = 10 * psched_mtu; + q.quantum = 2 * psched_mtu; + q.flow_refill_delay = 40; + q.flow_plimit = 100; + q.horizon = 10ULL * NSEC_PER_SEC; + q.horizon_drop = 1; + q.orphan_mask = 1024 - 1; + q.timer_slack = 10 * NSEC_PER_USEC; + q.time_next_delayed_flow = ~0ULL; + q.unthrottle_latency_ns = 0ULL; + q.new_flow_cnt = 0; + q.old_flow_cnt = 0; + + return 0; +} + +SEC("struct_ops") +void BPF_PROG(bpf_fq_destroy, struct Qdisc *sch) +{ +} + +SEC(".struct_ops") +struct Qdisc_ops fq = { + .enqueue = (void *)bpf_fq_enqueue, + .dequeue = (void *)bpf_fq_dequeue, + .reset = (void *)bpf_fq_reset, + .init = (void *)bpf_fq_init, + .destroy = (void *)bpf_fq_destroy, + .id = "bpf_fq", +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 659694162739..17db400f0e0d 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -128,6 +128,7 @@ #define sk_refcnt __sk_common.skc_refcnt #define sk_state __sk_common.skc_state #define sk_net __sk_common.skc_net +#define sk_rcv_saddr __sk_common.skc_rcv_saddr #define sk_v6_daddr __sk_common.skc_v6_daddr #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr #define sk_flags __sk_common.skc_flags diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c index 96531b0d9d55..8f483337e103 100644 --- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c @@ -17,6 +17,12 @@ static bool ipv6_addr_loopback(const struct in6_addr *a) a->s6_addr32[2] | (a->s6_addr32[3] ^ bpf_htonl(1))) == 0; } +static bool ipv4_addr_loopback(__be32 a) +{ + return a == bpf_ntohl(0x7f000001); +} + +volatile const unsigned int sf; volatile const __u16 ports[2]; unsigned int bucket[2]; @@ -26,16 +32,20 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx) struct sock *sk = (struct sock *)ctx->sk_common; struct inet_hashinfo *hinfo; unsigned int hash; + __u64 sock_cookie; struct net *net; int idx; if (!sk) return 0; + sock_cookie = bpf_get_socket_cookie(sk); sk = bpf_core_cast(sk, struct sock); - if 
(sk->sk_family != AF_INET6 || + if (sk->sk_family != sf || sk->sk_state != TCP_LISTEN || - !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr)) + sk->sk_family == AF_INET6 ? + !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) : + !ipv4_addr_loopback(sk->sk_rcv_saddr)) return 0; if (sk->sk_num == ports[0]) @@ -52,6 +62,7 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx) hinfo = net->ipv4.tcp_death_row.hashinfo; bucket[idx] = hash & hinfo->lhash2_mask; bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx)); + bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie)); return 0; } @@ -63,14 +74,18 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx) { struct sock *sk = (struct sock *)ctx->udp_sk; struct udp_table *udptable; + __u64 sock_cookie; int idx; if (!sk) return 0; + sock_cookie = bpf_get_socket_cookie(sk); sk = bpf_core_cast(sk, struct sock); - if (sk->sk_family != AF_INET6 || - !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr)) + if (sk->sk_family != sf || + sk->sk_family == AF_INET6 ? + !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) : + !ipv4_addr_loopback(sk->sk_rcv_saddr)) return 0; if (sk->sk_num == ports[0]) @@ -84,6 +99,7 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx) udptable = sk->sk_net.net->ipv4.udp_table; bucket[idx] = udp_sk(sk)->udp_portaddr_hash & udptable->mask; bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx)); + bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie)); return 0; } diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c index ccde6a4c6319..683306db8594 100644 --- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c +++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c @@ -4,6 +4,8 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/errno.h> #include "xsk_xdp_common.h" struct { @@ -14,6 +16,7 @@ struct { } xsk SEC(".maps"); static unsigned int idx; +int adjust_value = 0; int count = 0; SEC("xdp.frags") int xsk_def_prog(struct xdp_md *xdp) @@ -70,4 +73,51 @@ SEC("xdp") int xsk_xdp_shared_umem(struct xdp_md *xdp) return bpf_redirect_map(&xsk, idx, XDP_DROP); } +SEC("xdp.frags") int xsk_xdp_adjust_tail(struct xdp_md *xdp) +{ + __u32 buff_len, curr_buff_len; + int ret; + + buff_len = bpf_xdp_get_buff_len(xdp); + if (buff_len == 0) + return XDP_DROP; + + ret = bpf_xdp_adjust_tail(xdp, adjust_value); + if (ret < 0) { + /* Handle unsupported cases */ + if (ret == -EOPNOTSUPP) { + /* Set adjust_value to -EOPNOTSUPP to indicate to userspace that this case + * is unsupported + */ + adjust_value = -EOPNOTSUPP; + return bpf_redirect_map(&xsk, 0, XDP_DROP); + } + + return XDP_DROP; + } + + curr_buff_len = bpf_xdp_get_buff_len(xdp); + if (curr_buff_len != buff_len + adjust_value) + return XDP_DROP; + + if (curr_buff_len > buff_len) { + __u32 *pkt_data = (void *)(long)xdp->data; + __u32 len, words_to_end, seq_num; + + len = curr_buff_len - PKT_HDR_ALIGN; + words_to_end = len / sizeof(*pkt_data) - 1; + seq_num = words_to_end; + + /* Convert sequence number to network byte order. Store this in the last 4 bytes of + * the packet. Use 'adjust_value' to determine the position at the end of the + * packet for storing the sequence number. 
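+	 *
+	 * As an illustrative sketch (not part of the test logic itself),
+	 * the receiving side can undo this with the mirror-image load,
+	 * where 'pkt' and 'frame_len' are stand-in names for the received
+	 * frame and its grown length:
+	 *
+	 *   __u32 seq_be, words_to_end;
+	 *
+	 *   memcpy(&seq_be, pkt + frame_len - sizeof(seq_be), sizeof(seq_be));
+	 *   words_to_end = ntohl(seq_be);
+	 *
+	 * The expected value is (frame_len - PKT_HDR_ALIGN) / sizeof(__u32) - 1,
+	 * matching the computation above.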
+ */ + seq_num = __constant_htonl(words_to_end); + bpf_xdp_store_bytes(xdp, curr_buff_len - sizeof(seq_num), &seq_num, + sizeof(seq_num)); + } + + return bpf_redirect_map(&xsk, 0, XDP_DROP); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/xsk_xdp_common.h b/tools/testing/selftests/bpf/xsk_xdp_common.h index 5a6f36f07383..45810ff552da 100644 --- a/tools/testing/selftests/bpf/xsk_xdp_common.h +++ b/tools/testing/selftests/bpf/xsk_xdp_common.h @@ -4,6 +4,7 @@ #define XSK_XDP_COMMON_H_ #define MAX_SOCKETS 2 +#define PKT_HDR_ALIGN (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */ struct xdp_info { __u64 count; diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 11f047b8af75..0ced4026ee44 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -524,6 +524,8 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, test->nb_sockets = 1; test->fail = false; test->set_ring = false; + test->adjust_tail = false; + test->adjust_tail_support = false; test->mtu = MAX_ETH_PKT_SIZE; test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog; test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk; @@ -757,14 +759,15 @@ static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream) return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len); } -static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) +static void pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len) { - struct pkt_stream *pkt_stream; + ifobj->xsk->pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); +} - pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); - test->ifobj_tx->xsk->pkt_stream = pkt_stream; - pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); - test->ifobj_rx->xsk->pkt_stream = pkt_stream; +static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) +{ + pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len); + pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len); } static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len, @@ -991,6 +994,31 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) return true; } +static bool is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx) +{ + struct bpf_map *data_map; + int adjust_value = 0; + int key = 0; + int ret; + + data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss"); + if (!data_map || !bpf_map__is_internal(data_map)) { + ksft_print_msg("Error: could not find bss section of XDP program\n"); + exit_with_error(errno); + } + + ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &key, &adjust_value); + if (ret) { + ksft_print_msg("Error: bpf_map_lookup_elem failed with error %d\n", ret); + exit_with_error(errno); + } + + /* Set the 'adjust_value' variable to -EOPNOTSUPP in the XDP program if the adjust_tail + * helper is not supported. Skip the adjust_tail test case in this scenario. 
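+	 *
+	 * Note that 'adjust_value' is legitimately negative for the shrink
+	 * cases (-4, or -XSK_UMEM__MAX_FRAME_SIZE for the multi-buffer
+	 * variant), so the sentinel checked here is specifically -EOPNOTSUPP
+	 * rather than any negative value. A rough sketch of the round trip,
+	 * with 'supported' as an illustrative name:
+	 *
+	 *   skel_rx->bss->adjust_value = adjust_value;  // userspace, before run
+	 *   adjust_value = -EOPNOTSUPP;                 // XDP prog, on failure
+	 *   supported = adjust_value != -EOPNOTSUPP;    // userspace, after run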
+ */ + return adjust_value != -EOPNOTSUPP; +} + static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb, u32 bytes_processed) { @@ -1767,8 +1795,13 @@ static void *worker_testapp_validate_rx(void *arg) if (!err && ifobject->validation_func) err = ifobject->validation_func(ifobject); - if (err) - report_failure(test); + + if (err) { + if (test->adjust_tail && !is_adjust_tail_supported(ifobject->xdp_progs)) + test->adjust_tail_support = false; + else + report_failure(test); + } pthread_exit(NULL); } @@ -2515,6 +2548,71 @@ static int testapp_hw_sw_max_ring_size(struct test_spec *test) return testapp_validate_traffic(test); } +static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value) +{ + struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; + struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; + + test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_adjust_tail, + skel_tx->progs.xsk_xdp_adjust_tail, + skel_rx->maps.xsk, skel_tx->maps.xsk); + + skel_rx->bss->adjust_value = adjust_value; + + return testapp_validate_traffic(test); +} + +static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len) +{ + int ret; + + test->adjust_tail_support = true; + test->adjust_tail = true; + test->total_steps = 1; + + pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len); + pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value); + + ret = testapp_xdp_adjust_tail(test, value); + if (ret) + return ret; + + if (!test->adjust_tail_support) { + ksft_test_result_skip("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n", + mode_string(test), busy_poll_string(test)); + return TEST_SKIP; + } + + return 0; +} + +static int testapp_adjust_tail_shrink(struct test_spec *test) +{ + /* Shrink by 4 bytes for testing purpose */ + return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2); +} + +static int testapp_adjust_tail_shrink_mb(struct test_spec *test) +{ + test->mtu = MAX_ETH_JUMBO_SIZE; + /* Shrink by the frag size */ + return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2); +} + +static int testapp_adjust_tail_grow(struct test_spec *test) +{ + /* Grow by 4 bytes for testing purpose */ + return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2); +} + +static int testapp_adjust_tail_grow_mb(struct test_spec *test) +{ + test->mtu = MAX_ETH_JUMBO_SIZE; + /* Grow by (frag_size - last_frag_Size) - 1 to stay inside the last fragment */ + return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1, + XSK_UMEM__LARGE_FRAME_SIZE * 2); +} + static void run_pkt_test(struct test_spec *test) { int ret; @@ -2621,6 +2719,10 @@ static const struct test_spec tests[] = { {.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags}, {.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size}, {.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size}, + {.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink}, + {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb}, + {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow}, + {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb}, }; static void print_tests(void) diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index e46e823f6a1a..67fc44b2813b 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ 
b/tools/testing/selftests/bpf/xskxceiver.h @@ -173,6 +173,8 @@ struct test_spec { u16 nb_sockets; bool fail; bool set_ring; + bool adjust_tail; + bool adjust_tail_support; enum test_mode mode; char name[MAX_TEST_NAME_SIZE]; }; diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore index ec746f374e85..d634d8395d90 100644 --- a/tools/testing/selftests/drivers/net/.gitignore +++ b/tools/testing/selftests/drivers/net/.gitignore @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -xdp_helper +napi_id_helper diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 0c95bd944d56..17db31aa58c9 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -6,9 +6,12 @@ TEST_INCLUDES := $(wildcard lib/py/*.py) \ ../../net/net_helper.sh \ ../../net/lib.sh \ -TEST_GEN_FILES := xdp_helper +TEST_GEN_FILES := \ + napi_id_helper \ +# end of TEST_GEN_FILES TEST_PROGS := \ + napi_id.py \ netcons_basic.sh \ netcons_fragmented_msg.sh \ netcons_overflow.sh \ diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 07cddb19ba35..df2c047ffa90 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -15,12 +15,11 @@ TEST_PROGS = \ iou-zcrx.py \ irq.py \ loopback.sh \ - nic_link_layer.py \ - nic_performance.py \ pp_alloc_fail.py \ rss_ctx.py \ rss_input_xfrm.py \ tso.py \ + xsk_reconfig.py \ # TEST_FILES := \ diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index 3947e9157115..7fc686cf47a2 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +from os import path from lib.py import ksft_run, ksft_exit from lib.py import ksft_eq, KsftSkipEx from lib.py import NetDrvEpEnv @@ -10,8 +11,7 @@ from lib.py import ksft_disruptive def require_devmem(cfg): if not hasattr(cfg, "_devmem_probed"): - port = rand_port() - probe_command = f"./ncdevmem -f {cfg.ifname}" + probe_command = f"{cfg.bin_local} -f {cfg.ifname}" cfg._devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0 cfg._devmem_probed = True @@ -25,7 +25,7 @@ def check_rx(cfg) -> None: require_devmem(cfg) port = rand_port() - listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port}" + listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port}" with bkg(listen_cmd) as socat: wait_port_listen(port) @@ -34,9 +34,27 @@ def check_rx(cfg) -> None: ksft_eq(socat.stdout.strip(), "hello\nworld") +@ksft_disruptive +def check_tx(cfg) -> None: + cfg.require_ipver("6") + require_devmem(cfg) + + port = rand_port() + listen_cmd = f"socat -U - TCP6-LISTEN:{port}" + + with bkg(listen_cmd, exit_wait=True) as socat: + wait_port_listen(port) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port}", host=cfg.remote, shell=True) + + ksft_eq(socat.stdout.strip(), "hello\nworld") + + def main() -> None: with NetDrvEpEnv(__file__) as cfg: - ksft_run([check_rx], + cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem") + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + ksft_run([check_rx, check_tx], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c 
b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c index c26b4180eddd..62456df947bc 100644 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c @@ -37,8 +37,8 @@ #include <liburing.h> -#define PAGE_SIZE (4096) -#define AREA_SIZE (8192 * PAGE_SIZE) +static long page_size; +#define AREA_SIZE (8192 * page_size) #define SEND_SIZE (512 * 4096) #define min(a, b) \ ({ \ @@ -66,7 +66,7 @@ static int cfg_oneshot_recvs; static int cfg_send_size = SEND_SIZE; static struct sockaddr_in6 cfg_addr; -static char payload[SEND_SIZE] __attribute__((aligned(PAGE_SIZE))); +static char *payload; static void *area_ptr; static void *ring_ptr; static size_t ring_size; @@ -114,8 +114,8 @@ static inline size_t get_refill_ring_size(unsigned int rq_entries) ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe); /* add space for the header (head/tail/etc.) */ - ring_size += PAGE_SIZE; - return ALIGN_UP(ring_size, 4096); + ring_size += page_size; + return ALIGN_UP(ring_size, page_size); } static void setup_zcrx(struct io_uring *ring) @@ -219,7 +219,7 @@ static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe) connfd = cqe->res; if (cfg_oneshot) - add_recvzc_oneshot(ring, connfd, PAGE_SIZE); + add_recvzc_oneshot(ring, connfd, page_size); else add_recvzc(ring, connfd); } @@ -245,7 +245,7 @@ static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe) if (cfg_oneshot) { if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs) { - add_recvzc_oneshot(ring, connfd, PAGE_SIZE); + add_recvzc_oneshot(ring, connfd, page_size); cfg_oneshot_recvs--; } } else if (!(cqe->flags & IORING_CQE_F_MORE)) { @@ -260,7 +260,7 @@ static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe) for (i = 0; i < n; i++) { if (*(data + i) != payload[(received + i)]) - error(1, 0, "payload mismatch at ", i); + error(1, 0, "payload mismatch at %d", i); } received += n; @@ -354,7 +354,7 @@ static void run_client(void) chunk = min_t(ssize_t, cfg_payload_len, to_send); res = send(fd, src, chunk, 0); if (res < 0) - error(1, 0, "send(): %d", sent); + error(1, 0, "send(): %zd", sent); sent += res; to_send -= res; } @@ -370,7 +370,7 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) { - const int max_payload_len = sizeof(payload) - + const int max_payload_len = SEND_SIZE - sizeof(struct ipv6hdr) - sizeof(struct tcphdr) - 40 /* max tcp options */; @@ -443,6 +443,13 @@ int main(int argc, char **argv) const char *cfg_test = argv[argc - 1]; int i; + page_size = sysconf(_SC_PAGESIZE); + if (page_size < 0) + return 1; + + if (posix_memalign((void **)&payload, page_size, SEND_SIZE)) + return 1; + parse_opts(argc, argv); for (i = 0; i < SEND_SIZE; i++) diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index 6a0378e06cab..9c03fd777f3d 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -5,13 +5,12 @@ import re from os import path from lib.py import ksft_run, ksft_exit from lib.py import NetDrvEpEnv -from lib.py import bkg, cmd, ethtool, wait_port_listen +from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen -def _get_rx_ring_entries(cfg): - output = ethtool(f"-g {cfg.ifname}", host=cfg.remote).stdout - values = re.findall(r'RX:\s+(\d+)', output) - return int(values[1]) +def _get_current_settings(cfg): + output = ethtool(f"-g {cfg.ifname}", json=True, 
host=cfg.remote)[0] + return (output['rx'], output['hds-thresh']) def _get_combined_channels(cfg): @@ -20,8 +19,21 @@ def _get_combined_channels(cfg): return int(values[1]) -def _set_flow_rule(cfg, chan): - output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port 9999 action {chan}", host=cfg.remote).stdout +def _create_rss_ctx(cfg, chan): + output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1", host=cfg.remote).stdout + values = re.search(r'New RSS context is (\d+)', output).group(1) + ctx_id = int(values) + return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}", host=cfg.remote)) + + +def _set_flow_rule(cfg, port, chan): + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}", host=cfg.remote).stdout + values = re.search(r'ID (\d+)', output).group(1) + return int(values) + + +def _set_flow_rule_rss(cfg, port, ctx_id): + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}", host=cfg.remote).stdout values = re.search(r'ID (\d+)', output).group(1) return int(values) @@ -32,24 +44,29 @@ def test_zcrx(cfg) -> None: combined_chans = _get_combined_channels(cfg) if combined_chans < 2: raise KsftSkipEx('at least 2 combined channels required') - rx_ring = _get_rx_ring_entries(cfg) - - try: - ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) - ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) - ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) - flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) - - rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1}" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 12840" - with bkg(rx_cmd, host=cfg.remote, exit_wait=True): - wait_port_listen(9999, proto="tcp", host=cfg.remote) - cmd(tx_cmd) - finally: - ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) - ethtool(f"-X {cfg.ifname} default", host=cfg.remote) - ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) - ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + (rx_ring, hds_thresh) = _get_current_settings(cfg) + port = rand_port() + + ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + + ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote) + + ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) + defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote) + + flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + + rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" + tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840" + with bkg(rx_cmd, host=cfg.remote, exit_wait=True): + wait_port_listen(port, proto="tcp", host=cfg.remote) + cmd(tx_cmd) def test_zcrx_oneshot(cfg) -> None: @@ -58,24 +75,61 @@ def test_zcrx_oneshot(cfg) -> None: combined_chans = _get_combined_channels(cfg) if combined_chans < 2: raise KsftSkipEx('at least 2 combined channels required') - rx_ring = _get_rx_ring_entries(cfg) - - try: - ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) - ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) - ethtool(f"-X {cfg.ifname} equal 
{combined_chans - 1}", host=cfg.remote) - flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) - - rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1} -o 4" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 4096 -z 16384" - with bkg(rx_cmd, host=cfg.remote, exit_wait=True): - wait_port_listen(9999, proto="tcp", host=cfg.remote) - cmd(tx_cmd) - finally: - ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) - ethtool(f"-X {cfg.ifname} default", host=cfg.remote) - ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) - ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + (rx_ring, hds_thresh) = _get_current_settings(cfg) + port = rand_port() + + ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + + ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote) + + ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) + defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote) + + flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + + rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4" + tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 4096 -z 16384" + with bkg(rx_cmd, host=cfg.remote, exit_wait=True): + wait_port_listen(port, proto="tcp", host=cfg.remote) + cmd(tx_cmd) + + +def test_zcrx_rss(cfg) -> None: + cfg.require_ipver('6') + + combined_chans = _get_combined_channels(cfg) + if combined_chans < 2: + raise KsftSkipEx('at least 2 combined channels required') + (rx_ring, hds_thresh) = _get_current_settings(cfg) + port = rand_port() + + ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + + ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote) + + ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) + defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote) + + (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1) + flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + + rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" + tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840" + with bkg(rx_cmd, host=cfg.remote, exit_wait=True): + wait_port_listen(port, proto="tcp", host=cfg.remote) + cmd(tx_cmd) def main() -> None: diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index 399789a9676a..b582885786f5 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -9,7 +9,6 @@ try: sys.path.append(KSFT_DIR.as_posix()) from net.lib.py import * from drivers.net.lib.py import * - from .linkconfig import LinkConfig except ModuleNotFoundError as e: ksft_pr("Failed importing `net` 
library from kernel sources") ksft_pr(str(e)) diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py b/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py deleted file mode 100644 index 79fde603cbbc..000000000000 --- a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py +++ /dev/null @@ -1,222 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -from lib.py import cmd, ethtool, ip -from lib.py import ksft_pr, ksft_eq, KsftSkipEx -from typing import Optional -import re -import time -import json - -#The LinkConfig class is implemented to handle the link layer configurations. -#Required minimum ethtool version is 6.10 - -class LinkConfig: - """Class for handling the link layer configurations""" - def __init__(self, cfg: object) -> None: - self.cfg = cfg - self.partner_netif = self.get_partner_netif_name() - - """Get the initial link configuration of local interface""" - self.common_link_modes = self.get_common_link_modes() - - def get_partner_netif_name(self) -> Optional[str]: - partner_netif = None - try: - if not self.verify_link_up(): - return None - """Get partner interface name""" - partner_json_output = ip("addr show", json=True, host=self.cfg.remote) - for interface in partner_json_output: - for addr in interface.get('addr_info', []): - if addr.get('local') == self.cfg.remote_addr: - partner_netif = interface['ifname'] - ksft_pr(f"Partner Interface name: {partner_netif}") - if partner_netif is None: - ksft_pr("Unable to get the partner interface name") - except Exception as e: - print(f"Unexpected error occurred while getting partner interface name: {e}") - self.partner_netif = partner_netif - return partner_netif - - def verify_link_up(self) -> bool: - """Verify whether the local interface link is up""" - with open(f"/sys/class/net/{self.cfg.ifname}/operstate", "r") as fp: - link_state = fp.read().strip() - - if link_state == "down": - ksft_pr(f"Link state of interface {self.cfg.ifname} is DOWN") - return False - else: - return True - - def reset_interface(self, local: bool = True, remote: bool = True) -> bool: - ksft_pr("Resetting interfaces in local and remote") - if remote: - if self.verify_link_up(): - if self.partner_netif is not None: - ifname = self.partner_netif - link_up_cmd = f"ip link set up {ifname}" - link_down_cmd = f"ip link set down {ifname}" - reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}" - try: - cmd(reset_cmd, host=self.cfg.remote) - except Exception as e: - ksft_pr(f"Unexpected error occurred while resetting remote: {e}") - else: - ksft_pr("Partner interface not available") - if local: - ifname = self.cfg.ifname - link_up_cmd = f"ip link set up {ifname}" - link_down_cmd = f"ip link set down {ifname}" - reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}" - try: - cmd(reset_cmd) - except Exception as e: - ksft_pr(f"Unexpected error occurred while resetting local: {e}") - time.sleep(10) - if self.verify_link_up() and self.get_ethtool_field("link-detected"): - ksft_pr("Local and remote interfaces reset to original state") - return True - else: - ksft_pr("Error occurred after resetting interfaces. 
Link is DOWN.") - return False - - def set_speed_and_duplex(self, speed: str, duplex: str, autoneg: bool = True) -> bool: - """Set the speed and duplex state for the interface""" - autoneg_state = "on" if autoneg is True else "off" - process = None - try: - process = ethtool(f"--change {self.cfg.ifname} speed {speed} duplex {duplex} autoneg {autoneg_state}") - except Exception as e: - ksft_pr(f"Unexpected error occurred while setting speed/duplex: {e}") - if process is None or process.ret != 0: - return False - else: - ksft_pr(f"Speed: {speed} Mbps, Duplex: {duplex} set for Interface: {self.cfg.ifname}") - return True - - def verify_speed_and_duplex(self, expected_speed: str, expected_duplex: str) -> bool: - if not self.verify_link_up(): - return False - """Verifying the speed and duplex state for the interface""" - with open(f"/sys/class/net/{self.cfg.ifname}/speed", "r") as fp: - actual_speed = fp.read().strip() - with open(f"/sys/class/net/{self.cfg.ifname}/duplex", "r") as fp: - actual_duplex = fp.read().strip() - - ksft_eq(actual_speed, expected_speed) - ksft_eq(actual_duplex, expected_duplex) - return True - - def set_autonegotiation_state(self, state: str, remote: bool = False) -> bool: - common_link_modes = self.common_link_modes - speeds, duplex_modes = self.get_speed_duplex_values(self.common_link_modes) - speed = speeds[0] - duplex = duplex_modes[0] - if not speed or not duplex: - ksft_pr("No speed or duplex modes found") - return False - - speed_duplex_cmd = f"speed {speed} duplex {duplex}" if state == "off" else "" - if remote: - if not self.verify_link_up(): - return False - """Set the autonegotiation state for the partner""" - command = f"-s {self.partner_netif} {speed_duplex_cmd} autoneg {state}" - partner_autoneg_change = None - """Set autonegotiation state for interface in remote pc""" - try: - partner_autoneg_change = ethtool(command, host=self.cfg.remote) - except Exception as e: - ksft_pr(f"Unexpected error occurred while changing auto-neg in remote: {e}") - if partner_autoneg_change is None or partner_autoneg_change.ret != 0: - ksft_pr(f"Not able to set autoneg parameter for interface {self.partner_netif}.") - return False - ksft_pr(f"Autoneg set as {state} for {self.partner_netif}") - else: - """Set the autonegotiation state for the interface""" - try: - process = ethtool(f"-s {self.cfg.ifname} {speed_duplex_cmd} autoneg {state}") - if process.ret != 0: - ksft_pr(f"Not able to set autoneg parameter for interface {self.cfg.ifname}") - return False - except Exception as e: - ksft_pr(f"Unexpected error occurred while changing auto-neg in local: {e}") - return False - ksft_pr(f"Autoneg set as {state} for {self.cfg.ifname}") - return True - - def check_autoneg_supported(self, remote: bool = False) -> bool: - if not remote: - local_autoneg = self.get_ethtool_field("supports-auto-negotiation") - if local_autoneg is None: - ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.cfg.ifname}") - """Return autoneg status of the local interface""" - return local_autoneg - else: - if not self.verify_link_up(): - raise KsftSkipEx("Link is DOWN") - """Check remote auto-negotiation support status""" - partner_autoneg = False - if self.partner_netif is not None: - partner_autoneg = self.get_ethtool_field("supports-auto-negotiation", remote=True) - if partner_autoneg is None: - ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.partner_netif}") - return partner_autoneg - - def get_common_link_modes(self) -> set[str]: - common_link_modes = [] - 
"""Populate common link modes""" - link_modes = self.get_ethtool_field("supported-link-modes") - partner_link_modes = self.get_ethtool_field("link-partner-advertised-link-modes") - if link_modes is None: - raise KsftSkipEx(f"Link modes not available for {self.cfg.ifname}") - if partner_link_modes is None: - raise KsftSkipEx(f"Partner link modes not available for {self.cfg.ifname}") - common_link_modes = set(link_modes) and set(partner_link_modes) - return common_link_modes - - def get_speed_duplex_values(self, link_modes: list[str]) -> tuple[list[str], list[str]]: - speed = [] - duplex = [] - """Check the link modes""" - for data in link_modes: - parts = data.split('/') - speed_value = re.match(r'\d+', parts[0]) - if speed_value: - speed.append(speed_value.group()) - else: - ksft_pr(f"No speed value found for interface {self.ifname}") - return None, None - duplex.append(parts[1].lower()) - return speed, duplex - - def get_ethtool_field(self, field: str, remote: bool = False) -> Optional[str]: - process = None - if not remote: - """Get the ethtool field value for the local interface""" - try: - process = ethtool(self.cfg.ifname, json=True) - except Exception as e: - ksft_pr("Required minimum ethtool version is 6.10") - ksft_pr(f"Unexpected error occurred while getting ethtool field in local: {e}") - return None - else: - if not self.verify_link_up(): - return None - """Get the ethtool field value for the remote interface""" - self.cfg.require_cmd("ethtool", remote=True) - if self.partner_netif is None: - ksft_pr(f"Partner interface name is unavailable.") - return None - try: - process = ethtool(self.partner_netif, json=True, host=self.cfg.remote) - except Exception as e: - ksft_pr("Required minimum ethtool version is 6.10") - ksft_pr(f"Unexpected error occurred while getting ethtool field in remote: {e}") - return None - json_data = process[0] - """Check if the field exist in the json data""" - if field not in json_data: - raise KsftSkipEx(f'Field {field} does not exist in the output of interface {json_data["ifname"]}') - return json_data[field] diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 9d48004ff1a1..ca723722a810 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -9,22 +9,31 @@ * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 * * On client: - * echo -n "hello\nworld" | nc -s <server IP> 5201 -p 5201 + * echo -n "hello\nworld" | \ + * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1 * - * Test data validation: + * Note this is compatible with regular netcat. i.e. the sender or receiver can + * be replaced with regular netcat to test the RX or TX path in isolation. + * + * Test data validation (devmem TCP on RX only): * * On server: * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7 * * On client: * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \ - * tr \\n \\0 | \ - * head -c 5G | \ + * head -c 1G | \ * nc <server IP> 5201 -p 5201 * + * Test data validation (devmem TCP on RX and TX, validation happens on RX): * - * Note this is compatible with regular netcat. i.e. the sender or receiver can - * be replaced with regular netcat to test the RX or TX path in isolation. 
+ * On server: + * ncdevmem -s <server IP> [-c <client IP>] -l -p 5201 -v 8 -f eth1 + * + * On client: + * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06\\x07) | \ + * head -c 1M | \ + * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1 */ #define _GNU_SOURCE #define __EXPORTED_HEADERS__ @@ -40,15 +49,18 @@ #include <fcntl.h> #include <malloc.h> #include <error.h> +#include <poll.h> #include <arpa/inet.h> #include <sys/socket.h> #include <sys/mman.h> #include <sys/ioctl.h> #include <sys/syscall.h> +#include <sys/time.h> #include <linux/memfd.h> #include <linux/dma-buf.h> +#include <linux/errqueue.h> #include <linux/udmabuf.h> #include <linux/types.h> #include <linux/netlink.h> @@ -79,6 +91,8 @@ static int num_queues = -1; static char *ifname; static unsigned int ifindex; static unsigned int dmabuf_id; +static uint32_t tx_dmabuf_id; +static int waittime_ms = 500; struct memory_buffer { int fd; @@ -92,6 +106,8 @@ struct memory_buffer { struct memory_provider { struct memory_buffer *(*alloc)(size_t size); void (*free)(struct memory_buffer *ctx); + void (*memcpy_to_device)(struct memory_buffer *dst, size_t off, + void *src, int n); void (*memcpy_from_device)(void *dst, struct memory_buffer *src, size_t off, int n); }; @@ -152,6 +168,20 @@ static void udmabuf_free(struct memory_buffer *ctx) free(ctx); } +static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off, + void *src, int n) +{ + struct dma_buf_sync sync = {}; + + sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE; + ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync); + + memcpy(dst->buf_mem + off, src, n); + + sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE; + ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync); +} + static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src, size_t off, int n) { @@ -169,6 +199,7 @@ static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src, static struct memory_provider udmabuf_memory_provider = { .alloc = udmabuf_alloc, .free = udmabuf_free, + .memcpy_to_device = udmabuf_memcpy_to_device, .memcpy_from_device = udmabuf_memcpy_from_device, }; @@ -187,14 +218,16 @@ void validate_buffer(void *line, size_t size) { static unsigned char seed = 1; unsigned char *ptr = line; - int errors = 0; + unsigned char expected; + static int errors; size_t i; for (i = 0; i < size; i++) { - if (ptr[i] != seed) { + expected = seed ? 
seed : '\n'; + if (ptr[i] != expected) { fprintf(stderr, "Failed validation: expected=%u, actual=%u, index=%lu\n", - seed, ptr[i], i); + expected, ptr[i], i); errors++; if (errors > 20) error(1, 0, "validation failed."); @@ -393,6 +426,49 @@ err_close: return -1; } +static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd, + struct ynl_sock **ys) +{ + struct netdev_bind_tx_req *req = NULL; + struct netdev_bind_tx_rsp *rsp = NULL; + struct ynl_error yerr; + + *ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!*ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = netdev_bind_tx_req_alloc(); + netdev_bind_tx_req_set_ifindex(req, ifindex); + netdev_bind_tx_req_set_fd(req, dmabuf_fd); + + rsp = netdev_bind_tx(*ys, req); + if (!rsp) { + perror("netdev_bind_tx"); + goto err_close; + } + + if (!rsp->_present.id) { + perror("id not present"); + goto err_close; + } + + fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id); + tx_dmabuf_id = rsp->id; + + netdev_bind_tx_req_free(req); + netdev_bind_tx_rsp_free(rsp); + + return 0; + +err_close: + fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg); + netdev_bind_tx_req_free(req); + ynl_sock_destroy(*ys); + return -1; +} + static void enable_reuseaddr(int fd) { int opt = 1; @@ -447,7 +523,7 @@ static struct netdev_queue_id *create_queues(void) return queues; } -int do_server(struct memory_buffer *mem) +static int do_server(struct memory_buffer *mem) { char ctrl_data[sizeof(int) * 20000]; struct netdev_queue_id *queues; @@ -674,6 +750,206 @@ void run_devmem_tests(void) provider->free(mem); } +static uint64_t gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000ULL); +} + +static int do_poll(int fd) +{ + struct pollfd pfd; + int ret; + + pfd.revents = 0; + pfd.fd = fd; + + ret = poll(&pfd, 1, waittime_ms); + if (ret == -1) + error(1, errno, "poll"); + + return ret && (pfd.revents & POLLERR); +} + +static void wait_compl(int fd) +{ + int64_t tstop = gettimeofday_ms() + waittime_ms; + char control[CMSG_SPACE(100)] = {}; + struct sock_extended_err *serr; + struct msghdr msg = {}; + struct cmsghdr *cm; + __u32 hi, lo; + int ret; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + while (gettimeofday_ms() < tstop) { + if (!do_poll(fd)) + continue; + + ret = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (ret < 0) { + if (errno == EAGAIN) + continue; + error(1, errno, "recvmsg(MSG_ERRQUEUE)"); + return; + } + if (msg.msg_flags & MSG_CTRUNC) + error(1, 0, "MSG_CTRUNC\n"); + + for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { + if (cm->cmsg_level != SOL_IP && + cm->cmsg_level != SOL_IPV6) + continue; + if (cm->cmsg_level == SOL_IP && + cm->cmsg_type != IP_RECVERR) + continue; + if (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type != IPV6_RECVERR) + continue; + + serr = (void *)CMSG_DATA(cm); + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) + error(1, 0, "wrong origin %u", serr->ee_origin); + if (serr->ee_errno != 0) + error(1, 0, "wrong errno %d", serr->ee_errno); + + hi = serr->ee_data; + lo = serr->ee_info; + + fprintf(stderr, "tx complete [%d,%d]\n", lo, hi); + return; + } + } + + error(1, 0, "did not receive tx completion"); +} + +static int do_client(struct memory_buffer *mem) +{ + char ctrl_data[CMSG_SPACE(sizeof(__u32))]; + struct sockaddr_in6 server_sin; + struct sockaddr_in6 client_sin; + struct ynl_sock *ys = NULL; + struct msghdr msg = {}; + ssize_t line_size = 0; + struct cmsghdr *cmsg; + struct iovec iov[2]; + char 
*line = NULL;
+	unsigned long mid;
+	size_t len = 0;
+	int socket_fd;
+	__u32 ddmabuf;
+	int opt = 1;
+	int ret;
+
+	ret = parse_address(server_ip, atoi(port), &server_sin);
+	if (ret < 0)
+		error(1, 0, "parse server address");
+
+	socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (socket_fd < 0)
+		error(1, errno, "create socket");
+
+	enable_reuseaddr(socket_fd);
+
+	ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname,
+			 strlen(ifname) + 1);
+	if (ret)
+		error(1, errno, "bindtodevice");
+
+	if (bind_tx_queue(ifindex, mem->fd, &ys))
+		error(1, 0, "Failed to bind");
+
+	if (client_ip) {
+		ret = parse_address(client_ip, atoi(port), &client_sin);
+		if (ret < 0)
+			error(1, 0, "parse client address");
+
+		ret = bind(socket_fd, (struct sockaddr *)&client_sin, sizeof(client_sin));
+		if (ret)
+			error(1, errno, "bind");
+	}
+
+	ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt));
+	if (ret)
+		error(1, errno, "set sock opt");
+
+	fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip,
+		ntohs(server_sin.sin6_port), ifname);
+
+	ret = connect(socket_fd, (struct sockaddr *)&server_sin, sizeof(server_sin));
+	if (ret)
+		error(1, errno, "connect");
+
+	while (1) {
+		free(line);
+		line = NULL;
+		line_size = getline(&line, &len, stdin);
+
+		if (line_size < 0)
+			break;
+
+		mid = (line_size / 2) + 1;
+
+		iov[0].iov_base = (void *)1;	/* for devmem TX, iov_base is an offset into the dmabuf */
+		iov[0].iov_len = mid;
+		iov[1].iov_base = (void *)(mid + 2);
+		iov[1].iov_len = line_size - mid;
+
+		provider->memcpy_to_device(mem, (size_t)iov[0].iov_base, line,
+					   iov[0].iov_len);
+		provider->memcpy_to_device(mem, (size_t)iov[1].iov_base,
+					   line + iov[0].iov_len,
+					   iov[1].iov_len);
+
+		fprintf(stderr,
+			"read line_size=%ld iov[0].iov_base=%lu, iov[0].iov_len=%lu, iov[1].iov_base=%lu, iov[1].iov_len=%lu\n",
+			line_size, (unsigned long)iov[0].iov_base,
+			iov[0].iov_len, (unsigned long)iov[1].iov_base,
+			iov[1].iov_len);
+
+		msg.msg_iov = iov;
+		msg.msg_iovlen = 2;
+
+		msg.msg_control = ctrl_data;
+		msg.msg_controllen = sizeof(ctrl_data);
+
+		cmsg = CMSG_FIRSTHDR(&msg);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_DEVMEM_DMABUF;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+
+		ddmabuf = tx_dmabuf_id;
+
+		*((__u32 *)CMSG_DATA(cmsg)) = ddmabuf;
+
+		ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY);
+		if (ret < 0)
+			error(1, errno, "Failed sendmsg");
+
+		fprintf(stderr, "sendmsg_ret=%d\n", ret);
+
+		if (ret != line_size)
+			error(1, 0, "Did not send all bytes");
+
+		wait_compl(socket_fd);
+	}
+
+	fprintf(stderr, "%s: tx ok\n", TEST_PREFIX);
+
+	free(line);
+	close(socket_fd);
+
+	if (ys)
+		ynl_sock_destroy(ys);
+
+	return 0;
+}
+
 int main(int argc, char *argv[])
 {
 	struct memory_buffer *mem;
@@ -717,6 +993,8 @@ int main(int argc, char *argv[])
 
 	ifindex = if_nametoindex(ifname);
 
+	fprintf(stderr, "using ifindex=%u\n", ifindex);
+
 	if (!server_ip && !client_ip) {
 		if (start_queue < 0 && num_queues < 0) {
 			num_queues = rxq_num(ifindex);
@@ -767,7 +1045,7 @@ int main(int argc, char *argv[])
 		error(1, 0, "Missing -p argument\n");
 
 	mem = provider->alloc(getpagesize() * NUM_PAGES);
-	ret = is_server ? do_server(mem) : 1;
+	ret = is_server ?
do_server(mem) : do_client(mem); provider->free(mem); return ret; diff --git a/tools/testing/selftests/drivers/net/hw/nic_link_layer.py b/tools/testing/selftests/drivers/net/hw/nic_link_layer.py deleted file mode 100644 index efd921180532..000000000000 --- a/tools/testing/selftests/drivers/net/hw/nic_link_layer.py +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 - -#Introduction: -#This file has basic link layer tests for generic NIC drivers. -#The test comprises of auto-negotiation, speed and duplex checks. -# -#Setup: -#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1) -# -# DUT PC Partner PC -#┌───────────────────────┐ ┌──────────────────────────┐ -#│ │ │ │ -#│ │ │ │ -#│ ┌───────────┐ │ │ -#│ │DUT NIC │ Eth │ │ -#│ │Interface ─┼─────────────────────────┼─ any eth Interface │ -#│ └───────────┘ │ │ -#│ │ │ │ -#│ │ │ │ -#└───────────────────────┘ └──────────────────────────┘ -# -#Configurations: -#Required minimum ethtool version is 6.10 (supports json) -#Default values: -#time_delay = 8 #time taken to wait for transitions to happen, in seconds. - -import time -import argparse -from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_eq -from lib.py import KsftFailEx, KsftSkipEx -from lib.py import NetDrvEpEnv -from lib.py import LinkConfig - -def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None: - if link_config.partner_netif is None: - KsftSkipEx("Partner interface is not available") - if not link_config.check_autoneg_supported() or not link_config.check_autoneg_supported(remote=True): - KsftSkipEx(f"Auto-negotiation not supported for interface {cfg.ifname} or {link_config.partner_netif}") - if not link_config.verify_link_up(): - raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") - -def verify_autonegotiation(cfg: object, expected_state: str, link_config: LinkConfig) -> None: - if not link_config.verify_link_up(): - raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") - """Verifying the autonegotiation state in partner""" - partner_autoneg_output = link_config.get_ethtool_field("auto-negotiation", remote=True) - if partner_autoneg_output is None: - KsftSkipEx(f"Auto-negotiation state not available for interface {link_config.partner_netif}") - partner_autoneg_state = "on" if partner_autoneg_output is True else "off" - - ksft_eq(partner_autoneg_state, expected_state) - - """Verifying the autonegotiation state of local""" - autoneg_output = link_config.get_ethtool_field("auto-negotiation") - if autoneg_output is None: - KsftSkipEx(f"Auto-negotiation state not available for interface {cfg.ifname}") - actual_state = "on" if autoneg_output is True else "off" - - ksft_eq(actual_state, expected_state) - - """Verifying the link establishment""" - link_available = link_config.get_ethtool_field("link-detected") - if link_available is None: - KsftSkipEx(f"Link status not available for interface {cfg.ifname}") - if link_available != True: - raise KsftSkipEx("Link not established at interface {cfg.ifname} after changing auto-negotiation") - -def test_autonegotiation(cfg: object, link_config: LinkConfig, time_delay: int) -> None: - _pre_test_checks(cfg, link_config) - for state in ["off", "on"]: - if not link_config.set_autonegotiation_state(state, remote=True): - raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {link_config.partner_netif}") - if not link_config.set_autonegotiation_state(state): - raise KsftSkipEx(f"Unable to set auto-negotiation state 
for interface {cfg.ifname}") - time.sleep(time_delay) - verify_autonegotiation(cfg, state, link_config) - -def test_network_speed(cfg: object, link_config: LinkConfig, time_delay: int) -> None: - _pre_test_checks(cfg, link_config) - common_link_modes = link_config.common_link_modes - if not common_link_modes: - KsftSkipEx("No common link modes exist") - speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes) - - if speeds and duplex_modes and len(speeds) == len(duplex_modes): - for idx in range(len(speeds)): - speed = speeds[idx] - duplex = duplex_modes[idx] - if not link_config.set_speed_and_duplex(speed, duplex): - raise KsftFailEx(f"Unable to set speed and duplex parameters for {cfg.ifname}") - time.sleep(time_delay) - if not link_config.verify_speed_and_duplex(speed, duplex): - raise KsftSkipEx(f"Error occurred while verifying speed and duplex states for interface {cfg.ifname}") - else: - if not speeds or not duplex_modes: - KsftSkipEx(f"No supported speeds or duplex modes found for interface {cfg.ifname}") - else: - KsftSkipEx("Mismatch in the number of speeds and duplex modes") - -def main() -> None: - parser = argparse.ArgumentParser(description="Run basic link layer tests for NIC driver") - parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.') - args = parser.parse_args() - time_delay = args.time_delay - with NetDrvEpEnv(__file__, nsim_test=False) as cfg: - link_config = LinkConfig(cfg) - ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, time_delay,)) - link_config.reset_interface() - ksft_exit() - -if __name__ == "__main__": - main() diff --git a/tools/testing/selftests/drivers/net/hw/nic_performance.py b/tools/testing/selftests/drivers/net/hw/nic_performance.py deleted file mode 100644 index 201403b76ea3..000000000000 --- a/tools/testing/selftests/drivers/net/hw/nic_performance.py +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 - -#Introduction: -#This file has basic performance test for generic NIC drivers. -#The test comprises of throughput check for TCP and UDP streams. -# -#Setup: -#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1) -# -# DUT PC Partner PC -#┌───────────────────────┐ ┌──────────────────────────┐ -#│ │ │ │ -#│ │ │ │ -#│ ┌───────────┐ │ │ -#│ │DUT NIC │ Eth │ │ -#│ │Interface ─┼─────────────────────────┼─ any eth Interface │ -#│ └───────────┘ │ │ -#│ │ │ │ -#│ │ │ │ -#└───────────────────────┘ └──────────────────────────┘ -# -#Configurations: -#To prevent interruptions, Add ethtool, ip to the sudoers list in remote PC and get the ssh key from remote. -#Required minimum ethtool version is 6.10 -#Change the below configuration based on your hw needs. -# """Default values""" -#time_delay = 8 #time taken to wait for transitions to happen, in seconds. -#test_duration = 10 #performance test duration for the throughput check, in seconds. 
-#send_throughput_threshold = 80 #percentage of send throughput required to pass the check -#receive_throughput_threshold = 50 #percentage of receive throughput required to pass the check - -import time -import json -import argparse -from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_true -from lib.py import KsftFailEx, KsftSkipEx, GenerateTraffic -from lib.py import NetDrvEpEnv, bkg, wait_port_listen -from lib.py import cmd -from lib.py import LinkConfig - -class TestConfig: - def __init__(self, time_delay: int, test_duration: int, send_throughput_threshold: int, receive_throughput_threshold: int) -> None: - self.time_delay = time_delay - self.test_duration = test_duration - self.send_throughput_threshold = send_throughput_threshold - self.receive_throughput_threshold = receive_throughput_threshold - -def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None: - if not link_config.verify_link_up(): - KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") - common_link_modes = link_config.common_link_modes - if common_link_modes is None: - KsftSkipEx("No common link modes found") - if link_config.partner_netif == None: - KsftSkipEx("Partner interface is not available") - if link_config.check_autoneg_supported(): - KsftSkipEx("Auto-negotiation not supported by local") - if link_config.check_autoneg_supported(remote=True): - KsftSkipEx("Auto-negotiation not supported by remote") - cfg.require_cmd("iperf3", remote=True) - -def check_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, protocol: str, traffic: GenerateTraffic) -> None: - common_link_modes = link_config.common_link_modes - speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes) - """Test duration in seconds""" - duration = test_config.test_duration - - ksft_pr(f"{protocol} test") - test_type = "-u" if protocol == "UDP" else "" - - send_throughput = [] - receive_throughput = [] - for idx in range(0, len(speeds)): - if link_config.set_speed_and_duplex(speeds[idx], duplex_modes[idx]) == False: - raise KsftFailEx(f"Not able to set speed and duplex parameters for {cfg.ifname}") - time.sleep(test_config.time_delay) - if not link_config.verify_link_up(): - raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") - - send_command=f"{test_type} -b 0 -t {duration} --json" - receive_command=f"{test_type} -b 0 -t {duration} --reverse --json" - - send_result = traffic.run_remote_test(cfg, command=send_command) - if send_result.ret != 0: - raise KsftSkipEx("Error occurred during data transmit: {send_result.stdout}") - - send_output = send_result.stdout - send_data = json.loads(send_output) - - """Convert throughput to Mbps""" - send_throughput.append(round(send_data['end']['sum_sent']['bits_per_second'] / 1e6, 2)) - ksft_pr(f"{protocol}: Send throughput: {send_throughput[idx]} Mbps") - - receive_result = traffic.run_remote_test(cfg, command=receive_command) - if receive_result.ret != 0: - raise KsftSkipEx("Error occurred during data receive: {receive_result.stdout}") - - receive_output = receive_result.stdout - receive_data = json.loads(receive_output) - - """Convert throughput to Mbps""" - receive_throughput.append(round(receive_data['end']['sum_received']['bits_per_second'] / 1e6, 2)) - ksft_pr(f"{protocol}: Receive throughput: {receive_throughput[idx]} Mbps") - - """Check whether throughput is not below the threshold (default values set at start)""" - for idx in range(0, len(speeds)): - send_threshold = float(speeds[idx]) * float(test_config.send_throughput_threshold / 
100)
-        receive_threshold = float(speeds[idx]) * float(test_config.receive_throughput_threshold / 100)
-        ksft_true(send_throughput[idx] >= send_threshold, f"{protocol}: Send throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex")
-        ksft_true(receive_throughput[idx] >= receive_threshold, f"{protocol}: Receive throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex")
-
-def test_tcp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None:
-    _pre_test_checks(cfg, link_config)
-    check_throughput(cfg, link_config, test_config, 'TCP', traffic)
-
-def test_udp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None:
-    _pre_test_checks(cfg, link_config)
-    check_throughput(cfg, link_config, test_config, 'UDP', traffic)
-
-def main() -> None:
-    parser = argparse.ArgumentParser(description="Run basic performance test for NIC driver")
-    parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.')
-    parser.add_argument('--test-duration', type=int, default=10, help='Performance test duration for the throughput check, in seconds. Default is 10 seconds.')
-    parser.add_argument('--stt', type=int, default=80, help='Send throughput Threshold: Percentage of send throughput upon actual throughput required to pass the throughput check (in percentage). Default is 80.')
-    parser.add_argument('--rtt', type=int, default=50, help='Receive throughput Threshold: Percentage of receive throughput upon actual throughput required to pass the throughput check (in percentage). Default is 50.')
-    args=parser.parse_args()
-    test_config = TestConfig(args.time_delay, args.test_duration, args.stt, args.rtt)
-    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
-        traffic = GenerateTraffic(cfg)
-        link_config = LinkConfig(cfg)
-        ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, test_config, traffic, ))
-        link_config.reset_interface()
-    ksft_exit()
-
-if __name__ == "__main__":
-    main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
index 53bb08cc29ec..f439c434ba36 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -32,6 +32,11 @@ def test_rss_input_xfrm(cfg, ipver):
     if multiprocessing.cpu_count() < 2:
         raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash")
 
+    cfg.require_cmd("socat", remote=True)
+
+    if not hasattr(socket, "SO_INCOMING_CPU"):
+        raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
+
     input_xfrm = cfg.ethnl.rss_get(
         {'header': {'dev-name': cfg.ifname}}).get('input_xfrm')
diff --git a/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
new file mode 100755
index 000000000000..d19d1d518208
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+# This is intended to be run on a virtio-net guest interface.
+# The test binds an XDP socket to the interface without filling
+# the fill ring, to trigger the delayed refill_work. This makes it
+# easier to reproduce the deadlock when an XDP program attach, an
+# XDP socket bind/unbind, or an RX ring resize races with
+# refill_work on a buggy kernel.
+#
+# The QEMU options used to set up virtio-net:
+# -netdev tap,id=hostnet1,vhost=on,script=no,downscript=no
+# -device virtio-net-pci,netdev=hostnet1,iommu_platform=on,disable-legacy=on
+
+from lib.py import ksft_exit, ksft_run
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import NetDrvEnv
+from lib.py import bkg, ip, cmd, ethtool
+import time
+
+def _get_rx_ring_entries(cfg):
+    output = ethtool(f"-g {cfg.ifname}", json=True)
+    return output[0]["rx"]
+
+def setup_xsk(cfg, xdp_queue_id=0) -> bkg:
+    # Probe for support
+    xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
+    if xdp.ret == 255:
+        raise KsftSkipEx('AF_XDP unsupported')
+    elif xdp.ret > 0:
+        raise KsftFailEx('unable to create AF_XDP socket')
+
+    try:
+        return bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} ' \
+                   f'{xdp_queue_id} -z', ksft_wait=3)
+    except Exception:
+        raise KsftSkipEx('Failed to bind XDP socket in zerocopy.\n' \
+                         'Please consider adding iommu_platform=on ' \
+                         'when setting up virtio-net-pci')
+
+def check_xdp_bind(cfg):
+    with setup_xsk(cfg):
+        ip("link set dev %s xdp obj %s sec xdp" %
+           (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o"))
+        ip("link set dev %s xdp off" % cfg.ifname)
+
+def check_rx_resize(cfg):
+    with setup_xsk(cfg):
+        rx_ring = _get_rx_ring_entries(cfg)
+        ethtool("-G %s rx %d" % (cfg.ifname, rx_ring // 2))
+        ethtool("-G %s rx %d" % (cfg.ifname, rx_ring))
+
+def main():
+    with NetDrvEnv(__file__, nsim_test=False) as cfg:
+        ksft_run([check_xdp_bind, check_rx_resize],
+                 args=(cfg, ))
+    ksft_exit()
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
index da5af2c680fa..d9c10613ae67 100644
--- a/tools/testing/selftests/drivers/net/lib/py/load.py
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -2,7 +2,7 @@
 
 import time
 
-from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen, bkg
+from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen
 
 class GenerateTraffic:
     def __init__(self, env, port=None):
@@ -23,24 +23,6 @@ class GenerateTraffic:
             self.stop(verbose=True)
             raise Exception("iperf3 traffic did not ramp up")
 
-    def run_remote_test(self, env: object, port=None, command=None):
-        if port is None:
-            port = rand_port()
-        try:
-            server_cmd = f"iperf3 -s 1 -p {port} --one-off"
-            with bkg(server_cmd, host=env.remote):
-                #iperf3 opens TCP connection as default in server
-                #-u to be specified in client command for UDP
-                wait_port_listen(port, host=env.remote)
-        except Exception as e:
-            raise Exception(f"Unexpected error occurred while running server command: {e}")
-        try:
-            client_cmd = f"iperf3 -c {env.remote_addr} -p {port} {command}"
-            proc = cmd(client_cmd)
-            return proc
-        except Exception as e:
-            raise Exception(f"Unexpected error occurred while running client command: {e}")
-
     def _wait_pkts(self, pkt_cnt=None, pps=None):
         """
         Wait until we've seen pkt_cnt or until traffic ramps up to pps.
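The xsk_reconfig.py test above leans on the `-z` flag this series adds to the shared xdp_helper (its diff appears further below, under tools/testing/selftests/net/lib/xdp_helper.c). As a rough C sketch of what that zerocopy bind boils down to (illustrative only, not part of the patch; xsk_bind_zerocopy is an invented name, and the socket's UMEM and rings are assumed to be configured already, the way xdp_helper.c does it):

    #include <errno.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/if_xdp.h>

    /* Bind an already-configured AF_XDP socket in zerocopy mode.
     * EBUSY is retried a few times, mirroring the helper change below,
     * since the bind can transiently fail while the queue is being
     * reconfigured.
     */
    static int xsk_bind_zerocopy(int sock_fd, unsigned int ifindex,
                                 unsigned int queue_id)
    {
            struct sockaddr_xdp sxdp = {
                    .sxdp_family = AF_XDP,
                    .sxdp_ifindex = ifindex,
                    .sxdp_queue_id = queue_id,
                    /* fail outright instead of falling back to copy mode */
                    .sxdp_flags = XDP_ZEROCOPY,
            };
            int retry;

            for (retry = 0; retry < 3; retry++) {
                    if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0)
                            return 0;
                    if (errno != EBUSY)
                            break;
                    sleep(1);
            }
            perror("bind failed");
            return -1;
    }

Because XDP_ZEROCOPY refuses the copy-mode fallback, the bind fails on setups without zerocopy support, which is why setup_xsk() turns that failure into a skip with the iommu_platform=on hint.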
diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py new file mode 100755 index 000000000000..356bac46ba04 --- /dev/null +++ b/tools/testing/selftests/drivers/net/napi_id.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, NetDrvEpEnv +from lib.py import bkg, cmd, rand_port, NetNSEnter + +def test_napi_id(cfg) -> None: + port = rand_port() + listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr_v['4']} {port}" + + with bkg(listen_cmd, ksft_wait=3) as server: + cmd(f"echo a | socat - TCP:{cfg.addr_v['4']}:{port}", host=cfg.remote, shell=True) + + ksft_eq(0, server.ret) + +def main() -> None: + with NetDrvEpEnv(__file__) as cfg: + ksft_run([test_napi_id], args=(cfg,)) + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c new file mode 100644 index 000000000000..eecd610c2109 --- /dev/null +++ b/tools/testing/selftests/drivers/net/napi_id_helper.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/socket.h> + +#include "../../net/lib/ksft.h" + +int main(int argc, char *argv[]) +{ + struct sockaddr_in address; + unsigned int napi_id; + unsigned int port; + socklen_t optlen; + char buf[1024]; + int opt = 1; + int server; + int client; + int ret; + + server = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (server < 0) { + perror("socket creation failed"); + if (errno == EAFNOSUPPORT) + return -1; + return 1; + } + + port = atoi(argv[2]); + + if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) { + perror("setsockopt"); + return 1; + } + + address.sin_family = AF_INET; + inet_pton(AF_INET, argv[1], &address.sin_addr); + address.sin_port = htons(port); + + if (bind(server, (struct sockaddr *)&address, sizeof(address)) < 0) { + perror("bind failed"); + return 1; + } + + if (listen(server, 1) < 0) { + perror("listen"); + return 1; + } + + ksft_ready(); + + client = accept(server, NULL, 0); + if (client < 0) { + perror("accept"); + return 1; + } + + optlen = sizeof(napi_id); + ret = getsockopt(client, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, + &optlen); + if (ret != 0) { + perror("getsockopt"); + return 1; + } + + read(client, buf, 1024); + + ksft_wait(); + + if (napi_id == 0) { + fprintf(stderr, "napi ID is 0\n"); + return 1; + } + + close(client); + close(server); + + return 0; +} diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index af8df2313a3b..e0f114612c1a 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -50,6 +50,16 @@ def _test_tcp(cfg) -> None: cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True) ksft_eq(nc.stdout.strip(), test_string) +def _schedule_checksum_reset(cfg, netnl) -> None: + features = ethtool(f"-k {cfg.ifname}", json=True) + setting = "" + for side in ["tx", "rx"]: + f = features[0][side + "-checksumming"] + if not f["fixed"]: + setting += " " + side + setting += " " + ("on" if f["requested"] or f["active"] else "off") + defer(ethtool, f" -K {cfg.ifname} " + setting) + def _set_offload_checksum(cfg, netnl, on) -> None: try: ethtool(f" -K {cfg.ifname} rx {on} tx {on} ") @@ -139,6 +149,7 @@ def 
set_interface_init(cfg) -> None: def test_default_v4(cfg, netnl) -> None: cfg.require_ipver("4") + _schedule_checksum_reset(cfg, netnl) _set_offload_checksum(cfg, netnl, "off") _test_v4(cfg) _test_tcp(cfg) @@ -149,6 +160,7 @@ def test_default_v4(cfg, netnl) -> None: def test_default_v6(cfg, netnl) -> None: cfg.require_ipver("6") + _schedule_checksum_reset(cfg, netnl) _set_offload_checksum(cfg, netnl, "off") _test_v6(cfg) _test_tcp(cfg) @@ -157,6 +169,7 @@ def test_default_v6(cfg, netnl) -> None: _test_tcp(cfg) def test_xdp_generic_sb(cfg, netnl) -> None: + _schedule_checksum_reset(cfg, netnl) _set_xdp_generic_sb_on(cfg) _set_offload_checksum(cfg, netnl, "off") _test_v4(cfg) @@ -168,6 +181,7 @@ def test_xdp_generic_sb(cfg, netnl) -> None: _test_tcp(cfg) def test_xdp_generic_mb(cfg, netnl) -> None: + _schedule_checksum_reset(cfg, netnl) _set_xdp_generic_mb_on(cfg) _set_offload_checksum(cfg, netnl, "off") _test_v4(cfg) @@ -179,6 +193,7 @@ def test_xdp_generic_mb(cfg, netnl) -> None: _test_tcp(cfg) def test_xdp_native_sb(cfg, netnl) -> None: + _schedule_checksum_reset(cfg, netnl) _set_xdp_native_sb_on(cfg) _set_offload_checksum(cfg, netnl, "off") _test_v4(cfg) @@ -190,6 +205,7 @@ def test_xdp_native_sb(cfg, netnl) -> None: _test_tcp(cfg) def test_xdp_native_mb(cfg, netnl) -> None: + _schedule_checksum_reset(cfg, netnl) _set_xdp_native_mb_on(cfg) _set_offload_checksum(cfg, netnl, "off") _test_v4(cfg) diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 06abd3f233e1..236005290a33 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -26,13 +26,13 @@ def nl_get_queues(cfg, nl, qtype='rx'): def check_xsk(cfg, nl, xdp_queue_id=0) -> None: # Probe for support - xdp = cmd(f'{cfg.test_dir / "xdp_helper"} - -', fail=False) + xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False) if xdp.ret == 255: raise KsftSkipEx('AF_XDP unsupported') elif xdp.ret > 0: raise KsftFailEx('unable to create AF_XDP socket') - with bkg(f'{cfg.test_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}', + with bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}', ksft_wait=3): rx = tx = False diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh index f366cadbc5e8..4046131e7888 100755 --- a/tools/testing/selftests/net/bareudp.sh +++ b/tools/testing/selftests/net/bareudp.sh @@ -106,26 +106,16 @@ # | | # +-----------------------------------------------------------------------+ +. ./lib.sh + ERR=4 # Return 4 by default, which is the SKIP code for kselftest PING6="ping" PAUSE_ON_FAIL="no" -readonly NS0=$(mktemp -u ns0-XXXXXXXX) -readonly NS1=$(mktemp -u ns1-XXXXXXXX) -readonly NS2=$(mktemp -u ns2-XXXXXXXX) -readonly NS3=$(mktemp -u ns3-XXXXXXXX) - # Exit the script after having removed the network namespaces it created -# -# Parameters: -# -# * The list of network namespaces to delete before exiting. -# exit_cleanup() { - for ns in "$@"; do - ip netns delete "${ns}" 2>/dev/null || true - done + cleanup_all_ns if [ "${ERR}" -eq 4 ]; then echo "Error: Setting up the testing environment failed." >&2 @@ -140,17 +130,7 @@ exit_cleanup() # namespaces created by this script are deleted. 
create_namespaces() { - ip netns add "${NS0}" || exit_cleanup - ip netns add "${NS1}" || exit_cleanup "${NS0}" - ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}" - ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}" -} - -# The trap function handler -# -exit_cleanup_all() -{ - exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}" + setup_ns NS0 NS1 NS2 NS3 || exit_cleanup } # Configure a network interface using a host route @@ -188,10 +168,6 @@ iface_config() # setup_underlay() { - for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do - ip -netns "${ns}" link set dev lo up - done; - ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}" ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}" ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}" @@ -234,14 +210,6 @@ setup_overlay_ipv4() ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1 ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10 ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33 - - # The intermediate namespaces don't have routes for the reverse path, - # as it will be handled by tc. So we need to ensure that rp_filter is - # not going to block the traffic. - ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0 - ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0 } setup_overlay_ipv6() @@ -521,13 +489,10 @@ done check_features -# Create namespaces before setting up the exit trap. -# Otherwise, exit_cleanup_all() could delete namespaces that were not created -# by this script. -create_namespaces - set -e -trap exit_cleanup_all EXIT +trap exit_cleanup EXIT + +create_namespaces setup_underlay setup_overlay_ipv4 diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 130d532b7e67..3cfef5153823 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -33,7 +33,6 @@ CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m CONFIG_IPV6_MROUTE=y CONFIG_IPV6_SIT=y -CONFIG_IP_DCCP=m CONFIG_NF_NAT=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP_NF_IPTABLES=m diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index c7cea556b416..5fbdd2a0b537 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -516,10 +516,7 @@ fib_rule4_test() fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \ "oif redirect to table" "oif no redirect to table" - # Enable forwarding and disable rp_filter as all the addresses are in - # the same subnet and egress device == ingress device. 
ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1 - ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0 match="from $SRC_IP iif $DEV" getnomatch="from $SRC_IP iif lo" fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \ diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 3ea6f886a210..a94b73a53f72 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -11,7 +11,7 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \ ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \ ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \ - ipv4_mpath_list ipv6_mpath_list" + ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance" VERBOSE=0 PAUSE_ON_FAIL=no @@ -1085,6 +1085,35 @@ route_setup() set +e } +forwarding_cleanup() +{ + cleanup_ns $ns3 + + route_cleanup +} + +# extend route_setup with an ns3 reachable through ns2 over both devices +forwarding_setup() +{ + forwarding_cleanup + + route_setup + + setup_ns ns3 + + ip link add veth5 netns $ns3 type veth peer name veth6 netns $ns2 + ip -netns $ns3 link set veth5 up + ip -netns $ns2 link set veth6 up + + ip -netns $ns3 -4 addr add dev veth5 172.16.105.1/24 + ip -netns $ns2 -4 addr add dev veth6 172.16.105.2/24 + ip -netns $ns3 -4 route add 172.16.100.0/22 via 172.16.105.2 + + ip -netns $ns3 -6 addr add dev veth5 2001:db8:105::1/64 nodad + ip -netns $ns2 -6 addr add dev veth6 2001:db8:105::2/64 nodad + ip -netns $ns3 -6 route add 2001:db8:101::/33 via 2001:db8:105::2 +} + # assumption is that basic add of a single path route works # otherwise just adding an address on an interface is broken ipv6_rt_add() @@ -2531,9 +2560,6 @@ ipv4_mpath_list_test() run_cmd "ip -n $ns2 route add 203.0.113.0/24 nexthop via 172.16.201.2 nexthop via 172.16.202.2" run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" - run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" - run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" - run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" set +e local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]') @@ -2600,6 +2626,93 @@ ipv6_mpath_list_test() route_cleanup } +tc_set_flower_counter__saddr_syn() { + tc_set_flower_counter $1 $2 $3 "src_ip $4 ip_proto tcp tcp_flags 0x2" +} + +ip_mpath_balance_dep_check() +{ + if [ ! -x "$(command -v socat)" ]; then + echo "socat command not found. Skipping test" + return 1 + fi + + if [ ! -x "$(command -v jq)" ]; then + echo "jq command not found. 
Skipping test" + return 1 + fi +} + +ip_mpath_balance() { + local -r ipver=$1 + local -r daddr=$2 + local -r num_conn=20 + + for i in $(seq 1 $num_conn); do + ip netns exec $ns3 socat $ipver TCP-LISTEN:8000 STDIO >/dev/null & + sleep 0.02 + echo -n a | ip netns exec $ns1 socat $ipver STDIO TCP:$daddr:8000 + done + + local -r syn0="$(tc_get_flower_counter $ns1 veth1)" + local -r syn1="$(tc_get_flower_counter $ns1 veth3)" + local -r syns=$((syn0+syn1)) + + [ "$VERBOSE" = "1" ] && echo "multipath: syns seen: ($syn0,$syn1)" + + [[ $syns -ge $num_conn ]] && [[ $syn0 -gt 0 ]] && [[ $syn1 -gt 0 ]] +} + +ipv4_mpath_balance_test() +{ + echo + echo "IPv4 multipath load balance test" + + ip_mpath_balance_dep_check || return 1 + forwarding_setup + + $IP route add 172.16.105.1 \ + nexthop via 172.16.101.2 \ + nexthop via 172.16.103.2 + + ip netns exec $ns1 \ + sysctl -q -w net.ipv4.fib_multipath_hash_policy=1 + + tc_set_flower_counter__saddr_syn $ns1 4 veth1 172.16.101.1 + tc_set_flower_counter__saddr_syn $ns1 4 veth3 172.16.103.1 + + ip_mpath_balance -4 172.16.105.1 + + log_test $? 0 "IPv4 multipath loadbalance" + + forwarding_cleanup +} + +ipv6_mpath_balance_test() +{ + echo + echo "IPv6 multipath load balance test" + + ip_mpath_balance_dep_check || return 1 + forwarding_setup + + $IP route add 2001:db8:105::1\ + nexthop via 2001:db8:101::2 \ + nexthop via 2001:db8:103::2 + + ip netns exec $ns1 \ + sysctl -q -w net.ipv6.fib_multipath_hash_policy=1 + + tc_set_flower_counter__saddr_syn $ns1 6 veth1 2001:db8:101::1 + tc_set_flower_counter__saddr_syn $ns1 6 veth3 2001:db8:103::1 + + ip_mpath_balance -6 "[2001:db8:105::1]" + + log_test $? 0 "IPv6 multipath loadbalance" + + forwarding_cleanup +} + ################################################################################ # usage @@ -2683,6 +2796,8 @@ do fib6_gc_test|ipv6_gc) fib6_gc_test;; ipv4_mpath_list) ipv4_mpath_list_test;; ipv6_mpath_list) ipv6_mpath_list_test;; + ipv4_mpath_balance) ipv4_mpath_balance_test;; + ipv6_mpath_balance) ipv6_mpath_balance_test;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index e6a3e04fd83f..d4e7dd659354 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -1,10 +1,24 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ - v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \ - v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \ - v3exc_timeout_test v3star_ex_auto_add_test" +ALL_TESTS=" + v2reportleave_test + v3include_test + v3inc_allow_test + v3inc_is_include_test + v3inc_is_exclude_test + v3inc_to_exclude_test + v3exc_allow_test + v3exc_is_include_test + v3exc_is_exclude_test + v3exc_to_exclude_test + v3inc_block_test + v3exc_block_test + v3exc_timeout_test + v3star_ex_auto_add_test + v2per_vlan_snooping_port_stp_test + v2per_vlan_snooping_vlan_stp_test +" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -554,6 +568,64 @@ v3star_ex_auto_add_test() v3cleanup $swp2 $TEST_GROUP } +v2per_vlan_snooping_stp_test() +{ + local is_port=$1 + + local msg="port" + [[ $is_port -ne 1 ]] && msg="vlan" + + ip link set br0 up type bridge vlan_filtering 1 \ + mcast_igmp_version 2 \ + mcast_snooping 1 \ + mcast_vlan_snooping 1 \ + mcast_querier 1 \ + mcast_stats_enabled 1 + bridge vlan 
global set vid 1 dev br0 \ + mcast_snooping 1 \ + mcast_querier 1 \ + mcast_query_interval 100 \ + mcast_startup_query_count 0 + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4 + sleep 5 + local tx_s=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]') + + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3 + sleep 5 + local tx_e=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]') + + RET=0 + local tx=$(expr $tx_e - $tx_s) + test $tx -gt 0 + check_err $? "No IGMP queries after STP state becomes forwarding" + log_test "per vlan snooping with $msg stp state change" + + # restore settings + bridge vlan global set vid 1 dev br0 \ + mcast_querier 0 \ + mcast_query_interval 12500 \ + mcast_startup_query_count 2 + ip link set br0 up type bridge vlan_filtering 0 \ + mcast_vlan_snooping 0 \ + mcast_stats_enabled 0 +} + +v2per_vlan_snooping_port_stp_test() +{ + v2per_vlan_snooping_stp_test 1 +} + +v2per_vlan_snooping_vlan_stp_test() +{ + v2per_vlan_snooping_stp_test 0 +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index f84ab2e65754..4cacef5a813a 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -1,10 +1,23 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ - mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \ - mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \ - mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test" +ALL_TESTS=" + mldv2include_test + mldv2inc_allow_test + mldv2inc_is_include_test + mldv2inc_is_exclude_test + mldv2inc_to_exclude_test + mldv2exc_allow_test + mldv2exc_is_include_test + mldv2exc_is_exclude_test + mldv2exc_to_exclude_test + mldv2inc_block_test + mldv2exc_block_test + mldv2exc_timeout_test + mldv2star_ex_auto_add_test + mldv2per_vlan_snooping_port_stp_test + mldv2per_vlan_snooping_vlan_stp_test +" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -554,6 +567,66 @@ mldv2star_ex_auto_add_test() mldv2cleanup $swp2 } +mldv2per_vlan_snooping_stp_test() +{ + local is_port=$1 + + local msg="port" + [[ $is_port -ne 1 ]] && msg="vlan" + + ip link set br0 up type bridge vlan_filtering 1 \ + mcast_mld_version 2 \ + mcast_snooping 1 \ + mcast_vlan_snooping 1 \ + mcast_querier 1 \ + mcast_stats_enabled 1 + bridge vlan global set vid 1 dev br0 \ + mcast_mld_version 2 \ + mcast_snooping 1 \ + mcast_querier 1 \ + mcast_query_interval 100 \ + mcast_startup_query_count 0 + + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4 + sleep 5 + local tx_s=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["mld_queries"]["tx_v2"]') + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3 + sleep 5 + local tx_e=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["mld_queries"]["tx_v2"]') + 
+ RET=0 + local tx=$(expr $tx_e - $tx_s) + test $tx -gt 0 + check_err $? "No MLD queries after STP state becomes forwarding" + log_test "per vlan snooping with $msg stp state change" + + # restore settings + bridge vlan global set vid 1 dev br0 \ + mcast_querier 0 \ + mcast_query_interval 12500 \ + mcast_startup_query_count 2 \ + mcast_mld_version 1 + ip link set br0 up type bridge vlan_filtering 0 \ + mcast_vlan_snooping 0 \ + mcast_stats_enabled 0 +} + +mldv2per_vlan_snooping_port_stp_test() +{ + mldv2per_vlan_snooping_stp_test 1 +} + +mldv2per_vlan_snooping_vlan_stp_test() +{ + mldv2per_vlan_snooping_stp_test 0 +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 8d7a1a004b7c..18fd69d8d937 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -1,6 +1,7 @@ CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_BRIDGE_VLAN_FILTERING=y +CONFIG_BRIDGE_IGMP_SNOOPING=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_VRF=m diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index d6f0e449c029..b13c89a99ecb 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -178,8 +178,6 @@ setup() else ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0 - ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0 ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10 diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 701905eeff66..7962da06f816 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -217,6 +217,8 @@ setup_ns() return $ksft_skip fi ip -n "${!ns_name}" link set lo up + ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ns_list+=("${!ns_name}") done NS_LIST+=("${ns_list[@]}") @@ -270,6 +272,30 @@ tc_rule_handle_stats_get() .options.actions[0].stats$selector" } +# attach a qdisc with two children match/no-match and a flower filter to match +tc_set_flower_counter() { + local -r ns=$1 + local -r ipver=$2 + local -r dev=$3 + local -r flower_expr=$4 + + tc -n $ns qdisc add dev $dev root handle 1: prio bands 2 \ + priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + + tc -n $ns qdisc add dev $dev parent 1:1 handle 11: pfifo + tc -n $ns qdisc add dev $dev parent 1:2 handle 12: pfifo + + tc -n $ns filter add dev $dev parent 1: protocol ipv$ipver \ + flower $flower_expr classid 1:2 +} + +tc_get_flower_counter() { + local -r ns=$1 + local -r dev=$2 + + tc -n $ns -j -s qdisc show dev $dev handle 12: | jq .[0].packets +} + ret_set_ksft_status() { local ksft_status=$1; shift diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore index 1ebc6187f421..bbc97d6bf556 100644 --- a/tools/testing/selftests/net/lib/.gitignore +++ b/tools/testing/selftests/net/lib/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only csum +xdp_helper diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index c22623b9a2a5..88c4bc461459 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ 
b/tools/testing/selftests/net/lib/Makefile @@ -10,6 +10,7 @@ TEST_FILES += ../../../../net/ynl TEST_GEN_FILES += csum TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) +TEST_GEN_FILES += xdp_helper TEST_INCLUDES := $(wildcard py/*.py sh/*.sh) diff --git a/tools/testing/selftests/net/lib/ksft.h b/tools/testing/selftests/net/lib/ksft.h new file mode 100644 index 000000000000..17dc34a612c6 --- /dev/null +++ b/tools/testing/selftests/net/lib/ksft.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(__NET_KSFT_H__) +#define __NET_KSFT_H__ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +static inline void ksft_ready(void) +{ + const char msg[7] = "ready\n"; + char *env_str; + int fd; + + env_str = getenv("KSFT_READY_FD"); + if (env_str) { + fd = atoi(env_str); + if (!fd) { + fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n", + env_str); + return; + } + } else { + fd = STDOUT_FILENO; + } + + write(fd, msg, sizeof(msg)); + if (fd != STDOUT_FILENO) + close(fd); +} + +static inline void ksft_wait(void) +{ + char *env_str; + char byte; + int fd; + + env_str = getenv("KSFT_WAIT_FD"); + if (env_str) { + fd = atoi(env_str); + if (!fd) { + fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n", + env_str); + return; + } + } else { + /* Not running in KSFT env, wait for input from STDIN instead */ + fd = STDIN_FILENO; + } + + read(fd, &byte, sizeof(byte)); + if (fd != STDIN_FILENO) + close(fd); +} + +#endif diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 3cfad0fd4570..61287c203b6e 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -3,6 +3,7 @@ import builtins import functools import inspect +import signal import sys import time import traceback @@ -26,6 +27,10 @@ class KsftXfailEx(Exception): pass +class KsftTerminate(KeyboardInterrupt): + pass + + def ksft_pr(*objs, **kwargs): print("#", *objs, **kwargs) @@ -193,6 +198,17 @@ def ksft_setup(env): return env +def _ksft_intr(signum, frame): + # ksft runner.sh sends 2 SIGTERMs in a row on a timeout + # if we don't ignore the second one it will stop us from handling cleanup + global term_cnt + term_cnt += 1 + if term_cnt == 1: + raise KsftTerminate() + else: + ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...") + + def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases = cases or [] @@ -205,6 +221,10 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases.append(value) break + global term_cnt + term_cnt = 0 + prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr) + totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0} print("TAP version 13") @@ -233,7 +253,7 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): for line in tb.strip().split('\n'): ksft_pr("Exception|", line) if stop: - ksft_pr("Stopping tests due to KeyboardInterrupt.") + ksft_pr(f"Stopping tests due to {type(e).__name__}.") KSFT_RESULT = False cnt_key = 'fail' @@ -248,6 +268,8 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): if stop: break + signal.signal(signal.SIGTERM, prev_sigterm) + print( f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0" ) diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index 8986c584cb37..6329ae805abf 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -39,12 +39,12 @@ class 
EthtoolFamily(YnlFamily): class RtnlFamily(YnlFamily): def __init__(self, recv_size=0): - super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(), + super().__init__((SPEC_PATH / Path('rt-link.yaml')).as_posix(), schema='', recv_size=recv_size) class RtnlAddrFamily(YnlFamily): def __init__(self, recv_size=0): - super().__init__((SPEC_PATH / Path('rt_addr.yaml')).as_posix(), + super().__init__((SPEC_PATH / Path('rt-addr.yaml')).as_posix(), schema='', recv_size=recv_size) class NetdevFamily(YnlFamily): diff --git a/tools/testing/selftests/drivers/net/xdp_helper.c b/tools/testing/selftests/net/lib/xdp_helper.c index aeed25914104..eb025a9f35b1 100644 --- a/tools/testing/selftests/drivers/net/xdp_helper.c +++ b/tools/testing/selftests/net/lib/xdp_helper.c @@ -11,55 +11,16 @@ #include <net/if.h> #include <inttypes.h> +#include "ksft.h" + #define UMEM_SZ (1U << 16) #define NUM_DESC (UMEM_SZ / 2048) -/* Move this to a common header when reused! */ -static void ksft_ready(void) -{ - const char msg[7] = "ready\n"; - char *env_str; - int fd; - - env_str = getenv("KSFT_READY_FD"); - if (env_str) { - fd = atoi(env_str); - if (!fd) { - fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n", - env_str); - return; - } - } else { - fd = STDOUT_FILENO; - } - - write(fd, msg, sizeof(msg)); - if (fd != STDOUT_FILENO) - close(fd); -} -static void ksft_wait(void) +static void print_usage(const char *bin) { - char *env_str; - char byte; - int fd; - - env_str = getenv("KSFT_WAIT_FD"); - if (env_str) { - fd = atoi(env_str); - if (!fd) { - fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n", - env_str); - return; - } - } else { - /* Not running in KSFT env, wait for input from STDIN instead */ - fd = STDIN_FILENO; - } - - read(fd, &byte, sizeof(byte)); - if (fd != STDIN_FILENO) - close(fd); + fprintf(stderr, "Usage: %s ifindex queue_id [-z]\n\n" + "where:\n\t-z: force zerocopy mode", bin); } /* this is a simple helper program that creates an XDP socket and does the @@ -77,12 +38,13 @@ int main(int argc, char **argv) struct sockaddr_xdp sxdp = { 0 }; int num_desc = NUM_DESC; void *umem_area; + int retry = 0; int ifindex; int sock_fd; int queue; - if (argc != 3) { - fprintf(stderr, "Usage: %s ifindex queue_id\n", argv[0]); + if (argc != 3 && argc != 4) { + print_usage(argv[0]); return 1; } @@ -132,11 +94,29 @@ int main(int argc, char **argv) sxdp.sxdp_queue_id = queue; sxdp.sxdp_flags = 0; - if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) != 0) { - munmap(umem_area, UMEM_SZ); - perror("bind failed"); - close(sock_fd); - return 1; + if (argc > 3) { + if (!strcmp(argv[3], "-z")) { + sxdp.sxdp_flags = XDP_ZEROCOPY; + } else { + print_usage(argv[0]); + return 1; + } + } + + while (1) { + if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0) + break; + + if (errno == EBUSY && retry < 3) { + retry++; + sleep(1); + continue; + } else { + perror("bind failed"); + munmap(umem_area, UMEM_SZ); + close(sock_fd); + return 1; + } } ksft_ready(); diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index e7a75341f0f3..7a3cb4c09e45 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -225,6 +225,37 @@ chk_dump_one() fi } +chk_dump_subflow() +{ + local inet_diag_token + local subflow_line + local ss_output + local ss_token + local msg + + ss_output=$(ss -tniN $ns) + + subflow_line=$(echo "$ss_output" | \ + grep -m1 -Eo '[0-9.]+:[0-9].+ +[0-9.]+:[0-9.]+') + + ss_token=$(echo "$ss_output" | grep -m1 -Eo 'token:[^ ]+') + + 
inet_diag_token=$(ip netns exec $ns ./mptcp_diag -s "$subflow_line" | \
+			  grep -Eo 'token:[^ ]+')
+
+	msg="....chk dump_subflow"
+
+	mptcp_lib_print_title "$msg"
+	if [ -n "$ss_token" ] && [ "$ss_token" = "$inet_diag_token" ]; then
+		mptcp_lib_pr_ok
+		mptcp_lib_result_pass "${msg}"
+	else
+		mptcp_lib_pr_fail "expected $ss_token found $inet_diag_token"
+		mptcp_lib_result_fail "${msg}"
+		ret=${KSFT_FAIL}
+	fi
+}
+
 msk_info_get_value()
 {
 	local port="${1}"
@@ -316,6 +347,7 @@ chk_msk_fallback_nr 0 "....chk no fallback"
 chk_msk_inuse 2
 chk_msk_cestab 2
 chk_dump_one
+chk_dump_subflow
 
 flush_pids
 
 chk_msk_inuse 0 "2->0"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index c83a8b47bbdf..ac1349c4b9e5 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -180,13 +180,28 @@ static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen,
 }
 
 static void xgetaddrinfo(const char *node, const char *service,
-			 const struct addrinfo *hints,
+			 struct addrinfo *hints,
 			 struct addrinfo **res)
 {
-	int err = getaddrinfo(node, service, hints, res);
+	int err;
+
+again:
+	err = getaddrinfo(node, service, hints, res);
 
 	if (err) {
-		const char *errstr = getxinfo_strerr(err);
+		const char *errstr;
+
+		/* glibc supports MPTCP in getaddrinfo() only since v2.42.
+		 * For older versions, resolve with IPPROTO_TCP instead,
+		 * then create the socket with TCP/MPTCP as usual.
+		 * Link: https://sourceware.org/git/?p=glibc.git;a=commit;h=a8e9022e0f82
+		 */
+		if (err == EAI_SOCKTYPE) {
+			hints->ai_protocol = IPPROTO_TCP;
+			goto again;
+		}
+
+		errstr = getxinfo_strerr(err);
 
 		fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
 			node ? node : "", service ? service : "", errstr);
@@ -292,7 +305,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
 {
 	int sock = -1;
 	struct addrinfo hints = {
-		.ai_protocol = IPPROTO_TCP,
+		.ai_protocol = IPPROTO_MPTCP,
 		.ai_socktype = SOCK_STREAM,
 		.ai_flags = AI_PASSIVE | AI_NUMERICHOST
 	};
@@ -356,7 +369,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
 			      int infd, struct wstate *winfo)
 {
 	struct addrinfo hints = {
-		.ai_protocol = IPPROTO_TCP,
+		.ai_protocol = IPPROTO_MPTCP,
 		.ai_socktype = SOCK_STREAM,
 	};
 	struct addrinfo *a, *addr;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c
index 284286c524cf..e084796e804d 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_diag.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c
@@ -8,6 +8,7 @@
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include <linux/tcp.h>
+#include <arpa/inet.h>
 #include <unistd.h>
 #include <stdlib.h>
 
@@ -19,6 +20,15 @@
 #define IPPROTO_MPTCP 262
 #endif
 
+#define parse_rtattr_nested(tb, max, rta) \
+	(parse_rtattr_flags((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta), \
+			    NLA_F_NESTED))
+
+struct params {
+	__u32 target_token;
+	char subflow_addrs[1024];
+};
+
 struct mptcp_info {
 	__u8 mptcpi_subflows;
 	__u8 mptcpi_add_addr_signal;
@@ -46,6 +56,37 @@ struct mptcp_info {
 	__u32 mptcpi_last_ack_recv;
 };
 
+enum {
+	MPTCP_SUBFLOW_ATTR_UNSPEC,
+	MPTCP_SUBFLOW_ATTR_TOKEN_REM,
+	MPTCP_SUBFLOW_ATTR_TOKEN_LOC,
+	MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ,
+	MPTCP_SUBFLOW_ATTR_MAP_SEQ,
+	MPTCP_SUBFLOW_ATTR_MAP_SFSEQ,
+	MPTCP_SUBFLOW_ATTR_SSN_OFFSET,
+	MPTCP_SUBFLOW_ATTR_MAP_DATALEN,
+	MPTCP_SUBFLOW_ATTR_FLAGS,
+	MPTCP_SUBFLOW_ATTR_ID_REM,
+	MPTCP_SUBFLOW_ATTR_ID_LOC,
+	MPTCP_SUBFLOW_ATTR_PAD,
+
+	__MPTCP_SUBFLOW_ATTR_MAX
+};
+
+#define MPTCP_SUBFLOW_ATTR_MAX (__MPTCP_SUBFLOW_ATTR_MAX - 1)
+
+#define MPTCP_SUBFLOW_FLAG_MCAP_REM		_BITUL(0)
+#define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1) +#define MPTCP_SUBFLOW_FLAG_JOIN_REM _BITUL(2) +#define MPTCP_SUBFLOW_FLAG_JOIN_LOC _BITUL(3) +#define MPTCP_SUBFLOW_FLAG_BKUP_REM _BITUL(4) +#define MPTCP_SUBFLOW_FLAG_BKUP_LOC _BITUL(5) +#define MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED _BITUL(6) +#define MPTCP_SUBFLOW_FLAG_CONNECTED _BITUL(7) +#define MPTCP_SUBFLOW_FLAG_MAPVALID _BITUL(8) + +#define rta_getattr(type, value) (*(type *)RTA_DATA(value)) + static void die_perror(const char *msg) { perror(msg); @@ -54,11 +95,13 @@ static void die_perror(const char *msg) static void die_usage(int r) { - fprintf(stderr, "Usage: mptcp_diag -t\n"); + fprintf(stderr, "Usage:\n" + "mptcp_diag -t <token>\n" + "mptcp_diag -s \"<saddr>:<sport> <daddr>:<dport>\"\n"); exit(r); } -static void send_query(int fd, __u32 token) +static void send_query(int fd, struct inet_diag_req_v2 *r, __u32 proto) { struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK @@ -72,31 +115,26 @@ static void send_query(int fd, __u32 token) .nlmsg_type = SOCK_DIAG_BY_FAMILY, .nlmsg_flags = NLM_F_REQUEST }, - .r = { - .sdiag_family = AF_INET, - /* Real proto is set via INET_DIAG_REQ_PROTOCOL */ - .sdiag_protocol = IPPROTO_TCP, - .id.idiag_cookie[0] = token, - } + .r = *r }; struct rtattr rta_proto; struct iovec iov[6]; - int iovlen = 1; - __u32 proto; - - req.r.idiag_ext |= (1 << (INET_DIAG_INFO - 1)); - proto = IPPROTO_MPTCP; - rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL; - rta_proto.rta_len = RTA_LENGTH(sizeof(proto)); + int iovlen = 0; - iov[0] = (struct iovec) { + iov[iovlen++] = (struct iovec) { .iov_base = &req, .iov_len = sizeof(req) }; - iov[iovlen] = (struct iovec){ &rta_proto, sizeof(rta_proto)}; - iov[iovlen + 1] = (struct iovec){ &proto, sizeof(proto)}; - req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto)); - iovlen += 2; + + if (proto == IPPROTO_MPTCP) { + rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL; + rta_proto.rta_len = RTA_LENGTH(sizeof(proto)); + + iov[iovlen++] = (struct iovec){ &rta_proto, sizeof(rta_proto)}; + iov[iovlen++] = (struct iovec){ &proto, sizeof(proto)}; + req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto)); + } + struct msghdr msg = { .msg_name = &nladdr, .msg_namelen = sizeof(nladdr), @@ -160,7 +198,67 @@ static void print_info_msg(struct mptcp_info *info) printf("bytes_acked: %llu\n", info->mptcpi_bytes_acked); } -static void parse_nlmsg(struct nlmsghdr *nlh) +/* + * 'print_subflow_info' is from 'mptcp_subflow_info' + * which is a function in 'misc/ss.c' of iproute2. 
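+ * Example output line (illustrative values only, derived from the
+ * printf formats below):
+ *   flags:Jjec token:0000(id:0)/4af7ec09(id:1) seq:505 sfseq:1 ssnoff:1989 maplen:32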
+ */ +static void print_subflow_info(struct rtattr *tb[]) +{ + u_int32_t flags = 0; + + printf("It's a mptcp subflow, the subflow info:\n"); + if (tb[MPTCP_SUBFLOW_ATTR_FLAGS]) { + char caps[32 + 1] = { 0 }, *cap = &caps[0]; + + flags = rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_FLAGS]); + + if (flags & MPTCP_SUBFLOW_FLAG_MCAP_REM) + *cap++ = 'M'; + if (flags & MPTCP_SUBFLOW_FLAG_MCAP_LOC) + *cap++ = 'm'; + if (flags & MPTCP_SUBFLOW_FLAG_JOIN_REM) + *cap++ = 'J'; + if (flags & MPTCP_SUBFLOW_FLAG_JOIN_LOC) + *cap++ = 'j'; + if (flags & MPTCP_SUBFLOW_FLAG_BKUP_REM) + *cap++ = 'B'; + if (flags & MPTCP_SUBFLOW_FLAG_BKUP_LOC) + *cap++ = 'b'; + if (flags & MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED) + *cap++ = 'e'; + if (flags & MPTCP_SUBFLOW_FLAG_CONNECTED) + *cap++ = 'c'; + if (flags & MPTCP_SUBFLOW_FLAG_MAPVALID) + *cap++ = 'v'; + + if (flags) + printf(" flags:%s", caps); + } + if (tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM] && + tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC] && + tb[MPTCP_SUBFLOW_ATTR_ID_REM] && + tb[MPTCP_SUBFLOW_ATTR_ID_LOC]) + printf(" token:%04x(id:%u)/%04x(id:%u)", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM]), + rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_REM]), + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC]), + rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_LOC])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ]) + printf(" seq:%llu", + rta_getattr(__u64, tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ]) + printf(" sfseq:%u", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ])); + if (tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET]) + printf(" ssnoff:%u", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN]) + printf(" maplen:%u", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN])); + printf("\n"); +} + +static void parse_nlmsg(struct nlmsghdr *nlh, __u32 proto) { struct inet_diag_msg *r = NLMSG_DATA(nlh); struct rtattr *tb[INET_DIAG_MAX + 1]; @@ -169,7 +267,7 @@ static void parse_nlmsg(struct nlmsghdr *nlh) nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)), NLA_F_NESTED); - if (tb[INET_DIAG_INFO]) { + if (proto == IPPROTO_MPTCP && tb[INET_DIAG_INFO]) { int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]); struct mptcp_info *info; @@ -183,11 +281,28 @@ static void parse_nlmsg(struct nlmsghdr *nlh) } print_info_msg(info); } + if (proto == IPPROTO_TCP && tb[INET_DIAG_ULP_INFO]) { + struct rtattr *ulpinfo[INET_ULP_INFO_MAX + 1] = { 0 }; + + parse_rtattr_nested(ulpinfo, INET_ULP_INFO_MAX, + tb[INET_DIAG_ULP_INFO]); + + if (ulpinfo[INET_ULP_INFO_MPTCP]) { + struct rtattr *sfinfo[MPTCP_SUBFLOW_ATTR_MAX + 1] = { 0 }; + + parse_rtattr_nested(sfinfo, MPTCP_SUBFLOW_ATTR_MAX, + ulpinfo[INET_ULP_INFO_MPTCP]); + print_subflow_info(sfinfo); + } else { + printf("It's a normal TCP!\n"); + } + } } -static void recv_nlmsg(int fd, struct nlmsghdr *nlh) +static void recv_nlmsg(int fd, __u32 proto) { char rcv_buff[8192]; + struct nlmsghdr *nlh = (struct nlmsghdr *)rcv_buff; struct sockaddr_nl rcv_nladdr = { .nl_family = AF_NETLINK }; @@ -204,7 +319,6 @@ static void recv_nlmsg(int fd, struct nlmsghdr *nlh) int len; len = recvmsg(fd, &rcv_msg, 0); - nlh = (struct nlmsghdr *)rcv_buff; while (NLMSG_OK(nlh, len)) { if (nlh->nlmsg_type == NLMSG_DONE) { @@ -218,40 +332,84 @@ static void recv_nlmsg(int fd, struct nlmsghdr *nlh) -(err->error), strerror(-(err->error))); break; } - parse_nlmsg(nlh); + parse_nlmsg(nlh, proto); nlh = NLMSG_NEXT(nlh, len); } } static void get_mptcpinfo(__u32 token) { - struct nlmsghdr *nlh = NULL; + struct inet_diag_req_v2 r = { + .sdiag_family = AF_INET, 
+ /* Real proto is set via INET_DIAG_REQ_PROTOCOL */ + .sdiag_protocol = IPPROTO_TCP, + .idiag_ext = 1 << (INET_DIAG_INFO - 1), + .id.idiag_cookie[0] = token, + }; + __u32 proto = IPPROTO_MPTCP; int fd; fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); if (fd < 0) die_perror("Netlink socket"); - send_query(fd, token); - recv_nlmsg(fd, nlh); + send_query(fd, &r, proto); + recv_nlmsg(fd, proto); close(fd); } -static void parse_opts(int argc, char **argv, __u32 *target_token) +static void get_subflow_info(char *subflow_addrs) +{ + struct inet_diag_req_v2 r = { + .sdiag_family = AF_INET, + .sdiag_protocol = IPPROTO_TCP, + .idiag_ext = 1 << (INET_DIAG_INFO - 1), + .id.idiag_cookie[0] = INET_DIAG_NOCOOKIE, + .id.idiag_cookie[1] = INET_DIAG_NOCOOKIE, + }; + char saddr[64], daddr[64]; + int sport, dport; + int ret; + int fd; + + ret = sscanf(subflow_addrs, "%[^:]:%d %[^:]:%d", saddr, &sport, daddr, &dport); + if (ret != 4) + die_perror("IP PORT Pairs has style problems!"); + + printf("%s:%d -> %s:%d\n", saddr, sport, daddr, dport); + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + if (fd < 0) + die_perror("Netlink socket"); + + r.id.idiag_sport = htons(sport); + r.id.idiag_dport = htons(dport); + + inet_pton(AF_INET, saddr, &r.id.idiag_src); + inet_pton(AF_INET, daddr, &r.id.idiag_dst); + send_query(fd, &r, IPPROTO_TCP); + recv_nlmsg(fd, IPPROTO_TCP); +} + +static void parse_opts(int argc, char **argv, struct params *p) { int c; if (argc < 2) die_usage(1); - while ((c = getopt(argc, argv, "ht:")) != -1) { + while ((c = getopt(argc, argv, "ht:s:")) != -1) { switch (c) { case 'h': die_usage(0); break; case 't': - sscanf(optarg, "%x", target_token); + sscanf(optarg, "%x", &p->target_token); + break; + case 's': + strncpy(p->subflow_addrs, optarg, + sizeof(p->subflow_addrs) - 1); break; default: die_usage(1); @@ -262,10 +420,15 @@ static void parse_opts(int argc, char **argv, __u32 *target_token) int main(int argc, char *argv[]) { - __u32 target_token; + struct params p = { 0 }; + + parse_opts(argc, argv, &p); + + if (p.target_token) + get_mptcpinfo(p.target_token); - parse_opts(argc, argv, &target_token); - get_mptcpinfo(target_token); + if (p.subflow_addrs[0] != '\0') + get_subflow_info(p.subflow_addrs); return 0; } diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 218aac467321..3cf1e2a612ce 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -72,13 +72,21 @@ static const char *getxinfo_strerr(int err) } static void xgetaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, + struct addrinfo *hints, struct addrinfo **res) { +again: int err = getaddrinfo(node, service, hints, res); if (err) { - const char *errstr = getxinfo_strerr(err); + const char *errstr; + + if (err == EAI_SOCKTYPE) { + hints->ai_protocol = IPPROTO_TCP; + goto again; + } + + errstr = getxinfo_strerr(err); fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", node ? node : "", service ? 
service : "", errstr); @@ -91,7 +99,7 @@ static int sock_listen_mptcp(const char * const listenaddr, { int sock = -1; struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, .ai_flags = AI_PASSIVE | AI_NUMERICHOST }; @@ -136,7 +144,7 @@ static int sock_connect_mptcp(const char * const remoteaddr, const char * const port, int proto) { struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index befa66f5a366..b8af65373b3a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -62,6 +62,7 @@ unset sflags unset fastclose unset fullmesh unset speed +unset join_syn_rej unset join_csum_ns1 unset join_csum_ns2 unset join_fail_nr @@ -1403,6 +1404,7 @@ chk_join_nr() local syn_nr=$1 local syn_ack_nr=$2 local ack_nr=$3 + local syn_rej=${join_syn_rej:-0} local csum_ns1=${join_csum_ns1:-0} local csum_ns2=${join_csum_ns2:-0} local fail_nr=${join_fail_nr:-0} @@ -1468,6 +1470,15 @@ chk_join_nr() fail_test "got $count JOIN[s] ack HMAC failure expected 0" fi + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$syn_rej" ]; then + rc=${KSFT_FAIL} + print_check "syn rejected" + fail_test "got $count JOIN[s] syn rejected expected $syn_rej" + fi + print_results "join Rx" ${rc} join_syn_tx="${join_syn_tx:-${syn_nr}}" \ @@ -1963,7 +1974,8 @@ subflows_tests() pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 0 + join_syn_rej=1 \ + chk_join_nr 1 1 0 fi # subflow @@ -1992,7 +2004,8 @@ subflows_tests() pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 2 2 1 + join_syn_rej=1 \ + chk_join_nr 2 2 1 fi # single subflow, dev @@ -3061,7 +3074,8 @@ syncookies_tests() pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 2 1 1 + join_syn_rej=1 \ + chk_join_nr 2 1 1 fi # test signal address with cookies @@ -3545,7 +3559,8 @@ userspace_tests() pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 0 + join_syn_rej=1 \ + chk_join_nr 1 1 0 fi # userspace pm type does not send join @@ -3568,7 +3583,8 @@ userspace_tests() pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 0 + join_syn_rej=1 \ + chk_join_nr 1 1 0 chk_prio_nr 0 0 0 0 fi diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 051e289d7967..55212188871e 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -331,12 +331,15 @@ mptcp_lib_result_print_all_tap() { # get the value of keyword $1 in the line marked by keyword $2 mptcp_lib_get_info_value() { - grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' + grep "${2}" 2>/dev/null | + sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' + # the ';q' at the end limits to the first matched entry. 
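+	# For example (illustrative input, not from a real run): the line
+	# "type:10,token:4af7ec09,daddr4:10.0.1.1" with keyword "token"
+	# yields "4af7ec09": the first group anchors on "token:", the
+	# second captures the [0-9a-f:.] value that follows.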
} # $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]] mptcp_lib_evts_get_info() { - grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1}," + grep "${4:-}" "${2}" 2>/dev/null | + mptcp_lib_get_info_value "${1}" "^type:${3:-1}," } # $1: PID @@ -476,8 +479,6 @@ mptcp_lib_ns_init() { local netns for netns in "${@}"; do ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1 - ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0 - ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0 done } diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index 926b0be87c99..9934a68df237 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -159,13 +159,21 @@ static const char *getxinfo_strerr(int err) } static void xgetaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, + struct addrinfo *hints, struct addrinfo **res) { +again: int err = getaddrinfo(node, service, hints, res); if (err) { - const char *errstr = getxinfo_strerr(err); + const char *errstr; + + if (err == EAI_SOCKTYPE) { + hints->ai_protocol = IPPROTO_TCP; + goto again; + } + + errstr = getxinfo_strerr(err); fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", node ? node : "", service ? service : "", errstr); @@ -178,7 +186,7 @@ static int sock_listen_mptcp(const char * const listenaddr, { int sock = -1; struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, .ai_flags = AI_PASSIVE | AI_NUMERICHOST }; @@ -223,7 +231,7 @@ static int sock_connect_mptcp(const char * const remoteaddr, const char * const port, int proto) { struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index ffe161fac8b5..3bdcbbdba925 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -12,6 +12,7 @@ TEST_PROGS += conntrack_dump_flush.sh TEST_PROGS += conntrack_icmp_related.sh TEST_PROGS += conntrack_ipip_mtu.sh TEST_PROGS += conntrack_tcp_unreplied.sh +TEST_PROGS += conntrack_resize.sh TEST_PROGS += conntrack_sctp_collision.sh TEST_PROGS += conntrack_vrf.sh TEST_PROGS += conntrack_reverse_clash.sh diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh index 1559ba275105..011de8763094 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh @@ -60,9 +60,6 @@ bcast_ping() done } -ip netns exec "$ns0" sysctl -q net.ipv4.conf.all.rp_filter=0 -ip netns exec "$ns0" sysctl -q net.ipv4.conf.default.rp_filter=0 - if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then echo "SKIP: Can't create veth device" exit $ksft_skip diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh index 2549b6590693..ea76f2bc2f59 100755 --- a/tools/testing/selftests/net/netfilter/bridge_brouter.sh +++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh @@ -22,8 +22,6 @@ trap cleanup EXIT setup_ns nsbr ns1 ns2 -ip netns exec "$nsbr" sysctl -q net.ipv4.conf.default.rp_filter=0 -ip netns exec "$nsbr" sysctl -q net.ipv4.conf.all.rp_filter=0 if ! 
ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then echo "SKIP: Can't create veth device" exit $ksft_skip diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 43d8b500d391..363646f4fefe 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -46,6 +46,7 @@ CONFIG_NETFILTER_XT_MATCH_STATE=m CONFIG_NETFILTER_XT_MATCH_STRING=m CONFIG_NETFILTER_XT_TARGET_REDIRECT=m CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_MARK=y diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh new file mode 100755 index 000000000000..9e033e80219e --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh @@ -0,0 +1,427 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +checktool "conntrack --version" "run test without conntrack" +checktool "nft --version" "run test without nft tool" + +init_net_max=0 +ct_buckets=0 +tmpfile="" +tmpfile_proc="" +tmpfile_uniq="" +ret=0 + +insert_count=2000 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400 + +modprobe -q nf_conntrack +if ! sysctl -q net.netfilter.nf_conntrack_max >/dev/null;then + echo "SKIP: conntrack sysctls not available" + exit $KSFT_SKIP +fi + +init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) || exit 1 +ct_buckets=$(sysctl -n net.netfilter.nf_conntrack_buckets) || exit 1 + +cleanup() { + cleanup_all_ns + + rm -f "$tmpfile" "$tmpfile_proc" "$tmpfile_uniq" + + # restore original sysctl setting + sysctl -q net.netfilter.nf_conntrack_max=$init_net_max + sysctl -q net.netfilter.nf_conntrack_buckets=$ct_buckets +} +trap cleanup EXIT + +check_max_alias() +{ + local expected="$1" + # old name, expected to alias to the first, i.e. changing one + # changes the other as well. + local lv=$(sysctl -n net.nf_conntrack_max) + + if [ $expected -ne "$lv" ];then + echo "nf_conntrack_max sysctls should have identical values" + exit 1 + fi +} + +insert_ctnetlink() { + local ns="$1" + local count="$2" + local i=0 + local bulk=16 + + while [ $i -lt $count ] ;do + ip netns exec "$ns" bash -c "for i in \$(seq 1 $bulk); do \ + if ! 
conntrack -I -s \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \ + -d \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \ + --protonum 17 --timeout 3600 --status ASSURED,SEEN_REPLY --sport \$RANDOM --dport 53; then \ + return;\ + fi & \ + done ; wait" 2>/dev/null + + i=$((i+bulk)) + done +} + +check_ctcount() { + local ns="$1" + local count="$2" + local msg="$3" + + local now=$(ip netns exec "$ns" conntrack -C) + + if [ $now -ne "$count" ] ;then + echo "expected $count entries in $ns, not $now: $msg" + exit 1 + fi + + echo "PASS: got $count connections: $msg" +} + +ctresize() { + local duration="$1" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + sysctl -q net.netfilter.nf_conntrack_buckets=$RANDOM + now=$(date +%s) + done +} + +do_rsleep() { + local limit="$1" + local r=$RANDOM + + r=$((r%limit)) + sleep "$r" +} + +ct_flush_once() { + local ns="$1" + + ip netns exec "$ns" conntrack -F 2>/dev/null +} + +ctflush() { + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + do_rsleep "$duration" + + while [ $now -lt $end ]; do + ct_flush_once "$ns" + do_rsleep "$duration" + now=$(date +%s) + done +} + +ctflood() +{ + local ns="$1" + local duration="$2" + local msg="$3" + local now=$(date +%s) + local end=$((now + duration)) + local j=0 + local k=0 + + while [ $now -lt $end ]; do + j=$((j%256)) + k=$((k%256)) + + ip netns exec "$ns" bash -c \ + "j=$j k=$k; for i in \$(seq 1 254); do ping -q -c 1 127.\$k.\$j.\$i & done; wait" >/dev/null 2>&1 + + j=$((j+1)) + + if [ $j -eq 256 ];then + k=$((k+1)) + fi + + now=$(date +%s) + done + + wait +} + +# dump to /dev/null. We don't want dumps to cause infinite loops +# or use-after-free even when conntrack table is altered while dumps +# are in progress. +ct_nulldump() +{ + local ns="$1" + + ip netns exec "$ns" conntrack -L > /dev/null 2>&1 & + + # Don't require /proc support in conntrack + if [ -r /proc/self/net/nf_conntrack ] ; then + ip netns exec "$ns" bash -c "wc -l < /proc/self/net/nf_conntrack" > /dev/null & + fi + + wait +} + +check_taint() +{ + local tainted_then="$1" + local msg="$2" + + local tainted_now=0 + + if [ "$tainted_then" -ne 0 ];then + return + fi + + read tainted_now < /proc/sys/kernel/tainted + + if [ "$tainted_now" -eq 0 ];then + echo "PASS: $msg" + else + echo "TAINT: $msg" + dmesg + exit 1 + fi +} + +insert_flood() +{ + local n="$1" + local r=0 + + r=$((RANDOM%$insert_count)) + + ctflood "$n" "$timeout" "floodresize" & + insert_ctnetlink "$n" "$r" & + ctflush "$n" "$timeout" & + ct_nulldump "$n" & + + wait +} + +test_floodresize_all() +{ + local timeout=20 + local n="" + local tainted_then="" + + read tainted_then < /proc/sys/kernel/tainted + + for n in "$nsclient1" "$nsclient2";do + insert_flood "$n" & + done + + # resize table constantly while flood/insert/dump/flushs + # are happening in parallel. + ctresize "$timeout" + + # wait for subshells to complete, everything is limited + # by $timeout. + wait + + check_taint "$tainted_then" "resize+flood" +} + +check_dump() +{ + local ns="$1" + local protoname="$2" + local c=0 + local proto=0 + local proc=0 + local unique="" + local lret=0 + + # NOTE: assumes timeouts are large enough to not have + # expirations in all following tests. 
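+	# Rough idea of the checks below: "conntrack -C" reads the entry
+	# counter while "conntrack -L" walks the whole table, so with no
+	# expirations both views should agree; a mismatch would mean the
+	# dump skipped or duplicated entries, e.g. while a resize rehashes.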
+ l=$(ip netns exec "$ns" conntrack -L 2>/dev/null | sort | tee "$tmpfile" | wc -l) + c=$(ip netns exec "$ns" conntrack -C) + + if [ "$c" -eq 0 ]; then + echo "FAIL: conntrack count for $ns is 0" + lret=1 + fi + + if [ "$c" -ne "$l" ]; then + echo "FAIL: conntrack count inconsistency for $ns -L: $c != $l" + lret=1 + fi + + # check the dump we retrieved is free of duplicated entries. + unique=$(uniq "$tmpfile" | tee "$tmpfile_uniq" | wc -l) + if [ "$l" -ne "$unique" ]; then + echo "FAIL: listing contained redundant entries for $ns: $l != $unique" + diff -u "$tmpfile" "$tmpfile_uniq" + lret=1 + fi + + # we either inserted icmp or only udp, hence, --proto should return same entry count as without filter. + proto=$(ip netns exec "$ns" conntrack -L --proto $protoname 2>/dev/null | sort | uniq | tee "$tmpfile_uniq" | wc -l) + if [ "$l" -ne "$proto" ]; then + echo "FAIL: dump inconsistency for $ns -L --proto $protoname: $l != $proto" + diff -u "$tmpfile" "$tmpfile_uniq" + lret=1 + fi + + if [ -r /proc/self/net/nf_conntrack ] ; then + proc=$(ip netns exec "$ns" bash -c "sort < /proc/self/net/nf_conntrack | tee \"$tmpfile_proc\" | wc -l") + + if [ "$l" -ne "$proc" ]; then + echo "FAIL: proc inconsistency for $ns: $l != $proc" + lret=1 + fi + + proc=$(uniq "$tmpfile_proc" | tee "$tmpfile_uniq" | wc -l) + if [ "$l" -ne "$proc" ]; then + echo "FAIL: proc inconsistency after uniq filter for $ns: $l != $proc" + diff -u "$tmpfile_proc" "$tmpfile_uniq" + lret=1 + fi + fi + + if [ $lret -eq 0 ];then + echo "PASS: dump in netns $ns had same entry count (-C $c, -L $l, -p $proto, /proc $proc)" + else + echo "FAIL: dump in netns $ns had different entry count (-C $c, -L $l, -p $proto, /proc $proc)" + ret=1 + fi +} + +test_dump_all() +{ + local timeout=3 + local tainted_then="" + + read tainted_then < /proc/sys/kernel/tainted + + ct_flush_once "$nsclient1" + ct_flush_once "$nsclient2" + + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600 + + ctflood "$nsclient1" $timeout "dumpall" & + insert_ctnetlink "$nsclient2" $insert_count + + wait + + check_dump "$nsclient1" "icmp" + check_dump "$nsclient2" "udp" + + check_taint "$tainted_then" "test parallel conntrack dumps" +} + +check_sysctl_immutable() +{ + local ns="$1" + local name="$2" + local failhard="$3" + local o=0 + local n=0 + + o=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null) + n=$((o+1)) + + # return value isn't reliable, need to read it back + ip netns exec "$ns" sysctl -q "$name"=$n 2>/dev/null >/dev/null + + n=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null) + + [ -z "$n" ] && return 1 + + if [ $o -ne $n ]; then + if [ $failhard -gt 0 ] ;then + echo "FAIL: net.$name should not be changeable from namespace (now $n)" + ret=1 + fi + return 0 + fi + + return 1 +} + +test_conntrack_max_limit() +{ + sysctl -q net.netfilter.nf_conntrack_max=100 + insert_ctnetlink "$nsclient1" 101 + + # check netns is clamped by init_net, i.e., either netns follows + # init_net value, or a higher pernet limit (compared to init_net) is ignored. 
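+	# Manual equivalent (hypothetical session, not run by this test):
+	#   sysctl net.netfilter.nf_conntrack_max=100
+	#   ip netns exec <ns> conntrack -C   # never exceeds 100 after 101 inserts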
+ check_ctcount "$nsclient1" 100 "netns conntrack_max is init_net bound" + + sysctl -q net.netfilter.nf_conntrack_max=$init_net_max +} + +test_conntrack_disable() +{ + local timeout=2 + + # disable conntrack pickups + ip netns exec "$nsclient1" nft flush table ip test_ct + + ct_flush_once "$nsclient1" + ct_flush_once "$nsclient2" + + ctflood "$nsclient1" "$timeout" "conntrack disable" + ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1 + + # Disabled, should not have picked up any connection. + check_ctcount "$nsclient1" 0 "conntrack disabled" + + # This one is still active, expect 1 connection. + check_ctcount "$nsclient2" 1 "conntrack enabled" +} + +init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) + +check_max_alias $init_net_max + +sysctl -q net.netfilter.nf_conntrack_max="262000" +check_max_alias 262000 + +setup_ns nsclient1 nsclient2 + +# check this only works from init_net +for n in netfilter.nf_conntrack_buckets netfilter.nf_conntrack_expect_max net.nf_conntrack_max;do + check_sysctl_immutable "$nsclient1" "net.$n" 1 +done + +# won't work on older kernels. If it works, check that the netns obeys the limit +if check_sysctl_immutable "$nsclient1" net.netfilter.nf_conntrack_max 0;then + # subtest: if pernet is changeable, check that reducing it in pernet + # limits the pernet entries. Inverse, pernet clamped by a lower init_net + # setting, is already checked by "test_conntrack_max_limit" test. + + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=1 + insert_ctnetlink "$nsclient1" 2 + check_ctcount "$nsclient1" 1 "netns conntrack_max is pernet bound" + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=$init_net_max +fi + +for n in "$nsclient1" "$nsclient2";do +# enable conntrack in both namespaces +ip netns exec "$n" nft -f - <<EOF +table ip test_ct { + chain input { + type filter hook input priority 0 + ct state new counter + } +} +EOF +done + +tmpfile=$(mktemp) +tmpfile_proc=$(mktemp) +tmpfile_uniq=$(mktemp) +test_conntrack_max_limit +test_dump_all +test_floodresize_all +test_conntrack_disable + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh index e95ecb37c2b1..025b58f2ae91 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh @@ -52,9 +52,6 @@ trap cleanup EXIT setup_ns ns0 ns1 -ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0 -ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0 -ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0 ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.forwarding=1 if ! 
ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh index d3edb16cd4b3..6af2ea3ad6b8 100755 --- a/tools/testing/selftests/net/netfilter/ipvs.sh +++ b/tools/testing/selftests/net/netfilter/ipvs.sh @@ -129,9 +129,6 @@ test_dr() { # avoid incorrect arp response ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1 ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2 - # avoid reverse route lookup - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0 ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1 test_service @@ -167,9 +164,6 @@ test_tun() { ip netns exec "${ns2}" ip link set tunl0 up ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1 ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2 - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0 - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0 ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1 test_service diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh index ce1451c275fd..82780b39277c 100755 --- a/tools/testing/selftests/net/netfilter/nft_fib.sh +++ b/tools/testing/selftests/net/netfilter/nft_fib.sh @@ -45,6 +45,19 @@ table inet filter { EOF } +load_input_ruleset() { + local netns=$1 + +ip netns exec "$netns" nft -f /dev/stdin <<EOF +table inet filter { + chain input { + type filter hook input priority 0; policy accept; + fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop + } +} +EOF +} + load_pbr_ruleset() { local netns=$1 @@ -154,8 +167,6 @@ test_ping() { ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null -ip netns exec "$nsrouter" sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null -ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null test_ping 10.0.2.1 dead:2::1 || exit 1 check_drops || exit 1 @@ -165,6 +176,16 @@ check_drops || exit 1 echo "PASS: fib expression did not cause unwanted packet drops" +load_input_ruleset "$ns1" + +test_ping 127.0.0.1 ::1 || exit 1 +check_drops || exit 1 + +test_ping 10.0.1.99 dead:1::99 || exit 1 +check_drops || exit 1 + +echo "PASS: fib expression did not discard loopback packets" + ip netns exec "$nsrouter" nft flush table inet filter ip -net "$ns1" route del default diff --git a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh index 3b81d88bdde3..9f200f80253a 100755 --- a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh +++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh @@ -88,7 +88,6 @@ for i in $(seq 1 "$maxclients");do echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2 echo netns exec "$gw" ip link set "veth$i" up echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2 - echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".rp_filter=0 # clients have same IP addresses. 
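 # (identical addresses are fine here: each client veth gets its own
 # conntrack zone on the gateway, so the colliding 10.1.0.3 flows are
 # kept apart by zone id rather than by address)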
echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0 @@ -178,7 +177,6 @@ fi ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null # useful for debugging: allows to use 'ping' from clients to gateway. ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh index 86ec4e68594d..24ad41d526d9 100755 --- a/tools/testing/selftests/net/netfilter/rpath.sh +++ b/tools/testing/selftests/net/netfilter/rpath.sh @@ -1,8 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# return code to signal skipped test -ksft_skip=4 +source lib.sh # search for legacy iptables (it uses the xtables extensions if iptables-legacy --version >/dev/null 2>&1; then @@ -32,17 +31,10 @@ if [ -z "$iptables$ip6tables$nft" ]; then exit $ksft_skip fi -sfx=$(mktemp -u "XXXXXXXX") -ns1="ns1-$sfx" -ns2="ns2-$sfx" -trap "ip netns del $ns1; ip netns del $ns2" EXIT - -# create two netns, disable rp_filter in ns2 and -# keep IPv6 address when moving into VRF -ip netns add "$ns1" -ip netns add "$ns2" -ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0 -ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0 +trap cleanup_all_ns EXIT + +# create two netns, keep IPv6 address when moving into VRF +setup_ns ns1 ns2 ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1 # a standard connection between the netns, should not trigger rp filter diff --git a/tools/testing/selftests/net/ovpn/.gitignore b/tools/testing/selftests/net/ovpn/.gitignore new file mode 100644 index 000000000000..ee44c081ca7c --- /dev/null +++ b/tools/testing/selftests/net/ovpn/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0+ +ovpn-cli diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile new file mode 100644 index 000000000000..2d102878cb6d --- /dev/null +++ b/tools/testing/selftests/net/ovpn/Makefile @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +CFLAGS = -pedantic -Wextra -Wall -Wl,--no-as-needed -g -O0 -ggdb $(KHDR_INCLUDES) +VAR_CFLAGS = $(shell pkg-config --cflags libnl-3.0 libnl-genl-3.0 2>/dev/null) +ifeq ($(VAR_CFLAGS),) +VAR_CFLAGS = -I/usr/include/libnl3 +endif +CFLAGS += $(VAR_CFLAGS) + + +LDLIBS = -lmbedtls -lmbedcrypto +VAR_LDLIBS = $(shell pkg-config --libs libnl-3.0 libnl-genl-3.0 2>/dev/null) +ifeq ($(VAR_LDLIBS),) +VAR_LDLIBS = -lnl-genl-3 -lnl-3 +endif +LDLIBS += $(VAR_LDLIBS) + + +TEST_FILES = common.sh + +TEST_PROGS = test.sh \ + test-chachapoly.sh \ + test-tcp.sh \ + test-float.sh \ + test-close-socket.sh \ + test-close-socket-tcp.sh + +TEST_GEN_FILES := ovpn-cli + +include ../../lib.mk diff --git a/tools/testing/selftests/net/ovpn/common.sh b/tools/testing/selftests/net/ovpn/common.sh new file mode 100644 index 000000000000..7502292a1ee0 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/common.sh @@ -0,0 +1,92 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. 
+# +# Author: Antonio Quartulli <antonio@openvpn.net> + +UDP_PEERS_FILE=${UDP_PEERS_FILE:-udp_peers.txt} +TCP_PEERS_FILE=${TCP_PEERS_FILE:-tcp_peers.txt} +OVPN_CLI=${OVPN_CLI:-./ovpn-cli} +ALG=${ALG:-aes} +PROTO=${PROTO:-UDP} +FLOAT=${FLOAT:-0} + +create_ns() { + ip netns add peer${1} +} + +setup_ns() { + MODE="P2P" + + if [ ${1} -eq 0 ]; then + MODE="MP" + for p in $(seq 1 ${NUM_PEERS}); do + ip link add veth${p} netns peer0 type veth peer name veth${p} netns peer${p} + + ip -n peer0 addr add 10.10.${p}.1/24 dev veth${p} + ip -n peer0 link set veth${p} up + + ip -n peer${p} addr add 10.10.${p}.2/24 dev veth${p} + ip -n peer${p} link set veth${p} up + done + fi + + ip netns exec peer${1} ${OVPN_CLI} new_iface tun${1} $MODE + ip -n peer${1} addr add ${2} dev tun${1} + ip -n peer${1} link set tun${1} up +} + +add_peer() { + if [ "${PROTO}" == "UDP" ]; then + if [ ${1} -eq 0 ]; then + ip netns exec peer0 ${OVPN_CLI} new_multi_peer tun0 1 ${UDP_PEERS_FILE} + + for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 ${ALG} 0 \ + data64.key + done + else + ip netns exec peer${1} ${OVPN_CLI} new_peer tun${1} ${1} 1 10.10.${1}.1 1 + ip netns exec peer${1} ${OVPN_CLI} new_key tun${1} ${1} 1 0 ${ALG} 1 \ + data64.key + fi + else + if [ ${1} -eq 0 ]; then + (ip netns exec peer0 ${OVPN_CLI} listen tun0 1 ${TCP_PEERS_FILE} && { + for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 \ + ${ALG} 0 data64.key + done + }) & + sleep 5 + else + ip netns exec peer${1} ${OVPN_CLI} connect tun${1} ${1} 10.10.${1}.1 1 \ + data64.key + fi + fi +} + +cleanup() { + # some ovpn-cli processes sleep in background so they need manual poking + killall $(basename ${OVPN_CLI}) 2>/dev/null || true + + # netns peer0 is deleted without erasing ifaces first + for p in $(seq 1 10); do + ip -n peer${p} link set tun${p} down 2>/dev/null || true + ip netns exec peer${p} ${OVPN_CLI} del_iface tun${p} 2>/dev/null || true + done + for p in $(seq 1 10); do + ip -n peer0 link del veth${p} 2>/dev/null || true + done + for p in $(seq 0 10); do + ip netns del peer${p} 2>/dev/null || true + done +} + +if [ "${PROTO}" == "UDP" ]; then + NUM_PEERS=${NUM_PEERS:-$(wc -l ${UDP_PEERS_FILE} | awk '{print $1}')} +else + NUM_PEERS=${NUM_PEERS:-$(wc -l ${TCP_PEERS_FILE} | awk '{print $1}')} +fi + + diff --git a/tools/testing/selftests/net/ovpn/config b/tools/testing/selftests/net/ovpn/config new file mode 100644 index 000000000000..71946ba9fa17 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/config @@ -0,0 +1,10 @@ +CONFIG_NET=y +CONFIG_INET=y +CONFIG_STREAM_PARSER=y +CONFIG_NET_UDP_TUNNEL=y +CONFIG_DST_CACHE=y +CONFIG_CRYPTO=y +CONFIG_CRYPTO_AES=y +CONFIG_CRYPTO_GCM=y +CONFIG_CRYPTO_CHACHA20POLY1305=y +CONFIG_OVPN=m diff --git a/tools/testing/selftests/net/ovpn/data64.key b/tools/testing/selftests/net/ovpn/data64.key new file mode 100644 index 000000000000..a99e88c4e290 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/data64.key @@ -0,0 +1,5 @@ +jRqMACN7d7/aFQNT8S7jkrBD8uwrgHbG5OQZP2eu4R1Y7tfpS2bf5RHv06Vi163CGoaIiTX99R3B +ia9ycAH8Wz1+9PWv51dnBLur9jbShlgZ2QHLtUc4a/gfT7zZwULXuuxdLnvR21DDeMBaTbkgbai9 +uvAa7ne1liIgGFzbv+Bas4HDVrygxIxuAnP5Qgc3648IJkZ0QEXPF+O9f0n5+QIvGCxkAUVx+5K6 +KIs+SoeWXnAopELmoGSjUpFtJbagXK82HfdqpuUxT2Tnuef0/14SzVE/vNleBNu2ZbyrSAaah8tE +BofkPJUBFY+YQcfZNM5Dgrw3i+Bpmpq/gpdg5w== diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c new file mode 100644 index 000000000000..69e41fc07fbc --- /dev/null +++ 
b/tools/testing/selftests/net/ovpn/ovpn-cli.c @@ -0,0 +1,2376 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel accelerator + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: Antonio Quartulli <antonio@openvpn.net> + */ + +#include <stdio.h> +#include <inttypes.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <time.h> + +#include <linux/ovpn.h> +#include <linux/types.h> +#include <linux/netlink.h> + +#include <netlink/socket.h> +#include <netlink/netlink.h> +#include <netlink/genl/genl.h> +#include <netlink/genl/family.h> +#include <netlink/genl/ctrl.h> + +#include <mbedtls/base64.h> +#include <mbedtls/error.h> + +#include <sys/socket.h> + +/* defines to make checkpatch happy */ +#define strscpy strncpy +#define __always_unused __attribute__((__unused__)) + +/* libnl < 3.5.0 does not set the NLA_F_NESTED on its own, therefore we + * have to explicitly do it to prevent the kernel from failing upon + * parsing of the message + */ +#define nla_nest_start(_msg, _type) \ + nla_nest_start(_msg, (_type) | NLA_F_NESTED) + +/* libnl < 3.11.0 does not implement nla_get_uint() */ +uint64_t ovpn_nla_get_uint(struct nlattr *attr) +{ + if (nla_len(attr) == sizeof(uint32_t)) + return nla_get_u32(attr); + else + return nla_get_u64(attr); +} + +typedef int (*ovpn_nl_cb)(struct nl_msg *msg, void *arg); + +enum ovpn_key_direction { + KEY_DIR_IN = 0, + KEY_DIR_OUT, +}; + +#define KEY_LEN (256 / 8) +#define NONCE_LEN 8 + +#define PEER_ID_UNDEF 0x00FFFFFF +#define MAX_PEERS 10 + +struct nl_ctx { + struct nl_sock *nl_sock; + struct nl_msg *nl_msg; + struct nl_cb *nl_cb; + + int ovpn_dco_id; +}; + +enum ovpn_cmd { + CMD_INVALID, + CMD_NEW_IFACE, + CMD_DEL_IFACE, + CMD_LISTEN, + CMD_CONNECT, + CMD_NEW_PEER, + CMD_NEW_MULTI_PEER, + CMD_SET_PEER, + CMD_DEL_PEER, + CMD_GET_PEER, + CMD_NEW_KEY, + CMD_DEL_KEY, + CMD_GET_KEY, + CMD_SWAP_KEYS, + CMD_LISTEN_MCAST, +}; + +struct ovpn_ctx { + enum ovpn_cmd cmd; + + __u8 key_enc[KEY_LEN]; + __u8 key_dec[KEY_LEN]; + __u8 nonce[NONCE_LEN]; + + enum ovpn_cipher_alg cipher; + + sa_family_t sa_family; + + unsigned long peer_id; + unsigned long lport; + + union { + struct sockaddr_in in4; + struct sockaddr_in6 in6; + } remote; + + union { + struct sockaddr_in in4; + struct sockaddr_in6 in6; + } peer_ip; + + bool peer_ip_set; + + unsigned int ifindex; + char ifname[IFNAMSIZ]; + enum ovpn_mode mode; + bool mode_set; + + int socket; + int cli_sockets[MAX_PEERS]; + + __u32 keepalive_interval; + __u32 keepalive_timeout; + + enum ovpn_key_direction key_dir; + enum ovpn_key_slot key_slot; + int key_id; + + const char *peers_file; +}; + +static int ovpn_nl_recvmsgs(struct nl_ctx *ctx) +{ + int ret; + + ret = nl_recvmsgs(ctx->nl_sock, ctx->nl_cb); + + switch (ret) { + case -NLE_INTR: + fprintf(stderr, + "netlink received interrupt due to signal - ignoring\n"); + break; + case -NLE_NOMEM: + fprintf(stderr, "netlink out of memory error\n"); + break; + case -NLE_AGAIN: + fprintf(stderr, + "netlink reports blocking read - aborting wait\n"); + break; + default: + if (ret) + fprintf(stderr, "netlink reports error (%d): %s\n", + ret, nl_geterror(-ret)); + break; + } + + return ret; +} + +static struct nl_ctx *nl_ctx_alloc_flags(struct ovpn_ctx *ovpn, int cmd, + int flags) +{ + struct nl_ctx *ctx; + int err, ret; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + return NULL; + + ctx->nl_sock = nl_socket_alloc(); + if (!ctx->nl_sock) { + fprintf(stderr, 
"cannot allocate netlink socket\n"); + goto err_free; + } + + nl_socket_set_buffer_size(ctx->nl_sock, 8192, 8192); + + ret = genl_connect(ctx->nl_sock); + if (ret) { + fprintf(stderr, "cannot connect to generic netlink: %s\n", + nl_geterror(ret)); + goto err_sock; + } + + /* enable Extended ACK for detailed error reporting */ + err = 1; + setsockopt(nl_socket_get_fd(ctx->nl_sock), SOL_NETLINK, NETLINK_EXT_ACK, + &err, sizeof(err)); + + ctx->ovpn_dco_id = genl_ctrl_resolve(ctx->nl_sock, OVPN_FAMILY_NAME); + if (ctx->ovpn_dco_id < 0) { + fprintf(stderr, "cannot find ovpn_dco netlink component: %d\n", + ctx->ovpn_dco_id); + goto err_free; + } + + ctx->nl_msg = nlmsg_alloc(); + if (!ctx->nl_msg) { + fprintf(stderr, "cannot allocate netlink message\n"); + goto err_sock; + } + + ctx->nl_cb = nl_cb_alloc(NL_CB_DEFAULT); + if (!ctx->nl_cb) { + fprintf(stderr, "failed to allocate netlink callback\n"); + goto err_msg; + } + + nl_socket_set_cb(ctx->nl_sock, ctx->nl_cb); + + genlmsg_put(ctx->nl_msg, 0, 0, ctx->ovpn_dco_id, 0, flags, cmd, 0); + + if (ovpn->ifindex > 0) + NLA_PUT_U32(ctx->nl_msg, OVPN_A_IFINDEX, ovpn->ifindex); + + return ctx; +nla_put_failure: +err_msg: + nlmsg_free(ctx->nl_msg); +err_sock: + nl_socket_free(ctx->nl_sock); +err_free: + free(ctx); + return NULL; +} + +static struct nl_ctx *nl_ctx_alloc(struct ovpn_ctx *ovpn, int cmd) +{ + return nl_ctx_alloc_flags(ovpn, cmd, 0); +} + +static void nl_ctx_free(struct nl_ctx *ctx) +{ + if (!ctx) + return; + + nl_socket_free(ctx->nl_sock); + nlmsg_free(ctx->nl_msg); + nl_cb_put(ctx->nl_cb); + free(ctx); +} + +static int ovpn_nl_cb_error(struct sockaddr_nl (*nla)__always_unused, + struct nlmsgerr *err, void *arg) +{ + struct nlmsghdr *nlh = (struct nlmsghdr *)err - 1; + struct nlattr *tb_msg[NLMSGERR_ATTR_MAX + 1]; + int len = nlh->nlmsg_len; + struct nlattr *attrs; + int *ret = arg; + int ack_len = sizeof(*nlh) + sizeof(int) + sizeof(*nlh); + + *ret = err->error; + + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + return NL_STOP; + + if (!(nlh->nlmsg_flags & NLM_F_CAPPED)) + ack_len += err->msg.nlmsg_len - sizeof(*nlh); + + if (len <= ack_len) + return NL_STOP; + + attrs = (void *)((uint8_t *)nlh + ack_len); + len -= ack_len; + + nla_parse(tb_msg, NLMSGERR_ATTR_MAX, attrs, len, NULL); + if (tb_msg[NLMSGERR_ATTR_MSG]) { + len = strnlen((char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]), + nla_len(tb_msg[NLMSGERR_ATTR_MSG])); + fprintf(stderr, "kernel error: %*s\n", len, + (char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG])); + } + + if (tb_msg[NLMSGERR_ATTR_MISS_NEST]) { + fprintf(stderr, "missing required nesting type %u\n", + nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_NEST])); + } + + if (tb_msg[NLMSGERR_ATTR_MISS_TYPE]) { + fprintf(stderr, "missing required attribute type %u\n", + nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_TYPE])); + } + + return NL_STOP; +} + +static int ovpn_nl_cb_finish(struct nl_msg (*msg)__always_unused, + void *arg) +{ + int *status = arg; + + *status = 0; + return NL_SKIP; +} + +static int ovpn_nl_cb_ack(struct nl_msg (*msg)__always_unused, + void *arg) +{ + int *status = arg; + + *status = 0; + return NL_STOP; +} + +static int ovpn_nl_msg_send(struct nl_ctx *ctx, ovpn_nl_cb cb) +{ + int status = 1; + + nl_cb_err(ctx->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &status); + nl_cb_set(ctx->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish, + &status); + nl_cb_set(ctx->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_ack, &status); + + if (cb) + nl_cb_set(ctx->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, ctx); + + nl_send_auto_complete(ctx->nl_sock, 
ctx->nl_msg); + + while (status == 1) + ovpn_nl_recvmsgs(ctx); + + if (status < 0) + fprintf(stderr, "failed to send netlink message: %s (%d)\n", + strerror(-status), status); + + return status; +} + +static int ovpn_parse_key(const char *file, struct ovpn_ctx *ctx) +{ + int idx_enc, idx_dec, ret = -1; + unsigned char *ckey = NULL; + __u8 *bkey = NULL; + size_t olen = 0; + long ckey_len; + FILE *fp; + + fp = fopen(file, "r"); + if (!fp) { + fprintf(stderr, "cannot open: %s\n", file); + return -1; + } + + /* get file size */ + fseek(fp, 0L, SEEK_END); + ckey_len = ftell(fp); + rewind(fp); + + /* if the file is longer, let's just read a portion */ + if (ckey_len > 256) + ckey_len = 256; + + ckey = malloc(ckey_len); + if (!ckey) + goto err; + + ret = fread(ckey, 1, ckey_len, fp); + if (ret != ckey_len) { + fprintf(stderr, + "couldn't read enough data from key file: %dbytes read\n", + ret); + goto err; + } + + olen = 0; + ret = mbedtls_base64_decode(NULL, 0, &olen, ckey, ckey_len); + if (ret != MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL) { + char buf[256]; + + mbedtls_strerror(ret, buf, sizeof(buf)); + fprintf(stderr, "unexpected base64 error1: %s (%d)\n", buf, + ret); + + goto err; + } + + bkey = malloc(olen); + if (!bkey) { + fprintf(stderr, "cannot allocate binary key buffer\n"); + goto err; + } + + ret = mbedtls_base64_decode(bkey, olen, &olen, ckey, ckey_len); + if (ret) { + char buf[256]; + + mbedtls_strerror(ret, buf, sizeof(buf)); + fprintf(stderr, "unexpected base64 error2: %s (%d)\n", buf, + ret); + + goto err; + } + + if (olen < 2 * KEY_LEN + NONCE_LEN) { + fprintf(stderr, + "not enough data in key file, found %zdB but needs %dB\n", + olen, 2 * KEY_LEN + NONCE_LEN); + goto err; + } + + switch (ctx->key_dir) { + case KEY_DIR_IN: + idx_enc = 0; + idx_dec = 1; + break; + case KEY_DIR_OUT: + idx_enc = 1; + idx_dec = 0; + break; + default: + goto err; + } + + memcpy(ctx->key_enc, bkey + KEY_LEN * idx_enc, KEY_LEN); + memcpy(ctx->key_dec, bkey + KEY_LEN * idx_dec, KEY_LEN); + memcpy(ctx->nonce, bkey + 2 * KEY_LEN, NONCE_LEN); + + ret = 0; + +err: + fclose(fp); + free(bkey); + free(ckey); + + return ret; +} + +static int ovpn_parse_cipher(const char *cipher, struct ovpn_ctx *ctx) +{ + if (strcmp(cipher, "aes") == 0) + ctx->cipher = OVPN_CIPHER_ALG_AES_GCM; + else if (strcmp(cipher, "chachapoly") == 0) + ctx->cipher = OVPN_CIPHER_ALG_CHACHA20_POLY1305; + else if (strcmp(cipher, "none") == 0) + ctx->cipher = OVPN_CIPHER_ALG_NONE; + else + return -ENOTSUP; + + return 0; +} + +static int ovpn_parse_key_direction(const char *dir, struct ovpn_ctx *ctx) +{ + int in_dir; + + in_dir = strtoll(dir, NULL, 10); + switch (in_dir) { + case KEY_DIR_IN: + case KEY_DIR_OUT: + ctx->key_dir = in_dir; + break; + default: + fprintf(stderr, + "invalid key direction provided. 
Can be 0 or 1 only\n"); + return -1; + } + + return 0; +} + +static int ovpn_socket(struct ovpn_ctx *ctx, sa_family_t family, int proto) +{ + struct sockaddr_storage local_sock = { 0 }; + struct sockaddr_in6 *in6; + struct sockaddr_in *in; + int ret, s, sock_type; + size_t sock_len; + + if (proto == IPPROTO_UDP) + sock_type = SOCK_DGRAM; + else if (proto == IPPROTO_TCP) + sock_type = SOCK_STREAM; + else + return -EINVAL; + + s = socket(family, sock_type, 0); + if (s < 0) { + perror("cannot create socket"); + return -1; + } + + switch (family) { + case AF_INET: + in = (struct sockaddr_in *)&local_sock; + in->sin_family = family; + in->sin_port = htons(ctx->lport); + in->sin_addr.s_addr = htonl(INADDR_ANY); + sock_len = sizeof(*in); + break; + case AF_INET6: + in6 = (struct sockaddr_in6 *)&local_sock; + in6->sin6_family = family; + in6->sin6_port = htons(ctx->lport); + in6->sin6_addr = in6addr_any; + sock_len = sizeof(*in6); + break; + default: + return -1; + } + + int opt = 1; + + ret = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + + if (ret < 0) { + perror("setsockopt for SO_REUSEADDR"); + return ret; + } + + ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); + if (ret < 0) { + perror("setsockopt for SO_REUSEPORT"); + return ret; + } + + if (family == AF_INET6) { + opt = 0; + if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &opt, + sizeof(opt))) { + perror("failed to set IPV6_V6ONLY"); + return -1; + } + } + + ret = bind(s, (struct sockaddr *)&local_sock, sock_len); + if (ret < 0) { + perror("cannot bind socket"); + goto err_socket; + } + + ctx->socket = s; + ctx->sa_family = family; + return 0; + +err_socket: + close(s); + return -1; +} + +static int ovpn_udp_socket(struct ovpn_ctx *ctx, sa_family_t family) +{ + return ovpn_socket(ctx, family, IPPROTO_UDP); +} + +static int ovpn_listen(struct ovpn_ctx *ctx, sa_family_t family) +{ + int ret; + + ret = ovpn_socket(ctx, family, IPPROTO_TCP); + if (ret < 0) + return ret; + + ret = listen(ctx->socket, 10); + if (ret < 0) { + perror("listen"); + close(ctx->socket); + return -1; + } + + return 0; +} + +static int ovpn_accept(struct ovpn_ctx *ctx) +{ + socklen_t socklen; + int ret; + + socklen = sizeof(ctx->remote); + ret = accept(ctx->socket, (struct sockaddr *)&ctx->remote, &socklen); + if (ret < 0) { + perror("accept"); + goto err; + } + + fprintf(stderr, "Connection received!\n"); + + switch (socklen) { + case sizeof(struct sockaddr_in): + case sizeof(struct sockaddr_in6): + break; + default: + fprintf(stderr, "error: expecting IPv4 or IPv6 connection\n"); + close(ret); + ret = -EINVAL; + goto err; + } + + return ret; +err: + close(ctx->socket); + return ret; +} + +static int ovpn_connect(struct ovpn_ctx *ovpn) +{ + socklen_t socklen; + int s, ret; + + s = socket(ovpn->remote.in4.sin_family, SOCK_STREAM, 0); + if (s < 0) { + perror("cannot create socket"); + return -1; + } + + switch (ovpn->remote.in4.sin_family) { + case AF_INET: + socklen = sizeof(struct sockaddr_in); + break; + case AF_INET6: + socklen = sizeof(struct sockaddr_in6); + break; + default: + return -EOPNOTSUPP; + } + + ret = connect(s, (struct sockaddr *)&ovpn->remote, socklen); + if (ret < 0) { + perror("connect"); + goto err; + } + + fprintf(stderr, "connected\n"); + + ovpn->socket = s; + + return 0; +err: + close(s); + return ret; +} + +static int ovpn_new_peer(struct ovpn_ctx *ovpn, bool is_tcp) +{ + struct nlattr *attr; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_NEW); + if (!ctx) + return -ENOMEM; + + attr = 
nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_SOCKET, ovpn->socket); + + if (!is_tcp) { + switch (ovpn->remote.in4.sin_family) { + case AF_INET: + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV4, + ovpn->remote.in4.sin_addr.s_addr); + NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT, + ovpn->remote.in4.sin_port); + break; + case AF_INET6: + NLA_PUT(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV6, + sizeof(ovpn->remote.in6.sin6_addr), + &ovpn->remote.in6.sin6_addr); + NLA_PUT_U32(ctx->nl_msg, + OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID, + ovpn->remote.in6.sin6_scope_id); + NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT, + ovpn->remote.in6.sin6_port); + break; + default: + fprintf(stderr, + "Invalid family for remote socket address\n"); + goto nla_put_failure; + } + } + + if (ovpn->peer_ip_set) { + switch (ovpn->peer_ip.in4.sin_family) { + case AF_INET: + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_VPN_IPV4, + ovpn->peer_ip.in4.sin_addr.s_addr); + break; + case AF_INET6: + NLA_PUT(ctx->nl_msg, OVPN_A_PEER_VPN_IPV6, + sizeof(struct in6_addr), + &ovpn->peer_ip.in6.sin6_addr); + break; + default: + fprintf(stderr, "Invalid family for peer address\n"); + goto nla_put_failure; + } + } + + nla_nest_end(ctx->nl_msg, attr); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_set_peer(struct ovpn_ctx *ovpn) +{ + struct nlattr *attr; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_SET); + if (!ctx) + return -ENOMEM; + + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_INTERVAL, + ovpn->keepalive_interval); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_TIMEOUT, + ovpn->keepalive_timeout); + nla_nest_end(ctx->nl_msg, attr); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_del_peer(struct ovpn_ctx *ovpn) +{ + struct nlattr *attr; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_DEL); + if (!ctx) + return -ENOMEM; + + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + nla_nest_end(ctx->nl_msg, attr); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_handle_peer(struct nl_msg *msg, void (*arg)__always_unused) +{ + struct nlattr *pattrs[OVPN_A_PEER_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *attrs[OVPN_A_MAX + 1]; + __u16 rport = 0, lport = 0; + + nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!attrs[OVPN_A_PEER]) { + fprintf(stderr, "no packet content in netlink message\n"); + return NL_SKIP; + } + + nla_parse(pattrs, OVPN_A_PEER_MAX, nla_data(attrs[OVPN_A_PEER]), + nla_len(attrs[OVPN_A_PEER]), NULL); + + if (pattrs[OVPN_A_PEER_ID]) + fprintf(stderr, "* Peer %u\n", + nla_get_u32(pattrs[OVPN_A_PEER_ID])); + + if (pattrs[OVPN_A_PEER_SOCKET_NETNSID]) + fprintf(stderr, "\tsocket NetNS ID: %d\n", + nla_get_s32(pattrs[OVPN_A_PEER_SOCKET_NETNSID])); + + if (pattrs[OVPN_A_PEER_VPN_IPV4]) { + char buf[INET_ADDRSTRLEN]; + + inet_ntop(AF_INET, nla_data(pattrs[OVPN_A_PEER_VPN_IPV4]), + buf, sizeof(buf)); + fprintf(stderr, "\tVPN IPv4: %s\n", buf); + } + + if (pattrs[OVPN_A_PEER_VPN_IPV6]) { + char buf[INET6_ADDRSTRLEN]; + + inet_ntop(AF_INET6, 
nla_data(pattrs[OVPN_A_PEER_VPN_IPV6]), + buf, sizeof(buf)); + fprintf(stderr, "\tVPN IPv6: %s\n", buf); + } + + if (pattrs[OVPN_A_PEER_LOCAL_PORT]) + lport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_LOCAL_PORT])); + + if (pattrs[OVPN_A_PEER_REMOTE_PORT]) + rport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_REMOTE_PORT])); + + if (pattrs[OVPN_A_PEER_REMOTE_IPV6]) { + void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV6]; + char buf[INET6_ADDRSTRLEN]; + int scope_id = -1; + + if (pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]) { + void *p = pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]; + + scope_id = nla_get_u32(p); + } + + inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf)); + fprintf(stderr, "\tRemote: %s:%hu (scope-id: %u)\n", buf, rport, + scope_id); + + if (pattrs[OVPN_A_PEER_LOCAL_IPV6]) { + void *ip = pattrs[OVPN_A_PEER_LOCAL_IPV6]; + + inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf)); + fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport); + } + } + + if (pattrs[OVPN_A_PEER_REMOTE_IPV4]) { + void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV4]; + char buf[INET_ADDRSTRLEN]; + + inet_ntop(AF_INET, nla_data(ip), buf, sizeof(buf)); + fprintf(stderr, "\tRemote: %s:%hu\n", buf, rport); + + if (pattrs[OVPN_A_PEER_LOCAL_IPV4]) { + void *p = pattrs[OVPN_A_PEER_LOCAL_IPV4]; + + inet_ntop(AF_INET, nla_data(p), buf, sizeof(buf)); + fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport); + } + } + + if (pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]) { + void *p = pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]; + + fprintf(stderr, "\tKeepalive interval: %u sec\n", + nla_get_u32(p)); + } + + if (pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]) + fprintf(stderr, "\tKeepalive timeout: %u sec\n", + nla_get_u32(pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT])); + + if (pattrs[OVPN_A_PEER_VPN_RX_BYTES]) + fprintf(stderr, "\tVPN RX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_BYTES])); + + if (pattrs[OVPN_A_PEER_VPN_TX_BYTES]) + fprintf(stderr, "\tVPN TX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_BYTES])); + + if (pattrs[OVPN_A_PEER_VPN_RX_PACKETS]) + fprintf(stderr, "\tVPN RX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_PACKETS])); + + if (pattrs[OVPN_A_PEER_VPN_TX_PACKETS]) + fprintf(stderr, "\tVPN TX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_PACKETS])); + + if (pattrs[OVPN_A_PEER_LINK_RX_BYTES]) + fprintf(stderr, "\tLINK RX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_BYTES])); + + if (pattrs[OVPN_A_PEER_LINK_TX_BYTES]) + fprintf(stderr, "\tLINK TX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_BYTES])); + + if (pattrs[OVPN_A_PEER_LINK_RX_PACKETS]) + fprintf(stderr, "\tLINK RX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_PACKETS])); + + if (pattrs[OVPN_A_PEER_LINK_TX_PACKETS]) + fprintf(stderr, "\tLINK TX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_PACKETS])); + + return NL_SKIP; +} + +static int ovpn_get_peer(struct ovpn_ctx *ovpn) +{ + int flags = 0, ret = -1; + struct nlattr *attr; + struct nl_ctx *ctx; + + if (ovpn->peer_id == PEER_ID_UNDEF) + flags = NLM_F_DUMP; + + ctx = nl_ctx_alloc_flags(ovpn, OVPN_CMD_PEER_GET, flags); + if (!ctx) + return -ENOMEM; + + if (ovpn->peer_id != PEER_ID_UNDEF) { + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + nla_nest_end(ctx->nl_msg, attr); + } + + ret = ovpn_nl_msg_send(ctx, ovpn_handle_peer); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int 
ovpn_new_key(struct ovpn_ctx *ovpn) +{ + struct nlattr *keyconf, *key_dir; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_NEW); + if (!ctx) + return -ENOMEM; + + keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_KEY_ID, ovpn->key_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_CIPHER_ALG, ovpn->cipher); + + key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_ENCRYPT_DIR); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_enc); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce); + nla_nest_end(ctx->nl_msg, key_dir); + + key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_DECRYPT_DIR); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_dec); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce); + nla_nest_end(ctx->nl_msg, key_dir); + + nla_nest_end(ctx->nl_msg, keyconf); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_del_key(struct ovpn_ctx *ovpn) +{ + struct nlattr *keyconf; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_DEL); + if (!ctx) + return -ENOMEM; + + keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot); + nla_nest_end(ctx->nl_msg, keyconf); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_handle_key(struct nl_msg *msg, void (*arg)__always_unused) +{ + struct nlattr *kattrs[OVPN_A_KEYCONF_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *attrs[OVPN_A_MAX + 1]; + + nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!attrs[OVPN_A_KEYCONF]) { + fprintf(stderr, "no packet content in netlink message\n"); + return NL_SKIP; + } + + nla_parse(kattrs, OVPN_A_KEYCONF_MAX, nla_data(attrs[OVPN_A_KEYCONF]), + nla_len(attrs[OVPN_A_KEYCONF]), NULL); + + if (kattrs[OVPN_A_KEYCONF_PEER_ID]) + fprintf(stderr, "* Peer %u\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_PEER_ID])); + if (kattrs[OVPN_A_KEYCONF_SLOT]) { + fprintf(stderr, "\t- Slot: "); + switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT])) { + case OVPN_KEY_SLOT_PRIMARY: + fprintf(stderr, "primary\n"); + break; + case OVPN_KEY_SLOT_SECONDARY: + fprintf(stderr, "secondary\n"); + break; + default: + fprintf(stderr, "invalid (%u)\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT])); + break; + } + } + if (kattrs[OVPN_A_KEYCONF_KEY_ID]) + fprintf(stderr, "\t- Key ID: %u\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_KEY_ID])); + if (kattrs[OVPN_A_KEYCONF_CIPHER_ALG]) { + fprintf(stderr, "\t- Cipher: "); + switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG])) { + case OVPN_CIPHER_ALG_NONE: + fprintf(stderr, "none\n"); + break; + case OVPN_CIPHER_ALG_AES_GCM: + fprintf(stderr, "aes-gcm\n"); + break; + case OVPN_CIPHER_ALG_CHACHA20_POLY1305: + fprintf(stderr, "chacha20poly1305\n"); + break; + default: + fprintf(stderr, "invalid (%u)\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG])); + break; + } + } + + return NL_SKIP; +} + +static int ovpn_get_key(struct ovpn_ctx *ovpn) +{ + struct nlattr *keyconf; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_GET); + if (!ctx) + return 
-ENOMEM; + + keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot); + nla_nest_end(ctx->nl_msg, keyconf); + + ret = ovpn_nl_msg_send(ctx, ovpn_handle_key); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_swap_keys(struct ovpn_ctx *ovpn) +{ + struct nl_ctx *ctx; + struct nlattr *kc; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_SWAP); + if (!ctx) + return -ENOMEM; + + kc = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + nla_nest_end(ctx->nl_msg, kc); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +/* Helper function used to easily add attributes to a rtnl message */ +static int ovpn_addattr(struct nlmsghdr *n, int maxlen, int type, + const void *data, int alen) +{ + int len = RTA_LENGTH(alen); + struct rtattr *rta; + + if ((int)(NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len)) > maxlen) { + fprintf(stderr, "%s: rtnl: message exceeded bound of %d\n", + __func__, maxlen); + return -EMSGSIZE; + } + + rta = nlmsg_tail(n); + rta->rta_type = type; + rta->rta_len = len; + + if (!data) + memset(RTA_DATA(rta), 0, alen); + else + memcpy(RTA_DATA(rta), data, alen); + + n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); + + return 0; +} + +static struct rtattr *ovpn_nest_start(struct nlmsghdr *msg, size_t max_size, + int attr) +{ + struct rtattr *nest = nlmsg_tail(msg); + + if (ovpn_addattr(msg, max_size, attr, NULL, 0) < 0) + return NULL; + + return nest; +} + +static void ovpn_nest_end(struct nlmsghdr *msg, struct rtattr *nest) +{ + nest->rta_len = (uint8_t *)nlmsg_tail(msg) - (uint8_t *)nest; +} + +#define RT_SNDBUF_SIZE (1024 * 2) +#define RT_RCVBUF_SIZE (1024 * 4) + +/* Open RTNL socket */ +static int ovpn_rt_socket(void) +{ + int sndbuf = RT_SNDBUF_SIZE, rcvbuf = RT_RCVBUF_SIZE, fd; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd < 0) { + fprintf(stderr, "%s: cannot open netlink socket\n", __func__); + return fd; + } + + if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, + sizeof(sndbuf)) < 0) { + fprintf(stderr, "%s: SO_SNDBUF\n", __func__); + close(fd); + return -1; + } + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, + sizeof(rcvbuf)) < 0) { + fprintf(stderr, "%s: SO_RCVBUF\n", __func__); + close(fd); + return -1; + } + + return fd; +} + +/* Bind socket to Netlink subsystem */ +static int ovpn_rt_bind(int fd, uint32_t groups) +{ + struct sockaddr_nl local = { 0 }; + socklen_t addr_len; + + local.nl_family = AF_NETLINK; + local.nl_groups = groups; + + if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) { + fprintf(stderr, "%s: cannot bind netlink socket: %d\n", + __func__, errno); + return -errno; + } + + addr_len = sizeof(local); + if (getsockname(fd, (struct sockaddr *)&local, &addr_len) < 0) { + fprintf(stderr, "%s: cannot getsockname: %d\n", __func__, + errno); + return -errno; + } + + if (addr_len != sizeof(local)) { + fprintf(stderr, "%s: wrong address length %d\n", __func__, + addr_len); + return -EINVAL; + } + + if (local.nl_family != AF_NETLINK) { + fprintf(stderr, "%s: wrong address family %d\n", __func__, + local.nl_family); + return -EINVAL; + } + + return 0; +} + +typedef int (*ovpn_parse_reply_cb)(struct nlmsghdr *msg, void *arg); + +/* Send Netlink message and run callback on reply (if specified) */ +static int ovpn_rt_send(struct nlmsghdr *payload, pid_t peer, + 
unsigned int groups, ovpn_parse_reply_cb cb, + void *arg_cb) +{ + int len, rem_len, fd, ret, rcv_len; + struct sockaddr_nl nladdr = { 0 }; + struct nlmsgerr *err; + struct nlmsghdr *h; + char buf[1024 * 16]; + struct iovec iov = { + .iov_base = payload, + .iov_len = payload->nlmsg_len, + }; + struct msghdr nlmsg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + nladdr.nl_family = AF_NETLINK; + nladdr.nl_pid = peer; + nladdr.nl_groups = groups; + + payload->nlmsg_seq = time(NULL); + + /* no need to send reply */ + if (!cb) + payload->nlmsg_flags |= NLM_F_ACK; + + fd = ovpn_rt_socket(); + if (fd < 0) { + fprintf(stderr, "%s: can't open rtnl socket\n", __func__); + return -errno; + } + + ret = ovpn_rt_bind(fd, 0); + if (ret < 0) { + fprintf(stderr, "%s: can't bind rtnl socket\n", __func__); + ret = -errno; + goto out; + } + + ret = sendmsg(fd, &nlmsg, 0); + if (ret < 0) { + fprintf(stderr, "%s: rtnl: error on sendmsg()\n", __func__); + ret = -errno; + goto out; + } + + /* prepare buffer to store RTNL replies */ + memset(buf, 0, sizeof(buf)); + iov.iov_base = buf; + + while (1) { + /* + * iov_len is modified by recvmsg(), therefore has to be initialized before + * using it again + */ + iov.iov_len = sizeof(buf); + rcv_len = recvmsg(fd, &nlmsg, 0); + if (rcv_len < 0) { + if (errno == EINTR || errno == EAGAIN) { + fprintf(stderr, "%s: interrupted call\n", + __func__); + continue; + } + fprintf(stderr, "%s: rtnl: error on recvmsg()\n", + __func__); + ret = -errno; + goto out; + } + + if (rcv_len == 0) { + fprintf(stderr, + "%s: rtnl: socket reached unexpected EOF\n", + __func__); + ret = -EIO; + goto out; + } + + if (nlmsg.msg_namelen != sizeof(nladdr)) { + fprintf(stderr, + "%s: sender address length: %u (expected %zu)\n", + __func__, nlmsg.msg_namelen, sizeof(nladdr)); + ret = -EIO; + goto out; + } + + h = (struct nlmsghdr *)buf; + while (rcv_len >= (int)sizeof(*h)) { + len = h->nlmsg_len; + rem_len = len - sizeof(*h); + + if (rem_len < 0 || len > rcv_len) { + if (nlmsg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "%s: truncated message\n", + __func__); + ret = -EIO; + goto out; + } + fprintf(stderr, "%s: malformed message: len=%d\n", + __func__, len); + ret = -EIO; + goto out; + } + + if (h->nlmsg_type == NLMSG_DONE) { + ret = 0; + goto out; + } + + if (h->nlmsg_type == NLMSG_ERROR) { + err = (struct nlmsgerr *)NLMSG_DATA(h); + if (rem_len < (int)sizeof(struct nlmsgerr)) { + fprintf(stderr, "%s: ERROR truncated\n", + __func__); + ret = -EIO; + goto out; + } + + if (err->error) { + fprintf(stderr, "%s: (%d) %s\n", + __func__, err->error, + strerror(-err->error)); + ret = err->error; + goto out; + } + + ret = 0; + if (cb) { + int r = cb(h, arg_cb); + + if (r <= 0) + ret = r; + } + goto out; + } + + if (cb) { + int r = cb(h, arg_cb); + + if (r <= 0) { + ret = r; + goto out; + } + } else { + fprintf(stderr, "%s: RTNL: unexpected reply\n", + __func__); + } + + rcv_len -= NLMSG_ALIGN(len); + h = (struct nlmsghdr *)((uint8_t *)h + + NLMSG_ALIGN(len)); + } + + if (nlmsg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "%s: message truncated\n", __func__); + continue; + } + + if (rcv_len) { + fprintf(stderr, "%s: rtnl: %d not parsed bytes\n", + __func__, rcv_len); + ret = -1; + goto out; + } + } +out: + close(fd); + + return ret; +} + +struct ovpn_link_req { + struct nlmsghdr n; + struct ifinfomsg i; + char buf[256]; +}; + +static int ovpn_new_iface(struct ovpn_ctx *ovpn) +{ + struct rtattr *linkinfo, *data; + struct ovpn_link_req req = { 0 }; + int ret 
= -1; + + fprintf(stdout, "Creating interface %s with mode %u\n", ovpn->ifname, + ovpn->mode); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i)); + req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + req.n.nlmsg_type = RTM_NEWLINK; + + if (ovpn_addattr(&req.n, sizeof(req), IFLA_IFNAME, ovpn->ifname, + strlen(ovpn->ifname) + 1) < 0) + goto err; + + linkinfo = ovpn_nest_start(&req.n, sizeof(req), IFLA_LINKINFO); + if (!linkinfo) + goto err; + + if (ovpn_addattr(&req.n, sizeof(req), IFLA_INFO_KIND, OVPN_FAMILY_NAME, + strlen(OVPN_FAMILY_NAME) + 1) < 0) + goto err; + + if (ovpn->mode_set) { + data = ovpn_nest_start(&req.n, sizeof(req), IFLA_INFO_DATA); + if (!data) + goto err; + + if (ovpn_addattr(&req.n, sizeof(req), IFLA_OVPN_MODE, + &ovpn->mode, sizeof(uint8_t)) < 0) + goto err; + + ovpn_nest_end(&req.n, data); + } + + ovpn_nest_end(&req.n, linkinfo); + + req.i.ifi_family = AF_PACKET; + + ret = ovpn_rt_send(&req.n, 0, 0, NULL, NULL); +err: + return ret; +} + +static int ovpn_del_iface(struct ovpn_ctx *ovpn) +{ + struct ovpn_link_req req = { 0 }; + + fprintf(stdout, "Deleting interface %s ifindex %u\n", ovpn->ifname, + ovpn->ifindex); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_DELLINK; + + req.i.ifi_family = AF_PACKET; + req.i.ifi_index = ovpn->ifindex; + + return ovpn_rt_send(&req.n, 0, 0, NULL, NULL); +} + +static int nl_seq_check(struct nl_msg (*msg)__always_unused, + void (*arg)__always_unused) +{ + return NL_OK; +} + +struct mcast_handler_args { + const char *group; + int id; +}; + +static int mcast_family_handler(struct nl_msg *msg, void *arg) +{ + struct mcast_handler_args *grp = arg; + struct nlattr *tb[CTRL_ATTR_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *mcgrp; + int rem_mcgrp; + + nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!tb[CTRL_ATTR_MCAST_GROUPS]) + return NL_SKIP; + + nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) { + struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1]; + + nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX, + nla_data(mcgrp), nla_len(mcgrp), NULL); + + if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] || + !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]) + continue; + if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]), + grp->group, nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]))) + continue; + grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]); + break; + } + + return NL_SKIP; +} + +static int mcast_error_handler(struct sockaddr_nl (*nla)__always_unused, + struct nlmsgerr *err, void *arg) +{ + int *ret = arg; + + *ret = err->error; + return NL_STOP; +} + +static int mcast_ack_handler(struct nl_msg (*msg)__always_unused, void *arg) +{ + int *ret = arg; + + *ret = 0; + return NL_STOP; +} + +static int ovpn_handle_msg(struct nl_msg *msg, void *arg) +{ + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *attrs[OVPN_A_MAX + 1]; + struct nlmsghdr *nlh = nlmsg_hdr(msg); + char ifname[IF_NAMESIZE]; + int *ret = arg; + __u32 ifindex; + + fprintf(stderr, "received message from ovpn-dco\n"); + + *ret = -1; + + if (!genlmsg_valid_hdr(nlh, 0)) { + fprintf(stderr, "invalid header\n"); + return NL_STOP; + } + + if (nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL)) { + fprintf(stderr, "received bogus data from ovpn-dco\n"); + return NL_STOP; + } + + if (!attrs[OVPN_A_IFINDEX]) { + fprintf(stderr, "no ifindex in this message\n"); + return 
NL_STOP; + } + + ifindex = nla_get_u32(attrs[OVPN_A_IFINDEX]); + if (!if_indextoname(ifindex, ifname)) { + fprintf(stderr, "cannot resolve ifname for ifindex: %u\n", + ifindex); + return NL_STOP; + } + + switch (gnlh->cmd) { + case OVPN_CMD_PEER_DEL_NTF: + fprintf(stdout, "received CMD_PEER_DEL_NTF\n"); + break; + case OVPN_CMD_KEY_SWAP_NTF: + fprintf(stdout, "received CMD_KEY_SWAP_NTF\n"); + break; + default: + fprintf(stderr, "received unknown command: %d\n", gnlh->cmd); + return NL_STOP; + } + + *ret = 0; + return NL_OK; +} + +static int ovpn_get_mcast_id(struct nl_sock *sock, const char *family, + const char *group) +{ + struct nl_msg *msg; + struct nl_cb *cb; + int ret, ctrlid; + struct mcast_handler_args grp = { + .group = group, + .id = -ENOENT, + }; + + msg = nlmsg_alloc(); + if (!msg) + return -ENOMEM; + + cb = nl_cb_alloc(NL_CB_DEFAULT); + if (!cb) { + ret = -ENOMEM; + goto out_fail_cb; + } + + ctrlid = genl_ctrl_resolve(sock, "nlctrl"); + + genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); + + ret = -ENOBUFS; + NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family); + + ret = nl_send_auto_complete(sock, msg); + if (ret < 0) + goto nla_put_failure; + + ret = 1; + + nl_cb_err(cb, NL_CB_CUSTOM, mcast_error_handler, &ret); + nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, mcast_ack_handler, &ret); + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, mcast_family_handler, &grp); + + while (ret > 0) + nl_recvmsgs(sock, cb); + + if (ret == 0) + ret = grp.id; + nla_put_failure: + nl_cb_put(cb); + out_fail_cb: + nlmsg_free(msg); + return ret; +} + +static int ovpn_listen_mcast(void) +{ + struct nl_sock *sock; + struct nl_cb *cb; + int mcid, ret = -1; + + sock = nl_socket_alloc(); + if (!sock) { + fprintf(stderr, "cannot allocate netlink socket\n"); + goto err_free; + } + + nl_socket_set_buffer_size(sock, 8192, 8192); + + ret = genl_connect(sock); + if (ret < 0) { + fprintf(stderr, "cannot connect to generic netlink: %s\n", + nl_geterror(ret)); + goto err_free; + } + + mcid = ovpn_get_mcast_id(sock, OVPN_FAMILY_NAME, OVPN_MCGRP_PEERS); + if (mcid < 0) { + fprintf(stderr, "cannot get mcast group: %s\n", + nl_geterror(mcid)); + goto err_free; + } + + ret = nl_socket_add_membership(sock, mcid); + if (ret) { + fprintf(stderr, "failed to join mcast group: %d\n", ret); + goto err_free; + } + + ret = 1; + cb = nl_cb_alloc(NL_CB_DEFAULT); + nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check, NULL); + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, &ret); + nl_cb_err(cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &ret); + + while (ret == 1) { + int err = nl_recvmsgs(sock, cb); + + if (err < 0) { + fprintf(stderr, + "cannot receive netlink message: (%d) %s\n", + err, nl_geterror(-err)); + ret = -1; + break; + } + } + + nl_cb_put(cb); +err_free: + nl_socket_free(sock); + return ret; +} + +static void usage(const char *cmd) +{ + fprintf(stderr, + "Usage: %s <command> <iface> [arguments..]\n", + cmd); + fprintf(stderr, "where <command> can be one of the following\n\n"); + + fprintf(stderr, "* new_iface <iface> [mode]: create new ovpn interface\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tmode:\n"); + fprintf(stderr, "\t\t- P2P for peer-to-peer mode (i.e. client)\n"); + fprintf(stderr, "\t\t- MP for multi-peer mode (i.e. 
server)\n"); + + fprintf(stderr, "* del_iface <iface>: delete ovpn interface\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + + fprintf(stderr, + "* listen <iface> <lport> <peers_file> [ipv6]: listen for incoming peer TCP connections\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tlport: TCP port to listen to\n"); + fprintf(stderr, + "\tpeers_file: file containing one peer per line. Line format:\n"); + fprintf(stderr, "\t\t<peer_id> <vpnaddr>\n"); + fprintf(stderr, + "\tipv6: whether the socket should listen to the IPv6 wildcard address\n"); + + fprintf(stderr, + "* connect <iface> <peer_id> <raddr> <rport> [key_file]: connect to a remote peer over TCP\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the connecting peer\n"); + fprintf(stderr, "\traddr: peer IP address to connect to\n"); + fprintf(stderr, "\trport: peer TCP port to connect to\n"); + fprintf(stderr, + "\tkey_file: file containing the symmetric key for encryption\n"); + + fprintf(stderr, + "* new_peer <iface> <peer_id> <lport> <raddr> <rport> [vpnaddr]: add new peer\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, + "\tpeer_id: peer ID to be used in data packets to/from this peer\n"); + fprintf(stderr, "\tlport: local UDP port to bind to\n"); + fprintf(stderr, "\traddr: peer IP address\n"); + fprintf(stderr, "\trport: peer UDP port\n"); + fprintf(stderr, "\tvpnaddr: peer VPN IP\n"); + + fprintf(stderr, + "* new_multi_peer <iface> <lport> <peers_file>: add multiple peers as listed in the file\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tlport: local UDP port to bind to\n"); + fprintf(stderr, + "\tpeers_file: text file containing one peer per line. Line format:\n"); + fprintf(stderr, "\t\t<peer_id> <raddr> <rport> <vpnaddr>\n"); + + fprintf(stderr, + "* set_peer <iface> <peer_id> <keepalive_interval> <keepalive_timeout>: set peer attributes\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n"); + fprintf(stderr, + "\tkeepalive_interval: interval for sending ping messages\n"); + fprintf(stderr, + "\tkeepalive_timeout: time after which a peer is timed out\n"); + + fprintf(stderr, "* del_peer <iface> <peer_id>: delete peer\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to delete\n"); + + fprintf(stderr, "* get_peer <iface> [peer_id]: retrieve peer(s) status\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, + "\tpeer_id: peer ID of the peer to query. 
All peers are returned if omitted\n"); + + fprintf(stderr, + "* new_key <iface> <peer_id> <slot> <key_id> <cipher> <key_dir> <key_file>: set data channel key\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, + "\tpeer_id: peer ID of the peer to configure the key for\n"); + fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n"); + fprintf(stderr, "\tkey_id: an ID from 0 to 7\n"); + fprintf(stderr, + "\tcipher: cipher to use, supported: aes (AES-GCM), chachapoly (CHACHA20POLY1305)\n"); + fprintf(stderr, + "\tkey_dir: key direction, must be 0 on one host and 1 on the other\n"); + fprintf(stderr, "\tkey_file: file containing the pre-shared key\n"); + + fprintf(stderr, + "* del_key <iface> <peer_id> [slot]: erase existing data channel key\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n"); + fprintf(stderr, "\tslot: slot to erase. PRIMARY if omitted\n"); + + fprintf(stderr, + "* get_key <iface> <peer_id> <slot>: retrieve non-sensitive key data\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to query\n"); + fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n"); + + fprintf(stderr, + "* swap_keys <iface> <peer_id>: swap content of primary and secondary key slots\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n"); + + fprintf(stderr, + "* listen_mcast: listen to ovpn netlink multicast messages\n"); +} + +static int ovpn_parse_remote(struct ovpn_ctx *ovpn, const char *host, + const char *service, const char *vpnip) +{ + int ret; + struct addrinfo *result; + struct addrinfo hints = { + .ai_family = ovpn->sa_family, + .ai_socktype = SOCK_DGRAM, + .ai_protocol = IPPROTO_UDP + }; + + if (host) { + ret = getaddrinfo(host, service, &hints, &result); + if (ret == EAI_NONAME || ret == EAI_FAIL) + return -1; + + if (!(result->ai_family == AF_INET && + result->ai_addrlen == sizeof(struct sockaddr_in)) && + !(result->ai_family == AF_INET6 && + result->ai_addrlen == sizeof(struct sockaddr_in6))) { + ret = -EINVAL; + goto out; + } + + memcpy(&ovpn->remote, result->ai_addr, result->ai_addrlen); + } + + if (vpnip) { + ret = getaddrinfo(vpnip, NULL, &hints, &result); + if (ret == EAI_NONAME || ret == EAI_FAIL) + return -1; + + if (!(result->ai_family == AF_INET && + result->ai_addrlen == sizeof(struct sockaddr_in)) && + !(result->ai_family == AF_INET6 && + result->ai_addrlen == sizeof(struct sockaddr_in6))) { + ret = -EINVAL; + goto out; + } + + memcpy(&ovpn->peer_ip, result->ai_addr, result->ai_addrlen); + ovpn->sa_family = result->ai_family; + + ovpn->peer_ip_set = true; + } + + ret = 0; +out: + freeaddrinfo(result); + return ret; +} + +static int ovpn_parse_new_peer(struct ovpn_ctx *ovpn, const char *peer_id, + const char *raddr, const char *rport, + const char *vpnip) +{ + ovpn->peer_id = strtoul(peer_id, NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + return ovpn_parse_remote(ovpn, raddr, rport, vpnip); +} + +static int ovpn_parse_key_slot(const char *arg, struct ovpn_ctx *ovpn) +{ + int slot = strtoul(arg, NULL, 10); + + if (errno == ERANGE || slot < 1 || slot > 2) { + fprintf(stderr, "key slot out of range\n"); + return -1; + } + + switch (slot) { + case 1: + ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY; + break; + case 2: + ovpn->key_slot = OVPN_KEY_SLOT_SECONDARY; + break; + } + 
+ return 0; +} + +static int ovpn_send_tcp_data(int socket) +{ + uint16_t len = htons(1000); + uint8_t buf[1002]; + int ret; + + memcpy(buf, &len, sizeof(len)); + memset(buf + sizeof(len), 0x86, sizeof(buf) - sizeof(len)); + + ret = send(socket, buf, sizeof(buf), MSG_NOSIGNAL); + + fprintf(stdout, "Sent %d bytes over TCP socket\n", ret); + + return ret > 0 ? 0 : ret; +} + +static int ovpn_recv_tcp_data(int socket) +{ + uint8_t buf[1002]; + uint16_t len; + int ret; + + ret = recv(socket, buf, sizeof(buf), MSG_NOSIGNAL); + + if (ret < 2) { + fprintf(stderr, ">>>> Error while reading TCP data: %d\n", ret); + return ret; + } + + memcpy(&len, buf, sizeof(len)); + len = ntohs(len); + + fprintf(stdout, ">>>> Received %d bytes over TCP socket, header: %u\n", + ret, len); + + return 0; +} + +static enum ovpn_cmd ovpn_parse_cmd(const char *cmd) +{ + if (!strcmp(cmd, "new_iface")) + return CMD_NEW_IFACE; + + if (!strcmp(cmd, "del_iface")) + return CMD_DEL_IFACE; + + if (!strcmp(cmd, "listen")) + return CMD_LISTEN; + + if (!strcmp(cmd, "connect")) + return CMD_CONNECT; + + if (!strcmp(cmd, "new_peer")) + return CMD_NEW_PEER; + + if (!strcmp(cmd, "new_multi_peer")) + return CMD_NEW_MULTI_PEER; + + if (!strcmp(cmd, "set_peer")) + return CMD_SET_PEER; + + if (!strcmp(cmd, "del_peer")) + return CMD_DEL_PEER; + + if (!strcmp(cmd, "get_peer")) + return CMD_GET_PEER; + + if (!strcmp(cmd, "new_key")) + return CMD_NEW_KEY; + + if (!strcmp(cmd, "del_key")) + return CMD_DEL_KEY; + + if (!strcmp(cmd, "get_key")) + return CMD_GET_KEY; + + if (!strcmp(cmd, "swap_keys")) + return CMD_SWAP_KEYS; + + if (!strcmp(cmd, "listen_mcast")) + return CMD_LISTEN_MCAST; + + return CMD_INVALID; +} + +/* Send the process to the background and wait for a signal. + * + * This helper is called at the end of commands + * creating sockets, so that the latter stay alive + * along with the process that created them. 
+ * + * A signal is expected to be delivered in order + * to terminate the waiting processes + */ +static void ovpn_waitbg(void) +{ + daemon(1, 1); + pause(); +} + +static int ovpn_run_cmd(struct ovpn_ctx *ovpn) +{ + char peer_id[10], vpnip[INET6_ADDRSTRLEN], raddr[128], rport[10]; + int n, ret; + FILE *fp; + + switch (ovpn->cmd) { + case CMD_NEW_IFACE: + ret = ovpn_new_iface(ovpn); + break; + case CMD_DEL_IFACE: + ret = ovpn_del_iface(ovpn); + break; + case CMD_LISTEN: + ret = ovpn_listen(ovpn, ovpn->sa_family); + if (ret < 0) { + fprintf(stderr, "cannot listen on TCP socket\n"); + return ret; + } + + fp = fopen(ovpn->peers_file, "r"); + if (!fp) { + fprintf(stderr, "cannot open file: %s\n", + ovpn->peers_file); + return -1; + } + + int num_peers = 0; + + while ((n = fscanf(fp, "%s %s\n", peer_id, vpnip)) == 2) { + struct ovpn_ctx peer_ctx = { 0 }; + + if (num_peers == MAX_PEERS) { + fprintf(stderr, "max peers reached!\n"); + return -E2BIG; + } + + peer_ctx.ifindex = ovpn->ifindex; + peer_ctx.sa_family = ovpn->sa_family; + + peer_ctx.socket = ovpn_accept(ovpn); + if (peer_ctx.socket < 0) { + fprintf(stderr, "cannot accept connection!\n"); + return -1; + } + + /* store peer sockets to test TCP I/O */ + ovpn->cli_sockets[num_peers] = peer_ctx.socket; + + ret = ovpn_parse_new_peer(&peer_ctx, peer_id, NULL, + NULL, vpnip); + if (ret < 0) { + fprintf(stderr, "error while parsing line\n"); + return -1; + } + + ret = ovpn_new_peer(&peer_ctx, true); + if (ret < 0) { + fprintf(stderr, + "cannot add peer to VPN: %s %s\n", + peer_id, vpnip); + return ret; + } + num_peers++; + } + + for (int i = 0; i < num_peers; i++) { + ret = ovpn_recv_tcp_data(ovpn->cli_sockets[i]); + if (ret < 0) + break; + } + ovpn_waitbg(); + break; + case CMD_CONNECT: + ret = ovpn_connect(ovpn); + if (ret < 0) { + fprintf(stderr, "cannot connect TCP socket\n"); + return ret; + } + + ret = ovpn_new_peer(ovpn, true); + if (ret < 0) { + fprintf(stderr, "cannot add peer to VPN\n"); + close(ovpn->socket); + return ret; + } + + if (ovpn->cipher != OVPN_CIPHER_ALG_NONE) { + ret = ovpn_new_key(ovpn); + if (ret < 0) { + fprintf(stderr, "cannot set key\n"); + return ret; + } + } + + ret = ovpn_send_tcp_data(ovpn->socket); + ovpn_waitbg(); + break; + case CMD_NEW_PEER: + ret = ovpn_udp_socket(ovpn, AF_INET6); + if (ret < 0) + return ret; + + ret = ovpn_new_peer(ovpn, false); + ovpn_waitbg(); + break; + case CMD_NEW_MULTI_PEER: + ret = ovpn_udp_socket(ovpn, AF_INET6); + if (ret < 0) + return ret; + + fp = fopen(ovpn->peers_file, "r"); + if (!fp) { + fprintf(stderr, "cannot open file: %s\n", + ovpn->peers_file); + return -1; + } + + while ((n = fscanf(fp, "%s %s %s %s\n", peer_id, raddr, rport, + vpnip)) == 4) { + struct ovpn_ctx peer_ctx = { 0 }; + + peer_ctx.ifindex = ovpn->ifindex; + peer_ctx.socket = ovpn->socket; + peer_ctx.sa_family = AF_UNSPEC; + + ret = ovpn_parse_new_peer(&peer_ctx, peer_id, raddr, + rport, vpnip); + if (ret < 0) { + fprintf(stderr, "error while parsing line\n"); + return -1; + } + + ret = ovpn_new_peer(&peer_ctx, false); + if (ret < 0) { + fprintf(stderr, + "cannot add peer to VPN: %s %s %s %s\n", + peer_id, raddr, rport, vpnip); + return ret; + } + } + ovpn_waitbg(); + break; + case CMD_SET_PEER: + ret = ovpn_set_peer(ovpn); + break; + case CMD_DEL_PEER: + ret = ovpn_del_peer(ovpn); + break; + case CMD_GET_PEER: + if (ovpn->peer_id == PEER_ID_UNDEF) + fprintf(stderr, "List of peers connected to: %s\n", + ovpn->ifname); + + ret = ovpn_get_peer(ovpn); + break; + case CMD_NEW_KEY: + ret = ovpn_new_key(ovpn); + 
break; + case CMD_DEL_KEY: + ret = ovpn_del_key(ovpn); + break; + case CMD_GET_KEY: + ret = ovpn_get_key(ovpn); + break; + case CMD_SWAP_KEYS: + ret = ovpn_swap_keys(ovpn); + break; + case CMD_LISTEN_MCAST: + ret = ovpn_listen_mcast(); + break; + case CMD_INVALID: + break; + } + + return ret; +} + +static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[]) +{ + int ret; + + /* no args required for LISTEN_MCAST */ + if (ovpn->cmd == CMD_LISTEN_MCAST) + return 0; + + /* all commands need an ifname */ + if (argc < 3) + return -EINVAL; + + strscpy(ovpn->ifname, argv[2], IFNAMSIZ - 1); + ovpn->ifname[IFNAMSIZ - 1] = '\0'; + + /* all commands, except NEW_IFACE, need an ifindex */ + if (ovpn->cmd != CMD_NEW_IFACE) { + ovpn->ifindex = if_nametoindex(ovpn->ifname); + if (!ovpn->ifindex) { + fprintf(stderr, "cannot find interface: %s\n", + strerror(errno)); + return -1; + } + } + + switch (ovpn->cmd) { + case CMD_NEW_IFACE: + if (argc < 4) + break; + + if (!strcmp(argv[3], "P2P")) { + ovpn->mode = OVPN_MODE_P2P; + } else if (!strcmp(argv[3], "MP")) { + ovpn->mode = OVPN_MODE_MP; + } else { + fprintf(stderr, "Cannot parse iface mode: %s\n", + argv[3]); + return -1; + } + ovpn->mode_set = true; + break; + case CMD_DEL_IFACE: + break; + case CMD_LISTEN: + if (argc < 5) + return -EINVAL; + + ovpn->lport = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->lport > 65535) { + fprintf(stderr, "lport value out of range\n"); + return -1; + } + + ovpn->peers_file = argv[4]; + + if (argc > 5 && !strcmp(argv[5], "ipv6")) + ovpn->sa_family = AF_INET6; + break; + case CMD_CONNECT: + if (argc < 6) + return -EINVAL; + + ovpn->sa_family = AF_INET; + + ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[5], + NULL); + if (ret < 0) { + fprintf(stderr, "Cannot parse remote peer data\n"); + return -1; + } + + if (argc > 6) { + ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY; + ovpn->key_id = 0; + ovpn->cipher = OVPN_CIPHER_ALG_AES_GCM; + ovpn->key_dir = KEY_DIR_OUT; + + ret = ovpn_parse_key(argv[6], ovpn); + if (ret) + return -1; + } + break; + case CMD_NEW_PEER: + if (argc < 7) + return -EINVAL; + + ovpn->lport = strtoul(argv[4], NULL, 10); + if (errno == ERANGE || ovpn->lport > 65535) { + fprintf(stderr, "lport value out of range\n"); + return -1; + } + + const char *vpnip = (argc > 7) ? 
argv[7] : NULL; + + ret = ovpn_parse_new_peer(ovpn, argv[3], argv[5], argv[6], + vpnip); + if (ret < 0) + return -1; + break; + case CMD_NEW_MULTI_PEER: + if (argc < 5) + return -EINVAL; + + ovpn->lport = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->lport > 65535) { + fprintf(stderr, "lport value out of range\n"); + return -1; + } + + ovpn->peers_file = argv[4]; + break; + case CMD_SET_PEER: + if (argc < 6) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ovpn->keepalive_interval = strtoul(argv[4], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, + "keepalive interval value out of range\n"); + return -1; + } + + ovpn->keepalive_timeout = strtoul(argv[5], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, + "keepalive timeout value out of range\n"); + return -1; + } + break; + case CMD_DEL_PEER: + if (argc < 4) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + break; + case CMD_GET_PEER: + ovpn->peer_id = PEER_ID_UNDEF; + if (argc > 3) { + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + } + break; + case CMD_NEW_KEY: + if (argc < 9) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ret = ovpn_parse_key_slot(argv[4], ovpn); + if (ret) + return -1; + + ovpn->key_id = strtoul(argv[5], NULL, 10); + if (errno == ERANGE || ovpn->key_id > 7) { + fprintf(stderr, "key ID out of range\n"); + return -1; + } + + ret = ovpn_parse_cipher(argv[6], ovpn); + if (ret < 0) + return -1; + + ret = ovpn_parse_key_direction(argv[7], ovpn); + if (ret < 0) + return -1; + + ret = ovpn_parse_key(argv[8], ovpn); + if (ret) + return -1; + break; + case CMD_DEL_KEY: + if (argc < 4) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + if (argc > 4) { + ret = ovpn_parse_key_slot(argv[4], ovpn); + if (ret) + return ret; + } + break; + case CMD_GET_KEY: + if (argc < 5) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ret = ovpn_parse_key_slot(argv[4], ovpn); + if (ret) + return ret; + break; + case CMD_SWAP_KEYS: + if (argc < 4) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + break; + case CMD_LISTEN_MCAST: + break; + case CMD_INVALID: + break; + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + struct ovpn_ctx ovpn; + int ret; + + if (argc < 2) { + usage(argv[0]); + return -1; + } + + memset(&ovpn, 0, sizeof(ovpn)); + ovpn.sa_family = AF_INET; + ovpn.cipher = OVPN_CIPHER_ALG_NONE; + + ovpn.cmd = ovpn_parse_cmd(argv[1]); + if (ovpn.cmd == CMD_INVALID) { + fprintf(stderr, "Error: unknown command.\n\n"); + usage(argv[0]); + return -1; + } + + ret = ovpn_parse_cmd_args(&ovpn, argc, argv); + if (ret < 0) { + fprintf(stderr, "Error: invalid arguments.\n\n"); + if (ret == -EINVAL) + usage(argv[0]); + return ret; + } + + ret = ovpn_run_cmd(&ovpn); + if (ret) + 
fprintf(stderr, "Cannot execute command: %s (%d)\n", + strerror(-ret), ret); + + return ret; +} diff --git a/tools/testing/selftests/net/ovpn/tcp_peers.txt b/tools/testing/selftests/net/ovpn/tcp_peers.txt new file mode 100644 index 000000000000..d753eebe8716 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/tcp_peers.txt @@ -0,0 +1,5 @@ +1 5.5.5.2 +2 5.5.5.3 +3 5.5.5.4 +4 5.5.5.5 +5 5.5.5.6 diff --git a/tools/testing/selftests/net/ovpn/test-chachapoly.sh b/tools/testing/selftests/net/ovpn/test-chachapoly.sh new file mode 100755 index 000000000000..32504079a2b8 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-chachapoly.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +ALG="chachapoly" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh new file mode 100755 index 000000000000..093d44772ffd --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +PROTO="TCP" + +source test-close-socket.sh diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh new file mode 100755 index 000000000000..5e48a8b67928 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +#set -x +set -e + +source ./common.sh + +cleanup + +modprobe -q ovpn || true + +for p in $(seq 0 ${NUM_PEERS}); do + create_ns ${p} +done + +for p in $(seq 0 ${NUM_PEERS}); do + setup_ns ${p} 5.5.5.$((${p} + 1))/24 +done + +for p in $(seq 0 ${NUM_PEERS}); do + add_peer ${p} +done + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120 + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120 +done + +sleep 1 + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1)) +done + +ip netns exec peer0 iperf3 -1 -s & +sleep 1 +ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1 + +cleanup + +modprobe -r ovpn || true diff --git a/tools/testing/selftests/net/ovpn/test-float.sh b/tools/testing/selftests/net/ovpn/test-float.sh new file mode 100755 index 000000000000..ba5d725e18b0 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-float.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +FLOAT="1" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-tcp.sh b/tools/testing/selftests/net/ovpn/test-tcp.sh new file mode 100755 index 000000000000..ba3f1f315a34 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-tcp.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. 
+# +# Author: Antonio Quartulli <antonio@openvpn.net> + +PROTO="TCP" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh new file mode 100755 index 000000000000..7b62897b0240 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test.sh @@ -0,0 +1,113 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +#set -x +set -e + +source ./common.sh + +cleanup + +modprobe -q ovpn || true + +for p in $(seq 0 ${NUM_PEERS}); do + create_ns ${p} +done + +for p in $(seq 0 ${NUM_PEERS}); do + setup_ns ${p} 5.5.5.$((${p} + 1))/24 +done + +for p in $(seq 0 ${NUM_PEERS}); do + add_peer ${p} +done + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120 + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120 +done + +sleep 1 + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1)) +done + +if [ "$FLOAT" == "1" ]; then + # make clients float.. + for p in $(seq 1 ${NUM_PEERS}); do + ip -n peer${p} addr del 10.10.${p}.2/24 dev veth${p} + ip -n peer${p} addr add 10.10.${p}.3/24 dev veth${p} + done + for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer${p} ping -qfc 500 -w 3 5.5.5.1 + done +fi + +ip netns exec peer0 iperf3 -1 -s & +sleep 1 +ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1 + +echo "Adding secondary key and then swap:" +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 2 1 ${ALG} 0 data64.key + ip netns exec peer${p} ${OVPN_CLI} new_key tun${p} ${p} 2 1 ${ALG} 1 data64.key + ip netns exec peer${p} ${OVPN_CLI} swap_keys tun${p} ${p} +done + +sleep 1 + +echo "Querying all peers:" +ip netns exec peer0 ${OVPN_CLI} get_peer tun0 +ip netns exec peer1 ${OVPN_CLI} get_peer tun1 + +echo "Querying peer 1:" +ip netns exec peer0 ${OVPN_CLI} get_peer tun0 1 + +echo "Querying non-existent peer 10:" +ip netns exec peer0 ${OVPN_CLI} get_peer tun0 10 || true + +echo "Deleting peer 1:" +ip netns exec peer0 ${OVPN_CLI} del_peer tun0 1 +ip netns exec peer1 ${OVPN_CLI} del_peer tun1 1 + +echo "Querying keys:" +for p in $(seq 2 ${NUM_PEERS}); do + ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 1 + ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 2 +done + +echo "Deleting peer while sending traffic:" +(ip netns exec peer2 ping -qf -w 4 5.5.5.1)& +sleep 2 +ip netns exec peer0 ${OVPN_CLI} del_peer tun0 2 +# following command fails in TCP mode +# (both ends get conn reset when one peer disconnects) +ip netns exec peer2 ${OVPN_CLI} del_peer tun2 2 || true + +echo "Deleting keys:" +for p in $(seq 3 ${NUM_PEERS}); do + ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 1 + ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 2 +done + +echo "Setting timeout to 3s MP:" +for p in $(seq 3 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 3 3 || true + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 0 0 +done +# wait for peers to timeout +sleep 5 + +echo "Setting timeout to 3s P2P:" +for p in $(seq 3 ${NUM_PEERS}); do + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 3 3 +done +sleep 5 + +cleanup + +modprobe -r ovpn || true diff --git a/tools/testing/selftests/net/ovpn/udp_peers.txt b/tools/testing/selftests/net/ovpn/udp_peers.txt new file mode 100644 index 000000000000..32f14bd9347a --- /dev/null +++ b/tools/testing/selftests/net/ovpn/udp_peers.txt @@ -0,0 +1,5 @@ +1 10.10.1.2 1 
5.5.5.2 +2 10.10.2.2 1 5.5.5.3 +3 10.10.3.2 1 5.5.5.4 +4 10.10.4.2 1 5.5.5.5 +5 10.10.5.2 1 5.5.5.6 diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c index b8475cb29be7..1c43401a1c80 100644 --- a/tools/testing/selftests/net/reuseport_addr_any.c +++ b/tools/testing/selftests/net/reuseport_addr_any.c @@ -9,7 +9,6 @@ #include <arpa/inet.h> #include <errno.h> #include <error.h> -#include <linux/dccp.h> #include <linux/in.h> #include <linux/unistd.h> #include <stdbool.h> @@ -21,10 +20,6 @@ #include <sys/socket.h> #include <unistd.h> -#ifndef SOL_DCCP -#define SOL_DCCP 269 -#endif - static const char *IP4_ADDR = "127.0.0.1"; static const char *IP6_ADDR = "::1"; static const char *IP4_MAPPED6 = "::ffff:127.0.0.1"; @@ -86,15 +81,6 @@ static void build_rcv_fd(int family, int proto, int *rcv_fds, int count, if (proto == SOCK_STREAM && listen(rcv_fds[i], 10)) error(1, errno, "tcp: failed to listen on receive port"); - else if (proto == SOCK_DCCP) { - if (setsockopt(rcv_fds[i], SOL_DCCP, - DCCP_SOCKOPT_SERVICE, - &(int) {htonl(42)}, sizeof(int))) - error(1, errno, "failed to setsockopt"); - - if (listen(rcv_fds[i], 10)) - error(1, errno, "dccp: failed to listen on receive port"); - } } } @@ -148,11 +134,6 @@ static int connect_and_send(int family, int proto) if (fd < 0) error(1, errno, "failed to create send socket"); - if (proto == SOCK_DCCP && - setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, - &(int){htonl(42)}, sizeof(int))) - error(1, errno, "failed to setsockopt"); - if (bind(fd, saddr, sz)) error(1, errno, "failed to bind send socket"); @@ -175,7 +156,7 @@ static int receive_once(int epfd, int proto) if (i < 0) error(1, errno, "epoll_wait failed"); - if (proto == SOCK_STREAM || proto == SOCK_DCCP) { + if (proto == SOCK_STREAM) { fd = accept(ev.data.fd, NULL, NULL); if (fd < 0) error(1, errno, "failed to accept"); @@ -243,20 +224,6 @@ static void run_one_test(int fam_send, int fam_rcv, int proto, static void test_proto(int proto, const char *proto_str) { - if (proto == SOCK_DCCP) { - int test_fd; - - test_fd = socket(AF_INET, proto, 0); - if (test_fd < 0) { - if (errno == ESOCKTNOSUPPORT) { - fprintf(stderr, "DCCP not supported: skipping DCCP tests\n"); - return; - } else - error(1, errno, "failed to create a DCCP socket"); - } - close(test_fd); - } - fprintf(stderr, "%s IPv4 ... ", proto_str); run_one_test(AF_INET, AF_INET, proto, IP4_ADDR); @@ -271,7 +238,6 @@ int main(void) { test_proto(SOCK_DGRAM, "UDP"); test_proto(SOCK_STREAM, "TCP"); - test_proto(SOCK_DCCP, "DCCP"); fprintf(stderr, "SUCCESS\n"); return 0; diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh index 02d617040793..a5e959a080bb 100755 --- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh @@ -285,11 +285,6 @@ setup_hs() ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. 
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0 - ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh index 79fb81e63c59..a649dba3cb77 100755 --- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh @@ -250,11 +250,6 @@ setup_hs() eval local rtname=\${rt_${rid}} local rtveth=veth-t${tid} - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0 - ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0 diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh index 87e414cc417c..ba730655a7bf 100755 --- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh @@ -245,10 +245,8 @@ # that adopted in the use cases already examined (of course, it is necessary to # consider the different SIDs/C-SIDs). -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly DUMMY_DEVNAME="dum0" readonly VRF_TID=100 readonly VRF_DEVNAME="vrf-${VRF_TID}" @@ -376,32 +374,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -410,8 +394,7 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } create_host() @@ -420,28 +403,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done + cleanup_all_ns # check whether the setup phase was completed successfully or not. 
In # case of an error during the setup phase of the testing environment, @@ -462,10 +429,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -497,7 +464,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -518,9 +485,6 @@ setup_rt_networking() ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -596,7 +560,7 @@ setup_rt_local_sids() local lcnode_func_prefix local lcblock_prefix - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -668,8 +632,8 @@ __setup_l3vpn() local rtsrc_nsname local rtdst_nsname - rtsrc_nsname="$(get_rtname "${src}")" - rtdst_nsname="$(get_rtname "${dst}")" + eval rtsrc_nsname=\${$(get_rtname "${src}")} + eval rtdst_nsname=\${$(get_rtname "${dst}")} container="${LCBLOCK_ADDR}" @@ -744,8 +708,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -791,11 +755,6 @@ setup_hs() ip netns exec "${rtname}" \ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 - ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } @@ -880,7 +839,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -903,7 +862,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -915,7 +874,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 @@ -1025,7 +984,7 @@ rt_x_nextcsid_end_behavior_test() local nsname local ret - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} __nextcsid_end_behavior_test "${nsname}" "add" "${blen}" "${flen}" ret="$?" 
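The eval indirection used throughout these converted scripts is easy to misread: after the refactor, get_rtname/get_hsname return the name of a shell variable (e.g. "rt_1"), not a namespace name, and it is lib.sh's setup_ns that creates the namespace and stores its randomized name in that variable. A minimal sketch of the resulting flow, assuming kselftest net/lib.sh semantics for setup_ns and cleanup_all_ns (illustrative only, not part of the patch):

    # sketch: resolving a namespace name through the variable indirection
    source lib.sh

    get_rtname() { echo "rt_${1}"; }    # returns a variable *name*, not a netns name

    setup_ns rt_1                       # creates netns "rt_1-<rand>" and sets $rt_1 to it
    eval nsname=\${$(get_rtname 1)}     # indirection: nsname="${rt_1}"
    ip netns exec "${nsname}" ip link show lo
    cleanup_all_ns                      # deletes every namespace created via setup_ns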
diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh index c79cb8ede17f..4b86040c58c6 100755 --- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh @@ -287,10 +287,8 @@ # packet using the SRv6 End.DT46 behavior (associated with the SID fcff:1::d46) # and sends it to the host hs-1. -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly DUMMY_DEVNAME="dum0" readonly VRF_TID=100 readonly VRF_DEVNAME="vrf-${VRF_TID}" @@ -418,32 +416,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -452,15 +436,12 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" + setup_ns "${nsname}" - __create_namespace "${nsname}" - + eval nsname=\${$(get_rtname "${rtid}")} ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -470,29 +451,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, # the selftest is considered as "skipped". 
@@ -512,10 +476,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -547,7 +511,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -631,7 +595,7 @@ set_end_x_nextcsid() local rt="$1" local adj="$2" - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} net_prefix="$(get_network_prefix "${rt}" "${adj}")" lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")" @@ -650,7 +614,7 @@ set_underlay_sids_reachability() local rt="$1" local rt_neighs="$2" - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -685,7 +649,7 @@ setup_rt_local_sids() local lcnode_func_prefix local lcblock_prefix - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} set_underlay_sids_reachability "${rt}" "${rt_neighs}" @@ -728,8 +692,8 @@ __setup_l3vpn() local rtsrc_nsname local rtdst_nsname - rtsrc_nsname="$(get_rtname "${src}")" - rtdst_nsname="$(get_rtname "${dst}")" + eval rtsrc_nsname=\${$(get_rtname "${src}")} + eval rtdst_nsname=\${$(get_rtname "${dst}")} container="${LCBLOCK_ADDR}" @@ -804,8 +768,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -851,11 +815,6 @@ setup_hs() ip netns exec "${rtname}" \ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 - ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } @@ -947,7 +906,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -970,7 +929,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -982,7 +941,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 @@ -1093,7 +1052,7 @@ rt_x_nextcsid_end_x_behavior_test() local nsname local ret - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} __nextcsid_end_x_behavior_test "${nsname}" "add" "${blen}" "${flen}" ret="$?" 
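These conversions also drop the per-router rp_filter sysctls without adding a replacement in the scripts themselves. The working assumption (worth double-checking against net/lib.sh in the same tree) is that setup_ns now disables reverse path filtering centrally for every namespace it creates, so each script no longer needs to repeat it. Roughly, the centralized behavior being relied on looks like:

    # assumed net/lib.sh behavior per created namespace (sketch, not the
    # authoritative source)
    ip netns exec "${ns}" sysctl -wq net.ipv4.conf.all.rp_filter=0
    ip netns exec "${ns}" sysctl -wq net.ipv4.conf.default.rp_filter=0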
diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh index 28a775654b92..3efce1718c5f 100755 --- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh @@ -166,10 +166,8 @@ # hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d) # -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly VRF_TID=100 readonly VRF_DEVNAME="vrf-${VRF_TID}" readonly RT2HS_DEVNAME="veth-t${VRF_TID}" @@ -248,32 +246,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -282,8 +266,7 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } create_host() @@ -292,29 +275,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, # the selftest is considered as "skipped". 
@@ -334,10 +300,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -369,7 +335,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -387,9 +353,6 @@ setup_rt_networking() ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -403,7 +366,7 @@ setup_rt_local_sids() local nsname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -469,7 +432,7 @@ __setup_rt_policy() local policy='' local n - nsname="$(get_rtname "${encap_rt}")" + eval nsname=\${$(get_rtname "${encap_rt}")} for n in ${end_rts}; do policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," @@ -516,8 +479,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -555,11 +518,6 @@ setup_hs() ip netns exec "${rtname}" \ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 - ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } @@ -656,7 +614,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -679,7 +637,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -691,7 +649,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh index cb4177d41b21..cabc70538ffe 100755 --- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh @@ -116,10 +116,8 @@ # hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b) # -# Kselftest framework requirement - SKIP code is 4. 
-readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly DUMMY_DEVNAME="dum0" readonly RT2HS_DEVNAME="veth-hs" readonly HS_VETH_NAME="veth0" @@ -199,32 +197,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -233,8 +217,7 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } create_host() @@ -243,28 +226,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, @@ -285,10 +252,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -320,7 +287,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -341,9 +308,6 @@ setup_rt_networking() ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -357,7 +321,7 @@ setup_rt_local_sids() local nsname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -407,7 +371,7 @@ __setup_rt_policy() local policy='' local n - nsname="$(get_rtname "${encap_rt}")" + eval nsname=\${$(get_rtname "${encap_rt}")} for n in ${end_rts}; do policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," @@ -446,7 +410,7 @@ setup_decap() local rt="$1" local nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} # Local End.DX2 behavior ip -netns "${nsname}" -6 route \ @@ -463,8 +427,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -486,11 +450,6 @@ setup_hs() add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}" ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up - - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 
packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 } # set an auto-generated mac address @@ -508,7 +467,7 @@ set_mac_address() local ifname="$4" local nsname - nsname=$(get_nodename "${nodename}") + eval nsname=\${${nodename}} ip -netns "${nsname}" link set dev "${ifname}" down @@ -532,7 +491,7 @@ set_host_l2peer() local hssrc_name local ipaddr - hssrc_name="$(get_hsname "${hssrc}")" + eval hssrc_name=\${$(get_hsname "${hssrc}")} if [ "${proto}" -eq 6 ]; then ipaddr="${ipprefix}::${hsdst}" @@ -562,7 +521,7 @@ setup_l2vpn() local rtdst="${hsdst}" # set fixed mac for source node and the neigh MAC address - set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}" + set_mac_address "hs_${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}" set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6 set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4 @@ -570,7 +529,7 @@ setup_l2vpn() # to the mac address of the remote peer (L2 VPN destination host). # Otherwise, traffic coming from the source host is dropped at the # ingress router. - set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}" + set_mac_address "rt_${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}" # set the SRv6 Policies at the ingress router setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \ @@ -647,7 +606,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -670,7 +629,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -682,7 +641,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh index 02b986c9c247..9067197c9055 100755 --- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh +++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh @@ -51,7 +51,9 @@ ret=0 # All tests in this script. Can be overridden with -t option. TESTS=" neigh_suppress_arp + neigh_suppress_uc_arp neigh_suppress_ns + neigh_suppress_uc_ns neigh_vlan_suppress_arp neigh_vlan_suppress_ns " @@ -388,6 +390,52 @@ neigh_suppress_arp() neigh_suppress_arp_common $vid $sip $tip } +neigh_suppress_uc_arp_common() +{ + local vid=$1; shift + local sip=$1; shift + local tip=$1; shift + local tmac + + echo + echo "Unicast ARP, per-port ARP suppression - VLAN $vid" + echo "-----------------------------------------------" + + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 
0 "\"neigh_suppress\" is on" + + tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid" + run_cmd "ip -n $sw1 neigh replace $tip lladdr $tmac nud permanent dev br0.$vid" + + run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto arp flower arp_sip $tip arp_op reply action pass" + + run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto arp flower arp_tip $sip arp_op reply action pass" + + run_cmd "ip netns exec $h1 mausezahn eth0.$vid -c 1 -a own -b $tmac -t arp 'request sip=$sip, tip=$tip, tmac=$tmac' -q" + tc_check_packets $h1 "dev eth0.$vid ingress" 101 1 + log_test $? 0 "Unicast ARP, suppression on, h1 filter" + tc_check_packets $h2 "dev eth0.$vid egress" 101 1 + log_test $? 0 "Unicast ARP, suppression on, h2 filter" +} + +neigh_suppress_uc_arp() +{ + local vid=10 + local sip=192.0.2.1 + local tip=192.0.2.2 + + neigh_suppress_uc_arp_common $vid $sip $tip + + vid=20 + sip=192.0.2.17 + tip=192.0.2.18 + neigh_suppress_uc_arp_common $vid $sip $tip +} + neigh_suppress_ns_common() { local vid=$1; shift @@ -494,6 +542,78 @@ neigh_suppress_ns() neigh_suppress_ns_common $vid $saddr $daddr $maddr } +icmpv6_header_get() +{ + local csum=$1; shift + local tip=$1; shift + local type + local p + + # Type 135 (Neighbor Solicitation), hex format + type="87" + p=$(: + )"$type:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"$csum:"$( : ICMPv6.checksum + )"00:00:00:00:"$( : Reserved + )"$tip:"$( : Target Address + ) + echo $p +} + +neigh_suppress_uc_ns_common() +{ + local vid=$1; shift + local sip=$1; shift + local dip=$1; shift + local full_dip=$1; shift + local csum=$1; shift + local tmac + + echo + echo "Unicast NS, per-port NS suppression - VLAN $vid" + echo "---------------------------------------------" + + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid" + run_cmd "ip -n $sw1 -6 neigh replace $dip lladdr $tmac nud permanent dev br0.$vid" + + run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 src_ip $dip type 136 code 0 action pass" + + run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 dst_ip $sip type 136 code 0 action pass" + + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a own -b $tmac -A $sip -B $dip -t ip hop=255,next=58,payload=$(icmpv6_header_get $csum $full_dip) -q" + tc_check_packets $h1 "dev eth0.$vid ingress" 101 1 + log_test $? 0 "Unicast NS, suppression on, h1 filter" + tc_check_packets $h2 "dev eth0.$vid egress" 101 1 + log_test $? 
0 "Unicast NS, suppression on, h2 filter" +} + +neigh_suppress_uc_ns() +{ + local vid=10 + local saddr=2001:db8:1::1 + local daddr=2001:db8:1::2 + local full_daddr=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02 + local csum="ef:79" + + neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum + + vid=20 + saddr=2001:db8:2::1 + daddr=2001:db8:2::2 + full_daddr=20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:02 + csum="ef:76" + + neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum +} + neigh_vlan_suppress_arp() { local vid1=10 @@ -825,6 +945,11 @@ if [ ! -x "$(command -v jq)" ]; then exit $ksft_skip fi +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit $ksft_skip +fi + bridge link help 2>&1 | grep -q "neigh_vlan_suppress" if [ $? -ne 0 ]; then echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support" diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index cddff1772e10..589b18ed758a 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -31,6 +31,10 @@ try_modprobe act_skbedit try_modprobe act_skbmod try_modprobe act_tunnel_key try_modprobe act_vlan +try_modprobe act_ife +try_modprobe act_meta_mark +try_modprobe act_meta_skbtcindex +try_modprobe act_meta_skbprio try_modprobe cls_basic try_modprobe cls_bpf try_modprobe cls_cgroup |

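The unicast NS variant works the same way but has to hand-craft the packet, since mausezahn has no ICMPv6 shorthand: icmpv6_header_get() assembles the Neighbor Solicitation (type 135) as a colon-separated hex payload behind a generic IPv6 header (hop=255, next=58). Because the ICMPv6 checksum also covers the IPv6 pseudo-header, it depends on the source and destination addresses and must be precomputed per address pair, which is why VLAN 10 and VLAN 20 carry different csum values ("ef:79" vs "ef:76"). For the VLAN 10 case the assembly reduces to the sketch below (tmac resolved as in the ARP example):

type="87"       # 135 = Neighbor Solicitation
code="00"
csum="ef:79"    # precomputed over IPv6 pseudo-header + ICMPv6 message
tgt="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02"  # 2001:db8:1::2
payload="$type:$code:$csum:00:00:00:00:$tgt"

ip netns exec "$h1" mausezahn -6 eth0.10 -c 1 -a own -b "$tmac" \
	-A 2001:db8:1::1 -B 2001:db8:1::2 \
	-t ip "hop=255,next=58,payload=$payload" -q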