aboutsummaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests/drivers/net/hw/tso.py
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-03-26 21:48:21 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-03-26 21:48:21 -0700
commit1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95 (patch)
tree286dda5e84757594218e684b94b01b3a3cac15a2 /tools/testing/selftests/drivers/net/hw/tso.py
parentMerge tag 'zstd-linus-v6.15-rc1' of https://github.com/terrelln/linux (diff)
parentMerge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net (diff)
downloadlinux-1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95.tar.gz
linux-1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95.zip
Merge tag 'net-next-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core & protocols: - Continue Netlink conversions to per-namespace RTNL lock (IPv4 routing, routing rules, routing next hops, ARP ioctls) - Continue extending the use of netdev instance locks. As a driver opt-in protect queue operations and (in due course) ethtool operations with the instance lock and not RTNL lock. - Support collecting TCP timestamps (data submitted, sent, acked) in BPF, allowing for transparent (to the application) and lower overhead tracking of TCP RPC performance. - Tweak existing networking Rx zero-copy infra to support zero-copy Rx via io_uring. - Optimize MPTCP performance in single subflow mode by 29%. - Enable GRO on packets which went thru XDP CPU redirect (were queued for processing on a different CPU). Improving TCP stream performance up to 2x. - Improve performance of contended connect() by 200% by searching for an available 4-tuple under RCU rather than a spin lock. Bring an additional 229% improvement by tweaking hash distribution. - Avoid unconditionally touching sk_tsflags on RX, improving performance under UDP flood by as much as 10%. - Avoid skb_clone() dance in ping_rcv() to improve performance under ping flood. - Avoid FIB lookup in netfilter if socket is available, 20% perf win. - Rework network device creation (in-kernel) API to more clearly identify network namespaces and their roles. There are up to 4 namespace roles but we used to have just 2 netns pointer arguments, interpreted differently based on context. - Use sysfs_break_active_protection() instead of trylock to avoid deadlocks between unregistering objects and sysfs access. - Add a new sysctl and sockopt for capping max retransmit timeout in TCP. - Support masking port and DSCP in routing rule matches. - Support dumping IPv4 multicast addresses with RTM_GETMULTICAST. - Support specifying at what time packet should be sent on AF_XDP sockets. - Expose TCP ULP diagnostic info (for TLS and MPTCP) to non-admin users. - Add Netlink YAML spec for WiFi (nl80211) and conntrack. - Introduce EXPORT_IPV6_MOD() and EXPORT_IPV6_MOD_GPL() for symbols which only need to be exported when IPv6 support is built as a module. - Age FDB entries based on Rx not Tx traffic in VxLAN, similar to normal bridging. - Allow users to specify source port range for GENEVE tunnels. - netconsole: allow attaching kernel release, CPU ID and task name to messages as metadata Driver API: - Continue rework / fixing of Energy Efficient Ethernet (EEE) across the SW layers. Delegate the responsibilities to phylink where possible. Improve its handling in phylib. - Support symmetric OR-XOR RSS hashing algorithm. - Support tracking and preserving IRQ affinity by NAPI itself. - Support loopback mode speed selection for interface selftests. Device drivers: - Remove the IBM LCS driver for s390 - Remove the sb1000 cable modem driver - Add support for SFP module access over SMBus - Add MCTP transport driver for MCTP-over-USB - Enable XDP metadata support in multiple drivers - Ethernet high-speed NICs: - Broadcom (bnxt): - add PCIe TLP Processing Hints (TPH) support for new AMD platforms - support dumping RoCE queue state for debug - opt into instance locking - Intel (100G, ice, idpf): - ice: rework MSI-X IRQ management and distribution - ice: support for E830 devices - iavf: add support for Rx timestamping - iavf: opt into instance locking - nVidia/Mellanox: - mlx4: use page pool memory allocator for Rx - mlx5: support for one PTP device per hardware clock - mlx5: support for 200Gbps per-lane link modes - mlx5: move IPSec policy check after decryption - AMD/Solarflare: - support FW flashing via devlink - Cisco (enic): - use page pool memory allocator for Rx - enable 32, 64 byte CQEs - get max rx/tx ring size from the device - Meta (fbnic): - support flow steering and RSS configuration - report queue stats - support TCP segmentation - support IRQ coalescing - support ring size configuration - Marvell/Cavium: - support AF_XDP - Wangxun: - support for PTP clock and timestamping - Huawei (hibmcge): - checksum offload - add more statistics - Ethernet virtual: - VirtIO net: - aggressively suppress Tx completions, improve perf by 96% with 1 CPU and 55% with 2 CPUs - expose NAPI to IRQ mapping and persist NAPI settings - Google (gve): - support XDP in DQO RDA Queue Format - opt into instance locking - Microsoft vNIC: - support BIG TCP - Ethernet NICs consumer, and embedded: - Synopsys (stmmac): - cleanup Tx and Tx clock setting and other link-focused cleanups - enable SGMII and 2500BASEX mode switching for Intel platforms - support Sophgo SG2044 - Broadcom switches (b53): - support for BCM53101 - TI: - iep: add perout configuration support - icssg: support XDP - Cadence (macb): - implement BQL - Xilinx (axinet): - support dynamic IRQ moderation and changing coalescing at runtime - implement BQL - report standard stats - MediaTek: - support phylink managed EEE - Intel: - igc: don't restart the interface on every XDP program change - RealTek (r8169): - support reading registers of internal PHYs directly - increase max jumbo packet size on RTL8125/RTL8126 - Airoha: - support for RISC-V NPU packet processing unit - enable scatter-gather and support MTU up to 9kB - Tehuti (tn40xx): - support cards with TN4010 MAC and an Aquantia AQR105 PHY - Ethernet PHYs: - support for TJA1102S, TJA1121 - dp83tg720: add randomized polling intervals for link detection - dp83822: support changing the transmit amplitude voltage - support for LEDs on 88q2xxx - CAN: - canxl: support Remote Request Substitution bit access - flexcan: add S32G2/S32G3 SoC - WiFi: - remove cooked monitor support - strict mode for better AP testing - basic EPCS support - OMI RX bandwidth reduction support - batman-adv: add support for jumbo frames - WiFi drivers: - RealTek (rtw88): - support RTL8814AE and RTL8814AU - RealTek (rtw89): - switch using wiphy_lock and wiphy_work - add BB context to manipulate two PHY as preparation of MLO - improve BT-coexistence mechanism to play A2DP smoothly - Intel (iwlwifi): - add new iwlmld sub-driver for latest HW/FW combinations - MediaTek (mt76): - preparation for mt7996 Multi-Link Operation (MLO) support - Qualcomm/Atheros (ath12k): - continued work on MLO - Silabs (wfx): - Wake-on-WLAN support - Bluetooth: - add support for skb TX SND/COMPLETION timestamping - hci_core: enable buffer flow control for SCO/eSCO - coredump: log devcd dumps into the monitor - Bluetooth drivers: - intel: add support to configure TX power - nxp: handle bootloader error during cmd5 and cmd7" * tag 'net-next-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1681 commits) unix: fix up for "apparmor: add fine grained af_unix mediation" mctp: Fix incorrect tx flow invalidation condition in mctp-i2c net: usb: asix: ax88772: Increase phy_name size net: phy: Introduce PHY_ID_SIZE — minimum size for PHY ID string net: libwx: fix Tx L4 checksum net: libwx: fix Tx descriptor content for some tunnel packets atm: Fix NULL pointer dereference net: tn40xx: add pci-id of the aqr105-based Tehuti TN4010 cards net: tn40xx: prepare tn40xx driver to find phy of the TN9510 card net: tn40xx: create swnode for mdio and aqr105 phy and add to mdiobus net: phy: aquantia: add essential functions to aqr105 driver net: phy: aquantia: search for firmware-name in fwnode net: phy: aquantia: add probe function to aqr105 for firmware loading net: phy: Add swnode support to mdiobus_scan gve: add XDP DROP and PASS support for DQ gve: update XDP allocation path support RX buffer posting gve: merge packet buffer size fields gve: update GQ RX to use buf_size gve: introduce config-based allocation for XDP gve: remove xdp_xsk_done and xdp_xsk_wakeup statistics ...
Diffstat (limited to 'tools/testing/selftests/drivers/net/hw/tso.py')
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/tso.py241
1 files changed, 241 insertions, 0 deletions
diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py
new file mode 100755
index 000000000000..e1ecb92f79d9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/tso.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Run the tools/testing/selftests/net/csum testsuite."""
+
+import fcntl
+import socket
+import struct
+import termios
+import time
+
+from lib.py import ksft_pr, ksft_run, ksft_exit, KsftSkipEx, KsftXfailEx
+from lib.py import ksft_eq, ksft_ge, ksft_lt
+from lib.py import EthtoolFamily, NetdevFamily, NetDrvEpEnv
+from lib.py import bkg, cmd, defer, ethtool, ip, rand_port, wait_port_listen
+
+
+def sock_wait_drain(sock, max_wait=1000):
+ """Wait for all pending write data on the socket to get ACKed."""
+ for _ in range(max_wait):
+ one = b'\0' * 4
+ outq = fcntl.ioctl(sock.fileno(), termios.TIOCOUTQ, one)
+ outq = struct.unpack("I", outq)[0]
+ if outq == 0:
+ break
+ time.sleep(0.01)
+ ksft_eq(outq, 0)
+
+
+def tcp_sock_get_retrans(sock):
+ """Get the number of retransmissions for the TCP socket."""
+ info = sock.getsockopt(socket.SOL_TCP, socket.TCP_INFO, 512)
+ return struct.unpack("I", info[100:104])[0]
+
+
+def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso):
+ cfg.require_cmd("socat", remote=True)
+
+ port = rand_port()
+ listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof"
+
+ with bkg(listen_cmd, host=cfg.remote) as nc:
+ wait_port_listen(port, host=cfg.remote)
+
+ if ipver == "4":
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.connect((remote_v4, port))
+ else:
+ sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+ sock.connect((remote_v6, port))
+
+ # Small send to make sure the connection is working.
+ sock.send("ping".encode())
+ sock_wait_drain(sock)
+
+ # Send 4MB of data, record the LSO packet count.
+ qstat_old = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+ buf = b"0" * 1024 * 1024 * 4
+ sock.send(buf)
+ sock_wait_drain(sock)
+ qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ # No math behind the 10 here, but try to catch cases where
+ # TCP falls back to non-LSO.
+ ksft_lt(tcp_sock_get_retrans(sock), 10)
+ sock.close()
+
+ # Check that at least 90% of the data was sent as LSO packets.
+ # System noise may cause false negatives. Also header overheads
+ # will add up to 5% of extra packes... The check is best effort.
+ total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"]
+ total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"]
+ if should_lso:
+ if cfg.have_stat_super_count:
+ ksft_ge(qstat_new['tx-hw-gso-packets'] -
+ qstat_old['tx-hw-gso-packets'],
+ total_lso_super,
+ comment="Number of LSO super-packets with LSO enabled")
+ if cfg.have_stat_wire_count:
+ ksft_ge(qstat_new['tx-hw-gso-wire-packets'] -
+ qstat_old['tx-hw-gso-wire-packets'],
+ total_lso_wire,
+ comment="Number of LSO wire-packets with LSO enabled")
+ else:
+ if cfg.have_stat_super_count:
+ ksft_lt(qstat_new['tx-hw-gso-packets'] -
+ qstat_old['tx-hw-gso-packets'],
+ 15, comment="Number of LSO super-packets with LSO disabled")
+ if cfg.have_stat_wire_count:
+ ksft_lt(qstat_new['tx-hw-gso-wire-packets'] -
+ qstat_old['tx-hw-gso-wire-packets'],
+ 500, comment="Number of LSO wire-packets with LSO disabled")
+
+
+def build_tunnel(cfg, outer_ipver, tun_info):
+ local_v4 = NetDrvEpEnv.nsim_v4_pfx + "1"
+ local_v6 = NetDrvEpEnv.nsim_v6_pfx + "1"
+ remote_v4 = NetDrvEpEnv.nsim_v4_pfx + "2"
+ remote_v6 = NetDrvEpEnv.nsim_v6_pfx + "2"
+
+ local_addr = cfg.addr_v[outer_ipver]
+ remote_addr = cfg.remote_addr_v[outer_ipver]
+
+ tun_type = tun_info[0]
+ tun_arg = tun_info[2]
+ ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}")
+ defer(ip, f"link del {tun_type}-ksft")
+ ip(f"link set dev {tun_type}-ksft up")
+ ip(f"addr add {local_v4}/24 dev {tun_type}-ksft")
+ ip(f"addr add {local_v6}/64 dev {tun_type}-ksft")
+
+ ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}",
+ host=cfg.remote)
+ defer(ip, f"link del {tun_type}-ksft", host=cfg.remote)
+ ip(f"link set dev {tun_type}-ksft up", host=cfg.remote)
+ ip(f"addr add {remote_v4}/24 dev {tun_type}-ksft", host=cfg.remote)
+ ip(f"addr add {remote_v6}/64 dev {tun_type}-ksft", host=cfg.remote)
+
+ return remote_v4, remote_v6
+
+
+def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None):
+ """Construct specific tests from the common template."""
+ def f(cfg):
+ cfg.require_ipver(outer_ipver)
+
+ if not cfg.have_stat_super_count and \
+ not cfg.have_stat_wire_count:
+ raise KsftSkipEx(f"Device does not support LSO queue stats")
+
+ ipver = outer_ipver
+ if tun:
+ remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun)
+ ipver = inner_ipver
+ else:
+ remote_v4 = cfg.remote_addr_v["4"]
+ remote_v6 = cfg.remote_addr_v["6"]
+
+ tun_partial = tun and tun[1]
+ # Tunnel which can silently fall back to gso-partial
+ has_gso_partial = tun and 'tx-gso-partial' in cfg.features
+
+ # For TSO4 via partial we need mangleid
+ if ipver == "4" and feature in cfg.partial_features:
+ ksft_pr("Testing with mangleid enabled")
+ if 'tx-tcp-mangleid-segmentation' not in cfg.features:
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on")
+ defer(ethtool, f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off")
+
+ # First test without the feature enabled.
+ ethtool(f"-K {cfg.ifname} {feature} off")
+ if has_gso_partial:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False)
+
+ # Now test with the feature enabled.
+ # For compatible tunnels only - just GSO partial, not specific feature.
+ if has_gso_partial:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6,
+ should_lso=tun_partial)
+
+ # Full feature enabled.
+ if feature in cfg.features:
+ ethtool(f"-K {cfg.ifname} {feature} on")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True)
+ else:
+ raise KsftXfailEx(f"Device does not support {feature}")
+
+ f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver
+ return f
+
+
+def query_nic_features(cfg) -> None:
+ """Query and cache the NIC features."""
+ cfg.have_stat_super_count = False
+ cfg.have_stat_wire_count = False
+
+ cfg.features = set()
+ features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+ for f in features["active"]["bits"]["bit"]:
+ cfg.features.add(f["name"])
+
+ # Check which features are supported via GSO partial
+ cfg.partial_features = set()
+ if 'tx-gso-partial' in cfg.features:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+
+ no_partial = set()
+ features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+ for f in features["active"]["bits"]["bit"]:
+ no_partial.add(f["name"])
+ cfg.partial_features = cfg.features - no_partial
+ ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+
+ stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)
+ if stats:
+ if 'tx-hw-gso-packets' in stats[0]:
+ ksft_pr("Detected qstat for LSO super-packets")
+ cfg.have_stat_super_count = True
+ if 'tx-hw-gso-wire-packets' in stats[0]:
+ ksft_pr("Detected qstat for LSO wire-packets")
+ cfg.have_stat_wire_count = True
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
+
+ query_nic_features(cfg)
+
+ test_info = (
+ # name, v4/v6 ethtool_feature tun:(type, partial, args)
+ ("", "4", "tx-tcp-segmentation", None),
+ ("", "6", "tx-tcp6-segmentation", None),
+ ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")),
+ ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")),
+ ("gre", "4", "tx-gre-segmentation", ("ipgre", False, "")),
+ ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")),
+ )
+
+ cases = []
+ for outer_ipver in ["4", "6"]:
+ for info in test_info:
+ # Skip if test which only works for a specific IP version
+ if info[1] and outer_ipver != info[1]:
+ continue
+
+ cases.append(test_builder(info[0], cfg, outer_ipver, info[2],
+ tun=info[3], inner_ipver="4"))
+ if info[3]:
+ cases.append(test_builder(info[0], cfg, outer_ipver, info[2],
+ tun=info[3], inner_ipver="6"))
+
+ ksft_run(cases=cases, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()