From 689883de94dd8a43a0ea77311327effab240afda Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Mon, 9 Jun 2025 17:30:15 +0000 Subject: net: stop napi kthreads when THREADED napi is disabled Once the THREADED napi is disabled, the napi kthread should also be stopped. Keeping the kthread intact after disabling THREADED napi makes the PID of this kthread show up in the output of netlink 'napi-get' and ps -ef output. The is discussed in the patch below: https://lore.kernel.org/all/20250502191548.559cc416@kernel.org NAPI kthread should stop only if, - There are no pending napi poll scheduled for this thread. - There are no new napi poll scheduled for this thread while it has stopped. - The ____napi_schedule can correctly fallback to the softirq for napi polling. Since napi_schedule_prep provides mutual exclusion over STATE_SCHED bit, it is safe to unset the STATE_THREADED when SCHED_THREADED is set or the SCHED bit is not set. SCHED_THREADED being set means that SCHED is already set and the kthread owns this napi. To disable threaded napi, unset STATE_THREADED bit safely if SCHED_THREADED is set or SCHED is unset. Once STATE_THREADED is unset safely then wait for the kthread to unset the SCHED_THREADED bit so it safe to stop the kthread. Add a new test in nl_netdev to verify this behaviour. Tested: ./tools/testing/selftests/net/nl_netdev.py TAP version 13 1..6 ok 1 nl_netdev.empty_check ok 2 nl_netdev.lo_check ok 3 nl_netdev.page_pool_check ok 4 nl_netdev.napi_list_check ok 5 nl_netdev.dev_set_threaded ok 6 nl_netdev.nsim_rxq_reset_down # Totals: pass:6 fail:0 xfail:0 xpass:0 skip:0 error:0 Ran neper for 300 seconds and did enable/disable of thread napi in a loop continuously. Signed-off-by: Samiullah Khawaja Link: https://patch.msgid.link/20250609173015.3851695-1-skhawaja@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 45 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index be97c440ecd5..5baa4691074f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6926,6 +6926,43 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) return HRTIMER_NORESTART; } +static void napi_stop_kthread(struct napi_struct *napi) +{ + unsigned long val, new; + + /* Wait until the napi STATE_THREADED is unset. */ + while (true) { + val = READ_ONCE(napi->state); + + /* If napi kthread own this napi or the napi is idle, + * STATE_THREADED can be unset here. + */ + if ((val & NAPIF_STATE_SCHED_THREADED) || + !(val & NAPIF_STATE_SCHED)) { + new = val & (~NAPIF_STATE_THREADED); + } else { + msleep(20); + continue; + } + + if (try_cmpxchg(&napi->state, &val, new)) + break; + } + + /* Once STATE_THREADED is unset, wait for SCHED_THREADED to be unset by + * the kthread. + */ + while (true) { + if (!test_bit(NAPIF_STATE_SCHED_THREADED, &napi->state)) + break; + + msleep(20); + } + + kthread_stop(napi->thread); + napi->thread = NULL; +} + int dev_set_threaded(struct net_device *dev, bool threaded) { struct napi_struct *napi; @@ -6961,8 +6998,12 @@ int dev_set_threaded(struct net_device *dev, bool threaded) * softirq mode will happen in the next round of napi_schedule(). * This should not cause hiccups/stalls to the live traffic. */ - list_for_each_entry(napi, &dev->napi_list, dev_list) - assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); + list_for_each_entry(napi, &dev->napi_list, dev_list) { + if (!threaded && napi->thread) + napi_stop_kthread(napi); + else + assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); + } return err; } -- cgit v1.2.3 From 1ead7501094c6a61461c0c98dde9ec5660fa1e24 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 16 Jun 2025 09:21:14 -0700 Subject: udp_tunnel: remove rtnl_lock dependency Drivers that are using ops lock and don't depend on RTNL lock still need to manage it because udp_tunnel's RTNL dependency. Introduce new udp_tunnel_nic_lock and use it instead of rtnl_lock. Drop non-UDP_TUNNEL_NIC_INFO_MAY_SLEEP mode from udp_tunnel infra (udp_tunnel_nic_device_sync_work needs to grab udp_tunnel_nic_lock mutex and might sleep). Cover more places in v4: - netlink - udp_tunnel_notify_add_rx_port (ndo_open) - triggers udp_tunnel_nic_device_sync_work - udp_tunnel_notify_del_rx_port (ndo_stop) - triggers udp_tunnel_nic_device_sync_work - udp_tunnel_get_rx_info (__netdev_update_features) - triggers NETDEV_UDP_TUNNEL_PUSH_INFO - udp_tunnel_drop_rx_info (__netdev_update_features) - triggers NETDEV_UDP_TUNNEL_DROP_INFO - udp_tunnel_nic_reset_ntf (ndo_open) - notifiers - udp_tunnel_nic_netdevice_event, depending on the event: - triggers NETDEV_UDP_TUNNEL_PUSH_INFO - triggers NETDEV_UDP_TUNNEL_DROP_INFO - ethnl_tunnel_info_reply_size - udp_tunnel_nic_set_port_priv (two intel drivers) Cc: Michael Chan Suggested-by: Jakub Kicinski Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250616162117.287806-4-stfomichev@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 3 +- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +- drivers/net/ethernet/emulex/benet/be_main.c | 3 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 - drivers/net/ethernet/intel/ice/ice_main.c | 1 - drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 3 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +- .../net/ethernet/netronome/nfp/nfp_net_common.c | 3 +- drivers/net/ethernet/qlogic/qede/qede_filter.c | 3 - drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 1 - drivers/net/ethernet/sfc/ef10.c | 1 - drivers/net/netdevsim/udp_tunnels.c | 4 - include/net/udp_tunnel.h | 87 ++++++++++++++++------ net/core/dev.c | 2 + net/ipv4/udp_tunnel_core.c | 16 ++-- net/ipv4/udp_tunnel_nic.c | 78 ++++++++++++++----- 16 files changed, 142 insertions(+), 73 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index c9a1a1d504c0..3ee4b848ef53 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -10219,8 +10219,7 @@ static int bnx2x_udp_tunnel_sync(struct net_device *netdev, unsigned int table) static const struct udp_tunnel_nic_info bnx2x_udp_tunnels = { .sync_table = bnx2x_udp_tunnel_sync, - .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | - UDP_TUNNEL_NIC_INFO_OPEN_ONLY, + .flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY, .tables = { { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, }, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 00a60b2b90c4..ededd292b9d3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -15573,8 +15573,7 @@ static int bnxt_udp_tunnel_unset_port(struct net_device *netdev, unsigned int ta static const struct udp_tunnel_nic_info bnxt_udp_tunnels = { .set_port = bnxt_udp_tunnel_set_port, .unset_port = bnxt_udp_tunnel_unset_port, - .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | - UDP_TUNNEL_NIC_INFO_OPEN_ONLY, + .flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY, .tables = { { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, }, @@ -15582,8 +15581,7 @@ static const struct udp_tunnel_nic_info bnxt_udp_tunnels = { }, bnxt_udp_tunnels_p7 = { .set_port = bnxt_udp_tunnel_set_port, .unset_port = bnxt_udp_tunnel_unset_port, - .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | - UDP_TUNNEL_NIC_INFO_OPEN_ONLY, + .flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY, .tables = { { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, }, diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 3d2e21592119..f49400ba9729 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4031,8 +4031,7 @@ static int be_vxlan_unset_port(struct net_device *netdev, unsigned int table, static const struct udp_tunnel_nic_info be_udp_tunnels = { .set_port = be_vxlan_set_port, .unset_port = be_vxlan_unset_port, - .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | - UDP_TUNNEL_NIC_INFO_OPEN_ONLY, + .flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY, .tables = { { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, }, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1156a5b3055c..3b4f59d978a5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -15895,7 +15895,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->udp_tunnel_nic.set_port = i40e_udp_tunnel_set_port; pf->udp_tunnel_nic.unset_port = i40e_udp_tunnel_unset_port; - pf->udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP; pf->udp_tunnel_nic.shared = &pf->udp_tunnel_shared; pf->udp_tunnel_nic.tables[0].n_entries = I40E_MAX_PF_UDP_OFFLOAD_PORTS; pf->udp_tunnel_nic.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN | diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 7959a65c0903..f8ef80069e3d 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4767,7 +4767,6 @@ int ice_init_dev(struct ice_pf *pf) pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port; pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port; - pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP; pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared; if (pf->hw.tnl.valid_count[TNL_VXLAN]) { pf->hw.udp_tunnel_nic.tables[0].n_entries = diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 281b34af0bb4..d2071aff7b8f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2670,8 +2670,7 @@ static int mlx4_udp_tunnel_sync(struct net_device *dev, unsigned int table) static const struct udp_tunnel_nic_info mlx4_udp_tunnels = { .sync_table = mlx4_udp_tunnel_sync, - .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | - UDP_TUNNEL_NIC_INFO_IPV4_ONLY, + .flags = UDP_TUNNEL_NIC_INFO_IPV4_ONLY, .tables = { { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 24559cbcbfc2..dca5ca51a470 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5351,8 +5351,7 @@ void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv) priv->nic_info.set_port = mlx5e_vxlan_set_port; priv->nic_info.unset_port = mlx5e_vxlan_unset_port; - priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | - UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN; + priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN; priv->nic_info.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN; /* Don't count the space hard-coded to the IANA port */ priv->nic_info.tables[0].n_entries = diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 932f59d70f41..132626a3f9f7 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2394,8 +2394,7 @@ static int nfp_udp_tunnel_sync(struct net_device *netdev, unsigned int table) static const struct udp_tunnel_nic_info nfp_udp_tunnels = { .sync_table = nfp_udp_tunnel_sync, - .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | - UDP_TUNNEL_NIC_INFO_OPEN_ONLY, + .flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY, .tables = { { .n_entries = NFP_NET_N_VXLAN_PORTS, diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 985026dd816f..7e341e026489 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -987,20 +987,17 @@ static int qede_udp_tunnel_sync(struct net_device *dev, unsigned int table) static const struct udp_tunnel_nic_info qede_udp_tunnels_both = { .sync_table = qede_udp_tunnel_sync, - .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP, .tables = { { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, }, }, }, qede_udp_tunnels_vxlan = { .sync_table = qede_udp_tunnel_sync, - .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP, .tables = { { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, }, }, qede_udp_tunnels_geneve = { .sync_table = qede_udp_tunnel_sync, - .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP, .tables = { { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, }, }, diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index eb69121df726..53cdd36c4123 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -486,7 +486,6 @@ static int qlcnic_udp_tunnel_sync(struct net_device *dev, unsigned int table) static const struct udp_tunnel_nic_info qlcnic_udp_tunnels = { .sync_table = qlcnic_udp_tunnel_sync, - .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP, .tables = { { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, }, diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 47349c148c0c..fcec81f862ec 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3985,7 +3985,6 @@ static int efx_ef10_udp_tnl_unset_port(struct net_device *dev, static const struct udp_tunnel_nic_info efx_ef10_udp_tunnels = { .set_port = efx_ef10_udp_tnl_set_port, .unset_port = efx_ef10_udp_tnl_unset_port, - .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP, .tables = { { .n_entries = 16, diff --git a/drivers/net/netdevsim/udp_tunnels.c b/drivers/net/netdevsim/udp_tunnels.c index 640b4983a9a0..10cbbf1c584b 100644 --- a/drivers/net/netdevsim/udp_tunnels.c +++ b/drivers/net/netdevsim/udp_tunnels.c @@ -112,12 +112,10 @@ nsim_udp_tunnels_info_reset_write(struct file *file, const char __user *data, struct net_device *dev = file->private_data; struct netdevsim *ns = netdev_priv(dev); - rtnl_lock(); if (dev->reg_state == NETREG_REGISTERED) { memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports)); udp_tunnel_nic_reset_ntf(dev); } - rtnl_unlock(); return count; } @@ -181,8 +179,6 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, info->sync_table = NULL; } - if (ns->udp_ports.sleep) - info->flags |= UDP_TUNNEL_NIC_INFO_MAY_SLEEP; if (nsim_dev->udp_ports.open_only) info->flags |= UDP_TUNNEL_NIC_INFO_OPEN_ONLY; if (nsim_dev->udp_ports.ipv4_only) diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index e3c70b579095..cbd3a43074bd 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -130,22 +130,6 @@ void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type); void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); -static inline void udp_tunnel_get_rx_info(struct net_device *dev) -{ - ASSERT_RTNL(); - if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) - return; - call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); -} - -static inline void udp_tunnel_drop_rx_info(struct net_device *dev) -{ - ASSERT_RTNL(); - if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) - return; - call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev); -} - /* Transmit the skb using UDP encapsulation. */ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, @@ -222,19 +206,17 @@ static inline void udp_tunnel_encap_enable(struct sock *sk) #define UDP_TUNNEL_NIC_MAX_TABLES 4 enum udp_tunnel_nic_info_flags { - /* Device callbacks may sleep */ - UDP_TUNNEL_NIC_INFO_MAY_SLEEP = BIT(0), /* Device only supports offloads when it's open, all ports * will be removed before close and re-added after open. */ - UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(1), + UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(0), /* Device supports only IPv4 tunnels */ - UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(2), + UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(1), /* Device has hard-coded the IANA VXLAN port (4789) as VXLAN. * This port must not be counted towards n_entries of any table. * Driver will not receive any callback associated with port 4789. */ - UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3), + UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(2), }; struct udp_tunnel_nic; @@ -325,6 +307,9 @@ struct udp_tunnel_nic_ops { size_t (*dump_size)(struct net_device *dev, unsigned int table); int (*dump_write)(struct net_device *dev, unsigned int table, struct sk_buff *skb); + void (*assert_locked)(struct net_device *dev); + void (*lock)(struct net_device *dev); + void (*unlock)(struct net_device *dev); }; #ifdef CONFIG_INET @@ -353,8 +338,29 @@ static inline void udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table, unsigned int idx, u8 priv) { - if (udp_tunnel_nic_ops) + if (udp_tunnel_nic_ops) { + udp_tunnel_nic_ops->lock(dev); udp_tunnel_nic_ops->set_port_priv(dev, table, idx, priv); + udp_tunnel_nic_ops->unlock(dev); + } +} + +static inline void udp_tunnel_nic_assert_locked(struct net_device *dev) +{ + if (udp_tunnel_nic_ops) + udp_tunnel_nic_ops->assert_locked(dev); +} + +static inline void udp_tunnel_nic_lock(struct net_device *dev) +{ + if (udp_tunnel_nic_ops) + udp_tunnel_nic_ops->lock(dev); +} + +static inline void udp_tunnel_nic_unlock(struct net_device *dev) +{ + if (udp_tunnel_nic_ops) + udp_tunnel_nic_ops->unlock(dev); } static inline void @@ -396,17 +402,50 @@ static inline void udp_tunnel_nic_reset_ntf(struct net_device *dev) static inline size_t udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table) { + size_t ret; + if (!udp_tunnel_nic_ops) return 0; - return udp_tunnel_nic_ops->dump_size(dev, table); + + udp_tunnel_nic_ops->lock(dev); + ret = udp_tunnel_nic_ops->dump_size(dev, table); + udp_tunnel_nic_ops->unlock(dev); + + return ret; } static inline int udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table, struct sk_buff *skb) { + int ret; + if (!udp_tunnel_nic_ops) return 0; - return udp_tunnel_nic_ops->dump_write(dev, table, skb); + + udp_tunnel_nic_ops->lock(dev); + ret = udp_tunnel_nic_ops->dump_write(dev, table, skb); + udp_tunnel_nic_ops->unlock(dev); + + return ret; +} + +static inline void udp_tunnel_get_rx_info(struct net_device *dev) +{ + ASSERT_RTNL(); + if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + return; + udp_tunnel_nic_assert_locked(dev); + call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); } + +static inline void udp_tunnel_drop_rx_info(struct net_device *dev) +{ + ASSERT_RTNL(); + if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + return; + udp_tunnel_nic_assert_locked(dev); + call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev); +} + #endif diff --git a/net/core/dev.c b/net/core/dev.c index 5baa4691074f..43f56b44f351 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10771,12 +10771,14 @@ sync_lower: * *before* calling udp_tunnel_get_rx_info, * but *after* calling udp_tunnel_drop_rx_info. */ + udp_tunnel_nic_lock(dev); if (features & NETIF_F_RX_UDP_TUNNEL_PORT) { dev->features = features; udp_tunnel_get_rx_info(dev); } else { udp_tunnel_drop_rx_info(dev); } + udp_tunnel_nic_unlock(dev); } if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) { diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 9efd62505916..fce945f23069 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -134,15 +134,17 @@ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type) struct udp_tunnel_info ti; struct net_device *dev; + ASSERT_RTNL(); + ti.type = type; ti.sa_family = sk->sk_family; ti.port = inet_sk(sk)->inet_sport; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { + udp_tunnel_nic_lock(dev); udp_tunnel_nic_add_port(dev, &ti); + udp_tunnel_nic_unlock(dev); } - rcu_read_unlock(); } EXPORT_SYMBOL_GPL(udp_tunnel_notify_add_rx_port); @@ -154,15 +156,17 @@ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type) struct udp_tunnel_info ti; struct net_device *dev; + ASSERT_RTNL(); + ti.type = type; ti.sa_family = sk->sk_family; ti.port = inet_sk(sk)->inet_sport; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { + udp_tunnel_nic_lock(dev); udp_tunnel_nic_del_port(dev, &ti); + udp_tunnel_nic_unlock(dev); } - rcu_read_unlock(); } EXPORT_SYMBOL_GPL(udp_tunnel_notify_del_rx_port); diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c index b6d2d16189c0..ff66db48453c 100644 --- a/net/ipv4/udp_tunnel_nic.c +++ b/net/ipv4/udp_tunnel_nic.c @@ -29,6 +29,7 @@ struct udp_tunnel_nic_table_entry { * struct udp_tunnel_nic - UDP tunnel port offload state * @work: async work for talking to hardware from process context * @dev: netdev pointer + * @lock: protects all fields * @need_sync: at least one port start changed * @need_replay: space was freed, we need a replay of all ports * @work_pending: @work is currently scheduled @@ -41,6 +42,8 @@ struct udp_tunnel_nic { struct net_device *dev; + struct mutex lock; + u8 need_sync:1; u8 need_replay:1; u8 work_pending:1; @@ -298,22 +301,11 @@ __udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn) static void udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn) { - const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; - bool may_sleep; - if (!utn->need_sync) return; - /* Drivers which sleep in the callback need to update from - * the workqueue, if we come from the tunnel driver's notification. - */ - may_sleep = info->flags & UDP_TUNNEL_NIC_INFO_MAY_SLEEP; - if (!may_sleep) - __udp_tunnel_nic_device_sync(dev, utn); - if (may_sleep || utn->need_replay) { - queue_work(udp_tunnel_nic_workqueue, &utn->work); - utn->work_pending = 1; - } + queue_work(udp_tunnel_nic_workqueue, &utn->work); + utn->work_pending = 1; } static bool @@ -554,12 +546,12 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev) struct udp_tunnel_nic *utn; unsigned int i, j; - ASSERT_RTNL(); - utn = dev->udp_tunnel_nic; if (!utn) return; + mutex_lock(&utn->lock); + utn->need_sync = false; for (i = 0; i < utn->n_tables; i++) for (j = 0; j < info->tables[i].n_entries; j++) { @@ -569,7 +561,7 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev) entry->flags &= ~(UDP_TUNNEL_NIC_ENTRY_DEL | UDP_TUNNEL_NIC_ENTRY_OP_FAIL); - /* We don't release rtnl across ops */ + /* We don't release utn lock across ops */ WARN_ON(entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN); if (!entry->use_cnt) continue; @@ -579,6 +571,8 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev) } __udp_tunnel_nic_device_sync(dev, utn); + + mutex_unlock(&utn->lock); } static size_t @@ -643,6 +637,33 @@ err_cancel: return -EMSGSIZE; } +static void __udp_tunnel_nic_assert_locked(struct net_device *dev) +{ + struct udp_tunnel_nic *utn; + + utn = dev->udp_tunnel_nic; + if (utn) + lockdep_assert_held(&utn->lock); +} + +static void __udp_tunnel_nic_lock(struct net_device *dev) +{ + struct udp_tunnel_nic *utn; + + utn = dev->udp_tunnel_nic; + if (utn) + mutex_lock(&utn->lock); +} + +static void __udp_tunnel_nic_unlock(struct net_device *dev) +{ + struct udp_tunnel_nic *utn; + + utn = dev->udp_tunnel_nic; + if (utn) + mutex_unlock(&utn->lock); +} + static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = { .get_port = __udp_tunnel_nic_get_port, .set_port_priv = __udp_tunnel_nic_set_port_priv, @@ -651,6 +672,9 @@ static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = { .reset_ntf = __udp_tunnel_nic_reset_ntf, .dump_size = __udp_tunnel_nic_dump_size, .dump_write = __udp_tunnel_nic_dump_write, + .assert_locked = __udp_tunnel_nic_assert_locked, + .lock = __udp_tunnel_nic_lock, + .unlock = __udp_tunnel_nic_unlock, }; static void @@ -710,11 +734,15 @@ static void udp_tunnel_nic_device_sync_work(struct work_struct *work) container_of(work, struct udp_tunnel_nic, work); rtnl_lock(); + mutex_lock(&utn->lock); + utn->work_pending = 0; __udp_tunnel_nic_device_sync(utn->dev, utn); if (utn->need_replay) udp_tunnel_nic_replay(utn->dev, utn); + + mutex_unlock(&utn->lock); rtnl_unlock(); } @@ -730,6 +758,7 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info, return NULL; utn->n_tables = n_tables; INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work); + mutex_init(&utn->lock); for (i = 0; i < n_tables; i++) { utn->entries[i] = kcalloc(info->tables[i].n_entries, @@ -821,8 +850,11 @@ static int udp_tunnel_nic_register(struct net_device *dev) dev_hold(dev); dev->udp_tunnel_nic = utn; - if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) + if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) { + udp_tunnel_nic_lock(dev); udp_tunnel_get_rx_info(dev); + udp_tunnel_nic_unlock(dev); + } return 0; } @@ -832,6 +864,8 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + udp_tunnel_nic_lock(dev); + /* For a shared table remove this dev from the list of sharing devices * and if there are other devices just detach. */ @@ -841,8 +875,10 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) list_for_each_entry(node, &info->shared->devices, list) if (node->dev == dev) break; - if (list_entry_is_head(node, &info->shared->devices, list)) + if (list_entry_is_head(node, &info->shared->devices, list)) { + udp_tunnel_nic_unlock(dev); return; + } list_del(&node->list); kfree(node); @@ -852,6 +888,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) if (first) { udp_tunnel_drop_rx_info(dev); utn->dev = first->dev; + udp_tunnel_nic_unlock(dev); goto release_dev; } @@ -862,6 +899,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) * from the work which we will boot immediately. */ udp_tunnel_nic_flush(dev, utn); + udp_tunnel_nic_unlock(dev); /* Wait for the work to be done using the state, netdev core will * retry unregister until we give up our reference on this device. @@ -910,12 +948,16 @@ udp_tunnel_nic_netdevice_event(struct notifier_block *unused, return NOTIFY_DONE; if (event == NETDEV_UP) { + udp_tunnel_nic_lock(dev); WARN_ON(!udp_tunnel_nic_is_empty(dev, utn)); udp_tunnel_get_rx_info(dev); + udp_tunnel_nic_unlock(dev); return NOTIFY_OK; } if (event == NETDEV_GOING_DOWN) { + udp_tunnel_nic_lock(dev); udp_tunnel_nic_flush(dev, utn); + udp_tunnel_nic_unlock(dev); return NOTIFY_OK; } -- cgit v1.2.3 From 3a321b6b1f76a46102ae9a3977f7fdb8bc7c6e22 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 16 Jun 2025 09:21:15 -0700 Subject: net: remove redundant ASSERT_RTNL() in queue setup functions The existing netdev_ops_assert_locked() already asserts that either the RTNL lock or the per-device lock is held, making the explicit ASSERT_RTNL() redundant. Cc: Michael Chan Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250616162117.287806-5-stfomichev@gmail.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 43f56b44f351..df1678b1fe24 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3179,7 +3179,6 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->reg_state == NETREG_REGISTERED || dev->reg_state == NETREG_UNREGISTERING) { - ASSERT_RTNL(); netdev_ops_assert_locked(dev); rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, @@ -3229,7 +3228,6 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) return -EINVAL; if (dev->reg_state == NETREG_REGISTERED) { - ASSERT_RTNL(); netdev_ops_assert_locked(dev); rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues, -- cgit v1.2.3 From aa7d26c3c3497258b712fb97221e775733a710b7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jun 2025 10:24:17 -0400 Subject: ref_tracker: add a static classname string to each ref_tracker_dir A later patch in the series will be adding debugfs files for each ref_tracker that get created in ref_tracker_dir_init(). The format will be "class@%px". The current "name" string can vary between ref_tracker_dir objects of the same type, so it's not suitable for this purpose. Add a new "class" string to the ref_tracker dir that describes the the type of object (sans any individual info for that object). Also, in the i915 driver, gate the creation of debugfs files on whether the dentry pointer is still set to NULL. CI has shown that the ref_tracker_dir can be initialized more than once. Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20250618-reftrack-dbgfs-v15-4-24fc37ead144@kernel.org Signed-off-by: Jakub Kicinski --- drivers/gpu/drm/display/drm_dp_tunnel.c | 2 +- drivers/gpu/drm/i915/intel_runtime_pm.c | 4 +++- drivers/gpu/drm/i915/intel_wakeref.c | 3 ++- include/linux/ref_tracker.h | 4 ++++ lib/test_ref_tracker.c | 2 +- net/core/dev.c | 2 +- net/core/net_namespace.c | 4 ++-- 7 files changed, 14 insertions(+), 7 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/gpu/drm/display/drm_dp_tunnel.c b/drivers/gpu/drm/display/drm_dp_tunnel.c index 076edf161048..b9c12b8bf2a3 100644 --- a/drivers/gpu/drm/display/drm_dp_tunnel.c +++ b/drivers/gpu/drm/display/drm_dp_tunnel.c @@ -1920,7 +1920,7 @@ drm_dp_tunnel_mgr_create(struct drm_device *dev, int max_group_count) } #ifdef CONFIG_DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG - ref_tracker_dir_init(&mgr->ref_tracker, 16, "dptun"); + ref_tracker_dir_init(&mgr->ref_tracker, 16, "drm_dptun", "dptun"); #endif for (i = 0; i < max_group_count; i++) { diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 8d9f4c410546..90d90145a189 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -59,7 +59,9 @@ static struct drm_i915_private *rpm_to_i915(struct intel_runtime_pm *rpm) static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { - ref_tracker_dir_init(&rpm->debug, INTEL_REFTRACK_DEAD_COUNT, dev_name(rpm->kdev)); + if (!rpm->debug.class) + ref_tracker_dir_init(&rpm->debug, INTEL_REFTRACK_DEAD_COUNT, + "intel_runtime_pm", dev_name(rpm->kdev)); } static intel_wakeref_t diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c index 51561b190b93..7e74c58862c1 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.c +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -114,7 +114,8 @@ void __intel_wakeref_init(struct intel_wakeref *wf, "wakeref.work", &key->work, 0); #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF) - ref_tracker_dir_init(&wf->debug, INTEL_REFTRACK_DEAD_COUNT, name); + if (!wf->debug.class) + ref_tracker_dir_init(&wf->debug, INTEL_REFTRACK_DEAD_COUNT, "intel_wakeref", name); #endif } diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h index a0a1ee43724f..3968f993db81 100644 --- a/include/linux/ref_tracker.h +++ b/include/linux/ref_tracker.h @@ -19,6 +19,7 @@ struct ref_tracker_dir { bool dead; struct list_head list; /* List of active trackers */ struct list_head quarantine; /* List of dead trackers */ + const char *class; /* object classname */ char name[32]; #endif }; @@ -27,6 +28,7 @@ struct ref_tracker_dir { static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, unsigned int quarantine_count, + const char *class, const char *name) { INIT_LIST_HEAD(&dir->list); @@ -36,6 +38,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, dir->dead = false; refcount_set(&dir->untracked, 1); refcount_set(&dir->no_tracker, 1); + dir->class = class; strscpy(dir->name, name, sizeof(dir->name)); stack_depot_init(); } @@ -60,6 +63,7 @@ int ref_tracker_free(struct ref_tracker_dir *dir, static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, unsigned int quarantine_count, + const char *class, const char *name) { } diff --git a/lib/test_ref_tracker.c b/lib/test_ref_tracker.c index b983ceb12afc..d263502a4c1d 100644 --- a/lib/test_ref_tracker.c +++ b/lib/test_ref_tracker.c @@ -64,7 +64,7 @@ static int __init test_ref_tracker_init(void) { int i; - ref_tracker_dir_init(&ref_dir, 100, "selftest"); + ref_tracker_dir_init(&ref_dir, 100, "selftest", "selftest"); timer_setup(&test_ref_tracker_timer, test_ref_tracker_timer_func, 0); mod_timer(&test_ref_tracker_timer, jiffies + 1); diff --git a/net/core/dev.c b/net/core/dev.c index be97c440ecd5..12cf4e5ae9c5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11715,7 +11715,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->priv_len = sizeof_priv; - ref_tracker_dir_init(&dev->refcnt_tracker, 128, name); + ref_tracker_dir_init(&dev->refcnt_tracker, 128, "netdev", name); #ifdef CONFIG_PCPU_DEV_REFCNT dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ae54f26709ca..aa1e34181ed6 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -403,8 +403,8 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_ { refcount_set(&net->passive, 1); refcount_set(&net->ns.count, 1); - ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt"); - ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt"); + ref_tracker_dir_init(&net->refcnt_tracker, 128, "net_refcnt", "net_refcnt"); + ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net_notrefcnt", "net_notrefcnt"); get_random_bytes(&net->hash_mix, sizeof(u32)); net->dev_base_seq = 1; -- cgit v1.2.3 From 707bd05be75f65749c3f1695f4e362a89b3fcc7b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jun 2025 10:24:22 -0400 Subject: ref_tracker: eliminate the ref_tracker_dir name field Now that we have dentries and the ability to create meaningful symlinks to them, don't keep a name string in each tracker. Switch the output format to print "class@address", and drop the name field. Also, add a kerneldoc header for ref_tracker_dir_init(). Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20250618-reftrack-dbgfs-v15-9-24fc37ead144@kernel.org Signed-off-by: Jakub Kicinski --- drivers/gpu/drm/display/drm_dp_tunnel.c | 2 +- drivers/gpu/drm/i915/intel_runtime_pm.c | 2 +- drivers/gpu/drm/i915/intel_wakeref.c | 2 +- include/linux/ref_tracker.h | 20 ++++++++++++++------ lib/ref_tracker.c | 6 +++--- lib/test_ref_tracker.c | 2 +- net/core/dev.c | 2 +- net/core/net_namespace.c | 4 ++-- 8 files changed, 24 insertions(+), 16 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/gpu/drm/display/drm_dp_tunnel.c b/drivers/gpu/drm/display/drm_dp_tunnel.c index b9c12b8bf2a3..1205a4432eb4 100644 --- a/drivers/gpu/drm/display/drm_dp_tunnel.c +++ b/drivers/gpu/drm/display/drm_dp_tunnel.c @@ -1920,7 +1920,7 @@ drm_dp_tunnel_mgr_create(struct drm_device *dev, int max_group_count) } #ifdef CONFIG_DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG - ref_tracker_dir_init(&mgr->ref_tracker, 16, "drm_dptun", "dptun"); + ref_tracker_dir_init(&mgr->ref_tracker, 16, "drm_dptun"); #endif for (i = 0; i < max_group_count; i++) { diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 90d90145a189..7ce3e6de0c19 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -61,7 +61,7 @@ static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { if (!rpm->debug.class) ref_tracker_dir_init(&rpm->debug, INTEL_REFTRACK_DEAD_COUNT, - "intel_runtime_pm", dev_name(rpm->kdev)); + "intel_runtime_pm"); } static intel_wakeref_t diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c index 7e74c58862c1..7fa194de5d35 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.c +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -115,7 +115,7 @@ void __intel_wakeref_init(struct intel_wakeref *wf, #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF) if (!wf->debug.class) - ref_tracker_dir_init(&wf->debug, INTEL_REFTRACK_DEAD_COUNT, "intel_wakeref", name); + ref_tracker_dir_init(&wf->debug, INTEL_REFTRACK_DEAD_COUNT, "intel_wakeref"); #endif } diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h index e1323de93bf6..d10563afd91c 100644 --- a/include/linux/ref_tracker.h +++ b/include/linux/ref_tracker.h @@ -20,7 +20,6 @@ struct ref_tracker_dir { struct list_head list; /* List of active trackers */ struct list_head quarantine; /* List of dead trackers */ const char *class; /* object classname */ - char name[32]; #endif }; @@ -44,10 +43,21 @@ void ref_tracker_dir_symlink(struct ref_tracker_dir *dir, const char *fmt, ...) #endif /* CONFIG_DEBUG_FS */ +/** + * ref_tracker_dir_init - initialize a ref_tracker dir + * @dir: ref_tracker_dir to be initialized + * @quarantine_count: max number of entries to be tracked + * @class: pointer to static string that describes object type + * + * Initialize a ref_tracker_dir. If debugfs is configured, then a file + * will also be created for it under the top-level ref_tracker debugfs + * directory. + * + * Note that @class must point to a static string. + */ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, unsigned int quarantine_count, - const char *class, - const char *name) + const char *class) { INIT_LIST_HEAD(&dir->list); INIT_LIST_HEAD(&dir->quarantine); @@ -57,7 +67,6 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, refcount_set(&dir->untracked, 1); refcount_set(&dir->no_tracker, 1); dir->class = class; - strscpy(dir->name, name, sizeof(dir->name)); ref_tracker_dir_debugfs(dir); stack_depot_init(); } @@ -82,8 +91,7 @@ int ref_tracker_free(struct ref_tracker_dir *dir, static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, unsigned int quarantine_count, - const char *class, - const char *name) + const char *class) { } diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c index 6608520d6118..dcf923a1edf5 100644 --- a/lib/ref_tracker.c +++ b/lib/ref_tracker.c @@ -155,7 +155,7 @@ __ref_tracker_dir_pr_ostream(struct ref_tracker_dir *dir, stats = ref_tracker_get_stats(dir, display_limit); if (IS_ERR(stats)) { pr_ostream(s, "%s%s@%p: couldn't get stats, error %pe\n", - s->prefix, dir->name, dir, stats); + s->prefix, dir->class, dir, stats); return; } @@ -166,14 +166,14 @@ __ref_tracker_dir_pr_ostream(struct ref_tracker_dir *dir, if (sbuf && !stack_depot_snprint(stack, sbuf, STACK_BUF_SIZE, 4)) sbuf[0] = 0; pr_ostream(s, "%s%s@%p has %d/%d users at\n%s\n", s->prefix, - dir->name, dir, stats->stacks[i].count, + dir->class, dir, stats->stacks[i].count, stats->total, sbuf); skipped -= stats->stacks[i].count; } if (skipped) pr_ostream(s, "%s%s@%p skipped reports about %d/%d users.\n", - s->prefix, dir->name, dir, skipped, stats->total); + s->prefix, dir->class, dir, skipped, stats->total); kfree(sbuf); diff --git a/lib/test_ref_tracker.c b/lib/test_ref_tracker.c index d263502a4c1d..b983ceb12afc 100644 --- a/lib/test_ref_tracker.c +++ b/lib/test_ref_tracker.c @@ -64,7 +64,7 @@ static int __init test_ref_tracker_init(void) { int i; - ref_tracker_dir_init(&ref_dir, 100, "selftest", "selftest"); + ref_tracker_dir_init(&ref_dir, 100, "selftest"); timer_setup(&test_ref_tracker_timer, test_ref_tracker_timer_func, 0); mod_timer(&test_ref_tracker_timer, jiffies + 1); diff --git a/net/core/dev.c b/net/core/dev.c index 12cf4e5ae9c5..92a830162dd8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11715,7 +11715,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->priv_len = sizeof_priv; - ref_tracker_dir_init(&dev->refcnt_tracker, 128, "netdev", name); + ref_tracker_dir_init(&dev->refcnt_tracker, 128, "netdev"); #ifdef CONFIG_PCPU_DEV_REFCNT dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 45de05d8f087..d0f607507ee8 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -403,8 +403,8 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_ { refcount_set(&net->passive, 1); refcount_set(&net->ns.count, 1); - ref_tracker_dir_init(&net->refcnt_tracker, 128, "net_refcnt", "net_refcnt"); - ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net_notrefcnt", "net_notrefcnt"); + ref_tracker_dir_init(&net->refcnt_tracker, 128, "net_refcnt"); + ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net_notrefcnt"); get_random_bytes(&net->hash_mix, sizeof(u32)); net->dev_base_seq = 1; -- cgit v1.2.3 From d2527ad3a9e1f3031095d65376a94a8e640b2b74 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 30 Jun 2025 15:42:11 -0400 Subject: net: preserve MSG_ZEROCOPY with forwarding MSG_ZEROCOPY data must be copied before data is queued to local sockets, to avoid indefinite timeout until memory release. This test is performed by skb_orphan_frags_rx, which is called when looping an egress skb to packet sockets, error queue or ingress path. To preserve zerocopy for skbs that are looped to ingress but are then forwarded to an egress device rather than delivered locally, defer this last check until an skb enters the local IP receive path. This is analogous to existing behavior of skb_clear_delivery_time. Signed-off-by: Willem de Bruijn Link: https://patch.msgid.link/20250630194312.1571410-2-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 2 -- net/ipv4/ip_input.c | 6 ++++++ net/ipv6/ip6_input.c | 7 +++++++ 3 files changed, 13 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 7ee808eb068e..96d33dead604 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5937,8 +5937,6 @@ check_vlan_id: } if (pt_prev) { - if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) - goto drop; *ppt_prev = pt_prev; } else { drop: diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 30a5e9460d00..f8696e67def4 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -226,6 +226,12 @@ resubmit: static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) { + __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); + kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM); + return 0; + } + skb_clear_delivery_time(skb); __skb_pull(skb, skb_network_header_len(skb)); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 0b3b81fd4a58..168ec07e31cc 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -478,6 +478,13 @@ discard: static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INDISCARDS); + kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM); + return 0; + } + skb_clear_delivery_time(skb); ip6_protocol_deliver_rcu(net, skb, 0, false); -- cgit v1.2.3 From a41851bea7bfb627d00dc51252857019594b5758 Mon Sep 17 00:00:00 2001 From: Fengyuan Gong Date: Wed, 2 Jul 2025 16:07:41 +0000 Subject: net: account for encap headers in qdisc pkt len MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refine qdisc_pkt_len_init to include headers up through the inner transport header when computing header size for encapsulations. Also refine net/sched/sch_cake.c borrowed from qdisc_pkt_len_init(). Signed-off-by: Fengyuan Gong Reviewed-by: Willem de Bruijn Reviewed-by: Eric Dumazet Acked-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20250702160741.1204919-1-gfengyuan@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 5 ++++- net/sched/sch_cake.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 96d33dead604..ea129aa08317 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4026,7 +4026,10 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) unsigned int hdr_len; /* mac layer + network layer */ - hdr_len = skb_transport_offset(skb); + if (!skb->encapsulation) + hdr_len = skb_transport_offset(skb); + else + hdr_len = skb_inner_transport_offset(skb); /* + transport layer */ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 48dd8c88903f..dbcfb948c867 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1407,7 +1407,10 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb) return cake_calc_overhead(q, len, off); /* borrowed from qdisc_pkt_len_init() */ - hdr_len = skb_transport_offset(skb); + if (!skb->encapsulation) + hdr_len = skb_transport_offset(skb); + else + hdr_len = skb_inner_transport_offset(skb); /* + transport layer */ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | -- cgit v1.2.3 From 4e655028c29fbc455fdbbd3ca074443361adfa44 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 7 Jul 2025 11:41:14 -0700 Subject: net: ethtool: remove the compat code for _rxfh_context ops All drivers are now converted to dedicated _rxfh_context ops. Remove the use of >set_rxfh() to manage additional contexts. Reviewed-by: Gal Pressman Reviewed-by: Edward Cree Link: https://patch.msgid.link/20250707184115.2285277-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 4 --- net/core/dev.c | 15 +----------- net/ethtool/ioctl.c | 65 +++++++++++-------------------------------------- net/ethtool/rss.c | 3 +-- 4 files changed, 16 insertions(+), 71 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 59877fd2a1d3..de5bd76a400c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -865,9 +865,6 @@ struct kernel_ethtool_ts_info { * @supported_input_xfrm: supported types of input xfrm from %RXH_XFRM_*. * @cap_link_lanes_supported: indicates if the driver supports lanes * parameter. - * @cap_rss_ctx_supported: indicates if the driver supports RSS - * contexts via legacy API, drivers implementing @create_rxfh_context - * do not have to set this bit. * @rxfh_per_ctx_fields: device supports selecting different header fields * for Rx hash calculation and RSS for each additional context. * @rxfh_per_ctx_key: device supports setting different RSS key for each @@ -1100,7 +1097,6 @@ struct kernel_ethtool_ts_info { struct ethtool_ops { u32 supported_input_xfrm:8; u32 cap_link_lanes_supported:1; - u32 cap_rss_ctx_supported:1; u32 rxfh_per_ctx_fields:1; u32 rxfh_per_ctx_key:1; u32 cap_rss_rxnfc_adds:1; diff --git a/net/core/dev.c b/net/core/dev.c index ea129aa08317..fe677ccec5b0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11979,21 +11979,8 @@ static void netdev_rss_contexts_free(struct net_device *dev) mutex_lock(&dev->ethtool->rss_lock); xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { - struct ethtool_rxfh_param rxfh; - - rxfh.indir = ethtool_rxfh_context_indir(ctx); - rxfh.key = ethtool_rxfh_context_key(ctx); - rxfh.hfunc = ctx->hfunc; - rxfh.input_xfrm = ctx->input_xfrm; - rxfh.rss_context = context; - rxfh.rss_delete = true; - xa_erase(&dev->ethtool->rss_ctx, context); - if (dev->ethtool_ops->create_rxfh_context) - dev->ethtool_ops->remove_rxfh_context(dev, ctx, - context, NULL); - else - dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL); + dev->ethtool_ops->remove_rxfh_context(dev, ctx, context, NULL); kfree(ctx); } xa_destroy(&dev->ethtool->rss_ctx); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index b6d96e562c9a..d8a17350d3e8 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1391,8 +1391,7 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32) return -EINVAL; /* Most drivers don't handle rss_context, check it's 0 as well */ - if (rxfh.rss_context && !(ops->cap_rss_ctx_supported || - ops->create_rxfh_context)) + if (rxfh.rss_context && !ops->create_rxfh_context) return -EOPNOTSUPP; rxfh.indir_size = rxfh_dev.indir_size; @@ -1534,8 +1533,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32) return -EINVAL; /* Most drivers don't handle rss_context, check it's 0 as well */ - if (rxfh.rss_context && !(ops->cap_rss_ctx_supported || - ops->create_rxfh_context)) + if (rxfh.rss_context && !ops->create_rxfh_context) return -EOPNOTSUPP; /* Check input data transformation capabilities */ if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR && @@ -1634,6 +1632,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } if (create) { + u32 limit, ctx_id; + if (rxfh_dev.rss_delete) { ret = -EINVAL; goto out_unlock; @@ -1644,21 +1644,15 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, goto out_unlock; } - if (ops->create_rxfh_context) { - u32 limit = ops->rxfh_max_num_contexts ?: U32_MAX; - u32 ctx_id; - - /* driver uses new API, core allocates ID */ - ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx, - XA_LIMIT(1, limit - 1), - GFP_KERNEL_ACCOUNT); - if (ret < 0) { - kfree(ctx); - goto out_unlock; - } - WARN_ON(!ctx_id); /* can't happen */ - rxfh.rss_context = ctx_id; + limit = ops->rxfh_max_num_contexts ?: U32_MAX; + ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx, + XA_LIMIT(1, limit - 1), GFP_KERNEL_ACCOUNT); + if (ret < 0) { + kfree(ctx); + goto out_unlock; } + WARN_ON(!ctx_id); /* can't happen */ + rxfh.rss_context = ctx_id; } else if (rxfh.rss_context) { ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context); if (!ctx) { @@ -1670,7 +1664,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh_dev.rss_context = rxfh.rss_context; rxfh_dev.input_xfrm = rxfh.input_xfrm; - if (rxfh.rss_context && ops->create_rxfh_context) { + if (rxfh.rss_context) { if (create) { ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev, extack); @@ -1693,8 +1687,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (ret) { if (create) { /* failed to create, free our new tracking entry */ - if (ops->create_rxfh_context) - xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); + xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); kfree(ctx); } goto out_unlock; @@ -1713,36 +1706,6 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, dev->priv_flags |= IFF_RXFH_CONFIGURED; } /* Update rss_ctx tracking */ - if (create && !ops->create_rxfh_context) { - /* driver uses old API, it chose context ID */ - if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh_dev.rss_context))) { - /* context ID reused, our tracking is screwed */ - kfree(ctx); - goto out_unlock; - } - /* Allocate the exact ID the driver gave us */ - if (xa_is_err(xa_store(&dev->ethtool->rss_ctx, rxfh_dev.rss_context, - ctx, GFP_KERNEL))) { - kfree(ctx); - goto out_unlock; - } - - /* Fetch the defaults for the old API, in the new API drivers - * should write defaults into ctx themselves. - */ - rxfh_dev.indir = (u32 *)rss_config; - rxfh_dev.indir_size = dev_indir_size; - - rxfh_dev.key = rss_config + indir_bytes; - rxfh_dev.key_size = dev_key_size; - - ret = ops->get_rxfh(dev, &rxfh_dev); - if (WARN_ON(ret)) { - xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); - kfree(ctx); - goto out_unlock; - } - } if (rxfh_dev.rss_delete) { WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx); kfree(ctx); diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index e717f23cbc10..4e8ca2c38175 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -163,8 +163,7 @@ rss_prepare_data(const struct ethnl_req_info *req_base, return -EOPNOTSUPP; /* Some drivers don't handle rss_context */ - if (request->rss_context && - !ops->cap_rss_ctx_supported && !ops->create_rxfh_context) + if (request->rss_context && !ops->create_rxfh_context) return -EOPNOTSUPP; mutex_lock(&dev->ethtool->rss_lock); -- cgit v1.2.3 From eb1ac9ff6c4a5720b1a1476233be374c5dc44bff Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 2 Jul 2025 16:01:31 -0700 Subject: ipv6: anycast: Don't hold RTNL for IPV6_JOIN_ANYCAST. inet6_sk(sk)->ipv6_ac_list is protected by lock_sock(). In ipv6_sock_ac_join(), only __dev_get_by_index(), __dev_get_by_flags(), and __in6_dev_get() require RTNL. __dev_get_by_flags() is only used by ipv6_sock_ac_join() and can be converted to RCU version. Let's replace RCU version helper and drop RTNL from IPV6_JOIN_ANYCAST. setsockopt_needs_rtnl() will be removed in the next patch. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250702230210.3115355-15-kuni1840@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 4 ++-- net/core/dev.c | 38 ++++++++++++++++++-------------------- net/ipv6/anycast.c | 22 ++++++++++++++-------- net/ipv6/ipv6_sockglue.c | 4 ---- 4 files changed, 34 insertions(+), 34 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5847c20994d3..a80d21a14612 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3332,8 +3332,8 @@ int dev_get_iflink(const struct net_device *dev); int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb); int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, struct net_device_path_stack *stack); -struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, - unsigned short mask); +struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, + unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); diff --git a/net/core/dev.c b/net/core/dev.c index fe677ccec5b0..e365b099484e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1267,33 +1267,31 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) EXPORT_SYMBOL(dev_getfirstbyhwtype); /** - * __dev_get_by_flags - find any device with given flags - * @net: the applicable net namespace - * @if_flags: IFF_* values - * @mask: bitmask of bits in if_flags to check + * dev_get_by_flags_rcu - find any device with given flags + * @net: the applicable net namespace + * @if_flags: IFF_* values + * @mask: bitmask of bits in if_flags to check * - * Search for any interface with the given flags. Returns NULL if a device - * is not found or a pointer to the device. Must be called inside - * rtnl_lock(), and result refcount is unchanged. + * Search for any interface with the given flags. + * + * Context: rcu_read_lock() must be held. + * Returns: NULL if a device is not found or a pointer to the device. */ - -struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags, - unsigned short mask) +struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags, + unsigned short mask) { - struct net_device *dev, *ret; - - ASSERT_RTNL(); + struct net_device *dev; - ret = NULL; - for_each_netdev(net, dev) { - if (((dev->flags ^ if_flags) & mask) == 0) { - ret = dev; - break; + for_each_netdev_rcu(net, dev) { + if (((READ_ONCE(dev->flags) ^ if_flags) & mask) == 0) { + dev_hold(dev); + return dev; } } - return ret; + + return NULL; } -EXPORT_SYMBOL(__dev_get_by_flags); +EXPORT_IPV6_MOD(dev_get_by_flags_rcu); /** * dev_valid_name - check if name is okay for network device diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index fd3d104c6c05..53cf68e0242b 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -73,15 +73,13 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) struct inet6_dev *idev; int err = 0, ishost; - ASSERT_RTNL(); - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (ipv6_addr_is_multicast(addr)) return -EINVAL; if (ifindex) - dev = __dev_get_by_index(net, ifindex); + dev = dev_get_by_index(net, ifindex); if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) { err = -EINVAL; @@ -102,18 +100,22 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (ifindex == 0) { struct rt6_info *rt; + rcu_read_lock(); rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { - dev = rt->dst.dev; + dev = dst_dev(&rt->dst); + dev_hold(dev); ip6_rt_put(rt); } else if (ishost) { + rcu_read_unlock(); err = -EADDRNOTAVAIL; goto error; } else { /* router, no matching interface: just pick one */ - dev = __dev_get_by_flags(net, IFF_UP, - IFF_UP | IFF_LOOPBACK); + dev = dev_get_by_flags_rcu(net, IFF_UP, + IFF_UP | IFF_LOOPBACK); } + rcu_read_unlock(); } if (!dev) { @@ -121,7 +123,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) goto error; } - idev = __in6_dev_get(dev); + idev = in6_dev_get(dev); if (!idev) { if (ifindex) err = -ENODEV; @@ -144,7 +146,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (ishost) err = -EADDRNOTAVAIL; if (err) - goto error; + goto error_idev; } err = __ipv6_dev_ac_inc(idev, addr); @@ -154,7 +156,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) pac = NULL; } +error_idev: + in6_dev_put(idev); error: + dev_put(dev); + if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); return err; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3d891aa6e7f5..702dc33e50ad 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -119,10 +119,6 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, static bool setsockopt_needs_rtnl(int optname) { - switch (optname) { - case IPV6_JOIN_ANYCAST: - return true; - } return false; } -- cgit v1.2.3 From 2a683d005286018c6f47ef0e432829655a6a21a3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 11 Jul 2025 05:10:59 +0000 Subject: dev: Pass netdevice_tracker to dev_get_by_flags_rcu(). This is a follow-up for commit eb1ac9ff6c4a5 ("ipv6: anycast: Don't hold RTNL for IPV6_JOIN_ANYCAST."). We should not add a new device lookup API without netdevice_tracker. Let's pass netdevice_tracker to dev_get_by_flags_rcu() and rename it with netdev_ prefix to match other newer APIs. Note that we always use GFP_ATOMIC for netdev_hold() as it's expected to be called under RCU. Suggested-by: Jakub Kicinski Link: https://lore.kernel.org/netdev/20250708184053.102109f6@kernel.org/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250711051120.2866855-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 4 ++-- net/core/dev.c | 11 ++++++----- net/ipv6/anycast.c | 11 ++++++----- 3 files changed, 14 insertions(+), 12 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a80d21a14612..ec23cee5245d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3332,8 +3332,6 @@ int dev_get_iflink(const struct net_device *dev); int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb); int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, struct net_device_path_stack *stack); -struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, - unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); @@ -3396,6 +3394,8 @@ struct net_device *netdev_get_by_index(struct net *net, int ifindex, netdevice_tracker *tracker, gfp_t gfp); struct net_device *netdev_get_by_name(struct net *net, const char *name, netdevice_tracker *tracker, gfp_t gfp); +struct net_device *netdev_get_by_flags_rcu(struct net *net, netdevice_tracker *tracker, + unsigned short flags, unsigned short mask); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); void netdev_copy_name(struct net_device *dev, char *name); diff --git a/net/core/dev.c b/net/core/dev.c index e365b099484e..19ddc3e6990a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1267,8 +1267,9 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) EXPORT_SYMBOL(dev_getfirstbyhwtype); /** - * dev_get_by_flags_rcu - find any device with given flags + * netdev_get_by_flags_rcu - find any device with given flags * @net: the applicable net namespace + * @tracker: tracking object for the acquired reference * @if_flags: IFF_* values * @mask: bitmask of bits in if_flags to check * @@ -1277,21 +1278,21 @@ EXPORT_SYMBOL(dev_getfirstbyhwtype); * Context: rcu_read_lock() must be held. * Returns: NULL if a device is not found or a pointer to the device. */ -struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags, - unsigned short mask) +struct net_device *netdev_get_by_flags_rcu(struct net *net, netdevice_tracker *tracker, + unsigned short if_flags, unsigned short mask) { struct net_device *dev; for_each_netdev_rcu(net, dev) { if (((READ_ONCE(dev->flags) ^ if_flags) & mask) == 0) { - dev_hold(dev); + netdev_hold(dev, tracker, GFP_ATOMIC); return dev; } } return NULL; } -EXPORT_IPV6_MOD(dev_get_by_flags_rcu); +EXPORT_IPV6_MOD(netdev_get_by_flags_rcu); /** * dev_valid_name - check if name is okay for network device diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 53cf68e0242b..f8a8e46286b8 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -69,6 +69,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_ac_socklist *pac = NULL; struct net *net = sock_net(sk); + netdevice_tracker dev_tracker; struct net_device *dev = NULL; struct inet6_dev *idev; int err = 0, ishost; @@ -79,7 +80,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return -EINVAL; if (ifindex) - dev = dev_get_by_index(net, ifindex); + dev = netdev_get_by_index(net, ifindex, &dev_tracker, GFP_KERNEL); if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) { err = -EINVAL; @@ -104,7 +105,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { dev = dst_dev(&rt->dst); - dev_hold(dev); + netdev_hold(dev, &dev_tracker, GFP_ATOMIC); ip6_rt_put(rt); } else if (ishost) { rcu_read_unlock(); @@ -112,8 +113,8 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) goto error; } else { /* router, no matching interface: just pick one */ - dev = dev_get_by_flags_rcu(net, IFF_UP, - IFF_UP | IFF_LOOPBACK); + dev = netdev_get_by_flags_rcu(net, &dev_tracker, IFF_UP, + IFF_UP | IFF_LOOPBACK); } rcu_read_unlock(); } @@ -159,7 +160,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) error_idev: in6_dev_put(idev); error: - dev_put(dev); + netdev_put(dev, &dev_tracker); if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); -- cgit v1.2.3 From 2677010e7793451c20d895c477c4dc76f6e6a10e Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Thu, 10 Jul 2025 21:12:03 +0000 Subject: Add support to set NAPI threaded for individual NAPI A net device has a threaded sysctl that can be used to enable threaded NAPI polling on all of the NAPI contexts under that device. Allow enabling threaded NAPI polling at individual NAPI level using netlink. Extend the netlink operation `napi-set` and allow setting the threaded attribute of a NAPI. This will enable the threaded polling on a NAPI context. Add a test in `nl_netdev.py` that verifies various cases of threaded NAPI being set at NAPI and at device level. Tested ./tools/testing/selftests/net/nl_netdev.py TAP version 13 1..7 ok 1 nl_netdev.empty_check ok 2 nl_netdev.lo_check ok 3 nl_netdev.page_pool_check ok 4 nl_netdev.napi_list_check ok 5 nl_netdev.dev_set_threaded ok 6 nl_netdev.napi_set_threaded ok 7 nl_netdev.nsim_rxq_reset_down # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Samiullah Khawaja Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250710211203.3979655-1-skhawaja@google.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 10 ++++ Documentation/networking/napi.rst | 9 +++- include/linux/netdevice.h | 1 + include/uapi/linux/netdev.h | 1 + net/core/dev.c | 30 +++++++++-- net/core/dev.h | 7 +++ net/core/netdev-genl-gen.c | 5 +- net/core/netdev-genl.c | 14 +++++ tools/include/uapi/linux/netdev.h | 1 + tools/testing/selftests/net/nl_netdev.py | 91 +++++++++++++++++++++++++++++++- 10 files changed, 162 insertions(+), 7 deletions(-) (limited to 'net/core/dev.c') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index ce4cfec82100..85d0ea6ac426 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -283,6 +283,14 @@ attribute-sets: doc: The timeout, in nanoseconds, of how long to suspend irq processing, if event polling finds events type: uint + - + name: threaded + doc: Whether the NAPI is configured to operate in threaded polling + mode. If this is set to 1 then the NAPI context operates in + threaded polling mode. + type: uint + checks: + max: 1 - name: xsk-info attributes: [] @@ -694,6 +702,7 @@ operations: - defer-hard-irqs - gro-flush-timeout - irq-suspend-timeout + - threaded dump: request: attributes: @@ -746,6 +755,7 @@ operations: - defer-hard-irqs - gro-flush-timeout - irq-suspend-timeout + - threaded - name: bind-tx doc: Bind dmabuf to netdev for TX diff --git a/Documentation/networking/napi.rst b/Documentation/networking/napi.rst index d0e3953cae6a..a15754adb041 100644 --- a/Documentation/networking/napi.rst +++ b/Documentation/networking/napi.rst @@ -444,7 +444,14 @@ dependent). The NAPI instance IDs will be assigned in the opposite order than the process IDs of the kernel threads. Threaded NAPI is controlled by writing 0/1 to the ``threaded`` file in -netdev's sysfs directory. +netdev's sysfs directory. It can also be enabled for a specific NAPI using +netlink interface. + +For example, using the script: + +.. code-block:: bash + + $ ynl --family netdev --do napi-set --json='{"id": 66, "threaded": 1}' .. rubric:: Footnotes diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ec23cee5245d..e49d8c98d284 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -369,6 +369,7 @@ struct napi_config { u64 irq_suspend_timeout; u32 defer_hard_irqs; cpumask_t affinity_mask; + bool threaded; unsigned int napi_id; }; diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 7eb9571786b8..1f3719a9a0eb 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -134,6 +134,7 @@ enum { NETDEV_A_NAPI_DEFER_HARD_IRQS, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + NETDEV_A_NAPI_THREADED, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/net/core/dev.c b/net/core/dev.c index 19ddc3e6990a..621a639aeba1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6961,6 +6961,31 @@ static void napi_stop_kthread(struct napi_struct *napi) napi->thread = NULL; } +int napi_set_threaded(struct napi_struct *napi, bool threaded) +{ + if (threaded) { + if (!napi->thread) { + int err = napi_kthread_create(napi); + + if (err) + return err; + } + } + + if (napi->config) + napi->config->threaded = threaded; + + if (!threaded && napi->thread) { + napi_stop_kthread(napi); + } else { + /* Make sure kthread is created before THREADED bit is set. */ + smp_mb__before_atomic(); + assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); + } + + return 0; +} + int dev_set_threaded(struct net_device *dev, bool threaded) { struct napi_struct *napi; @@ -6968,9 +6993,6 @@ int dev_set_threaded(struct net_device *dev, bool threaded) netdev_assert_locked_or_invisible(dev); - if (dev->threaded == threaded) - return 0; - if (threaded) { list_for_each_entry(napi, &dev->napi_list, dev_list) { if (!napi->thread) { @@ -7221,6 +7243,8 @@ static void napi_restore_config(struct napi_struct *n) napi_hash_add(n); n->config->napi_id = n->napi_id; } + + WARN_ON_ONCE(napi_set_threaded(n, n->config->threaded)); } static void napi_save_config(struct napi_struct *n) diff --git a/net/core/dev.h b/net/core/dev.h index e93f36b7ddf3..a603387fb566 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -315,6 +315,13 @@ static inline void napi_set_irq_suspend_timeout(struct napi_struct *n, WRITE_ONCE(n->irq_suspend_timeout, timeout); } +static inline bool napi_get_threaded(struct napi_struct *n) +{ + return test_bit(NAPI_STATE_THREADED, &n->state); +} + +int napi_set_threaded(struct napi_struct *n, bool threaded); + int rps_cpumask_housekeeping(struct cpumask *mask); #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 4fc44587f493..0994bd68a7e6 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -92,11 +92,12 @@ static const struct nla_policy netdev_bind_rx_nl_policy[NETDEV_A_DMABUF_FD + 1] }; /* NETDEV_CMD_NAPI_SET - do */ -static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT + 1] = { +static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_THREADED + 1] = { [NETDEV_A_NAPI_ID] = { .type = NLA_U32, }, [NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range), [NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, }, [NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, }, + [NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_UINT, 1), }; /* NETDEV_CMD_BIND_TX - do */ @@ -193,7 +194,7 @@ static const struct genl_split_ops netdev_nl_ops[] = { .cmd = NETDEV_CMD_NAPI_SET, .doit = netdev_nl_napi_set_doit, .policy = netdev_napi_set_nl_policy, - .maxattr = NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + .maxattr = NETDEV_A_NAPI_THREADED, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 2afa7b2141aa..5875df372415 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -184,6 +184,10 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq)) goto nla_put_failure; + if (nla_put_uint(rsp, NETDEV_A_NAPI_THREADED, + napi_get_threaded(napi))) + goto nla_put_failure; + if (napi->thread) { pid = task_pid_nr(napi->thread); if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid)) @@ -322,8 +326,18 @@ netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info) { u64 irq_suspend_timeout = 0; u64 gro_flush_timeout = 0; + u8 threaded = 0; u32 defer = 0; + if (info->attrs[NETDEV_A_NAPI_THREADED]) { + int ret; + + threaded = nla_get_uint(info->attrs[NETDEV_A_NAPI_THREADED]); + ret = napi_set_threaded(napi, !!threaded); + if (ret) + return ret; + } + if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) { defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]); napi_set_defer_hard_irqs(napi, defer); diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 7eb9571786b8..1f3719a9a0eb 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -134,6 +134,7 @@ enum { NETDEV_A_NAPI_DEFER_HARD_IRQS, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + NETDEV_A_NAPI_THREADED, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index c9109627a741..c8ffade79a52 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -35,6 +35,91 @@ def napi_list_check(nf) -> None: ksft_eq(len(napis), 100, comment=f"queue count after reset queue {q} mode {i}") +def napi_set_threaded(nf) -> None: + """ + Test that verifies various cases of napi threaded + set and unset at napi and device level. + """ + with NetdevSimDev(queue_count=2) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + # set napi threaded and verify + nf.napi_set({'id': napi0_id, 'threaded': 1}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 1) + ksft_ne(napi0.get('pid'), None) + + # check it is not set for napi1 + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1.get('pid'), None) + + ip(f"link set dev {nsim.ifname} down") + ip(f"link set dev {nsim.ifname} up") + + # verify if napi threaded is still set + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 1) + ksft_ne(napi0.get('pid'), None) + + # check it is still not set for napi1 + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1.get('pid'), None) + + # unset napi threaded and verify + nf.napi_set({'id': napi0_id, 'threaded': 0}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0.get('pid'), None) + + # set threaded at device level + system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is set for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 1) + ksft_ne(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 1) + ksft_ne(napi1.get('pid'), None) + + # unset threaded at device level + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1.get('pid'), None) + + # set napi threaded for napi0 + nf.napi_set({'id': napi0_id, 'threaded': 1}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 1) + ksft_ne(napi0.get('pid'), None) + + # unset threaded at device level + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1.get('pid'), None) + def dev_set_threaded(nf) -> None: """ Test that verifies various cases of napi threaded @@ -56,8 +141,10 @@ def dev_set_threaded(nf) -> None: # check napi threaded is set for both napis napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 1) ksft_ne(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 1) ksft_ne(napi1.get('pid'), None) # unset threaded @@ -65,8 +152,10 @@ def dev_set_threaded(nf) -> None: # check napi threaded is unset for both napis napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 0) ksft_eq(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 0) ksft_eq(napi1.get('pid'), None) def nsim_rxq_reset_down(nf) -> None: @@ -156,7 +245,7 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() ksft_run([empty_check, lo_check, page_pool_check, napi_list_check, - dev_set_threaded, nsim_rxq_reset_down], + dev_set_threaded, napi_set_threaded, nsim_rxq_reset_down], args=(nf, )) ksft_exit() -- cgit v1.2.3 From a6f190630d070173897a7e98a30188b7638ba0a1 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 16 Jul 2025 18:26:53 +0200 Subject: net: track pfmemalloc drops via SKB_DROP_REASON_PFMEMALLOC Add a new SKB drop reason (SKB_DROP_REASON_PFMEMALLOC) to track packets dropped due to memory pressure. In production environments, we've observed memory exhaustion reported by memory layer stack traces, but these drops were not properly tracked in the SKB drop reason infrastructure. While most network code paths now properly report pfmemalloc drops, some protocol-specific socket implementations still use sk_filter() without drop reason tracking: - Bluetooth L2CAP sockets - CAIF sockets - IUCV sockets - Netlink sockets - SCTP sockets - Unix domain sockets These remaining cases represent less common paths and could be converted in a follow-up patch if needed. The current implementation provides significantly improved observability into memory pressure events in the network stack, especially for key protocols like TCP and UDP, helping to diagnose problems in production environments. Reported-by: Matt Fleming Signed-off-by: Jesper Dangaard Brouer Link: https://patch.msgid.link/175268316579.2407873.11634752355644843509.stgit@firesoul Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 6 ++---- include/linux/filter.h | 14 ++++++++++++-- include/net/dropreason-core.h | 6 ++++++ include/net/tcp.h | 2 +- net/core/dev.c | 8 ++++++-- net/core/filter.c | 15 ++++++++++++--- net/core/sock.c | 20 +++++++++++++------- net/ipv4/tcp_ipv4.c | 26 +++++++++++++++----------- net/ipv4/udp.c | 6 ++---- net/ipv6/tcp_ipv6.c | 9 +++------ net/ipv6/udp.c | 4 +--- net/rose/rose_in.c | 3 ++- 12 files changed, 75 insertions(+), 44 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 49bcd12a4ac8..e65228ba3fae 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1002,8 +1002,8 @@ static unsigned int run_ebpf_filter(struct tun_struct *tun, /* Net device start xmit */ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) { + enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; struct tun_struct *tun = netdev_priv(dev); - enum skb_drop_reason drop_reason; int txq = skb->queue_mapping; struct netdev_queue *queue; struct tun_file *tfile; @@ -1032,10 +1032,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) } if (tfile->socket.sk->sk_filter && - sk_filter(tfile->socket.sk, skb)) { - drop_reason = SKB_DROP_REASON_SOCKET_FILTER; + sk_filter_reason(tfile->socket.sk, skb, &drop_reason)) goto drop; - } len = run_ebpf_filter(tun, skb, len); if (len == 0) { diff --git a/include/linux/filter.h b/include/linux/filter.h index f5cf4d35d83e..4e82332afe03 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1073,10 +1073,20 @@ bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) return set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT); } -int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); +int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap, + enum skb_drop_reason *reason); + static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { - return sk_filter_trim_cap(sk, skb, 1); + enum skb_drop_reason ignore_reason; + + return sk_filter_trim_cap(sk, skb, 1, &ignore_reason); +} + +static inline int sk_filter_reason(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason *reason) +{ + return sk_filter_trim_cap(sk, skb, 1, reason); } struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err); diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 229bb1826f2a..e19184dd1b0f 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -125,6 +125,7 @@ FN(CAN_RX_INVALID_FRAME) \ FN(CANFD_RX_INVALID_FRAME) \ FN(CANXL_RX_INVALID_FRAME) \ + FN(PFMEMALLOC) \ FNe(MAX) /** @@ -598,6 +599,11 @@ enum skb_drop_reason { * non conform CAN-XL frame (or device is unable to receive CAN frames) */ SKB_DROP_REASON_CANXL_RX_INVALID_FRAME, + /** + * @SKB_DROP_REASON_PFMEMALLOC: packet allocated from memory reserve + * reached a path or socket not eligible for use of memory reserves + */ + SKB_DROP_REASON_PFMEMALLOC, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen diff --git a/include/net/tcp.h b/include/net/tcp.h index bc08de49805c..b3815d104340 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1559,7 +1559,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason); -int tcp_filter(struct sock *sk, struct sk_buff *skb); +int tcp_filter(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason); void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); int tcp_abort(struct sock *sk, int err); diff --git a/net/core/dev.c b/net/core/dev.c index 621a639aeba1..59a9089117de 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5749,6 +5749,7 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, struct packet_type **ppt_prev) { + enum skb_drop_reason drop_reason = SKB_DROP_REASON_UNHANDLED_PROTO; struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; struct sk_buff *skb = *pskb; @@ -5840,8 +5841,10 @@ skip_taps: #endif skb_reset_redirect(skb); skip_classify: - if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) + if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) { + drop_reason = SKB_DROP_REASON_PFMEMALLOC; goto drop; + } if (skb_vlan_tag_present(skb)) { if (pt_prev) { @@ -5946,7 +5949,8 @@ drop: dev_core_stats_rx_dropped_inc(skb->dev); else dev_core_stats_rx_nohandler_inc(skb->dev); - kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO); + + kfree_skb_reason(skb, drop_reason); /* Jamal, now you will not able to escape explaining * me how you were going to use this. :-) */ diff --git a/net/core/filter.c b/net/core/filter.c index 7a72f766aacf..2eb8947d8097 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -122,6 +122,7 @@ EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user); * @sk: sock associated with &sk_buff * @skb: buffer to filter * @cap: limit on how short the eBPF program may trim the packet + * @reason: record drop reason on errors (negative return value) * * Run the eBPF program and then cut skb->data to correct size returned by * the program. If pkt_len is 0 we toss packet. If skb->len is smaller @@ -130,7 +131,8 @@ EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user); * be accepted or -EPERM if the packet should be tossed. * */ -int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) +int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, + unsigned int cap, enum skb_drop_reason *reason) { int err; struct sk_filter *filter; @@ -142,15 +144,20 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) */ if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP); + *reason = SKB_DROP_REASON_PFMEMALLOC; return -ENOMEM; } err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb); - if (err) + if (err) { + *reason = SKB_DROP_REASON_SOCKET_FILTER; return err; + } err = security_sock_rcv_skb(sk, skb); - if (err) + if (err) { + *reason = SKB_DROP_REASON_SECURITY_HOOK; return err; + } rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); @@ -162,6 +169,8 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) pkt_len = bpf_prog_run_save_cb(filter->prog, skb); skb->sk = save_sk; err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM; + if (err) + *reason = SKB_DROP_REASON_SOCKET_FILTER; } rcu_read_unlock(); diff --git a/net/core/sock.c b/net/core/sock.c index 8b7623c7d547..7c26ec8dce63 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -526,11 +526,10 @@ int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason drop_reason; int err; - err = sk_filter(sk, skb); - if (err) { - drop_reason = SKB_DROP_REASON_SOCKET_FILTER; + err = sk_filter_reason(sk, skb, &drop_reason); + if (err) goto out; - } + err = __sock_queue_rcv_skb(sk, skb); switch (err) { case -ENOMEM: @@ -553,15 +552,18 @@ EXPORT_SYMBOL(sock_queue_rcv_skb_reason); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, unsigned int trim_cap, bool refcounted) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; int rc = NET_RX_SUCCESS; + int err; - if (sk_filter_trim_cap(sk, skb, trim_cap)) + if (sk_filter_trim_cap(sk, skb, trim_cap, &reason)) goto discard_and_relse; skb->dev = NULL; if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) { atomic_inc(&sk->sk_drops); + reason = SKB_DROP_REASON_SOCKET_RCVBUFF; goto discard_and_relse; } if (nested) @@ -577,8 +579,12 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, _RET_IP_); - } else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) { + } else if ((err = sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))) { bh_unlock_sock(sk); + if (err == -ENOMEM) + reason = SKB_DROP_REASON_PFMEMALLOC; + if (err == -ENOBUFS) + reason = SKB_DROP_REASON_SOCKET_BACKLOG; atomic_inc(&sk->sk_drops); goto discard_and_relse; } @@ -589,7 +595,7 @@ out: sock_put(sk); return rc; discard_and_relse: - kfree_skb(skb); + sk_skb_reason_drop(sk, skb, reason); goto out; } EXPORT_SYMBOL(__sk_receive_skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 16bf6fdff96b..84d3d556ed80 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2026,6 +2026,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, u32 gso_size; u64 limit; int delta; + int err; /* In case all data was pulled from skb frags (in __pskb_pull_tail()), * we can fix skb->truesize to its real value to avoid future drops. @@ -2136,21 +2137,27 @@ no_coalesce: limit = min_t(u64, limit, UINT_MAX); - if (unlikely(sk_add_backlog(sk, skb, limit))) { + err = sk_add_backlog(sk, skb, limit); + if (unlikely(err)) { bh_unlock_sock(sk); - *reason = SKB_DROP_REASON_SOCKET_BACKLOG; - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP); + if (err == -ENOMEM) { + *reason = SKB_DROP_REASON_PFMEMALLOC; + __NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP); + } else { + *reason = SKB_DROP_REASON_SOCKET_BACKLOG; + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP); + } return true; } return false; } EXPORT_IPV6_MOD(tcp_add_backlog); -int tcp_filter(struct sock *sk, struct sk_buff *skb) +int tcp_filter(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason) { struct tcphdr *th = (struct tcphdr *)skb->data; - return sk_filter_trim_cap(sk, skb, th->doff * 4); + return sk_filter_trim_cap(sk, skb, th->doff * 4, reason); } EXPORT_IPV6_MOD(tcp_filter); @@ -2277,14 +2284,12 @@ lookup: } refcounted = true; nsk = NULL; - if (!tcp_filter(sk, skb)) { + if (!tcp_filter(sk, skb, &drop_reason)) { th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); nsk = tcp_check_req(sk, skb, req, false, &req_stolen, &drop_reason); - } else { - drop_reason = SKB_DROP_REASON_SOCKET_FILTER; } if (!nsk) { reqsk_put(req); @@ -2340,10 +2345,9 @@ process: nf_reset_ct(skb); - if (tcp_filter(sk, skb)) { - drop_reason = SKB_DROP_REASON_SOCKET_FILTER; + if (tcp_filter(sk, skb, &drop_reason)) goto discard_and_relse; - } + th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 49f43c54cfb0..cc3ce0f762ec 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2347,7 +2347,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) */ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { - int drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; + enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); @@ -2436,10 +2436,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) { - drop_reason = SKB_DROP_REASON_SOCKET_FILTER; + if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr), &drop_reason)) goto drop; - } udp_csum_pull_header(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8f2c3cba1f1f..7577e7eb2c97 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1834,14 +1834,12 @@ lookup: } refcounted = true; nsk = NULL; - if (!tcp_filter(sk, skb)) { + if (!tcp_filter(sk, skb, &drop_reason)) { th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); nsk = tcp_check_req(sk, skb, req, false, &req_stolen, &drop_reason); - } else { - drop_reason = SKB_DROP_REASON_SOCKET_FILTER; } if (!nsk) { reqsk_put(req); @@ -1897,10 +1895,9 @@ process: nf_reset_ct(skb); - if (tcp_filter(sk, skb)) { - drop_reason = SKB_DROP_REASON_SOCKET_FILTER; + if (tcp_filter(sk, skb, &drop_reason)) goto discard_and_relse; - } + th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6bbdadbd5fec..6a68f77da44b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -894,10 +894,8 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) { - drop_reason = SKB_DROP_REASON_SOCKET_FILTER; + if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr), &drop_reason)) goto drop; - } udp_csum_pull_header(skb); diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c index 4d67f36dce1b..3e99181e759f 100644 --- a/net/rose/rose_in.c +++ b/net/rose/rose_in.c @@ -101,6 +101,7 @@ static int rose_state2_machine(struct sock *sk, struct sk_buff *skb, int framety */ static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype, int ns, int nr, int q, int d, int m) { + enum skb_drop_reason dr; /* ignored */ struct rose_sock *rose = rose_sk(sk); int queued = 0; @@ -162,7 +163,7 @@ static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int framety rose_frames_acked(sk, nr); if (ns == rose->vr) { rose_start_idletimer(sk); - if (sk_filter_trim_cap(sk, skb, ROSE_MIN_LEN) == 0 && + if (!sk_filter_trim_cap(sk, skb, ROSE_MIN_LEN, &dr) && __sock_queue_rcv_skb(sk, skb) == 0) { rose->vr = (rose->vr + 1) % ROSE_MODULUS; queued = 1; -- cgit v1.2.3 From ffea1168346120df9417fcafd8f3a1c93033ae34 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 17 Jul 2025 10:23:27 -0700 Subject: net: s/dev_get_port_parent_id/netif_get_port_parent_id/ Commit cc34acd577f1 ("docs: net: document new locking reality") introduced netif_ vs dev_ function semantics: the former expects locked netdev, the latter takes care of the locking. We don't strictly follow this semantics on either side, but there are more dev_xxx handlers now that don't fit. Rename them to netif_xxx where appropriate. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250717172333.1288349-2-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- include/linux/netdevice.h | 4 ++-- net/bridge/br_switchdev.c | 2 +- net/core/dev.c | 25 +++++++++++++------------ net/core/net-sysfs.c | 2 +- net/core/rtnetlink.c | 2 +- net/ipv4/ipmr.c | 2 +- 7 files changed, 20 insertions(+), 19 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fef418e1ed1a..32c07a8b03d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5446,7 +5446,7 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) goto err_action_counter; } - err = dev_get_port_parent_id(priv->netdev, &ppid, false); + err = netif_get_port_parent_id(priv->netdev, &ppid, false); if (!err) { memcpy(&key, &ppid.id, sizeof(key)); mlx5_esw_offloads_devcom_init(esw, key); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e49d8c98d284..c6ba4ea66039 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4223,8 +4223,8 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, int dev_set_mac_address_user(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); -int dev_get_port_parent_id(struct net_device *dev, - struct netdev_phys_item_id *ppid, bool recurse); +int netif_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 9a910cf0256e..fe3f7bbe86ee 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -837,7 +837,7 @@ int br_switchdev_port_offload(struct net_bridge_port *p, struct netdev_phys_item_id ppid; int err; - err = dev_get_port_parent_id(dev, &ppid, false); + err = netif_get_port_parent_id(dev, &ppid, false); if (err) return err; diff --git a/net/core/dev.c b/net/core/dev.c index 59a9089117de..4979a9197b18 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9844,16 +9844,17 @@ int dev_get_phys_port_name(struct net_device *dev, } /** - * dev_get_port_parent_id - Get the device's port parent identifier - * @dev: network device - * @ppid: pointer to a storage for the port's parent identifier - * @recurse: allow/disallow recursion to lower devices + * netif_get_port_parent_id() - Get the device's port parent identifier + * @dev: network device + * @ppid: pointer to a storage for the port's parent identifier + * @recurse: allow/disallow recursion to lower devices + * + * Get the devices's port parent identifier. * - * Get the devices's port parent identifier + * Return: 0 on success, -errno on failure. */ -int dev_get_port_parent_id(struct net_device *dev, - struct netdev_phys_item_id *ppid, - bool recurse) +int netif_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid, bool recurse) { const struct net_device_ops *ops = dev->netdev_ops; struct netdev_phys_item_id first = { }; @@ -9872,7 +9873,7 @@ int dev_get_port_parent_id(struct net_device *dev, return err; netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = dev_get_port_parent_id(lower_dev, ppid, true); + err = netif_get_port_parent_id(lower_dev, ppid, true); if (err) break; if (!first.id_len) @@ -9883,7 +9884,7 @@ int dev_get_port_parent_id(struct net_device *dev, return err; } -EXPORT_SYMBOL(dev_get_port_parent_id); +EXPORT_SYMBOL(netif_get_port_parent_id); /** * netdev_port_same_parent_id - Indicate if two network devices have @@ -9896,8 +9897,8 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) struct netdev_phys_item_id a_id = { }; struct netdev_phys_item_id b_id = { }; - if (dev_get_port_parent_id(a, &a_id, true) || - dev_get_port_parent_id(b, &b_id, true)) + if (netif_get_port_parent_id(a, &a_id, true) || + netif_get_port_parent_id(b, &b_id, true)) return false; return netdev_phys_item_id_same(&a_id, &b_id); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 8f897e2c8b4f..f7a6cc7aea79 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -687,7 +687,7 @@ static ssize_t phys_switch_id_show(struct device *dev, if (ret) return ret; - ret = dev_get_port_parent_id(netdev, &ppid, false); + ret = netif_get_port_parent_id(netdev, &ppid, false); if (!ret) ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a9555bfc372f..108995b6eced 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1448,7 +1448,7 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) struct netdev_phys_item_id ppid = { }; int err; - err = dev_get_port_parent_id(dev, &ppid, false); + err = netif_get_port_parent_id(dev, &ppid, false); if (err) { if (err == -EOPNOTSUPP) return 0; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 3a2044e6033d..e86a8a862c41 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -901,7 +901,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0), (VIFF_TUNNEL | VIFF_REGISTER)); - err = dev_get_port_parent_id(dev, &ppid, true); + err = netif_get_port_parent_id(dev, &ppid, true); if (err == 0) { memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len); v->dev_parent_id.id_len = ppid.id_len; -- cgit v1.2.3 From af1d017377c1c1931bfb898e719ab712cf79f944 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 17 Jul 2025 10:23:28 -0700 Subject: net: s/dev_get_mac_address/netif_get_mac_address/ Commit cc34acd577f1 ("docs: net: document new locking reality") introduced netif_ vs dev_ function semantics: the former expects locked netdev, the latter takes care of the locking. We don't strictly follow this semantics on either side, but there are more dev_xxx handlers now that don't fit. Rename them to netif_xxx where appropriate. netif_get_mac_address is used only by tun/tap, so move it into NETDEV_INTERNAL namespace. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250717172333.1288349-3-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/tap.c | 5 +++-- drivers/net/tun.c | 3 ++- include/linux/netdevice.h | 2 +- net/core/dev.c | 4 ++-- net/core/dev_ioctl.c | 3 ++- net/core/net-sysfs.c | 2 +- 6 files changed, 11 insertions(+), 8 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/tap.c b/drivers/net/tap.c index d82eb7276a8b..1197f245e873 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1000,8 +1000,8 @@ static long tap_ioctl(struct file *file, unsigned int cmd, return -ENOLINK; } ret = 0; - dev_get_mac_address((struct sockaddr *)&ss, dev_net(tap->dev), - tap->dev->name); + netif_get_mac_address((struct sockaddr *)&ss, dev_net(tap->dev), + tap->dev->name); if (copy_to_user(&ifr->ifr_name, tap->dev->name, IFNAMSIZ) || copy_to_user(&ifr->ifr_hwaddr, &ss, sizeof(ifr->ifr_hwaddr))) ret = -EFAULT; @@ -1282,3 +1282,4 @@ MODULE_DESCRIPTION("Common library for drivers implementing the TAP interface"); MODULE_AUTHOR("Arnd Bergmann "); MODULE_AUTHOR("Sainath Grandhi "); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS("NETDEV_INTERNAL"); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e65228ba3fae..cc6c50180663 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3223,7 +3223,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, case SIOCGIFHWADDR: /* Get hw address */ - dev_get_mac_address(&ifr.ifr_hwaddr, net, tun->dev->name); + netif_get_mac_address(&ifr.ifr_hwaddr, net, tun->dev->name); if (copy_to_user(argp, &ifr, ifreq_len)) ret = -EFAULT; break; @@ -3732,3 +3732,4 @@ MODULE_AUTHOR(DRV_COPYRIGHT); MODULE_LICENSE("GPL"); MODULE_ALIAS_MISCDEV(TUN_MINOR); MODULE_ALIAS("devname:net/tun"); +MODULE_IMPORT_NS("NETDEV_INTERNAL"); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c6ba4ea66039..b3a48934b4cb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4222,7 +4222,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); int dev_set_mac_address_user(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); -int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); +int netif_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int netif_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); diff --git a/net/core/dev.c b/net/core/dev.c index 4979a9197b18..d71f03874057 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9765,7 +9765,7 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, DECLARE_RWSEM(dev_addr_sem); /* "sa" is a true struct sockaddr with limited "sa_data" member. */ -int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) +int netif_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { size_t size = sizeof(sa->sa_data_min); struct net_device *dev; @@ -9791,7 +9791,7 @@ unlock: up_read(&dev_addr_sem); return ret; } -EXPORT_SYMBOL(dev_get_mac_address); +EXPORT_SYMBOL_NS_GPL(netif_get_mac_address, "NETDEV_INTERNAL"); int netif_change_carrier(struct net_device *dev, bool new_carrier) { diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 616479e71466..ceb2d63a818a 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -728,7 +728,8 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, switch (cmd) { case SIOCGIFHWADDR: dev_load(net, ifr->ifr_name); - ret = dev_get_mac_address(&ifr->ifr_hwaddr, net, ifr->ifr_name); + ret = netif_get_mac_address(&ifr->ifr_hwaddr, net, + ifr->ifr_name); if (colon) *colon = ':'; return ret; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f7a6cc7aea79..e41ad1890e49 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -256,7 +256,7 @@ static ssize_t name_assign_type_show(struct device *dev, } static DEVICE_ATTR_RO(name_assign_type); -/* use same locking rules as GIFHWADDR ioctl's (dev_get_mac_address()) */ +/* use same locking rules as GIFHWADDR ioctl's (netif_get_mac_address()) */ static ssize_t address_show(struct device *dev, struct device_attribute *attr, char *buf) { -- cgit v1.2.3 From 0413a34ef678c3e2f0fafb4e113e810a05197030 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 17 Jul 2025 10:23:29 -0700 Subject: net: s/dev_pre_changeaddr_notify/netif_pre_changeaddr_notify/ Commit cc34acd577f1 ("docs: net: document new locking reality") introduced netif_ vs dev_ function semantics: the former expects locked netdev, the latter takes care of the locking. We don't strictly follow this semantics on either side, but there are more dev_xxx handlers now that don't fit. Rename them to netif_xxx where appropriate. netif_pre_changeaddr_notify is used only by ipvlan/bond, so move it into NETDEV_INTERNAL namespace. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250717172333.1288349-4-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 3 ++- drivers/net/ipvlan/ipvlan_main.c | 7 ++++--- include/linux/netdevice.h | 4 ++-- net/bridge/br.c | 7 ++++--- net/bridge/br_if.c | 3 ++- net/core/dev.c | 18 ++++++++++-------- net/core/dev_addr_lists.c | 2 +- 7 files changed, 25 insertions(+), 19 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 17c7542be6a5..d8281c486a44 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1040,7 +1040,7 @@ static int bond_set_dev_addr(struct net_device *bond_dev, slave_dbg(bond_dev, slave_dev, "bond_dev=%p slave_dev=%p slave_dev->addr_len=%d\n", bond_dev, slave_dev, slave_dev->addr_len); - err = dev_pre_changeaddr_notify(bond_dev, slave_dev->dev_addr, NULL); + err = netif_pre_changeaddr_notify(bond_dev, slave_dev->dev_addr, NULL); if (err) return err; @@ -6743,3 +6743,4 @@ module_exit(bonding_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION(DRV_DESCRIPTION); MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others"); +MODULE_IMPORT_NS("NETDEV_INTERNAL"); diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 0ed2fd833a5d..660f3db11766 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -784,9 +784,9 @@ static int ipvlan_device_event(struct notifier_block *unused, case NETDEV_PRE_CHANGEADDR: prechaddr_info = ptr; list_for_each_entry(ipvlan, &port->ipvlans, pnode) { - err = dev_pre_changeaddr_notify(ipvlan->dev, - prechaddr_info->dev_addr, - extack); + err = netif_pre_changeaddr_notify(ipvlan->dev, + prechaddr_info->dev_addr, + extack); if (err) return notifier_from_errno(err); } @@ -1094,3 +1094,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Mahesh Bandewar "); MODULE_DESCRIPTION("Driver for L3 (IPv6/IPv4) based VLANs"); MODULE_ALIAS_RTNL_LINK("ipvlan"); +MODULE_IMPORT_NS("NETDEV_INTERNAL"); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b3a48934b4cb..55c5cd9d1929 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4214,8 +4214,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, int __dev_set_mtu(struct net_device *, int); int netif_set_mtu(struct net_device *dev, int new_mtu); int dev_set_mtu(struct net_device *, int); -int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, - struct netlink_ext_ack *extack); +int netif_pre_changeaddr_notify(struct net_device *dev, const char *addr, + struct netlink_ext_ack *extack); int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, diff --git a/net/bridge/br.c b/net/bridge/br.c index 0adeafe11a36..1885d0c315f0 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -74,9 +74,9 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v if (br->dev->addr_assign_type == NET_ADDR_SET) break; prechaddr_info = ptr; - err = dev_pre_changeaddr_notify(br->dev, - prechaddr_info->dev_addr, - extack); + err = netif_pre_changeaddr_notify(br->dev, + prechaddr_info->dev_addr, + extack); if (err) return notifier_from_errno(err); break; @@ -484,3 +484,4 @@ MODULE_LICENSE("GPL"); MODULE_VERSION(BR_VERSION); MODULE_ALIAS_RTNL_LINK("bridge"); MODULE_DESCRIPTION("Ethernet bridge driver"); +MODULE_IMPORT_NS("NETDEV_INTERNAL"); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 2450690f98cf..98c5b9c3145f 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -668,7 +668,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, /* Ask for permission to use this MAC address now, even if we * don't end up choosing it below. */ - err = dev_pre_changeaddr_notify(br->dev, dev->dev_addr, extack); + err = netif_pre_changeaddr_notify(br->dev, dev->dev_addr, + extack); if (err) goto err6; } diff --git a/net/core/dev.c b/net/core/dev.c index d71f03874057..a47754fa7b15 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9716,13 +9716,15 @@ void netif_set_group(struct net_device *dev, int new_group) } /** - * dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR. - * @dev: device - * @addr: new address - * @extack: netlink extended ack + * netif_pre_changeaddr_notify() - Call NETDEV_PRE_CHANGEADDR. + * @dev: device + * @addr: new address + * @extack: netlink extended ack + * + * Return: 0 on success, -errno on failure. */ -int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, - struct netlink_ext_ack *extack) +int netif_pre_changeaddr_notify(struct net_device *dev, const char *addr, + struct netlink_ext_ack *extack) { struct netdev_notifier_pre_changeaddr_info info = { .info.dev = dev, @@ -9734,7 +9736,7 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, rc = call_netdevice_notifiers_info(NETDEV_PRE_CHANGEADDR, &info.info); return notifier_to_errno(rc); } -EXPORT_SYMBOL(dev_pre_changeaddr_notify); +EXPORT_SYMBOL_NS_GPL(netif_pre_changeaddr_notify, "NETDEV_INTERNAL"); int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack) @@ -9748,7 +9750,7 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - err = dev_pre_changeaddr_notify(dev, ss->__data, extack); + err = netif_pre_changeaddr_notify(dev, ss->__data, extack); if (err) return err; if (memcmp(dev->dev_addr, ss->__data, dev->addr_len)) { diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 90716bd736f3..76c91f224886 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -603,7 +603,7 @@ int dev_addr_add(struct net_device *dev, const unsigned char *addr, ASSERT_RTNL(); - err = dev_pre_changeaddr_notify(dev, addr, NULL); + err = netif_pre_changeaddr_notify(dev, addr, NULL); if (err) return err; err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); -- cgit v1.2.3 From 303a8487a657c357ca6abc06a4045f72cdae90d5 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 17 Jul 2025 10:23:30 -0700 Subject: net: s/__dev_set_mtu/__netif_set_mtu/ Commit cc34acd577f1 ("docs: net: document new locking reality") introduced netif_ vs dev_ function semantics: the former expects locked netdev, the latter takes care of the locking. We don't strictly follow this semantics on either side, but there are more dev_xxx handlers now that don't fit. Rename them to netif_xxx where appropriate. __netif_set_mtu is used only by bond, so move it into NETDEV_INTERNAL namespace. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250717172333.1288349-5-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 2 +- include/linux/netdevice.h | 2 +- net/core/dev.c | 22 +++++++++++++--------- 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d8281c486a44..257333c88710 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2669,7 +2669,7 @@ static int __bond_release_one(struct net_device *bond_dev, if (unregister) { netdev_lock_ops(slave_dev); - __dev_set_mtu(slave_dev, slave->original_mtu); + __netif_set_mtu(slave_dev, slave->original_mtu); netdev_unlock_ops(slave_dev); } else { dev_set_mtu(slave_dev, slave->original_mtu); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 55c5cd9d1929..8978fbfbd644 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4211,7 +4211,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, struct netlink_ext_ack *extack); int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat); -int __dev_set_mtu(struct net_device *, int); +int __netif_set_mtu(struct net_device *dev, int new_mtu); int netif_set_mtu(struct net_device *dev, int new_mtu); int dev_set_mtu(struct net_device *, int); int netif_pre_changeaddr_notify(struct net_device *dev, const char *addr, diff --git a/net/core/dev.c b/net/core/dev.c index a47754fa7b15..a056f0dfc516 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9594,7 +9594,7 @@ int netif_change_flags(struct net_device *dev, unsigned int flags, return ret; } -int __dev_set_mtu(struct net_device *dev, int new_mtu) +int __netif_set_mtu(struct net_device *dev, int new_mtu) { const struct net_device_ops *ops = dev->netdev_ops; @@ -9605,7 +9605,7 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu) WRITE_ONCE(dev->mtu, new_mtu); return 0; } -EXPORT_SYMBOL(__dev_set_mtu); +EXPORT_SYMBOL_NS_GPL(__netif_set_mtu, "NETDEV_INTERNAL"); int dev_validate_mtu(struct net_device *dev, int new_mtu, struct netlink_ext_ack *extack) @@ -9624,18 +9624,22 @@ int dev_validate_mtu(struct net_device *dev, int new_mtu, } /** - * netif_set_mtu_ext - Change maximum transfer unit - * @dev: device - * @new_mtu: new transfer unit - * @extack: netlink extended ack + * netif_set_mtu_ext() - Change maximum transfer unit + * @dev: device + * @new_mtu: new transfer unit + * @extack: netlink extended ack * - * Change the maximum transfer size of the network device. + * Change the maximum transfer size of the network device. + * + * Return: 0 on success, -errno on failure. */ int netif_set_mtu_ext(struct net_device *dev, int new_mtu, struct netlink_ext_ack *extack) { int err, orig_mtu; + netdev_ops_assert_locked(dev); + if (new_mtu == dev->mtu) return 0; @@ -9652,7 +9656,7 @@ int netif_set_mtu_ext(struct net_device *dev, int new_mtu, return err; orig_mtu = dev->mtu; - err = __dev_set_mtu(dev, new_mtu); + err = __netif_set_mtu(dev, new_mtu); if (!err) { err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, @@ -9662,7 +9666,7 @@ int netif_set_mtu_ext(struct net_device *dev, int new_mtu, /* setting mtu back and notifying everyone again, * so that they have a chance to revert changes. */ - __dev_set_mtu(dev, orig_mtu); + __netif_set_mtu(dev, orig_mtu); call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, new_mtu); } -- cgit v1.2.3 From 93893a57efd431b9b4e72359bc8a8428681ca688 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 17 Jul 2025 10:23:31 -0700 Subject: net: s/dev_get_flags/netif_get_flags/ Commit cc34acd577f1 ("docs: net: document new locking reality") introduced netif_ vs dev_ function semantics: the former expects locked netdev, the latter takes care of the locking. We don't strictly follow this semantics on either side, but there are more dev_xxx handlers now that don't fit. Rename them to netif_xxx where appropriate. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250717172333.1288349-6-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/infiniband/sw/rxe/rxe_verbs.c | 2 +- fs/smb/server/smb2pdu.c | 2 +- include/linux/netdevice.h | 2 +- net/8021q/vlan.c | 2 +- net/bridge/br_netlink.c | 2 +- net/core/dev.c | 10 +++++----- net/core/dev_ioctl.c | 2 +- net/core/rtnetlink.c | 4 ++-- net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_semantics.c | 2 +- net/ipv4/nexthop.c | 2 +- net/ipv6/addrconf.c | 2 +- net/mpls/af_mpls.c | 6 +++--- net/wireless/wext-core.c | 2 +- 14 files changed, 21 insertions(+), 21 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 2331e698a65b..4f86b56fee26 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -65,7 +65,7 @@ static int rxe_query_port(struct ib_device *ibdev, attr->state = ib_get_curr_port_state(ndev); if (attr->state == IB_PORT_ACTIVE) attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; - else if (dev_get_flags(ndev) & IFF_UP) + else if (netif_get_flags(ndev) & IFF_UP) attr->phys_state = IB_PORT_PHYS_STATE_POLLING; else attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 63d17cea2e95..fca92d1fea22 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -7847,7 +7847,7 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn, if (!ksmbd_find_netdev_name_iface_list(netdev->name)) continue; - flags = dev_get_flags(netdev); + flags = netif_get_flags(netdev); if (!(flags & IFF_RUNNING)) continue; ipv6_retry: diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8978fbfbd644..8370cd0f8f6b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4196,7 +4196,7 @@ int generic_hwtstamp_set_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg, struct netlink_ext_ack *extack); int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); -unsigned int dev_get_flags(const struct net_device *); +unsigned int netif_get_flags(const struct net_device *dev); int __dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); int netif_change_flags(struct net_device *dev, unsigned int flags, diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 9a6df8c1daf9..7ffd3386a842 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -483,7 +483,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, case NETDEV_UP: /* Put all VLANs for this dev in the up state too. */ vlan_group_for_each_dev(grp, i, vlandev) { - flgs = dev_get_flags(vlandev); + flgs = netif_get_flags(vlandev); if (flgs & IFF_UP) continue; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 6e337937d0d7..4e2d53b27221 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -479,7 +479,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, hdr->__ifi_pad = 0; hdr->ifi_type = dev->type; hdr->ifi_index = dev->ifindex; - hdr->ifi_flags = dev_get_flags(dev); + hdr->ifi_flags = netif_get_flags(dev); hdr->ifi_change = 0; if (nla_put_string(skb, IFLA_IFNAME, dev->name) || diff --git a/net/core/dev.c b/net/core/dev.c index a056f0dfc516..25905bbf1972 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9454,12 +9454,12 @@ void dev_set_rx_mode(struct net_device *dev) } /** - * dev_get_flags - get flags reported to userspace - * @dev: device + * netif_get_flags() - get flags reported to userspace + * @dev: device * - * Get the combination of flag bits exported through APIs to userspace. + * Get the combination of flag bits exported through APIs to userspace. */ -unsigned int dev_get_flags(const struct net_device *dev) +unsigned int netif_get_flags(const struct net_device *dev) { unsigned int flags; @@ -9482,7 +9482,7 @@ unsigned int dev_get_flags(const struct net_device *dev) return flags; } -EXPORT_SYMBOL(dev_get_flags); +EXPORT_SYMBOL(netif_get_flags); int __dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack) diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index ceb2d63a818a..9c0ad7f4b5d8 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -147,7 +147,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm switch (cmd) { case SIOCGIFFLAGS: /* Get interface flags */ - ifr->ifr_flags = (short) dev_get_flags(dev); + ifr->ifr_flags = (short)netif_get_flags(dev); return 0; case SIOCGIFMETRIC: /* Get the metric on the interface diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 108995b6eced..094b085cff20 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2038,7 +2038,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, ifm->__ifi_pad = 0; ifm->ifi_type = READ_ONCE(dev->type); ifm->ifi_index = READ_ONCE(dev->ifindex); - ifm->ifi_flags = dev_get_flags(dev); + ifm->ifi_flags = netif_get_flags(dev); ifm->ifi_change = change; if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid)) @@ -5227,7 +5227,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, ifm->__ifi_pad = 0; ifm->ifi_type = dev->type; ifm->ifi_index = dev->ifindex; - ifm->ifi_flags = dev_get_flags(dev); + ifm->ifi_flags = netif_get_flags(dev); ifm->ifi_change = 0; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index fd1e1507a224..6e1b94796f67 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1524,7 +1524,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_disable_ip(dev, event, false); break; case NETDEV_CHANGE: - flags = dev_get_flags(dev); + flags = netif_get_flags(dev); if (flags & (IFF_RUNNING | IFF_LOWER_UP)) fib_sync_up(dev, RTNH_F_LINKDOWN); else diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a2f04992f579..a5f3c8459758 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -2087,7 +2087,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) return 0; if (nh_flags & RTNH_F_DEAD) { - unsigned int flags = dev_get_flags(dev); + unsigned int flags = netif_get_flags(dev); if (flags & (IFF_RUNNING | IFF_LOWER_UP)) nh_flags |= RTNH_F_LINKDOWN; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index e808801ab9b8..29118c43ebf5 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3884,7 +3884,7 @@ static int nh_netdev_event(struct notifier_block *this, nexthop_flush_dev(dev, event); break; case NETDEV_CHANGE: - if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP))) + if (!(netif_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP))) nexthop_flush_dev(dev, event); break; case NETDEV_CHANGEMTU: diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c85b1db74b1a..4f1d7d110302 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6072,7 +6072,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, hdr->ifi_type = dev->type; ifindex = READ_ONCE(dev->ifindex); hdr->ifi_index = ifindex; - hdr->ifi_flags = dev_get_flags(dev); + hdr->ifi_flags = netif_get_flags(dev); hdr->ifi_change = 0; iflink = dev_get_iflink(dev); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 47d7dfd9ad09..25c88cba5c48 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -706,7 +706,7 @@ static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt, } else { unsigned int flags; - flags = dev_get_flags(dev); + flags = netif_get_flags(dev); if (!(flags & (IFF_RUNNING | IFF_LOWER_UP))) nh->nh_flags |= RTNH_F_LINKDOWN; } @@ -1616,14 +1616,14 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, return notifier_from_errno(err); break; case NETDEV_UP: - flags = dev_get_flags(dev); + flags = netif_get_flags(dev); if (flags & (IFF_RUNNING | IFF_LOWER_UP)) mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); else mpls_ifup(dev, RTNH_F_DEAD); break; case NETDEV_CHANGE: - flags = dev_get_flags(dev); + flags = netif_get_flags(dev); if (flags & (IFF_RUNNING | IFF_LOWER_UP)) { mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); } else { diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index bea70eb6f034..c32a7c6903d5 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -431,7 +431,7 @@ static struct nlmsghdr *rtnetlink_ifinfo_prep(struct net_device *dev, r->__ifi_pad = 0; r->ifi_type = dev->type; r->ifi_index = dev->ifindex; - r->ifi_flags = dev_get_flags(dev); + r->ifi_flags = netif_get_flags(dev); r->ifi_change = 0; /* Wireless changes don't affect those flags */ if (nla_put_string(skb, IFLA_IFNAME, dev->name)) -- cgit v1.2.3 From 5d4d84618e1aa2c9531afa3a6323f56e1db4dcf7 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 17 Jul 2025 10:23:32 -0700 Subject: net: s/dev_set_threaded/netif_set_threaded/ Commit cc34acd577f1 ("docs: net: document new locking reality") introduced netif_ vs dev_ function semantics: the former expects locked netdev, the latter takes care of the locking. We don't strictly follow this semantics on either side, but there are more dev_xxx handlers now that don't fit. Rename them to netif_xxx where appropriate. Note that one dev_set_threaded call still remains in mt76 for debugfs file. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250717172333.1288349-7-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- Documentation/networking/net_cachelines/net_device.rst | 2 +- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/pci.c | 2 +- drivers/net/ethernet/renesas/ravb_main.c | 2 +- drivers/net/wireguard/device.c | 2 +- drivers/net/wireless/ath/ath10k/snoc.c | 2 +- include/linux/netdevice.h | 1 + net/core/dev.c | 6 +++--- net/core/dev_api.c | 12 ++++++++++++ net/core/net-sysfs.c | 2 +- 10 files changed, 23 insertions(+), 10 deletions(-) (limited to 'net/core/dev.c') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index c69cc89c958e..2d3dc4692d20 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -165,7 +165,7 @@ struct sfp_bus* sfp_bus struct lock_class_key* qdisc_tx_busylock bool proto_down unsigned:1 wol_enabled -unsigned:1 threaded napi_poll(napi_enable,dev_set_threaded) +unsigned:1 threaded napi_poll(napi_enable,netif_set_threaded) unsigned_long:1 see_all_hwtstamp_requests unsigned_long:1 change_proto_down unsigned_long:1 netns_immutable diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index ef1a51347351..3a9ad4a9c1cb 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -2688,7 +2688,7 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->mii.mdio_write = atl1c_mdio_write; adapter->mii.phy_id_mask = 0x1f; adapter->mii.reg_num_mask = MDIO_CTRL_REG_MASK; - dev_set_threaded(netdev, true); + netif_set_threaded(netdev, true); for (i = 0; i < adapter->rx_queue_count; ++i) netif_napi_add(netdev, &adapter->rrd_ring[i].napi, atl1c_clean_rx); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 058dcabfaa2e..a2e97b712a3d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -156,7 +156,7 @@ static int mlxsw_pci_napi_devs_init(struct mlxsw_pci *mlxsw_pci) } strscpy(mlxsw_pci->napi_dev_rx->name, "mlxsw_rx", sizeof(mlxsw_pci->napi_dev_rx->name)); - dev_set_threaded(mlxsw_pci->napi_dev_rx, true); + netif_set_threaded(mlxsw_pci->napi_dev_rx, true); return 0; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index c9f4976a3527..4e79bf88688a 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -3075,7 +3075,7 @@ static int ravb_probe(struct platform_device *pdev) if (info->coalesce_irqs) { netdev_sw_irq_coalesce_default_on(ndev); if (num_present_cpus() == 1) - dev_set_threaded(ndev, true); + netif_set_threaded(ndev, true); } /* Network device register */ diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 4a529f1f9bea..5afec5a865f4 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -366,7 +366,7 @@ static int wg_newlink(struct net_device *dev, if (ret < 0) goto err_free_handshake_queue; - dev_set_threaded(dev, true); + netif_set_threaded(dev, true); ret = register_netdevice(dev); if (ret < 0) goto err_uninit_ratelimiter; diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index d51f2e5a79a4..0ee68d3dad12 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -936,7 +936,7 @@ static int ath10k_snoc_hif_start(struct ath10k *ar) bitmap_clear(ar_snoc->pending_ce_irqs, 0, CE_COUNT_MAX); - dev_set_threaded(ar->napi_dev, true); + netif_set_threaded(ar->napi_dev, true); ath10k_core_napi_enable(ar); /* IRQs are left enabled when we restart due to a firmware crash */ if (!test_bit(ATH10K_SNOC_FLAG_RECOVERY, &ar_snoc->flags)) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8370cd0f8f6b..7929ddfd4433 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -589,6 +589,7 @@ static inline bool napi_complete(struct napi_struct *n) return napi_complete_done(n, 0); } +int netif_set_threaded(struct net_device *dev, bool threaded); int dev_set_threaded(struct net_device *dev, bool threaded); void napi_disable(struct napi_struct *n); diff --git a/net/core/dev.c b/net/core/dev.c index 25905bbf1972..a22f26997b94 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4798,7 +4798,7 @@ static inline void ____napi_schedule(struct softnet_data *sd, if (test_bit(NAPI_STATE_THREADED, &napi->state)) { /* Paired with smp_mb__before_atomic() in - * napi_enable()/dev_set_threaded(). + * napi_enable()/netif_set_threaded(). * Use READ_ONCE() to guarantee a complete * read on napi->thread. Only call * wake_up_process() when it's not NULL. @@ -6990,7 +6990,7 @@ int napi_set_threaded(struct napi_struct *napi, bool threaded) return 0; } -int dev_set_threaded(struct net_device *dev, bool threaded) +int netif_set_threaded(struct net_device *dev, bool threaded) { struct napi_struct *napi; int err = 0; @@ -7031,7 +7031,7 @@ int dev_set_threaded(struct net_device *dev, bool threaded) return err; } -EXPORT_SYMBOL(dev_set_threaded); +EXPORT_SYMBOL(netif_set_threaded); /** * netif_queue_set_napi - Associate queue with the napi diff --git a/net/core/dev_api.c b/net/core/dev_api.c index 1bf0153195f2..dd7f57013ce5 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -367,3 +367,15 @@ void netdev_state_change(struct net_device *dev) netdev_unlock_ops(dev); } EXPORT_SYMBOL(netdev_state_change); + +int dev_set_threaded(struct net_device *dev, bool threaded) +{ + int ret; + + netdev_lock(dev); + ret = netif_set_threaded(dev, threaded); + netdev_unlock(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_threaded); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e41ad1890e49..c28cd6665444 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -757,7 +757,7 @@ static int modify_napi_threaded(struct net_device *dev, unsigned long val) if (val != 0 && val != 1) return -EOPNOTSUPP; - ret = dev_set_threaded(dev, val); + ret = netif_set_threaded(dev, val); return ret; } -- cgit v1.2.3 From 88d3cec28274f9c15355835466c0c694e313680e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 17 Jul 2025 10:23:33 -0700 Subject: net: s/dev_close_many/netif_close_many/ Commit cc34acd577f1 ("docs: net: document new locking reality") introduced netif_ vs dev_ function semantics: the former expects locked netdev, the latter takes care of the locking. We don't strictly follow this semantics on either side, but there are more dev_xxx handlers now that don't fit. Rename them to netif_xxx where appropriate. netif_close_many is used only by vlan/dsa and one mtk driver, so move it into NETDEV_INTERNAL namespace. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250717172333.1288349-8-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 ++- include/linux/netdevice.h | 2 +- net/8021q/vlan.c | 3 ++- net/core/dev.c | 10 +++++----- net/dsa/dsa.c | 3 ++- net/dsa/user.c | 2 +- 6 files changed, 13 insertions(+), 10 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 11ee7e1829bf..5a5fcde76dc0 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4967,7 +4967,7 @@ void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev) list_add_tail(&dev->close_list, &dev_list); } - dev_close_many(&dev_list, false); + netif_close_many(&dev_list, false); eth->dma_dev = dma_dev; @@ -5610,3 +5610,4 @@ module_platform_driver(mtk_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("John Crispin "); MODULE_DESCRIPTION("Ethernet driver for MediaTek SoC"); +MODULE_IMPORT_NS("NETDEV_INTERNAL"); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7929ddfd4433..5aee8d3895f4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3343,7 +3343,7 @@ int netif_open(struct net_device *dev, struct netlink_ext_ack *extack); int dev_open(struct net_device *dev, struct netlink_ext_ack *extack); void netif_close(struct net_device *dev); void dev_close(struct net_device *dev); -void dev_close_many(struct list_head *head, bool unlink); +void netif_close_many(struct list_head *head, bool unlink); void netif_disable_lro(struct net_device *dev); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 7ffd3386a842..fda3a80e9340 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -470,7 +470,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, list_add(&vlandev->close_list, &close_list); } - dev_close_many(&close_list, false); + netif_close_many(&close_list, false); list_for_each_entry_safe(vlandev, tmp, &close_list, close_list) { vlan_stacked_transfer_operstate(dev, vlandev, @@ -765,3 +765,4 @@ module_exit(vlan_cleanup_module); MODULE_DESCRIPTION("802.1Q/802.1ad VLAN Protocol"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +MODULE_IMPORT_NS("NETDEV_INTERNAL"); diff --git a/net/core/dev.c b/net/core/dev.c index a22f26997b94..354d3453b407 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1768,7 +1768,7 @@ static void __dev_close(struct net_device *dev) list_del(&single); } -void dev_close_many(struct list_head *head, bool unlink) +void netif_close_many(struct list_head *head, bool unlink) { struct net_device *dev, *tmp; @@ -1786,7 +1786,7 @@ void dev_close_many(struct list_head *head, bool unlink) list_del_init(&dev->close_list); } } -EXPORT_SYMBOL(dev_close_many); +EXPORT_SYMBOL_NS_GPL(netif_close_many, "NETDEV_INTERNAL"); void netif_close(struct net_device *dev) { @@ -1794,7 +1794,7 @@ void netif_close(struct net_device *dev) LIST_HEAD(single); list_add(&dev->close_list, &single); - dev_close_many(&single, true); + netif_close_many(&single, true); list_del(&single); } } @@ -12099,7 +12099,7 @@ void unregister_netdevice_many_notify(struct list_head *head, netdev_lock(dev); } } - dev_close_many(&close_head, true); + netif_close_many(&close_head, true); /* ... now unlock them and go over the rest. */ list_for_each_entry(dev, head, unreg_list) { if (netdev_need_ops_lock(dev)) @@ -12107,7 +12107,7 @@ void unregister_netdevice_many_notify(struct list_head *head, else list_add_tail(&dev->close_list, &close_head); } - dev_close_many(&close_head, true); + netif_close_many(&close_head, true); list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 436a7e1b412a..5b01a0e43ebe 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1621,7 +1621,7 @@ void dsa_switch_shutdown(struct dsa_switch *ds) dsa_switch_for_each_cpu_port(dp, ds) list_add(&dp->conduit->close_list, &close_list); - dev_close_many(&close_list, true); + netif_close_many(&close_list, true); dsa_switch_for_each_user_port(dp, ds) { conduit = dsa_port_to_conduit(dp); @@ -1829,3 +1829,4 @@ MODULE_AUTHOR("Lennert Buytenhek "); MODULE_DESCRIPTION("Driver for Distributed Switch Architecture switch chips"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:dsa"); +MODULE_IMPORT_NS("NETDEV_INTERNAL"); diff --git a/net/dsa/user.c b/net/dsa/user.c index e9334520c54a..f59d66f0975d 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -3604,7 +3604,7 @@ static int dsa_user_netdevice_event(struct notifier_block *nb, list_add(&dp->user->close_list, &close_list); } - dev_close_many(&close_list, true); + netif_close_many(&close_list, true); return NOTIFY_OK; } -- cgit v1.2.3 From 71c52411c51bf4f0869c572294ce8123b26528d5 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Wed, 23 Jul 2025 01:30:29 +0000 Subject: net: Create separate gro_flush_normal function Move multiple copies of same code snippet doing `gro_flush` and `gro_normal_list` into separate helper function. Signed-off-by: Samiullah Khawaja Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250723013031.2911384-2-skhawaja@google.com Signed-off-by: Jakub Kicinski --- include/net/gro.h | 6 ++++++ kernel/bpf/cpumap.c | 3 +-- net/core/dev.c | 9 +++------ 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/net/gro.h b/include/net/gro.h index 22d3a69e4404..a0fca7ac6e7e 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -534,6 +534,12 @@ static inline void gro_normal_list(struct gro_node *gro) gro->rx_count = 0; } +static inline void gro_flush_normal(struct gro_node *gro, bool flush_old) +{ + gro_flush(gro, flush_old); + gro_normal_list(gro); +} + /* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, * pass the whole batch up to the stack. */ diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 67e8a2fc1a99..b2b7b8ec2c2a 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -282,8 +282,7 @@ static void cpu_map_gro_flush(struct bpf_cpu_map_entry *rcpu, bool empty) * This is equivalent to how NAPI decides whether to perform a full * flush. */ - gro_flush(&rcpu->gro, !empty && HZ >= 1000); - gro_normal_list(&rcpu->gro); + gro_flush_normal(&rcpu->gro, !empty && HZ >= 1000); } static int cpu_map_kthread_run(void *data) diff --git a/net/core/dev.c b/net/core/dev.c index 354d3453b407..76384b8a7871 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6578,8 +6578,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done) * it, we need to bound somehow the time packets are kept in * the GRO layer. */ - gro_flush(&n->gro, !!timeout); - gro_normal_list(&n->gro); + gro_flush_normal(&n->gro, !!timeout); if (unlikely(!list_empty(&n->poll_list))) { /* If n->poll_list is not empty, we need to mask irqs */ @@ -6649,8 +6648,7 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule) } /* Flush too old packets. If HZ < 1000, flush all packets */ - gro_flush(&napi->gro, HZ >= 1000); - gro_normal_list(&napi->gro); + gro_flush_normal(&napi->gro, HZ >= 1000); clear_bit(NAPI_STATE_SCHED, &napi->state); } @@ -7515,8 +7513,7 @@ static int __napi_poll(struct napi_struct *n, bool *repoll) } /* Flush too old packets. If HZ < 1000, flush all packets */ - gro_flush(&n->gro, HZ >= 1000); - gro_normal_list(&n->gro); + gro_flush_normal(&n->gro, HZ >= 1000); /* Some drivers may have called napi_schedule * prior to exhausting their budget. -- cgit v1.2.3 From 78afdadafe6fe0c74c08fda156e7be0a0b402b90 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Wed, 23 Jul 2025 01:30:30 +0000 Subject: net: Use netif_threaded_enable instead of netif_set_threaded in drivers Prepare for adding an enum type for NAPI threaded states by adding netif_threaded_enable API. De-export the existing netif_set_threaded API and only use it internally. Update existing drivers to use netif_threaded_enable instead of the de-exported netif_set_threaded. Note that dev_set_threaded used by mt76 debugfs file is unchanged. Signed-off-by: Samiullah Khawaja Link: https://patch.msgid.link/20250723013031.2911384-3-skhawaja@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/pci.c | 2 +- drivers/net/ethernet/renesas/ravb_main.c | 2 +- drivers/net/wireguard/device.c | 2 +- drivers/net/wireless/ath/ath10k/snoc.c | 2 +- include/linux/netdevice.h | 2 +- net/core/dev.c | 18 +++++++++++++++++- net/core/dev.h | 2 ++ 8 files changed, 25 insertions(+), 7 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 3a9ad4a9c1cb..7efa3fc257b3 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -2688,7 +2688,7 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->mii.mdio_write = atl1c_mdio_write; adapter->mii.phy_id_mask = 0x1f; adapter->mii.reg_num_mask = MDIO_CTRL_REG_MASK; - netif_set_threaded(netdev, true); + netif_threaded_enable(netdev); for (i = 0; i < adapter->rx_queue_count; ++i) netif_napi_add(netdev, &adapter->rrd_ring[i].napi, atl1c_clean_rx); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index a2e97b712a3d..8769cba2c746 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -156,7 +156,7 @@ static int mlxsw_pci_napi_devs_init(struct mlxsw_pci *mlxsw_pci) } strscpy(mlxsw_pci->napi_dev_rx->name, "mlxsw_rx", sizeof(mlxsw_pci->napi_dev_rx->name)); - netif_set_threaded(mlxsw_pci->napi_dev_rx, true); + netif_threaded_enable(mlxsw_pci->napi_dev_rx); return 0; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 4e79bf88688a..94b6fb94f8f1 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -3075,7 +3075,7 @@ static int ravb_probe(struct platform_device *pdev) if (info->coalesce_irqs) { netdev_sw_irq_coalesce_default_on(ndev); if (num_present_cpus() == 1) - netif_set_threaded(ndev, true); + netif_threaded_enable(ndev); } /* Network device register */ diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 5afec5a865f4..813bd10d3dc7 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -366,7 +366,7 @@ static int wg_newlink(struct net_device *dev, if (ret < 0) goto err_free_handshake_queue; - netif_set_threaded(dev, true); + netif_threaded_enable(dev); ret = register_netdevice(dev); if (ret < 0) goto err_uninit_ratelimiter; diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index 0ee68d3dad12..f0713bd36173 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -936,7 +936,7 @@ static int ath10k_snoc_hif_start(struct ath10k *ar) bitmap_clear(ar_snoc->pending_ce_irqs, 0, CE_COUNT_MAX); - netif_set_threaded(ar->napi_dev, true); + netif_threaded_enable(ar->napi_dev); ath10k_core_napi_enable(ar); /* IRQs are left enabled when we restart due to a firmware crash */ if (!test_bit(ATH10K_SNOC_FLAG_RECOVERY, &ar_snoc->flags)) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5aee8d3895f4..a97c9a337d6b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -589,7 +589,7 @@ static inline bool napi_complete(struct napi_struct *n) return napi_complete_done(n, 0); } -int netif_set_threaded(struct net_device *dev, bool threaded); +void netif_threaded_enable(struct net_device *dev); int dev_set_threaded(struct net_device *dev, bool threaded); void napi_disable(struct napi_struct *n); diff --git a/net/core/dev.c b/net/core/dev.c index 76384b8a7871..f28661d6f5ea 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7029,7 +7029,23 @@ int netif_set_threaded(struct net_device *dev, bool threaded) return err; } -EXPORT_SYMBOL(netif_set_threaded); + +/** + * netif_threaded_enable() - enable threaded NAPIs + * @dev: net_device instance + * + * Enable threaded mode for the NAPI instances of the device. This may be useful + * for devices where multiple NAPI instances get scheduled by a single + * interrupt. Threaded NAPI allows moving the NAPI processing to cores other + * than the core where IRQ is mapped. + * + * This function should be called before @dev is registered. + */ +void netif_threaded_enable(struct net_device *dev) +{ + WARN_ON_ONCE(netif_set_threaded(dev, true)); +} +EXPORT_SYMBOL(netif_threaded_enable); /** * netif_queue_set_napi - Associate queue with the napi diff --git a/net/core/dev.h b/net/core/dev.h index a603387fb566..f5b567310908 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -322,6 +322,8 @@ static inline bool napi_get_threaded(struct napi_struct *n) int napi_set_threaded(struct napi_struct *n, bool threaded); +int netif_set_threaded(struct net_device *dev, bool threaded); + int rps_cpumask_housekeeping(struct cpumask *mask); #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) -- cgit v1.2.3 From 8e7583a4f65f3dbf3e8deb4e60f3679c276bef62 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Wed, 23 Jul 2025 01:30:31 +0000 Subject: net: define an enum for the napi threaded state Instead of using '0' and '1' for napi threaded state use an enum with 'disabled' and 'enabled' states. Tested: ./tools/testing/selftests/net/nl_netdev.py TAP version 13 1..7 ok 1 nl_netdev.empty_check ok 2 nl_netdev.lo_check ok 3 nl_netdev.page_pool_check ok 4 nl_netdev.napi_list_check ok 5 nl_netdev.dev_set_threaded ok 6 nl_netdev.napi_set_threaded ok 7 nl_netdev.nsim_rxq_reset_down # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Samiullah Khawaja Link: https://patch.msgid.link/20250723013031.2911384-4-skhawaja@google.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 13 +++++--- .../networking/net_cachelines/net_device.rst | 2 +- include/linux/netdevice.h | 10 +++--- include/uapi/linux/netdev.h | 5 +++ net/core/dev.c | 12 +++++--- net/core/dev.h | 13 +++++--- net/core/dev_api.c | 3 +- net/core/netdev-genl-gen.c | 2 +- net/core/netdev-genl.c | 2 +- tools/include/uapi/linux/netdev.h | 5 +++ tools/testing/selftests/net/nl_netdev.py | 36 +++++++++++----------- 11 files changed, 62 insertions(+), 41 deletions(-) (limited to 'net/core/dev.c') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 85d0ea6ac426..c035dc0f64fd 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -85,6 +85,10 @@ definitions: name: qstats-scope type: flags entries: [queue] + - + name: napi-threaded + type: enum + entries: [disabled, enabled] attribute-sets: - @@ -286,11 +290,10 @@ attribute-sets: - name: threaded doc: Whether the NAPI is configured to operate in threaded polling - mode. If this is set to 1 then the NAPI context operates in - threaded polling mode. - type: uint - checks: - max: 1 + mode. If this is set to enabled then the NAPI context operates + in threaded polling mode. + type: u32 + enum: napi-threaded - name: xsk-info attributes: [] diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 2d3dc4692d20..1c19bb7705df 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -68,6 +68,7 @@ unsigned_char addr_assign_type unsigned_char addr_len unsigned_char upper_level unsigned_char lower_level +u8 threaded napi_poll(napi_enable,netif_set_threaded) unsigned_short neigh_priv_len unsigned_short padded unsigned_short dev_id @@ -165,7 +166,6 @@ struct sfp_bus* sfp_bus struct lock_class_key* qdisc_tx_busylock bool proto_down unsigned:1 wol_enabled -unsigned:1 threaded napi_poll(napi_enable,netif_set_threaded) unsigned_long:1 see_all_hwtstamp_requests unsigned_long:1 change_proto_down unsigned_long:1 netns_immutable diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a97c9a337d6b..5e5de4b0a433 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -369,7 +369,7 @@ struct napi_config { u64 irq_suspend_timeout; u32 defer_hard_irqs; cpumask_t affinity_mask; - bool threaded; + u8 threaded; unsigned int napi_id; }; @@ -590,7 +590,8 @@ static inline bool napi_complete(struct napi_struct *n) } void netif_threaded_enable(struct net_device *dev); -int dev_set_threaded(struct net_device *dev, bool threaded); +int dev_set_threaded(struct net_device *dev, + enum netdev_napi_threaded threaded); void napi_disable(struct napi_struct *n); void napi_disable_locked(struct napi_struct *n); @@ -1872,6 +1873,7 @@ enum netdev_reg_state { * @addr_len: Hardware address length * @upper_level: Maximum depth level of upper devices. * @lower_level: Maximum depth level of lower devices. + * @threaded: napi threaded state. * @neigh_priv_len: Used in neigh_alloc() * @dev_id: Used to differentiate devices that share * the same link layer address @@ -2011,8 +2013,6 @@ enum netdev_reg_state { * switch driver and used to set the phys state of the * switch port. * - * @threaded: napi threaded mode is enabled - * * @irq_affinity_auto: driver wants the core to store and re-assign the IRQ * affinity. Set by netif_enable_irq_affinity(), then * the driver must create a persistent napi by @@ -2248,6 +2248,7 @@ struct net_device { unsigned char addr_len; unsigned char upper_level; unsigned char lower_level; + u8 threaded; unsigned short neigh_priv_len; unsigned short dev_id; @@ -2429,7 +2430,6 @@ struct net_device { struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; bool proto_down; - bool threaded; bool irq_affinity_auto; bool rx_cpu_rmap_auto; diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 1f3719a9a0eb..48eb49aa03d4 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -77,6 +77,11 @@ enum netdev_qstats_scope { NETDEV_QSTATS_SCOPE_QUEUE = 1, }; +enum netdev_napi_threaded { + NETDEV_NAPI_THREADED_DISABLED, + NETDEV_NAPI_THREADED_ENABLED, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, diff --git a/net/core/dev.c b/net/core/dev.c index f28661d6f5ea..1c6e755841ce 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6963,7 +6963,8 @@ static void napi_stop_kthread(struct napi_struct *napi) napi->thread = NULL; } -int napi_set_threaded(struct napi_struct *napi, bool threaded) +int napi_set_threaded(struct napi_struct *napi, + enum netdev_napi_threaded threaded) { if (threaded) { if (!napi->thread) { @@ -6988,7 +6989,8 @@ int napi_set_threaded(struct napi_struct *napi, bool threaded) return 0; } -int netif_set_threaded(struct net_device *dev, bool threaded) +int netif_set_threaded(struct net_device *dev, + enum netdev_napi_threaded threaded) { struct napi_struct *napi; int err = 0; @@ -7000,7 +7002,7 @@ int netif_set_threaded(struct net_device *dev, bool threaded) if (!napi->thread) { err = napi_kthread_create(napi); if (err) { - threaded = false; + threaded = NETDEV_NAPI_THREADED_DISABLED; break; } } @@ -7043,7 +7045,7 @@ int netif_set_threaded(struct net_device *dev, bool threaded) */ void netif_threaded_enable(struct net_device *dev) { - WARN_ON_ONCE(netif_set_threaded(dev, true)); + WARN_ON_ONCE(netif_set_threaded(dev, NETDEV_NAPI_THREADED_ENABLED)); } EXPORT_SYMBOL(netif_threaded_enable); @@ -7360,7 +7362,7 @@ void netif_napi_add_weight_locked(struct net_device *dev, * threaded mode will not be enabled in napi_enable(). */ if (dev->threaded && napi_kthread_create(napi)) - dev->threaded = false; + dev->threaded = NETDEV_NAPI_THREADED_DISABLED; netif_napi_set_irq_locked(napi, -1); } EXPORT_SYMBOL(netif_napi_add_weight_locked); diff --git a/net/core/dev.h b/net/core/dev.h index f5b567310908..ab69edc0c3e3 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -315,14 +315,19 @@ static inline void napi_set_irq_suspend_timeout(struct napi_struct *n, WRITE_ONCE(n->irq_suspend_timeout, timeout); } -static inline bool napi_get_threaded(struct napi_struct *n) +static inline enum netdev_napi_threaded napi_get_threaded(struct napi_struct *n) { - return test_bit(NAPI_STATE_THREADED, &n->state); + if (test_bit(NAPI_STATE_THREADED, &n->state)) + return NETDEV_NAPI_THREADED_ENABLED; + + return NETDEV_NAPI_THREADED_DISABLED; } -int napi_set_threaded(struct napi_struct *n, bool threaded); +int napi_set_threaded(struct napi_struct *n, + enum netdev_napi_threaded threaded); -int netif_set_threaded(struct net_device *dev, bool threaded); +int netif_set_threaded(struct net_device *dev, + enum netdev_napi_threaded threaded); int rps_cpumask_housekeeping(struct cpumask *mask); diff --git a/net/core/dev_api.c b/net/core/dev_api.c index dd7f57013ce5..f28852078aa6 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -368,7 +368,8 @@ void netdev_state_change(struct net_device *dev) } EXPORT_SYMBOL(netdev_state_change); -int dev_set_threaded(struct net_device *dev, bool threaded) +int dev_set_threaded(struct net_device *dev, + enum netdev_napi_threaded threaded) { int ret; diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 0994bd68a7e6..e9a2a6f26cb7 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -97,7 +97,7 @@ static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_THREADED [NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range), [NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, }, [NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, }, - [NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_UINT, 1), + [NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_U32, 1), }; /* NETDEV_CMD_BIND_TX - do */ diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 5875df372415..6314eb7bdf69 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -333,7 +333,7 @@ netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info) int ret; threaded = nla_get_uint(info->attrs[NETDEV_A_NAPI_THREADED]); - ret = napi_set_threaded(napi, !!threaded); + ret = napi_set_threaded(napi, threaded); if (ret) return ret; } diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 1f3719a9a0eb..48eb49aa03d4 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -77,6 +77,11 @@ enum netdev_qstats_scope { NETDEV_QSTATS_SCOPE_QUEUE = 1, }; +enum netdev_napi_threaded { + NETDEV_NAPI_THREADED_DISABLED, + NETDEV_NAPI_THREADED_ENABLED, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index c8ffade79a52..5c66421ab8aa 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -52,14 +52,14 @@ def napi_set_threaded(nf) -> None: napi1_id = napis[1]['id'] # set napi threaded and verify - nf.napi_set({'id': napi0_id, 'threaded': 1}) + nf.napi_set({'id': napi0_id, 'threaded': "enabled"}) napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 1) + ksft_eq(napi0['threaded'], "enabled") ksft_ne(napi0.get('pid'), None) # check it is not set for napi1 napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1['threaded'], "disabled") ksft_eq(napi1.get('pid'), None) ip(f"link set dev {nsim.ifname} down") @@ -67,18 +67,18 @@ def napi_set_threaded(nf) -> None: # verify if napi threaded is still set napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 1) + ksft_eq(napi0['threaded'], "enabled") ksft_ne(napi0.get('pid'), None) # check it is still not set for napi1 napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1['threaded'], "disabled") ksft_eq(napi1.get('pid'), None) # unset napi threaded and verify - nf.napi_set({'id': napi0_id, 'threaded': 0}) + nf.napi_set({'id': napi0_id, 'threaded': "disabled"}) napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0['threaded'], "disabled") ksft_eq(napi0.get('pid'), None) # set threaded at device level @@ -86,10 +86,10 @@ def napi_set_threaded(nf) -> None: # check napi threaded is set for both napis napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 1) + ksft_eq(napi0['threaded'], "enabled") ksft_ne(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 1) + ksft_eq(napi1['threaded'], "enabled") ksft_ne(napi1.get('pid'), None) # unset threaded at device level @@ -97,16 +97,16 @@ def napi_set_threaded(nf) -> None: # check napi threaded is unset for both napis napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0['threaded'], "disabled") ksft_eq(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1['threaded'], "disabled") ksft_eq(napi1.get('pid'), None) # set napi threaded for napi0 nf.napi_set({'id': napi0_id, 'threaded': 1}) napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 1) + ksft_eq(napi0['threaded'], "enabled") ksft_ne(napi0.get('pid'), None) # unset threaded at device level @@ -114,10 +114,10 @@ def napi_set_threaded(nf) -> None: # check napi threaded is unset for both napis napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0['threaded'], "disabled") ksft_eq(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1['threaded'], "disabled") ksft_eq(napi1.get('pid'), None) def dev_set_threaded(nf) -> None: @@ -141,10 +141,10 @@ def dev_set_threaded(nf) -> None: # check napi threaded is set for both napis napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 1) + ksft_eq(napi0['threaded'], "enabled") ksft_ne(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 1) + ksft_eq(napi1['threaded'], "enabled") ksft_ne(napi1.get('pid'), None) # unset threaded @@ -152,10 +152,10 @@ def dev_set_threaded(nf) -> None: # check napi threaded is unset for both napis napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0['threaded'], "disabled") ksft_eq(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1['threaded'], "disabled") ksft_eq(napi1.get('pid'), None) def nsim_rxq_reset_down(nf) -> None: -- cgit v1.2.3