From ffb7ed19ac0a9fa9ea79af1d7b42c03a10da98a5 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 5 Mar 2025 08:37:25 -0800 Subject: net: hold netdev instance lock during ioctl operations Convert all ndo_eth_ioctl invocations to dev_eth_ioctl which does the locking. Reflow some of the dev_siocxxx to drop else clause. Cc: Saeed Mahameed Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250305163732.2766420-8-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- net/core/dev_ioctl.c | 67 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 27 deletions(-) (limited to 'net/core/dev_ioctl.c') diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 4c2098ac9d72..d9f350593121 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -110,7 +110,7 @@ static int dev_getifmap(struct net_device *dev, struct ifreq *ifr) return 0; } -static int dev_setifmap(struct net_device *dev, struct ifreq *ifr) +static int netif_setifmap(struct net_device *dev, struct ifreq *ifr) { struct compat_ifmap *cifmap = (struct compat_ifmap *)&ifr->ifr_map; @@ -240,20 +240,6 @@ int net_hwtstamp_validate(const struct kernel_hwtstamp_config *cfg) return 0; } -static int dev_eth_ioctl(struct net_device *dev, - struct ifreq *ifr, unsigned int cmd) -{ - const struct net_device_ops *ops = dev->netdev_ops; - - if (!ops->ndo_eth_ioctl) - return -EOPNOTSUPP; - - if (!netif_device_present(dev)) - return -ENODEV; - - return ops->ndo_eth_ioctl(dev, ifr, cmd); -} - /** * dev_get_hwtstamp_phylib() - Get hardware timestamping settings of NIC * or of attached phylib PHY @@ -305,7 +291,9 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr) return -ENODEV; kernel_cfg.ifr = ifr; + netdev_lock_ops(dev); err = dev_get_hwtstamp_phylib(dev, &kernel_cfg); + netdev_unlock_ops(dev); if (err) return err; @@ -429,7 +417,9 @@ static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr) if (!netif_device_present(dev)) return -ENODEV; + netdev_lock_ops(dev); err = dev_set_hwtstamp_phylib(dev, &kernel_cfg, &extack); + netdev_unlock_ops(dev); if (err) return err; @@ -504,10 +494,14 @@ static int dev_siocbond(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocbond) { + int ret = -ENODEV; + + netdev_lock_ops(dev); if (netif_device_present(dev)) - return ops->ndo_siocbond(dev, ifr, cmd); - else - return -ENODEV; + ret = ops->ndo_siocbond(dev, ifr, cmd); + netdev_unlock_ops(dev); + + return ret; } return -EOPNOTSUPP; @@ -519,10 +513,14 @@ static int dev_siocdevprivate(struct net_device *dev, struct ifreq *ifr, const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocdevprivate) { + int ret = -ENODEV; + + netdev_lock_ops(dev); if (netif_device_present(dev)) - return ops->ndo_siocdevprivate(dev, ifr, data, cmd); - else - return -ENODEV; + ret = ops->ndo_siocdevprivate(dev, ifr, data, cmd); + netdev_unlock_ops(dev); + + return ret; } return -EOPNOTSUPP; @@ -533,10 +531,14 @@ static int dev_siocwandev(struct net_device *dev, struct if_settings *ifs) const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocwandev) { + int ret = -ENODEV; + + netdev_lock_ops(dev); if (netif_device_present(dev)) - return ops->ndo_siocwandev(dev, ifs); - else - return -ENODEV; + ret = ops->ndo_siocwandev(dev, ifs); + netdev_unlock_ops(dev); + + return ret; } return -EOPNOTSUPP; @@ -580,11 +582,16 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, min(sizeof(ifr->ifr_hwaddr.sa_data_min), (size_t)dev->addr_len)); + netdev_lock_ops(dev); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + netdev_unlock_ops(dev); return 0; case SIOCSIFMAP: - return dev_setifmap(dev, ifr); + netdev_lock_ops(dev); + err = netif_setifmap(dev, ifr); + netdev_unlock_ops(dev); + return err; case SIOCADDMULTI: if (!ops->ndo_set_rx_mode || @@ -592,7 +599,10 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); + netdev_lock_ops(dev); + err = dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); + netdev_unlock_ops(dev); + return err; case SIOCDELMULTI: if (!ops->ndo_set_rx_mode || @@ -600,7 +610,10 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); + netdev_lock_ops(dev); + err = dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); + netdev_unlock_ops(dev); + return err; case SIOCSIFTXQLEN: if (ifr->ifr_qlen < 0) -- cgit v1.2.3 From df43d8bf10316a7c3b1e47e3cc0057a54df4a5b8 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 5 Mar 2025 08:37:29 -0800 Subject: net: replace dev_addr_sem with netdev instance lock Lockdep reports possible circular dependency in [0]. Instead of fixing the ordering, replace global dev_addr_sem with netdev instance lock. Most of the paths that set/get mac are RTNL protected. Two places where it's not, convert to explicit locking: - sysfs address_show - dev_get_mac_address via dev_ioctl 0: https://netdev-3.bots.linux.dev/vmksft-forwarding-dbg/results/993321/24-router-bridge-1d-lag-sh/stderr Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250305163732.2766420-12-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/tap.c | 2 +- drivers/net/tun.c | 2 +- include/linux/netdevice.h | 6 +----- net/core/dev.c | 52 ++++++++++++++++++++++++----------------------- net/core/dev.h | 3 +-- net/core/dev_api.c | 17 ++-------------- net/core/dev_ioctl.c | 2 +- net/core/net-sysfs.c | 7 ++----- net/core/rtnetlink.c | 6 +++++- 9 files changed, 41 insertions(+), 56 deletions(-) (limited to 'net/core/dev_ioctl.c') diff --git a/drivers/net/tap.c b/drivers/net/tap.c index d4ece538f1b2..4382f5e323b0 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1017,7 +1017,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd, rtnl_unlock(); return -ENOLINK; } - ret = dev_set_mac_address_user(tap->dev, &sa, NULL); + ret = dev_set_mac_address(tap->dev, &sa, NULL); tap_put_tap_dev(tap); rtnl_unlock(); return ret; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index d8f4d3e996a7..1e645d5e225c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3175,7 +3175,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, case SIOCSIFHWADDR: /* Set hw address */ - ret = dev_set_mac_address_user(tun->dev, &ifr.ifr_hwaddr, NULL); + ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr, NULL); break; case TUNGETSNDBUF: diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca9c09dab14e..f3e6e6f27e22 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2492,7 +2492,7 @@ struct net_device { * * Protects: * @gro_flush_timeout, @napi_defer_hard_irqs, @napi_list, - * @net_shaper_hierarchy, @reg_state, @threaded + * @net_shaper_hierarchy, @reg_state, @threaded, @dev_addr * * Partially protects (writers must hold both @lock and rtnl_lock): * @up @@ -4262,10 +4262,6 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); -int netif_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, - struct netlink_ext_ack *extack); -int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, - struct netlink_ext_ack *extack); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); diff --git a/net/core/dev.c b/net/core/dev.c index 404047d4d943..a0f75a1d1f5a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1058,6 +1058,28 @@ struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex) return __netdev_put_lock(dev); } +/** + * netdev_get_by_name_lock() - find a device by its name + * @net: the applicable net namespace + * @name: name of device + * + * Search for an interface by name. If a valid device + * with @name is found it will be returned with netdev->lock held. + * netdev_unlock() must be called to release it. + * + * Return: pointer to a device with lock held, NULL if not found. + */ +struct net_device *netdev_get_by_name_lock(struct net *net, const char *name) +{ + struct net_device *dev; + + dev = dev_get_by_name(net, name); + if (!dev) + return NULL; + + return __netdev_put_lock(dev); +} + struct net_device * netdev_xa_find_lock(struct net *net, struct net_device *dev, unsigned long *index) @@ -9589,44 +9611,24 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa, return 0; } -DECLARE_RWSEM(dev_addr_sem); - -int netif_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, - struct netlink_ext_ack *extack) -{ - int ret; - - down_write(&dev_addr_sem); - ret = netif_set_mac_address(dev, sa, extack); - up_write(&dev_addr_sem); - return ret; -} - int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { size_t size = sizeof(sa->sa_data_min); struct net_device *dev; - int ret = 0; - down_read(&dev_addr_sem); - rcu_read_lock(); + dev = netdev_get_by_name_lock(net, dev_name); + if (!dev) + return -ENODEV; - dev = dev_get_by_name_rcu(net, dev_name); - if (!dev) { - ret = -ENODEV; - goto unlock; - } if (!dev->addr_len) memset(sa->sa_data, 0, size); else memcpy(sa->sa_data, dev->dev_addr, min_t(size_t, size, dev->addr_len)); sa->sa_family = dev->type; + netdev_unlock(dev); -unlock: - rcu_read_unlock(); - up_read(&dev_addr_sem); - return ret; + return 0; } EXPORT_SYMBOL(dev_get_mac_address); diff --git a/net/core/dev.h b/net/core/dev.h index 41b0831aba60..b50ca645c086 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -28,6 +28,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id); struct net_device *dev_get_by_napi_id(unsigned int napi_id); struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex); +struct net_device *netdev_get_by_name_lock(struct net *net, const char *name); struct net_device *__netdev_put_lock(struct net_device *dev); struct net_device * netdev_xa_find_lock(struct net *net, struct net_device *dev, @@ -69,8 +70,6 @@ extern int weight_p; extern int dev_weight_rx_bias; extern int dev_weight_tx_bias; -extern struct rw_semaphore dev_addr_sem; - /* rtnl helpers */ extern struct list_head net_todo_list; void netdev_run_todo(void); diff --git a/net/core/dev_api.c b/net/core/dev_api.c index 98db990ce21c..655a95fb7baa 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -82,19 +82,6 @@ void dev_set_group(struct net_device *dev, int new_group) netdev_unlock_ops(dev); } -int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, - struct netlink_ext_ack *extack) -{ - int ret; - - netdev_lock_ops(dev); - ret = netif_set_mac_address_user(dev, sa, extack); - netdev_unlock_ops(dev); - - return ret; -} -EXPORT_SYMBOL(dev_set_mac_address_user); - /** * dev_change_net_namespace() - move device to different nethost namespace * @dev: device @@ -310,9 +297,9 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, { int ret; - netdev_lock_ops(dev); + netdev_lock(dev); ret = netif_set_mac_address(dev, sa, extack); - netdev_unlock_ops(dev); + netdev_unlock(dev); return ret; } diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index d9f350593121..296e52d1395d 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -574,7 +574,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, case SIOCSIFHWADDR: if (dev->addr_len > sizeof(struct sockaddr)) return -EINVAL; - return dev_set_mac_address_user(dev, &ifr->ifr_hwaddr, NULL); + return dev_set_mac_address(dev, &ifr->ifr_hwaddr, NULL); case SIOCSIFHWBROADCAST: if (ifr->ifr_hwaddr.sa_family != dev->type) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 27eea34d1b41..02d1d40b47ae 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -262,14 +262,11 @@ static ssize_t address_show(struct device *dev, struct device_attribute *attr, struct net_device *ndev = to_net_dev(dev); ssize_t ret = -EINVAL; - down_read(&dev_addr_sem); - - rcu_read_lock(); + netdev_lock(ndev); if (dev_isalive(ndev)) ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len); - rcu_read_unlock(); + netdev_unlock(ndev); - up_read(&dev_addr_sem); return ret; } static DEVICE_ATTR_RO(address); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9d539c9ce1a4..88a352b02bce 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3089,7 +3089,11 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); - err = netif_set_mac_address_user(dev, sa, extack); + if (!netdev_need_ops_lock(dev)) + netdev_lock(dev); + err = netif_set_mac_address(dev, sa, extack); + if (!netdev_need_ops_lock(dev)) + netdev_unlock(dev); kfree(sa); if (err) goto errout; -- cgit v1.2.3 From 8ef890df4031121a94407c84659125cbccd3fdbe Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 7 Mar 2025 10:30:06 -0800 Subject: net: move misc netdev_lock flavors to a separate header Move the more esoteric helpers for netdev instance lock to a dedicated header. This avoids growing netdevice.h to infinity and makes rebuilding the kernel much faster (after touching the header with the helpers). The main netdev_lock() / netdev_unlock() functions are used in static inlines in netdevice.h and will probably be used most commonly, so keep them in netdevice.h. Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250307183006.2312761-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 1 + drivers/net/dummy.c | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 1 + drivers/net/ethernet/intel/iavf/iavf_main.c | 1 + drivers/net/ethernet/microsoft/mana/mana_en.c | 1 + drivers/net/geneve.c | 1 + drivers/net/hamradio/bpqether.c | 1 + drivers/net/hyperv/netvsc_drv.c | 1 + drivers/net/ipvlan/ipvlan_main.c | 1 + drivers/net/loopback.c | 1 + drivers/net/macsec.c | 1 + drivers/net/macvlan.c | 1 + drivers/net/netdevsim/netdev.c | 1 + drivers/net/ppp/ppp_generic.c | 1 + drivers/net/team/team_core.c | 1 + drivers/net/veth.c | 1 + drivers/net/vrf.c | 1 + drivers/net/vxlan/vxlan_core.c | 1 + include/linux/netdevice.h | 81 +----------------------- include/net/netdev_lock.h | 89 +++++++++++++++++++++++++++ kernel/bpf/offload.c | 1 + net/8021q/vlan_dev.c | 1 + net/bluetooth/6lowpan.c | 1 + net/bridge/br_device.c | 2 + net/core/dev.c | 1 + net/core/dev.h | 1 + net/core/dev_api.c | 2 + net/core/dev_ioctl.c | 1 + net/core/net-sysfs.c | 1 + net/core/rtnetlink.c | 1 + net/dsa/conduit.c | 1 + net/ethtool/cabletest.c | 1 + net/ethtool/cmis_fw_update.c | 1 + net/ethtool/features.c | 2 + net/ethtool/ioctl.c | 1 + net/ethtool/module.c | 1 + net/ethtool/netlink.c | 1 + net/ethtool/phy.c | 1 + net/ethtool/rss.c | 2 + net/ethtool/tsinfo.c | 1 + net/ieee802154/6lowpan/core.c | 1 + net/ipv4/ip_tunnel.c | 1 + net/ipv6/ip6_gre.c | 1 + net/ipv6/ip6_tunnel.c | 1 + net/ipv6/ip6_vti.c | 1 + net/ipv6/sit.c | 1 + net/l2tp/l2tp_eth.c | 1 + net/sched/sch_api.c | 1 + net/xdp/xsk.c | 1 + net/xdp/xsk_buff_pool.c | 1 + 51 files changed, 143 insertions(+), 80 deletions(-) create mode 100644 include/net/netdev_lock.h (limited to 'net/core/dev_ioctl.c') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index cf0b02720dd8..6c95f478ab80 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -90,6 +90,7 @@ #include #endif #include +#include #include #include "bonding_priv.h" diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index 005d79975f3b..a4938c6a5ebb 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1a1e6da77777..b09171110ec4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 0caf6e9bccb8..cf2b3ad75c9b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "bnxt_hsi.h" #include "bnxt.h" diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 032e1a58af6f..6d7ba4d67a19 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2,6 +2,7 @@ /* Copyright(c) 2013 - 2018 Intel Corporation. */ #include +#include #include "iavf.h" #include "iavf_ptp.h" diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 0411a1897f57..2d826077d38c 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -13,6 +13,7 @@ #include #include +#include #include #include diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 2c65f867fd31..66e38ce9cd1d 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #define GENEVE_NETDEV_VER "0.6" diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index f6b0bfbbc753..0e0fe32d2da4 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -77,6 +77,7 @@ #include #include +#include #include #include diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 9c6501bf27bd..c51b318b8a72 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -29,6 +29,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index b56144ca2fde..0ed2fd833a5d 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -3,6 +3,7 @@ */ #include +#include #include "ipvlan.h" diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 201fddcd3b1e..1fb6ce6843ad 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -54,6 +54,7 @@ #include #include #include +#include #include /* blackhole_netdev - a device used for dsts that are marked expired! diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 4de5d63fd577..3d315e30ee47 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 4e9d54be887c..d0dfa6bca6cc 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 54d03b0628d2..d71fd2907cc8 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index ca77661688c0..53463767cc43 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index fb917560d0a2..d8fc0c79745d 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 05f5eeef539f..7bb53961c0ea 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 36cf6191335e..7168b33adadb 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 227d7f5a302a..8c49e903cb3a 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d206c9592b60..9a297757df7e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2630,40 +2630,6 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, f(dev, &dev->_tx[i], arg); } -static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, - const struct lockdep_map *b) -{ - /* Only lower devices currently grab the instance lock, so no - * real ordering issues can occur. In the near future, only - * hardware devices will grab instance lock which also does not - * involve any ordering. Suppress lockdep ordering warnings - * until (if) we start grabbing instance lock on pure SW - * devices (bond/team/veth/etc). - */ - if (a == b) - return 0; - return -1; -} - -#define netdev_lockdep_set_classes(dev) \ -{ \ - static struct lock_class_key qdisc_tx_busylock_key; \ - static struct lock_class_key qdisc_xmit_lock_key; \ - static struct lock_class_key dev_addr_list_lock_key; \ - static struct lock_class_key dev_instance_lock_key; \ - unsigned int i; \ - \ - (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ - lockdep_set_class(&(dev)->addr_list_lock, \ - &dev_addr_list_lock_key); \ - lockdep_set_class(&(dev)->lock, \ - &dev_instance_lock_key); \ - lock_set_cmp_fn(&dev->lock, netdev_lock_cmp_fn, NULL); \ - for (i = 0; i < (dev)->num_tx_queues; i++) \ - lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \ - &qdisc_xmit_lock_key); \ -} - u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, @@ -2765,56 +2731,11 @@ static inline void netdev_lock(struct net_device *dev) mutex_lock(&dev->lock); } -static inline bool netdev_trylock(struct net_device *dev) -{ - return mutex_trylock(&dev->lock); -} - static inline void netdev_unlock(struct net_device *dev) { mutex_unlock(&dev->lock); } - -static inline void netdev_assert_locked(struct net_device *dev) -{ - lockdep_assert_held(&dev->lock); -} - -static inline void netdev_assert_locked_or_invisible(struct net_device *dev) -{ - if (dev->reg_state == NETREG_REGISTERED || - dev->reg_state == NETREG_UNREGISTERING) - netdev_assert_locked(dev); -} - -static inline bool netdev_need_ops_lock(struct net_device *dev) -{ - bool ret = dev->request_ops_lock || !!dev->queue_mgmt_ops; - -#if IS_ENABLED(CONFIG_NET_SHAPER) - ret |= !!dev->netdev_ops->net_shaper_ops; -#endif - - return ret; -} - -static inline void netdev_lock_ops(struct net_device *dev) -{ - if (netdev_need_ops_lock(dev)) - netdev_lock(dev); -} - -static inline void netdev_unlock_ops(struct net_device *dev) -{ - if (netdev_need_ops_lock(dev)) - netdev_unlock(dev); -} - -static inline void netdev_ops_assert_locked(struct net_device *dev) -{ - if (netdev_need_ops_lock(dev)) - lockdep_assert_held(&dev->lock); -} +/* Additional netdev_lock()-related helpers are in net/netdev_lock.h */ void netif_napi_set_irq_locked(struct napi_struct *napi, int irq); diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h new file mode 100644 index 000000000000..99631fbd7f54 --- /dev/null +++ b/include/net/netdev_lock.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _NET_NETDEV_LOCK_H +#define _NET_NETDEV_LOCK_H + +#include +#include + +static inline bool netdev_trylock(struct net_device *dev) +{ + return mutex_trylock(&dev->lock); +} + +static inline void netdev_assert_locked(struct net_device *dev) +{ + lockdep_assert_held(&dev->lock); +} + +static inline void netdev_assert_locked_or_invisible(struct net_device *dev) +{ + if (dev->reg_state == NETREG_REGISTERED || + dev->reg_state == NETREG_UNREGISTERING) + netdev_assert_locked(dev); +} + +static inline bool netdev_need_ops_lock(struct net_device *dev) +{ + bool ret = dev->request_ops_lock || !!dev->queue_mgmt_ops; + +#if IS_ENABLED(CONFIG_NET_SHAPER) + ret |= !!dev->netdev_ops->net_shaper_ops; +#endif + + return ret; +} + +static inline void netdev_lock_ops(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + netdev_lock(dev); +} + +static inline void netdev_unlock_ops(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + netdev_unlock(dev); +} + +static inline void netdev_ops_assert_locked(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + lockdep_assert_held(&dev->lock); +} + +static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, + const struct lockdep_map *b) +{ + /* Only lower devices currently grab the instance lock, so no + * real ordering issues can occur. In the near future, only + * hardware devices will grab instance lock which also does not + * involve any ordering. Suppress lockdep ordering warnings + * until (if) we start grabbing instance lock on pure SW + * devices (bond/team/veth/etc). + */ + if (a == b) + return 0; + return -1; +} + +#define netdev_lockdep_set_classes(dev) \ +{ \ + static struct lock_class_key qdisc_tx_busylock_key; \ + static struct lock_class_key qdisc_xmit_lock_key; \ + static struct lock_class_key dev_addr_list_lock_key; \ + static struct lock_class_key dev_instance_lock_key; \ + unsigned int i; \ + \ + (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ + lockdep_set_class(&(dev)->addr_list_lock, \ + &dev_addr_list_lock_key); \ + lockdep_set_class(&(dev)->lock, \ + &dev_instance_lock_key); \ + lock_set_cmp_fn(&dev->lock, netdev_lock_cmp_fn, NULL); \ + for (i = 0; i < (dev)->num_tx_queues; i++) \ + lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \ + &qdisc_xmit_lock_key); \ +} + +#endif diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 4f707cfe7f10..42ae8d595c2c 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -25,6 +25,7 @@ #include #include #include +#include #include /* Protects offdevs, members of bpf_offload_netdev and offload members diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index ee3283400716..770a4dcf7f63 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "vlan.h" #include "vlanproc.h" diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 50cfec8ccac4..1298c8685bad 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 9d8c72ed01ab..a818fdc22da9 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -16,6 +16,8 @@ #include #include +#include + #include "br_private.h" #define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \ diff --git a/net/core/dev.c b/net/core/dev.c index a0f75a1d1f5a..1cb134ff7327 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -156,6 +156,7 @@ #include #include #include +#include #include #include #include diff --git a/net/core/dev.h b/net/core/dev.h index b50ca645c086..0ddd3631acb0 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -6,6 +6,7 @@ #include #include #include +#include struct net; struct netlink_ext_ack; diff --git a/net/core/dev_api.c b/net/core/dev_api.c index 655a95fb7baa..1f0e24849bc6 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later + #include +#include #include "dev.h" diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 296e52d1395d..5471cf4fc984 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "dev.h" diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 02d1d40b47ae..529a0f721268 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 88a352b02bce..90597bf84e3d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #if IS_ENABLED(CONFIG_IPV6) #include diff --git a/net/dsa/conduit.c b/net/dsa/conduit.c index f21bb2551bed..4ae255cfb23f 100644 --- a/net/dsa/conduit.c +++ b/net/dsa/conduit.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "conduit.h" #include "dsa.h" diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index ddcba073321f..0364b8fb577b 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -2,6 +2,7 @@ #include #include +#include #include "netlink.h" #include "common.h" diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c index 946830af3e7c..df5f344209c4 100644 --- a/net/ethtool/cmis_fw_update.c +++ b/net/ethtool/cmis_fw_update.c @@ -2,6 +2,7 @@ #include #include +#include #include "common.h" #include "module_fw.h" diff --git a/net/ethtool/features.c b/net/ethtool/features.c index ccffd64d5a87..f2217983be2b 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +#include + #include "netlink.h" #include "common.h" #include "bitset.h" diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 496a2774100c..221639407c72 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "common.h" diff --git a/net/ethtool/module.c b/net/ethtool/module.c index d3d2e135e45e..4d4e0a82579a 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "netlink.h" #include "common.h" diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 70834947f474..a163d40c6431 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include #include #include diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c index 1a6b725d1f14..1f590e8d75ed 100644 --- a/net/ethtool/phy.c +++ b/net/ethtool/phy.c @@ -9,6 +9,7 @@ #include #include #include +#include struct phy_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index ec41d1d7eefe..6d9b1769896b 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +#include + #include "netlink.h" #include "common.h" diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index 73b6a89b8731..32204cca24da 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "netlink.h" #include "common.h" diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 9a9da74b0a4f..018929563c6b 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -50,6 +50,7 @@ #include #include +#include #include "6lowpan_i.h" diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 4b06dc7e04f2..1024f961ec9a 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c6ebb6a6d390..957ca98fa70f 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 170a6ac30889..a04dd1bb4b19 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 83c055996fbb..09ec4b0ad7dc 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #define IP6_VTI_HASH_SIZE_SHIFT 5 diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6f04703fe638..9a0f32acb750 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -51,6 +51,7 @@ #include #include #include +#include #include /* diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index e83691073496..cf0b66f4fb29 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f5101c2ffc66..abace7665cfe 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index f864e5d70b40..e5d104ce7b82 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 0e6ca568fdee..14716ad3d7bc 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include -- cgit v1.2.3 From 8033d2aef51722fe74068b52553625ed91ea256c Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 12 Mar 2025 12:05:12 -0700 Subject: Revert "net: replace dev_addr_sem with netdev instance lock" This reverts commit df43d8bf10316a7c3b1e47e3cc0057a54df4a5b8. Cc: Kohei Enju Reviewed-by: Kuniyuki Iwashima Fixes: df43d8bf1031 ("net: replace dev_addr_sem with netdev instance lock") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250312190513.1252045-2-sdf@fomichev.me Tested-by: Lei Yang Signed-off-by: Paolo Abeni --- drivers/net/tap.c | 2 +- drivers/net/tun.c | 2 +- include/linux/netdevice.h | 6 +++++- net/core/dev.c | 52 +++++++++++++++++++++++------------------------ net/core/dev.h | 3 ++- net/core/dev_api.c | 17 ++++++++++++++-- net/core/dev_ioctl.c | 2 +- net/core/net-sysfs.c | 7 +++++-- net/core/rtnetlink.c | 6 +----- 9 files changed, 56 insertions(+), 41 deletions(-) (limited to 'net/core/dev_ioctl.c') diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 4382f5e323b0..d4ece538f1b2 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1017,7 +1017,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd, rtnl_unlock(); return -ENOLINK; } - ret = dev_set_mac_address(tap->dev, &sa, NULL); + ret = dev_set_mac_address_user(tap->dev, &sa, NULL); tap_put_tap_dev(tap); rtnl_unlock(); return ret; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 55b60cb68d00..f75f912a0225 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3193,7 +3193,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, case SIOCSIFHWADDR: /* Set hw address */ - ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr, NULL); + ret = dev_set_mac_address_user(tun->dev, &ifr.ifr_hwaddr, NULL); break; case TUNGETSNDBUF: diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 67527243459b..0db9fc0afe36 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2498,7 +2498,7 @@ struct net_device { * * Protects: * @gro_flush_timeout, @napi_defer_hard_irqs, @napi_list, - * @net_shaper_hierarchy, @reg_state, @threaded, @dev_addr + * @net_shaper_hierarchy, @reg_state, @threaded * * Partially protects (writers must hold both @lock and rtnl_lock): * @up @@ -4196,6 +4196,10 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); +int netif_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack); +int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); diff --git a/net/core/dev.c b/net/core/dev.c index 6fa6ed5b5798..977a9946d39c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1059,28 +1059,6 @@ struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex) return __netdev_put_lock(dev); } -/** - * netdev_get_by_name_lock() - find a device by its name - * @net: the applicable net namespace - * @name: name of device - * - * Search for an interface by name. If a valid device - * with @name is found it will be returned with netdev->lock held. - * netdev_unlock() must be called to release it. - * - * Return: pointer to a device with lock held, NULL if not found. - */ -struct net_device *netdev_get_by_name_lock(struct net *net, const char *name) -{ - struct net_device *dev; - - dev = dev_get_by_name(net, name); - if (!dev) - return NULL; - - return __netdev_put_lock(dev); -} - struct net_device * netdev_xa_find_lock(struct net *net, struct net_device *dev, unsigned long *index) @@ -9597,24 +9575,44 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa, return 0; } +DECLARE_RWSEM(dev_addr_sem); + +int netif_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack) +{ + int ret; + + down_write(&dev_addr_sem); + ret = netif_set_mac_address(dev, sa, extack); + up_write(&dev_addr_sem); + return ret; +} + int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { size_t size = sizeof(sa->sa_data_min); struct net_device *dev; + int ret = 0; - dev = netdev_get_by_name_lock(net, dev_name); - if (!dev) - return -ENODEV; + down_read(&dev_addr_sem); + rcu_read_lock(); + dev = dev_get_by_name_rcu(net, dev_name); + if (!dev) { + ret = -ENODEV; + goto unlock; + } if (!dev->addr_len) memset(sa->sa_data, 0, size); else memcpy(sa->sa_data, dev->dev_addr, min_t(size_t, size, dev->addr_len)); sa->sa_family = dev->type; - netdev_unlock(dev); - return 0; +unlock: + rcu_read_unlock(); + up_read(&dev_addr_sem); + return ret; } EXPORT_SYMBOL(dev_get_mac_address); diff --git a/net/core/dev.h b/net/core/dev.h index 0ddd3631acb0..7ee203395d8e 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -29,7 +29,6 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id); struct net_device *dev_get_by_napi_id(unsigned int napi_id); struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex); -struct net_device *netdev_get_by_name_lock(struct net *net, const char *name); struct net_device *__netdev_put_lock(struct net_device *dev); struct net_device * netdev_xa_find_lock(struct net *net, struct net_device *dev, @@ -71,6 +70,8 @@ extern int weight_p; extern int dev_weight_rx_bias; extern int dev_weight_tx_bias; +extern struct rw_semaphore dev_addr_sem; + /* rtnl helpers */ extern struct list_head net_todo_list; void netdev_run_todo(void); diff --git a/net/core/dev_api.c b/net/core/dev_api.c index 1f0e24849bc6..2e17548af685 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -84,6 +84,19 @@ void dev_set_group(struct net_device *dev, int new_group) netdev_unlock_ops(dev); } +int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_mac_address_user(dev, sa, extack); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_mac_address_user); + /** * dev_change_net_namespace() - move device to different nethost namespace * @dev: device @@ -299,9 +312,9 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, { int ret; - netdev_lock(dev); + netdev_lock_ops(dev); ret = netif_set_mac_address(dev, sa, extack); - netdev_unlock(dev); + netdev_unlock_ops(dev); return ret; } diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 5471cf4fc984..eb8b41ec5523 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -575,7 +575,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, case SIOCSIFHWADDR: if (dev->addr_len > sizeof(struct sockaddr)) return -EINVAL; - return dev_set_mac_address(dev, &ifr->ifr_hwaddr, NULL); + return dev_set_mac_address_user(dev, &ifr->ifr_hwaddr, NULL); case SIOCSIFHWBROADCAST: if (ifr->ifr_hwaddr.sa_family != dev->type) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 529a0f721268..abaa1c919b98 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -263,11 +263,14 @@ static ssize_t address_show(struct device *dev, struct device_attribute *attr, struct net_device *ndev = to_net_dev(dev); ssize_t ret = -EINVAL; - netdev_lock(ndev); + down_read(&dev_addr_sem); + + rcu_read_lock(); if (dev_isalive(ndev)) ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len); - netdev_unlock(ndev); + rcu_read_unlock(); + up_read(&dev_addr_sem); return ret; } static DEVICE_ATTR_RO(address); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 90597bf84e3d..9355058bf996 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3090,11 +3090,7 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); - if (!netdev_need_ops_lock(dev)) - netdev_lock(dev); - err = netif_set_mac_address(dev, sa, extack); - if (!netdev_need_ops_lock(dev)) - netdev_unlock(dev); + err = netif_set_mac_address_user(dev, sa, extack); kfree(sa); if (err) goto errout; -- cgit v1.2.3 From ed3ba9b6e280e14cc3148c1b226ba453f02fa76c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sun, 16 Mar 2025 12:28:37 -0700 Subject: net: Remove RTNL dance for SIOCBRADDIF and SIOCBRDELIF. SIOCBRDELIF is passed to dev_ioctl() first and later forwarded to br_ioctl_call(), which causes unnecessary RTNL dance and the splat below [0] under RTNL pressure. Let's say Thread A is trying to detach a device from a bridge and Thread B is trying to remove the bridge. In dev_ioctl(), Thread A bumps the bridge device's refcnt by netdev_hold() and releases RTNL because the following br_ioctl_call() also re-acquires RTNL. In the race window, Thread B could acquire RTNL and try to remove the bridge device. Then, rtnl_unlock() by Thread B will release RTNL and wait for netdev_put() by Thread A. Thread A, however, must hold RTNL after the unlock in dev_ifsioc(), which may take long under RTNL pressure, resulting in the splat by Thread B. Thread A (SIOCBRDELIF) Thread B (SIOCBRDELBR) ---------------------- ---------------------- sock_ioctl sock_ioctl `- sock_do_ioctl `- br_ioctl_call `- dev_ioctl `- br_ioctl_stub |- rtnl_lock | |- dev_ifsioc ' ' |- dev = __dev_get_by_name(...) |- netdev_hold(dev, ...) . / |- rtnl_unlock ------. | | |- br_ioctl_call `---> |- rtnl_lock Race | | `- br_ioctl_stub |- br_del_bridge Window | | | |- dev = __dev_get_by_name(...) | | | May take long | `- br_dev_delete(dev, ...) | | | under RTNL pressure | `- unregister_netdevice_queue(dev, ...) | | | | `- rtnl_unlock \ | |- rtnl_lock <-' `- netdev_run_todo | |- ... `- netdev_run_todo | `- rtnl_unlock |- __rtnl_unlock | |- netdev_wait_allrefs_any |- netdev_put(dev, ...) <----------------' Wait refcnt decrement and log splat below To avoid blocking SIOCBRDELBR unnecessarily, let's not call dev_ioctl() for SIOCBRADDIF and SIOCBRDELIF. In the dev_ioctl() path, we do the following: 1. Copy struct ifreq by get_user_ifreq in sock_do_ioctl() 2. Check CAP_NET_ADMIN in dev_ioctl() 3. Call dev_load() in dev_ioctl() 4. Fetch the master dev from ifr.ifr_name in dev_ifsioc() 3. can be done by request_module() in br_ioctl_call(), so we move 1., 2., and 4. to br_ioctl_stub(). Note that 2. is also checked later in add_del_if(), but it's better performed before RTNL. SIOCBRADDIF and SIOCBRDELIF have been processed in dev_ioctl() since the pre-git era, and there seems to be no specific reason to process them there. [0]: unregister_netdevice: waiting for wpan3 to become free. Usage count = 2 ref_tracker: wpan3@ffff8880662d8608 has 1/1 users at __netdev_tracker_alloc include/linux/netdevice.h:4282 [inline] netdev_hold include/linux/netdevice.h:4311 [inline] dev_ifsioc+0xc6a/0x1160 net/core/dev_ioctl.c:624 dev_ioctl+0x255/0x10c0 net/core/dev_ioctl.c:826 sock_do_ioctl+0x1ca/0x260 net/socket.c:1213 sock_ioctl+0x23a/0x6c0 net/socket.c:1318 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:906 [inline] __se_sys_ioctl fs/ioctl.c:892 [inline] __x64_sys_ioctl+0x1a4/0x210 fs/ioctl.c:892 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcb/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 893b19587534 ("net: bridge: fix ioctl locking") Reported-by: syzkaller Reported-by: yan kang Reported-by: yue sun Closes: https://lore.kernel.org/netdev/SY8P300MB0421225D54EB92762AE8F0F2A1D32@SY8P300MB0421.AUSP300.PROD.OUTLOOK.COM/ Signed-off-by: Kuniyuki Iwashima Acked-by: Stanislav Fomichev Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250316192851.19781-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- include/linux/if_bridge.h | 6 ++---- net/bridge/br_ioctl.c | 36 +++++++++++++++++++++++++++++++++--- net/bridge/br_private.h | 3 +-- net/core/dev_ioctl.c | 19 ------------------- net/socket.c | 19 +++++++++---------- 5 files changed, 45 insertions(+), 38 deletions(-) (limited to 'net/core/dev_ioctl.c') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 3ff96ae31bf6..c5fe3b2a53e8 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -65,11 +65,9 @@ struct br_ip_list { #define BR_DEFAULT_AGEING_TIME (300 * HZ) struct net_bridge; -void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br, - unsigned int cmd, struct ifreq *ifr, +void brioctl_set(int (*hook)(struct net *net, unsigned int cmd, void __user *uarg)); -int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd, - struct ifreq *ifr, void __user *uarg); +int br_ioctl_call(struct net *net, unsigned int cmd, void __user *uarg); #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) int br_multicast_list_adjacent(struct net_device *dev, diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index f213ed108361..6bc0a11f2ed3 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -394,10 +394,26 @@ static int old_deviceless(struct net *net, void __user *data) return -EOPNOTSUPP; } -int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd, - struct ifreq *ifr, void __user *uarg) +int br_ioctl_stub(struct net *net, unsigned int cmd, void __user *uarg) { int ret = -EOPNOTSUPP; + struct ifreq ifr; + + if (cmd == SIOCBRADDIF || cmd == SIOCBRDELIF) { + void __user *data; + char *colon; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (get_user_ifreq(&ifr, &data, uarg)) + return -EFAULT; + + ifr.ifr_name[IFNAMSIZ - 1] = 0; + colon = strchr(ifr.ifr_name, ':'); + if (colon) + *colon = 0; + } rtnl_lock(); @@ -430,7 +446,21 @@ int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd, break; case SIOCBRADDIF: case SIOCBRDELIF: - ret = add_del_if(br, ifr->ifr_ifindex, cmd == SIOCBRADDIF); + { + struct net_device *dev; + + dev = __dev_get_by_name(net, ifr.ifr_name); + if (!dev || !netif_device_present(dev)) { + ret = -ENODEV; + break; + } + if (!netif_is_bridge_master(dev)) { + ret = -EOPNOTSUPP; + break; + } + + ret = add_del_if(netdev_priv(dev), ifr.ifr_ifindex, cmd == SIOCBRADDIF); + } break; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1054b8a88edc..d5b3c5936a79 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -949,8 +949,7 @@ br_port_get_check_rtnl(const struct net_device *dev) /* br_ioctl.c */ int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd); -int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd, - struct ifreq *ifr, void __user *uarg); +int br_ioctl_stub(struct net *net, unsigned int cmd, void __user *uarg); /* br_multicast.c */ #ifdef CONFIG_BRIDGE_IGMP_SNOOPING diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 4c2098ac9d72..57f79f8e8466 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -551,7 +551,6 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, int err; struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); const struct net_device_ops *ops; - netdevice_tracker dev_tracker; if (!dev) return -ENODEV; @@ -614,22 +613,6 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, case SIOCWANDEV: return dev_siocwandev(dev, &ifr->ifr_settings); - case SIOCBRADDIF: - case SIOCBRDELIF: - if (!netif_device_present(dev)) - return -ENODEV; - if (!netif_is_bridge_master(dev)) - return -EOPNOTSUPP; - - netdev_hold(dev, &dev_tracker, GFP_KERNEL); - rtnl_net_unlock(net); - - err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL); - - netdev_put(dev, &dev_tracker); - rtnl_net_lock(net); - return err; - case SIOCDEVPRIVATE ... SIOCDEVPRIVATE + 15: return dev_siocdevprivate(dev, ifr, data, cmd); @@ -812,8 +795,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: - case SIOCBRADDIF: - case SIOCBRDELIF: case SIOCSHWTSTAMP: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; diff --git a/net/socket.c b/net/socket.c index 28bae5a94234..38227d00d198 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1145,12 +1145,10 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br, - unsigned int cmd, struct ifreq *ifr, +static int (*br_ioctl_hook)(struct net *net, unsigned int cmd, void __user *uarg); -void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br, - unsigned int cmd, struct ifreq *ifr, +void brioctl_set(int (*hook)(struct net *net, unsigned int cmd, void __user *uarg)) { mutex_lock(&br_ioctl_mutex); @@ -1159,8 +1157,7 @@ void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br, } EXPORT_SYMBOL(brioctl_set); -int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd, - struct ifreq *ifr, void __user *uarg) +int br_ioctl_call(struct net *net, unsigned int cmd, void __user *uarg) { int err = -ENOPKG; @@ -1169,7 +1166,7 @@ int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd, mutex_lock(&br_ioctl_mutex); if (br_ioctl_hook) - err = br_ioctl_hook(net, br, cmd, ifr, uarg); + err = br_ioctl_hook(net, cmd, uarg); mutex_unlock(&br_ioctl_mutex); return err; @@ -1269,7 +1266,9 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) case SIOCSIFBR: case SIOCBRADDBR: case SIOCBRDELBR: - err = br_ioctl_call(net, NULL, cmd, NULL, argp); + case SIOCBRADDIF: + case SIOCBRDELIF: + err = br_ioctl_call(net, cmd, argp); break; case SIOCGIFVLAN: case SIOCSIFVLAN: @@ -3429,6 +3428,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGPGRP: case SIOCBRADDBR: case SIOCBRDELBR: + case SIOCBRADDIF: + case SIOCBRDELIF: case SIOCGIFVLAN: case SIOCSIFVLAN: case SIOCGSKNS: @@ -3468,8 +3469,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGIFPFLAGS: case SIOCGIFTXQLEN: case SIOCSIFTXQLEN: - case SIOCBRADDIF: - case SIOCBRDELIF: case SIOCGIFNAME: case SIOCSIFNAME: case SIOCGMIIPHY: -- cgit v1.2.3