From a82dc19db13649aa4232ce37cb6f4ceff851e2fe Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 8 Apr 2025 12:59:48 -0700 Subject: net: avoid potential race between netdev_get_by_index_lock() and netns switch netdev_get_by_index_lock() performs following steps: rcu_lock(); dev = lookup(netns, ifindex); dev_get(dev); rcu_unlock(); [... lock & validate the dev ...] return dev Validation right now only checks if the device is registered but since the lookup is netns-aware we must also protect against the device switching netns right after we dropped the RCU lock. Otherwise the caller in netns1 may get a pointer to a device which has just switched to netns2. We can't hold the lock for the entire netns change process (because of the NETDEV_UNREGISTER notifier), and there's no existing marking to indicate that the netns is unlisted because of netns move, so add one. AFAIU none of the existing netdev_get_by_index_lock() callers can suffer from this problem (NAPI code double checks the netns membership and other callers are either under rtnl_lock or not ns-sensitive), so this patch does not have to be treated as a fix. Reviewed-by: Joe Damato Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250408195956.412733-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/dev.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 4ccc6dc5303e..8c13e5339cc9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -828,7 +828,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id) dev_hold(dev); rcu_read_unlock(); - dev = __netdev_put_lock(dev); + dev = __netdev_put_lock(dev, net); if (!dev) return NULL; @@ -1039,10 +1039,11 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id) * This helper is intended for locking net_device after it has been looked up * using a lockless lookup helper. Lock prevents the instance from going away. */ -struct net_device *__netdev_put_lock(struct net_device *dev) +struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net) { netdev_lock(dev); - if (dev->reg_state > NETREG_REGISTERED) { + if (dev->reg_state > NETREG_REGISTERED || + dev->moving_ns || !net_eq(dev_net(dev), net)) { netdev_unlock(dev); dev_put(dev); return NULL; @@ -1070,7 +1071,7 @@ struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex) if (!dev) return NULL; - return __netdev_put_lock(dev); + return __netdev_put_lock(dev, net); } struct net_device * @@ -1090,7 +1091,7 @@ netdev_xa_find_lock(struct net *net, struct net_device *dev, dev_hold(dev); rcu_read_unlock(); - dev = __netdev_put_lock(dev); + dev = __netdev_put_lock(dev, net); if (dev) return dev; @@ -12157,7 +12158,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, netif_close(dev); /* And unlink it from device chain */ unlist_netdevice(dev); - netdev_unlock_ops(dev); + + if (!netdev_need_ops_lock(dev)) + netdev_lock(dev); + dev->moving_ns = true; + netdev_unlock(dev); synchronize_net(); @@ -12193,7 +12198,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, move_netdevice_notifiers_dev_net(dev, net); /* Actually switch the network namespace */ + netdev_lock(dev); dev_net_set(dev, net); + netdev_unlock(dev); dev->ifindex = new_ifindex; if (new_name[0]) { @@ -12219,7 +12226,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, err = netdev_change_owner(dev, net_old, net); WARN_ON(err); - netdev_lock_ops(dev); + netdev_lock(dev); + dev->moving_ns = false; + if (!netdev_need_ops_lock(dev)) + netdev_unlock(dev); + /* Add the device back in the hashes */ list_netdevice(dev); /* Notify protocols, that a new device appeared. */ -- cgit v1.2.3