aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-08-29 19:36:34 -0700
committerJakub Kicinski <kuba@kernel.org>2025-08-29 19:36:34 -0700
commit864ecc4a6dade82d3f70eab43dad0e277aa6fc78 (patch)
tree2c8e7f3348c118943fec8933dbd1462224555985
parentMerge branch 'inet_diag-make-dumps-faster-with-simple-filters' (diff)
parentipv4: start using dst_dev_rcu() (diff)
downloadlinux-864ecc4a6dade82d3f70eab43dad0e277aa6fc78.tar.gz
linux-864ecc4a6dade82d3f70eab43dad0e277aa6fc78.zip
Merge branch 'net-add-rcu-safety-to-dst-dev'
Eric Dumazet says: ==================== net: add rcu safety to dst->dev Followup of commit 88fe14253e18 ("net: dst: add four helpers to annotate data-races around dst->dev"). Use lockdep enabled helpers to convert our unsafe dst->dev uses one at a time. More to come... ==================== Link: https://patch.msgid.link/20250828195823.3958522-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--include/net/dst.h16
-rw-r--r--include/net/ip.h6
-rw-r--r--include/net/ip6_route.h2
-rw-r--r--include/net/route.h2
-rw-r--r--net/core/dst.c2
-rw-r--r--net/core/sock.c16
-rw-r--r--net/ipv4/icmp.c6
-rw-r--r--net/ipv4/ip_fragment.c6
-rw-r--r--net/ipv4/ipmr.c6
-rw-r--r--net/ipv4/route.c8
-rw-r--r--net/ipv4/tcp_fastopen.c7
-rw-r--r--net/ipv4/tcp_metrics.c6
-rw-r--r--net/ipv6/anycast.c2
-rw-r--r--net/ipv6/icmp.c6
-rw-r--r--net/ipv6/ip6_output.c64
-rw-r--r--net/ipv6/mcast.c2
-rw-r--r--net/ipv6/ndisc.c2
-rw-r--r--net/ipv6/output_core.c8
-rw-r--r--net/ipv6/route.c7
19 files changed, 99 insertions, 75 deletions
diff --git a/include/net/dst.h b/include/net/dst.h
index bab01363bb97..f8aa1239b4db 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -24,7 +24,10 @@
struct sk_buff;
struct dst_entry {
- struct net_device *dev;
+ union {
+ struct net_device *dev;
+ struct net_device __rcu *dev_rcu;
+ };
struct dst_ops *ops;
unsigned long _metrics;
unsigned long expires;
@@ -570,9 +573,12 @@ static inline struct net_device *dst_dev(const struct dst_entry *dst)
static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst)
{
- /* In the future, use rcu_dereference(dst->dev) */
- WARN_ON_ONCE(!rcu_read_lock_held());
- return READ_ONCE(dst->dev);
+ return rcu_dereference(dst->dev_rcu);
+}
+
+static inline struct net *dst_dev_net_rcu(const struct dst_entry *dst)
+{
+ return dev_net_rcu(dst_dev_rcu(dst));
}
static inline struct net_device *skb_dst_dev(const struct sk_buff *skb)
@@ -592,7 +598,7 @@ static inline struct net *skb_dst_dev_net(const struct sk_buff *skb)
static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb)
{
- return dev_net_rcu(skb_dst_dev(skb));
+ return dev_net_rcu(skb_dst_dev_rcu(skb));
}
struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie);
diff --git a/include/net/ip.h b/include/net/ip.h
index befcba575129..6dbd2bf8fa9c 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -467,12 +467,14 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
bool forwarding)
{
const struct rtable *rt = dst_rtable(dst);
+ const struct net_device *dev;
unsigned int mtu, res;
struct net *net;
rcu_read_lock();
- net = dev_net_rcu(dst_dev(dst));
+ dev = dst_dev_rcu(dst);
+ net = dev_net_rcu(dev);
if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
ip_mtu_locked(dst) ||
!forwarding) {
@@ -486,7 +488,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
if (mtu)
goto out;
- mtu = READ_ONCE(dst_dev(dst)->mtu);
+ mtu = READ_ONCE(dev->mtu);
if (unlikely(ip_mtu_locked(dst))) {
if (rt->rt_uses_gateway && mtu > 576)
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 9255f21818ee..59f48ca3abdf 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -337,7 +337,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst
mtu = IPV6_MIN_MTU;
rcu_read_lock();
- idev = __in6_dev_get(dst_dev(dst));
+ idev = __in6_dev_get(dst_dev_rcu(dst));
if (idev)
mtu = READ_ONCE(idev->cnf.mtu6);
rcu_read_unlock();
diff --git a/include/net/route.h b/include/net/route.h
index c71998f464f8..f90106f383c5 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -390,7 +390,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
const struct net *net;
rcu_read_lock();
- net = dev_net_rcu(dst_dev(dst));
+ net = dst_dev_net_rcu(dst);
hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
rcu_read_unlock();
}
diff --git a/net/core/dst.c b/net/core/dst.c
index e2de8b68c41d..e9d35f49c9e7 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -150,7 +150,7 @@ void dst_dev_put(struct dst_entry *dst)
dst->ops->ifdown(dst, dev);
WRITE_ONCE(dst->input, dst_discard);
WRITE_ONCE(dst->output, dst_discard_out);
- WRITE_ONCE(dst->dev, blackhole_netdev);
+ rcu_assign_pointer(dst->dev_rcu, blackhole_netdev);
netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker,
GFP_ATOMIC);
}
diff --git a/net/core/sock.c b/net/core/sock.c
index e66ad1ec3a2d..9a8290fcc35d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2587,7 +2587,7 @@ free:
}
EXPORT_SYMBOL_GPL(sk_clone_lock);
-static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
+static u32 sk_dst_gso_max_size(struct sock *sk, const struct net_device *dev)
{
bool is_ipv6 = false;
u32 max_size;
@@ -2597,8 +2597,8 @@ static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr));
#endif
/* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
- max_size = is_ipv6 ? READ_ONCE(dst_dev(dst)->gso_max_size) :
- READ_ONCE(dst_dev(dst)->gso_ipv4_max_size);
+ max_size = is_ipv6 ? READ_ONCE(dev->gso_max_size) :
+ READ_ONCE(dev->gso_ipv4_max_size);
if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
max_size = GSO_LEGACY_MAX_SIZE;
@@ -2607,9 +2607,12 @@ static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
+ const struct net_device *dev;
u32 max_segs = 1;
- sk->sk_route_caps = dst_dev(dst)->features;
+ rcu_read_lock();
+ dev = dst_dev_rcu(dst);
+ sk->sk_route_caps = dev->features;
if (sk_is_tcp(sk)) {
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2625,13 +2628,14 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
} else {
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
- sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
+ sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dev);
/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
- max_segs = max_t(u32, READ_ONCE(dst_dev(dst)->gso_max_segs), 1);
+ max_segs = max_t(u32, READ_ONCE(dev->gso_max_segs), 1);
}
}
sk->sk_gso_max_segs = max_segs;
sk_dst_set(sk, dst);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(sk_setup_caps);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 7248c15cbd75..823c70e34de8 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -319,17 +319,17 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
return true;
/* No rate limit on loopback */
- dev = dst_dev(dst);
+ rcu_read_lock();
+ dev = dst_dev_rcu(dst);
if (dev && (dev->flags & IFF_LOOPBACK))
goto out;
- rcu_read_lock();
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr,
l3mdev_master_ifindex_rcu(dev));
rc = inet_peer_xrlim_allow(peer,
READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
- rcu_read_unlock();
out:
+ rcu_read_unlock();
if (!rc)
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
else
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b2584cce90ae..f7012479713b 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -476,14 +476,16 @@ out_fail:
/* Process an incoming IP datagram fragment. */
int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
{
- struct net_device *dev = skb->dev ? : skb_dst_dev(skb);
- int vif = l3mdev_master_ifindex_rcu(dev);
+ struct net_device *dev;
struct ipq *qp;
+ int vif;
__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
/* Lookup (or create) queue header */
rcu_read_lock();
+ dev = skb->dev ? : skb_dst_dev_rcu(skb);
+ vif = l3mdev_master_ifindex_rcu(dev);
qp = ip_find(net, ip_hdr(skb), user, vif);
if (qp) {
int ret, refs = 0;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 345e5faac634..ca9eaee4c2ef 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1905,7 +1905,7 @@ static int ipmr_prepare_xmit(struct net *net, struct mr_table *mrt,
return -1;
}
- encap += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
+ encap += LL_RESERVED_SPACE(dst_dev_rcu(&rt->dst)) + rt->dst.header_len;
if (skb_cow(skb, encap)) {
ip_rt_put(rt);
@@ -1958,7 +1958,7 @@ static void ipmr_queue_fwd_xmit(struct net *net, struct mr_table *mrt,
* result in receiving multiple packets.
*/
NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
- net, NULL, skb, skb->dev, rt->dst.dev,
+ net, NULL, skb, skb->dev, dst_dev_rcu(&rt->dst),
ipmr_forward_finish);
return;
@@ -2302,7 +2302,7 @@ int ip_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb)
guard(rcu)();
- dev = rt->dst.dev;
+ dev = dst_dev_rcu(&rt->dst);
if (IPCB(skb)->flags & IPSKB_FORWARDED)
goto mc_output;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cc86a917a1bb..50309f2ab132 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -414,11 +414,11 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
const void *daddr)
{
const struct rtable *rt = container_of(dst, struct rtable, dst);
- struct net_device *dev = dst_dev(dst);
+ struct net_device *dev;
struct neighbour *n;
rcu_read_lock();
-
+ dev = dst_dev_rcu(dst);
if (likely(rt->rt_gw_family == AF_INET)) {
n = ip_neigh_gw4(dev, rt->rt_gw4);
} else if (rt->rt_gw_family == AF_INET6) {
@@ -1027,7 +1027,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
return;
rcu_read_lock();
- net = dev_net_rcu(dst_dev(dst));
+ net = dst_dev_net_rcu(dst);
if (mtu < net->ipv4.ip_rt_min_pmtu) {
lock = true;
mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu);
@@ -1327,7 +1327,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
struct net *net;
rcu_read_lock();
- net = dev_net_rcu(dst_dev(dst));
+ net = dst_dev_net_rcu(dst);
advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
net->ipv4.ip_rt_min_advmss);
rcu_read_unlock();
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index f1884f0c9e52..7d945a527daf 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -576,11 +576,12 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
}
} else if (tp->syn_fastopen_ch &&
atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) {
- dst = sk_dst_get(sk);
- dev = dst ? dst_dev(dst) : NULL;
+ rcu_read_lock();
+ dst = __sk_dst_get(sk);
+ dev = dst ? dst_dev_rcu(dst) : NULL;
if (!(dev && (dev->flags & IFF_LOOPBACK)))
atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0);
- dst_release(dst);
+ rcu_read_unlock();
}
}
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 03c068ea27b6..10e86f1008e9 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -170,7 +170,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
struct net *net;
spin_lock_bh(&tcp_metrics_lock);
- net = dev_net_rcu(dst_dev(dst));
+ net = dst_dev_net_rcu(dst);
/* While waiting for the spin-lock the cache might have been populated
* with this entry and so we have to check again.
@@ -273,7 +273,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
return NULL;
}
- net = dev_net_rcu(dst_dev(dst));
+ net = dst_dev_net_rcu(dst);
hash ^= net_hash_mix(net);
hash = hash_32(hash, tcp_metrics_hash_log);
@@ -318,7 +318,7 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
else
return NULL;
- net = dev_net_rcu(dst_dev(dst));
+ net = dst_dev_net_rcu(dst);
hash ^= net_hash_mix(net);
hash = hash_32(hash, tcp_metrics_hash_log);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index f8a8e46286b8..52599584422b 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -104,7 +104,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
rcu_read_lock();
rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
- dev = dst_dev(&rt->dst);
+ dev = dst_dev_rcu(&rt->dst);
netdev_hold(dev, &dev_tracker, GFP_ATOMIC);
ip6_rt_put(rt);
} else if (ishost) {
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 44550957fd4e..95cdd4cacb00 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -209,7 +209,8 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
* this lookup should be more aggressive (not longer than timeout).
*/
dst = ip6_route_output(net, sk, fl6);
- dev = dst_dev(dst);
+ rcu_read_lock();
+ dev = dst_dev_rcu(dst);
if (dst->error) {
IP6_INC_STATS(net, ip6_dst_idev(dst),
IPSTATS_MIB_OUTNOROUTES);
@@ -224,11 +225,10 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
if (rt->rt6i_dst.plen < 128)
tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
- rcu_read_lock();
peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
res = inet_peer_xrlim_allow(peer, tmo);
- rcu_read_unlock();
}
+ rcu_read_unlock();
if (!res)
__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
ICMP6_MIB_RATELIMITHOST);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1e1410237b6e..9d64c13bab5e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -60,7 +60,7 @@
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- struct net_device *dev = dst_dev(dst);
+ struct net_device *dev = dst_dev_rcu(dst);
struct inet6_dev *idev = ip6_dst_idev(dst);
unsigned int hh_len = LL_RESERVED_SPACE(dev);
const struct in6_addr *daddr, *nexthop;
@@ -70,15 +70,12 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
/* Be paranoid, rather than too clever. */
if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
- /* Make sure idev stays alive */
- rcu_read_lock();
+ /* idev stays alive because we hold rcu_read_lock(). */
skb = skb_expand_head(skb, hh_len);
if (!skb) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
- rcu_read_unlock();
return -ENOMEM;
}
- rcu_read_unlock();
}
hdr = ipv6_hdr(skb);
@@ -123,7 +120,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
- rcu_read_lock();
nexthop = rt6_nexthop(dst_rt6_info(dst), daddr);
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
@@ -131,7 +127,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
if (IS_ERR(neigh)) {
- rcu_read_unlock();
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
return -EINVAL;
@@ -139,7 +134,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
sock_confirm_neigh(skb, neigh);
ret = neigh_output(neigh, skb, false);
- rcu_read_unlock();
return ret;
}
@@ -233,22 +227,29 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- struct net_device *dev = dst_dev(dst), *indev = skb->dev;
- struct inet6_dev *idev = ip6_dst_idev(dst);
+ struct net_device *dev, *indev = skb->dev;
+ struct inet6_dev *idev;
+ int ret;
skb->protocol = htons(ETH_P_IPV6);
+ rcu_read_lock();
+ dev = dst_dev_rcu(dst);
+ idev = ip6_dst_idev(dst);
skb->dev = dev;
if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ rcu_read_unlock();
kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
return 0;
}
- return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- net, sk, skb, indev, dev,
- ip6_finish_output,
- !(IP6CB(skb)->flags & IP6SKB_REROUTED));
+ ret = NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+ net, sk, skb, indev, dev,
+ ip6_finish_output,
+ !(IP6CB(skb)->flags & IP6SKB_REROUTED));
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(ip6_output);
@@ -268,35 +269,36 @@ bool ip6_autoflowlabel(struct net *net, const struct sock *sk)
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
__u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
- struct net *net = sock_net(sk);
const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *first_hop = &fl6->daddr;
struct dst_entry *dst = skb_dst(skb);
- struct net_device *dev = dst_dev(dst);
struct inet6_dev *idev = ip6_dst_idev(dst);
struct hop_jumbo_hdr *hop_jumbo;
int hoplen = sizeof(*hop_jumbo);
+ struct net *net = sock_net(sk);
unsigned int head_room;
+ struct net_device *dev;
struct ipv6hdr *hdr;
u8 proto = fl6->flowi6_proto;
int seg_len = skb->len;
- int hlimit = -1;
+ int ret, hlimit = -1;
u32 mtu;
+ rcu_read_lock();
+
+ dev = dst_dev_rcu(dst);
head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev);
if (opt)
head_room += opt->opt_nflen + opt->opt_flen;
if (unlikely(head_room > skb_headroom(skb))) {
- /* Make sure idev stays alive */
- rcu_read_lock();
+ /* idev stays alive while we hold rcu_read_lock(). */
skb = skb_expand_head(skb, head_room);
if (!skb) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
- rcu_read_unlock();
- return -ENOBUFS;
+ ret = -ENOBUFS;
+ goto unlock;
}
- rcu_read_unlock();
}
if (opt) {
@@ -358,17 +360,21 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
* skb to its handler for processing
*/
skb = l3mdev_ip6_out((struct sock *)sk, skb);
- if (unlikely(!skb))
- return 0;
+ if (unlikely(!skb)) {
+ ret = 0;
+ goto unlock;
+ }
/* hooks should never assume socket lock is held.
* we promote our socket to non const
*/
- return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
- net, (struct sock *)sk, skb, NULL, dev,
- dst_output);
+ ret = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+ net, (struct sock *)sk, skb, NULL, dev,
+ dst_output);
+ goto unlock;
}
+ ret = -EMSGSIZE;
skb->dev = dev;
/* ipv6_local_error() does not require socket lock,
* we promote our socket to non const
@@ -377,7 +383,9 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
- return -EMSGSIZE;
+unlock:
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(ip6_xmit);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 55c49dc14b1b..016b572e7d6f 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -180,7 +180,7 @@ static struct net_device *ip6_mc_find_dev(struct net *net,
rcu_read_lock();
rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
if (rt) {
- dev = dst_dev(&rt->dst);
+ dev = dst_dev_rcu(&rt->dst);
dev_hold(dev);
ip6_rt_put(rt);
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 57aaa7ae8ac3..f427e41e9c49 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -505,7 +505,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len);
- dev = dst_dev(dst);
+ dev = dst_dev_rcu(dst);
idev = __in6_dev_get(dev);
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index d21fe27fe21e..1c9b283a4132 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -104,18 +104,20 @@ EXPORT_SYMBOL(ip6_find_1stfragopt);
int ip6_dst_hoplimit(struct dst_entry *dst)
{
int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
+
+ rcu_read_lock();
if (hoplimit == 0) {
- struct net_device *dev = dst_dev(dst);
+ struct net_device *dev = dst_dev_rcu(dst);
struct inet6_dev *idev;
- rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev)
hoplimit = READ_ONCE(idev->cnf.hop_limit);
else
hoplimit = READ_ONCE(dev_net(dev)->ipv6.devconf_all->hop_limit);
- rcu_read_unlock();
}
+ rcu_read_unlock();
+
return hoplimit;
}
EXPORT_SYMBOL(ip6_dst_hoplimit);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3299cfa12e21..3371f16b7a3e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2943,7 +2943,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (res.f6i->nh) {
struct fib6_nh_match_arg arg = {
- .dev = dst_dev(dst),
+ .dev = dst_dev_rcu(dst),
.gw = &rt6->rt6i_gateway,
};
@@ -3238,7 +3238,6 @@ EXPORT_SYMBOL_GPL(ip6_sk_redirect);
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
- struct net_device *dev = dst_dev(dst);
unsigned int mtu = dst_mtu(dst);
struct net *net;
@@ -3246,7 +3245,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
rcu_read_lock();
- net = dev_net_rcu(dev);
+ net = dst_dev_net_rcu(dst);
if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
@@ -4301,7 +4300,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
if (res.f6i->nh) {
struct fib6_nh_match_arg arg = {
- .dev = dst_dev(dst),
+ .dev = dst_dev_rcu(dst),
.gw = &rt->rt6i_gateway,
};