From ae3fb6d32140e5c5b491892105ca89066171d217 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:04 +0200 Subject: xfrm: state: use hlist_for_each_entry_rcu helper This is required once we allow lockless access of bydst/bysrc hash tables. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9895a8c56d8c..904ab4d4ac05 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -76,18 +76,18 @@ static void xfrm_hash_transfer(struct hlist_head *list, h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family, nhashmask); - hlist_add_head(&x->bydst, ndsttable+h); + hlist_add_head_rcu(&x->bydst, ndsttable + h); h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family, nhashmask); - hlist_add_head(&x->bysrc, nsrctable+h); + hlist_add_head_rcu(&x->bysrc, nsrctable + h); if (x->id.spi) { h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family, nhashmask); - hlist_add_head(&x->byspi, nspitable+h); + hlist_add_head_rcu(&x->byspi, nspitable + h); } } } @@ -520,10 +520,10 @@ int __xfrm_state_delete(struct xfrm_state *x) x->km.state = XFRM_STATE_DEAD; spin_lock(&net->xfrm.xfrm_state_lock); list_del(&x->km.all); - hlist_del(&x->bydst); - hlist_del(&x->bysrc); + hlist_del_rcu(&x->bydst); + hlist_del_rcu(&x->bysrc); if (x->id.spi) - hlist_del(&x->byspi); + hlist_del_rcu(&x->byspi); net->xfrm.state_num--; spin_unlock(&net->xfrm.xfrm_state_lock); @@ -659,7 +659,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; - hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) { + hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto || @@ -683,7 +683,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; - hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) { if (x->props.family != family || x->id.proto != proto || !xfrm_addr_equal(&x->id.daddr, daddr, family) || @@ -781,7 +781,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && @@ -797,7 +797,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, goto found; h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) { + hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && @@ -852,12 +852,12 @@ found: if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; list_add(&x->km.all, &net->xfrm.state_all); - hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); h = xfrm_src_hash(net, daddr, saddr, encap_family); - hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); + hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); - hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); + hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); @@ -945,16 +945,16 @@ static void __xfrm_state_insert(struct xfrm_state *x) h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); - hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family); - hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); + hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); + hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); } tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); @@ -1063,9 +1063,9 @@ static struct xfrm_state *__find_acq_core(struct net *net, xfrm_state_hold(x); tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); list_add(&x->km.all, &net->xfrm.state_all); - hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); h = xfrm_src_hash(net, daddr, saddr, family); - hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); + hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h); net->xfrm.state_num++; @@ -1581,7 +1581,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) if (x->id.spi) { spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); + hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = 0; -- cgit v1.2.3 From 02efdff7e209859c2755ebe93b3bd0e3d40123ab Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:05 +0200 Subject: xfrm: state: use atomic_inc_not_zero to increment refcount Once xfrm_state_lookup_byaddr no longer acquires the state lock another cpu might be freeing the state entry at the same time. To detect this we use atomic_inc_not_zero, we then signal -EAGAIN to caller in case our result was stale. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 904ab4d4ac05..84c1db6254d5 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -37,6 +37,11 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; +static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) +{ + return atomic_inc_not_zero(&x->refcnt); +} + static inline unsigned int xfrm_dst_hash(struct net *net, const xfrm_address_t *daddr, const xfrm_address_t *saddr, @@ -668,7 +673,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, if ((mark & x->mark.m) != x->mark.v) continue; - xfrm_state_hold(x); + if (!xfrm_state_hold_rcu(x)) + continue; return x; } @@ -692,7 +698,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, if ((mark & x->mark.m) != x->mark.v) continue; - xfrm_state_hold(x); + if (!xfrm_state_hold_rcu(x)) + continue; return x; } @@ -871,10 +878,14 @@ found: } } out: - if (x) - xfrm_state_hold(x); - else + if (x) { + if (!xfrm_state_hold_rcu(x)) { + *err = -EAGAIN; + x = NULL; + } + } else { *err = acquire_in_progress ? -EAGAIN : error; + } spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (to_put) xfrm_state_put(to_put); -- cgit v1.2.3 From df7274eb70b7c8488170ebe8757dd94647a8e1e5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:06 +0200 Subject: xfrm: state: delay freeing until rcu grace period has elapsed The hash table backend memory and the state structs are free'd via kfree/vfree. Once we only rely on rcu during lookups we have to make sure no other cpu is currently accessing this before doing the free. Free operations already happen from worker so we can use synchronize_rcu to wait until concurrent readers are done. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 84c1db6254d5..8e373876924f 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -146,6 +146,9 @@ static void xfrm_hash_resize(struct work_struct *work) spin_unlock_bh(&net->xfrm.xfrm_state_lock); osize = (ohashmask + 1) * sizeof(struct hlist_head); + + synchronize_rcu(); + xfrm_hash_free(odst, osize); xfrm_hash_free(osrc, osize); xfrm_hash_free(ospi, osize); @@ -369,6 +372,8 @@ static void xfrm_state_gc_task(struct work_struct *work) hlist_move_list(&net->xfrm.state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); + synchronize_rcu(); + hlist_for_each_entry_safe(x, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); } -- cgit v1.2.3 From b65e3d7be06fd8ff5236439254f338fe1a8d4bbd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:07 +0200 Subject: xfrm: state: add sequence count to detect hash resizes Once xfrm_state_find is lockless we have to cope with a concurrent resize opertion. We use a sequence counter to block in case a resize is in progress and to detect if we might have missed a state that got moved to a new hash table. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 8e373876924f..ac4037cf6a29 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -36,6 +36,7 @@ */ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; +static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation); static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) { @@ -127,6 +128,7 @@ static void xfrm_hash_resize(struct work_struct *work) } spin_lock_bh(&net->xfrm.xfrm_state_lock); + write_seqcount_begin(&xfrm_state_hash_generation); nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; for (i = net->xfrm.state_hmask; i >= 0; i--) @@ -143,6 +145,7 @@ static void xfrm_hash_resize(struct work_struct *work) net->xfrm.state_byspi = nspi; net->xfrm.state_hmask = nhashmask; + write_seqcount_end(&xfrm_state_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_state_lock); osize = (ohashmask + 1) * sizeof(struct hlist_head); @@ -787,10 +790,13 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, struct xfrm_state *best = NULL; u32 mark = pol->mark.v & pol->mark.m; unsigned short encap_family = tmpl->encap_family; + unsigned int sequence; struct km_event c; to_put = NULL; + sequence = read_seqcount_begin(&xfrm_state_hash_generation); + spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) { @@ -894,6 +900,15 @@ out: spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (to_put) xfrm_state_put(to_put); + + if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) { + *err = -EAGAIN; + if (x) { + xfrm_state_put(x); + x = NULL; + } + } + return x; } -- cgit v1.2.3 From c8406998b80183ef87895ab1de4dbed8bb2d53a0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:08 +0200 Subject: xfrm: state: use rcu_deref and assign_pointer helpers Before xfrm_state_find() can use rcu_read_lock instead of xfrm_state_lock we need to switch users of the hash table to assign/obtain the pointers with the appropriate rcu helpers. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index ac4037cf6a29..53e7867f9254 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -28,6 +28,9 @@ #include "xfrm_hash.h" +#define xfrm_state_deref_prot(table, net) \ + rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) + /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) @@ -131,18 +134,17 @@ static void xfrm_hash_resize(struct work_struct *work) write_seqcount_begin(&xfrm_state_hash_generation); nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; + odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net); for (i = net->xfrm.state_hmask; i >= 0; i--) - xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi, - nhashmask); + xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask); - odst = net->xfrm.state_bydst; - osrc = net->xfrm.state_bysrc; - ospi = net->xfrm.state_byspi; + osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net); + ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net); ohashmask = net->xfrm.state_hmask; - net->xfrm.state_bydst = ndst; - net->xfrm.state_bysrc = nsrc; - net->xfrm.state_byspi = nspi; + rcu_assign_pointer(net->xfrm.state_bydst, ndst); + rcu_assign_pointer(net->xfrm.state_bysrc, nsrc); + rcu_assign_pointer(net->xfrm.state_byspi, nspi); net->xfrm.state_hmask = nhashmask; write_seqcount_end(&xfrm_state_hash_generation); -- cgit v1.2.3 From d737a5805581c6f99dad4caa9fdf80965d617d1a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:09 +0200 Subject: xfrm: state: don't use lock anymore unless acquire operation is needed push the lock down, after earlier patches we can rely on rcu to make sure state struct won't go away. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 6 +++--- net/xfrm/xfrm_state.c | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 24cd3949a9a4..1ab51d188408 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -38,9 +38,9 @@ struct netns_xfrm { * mode. Also, it can be used by ah/esp icmp error handler to find * offending SA. */ - struct hlist_head *state_bydst; - struct hlist_head *state_bysrc; - struct hlist_head *state_byspi; + struct hlist_head __rcu *state_bydst; + struct hlist_head __rcu *state_bysrc; + struct hlist_head __rcu *state_byspi; unsigned int state_hmask; unsigned int state_num; struct work_struct state_hash_work; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 53e7867f9254..1a15b658a79e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -799,7 +799,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, sequence = read_seqcount_begin(&xfrm_state_hash_generation); - spin_lock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_lock(); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) { if (x->props.family == encap_family && @@ -870,6 +870,7 @@ found: } if (km_query(x, tmpl, pol) == 0) { + spin_lock_bh(&net->xfrm.xfrm_state_lock); x->km.state = XFRM_STATE_ACQ; list_add(&x->km.all, &net->xfrm.state_all); hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); @@ -883,6 +884,7 @@ found: tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); net->xfrm.state_num++; xfrm_hash_grow_check(net, x->bydst.next != NULL); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); } else { x->km.state = XFRM_STATE_DEAD; to_put = x; @@ -899,7 +901,7 @@ out: } else { *err = acquire_in_progress ? -EAGAIN : error; } - spin_unlock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_unlock(); if (to_put) xfrm_state_put(to_put); -- cgit v1.2.3 From 35db57bbc4b7ab810bba6e6d6954a0faf5a842cf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 23 Aug 2016 16:00:12 +0200 Subject: xfrm: state: remove per-netns gc task After commit 5b8ef3415a21f173 ("xfrm: Remove ancient sleeping when the SA is in acquire state") gc does not need any per-netns data anymore. As far as gc is concerned all state structs are the same, so we can use a global work struct for it. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 2 -- net/xfrm/xfrm_state.c | 18 +++++++++--------- 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 177ed444d7b2..27bb9633c69d 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -44,8 +44,6 @@ struct netns_xfrm { unsigned int state_hmask; unsigned int state_num; struct work_struct state_hash_work; - struct hlist_head state_gc_list; - struct work_struct state_gc_work; struct list_head policy_all; struct hlist_head *policy_byidx; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1a15b658a79e..ba8bf518ba14 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -31,6 +31,8 @@ #define xfrm_state_deref_prot(table, net) \ rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) +static void xfrm_state_gc_task(struct work_struct *work); + /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) @@ -41,6 +43,9 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation); +static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); +static HLIST_HEAD(xfrm_state_gc_list); + static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) { return atomic_inc_not_zero(&x->refcnt); @@ -368,13 +373,12 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) static void xfrm_state_gc_task(struct work_struct *work) { - struct net *net = container_of(work, struct net, xfrm.state_gc_work); struct xfrm_state *x; struct hlist_node *tmp; struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); - hlist_move_list(&net->xfrm.state_gc_list, &gc_list); + hlist_move_list(&xfrm_state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); synchronize_rcu(); @@ -515,14 +519,12 @@ EXPORT_SYMBOL(xfrm_state_alloc); void __xfrm_state_destroy(struct xfrm_state *x) { - struct net *net = xs_net(x); - WARN_ON(x->km.state != XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->gclist, &net->xfrm.state_gc_list); + hlist_add_head(&x->gclist, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - schedule_work(&net->xfrm.state_gc_work); + schedule_work(&xfrm_state_gc_work); } EXPORT_SYMBOL(__xfrm_state_destroy); @@ -2134,8 +2136,6 @@ int __net_init xfrm_state_init(struct net *net) net->xfrm.state_num = 0; INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); - INIT_HLIST_HEAD(&net->xfrm.state_gc_list); - INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task); spin_lock_init(&net->xfrm.xfrm_state_lock); return 0; @@ -2153,7 +2153,7 @@ void xfrm_state_fini(struct net *net) flush_work(&net->xfrm.state_hash_work); xfrm_state_flush(net, IPSEC_PROTO_ANY, false); - flush_work(&net->xfrm.state_gc_work); + flush_work(&xfrm_state_gc_work); WARN_ON(!list_empty(&net->xfrm.state_all)); -- cgit v1.2.3 From b588479358ce26f32138e0f0a7ab0678f8e3e601 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Sun, 18 Sep 2016 07:42:53 +0000 Subject: xfrm: Fix memory leak of aead algorithm name commit 1a6509d99122 ("[IPSEC]: Add support for combined mode algorithms") introduced aead. The function attach_aead kmemdup()s the algorithm name during xfrm_state_construct(). However this memory is never freed. Implementation has since been slightly modified in commit ee5c23176fcc ("xfrm: Clone states properly on migration") without resolving this leak. This patch adds a kfree() call for the aead algorithm name. Fixes: 1a6509d99122 ("[IPSEC]: Add support for combined mode algorithms") Signed-off-by: Ilan Tayari Acked-by: Rami Rosen Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9895a8c56d8c..a30f898dc1c5 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -332,6 +332,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) { tasklet_hrtimer_cancel(&x->mtimer); del_timer_sync(&x->rtimer); + kfree(x->aead); kfree(x->aalg); kfree(x->ealg); kfree(x->calg); -- cgit v1.2.3 From c2f672fc94642bae96821a393f342edcfa9794a6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Sep 2016 15:45:26 +0200 Subject: xfrm: state lookup can be lockless This is called from the packet input path, we get lock contention if many cpus handle ipsec in parallel. After recent rcu conversion it is safe to call __xfrm_state_lookup without the spinlock. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index ba8bf518ba14..a38fdead38ea 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1431,9 +1431,9 @@ xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 { struct xfrm_state *x; - spin_lock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_lock(); x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); - spin_unlock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_unlock(); return x; } EXPORT_SYMBOL(xfrm_state_lookup); -- cgit v1.2.3