about summary refs log tree commit diff stats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r-- net/bridge/br_private.h 2
-rw-r--r-- net/can/Kconfig 2
-rw-r--r-- net/core/dev.c 22
-rw-r--r-- net/core/dev_ioctl.c 132
-rw-r--r-- net/core/devlink.c 1
-rw-r--r-- net/core/filter.c 303
-rw-r--r-- net/core/link_watch.c 2
-rw-r--r-- net/core/net-sysfs.c 25
-rw-r--r-- net/core/net_namespace.c 62
-rw-r--r-- net/core/pktgen.c 266
-rw-r--r-- net/core/rtnetlink.c 13
-rw-r--r-- net/dsa/dsa2.c 7
-rw-r--r-- net/dsa/dsa_priv.h 4
-rw-r--r-- net/dsa/legacy.c 4
-rw-r--r-- net/dsa/port.c 103
-rw-r--r-- net/ipv4/af_inet.c 28
-rw-r--r-- net/ipv4/devinet.c 57
-rw-r--r-- net/ipv4/esp4_offload.c 3
-rw-r--r-- net/ipv4/fib_frontend.c 8
-rw-r--r-- net/ipv4/igmp.c 2
-rw-r--r-- net/ipv4/ip_gre.c 38
-rw-r--r-- net/ipv4/ip_sockglue.c 6
-rw-r--r-- net/ipv4/ip_tunnel.c 13
-rw-r--r-- net/ipv4/ipconfig.c 47
-rw-r--r-- net/ipv4/raw.c 15
-rw-r--r-- net/ipv4/tcp.c 26
-rw-r--r-- net/ipv4/tcp_nv.c 2
-rw-r--r-- net/ipv4/tcp_offload.c 3
-rw-r--r-- net/ipv4/tcp_output.c 6
-rw-r--r-- net/ipv4/tcp_timer.c 7
-rw-r--r-- net/ipv4/udp.c 15
-rw-r--r-- net/ipv4/udp_offload.c 3
-rw-r--r-- net/ipv4/xfrm4_mode_tunnel.c 1
-rw-r--r-- net/ipv6/esp6_offload.c 3
-rw-r--r-- net/ipv6/ip6_gre.c 36
-rw-r--r-- net/ipv6/ip6_output.c 2
-rw-r--r-- net/ipv6/ipv6_sockglue.c 2
-rw-r--r-- net/ipv6/route.c 146
-rw-r--r-- net/ipv6/tcpv6_offload.c 3
-rw-r--r-- net/ipv6/udp_offload.c 3
-rw-r--r-- net/ipv6/xfrm6_mode_tunnel.c 1
-rw-r--r-- net/kcm/kcmsock.c 25
-rw-r--r-- net/mac80211/debugfs_sta.c 4
-rw-r--r-- net/openvswitch/flow_netlink.c 52
-rw-r--r-- net/rds/tcp.c 5
-rw-r--r-- net/rds/tcp.h 2
-rw-r--r-- net/rds/tcp_send.c 4
-rw-r--r-- net/sched/act_csum.c 66
-rw-r--r-- net/sched/cls_api.c 15
-rw-r--r-- net/sched/cls_basic.c 2
-rw-r--r-- net/sched/cls_bpf.c 36
-rw-r--r-- net/sched/cls_cgroup.c 3
-rw-r--r-- net/sched/cls_flow.c 2
-rw-r--r-- net/sched/cls_flower.c 30
-rw-r--r-- net/sched/cls_fw.c 2
-rw-r--r-- net/sched/cls_matchall.c 19
-rw-r--r-- net/sched/cls_route.c 2
-rw-r--r-- net/sched/cls_rsvp.h 2
-rw-r--r-- net/sched/cls_tcindex.c 3
-rw-r--r-- net/sched/cls_u32.c 52
-rw-r--r-- net/sched/em_nbyte.c 2
-rw-r--r-- net/sched/sch_generic.c 4
-rw-r--r-- net/sctp/offload.c 3
-rw-r--r-- net/sctp/socket.c 3
-rw-r--r-- net/smc/af_smc.c 207
-rw-r--r-- net/smc/smc.h 5
-rw-r--r-- net/smc/smc_cdc.c 40
-rw-r--r-- net/smc/smc_cdc.h 1
-rw-r--r-- net/smc/smc_close.c 206
-rw-r--r-- net/smc/smc_close.h 1
-rw-r--r-- net/smc/smc_core.c 17
-rw-r--r-- net/smc/smc_diag.c 6
-rw-r--r-- net/smc/smc_ib.c 38
-rw-r--r-- net/smc/smc_tx.c 23
-rw-r--r-- net/smc/smc_wr.c 50
-rw-r--r-- net/smc/smc_wr.h 2
-rw-r--r-- net/socket.c 271
-rw-r--r-- net/tls/tls_sw.c 2
-rw-r--r-- net/wireless/wext-core.c 13
-rw-r--r-- net/xfrm/xfrm_device.c 12
-rw-r--r-- net/xfrm/xfrm_replay.c 2
-rw-r--r-- net/xfrm/xfrm_state.c 12
-rw-r--r-- net/xfrm/xfrm_user.c 18
83 files changed, 1552 insertions, 1136 deletions
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 80559fd11b7e..8e13a64d8c99 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -760,7 +760,7 @@ static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
static inline bool br_multicast_is_router(struct net_bridge *br)
{
- return 0;
+ return false;
}
static inline bool br_multicast_querier_exists(struct net_bridge *br,
diff --git a/net/can/Kconfig b/net/can/Kconfig
index a15c0e0d1fc7..a4399be54ff4 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -11,7 +11,7 @@ menuconfig CAN
1991, mainly for automotive, but now widely used in marine
(NMEA2000), industrial, and medical applications.
More information on the CAN network protocol family PF_CAN
- is contained in <Documentation/networking/can.txt>.
+ is contained in <Documentation/networking/can.rst>.
If you want CAN support you should say Y here and also to the
specific driver for your controller(s) below.
diff --git a/net/core/dev.c b/net/core/dev.c
index 94435cd09072..4670ccabe23a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1694,7 +1694,6 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
/**
* call_netdevice_notifiers_info - call all network notifier blocks
* @val: value passed unmodified to notifier function
- * @dev: net_device pointer passed unmodified to notifier function
* @info: notifier information data
*
* Call all network notifier blocks. Parameters and return value
@@ -3167,10 +3166,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
/* + transport layer */
- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
- hdr_len += tcp_hdrlen(skb);
- else
- hdr_len += sizeof(struct udphdr);
+ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
+ const struct tcphdr *th;
+ struct tcphdr _tcphdr;
+
+ th = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_tcphdr), &_tcphdr);
+ if (likely(th))
+ hdr_len += __tcp_hdrlen(th);
+ } else {
+ struct udphdr _udphdr;
+
+ if (skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_udphdr), &_udphdr))
+ hdr_len += sizeof(struct udphdr);
+ }
if (shinfo->gso_type & SKB_GSO_DODGY)
gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
@@ -6425,6 +6435,7 @@ rollback:
* netdev_upper_dev_link - Add a link to the upper device
* @dev: device
* @upper_dev: new upper device
+ * @extack: netlink extended ack
*
* Adds a link to device which is upper to this one. The caller must hold
* the RTNL lock. On a failure a negative errno code is returned.
@@ -6446,6 +6457,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
* @upper_dev: new upper device
* @upper_priv: upper device private
* @upper_info: upper info to be passed down via notifier
+ * @extack: netlink extended ack
*
* Adds a link to device which is upper to this one. In this case, only
* one master upper device can be linked, although other non-master devices
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 7e690d0ccd05..0ab1af04296c 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -18,26 +18,10 @@
* match. --pb
*/
-static int dev_ifname(struct net *net, struct ifreq __user *arg)
+static int dev_ifname(struct net *net, struct ifreq *ifr)
{
- struct ifreq ifr;
- int error;
-
- /*
- * Fetch the caller's info block.
- */
-
- if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
- return -EFAULT;
- ifr.ifr_name[IFNAMSIZ-1] = 0;
-
- error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex);
- if (error)
- return error;
-
- if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
- return -EFAULT;
- return 0;
+ ifr->ifr_name[IFNAMSIZ-1] = 0;
+ return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex);
}
static gifconf_func_t *gifconf_list[NPROTO];
@@ -66,9 +50,8 @@ EXPORT_SYMBOL(register_gifconf);
* Thus we will need a 'compatibility mode'.
*/
-static int dev_ifconf(struct net *net, char __user *arg)
+int dev_ifconf(struct net *net, struct ifconf *ifc, int size)
{
- struct ifconf ifc;
struct net_device *dev;
char __user *pos;
int len;
@@ -79,11 +62,8 @@ static int dev_ifconf(struct net *net, char __user *arg)
* Fetch the caller's info block.
*/
- if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
- return -EFAULT;
-
- pos = ifc.ifc_buf;
- len = ifc.ifc_len;
+ pos = ifc->ifc_buf;
+ len = ifc->ifc_len;
/*
* Loop over the interfaces, and write an info block for each.
@@ -95,10 +75,10 @@ static int dev_ifconf(struct net *net, char __user *arg)
if (gifconf_list[i]) {
int done;
if (!pos)
- done = gifconf_list[i](dev, NULL, 0);
+ done = gifconf_list[i](dev, NULL, 0, size);
else
done = gifconf_list[i](dev, pos + total,
- len - total);
+ len - total, size);
if (done < 0)
return -EFAULT;
total += done;
@@ -109,12 +89,12 @@ static int dev_ifconf(struct net *net, char __user *arg)
/*
* All done. Write the updated control block back to the caller.
*/
- ifc.ifc_len = total;
+ ifc->ifc_len = total;
/*
* Both BSD and Solaris return 0 here, so we do too.
*/
- return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
+ return 0;
}
/*
@@ -406,53 +386,24 @@ EXPORT_SYMBOL(dev_load);
* positive or a negative errno code on error.
*/
-int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_copyout)
{
- struct ifreq ifr;
int ret;
char *colon;
- /* One special case: SIOCGIFCONF takes ifconf argument
- and requires shared lock, because it sleeps writing
- to user space.
- */
-
- if (cmd == SIOCGIFCONF) {
- rtnl_lock();
- ret = dev_ifconf(net, (char __user *) arg);
- rtnl_unlock();
- return ret;
- }
+ if (need_copyout)
+ *need_copyout = true;
if (cmd == SIOCGIFNAME)
- return dev_ifname(net, (struct ifreq __user *)arg);
-
- /*
- * Take care of Wireless Extensions. Unfortunately struct iwreq
- * isn't a proper subset of struct ifreq (it's 8 byte shorter)
- * so we need to treat it specially, otherwise applications may
- * fault if the struct they're passing happens to land at the
- * end of a mapped page.
- */
- if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
- struct iwreq iwr;
-
- if (copy_from_user(&iwr, arg, sizeof(iwr)))
- return -EFAULT;
-
- iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0;
+ return dev_ifname(net, ifr);
- return wext_handle_ioctl(net, &iwr, cmd, arg);
- }
-
- if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
- return -EFAULT;
-
- ifr.ifr_name[IFNAMSIZ-1] = 0;
+ ifr->ifr_name[IFNAMSIZ-1] = 0;
- colon = strchr(ifr.ifr_name, ':');
+ colon = strchr(ifr->ifr_name, ':');
if (colon)
*colon = 0;
+ dev_load(net, ifr->ifr_name);
+
/*
* See which interface the caller is talking about.
*/
@@ -472,31 +423,19 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCGIFMAP:
case SIOCGIFINDEX:
case SIOCGIFTXQLEN:
- dev_load(net, ifr.ifr_name);
rcu_read_lock();
- ret = dev_ifsioc_locked(net, &ifr, cmd);
+ ret = dev_ifsioc_locked(net, ifr, cmd);
rcu_read_unlock();
- if (!ret) {
- if (colon)
- *colon = ':';
- if (copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
- }
+ if (colon)
+ *colon = ':';
return ret;
case SIOCETHTOOL:
- dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ethtool(net, &ifr);
+ ret = dev_ethtool(net, ifr);
rtnl_unlock();
- if (!ret) {
- if (colon)
- *colon = ':';
- if (copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
- }
+ if (colon)
+ *colon = ':';
return ret;
/*
@@ -510,17 +449,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCSIFNAME:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(net, &ifr, cmd);
+ ret = dev_ifsioc(net, ifr, cmd);
rtnl_unlock();
- if (!ret) {
- if (colon)
- *colon = ':';
- if (copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
- }
+ if (colon)
+ *colon = ':';
return ret;
/*
@@ -561,10 +494,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
/* fall through */
case SIOCBONDSLAVEINFOQUERY:
case SIOCBONDINFOQUERY:
- dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(net, &ifr, cmd);
+ ret = dev_ifsioc(net, ifr, cmd);
rtnl_unlock();
+ if (need_copyout)
+ *need_copyout = false;
return ret;
case SIOCGIFMEM:
@@ -584,13 +518,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
cmd == SIOCGHWTSTAMP ||
(cmd >= SIOCDEVPRIVATE &&
cmd <= SIOCDEVPRIVATE + 15)) {
- dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(net, &ifr, cmd);
+ ret = dev_ifsioc(net, ifr, cmd);
rtnl_unlock();
- if (!ret && copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
return ret;
}
return -ENOTTY;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 66d36705fb9d..18d385ed8237 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3182,6 +3182,7 @@ int devlink_resource_register(struct devlink *devlink,
resource_list = &parent_resource->resource_list;
resource->parent = parent_resource;
} else {
+ kfree(resource);
err = -EINVAL;
goto out;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 18da42a81d0c..08ab4c65a998 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -401,8 +401,8 @@ do_pass:
/* Classic BPF expects A and X to be reset first. These need
* to be guaranteed to be the first two instructions.
*/
- *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
- *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
+ *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
+ *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
/* All programs must keep CTX in callee saved BPF_REG_CTX.
* In eBPF case it's done by the compiler, here we need to
@@ -459,8 +459,15 @@ do_pass:
break;
if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
- fp->code == (BPF_ALU | BPF_MOD | BPF_X))
+ fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
+ /* Error with exception code on div/mod by 0.
+ * For cBPF programs, this has always returned 0.
+ */
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
+ *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
+ *insn++ = BPF_EXIT_INSN();
+ }
*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
break;
@@ -3232,6 +3239,29 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
ret = -EINVAL;
}
#ifdef CONFIG_INET
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (level == SOL_IPV6) {
+ if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
+ return -EINVAL;
+
+ val = *((int *)optval);
+ /* Only some options are supported */
+ switch (optname) {
+ case IPV6_TCLASS:
+ if (val < -1 || val > 0xff) {
+ ret = -EINVAL;
+ } else {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (val == -1)
+ val = 0;
+ np->tclass = val;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ }
+#endif
} else if (level == SOL_TCP &&
sk->sk_prot->setsockopt == tcp_setsockopt) {
if (optname == TCP_CONGESTION) {
@@ -3241,7 +3271,8 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
strncpy(name, optval, min_t(long, optlen,
TCP_CA_NAME_MAX-1));
name[TCP_CA_NAME_MAX-1] = 0;
- ret = tcp_set_congestion_control(sk, name, false, reinit);
+ ret = tcp_set_congestion_control(sk, name, false,
+ reinit);
} else {
struct tcp_sock *tp = tcp_sk(sk);
@@ -3307,6 +3338,22 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
} else {
goto err_clear;
}
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (level == SOL_IPV6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
+ goto err_clear;
+
+ /* Only some options are supported */
+ switch (optname) {
+ case IPV6_TCLASS:
+ *((int *)optval) = (int)np->tclass;
+ break;
+ default:
+ goto err_clear;
+ }
+#endif
} else {
goto err_clear;
}
@@ -3328,6 +3375,33 @@ static const struct bpf_func_proto bpf_getsockopt_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
+ int, argval)
+{
+ struct sock *sk = bpf_sock->sk;
+ int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
+
+ if (!sk_fullsock(sk))
+ return -EINVAL;
+
+#ifdef CONFIG_INET
+ if (val)
+ tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
+
+ return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
+#else
+ return -EINVAL;
+#endif
+}
+
+static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
+ .func = bpf_sock_ops_cb_flags_set,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -3510,6 +3584,8 @@ static const struct bpf_func_proto *
return &bpf_setsockopt_proto;
case BPF_FUNC_getsockopt:
return &bpf_getsockopt_proto;
+ case BPF_FUNC_sock_ops_cb_flags_set:
+ return &bpf_sock_ops_cb_flags_set_proto;
case BPF_FUNC_sock_map_update:
return &bpf_sock_map_update_proto;
default:
@@ -3826,34 +3902,44 @@ void bpf_warn_invalid_xdp_action(u32 act)
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
-static bool __is_valid_sock_ops_access(int off, int size)
+static bool sock_ops_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
{
+ const int size_default = sizeof(__u32);
+
if (off < 0 || off >= sizeof(struct bpf_sock_ops))
return false;
+
/* The verifier guarantees that size > 0. */
if (off % size != 0)
return false;
- if (size != sizeof(__u32))
- return false;
-
- return true;
-}
-static bool sock_ops_is_valid_access(int off, int size,
- enum bpf_access_type type,
- struct bpf_insn_access_aux *info)
-{
if (type == BPF_WRITE) {
switch (off) {
- case offsetof(struct bpf_sock_ops, op) ...
- offsetof(struct bpf_sock_ops, replylong[3]):
+ case offsetof(struct bpf_sock_ops, reply):
+ case offsetof(struct bpf_sock_ops, sk_txhash):
+ if (size != size_default)
+ return false;
break;
default:
return false;
}
+ } else {
+ switch (off) {
+ case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
+ bytes_acked):
+ if (size != sizeof(__u64))
+ return false;
+ break;
+ default:
+ if (size != size_default)
+ return false;
+ break;
+ }
}
- return __is_valid_sock_ops_access(off, size);
+ return true;
}
static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
@@ -4470,10 +4556,37 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
is_fullsock));
break;
-/* Helper macro for adding read access to tcp_sock fields. */
-#define SOCK_OPS_GET_TCP32(FIELD_NAME) \
+ case offsetof(struct bpf_sock_ops, state):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common, skc_state));
+ break;
+
+ case offsetof(struct bpf_sock_ops, rtt_min):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+ sizeof(struct minmax));
+ BUILD_BUG_ON(sizeof(struct minmax) <
+ sizeof(struct minmax_sample));
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct tcp_sock, rtt_min) +
+ FIELD_SIZEOF(struct minmax_sample, t));
+ break;
+
+/* Helper macro for adding read access to tcp_sock or sock fields. */
+#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
do { \
- BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD_NAME) != 4); \
+ BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
+ FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, \
is_fullsock), \
@@ -4485,17 +4598,159 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
struct bpf_sock_ops_kern, sk),\
si->dst_reg, si->src_reg, \
offsetof(struct bpf_sock_ops_kern, sk));\
- *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, \
- offsetof(struct tcp_sock, FIELD_NAME)); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
+ OBJ_FIELD), \
+ si->dst_reg, si->dst_reg, \
+ offsetof(OBJ, OBJ_FIELD)); \
+ } while (0)
+
+/* Helper macro for adding write access to tcp_sock or sock fields.
+ * The macro is called with two registers, dst_reg which contains a pointer
+ * to ctx (context) and src_reg which contains the value that should be
+ * stored. However, we need an additional register since we cannot overwrite
+ * dst_reg because it may be used later in the program.
+ * Instead we "borrow" one of the other registers. We first save its value
+ * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
+ * it at the end of the macro.
+ */
+#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
+ do { \
+ int reg = BPF_REG_9; \
+ BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
+ FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
+ if (si->dst_reg == reg || si->src_reg == reg) \
+ reg--; \
+ if (si->dst_reg == reg || si->src_reg == reg) \
+ reg--; \
+ *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ temp)); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, \
+ is_fullsock), \
+ reg, si->dst_reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ is_fullsock)); \
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, sk),\
+ reg, si->dst_reg, \
+ offsetof(struct bpf_sock_ops_kern, sk));\
+ *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
+ reg, si->src_reg, \
+ offsetof(OBJ, OBJ_FIELD)); \
+ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ temp)); \
+ } while (0)
+
+#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
+ do { \
+ if (TYPE == BPF_WRITE) \
+ SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
+ else \
+ SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
} while (0)
case offsetof(struct bpf_sock_ops, snd_cwnd):
- SOCK_OPS_GET_TCP32(snd_cwnd);
+ SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, srtt_us):
- SOCK_OPS_GET_TCP32(srtt_us);
+ SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
break;
+
+ case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
+ SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, snd_ssthresh):
+ SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, rcv_nxt):
+ SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, snd_nxt):
+ SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, snd_una):
+ SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, mss_cache):
+ SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, ecn_flags):
+ SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, rate_delivered):
+ SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, rate_interval_us):
+ SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, packets_out):
+ SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, retrans_out):
+ SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, total_retrans):
+ SOCK_OPS_GET_FIELD(total_retrans, total_retrans,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, segs_in):
+ SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, data_segs_in):
+ SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, segs_out):
+ SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, data_segs_out):
+ SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, lost_out):
+ SOCK_OPS_GET_FIELD(lost_out, lost_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, sacked_out):
+ SOCK_OPS_GET_FIELD(sacked_out, sacked_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, sk_txhash):
+ SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
+ struct sock, type);
+ break;
+
+ case offsetof(struct bpf_sock_ops, bytes_received):
+ SOCK_OPS_GET_FIELD(bytes_received, bytes_received,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, bytes_acked):
+ SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock);
+ break;
+
}
return insn - insn_buf;
}
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 982861607f88..e38e641e98d5 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -92,7 +92,7 @@ static bool linkwatch_urgent_event(struct net_device *dev)
if (dev->ifindex != dev_get_iflink(dev))
return true;
- if (dev->priv_flags & IFF_TEAM_PORT)
+ if (netif_is_lag_port(dev) || netif_is_lag_master(dev))
return true;
return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7bf8b85ade16..c4a28f4667b6 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -295,10 +295,31 @@ static ssize_t carrier_changes_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
return sprintf(buf, fmt_dec,
- atomic_read(&netdev->carrier_changes));
+ atomic_read(&netdev->carrier_up_count) +
+ atomic_read(&netdev->carrier_down_count));
}
static DEVICE_ATTR_RO(carrier_changes);
+static ssize_t carrier_up_count_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+
+ return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_up_count));
+}
+static DEVICE_ATTR_RO(carrier_up_count);
+
+static ssize_t carrier_down_count_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+
+ return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_down_count));
+}
+static DEVICE_ATTR_RO(carrier_down_count);
+
/* read-write attributes */
static int change_mtu(struct net_device *dev, unsigned long new_mtu)
@@ -547,6 +568,8 @@ static struct attribute *net_class_attrs[] __ro_after_init = {
&dev_attr_phys_port_name.attr,
&dev_attr_phys_switch_id.attr,
&dev_attr_proto_down.attr,
+ &dev_attr_carrier_up_count.attr,
+ &dev_attr_carrier_down_count.attr,
NULL,
};
ATTRIBUTE_GROUPS(net_class);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 1ccb953b3b09..3cad5f51afd3 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -439,13 +439,40 @@ struct net *copy_net_ns(unsigned long flags,
return net;
}
+static void unhash_nsid(struct net *net, struct net *last)
+{
+ struct net *tmp;
+ /* This function is only called from the cleanup_net() work,
+ * and this work is the only process that may delete
+ * a net from net_namespace_list. So, while the code below
+ * is executing, the list may only grow. Thus, we do not
+ * use for_each_net_rcu() or rtnl_lock().
+ */
+ for_each_net(tmp) {
+ int id;
+
+ spin_lock_bh(&tmp->nsid_lock);
+ id = __peernet2id(tmp, net);
+ if (id >= 0)
+ idr_remove(&tmp->netns_ids, id);
+ spin_unlock_bh(&tmp->nsid_lock);
+ if (id >= 0)
+ rtnl_net_notifyid(tmp, RTM_DELNSID, id);
+ if (tmp == last)
+ break;
+ }
+ spin_lock_bh(&net->nsid_lock);
+ idr_destroy(&net->netns_ids);
+ spin_unlock_bh(&net->nsid_lock);
+}
+
static DEFINE_SPINLOCK(cleanup_list_lock);
static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
static void cleanup_net(struct work_struct *work)
{
const struct pernet_operations *ops;
- struct net *net, *tmp;
+ struct net *net, *tmp, *last;
struct list_head net_kill_list;
LIST_HEAD(net_exit_list);
@@ -458,26 +485,25 @@ static void cleanup_net(struct work_struct *work)
/* Don't let anyone else find us. */
rtnl_lock();
- list_for_each_entry(net, &net_kill_list, cleanup_list) {
+ list_for_each_entry(net, &net_kill_list, cleanup_list)
list_del_rcu(&net->list);
- list_add_tail(&net->exit_list, &net_exit_list);
- for_each_net(tmp) {
- int id;
-
- spin_lock_bh(&tmp->nsid_lock);
- id = __peernet2id(tmp, net);
- if (id >= 0)
- idr_remove(&tmp->netns_ids, id);
- spin_unlock_bh(&tmp->nsid_lock);
- if (id >= 0)
- rtnl_net_notifyid(tmp, RTM_DELNSID, id);
- }
- spin_lock_bh(&net->nsid_lock);
- idr_destroy(&net->netns_ids);
- spin_unlock_bh(&net->nsid_lock);
+ /* Cache the last net. After we unlock rtnl, no new net
+ * added to net_namespace_list can assign an nsid pointer
+ * to a net from net_kill_list (see peernet2id_alloc()).
+ * So, we skip them in unhash_nsid().
+ *
+ * Note that unhash_nsid() does not delete nsid links
+ * between net_kill_list's nets, as they've already been
+ * deleted from net_namespace_list. But this would be
+ * useless anyway, as netns_ids are destroyed there.
+ */
+ last = list_last_entry(&net_namespace_list, struct net, list);
+ rtnl_unlock();
+ list_for_each_entry(net, &net_kill_list, cleanup_list) {
+ unhash_nsid(net, last);
+ list_add_tail(&net->exit_list, &net_exit_list);
}
- rtnl_unlock();
/*
* Another CPU might be rcu-iterating the list, wait for it.
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 4fcfcb14e7c6..b8ab5c829511 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -184,25 +184,44 @@
#define func_enter() pr_debug("entering %s\n", __func__);
+#define PKT_FLAGS \
+ pf(IPV6) /* Interface in IPV6 Mode */ \
+ pf(IPSRC_RND) /* IP-Src Random */ \
+ pf(IPDST_RND) /* IP-Dst Random */ \
+ pf(TXSIZE_RND) /* Transmit size is random */ \
+ pf(UDPSRC_RND) /* UDP-Src Random */ \
+ pf(UDPDST_RND) /* UDP-Dst Random */ \
+ pf(UDPCSUM) /* Include UDP checksum */ \
+ pf(NO_TIMESTAMP) /* Don't timestamp packets (default TS) */ \
+ pf(MPLS_RND) /* Random MPLS labels */ \
+ pf(QUEUE_MAP_RND) /* queue map Random */ \
+ pf(QUEUE_MAP_CPU) /* queue map mirrors smp_processor_id() */ \
+ pf(FLOW_SEQ) /* Sequential flows */ \
+ pf(IPSEC) /* ipsec on for flows */ \
+ pf(MACSRC_RND) /* MAC-Src Random */ \
+ pf(MACDST_RND) /* MAC-Dst Random */ \
+ pf(VID_RND) /* Random VLAN ID */ \
+ pf(SVID_RND) /* Random SVLAN ID */ \
+ pf(NODE) /* Node memory alloc*/ \
+
+#define pf(flag) flag##_SHIFT,
+enum pkt_flags {
+ PKT_FLAGS
+};
+#undef pf
+
/* Device flag bits */
-#define F_IPSRC_RND (1<<0) /* IP-Src Random */
-#define F_IPDST_RND (1<<1) /* IP-Dst Random */
-#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */
-#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */
-#define F_MACSRC_RND (1<<4) /* MAC-Src Random */
-#define F_MACDST_RND (1<<5) /* MAC-Dst Random */
-#define F_TXSIZE_RND (1<<6) /* Transmit size is random */
-#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */
-#define F_MPLS_RND (1<<8) /* Random MPLS labels */
-#define F_VID_RND (1<<9) /* Random VLAN ID */
-#define F_SVID_RND (1<<10) /* Random SVLAN ID */
-#define F_FLOW_SEQ (1<<11) /* Sequential flows */
-#define F_IPSEC_ON (1<<12) /* ipsec on for flows */
-#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
-#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
-#define F_NODE (1<<15) /* Node memory alloc*/
-#define F_UDPCSUM (1<<16) /* Include UDP checksum */
-#define F_NO_TIMESTAMP (1<<17) /* Don't timestamp packets (default TS) */
+#define pf(flag) static const __u32 F_##flag = (1<<flag##_SHIFT);
+PKT_FLAGS
+#undef pf
+
+#define pf(flag) __stringify(flag),
+static char *pkt_flag_names[] = {
+ PKT_FLAGS
+};
+#undef pf
+
+#define NR_PKT_FLAGS ARRAY_SIZE(pkt_flag_names)
/* Thread control flag bits */
#define T_STOP (1<<0) /* Stop run */
@@ -534,6 +553,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
{
const struct pktgen_dev *pkt_dev = seq->private;
ktime_t stopped;
+ unsigned int i;
u64 idle;
seq_printf(seq,
@@ -595,7 +615,6 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
pkt_dev->src_mac_count, pkt_dev->dst_mac_count);
if (pkt_dev->nr_labels) {
- unsigned int i;
seq_puts(seq, " mpls: ");
for (i = 0; i < pkt_dev->nr_labels; i++)
seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]),
@@ -631,68 +650,21 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_puts(seq, " Flags: ");
- if (pkt_dev->flags & F_IPV6)
- seq_puts(seq, "IPV6 ");
-
- if (pkt_dev->flags & F_IPSRC_RND)
- seq_puts(seq, "IPSRC_RND ");
-
- if (pkt_dev->flags & F_IPDST_RND)
- seq_puts(seq, "IPDST_RND ");
-
- if (pkt_dev->flags & F_TXSIZE_RND)
- seq_puts(seq, "TXSIZE_RND ");
-
- if (pkt_dev->flags & F_UDPSRC_RND)
- seq_puts(seq, "UDPSRC_RND ");
-
- if (pkt_dev->flags & F_UDPDST_RND)
- seq_puts(seq, "UDPDST_RND ");
-
- if (pkt_dev->flags & F_UDPCSUM)
- seq_puts(seq, "UDPCSUM ");
-
- if (pkt_dev->flags & F_NO_TIMESTAMP)
- seq_puts(seq, "NO_TIMESTAMP ");
-
- if (pkt_dev->flags & F_MPLS_RND)
- seq_puts(seq, "MPLS_RND ");
-
- if (pkt_dev->flags & F_QUEUE_MAP_RND)
- seq_puts(seq, "QUEUE_MAP_RND ");
+ for (i = 0; i < NR_PKT_FLAGS; i++) {
+ if (i == FLOW_SEQ_SHIFT) /* i is a bit index, not an F_* mask */
+ if (!pkt_dev->cflows)
+ continue;
- if (pkt_dev->flags & F_QUEUE_MAP_CPU)
- seq_puts(seq, "QUEUE_MAP_CPU ");
-
- if (pkt_dev->cflows) {
- if (pkt_dev->flags & F_FLOW_SEQ)
- seq_puts(seq, "FLOW_SEQ "); /*in sequence flows*/
- else
- seq_puts(seq, "FLOW_RND ");
- }
+ if (pkt_dev->flags & (1 << i))
+ seq_printf(seq, "%s ", pkt_flag_names[i]);
+ else if (i == FLOW_SEQ_SHIFT) /* flows configured but not sequential */
+ seq_puts(seq, "FLOW_RND ");
#ifdef CONFIG_XFRM
- if (pkt_dev->flags & F_IPSEC_ON) {
- seq_puts(seq, "IPSEC ");
- if (pkt_dev->spi)
+ if (i == IPSEC_SHIFT && (pkt_dev->flags & F_IPSEC) && pkt_dev->spi) /* spi only when IPSEC is set */
seq_printf(seq, "spi:%u", pkt_dev->spi);
- }
#endif
-
- if (pkt_dev->flags & F_MACSRC_RND)
- seq_puts(seq, "MACSRC_RND ");
-
- if (pkt_dev->flags & F_MACDST_RND)
- seq_puts(seq, "MACDST_RND ");
-
- if (pkt_dev->flags & F_VID_RND)
- seq_puts(seq, "VID_RND ");
-
- if (pkt_dev->flags & F_SVID_RND)
- seq_puts(seq, "SVID_RND ");
-
- if (pkt_dev->flags & F_NODE)
- seq_puts(seq, "NODE_ALLOC ");
+ }
seq_puts(seq, "\n");
@@ -858,6 +830,35 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
return i;
}
+static __u32 pktgen_read_flag(const char *f, bool *disable)
+{
+ __u32 i;
+
+ if (f[0] == '!') {
+ *disable = true;
+ f++;
+ }
+
+ for (i = 0; i < NR_PKT_FLAGS; i++) {
+ if (!IS_ENABLED(CONFIG_XFRM) && i == IPSEC_SHIFT)
+ continue;
+
+ /* allow only disabling ipv6 flag */
+ if (!*disable && i == IPV6_SHIFT)
+ continue;
+
+ if (strcmp(f, pkt_flag_names[i]) == 0)
+ return 1 << i;
+ }
+
+ if (strcmp(f, "FLOW_RND") == 0) {
+ *disable = !*disable;
+ return F_FLOW_SEQ;
+ }
+
+ return 0;
+}
+
static ssize_t pktgen_if_write(struct file *file,
const char __user * user_buffer, size_t count,
loff_t * offset)
@@ -1215,7 +1216,10 @@ static ssize_t pktgen_if_write(struct file *file,
return count;
}
if (!strcmp(name, "flag")) {
+ __u32 flag;
char f[32];
+ bool disable = false;
+
memset(f, 0, 32);
len = strn_len(&user_buffer[i], sizeof(f) - 1);
if (len < 0)
@@ -1224,107 +1228,15 @@ static ssize_t pktgen_if_write(struct file *file,
if (copy_from_user(f, &user_buffer[i], len))
return -EFAULT;
i += len;
- if (strcmp(f, "IPSRC_RND") == 0)
- pkt_dev->flags |= F_IPSRC_RND;
-
- else if (strcmp(f, "!IPSRC_RND") == 0)
- pkt_dev->flags &= ~F_IPSRC_RND;
-
- else if (strcmp(f, "TXSIZE_RND") == 0)
- pkt_dev->flags |= F_TXSIZE_RND;
-
- else if (strcmp(f, "!TXSIZE_RND") == 0)
- pkt_dev->flags &= ~F_TXSIZE_RND;
-
- else if (strcmp(f, "IPDST_RND") == 0)
- pkt_dev->flags |= F_IPDST_RND;
-
- else if (strcmp(f, "!IPDST_RND") == 0)
- pkt_dev->flags &= ~F_IPDST_RND;
-
- else if (strcmp(f, "UDPSRC_RND") == 0)
- pkt_dev->flags |= F_UDPSRC_RND;
-
- else if (strcmp(f, "!UDPSRC_RND") == 0)
- pkt_dev->flags &= ~F_UDPSRC_RND;
-
- else if (strcmp(f, "UDPDST_RND") == 0)
- pkt_dev->flags |= F_UDPDST_RND;
-
- else if (strcmp(f, "!UDPDST_RND") == 0)
- pkt_dev->flags &= ~F_UDPDST_RND;
-
- else if (strcmp(f, "MACSRC_RND") == 0)
- pkt_dev->flags |= F_MACSRC_RND;
-
- else if (strcmp(f, "!MACSRC_RND") == 0)
- pkt_dev->flags &= ~F_MACSRC_RND;
-
- else if (strcmp(f, "MACDST_RND") == 0)
- pkt_dev->flags |= F_MACDST_RND;
-
- else if (strcmp(f, "!MACDST_RND") == 0)
- pkt_dev->flags &= ~F_MACDST_RND;
-
- else if (strcmp(f, "MPLS_RND") == 0)
- pkt_dev->flags |= F_MPLS_RND;
-
- else if (strcmp(f, "!MPLS_RND") == 0)
- pkt_dev->flags &= ~F_MPLS_RND;
- else if (strcmp(f, "VID_RND") == 0)
- pkt_dev->flags |= F_VID_RND;
+ flag = pktgen_read_flag(f, &disable);
- else if (strcmp(f, "!VID_RND") == 0)
- pkt_dev->flags &= ~F_VID_RND;
-
- else if (strcmp(f, "SVID_RND") == 0)
- pkt_dev->flags |= F_SVID_RND;
-
- else if (strcmp(f, "!SVID_RND") == 0)
- pkt_dev->flags &= ~F_SVID_RND;
-
- else if (strcmp(f, "FLOW_SEQ") == 0)
- pkt_dev->flags |= F_FLOW_SEQ;
-
- else if (strcmp(f, "QUEUE_MAP_RND") == 0)
- pkt_dev->flags |= F_QUEUE_MAP_RND;
-
- else if (strcmp(f, "!QUEUE_MAP_RND") == 0)
- pkt_dev->flags &= ~F_QUEUE_MAP_RND;
-
- else if (strcmp(f, "QUEUE_MAP_CPU") == 0)
- pkt_dev->flags |= F_QUEUE_MAP_CPU;
-
- else if (strcmp(f, "!QUEUE_MAP_CPU") == 0)
- pkt_dev->flags &= ~F_QUEUE_MAP_CPU;
-#ifdef CONFIG_XFRM
- else if (strcmp(f, "IPSEC") == 0)
- pkt_dev->flags |= F_IPSEC_ON;
-#endif
-
- else if (strcmp(f, "!IPV6") == 0)
- pkt_dev->flags &= ~F_IPV6;
-
- else if (strcmp(f, "NODE_ALLOC") == 0)
- pkt_dev->flags |= F_NODE;
-
- else if (strcmp(f, "!NODE_ALLOC") == 0)
- pkt_dev->flags &= ~F_NODE;
-
- else if (strcmp(f, "UDPCSUM") == 0)
- pkt_dev->flags |= F_UDPCSUM;
-
- else if (strcmp(f, "!UDPCSUM") == 0)
- pkt_dev->flags &= ~F_UDPCSUM;
-
- else if (strcmp(f, "NO_TIMESTAMP") == 0)
- pkt_dev->flags |= F_NO_TIMESTAMP;
-
- else if (strcmp(f, "!NO_TIMESTAMP") == 0)
- pkt_dev->flags &= ~F_NO_TIMESTAMP;
-
- else {
+ if (flag) {
+ if (disable)
+ pkt_dev->flags &= ~flag;
+ else
+ pkt_dev->flags |= flag;
+ } else {
sprintf(pg_result,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
f,
@@ -2541,7 +2453,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
pkt_dev->flows[flow].cur_daddr =
pkt_dev->cur_daddr;
#ifdef CONFIG_XFRM
- if (pkt_dev->flags & F_IPSEC_ON)
+ if (pkt_dev->flags & F_IPSEC)
get_ipsec_sa(pkt_dev, flow);
#endif
pkt_dev->nflows++;
@@ -2646,7 +2558,7 @@ static void free_SAs(struct pktgen_dev *pkt_dev)
static int process_ipsec(struct pktgen_dev *pkt_dev,
struct sk_buff *skb, __be16 protocol)
{
- if (pkt_dev->flags & F_IPSEC_ON) {
+ if (pkt_dev->flags & F_IPSEC) {
struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
int nhead = 0;
if (x) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 16d644a4f974..97874daa1336 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -990,6 +990,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_NEW_NETNSID */
+ nla_total_size(1) /* IFLA_PROTO_DOWN */
+ nla_total_size(4) /* IFLA_IF_NETNSID */
+ + nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */
+ + nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */
+ 0;
}
@@ -1551,8 +1553,13 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
- atomic_read(&dev->carrier_changes)) ||
- nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
+ atomic_read(&dev->carrier_up_count) +
+ atomic_read(&dev->carrier_down_count)) ||
+ nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down) ||
+ nla_put_u32(skb, IFLA_CARRIER_UP_COUNT,
+ atomic_read(&dev->carrier_up_count)) ||
+ nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT,
+ atomic_read(&dev->carrier_down_count)))
goto nla_put_failure;
if (event != IFLA_EVENT_NONE) {
@@ -1656,6 +1663,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_EVENT] = { .type = NLA_U32 },
[IFLA_GROUP] = { .type = NLA_U32 },
[IFLA_IF_NETNSID] = { .type = NLA_S32 },
+ [IFLA_CARRIER_UP_COUNT] = { .type = NLA_U32 },
+ [IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 21f9bed11988..adf50fbc4c13 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -271,13 +271,12 @@ static int dsa_port_setup(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_CPU:
case DSA_PORT_TYPE_DSA:
- err = dsa_port_fixed_link_register_of(dp);
+ err = dsa_port_link_register_of(dp);
if (err) {
- dev_err(ds->dev, "failed to register fixed link for port %d.%d\n",
+ dev_err(ds->dev, "failed to setup link for port %d.%d\n",
ds->index, dp->index);
return err;
}
-
break;
case DSA_PORT_TYPE_USER:
err = dsa_slave_create(dp);
@@ -301,7 +300,7 @@ static void dsa_port_teardown(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_CPU:
case DSA_PORT_TYPE_DSA:
- dsa_port_fixed_link_unregister_of(dp);
+ dsa_port_link_unregister_of(dp);
break;
case DSA_PORT_TYPE_USER:
if (dp->slave) {
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index cefb0c3c6d51..70de7895e5b8 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -166,8 +166,8 @@ int dsa_port_vlan_add(struct dsa_port *dp,
struct switchdev_trans *trans);
int dsa_port_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan);
-int dsa_port_fixed_link_register_of(struct dsa_port *dp);
-void dsa_port_fixed_link_unregister_of(struct dsa_port *dp);
+int dsa_port_link_register_of(struct dsa_port *dp);
+void dsa_port_link_unregister_of(struct dsa_port *dp);
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index aa56d3fb5da4..cb54b81d0bd9 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -86,7 +86,7 @@ static int dsa_cpu_dsa_setups(struct dsa_switch *ds)
if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
continue;
- ret = dsa_port_fixed_link_register_of(&ds->ports[port]);
+ ret = dsa_port_link_register_of(&ds->ports[port]);
if (ret)
return ret;
}
@@ -275,7 +275,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
for (port = 0; port < ds->num_ports; port++) {
if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
continue;
- dsa_port_fixed_link_unregister_of(&ds->ports[port]);
+ dsa_port_link_unregister_of(&ds->ports[port]);
}
if (ds->slave_mii_bus && ds->ops->phy_read)
diff --git a/net/dsa/port.c b/net/dsa/port.c
index bb4be2679904..7acc1169d75e 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -273,7 +273,56 @@ int dsa_port_vlan_del(struct dsa_port *dp,
return 0;
}
-int dsa_port_fixed_link_register_of(struct dsa_port *dp)
+static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
+{
+ struct device_node *port_dn = dp->dn;
+ struct device_node *phy_dn;
+ struct dsa_switch *ds = dp->ds;
+ struct phy_device *phydev;
+ int port = dp->index;
+ int err = 0;
+
+ phy_dn = of_parse_phandle(port_dn, "phy-handle", 0);
+ if (!phy_dn)
+ return 0; /* no "phy-handle" property: nothing to do */
+
+ phydev = of_phy_find_device(phy_dn);
+ if (!phydev) {
+ err = -EPROBE_DEFER; /* no PHY device found for the node */
+ goto err_put_of;
+ }
+
+ if (enable) {
+ err = genphy_config_init(phydev);
+ if (err < 0)
+ goto err_put_dev;
+
+ err = genphy_resume(phydev);
+ if (err < 0)
+ goto err_put_dev;
+
+ err = genphy_read_status(phydev);
+ if (err < 0)
+ goto err_put_dev;
+ } else {
+ err = genphy_suspend(phydev);
+ if (err < 0)
+ goto err_put_dev;
+ }
+
+ if (ds->ops->adjust_link)
+ ds->ops->adjust_link(ds, port, phydev);
+
+ dev_dbg(ds->dev, "%s port's phy: %s\n",
+ enable ? "enabled" : "disabled", phydev_name(phydev));
+err_put_dev: /* success path falls through: references are dropped on every exit */
+ put_device(&phydev->mdio.dev);
+err_put_of:
+ of_node_put(phy_dn);
+ return err;
+}
+
+static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
{
struct device_node *dn = dp->dn;
struct dsa_switch *ds = dp->ds;
@@ -282,38 +331,44 @@ int dsa_port_fixed_link_register_of(struct dsa_port *dp)
int mode;
int err;
- if (of_phy_is_fixed_link(dn)) {
- err = of_phy_register_fixed_link(dn);
- if (err) {
- dev_err(ds->dev,
- "failed to register the fixed PHY of port %d\n",
- port);
- return err;
- }
+ err = of_phy_register_fixed_link(dn);
+ if (err) {
+ dev_err(ds->dev,
+ "failed to register the fixed PHY of port %d\n",
+ port);
+ return err;
+ }
- phydev = of_phy_find_device(dn);
+ phydev = of_phy_find_device(dn);
- mode = of_get_phy_mode(dn);
- if (mode < 0)
- mode = PHY_INTERFACE_MODE_NA;
- phydev->interface = mode;
+ mode = of_get_phy_mode(dn);
+ if (mode < 0)
+ mode = PHY_INTERFACE_MODE_NA;
+ phydev->interface = mode;
- genphy_config_init(phydev);
- genphy_read_status(phydev);
+ genphy_config_init(phydev);
+ genphy_read_status(phydev);
- if (ds->ops->adjust_link)
- ds->ops->adjust_link(ds, port, phydev);
+ if (ds->ops->adjust_link)
+ ds->ops->adjust_link(ds, port, phydev);
- put_device(&phydev->mdio.dev);
- }
+ put_device(&phydev->mdio.dev);
return 0;
}
-void dsa_port_fixed_link_unregister_of(struct dsa_port *dp)
+int dsa_port_link_register_of(struct dsa_port *dp)
{
- struct device_node *dn = dp->dn;
+ if (of_phy_is_fixed_link(dp->dn))
+ return dsa_port_fixed_link_register_of(dp);
+ else
+ return dsa_port_setup_phy_of(dp, true);
+}
- if (of_phy_is_fixed_link(dn))
- of_phy_deregister_fixed_link(dn);
+void dsa_port_link_unregister_of(struct dsa_port *dp)
+{
+ if (of_phy_is_fixed_link(dp->dn))
+ of_phy_deregister_fixed_link(dp->dn);
+ else
+ dsa_port_setup_phy_of(dp, false);
}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 54cccdd8b1e3..c24008daa3d8 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -872,6 +872,9 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct sock *sk = sock->sk;
int err = 0;
struct net *net = sock_net(sk);
+ void __user *p = (void __user *)arg;
+ struct ifreq ifr;
+ struct rtentry rt;
switch (cmd) {
case SIOCGSTAMP:
@@ -882,8 +885,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
break;
case SIOCADDRT:
case SIOCDELRT:
+ if (copy_from_user(&rt, p, sizeof(struct rtentry)))
+ return -EFAULT;
+ err = ip_rt_ioctl(net, cmd, &rt);
+ break;
case SIOCRTMSG:
- err = ip_rt_ioctl(net, cmd, (void __user *)arg);
+ err = -EINVAL;
break;
case SIOCDARP:
case SIOCGARP:
@@ -891,17 +898,26 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
err = arp_ioctl(net, cmd, (void __user *)arg);
break;
case SIOCGIFADDR:
- case SIOCSIFADDR:
case SIOCGIFBRDADDR:
- case SIOCSIFBRDADDR:
case SIOCGIFNETMASK:
- case SIOCSIFNETMASK:
case SIOCGIFDSTADDR:
+ case SIOCGIFPFLAGS:
+ if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = devinet_ioctl(net, cmd, &ifr);
+ if (!err && copy_to_user(p, &ifr, sizeof(struct ifreq)))
+ err = -EFAULT;
+ break;
+
+ case SIOCSIFADDR:
+ case SIOCSIFBRDADDR:
+ case SIOCSIFNETMASK:
case SIOCSIFDSTADDR:
case SIOCSIFPFLAGS:
- case SIOCGIFPFLAGS:
case SIOCSIFFLAGS:
- err = devinet_ioctl(net, cmd, (void __user *)arg);
+ if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = devinet_ioctl(net, cmd, &ifr);
break;
default:
if (sk->sk_prot->ioctl)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7a93359fbc72..e056c0067f2c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -946,11 +946,10 @@ static int inet_abc_len(__be32 addr)
}
-int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
{
- struct ifreq ifr;
struct sockaddr_in sin_orig;
- struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
+ struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
struct in_device *in_dev;
struct in_ifaddr **ifap = NULL;
struct in_ifaddr *ifa = NULL;
@@ -959,22 +958,16 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
int ret = -EFAULT;
int tryaddrmatch = 0;
- /*
- * Fetch the caller's info block into kernel space
- */
-
- if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
- goto out;
- ifr.ifr_name[IFNAMSIZ - 1] = 0;
+ ifr->ifr_name[IFNAMSIZ - 1] = 0;
/* save original address for comparison */
memcpy(&sin_orig, sin, sizeof(*sin));
- colon = strchr(ifr.ifr_name, ':');
+ colon = strchr(ifr->ifr_name, ':');
if (colon)
*colon = 0;
- dev_load(net, ifr.ifr_name);
+ dev_load(net, ifr->ifr_name);
switch (cmd) {
case SIOCGIFADDR: /* Get interface address */
@@ -1014,7 +1007,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
rtnl_lock();
ret = -ENODEV;
- dev = __dev_get_by_name(net, ifr.ifr_name);
+ dev = __dev_get_by_name(net, ifr->ifr_name);
if (!dev)
goto done;
@@ -1031,7 +1024,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
This is checked above. */
for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
ifap = &ifa->ifa_next) {
- if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
+ if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
sin_orig.sin_addr.s_addr ==
ifa->ifa_local) {
break; /* found */
@@ -1044,7 +1037,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!ifa) {
for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
ifap = &ifa->ifa_next)
- if (!strcmp(ifr.ifr_name, ifa->ifa_label))
+ if (!strcmp(ifr->ifr_name, ifa->ifa_label))
break;
}
}
@@ -1056,19 +1049,19 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
switch (cmd) {
case SIOCGIFADDR: /* Get interface address */
sin->sin_addr.s_addr = ifa->ifa_local;
- goto rarok;
+ break;
case SIOCGIFBRDADDR: /* Get the broadcast address */
sin->sin_addr.s_addr = ifa->ifa_broadcast;
- goto rarok;
+ break;
case SIOCGIFDSTADDR: /* Get the destination address */
sin->sin_addr.s_addr = ifa->ifa_address;
- goto rarok;
+ break;
case SIOCGIFNETMASK: /* Get the netmask for the interface */
sin->sin_addr.s_addr = ifa->ifa_mask;
- goto rarok;
+ break;
case SIOCSIFFLAGS:
if (colon) {
@@ -1076,11 +1069,11 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!ifa)
break;
ret = 0;
- if (!(ifr.ifr_flags & IFF_UP))
+ if (!(ifr->ifr_flags & IFF_UP))
inet_del_ifa(in_dev, ifap, 1);
break;
}
- ret = dev_change_flags(dev, ifr.ifr_flags);
+ ret = dev_change_flags(dev, ifr->ifr_flags);
break;
case SIOCSIFADDR: /* Set interface address (and family) */
@@ -1095,7 +1088,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
break;
INIT_HLIST_NODE(&ifa->hash);
if (colon)
- memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
+ memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
else
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
} else {
@@ -1182,28 +1175,27 @@ done:
rtnl_unlock();
out:
return ret;
-rarok:
- rtnl_unlock();
- ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
- goto out;
}
-static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
+static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
{
struct in_device *in_dev = __in_dev_get_rtnl(dev);
struct in_ifaddr *ifa;
struct ifreq ifr;
int done = 0;
+ if (WARN_ON(size > sizeof(struct ifreq)))
+ goto out;
+
if (!in_dev)
goto out;
for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
if (!buf) {
- done += sizeof(ifr);
+ done += size;
continue;
}
- if (len < (int) sizeof(ifr))
+ if (len < size)
break;
memset(&ifr, 0, sizeof(struct ifreq));
strcpy(ifr.ifr_name, ifa->ifa_label);
@@ -1212,13 +1204,12 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
ifa->ifa_local;
- if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
+ if (copy_to_user(buf + done, &ifr, size)) {
done = -EFAULT;
break;
}
- buf += sizeof(struct ifreq);
- len -= sizeof(struct ifreq);
- done += sizeof(struct ifreq);
+ len -= size;
+ done += size;
}
out:
return done;
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 32fbd9ba3609..da5635fc52c2 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -118,6 +118,9 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
if (!xo)
return ERR_PTR(-EINVAL);
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
+ return ERR_PTR(-EINVAL);
+
x = skb->sp->xvec[skb->sp->len - 1];
aead = x->data;
esph = ip_esp_hdr(skb);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 08259d078b1c..f05afaf3235c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -587,10 +587,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
* Handle IP routing ioctl calls.
* These are used to manipulate the routing tables
*/
-int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt)
{
struct fib_config cfg;
- struct rtentry rt;
int err;
switch (cmd) {
@@ -599,11 +598,8 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- if (copy_from_user(&rt, arg, sizeof(rt)))
- return -EFAULT;
-
rtnl_lock();
- err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
+ err = rtentry_to_fib_config(net, cmd, rt, &cfg);
if (err == 0) {
struct fib_table *tb;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 02f00be12bb0..10f7f74a0831 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -332,7 +332,7 @@ static __be32 igmpv3_get_srcaddr(struct net_device *dev,
return htonl(INADDR_ANY);
for_ifa(in_dev) {
- if (inet_ifa_match(fl4->saddr, ifa))
+ if (fl4->saddr == ifa->ifa_local)
return fl4->saddr;
} endfor_ifa(in_dev);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index b61f2285816d..6ec670fbbbdd 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -114,7 +114,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
- __be32 id, u32 index,
+ u32 id, u32 index,
bool truncate, bool is_ipv4);
static unsigned int ipgre_net_id __read_mostly;
@@ -273,12 +273,12 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
iph = ip_hdr(skb);
ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
- ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET;
+ ver = ershdr->ver;
/* The original GRE header does not have key field,
* Use ERSPAN 10-bit session ID as key.
*/
- tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
+ tpi->key = cpu_to_be32(get_session_id(ershdr));
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
tpi->flags | TUNNEL_KEY,
iph->saddr, iph->daddr, tpi->key);
@@ -324,14 +324,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (ver == 1) {
tunnel->index = ntohl(pkt_md->u.index);
} else {
- u16 md2_flags;
- u16 dir, hwid;
-
- md2_flags = ntohs(pkt_md->u.md2.flags);
- dir = (md2_flags & DIR_MASK) >> DIR_OFFSET;
- hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
- tunnel->dir = dir;
- tunnel->hwid = hwid;
+ tunnel->dir = pkt_md->u.md2.dir;
+ tunnel->hwid = get_hwid(&pkt_md->u.md2);
}
}
@@ -615,19 +609,14 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
}
if (version == 1) {
- erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
+ erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
ntohl(md->u.index), truncate, true);
} else if (version == 2) {
- u16 md2_flags;
- u8 direction;
- u16 hwid;
-
- md2_flags = ntohs(md->u.md2.flags);
- direction = (md2_flags & DIR_MASK) >> DIR_OFFSET;
- hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
-
- erspan_build_header_v2(skb, tunnel_id_to_key32(key->tun_id),
- direction, hwid, truncate, true);
+ erspan_build_header_v2(skb,
+ ntohl(tunnel_id_to_key32(key->tun_id)),
+ md->u.md2.dir,
+ get_hwid(&md->u.md2),
+ truncate, true);
} else {
goto err_free_rt;
}
@@ -733,10 +722,11 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
/* Push ERSPAN header */
if (tunnel->erspan_ver == 1)
- erspan_build_header(skb, tunnel->parms.o_key, tunnel->index,
+ erspan_build_header(skb, ntohl(tunnel->parms.o_key),
+ tunnel->index,
truncate, true);
else
- erspan_build_header_v2(skb, tunnel->parms.o_key,
+ erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
tunnel->dir, tunnel->hwid,
truncate, true);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 60fb1eb7d7d8..6cc70fa488cb 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -808,6 +808,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
{
struct net_device *dev = NULL;
int ifindex;
+ int midx;
if (optlen != sizeof(int))
goto e_inval;
@@ -823,10 +824,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = -EADDRNOTAVAIL;
if (!dev)
break;
+
+ midx = l3mdev_master_ifindex(dev);
dev_put(dev);
err = -EINVAL;
- if (sk->sk_bound_dev_if)
+ if (sk->sk_bound_dev_if &&
+ (!midx || midx != sk->sk_bound_dev_if))
break;
inet->uc_index = ifindex;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 5ddb1cb52bd4..141f5e865731 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -711,9 +711,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- tunnel->fwmark);
+ if (tunnel->fwmark) {
+ init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ tunnel->fwmark);
+ }
+ else {
+ init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ skb->mark);
+ }
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index e9e488e72900..f75802ad960f 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -329,39 +329,6 @@ set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port)
sin->sin_port = port;
}
-static int __init ic_devinet_ioctl(unsigned int cmd, struct ifreq *arg)
-{
- int res;
-
- mm_segment_t oldfs = get_fs();
- set_fs(get_ds());
- res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
- set_fs(oldfs);
- return res;
-}
-
-static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)
-{
- int res;
-
- mm_segment_t oldfs = get_fs();
- set_fs(get_ds());
- res = dev_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
- set_fs(oldfs);
- return res;
-}
-
-static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg)
-{
- int res;
-
- mm_segment_t oldfs = get_fs();
- set_fs(get_ds());
- res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg);
- set_fs(oldfs);
- return res;
-}
-
/*
* Set up interface addresses and routes.
*/
@@ -375,19 +342,19 @@ static int __init ic_setup_if(void)
memset(&ir, 0, sizeof(ir));
strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name);
set_sockaddr(sin, ic_myaddr, 0);
- if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) {
+ if ((err = devinet_ioctl(&init_net, SIOCSIFADDR, &ir)) < 0) {
pr_err("IP-Config: Unable to set interface address (%d)\n",
err);
return -1;
}
set_sockaddr(sin, ic_netmask, 0);
- if ((err = ic_devinet_ioctl(SIOCSIFNETMASK, &ir)) < 0) {
+ if ((err = devinet_ioctl(&init_net, SIOCSIFNETMASK, &ir)) < 0) {
pr_err("IP-Config: Unable to set interface netmask (%d)\n",
err);
return -1;
}
set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0);
- if ((err = ic_devinet_ioctl(SIOCSIFBRDADDR, &ir)) < 0) {
+ if ((err = devinet_ioctl(&init_net, SIOCSIFBRDADDR, &ir)) < 0) {
pr_err("IP-Config: Unable to set interface broadcast address (%d)\n",
err);
return -1;
@@ -397,11 +364,11 @@ static int __init ic_setup_if(void)
* out, we'll try to muddle along.
*/
if (ic_dev_mtu != 0) {
- strcpy(ir.ifr_name, ic_dev->dev->name);
- ir.ifr_mtu = ic_dev_mtu;
- if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0)
+ rtnl_lock();
+ if ((err = dev_set_mtu(ic_dev->dev, ic_dev_mtu)) < 0)
pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n",
ic_dev_mtu, err);
+ rtnl_unlock();
}
return 0;
}
@@ -423,7 +390,7 @@ static int __init ic_setup_routes(void)
set_sockaddr((struct sockaddr_in *) &rm.rt_genmask, 0, 0);
set_sockaddr((struct sockaddr_in *) &rm.rt_gateway, ic_gateway, 0);
rm.rt_flags = RTF_UP | RTF_GATEWAY;
- if ((err = ic_route_ioctl(SIOCADDRT, &rm)) < 0) {
+ if ((err = ip_rt_ioctl(&init_net, SIOCADDRT, &rm)) < 0) {
pr_err("IP-Config: Cannot add default route (%d)\n",
err);
return -1;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 136544b36a46..7c509697ebc7 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -617,8 +617,21 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc.oif = inet->mc_index;
if (!saddr)
saddr = inet->mc_addr;
- } else if (!ipc.oif)
+ } else if (!ipc.oif) {
ipc.oif = inet->uc_index;
+ } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
+ /* oif is set, packet is to local broadcast
+ * and uc_index is set. oif is most likely set
+ * by sk_bound_dev_if. If uc_index != oif check if the
+ * oif is an L3 master and uc_index is an L3 slave.
+ * If so, we want to allow the send using the uc_index.
+ */
+ if (ipc.oif != inet->uc_index &&
+ ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
+ inet->uc_index)) {
+ ipc.oif = inet->uc_index;
+ }
+ }
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d7cf861bf699..f013ddc191e0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -463,7 +463,7 @@ void tcp_init_transfer(struct sock *sk, int bpf_op)
tcp_mtup_init(sk);
icsk->icsk_af_ops->rebuild_header(sk);
tcp_init_metrics(sk);
- tcp_call_bpf(sk, bpf_op);
+ tcp_call_bpf(sk, bpf_op, 0, NULL);
tcp_init_congestion_control(sk);
tcp_init_buffer_space(sk);
}
@@ -2042,6 +2042,30 @@ void tcp_set_state(struct sock *sk, int state)
{
int oldstate = sk->sk_state;
+ /* We defined a new enum for TCP states that are exported in BPF
+ * so as not to force the internal TCP states to be frozen. The
+ * following checks will detect if an internal state value ever
+ * differs from the BPF value. If this ever happens, then we will
+ * need to remap the internal value to the BPF value before calling
+ * tcp_call_bpf_2arg.
+ */
+ BUILD_BUG_ON((int)BPF_TCP_ESTABLISHED != (int)TCP_ESTABLISHED);
+ BUILD_BUG_ON((int)BPF_TCP_SYN_SENT != (int)TCP_SYN_SENT);
+ BUILD_BUG_ON((int)BPF_TCP_SYN_RECV != (int)TCP_SYN_RECV);
+ BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT1 != (int)TCP_FIN_WAIT1);
+ BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT2 != (int)TCP_FIN_WAIT2);
+ BUILD_BUG_ON((int)BPF_TCP_TIME_WAIT != (int)TCP_TIME_WAIT);
+ BUILD_BUG_ON((int)BPF_TCP_CLOSE != (int)TCP_CLOSE);
+ BUILD_BUG_ON((int)BPF_TCP_CLOSE_WAIT != (int)TCP_CLOSE_WAIT);
+ BUILD_BUG_ON((int)BPF_TCP_LAST_ACK != (int)TCP_LAST_ACK);
+ BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN);
+ BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING);
+ BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV);
+ BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES);
+
+ if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG))
+ tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state);
+
switch (state) {
case TCP_ESTABLISHED:
if (oldstate != TCP_ESTABLISHED)
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 0b5a05bd82e3..ddbce73edae8 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -146,7 +146,7 @@ static void tcpnv_init(struct sock *sk)
* within a datacenter, where we have reasonable estimates of
* RTTs
*/
- base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT);
+ base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT, 0, NULL);
if (base_rtt > 0) {
ca->nv_base_rtt = base_rtt;
ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index b6a2aa1dcf56..4d58e2ce0b5b 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4))
+ return ERR_PTR(-EINVAL);
+
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
return ERR_PTR(-EINVAL);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 95461f02ac9a..e9f985e42405 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2905,6 +2905,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
+ if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
+ tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
+ TCP_SKB_CB(skb)->seq, segs, err);
+
if (likely(!err)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
trace_tcp_retransmit_skb(sk, skb);
@@ -3469,7 +3473,7 @@ int tcp_connect(struct sock *sk)
struct sk_buff *buff;
int err;
- tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB);
+ tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL);
if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
return -EHOSTUNREACH; /* Routing failure or similar. */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6db3124cdbda..257abdde23b0 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -213,11 +213,18 @@ static int tcp_write_timeout(struct sock *sk)
icsk->icsk_user_timeout);
}
tcp_fastopen_active_detect_blackhole(sk, expired);
+
+ if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
+ tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,
+ icsk->icsk_retransmits,
+ icsk->icsk_rto, (int)expired);
+
if (expired) {
/* Has it gone just too far? */
tcp_write_err(sk);
return 1;
}
+
return 0;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 853321555a4e..3f018f34cf56 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -977,8 +977,21 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!saddr)
saddr = inet->mc_addr;
connected = 0;
- } else if (!ipc.oif)
+ } else if (!ipc.oif) {
ipc.oif = inet->uc_index;
+ } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
+ /* oif is set, packet is to local broadcast
+ * and uc_index is set. oif is most likely set
+ * by sk_bound_dev_if. If uc_index != oif check if the
+ * oif is an L3 master and uc_index is an L3 slave.
+ * If so, we want to allow the send using the uc_index.
+ */
+ if (ipc.oif != inet->uc_index &&
+ ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
+ inet->uc_index)) {
+ ipc.oif = inet->uc_index;
+ }
+ }
if (connected)
rt = (struct rtable *)sk_dst_check(sk, 0);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 01801b77bd0d..ea6e6e7df0ee 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -203,6 +203,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
goto out;
}
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+ goto out;
+
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 8affc6d83d58..63faeee989a9 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -92,6 +92,7 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb_reset_network_header(skb);
skb_mac_header_rebuild(skb);
+ eth_hdr(skb)->h_proto = skb->protocol;
err = 0;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 44d109c435bc..3fd1ec775dc2 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -145,6 +145,9 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
if (!xo)
return ERR_PTR(-EINVAL);
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
+ return ERR_PTR(-EINVAL);
+
x = skb->sp->xvec[skb->sp->len - 1];
aead = x->data;
esph = ip_esp_hdr(skb);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index a88480193d77..05f070e123e4 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -513,8 +513,8 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
ipv6h = ipv6_hdr(skb);
ershdr = (struct erspan_base_hdr *)skb->data;
- ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET;
- tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
+ ver = ershdr->ver;
+ tpi->key = cpu_to_be32(get_session_id(ershdr));
tunnel = ip6gre_tunnel_lookup(skb->dev,
&ipv6h->saddr, &ipv6h->daddr, tpi->key,
@@ -565,14 +565,8 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
if (ver == 1) {
tunnel->parms.index = ntohl(pkt_md->u.index);
} else {
- u16 md2_flags;
- u16 dir, hwid;
-
- md2_flags = ntohs(pkt_md->u.md2.flags);
- dir = (md2_flags & DIR_MASK) >> DIR_OFFSET;
- hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
- tunnel->parms.dir = dir;
- tunnel->parms.hwid = hwid;
+ tunnel->parms.dir = pkt_md->u.md2.dir;
+ tunnel->parms.hwid = get_hwid(&pkt_md->u.md2);
}
ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
@@ -925,6 +919,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct erspan_metadata *md;
+ __be32 tun_id;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info ||
@@ -944,23 +939,18 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
if (!md)
goto tx_err;
+ tun_id = tunnel_id_to_key32(key->tun_id);
if (md->version == 1) {
erspan_build_header(skb,
- tunnel_id_to_key32(key->tun_id),
+ ntohl(tun_id),
ntohl(md->u.index), truncate,
false);
} else if (md->version == 2) {
- u16 md2_flags;
- u16 dir, hwid;
-
- md2_flags = ntohs(md->u.md2.flags);
- dir = (md2_flags & DIR_MASK) >> DIR_OFFSET;
- hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
-
erspan_build_header_v2(skb,
- tunnel_id_to_key32(key->tun_id),
- dir, hwid, truncate,
- false);
+ ntohl(tun_id),
+ md->u.md2.dir,
+ get_hwid(&md->u.md2),
+ truncate, false);
}
} else {
switch (skb->protocol) {
@@ -982,11 +972,11 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
}
if (t->parms.erspan_ver == 1)
- erspan_build_header(skb, t->parms.o_key,
+ erspan_build_header(skb, ntohl(t->parms.o_key),
t->parms.index,
truncate, false);
else
- erspan_build_header_v2(skb, t->parms.o_key,
+ erspan_build_header_v2(skb, ntohl(t->parms.o_key),
t->parms.dir,
t->parms.hwid,
truncate, false);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a4a94452132b..997c7f19ad62 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -174,7 +174,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
-static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
if (!np->autoflowlabel_set)
return ip6_default_np_autolabel(net);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 2d4680e0376f..e8ffb5b5d84e 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1336,7 +1336,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_AUTOFLOWLABEL:
- val = np->autoflowlabel;
+ val = ip6_autoflowlabel(sock_net(sk), np);
break;
case IPV6_RECVFRAGSIZE:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f85da2f1e729..fe3966a9c999 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2440,7 +2440,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
static struct rt6_info *ip6_nh_lookup_table(struct net *net,
struct fib6_config *cfg,
- const struct in6_addr *gw_addr)
+ const struct in6_addr *gw_addr,
+ u32 tbid, int flags)
{
struct flowi6 fl6 = {
.flowi6_oif = cfg->fc_ifindex,
@@ -2449,15 +2450,15 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
};
struct fib6_table *table;
struct rt6_info *rt;
- int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
- table = fib6_get_table(net, cfg->fc_table);
+ table = fib6_get_table(net, tbid);
if (!table)
return NULL;
if (!ipv6_addr_any(&cfg->fc_prefsrc))
flags |= RT6_LOOKUP_F_HAS_SADDR;
+ flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
/* if table lookup failed, fall back to full lookup */
@@ -2469,6 +2470,82 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
return rt;
}
+static int ip6_route_check_nh_onlink(struct net *net,
+ struct fib6_config *cfg,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
+ const struct in6_addr *gw_addr = &cfg->fc_gateway;
+ u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
+ struct rt6_info *grt;
+ int err;
+
+ err = 0;
+ grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
+ if (grt) {
+ if (grt->rt6i_flags & flags || dev != grt->dst.dev) {
+ NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
+ err = -EINVAL;
+ }
+
+ ip6_rt_put(grt);
+ }
+
+ return err;
+}
+
+static int ip6_route_check_nh(struct net *net,
+ struct fib6_config *cfg,
+ struct net_device **_dev,
+ struct inet6_dev **idev)
+{
+ const struct in6_addr *gw_addr = &cfg->fc_gateway;
+ struct net_device *dev = _dev ? *_dev : NULL;
+ struct rt6_info *grt = NULL;
+ int err = -EHOSTUNREACH;
+
+ if (cfg->fc_table) {
+ int flags = RT6_LOOKUP_F_IFACE;
+
+ grt = ip6_nh_lookup_table(net, cfg, gw_addr,
+ cfg->fc_table, flags);
+ if (grt) {
+ if (grt->rt6i_flags & RTF_GATEWAY ||
+ (dev && dev != grt->dst.dev)) {
+ ip6_rt_put(grt);
+ grt = NULL;
+ }
+ }
+ }
+
+ if (!grt)
+ grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+
+ if (!grt)
+ goto out;
+
+ if (dev) {
+ if (dev != grt->dst.dev) {
+ ip6_rt_put(grt);
+ goto out;
+ }
+ } else {
+ *_dev = dev = grt->dst.dev;
+ *idev = grt->rt6i_idev;
+ dev_hold(dev);
+ in6_dev_hold(grt->rt6i_idev);
+ }
+
+ if (!(grt->rt6i_flags & RTF_GATEWAY))
+ err = 0;
+
+ ip6_rt_put(grt);
+
+out:
+ return err;
+}
+
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -2520,6 +2597,21 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (cfg->fc_metric == 0)
cfg->fc_metric = IP6_RT_PRIO_USER;
+ if (cfg->fc_flags & RTNH_F_ONLINK) {
+ if (!dev) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop device required for onlink");
+ err = -ENODEV;
+ goto out;
+ }
+
+ if (!(dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ err = -ENETDOWN;
+ goto out;
+ }
+ }
+
err = -ENOBUFS;
if (cfg->fc_nlinfo.nlh &&
!(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
@@ -2664,8 +2756,6 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
rt->rt6i_gateway = *gw_addr;
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
- struct rt6_info *grt = NULL;
-
/* IPv6 strictly inhibits using not link-local
addresses as nexthop address.
Otherwise, router will not able to send redirects.
@@ -2682,40 +2772,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
- if (cfg->fc_table) {
- grt = ip6_nh_lookup_table(net, cfg, gw_addr);
-
- if (grt) {
- if (grt->rt6i_flags & RTF_GATEWAY ||
- (dev && dev != grt->dst.dev)) {
- ip6_rt_put(grt);
- grt = NULL;
- }
- }
- }
-
- if (!grt)
- grt = rt6_lookup(net, gw_addr, NULL,
- cfg->fc_ifindex, 1);
-
- err = -EHOSTUNREACH;
- if (!grt)
- goto out;
- if (dev) {
- if (dev != grt->dst.dev) {
- ip6_rt_put(grt);
- goto out;
- }
+ if (cfg->fc_flags & RTNH_F_ONLINK) {
+ err = ip6_route_check_nh_onlink(net, cfg, dev,
+ extack);
} else {
- dev = grt->dst.dev;
- idev = grt->rt6i_idev;
- dev_hold(dev);
- in6_dev_hold(grt->rt6i_idev);
+ err = ip6_route_check_nh(net, cfg, &dev, &idev);
}
- if (!(grt->rt6i_flags & RTF_GATEWAY))
- err = 0;
- ip6_rt_put(grt);
-
if (err)
goto out;
}
@@ -2734,6 +2796,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (!dev)
goto out;
+ if (!(dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ err = -ENETDOWN;
+ goto out;
+ }
+
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
NL_SET_ERR_MSG(extack, "Invalid source address");
@@ -2751,6 +2819,7 @@ install_route:
if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
!netif_carrier_ok(dev))
rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
+ rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
rt->dst.dev = dev;
rt->rt6i_idev = idev;
rt->rt6i_table = table;
@@ -3820,6 +3889,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rtm->rtm_flags & RTM_F_CLONED)
cfg->fc_flags |= RTF_CACHE;
+ cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
+
cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->fc_nlinfo.nlh = nlh;
cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -4225,6 +4296,7 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
goto nla_put_failure;
}
+ *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);
if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
*flags |= RTNH_F_OFFLOAD;
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index d883c9204c01..278e49cd67d4 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
{
struct tcphdr *th;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
+ return ERR_PTR(-EINVAL);
+
if (!pskb_may_pull(skb, sizeof(*th)))
return ERR_PTR(-EINVAL);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index a0f89ad76f9d..2a04dc9c781b 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -42,6 +42,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
const struct ipv6hdr *ipv6h;
struct udphdr *uh;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+ goto out;
+
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 4e12859bc2ee..bb935a3b7fea 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -92,6 +92,7 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb_reset_network_header(skb);
skb_mac_header_rebuild(skb);
+ eth_hdr(skb)->h_proto = skb->protocol;
err = 0;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index d4e98f20fc2a..4a8d407f8902 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1387,8 +1387,13 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
if (!csk)
return -EINVAL;
- /* We must prevent loops or risk deadlock ! */
- if (csk->sk_family == PF_KCM)
+ /* Only allow TCP sockets to be attached for now */
+ if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) ||
+ csk->sk_protocol != IPPROTO_TCP)
+ return -EOPNOTSUPP;
+
+ /* Don't allow listeners or closed sockets */
+ if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE)
return -EOPNOTSUPP;
psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
@@ -1405,9 +1410,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
return err;
}
- sock_hold(csk);
-
write_lock_bh(&csk->sk_callback_lock);
+
+ /* Check if sk_user_data is already in use by KCM or someone else.
+ * Must be done under lock to prevent race conditions.
+ */
+ if (csk->sk_user_data) {
+ write_unlock_bh(&csk->sk_callback_lock);
+ strp_done(&psock->strp);
+ kmem_cache_free(kcm_psockp, psock);
+ return -EALREADY;
+ }
+
psock->save_data_ready = csk->sk_data_ready;
psock->save_write_space = csk->sk_write_space;
psock->save_state_change = csk->sk_state_change;
@@ -1415,8 +1429,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
csk->sk_data_ready = psock_data_ready;
csk->sk_write_space = psock_write_space;
csk->sk_state_change = psock_state_change;
+
write_unlock_bh(&csk->sk_callback_lock);
+ sock_hold(csk);
+
/* Finished initialization, now add the psock to the MUX. */
spin_lock_bh(&mux->lock);
head = &mux->psocks;
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index b15412c21ac9..444ea8d127fe 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -420,7 +420,7 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
default:
p += scnprintf(p, sizeof(buf) + buf - p,
"\t\tMAX-MPDU-UNKNOWN\n");
- };
+ }
switch (vhtc->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
case 0:
p += scnprintf(p, sizeof(buf) + buf - p,
@@ -438,7 +438,7 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
p += scnprintf(p, sizeof(buf) + buf - p,
"\t\tUNKNOWN-MHZ: 0x%x\n",
(vhtc->cap >> 2) & 0x3);
- };
+ }
PFLAG(RXLDPC, "RXLDPC");
PFLAG(SHORT_GI_80, "SHORT-GI-80");
PFLAG(SHORT_GI_160, "SHORT-GI-160");
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index eb55f1b3d047..7322aa1e382e 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -49,6 +49,7 @@
#include <net/mpls.h>
#include <net/vxlan.h>
#include <net/tun_proto.h>
+#include <net/erspan.h>
#include "flow_netlink.h"
@@ -329,7 +330,8 @@ size_t ovs_tun_key_attr_size(void)
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
+ nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
- /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with
+ /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and
+ * OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS are mutually exclusive with
* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
*/
+ nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
@@ -400,6 +402,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
.next = ovs_vxlan_ext_key_lens },
[OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
[OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
+ [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = OVS_ATTR_VARIABLE },
};
static const struct ovs_len_tbl
@@ -631,6 +634,33 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
return 0;
}
+static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
+ struct sw_flow_match *match, bool is_mask,
+ bool log)
+{
+ unsigned long opt_key_offset;
+
+ BUILD_BUG_ON(sizeof(struct erspan_metadata) >
+ sizeof(match->key->tun_opts));
+
+ if (nla_len(a) > sizeof(match->key->tun_opts)) {
+ OVS_NLERR(log, "ERSPAN option length err (len %d, max %zu).",
+ nla_len(a), sizeof(match->key->tun_opts));
+ return -EINVAL;
+ }
+
+ if (!is_mask)
+ SW_FLOW_KEY_PUT(match, tun_opts_len,
+ sizeof(struct erspan_metadata), false);
+ else
+ SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
+
+ opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
+ SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
+ nla_len(a), is_mask);
+ return 0;
+}
+
static int ip_tun_from_nlattr(const struct nlattr *attr,
struct sw_flow_match *match, bool is_mask,
bool log)
@@ -738,6 +768,20 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
break;
case OVS_TUNNEL_KEY_ATTR_PAD:
break;
+ case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+ if (opts_type) {
+ OVS_NLERR(log, "Multiple metadata blocks provided");
+ return -EINVAL;
+ }
+
+ err = erspan_tun_opt_from_nlattr(a, match, is_mask,
+ log);
+ if (err)
+ return err;
+
+ tun_flags |= TUNNEL_ERSPAN_OPT;
+ opts_type = type;
+ break;
default:
OVS_NLERR(log, "Unknown IP tunnel attribute %d",
type);
@@ -862,6 +906,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
return -EMSGSIZE;
+ else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
+ nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
+ swkey_tun_opts_len, tun_opts))
+ return -EMSGSIZE;
}
return 0;
@@ -2486,6 +2534,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
break;
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
break;
+ case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+ break;
}
}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 2e554ef6d75f..9920d2f84eff 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -90,9 +90,10 @@ void rds_tcp_nonagle(struct socket *sock)
sizeof(val));
}
-u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
+u32 rds_tcp_write_seq(struct rds_tcp_connection *tc)
{
- return tcp_sk(tc->t_sock->sk)->snd_nxt;
+ /* seq# of the last byte of data in tcp send buffer */
+ return tcp_sk(tc->t_sock->sk)->write_seq;
}
u32 rds_tcp_snd_una(struct rds_tcp_connection *tc)
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index e7858ee8ed8b..c6fa080e9b6d 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -55,7 +55,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_restore_callbacks(struct socket *sock,
struct rds_tcp_connection *tc);
-u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc);
+u32 rds_tcp_write_seq(struct rds_tcp_connection *tc);
u32 rds_tcp_snd_una(struct rds_tcp_connection *tc);
u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq);
extern struct rds_transport rds_tcp_transport;
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 73c74763ca72..16f65744d984 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -86,7 +86,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
* m_ack_seq is set to the sequence number of the last byte of
* header and data. see rds_tcp_is_acked().
*/
- tc->t_last_sent_nxt = rds_tcp_snd_nxt(tc);
+ tc->t_last_sent_nxt = rds_tcp_write_seq(tc);
rm->m_ack_seq = tc->t_last_sent_nxt +
sizeof(struct rds_header) +
be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1;
@@ -98,7 +98,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
rdsdebug("rm %p tcp nxt %u ack_seq %llu\n",
- rm, rds_tcp_snd_nxt(tc),
+ rm, rds_tcp_write_seq(tc),
(unsigned long long)rm->m_ack_seq);
}
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index af4b8ec60d9a..b7ba9b06b147 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -49,6 +49,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
int bind)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
+ struct tcf_csum_params *params_old, *params_new;
struct nlattr *tb[TCA_CSUM_MAX + 1];
struct tc_csum *parm;
struct tcf_csum *p;
@@ -67,7 +68,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
if (!tcf_idr_check(tn, parm->index, a, bind)) {
ret = tcf_idr_create(tn, parm->index, est, a,
- &act_csum_ops, bind, false);
+ &act_csum_ops, bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -80,10 +81,21 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
}
p = to_tcf_csum(*a);
- spin_lock_bh(&p->tcf_lock);
- p->tcf_action = parm->action;
- p->update_flags = parm->update_flags;
- spin_unlock_bh(&p->tcf_lock);
+ ASSERT_RTNL();
+
+ params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
+ if (unlikely(!params_new)) {
+ if (ret == ACT_P_CREATED)
+ tcf_idr_release(*a, bind);
+ return -ENOMEM;
+ }
+ params_old = rtnl_dereference(p->params);
+
+ params_new->action = parm->action;
+ params_new->update_flags = parm->update_flags;
+ rcu_assign_pointer(p->params, params_new);
+ if (params_old)
+ kfree_rcu(params_old, rcu);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
@@ -539,19 +551,21 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_csum *p = to_tcf_csum(a);
- int action;
+ struct tcf_csum_params *params;
u32 update_flags;
+ int action;
+
+ rcu_read_lock();
+ params = rcu_dereference(p->params);
- spin_lock(&p->tcf_lock);
tcf_lastuse_update(&p->tcf_tm);
- bstats_update(&p->tcf_bstats, skb);
- action = p->tcf_action;
- update_flags = p->update_flags;
- spin_unlock(&p->tcf_lock);
+ bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
+ action = params->action;
if (unlikely(action == TC_ACT_SHOT))
- goto drop;
+ goto drop_stats;
+ update_flags = params->update_flags;
switch (tc_skb_protocol(skb)) {
case cpu_to_be16(ETH_P_IP):
if (!tcf_csum_ipv4(skb, update_flags))
@@ -563,13 +577,16 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
break;
}
+unlock:
+ rcu_read_unlock();
return action;
drop:
- spin_lock(&p->tcf_lock);
- p->tcf_qstats.drops++;
- spin_unlock(&p->tcf_lock);
- return TC_ACT_SHOT;
+ action = TC_ACT_SHOT;
+
+drop_stats:
+ qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
+ goto unlock;
}
static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
@@ -577,15 +594,18 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_csum *p = to_tcf_csum(a);
+ struct tcf_csum_params *params;
struct tc_csum opt = {
- .update_flags = p->update_flags,
.index = p->tcf_index,
- .action = p->tcf_action,
.refcnt = p->tcf_refcnt - ref,
.bindcnt = p->tcf_bindcnt - bind,
};
struct tcf_t t;
+ params = rtnl_dereference(p->params);
+ opt.action = params->action;
+ opt.update_flags = params->update_flags;
+
if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -600,6 +620,15 @@ nla_put_failure:
return -1;
}
+static void tcf_csum_cleanup(struct tc_action *a)
+{
+ struct tcf_csum *p = to_tcf_csum(a);
+ struct tcf_csum_params *params;
+
+ params = rcu_dereference_protected(p->params, 1);
+ kfree_rcu(params, rcu);
+}
+
static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
const struct tc_action_ops *ops)
@@ -623,6 +652,7 @@ static struct tc_action_ops act_csum_ops = {
.act = tcf_csum,
.dump = tcf_csum_dump,
.init = tcf_csum_init,
+ .cleanup = tcf_csum_cleanup,
.walk = tcf_csum_walker,
.lookup = tcf_csum_search,
.size = sizeof(struct tcf_csum),
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index f5d293416f46..bcb4ccb5f894 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -172,9 +172,10 @@ errout:
return ERR_PTR(err);
}
-static void tcf_proto_destroy(struct tcf_proto *tp)
+static void tcf_proto_destroy(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
- tp->ops->destroy(tp);
+ tp->ops->destroy(tp, extack);
module_put(tp->ops->owner);
kfree_rcu(tp, rcu);
}
@@ -223,7 +224,7 @@ static void tcf_chain_flush(struct tcf_chain *chain)
tcf_chain_head_change(chain, NULL);
while (tp) {
RCU_INIT_POINTER(chain->filter_chain, tp->next);
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, NULL);
tp = rtnl_dereference(chain->filter_chain);
tcf_chain_put(chain);
}
@@ -1182,7 +1183,7 @@ replay:
tcf_chain_tp_remove(chain, &chain_info, tp);
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_DELTFILTER, false);
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, extack);
err = 0;
goto errout;
}
@@ -1200,7 +1201,7 @@ replay:
case RTM_NEWTFILTER:
if (n->nlmsg_flags & NLM_F_EXCL) {
if (tp_created)
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, NULL);
NL_SET_ERR_MSG(extack, "Filter already exists");
err = -EEXIST;
goto errout;
@@ -1214,7 +1215,7 @@ replay:
goto errout;
if (last) {
tcf_chain_tp_remove(chain, &chain_info, tp);
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, extack);
}
goto errout;
case RTM_GETTFILTER:
@@ -1240,7 +1241,7 @@ replay:
RTM_NEWTFILTER, false);
} else {
if (tp_created)
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, NULL);
}
errout:
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 6088be65d167..d333f5c5101d 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -112,7 +112,7 @@ static void basic_delete_filter(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static void basic_destroy(struct tcf_proto *tp)
+static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f, *n;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 988ad45d78b8..8e5326bc6440 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -147,7 +147,8 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
}
static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
- struct cls_bpf_prog *oldprog)
+ struct cls_bpf_prog *oldprog,
+ struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
struct tc_cls_bpf_offload cls_bpf = {};
@@ -158,14 +159,14 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
skip_sw = prog && tc_skip_sw(prog->gen_flags);
obj = prog ?: oldprog;
- tc_cls_common_offload_init(&cls_bpf.common, tp);
+ tc_cls_common_offload_init(&cls_bpf.common, tp, obj->gen_flags,
+ extack);
cls_bpf.command = TC_CLSBPF_OFFLOAD;
cls_bpf.exts = &obj->exts;
cls_bpf.prog = prog ? prog->filter : NULL;
cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
cls_bpf.name = obj->bpf_name;
cls_bpf.exts_integrated = obj->exts_integrated;
- cls_bpf.gen_flags = obj->gen_flags;
if (oldprog)
tcf_block_offload_dec(block, &oldprog->gen_flags);
@@ -173,7 +174,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
if (prog) {
if (err < 0) {
- cls_bpf_offload_cmd(tp, oldprog, prog);
+ cls_bpf_offload_cmd(tp, oldprog, prog, extack);
return err;
} else if (err > 0) {
tcf_block_offload_inc(block, &prog->gen_flags);
@@ -192,7 +193,8 @@ static u32 cls_bpf_flags(u32 flags)
}
static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
- struct cls_bpf_prog *oldprog)
+ struct cls_bpf_prog *oldprog,
+ struct netlink_ext_ack *extack)
{
if (prog && oldprog &&
cls_bpf_flags(prog->gen_flags) !=
@@ -206,15 +208,16 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
if (!prog && !oldprog)
return 0;
- return cls_bpf_offload_cmd(tp, prog, oldprog);
+ return cls_bpf_offload_cmd(tp, prog, oldprog, extack);
}
static void cls_bpf_stop_offload(struct tcf_proto *tp,
- struct cls_bpf_prog *prog)
+ struct cls_bpf_prog *prog,
+ struct netlink_ext_ack *extack)
{
int err;
- err = cls_bpf_offload_cmd(tp, NULL, prog);
+ err = cls_bpf_offload_cmd(tp, NULL, prog, extack);
if (err)
pr_err("Stopping hardware offload failed: %d\n", err);
}
@@ -225,13 +228,12 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
struct tcf_block *block = tp->chain->block;
struct tc_cls_bpf_offload cls_bpf = {};
- tc_cls_common_offload_init(&cls_bpf.common, tp);
+ tc_cls_common_offload_init(&cls_bpf.common, tp, prog->gen_flags, NULL);
cls_bpf.command = TC_CLSBPF_STATS;
cls_bpf.exts = &prog->exts;
cls_bpf.prog = prog->filter;
cls_bpf.name = prog->bpf_name;
cls_bpf.exts_integrated = prog->exts_integrated;
- cls_bpf.gen_flags = prog->gen_flags;
tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
}
@@ -288,12 +290,13 @@ static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
tcf_queue_work(&prog->work);
}
-static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
+static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog,
+ struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
idr_remove_ext(&head->handle_idr, prog->handle);
- cls_bpf_stop_offload(tp, prog);
+ cls_bpf_stop_offload(tp, prog, extack);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
if (tcf_exts_get_net(&prog->exts))
@@ -307,18 +310,19 @@ static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
- __cls_bpf_delete(tp, arg);
+ __cls_bpf_delete(tp, arg, extack);
*last = list_empty(&head->plist);
return 0;
}
-static void cls_bpf_destroy(struct tcf_proto *tp)
+static void cls_bpf_destroy(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog, *tmp;
list_for_each_entry_safe(prog, tmp, &head->plist, link)
- __cls_bpf_delete(tp, prog);
+ __cls_bpf_delete(tp, prog, extack);
idr_destroy(&head->handle_idr);
kfree_rcu(head, rcu);
@@ -514,7 +518,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (ret < 0)
goto errout_idr;
- ret = cls_bpf_offload(tp, prog, oldprog);
+ ret = cls_bpf_offload(tp, prog, oldprog, extack);
if (ret)
goto errout_parms;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 1b54fbfca414..762da5c0cf5e 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -143,7 +143,8 @@ errout:
return err;
}
-static void cls_cgroup_destroy(struct tcf_proto *tp)
+static void cls_cgroup_destroy(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 64c24b488058..cd5fe383afdd 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -600,7 +600,7 @@ static int flow_init(struct tcf_proto *tp)
return 0;
}
-static void flow_destroy(struct tcf_proto *tp)
+static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f, *next;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index c6ac4a612c4a..dc9acaafc0a8 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -218,12 +218,13 @@ static void fl_destroy_filter(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
+static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
+ struct netlink_ext_ack *extack)
{
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
- tc_cls_common_offload_init(&cls_flower.common, tp);
+ tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = TC_CLSFLOWER_DESTROY;
cls_flower.cookie = (unsigned long) f;
@@ -235,14 +236,15 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
static int fl_hw_replace_filter(struct tcf_proto *tp,
struct flow_dissector *dissector,
struct fl_flow_key *mask,
- struct cls_fl_filter *f)
+ struct cls_fl_filter *f,
+ struct netlink_ext_ack *extack)
{
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
bool skip_sw = tc_skip_sw(f->flags);
int err;
- tc_cls_common_offload_init(&cls_flower.common, tp);
+ tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = TC_CLSFLOWER_REPLACE;
cls_flower.cookie = (unsigned long) f;
cls_flower.dissector = dissector;
@@ -254,7 +256,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
&cls_flower, skip_sw);
if (err < 0) {
- fl_hw_destroy_filter(tp, f);
+ fl_hw_destroy_filter(tp, f, NULL);
return err;
} else if (err > 0) {
tcf_block_offload_inc(block, &f->flags);
@@ -271,7 +273,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
- tc_cls_common_offload_init(&cls_flower.common, tp);
+ tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
cls_flower.command = TC_CLSFLOWER_STATS;
cls_flower.cookie = (unsigned long) f;
cls_flower.exts = &f->exts;
@@ -281,14 +283,15 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
&cls_flower, false);
}
-static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
+static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
+ struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
idr_remove_ext(&head->handle_idr, f->handle);
list_del_rcu(&f->list);
if (!tc_skip_hw(f->flags))
- fl_hw_destroy_filter(tp, f);
+ fl_hw_destroy_filter(tp, f, extack);
tcf_unbind_filter(tp, &f->res);
if (tcf_exts_get_net(&f->exts))
call_rcu(&f->rcu, fl_destroy_filter);
@@ -314,13 +317,13 @@ static void fl_destroy_rcu(struct rcu_head *rcu)
schedule_work(&head->work);
}
-static void fl_destroy(struct tcf_proto *tp)
+static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f, *next;
list_for_each_entry_safe(f, next, &head->filters, list)
- __fl_delete(tp, f);
+ __fl_delete(tp, f, extack);
idr_destroy(&head->handle_idr);
__module_get(THIS_MODULE);
@@ -943,7 +946,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
err = fl_hw_replace_filter(tp,
&head->dissector,
&mask.key,
- fnew);
+ fnew,
+ extack);
if (err)
goto errout_idr;
}
@@ -956,7 +960,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
rhashtable_remove_fast(&head->ht, &fold->ht_node,
head->ht_params);
if (!tc_skip_hw(fold->flags))
- fl_hw_destroy_filter(tp, fold);
+ fl_hw_destroy_filter(tp, fold, NULL);
}
*arg = fnew;
@@ -995,7 +999,7 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
if (!tc_skip_sw(f->flags))
rhashtable_remove_fast(&head->ht, &f->ht_node,
head->ht_params);
- __fl_delete(tp, f);
+ __fl_delete(tp, f, extack);
*last = list_empty(&head->filters);
return 0;
}
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 94d159a8869a..8b207723fbc2 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -149,7 +149,7 @@ static void fw_delete_filter(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static void fw_destroy(struct tcf_proto *tp)
+static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index f67d3d7fcf40..2ba721a590a7 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -71,12 +71,13 @@ static void mall_destroy_rcu(struct rcu_head *rcu)
static void mall_destroy_hw_filter(struct tcf_proto *tp,
struct cls_mall_head *head,
- unsigned long cookie)
+ unsigned long cookie,
+ struct netlink_ext_ack *extack)
{
struct tc_cls_matchall_offload cls_mall = {};
struct tcf_block *block = tp->chain->block;
- tc_cls_common_offload_init(&cls_mall.common, tp);
+ tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
cls_mall.command = TC_CLSMATCHALL_DESTROY;
cls_mall.cookie = cookie;
@@ -86,14 +87,15 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp,
static int mall_replace_hw_filter(struct tcf_proto *tp,
struct cls_mall_head *head,
- unsigned long cookie)
+ unsigned long cookie,
+ struct netlink_ext_ack *extack)
{
struct tc_cls_matchall_offload cls_mall = {};
struct tcf_block *block = tp->chain->block;
bool skip_sw = tc_skip_sw(head->flags);
int err;
- tc_cls_common_offload_init(&cls_mall.common, tp);
+ tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
cls_mall.command = TC_CLSMATCHALL_REPLACE;
cls_mall.exts = &head->exts;
cls_mall.cookie = cookie;
@@ -101,7 +103,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL,
&cls_mall, skip_sw);
if (err < 0) {
- mall_destroy_hw_filter(tp, head, cookie);
+ mall_destroy_hw_filter(tp, head, cookie, NULL);
return err;
} else if (err > 0) {
tcf_block_offload_inc(block, &head->flags);
@@ -113,7 +115,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
return 0;
}
-static void mall_destroy(struct tcf_proto *tp)
+static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
@@ -121,7 +123,7 @@ static void mall_destroy(struct tcf_proto *tp)
return;
if (!tc_skip_hw(head->flags))
- mall_destroy_hw_filter(tp, head, (unsigned long) head);
+ mall_destroy_hw_filter(tp, head, (unsigned long) head, extack);
if (tcf_exts_get_net(&head->exts))
call_rcu(&head->rcu, mall_destroy_rcu);
@@ -205,7 +207,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
goto err_set_parms;
if (!tc_skip_hw(new->flags)) {
- err = mall_replace_hw_filter(tp, new, (unsigned long)new);
+ err = mall_replace_hw_filter(tp, new, (unsigned long)new,
+ extack);
if (err)
goto err_replace_hw_filter;
}
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 55467c30d524..21a03a8ee029 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -281,7 +281,7 @@ static void route4_delete_filter(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static void route4_destroy(struct tcf_proto *tp)
+static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
int h1, h2;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 5cc0df690cff..4f1297657c27 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -322,7 +322,7 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
__rsvp_delete_filter(f);
}
-static void rsvp_destroy(struct tcf_proto *tp)
+static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
int h1, h2;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 01a163e0b6aa..b49cc990a000 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -581,7 +581,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
}
}
-static void tcindex_destroy(struct tcf_proto *tp)
+static void tcindex_destroy(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcf_walker walker;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 57113e936155..60c892c36a60 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -87,6 +87,7 @@ struct tc_u_hnode {
unsigned int divisor;
struct idr handle_idr;
struct rcu_head rcu;
+ u32 flags;
/* The 'ht' field MUST be the last field in structure to allow for
* more entries allocated at end of structure.
*/
@@ -486,12 +487,13 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
return 0;
}
-static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
+static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
+ struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
- tc_cls_common_offload_init(&cls_u32.common, tp);
+ tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack);
cls_u32.command = TC_CLSU32_DELETE_HNODE;
cls_u32.hnode.divisor = h->divisor;
cls_u32.hnode.handle = h->handle;
@@ -501,7 +503,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
}
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
- u32 flags)
+ u32 flags, struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
@@ -509,7 +511,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
bool offloaded = false;
int err;
- tc_cls_common_offload_init(&cls_u32.common, tp);
+ tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
cls_u32.command = TC_CLSU32_NEW_HNODE;
cls_u32.hnode.divisor = h->divisor;
cls_u32.hnode.handle = h->handle;
@@ -517,7 +519,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
if (err < 0) {
- u32_clear_hw_hnode(tp, h);
+ u32_clear_hw_hnode(tp, h, NULL);
return err;
} else if (err > 0) {
offloaded = true;
@@ -529,12 +531,13 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
return 0;
}
-static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n)
+static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
+ struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
- tc_cls_common_offload_init(&cls_u32.common, tp);
+ tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
cls_u32.command = TC_CLSU32_DELETE_KNODE;
cls_u32.knode.handle = n->handle;
@@ -543,14 +546,14 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n)
}
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
- u32 flags)
+ u32 flags, struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
bool skip_sw = tc_skip_sw(flags);
int err;
- tc_cls_common_offload_init(&cls_u32.common, tp);
+ tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
cls_u32.command = TC_CLSU32_REPLACE_KNODE;
cls_u32.knode.handle = n->handle;
cls_u32.knode.fshift = n->fshift;
@@ -568,7 +571,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
if (err < 0) {
- u32_remove_hw_knode(tp, n);
+ u32_remove_hw_knode(tp, n, NULL);
return err;
} else if (err > 0) {
tcf_block_offload_inc(block, &n->flags);
@@ -580,7 +583,8 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
return 0;
}
-static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
+static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
+ struct netlink_ext_ack *extack)
{
struct tc_u_knode *n;
unsigned int h;
@@ -590,7 +594,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
RCU_INIT_POINTER(ht->ht[h],
rtnl_dereference(n->next));
tcf_unbind_filter(tp, &n->res);
- u32_remove_hw_knode(tp, n);
+ u32_remove_hw_knode(tp, n, extack);
idr_remove_ext(&ht->handle_idr, n->handle);
if (tcf_exts_get_net(&n->exts))
call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
@@ -600,7 +604,8 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
}
}
-static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
+static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
+ struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode __rcu **hn;
@@ -608,14 +613,14 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
WARN_ON(ht->refcnt);
- u32_clear_hnode(tp, ht);
+ u32_clear_hnode(tp, ht, extack);
hn = &tp_c->hlist;
for (phn = rtnl_dereference(*hn);
phn;
hn = &phn->next, phn = rtnl_dereference(*hn)) {
if (phn == ht) {
- u32_clear_hw_hnode(tp, ht);
+ u32_clear_hw_hnode(tp, ht, extack);
idr_destroy(&ht->handle_idr);
idr_remove_ext(&tp_c->handle_idr, ht->handle);
RCU_INIT_POINTER(*hn, ht->next);
@@ -638,7 +643,7 @@ static bool ht_empty(struct tc_u_hnode *ht)
return true;
}
-static void u32_destroy(struct tcf_proto *tp)
+static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
@@ -646,7 +651,7 @@ static void u32_destroy(struct tcf_proto *tp)
WARN_ON(root_ht == NULL);
if (root_ht && --root_ht->refcnt == 0)
- u32_destroy_hnode(tp, root_ht);
+ u32_destroy_hnode(tp, root_ht, extack);
if (--tp_c->refcnt == 0) {
struct tc_u_hnode *ht;
@@ -657,7 +662,7 @@ static void u32_destroy(struct tcf_proto *tp)
ht;
ht = rtnl_dereference(ht->next)) {
ht->refcnt--;
- u32_clear_hnode(tp, ht);
+ u32_clear_hnode(tp, ht, extack);
}
while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
@@ -684,7 +689,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
goto out;
if (TC_U32_KEY(ht->handle)) {
- u32_remove_hw_knode(tp, (struct tc_u_knode *)ht);
+ u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack);
ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
goto out;
}
@@ -696,7 +701,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
if (ht->refcnt == 1) {
ht->refcnt--;
- u32_destroy_hnode(tp, ht);
+ u32_destroy_hnode(tp, ht, extack);
} else {
NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
return -EBUSY;
@@ -965,7 +970,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return err;
}
- err = u32_replace_hw_knode(tp, new, flags);
+ err = u32_replace_hw_knode(tp, new, flags, extack);
if (err) {
u32_destroy_key(tp, new, false);
return err;
@@ -1015,8 +1020,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
ht->handle = handle;
ht->prio = tp->prio;
idr_init(&ht->handle_idr);
+ ht->flags = flags;
- err = u32_replace_hw_hnode(tp, ht, flags);
+ err = u32_replace_hw_hnode(tp, ht, flags, extack);
if (err) {
idr_remove_ext(&tp_c->handle_idr, handle);
kfree(ht);
@@ -1122,7 +1128,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tc_u_knode __rcu **ins;
struct tc_u_knode *pins;
- err = u32_replace_hw_knode(tp, n, flags);
+ err = u32_replace_hw_knode(tp, n, flags, extack);
if (err)
goto errhw;
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index df3110d69585..07c10bac06a0 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -51,7 +51,7 @@ static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em,
if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len))
return 0;
- return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len);
+ return !memcmp(ptr, nbyte->pattern, nbyte->hdr.len);
}
static struct tcf_ematch_ops em_nbyte_ops = {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ef8b4ecde2ac..1816bde47256 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -510,7 +510,7 @@ void netif_carrier_on(struct net_device *dev)
if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
if (dev->reg_state == NETREG_UNINITIALIZED)
return;
- atomic_inc(&dev->carrier_changes);
+ atomic_inc(&dev->carrier_up_count);
linkwatch_fire_event(dev);
if (netif_running(dev))
__netdev_watchdog_up(dev);
@@ -529,7 +529,7 @@ void netif_carrier_off(struct net_device *dev)
if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
if (dev->reg_state == NETREG_UNINITIALIZED)
return;
- atomic_inc(&dev->carrier_changes);
+ atomic_inc(&dev->carrier_down_count);
linkwatch_fire_event(dev);
}
}
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 275925b93b29..35bc7106d182 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -45,6 +45,9 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct sctphdr *sh;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP))
+ goto out;
+
sh = sctp_hdr(skb);
if (!pskb_may_pull(skb, sizeof(*sh)))
goto out;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7ff444ecee75..a40fa53c93ef 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4860,9 +4860,10 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
struct net *net, int *pos, void *p) {
struct rhashtable_iter hti;
struct sctp_transport *tsp;
- int ret = 0;
+ int ret;
again:
+ ret = 0;
sctp_transport_walk_start(&hti);
tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index daf8075f5a4c..267e68379110 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -115,7 +115,6 @@ static int smc_release(struct socket *sock)
goto out;
smc = smc_sk(sk);
- sock_hold(sk);
if (sk->sk_state == SMC_LISTEN)
/* smc_close_non_accepted() is called and acquires
* sock lock for child sockets again
@@ -124,10 +123,7 @@ static int smc_release(struct socket *sock)
else
lock_sock(sk);
- if (smc->use_fallback) {
- sk->sk_state = SMC_CLOSED;
- sk->sk_state_change(sk);
- } else {
+ if (!smc->use_fallback) {
rc = smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
@@ -136,20 +132,21 @@ static int smc_release(struct socket *sock)
sock_release(smc->clcsock);
smc->clcsock = NULL;
}
+ if (smc->use_fallback) {
+ sock_put(sk); /* passive closing */
+ sk->sk_state = SMC_CLOSED;
+ sk->sk_state_change(sk);
+ }
/* detach socket */
sock_orphan(sk);
sock->sk = NULL;
- if (smc->use_fallback) {
- schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
- } else if (sk->sk_state == SMC_CLOSED) {
+ if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
- }
release_sock(sk);
- sock_put(sk);
+ sk->sk_prot->unhash(sk);
+ sock_put(sk); /* final sock_put */
out:
return rc;
}
@@ -181,7 +178,6 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_LIST_HEAD(&smc->accept_q);
spin_lock_init(&smc->accept_q_lock);
- INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
sk->sk_prot->hash(sk);
sk_refcnt_debug_inc(sk);
@@ -377,6 +373,15 @@ static void smc_link_save_peer_info(struct smc_link *link,
link->peer_mtu = clc->qp_mtu;
}
+static void smc_lgr_forget(struct smc_link_group *lgr)
+{
+ spin_lock_bh(&smc_lgr_list.lock);
+ /* do not use this link group for new connections */
+ if (!list_empty(&lgr->list))
+ list_del_init(&lgr->list);
+ spin_unlock_bh(&smc_lgr_list.lock);
+}
+
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
@@ -390,6 +395,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
int rc = 0;
u8 ibport;
+ sock_hold(&smc->sk); /* sock put in passive closing */
+
if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
/* peer has not signalled SMC-capability */
smc->use_fallback = true;
@@ -513,6 +520,8 @@ out_connected:
return rc ? rc : local_contact;
decline_rdma_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
decline_rdma:
@@ -526,9 +535,13 @@ decline_rdma:
goto out_connected;
out_err_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
out_err:
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
return rc;
}
@@ -581,40 +594,33 @@ out_err:
static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
- struct sock *sk = &lsmc->sk;
- struct socket *new_clcsock;
+ struct socket *new_clcsock = NULL;
+ struct sock *lsk = &lsmc->sk;
struct sock *new_sk;
int rc;
- release_sock(&lsmc->sk);
- new_sk = smc_sock_alloc(sock_net(sk), NULL);
+ release_sock(lsk);
+ new_sk = smc_sock_alloc(sock_net(lsk), NULL);
if (!new_sk) {
rc = -ENOMEM;
- lsmc->sk.sk_err = ENOMEM;
+ lsk->sk_err = ENOMEM;
*new_smc = NULL;
- lock_sock(&lsmc->sk);
+ lock_sock(lsk);
goto out;
}
*new_smc = smc_sk(new_sk);
rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
- lock_sock(&lsmc->sk);
- if (rc < 0) {
- lsmc->sk.sk_err = -rc;
- new_sk->sk_state = SMC_CLOSED;
- sock_set_flag(new_sk, SOCK_DEAD);
- sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
- *new_smc = NULL;
- goto out;
- }
- if (lsmc->sk.sk_state == SMC_CLOSED) {
+ lock_sock(lsk);
+ if (rc < 0)
+ lsk->sk_err = -rc;
+ if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
if (new_clcsock)
sock_release(new_clcsock);
new_sk->sk_state = SMC_CLOSED;
sock_set_flag(new_sk, SOCK_DEAD);
- sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
+ new_sk->sk_prot->unhash(new_sk);
+ sock_put(new_sk); /* final */
*new_smc = NULL;
goto out;
}
@@ -631,7 +637,7 @@ static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
struct smc_sock *par = smc_sk(parent);
- sock_hold(sk);
+ sock_hold(sk); /* sock_put in smc_accept_unlink() */
spin_lock(&par->accept_q_lock);
list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
spin_unlock(&par->accept_q_lock);
@@ -647,7 +653,7 @@ static void smc_accept_unlink(struct sock *sk)
list_del_init(&smc_sk(sk)->accept_q);
spin_unlock(&par->accept_q_lock);
sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
- sock_put(sk);
+ sock_put(sk); /* sock_hold in smc_accept_enqueue */
}
/* remove a sock from the accept queue to bind it to a new socket created
@@ -664,8 +670,12 @@ struct sock *smc_accept_dequeue(struct sock *parent,
smc_accept_unlink(new_sk);
if (new_sk->sk_state == SMC_CLOSED) {
+ if (isk->clcsock) {
+ sock_release(isk->clcsock);
+ isk->clcsock = NULL;
+ }
new_sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
+ sock_put(new_sk); /* final */
continue;
}
if (new_sock)
@@ -680,14 +690,11 @@ void smc_close_non_accepted(struct sock *sk)
{
struct smc_sock *smc = smc_sk(sk);
- sock_hold(sk);
lock_sock(sk);
if (!sk->sk_lingertime)
/* wait for peer closing */
sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
- if (smc->use_fallback) {
- sk->sk_state = SMC_CLOSED;
- } else {
+ if (!smc->use_fallback) {
smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
@@ -700,14 +707,15 @@ void smc_close_non_accepted(struct sock *sk)
sock_release(tcp);
}
if (smc->use_fallback) {
- schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
- } else if (sk->sk_state == SMC_CLOSED) {
- smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
+ sock_put(sk); /* passive closing */
+ sk->sk_state = SMC_CLOSED;
+ } else {
+ if (sk->sk_state == SMC_CLOSED)
+ smc_conn_free(&smc->conn);
}
release_sock(sk);
- sock_put(sk);
+ sk->sk_prot->unhash(sk);
+ sock_put(sk); /* final sock_put */
}
static int smc_serv_conf_first_link(struct smc_sock *smc)
@@ -913,6 +921,8 @@ enqueue:
return;
decline_rdma_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(new_smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
decline_rdma:
/* RDMA setup failed, switch back to TCP */
@@ -925,8 +935,12 @@ decline_rdma:
goto out_connected;
out_err_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(new_smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
out_err:
+ if (newsmcsk->sk_state == SMC_INIT)
+ sock_put(&new_smc->sk); /* passive closing */
newsmcsk->sk_state = SMC_CLOSED;
smc_conn_free(&new_smc->conn);
goto enqueue; /* queue new sock with sk_err set */
@@ -936,11 +950,12 @@ static void smc_tcp_listen_work(struct work_struct *work)
{
struct smc_sock *lsmc = container_of(work, struct smc_sock,
tcp_listen_work);
+ struct sock *lsk = &lsmc->sk;
struct smc_sock *new_smc;
int rc = 0;
- lock_sock(&lsmc->sk);
- while (lsmc->sk.sk_state == SMC_LISTEN) {
+ lock_sock(lsk);
+ while (lsk->sk_state == SMC_LISTEN) {
rc = smc_clcsock_accept(lsmc, &new_smc);
if (rc)
goto out;
@@ -949,15 +964,25 @@ static void smc_tcp_listen_work(struct work_struct *work)
new_smc->listen_smc = lsmc;
new_smc->use_fallback = false; /* assume rdma capability first*/
- sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */
+ sock_hold(lsk); /* sock_put in smc_listen_work */
INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
smc_copy_sock_settings_to_smc(new_smc);
- schedule_work(&new_smc->smc_listen_work);
+ sock_hold(&new_smc->sk); /* sock_put in passive closing */
+ if (!schedule_work(&new_smc->smc_listen_work))
+ sock_put(&new_smc->sk);
}
out:
- release_sock(&lsmc->sk);
- lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */
+ if (lsmc->clcsock) {
+ sock_release(lsmc->clcsock);
+ lsmc->clcsock = NULL;
+ }
+ release_sock(lsk);
+ /* no more listening, wake up smc_close_wait_listen_clcsock and
+ * accept
+ */
+ lsk->sk_state_change(lsk);
+ sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}
static int smc_listen(struct socket *sock, int backlog)
@@ -991,7 +1016,9 @@ static int smc_listen(struct socket *sock, int backlog)
sk->sk_ack_backlog = 0;
sk->sk_state = SMC_LISTEN;
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
- schedule_work(&smc->tcp_listen_work);
+ sock_hold(sk); /* sock_put in smc_tcp_listen_work */
+ if (!schedule_work(&smc->tcp_listen_work))
+ sock_put(sk);
out:
release_sock(sk);
@@ -1008,6 +1035,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
int rc = 0;
lsmc = smc_sk(sk);
+ sock_hold(sk); /* sock_put below */
lock_sock(sk);
if (lsmc->sk.sk_state != SMC_LISTEN) {
@@ -1042,6 +1070,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
out:
release_sock(sk);
+ sock_put(sk); /* sock_hold above */
return rc;
}
@@ -1111,21 +1140,15 @@ out:
static unsigned int smc_accept_poll(struct sock *parent)
{
- struct smc_sock *isk;
- struct sock *sk;
+ struct smc_sock *isk = smc_sk(parent);
+ int mask = 0;
- lock_sock(parent);
- list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) {
- sk = (struct sock *)isk;
-
- if (sk->sk_state == SMC_ACTIVE) {
- release_sock(parent);
- return POLLIN | POLLRDNORM;
- }
- }
- release_sock(parent);
+ spin_lock(&isk->accept_q_lock);
+ if (!list_empty(&isk->accept_q))
+ mask = POLLIN | POLLRDNORM;
+ spin_unlock(&isk->accept_q_lock);
- return 0;
+ return mask;
}
static unsigned int smc_poll(struct file *file, struct socket *sock,
@@ -1136,9 +1159,15 @@ static unsigned int smc_poll(struct file *file, struct socket *sock,
struct smc_sock *smc;
int rc;
+ if (!sk)
+ return POLLNVAL;
+
smc = smc_sk(sock->sk);
+ sock_hold(sk);
+ lock_sock(sk);
if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
/* delegate to CLC child sock */
+ release_sock(sk);
mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
/* if non-blocking connect finished ... */
lock_sock(sk);
@@ -1150,37 +1179,43 @@ static unsigned int smc_poll(struct file *file, struct socket *sock,
rc = smc_connect_rdma(smc);
if (rc < 0)
mask |= POLLERR;
- else
- /* success cases including fallback */
- mask |= POLLOUT | POLLWRNORM;
+ /* success cases including fallback */
+ mask |= POLLOUT | POLLWRNORM;
}
}
- release_sock(sk);
} else {
- sock_poll_wait(file, sk_sleep(sk), wait);
- if (sk->sk_state == SMC_LISTEN)
- /* woken up by sk_data_ready in smc_listen_work() */
- mask |= smc_accept_poll(sk);
+ if (sk->sk_state != SMC_CLOSED) {
+ release_sock(sk);
+ sock_poll_wait(file, sk_sleep(sk), wait);
+ lock_sock(sk);
+ }
if (sk->sk_err)
mask |= POLLERR;
- if (atomic_read(&smc->conn.sndbuf_space) ||
- (sk->sk_shutdown & SEND_SHUTDOWN)) {
- mask |= POLLOUT | POLLWRNORM;
- } else {
- sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- }
- if (atomic_read(&smc->conn.bytes_to_rcv))
- mask |= POLLIN | POLLRDNORM;
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
(sk->sk_state == SMC_CLOSED))
mask |= POLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLIN | POLLRDNORM | POLLRDHUP;
- if (sk->sk_state == SMC_APPCLOSEWAIT1)
- mask |= POLLIN;
+ if (sk->sk_state == SMC_LISTEN) {
+ /* woken up by sk_data_ready in smc_listen_work() */
+ mask = smc_accept_poll(sk);
+ } else {
+ if (atomic_read(&smc->conn.sndbuf_space) ||
+ sk->sk_shutdown & SEND_SHUTDOWN) {
+ mask |= POLLOUT | POLLWRNORM;
+ } else {
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ }
+ if (atomic_read(&smc->conn.bytes_to_rcv))
+ mask |= POLLIN | POLLRDNORM;
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ mask |= POLLIN | POLLRDNORM | POLLRDHUP;
+ if (sk->sk_state == SMC_APPCLOSEWAIT1)
+ mask |= POLLIN;
+ }
}
+ release_sock(sk);
+ sock_put(sk);
return mask;
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 0bee9d16cf29..9518986c97b1 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -178,7 +178,6 @@ struct smc_sock { /* smc sock container */
struct work_struct smc_listen_work;/* prepare new accept socket */
struct list_head accept_q; /* sockets to be accepted */
spinlock_t accept_q_lock; /* protects accept_q */
- struct delayed_work sock_put_work; /* final socket freeing */
bool use_fallback; /* fallback to tcp */
u8 wait_close_tx_prepared : 1;
/* shutdown wr or close
@@ -253,12 +252,12 @@ static inline int smc_uncompress_bufsize(u8 compressed)
static inline bool using_ipsec(struct smc_sock *smc)
{
return (smc->clcsock->sk->sk_policy[0] ||
- smc->clcsock->sk->sk_policy[1]) ? 1 : 0;
+ smc->clcsock->sk->sk_policy[1]) ? true : false;
}
#else
static inline bool using_ipsec(struct smc_sock *smc)
{
- return 0;
+ return false;
}
#endif
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index d4155ff6acde..3cd086e5bd28 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -57,9 +57,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
cdcpend->conn);
}
smc_tx_sndbuf_nonfull(smc);
- if (smc->sk.sk_state != SMC_ACTIVE)
- /* wake up smc_close_wait_tx_pends() */
- smc->sk.sk_state_change(&smc->sk);
bh_unlock_sock(&smc->sk);
}
@@ -68,9 +65,14 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_cdc_tx_pend **pend)
{
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+ int rc;
- return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
- (struct smc_wr_tx_pend_priv **)pend);
+ rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
+ (struct smc_wr_tx_pend_priv **)pend);
+ if (!conn->alert_token_local)
+ /* abnormal termination */
+ rc = -EPIPE;
+ return rc;
}
static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
@@ -155,14 +157,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
(unsigned long)conn);
}
-bool smc_cdc_tx_has_pending(struct smc_connection *conn)
-{
- struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
-
- return smc_wr_tx_has_pending(link, SMC_CDC_MSG_TYPE,
- smc_cdc_tx_filter, (unsigned long)conn);
-}
-
/********************************* receive ***********************************/
static inline bool smc_cdc_before(u16 seq1, u16 seq2)
@@ -218,6 +212,14 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
smc->sk.sk_data_ready(&smc->sk);
}
+ /* piggy backed tx info */
+ /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
+ if (diff_cons && smc_tx_prepared_sends(conn)) {
+ smc_tx_sndbuf_nonempty(conn);
+ /* trigger socket release if connection closed */
+ smc_close_wake_tx_prepared(smc);
+ }
+
if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
smc->sk.sk_err = ECONNRESET;
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
@@ -227,15 +229,9 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
if (smc->clcsock && smc->clcsock->sk)
smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(&smc->sk, SOCK_DONE);
- schedule_work(&conn->close_work);
- }
-
- /* piggy backed tx info */
- /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
- if (diff_cons && smc_tx_prepared_sends(conn)) {
- smc_tx_sndbuf_nonempty(conn);
- /* trigger socket release if connection closed */
- smc_close_wake_tx_prepared(smc);
+ sock_hold(&smc->sk); /* sock_put in close_work */
+ if (!schedule_work(&conn->close_work))
+ sock_put(&smc->sk);
}
}
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 149ceda1b088..ab240b37ad11 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -214,7 +214,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend);
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
-bool smc_cdc_tx_has_pending(struct smc_connection *conn);
int smc_cdc_init(void) __init;
#endif /* SMC_CDC_H */
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index e194c6cc308a..e339c0186dcf 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -19,7 +19,7 @@
#include "smc_cdc.h"
#include "smc_close.h"
-#define SMC_CLOSE_WAIT_TX_PENDS_TIME (5 * HZ)
+#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ)
static void smc_close_cleanup_listen(struct sock *parent)
{
@@ -30,23 +30,24 @@ static void smc_close_cleanup_listen(struct sock *parent)
smc_close_non_accepted(sk);
}
-static void smc_close_wait_tx_pends(struct smc_sock *smc)
+static void smc_close_wait_listen_clcsock(struct smc_sock *smc)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = &smc->sk;
signed long timeout;
- timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
+ timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME;
add_wait_queue(sk_sleep(sk), &wait);
- while (!signal_pending(current) && timeout) {
- int rc;
-
- rc = sk_wait_event(sk, &timeout,
- !smc_cdc_tx_has_pending(&smc->conn),
- &wait);
- if (rc)
+ do {
+ release_sock(sk);
+ if (smc->clcsock)
+ timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE,
+ timeout);
+ sched_annotate_sleep();
+ lock_sock(sk);
+ if (!smc->clcsock)
break;
- }
+ } while (timeout);
remove_wait_queue(sk_sleep(sk), &wait);
}
@@ -111,58 +112,63 @@ static int smc_close_abort(struct smc_connection *conn)
}
/* terminate smc socket abnormally - active abort
- * RDMA communication no longer possible
+ * link group is terminated, i.e. RDMA communication no longer possible
*/
static void smc_close_active_abort(struct smc_sock *smc)
{
+ struct sock *sk = &smc->sk;
+
struct smc_cdc_conn_state_flags *txflags =
&smc->conn.local_tx_ctrl.conn_state_flags;
- smc->sk.sk_err = ECONNABORTED;
+ sk->sk_err = ECONNABORTED;
if (smc->clcsock && smc->clcsock->sk) {
smc->clcsock->sk->sk_err = ECONNABORTED;
smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
}
- switch (smc->sk.sk_state) {
+ switch (sk->sk_state) {
case SMC_INIT:
case SMC_ACTIVE:
- smc->sk.sk_state = SMC_PEERABORTWAIT;
+ sk->sk_state = SMC_PEERABORTWAIT;
+ release_sock(sk);
+ cancel_delayed_work_sync(&smc->conn.tx_work);
+ lock_sock(sk);
+ sock_put(sk); /* passive closing */
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
- txflags->peer_conn_abort = 1;
- sock_release(smc->clcsock);
if (!smc_cdc_rxed_any_close(&smc->conn))
- smc->sk.sk_state = SMC_PEERABORTWAIT;
+ sk->sk_state = SMC_PEERABORTWAIT;
else
- smc->sk.sk_state = SMC_CLOSED;
+ sk->sk_state = SMC_CLOSED;
+ release_sock(sk);
+ cancel_delayed_work_sync(&smc->conn.tx_work);
+ lock_sock(sk);
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (!txflags->peer_conn_closed) {
- smc->sk.sk_state = SMC_PEERABORTWAIT;
- txflags->peer_conn_abort = 1;
- sock_release(smc->clcsock);
+ /* just SHUTDOWN_SEND done */
+ sk->sk_state = SMC_PEERABORTWAIT;
} else {
- smc->sk.sk_state = SMC_CLOSED;
+ sk->sk_state = SMC_CLOSED;
}
+ sock_put(sk); /* passive closing */
break;
case SMC_PROCESSABORT:
case SMC_APPFINCLOSEWAIT:
- if (!txflags->peer_conn_closed) {
- txflags->peer_conn_abort = 1;
- sock_release(smc->clcsock);
- }
- smc->sk.sk_state = SMC_CLOSED;
+ sk->sk_state = SMC_CLOSED;
break;
case SMC_PEERFINCLOSEWAIT:
+ sock_put(sk); /* passive closing */
+ break;
case SMC_PEERABORTWAIT:
case SMC_CLOSED:
break;
}
- sock_set_flag(&smc->sk, SOCK_DEAD);
- smc->sk.sk_state_change(&smc->sk);
+ sock_set_flag(sk, SOCK_DEAD);
+ sk->sk_state_change(sk);
}
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
@@ -185,13 +191,11 @@ int smc_close_active(struct smc_sock *smc)
0 : sock_flag(sk, SOCK_LINGER) ?
sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
-again:
old_state = sk->sk_state;
- switch (old_state) {
+again:
+ switch (sk->sk_state) {
case SMC_INIT:
sk->sk_state = SMC_CLOSED;
- if (smc->smc_listen_work.func)
- cancel_work_sync(&smc->smc_listen_work);
break;
case SMC_LISTEN:
sk->sk_state = SMC_CLOSED;
@@ -200,11 +204,9 @@ again:
rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
/* wake up kernel_accept of smc_tcp_listen_worker */
smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
+ smc_close_wait_listen_clcsock(smc);
}
- release_sock(sk);
smc_close_cleanup_listen(sk);
- cancel_work_sync(&smc->smc_listen_work);
- lock_sock(sk);
break;
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
@@ -214,6 +216,8 @@ again:
if (sk->sk_state == SMC_ACTIVE) {
/* send close request */
rc = smc_close_final(conn);
+ if (rc)
+ break;
sk->sk_state = SMC_PEERCLOSEWAIT1;
} else {
/* peer event has changed the state */
@@ -226,9 +230,10 @@ again:
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
+ if (rc)
+ break;
}
sk->sk_state = SMC_CLOSED;
- smc_close_wait_tx_pends(smc);
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
@@ -237,19 +242,21 @@ again:
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
- if (sk->sk_err != ECONNABORTED) {
- /* confirm close from peer */
- rc = smc_close_final(conn);
- if (rc)
- break;
- }
- if (smc_cdc_rxed_any_close(conn))
+ if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
+ sk->sk_state != SMC_APPCLOSEWAIT2)
+ goto again;
+ /* confirm close from peer */
+ rc = smc_close_final(conn);
+ if (rc)
+ break;
+ if (smc_cdc_rxed_any_close(conn)) {
/* peer has closed the socket already */
sk->sk_state = SMC_CLOSED;
- else
+ sock_put(sk); /* postponed passive closing */
+ } else {
/* peer has just issued a shutdown write */
sk->sk_state = SMC_PEERFINCLOSEWAIT;
- smc_close_wait_tx_pends(smc);
+ }
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
@@ -257,6 +264,8 @@ again:
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
+ if (rc)
+ break;
}
/* peer sending PeerConnectionClosed will cause transition */
break;
@@ -264,12 +273,8 @@ again:
/* peer sending PeerConnectionClosed will cause transition */
break;
case SMC_PROCESSABORT:
- release_sock(sk);
- cancel_delayed_work_sync(&conn->tx_work);
- lock_sock(sk);
smc_close_abort(conn);
sk->sk_state = SMC_CLOSED;
- smc_close_wait_tx_pends(smc);
break;
case SMC_PEERABORTWAIT:
case SMC_CLOSED:
@@ -278,7 +283,7 @@ again:
}
if (old_state != sk->sk_state)
- sk->sk_state_change(&smc->sk);
+ sk->sk_state_change(sk);
return rc;
}
@@ -289,37 +294,42 @@ static void smc_close_passive_abort_received(struct smc_sock *smc)
struct sock *sk = &smc->sk;
switch (sk->sk_state) {
+ case SMC_INIT:
case SMC_ACTIVE:
- case SMC_APPFINCLOSEWAIT:
case SMC_APPCLOSEWAIT1:
- case SMC_APPCLOSEWAIT2:
- smc_close_abort(&smc->conn);
+ sk->sk_state = SMC_PROCESSABORT;
+ sock_put(sk); /* passive closing */
+ break;
+ case SMC_APPFINCLOSEWAIT:
sk->sk_state = SMC_PROCESSABORT;
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (txflags->peer_done_writing &&
- !smc_close_sent_any_close(&smc->conn)) {
+ !smc_close_sent_any_close(&smc->conn))
/* just shutdown, but not yet closed locally */
- smc_close_abort(&smc->conn);
sk->sk_state = SMC_PROCESSABORT;
- } else {
+ else
sk->sk_state = SMC_CLOSED;
- }
+ sock_put(sk); /* passive closing */
break;
+ case SMC_APPCLOSEWAIT2:
case SMC_PEERFINCLOSEWAIT:
+ sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ break;
case SMC_PEERABORTWAIT:
sk->sk_state = SMC_CLOSED;
break;
- case SMC_INIT:
case SMC_PROCESSABORT:
/* nothing to do, add tracing in future patch */
break;
}
}
-/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
- * or peer_done_writing.
+/* Either some kind of closing has been received: peer_conn_closed,
+ * peer_conn_abort, or peer_done_writing
+ * or the link group of the connection terminates abnormally.
*/
static void smc_close_passive_work(struct work_struct *work)
{
@@ -331,7 +341,7 @@ static void smc_close_passive_work(struct work_struct *work)
struct sock *sk = &smc->sk;
int old_state;
- lock_sock(&smc->sk);
+ lock_sock(sk);
old_state = sk->sk_state;
if (!conn->alert_token_local) {
@@ -340,23 +350,32 @@ static void smc_close_passive_work(struct work_struct *work)
goto wakeup;
}
- rxflags = &smc->conn.local_rx_ctrl.conn_state_flags;
+ rxflags = &conn->local_rx_ctrl.conn_state_flags;
if (rxflags->peer_conn_abort) {
+ /* peer has not received all data */
smc_close_passive_abort_received(smc);
+ release_sock(&smc->sk);
+ cancel_delayed_work_sync(&conn->tx_work);
+ lock_sock(&smc->sk);
goto wakeup;
}
switch (sk->sk_state) {
case SMC_INIT:
- if (atomic_read(&smc->conn.bytes_to_rcv) ||
+ if (atomic_read(&conn->bytes_to_rcv) ||
(rxflags->peer_done_writing &&
- !smc_cdc_rxed_any_close(conn)))
+ !smc_cdc_rxed_any_close(conn))) {
sk->sk_state = SMC_APPCLOSEWAIT1;
- else
+ } else {
sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ }
break;
case SMC_ACTIVE:
sk->sk_state = SMC_APPCLOSEWAIT1;
+ /* postpone sock_put() for passive closing to cover
+ * received SEND_SHUTDOWN as well
+ */
break;
case SMC_PEERCLOSEWAIT1:
if (rxflags->peer_done_writing)
@@ -364,8 +383,7 @@ static void smc_close_passive_work(struct work_struct *work)
/* fall through */
/* to check for closing */
case SMC_PEERCLOSEWAIT2:
- case SMC_PEERFINCLOSEWAIT:
- if (!smc_cdc_rxed_any_close(&smc->conn))
+ if (!smc_cdc_rxed_any_close(conn))
break;
if (sock_flag(sk, SOCK_DEAD) &&
smc_close_sent_any_close(conn)) {
@@ -375,9 +393,20 @@ static void smc_close_passive_work(struct work_struct *work)
/* just shutdown, but not yet closed locally */
sk->sk_state = SMC_APPFINCLOSEWAIT;
}
+ sock_put(sk); /* passive closing */
+ break;
+ case SMC_PEERFINCLOSEWAIT:
+ if (smc_cdc_rxed_any_close(conn)) {
+ sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ }
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
+ /* postpone sock_put() for passive closing to cover
+ * received SEND_SHUTDOWN as well
+ */
+ break;
case SMC_APPFINCLOSEWAIT:
case SMC_PEERABORTWAIT:
case SMC_PROCESSABORT:
@@ -393,23 +422,11 @@ wakeup:
if (old_state != sk->sk_state) {
sk->sk_state_change(sk);
if ((sk->sk_state == SMC_CLOSED) &&
- (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
- smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
- }
+ (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket))
+ smc_conn_free(conn);
}
- release_sock(&smc->sk);
-}
-
-void smc_close_sock_put_work(struct work_struct *work)
-{
- struct smc_sock *smc = container_of(to_delayed_work(work),
- struct smc_sock,
- sock_put_work);
-
- smc->sk.sk_prot->unhash(&smc->sk);
- sock_put(&smc->sk);
+ release_sock(sk);
+ sock_put(sk); /* sock_hold done by schedulers of close_work */
}
int smc_close_shutdown_write(struct smc_sock *smc)
@@ -424,20 +441,21 @@ int smc_close_shutdown_write(struct smc_sock *smc)
0 : sock_flag(sk, SOCK_LINGER) ?
sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
-again:
old_state = sk->sk_state;
- switch (old_state) {
+again:
+ switch (sk->sk_state) {
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
+ if (sk->sk_state != SMC_ACTIVE)
+ goto again;
/* send close wr request */
rc = smc_close_wr(conn);
- if (sk->sk_state == SMC_ACTIVE)
- sk->sk_state = SMC_PEERCLOSEWAIT1;
- else
- goto again;
+ if (rc)
+ break;
+ sk->sk_state = SMC_PEERCLOSEWAIT1;
break;
case SMC_APPCLOSEWAIT1:
/* passive close */
@@ -446,8 +464,12 @@ again:
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
+ if (sk->sk_state != SMC_APPCLOSEWAIT1)
+ goto again;
/* confirm close from peer */
rc = smc_close_wr(conn);
+ if (rc)
+ break;
sk->sk_state = SMC_APPCLOSEWAIT2;
break;
case SMC_APPCLOSEWAIT2:
@@ -462,7 +484,7 @@ again:
}
if (old_state != sk->sk_state)
- sk->sk_state_change(&smc->sk);
+ sk->sk_state_change(sk);
return rc;
}
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index 8c498885d758..19eb6a211c23 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -21,7 +21,6 @@
void smc_close_wake_tx_prepared(struct smc_sock *smc);
int smc_close_active(struct smc_sock *smc);
-void smc_close_sock_put_work(struct work_struct *work);
int smc_close_shutdown_write(struct smc_sock *smc);
void smc_close_init(struct smc_sock *smc);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 94f21116dac5..2424c7100aaf 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -128,6 +128,8 @@ static void smc_lgr_free_work(struct work_struct *work)
bool conns;
spin_lock_bh(&smc_lgr_list.lock);
+ if (list_empty(&lgr->list))
+ goto free;
read_lock_bh(&lgr->conns_lock);
conns = RB_EMPTY_ROOT(&lgr->conns_all);
read_unlock_bh(&lgr->conns_lock);
@@ -136,6 +138,7 @@ static void smc_lgr_free_work(struct work_struct *work)
return;
}
list_del_init(&lgr->list); /* remove from smc_lgr_list */
+free:
spin_unlock_bh(&smc_lgr_list.lock);
smc_lgr_free(lgr);
}
@@ -231,9 +234,7 @@ static void smc_buf_unuse(struct smc_connection *conn)
/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
-
- if (!lgr)
+ if (!conn->lgr)
return;
smc_cdc_tx_dismiss_slots(conn);
smc_lgr_unregister_conn(conn);
@@ -327,13 +328,17 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
while (node) {
conn = rb_entry(node, struct smc_connection, alert_node);
smc = container_of(conn, struct smc_sock, conn);
- sock_hold(&smc->sk);
+ sock_hold(&smc->sk); /* sock_put in close work */
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
__smc_lgr_unregister_conn(conn);
- schedule_work(&conn->close_work);
- sock_put(&smc->sk);
+ write_unlock_bh(&lgr->conns_lock);
+ if (!schedule_work(&conn->close_work))
+ sock_put(&smc->sk);
+ write_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
}
write_unlock_bh(&lgr->conns_lock);
+ wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
}
/* Determine vlan of internal TCP socket.
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index d2d01cf70224..427b91c1c964 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -86,7 +86,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
goto errout;
- if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) {
+ if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&
+ smc->conn.alert_token_local) {
struct smc_connection *conn = &smc->conn;
struct smc_diag_conninfo cinfo = {
.token = conn->alert_token_local,
@@ -124,7 +125,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
goto errout;
}
- if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) {
+ if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
+ !list_empty(&smc->conn.lgr->list)) {
struct smc_diag_lgrinfo linfo = {
.role = smc->conn.lgr->role,
.lnk[0].ibport = smc->conn.lgr->lnk[0].ibport,
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 90f1a7f9085c..2a8957bd6d38 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -141,6 +141,17 @@ out:
return rc;
}
+static void smc_ib_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
+{
+ struct smc_link_group *lgr, *l;
+
+ list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
+ if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
+ lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
+ smc_lgr_terminate(lgr);
+ }
+}
+
/* process context wrapper for might_sleep smc_ib_remember_port_attr */
static void smc_ib_port_event_work(struct work_struct *work)
{
@@ -151,6 +162,8 @@ static void smc_ib_port_event_work(struct work_struct *work)
for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) {
smc_ib_remember_port_attr(smcibdev, port_idx + 1);
clear_bit(port_idx, &smcibdev->port_event_mask);
+ if (!smc_ib_port_active(smcibdev, port_idx + 1))
+ smc_ib_port_terminate(smcibdev, port_idx + 1);
}
}
@@ -165,15 +178,7 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
switch (ibevent->event) {
case IB_EVENT_PORT_ERR:
- port_idx = ibevent->element.port_num - 1;
- set_bit(port_idx, &smcibdev->port_event_mask);
- schedule_work(&smcibdev->port_event_work);
- /* fall through */
case IB_EVENT_DEVICE_FATAL:
- /* tbd in follow-on patch:
- * abnormal close of corresponding connections
- */
- break;
case IB_EVENT_PORT_ACTIVE:
port_idx = ibevent->element.port_num - 1;
set_bit(port_idx, &smcibdev->port_event_mask);
@@ -186,7 +191,8 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
void smc_ib_dealloc_protection_domain(struct smc_link *lnk)
{
- ib_dealloc_pd(lnk->roce_pd);
+ if (lnk->roce_pd)
+ ib_dealloc_pd(lnk->roce_pd);
lnk->roce_pd = NULL;
}
@@ -203,14 +209,18 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
{
+ struct smc_ib_device *smcibdev =
+ (struct smc_ib_device *)ibevent->device;
+ u8 port_idx;
+
switch (ibevent->event) {
case IB_EVENT_DEVICE_FATAL:
case IB_EVENT_GID_CHANGE:
case IB_EVENT_PORT_ERR:
case IB_EVENT_QP_ACCESS_ERR:
- /* tbd in follow-on patch:
- * abnormal close of corresponding connections
- */
+ port_idx = ibevent->element.port_num - 1;
+ set_bit(port_idx, &smcibdev->port_event_mask);
+ schedule_work(&smcibdev->port_event_work);
break;
default:
break;
@@ -219,7 +229,8 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
void smc_ib_destroy_queue_pair(struct smc_link *lnk)
{
- ib_destroy_qp(lnk->roce_qp);
+ if (lnk->roce_qp)
+ ib_destroy_qp(lnk->roce_qp);
lnk->roce_qp = NULL;
}
@@ -462,6 +473,7 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
{
if (!smcibdev->initialized)
return;
+ smcibdev->initialized = 0;
smc_wr_remove_dev(smcibdev);
ib_unregister_event_handler(&smcibdev->event_handler);
ib_destroy_cq(smcibdev->roce_cq_recv);
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 2e50fddf8ce9..838bce20c361 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -86,7 +86,7 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
rc = -EPIPE;
break;
}
- if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
+ if (smc_cdc_rxed_any_close(conn)) {
rc = -ECONNRESET;
break;
}
@@ -107,7 +107,7 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
sk_wait_event(sk, &timeo,
sk->sk_err ||
(sk->sk_shutdown & SEND_SHUTDOWN) ||
- smc_cdc_rxed_any_close_or_senddone(conn) ||
+ smc_cdc_rxed_any_close(conn) ||
atomic_read(&conn->sndbuf_space),
&wait);
}
@@ -248,8 +248,10 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
peer_rmbe_offset;
rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr);
- if (rc)
+ if (rc) {
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+ smc_lgr_terminate(lgr);
+ }
return rc;
}
@@ -406,8 +408,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
goto out_unlock;
}
rc = 0;
- schedule_delayed_work(&conn->tx_work,
- SMC_TX_WORK_DELAY);
+ if (conn->alert_token_local) /* connection healthy */
+ schedule_delayed_work(&conn->tx_work,
+ SMC_TX_WORK_DELAY);
}
goto out_unlock;
}
@@ -438,10 +441,17 @@ static void smc_tx_work(struct work_struct *work)
int rc;
lock_sock(&smc->sk);
+ if (smc->sk.sk_err ||
+ !conn->alert_token_local ||
+ conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+ goto out;
+
rc = smc_tx_sndbuf_nonempty(conn);
if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
!atomic_read(&conn->bytes_to_rcv))
conn->local_rx_ctrl.prod_flags.write_blocked = 0;
+
+out:
release_sock(&smc->sk);
}
@@ -462,7 +472,8 @@ void smc_tx_consumer_update(struct smc_connection *conn)
((to_confirm > conn->rmbe_update_limit) &&
((to_confirm > (conn->rmbe_size / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
- if (smc_cdc_get_slot_and_msg_send(conn) < 0) {
+ if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
+ conn->alert_token_local) { /* connection healthy */
schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
return;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index de4537f66832..1b8af23e6e2b 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -122,6 +122,7 @@ static void smc_wr_tx_tasklet_fn(unsigned long data)
again:
polled++;
do {
+ memset(&wc, 0, sizeof(wc));
rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
if (polled == 1) {
ib_req_notify_cq(dev->roce_cq_send,
@@ -173,9 +174,9 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv **wr_pend_priv)
{
struct smc_wr_tx_pend *wr_pend;
+ u32 idx = link->wr_tx_cnt;
struct ib_send_wr *wr_ib;
u64 wr_id;
- u32 idx;
int rc;
*wr_buf = NULL;
@@ -185,21 +186,20 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
if (rc)
return rc;
} else {
- rc = wait_event_interruptible_timeout(
+ struct smc_link_group *lgr;
+
+ lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+ rc = wait_event_timeout(
link->wr_tx_wait,
+ list_empty(&lgr->list) || /* lgr terminated */
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
if (!rc) {
/* timeout - terminate connections */
- struct smc_link_group *lgr;
-
- lgr = container_of(link, struct smc_link_group,
- lnk[SMC_SINGLE_LINK]);
smc_lgr_terminate(lgr);
return -EPIPE;
}
- if (rc == -ERESTARTSYS)
- return -EINTR;
if (idx == link->wr_tx_cnt)
return -EPIPE;
}
@@ -249,8 +249,14 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
pend = container_of(priv, struct smc_wr_tx_pend, priv);
rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
&failed_wr);
- if (rc)
+ if (rc) {
+ struct smc_link_group *lgr =
+ container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
smc_wr_tx_put_slot(link, priv);
+ smc_lgr_terminate(lgr);
+ }
return rc;
}
@@ -300,18 +306,18 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
return rc;
}
-void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
+void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type,
smc_wr_tx_filter filter,
smc_wr_tx_dismisser dismisser,
unsigned long data)
{
struct smc_wr_tx_pend_priv *tx_pend;
- struct smc_wr_rx_hdr *wr_rx;
+ struct smc_wr_rx_hdr *wr_tx;
int i;
for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
- wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i];
- if (wr_rx->type != wr_rx_hdr_type)
+ wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
+ if (wr_tx->type != wr_tx_hdr_type)
continue;
tx_pend = &link->wr_tx_pends[i].priv;
if (filter(tx_pend, data))
@@ -319,24 +325,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
}
}
-bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
- smc_wr_tx_filter filter, unsigned long data)
-{
- struct smc_wr_tx_pend_priv *tx_pend;
- struct smc_wr_rx_hdr *wr_rx;
- int i;
-
- for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
- wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i];
- if (wr_rx->type != wr_rx_hdr_type)
- continue;
- tx_pend = &link->wr_tx_pends[i].priv;
- if (filter(tx_pend, data))
- return true;
- }
- return false;
-}
-
/****************************** receive queue ********************************/
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 2acf12b06063..ef0c3494c9cb 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -93,8 +93,6 @@ int smc_wr_tx_put_slot(struct smc_link *link,
int smc_wr_tx_send(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
-bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
- smc_wr_tx_filter filter, unsigned long data);
void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
smc_wr_tx_filter filter,
smc_wr_tx_dismisser dismisser,
diff --git a/net/socket.c b/net/socket.c
index 1536515b6437..11cc2cd0f37b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -961,9 +961,28 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
* If this ioctl is unknown try to hand it down
* to the NIC driver.
*/
- if (err == -ENOIOCTLCMD)
- err = dev_ioctl(net, cmd, argp);
+ if (err != -ENOIOCTLCMD)
+ return err;
+ if (cmd == SIOCGIFCONF) {
+ struct ifconf ifc;
+ if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
+ return -EFAULT;
+ rtnl_lock();
+ err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
+ rtnl_unlock();
+ if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
+ err = -EFAULT;
+ } else {
+ struct ifreq ifr;
+ bool need_copyout;
+ if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = dev_ioctl(net, cmd, &ifr, &need_copyout);
+ if (!err && need_copyout)
+ if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ }
return err;
}
@@ -988,12 +1007,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
sock = file->private_data;
sk = sock->sk;
net = sock_net(sk);
- if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
- err = dev_ioctl(net, cmd, argp);
+ if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
+ struct ifreq ifr;
+ bool need_copyout;
+ if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = dev_ioctl(net, cmd, &ifr, &need_copyout);
+ if (!err && need_copyout)
+ if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
} else
#ifdef CONFIG_WEXT_CORE
if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
- err = dev_ioctl(net, cmd, argp);
+ err = wext_handle_ioctl(net, cmd, argp);
} else
#endif
switch (cmd) {
@@ -2654,89 +2680,25 @@ static int do_siocgstampns(struct net *net, struct socket *sock,
return err;
}
-static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
-{
- struct ifreq __user *uifr;
- int err;
-
- uifr = compat_alloc_user_space(sizeof(struct ifreq));
- if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
- return -EFAULT;
-
- err = dev_ioctl(net, SIOCGIFNAME, uifr);
- if (err)
- return err;
-
- if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
- return -EFAULT;
-
- return 0;
-}
-
-static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
+static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
{
struct compat_ifconf ifc32;
struct ifconf ifc;
- struct ifconf __user *uifc;
- struct compat_ifreq __user *ifr32;
- struct ifreq __user *ifr;
- unsigned int i, j;
int err;
if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
return -EFAULT;
- memset(&ifc, 0, sizeof(ifc));
- if (ifc32.ifcbuf == 0) {
- ifc32.ifc_len = 0;
- ifc.ifc_len = 0;
- ifc.ifc_req = NULL;
- uifc = compat_alloc_user_space(sizeof(struct ifconf));
- } else {
- size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
- sizeof(struct ifreq);
- uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
- ifc.ifc_len = len;
- ifr = ifc.ifc_req = (void __user *)(uifc + 1);
- ifr32 = compat_ptr(ifc32.ifcbuf);
- for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
- if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
- return -EFAULT;
- ifr++;
- ifr32++;
- }
- }
- if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
- return -EFAULT;
+ ifc.ifc_len = ifc32.ifc_len;
+ ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
- err = dev_ioctl(net, SIOCGIFCONF, uifc);
+ rtnl_lock();
+ err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
+ rtnl_unlock();
if (err)
return err;
- if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
- return -EFAULT;
-
- ifr = ifc.ifc_req;
- ifr32 = compat_ptr(ifc32.ifcbuf);
- for (i = 0, j = 0;
- i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
- i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
- if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
- return -EFAULT;
- ifr32++;
- ifr++;
- }
-
- if (ifc32.ifcbuf == 0) {
- /* Translate from 64-bit structure multiple to
- * a 32-bit one.
- */
- i = ifc.ifc_len;
- i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
- ifc32.ifc_len = i;
- } else {
- ifc32.ifc_len = i;
- }
+ ifc32.ifc_len = ifc.ifc_len;
if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
return -EFAULT;
@@ -2747,9 +2709,9 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
{
struct compat_ethtool_rxnfc __user *compat_rxnfc;
bool convert_in = false, convert_out = false;
- size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
- struct ethtool_rxnfc __user *rxnfc;
- struct ifreq __user *ifr;
+ size_t buf_size = 0;
+ struct ethtool_rxnfc __user *rxnfc = NULL;
+ struct ifreq ifr;
u32 rule_cnt = 0, actual_rule_cnt;
u32 ethcmd;
u32 data;
@@ -2786,18 +2748,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
case ETHTOOL_SRXCLSRLDEL:
buf_size += sizeof(struct ethtool_rxnfc);
convert_in = true;
+ rxnfc = compat_alloc_user_space(buf_size);
break;
}
- ifr = compat_alloc_user_space(buf_size);
- rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
-
- if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
+ if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
return -EFAULT;
- if (put_user(convert_in ? rxnfc : compat_ptr(data),
- &ifr->ifr_ifru.ifru_data))
- return -EFAULT;
+ ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
if (convert_in) {
/* We expect there to be holes between fs.m_ext and
@@ -2825,7 +2783,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
return -EFAULT;
}
- ret = dev_ioctl(net, SIOCETHTOOL, ifr);
+ ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
if (ret)
return ret;
@@ -2866,113 +2824,43 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
{
- void __user *uptr;
compat_uptr_t uptr32;
- struct ifreq __user *uifr;
+ struct ifreq ifr;
+ void __user *saved;
+ int err;
- uifr = compat_alloc_user_space(sizeof(*uifr));
- if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
+ if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
return -EFAULT;
if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
return -EFAULT;
- uptr = compat_ptr(uptr32);
-
- if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
- return -EFAULT;
-
- return dev_ioctl(net, SIOCWANDEV, uifr);
-}
-
-static int bond_ioctl(struct net *net, unsigned int cmd,
- struct compat_ifreq __user *ifr32)
-{
- struct ifreq kifr;
- mm_segment_t old_fs;
- int err;
-
- switch (cmd) {
- case SIOCBONDENSLAVE:
- case SIOCBONDRELEASE:
- case SIOCBONDSETHWADDR:
- case SIOCBONDCHANGEACTIVE:
- if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
- return -EFAULT;
-
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- err = dev_ioctl(net, cmd,
- (struct ifreq __user __force *) &kifr);
- set_fs(old_fs);
+ saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
+ ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
- return err;
- default:
- return -ENOIOCTLCMD;
+ err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
+ if (!err) {
+ ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
+ if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
+ err = -EFAULT;
}
+ return err;
}
/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
struct compat_ifreq __user *u_ifreq32)
{
- struct ifreq __user *u_ifreq64;
- char tmp_buf[IFNAMSIZ];
- void __user *data64;
+ struct ifreq ifreq;
u32 data32;
- if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
- IFNAMSIZ))
- return -EFAULT;
- if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
- return -EFAULT;
- data64 = compat_ptr(data32);
-
- u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
-
- if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
- IFNAMSIZ))
+ if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
return -EFAULT;
- if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
+ if (get_user(data32, &u_ifreq32->ifr_data))
return -EFAULT;
+ ifreq.ifr_data = compat_ptr(data32);
- return dev_ioctl(net, cmd, u_ifreq64);
-}
-
-static int dev_ifsioc(struct net *net, struct socket *sock,
- unsigned int cmd, struct compat_ifreq __user *uifr32)
-{
- struct ifreq __user *uifr;
- int err;
-
- uifr = compat_alloc_user_space(sizeof(*uifr));
- if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
- return -EFAULT;
-
- err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
-
- if (!err) {
- switch (cmd) {
- case SIOCGIFFLAGS:
- case SIOCGIFMETRIC:
- case SIOCGIFMTU:
- case SIOCGIFMEM:
- case SIOCGIFHWADDR:
- case SIOCGIFINDEX:
- case SIOCGIFADDR:
- case SIOCGIFBRDADDR:
- case SIOCGIFDSTADDR:
- case SIOCGIFNETMASK:
- case SIOCGIFPFLAGS:
- case SIOCGIFTXQLEN:
- case SIOCGMIIPHY:
- case SIOCGMIIREG:
- if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
- err = -EFAULT;
- break;
- }
- }
- return err;
+ return dev_ioctl(net, cmd, &ifreq, NULL);
}
static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
@@ -2980,7 +2868,6 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
{
struct ifreq ifr;
struct compat_ifmap __user *uifmap32;
- mm_segment_t old_fs;
int err;
uifmap32 = &uifr32->ifr_ifru.ifru_map;
@@ -2994,10 +2881,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
if (err)
return -EFAULT;
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
- set_fs(old_fs);
+ err = dev_ioctl(net, cmd, &ifr, NULL);
if (cmd == SIOCGIFMAP && !err) {
err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
@@ -3130,10 +3014,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCSIFBR:
case SIOCGIFBR:
return old_bridge_ioctl(argp);
- case SIOCGIFNAME:
- return dev_ifname32(net, argp);
case SIOCGIFCONF:
- return dev_ifconf(net, argp);
+ return compat_dev_ifconf(net, argp);
case SIOCETHTOOL:
return ethtool_ioctl(net, argp);
case SIOCWANDEV:
@@ -3141,11 +3023,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCGIFMAP:
case SIOCSIFMAP:
return compat_sioc_ifmap(net, cmd, argp);
- case SIOCBONDENSLAVE:
- case SIOCBONDRELEASE:
- case SIOCBONDSETHWADDR:
- case SIOCBONDCHANGEACTIVE:
- return bond_ioctl(net, cmd, argp);
case SIOCADDRT:
case SIOCDELRT:
return routing_ioctl(net, sock, cmd, argp);
@@ -3205,12 +3082,15 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSMIIREG:
- return dev_ifsioc(net, sock, cmd, argp);
-
case SIOCSARP:
case SIOCGARP:
case SIOCDARP:
case SIOCATMARK:
+ case SIOCBONDENSLAVE:
+ case SIOCBONDRELEASE:
+ case SIOCBONDSETHWADDR:
+ case SIOCBONDCHANGEACTIVE:
+ case SIOCGIFNAME:
return sock_do_ioctl(net, sock, cmd, arg);
}
@@ -3365,19 +3245,6 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
}
EXPORT_SYMBOL(kernel_sendpage_locked);
-int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
-{
- mm_segment_t oldfs = get_fs();
- int err;
-
- set_fs(KERNEL_DS);
- err = sock->ops->ioctl(sock, cmd, arg);
- set_fs(oldfs);
-
- return err;
-}
-EXPORT_SYMBOL(kernel_sock_ioctl);
-
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
{
return sock->ops->shutdown(sock, how);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 61f394d369bf..0a9b72fbd761 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -577,6 +577,8 @@ alloc_payload:
get_page(page);
sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem;
sg_set_page(sg, page, copy, offset);
+ sg_unmark_end(sg);
+
ctx->sg_plaintext_num_elem++;
sk_mem_charge(sk, copy);
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 6cdb054484d6..9efbfc753347 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -1035,18 +1035,23 @@ static int ioctl_standard_call(struct net_device * dev,
}
-int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
- void __user *arg)
+int wext_handle_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
struct iw_request_info info = { .cmd = cmd, .flags = 0 };
+ struct iwreq iwr;
int ret;
- ret = wext_ioctl_dispatch(net, iwr, cmd, &info,
+ if (copy_from_user(&iwr, arg, sizeof(iwr)))
+ return -EFAULT;
+
+ iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0;
+
+ ret = wext_ioctl_dispatch(net, &iwr, cmd, &info,
ioctl_standard_call,
ioctl_private_call);
if (ret >= 0 &&
IW_IS_GET(cmd) &&
- copy_to_user(arg, iwr, sizeof(struct iwreq)))
+ copy_to_user(arg, &iwr, sizeof(struct iwreq)))
return -EFAULT;
return ret;
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 75982506617b..8e70291e586a 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -147,8 +147,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
if (!x->type_offload)
return -EINVAL;
- /* We don't yet support UDP encapsulation, TFC padding and ESN. */
- if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN))
+ /* We don't yet support UDP encapsulation and TFC padding. */
+ if (x->encap || x->tfcpad)
return -EINVAL;
dev = dev_get_by_index(net, xuo->ifindex);
@@ -178,12 +178,20 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
return 0;
}
+ if (x->props.flags & XFRM_STATE_ESN &&
+ !dev->xfrmdev_ops->xdo_dev_state_advance_esn) {
+ xso->dev = NULL;
+ dev_put(dev);
+ return -EINVAL;
+ }
+
xso->dev = dev;
xso->num_exthdrs = 1;
xso->flags = xuo->flags;
err = dev->xfrmdev_ops->xdo_dev_state_add(x);
if (err) {
+ xso->dev = NULL;
dev_put(dev);
return err;
}
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 02501817227b..1d38c6acf8af 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -551,6 +551,8 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
bitnr = replay_esn->replay_window - (diff - pos);
}
+ xfrm_dev_state_advance_esn(x);
+
nr = bitnr >> 5;
bitnr = bitnr & 0x1F;
replay_esn->bmp[nr] |= (1U << bitnr);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 20b1e414dbee..54e21f19d722 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -317,7 +317,7 @@ retry:
if (!type && try_load) {
request_module("xfrm-offload-%d-%d", family, proto);
- try_load = 0;
+ try_load = false;
goto retry;
}
@@ -2279,8 +2279,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
goto error;
}
- x->km.state = XFRM_STATE_VALID;
-
error:
return err;
}
@@ -2289,7 +2287,13 @@ EXPORT_SYMBOL(__xfrm_init_state);
int xfrm_init_state(struct xfrm_state *x)
{
- return __xfrm_init_state(x, true, false);
+ int err;
+
+ err = __xfrm_init_state(x, true, false);
+ if (!err)
+ x->km.state = XFRM_STATE_VALID;
+
+ return err;
}
EXPORT_SYMBOL(xfrm_init_state);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index bdb48e5dba04..7f52b8eb177d 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -598,13 +598,6 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
goto error;
}
- if (attrs[XFRMA_OFFLOAD_DEV]) {
- err = xfrm_dev_state_add(net, x,
- nla_data(attrs[XFRMA_OFFLOAD_DEV]));
- if (err)
- goto error;
- }
-
if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
attrs[XFRMA_REPLAY_ESN_VAL])))
goto error;
@@ -620,6 +613,14 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
/* override default values from above */
xfrm_update_ae_params(x, attrs, 0);
+ /* configure the hardware if offload is requested */
+ if (attrs[XFRMA_OFFLOAD_DEV]) {
+ err = xfrm_dev_state_add(net, x,
+ nla_data(attrs[XFRMA_OFFLOAD_DEV]));
+ if (err)
+ goto error;
+ }
+
return x;
error:
@@ -662,6 +663,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
}
+ if (x->km.state == XFRM_STATE_VOID)
+ x->km.state = XFRM_STATE_VALID;
+
c.seq = nlh->nlmsg_seq;
c.portid = nlh->nlmsg_pid;
c.event = nlh->nlmsg_type;