From 198d83bb3becf2e9c6c4fa744f35296c20da795a Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Tue, 17 Apr 2018 21:42:14 -0700 Subject: bpf: make generic xdp compatible w/ bpf_xdp_adjust_tail w/ bpf_xdp_adjust_tail helper xdp's data_end pointer could be changed as well (only "decrease" of pointer's location is going to be supported). changing of this pointer will change packet's size. for generic XDP we need to reflect this packet's length change by adjusting skb's tail pointer Acked-by: Alexei Starovoitov Signed-off-by: Nikita V. Shirokov Signed-off-by: Daniel Borkmann --- net/core/dev.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 969462ebb296..11c789231a03 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3996,9 +3996,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, struct bpf_prog *xdp_prog) { struct netdev_rx_queue *rxqueue; + void *orig_data, *orig_data_end; u32 metalen, act = XDP_DROP; struct xdp_buff xdp; - void *orig_data; int hlen, off; u32 mac_len; @@ -4037,6 +4037,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, xdp.data_meta = xdp.data; xdp.data_end = xdp.data + hlen; xdp.data_hard_start = skb->data - skb_headroom(skb); + orig_data_end = xdp.data_end; orig_data = xdp.data; rxqueue = netif_get_rxqueue(skb); @@ -4051,6 +4052,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, __skb_push(skb, -off); skb->mac_header += off; + /* check if bpf_xdp_adjust_tail was used. it can only "shrink" + * pckt. + */ + off = orig_data_end - xdp.data_end; + if (off != 0) + skb_set_tail_pointer(skb, xdp.data_end - xdp.data); + switch (act) { case XDP_REDIRECT: case XDP_TX: -- cgit v1.2.3 From 0fe554a46a0ff855376053c7e4204673b7879f05 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 17 Apr 2018 14:25:30 -0700 Subject: hv_netvsc: propogate Hyper-V friendly name into interface alias This patch implement the 'Device Naming' feature of the Hyper-V network device API. In Hyper-V on the host through the GUI or PowerShell it is possible to enable the device naming feature which causes the host to make available to the guest the name of the device. This shows up in the RNDIS protocol as the friendly name. The name has no particular meaning and is limited to 256 characters. The value can only be set via PowerShell on the host, but could be scripted for mass deployments. The default value is the string 'Network Adapter' and since that is the same for all devices and useless, the driver ignores it. In Windows, the value goes into a registry key for use in SNMP ifAlias. For Linux, this patch puts the value in the network device alias property; where it is visible in ip tools and SNMP. The host provided ifAlias is just a suggestion, and can be overridden by later ip commands. Also requires exporting dev_set_alias in netdev core. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/rndis_filter.c | 28 ++++++++++++++++++++++++++++ net/core/dev.c | 1 + 2 files changed, 29 insertions(+) (limited to 'net/core/dev.c') diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 6b127be781d9..3b6dbacaf77d 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "hyperv_net.h" #include "netvsc_trace.h" @@ -1223,6 +1224,29 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, return ret; } +static void rndis_get_friendly_name(struct net_device *net, + struct rndis_device *rndis_device, + struct netvsc_device *net_device) +{ + ucs2_char_t wname[256]; + unsigned long len; + u8 ifalias[256]; + u32 size; + + size = sizeof(wname); + if (rndis_filter_query_device(rndis_device, net_device, + RNDIS_OID_GEN_FRIENDLY_NAME, + wname, &size) != 0) + return; + + /* Convert Windows Unicode string to UTF-8 */ + len = ucs2_as_utf8(ifalias, wname, sizeof(ifalias)); + + /* ignore the default value from host */ + if (strcmp(ifalias, "Network Adapter") != 0) + dev_set_alias(net, ifalias, len); +} + struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, struct netvsc_device_info *device_info) { @@ -1276,6 +1300,10 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN); + /* Get friendly name as ifalias*/ + if (!net->ifalias) + rndis_get_friendly_name(net, rndis_device, net_device); + /* Query and set hardware capabilities */ ret = rndis_netdev_set_hwcaps(rndis_device, net_device); if (ret != 0) diff --git a/net/core/dev.c b/net/core/dev.c index 969462ebb296..a490ef643586 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1285,6 +1285,7 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) return len; } +EXPORT_SYMBOL(dev_set_alias); /** * dev_get_alias - get ifalias of a device -- cgit v1.2.3 From f761312023a1f0d625e34daadd54c3f9cb2a4ac2 Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Wed, 25 Apr 2018 07:15:03 -0700 Subject: bpf: fix xdp_generic for bpf_adjust_tail usecase When bpf_adjust_tail was introduced for generic xdp, it changed skb's tail pointer, so it was pointing to the new "end of the packet". However skb's len field wasn't properly modified, so on the wire ethernet frame had original (or even bigger, if adjust_head was used) size. This diff is fixing this. Fixes: 198d83bb3 (" bpf: make generic xdp compatible w/ bpf_xdp_adjust_tail") Signed-off-by: Nikita V. Shirokov Signed-off-by: Daniel Borkmann --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index c624a04dad1f..8f8931b93140 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4057,8 +4057,10 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, * pckt. */ off = orig_data_end - xdp.data_end; - if (off != 0) + if (off != 0) { skb_set_tail_pointer(skb, xdp.data_end - xdp.data); + skb->len -= off; + } switch (act) { case XDP_REDIRECT: -- cgit v1.2.3 From 3f5ecd8a90dd553167838098d92206cd9d6142e8 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 26 Apr 2018 15:18:38 +0300 Subject: net: Fix coccinelle warning kbuild test robot says: >coccinelle warnings: (new ones prefixed by >>) >>> net/core/dev.c:1588:2-3: Unneeded semicolon So, let's remove it. Reported-by: kbuild test robot Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 8f8931b93140..0a2d46424069 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1587,7 +1587,7 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd) N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN) N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO) N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO) - }; + } #undef N return "UNKNOWN_NETDEV_EVENT"; } -- cgit v1.2.3 From 1b837d489e06a5289753ddbee99cfbc26d251d6d Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 27 Apr 2018 14:06:53 -0400 Subject: net: Revoke export for __skb_tx_hash, update it to just be static skb_tx_hash I am dropping the export of __skb_tx_hash as after my patches nobody is using it outside of the net/core/dev.c file. In addition I am renaming and repurposing it to just be a static declaration of skb_tx_hash since that was the only user for it at this point. By doing this the compiler can inline it into __netdev_pick_tx as that will improve performance. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 13 ------------- net/core/dev.c | 10 ++++------ 2 files changed, 4 insertions(+), 19 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 366c32891158..82f5a9aba578 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3213,19 +3213,6 @@ static inline int netif_set_xps_queue(struct net_device *dev, } #endif -u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, - unsigned int num_tx_queues); - -/* - * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used - * as a distribution range limit for the returned value. - */ -static inline u16 skb_tx_hash(const struct net_device *dev, - struct sk_buff *skb) -{ - return __skb_tx_hash(dev, skb, dev->real_num_tx_queues); -} - /** * netif_is_multiqueue - test if device has multiple transmit queues * @dev: network device diff --git a/net/core/dev.c b/net/core/dev.c index 0a2d46424069..25ceecfdd8fe 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2615,17 +2615,16 @@ EXPORT_SYMBOL(netif_device_attach); * Returns a Tx hash based on the given packet descriptor a Tx queues' number * to be used as a distribution range. */ -u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, - unsigned int num_tx_queues) +static u16 skb_tx_hash(const struct net_device *dev, struct sk_buff *skb) { u32 hash; u16 qoffset = 0; - u16 qcount = num_tx_queues; + u16 qcount = dev->real_num_tx_queues; if (skb_rx_queue_recorded(skb)) { hash = skb_get_rx_queue(skb); - while (unlikely(hash >= num_tx_queues)) - hash -= num_tx_queues; + while (unlikely(hash >= qcount)) + hash -= qcount; return hash; } @@ -2638,7 +2637,6 @@ u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; } -EXPORT_SYMBOL(__skb_tx_hash); static void skb_warn_bad_offload(const struct sk_buff *skb) { -- cgit v1.2.3 From 3ac305c386f698abccd0523c64a8aef248c89bc6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 27 Apr 2018 13:11:14 -0700 Subject: net: core: Assert the size of netdev_featres_t We have about 53 netdev_features_t bits defined and counting, add a build time check to catch when an u64 type will not be enough and we will have to convert that to a bitmap. This is done in register_netdevice() for convenience. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ net/core/dev.c | 1 + 2 files changed, 7 insertions(+) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 82f5a9aba578..9e09dd897b74 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4108,6 +4108,12 @@ const char *netdev_drivername(const struct net_device *dev); void linkwatch_run_queue(void); +static inline void netdev_features_size_check(void) +{ + BUILD_BUG_ON(sizeof(netdev_features_t) * BITS_PER_BYTE < + NETDEV_FEATURE_COUNT); +} + static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, netdev_features_t f2) { diff --git a/net/core/dev.c b/net/core/dev.c index 25ceecfdd8fe..e01c21a88cae 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7879,6 +7879,7 @@ int register_netdevice(struct net_device *dev) int ret; struct net *net = dev_net(dev); + netdev_features_size_check(); BUG_ON(dev_boot_phase); ASSERT_RTNL(); -- cgit v1.2.3 From ebf4e808fa0b22e551baf862e17c26c325c068f4 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 30 Apr 2018 10:16:12 +0300 Subject: net: Add Software fallback infrastructure for socket dependent offloads With socket dependent offloads we rely on the netdev to transform the transmitted packets before sending them to the wire. When a packet from an offloaded socket is rerouted to a different device we need to detect it and do the transformation in software. Signed-off-by: Ilya Lesokhin Signed-off-by: Boris Pismenny Signed-off-by: David S. Miller --- include/net/sock.h | 21 +++++++++++++++++++++ net/Kconfig | 3 +++ net/core/dev.c | 4 ++++ 3 files changed, 28 insertions(+) (limited to 'net/core/dev.c') diff --git a/include/net/sock.h b/include/net/sock.h index 74d725fdbe0f..3c568b36ee36 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -481,6 +481,11 @@ struct sock { void (*sk_error_report)(struct sock *sk); int (*sk_backlog_rcv)(struct sock *sk, struct sk_buff *skb); +#ifdef CONFIG_SOCK_VALIDATE_XMIT + struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk, + struct net_device *dev, + struct sk_buff *skb); +#endif void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; struct rcu_head sk_rcu; @@ -2332,6 +2337,22 @@ static inline bool sk_fullsock(const struct sock *sk) return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV); } +/* Checks if this SKB belongs to an HW offloaded socket + * and whether any SW fallbacks are required based on dev. + */ +static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb, + struct net_device *dev) +{ +#ifdef CONFIG_SOCK_VALIDATE_XMIT + struct sock *sk = skb->sk; + + if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) + skb = sk->sk_validate_xmit_skb(sk, dev, skb); +#endif + + return skb; +} + /* This helper checks if a socket is a LISTEN or NEW_SYN_RECV * SYNACK messages can be attached to either ones (depending on SYNCOOKIE) */ diff --git a/net/Kconfig b/net/Kconfig index 6fa1a4493b8c..b62089fb1332 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -407,6 +407,9 @@ config GRO_CELLS bool default n +config SOCK_VALIDATE_XMIT + bool + config NET_DEVLINK tristate "Network physical/parent device Netlink interface" help diff --git a/net/core/dev.c b/net/core/dev.c index e01c21a88cae..8944e1e0059d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3112,6 +3112,10 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device if (unlikely(!skb)) goto out_null; + skb = sk_validate_xmit_skb(skb, dev); + if (unlikely(!skb)) + goto out_null; + if (netif_needs_gso(skb, features)) { struct sk_buff *segs; -- cgit v1.2.3 From e283de3a4fa885aed11525129fd4570f92c1d1a9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 30 Apr 2018 14:20:05 -0700 Subject: net: core: Inline netdev_features_size_check() We do not require this inline function to be used in multiple different locations, just inline it where it gets used in register_netdevice(). Suggested-by: David Miller Suggested-by: Stephen Hemminger Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ------ net/core/dev.c | 3 ++- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a569bf5524fa..46dcb5f7522f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4132,12 +4132,6 @@ const char *netdev_drivername(const struct net_device *dev); void linkwatch_run_queue(void); -static inline void netdev_features_size_check(void) -{ - BUILD_BUG_ON(sizeof(netdev_features_t) * BITS_PER_BYTE < - NETDEV_FEATURE_COUNT); -} - static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, netdev_features_t f2) { diff --git a/net/core/dev.c b/net/core/dev.c index 8944e1e0059d..bb81a6e1d354 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7883,7 +7883,8 @@ int register_netdevice(struct net_device *dev) int ret; struct net *net = dev_net(dev); - netdev_features_size_check(); + BUILD_BUG_ON(sizeof(netdev_features_t) * BITS_PER_BYTE < + NETDEV_FEATURE_COUNT); BUG_ON(dev_boot_phase); ASSERT_RTNL(); -- cgit v1.2.3 From 02671e23e7b383763fe1ae4f20b56d8029f9dfc6 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Wed, 2 May 2018 13:01:30 +0200 Subject: xsk: wire up XDP_SKB side of AF_XDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit wires up the xskmap to XDP_SKB layer. Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 2 +- net/core/dev.c | 35 +++++++++++++++++++---------------- net/core/filter.c | 17 ++++++++++++++--- 3 files changed, 34 insertions(+), 20 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/filter.h b/include/linux/filter.h index 64899c04c1a6..b7f81e3a70cb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -760,7 +760,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, * This does not appear to be a real limitation for existing software. */ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *prog); + struct xdp_buff *xdp, struct bpf_prog *prog); int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *prog); diff --git a/net/core/dev.c b/net/core/dev.c index 8f8931b93140..aea36b5a2fed 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3994,12 +3994,12 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb) } static u32 netif_receive_generic_xdp(struct sk_buff *skb, + struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct netdev_rx_queue *rxqueue; void *orig_data, *orig_data_end; u32 metalen, act = XDP_DROP; - struct xdp_buff xdp; int hlen, off; u32 mac_len; @@ -4034,19 +4034,19 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, */ mac_len = skb->data - skb_mac_header(skb); hlen = skb_headlen(skb) + mac_len; - xdp.data = skb->data - mac_len; - xdp.data_meta = xdp.data; - xdp.data_end = xdp.data + hlen; - xdp.data_hard_start = skb->data - skb_headroom(skb); - orig_data_end = xdp.data_end; - orig_data = xdp.data; + xdp->data = skb->data - mac_len; + xdp->data_meta = xdp->data; + xdp->data_end = xdp->data + hlen; + xdp->data_hard_start = skb->data - skb_headroom(skb); + orig_data_end = xdp->data_end; + orig_data = xdp->data; rxqueue = netif_get_rxqueue(skb); - xdp.rxq = &rxqueue->xdp_rxq; + xdp->rxq = &rxqueue->xdp_rxq; - act = bpf_prog_run_xdp(xdp_prog, &xdp); + act = bpf_prog_run_xdp(xdp_prog, xdp); - off = xdp.data - orig_data; + off = xdp->data - orig_data; if (off > 0) __skb_pull(skb, off); else if (off < 0) @@ -4056,10 +4056,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, /* check if bpf_xdp_adjust_tail was used. it can only "shrink" * pckt. */ - off = orig_data_end - xdp.data_end; + off = orig_data_end - xdp->data_end; if (off != 0) { - skb_set_tail_pointer(skb, xdp.data_end - xdp.data); + skb_set_tail_pointer(skb, xdp->data_end - xdp->data); skb->len -= off; + } switch (act) { @@ -4068,7 +4069,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, __skb_push(skb, mac_len); break; case XDP_PASS: - metalen = xdp.data - xdp.data_meta; + metalen = xdp->data - xdp->data_meta; if (metalen) skb_metadata_set(skb, metalen); break; @@ -4118,17 +4119,19 @@ static struct static_key generic_xdp_needed __read_mostly; int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) { if (xdp_prog) { - u32 act = netif_receive_generic_xdp(skb, xdp_prog); + struct xdp_buff xdp; + u32 act; int err; + act = netif_receive_generic_xdp(skb, &xdp, xdp_prog); if (act != XDP_PASS) { switch (act) { case XDP_REDIRECT: err = xdp_do_generic_redirect(skb->dev, skb, - xdp_prog); + &xdp, xdp_prog); if (err) goto out_redir; - /* fallthru to submit skb */ + break; case XDP_TX: generic_xdp_tx(skb, xdp_prog); break; diff --git a/net/core/filter.c b/net/core/filter.c index 40d4bbb4508d..120bc8a202d9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -59,6 +59,7 @@ #include #include #include +#include /** * sk_filter_trim_cap - run a packet through a socket filter @@ -2973,13 +2974,14 @@ static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd) static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, + struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); unsigned long map_owner = ri->map_owner; struct bpf_map *map = ri->map; - struct net_device *fwd = NULL; u32 index = ri->ifindex; + void *fwd = NULL; int err = 0; ri->ifindex = 0; @@ -3001,6 +3003,14 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) goto err; skb->dev = fwd; + generic_xdp_tx(skb, xdp_prog); + } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { + struct xdp_sock *xs = fwd; + + err = xsk_generic_rcv(xs, xdp); + if (err) + goto err; + consume_skb(skb); } else { /* TODO: Handle BPF_MAP_TYPE_CPUMAP */ err = -EBADRQC; @@ -3015,7 +3025,7 @@ err: } int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog) + struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); u32 index = ri->ifindex; @@ -3023,7 +3033,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, int err = 0; if (ri->map) - return xdp_do_generic_redirect_map(dev, skb, xdp_prog); + return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog); ri->ifindex = 0; fwd = dev_get_by_index_rcu(dev_net(dev), index); @@ -3037,6 +3047,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, skb->dev = fwd; _trace_xdp_redirect(dev, xdp_prog, index); + generic_xdp_tx(skb, xdp_prog); return 0; err: _trace_xdp_redirect_err(dev, xdp_prog, index, err); -- cgit v1.2.3 From 865b03f21162e4edfda51fc08693c864b1d4fdaf Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Wed, 2 May 2018 13:01:33 +0200 Subject: dev: packet: make packet_direct_xmit a common function The new dev_direct_xmit will be used by AF_XDP in later commits. Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov --- include/linux/netdevice.h | 1 + net/core/dev.c | 38 ++++++++++++++++++++++++++++++++++++++ net/packet/af_packet.c | 42 +++++------------------------------------- 3 files changed, 44 insertions(+), 37 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 366c32891158..a30435118530 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2486,6 +2486,7 @@ void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); int dev_queue_xmit(struct sk_buff *skb); int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv); +int dev_direct_xmit(struct sk_buff *skb, u16 queue_id); int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); void unregister_netdevice_many(struct list_head *head); diff --git a/net/core/dev.c b/net/core/dev.c index aea36b5a2fed..d3fdc86516e8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3625,6 +3625,44 @@ int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) } EXPORT_SYMBOL(dev_queue_xmit_accel); +int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) +{ + struct net_device *dev = skb->dev; + struct sk_buff *orig_skb = skb; + struct netdev_queue *txq; + int ret = NETDEV_TX_BUSY; + bool again = false; + + if (unlikely(!netif_running(dev) || + !netif_carrier_ok(dev))) + goto drop; + + skb = validate_xmit_skb_list(skb, dev, &again); + if (skb != orig_skb) + goto drop; + + skb_set_queue_mapping(skb, queue_id); + txq = skb_get_tx_queue(dev, skb); + + local_bh_disable(); + + HARD_TX_LOCK(dev, txq, smp_processor_id()); + if (!netif_xmit_frozen_or_drv_stopped(txq)) + ret = netdev_start_xmit(skb, dev, txq, false); + HARD_TX_UNLOCK(dev, txq); + + local_bh_enable(); + + if (!dev_xmit_complete(ret)) + kfree_skb(skb); + + return ret; +drop: + atomic_long_inc(&dev->tx_dropped); + kfree_skb_list(skb); + return NET_XMIT_DROP; +} +EXPORT_SYMBOL(dev_direct_xmit); /************************************************************************* * Receiver routines diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 01f3515cada0..611a26d5235c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -209,7 +209,7 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *, static void prb_fill_vlan_info(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void packet_flush_mclist(struct sock *sk); -static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb); +static u16 packet_pick_tx_queue(struct sk_buff *skb); struct packet_skb_cb { union { @@ -243,40 +243,7 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static int packet_direct_xmit(struct sk_buff *skb) { - struct net_device *dev = skb->dev; - struct sk_buff *orig_skb = skb; - struct netdev_queue *txq; - int ret = NETDEV_TX_BUSY; - bool again = false; - - if (unlikely(!netif_running(dev) || - !netif_carrier_ok(dev))) - goto drop; - - skb = validate_xmit_skb_list(skb, dev, &again); - if (skb != orig_skb) - goto drop; - - packet_pick_tx_queue(dev, skb); - txq = skb_get_tx_queue(dev, skb); - - local_bh_disable(); - - HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_xmit_frozen_or_drv_stopped(txq)) - ret = netdev_start_xmit(skb, dev, txq, false); - HARD_TX_UNLOCK(dev, txq); - - local_bh_enable(); - - if (!dev_xmit_complete(ret)) - kfree_skb(skb); - - return ret; -drop: - atomic_long_inc(&dev->tx_dropped); - kfree_skb_list(skb); - return NET_XMIT_DROP; + return dev_direct_xmit(skb, packet_pick_tx_queue(skb)); } static struct net_device *packet_cached_dev_get(struct packet_sock *po) @@ -313,8 +280,9 @@ static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) return (u16) raw_smp_processor_id() % dev->real_num_tx_queues; } -static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) +static u16 packet_pick_tx_queue(struct sk_buff *skb) { + struct net_device *dev = skb->dev; const struct net_device_ops *ops = dev->netdev_ops; u16 queue_index; @@ -326,7 +294,7 @@ static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) queue_index = __packet_pick_tx_queue(dev, skb); } - skb_set_queue_mapping(skb, queue_index); + return queue_index; } /* __register_prot_hook must be invoked through register_prot_hook -- cgit v1.2.3 From aabf6772cc745f90d1e15b0054eca734ba787784 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 8 May 2018 09:07:00 -0700 Subject: net: Update [e/in]gress_needed static key to modern api No changes in semantics -- key init is false; replace static_key_slow_inc|dec with static_branch_inc|dec static_key_false with static_branch_unlikely Added a '_key' suffix to both ingress_needed and egress_needed, for better self documentation. Signed-off-by: Davidlohr Bueso Signed-off-by: David S. Miller --- net/core/dev.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 29bf39174900..ad4288f6e105 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1755,33 +1755,33 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) EXPORT_SYMBOL(call_netdevice_notifiers); #ifdef CONFIG_NET_INGRESS -static struct static_key ingress_needed __read_mostly; +static DEFINE_STATIC_KEY_FALSE(ingress_needed_key); void net_inc_ingress_queue(void) { - static_key_slow_inc(&ingress_needed); + static_branch_inc(&ingress_needed_key); } EXPORT_SYMBOL_GPL(net_inc_ingress_queue); void net_dec_ingress_queue(void) { - static_key_slow_dec(&ingress_needed); + static_branch_dec(&ingress_needed_key); } EXPORT_SYMBOL_GPL(net_dec_ingress_queue); #endif #ifdef CONFIG_NET_EGRESS -static struct static_key egress_needed __read_mostly; +static DEFINE_STATIC_KEY_FALSE(egress_needed_key); void net_inc_egress_queue(void) { - static_key_slow_inc(&egress_needed); + static_branch_inc(&egress_needed_key); } EXPORT_SYMBOL_GPL(net_inc_egress_queue); void net_dec_egress_queue(void) { - static_key_slow_dec(&egress_needed); + static_branch_dec(&egress_needed_key); } EXPORT_SYMBOL_GPL(net_dec_egress_queue); #endif @@ -3532,7 +3532,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) #ifdef CONFIG_NET_CLS_ACT skb->tc_at_ingress = 0; # ifdef CONFIG_NET_EGRESS - if (static_key_false(&egress_needed)) { + if (static_branch_unlikely(&egress_needed_key)) { skb = sch_handle_egress(skb, &rc, dev); if (!skb) goto out; @@ -4566,7 +4566,7 @@ another_round: skip_taps: #ifdef CONFIG_NET_INGRESS - if (static_key_false(&ingress_needed)) { + if (static_branch_unlikely(&ingress_needed_key)) { skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev); if (!skb) goto out; -- cgit v1.2.3 From 39e8392201dcd02b7d70f2149e678035b20cc26a Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 8 May 2018 09:07:01 -0700 Subject: net: Update netstamp_needed static key to modern api No changes in refcount semantics -- key init is false; replace static_key_slow_inc|dec with static_branch_inc|dec static_key_false with static_branch_unlikely Added a '_key' suffix to netstamp_needed, for better self documentation. Signed-off-by: Davidlohr Bueso Signed-off-by: David S. Miller --- net/core/dev.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index ad4288f6e105..d6fd1578f770 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1786,7 +1786,7 @@ void net_dec_egress_queue(void) EXPORT_SYMBOL_GPL(net_dec_egress_queue); #endif -static struct static_key netstamp_needed __read_mostly; +static DEFINE_STATIC_KEY_FALSE(netstamp_needed_key); #ifdef HAVE_JUMP_LABEL static atomic_t netstamp_needed_deferred; static atomic_t netstamp_wanted; @@ -1797,9 +1797,9 @@ static void netstamp_clear(struct work_struct *work) wanted = atomic_add_return(deferred, &netstamp_wanted); if (wanted > 0) - static_key_enable(&netstamp_needed); + static_branch_enable(&netstamp_needed_key); else - static_key_disable(&netstamp_needed); + static_branch_disable(&netstamp_needed_key); } static DECLARE_WORK(netstamp_work, netstamp_clear); #endif @@ -1819,7 +1819,7 @@ void net_enable_timestamp(void) atomic_inc(&netstamp_needed_deferred); schedule_work(&netstamp_work); #else - static_key_slow_inc(&netstamp_needed); + static_branch_inc(&netstamp_needed_key); #endif } EXPORT_SYMBOL(net_enable_timestamp); @@ -1839,7 +1839,7 @@ void net_disable_timestamp(void) atomic_dec(&netstamp_needed_deferred); schedule_work(&netstamp_work); #else - static_key_slow_dec(&netstamp_needed); + static_branch_dec(&netstamp_needed_key); #endif } EXPORT_SYMBOL(net_disable_timestamp); @@ -1847,15 +1847,15 @@ EXPORT_SYMBOL(net_disable_timestamp); static inline void net_timestamp_set(struct sk_buff *skb) { skb->tstamp = 0; - if (static_key_false(&netstamp_needed)) + if (static_branch_unlikely(&netstamp_needed_key)) __net_timestamp(skb); } -#define net_timestamp_check(COND, SKB) \ - if (static_key_false(&netstamp_needed)) { \ - if ((COND) && !(SKB)->tstamp) \ - __net_timestamp(SKB); \ - } \ +#define net_timestamp_check(COND, SKB) \ + if (static_branch_unlikely(&netstamp_needed_key)) { \ + if ((COND) && !(SKB)->tstamp) \ + __net_timestamp(SKB); \ + } \ bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb) { -- cgit v1.2.3 From 02786475c74c7d0721b0142fac74108112cb456c Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 8 May 2018 09:07:02 -0700 Subject: net: Update generic_xdp_needed static key to modern api No changes in refcount semantics -- key init is false; replace static_key_slow_inc|dec with static_branch_inc|dec static_key_false with static_branch_unlikely Added a '_key' suffix to generic_xdp_needed, for better self documentation. Signed-off-by: Davidlohr Bueso Signed-off-by: David S. Miller --- net/core/dev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index d6fd1578f770..9f4390182384 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4154,7 +4154,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) } EXPORT_SYMBOL_GPL(generic_xdp_tx); -static struct static_key generic_xdp_needed __read_mostly; +static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) { @@ -4194,7 +4194,7 @@ static int netif_rx_internal(struct sk_buff *skb) trace_netif_rx(skb); - if (static_key_false(&generic_xdp_needed)) { + if (static_branch_unlikely(&generic_xdp_needed_key)) { int ret; preempt_disable(); @@ -4726,9 +4726,9 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) bpf_prog_put(old); if (old && !new) { - static_key_slow_dec(&generic_xdp_needed); + static_branch_dec(&generic_xdp_needed_key); } else if (new && !old) { - static_key_slow_inc(&generic_xdp_needed); + static_branch_inc(&generic_xdp_needed_key); dev_disable_lro(dev); dev_disable_gro_hw(dev); } @@ -4756,7 +4756,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb) if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; - if (static_key_false(&generic_xdp_needed)) { + if (static_branch_unlikely(&generic_xdp_needed_key)) { int ret; preempt_disable(); -- cgit v1.2.3 From 32f7b44d0f5661044fcfa84e9ad402ed9d759107 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 15 May 2018 10:50:31 +0200 Subject: sched: manipulate __QDISC_STATE_RUNNING in qdisc_run_* helpers Currently NOLOCK qdiscs pay a measurable overhead to atomically manipulate the __QDISC_STATE_RUNNING. Such bit is flipped twice per packet in the uncontended scenario with packet rate below the line rate: on packed dequeue and on the next, failing dequeue attempt. This changeset moves the bit manipulation into the qdisc_run_{begin,end} helpers, so that the bit is now flipped only once per packet, with measurable performance improvement in the uncontended scenario. This also allows simplifying the qdisc teardown code path - since qdisc_is_running() is now effective for each qdisc type - and avoid a possible race between qdisc_run() and dev_deactivate_many(), as now the some_qdisc_is_busy() can properly detect NOLOCK qdiscs being busy dequeuing packets. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/sch_generic.h | 10 +++++++++- net/core/dev.c | 2 +- net/sched/sch_generic.c | 31 +++++++++---------------------- 3 files changed, 19 insertions(+), 24 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 5154c8300262..4d2b37226e75 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -113,13 +113,19 @@ static inline void qdisc_refcount_inc(struct Qdisc *qdisc) static inline bool qdisc_is_running(const struct Qdisc *qdisc) { + if (qdisc->flags & TCQ_F_NOLOCK) + return test_bit(__QDISC_STATE_RUNNING, &qdisc->state); return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; } static inline bool qdisc_run_begin(struct Qdisc *qdisc) { - if (qdisc_is_running(qdisc)) + if (qdisc->flags & TCQ_F_NOLOCK) { + if (test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state)) + return false; + } else if (qdisc_is_running(qdisc)) { return false; + } /* Variant of write_seqcount_begin() telling lockdep a trylock * was attempted. */ @@ -131,6 +137,8 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) static inline void qdisc_run_end(struct Qdisc *qdisc) { write_seqcount_end(&qdisc->running); + if (qdisc->flags & TCQ_F_NOLOCK) + clear_bit(__QDISC_STATE_RUNNING, &qdisc->state); } static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) diff --git a/net/core/dev.c b/net/core/dev.c index 9f4390182384..7ca19f47a92a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3244,7 +3244,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, rc = NET_XMIT_DROP; } else { rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; - __qdisc_run(q); + qdisc_run(q); } if (unlikely(to_free)) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 39c144b6ff98..ff3ce71aec93 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -373,33 +373,24 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, */ static inline bool qdisc_restart(struct Qdisc *q, int *packets) { - bool more, validate, nolock = q->flags & TCQ_F_NOLOCK; spinlock_t *root_lock = NULL; struct netdev_queue *txq; struct net_device *dev; struct sk_buff *skb; + bool validate; /* Dequeue packet */ - if (nolock && test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) - return false; - skb = dequeue_skb(q, &validate, packets); - if (unlikely(!skb)) { - if (nolock) - clear_bit(__QDISC_STATE_RUNNING, &q->state); + if (unlikely(!skb)) return false; - } - if (!nolock) + if (!(q->flags & TCQ_F_NOLOCK)) root_lock = qdisc_lock(q); dev = qdisc_dev(q); txq = skb_get_tx_queue(dev, skb); - more = sch_direct_xmit(skb, q, dev, txq, root_lock, validate); - if (nolock) - clear_bit(__QDISC_STATE_RUNNING, &q->state); - return more; + return sch_direct_xmit(skb, q, dev, txq, root_lock, validate); } void __qdisc_run(struct Qdisc *q) @@ -1131,17 +1122,13 @@ static bool some_qdisc_is_busy(struct net_device *dev) dev_queue = netdev_get_tx_queue(dev, i); q = dev_queue->qdisc_sleeping; - if (q->flags & TCQ_F_NOLOCK) { - val = test_bit(__QDISC_STATE_SCHED, &q->state); - } else { - root_lock = qdisc_lock(q); - spin_lock_bh(root_lock); + root_lock = qdisc_lock(q); + spin_lock_bh(root_lock); - val = (qdisc_is_running(q) || - test_bit(__QDISC_STATE_SCHED, &q->state)); + val = (qdisc_is_running(q) || + test_bit(__QDISC_STATE_SCHED, &q->state)); - spin_unlock_bh(root_lock); - } + spin_unlock_bh(root_lock); if (val) return true; -- cgit v1.2.3