aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-02-22 10:15:09 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-02-22 10:15:09 -0800
commit3051bf36c25d5153051704291782f8d44e744d36 (patch)
tree72dfc8a1d12675c6f2981d13102df954b678f11b /net/core
parentMerge tag 'gcc-plugins-v4.11-rc1' of git://git.kernel.org/pub/scm/linux/kerne... (diff)
parentRevert "ath10k: Search SMBIOS for OEM board file extension" (diff)
downloadlinux-3051bf36c25d5153051704291782f8d44e744d36.tar.gz
linux-3051bf36c25d5153051704291782f8d44e744d36.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Highlights: 1) Support TX_RING in AF_PACKET TPACKET_V3 mode, from Sowmini Varadhan. 2) Simplify classifier state on sk_buff in order to shrink it a bit. From Willem de Bruijn. 3) Introduce SIPHASH and it's usage for secure sequence numbers and syncookies. From Jason A. Donenfeld. 4) Reduce CPU usage for ICMP replies we are going to limit or suppress, from Jesper Dangaard Brouer. 5) Introduce Shared Memory Communications socket layer, from Ursula Braun. 6) Add RACK loss detection and allow it to actually trigger fast recovery instead of just assisting after other algorithms have triggered it. From Yuchung Cheng. 7) Add xmit_more and BQL support to mvneta driver, from Simon Guinot. 8) skb_cow_data avoidance in esp4 and esp6, from Steffen Klassert. 9) Export MPLS packet stats via netlink, from Robert Shearman. 10) Significantly improve inet port bind conflict handling, especially when an application is restarted and changes it's setting of reuseport. From Josef Bacik. 11) Implement TX batching in vhost_net, from Jason Wang. 12) Extend the dummy device so that VF (virtual function) features, such as configuration, can be more easily tested. From Phil Sutter. 13) Avoid two atomic ops per page on x86 in bnx2x driver, from Eric Dumazet. 14) Add new bpf MAP, implementing a longest prefix match trie. From Daniel Mack. 15) Packet sample offloading support in mlxsw driver, from Yotam Gigi. 16) Add new aquantia driver, from David VomLehn. 17) Add bpf tracepoints, from Daniel Borkmann. 18) Add support for port mirroring to b53 and bcm_sf2 drivers, from Florian Fainelli. 19) Remove custom busy polling in many drivers, it is done in the core networking since 4.5 times. From Eric Dumazet. 20) Support XDP adjust_head in virtio_net, from John Fastabend. 21) Fix several major holes in neighbour entry confirmation, from Julian Anastasov. 22) Add XDP support to bnxt_en driver, from Michael Chan. 23) VXLAN offloads for enic driver, from Govindarajulu Varadarajan. 24) Add IPVTAP driver (IP-VLAN based tap driver) from Sainath Grandhi. 25) Support GRO in IPSEC protocols, from Steffen Klassert" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1764 commits) Revert "ath10k: Search SMBIOS for OEM board file extension" net: socket: fix recvmmsg not returning error from sock_error bnxt_en: use eth_hw_addr_random() bpf: fix unlocking of jited image when module ronx not set arch: add ARCH_HAS_SET_MEMORY config net: napi_watchdog() can use napi_schedule_irqoff() tcp: Revert "tcp: tcp_probe: use spin_lock_bh()" net/hsr: use eth_hw_addr_random() net: mvpp2: enable building on 64-bit platforms net: mvpp2: switch to build_skb() in the RX path net: mvpp2: simplify MVPP2_PRS_RI_* definitions net: mvpp2: fix indentation of MVPP2_EXT_GLOBAL_CTRL_DEFAULT net: mvpp2: remove unused register definitions net: mvpp2: simplify mvpp2_bm_bufs_add() net: mvpp2: drop useless fields in mvpp2_bm_pool and related code net: mvpp2: remove unused 'tx_skb' field of 'struct mvpp2_tx_queue' net: mvpp2: release reference to txq_cpu[] entry after unmapping net: mvpp2: handle too large value in mvpp2_rx_time_coal_set() net: mvpp2: handle too large value handling in mvpp2_rx_pkts_coal_set() net: mvpp2: remove useless arguments in mvpp2_rx_{pkts, time}_coal_set ...
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/dev.c431
-rw-r--r--net/core/devlink.c50
-rw-r--r--net/core/dst.c1
-rw-r--r--net/core/ethtool.c40
-rw-r--r--net/core/filter.c270
-rw-r--r--net/core/flow_dissector.c57
-rw-r--r--net/core/gro_cells.c92
-rw-r--r--net/core/lwt_bpf.c4
-rw-r--r--net/core/lwtunnel.c4
-rw-r--r--net/core/netprio_cgroup.c1
-rw-r--r--net/core/pktgen.c4
-rw-r--r--net/core/request_sock.c2
-rw-r--r--net/core/rtnetlink.c78
-rw-r--r--net/core/scm.c2
-rw-r--r--net/core/secure_seq.c147
-rw-r--r--net/core/skbuff.c20
-rw-r--r--net/core/sock.c21
-rw-r--r--net/core/sysctl_net_core.c38
19 files changed, 724 insertions, 539 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index f6761b6e3b29..79f9479e9658 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -28,3 +28,4 @@ obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
obj-$(CONFIG_DST_CACHE) += dst_cache.o
obj-$(CONFIG_HWBM) += hwbm.o
obj-$(CONFIG_NET_DEVLINK) += devlink.o
+obj-$(CONFIG_GRO_CELLS) += gro_cells.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 29101c98399f..304f2deae5f9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1,5 +1,5 @@
/*
- * NET3 Protocol independent device support routines.
+ * NET3 Protocol independent device support routines.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -7,7 +7,7 @@
* 2 of the License, or (at your option) any later version.
*
* Derived from the non IP parts of dev.c 1.0.19
- * Authors: Ross Biro
+ * Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
*
@@ -21,9 +21,9 @@
*
* Changes:
* D.J. Barrow : Fixed bug where dev->refcnt gets set
- * to 2 if register_netdev gets called
- * before net_dev_init & also removed a
- * few lines of code in the process.
+ * to 2 if register_netdev gets called
+ * before net_dev_init & also removed a
+ * few lines of code in the process.
* Alan Cox : device private ioctl copies fields back.
* Alan Cox : Transmit queue code does relevant
* stunts to keep the queue safe.
@@ -36,7 +36,7 @@
* Alan Cox : 100 backlog just doesn't cut it when
* you start doing multicast video 8)
* Alan Cox : Rewrote net_bh and list manager.
- * Alan Cox : Fix ETH_P_ALL echoback lengths.
+ * Alan Cox : Fix ETH_P_ALL echoback lengths.
* Alan Cox : Took out transmit every packet pass
* Saved a few bytes in the ioctl handler
* Alan Cox : Network driver sets packet type before
@@ -46,7 +46,7 @@
* Richard Kooijman: Timestamp fixes.
* Alan Cox : Wrong field in SIOCGIFDSTADDR
* Alan Cox : Device lock protection.
- * Alan Cox : Fixed nasty side effect of device close
+ * Alan Cox : Fixed nasty side effect of device close
* changes.
* Rudi Cilibrasi : Pass the right thing to
* set_mac_address()
@@ -67,8 +67,8 @@
* Paul Rusty Russell : SIOCSIFNAME
* Pekka Riikonen : Netdev boot-time settings code
* Andrew Morton : Make unregister_netdevice wait
- * indefinitely on dev->refcnt
- * J Hadi Salim : - Backlog queue sampling
+ * indefinitely on dev->refcnt
+ * J Hadi Salim : - Backlog queue sampling
* - netif_rx() feedback
*/
@@ -192,7 +192,8 @@ static seqcount_t devnet_rename_seq;
static inline void dev_base_seq_inc(struct net *net)
{
- while (++net->dev_base_seq == 0);
+ while (++net->dev_base_seq == 0)
+ ;
}
static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
@@ -274,8 +275,8 @@ EXPORT_PER_CPU_SYMBOL(softnet_data);
* register_netdevice() inits txq->_xmit_lock and sets lockdep class
* according to dev->type
*/
-static const unsigned short netdev_lock_type[] =
- {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
+static const unsigned short netdev_lock_type[] = {
+ ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
@@ -291,22 +292,22 @@ static const unsigned short netdev_lock_type[] =
ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
-static const char *const netdev_lock_name[] =
- {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
- "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
- "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
- "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
- "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
- "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
- "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
- "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
- "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
- "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
- "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
- "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
- "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
- "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
- "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
+static const char *const netdev_lock_name[] = {
+ "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
+ "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
+ "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
+ "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
+ "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
+ "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
+ "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
+ "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
+ "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
+ "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
+ "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
+ "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
+ "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
+ "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
+ "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
@@ -352,10 +353,11 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
#endif
/*******************************************************************************
+ *
+ * Protocol management and registration routines
+ *
+ *******************************************************************************/
- Protocol management and registration routines
-
-*******************************************************************************/
/*
* Add a protocol ID to the list. Now that the input handler is
@@ -538,10 +540,10 @@ void dev_remove_offload(struct packet_offload *po)
EXPORT_SYMBOL(dev_remove_offload);
/******************************************************************************
-
- Device Boot-time Settings Routines
-
-*******************************************************************************/
+ *
+ * Device Boot-time Settings Routines
+ *
+ ******************************************************************************/
/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
@@ -574,13 +576,13 @@ static int netdev_boot_setup_add(char *name, struct ifmap *map)
}
/**
- * netdev_boot_setup_check - check boot time settings
- * @dev: the netdevice
+ * netdev_boot_setup_check - check boot time settings
+ * @dev: the netdevice
*
- * Check boot time settings for the device.
- * The found settings are set for the device to be used
- * later in the device probing.
- * Returns 0 if no settings found, 1 if they are.
+ * Check boot time settings for the device.
+ * The found settings are set for the device to be used
+ * later in the device probing.
+ * Returns 0 if no settings found, 1 if they are.
*/
int netdev_boot_setup_check(struct net_device *dev)
{
@@ -590,10 +592,10 @@ int netdev_boot_setup_check(struct net_device *dev)
for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
!strcmp(dev->name, s[i].name)) {
- dev->irq = s[i].map.irq;
- dev->base_addr = s[i].map.base_addr;
- dev->mem_start = s[i].map.mem_start;
- dev->mem_end = s[i].map.mem_end;
+ dev->irq = s[i].map.irq;
+ dev->base_addr = s[i].map.base_addr;
+ dev->mem_start = s[i].map.mem_start;
+ dev->mem_end = s[i].map.mem_end;
return 1;
}
}
@@ -603,14 +605,14 @@ EXPORT_SYMBOL(netdev_boot_setup_check);
/**
- * netdev_boot_base - get address from boot time settings
- * @prefix: prefix for network device
- * @unit: id for network device
+ * netdev_boot_base - get address from boot time settings
+ * @prefix: prefix for network device
+ * @unit: id for network device
*
- * Check boot time settings for the base address of device.
- * The found settings are set for the device to be used
- * later in the device probing.
- * Returns 0 if no settings found.
+ * Check boot time settings for the base address of device.
+ * The found settings are set for the device to be used
+ * later in the device probing.
+ * Returns 0 if no settings found.
*/
unsigned long netdev_boot_base(const char *prefix, int unit)
{
@@ -663,10 +665,10 @@ int __init netdev_boot_setup(char *str)
__setup("netdev=", netdev_boot_setup);
/*******************************************************************************
-
- Device Interface Subroutines
-
-*******************************************************************************/
+ *
+ * Device Interface Subroutines
+ *
+ *******************************************************************************/
/**
* dev_get_iflink - get 'iflink' value of a interface
@@ -737,15 +739,15 @@ struct net_device *__dev_get_by_name(struct net *net, const char *name)
EXPORT_SYMBOL(__dev_get_by_name);
/**
- * dev_get_by_name_rcu - find a device by its name
- * @net: the applicable net namespace
- * @name: name to find
+ * dev_get_by_name_rcu - find a device by its name
+ * @net: the applicable net namespace
+ * @name: name to find
*
- * Find an interface by name.
- * If the name is found a pointer to the device is returned.
- * If the name is not found then %NULL is returned.
- * The reference counters are not incremented so the caller must be
- * careful with locks. The caller must hold RCU lock.
+ * Find an interface by name.
+ * If the name is found a pointer to the device is returned.
+ * If the name is not found then %NULL is returned.
+ * The reference counters are not incremented so the caller must be
+ * careful with locks. The caller must hold RCU lock.
*/
struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
@@ -1289,8 +1291,8 @@ void netdev_state_change(struct net_device *dev)
EXPORT_SYMBOL(netdev_state_change);
/**
- * netdev_notify_peers - notify network peers about existence of @dev
- * @dev: network device
+ * netdev_notify_peers - notify network peers about existence of @dev
+ * @dev: network device
*
* Generate traffic such that interested network peers are aware of
* @dev, such as by generating a gratuitous ARP. This may be used when
@@ -1518,17 +1520,17 @@ static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
static int dev_boot_phase = 1;
/**
- * register_netdevice_notifier - register a network notifier block
- * @nb: notifier
+ * register_netdevice_notifier - register a network notifier block
+ * @nb: notifier
*
- * Register a notifier to be called when network device events occur.
- * The notifier passed is linked into the kernel structures and must
- * not be reused until it has been unregistered. A negative errno code
- * is returned on a failure.
+ * Register a notifier to be called when network device events occur.
+ * The notifier passed is linked into the kernel structures and must
+ * not be reused until it has been unregistered. A negative errno code
+ * is returned on a failure.
*
- * When registered all registration and up events are replayed
- * to the new notifier to allow device to have a race free
- * view of the network device list.
+ * When registered all registration and up events are replayed
+ * to the new notifier to allow device to have a race free
+ * view of the network device list.
*/
int register_netdevice_notifier(struct notifier_block *nb)
@@ -1585,17 +1587,17 @@ outroll:
EXPORT_SYMBOL(register_netdevice_notifier);
/**
- * unregister_netdevice_notifier - unregister a network notifier block
- * @nb: notifier
+ * unregister_netdevice_notifier - unregister a network notifier block
+ * @nb: notifier
*
- * Unregister a notifier previously registered by
- * register_netdevice_notifier(). The notifier is unlinked into the
- * kernel structures and may then be reused. A negative errno code
- * is returned on a failure.
+ * Unregister a notifier previously registered by
+ * register_netdevice_notifier(). The notifier is unlinked into the
+ * kernel structures and may then be reused. A negative errno code
+ * is returned on a failure.
*
- * After unregistering unregister and down device events are synthesized
- * for all devices on the device list to the removed notifier to remove
- * the need for special case cleanup code.
+ * After unregistering unregister and down device events are synthesized
+ * for all devices on the device list to the removed notifier to remove
+ * the need for special case cleanup code.
*/
int unregister_netdevice_notifier(struct notifier_block *nb)
@@ -2403,28 +2405,6 @@ void netif_schedule_queue(struct netdev_queue *txq)
}
EXPORT_SYMBOL(netif_schedule_queue);
-/**
- * netif_wake_subqueue - allow sending packets on subqueue
- * @dev: network device
- * @queue_index: sub queue index
- *
- * Resume individual transmit queue of a device with multiple transmit queues.
- */
-void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
-{
- struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
-
- if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) {
- struct Qdisc *q;
-
- rcu_read_lock();
- q = rcu_dereference(txq->qdisc);
- __netif_schedule(q);
- rcu_read_unlock();
- }
-}
-EXPORT_SYMBOL(netif_wake_subqueue);
-
void netif_tx_wake_queue(struct netdev_queue *dev_queue)
{
if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) {
@@ -2518,6 +2498,7 @@ u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
if (dev->num_tc) {
u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+
qoffset = dev->tc_to_txq[tc].offset;
qcount = dev->tc_to_txq[tc].count;
}
@@ -2654,9 +2635,10 @@ EXPORT_SYMBOL(skb_mac_gso_segment);
static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
{
if (tx_path)
- return skb->ip_summed != CHECKSUM_PARTIAL;
- else
- return skb->ip_summed == CHECKSUM_NONE;
+ return skb->ip_summed != CHECKSUM_PARTIAL &&
+ skb->ip_summed != CHECKSUM_NONE;
+
+ return skb->ip_summed == CHECKSUM_NONE;
}
/**
@@ -2675,11 +2657,12 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
netdev_features_t features, bool tx_path)
{
+ struct sk_buff *segs;
+
if (unlikely(skb_needs_check(skb, tx_path))) {
int err;
- skb_warn_bad_offload(skb);
-
+ /* We're going to init ->check field in TCP or UDP header */
err = skb_cow_head(skb, 0);
if (err < 0)
return ERR_PTR(err);
@@ -2707,7 +2690,12 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
skb_reset_mac_header(skb);
skb_reset_mac_len(skb);
- return skb_mac_gso_segment(skb, features);
+ segs = skb_mac_gso_segment(skb, features);
+
+ if (unlikely(skb_needs_check(skb, tx_path)))
+ skb_warn_bad_offload(skb);
+
+ return segs;
}
EXPORT_SYMBOL(__skb_gso_segment);
@@ -2732,9 +2720,11 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
int i;
+
if (!(dev->features & NETIF_F_HIGHDMA)) {
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
if (PageHighMem(skb_frag_page(frag)))
return 1;
}
@@ -2748,6 +2738,7 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
dma_addr_t addr = page_to_phys(skb_frag_page(frag));
+
if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
return 1;
}
@@ -3148,9 +3139,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
if (!cl)
return skb;
- /* skb->tc_verd and qdisc_skb_cb(skb)->pkt_len were already set
- * earlier by the caller.
- */
+ /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
qdisc_bstats_cpu_update(cl->q, skb);
switch (tc_classify(skb, cl, &cl_res, false)) {
@@ -3225,6 +3214,7 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
if (queue_index < 0 || skb->ooo_okay ||
queue_index >= dev->real_num_tx_queues) {
int new_index = get_xps_queue(dev, skb);
+
if (new_index < 0)
new_index = skb_tx_hash(dev, skb);
@@ -3254,6 +3244,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
if (dev->real_num_tx_queues != 1) {
const struct net_device_ops *ops = dev->netdev_ops;
+
if (ops->ndo_select_queue)
queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
__netdev_pick_tx);
@@ -3315,7 +3306,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
qdisc_pkt_len_init(skb);
#ifdef CONFIG_NET_CLS_ACT
- skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
+ skb->tc_at_ingress = 0;
# ifdef CONFIG_NET_EGRESS
if (static_key_false(&egress_needed)) {
skb = sch_handle_egress(skb, &rc, dev);
@@ -3342,16 +3333,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
}
/* The device has no queue. Common case for software devices:
- loopback, all the sorts of tunnels...
+ * loopback, all the sorts of tunnels...
- Really, it is unlikely that netif_tx_lock protection is necessary
- here. (f.e. loopback and IP tunnels are clean ignoring statistics
- counters.)
- However, it is possible, that they rely on protection
- made by us here.
+ * Really, it is unlikely that netif_tx_lock protection is necessary
+ * here. (f.e. loopback and IP tunnels are clean ignoring statistics
+ * counters.)
+ * However, it is possible, that they rely on protection
+ * made by us here.
- Check this and shot the lock. It is not prone from deadlocks.
- Either shot noqueue qdisc, it is even simpler 8)
+ * Check this and shot the lock. It is not prone from deadlocks.
+ *Either shot noqueue qdisc, it is even simpler 8)
*/
if (dev->flags & IFF_UP) {
int cpu = smp_processor_id(); /* ok because BHs are off */
@@ -3413,16 +3404,20 @@ int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
EXPORT_SYMBOL(dev_queue_xmit_accel);
-/*=======================================================================
- Receiver routines
- =======================================================================*/
+/*************************************************************************
+ * Receiver routines
+ *************************************************************************/
int netdev_max_backlog __read_mostly = 1000;
EXPORT_SYMBOL(netdev_max_backlog);
int netdev_tstamp_prequeue __read_mostly = 1;
int netdev_budget __read_mostly = 300;
-int weight_p __read_mostly = 64; /* old backlog weight */
+int weight_p __read_mostly = 64; /* old backlog weight */
+int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
+int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
+int dev_rx_weight __read_mostly = 64;
+int dev_tx_weight __read_mostly = 64;
/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,
@@ -3779,6 +3774,7 @@ static int netif_rx_internal(struct sk_buff *skb)
#endif
{
unsigned int qtail;
+
ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
put_cpu();
}
@@ -3838,6 +3834,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
while (clist) {
struct sk_buff *skb = clist;
+
clist = clist->next;
WARN_ON(atomic_read(&skb->users));
@@ -3911,7 +3908,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
}
qdisc_skb_cb(skb)->pkt_len = skb->len;
- skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
+ skb->tc_at_ingress = 1;
qdisc_bstats_cpu_update(cl->q, skb);
switch (tc_classify(skb, cl, &cl_res, false)) {
@@ -3976,9 +3973,7 @@ int netdev_rx_handler_register(struct net_device *dev,
rx_handler_func_t *rx_handler,
void *rx_handler_data)
{
- ASSERT_RTNL();
-
- if (dev->rx_handler)
+ if (netdev_is_rx_handler_busy(dev))
return -EBUSY;
/* Note: rx_handler_data must be set before rx_handler */
@@ -4084,12 +4079,8 @@ another_round:
goto out;
}
-#ifdef CONFIG_NET_CLS_ACT
- if (skb->tc_verd & TC_NCLS) {
- skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
- goto ncls;
- }
-#endif
+ if (skb_skip_tc_classify(skb))
+ goto skip_classify;
if (pfmemalloc)
goto skip_taps;
@@ -4117,10 +4108,8 @@ skip_taps:
goto out;
}
#endif
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_verd = 0;
-ncls:
-#endif
+ skb_reset_tc(skb);
+skip_classify:
if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
goto drop;
@@ -4521,6 +4510,11 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (&ptype->list == head)
goto normal;
+ if (IS_ERR(pp) && PTR_ERR(pp) == -EINPROGRESS) {
+ ret = GRO_CONSUMED;
+ goto ok;
+ }
+
same_flow = NAPI_GRO_CB(skb)->same_flow;
ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
@@ -4616,6 +4610,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
case GRO_MERGED_FREE:
if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
skb_dst_drop(skb);
+ secpath_reset(skb);
kmem_cache_free(skbuff_head_cache, skb);
} else {
__kfree_skb(skb);
@@ -4624,6 +4619,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
case GRO_HELD:
case GRO_MERGED:
+ case GRO_CONSUMED:
break;
}
@@ -4656,6 +4652,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->encapsulation = 0;
skb_shinfo(skb)->gso_type = 0;
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+ secpath_reset(skb);
napi->skb = skb;
}
@@ -4694,6 +4691,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
break;
case GRO_MERGED:
+ case GRO_CONSUMED:
break;
}
@@ -4830,7 +4828,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
net_rps_action_and_irq_enable(sd);
}
- napi->weight = weight_p;
+ napi->weight = dev_rx_weight;
while (again) {
struct sk_buff *skb;
@@ -4897,23 +4895,6 @@ void __napi_schedule_irqoff(struct napi_struct *n)
}
EXPORT_SYMBOL(__napi_schedule_irqoff);
-bool __napi_complete(struct napi_struct *n)
-{
- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
-
- /* Some drivers call us directly, instead of calling
- * napi_complete_done().
- */
- if (unlikely(test_bit(NAPI_STATE_IN_BUSY_POLL, &n->state)))
- return false;
-
- list_del_init(&n->poll_list);
- smp_mb__before_atomic();
- clear_bit(NAPI_STATE_SCHED, &n->state);
- return true;
-}
-EXPORT_SYMBOL(__napi_complete);
-
bool napi_complete_done(struct napi_struct *n, int work_done)
{
unsigned long flags;
@@ -4940,14 +4921,13 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
else
napi_gro_flush(n, false);
}
- if (likely(list_empty(&n->poll_list))) {
- WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
- } else {
+ if (unlikely(!list_empty(&n->poll_list))) {
/* If n->poll_list is not empty, we need to mask irqs */
local_irq_save(flags);
- __napi_complete(n);
+ list_del_init(&n->poll_list);
local_irq_restore(flags);
}
+ WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
return true;
}
EXPORT_SYMBOL(napi_complete_done);
@@ -4993,7 +4973,6 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
{
unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
int (*napi_poll)(struct napi_struct *napi, int budget);
- int (*busy_poll)(struct napi_struct *dev);
void *have_poll_lock = NULL;
struct napi_struct *napi;
int rc;
@@ -5008,17 +4987,10 @@ restart:
if (!napi)
goto out;
- /* Note: ndo_busy_poll method is optional in linux-4.5 */
- busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
-
preempt_disable();
for (;;) {
rc = 0;
local_bh_disable();
- if (busy_poll) {
- rc = busy_poll(napi);
- goto count;
- }
if (!napi_poll) {
unsigned long val = READ_ONCE(napi->state);
@@ -5043,9 +5015,6 @@ count:
LINUX_MIB_BUSYPOLLRXPACKETS, rc);
local_bh_enable();
- if (rc == LL_FLUSH_FAILED)
- break; /* permanent failure */
-
if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) ||
busy_loop_timeout(end_time))
break;
@@ -5120,7 +5089,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
napi = container_of(timer, struct napi_struct, timer);
if (napi->gro_list)
- napi_schedule(napi);
+ napi_schedule_irqoff(napi);
return HRTIMER_NORESTART;
}
@@ -5706,6 +5675,7 @@ static int netdev_adjacent_sysfs_add(struct net_device *dev,
struct list_head *dev_list)
{
char linkname[IFNAMSIZ+7];
+
sprintf(linkname, dev_list == &dev->adj_list.upper ?
"upper_%s" : "lower_%s", adj_dev->name);
return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
@@ -5716,6 +5686,7 @@ static void netdev_adjacent_sysfs_del(struct net_device *dev,
struct list_head *dev_list)
{
char linkname[IFNAMSIZ+7];
+
sprintf(linkname, dev_list == &dev->adj_list.upper ?
"upper_%s" : "lower_%s", name);
sysfs_remove_link(&(dev->dev.kobj), linkname);
@@ -5985,6 +5956,7 @@ void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
struct netdev_notifier_changeupper_info changeupper_info;
+
ASSERT_RTNL();
changeupper_info.upper_dev = upper_dev;
@@ -6151,50 +6123,6 @@ void netdev_lower_state_changed(struct net_device *lower_dev,
}
EXPORT_SYMBOL(netdev_lower_state_changed);
-int netdev_default_l2upper_neigh_construct(struct net_device *dev,
- struct neighbour *n)
-{
- struct net_device *lower_dev, *stop_dev;
- struct list_head *iter;
- int err;
-
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- if (!lower_dev->netdev_ops->ndo_neigh_construct)
- continue;
- err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n);
- if (err) {
- stop_dev = lower_dev;
- goto rollback;
- }
- }
- return 0;
-
-rollback:
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- if (lower_dev == stop_dev)
- break;
- if (!lower_dev->netdev_ops->ndo_neigh_destroy)
- continue;
- lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
- }
- return err;
-}
-EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct);
-
-void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
- struct neighbour *n)
-{
- struct net_device *lower_dev;
- struct list_head *iter;
-
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- if (!lower_dev->netdev_ops->ndo_neigh_destroy)
- continue;
- lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
- }
-}
-EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy);
-
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -6447,8 +6375,8 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
}
/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
- is important. Some (broken) drivers set IFF_PROMISC, when
- IFF_ALLMULTI is requested not asking us and not reporting.
+ * is important. Some (broken) drivers set IFF_PROMISC, when
+ * IFF_ALLMULTI is requested not asking us and not reporting.
*/
if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
@@ -6746,6 +6674,7 @@ EXPORT_SYMBOL(dev_change_xdp_fd);
static int dev_new_index(struct net *net)
{
int ifindex = net->ifindex;
+
for (;;) {
if (++ifindex <= 0)
ifindex = 1;
@@ -6812,8 +6741,8 @@ static void rollback_registered_many(struct list_head *head)
/* Notify protocols, that we are about to destroy
- this device. They should clean all the things.
- */
+ * this device. They should clean all the things.
+ */
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
if (!dev->rtnl_link_ops ||
@@ -6971,13 +6900,6 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
features &= ~dev->gso_partial_features;
}
-#ifdef CONFIG_NET_RX_BUSY_POLL
- if (dev->netdev_ops->ndo_busy_poll)
- features |= NETIF_F_BUSY_POLL;
- else
-#endif
- features &= ~NETIF_F_BUSY_POLL;
-
return features;
}
@@ -7166,6 +7088,7 @@ void netif_tx_stop_all_queues(struct net_device *dev)
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+
netif_tx_stop_queue(txq);
}
}
@@ -7640,17 +7563,17 @@ void netdev_freemem(struct net_device *dev)
}
/**
- * alloc_netdev_mqs - allocate network device
- * @sizeof_priv: size of private data to allocate space for
- * @name: device name format string
- * @name_assign_type: origin of device name
- * @setup: callback to initialize device
- * @txqs: the number of TX subqueues to allocate
- * @rxqs: the number of RX subqueues to allocate
- *
- * Allocates a struct net_device with private data area for driver use
- * and performs basic initialization. Also allocates subqueue structs
- * for each queue on the device.
+ * alloc_netdev_mqs - allocate network device
+ * @sizeof_priv: size of private data to allocate space for
+ * @name: device name format string
+ * @name_assign_type: origin of device name
+ * @setup: callback to initialize device
+ * @txqs: the number of TX subqueues to allocate
+ * @rxqs: the number of RX subqueues to allocate
+ *
+ * Allocates a struct net_device with private data area for driver use
+ * and performs basic initialization. Also allocates subqueue structs
+ * for each queue on the device.
*/
struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
unsigned char name_assign_type,
@@ -7762,13 +7685,13 @@ free_dev:
EXPORT_SYMBOL(alloc_netdev_mqs);
/**
- * free_netdev - free network device
- * @dev: device
+ * free_netdev - free network device
+ * @dev: device
*
- * This function does the last stage of destroying an allocated device
- * interface. The reference to the device object is released.
- * If this is the last reference then it will be freed.
- * Must be called in process context.
+ * This function does the last stage of destroying an allocated device
+ * interface. The reference to the device object is released. If this
+ * is the last reference then it will be freed.Must be called in process
+ * context.
*/
void free_netdev(struct net_device *dev)
{
@@ -7950,12 +7873,12 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
dev_shutdown(dev);
/* Notify protocols, that we are about to destroy
- this device. They should clean all the things.
-
- Note that dev->reg_state stays at NETREG_REGISTERED.
- This is wanted because this way 8021q and macvlan know
- the device is just moving and can keep their slaves up.
- */
+ * this device. They should clean all the things.
+ *
+ * Note that dev->reg_state stays at NETREG_REGISTERED.
+ * This is wanted because this way 8021q and macvlan know
+ * the device is just moving and can keep their slaves up.
+ */
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 2b5bf9efa720..e9c1e6acfb6d 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1392,9 +1392,9 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
return -EOPNOTSUPP;
}
-static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
+static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
{
const struct devlink_ops *ops = devlink->ops;
void *hdr;
@@ -1408,50 +1408,52 @@ static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
err = devlink_nl_put_handle(msg, devlink);
if (err)
- goto out;
+ goto nla_put_failure;
- err = ops->eswitch_mode_get(devlink, &mode);
- if (err)
- goto out;
- err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode);
- if (err)
- goto out;
+ if (ops->eswitch_mode_get) {
+ err = ops->eswitch_mode_get(devlink, &mode);
+ if (err)
+ goto nla_put_failure;
+ err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode);
+ if (err)
+ goto nla_put_failure;
+ }
if (ops->eswitch_inline_mode_get) {
err = ops->eswitch_inline_mode_get(devlink, &inline_mode);
if (err)
- goto out;
+ goto nla_put_failure;
err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE,
inline_mode);
if (err)
- goto out;
+ goto nla_put_failure;
}
genlmsg_end(msg, hdr);
return 0;
-out:
+nla_put_failure:
genlmsg_cancel(msg, hdr);
return err;
}
-static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+static int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
const struct devlink_ops *ops = devlink->ops;
struct sk_buff *msg;
int err;
- if (!ops || !ops->eswitch_mode_get)
+ if (!ops)
return -EOPNOTSUPP;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
- err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET,
- info->snd_portid, info->snd_seq, 0);
+ err = devlink_nl_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_GET,
+ info->snd_portid, info->snd_seq, 0);
if (err) {
nlmsg_free(msg);
@@ -1461,8 +1463,8 @@ static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
-static int devlink_nl_cmd_eswitch_mode_set_doit(struct sk_buff *skb,
- struct genl_info *info)
+static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
const struct devlink_ops *ops = devlink->ops;
@@ -1629,15 +1631,15 @@ static const struct genl_ops devlink_nl_ops[] = {
DEVLINK_NL_FLAG_LOCK_PORTS,
},
{
- .cmd = DEVLINK_CMD_ESWITCH_MODE_GET,
- .doit = devlink_nl_cmd_eswitch_mode_get_doit,
+ .cmd = DEVLINK_CMD_ESWITCH_GET,
+ .doit = devlink_nl_cmd_eswitch_get_doit,
.policy = devlink_nl_policy,
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
- .cmd = DEVLINK_CMD_ESWITCH_MODE_SET,
- .doit = devlink_nl_cmd_eswitch_mode_set_doit,
+ .cmd = DEVLINK_CMD_ESWITCH_SET,
+ .doit = devlink_nl_cmd_eswitch_set_doit,
.policy = devlink_nl_policy,
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
diff --git a/net/core/dst.c b/net/core/dst.c
index b5cbbe07f786..960e503b5a52 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -190,7 +190,6 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
dst->__use = 0;
dst->lastuse = jiffies;
dst->flags = flags;
- dst->pending_confirm = 0;
dst->next = NULL;
if (!(flags & DST_NOCOUNT))
dst_entries_add(ops, 1);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d92de0a1f0a4..be7bab1adcde 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -102,7 +102,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_RXFCS_BIT] = "rx-fcs",
[NETIF_F_RXALL_BIT] = "rx-all",
[NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
- [NETIF_F_BUSY_POLL_BIT] = "busy-poll",
[NETIF_F_HW_TC_BIT] = "hw-tc-offload",
};
@@ -1820,11 +1819,13 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
ret = __ethtool_get_sset_count(dev, gstrings.string_set);
if (ret < 0)
return ret;
+ if (ret > S32_MAX / ETH_GSTRING_LEN)
+ return -ENOMEM;
+ WARN_ON_ONCE(!ret);
gstrings.len = ret;
-
- data = kcalloc(gstrings.len, ETH_GSTRING_LEN, GFP_USER);
- if (!data)
+ data = vzalloc(gstrings.len * ETH_GSTRING_LEN);
+ if (gstrings.len && !data)
return -ENOMEM;
__ethtool_get_strings(dev, gstrings.string_set, data);
@@ -1833,12 +1834,13 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
goto out;
useraddr += sizeof(gstrings);
- if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
+ if (gstrings.len &&
+ copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
goto out;
ret = 0;
out:
- kfree(data);
+ vfree(data);
return ret;
}
@@ -1915,14 +1917,15 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
n_stats = ops->get_sset_count(dev, ETH_SS_STATS);
if (n_stats < 0)
return n_stats;
- WARN_ON(n_stats == 0);
-
+ if (n_stats > S32_MAX / sizeof(u64))
+ return -ENOMEM;
+ WARN_ON_ONCE(!n_stats);
if (copy_from_user(&stats, useraddr, sizeof(stats)))
return -EFAULT;
stats.n_stats = n_stats;
- data = kmalloc(n_stats * sizeof(u64), GFP_USER);
- if (!data)
+ data = vzalloc(n_stats * sizeof(u64));
+ if (n_stats && !data)
return -ENOMEM;
ops->get_ethtool_stats(dev, &stats, data);
@@ -1931,12 +1934,12 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
if (copy_to_user(useraddr, &stats, sizeof(stats)))
goto out;
useraddr += sizeof(stats);
- if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
+ if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64)))
goto out;
ret = 0;
out:
- kfree(data);
+ vfree(data);
return ret;
}
@@ -1951,17 +1954,18 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
return -EOPNOTSUPP;
n_stats = phy_get_sset_count(phydev);
-
if (n_stats < 0)
return n_stats;
- WARN_ON(n_stats == 0);
+ if (n_stats > S32_MAX / sizeof(u64))
+ return -ENOMEM;
+ WARN_ON_ONCE(!n_stats);
if (copy_from_user(&stats, useraddr, sizeof(stats)))
return -EFAULT;
stats.n_stats = n_stats;
- data = kmalloc_array(n_stats, sizeof(u64), GFP_USER);
- if (!data)
+ data = vzalloc(n_stats * sizeof(u64));
+ if (n_stats && !data)
return -ENOMEM;
mutex_lock(&phydev->lock);
@@ -1972,12 +1976,12 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
if (copy_to_user(useraddr, &stats, sizeof(stats)))
goto out;
useraddr += sizeof(stats);
- if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
+ if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64)))
goto out;
ret = 0;
out:
- kfree(data);
+ vfree(data);
return ret;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 1969b3f118c1..e466e0040137 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -76,9 +76,10 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
* allow SOCK_MEMALLOC sockets to use it as this socket is
* helping free memory
*/
- if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
+ if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
return -ENOMEM;
-
+ }
err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
if (err)
return err;
@@ -1416,8 +1417,8 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
- .arg3_type = ARG_PTR_TO_STACK,
- .arg4_type = ARG_CONST_STACK_SIZE,
+ .arg3_type = ARG_PTR_TO_MEM,
+ .arg4_type = ARG_CONST_SIZE,
.arg5_type = ARG_ANYTHING,
};
@@ -1447,8 +1448,8 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
- .arg3_type = ARG_PTR_TO_RAW_STACK,
- .arg4_type = ARG_CONST_STACK_SIZE,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
};
BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
@@ -1522,10 +1523,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
{
bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
+ bool do_mforce = flags & BPF_F_MARK_ENFORCE;
__sum16 *ptr;
- if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
- BPF_F_HDR_FIELD_MASK)))
+ if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
+ BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
return -EINVAL;
if (unlikely(offset > 0xffff || offset & 1))
return -EFAULT;
@@ -1533,7 +1535,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
return -EFAULT;
ptr = (__sum16 *)(skb->data + offset);
- if (is_mmzero && !*ptr)
+ if (is_mmzero && !do_mforce && !*ptr)
return 0;
switch (flags & BPF_F_HDR_FIELD_MASK) {
@@ -1601,10 +1603,10 @@ static const struct bpf_func_proto bpf_csum_diff_proto = {
.gpl_only = false,
.pkt_access = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_STACK,
- .arg2_type = ARG_CONST_STACK_SIZE_OR_ZERO,
- .arg3_type = ARG_PTR_TO_STACK,
- .arg4_type = ARG_CONST_STACK_SIZE_OR_ZERO,
+ .arg1_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_PTR_TO_MEM,
+ .arg4_type = ARG_CONST_SIZE_OR_ZERO,
.arg5_type = ARG_ANYTHING,
};
@@ -2306,8 +2308,8 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_STACK,
- .arg5_type = ARG_CONST_STACK_SIZE,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE,
};
static unsigned short bpf_tunnel_key_af(u64 flags)
@@ -2377,8 +2379,8 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_RAW_STACK,
- .arg3_type = ARG_CONST_STACK_SIZE,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
};
@@ -2412,8 +2414,8 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_RAW_STACK,
- .arg3_type = ARG_CONST_STACK_SIZE,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE,
};
static struct metadata_dst __percpu *md_dst;
@@ -2483,8 +2485,8 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_STACK,
- .arg3_type = ARG_CONST_STACK_SIZE,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
};
@@ -2509,8 +2511,8 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_STACK,
- .arg3_type = ARG_CONST_STACK_SIZE,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
};
static const struct bpf_func_proto *
@@ -2593,12 +2595,12 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_STACK,
- .arg5_type = ARG_CONST_STACK_SIZE,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE,
};
static const struct bpf_func_proto *
-sk_filter_func_proto(enum bpf_func_id func_id)
+bpf_base_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
@@ -2626,6 +2628,17 @@ sk_filter_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
+sk_filter_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_skb_load_bytes:
+ return &bpf_skb_load_bytes_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
tc_cls_act_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -2680,7 +2693,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
default:
- return sk_filter_func_proto(func_id);
+ return bpf_base_func_proto(func_id);
}
}
@@ -2695,7 +2708,7 @@ xdp_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_xdp_adjust_head:
return &bpf_xdp_adjust_head_proto;
default:
- return sk_filter_func_proto(func_id);
+ return bpf_base_func_proto(func_id);
}
}
@@ -2706,7 +2719,7 @@ cg_skb_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_skb_load_bytes:
return &bpf_skb_load_bytes_proto;
default:
- return sk_filter_func_proto(func_id);
+ return bpf_base_func_proto(func_id);
}
}
@@ -2733,7 +2746,7 @@ lwt_inout_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
default:
- return sk_filter_func_proto(func_id);
+ return bpf_base_func_proto(func_id);
}
}
@@ -2776,11 +2789,22 @@ static bool __is_valid_access(int off, int size)
{
if (off < 0 || off >= sizeof(struct __sk_buff))
return false;
+
/* The verifier guarantees that size > 0. */
if (off % size != 0)
return false;
- if (size != sizeof(__u32))
- return false;
+
+ switch (off) {
+ case offsetof(struct __sk_buff, cb[0]) ...
+ offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
+ if (off + size >
+ offsetof(struct __sk_buff, cb[4]) + sizeof(__u32))
+ return false;
+ break;
+ default:
+ if (size != sizeof(__u32))
+ return false;
+ }
return true;
}
@@ -2799,7 +2823,7 @@ static bool sk_filter_is_valid_access(int off, int size,
if (type == BPF_WRITE) {
switch (off) {
case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]):
+ offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
break;
default:
return false;
@@ -2823,7 +2847,7 @@ static bool lwt_is_valid_access(int off, int size,
case offsetof(struct __sk_buff, mark):
case offsetof(struct __sk_buff, priority):
case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]):
+ offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
break;
default:
return false;
@@ -2915,7 +2939,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case offsetof(struct __sk_buff, tc_index):
case offsetof(struct __sk_buff, priority):
case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]):
+ offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
case offsetof(struct __sk_buff, tc_classid):
break;
default:
@@ -2972,32 +2996,33 @@ void bpf_warn_invalid_xdp_action(u32 act)
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
-static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
- int src_reg, int ctx_off,
- struct bpf_insn *insn_buf,
- struct bpf_prog *prog)
+static u32 bpf_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
{
struct bpf_insn *insn = insn_buf;
+ int off;
- switch (ctx_off) {
+ switch (si->off) {
case offsetof(struct __sk_buff, len):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, len));
break;
case offsetof(struct __sk_buff, protocol):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
- *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, protocol));
break;
case offsetof(struct __sk_buff, vlan_proto):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
- *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, vlan_proto));
break;
@@ -3005,17 +3030,17 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, priority));
else
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, priority));
break;
case offsetof(struct __sk_buff, ingress_ifindex):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4);
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, skb_iif));
break;
@@ -3023,17 +3048,17 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
- dst_reg, src_reg,
+ si->dst_reg, si->src_reg,
offsetof(struct sk_buff, dev));
- *insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1);
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg,
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
offsetof(struct net_device, ifindex));
break;
case offsetof(struct __sk_buff, hash):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, hash));
break;
@@ -3041,63 +3066,77 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, mark));
else
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, mark));
break;
case offsetof(struct __sk_buff, pkt_type):
- return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn);
+ return convert_skb_access(SKF_AD_PKTTYPE, si->dst_reg,
+ si->src_reg, insn);
case offsetof(struct __sk_buff, queue_mapping):
- return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn);
+ return convert_skb_access(SKF_AD_QUEUE, si->dst_reg,
+ si->src_reg, insn);
case offsetof(struct __sk_buff, vlan_present):
return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
- dst_reg, src_reg, insn);
+ si->dst_reg, si->src_reg, insn);
case offsetof(struct __sk_buff, vlan_tci):
return convert_skb_access(SKF_AD_VLAN_TAG,
- dst_reg, src_reg, insn);
+ si->dst_reg, si->src_reg, insn);
case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]):
+ offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
+ BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
+ offsetof(struct qdisc_skb_cb, data)) %
+ sizeof(__u64));
prog->cb_access = 1;
- ctx_off -= offsetof(struct __sk_buff, cb[0]);
- ctx_off += offsetof(struct sk_buff, cb);
- ctx_off += offsetof(struct qdisc_skb_cb, data);
+ off = si->off;
+ off -= offsetof(struct __sk_buff, cb[0]);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct qdisc_skb_cb, data);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
+ *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
+ si->src_reg, off);
else
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
+ *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
+ si->src_reg, off);
break;
case offsetof(struct __sk_buff, tc_classid):
- ctx_off -= offsetof(struct __sk_buff, tc_classid);
- ctx_off += offsetof(struct sk_buff, cb);
- ctx_off += offsetof(struct qdisc_skb_cb, tc_classid);
+ BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2);
+
+ off = si->off;
+ off -= offsetof(struct __sk_buff, tc_classid);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct qdisc_skb_cb, tc_classid);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
+ *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
+ si->src_reg, off);
else
- *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
+ si->src_reg, off);
break;
case offsetof(struct __sk_buff, data):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
- dst_reg, src_reg,
+ si->dst_reg, si->src_reg,
offsetof(struct sk_buff, data));
break;
case offsetof(struct __sk_buff, data_end):
- ctx_off -= offsetof(struct __sk_buff, data_end);
- ctx_off += offsetof(struct sk_buff, cb);
- ctx_off += offsetof(struct bpf_skb_data_end, data_end);
- *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), dst_reg, src_reg,
- ctx_off);
+ off = si->off;
+ off -= offsetof(struct __sk_buff, data_end);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct bpf_skb_data_end, data_end);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+ si->src_reg, off);
break;
case offsetof(struct __sk_buff, tc_index):
@@ -3105,110 +3144,107 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg,
+ *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, tc_index));
else
- *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
offsetof(struct sk_buff, tc_index));
- break;
#else
if (type == BPF_WRITE)
- *insn++ = BPF_MOV64_REG(dst_reg, dst_reg);
+ *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
else
- *insn++ = BPF_MOV64_IMM(dst_reg, 0);
- break;
+ *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#endif
+ break;
}
return insn - insn_buf;
}
static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
- int dst_reg, int src_reg,
- int ctx_off,
+ const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog)
{
struct bpf_insn *insn = insn_buf;
- switch (ctx_off) {
+ switch (si->off) {
case offsetof(struct bpf_sock, bound_dev_if):
BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_bound_dev_if));
else
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_bound_dev_if));
break;
case offsetof(struct bpf_sock, family):
BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
- *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_family));
break;
case offsetof(struct bpf_sock, type):
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, __sk_flags_offset));
- *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_TYPE_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_TYPE_SHIFT);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
break;
case offsetof(struct bpf_sock, protocol):
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, __sk_flags_offset));
- *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_PROTO_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_PROTO_SHIFT);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
break;
}
return insn - insn_buf;
}
-static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
- int src_reg, int ctx_off,
+static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog)
{
struct bpf_insn *insn = insn_buf;
- switch (ctx_off) {
+ switch (si->off) {
case offsetof(struct __sk_buff, ifindex):
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
- dst_reg, src_reg,
+ si->dst_reg, si->src_reg,
offsetof(struct sk_buff, dev));
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg,
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
offsetof(struct net_device, ifindex));
break;
default:
- return sk_filter_convert_ctx_access(type, dst_reg, src_reg,
- ctx_off, insn_buf, prog);
+ return bpf_convert_ctx_access(type, si, insn_buf, prog);
}
return insn - insn_buf;
}
-static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg,
- int src_reg, int ctx_off,
+static u32 xdp_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog)
{
struct bpf_insn *insn = insn_buf;
- switch (ctx_off) {
+ switch (si->off) {
case offsetof(struct xdp_md, data):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
- dst_reg, src_reg,
+ si->dst_reg, si->src_reg,
offsetof(struct xdp_buff, data));
break;
case offsetof(struct xdp_md, data_end):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
- dst_reg, src_reg,
+ si->dst_reg, si->src_reg,
offsetof(struct xdp_buff, data_end));
break;
}
@@ -3219,7 +3255,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg,
static const struct bpf_verifier_ops sk_filter_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
- .convert_ctx_access = sk_filter_convert_ctx_access,
+ .convert_ctx_access = bpf_convert_ctx_access,
};
static const struct bpf_verifier_ops tc_cls_act_ops = {
@@ -3238,69 +3274,69 @@ static const struct bpf_verifier_ops xdp_ops = {
static const struct bpf_verifier_ops cg_skb_ops = {
.get_func_proto = cg_skb_func_proto,
.is_valid_access = sk_filter_is_valid_access,
- .convert_ctx_access = sk_filter_convert_ctx_access,
+ .convert_ctx_access = bpf_convert_ctx_access,
};
static const struct bpf_verifier_ops lwt_inout_ops = {
.get_func_proto = lwt_inout_func_proto,
.is_valid_access = lwt_is_valid_access,
- .convert_ctx_access = sk_filter_convert_ctx_access,
+ .convert_ctx_access = bpf_convert_ctx_access,
};
static const struct bpf_verifier_ops lwt_xmit_ops = {
.get_func_proto = lwt_xmit_func_proto,
.is_valid_access = lwt_is_valid_access,
- .convert_ctx_access = sk_filter_convert_ctx_access,
+ .convert_ctx_access = bpf_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
};
static const struct bpf_verifier_ops cg_sock_ops = {
- .get_func_proto = sk_filter_func_proto,
+ .get_func_proto = bpf_base_func_proto,
.is_valid_access = sock_filter_is_valid_access,
.convert_ctx_access = sock_filter_convert_ctx_access,
};
-static struct bpf_prog_type_list sk_filter_type __read_mostly = {
+static struct bpf_prog_type_list sk_filter_type __ro_after_init = {
.ops = &sk_filter_ops,
.type = BPF_PROG_TYPE_SOCKET_FILTER,
};
-static struct bpf_prog_type_list sched_cls_type __read_mostly = {
+static struct bpf_prog_type_list sched_cls_type __ro_after_init = {
.ops = &tc_cls_act_ops,
.type = BPF_PROG_TYPE_SCHED_CLS,
};
-static struct bpf_prog_type_list sched_act_type __read_mostly = {
+static struct bpf_prog_type_list sched_act_type __ro_after_init = {
.ops = &tc_cls_act_ops,
.type = BPF_PROG_TYPE_SCHED_ACT,
};
-static struct bpf_prog_type_list xdp_type __read_mostly = {
+static struct bpf_prog_type_list xdp_type __ro_after_init = {
.ops = &xdp_ops,
.type = BPF_PROG_TYPE_XDP,
};
-static struct bpf_prog_type_list cg_skb_type __read_mostly = {
+static struct bpf_prog_type_list cg_skb_type __ro_after_init = {
.ops = &cg_skb_ops,
.type = BPF_PROG_TYPE_CGROUP_SKB,
};
-static struct bpf_prog_type_list lwt_in_type __read_mostly = {
+static struct bpf_prog_type_list lwt_in_type __ro_after_init = {
.ops = &lwt_inout_ops,
.type = BPF_PROG_TYPE_LWT_IN,
};
-static struct bpf_prog_type_list lwt_out_type __read_mostly = {
+static struct bpf_prog_type_list lwt_out_type __ro_after_init = {
.ops = &lwt_inout_ops,
.type = BPF_PROG_TYPE_LWT_OUT,
};
-static struct bpf_prog_type_list lwt_xmit_type __read_mostly = {
+static struct bpf_prog_type_list lwt_xmit_type __ro_after_init = {
.ops = &lwt_xmit_ops,
.type = BPF_PROG_TYPE_LWT_XMIT,
};
-static struct bpf_prog_type_list cg_sock_type __read_mostly = {
+static struct bpf_prog_type_list cg_sock_type __ro_after_init = {
.ops = &cg_sock_ops,
.type = BPF_PROG_TYPE_CGROUP_SOCK
};
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 1b7673aac59d..c35aae13c8d2 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -138,6 +138,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_control *key_control;
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
+ struct flow_dissector_key_arp *key_arp;
struct flow_dissector_key_ports *key_ports;
struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
@@ -379,6 +380,62 @@ mpls:
nhoff += FCOE_HEADER_LEN;
goto out_good;
+
+ case htons(ETH_P_ARP):
+ case htons(ETH_P_RARP): {
+ struct {
+ unsigned char ar_sha[ETH_ALEN];
+ unsigned char ar_sip[4];
+ unsigned char ar_tha[ETH_ALEN];
+ unsigned char ar_tip[4];
+ } *arp_eth, _arp_eth;
+ const struct arphdr *arp;
+ struct arphdr *_arp;
+
+ arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
+ hlen, &_arp);
+ if (!arp)
+ goto out_bad;
+
+ if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
+ arp->ar_pro != htons(ETH_P_IP) ||
+ arp->ar_hln != ETH_ALEN ||
+ arp->ar_pln != 4 ||
+ (arp->ar_op != htons(ARPOP_REPLY) &&
+ arp->ar_op != htons(ARPOP_REQUEST)))
+ goto out_bad;
+
+ arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
+ sizeof(_arp_eth), data,
+ hlen,
+ &_arp_eth);
+ if (!arp_eth)
+ goto out_bad;
+
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ARP)) {
+
+ key_arp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ARP,
+ target_container);
+
+ memcpy(&key_arp->sip, arp_eth->ar_sip,
+ sizeof(key_arp->sip));
+ memcpy(&key_arp->tip, arp_eth->ar_tip,
+ sizeof(key_arp->tip));
+
+ /* Only store the lower byte of the opcode;
+ * this covers ARPOP_REPLY and ARPOP_REQUEST.
+ */
+ key_arp->op = ntohs(arp->ar_op) & 0xff;
+
+ ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
+ ether_addr_copy(key_arp->tha, arp_eth->ar_tha);
+ }
+
+ goto out_good;
+ }
+
default:
goto out_bad;
}
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
new file mode 100644
index 000000000000..c98bbfbd26b8
--- /dev/null
+++ b/net/core/gro_cells.c
@@ -0,0 +1,92 @@
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <net/gro_cells.h>
+
+struct gro_cell {
+ struct sk_buff_head napi_skbs;
+ struct napi_struct napi;
+};
+
+int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ struct gro_cell *cell;
+
+ if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO))
+ return netif_rx(skb);
+
+ cell = this_cpu_ptr(gcells->cells);
+
+ if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
+ atomic_long_inc(&dev->rx_dropped);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+
+ __skb_queue_tail(&cell->napi_skbs, skb);
+ if (skb_queue_len(&cell->napi_skbs) == 1)
+ napi_schedule(&cell->napi);
+ return NET_RX_SUCCESS;
+}
+EXPORT_SYMBOL(gro_cells_receive);
+
+/* called under BH context */
+static int gro_cell_poll(struct napi_struct *napi, int budget)
+{
+ struct gro_cell *cell = container_of(napi, struct gro_cell, napi);
+ struct sk_buff *skb;
+ int work_done = 0;
+
+ while (work_done < budget) {
+ skb = __skb_dequeue(&cell->napi_skbs);
+ if (!skb)
+ break;
+ napi_gro_receive(napi, skb);
+ work_done++;
+ }
+
+ if (work_done < budget)
+ napi_complete_done(napi, work_done);
+ return work_done;
+}
+
+int gro_cells_init(struct gro_cells *gcells, struct net_device *dev)
+{
+ int i;
+
+ gcells->cells = alloc_percpu(struct gro_cell);
+ if (!gcells->cells)
+ return -ENOMEM;
+
+ for_each_possible_cpu(i) {
+ struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
+
+ __skb_queue_head_init(&cell->napi_skbs);
+
+ set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state);
+
+ netif_napi_add(dev, &cell->napi, gro_cell_poll,
+ NAPI_POLL_WEIGHT);
+ napi_enable(&cell->napi);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(gro_cells_init);
+
+void gro_cells_destroy(struct gro_cells *gcells)
+{
+ int i;
+
+ if (!gcells->cells)
+ return;
+ for_each_possible_cpu(i) {
+ struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
+
+ netif_napi_del(&cell->napi);
+ __skb_queue_purge(&cell->napi_skbs);
+ }
+ free_percpu(gcells->cells);
+ gcells->cells = NULL;
+}
+EXPORT_SYMBOL(gro_cells_destroy);
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index b3eef90b2df9..0cfe7b0216c3 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -237,7 +237,7 @@ static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = {
[LWT_BPF_XMIT_HEADROOM] = { .type = NLA_U32 },
};
-static int bpf_build_state(struct net_device *dev, struct nlattr *nla,
+static int bpf_build_state(struct nlattr *nla,
unsigned int family, const void *cfg,
struct lwtunnel_state **ts)
{
@@ -352,7 +352,7 @@ static int bpf_encap_nlsize(struct lwtunnel_state *lwtstate)
0;
}
-int bpf_lwt_prog_cmp(struct bpf_lwt_prog *a, struct bpf_lwt_prog *b)
+static int bpf_lwt_prog_cmp(struct bpf_lwt_prog *a, struct bpf_lwt_prog *b)
{
/* FIXME:
* The LWT state is currently rebuilt for delete requests which
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index c23465005f2f..6df9f8fabf0c 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -101,7 +101,7 @@ int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
}
EXPORT_SYMBOL(lwtunnel_encap_del_ops);
-int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
+int lwtunnel_build_state(u16 encap_type,
struct nlattr *encap, unsigned int family,
const void *cfg, struct lwtunnel_state **lws)
{
@@ -116,7 +116,7 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[encap_type]);
if (likely(ops && ops->build_state && try_module_get(ops->owner))) {
- ret = ops->build_state(dev, encap, family, cfg, lws);
+ ret = ops->build_state(encap, family, cfg, lws);
if (ret)
module_put(ops->owner);
}
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 2ec86fc552df..756637dc7a57 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/types.h>
+#include <linux/module.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8e69ce472236..96947f5d41e4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3439,9 +3439,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
/* skb was 'freed' by stack, so clean few
* bits and reuse it
*/
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_verd = 0; /* reset reclass/redir ttl */
-#endif
+ skb_reset_tc(skb);
} while (--burst > 0);
goto out; /* Skips xmit_mode M_START_XMIT */
} else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) {
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 5d26056b6d8f..9b8727c67b58 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -34,8 +34,6 @@
* and it will increase in proportion to the memory of machine.
* Note : Dont forget somaxconn that may limit backlog too.
*/
-int sysctl_max_syn_backlog = 256;
-EXPORT_SYMBOL(sysctl_max_syn_backlog);
void reqsk_queue_alloc(struct request_sock_queue *queue)
{
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 75e3ea7bda08..c4e84c558240 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -837,8 +837,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
static inline int rtnl_vfinfo_size(const struct net_device *dev,
u32 ext_filter_mask)
{
- if (dev->dev.parent && dev_is_pci(dev->dev.parent) &&
- (ext_filter_mask & RTEXT_FILTER_VF)) {
+ if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF)) {
int num_vfs = dev_num_vf(dev->dev.parent);
size_t size = nla_total_size(0);
size += num_vfs *
@@ -877,8 +876,6 @@ static size_t rtnl_port_size(const struct net_device *dev,
{
size_t port_size = nla_total_size(4) /* PORT_VF */
+ nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */
- + nla_total_size(sizeof(struct ifla_port_vsi))
- /* PORT_VSI_TYPE */
+ nla_total_size(PORT_UUID_MAX) /* PORT_INSTANCE_UUID */
+ nla_total_size(PORT_UUID_MAX) /* PORT_HOST_UUID */
+ nla_total_size(1) /* PROT_VDP_REQUEST */
@@ -1492,14 +1489,19 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
[IFLA_PORT_VF] = { .type = NLA_U32 },
[IFLA_PORT_PROFILE] = { .type = NLA_STRING,
.len = PORT_PROFILE_MAX },
- [IFLA_PORT_VSI_TYPE] = { .type = NLA_BINARY,
- .len = sizeof(struct ifla_port_vsi)},
[IFLA_PORT_INSTANCE_UUID] = { .type = NLA_BINARY,
.len = PORT_UUID_MAX },
[IFLA_PORT_HOST_UUID] = { .type = NLA_STRING,
.len = PORT_UUID_MAX },
[IFLA_PORT_REQUEST] = { .type = NLA_U8, },
[IFLA_PORT_RESPONSE] = { .type = NLA_U16, },
+
+ /* Unused, but we need to keep it here since user space could
+ * fill it. It's also broken with regard to NLA_BINARY use in
+ * combination with structs.
+ */
+ [IFLA_PORT_VSI_TYPE] = { .type = NLA_BINARY,
+ .len = sizeof(struct ifla_port_vsi) },
};
static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = {
@@ -2356,7 +2358,6 @@ struct net_device *rtnl_create_link(struct net *net,
const char *ifname, unsigned char name_assign_type,
const struct rtnl_link_ops *ops, struct nlattr *tb[])
{
- int err;
struct net_device *dev;
unsigned int num_tx_queues = 1;
unsigned int num_rx_queues = 1;
@@ -2371,11 +2372,10 @@ struct net_device *rtnl_create_link(struct net *net,
else if (ops->get_num_rx_queues)
num_rx_queues = ops->get_num_rx_queues();
- err = -ENOMEM;
dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type,
ops->setup, num_tx_queues, num_rx_queues);
if (!dev)
- goto err;
+ return ERR_PTR(-ENOMEM);
dev_net_set(dev, net);
dev->rtnl_link_ops = ops;
@@ -2401,9 +2401,6 @@ struct net_device *rtnl_create_link(struct net *net,
dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
return dev;
-
-err:
- return ERR_PTR(err);
}
EXPORT_SYMBOL(rtnl_create_link);
@@ -2571,7 +2568,7 @@ replay:
return -ENODEV;
}
- if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO])
+ if (tb[IFLA_MAP] || tb[IFLA_PROTINFO])
return -EOPNOTSUPP;
if (!ops) {
@@ -2653,6 +2650,11 @@ replay:
if (err < 0)
goto out_unregister;
}
+ if (tb[IFLA_MASTER]) {
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+ if (err)
+ goto out_unregister;
+ }
out:
if (link_net)
put_net(link_net);
@@ -3829,6 +3831,39 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
*idxattr = 0;
}
+ if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, *idxattr)) {
+ struct rtnl_af_ops *af_ops;
+
+ *idxattr = IFLA_STATS_AF_SPEC;
+ attr = nla_nest_start(skb, IFLA_STATS_AF_SPEC);
+ if (!attr)
+ goto nla_put_failure;
+
+ list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ if (af_ops->fill_stats_af) {
+ struct nlattr *af;
+ int err;
+
+ af = nla_nest_start(skb, af_ops->family);
+ if (!af)
+ goto nla_put_failure;
+
+ err = af_ops->fill_stats_af(skb, dev);
+
+ if (err == -ENODATA)
+ nla_nest_cancel(skb, af);
+ else if (err < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, af);
+ }
+ }
+
+ nla_nest_end(skb, attr);
+
+ *idxattr = 0;
+ }
+
nlmsg_end(skb, nlh);
return 0;
@@ -3885,6 +3920,23 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0))
size += rtnl_get_offload_stats_size(dev);
+ if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, 0)) {
+ struct rtnl_af_ops *af_ops;
+
+ /* for IFLA_STATS_AF_SPEC */
+ size += nla_total_size(0);
+
+ list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ if (af_ops->get_stats_af_size) {
+ size += nla_total_size(
+ af_ops->get_stats_af_size(dev));
+
+ /* for AF_* */
+ size += nla_total_size(0);
+ }
+ }
+ }
+
return size;
}
diff --git a/net/core/scm.c b/net/core/scm.c
index d8820438ba37..b6d83686e149 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -71,7 +71,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
struct file **fpp;
int i, num;
- num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int);
+ num = (cmsg->cmsg_len - sizeof(struct cmsghdr))/sizeof(int);
if (num <= 0)
return 0;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 88a8e429fc3e..758f140b6bed 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -1,3 +1,7 @@
+/*
+ * Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/cryptohash.h>
@@ -8,18 +12,18 @@
#include <linux/ktime.h>
#include <linux/string.h>
#include <linux/net.h>
-
+#include <linux/siphash.h>
#include <net/secure_seq.h>
#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET)
+#include <linux/in6.h>
#include <net/tcp.h>
-#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
-static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
+static siphash_key_t net_secret __read_mostly;
static __always_inline void net_secret_init(void)
{
- net_get_random_once(net_secret, sizeof(net_secret));
+ net_get_random_once(&net_secret, sizeof(net_secret));
}
#endif
@@ -44,80 +48,70 @@ static u32 seq_scale(u32 seq)
u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
__be16 sport, __be16 dport, u32 *tsoff)
{
- u32 secret[MD5_MESSAGE_BYTES / 4];
- u32 hash[MD5_DIGEST_WORDS];
- u32 i;
-
+ const struct {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ __be16 sport;
+ __be16 dport;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .saddr = *(struct in6_addr *)saddr,
+ .daddr = *(struct in6_addr *)daddr,
+ .sport = sport,
+ .dport = dport
+ };
+ u64 hash;
net_secret_init();
- memcpy(hash, saddr, 16);
- for (i = 0; i < 4; i++)
- secret[i] = net_secret[i] + (__force u32)daddr[i];
- secret[4] = net_secret[4] +
- (((__force u16)sport << 16) + (__force u16)dport);
- for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
- secret[i] = net_secret[i];
-
- md5_transform(hash, secret);
-
- *tsoff = sysctl_tcp_timestamps == 1 ? hash[1] : 0;
- return seq_scale(hash[0]);
+ hash = siphash(&combined, offsetofend(typeof(combined), dport),
+ &net_secret);
+ *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
+ return seq_scale(hash);
}
EXPORT_SYMBOL(secure_tcpv6_sequence_number);
u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
{
- u32 secret[MD5_MESSAGE_BYTES / 4];
- u32 hash[MD5_DIGEST_WORDS];
- u32 i;
-
+ const struct {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ __be16 dport;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .saddr = *(struct in6_addr *)saddr,
+ .daddr = *(struct in6_addr *)daddr,
+ .dport = dport
+ };
net_secret_init();
- memcpy(hash, saddr, 16);
- for (i = 0; i < 4; i++)
- secret[i] = net_secret[i] + (__force u32) daddr[i];
- secret[4] = net_secret[4] + (__force u32)dport;
- for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
- secret[i] = net_secret[i];
-
- md5_transform(hash, secret);
-
- return hash[0];
+ return siphash(&combined, offsetofend(typeof(combined), dport),
+ &net_secret);
}
EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
+/* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
+ * but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
+ * it would be easy enough to have the former function use siphash_4u32, passing
+ * the arguments as separate u32.
+ */
+
u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport, u32 *tsoff)
{
- u32 hash[MD5_DIGEST_WORDS];
-
+ u64 hash;
net_secret_init();
- hash[0] = (__force u32)saddr;
- hash[1] = (__force u32)daddr;
- hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
- hash[3] = net_secret[15];
-
- md5_transform(hash, net_secret);
-
- *tsoff = sysctl_tcp_timestamps == 1 ? hash[1] : 0;
- return seq_scale(hash[0]);
+ hash = siphash_3u32((__force u32)saddr, (__force u32)daddr,
+ (__force u32)sport << 16 | (__force u32)dport,
+ &net_secret);
+ *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
+ return seq_scale(hash);
}
u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
- u32 hash[MD5_DIGEST_WORDS];
-
net_secret_init();
- hash[0] = (__force u32)saddr;
- hash[1] = (__force u32)daddr;
- hash[2] = (__force u32)dport ^ net_secret[14];
- hash[3] = net_secret[15];
-
- md5_transform(hash, net_secret);
-
- return hash[0];
+ return siphash_3u32((__force u32)saddr, (__force u32)daddr,
+ (__force u16)dport, &net_secret);
}
EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
#endif
@@ -126,21 +120,13 @@ EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport)
{
- u32 hash[MD5_DIGEST_WORDS];
u64 seq;
-
net_secret_init();
- hash[0] = (__force u32)saddr;
- hash[1] = (__force u32)daddr;
- hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
- hash[3] = net_secret[15];
-
- md5_transform(hash, net_secret);
-
- seq = hash[0] | (((u64)hash[1]) << 32);
+ seq = siphash_3u32((__force u32)saddr, (__force u32)daddr,
+ (__force u32)sport << 16 | (__force u32)dport,
+ &net_secret);
seq += ktime_get_real_ns();
seq &= (1ull << 48) - 1;
-
return seq;
}
EXPORT_SYMBOL(secure_dccp_sequence_number);
@@ -149,26 +135,23 @@ EXPORT_SYMBOL(secure_dccp_sequence_number);
u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
__be16 sport, __be16 dport)
{
- u32 secret[MD5_MESSAGE_BYTES / 4];
- u32 hash[MD5_DIGEST_WORDS];
+ const struct {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ __be16 sport;
+ __be16 dport;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .saddr = *(struct in6_addr *)saddr,
+ .daddr = *(struct in6_addr *)daddr,
+ .sport = sport,
+ .dport = dport
+ };
u64 seq;
- u32 i;
-
net_secret_init();
- memcpy(hash, saddr, 16);
- for (i = 0; i < 4; i++)
- secret[i] = net_secret[i] + (__force u32)daddr[i];
- secret[4] = net_secret[4] +
- (((__force u16)sport << 16) + (__force u16)dport);
- for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
- secret[i] = net_secret[i];
-
- md5_transform(hash, secret);
-
- seq = hash[0] | (((u64)hash[1]) << 32);
+ seq = siphash(&combined, offsetofend(typeof(combined), dport),
+ &net_secret);
seq += ktime_get_real_ns();
seq &= (1ull << 48) - 1;
-
return seq;
}
EXPORT_SYMBOL(secure_dccpv6_sequence_number);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 734c71468b01..f3557958e9bf 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -271,7 +271,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
atomic_set(&fclones->fclone_ref, 1);
fclones->skb2.fclone = SKB_FCLONE_CLONE;
- fclones->skb2.pfmemalloc = pfmemalloc;
}
out:
return skb;
@@ -655,7 +654,7 @@ static void skb_release_head_state(struct sk_buff *skb)
skb->destructor(skb);
}
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- nf_conntrack_put(skb->nfct);
+ nf_conntrack_put(skb_nfct(skb));
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(skb->nf_bridge);
@@ -878,9 +877,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#endif
#ifdef CONFIG_NET_SCHED
CHECK_SKB_FIELD(tc_index);
-#ifdef CONFIG_NET_CLS_ACT
- CHECK_SKB_FIELD(tc_verd);
-#endif
#endif
}
@@ -1195,10 +1191,10 @@ EXPORT_SYMBOL(__pskb_copy_fclone);
int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
gfp_t gfp_mask)
{
- int i;
- u8 *data;
- int size = nhead + skb_end_offset(skb) + ntail;
+ int i, osize = skb_end_offset(skb);
+ int size = osize + nhead + ntail;
long off;
+ u8 *data;
BUG_ON(nhead < 0);
@@ -1260,6 +1256,14 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb->hdr_len = 0;
skb->nohdr = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);
+
+ /* It is not generally safe to change skb->truesize.
+ * For the moment, we really care of rx path, or
+ * when skb is orphaned (not attached to a socket).
+ */
+ if (!skb->sk || skb->destructor == sock_edemux)
+ skb->truesize += size - osize;
+
return 0;
nofrags:
diff --git a/net/core/sock.c b/net/core/sock.c
index 4eca27dc5c94..e7d74940e863 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -222,7 +222,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
"sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" ,
- "sk_lock-AF_QIPCRTR", "sk_lock-AF_MAX"
+ "sk_lock-AF_QIPCRTR", "sk_lock-AF_SMC" , "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
@@ -239,7 +239,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
"slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
"slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" ,
- "slock-AF_QIPCRTR", "slock-AF_MAX"
+ "slock-AF_QIPCRTR", "slock-AF_SMC" , "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
@@ -256,7 +256,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
"clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
"clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" ,
- "clock-AF_QIPCRTR", "clock-AF_MAX"
+ "clock-AF_QIPCRTR", "clock-AF_SMC" , "clock-AF_MAX"
};
/*
@@ -367,7 +367,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
if (tv.tv_sec == 0 && tv.tv_usec == 0)
return 0;
if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
- *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
+ *timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ);
return 0;
}
@@ -502,6 +502,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
sk_tx_queue_clear(sk);
+ sk->sk_dst_pending_confirm = 0;
RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
dst_release(dst);
return NULL;
@@ -762,11 +763,8 @@ set_rcvbuf:
goto set_rcvbuf;
case SO_KEEPALIVE:
-#ifdef CONFIG_INET
- if (sk->sk_protocol == IPPROTO_TCP &&
- sk->sk_type == SOCK_STREAM)
- tcp_set_keepalive(sk, valbool);
-#endif
+ if (sk->sk_prot->keepalive)
+ sk->sk_prot->keepalive(sk, valbool);
sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
break;
@@ -1148,7 +1146,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.tm.tv_usec = 0;
} else {
v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
- v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
+ v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) / HZ;
}
break;
@@ -1159,7 +1157,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.tm.tv_usec = 0;
} else {
v.tm.tv_sec = sk->sk_sndtimeo / HZ;
- v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
+ v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) / HZ;
}
break;
@@ -1522,6 +1520,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
af_family_clock_key_strings[newsk->sk_family]);
newsk->sk_dst_cache = NULL;
+ newsk->sk_dst_pending_confirm = 0;
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
atomic_set(&newsk->sk_drops, 0);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 2a46e4009f62..4ead336e14ea 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -222,6 +222,21 @@ static int set_default_qdisc(struct ctl_table *table, int write,
}
#endif
+static int proc_do_dev_weight(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+ if (ret != 0)
+ return ret;
+
+ dev_rx_weight = weight_p * dev_weight_rx_bias;
+ dev_tx_weight = weight_p * dev_weight_tx_bias;
+
+ return ret;
+}
+
static int proc_do_rss_key(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
@@ -273,7 +288,21 @@ static struct ctl_table net_core_table[] = {
.data = &weight_p,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_do_dev_weight,
+ },
+ {
+ .procname = "dev_weight_rx_bias",
+ .data = &dev_weight_rx_bias,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_do_dev_weight,
+ },
+ {
+ .procname = "dev_weight_tx_bias",
+ .data = &dev_weight_tx_bias,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_do_dev_weight,
},
{
.procname = "netdev_max_backlog",
@@ -305,6 +334,13 @@ static struct ctl_table net_core_table[] = {
.mode = 0600,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "bpf_jit_kallsyms",
+ .data = &bpf_jit_kallsyms,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec,
+ },
# endif
#endif
{