aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/netconsole.rst3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c35
-rw-r--r--drivers/net/ethernet/intel/ice/ice_flex_pipe.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sbq_cmd.h1
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/igc/igc_ethtool.c5
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c1
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c6
-rw-r--r--drivers/net/mctp/mctp-usb.c8
-rw-r--r--drivers/net/netconsole.c21
-rw-r--r--drivers/net/phy/dp83869.c4
-rw-r--r--drivers/net/usb/asix_devices.c12
-rw-r--r--include/net/tcp.h2
-rw-r--r--include/trace/events/tcp.h9
-rw-r--r--net/ipv4/tcp_input.c21
-rw-r--r--net/mptcp/mib.c1
-rw-r--r--net/mptcp/mib.h1
-rw-r--r--net/mptcp/protocol.c83
-rw-r--r--net/mptcp/protocol.h2
-rwxr-xr-xtools/testing/selftests/net/bareudp.sh2
23 files changed, 156 insertions, 70 deletions
diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst
index 59cb9982afe6..2555e75e5cc1 100644
--- a/Documentation/networking/netconsole.rst
+++ b/Documentation/networking/netconsole.rst
@@ -19,9 +19,6 @@ Userdata append support by Matthew Wood <thepacketgeek@gmail.com>, Jan 22 2024
Sysdata append support by Breno Leitao <leitao@debian.org>, Jan 15 2025
-Please send bug reports to Matt Mackall <mpm@selenic.com>
-Satyam Sharma <satyam.sharma@gmail.com>, and Cong Wang <xiyou.wangcong@gmail.com>
-
Introduction:
=============
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 2250426ec91b..2532b6f82e97 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -4382,6 +4382,15 @@ int ice_get_phy_lane_number(struct ice_hw *hw)
unsigned int lane;
int err;
+ /* E82X does not have sequential IDs, lane number is PF ID.
+ * For E825 device, the exception is the variant with external
+ * PHY (0x579F), in which there is also 1:1 pf_id -> lane_number
+ * mapping.
+ */
+ if (hw->mac_type == ICE_MAC_GENERIC ||
+ hw->device_id == ICE_DEV_ID_E825C_SGMII)
+ return hw->pf_id;
+
options = kcalloc(ICE_AQC_PORT_OPT_MAX, sizeof(*options), GFP_KERNEL);
if (!options)
return -ENOMEM;
@@ -6497,6 +6506,28 @@ u32 ice_get_link_speed(u16 index)
}
/**
+ * ice_get_dest_cgu - get destination CGU dev for given HW
+ * @hw: pointer to the HW struct
+ *
+ * Get CGU client id for CGU register read/write operations.
+ *
+ * Return: CGU device id to use in SBQ transactions.
+ */
+static enum ice_sbq_dev_id ice_get_dest_cgu(struct ice_hw *hw)
+{
+ /* On dual complex E825 only complex 0 has functional CGU powering all
+ * the PHYs.
+ * SBQ destination device cgu points to CGU on a current complex and to
+ * access primary CGU from the secondary complex, the driver should use
+ * cgu_peer as a destination device.
+ */
+ if (hw->mac_type == ICE_MAC_GENERIC_3K_E825 && ice_is_dual(hw) &&
+ !ice_is_primary(hw))
+ return ice_sbq_dev_cgu_peer;
+ return ice_sbq_dev_cgu;
+}
+
+/**
* ice_read_cgu_reg - Read a CGU register
* @hw: Pointer to the HW struct
* @addr: Register address to read
@@ -6510,8 +6541,8 @@ u32 ice_get_link_speed(u16 index)
int ice_read_cgu_reg(struct ice_hw *hw, u32 addr, u32 *val)
{
struct ice_sbq_msg_input cgu_msg = {
+ .dest_dev = ice_get_dest_cgu(hw),
.opcode = ice_sbq_msg_rd,
- .dest_dev = ice_sbq_dev_cgu,
.msg_addr_low = addr
};
int err;
@@ -6542,8 +6573,8 @@ int ice_read_cgu_reg(struct ice_hw *hw, u32 addr, u32 *val)
int ice_write_cgu_reg(struct ice_hw *hw, u32 addr, u32 val)
{
struct ice_sbq_msg_input cgu_msg = {
+ .dest_dev = ice_get_dest_cgu(hw),
.opcode = ice_sbq_msg_wr,
- .dest_dev = ice_sbq_dev_cgu,
.msg_addr_low = addr,
.data = val
};
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index 363ae79a3620..013c93b6605e 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -1479,7 +1479,7 @@ static void ice_init_prof_masks(struct ice_hw *hw, enum ice_block blk)
per_pf = ICE_PROF_MASK_COUNT / hw->dev_caps.num_funcs;
hw->blk[blk].masks.count = per_pf;
- hw->blk[blk].masks.first = hw->pf_id * per_pf;
+ hw->blk[blk].masks.first = hw->logical_pf_id * per_pf;
memset(hw->blk[blk].masks.masks, 0, sizeof(hw->blk[blk].masks.masks));
diff --git a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h
index 183dd5457d6a..21bb861febbf 100644
--- a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h
@@ -50,6 +50,7 @@ enum ice_sbq_dev_id {
ice_sbq_dev_phy_0 = 0x02,
ice_sbq_dev_cgu = 0x06,
ice_sbq_dev_phy_0_peer = 0x0D,
+ ice_sbq_dev_cgu_peer = 0x0F,
};
enum ice_sbq_msg_opcode {
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index f8a208c84f15..10e2445e0ded 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -2281,7 +2281,7 @@ static int igb_get_sset_count(struct net_device *netdev, int sset)
case ETH_SS_PRIV_FLAGS:
return IGB_PRIV_FLAGS_STR_LEN;
default:
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
}
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index f3e7218ba6f3..bb783042d1af 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -810,7 +810,7 @@ static int igc_ethtool_get_sset_count(struct net_device *netdev, int sset)
case ETH_SS_PRIV_FLAGS:
return IGC_PRIV_FLAGS_STR_LEN;
default:
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
}
@@ -2094,6 +2094,9 @@ static void igc_ethtool_diag_test(struct net_device *netdev,
netdev_info(adapter->netdev, "Offline testing starting");
set_bit(__IGC_TESTING, &adapter->state);
+ /* power up PHY for link test */
+ igc_power_up_phy_copper(&adapter->hw);
+
/* Link test performed before hardware reset so autoneg doesn't
* interfere with test result
*/
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index ca1ccc630001..3190ce7e44c7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -11507,10 +11507,10 @@ static int ixgbe_recovery_probe(struct ixgbe_adapter *adapter)
shutdown_aci:
mutex_destroy(&adapter->hw.aci.lock);
ixgbe_release_hw_control(adapter);
- devlink_free(adapter->devlink);
clean_up_probe:
disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state);
free_netdev(netdev);
+ devlink_free(adapter->devlink);
pci_release_mem_regions(pdev);
if (disable_dev)
pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index 114dd88fc71c..6885d2343c48 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -641,7 +641,7 @@ static int ixgbe_ptp_feature_enable(struct ptp_clock_info *ptp,
* disabled
*/
if (rq->type != PTP_CLK_REQ_PPS || !adapter->ptp_setup_sdp)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
if (on)
adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
index 76382626ad41..929adeb50a98 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -66,7 +66,6 @@ static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw)
esw->fdb_table.legacy.addr_grp = NULL;
esw->fdb_table.legacy.allmulti_grp = NULL;
esw->fdb_table.legacy.promisc_grp = NULL;
- atomic64_set(&esw->user_count, 0);
}
static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 34749814f19b..44a142a041b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1978,7 +1978,6 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
/* Holds true only as long as DMFS is the default */
mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns,
MLX5_FLOW_STEERING_MODE_DMFS);
- atomic64_set(&esw->user_count, 0);
}
static int esw_get_nr_ft_offloads_steering_src_ports(struct mlx5_eswitch *esw)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 132626a3f9f7..9ef72f294117 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2557,14 +2557,16 @@ nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info,
err = nfp_net_tlv_caps_parse(&nn->pdev->dev, nn->dp.ctrl_bar,
&nn->tlv_caps);
if (err)
- goto err_free_nn;
+ goto err_free_xsk_pools;
err = nfp_ccm_mbox_alloc(nn);
if (err)
- goto err_free_nn;
+ goto err_free_xsk_pools;
return nn;
+err_free_xsk_pools:
+ kfree(nn->dp.xsk_pools);
err_free_nn:
if (nn->dp.netdev)
free_netdev(nn->dp.netdev);
diff --git a/drivers/net/mctp/mctp-usb.c b/drivers/net/mctp/mctp-usb.c
index 36ccc53b1797..ef860cfc629f 100644
--- a/drivers/net/mctp/mctp-usb.c
+++ b/drivers/net/mctp/mctp-usb.c
@@ -96,11 +96,13 @@ static netdev_tx_t mctp_usb_start_xmit(struct sk_buff *skb,
skb->data, skb->len,
mctp_usb_out_complete, skb);
+ /* Stops TX queue first to prevent race condition with URB complete */
+ netif_stop_queue(dev);
rc = usb_submit_urb(urb, GFP_ATOMIC);
- if (rc)
+ if (rc) {
+ netif_wake_queue(dev);
goto err_drop;
- else
- netif_stop_queue(dev);
+ }
return NETDEV_TX_OK;
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 194570443493..5d8d0214786c 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -886,8 +886,11 @@ static ssize_t userdatum_value_show(struct config_item *item, char *buf)
static void update_userdata(struct netconsole_target *nt)
{
- int complete_idx = 0, child_count = 0;
struct list_head *entry;
+ int child_count = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&target_list_lock, flags);
/* Clear the current string in case the last userdatum was deleted */
nt->userdata_length = 0;
@@ -897,8 +900,11 @@ static void update_userdata(struct netconsole_target *nt)
struct userdatum *udm_item;
struct config_item *item;
- if (WARN_ON_ONCE(child_count >= MAX_EXTRADATA_ITEMS))
- break;
+ if (child_count >= MAX_EXTRADATA_ITEMS) {
+ spin_unlock_irqrestore(&target_list_lock, flags);
+ WARN_ON_ONCE(1);
+ return;
+ }
child_count++;
item = container_of(entry, struct config_item, ci_entry);
@@ -912,12 +918,11 @@ static void update_userdata(struct netconsole_target *nt)
* one entry length (1/MAX_EXTRADATA_ITEMS long), entry count is
* checked to not exceed MAX items with child_count above
*/
- complete_idx += scnprintf(&nt->extradata_complete[complete_idx],
- MAX_EXTRADATA_ENTRY_LEN, " %s=%s\n",
- item->ci_name, udm_item->value);
+ nt->userdata_length += scnprintf(&nt->extradata_complete[nt->userdata_length],
+ MAX_EXTRADATA_ENTRY_LEN, " %s=%s\n",
+ item->ci_name, udm_item->value);
}
- nt->userdata_length = strnlen(nt->extradata_complete,
- sizeof(nt->extradata_complete));
+ spin_unlock_irqrestore(&target_list_lock, flags);
}
static ssize_t userdatum_value_store(struct config_item *item, const char *buf,
diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c
index a2cd1cc35cde..1f381d7b13ff 100644
--- a/drivers/net/phy/dp83869.c
+++ b/drivers/net/phy/dp83869.c
@@ -84,7 +84,7 @@
#define DP83869_CLK_DELAY_DEF 7
/* STRAP_STS1 bits */
-#define DP83869_STRAP_OP_MODE_MASK GENMASK(2, 0)
+#define DP83869_STRAP_OP_MODE_MASK GENMASK(11, 9)
#define DP83869_STRAP_STS1_RESERVED BIT(11)
#define DP83869_STRAP_MIRROR_ENABLED BIT(12)
@@ -528,7 +528,7 @@ static int dp83869_set_strapped_mode(struct phy_device *phydev)
if (val < 0)
return val;
- dp83869->mode = val & DP83869_STRAP_OP_MODE_MASK;
+ dp83869->mode = FIELD_GET(DP83869_STRAP_OP_MODE_MASK, val);
return 0;
}
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 85bd5d845409..232bbd79a4de 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -230,7 +230,9 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf)
int i;
unsigned long gpio_bits = dev->driver_info->data;
- usbnet_get_endpoints(dev,intf);
+ ret = usbnet_get_endpoints(dev, intf);
+ if (ret)
+ goto out;
/* Toggle the GPIOs in a manufacturer/model specific way */
for (i = 2; i >= 0; i--) {
@@ -848,7 +850,9 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
dev->driver_priv = priv;
- usbnet_get_endpoints(dev, intf);
+ ret = usbnet_get_endpoints(dev, intf);
+ if (ret)
+ return ret;
/* Maybe the boot loader passed the MAC address via device tree */
if (!eth_platform_get_mac_address(&dev->udev->dev, buf)) {
@@ -1281,7 +1285,9 @@ static int ax88178_bind(struct usbnet *dev, struct usb_interface *intf)
int ret;
u8 buf[ETH_ALEN] = {0};
- usbnet_get_endpoints(dev,intf);
+ ret = usbnet_get_endpoints(dev, intf);
+ if (ret)
+ return ret;
/* Get the MAC address */
ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5ca230ed526a..ab20f549b8f9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -370,7 +370,7 @@ void tcp_delack_timer_handler(struct sock *sk);
int tcp_ioctl(struct sock *sk, int cmd, int *karg);
enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
-void tcp_rcvbuf_grow(struct sock *sk);
+void tcp_rcvbuf_grow(struct sock *sk, u32 newval);
void tcp_rcv_space_adjust(struct sock *sk);
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
void tcp_twsk_destructor(struct sock *sk);
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 9d2c36c6a0ed..6757233bd064 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -218,6 +218,9 @@ TRACE_EVENT(tcp_rcvbuf_grow,
__field(__u32, space)
__field(__u32, ooo_space)
__field(__u32, rcvbuf)
+ __field(__u32, rcv_ssthresh)
+ __field(__u32, window_clamp)
+ __field(__u32, rcv_wnd)
__field(__u8, scaling_ratio)
__field(__u16, sport)
__field(__u16, dport)
@@ -245,6 +248,9 @@ TRACE_EVENT(tcp_rcvbuf_grow,
tp->rcv_nxt;
__entry->rcvbuf = sk->sk_rcvbuf;
+ __entry->rcv_ssthresh = tp->rcv_ssthresh;
+ __entry->window_clamp = tp->window_clamp;
+ __entry->rcv_wnd = tp->rcv_wnd;
__entry->scaling_ratio = tp->scaling_ratio;
__entry->sport = ntohs(inet->inet_sport);
__entry->dport = ntohs(inet->inet_dport);
@@ -264,11 +270,14 @@ TRACE_EVENT(tcp_rcvbuf_grow,
),
TP_printk("time=%u rtt_us=%u copied=%u inq=%u space=%u ooo=%u scaling_ratio=%u rcvbuf=%u "
+ "rcv_ssthresh=%u window_clamp=%u rcv_wnd=%u "
"family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 "
"saddrv6=%pI6c daddrv6=%pI6c skaddr=%p sock_cookie=%llx",
__entry->time, __entry->rtt_us, __entry->copied,
__entry->inq, __entry->space, __entry->ooo_space,
__entry->scaling_ratio, __entry->rcvbuf,
+ __entry->rcv_ssthresh, __entry->window_clamp,
+ __entry->rcv_wnd,
show_family_name(__entry->family),
__entry->sport, __entry->dport,
__entry->saddr, __entry->daddr,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 31ea5af49f2d..e4a979b75cc6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -891,18 +891,27 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
}
}
-void tcp_rcvbuf_grow(struct sock *sk)
+void tcp_rcvbuf_grow(struct sock *sk, u32 newval)
{
const struct net *net = sock_net(sk);
struct tcp_sock *tp = tcp_sk(sk);
- int rcvwin, rcvbuf, cap;
+ u32 rcvwin, rcvbuf, cap, oldval;
+ u64 grow;
+
+ oldval = tp->rcvq_space.space;
+ tp->rcvq_space.space = newval;
if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) ||
(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
return;
+ /* DRS is always one RTT late. */
+ rcvwin = newval << 1;
+
/* slow start: allow the sender to double its rate. */
- rcvwin = tp->rcvq_space.space << 1;
+ grow = (u64)rcvwin * (newval - oldval);
+ do_div(grow, oldval);
+ rcvwin += grow << 1;
if (!RB_EMPTY_ROOT(&tp->out_of_order_queue))
rcvwin += TCP_SKB_CB(tp->ooo_last_skb)->end_seq - tp->rcv_nxt;
@@ -943,9 +952,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
trace_tcp_rcvbuf_grow(sk, time);
- tp->rcvq_space.space = copied;
-
- tcp_rcvbuf_grow(sk);
+ tcp_rcvbuf_grow(sk, copied);
new_measure:
tp->rcvq_space.seq = tp->copied_seq;
@@ -5270,7 +5277,7 @@ end:
}
/* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */
if (sk->sk_socket)
- tcp_rcvbuf_grow(sk);
+ tcp_rcvbuf_grow(sk, tp->rcvq_space.space);
}
static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 6003e47c770a..171643815076 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -85,6 +85,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("DssFallback", MPTCP_MIB_DSSFALLBACK),
SNMP_MIB_ITEM("SimultConnectFallback", MPTCP_MIB_SIMULTCONNFALLBACK),
SNMP_MIB_ITEM("FallbackFailed", MPTCP_MIB_FALLBACKFAILED),
+ SNMP_MIB_ITEM("WinProbe", MPTCP_MIB_WINPROBE),
};
/* mptcp_mib_alloc - allocate percpu mib counters
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 309bac6fea32..a1d3e9369fbb 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -88,6 +88,7 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_DSSFALLBACK, /* Bad or missing DSS */
MPTCP_MIB_SIMULTCONNFALLBACK, /* Simultaneous connect */
MPTCP_MIB_FALLBACKFAILED, /* Can't fallback due to msk status */
+ MPTCP_MIB_WINPROBE, /* MPTCP-level zero window probe */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 0292162a14ee..2d6b8de35c44 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -194,17 +194,26 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
* - mptcp does not maintain a msk-level window clamp
* - returns true when the receive buffer is actually updated
*/
-static bool mptcp_rcvbuf_grow(struct sock *sk)
+static bool mptcp_rcvbuf_grow(struct sock *sk, u32 newval)
{
struct mptcp_sock *msk = mptcp_sk(sk);
const struct net *net = sock_net(sk);
- int rcvwin, rcvbuf, cap;
+ u32 rcvwin, rcvbuf, cap, oldval;
+ u64 grow;
+ oldval = msk->rcvq_space.space;
+ msk->rcvq_space.space = newval;
if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) ||
(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
return false;
- rcvwin = msk->rcvq_space.space << 1;
+ /* DRS is always one RTT late. */
+ rcvwin = newval << 1;
+
+ /* slow start: allow the sender to double its rate. */
+ grow = (u64)rcvwin * (newval - oldval);
+ do_div(grow, oldval);
+ rcvwin += grow << 1;
if (!RB_EMPTY_ROOT(&msk->out_of_order_queue))
rcvwin += MPTCP_SKB_CB(msk->ooo_last_skb)->end_seq - msk->ack_seq;
@@ -334,7 +343,7 @@ end:
skb_set_owner_r(skb, sk);
/* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */
if (sk->sk_socket)
- mptcp_rcvbuf_grow(sk);
+ mptcp_rcvbuf_grow(sk, msk->rcvq_space.space);
}
static void mptcp_init_skb(struct sock *ssk, struct sk_buff *skb, int offset,
@@ -998,7 +1007,7 @@ static void __mptcp_clean_una(struct sock *sk)
if (WARN_ON_ONCE(!msk->recovery))
break;
- WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+ msk->first_pending = mptcp_send_next(sk);
}
dfrag_clear(sk, dfrag);
@@ -1290,7 +1299,12 @@ alloc_skb:
if (copy == 0) {
u64 snd_una = READ_ONCE(msk->snd_una);
- if (snd_una != msk->snd_nxt || tcp_write_queue_tail(ssk)) {
+ /* No need for zero probe if there are any data pending
+ * either at the msk or ssk level; skb is the current write
+ * queue tail and can be empty at this point.
+ */
+ if (snd_una != msk->snd_nxt || skb->len ||
+ skb != tcp_send_head(ssk)) {
tcp_remove_empty_skb(ssk);
return 0;
}
@@ -1341,6 +1355,7 @@ alloc_skb:
mpext->dsn64);
if (zero_window_probe) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_WINPROBE);
mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
mpext->frozen = 1;
if (READ_ONCE(msk->csum_enabled))
@@ -1543,7 +1558,7 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
mptcp_update_post_push(msk, dfrag, ret);
}
- WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+ msk->first_pending = mptcp_send_next(sk);
if (msk->snd_burst <= 0 ||
!sk_stream_memory_free(ssk) ||
@@ -1903,7 +1918,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
get_page(dfrag->page);
list_add_tail(&dfrag->list, &msk->rtx_queue);
if (!msk->first_pending)
- WRITE_ONCE(msk->first_pending, dfrag);
+ msk->first_pending = dfrag;
}
pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d\n", msk,
dfrag->data_seq, dfrag->data_len, dfrag->already_sent,
@@ -1936,22 +1951,36 @@ do_error:
static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied);
-static int __mptcp_recvmsg_mskq(struct sock *sk,
- struct msghdr *msg,
- size_t len, int flags,
+static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg,
+ size_t len, int flags, int copied_total,
struct scm_timestamping_internal *tss,
int *cmsg_flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct sk_buff *skb, *tmp;
+ int total_data_len = 0;
int copied = 0;
skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
- u32 offset = MPTCP_SKB_CB(skb)->offset;
+ u32 delta, offset = MPTCP_SKB_CB(skb)->offset;
u32 data_len = skb->len - offset;
- u32 count = min_t(size_t, len - copied, data_len);
+ u32 count;
int err;
+ if (flags & MSG_PEEK) {
+ /* skip already peeked skbs */
+ if (total_data_len + data_len <= copied_total) {
+ total_data_len += data_len;
+ continue;
+ }
+
+ /* skip the already peeked data in the current skb */
+ delta = copied_total - total_data_len;
+ offset += delta;
+ data_len -= delta;
+ }
+
+ count = min_t(size_t, len - copied, data_len);
if (!(flags & MSG_TRUNC)) {
err = skb_copy_datagram_msg(skb, offset, msg, count);
if (unlikely(err < 0)) {
@@ -1968,16 +1997,14 @@ static int __mptcp_recvmsg_mskq(struct sock *sk,
copied += count;
- if (count < data_len) {
- if (!(flags & MSG_PEEK)) {
+ if (!(flags & MSG_PEEK)) {
+ msk->bytes_consumed += count;
+ if (count < data_len) {
MPTCP_SKB_CB(skb)->offset += count;
MPTCP_SKB_CB(skb)->map_seq += count;
- msk->bytes_consumed += count;
+ break;
}
- break;
- }
- if (!(flags & MSG_PEEK)) {
/* avoid the indirect call, we know the destructor is sock_rfree */
skb->destructor = NULL;
skb->sk = NULL;
@@ -1985,7 +2012,6 @@ static int __mptcp_recvmsg_mskq(struct sock *sk,
sk_mem_uncharge(sk, skb->truesize);
__skb_unlink(skb, &sk->sk_receive_queue);
skb_attempt_defer_free(skb);
- msk->bytes_consumed += count;
}
if (copied >= len)
@@ -2049,9 +2075,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (msk->rcvq_space.copied <= msk->rcvq_space.space)
goto new_measure;
- msk->rcvq_space.space = msk->rcvq_space.copied;
- if (mptcp_rcvbuf_grow(sk)) {
-
+ if (mptcp_rcvbuf_grow(sk, msk->rcvq_space.copied)) {
/* Make subflows follow along. If we do not do this, we
* get drops at subflow level if skbs can't be moved to
* the mptcp rx queue fast enough (announced rcv_win can
@@ -2063,8 +2087,9 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
ssk = mptcp_subflow_tcp_sock(subflow);
slow = lock_sock_fast(ssk);
- tcp_sk(ssk)->rcvq_space.space = msk->rcvq_space.copied;
- tcp_rcvbuf_grow(ssk);
+ /* subflows can be added before tcp_init_transfer() */
+ if (tcp_sk(ssk)->rcvq_space.space)
+ tcp_rcvbuf_grow(ssk, msk->rcvq_space.copied);
unlock_sock_fast(ssk, slow);
}
}
@@ -2183,7 +2208,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
while (copied < len) {
int err, bytes_read;
- bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags);
+ bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags,
+ copied, &tss, &cmsg_flags);
if (unlikely(bytes_read < 0)) {
if (!copied)
copied = bytes_read;
@@ -2874,7 +2900,7 @@ static void __mptcp_clear_xmit(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_data_frag *dtmp, *dfrag;
- WRITE_ONCE(msk->first_pending, NULL);
+ msk->first_pending = NULL;
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list)
dfrag_clear(sk, dfrag);
}
@@ -3414,9 +3440,6 @@ void __mptcp_data_acked(struct sock *sk)
void __mptcp_check_push(struct sock *sk, struct sock *ssk)
{
- if (!mptcp_send_head(sk))
- return;
-
if (!sock_owned_by_user(sk))
__mptcp_subflow_push_pending(sk, ssk, false);
else
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 52f9cfa4ce95..379a88e14e8d 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -414,7 +414,7 @@ static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
{
const struct mptcp_sock *msk = mptcp_sk(sk);
- return READ_ONCE(msk->first_pending);
+ return msk->first_pending;
}
static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk)
diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
index 4046131e7888..d9e5b967f815 100755
--- a/tools/testing/selftests/net/bareudp.sh
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Test various bareudp tunnel configurations.