diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-06 14:45:08 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-06 14:45:08 -0700 |
| commit | aae3dbb4776e7916b6cd442d00159bea27a695c1 (patch) | |
| tree | d074c5d783a81e7e2e084b1eba77f57459da7e37 /drivers/net/hyperv/netvsc_drv.c | |
| parent | Merge tag 'wberr-v4.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/jl... (diff) | |
| parent | Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirshe... (diff) | |
| download | linux-aae3dbb4776e7916b6cd442d00159bea27a695c1.tar.gz linux-aae3dbb4776e7916b6cd442d00159bea27a695c1.zip | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) Support ipv6 checksum offload in sunvnet driver, from Shannon
Nelson.
2) Move to RB-tree instead of custom AVL code in inetpeer, from Eric
Dumazet.
3) Allow generic XDP to work on virtual devices, from John Fastabend.
4) Add bpf device maps and XDP_REDIRECT, which can be used to build
arbitrary switching frameworks using XDP. From John Fastabend.
5) Remove UFO offloads from the tree, gave us little other than bugs.
6) Remove the IPSEC flow cache, from Florian Westphal.
7) Support ipv6 route offload in mlxsw driver.
8) Support VF representors in bnxt_en, from Sathya Perla.
9) Add support for forward error correction modes to ethtool, from
Vidya Sagar Ravipati.
10) Add time filter for packet scheduler action dumping, from Jamal Hadi
Salim.
11) Extend the zerocopy sendmsg() used by virtio and tap to regular
sockets via MSG_ZEROCOPY. From Willem de Bruijn.
12) Significantly rework value tracking in the BPF verifier, from Edward
Cree.
13) Add new jump instructions to eBPF, from Daniel Borkmann.
14) Rework rtnetlink plumbing so that operations can be run without
taking the RTNL semaphore. From Florian Westphal.
15) Support XDP in tap driver, from Jason Wang.
16) Add 32-bit eBPF JIT for ARM, from Shubham Bansal.
17) Add Huawei hinic ethernet driver.
18) Allow to report MD5 keys in TCP inet_diag dumps, from Ivan
Delalande.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1780 commits)
i40e: point wb_desc at the nvm_wb_desc during i40e_read_nvm_aq
i40e: avoid NVM acquire deadlock during NVM update
drivers: net: xgene: Remove return statement from void function
drivers: net: xgene: Configure tx/rx delay for ACPI
drivers: net: xgene: Read tx/rx delay for ACPI
rocker: fix kcalloc parameter order
rds: Fix non-atomic operation on shared flag variable
net: sched: don't use GFP_KERNEL under spin lock
vhost_net: correctly check tx avail during rx busy polling
net: mdio-mux: add mdio_mux parameter to mdio_mux_init()
rxrpc: Make service connection lookup always check for retry
net: stmmac: Delete dead code for MDIO registration
gianfar: Fix Tx flow control deactivation
cxgb4: Ignore MPS_TX_INT_CAUSE[Bubble] for T6
cxgb4: Fix pause frame count in t4_get_port_stats
cxgb4: fix memory leak
tun: rename generic_xdp to skb_xdp
tun: reserve extra headroom only when XDP is set
net: dsa: bcm_sf2: Configure IMP port TC2QOS mapping
net: dsa: bcm_sf2: Advertise number of egress queues
...
Diffstat (limited to 'drivers/net/hyperv/netvsc_drv.c')
| -rw-r--r-- | drivers/net/hyperv/netvsc_drv.c | 816 |
1 files changed, 606 insertions, 210 deletions
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index d91cbc6c3ca4..165ba4b3b423 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -33,6 +33,9 @@ #include <linux/if_vlan.h> #include <linux/in.h> #include <linux/slab.h> +#include <linux/rtnetlink.h> +#include <linux/netpoll.h> + #include <net/arp.h> #include <net/route.h> #include <net/sock.h> @@ -42,8 +45,14 @@ #include "hyperv_net.h" -#define RING_SIZE_MIN 64 +#define RING_SIZE_MIN 64 +#define NETVSC_MIN_TX_SECTIONS 10 +#define NETVSC_DEFAULT_TX 192 /* ~1M */ +#define NETVSC_MIN_RX_SECTIONS 10 /* ~64K */ +#define NETVSC_DEFAULT_RX 2048 /* ~4M */ + #define LINKCHANGE_INT (2 * HZ) +#define VF_TAKEOVER_INT (HZ / 10) static int ring_size = 128; module_param(ring_size, int, S_IRUGO); @@ -69,7 +78,8 @@ static void netvsc_set_multicast_list(struct net_device *net) static int netvsc_open(struct net_device *net) { struct net_device_context *ndev_ctx = netdev_priv(net); - struct netvsc_device *nvdev = ndev_ctx->nvdev; + struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); + struct netvsc_device *nvdev = rtnl_dereference(ndev_ctx->nvdev); struct rndis_device *rdev; int ret = 0; @@ -85,22 +95,40 @@ static int netvsc_open(struct net_device *net) netif_tx_wake_all_queues(net); rdev = nvdev->extension; - if (!rdev->link_state && !ndev_ctx->datapath) + + if (!rdev->link_state) netif_carrier_on(net); - return ret; + if (vf_netdev) { + /* Setting synthetic device up transparently sets + * slave as up. If open fails, then slave will be + * still be offline (and not used). + */ + ret = dev_open(vf_netdev); + if (ret) + netdev_warn(net, + "unable to open slave: %s: %d\n", + vf_netdev->name, ret); + } + return 0; } static int netvsc_close(struct net_device *net) { struct net_device_context *net_device_ctx = netdev_priv(net); + struct net_device *vf_netdev + = rtnl_dereference(net_device_ctx->vf_netdev); struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); - int ret; + int ret = 0; u32 aread, i, msec = 10, retry = 0, retry_max = 20; struct vmbus_channel *chn; netif_tx_disable(net); + /* No need to close rndis filter if it is removed already */ + if (!nvdev) + goto out; + ret = rndis_filter_close(nvdev); if (ret != 0) { netdev_err(net, "unable to close device (ret %d).\n", ret); @@ -139,11 +167,15 @@ static int netvsc_close(struct net_device *net) ret = -ETIMEDOUT; } +out: + if (vf_netdev) + dev_close(vf_netdev); + return ret; } static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, - int pkt_type) + int pkt_type) { struct rndis_packet *rndis_pkt; struct rndis_per_packet_info *ppi; @@ -163,10 +195,12 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, return ppi; } -/* Azure hosts don't support non-TCP port numbers in hashing yet. We compute - * hash for non-TCP traffic with only IP numbers. +/* Azure hosts don't support non-TCP port numbers in hashing for fragmented + * packets. We can use ethtool to change UDP hash level when necessary. */ -static inline u32 netvsc_get_hash(struct sk_buff *skb, struct sock *sk) +static inline u32 netvsc_get_hash( + struct sk_buff *skb, + const struct net_device_context *ndc) { struct flow_keys flow; u32 hash; @@ -177,7 +211,11 @@ static inline u32 netvsc_get_hash(struct sk_buff *skb, struct sock *sk) if (!skb_flow_dissect_flow_keys(skb, &flow, 0)) return 0; - if (flow.basic.ip_proto == IPPROTO_TCP) { + if (flow.basic.ip_proto == IPPROTO_TCP || + (flow.basic.ip_proto == IPPROTO_UDP && + ((flow.basic.n_proto == htons(ETH_P_IP) && ndc->udp4_l4_hash) || + (flow.basic.n_proto == htons(ETH_P_IPV6) && + ndc->udp6_l4_hash)))) { return skb_get_hash(skb); } else { if (flow.basic.n_proto == htons(ETH_P_IP)) @@ -200,7 +238,7 @@ static inline int netvsc_get_tx_queue(struct net_device *ndev, struct sock *sk = skb->sk; int q_idx; - q_idx = ndc->tx_send_table[netvsc_get_hash(skb, sk) & + q_idx = ndc->tx_send_table[netvsc_get_hash(skb, ndc) & (VRSS_SEND_TAB_SIZE - 1)]; /* If queue index changed record the new value */ @@ -222,13 +260,11 @@ static inline int netvsc_get_tx_queue(struct net_device *ndev, * * TODO support XPS - but get_xps_queue not exported */ -static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, - void *accel_priv, select_queue_fallback_t fallback) +static u16 netvsc_pick_tx(struct net_device *ndev, struct sk_buff *skb) { - unsigned int num_tx_queues = ndev->real_num_tx_queues; int q_idx = sk_tx_queue_get(skb->sk); - if (q_idx < 0 || skb->ooo_okay) { + if (q_idx < 0 || skb->ooo_okay || q_idx >= ndev->real_num_tx_queues) { /* If forwarding a packet, we use the recorded queue when * available for better cache locality. */ @@ -238,14 +274,35 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, q_idx = netvsc_get_tx_queue(ndev, skb, q_idx); } - while (unlikely(q_idx >= num_tx_queues)) - q_idx -= num_tx_queues; - return q_idx; } +static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, + void *accel_priv, + select_queue_fallback_t fallback) +{ + struct net_device_context *ndc = netdev_priv(ndev); + struct net_device *vf_netdev; + u16 txq; + + rcu_read_lock(); + vf_netdev = rcu_dereference(ndc->vf_netdev); + if (vf_netdev) { + txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0; + qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; + } else { + txq = netvsc_pick_tx(ndev, skb); + } + rcu_read_unlock(); + + while (unlikely(txq >= ndev->real_num_tx_queues)) + txq -= ndev->real_num_tx_queues; + + return txq; +} + static u32 fill_pg_buf(struct page *page, u32 offset, u32 len, - struct hv_page_buffer *pb) + struct hv_page_buffer *pb) { int j = 0; @@ -280,9 +337,8 @@ static u32 fill_pg_buf(struct page *page, u32 offset, u32 len, static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, struct hv_netvsc_packet *packet, - struct hv_page_buffer **page_buf) + struct hv_page_buffer *pb) { - struct hv_page_buffer *pb = *page_buf; u32 slots_used = 0; char *data = skb->data; int frags = skb_shinfo(skb)->nr_frags; @@ -293,10 +349,9 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, * 2. skb linear data * 3. skb fragment data */ - if (hdr != NULL) - slots_used += fill_pg_buf(virt_to_page(hdr), - offset_in_page(hdr), - len, &pb[slots_used]); + slots_used += fill_pg_buf(virt_to_page(hdr), + offset_in_page(hdr), + len, &pb[slots_used]); packet->rmsg_size = len; packet->rmsg_pgcnt = slots_used; @@ -359,13 +414,40 @@ static u32 net_checksum_info(struct sk_buff *skb) if (ip6->nexthdr == IPPROTO_TCP) return TRANSPORT_INFO_IPV6_TCP; - else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP) + else if (ip6->nexthdr == IPPROTO_UDP) return TRANSPORT_INFO_IPV6_UDP; } return TRANSPORT_INFO_NOT_IP; } +/* Send skb on the slave VF device. */ +static int netvsc_vf_xmit(struct net_device *net, struct net_device *vf_netdev, + struct sk_buff *skb) +{ + struct net_device_context *ndev_ctx = netdev_priv(net); + unsigned int len = skb->len; + int rc; + + skb->dev = vf_netdev; + skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping; + + rc = dev_queue_xmit(skb); + if (likely(rc == NET_XMIT_SUCCESS || rc == NET_XMIT_CN)) { + struct netvsc_vf_pcpu_stats *pcpu_stats + = this_cpu_ptr(ndev_ctx->vf_stats); + + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->tx_packets++; + pcpu_stats->tx_bytes += len; + u64_stats_update_end(&pcpu_stats->syncp); + } else { + this_cpu_inc(ndev_ctx->vf_stats->tx_dropped); + } + + return rc; +} + static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) { struct net_device_context *net_device_ctx = netdev_priv(net); @@ -374,11 +456,19 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) unsigned int num_data_pgs; struct rndis_message *rndis_msg; struct rndis_packet *rndis_pkt; + struct net_device *vf_netdev; u32 rndis_msg_size; struct rndis_per_packet_info *ppi; u32 hash; - struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT]; - struct hv_page_buffer *pb = page_buf; + struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT]; + + /* if VF is present and up then redirect packets + * already called with rcu_read_lock_bh + */ + vf_netdev = rcu_dereference_bh(net_device_ctx->vf_netdev); + if (vf_netdev && netif_running(vf_netdev) && + !netpoll_tx_running(net)) + return netvsc_vf_xmit(net, vf_netdev, skb); /* We will atmost need two pages to describe the rndis * header. We can only transmit MAX_PAGE_BUFFER_COUNT number @@ -448,9 +538,9 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) rndis_msg_size += NDIS_VLAN_PPI_SIZE; ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE, - IEEE_8021Q_INFO); - vlan = (struct ndis_pkt_8021q_info *)((void *)ppi + - ppi->ppi_offset); + IEEE_8021Q_INFO); + + vlan = (void *)ppi + ppi->ppi_offset; vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK; vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; @@ -463,8 +553,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, TCP_LARGESEND_PKTINFO); - lso_info = (struct ndis_tcp_lso_info *)((void *)ppi + - ppi->ppi_offset); + lso_info = (void *)ppi + ppi->ppi_offset; lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; if (skb->protocol == htons(ETH_P_IP)) { @@ -524,12 +613,12 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) rndis_msg->msg_len += rndis_msg_size; packet->total_data_buflen = rndis_msg->msg_len; packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, - skb, packet, &pb); + skb, packet, pb); /* timestamp packet in software */ skb_tx_timestamp(skb); - ret = netvsc_send(net_device_ctx->device_ctx, packet, - rndis_msg, &pb, skb); + + ret = netvsc_send(net_device_ctx, packet, rndis_msg, pb, skb); if (likely(ret == 0)) return NETDEV_TX_OK; @@ -551,6 +640,7 @@ no_memory: ++net_device_ctx->eth_stats.tx_no_memory; goto drop; } + /* * netvsc_linkstatus_callback - Link up/down notification */ @@ -574,8 +664,8 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) { u32 speed; - speed = *(u32 *)((void *)indicate + indicate-> - status_buf_offset) / 10000; + speed = *(u32 *)((void *)indicate + + indicate->status_buf_offset) / 10000; ndev_ctx->speed = speed; return; } @@ -658,29 +748,18 @@ int netvsc_recv_callback(struct net_device *net, struct netvsc_device *net_device; u16 q_idx = channel->offermsg.offer.sub_channel_index; struct netvsc_channel *nvchan; - struct net_device *vf_netdev; struct sk_buff *skb; struct netvsc_stats *rx_stats; if (net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; - /* - * If necessary, inject this packet into the VF interface. - * On Hyper-V, multicast and brodcast packets are only delivered - * to the synthetic interface (after subjecting these to - * policy filters on the host). Deliver these via the VF - * interface in the guest. - */ rcu_read_lock(); net_device = rcu_dereference(net_device_ctx->nvdev); if (unlikely(!net_device)) goto drop; nvchan = &net_device->chan_table[q_idx]; - vf_netdev = rcu_dereference(net_device_ctx->vf_netdev); - if (vf_netdev && (vf_netdev->flags & IFF_UP)) - net = vf_netdev; /* Allocate a skb - TODO direct I/O to pages? */ skb = netvsc_alloc_recv_skb(net, &nvchan->napi, @@ -692,8 +771,7 @@ drop: return NVSP_STAT_FAIL; } - if (net != vf_netdev) - skb_record_rx_queue(skb, q_idx); + skb_record_rx_queue(skb, q_idx); /* * Even if injecting the packet, record the statistics @@ -736,48 +814,22 @@ static void netvsc_get_channels(struct net_device *net, } } -static int netvsc_set_queues(struct net_device *net, struct hv_device *dev, - u32 num_chn) -{ - struct netvsc_device_info device_info; - int ret; - - memset(&device_info, 0, sizeof(device_info)); - device_info.num_chn = num_chn; - device_info.ring_size = ring_size; - device_info.max_num_vrss_chns = num_chn; - - ret = rndis_filter_device_add(dev, &device_info); - if (ret) - return ret; - - ret = netif_set_real_num_tx_queues(net, num_chn); - if (ret) - return ret; - - ret = netif_set_real_num_rx_queues(net, num_chn); - - return ret; -} - static int netvsc_set_channels(struct net_device *net, struct ethtool_channels *channels) { struct net_device_context *net_device_ctx = netdev_priv(net); struct hv_device *dev = net_device_ctx->device_ctx; struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); - unsigned int count = channels->combined_count; - bool was_running; - int ret; + unsigned int orig, count = channels->combined_count; + struct netvsc_device_info device_info; + bool was_opened; + int ret = 0; /* We do not support separate count for rx, tx, or other */ if (count == 0 || channels->rx_count || channels->tx_count || channels->other_count) return -EINVAL; - if (count > net->num_tx_queues || count > VRSS_CHANNEL_MAX) - return -EINVAL; - if (!nvdev || nvdev->destroy) return -ENODEV; @@ -787,25 +839,40 @@ static int netvsc_set_channels(struct net_device *net, if (count > nvdev->max_chn) return -EINVAL; - was_running = netif_running(net); - if (was_running) { - ret = netvsc_close(net); - if (ret) - return ret; - } + orig = nvdev->num_chn; + was_opened = rndis_filter_opened(nvdev); + if (was_opened) + rndis_filter_close(nvdev); + + memset(&device_info, 0, sizeof(device_info)); + device_info.num_chn = count; + device_info.ring_size = ring_size; + device_info.send_sections = nvdev->send_section_cnt; + device_info.recv_sections = nvdev->recv_section_cnt; rndis_filter_device_remove(dev, nvdev); - ret = netvsc_set_queues(net, dev, count); - if (ret == 0) - nvdev->num_chn = count; - else - netvsc_set_queues(net, dev, nvdev->num_chn); + nvdev = rndis_filter_device_add(dev, &device_info); + if (!IS_ERR(nvdev)) { + netif_set_real_num_tx_queues(net, nvdev->num_chn); + netif_set_real_num_rx_queues(net, nvdev->num_chn); + } else { + ret = PTR_ERR(nvdev); + device_info.num_chn = orig; + nvdev = rndis_filter_device_add(dev, &device_info); + + if (IS_ERR(nvdev)) { + netdev_err(net, "restoring channel setting failed: %ld\n", + PTR_ERR(nvdev)); + return ret; + } + } - if (was_running) - ret = netvsc_open(net); + if (was_opened) + rndis_filter_open(nvdev); /* We may have missed link change notifications */ + net_device_ctx->last_reconfig = 0; schedule_delayed_work(&net_device_ctx->dwork, 0); return ret; @@ -832,6 +899,9 @@ static void netvsc_init_settings(struct net_device *dev) { struct net_device_context *ndc = netdev_priv(dev); + ndc->udp4_l4_hash = true; + ndc->udp6_l4_hash = true; + ndc->speed = SPEED_UNKNOWN; ndc->duplex = DUPLEX_FULL; } @@ -869,41 +939,61 @@ static int netvsc_set_link_ksettings(struct net_device *dev, static int netvsc_change_mtu(struct net_device *ndev, int mtu) { struct net_device_context *ndevctx = netdev_priv(ndev); + struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev); struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); struct hv_device *hdev = ndevctx->device_ctx; + int orig_mtu = ndev->mtu; struct netvsc_device_info device_info; - bool was_running; + bool was_opened; int ret = 0; if (!nvdev || nvdev->destroy) return -ENODEV; - was_running = netif_running(ndev); - if (was_running) { - ret = netvsc_close(ndev); + /* Change MTU of underlying VF netdev first. */ + if (vf_netdev) { + ret = dev_set_mtu(vf_netdev, mtu); if (ret) return ret; } + netif_device_detach(ndev); + was_opened = rndis_filter_opened(nvdev); + if (was_opened) + rndis_filter_close(nvdev); + memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; device_info.num_chn = nvdev->num_chn; - device_info.max_num_vrss_chns = nvdev->num_chn; + device_info.send_sections = nvdev->send_section_cnt; + device_info.recv_sections = nvdev->recv_section_cnt; rndis_filter_device_remove(hdev, nvdev); - /* 'nvdev' has been freed in rndis_filter_device_remove() -> - * netvsc_device_remove () -> free_netvsc_device(). - * We mustn't access it before it's re-created in - * rndis_filter_device_add() -> netvsc_device_add(). - */ - ndev->mtu = mtu; - rndis_filter_device_add(hdev, &device_info); + nvdev = rndis_filter_device_add(hdev, &device_info); + if (IS_ERR(nvdev)) { + ret = PTR_ERR(nvdev); + + /* Attempt rollback to original MTU */ + ndev->mtu = orig_mtu; + nvdev = rndis_filter_device_add(hdev, &device_info); - if (was_running) - ret = netvsc_open(ndev); + if (vf_netdev) + dev_set_mtu(vf_netdev, orig_mtu); + + if (IS_ERR(nvdev)) { + netdev_err(ndev, "restoring mtu failed: %ld\n", + PTR_ERR(nvdev)); + return ret; + } + } + + if (was_opened) + rndis_filter_open(nvdev); + + netif_device_attach(ndev); /* We may have missed link change notifications */ schedule_delayed_work(&ndevctx->dwork, 0); @@ -911,16 +1001,56 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) return ret; } +static void netvsc_get_vf_stats(struct net_device *net, + struct netvsc_vf_pcpu_stats *tot) +{ + struct net_device_context *ndev_ctx = netdev_priv(net); + int i; + + memset(tot, 0, sizeof(*tot)); + + for_each_possible_cpu(i) { + const struct netvsc_vf_pcpu_stats *stats + = per_cpu_ptr(ndev_ctx->vf_stats, i); + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(&stats->syncp); + rx_packets = stats->rx_packets; + tx_packets = stats->tx_packets; + rx_bytes = stats->rx_bytes; + tx_bytes = stats->tx_bytes; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; + tot->rx_bytes += rx_bytes; + tot->tx_bytes += tx_bytes; + tot->tx_dropped += stats->tx_dropped; + } +} + static void netvsc_get_stats64(struct net_device *net, struct rtnl_link_stats64 *t) { struct net_device_context *ndev_ctx = netdev_priv(net); struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev); + struct netvsc_vf_pcpu_stats vf_tot; int i; if (!nvdev) return; + netdev_stats_to_stats64(t, &net->stats); + + netvsc_get_vf_stats(net, &vf_tot); + t->rx_packets += vf_tot.rx_packets; + t->tx_packets += vf_tot.tx_packets; + t->rx_bytes += vf_tot.rx_bytes; + t->tx_bytes += vf_tot.tx_bytes; + t->tx_dropped += vf_tot.tx_dropped; + for (i = 0; i < nvdev->num_chn; i++) { const struct netvsc_channel *nvchan = &nvdev->chan_table[i]; const struct netvsc_stats *stats; @@ -949,33 +1079,36 @@ static void netvsc_get_stats64(struct net_device *net, t->rx_packets += packets; t->multicast += multicast; } - - t->tx_dropped = net->stats.tx_dropped; - t->tx_errors = net->stats.tx_errors; - - t->rx_dropped = net->stats.rx_dropped; - t->rx_errors = net->stats.rx_errors; } static int netvsc_set_mac_addr(struct net_device *ndev, void *p) { + struct net_device_context *ndc = netdev_priv(ndev); + struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev); + struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); struct sockaddr *addr = p; - char save_adr[ETH_ALEN]; - unsigned char save_aatype; int err; - memcpy(save_adr, ndev->dev_addr, ETH_ALEN); - save_aatype = ndev->addr_assign_type; - - err = eth_mac_addr(ndev, p); - if (err != 0) + err = eth_prepare_mac_addr_change(ndev, p); + if (err) return err; - err = rndis_filter_set_device_mac(ndev, addr->sa_data); - if (err != 0) { - /* roll back to saved MAC */ - memcpy(ndev->dev_addr, save_adr, ETH_ALEN); - ndev->addr_assign_type = save_aatype; + if (!nvdev) + return -ENODEV; + + if (vf_netdev) { + err = dev_set_mac_address(vf_netdev, addr); + if (err) + return err; + } + + err = rndis_filter_set_device_mac(nvdev, addr->sa_data); + if (!err) { + eth_commit_mac_addr_change(ndev, p); + } else if (vf_netdev) { + /* rollback change on VF */ + memcpy(addr->sa_data, ndev->dev_addr, ETH_ALEN); + dev_set_mac_address(vf_netdev, addr); } return err; @@ -990,9 +1123,18 @@ static const struct { { "tx_no_space", offsetof(struct netvsc_ethtool_stats, tx_no_space) }, { "tx_too_big", offsetof(struct netvsc_ethtool_stats, tx_too_big) }, { "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) }, + { "tx_send_full", offsetof(struct netvsc_ethtool_stats, tx_send_full) }, + { "rx_comp_busy", offsetof(struct netvsc_ethtool_stats, rx_comp_busy) }, +}, vf_stats[] = { + { "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) }, + { "vf_rx_bytes", offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) }, + { "vf_tx_packets", offsetof(struct netvsc_vf_pcpu_stats, tx_packets) }, + { "vf_tx_bytes", offsetof(struct netvsc_vf_pcpu_stats, tx_bytes) }, + { "vf_tx_dropped", offsetof(struct netvsc_vf_pcpu_stats, tx_dropped) }, }; #define NETVSC_GLOBAL_STATS_LEN ARRAY_SIZE(netvsc_stats) +#define NETVSC_VF_STATS_LEN ARRAY_SIZE(vf_stats) /* 4 statistics per queue (rx/tx packets/bytes) */ #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4) @@ -1007,7 +1149,9 @@ static int netvsc_get_sset_count(struct net_device *dev, int string_set) switch (string_set) { case ETH_SS_STATS: - return NETVSC_GLOBAL_STATS_LEN + NETVSC_QUEUE_STATS_LEN(nvdev); + return NETVSC_GLOBAL_STATS_LEN + + NETVSC_VF_STATS_LEN + + NETVSC_QUEUE_STATS_LEN(nvdev); default: return -EINVAL; } @@ -1017,9 +1161,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct net_device_context *ndc = netdev_priv(dev); - struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev); + struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); const void *nds = &ndc->eth_stats; const struct netvsc_stats *qstats; + struct netvsc_vf_pcpu_stats sum; unsigned int start; u64 packets, bytes; int i, j; @@ -1030,6 +1175,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++) data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset); + netvsc_get_vf_stats(dev, &sum); + for (j = 0; j < NETVSC_VF_STATS_LEN; j++) + data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset); + for (j = 0; j < nvdev->num_chn; j++) { qstats = &nvdev->chan_table[j].tx_stats; @@ -1055,7 +1204,7 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct net_device_context *ndc = netdev_priv(dev); - struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev); + struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); u8 *p = data; int i; @@ -1064,11 +1213,16 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data) switch (stringset) { case ETH_SS_STATS: - for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) - memcpy(p + i * ETH_GSTRING_LEN, - netvsc_stats[i].name, ETH_GSTRING_LEN); + for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) { + memcpy(p, netvsc_stats[i].name, ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < ARRAY_SIZE(vf_stats); i++) { + memcpy(p, vf_stats[i].name, ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } - p += i * ETH_GSTRING_LEN; for (i = 0; i < nvdev->num_chn; i++) { sprintf(p, "tx_queue_%u_packets", i); p += ETH_GSTRING_LEN; @@ -1085,7 +1239,7 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data) } static int -netvsc_get_rss_hash_opts(struct netvsc_device *nvdev, +netvsc_get_rss_hash_opts(struct net_device_context *ndc, struct ethtool_rxnfc *info) { info->data = RXH_IP_SRC | RXH_IP_DST; @@ -1094,9 +1248,20 @@ netvsc_get_rss_hash_opts(struct netvsc_device *nvdev, case TCP_V4_FLOW: case TCP_V6_FLOW: info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - /* fallthrough */ + break; + case UDP_V4_FLOW: + if (ndc->udp4_l4_hash) + info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + + break; + case UDP_V6_FLOW: + if (ndc->udp6_l4_hash) + info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + + break; + case IPV4_FLOW: case IPV6_FLOW: break; @@ -1113,7 +1278,7 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rules) { struct net_device_context *ndc = netdev_priv(dev); - struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev); + struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); if (!nvdev) return -ENODEV; @@ -1124,8 +1289,48 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, return 0; case ETHTOOL_GRXFH: - return netvsc_get_rss_hash_opts(nvdev, info); + return netvsc_get_rss_hash_opts(ndc, info); + } + return -EOPNOTSUPP; +} + +static int netvsc_set_rss_hash_opts(struct net_device_context *ndc, + struct ethtool_rxnfc *info) +{ + if (info->data == (RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + if (info->flow_type == UDP_V4_FLOW) + ndc->udp4_l4_hash = true; + else if (info->flow_type == UDP_V6_FLOW) + ndc->udp6_l4_hash = true; + else + return -EOPNOTSUPP; + + return 0; } + + if (info->data == (RXH_IP_SRC | RXH_IP_DST)) { + if (info->flow_type == UDP_V4_FLOW) + ndc->udp4_l4_hash = false; + else if (info->flow_type == UDP_V6_FLOW) + ndc->udp6_l4_hash = false; + else + return -EOPNOTSUPP; + + return 0; + } + + return -EOPNOTSUPP; +} + +static int +netvsc_set_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *info) +{ + struct net_device_context *ndc = netdev_priv(ndev); + + if (info->cmd == ETHTOOL_SRXFH) + return netvsc_set_rss_hash_opts(ndc, info); + return -EOPNOTSUPP; } @@ -1163,7 +1368,7 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfunc) { struct net_device_context *ndc = netdev_priv(dev); - struct netvsc_device *ndev = rcu_dereference(ndc->nvdev); + struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev); struct rndis_device *rndis_dev; int i; @@ -1202,7 +1407,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir, rndis_dev = ndev->extension; if (indir) { for (i = 0; i < ITAB_NUM; i++) - if (indir[i] >= VRSS_CHANNEL_MAX) + if (indir[i] >= ndev->num_chn) return -EINVAL; for (i = 0; i < ITAB_NUM; i++) @@ -1216,7 +1421,105 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir, key = rndis_dev->rss_key; } - return rndis_filter_set_rss_param(rndis_dev, key, ndev->num_chn); + return rndis_filter_set_rss_param(rndis_dev, key); +} + +/* Hyper-V RNDIS protocol does not have ring in the HW sense. + * It does have pre-allocated receive area which is divided into sections. + */ +static void __netvsc_get_ringparam(struct netvsc_device *nvdev, + struct ethtool_ringparam *ring) +{ + u32 max_buf_size; + + ring->rx_pending = nvdev->recv_section_cnt; + ring->tx_pending = nvdev->send_section_cnt; + + if (nvdev->nvsp_version <= NVSP_PROTOCOL_VERSION_2) + max_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; + else + max_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; + + ring->rx_max_pending = max_buf_size / nvdev->recv_section_size; + ring->tx_max_pending = NETVSC_SEND_BUFFER_SIZE + / nvdev->send_section_size; +} + +static void netvsc_get_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ring) +{ + struct net_device_context *ndevctx = netdev_priv(ndev); + struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); + + if (!nvdev) + return; + + __netvsc_get_ringparam(nvdev, ring); +} + +static int netvsc_set_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ring) +{ + struct net_device_context *ndevctx = netdev_priv(ndev); + struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); + struct hv_device *hdev = ndevctx->device_ctx; + struct netvsc_device_info device_info; + struct ethtool_ringparam orig; + u32 new_tx, new_rx; + bool was_opened; + int ret = 0; + + if (!nvdev || nvdev->destroy) + return -ENODEV; + + memset(&orig, 0, sizeof(orig)); + __netvsc_get_ringparam(nvdev, &orig); + + new_tx = clamp_t(u32, ring->tx_pending, + NETVSC_MIN_TX_SECTIONS, orig.tx_max_pending); + new_rx = clamp_t(u32, ring->rx_pending, + NETVSC_MIN_RX_SECTIONS, orig.rx_max_pending); + + if (new_tx == orig.tx_pending && + new_rx == orig.rx_pending) + return 0; /* no change */ + + memset(&device_info, 0, sizeof(device_info)); + device_info.num_chn = nvdev->num_chn; + device_info.ring_size = ring_size; + device_info.send_sections = new_tx; + device_info.recv_sections = new_rx; + + netif_device_detach(ndev); + was_opened = rndis_filter_opened(nvdev); + if (was_opened) + rndis_filter_close(nvdev); + + rndis_filter_device_remove(hdev, nvdev); + + nvdev = rndis_filter_device_add(hdev, &device_info); + if (IS_ERR(nvdev)) { + ret = PTR_ERR(nvdev); + + device_info.send_sections = orig.tx_pending; + device_info.recv_sections = orig.rx_pending; + nvdev = rndis_filter_device_add(hdev, &device_info); + if (IS_ERR(nvdev)) { + netdev_err(ndev, "restoring ringparam failed: %ld\n", + PTR_ERR(nvdev)); + return ret; + } + } + + if (was_opened) + rndis_filter_open(nvdev); + netif_device_attach(ndev); + + /* We may have missed link change notifications */ + ndevctx->last_reconfig = 0; + schedule_delayed_work(&ndevctx->dwork, 0); + + return ret; } static const struct ethtool_ops ethtool_ops = { @@ -1229,12 +1532,15 @@ static const struct ethtool_ops ethtool_ops = { .set_channels = netvsc_set_channels, .get_ts_info = ethtool_op_get_ts_info, .get_rxnfc = netvsc_get_rxnfc, + .set_rxnfc = netvsc_set_rxnfc, .get_rxfh_key_size = netvsc_get_rxfh_key_size, .get_rxfh_indir_size = netvsc_rss_indir_size, .get_rxfh = netvsc_get_rxfh, .set_rxfh = netvsc_set_rxfh, .get_link_ksettings = netvsc_get_link_ksettings, .set_link_ksettings = netvsc_set_link_ksettings, + .get_ringparam = netvsc_get_ringparam, + .set_ringparam = netvsc_set_ringparam, }; static const struct net_device_ops device_ops = { @@ -1313,8 +1619,7 @@ static void netvsc_link_change(struct work_struct *w) case RNDIS_STATUS_MEDIA_CONNECT: if (rdev->link_state) { rdev->link_state = false; - if (!ndev_ctx->datapath) - netif_carrier_on(net); + netif_carrier_on(net); netif_tx_wake_all_queues(net); } else { notify = true; @@ -1391,7 +1696,7 @@ static struct net_device *get_netvsc_byref(struct net_device *vf_netdev) continue; /* not a netvsc device */ net_device_ctx = netdev_priv(dev); - if (net_device_ctx->nvdev == NULL) + if (!rtnl_dereference(net_device_ctx->nvdev)) continue; /* device is removed */ if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev) @@ -1401,6 +1706,108 @@ static struct net_device *get_netvsc_byref(struct net_device *vf_netdev) return NULL; } +/* Called when VF is injecting data into network stack. + * Change the associated network device from VF to netvsc. + * note: already called with rcu_read_lock + */ +static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb) +{ + struct sk_buff *skb = *pskb; + struct net_device *ndev = rcu_dereference(skb->dev->rx_handler_data); + struct net_device_context *ndev_ctx = netdev_priv(ndev); + struct netvsc_vf_pcpu_stats *pcpu_stats + = this_cpu_ptr(ndev_ctx->vf_stats); + + skb->dev = ndev; + + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->rx_packets++; + pcpu_stats->rx_bytes += skb->len; + u64_stats_update_end(&pcpu_stats->syncp); + + return RX_HANDLER_ANOTHER; +} + +static int netvsc_vf_join(struct net_device *vf_netdev, + struct net_device *ndev) +{ + struct net_device_context *ndev_ctx = netdev_priv(ndev); + int ret; + + ret = netdev_rx_handler_register(vf_netdev, + netvsc_vf_handle_frame, ndev); + if (ret != 0) { + netdev_err(vf_netdev, + "can not register netvsc VF receive handler (err = %d)\n", + ret); + goto rx_handler_failed; + } + + ret = netdev_upper_dev_link(vf_netdev, ndev); + if (ret != 0) { + netdev_err(vf_netdev, + "can not set master device %s (err = %d)\n", + ndev->name, ret); + goto upper_link_failed; + } + + /* set slave flag before open to prevent IPv6 addrconf */ + vf_netdev->flags |= IFF_SLAVE; + + schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); + + call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); + + netdev_info(vf_netdev, "joined to %s\n", ndev->name); + return 0; + +upper_link_failed: + netdev_rx_handler_unregister(vf_netdev); +rx_handler_failed: + return ret; +} + +static void __netvsc_vf_setup(struct net_device *ndev, + struct net_device *vf_netdev) +{ + int ret; + + /* Align MTU of VF with master */ + ret = dev_set_mtu(vf_netdev, ndev->mtu); + if (ret) + netdev_warn(vf_netdev, + "unable to change mtu to %u\n", ndev->mtu); + + if (netif_running(ndev)) { + ret = dev_open(vf_netdev); + if (ret) + netdev_warn(vf_netdev, + "unable to open: %d\n", ret); + } +} + +/* Setup VF as slave of the synthetic device. + * Runs in workqueue to avoid recursion in netlink callbacks. + */ +static void netvsc_vf_setup(struct work_struct *w) +{ + struct net_device_context *ndev_ctx + = container_of(w, struct net_device_context, vf_takeover.work); + struct net_device *ndev = hv_get_drvdata(ndev_ctx->device_ctx); + struct net_device *vf_netdev; + + if (!rtnl_trylock()) { + schedule_delayed_work(&ndev_ctx->vf_takeover, 0); + return; + } + + vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); + if (vf_netdev) + __netvsc_vf_setup(ndev, vf_netdev); + + rtnl_unlock(); +} + static int netvsc_register_vf(struct net_device *vf_netdev) { struct net_device *ndev; @@ -1424,56 +1831,23 @@ static int netvsc_register_vf(struct net_device *vf_netdev) if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev)) return NOTIFY_DONE; + if (netvsc_vf_join(vf_netdev, ndev) != 0) + return NOTIFY_DONE; + netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); - /* - * Take a reference on the module. - */ - try_module_get(THIS_MODULE); dev_hold(vf_netdev); rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev); return NOTIFY_OK; } -static int netvsc_vf_up(struct net_device *vf_netdev) +/* VF up/down change detected, schedule to change data path */ +static int netvsc_vf_changed(struct net_device *vf_netdev) { - struct net_device *ndev; - struct netvsc_device *netvsc_dev; struct net_device_context *net_device_ctx; - - ndev = get_netvsc_byref(vf_netdev); - if (!ndev) - return NOTIFY_DONE; - - net_device_ctx = netdev_priv(ndev); - netvsc_dev = rtnl_dereference(net_device_ctx->nvdev); - - netdev_info(ndev, "VF up: %s\n", vf_netdev->name); - - /* - * Open the device before switching data path. - */ - rndis_filter_open(netvsc_dev); - - /* - * notify the host to switch the data path. - */ - netvsc_switch_datapath(ndev, true); - netdev_info(ndev, "Data path switched to VF: %s\n", vf_netdev->name); - - netif_carrier_off(ndev); - - /* Now notify peers through VF device. */ - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev); - - return NOTIFY_OK; -} - -static int netvsc_vf_down(struct net_device *vf_netdev) -{ - struct net_device *ndev; struct netvsc_device *netvsc_dev; - struct net_device_context *net_device_ctx; + struct net_device *ndev; + bool vf_is_up = netif_running(vf_netdev); ndev = get_netvsc_byref(vf_netdev); if (!ndev) @@ -1481,15 +1855,12 @@ static int netvsc_vf_down(struct net_device *vf_netdev) net_device_ctx = netdev_priv(ndev); netvsc_dev = rtnl_dereference(net_device_ctx->nvdev); + if (!netvsc_dev) + return NOTIFY_DONE; - netdev_info(ndev, "VF down: %s\n", vf_netdev->name); - netvsc_switch_datapath(ndev, false); - netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name); - rndis_filter_close(netvsc_dev); - netif_carrier_on(ndev); - - /* Now notify peers through netvsc device. */ - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev); + netvsc_switch_datapath(ndev, vf_is_up); + netdev_info(ndev, "Data path switched %s VF: %s\n", + vf_is_up ? "to" : "from", vf_netdev->name); return NOTIFY_OK; } @@ -1504,12 +1875,15 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); + cancel_delayed_work_sync(&net_device_ctx->vf_takeover); netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); + netdev_rx_handler_unregister(vf_netdev); + netdev_upper_dev_unlink(vf_netdev, ndev); RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL); dev_put(vf_netdev); - module_put(THIS_MODULE); + return NOTIFY_OK; } @@ -1520,12 +1894,12 @@ static int netvsc_probe(struct hv_device *dev, struct net_device_context *net_device_ctx; struct netvsc_device_info device_info; struct netvsc_device *nvdev; - int ret; + int ret = -ENOMEM; net = alloc_etherdev_mq(sizeof(struct net_device_context), VRSS_CHANNEL_MAX); if (!net) - return -ENOMEM; + goto no_net; netif_carrier_off(net); @@ -1544,6 +1918,12 @@ static int netvsc_probe(struct hv_device *dev, spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); + INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup); + + net_device_ctx->vf_stats + = netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats); + if (!net_device_ctx->vf_stats) + goto no_stats; net->netdev_ops = &device_ops; net->ethtool_ops = ðtool_ops; @@ -1556,13 +1936,16 @@ static int netvsc_probe(struct hv_device *dev, memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; device_info.num_chn = VRSS_CHANNEL_DEFAULT; - ret = rndis_filter_device_add(dev, &device_info); - if (ret != 0) { + device_info.send_sections = NETVSC_DEFAULT_TX; + device_info.recv_sections = NETVSC_DEFAULT_RX; + + nvdev = rndis_filter_device_add(dev, &device_info); + if (IS_ERR(nvdev)) { + ret = PTR_ERR(nvdev); netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); - free_netdev(net); - hv_set_drvdata(dev, NULL); - return ret; + goto rndis_failed; } + memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); /* hw_features computed in rndis_filter_device_add */ @@ -1571,11 +1954,11 @@ static int netvsc_probe(struct hv_device *dev, NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; net->vlan_features = net->features; - /* RCU not necessary here, device not registered */ - nvdev = net_device_ctx->nvdev; netif_set_real_num_tx_queues(net, nvdev->num_chn); netif_set_real_num_rx_queues(net, nvdev->num_chn); + netdev_lockdep_set_classes(net); + /* MTU range: 68 - 1500 or 65521 */ net->min_mtu = NETVSC_MTU_MIN; if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) @@ -1586,20 +1969,29 @@ static int netvsc_probe(struct hv_device *dev, ret = register_netdev(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); - rndis_filter_device_remove(dev, nvdev); - free_netdev(net); + goto register_failed; } return ret; + +register_failed: + rndis_filter_device_remove(dev, nvdev); +rndis_failed: + free_percpu(net_device_ctx->vf_stats); +no_stats: + hv_set_drvdata(dev, NULL); + free_netdev(net); +no_net: + return ret; } static int netvsc_remove(struct hv_device *dev) { - struct net_device *net; struct net_device_context *ndev_ctx; + struct net_device *vf_netdev; + struct net_device *net; net = hv_get_drvdata(dev); - if (net == NULL) { dev_err(&dev->device, "No net device to remove\n"); return 0; @@ -1616,13 +2008,18 @@ static int netvsc_remove(struct hv_device *dev) * removed. Also blocks mtu and channel changes. */ rtnl_lock(); - rndis_filter_device_remove(dev, ndev_ctx->nvdev); - rtnl_unlock(); + vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); + if (vf_netdev) + netvsc_unregister_vf(vf_netdev); - unregister_netdev(net); + rndis_filter_device_remove(dev, + rtnl_dereference(ndev_ctx->nvdev)); + unregister_netdevice(net); + rtnl_unlock(); hv_set_drvdata(dev, NULL); + free_percpu(ndev_ctx->vf_stats); free_netdev(net); return 0; } @@ -1677,9 +2074,8 @@ static int netvsc_netdev_event(struct notifier_block *this, case NETDEV_UNREGISTER: return netvsc_unregister_vf(event_dev); case NETDEV_UP: - return netvsc_vf_up(event_dev); case NETDEV_DOWN: - return netvsc_vf_down(event_dev); + return netvsc_vf_changed(event_dev); default: return NOTIFY_DONE; } |
