aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/hyperv
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-02 16:40:27 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-02 16:40:27 -0700
commit8d65b08debc7e62b2c6032d7fe7389d895b92cbc (patch)
tree0c3141b60c3a03cc32742b5750c5e763b9dae489 /drivers/net/hyperv
parentMerge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert... (diff)
parentMerge branch 'tipc-refactor-socket-receive-functions' (diff)
downloadlinux-8d65b08debc7e62b2c6032d7fe7389d895b92cbc.tar.gz
linux-8d65b08debc7e62b2c6032d7fe7389d895b92cbc.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Millar: "Here are some highlights from the 2065 networking commits that happened this development cycle: 1) XDP support for IXGBE (John Fastabend) and thunderx (Sunil Kowuri) 2) Add a generic XDP driver, so that anyone can test XDP even if they lack a networking device whose driver has explicit XDP support (me). 3) Sparc64 now has an eBPF JIT too (me) 4) Add a BPF program testing framework via BPF_PROG_TEST_RUN (Alexei Starovoitov) 5) Make netfitler network namespace teardown less expensive (Florian Westphal) 6) Add symmetric hashing support to nft_hash (Laura Garcia Liebana) 7) Implement NAPI and GRO in netvsc driver (Stephen Hemminger) 8) Support TC flower offload statistics in mlxsw (Arkadi Sharshevsky) 9) Multiqueue support in stmmac driver (Joao Pinto) 10) Remove TCP timewait recycling, it never really could possibly work well in the real world and timestamp randomization really zaps any hint of usability this feature had (Soheil Hassas Yeganeh) 11) Support level3 vs level4 ECMP route hashing in ipv4 (Nikolay Aleksandrov) 12) Add socket busy poll support to epoll (Sridhar Samudrala) 13) Netlink extended ACK support (Johannes Berg, Pablo Neira Ayuso, and several others) 14) IPSEC hw offload infrastructure (Steffen Klassert)" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2065 commits) tipc: refactor function tipc_sk_recv_stream() tipc: refactor function tipc_sk_recvmsg() net: thunderx: Optimize page recycling for XDP net: thunderx: Support for XDP header adjustment net: thunderx: Add support for XDP_TX net: thunderx: Add support for XDP_DROP net: thunderx: Add basic XDP support net: thunderx: Cleanup receive buffer allocation net: thunderx: Optimize CQE_TX handling net: thunderx: Optimize RBDR descriptor handling net: thunderx: Support for page recycling ipx: call ipxitf_put() in ioctl error path net: sched: add helpers to handle extended actions qed*: Fix issues in the ptp filter config implementation. qede: Fix concurrency issue in PTP Tx path processing. stmmac: Add support for SIMATIC IOT2000 platform net: hns: fix ethtool_get_strings overflow in hns driver tcp: fix wraparound issue in tcp_lp bpf, arm64: fix jit branch offset related to ldimm64 bpf, arm64: implement jiting of BPF_XADD ...
Diffstat (limited to 'drivers/net/hyperv')
-rw-r--r--drivers/net/hyperv/hyperv_net.h19
-rw-r--r--drivers/net/hyperv/netvsc.c256
-rw-r--r--drivers/net/hyperv/netvsc_drv.c261
-rw-r--r--drivers/net/hyperv/rndis_filter.c115
4 files changed, 375 insertions, 276 deletions
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index db23cb36ae5c..262b2ea576a3 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -196,6 +196,7 @@ int netvsc_recv_callback(struct net_device *net,
const struct ndis_tcp_ip_checksum_info *csum_info,
const struct ndis_pkt_8021q_info *vlan);
void netvsc_channel_cb(void *context);
+int netvsc_poll(struct napi_struct *napi, int budget);
int rndis_filter_open(struct netvsc_device *nvdev);
int rndis_filter_close(struct netvsc_device *nvdev);
int rndis_filter_device_add(struct hv_device *dev,
@@ -632,7 +633,7 @@ struct nvsp_message {
#define NETVSC_PACKET_SIZE 4096
-#define VRSS_SEND_TAB_SIZE 16
+#define VRSS_SEND_TAB_SIZE 16 /* must be power of 2 */
#define VRSS_CHANNEL_MAX 64
#define VRSS_CHANNEL_DEFAULT 8
@@ -685,7 +686,7 @@ struct net_device_context {
/* point back to our device context */
struct hv_device *device_ctx;
/* netvsc_device */
- struct netvsc_device *nvdev;
+ struct netvsc_device __rcu *nvdev;
/* reconfigure work */
struct delayed_work dwork;
/* last reconfig time */
@@ -707,9 +708,6 @@ struct net_device_context {
u32 speed;
struct netvsc_ethtool_stats eth_stats;
- /* the device is going away */
- bool start_remove;
-
/* State to manage the associated VF interface. */
struct net_device __rcu *vf_netdev;
@@ -722,6 +720,8 @@ struct net_device_context {
/* Per channel data */
struct netvsc_channel {
struct vmbus_channel *channel;
+ const struct vmpacket_descriptor *desc;
+ struct napi_struct napi;
struct multi_send_data msd;
struct multi_recv_comp mrc;
atomic_t queue_sends;
@@ -760,8 +760,8 @@ struct netvsc_device {
u32 max_chn;
u32 num_chn;
- spinlock_t sc_lock; /* Protects num_sc_offered variable */
- u32 num_sc_offered;
+
+ refcount_t sc_offered;
/* Holds rndis device info */
void *extension;
@@ -776,6 +776,8 @@ struct netvsc_device {
atomic_t open_cnt;
struct netvsc_channel chan_table[VRSS_CHANNEL_MAX];
+
+ struct rcu_head rcu;
};
static inline struct netvsc_device *
@@ -1424,9 +1426,6 @@ struct rndis_message {
((void *) rndis_msg)
-#define __struct_bcount(x)
-
-
#define RNDIS_HEADER_SIZE (sizeof(struct rndis_message) - \
sizeof(union rndis_message_container))
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 15ef713d96c0..15749d359e60 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -80,8 +80,10 @@ static struct netvsc_device *alloc_net_device(void)
return net_device;
}
-static void free_netvsc_device(struct netvsc_device *nvdev)
+static void free_netvsc_device(struct rcu_head *head)
{
+ struct netvsc_device *nvdev
+ = container_of(head, struct netvsc_device, rcu);
int i;
for (i = 0; i < VRSS_CHANNEL_MAX; i++)
@@ -90,14 +92,9 @@ static void free_netvsc_device(struct netvsc_device *nvdev)
kfree(nvdev);
}
-
-static inline bool netvsc_channel_idle(const struct netvsc_device *net_device,
- u16 q_idx)
+static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
{
- const struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
-
- return atomic_read(&net_device->num_outstanding_recvs) == 0 &&
- atomic_read(&nvchan->queue_sends) == 0;
+ call_rcu(&nvdev->rcu, free_netvsc_device);
}
static struct netvsc_device *get_outbound_net_device(struct hv_device *device)
@@ -138,6 +135,13 @@ static void netvsc_destroy_buf(struct hv_device *device)
sizeof(struct nvsp_message),
(unsigned long)revoke_packet,
VM_PKT_DATA_INBAND, 0);
+ /* If the failure is because the channel is rescinded;
+ * ignore the failure since we cannot send on a rescinded
+ * channel. This would allow us to properly cleanup
+ * even when the channel is rescinded.
+ */
+ if (device->channel->rescind)
+ ret = 0;
/*
* If we failed here, we might as well return and
* have a leak rather than continue and a bugchk
@@ -198,6 +202,15 @@ static void netvsc_destroy_buf(struct hv_device *device)
sizeof(struct nvsp_message),
(unsigned long)revoke_packet,
VM_PKT_DATA_INBAND, 0);
+
+ /* If the failure is because the channel is rescinded;
+ * ignore the failure since we cannot send on a rescinded
+ * channel. This would allow us to properly cleanup
+ * even when the channel is rescinded.
+ */
+ if (device->channel->rescind)
+ ret = 0;
+
/* If we failed here, we might as well return and
* have a leak rather than continue and a bugchk
*/
@@ -555,10 +568,11 @@ void netvsc_device_remove(struct hv_device *device)
struct net_device *ndev = hv_get_drvdata(device);
struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct netvsc_device *net_device = net_device_ctx->nvdev;
+ int i;
netvsc_disconnect_vsp(device);
- net_device_ctx->nvdev = NULL;
+ RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
/*
* At this point, no one should be accessing net_device
@@ -569,8 +583,12 @@ void netvsc_device_remove(struct hv_device *device)
/* Now, we can close the channel safely */
vmbus_close(device->channel);
+ /* And dissassociate NAPI context from device */
+ for (i = 0; i < net_device->num_chn; i++)
+ netif_napi_del(&net_device->chan_table[i].napi);
+
/* Release all resources */
- free_netvsc_device(net_device);
+ free_netvsc_device_rcu(net_device);
}
#define RING_AVAIL_PERCENT_HIWATER 20
@@ -599,11 +617,11 @@ static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
static void netvsc_send_tx_complete(struct netvsc_device *net_device,
struct vmbus_channel *incoming_channel,
struct hv_device *device,
- struct vmpacket_descriptor *packet)
+ const struct vmpacket_descriptor *desc,
+ int budget)
{
- struct sk_buff *skb = (struct sk_buff *)(unsigned long)packet->trans_id;
+ struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
struct net_device *ndev = hv_get_drvdata(device);
- struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct vmbus_channel *channel = device->channel;
u16 q_idx = 0;
int queue_sends;
@@ -627,7 +645,7 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
tx_stats->bytes += packet->total_bytes;
u64_stats_update_end(&tx_stats->syncp);
- dev_consume_skb_any(skb);
+ napi_consume_skb(skb, budget);
}
queue_sends =
@@ -637,7 +655,6 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
wake_up(&net_device->wait_drain);
if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
- !net_device_ctx->start_remove &&
(hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
queue_sends < 1))
netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
@@ -646,14 +663,12 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
static void netvsc_send_completion(struct netvsc_device *net_device,
struct vmbus_channel *incoming_channel,
struct hv_device *device,
- struct vmpacket_descriptor *packet)
+ const struct vmpacket_descriptor *desc,
+ int budget)
{
- struct nvsp_message *nvsp_packet;
+ struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
struct net_device *ndev = hv_get_drvdata(device);
- nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
- (packet->offset8 << 3));
-
switch (nvsp_packet->hdr.msg_type) {
case NVSP_MSG_TYPE_INIT_COMPLETE:
case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
@@ -667,7 +682,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
netvsc_send_tx_complete(net_device, incoming_channel,
- device, packet);
+ device, desc, budget);
break;
default:
@@ -709,8 +724,7 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
packet->page_buf_cnt;
/* Add padding */
- if (skb && skb->xmit_more && remain &&
- !packet->cp_partial) {
+ if (skb->xmit_more && remain && !packet->cp_partial) {
padding = net_device->pkt_align - remain;
rndis_msg->msg_len += padding;
packet->total_data_buflen += padding;
@@ -868,9 +882,7 @@ int netvsc_send(struct hv_device *device,
if (msdp->pkt)
msd_len = msdp->pkt->total_data_buflen;
- try_batch = (skb != NULL) && msd_len > 0 && msdp->count <
- net_device->max_pkt;
-
+ try_batch = msd_len > 0 && msdp->count < net_device->max_pkt;
if (try_batch && msd_len + pktlen + net_device->pkt_align <
net_device->send_section_size) {
section_index = msdp->pkt->send_buf_index;
@@ -880,7 +892,7 @@ int netvsc_send(struct hv_device *device,
section_index = msdp->pkt->send_buf_index;
packet->cp_partial = true;
- } else if ((skb != NULL) && pktlen + net_device->pkt_align <
+ } else if (pktlen + net_device->pkt_align <
net_device->send_section_size) {
section_index = netvsc_get_next_send_section(net_device);
if (section_index != NETVSC_INVALID_INDEX) {
@@ -1065,28 +1077,29 @@ static inline struct recv_comp_data *get_recv_comp_slot(
return rcd;
}
-static void netvsc_receive(struct net_device *ndev,
+static int netvsc_receive(struct net_device *ndev,
struct netvsc_device *net_device,
struct net_device_context *net_device_ctx,
struct hv_device *device,
struct vmbus_channel *channel,
- struct vmtransfer_page_packet_header *vmxferpage_packet,
+ const struct vmpacket_descriptor *desc,
struct nvsp_message *nvsp)
{
+ const struct vmtransfer_page_packet_header *vmxferpage_packet
+ = container_of(desc, const struct vmtransfer_page_packet_header, d);
+ u16 q_idx = channel->offermsg.offer.sub_channel_index;
char *recv_buf = net_device->recv_buf;
u32 status = NVSP_STAT_SUCCESS;
int i;
int count = 0;
int ret;
- struct recv_comp_data *rcd;
- u16 q_idx = channel->offermsg.offer.sub_channel_index;
/* Make sure this is a valid nvsp packet */
if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
netif_err(net_device_ctx, rx_err, ndev,
"Unknown nvsp packet type received %u\n",
nvsp->hdr.msg_type);
- return;
+ return 0;
}
if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
@@ -1094,7 +1107,7 @@ static void netvsc_receive(struct net_device *ndev,
"Invalid xfer page set id - expecting %x got %x\n",
NETVSC_RECEIVE_BUFFER_ID,
vmxferpage_packet->xfer_pageset_id);
- return;
+ return 0;
}
count = vmxferpage_packet->range_cnt;
@@ -1110,26 +1123,26 @@ static void netvsc_receive(struct net_device *ndev,
channel, data, buflen);
}
- if (!net_device->chan_table[q_idx].mrc.buf) {
+ if (net_device->chan_table[q_idx].mrc.buf) {
+ struct recv_comp_data *rcd;
+
+ rcd = get_recv_comp_slot(net_device, channel, q_idx);
+ if (rcd) {
+ rcd->tid = vmxferpage_packet->d.trans_id;
+ rcd->status = status;
+ } else {
+ netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
+ q_idx, vmxferpage_packet->d.trans_id);
+ }
+ } else {
ret = netvsc_send_recv_completion(channel,
vmxferpage_packet->d.trans_id,
status);
if (ret)
netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n",
q_idx, vmxferpage_packet->d.trans_id, ret);
- return;
}
-
- rcd = get_recv_comp_slot(net_device, channel, q_idx);
-
- if (!rcd) {
- netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
- q_idx, vmxferpage_packet->d.trans_id);
- return;
- }
-
- rcd->tid = vmxferpage_packet->d.trans_id;
- rcd->status = status;
+ return count;
}
static void netvsc_send_table(struct hv_device *hdev,
@@ -1175,28 +1188,25 @@ static inline void netvsc_receive_inband(struct hv_device *hdev,
}
}
-static void netvsc_process_raw_pkt(struct hv_device *device,
- struct vmbus_channel *channel,
- struct netvsc_device *net_device,
- struct net_device *ndev,
- u64 request_id,
- struct vmpacket_descriptor *desc)
+static int netvsc_process_raw_pkt(struct hv_device *device,
+ struct vmbus_channel *channel,
+ struct netvsc_device *net_device,
+ struct net_device *ndev,
+ const struct vmpacket_descriptor *desc,
+ int budget)
{
struct net_device_context *net_device_ctx = netdev_priv(ndev);
- struct nvsp_message *nvmsg
- = (struct nvsp_message *)((unsigned long)desc
- + (desc->offset8 << 3));
+ struct nvsp_message *nvmsg = hv_pkt_data(desc);
switch (desc->type) {
case VM_PKT_COMP:
- netvsc_send_completion(net_device, channel, device, desc);
+ netvsc_send_completion(net_device, channel, device,
+ desc, budget);
break;
case VM_PKT_DATA_USING_XFER_PAGES:
- netvsc_receive(ndev, net_device, net_device_ctx,
- device, channel,
- (struct vmtransfer_page_packet_header *)desc,
- nvmsg);
+ return netvsc_receive(ndev, net_device, net_device_ctx,
+ device, channel, desc, nvmsg);
break;
case VM_PKT_DATA_INBAND:
@@ -1205,53 +1215,74 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
default:
netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
- desc->type, request_id);
+ desc->type, desc->trans_id);
break;
}
+
+ return 0;
}
-void netvsc_channel_cb(void *context)
+static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
{
- struct vmbus_channel *channel = context;
- u16 q_idx = channel->offermsg.offer.sub_channel_index;
- struct hv_device *device;
- struct netvsc_device *net_device;
- struct vmpacket_descriptor *desc;
- struct net_device *ndev;
- bool need_to_commit = false;
+ struct vmbus_channel *primary = channel->primary_channel;
- if (channel->primary_channel != NULL)
- device = channel->primary_channel->device_obj;
- else
- device = channel->device_obj;
+ return primary ? primary->device_obj : channel->device_obj;
+}
- ndev = hv_get_drvdata(device);
- if (unlikely(!ndev))
- return;
+/* Network processing softirq
+ * Process data in incoming ring buffer from host
+ * Stops when ring is empty or budget is met or exceeded.
+ */
+int netvsc_poll(struct napi_struct *napi, int budget)
+{
+ struct netvsc_channel *nvchan
+ = container_of(napi, struct netvsc_channel, napi);
+ struct vmbus_channel *channel = nvchan->channel;
+ struct hv_device *device = netvsc_channel_to_device(channel);
+ u16 q_idx = channel->offermsg.offer.sub_channel_index;
+ struct net_device *ndev = hv_get_drvdata(device);
+ struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
+ int work_done = 0;
- net_device = net_device_to_netvsc_device(ndev);
- if (unlikely(!net_device))
- return;
+ /* If starting a new interval */
+ if (!nvchan->desc)
+ nvchan->desc = hv_pkt_iter_first(channel);
- if (unlikely(net_device->destroy &&
- netvsc_channel_idle(net_device, q_idx)))
- return;
+ while (nvchan->desc && work_done < budget) {
+ work_done += netvsc_process_raw_pkt(device, channel, net_device,
+ ndev, nvchan->desc, budget);
+ nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
+ }
- /* commit_rd_index() -> hv_signal_on_read() needs this. */
- init_cached_read_index(channel);
+ /* If receive ring was exhausted
+ * and not doing busy poll
+ * then re-enable host interrupts
+ * and reschedule if ring is not empty.
+ */
+ if (work_done < budget &&
+ napi_complete_done(napi, work_done) &&
+ hv_end_read(&channel->inbound) != 0)
+ napi_reschedule(napi);
- while ((desc = get_next_pkt_raw(channel)) != NULL) {
- netvsc_process_raw_pkt(device, channel, net_device,
- ndev, desc->trans_id, desc);
+ netvsc_chk_recv_comp(net_device, channel, q_idx);
- put_pkt_raw(channel, desc);
- need_to_commit = true;
- }
+ /* Driver may overshoot since multiple packets per descriptor */
+ return min(work_done, budget);
+}
- if (need_to_commit)
- commit_rd_index(channel);
+/* Call back when data is available in host ring buffer.
+ * Processing is deferred until network softirq (NAPI)
+ */
+void netvsc_channel_cb(void *context)
+{
+ struct netvsc_channel *nvchan = context;
- netvsc_chk_recv_comp(net_device, channel, q_idx);
+ if (napi_schedule_prep(&nvchan->napi)) {
+ /* disable interupts from host */
+ hv_begin_read(&nvchan->channel->inbound);
+
+ __napi_schedule(&nvchan->napi);
+ }
}
/*
@@ -1273,10 +1304,29 @@ int netvsc_device_add(struct hv_device *device,
net_device->ring_size = ring_size;
+ /* Because the device uses NAPI, all the interrupt batching and
+ * control is done via Net softirq, not the channel handling
+ */
+ set_channel_read_mode(device->channel, HV_CALL_ISR);
+
+ /* If we're reopening the device we may have multiple queues, fill the
+ * chn_table with the default channel to use it before subchannels are
+ * opened.
+ * Initialize the channel state before we open;
+ * we can be interrupted as soon as we open the channel.
+ */
+
+ for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
+ struct netvsc_channel *nvchan = &net_device->chan_table[i];
+
+ nvchan->channel = device->channel;
+ }
+
/* Open the channel */
ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
ring_size * PAGE_SIZE, NULL, 0,
- netvsc_channel_cb, device->channel);
+ netvsc_channel_cb,
+ net_device->chan_table);
if (ret != 0) {
netdev_err(ndev, "unable to open channel: %d\n", ret);
@@ -1286,19 +1336,15 @@ int netvsc_device_add(struct hv_device *device,
/* Channel is opened */
netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");
- /* If we're reopening the device we may have multiple queues, fill the
- * chn_table with the default channel to use it before subchannels are
- * opened.
- */
- for (i = 0; i < VRSS_CHANNEL_MAX; i++)
- net_device->chan_table[i].channel = device->channel;
+ /* Enable NAPI handler for init callbacks */
+ netif_napi_add(ndev, &net_device->chan_table[0].napi,
+ netvsc_poll, NAPI_POLL_WEIGHT);
+ napi_enable(&net_device->chan_table[0].napi);
/* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
* populated.
*/
- wmb();
-
- net_device_ctx->nvdev = net_device;
+ rcu_assign_pointer(net_device_ctx->nvdev, net_device);
/* Connect with the NetVsp */
ret = netvsc_connect_vsp(device);
@@ -1311,11 +1357,13 @@ int netvsc_device_add(struct hv_device *device,
return ret;
close:
+ netif_napi_del(&net_device->chan_table[0].napi);
+
/* Now, we can close the channel safely */
vmbus_close(device->channel);
cleanup:
- free_netvsc_device(net_device);
+ free_netvsc_device(&net_device->rcu);
return ret;
}
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 5ede87f30463..4421a6d00375 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -62,7 +62,7 @@ static void do_set_multicast(struct work_struct *w)
container_of(w, struct net_device_context, work);
struct hv_device *device_obj = ndevctx->device_ctx;
struct net_device *ndev = hv_get_drvdata(device_obj);
- struct netvsc_device *nvdev = ndevctx->nvdev;
+ struct netvsc_device *nvdev = rcu_dereference(ndevctx->nvdev);
struct rndis_device *rdev;
if (!nvdev)
@@ -116,7 +116,7 @@ static int netvsc_open(struct net_device *net)
static int netvsc_close(struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
- struct netvsc_device *nvdev = net_device_ctx->nvdev;
+ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
int ret;
u32 aread, awrite, i, msec = 10, retry = 0, retry_max = 20;
struct vmbus_channel *chn;
@@ -191,6 +191,54 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
return ppi;
}
+/* Azure hosts don't support non-TCP port numbers in hashing yet. We compute
+ * hash for non-TCP traffic with only IP numbers.
+ */
+static inline u32 netvsc_get_hash(struct sk_buff *skb, struct sock *sk)
+{
+ struct flow_keys flow;
+ u32 hash;
+ static u32 hashrnd __read_mostly;
+
+ net_get_random_once(&hashrnd, sizeof(hashrnd));
+
+ if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
+ return 0;
+
+ if (flow.basic.ip_proto == IPPROTO_TCP) {
+ return skb_get_hash(skb);
+ } else {
+ if (flow.basic.n_proto == htons(ETH_P_IP))
+ hash = jhash2((u32 *)&flow.addrs.v4addrs, 2, hashrnd);
+ else if (flow.basic.n_proto == htons(ETH_P_IPV6))
+ hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd);
+ else
+ hash = 0;
+
+ skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
+ }
+
+ return hash;
+}
+
+static inline int netvsc_get_tx_queue(struct net_device *ndev,
+ struct sk_buff *skb, int old_idx)
+{
+ const struct net_device_context *ndc = netdev_priv(ndev);
+ struct sock *sk = skb->sk;
+ int q_idx;
+
+ q_idx = ndc->tx_send_table[netvsc_get_hash(skb, sk) &
+ (VRSS_SEND_TAB_SIZE - 1)];
+
+ /* If queue index changed record the new value */
+ if (q_idx != old_idx &&
+ sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
+ sk_tx_queue_set(sk, q_idx);
+
+ return q_idx;
+}
+
/*
* Select queue for transmit.
*
@@ -205,24 +253,22 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
void *accel_priv, select_queue_fallback_t fallback)
{
- struct net_device_context *net_device_ctx = netdev_priv(ndev);
unsigned int num_tx_queues = ndev->real_num_tx_queues;
- struct sock *sk = skb->sk;
- int q_idx = sk_tx_queue_get(sk);
-
- if (q_idx < 0 || skb->ooo_okay || q_idx >= num_tx_queues) {
- u16 hash = __skb_tx_hash(ndev, skb, VRSS_SEND_TAB_SIZE);
- int new_idx;
-
- new_idx = net_device_ctx->tx_send_table[hash] % num_tx_queues;
+ int q_idx = sk_tx_queue_get(skb->sk);
- if (q_idx != new_idx && sk &&
- sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
- sk_tx_queue_set(sk, new_idx);
-
- q_idx = new_idx;
+ if (q_idx < 0 || skb->ooo_okay) {
+ /* If forwarding a packet, we use the recorded queue when
+ * available for better cache locality.
+ */
+ if (skb_rx_queue_recorded(skb))
+ q_idx = skb_get_rx_queue(skb);
+ else
+ q_idx = netvsc_get_tx_queue(ndev, skb, q_idx);
}
+ while (unlikely(q_idx >= num_tx_queues))
+ q_idx -= num_tx_queues;
+
return q_idx;
}
@@ -584,13 +630,14 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
}
static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
+ struct napi_struct *napi,
const struct ndis_tcp_ip_checksum_info *csum_info,
const struct ndis_pkt_8021q_info *vlan,
void *data, u32 buflen)
{
struct sk_buff *skb;
- skb = netdev_alloc_skb_ip_align(net, buflen);
+ skb = napi_alloc_skb(napi, buflen);
if (!skb)
return skb;
@@ -636,12 +683,12 @@ int netvsc_recv_callback(struct net_device *net,
const struct ndis_pkt_8021q_info *vlan)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
- struct netvsc_device *net_device = net_device_ctx->nvdev;
+ struct netvsc_device *net_device;
+ u16 q_idx = channel->offermsg.offer.sub_channel_index;
+ struct netvsc_channel *nvchan;
struct net_device *vf_netdev;
struct sk_buff *skb;
struct netvsc_stats *rx_stats;
- u16 q_idx = channel->offermsg.offer.sub_channel_index;
-
if (net->reg_state != NETREG_REGISTERED)
return NVSP_STAT_FAIL;
@@ -654,13 +701,20 @@ int netvsc_recv_callback(struct net_device *net,
* interface in the guest.
*/
rcu_read_lock();
+ net_device = rcu_dereference(net_device_ctx->nvdev);
+ if (unlikely(!net_device))
+ goto drop;
+
+ nvchan = &net_device->chan_table[q_idx];
vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
if (vf_netdev && (vf_netdev->flags & IFF_UP))
net = vf_netdev;
/* Allocate a skb - TODO direct I/O to pages? */
- skb = netvsc_alloc_recv_skb(net, csum_info, vlan, data, len);
+ skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
+ csum_info, vlan, data, len);
if (unlikely(!skb)) {
+drop:
++net->stats.rx_dropped;
rcu_read_unlock();
return NVSP_STAT_FAIL;
@@ -674,7 +728,7 @@ int netvsc_recv_callback(struct net_device *net,
* on the synthetic device because modifying the VF device
* statistics will not work correctly.
*/
- rx_stats = &net_device->chan_table[q_idx].rx_stats;
+ rx_stats = &nvchan->rx_stats;
u64_stats_update_begin(&rx_stats->syncp);
rx_stats->packets++;
rx_stats->bytes += len;
@@ -685,12 +739,7 @@ int netvsc_recv_callback(struct net_device *net,
++rx_stats->multicast;
u64_stats_update_end(&rx_stats->syncp);
- /*
- * Pass the skb back up. Network stack will deallocate the skb when it
- * is done.
- * TODO - use NAPI?
- */
- netif_receive_skb(skb);
+ napi_gro_receive(&nvchan->napi, skb);
rcu_read_unlock();
return 0;
@@ -707,7 +756,7 @@ static void netvsc_get_channels(struct net_device *net,
struct ethtool_channels *channel)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
- struct netvsc_device *nvdev = net_device_ctx->nvdev;
+ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
if (nvdev) {
channel->max_combined = nvdev->max_chn;
@@ -744,8 +793,9 @@ static int netvsc_set_channels(struct net_device *net,
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct hv_device *dev = net_device_ctx->device_ctx;
- struct netvsc_device *nvdev = net_device_ctx->nvdev;
+ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
unsigned int count = channels->combined_count;
+ bool was_running;
int ret;
/* We do not support separate count for rx, tx, or other */
@@ -756,7 +806,7 @@ static int netvsc_set_channels(struct net_device *net,
if (count > net->num_tx_queues || count > net->num_rx_queues)
return -EINVAL;
- if (net_device_ctx->start_remove || !nvdev || nvdev->destroy)
+ if (!nvdev || nvdev->destroy)
return -ENODEV;
if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5)
@@ -765,11 +815,13 @@ static int netvsc_set_channels(struct net_device *net,
if (count > nvdev->max_chn)
return -EINVAL;
- ret = netvsc_close(net);
- if (ret)
- return ret;
+ was_running = netif_running(net);
+ if (was_running) {
+ ret = netvsc_close(net);
+ if (ret)
+ return ret;
+ }
- net_device_ctx->start_remove = true;
rndis_filter_device_remove(dev, nvdev);
ret = netvsc_set_queues(net, dev, count);
@@ -778,8 +830,8 @@ static int netvsc_set_channels(struct net_device *net,
else
netvsc_set_queues(net, dev, nvdev->num_chn);
- netvsc_open(net);
- net_device_ctx->start_remove = false;
+ if (was_running)
+ ret = netvsc_open(net);
/* We may have missed link change notifications */
schedule_delayed_work(&net_device_ctx->dwork, 0);
@@ -787,18 +839,19 @@ static int netvsc_set_channels(struct net_device *net,
return ret;
}
-static bool netvsc_validate_ethtool_ss_cmd(const struct ethtool_cmd *cmd)
+static bool
+netvsc_validate_ethtool_ss_cmd(const struct ethtool_link_ksettings *cmd)
{
- struct ethtool_cmd diff1 = *cmd;
- struct ethtool_cmd diff2 = {};
+ struct ethtool_link_ksettings diff1 = *cmd;
+ struct ethtool_link_ksettings diff2 = {};
- ethtool_cmd_speed_set(&diff1, 0);
- diff1.duplex = 0;
+ diff1.base.speed = 0;
+ diff1.base.duplex = 0;
/* advertising and cmd are usually set */
- diff1.advertising = 0;
- diff1.cmd = 0;
+ ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
+ diff1.base.cmd = 0;
/* We set port to PORT_OTHER */
- diff2.port = PORT_OTHER;
+ diff2.base.port = PORT_OTHER;
return !memcmp(&diff1, &diff2, sizeof(diff1));
}
@@ -808,33 +861,35 @@ static void netvsc_init_settings(struct net_device *dev)
struct net_device_context *ndc = netdev_priv(dev);
ndc->speed = SPEED_UNKNOWN;
- ndc->duplex = DUPLEX_UNKNOWN;
+ ndc->duplex = DUPLEX_FULL;
}
-static int netvsc_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int netvsc_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
{
struct net_device_context *ndc = netdev_priv(dev);
- ethtool_cmd_speed_set(cmd, ndc->speed);
- cmd->duplex = ndc->duplex;
- cmd->port = PORT_OTHER;
+ cmd->base.speed = ndc->speed;
+ cmd->base.duplex = ndc->duplex;
+ cmd->base.port = PORT_OTHER;
return 0;
}
-static int netvsc_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int netvsc_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
{
struct net_device_context *ndc = netdev_priv(dev);
u32 speed;
- speed = ethtool_cmd_speed(cmd);
+ speed = cmd->base.speed;
if (!ethtool_validate_speed(speed) ||
- !ethtool_validate_duplex(cmd->duplex) ||
+ !ethtool_validate_duplex(cmd->base.duplex) ||
!netvsc_validate_ethtool_ss_cmd(cmd))
return -EINVAL;
ndc->speed = speed;
- ndc->duplex = cmd->duplex;
+ ndc->duplex = cmd->base.duplex;
return 0;
}
@@ -842,24 +897,27 @@ static int netvsc_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
static int netvsc_change_mtu(struct net_device *ndev, int mtu)
{
struct net_device_context *ndevctx = netdev_priv(ndev);
- struct netvsc_device *nvdev = ndevctx->nvdev;
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
struct hv_device *hdev = ndevctx->device_ctx;
struct netvsc_device_info device_info;
- int ret;
+ bool was_running;
+ int ret = 0;
- if (ndevctx->start_remove || !nvdev || nvdev->destroy)
+ if (!nvdev || nvdev->destroy)
return -ENODEV;
- ret = netvsc_close(ndev);
- if (ret)
- goto out;
+ was_running = netif_running(ndev);
+ if (was_running) {
+ ret = netvsc_close(ndev);
+ if (ret)
+ return ret;
+ }
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
device_info.num_chn = nvdev->num_chn;
device_info.max_num_vrss_chns = nvdev->num_chn;
- ndevctx->start_remove = true;
rndis_filter_device_remove(hdev, nvdev);
/* 'nvdev' has been freed in rndis_filter_device_remove() ->
@@ -872,9 +930,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
rndis_filter_device_add(hdev, &device_info);
-out:
- netvsc_open(ndev);
- ndevctx->start_remove = false;
+ if (was_running)
+ ret = netvsc_open(ndev);
/* We may have missed link change notifications */
schedule_delayed_work(&ndevctx->dwork, 0);
@@ -886,7 +943,7 @@ static void netvsc_get_stats64(struct net_device *net,
struct rtnl_link_stats64 *t)
{
struct net_device_context *ndev_ctx = netdev_priv(net);
- struct netvsc_device *nvdev = ndev_ctx->nvdev;
+ struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
int i;
if (!nvdev)
@@ -971,7 +1028,10 @@ static const struct {
static int netvsc_get_sset_count(struct net_device *dev, int string_set)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = ndc->nvdev;
+ struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+
+ if (!nvdev)
+ return -ENODEV;
switch (string_set) {
case ETH_SS_STATS:
@@ -985,13 +1045,16 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = ndc->nvdev;
+ struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
const void *nds = &ndc->eth_stats;
const struct netvsc_stats *qstats;
unsigned int start;
u64 packets, bytes;
int i, j;
+ if (!nvdev)
+ return;
+
for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++)
data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset);
@@ -1020,10 +1083,13 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = ndc->nvdev;
+ struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
u8 *p = data;
int i;
+ if (!nvdev)
+ return;
+
switch (stringset) {
case ETH_SS_STATS:
for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
@@ -1075,7 +1141,10 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
u32 *rules)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = ndc->nvdev;
+ struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+
+ if (!nvdev)
+ return -ENODEV;
switch (info->cmd) {
case ETHTOOL_GRXRINGS:
@@ -1111,13 +1180,17 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
u8 *hfunc)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *ndev = ndc->nvdev;
- struct rndis_device *rndis_dev = ndev->extension;
+ struct netvsc_device *ndev = rcu_dereference(ndc->nvdev);
+ struct rndis_device *rndis_dev;
int i;
+ if (!ndev)
+ return -ENODEV;
+
if (hfunc)
*hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */
+ rndis_dev = ndev->extension;
if (indir) {
for (i = 0; i < ITAB_NUM; i++)
indir[i] = rndis_dev->ind_table[i];
@@ -1133,13 +1206,17 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
const u8 *key, const u8 hfunc)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *ndev = ndc->nvdev;
- struct rndis_device *rndis_dev = ndev->extension;
+ struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev);
+ struct rndis_device *rndis_dev;
int i;
+ if (!ndev)
+ return -ENODEV;
+
if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;
+ rndis_dev = ndev->extension;
if (indir) {
for (i = 0; i < ITAB_NUM; i++)
if (indir[i] >= dev->num_rx_queues)
@@ -1168,13 +1245,13 @@ static const struct ethtool_ops ethtool_ops = {
.get_channels = netvsc_get_channels,
.set_channels = netvsc_set_channels,
.get_ts_info = ethtool_op_get_ts_info,
- .get_settings = netvsc_get_settings,
- .set_settings = netvsc_set_settings,
.get_rxnfc = netvsc_get_rxnfc,
.get_rxfh_key_size = netvsc_get_rxfh_key_size,
.get_rxfh_indir_size = netvsc_rss_indir_size,
.get_rxfh = netvsc_get_rxfh,
.set_rxfh = netvsc_set_rxfh,
+ .get_link_ksettings = netvsc_get_link_ksettings,
+ .set_link_ksettings = netvsc_set_link_ksettings,
};
static const struct net_device_ops device_ops = {
@@ -1210,10 +1287,10 @@ static void netvsc_link_change(struct work_struct *w)
unsigned long flags, next_reconfig, delay;
rtnl_lock();
- if (ndev_ctx->start_remove)
+ net_device = rtnl_dereference(ndev_ctx->nvdev);
+ if (!net_device)
goto out_unlock;
- net_device = ndev_ctx->nvdev;
rdev = net_device->extension;
next_reconfig = ndev_ctx->last_reconfig + LINKCHANGE_INT;
@@ -1354,7 +1431,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
return NOTIFY_DONE;
net_device_ctx = netdev_priv(ndev);
- netvsc_dev = net_device_ctx->nvdev;
+ netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
return NOTIFY_DONE;
@@ -1380,7 +1457,7 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
return NOTIFY_DONE;
net_device_ctx = netdev_priv(ndev);
- netvsc_dev = net_device_ctx->nvdev;
+ netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
@@ -1414,7 +1491,7 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
return NOTIFY_DONE;
net_device_ctx = netdev_priv(ndev);
- netvsc_dev = net_device_ctx->nvdev;
+ netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
netvsc_switch_datapath(ndev, false);
@@ -1474,8 +1551,6 @@ static int netvsc_probe(struct hv_device *dev,
hv_set_drvdata(dev, net);
- net_device_ctx->start_remove = false;
-
INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
INIT_WORK(&net_device_ctx->work, do_set_multicast);
@@ -1492,8 +1567,7 @@ static int netvsc_probe(struct hv_device *dev,
/* Notify the netvsc driver of the new device */
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
- device_info.max_num_vrss_chns = min_t(u32, VRSS_CHANNEL_DEFAULT,
- num_online_cpus());
+ device_info.num_chn = VRSS_CHANNEL_DEFAULT;
ret = rndis_filter_device_add(dev, &device_info);
if (ret != 0) {
netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
@@ -1509,6 +1583,7 @@ static int netvsc_probe(struct hv_device *dev,
NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
net->vlan_features = net->features;
+ /* RCU not necessary here, device not registered */
nvdev = net_device_ctx->nvdev;
netif_set_real_num_tx_queues(net, nvdev->num_chn);
netif_set_real_num_rx_queues(net, nvdev->num_chn);
@@ -1544,26 +1619,20 @@ static int netvsc_remove(struct hv_device *dev)
ndev_ctx = netdev_priv(net);
- /* Avoid racing with netvsc_change_mtu()/netvsc_set_channels()
- * removing the device.
- */
- rtnl_lock();
- ndev_ctx->start_remove = true;
- rtnl_unlock();
+ netif_device_detach(net);
cancel_delayed_work_sync(&ndev_ctx->dwork);
cancel_work_sync(&ndev_ctx->work);
- /* Stop outbound asap */
- netif_tx_disable(net);
-
- unregister_netdev(net);
-
/*
* Call to the vsc driver to let it know that the device is being
- * removed
+ * removed. Also blocks mtu and channel changes.
*/
+ rtnl_lock();
rndis_filter_device_remove(dev, ndev_ctx->nvdev);
+ rtnl_unlock();
+
+ unregister_netdev(net);
hv_set_drvdata(dev, NULL);
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 19356f56b7b1..ab92c3c95951 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -819,16 +819,14 @@ int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
{
struct rndis_request *request;
struct rndis_set_request *set;
- struct rndis_set_complete *set_complete;
int ret;
request = get_rndis_request(dev, RNDIS_MSG_SET,
RNDIS_MESSAGE_SIZE(struct rndis_set_request) +
sizeof(u32));
- if (!request) {
- ret = -ENOMEM;
- goto cleanup;
- }
+ if (!request)
+ return -ENOMEM;
+
/* Setup the rndis set */
set = &request->request_msg.msg.set_req;
@@ -840,15 +838,11 @@ int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
&new_filter, sizeof(u32));
ret = rndis_filter_send_request(dev, request);
- if (ret != 0)
- goto cleanup;
+ if (ret == 0)
+ wait_for_completion(&request->wait_event);
- wait_for_completion(&request->wait_event);
+ put_rndis_request(dev, request);
- set_complete = &request->response_msg.msg.set_complete;
-cleanup:
- if (request)
- put_rndis_request(dev, request);
return ret;
}
@@ -926,8 +920,6 @@ static void rndis_filter_halt_device(struct rndis_device *dev)
struct rndis_halt_request *halt;
struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
struct netvsc_device *nvdev = net_device_ctx->nvdev;
- struct hv_device *hdev = net_device_ctx->device_ctx;
- ulong flags;
/* Attempt to do a rndis device halt */
request = get_rndis_request(dev, RNDIS_MSG_HALT,
@@ -945,9 +937,10 @@ static void rndis_filter_halt_device(struct rndis_device *dev)
dev->state = RNDIS_DEV_UNINITIALIZED;
cleanup:
- spin_lock_irqsave(&hdev->channel->inbound_lock, flags);
nvdev->destroy = true;
- spin_unlock_irqrestore(&hdev->channel->inbound_lock, flags);
+
+ /* Force flag to be ordered before waiting */
+ wmb();
/* Wait for all send completions */
wait_event(nvdev->wait_drain, netvsc_device_idle(nvdev));
@@ -996,26 +989,38 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
hv_get_drvdata(new_sc->primary_channel->device_obj);
struct netvsc_device *nvscdev = net_device_to_netvsc_device(ndev);
u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
+ struct netvsc_channel *nvchan;
int ret;
- unsigned long flags;
if (chn_index >= nvscdev->num_chn)
return;
- nvscdev->chan_table[chn_index].mrc.buf
+ nvchan = nvscdev->chan_table + chn_index;
+ nvchan->mrc.buf
= vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
+ if (!nvchan->mrc.buf)
+ return;
+
+ /* Because the device uses NAPI, all the interrupt batching and
+ * control is done via Net softirq, not the channel handling
+ */
+ set_channel_read_mode(new_sc, HV_CALL_ISR);
+
+ /* Set the channel before opening.*/
+ nvchan->channel = new_sc;
+ netif_napi_add(ndev, &nvchan->napi,
+ netvsc_poll, NAPI_POLL_WEIGHT);
+
ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
nvscdev->ring_size * PAGE_SIZE, NULL, 0,
- netvsc_channel_cb, new_sc);
-
+ netvsc_channel_cb, nvchan);
if (ret == 0)
- nvscdev->chan_table[chn_index].channel = new_sc;
+ napi_enable(&nvchan->napi);
+ else
+ netdev_err(ndev, "sub channel open failed (%d)\n", ret);
- spin_lock_irqsave(&nvscdev->sc_lock, flags);
- nvscdev->num_sc_offered--;
- spin_unlock_irqrestore(&nvscdev->sc_lock, flags);
- if (nvscdev->num_sc_offered == 0)
+ if (refcount_dec_and_test(&nvscdev->sc_offered))
complete(&nvscdev->channel_init_wait);
}
@@ -1032,12 +1037,9 @@ int rndis_filter_device_add(struct hv_device *dev,
struct ndis_recv_scale_cap rsscap;
u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
unsigned int gso_max_size = GSO_MAX_SIZE;
- u32 mtu, size;
- u32 num_rss_qs;
- u32 sc_delta;
+ u32 mtu, size, num_rss_qs;
const struct cpumask *node_cpu_mask;
u32 num_possible_rss_qs;
- unsigned long flags;
int i, ret;
rndis_device = get_rndis_device();
@@ -1060,7 +1062,7 @@ int rndis_filter_device_add(struct hv_device *dev,
net_device->max_chn = 1;
net_device->num_chn = 1;
- spin_lock_init(&net_device->sc_lock);
+ refcount_set(&net_device->sc_offered, 0);
net_device->extension = rndis_device;
rndis_device->ndev = net;
@@ -1174,34 +1176,30 @@ int rndis_filter_device_add(struct hv_device *dev,
if (ret || rsscap.num_recv_que < 2)
goto out;
- net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, rsscap.num_recv_que);
-
- num_rss_qs = min(device_info->max_num_vrss_chns, net_device->max_chn);
-
/*
* We will limit the VRSS channels to the number CPUs in the NUMA node
* the primary channel is currently bound to.
+ *
+ * This also guarantees that num_possible_rss_qs <= num_online_cpus
*/
node_cpu_mask = cpumask_of_node(cpu_to_node(dev->channel->target_cpu));
- num_possible_rss_qs = cpumask_weight(node_cpu_mask);
+ num_possible_rss_qs = min_t(u32, cpumask_weight(node_cpu_mask),
+ rsscap.num_recv_que);
- /* We will use the given number of channels if available. */
- if (device_info->num_chn && device_info->num_chn < net_device->max_chn)
- net_device->num_chn = device_info->num_chn;
- else
- net_device->num_chn = min(num_possible_rss_qs, num_rss_qs);
+ net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, num_possible_rss_qs);
- num_rss_qs = net_device->num_chn - 1;
+ /* We will use the given number of channels if available. */
+ net_device->num_chn = min(net_device->max_chn, device_info->num_chn);
for (i = 0; i < ITAB_NUM; i++)
rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i,
net_device->num_chn);
- net_device->num_sc_offered = num_rss_qs;
-
- if (net_device->num_chn == 1)
- goto out;
+ num_rss_qs = net_device->num_chn - 1;
+ if (num_rss_qs == 0)
+ return 0;
+ refcount_set(&net_device->sc_offered, num_rss_qs);
vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
init_packet = &net_device->channel_init_pkt;
@@ -1217,32 +1215,23 @@ int rndis_filter_device_add(struct hv_device *dev,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (ret)
goto out;
- wait_for_completion(&net_device->channel_init_wait);
- if (init_packet->msg.v5_msg.subchn_comp.status !=
- NVSP_STAT_SUCCESS) {
+ if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) {
ret = -ENODEV;
goto out;
}
+ wait_for_completion(&net_device->channel_init_wait);
+
net_device->num_chn = 1 +
init_packet->msg.v5_msg.subchn_comp.num_subchannels;
- ret = rndis_filter_set_rss_param(rndis_device, netvsc_hash_key,
- net_device->num_chn);
-
- /*
- * Set the number of sub-channels to be received.
- */
- spin_lock_irqsave(&net_device->sc_lock, flags);
- sc_delta = num_rss_qs - (net_device->num_chn - 1);
- net_device->num_sc_offered -= sc_delta;
- spin_unlock_irqrestore(&net_device->sc_lock, flags);
-
+ /* ignore failues from setting rss parameters, still have channels */
+ rndis_filter_set_rss_param(rndis_device, netvsc_hash_key,
+ net_device->num_chn);
out:
if (ret) {
net_device->max_chn = 1;
net_device->num_chn = 1;
- net_device->num_sc_offered = 0;
}
return 0; /* return 0 because primary channel can be used alone */
@@ -1257,12 +1246,6 @@ void rndis_filter_device_remove(struct hv_device *dev,
{
struct rndis_device *rndis_dev = net_dev->extension;
- /* If not all subchannel offers are complete, wait for them until
- * completion to avoid race.
- */
- if (net_dev->num_sc_offered > 0)
- wait_for_completion(&net_dev->channel_init_wait);
-
/* Halt and release the rndis device */
rndis_filter_halt_device(rndis_dev);