From 30636258a7c9174be44ddb2318bae4f66d4beab0 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Thu, 30 May 2024 11:41:43 +0800 Subject: virtio_net: fix missing lock protection on control_buf access Refactored the handling of control_buf to be within the cvq_lock critical section, mitigating race conditions between reading device responses and new command submissions. Fixes: 6f45ab3e0409 ("virtio_net: Add a lock for the command VQ.") Signed-off-by: Heng Qi Reviewed-by: Hariprasad Kelam Acked-by: Jason Wang Reviewed-by: Xuan Zhuo Link: https://lore.kernel.org/r/20240530034143.19579-1-hengqi@linux.alibaba.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 4a802c0ea2cb..1ea8e6a24286 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2686,6 +2686,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd { struct scatterlist *sgs[5], hdr, stat; u32 out_num = 0, tmp, in_num = 0; + bool ok; int ret; /* Caller should know better */ @@ -2731,8 +2732,9 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd } unlock: + ok = vi->ctrl->status == VIRTIO_NET_OK; mutex_unlock(&vi->cvq_lock); - return vi->ctrl->status == VIRTIO_NET_OK; + return ok; } static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, -- cgit v1.2.3 From 9e0945b1901c9eed4fbee3b8a3870487b2bdc936 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Tue, 28 May 2024 21:41:15 +0800 Subject: virtio_net: fix possible dim status unrecoverable When the dim worker is scheduled, if it no longer needs to issue commands, dim may not be able to return to the working state later. For example, the following single queue scenario: 1. The dim worker of rxq0 is scheduled, and the dim status is changed to DIM_APPLY_NEW_PROFILE; 2. dim is disabled or parameters have not been modified; 3. virtnet_rx_dim_work exits directly; Then, even if net_dim is invoked again, it cannot work because the state is not restored to DIM_START_MEASURE. Fixes: 6208799553a8 ("virtio-net: support rx netdim") Signed-off-by: Heng Qi Reviewed-by: Jiri Pirko Acked-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo Link: https://lore.kernel.org/r/20240528134116.117426-2-hengqi@linux.alibaba.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1ea8e6a24286..774a292a6090 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -4419,9 +4419,9 @@ static void virtnet_rx_dim_work(struct work_struct *work) if (err) pr_debug("%s: Failed to send dim parameters on rxq%d\n", dev->name, qnum); - dim->state = DIM_START_MEASURE; } out: + dim->state = DIM_START_MEASURE; mutex_unlock(&rq->dim_lock); } -- cgit v1.2.3 From d1f0bd01bc58f35b5353ad9dbe5f7249a8f3368e Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Tue, 28 May 2024 21:41:16 +0800 Subject: virtio_net: fix a spurious deadlock issue When the following snippet is run, lockdep will report a deadlock[1]. /* Acquire all queues dim_locks */ for (i = 0; i < vi->max_queue_pairs; i++) mutex_lock(&vi->rq[i].dim_lock); There's no deadlock here because the vq locks are always taken in the same order, but lockdep can not figure it out. So refactoring the code to alleviate the problem. [1] ======================================================== WARNING: possible recursive locking detected 6.9.0-rc7+ #319 Not tainted -------------------------------------------- ethtool/962 is trying to acquire lock: but task is already holding lock: other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&vi->rq[i].dim_lock); lock(&vi->rq[i].dim_lock); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by ethtool/962: #0: ffffffff82dbaab0 (cb_lock){++++}-{3:3}, at: genl_rcv+0x19/0x40 #1: ffffffff82dad0a8 (rtnl_mutex){+.+.}-{3:3}, at: ethnl_default_set_doit+0xbe/0x1e0 stack backtrace: CPU: 6 PID: 962 Comm: ethtool Not tainted 6.9.0-rc7+ #319 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x79/0xb0 check_deadlock+0x130/0x220 __lock_acquire+0x861/0x990 lock_acquire.part.0+0x72/0x1d0 ? lock_acquire+0xf8/0x130 __mutex_lock+0x71/0xd50 virtnet_set_coalesce+0x151/0x190 __ethnl_set_coalesce.isra.0+0x3f8/0x4d0 ethnl_set_coalesce+0x34/0x90 ethnl_default_set_doit+0xdd/0x1e0 genl_family_rcv_msg_doit+0xdc/0x130 genl_family_rcv_msg+0x154/0x230 ? __pfx_ethnl_default_set_doit+0x10/0x10 genl_rcv_msg+0x4b/0xa0 ? __pfx_genl_rcv_msg+0x10/0x10 netlink_rcv_skb+0x5a/0x110 genl_rcv+0x28/0x40 netlink_unicast+0x1af/0x280 netlink_sendmsg+0x20e/0x460 __sys_sendto+0x1fe/0x210 ? find_held_lock+0x2b/0x80 ? do_user_addr_fault+0x3a2/0x8a0 ? __lock_release+0x5e/0x160 ? do_user_addr_fault+0x3a2/0x8a0 ? lock_release+0x72/0x140 ? do_user_addr_fault+0x3a7/0x8a0 __x64_sys_sendto+0x29/0x30 do_syscall_64+0x78/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fixes: 4d4ac2ececd3 ("virtio_net: Add a lock for per queue RX coalesce") Signed-off-by: Heng Qi Acked-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo Acked-by: Jason Wang Link: https://lore.kernel.org/r/20240528134116.117426-3-hengqi@linux.alibaba.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 774a292a6090..61a57d134544 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -4259,7 +4259,6 @@ static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL; bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; struct scatterlist sgs_rx; - int ret = 0; int i; if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) @@ -4269,27 +4268,27 @@ static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) return -EINVAL; - /* Acquire all queues dim_locks */ - for (i = 0; i < vi->max_queue_pairs; i++) - mutex_lock(&vi->rq[i].dim_lock); - if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { vi->rx_dim_enabled = true; - for (i = 0; i < vi->max_queue_pairs; i++) + for (i = 0; i < vi->max_queue_pairs; i++) { + mutex_lock(&vi->rq[i].dim_lock); vi->rq[i].dim_enabled = true; - goto unlock; + mutex_unlock(&vi->rq[i].dim_lock); + } + return 0; } coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL); - if (!coal_rx) { - ret = -ENOMEM; - goto unlock; - } + if (!coal_rx) + return -ENOMEM; if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { vi->rx_dim_enabled = false; - for (i = 0; i < vi->max_queue_pairs; i++) + for (i = 0; i < vi->max_queue_pairs; i++) { + mutex_lock(&vi->rq[i].dim_lock); vi->rq[i].dim_enabled = false; + mutex_unlock(&vi->rq[i].dim_lock); + } } /* Since the per-queue coalescing params can be set, @@ -4302,22 +4301,19 @@ static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, - &sgs_rx)) { - ret = -EINVAL; - goto unlock; - } + &sgs_rx)) + return -EINVAL; vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; for (i = 0; i < vi->max_queue_pairs; i++) { + mutex_lock(&vi->rq[i].dim_lock); vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; - } -unlock: - for (i = vi->max_queue_pairs - 1; i >= 0; i--) mutex_unlock(&vi->rq[i].dim_lock); + } - return ret; + return 0; } static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, -- cgit v1.2.3 From 604141c036e1b636e2a71cf6e1aa09d1e45f40c2 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Mon, 17 Jun 2024 21:15:23 +0800 Subject: virtio_net: checksum offloading handling fix In virtio spec 0.95, VIRTIO_NET_F_GUEST_CSUM was designed to handle partially checksummed packets, and the validation of fully checksummed packets by the device is independent of VIRTIO_NET_F_GUEST_CSUM negotiation. However, the specification erroneously stated: "If VIRTIO_NET_F_GUEST_CSUM is not negotiated, the device MUST set flags to zero and SHOULD supply a fully checksummed packet to the driver." This statement is inaccurate because even without VIRTIO_NET_F_GUEST_CSUM negotiation, the device can still set the VIRTIO_NET_HDR_F_DATA_VALID flag. Essentially, the device can facilitate the validation of these packets' checksums - a process known as RX checksum offloading - removing the need for the driver to do so. This scenario is currently not implemented in the driver and requires correction. The necessary specification correction[1] has been made and approved in the virtio TC vote. [1] https://lists.oasis-open.org/archives/virtio-comment/202401/msg00011.html Fixes: 4f49129be6fa ("virtio-net: Set RXCSUM feature if GUEST_CSUM is available") Signed-off-by: Heng Qi Reviewed-by: Jiri Pirko Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 61a57d134544..aa70a7ed8072 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -5666,8 +5666,16 @@ static int virtnet_probe(struct virtio_device *vdev) dev->features |= dev->hw_features & NETIF_F_ALL_TSO; /* (!csum && gso) case will be fixed by register_netdev() */ } - if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM)) - dev->features |= NETIF_F_RXCSUM; + + /* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't + * need to calculate checksums for partially checksummed packets, + * as they're considered valid by the upper layer. + * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only + * receives fully checksummed packets. The device may assist in + * validating these packets' checksums, so the driver won't have to. + */ + dev->features |= NETIF_F_RXCSUM; + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) dev->features |= NETIF_F_GRO_HW; -- cgit v1.2.3 From 703eec1b242276f2d97d98f04790ddad319ddde4 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Mon, 17 Jun 2024 21:15:24 +0800 Subject: virtio_net: fixing XDP for fully checksummed packets handling The XDP program can't correctly handle partially checksummed packets, but works fine with fully checksummed packets. If the device has already validated fully checksummed packets, then the driver doesn't need to re-validate them, saving CPU resources. Additionally, the driver does not drop all partially checksummed packets when VIRTIO_NET_F_GUEST_CSUM is not negotiated. This is not a bug, as the driver has always done this. Fixes: 436c9453a1ac ("virtio-net: keep vnet header zeroed after processing XDP") Signed-off-by: Heng Qi Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'drivers/net/virtio_net.c') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index aa70a7ed8072..ea10db9a09fa 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1360,6 +1360,10 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, if (unlikely(hdr->hdr.gso_type)) goto err_xdp; + /* Partially checksummed packets must be dropped. */ + if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) + goto err_xdp; + buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); @@ -1677,6 +1681,10 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, if (unlikely(hdr->hdr.gso_type)) return NULL; + /* Partially checksummed packets must be dropped. */ + if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) + return NULL; + /* Now XDP core assumes frag size is PAGE_SIZE, but buffers * with headroom may add hole in truesize, which * make their length exceed PAGE_SIZE. So we disabled the @@ -1943,6 +1951,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, struct net_device *dev = vi->dev; struct sk_buff *skb; struct virtio_net_common_hdr *hdr; + u8 flags; if (unlikely(len < vi->hdr_len + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); @@ -1951,6 +1960,15 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, return; } + /* 1. Save the flags early, as the XDP program might overwrite them. + * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID + * stay valid after XDP processing. + * 2. XDP doesn't work with partially checksummed packets (refer to + * virtnet_xdp_set()), so packets marked as + * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. + */ + flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; + if (vi->mergeable_rx_bufs) skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); @@ -1966,7 +1984,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); - if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) + if (flags & VIRTIO_NET_HDR_F_DATA_VALID) skb->ip_summed = CHECKSUM_UNNECESSARY; if (virtio_net_hdr_to_skb(skb, &hdr->hdr, -- cgit v1.2.3