From 5c0a1cdd17ce9eb315102c65084af899622ed268 Mon Sep 17 00:00:00 2001 From: Yunxiang Li Date: Fri, 24 May 2024 16:22:28 -0400 Subject: drm/amdgpu: fix sriov host flr handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We send back the ready to reset message before we stop anything. This is wrong. Move it to when we are actually ready for the FLR to happen. In the current state since we take tens of seconds to stop everything, it is very likely that host would give up waiting and reset the GPU before we send ready, so it would be the same as before. But this gets rid of the hack with reset_domain locking and also let us tell how slow ready to reset actually is from the host. The ready to reset speed can be improved later. Signed-off-by: Yunxiang Li Acked-by: Christian König Reviewed-by: Emily Deng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c') diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 78cd07744ebe..e1d63bed84bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -515,12 +515,6 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); - /* wait until RCV_MSG become 3 */ - if (xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) { - pr_err("failed to receive FLR_CMPL\n"); - return; - } - /* Trigger recovery due to world switch failure */ if (amdgpu_device_should_recover_gpu(adev)) { struct amdgpu_reset_context reset_context; -- cgit v1.2.3