diff options
| author | Dave Airlie <airlied@redhat.com> | 2024-03-08 11:21:13 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2024-03-08 11:21:13 +1000 |
| commit | af165fb00a1eb390976f6016fc69df0da0d27fad (patch) | |
| tree | 6351742220dd4b801c0c0b49689b3fc4937e95f1 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | |
| parent | Merge tag 'drm-misc-next-fixes-2024-02-29' of https://anongit.freedesktop.org... (diff) | |
| parent | drm/amdgpu: remove misleading amdgpu_pmops_runtime_idle() comment (diff) | |
| download | linux-af165fb00a1eb390976f6016fc69df0da0d27fad.tar.gz linux-af165fb00a1eb390976f6016fc69df0da0d27fad.zip | |
Merge tag 'amd-drm-next-6.9-2024-03-01' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.9-2024-03-01:
amdgpu:
- GC 11.5.1 updates
- Misc display cleanups
- NBIO 7.9 updates
- Backlight fixes
- DMUB fixes
- MPO fixes
- atomfirmware table updates
- SR-IOV fixes
- VCN 4.x updates
- use RMW accessors for pci config registers
- PSR fixes
- Suspend/resume fixes
- RAS fixes
- ABM fixes
- Misc code cleanups
- SI DPM fix
- Revert freesync video
amdkfd:
- Misc cleanups
- Error handling fixes
radeon:
- use RMW accessors for pci config registers
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240301204857.13960-1-alexander.deucher@amd.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 33 |
1 file changed, 33 insertions, 0 deletions
```diff
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 46f3d1013e8c..8ebab6f22e5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2439,6 +2439,18 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
 				ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
 				set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 
+				/* For any RAS error that needs a full reset to
+				 * recover, set the fatal error status
+				 */
+				if (hive) {
+					list_for_each_entry(remote_adev,
+							&hive->device_list,
+							gmc.xgmi.head)
+						amdgpu_ras_set_fed(remote_adev,
+								   true);
+				} else {
+					amdgpu_ras_set_fed(adev, true);
+				}
 				psp_fatal_error_recovery_quirk(&adev->psp);
 			}
 		}
@@ -3440,6 +3452,26 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
 	return 0;
 }
 
+bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *ras;
+
+	ras = amdgpu_ras_get_context(adev);
+	if (!ras)
+		return false;
+
+	return atomic_read(&ras->fed);
+}
+
+void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status)
+{
+	struct amdgpu_ras *ras;
+
+	ras = amdgpu_ras_get_context(adev);
+	if (ras)
+		atomic_set(&ras->fed, !!status);
+}
+
 void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
 {
 	if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
@@ -3620,6 +3652,7 @@ int amdgpu_ras_is_supported(struct amdgpu_device *adev,
 	     block == AMDGPU_RAS_BLOCK__SDMA ||
 	     block == AMDGPU_RAS_BLOCK__VCN ||
 	     block == AMDGPU_RAS_BLOCK__JPEG) &&
+	    (amdgpu_ras_mask & (1 << block)) &&
 	    amdgpu_ras_is_poison_mode_supported(adev) &&
 	    amdgpu_ras_get_ras_block(adev, block, 0))
 		ret = 1;
```
