From 338f7412c7ea2ce007e83c5ad7c5e01d8cfce1e1 Mon Sep 17 00:00:00 2001 From: Xiang Liu Date: Wed, 19 Mar 2025 17:02:49 +0800 Subject: drm/amdgpu: Decode deferred error type in gfx aca bank parser In the case of injecting uncorrected error with background workload, the deferred error among uncorrected errors need to be specified by checking the deferred and poison bits of status register. v2: refine checking for deferred error v2: log possiable DEs among CEs v2: generate CPER records for DEs among UEs Signed-off-by: Xiang Liu Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index ffd4c64e123c..dc47f5fd4ea1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -391,6 +391,7 @@ static void aca_banks_generate_cper(struct amdgpu_device *adev, { struct aca_bank_node *node; struct aca_bank *bank; + int r; if (!adev->cper.enabled) return; @@ -402,11 +403,27 @@ static void aca_banks_generate_cper(struct amdgpu_device *adev, /* UEs must be encoded into separate CPER entries */ if (type == ACA_SMU_TYPE_UE) { + struct aca_banks de_banks; + + aca_banks_init(&de_banks); list_for_each_entry(node, &banks->list, node) { bank = &node->bank; - if (amdgpu_cper_generate_ue_record(adev, bank)) - dev_warn(adev->dev, "fail to generate ue cper records\n"); + if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) { + r = aca_banks_add_bank(&de_banks, bank); + if (r) + dev_warn(adev->dev, "fail to add de banks, ret = %d\n", r); + } else { + if (amdgpu_cper_generate_ue_record(adev, bank)) + dev_warn(adev->dev, "fail to generate ue cper records\n"); + } + } + + if (!list_empty(&de_banks.list)) { + if (amdgpu_cper_generate_ce_records(adev, &de_banks, de_banks.nr_banks)) + dev_warn(adev->dev, "fail to generate de cper records\n"); } + + aca_banks_release(&de_banks); } else { /* * SMU_TYPE_CE banks are combined into 1 CPER entries, @@ -541,6 +558,10 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h if (ret) return ret; + /* DEs may contain in CEs or UEs */ + if (type != ACA_ERROR_TYPE_DEFERRED) + aca_log_aca_error(handle, ACA_ERROR_TYPE_DEFERRED, err_data); + return aca_log_aca_error(handle, type, err_data); } -- cgit v1.2.3