Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
41 files changed, 687 insertions, 127 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3af5acff8518..7645e498faa4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -118,7 +118,7 @@ #define MAX_GPU_INSTANCE 64 -#define GFX_SLICE_PERIOD msecs_to_jiffies(250) +#define GFX_SLICE_PERIOD_MS 250 struct amdgpu_gpu_instance { struct amdgpu_device *adev; @@ -1111,8 +1111,6 @@ struct amdgpu_device { bool in_s3; bool in_s4; bool in_s0ix; - /* indicate amdgpu suspension status */ - bool suspend_complete; enum pp_mp1_state mp1_state; struct amdgpu_doorbell_index doorbell_index; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index cce85389427f..b8d4e07d2043 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -172,8 +172,8 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif, &buffer); obj = (union acpi_object *)buffer.pointer; - /* Fail if calling the method fails and ATIF is supported */ - if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { + /* Fail if calling the method fails */ + if (ACPI_FAILURE(status)) { DRM_DEBUG_DRIVER("failed to evaluate ATIF got %s\n", acpi_format_exception(status)); kfree(obj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index b545940e512b..24343c312480 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -890,6 +890,15 @@ int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id) return kgd2kfd_start_sched(adev->kfd.dev, node_id); } +/* check if there are KFD queues active */ +bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id) +{ + if (!adev->kfd.init_complete) + return false; + + return kgd2kfd_compute_active(adev->kfd.dev, node_id); +} + /* Config CGTT_SQ_CLK_CTRL */ int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id, bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 7e0a22072536..4b80ad860639 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -268,6 +268,7 @@ int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id); int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id); int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id, bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable); +bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id); /* Read user wptr from a specified user address space with page fault @@ -431,6 +432,7 @@ int kgd2kfd_check_and_lock_kfd(void); void kgd2kfd_unlock_kfd(void); int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); +bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id); #else static inline int kgd2kfd_init(void) { @@ -511,5 +513,10 @@ static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) { return 0; } + +static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) +{ + return false; +} #endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 
37d8657f0776..a68338cb7b4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -402,7 +402,7 @@ static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, siz int r; uint32_t *data, x; - if (size & 0x3 || *pos & 0x3) + if (size > 4096 || size & 0x3 || *pos & 0x3) return -EINVAL; r = pm_runtime_get_sync(adev_to_drm(adev)->dev); @@ -1648,7 +1648,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { ent = debugfs_create_file(debugfs_regs_names[i], - S_IFREG | 0444, root, + S_IFREG | 0400, root, adev, debugfs_regs[i]); if (!i && !IS_ERR_OR_NULL(ent)) i_size_write(ent->d_inode, adev->rmmio_size); @@ -2096,17 +2096,20 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_debugfs_umsch_fwlog_init(adev, &adev->umsch_mm); amdgpu_debugfs_jpeg_sched_mask_init(adev); + amdgpu_debugfs_gfx_sched_mask_init(adev); + amdgpu_debugfs_compute_sched_mask_init(adev); + amdgpu_debugfs_sdma_sched_mask_init(adev); amdgpu_ras_debugfs_create_all(adev); amdgpu_rap_debugfs_init(adev); amdgpu_securedisplay_debugfs_init(adev); amdgpu_fw_attestation_debugfs_init(adev); - debugfs_create_file("amdgpu_evict_vram", 0444, root, adev, + debugfs_create_file("amdgpu_evict_vram", 0400, root, adev, &amdgpu_evict_vram_fops); - debugfs_create_file("amdgpu_evict_gtt", 0444, root, adev, + debugfs_create_file("amdgpu_evict_gtt", 0400, root, adev, &amdgpu_evict_gtt_fops); - debugfs_create_file("amdgpu_test_ib", 0444, root, adev, + debugfs_create_file("amdgpu_test_ib", 0400, root, adev, &amdgpu_debugfs_test_ib_fops); debugfs_create_file("amdgpu_vm_info", 0444, root, adev, &amdgpu_debugfs_vm_info_fops); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f12fab13386a..0450eab6ade7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -6452,6 +6452,9 @@ bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) struct amdgpu_device *adev = drm_to_adev(dev); int r; + if (amdgpu_sriov_vf(adev)) + return false; + r = pci_save_state(pdev); if (!r) { kfree(adev->pci_state); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 73f4d56c5de4..1040204ac8b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1795,11 +1795,13 @@ int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev, switch (le16_to_cpu(nps_info->v1.header.version_major)) { case 1: + mem_ranges = kvcalloc(nps_info->v1.count, + sizeof(*mem_ranges), + GFP_KERNEL); + if (!mem_ranges) + return -ENOMEM; *nps_type = nps_info->v1.nps_type; *range_cnt = nps_info->v1.count; - mem_ranges = kvzalloc( - *range_cnt * sizeof(struct amdgpu_gmc_memrange), - GFP_KERNEL); for (i = 0; i < *range_cnt; i++) { mem_ranges[i].base_address = nps_info->v1.instance_info[i].base_address; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 6ac7d335e28e..38686203bea6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -888,7 +888,7 @@ module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444); * the ABM algorithm, with 1 being the least reduction and 4 being the most * reduction. * - * Defaults to -1, or disabled. Userspace can only override this level after + * Defaults to -1, or auto. 
Userspace can only override this level after * boot if it's set to auto. */ int amdgpu_dm_abm_level = -1; @@ -2505,7 +2505,6 @@ static int amdgpu_pmops_suspend(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - adev->suspend_complete = false; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) @@ -2520,7 +2519,6 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - adev->suspend_complete = true; if (amdgpu_acpi_should_gpu_reset(adev)) return amdgpu_asic_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c index 35fee3e8cde2..8cd69836dd99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c @@ -200,7 +200,7 @@ static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr, dev_err_ratelimited(&i2c_adap->dev, "maddr:0x%04X size:0x%02X:quirk max_%s_len must be > %d", eeprom_addr, buf_size, - read ? "read" : "write", EEPROM_OFFSET_SIZE); + str_read_write(read), EEPROM_OFFSET_SIZE); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index 00a4ab082459..df2cf5c33925 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -33,6 +33,7 @@ #include <drm/amdgpu_drm.h> #include <drm/drm_debugfs.h> #include <drm/drm_drv.h> +#include <drm/drm_file.h> #include "amdgpu.h" #include "amdgpu_vm.h" @@ -65,6 +66,10 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) [TTM_PL_VRAM] = "vram", [TTM_PL_TT] = "gtt", [TTM_PL_SYSTEM] = "cpu", + [AMDGPU_PL_GDS] = "gds", + [AMDGPU_PL_GWS] = "gws", + [AMDGPU_PL_OA] = "oa", + [AMDGPU_PL_DOORBELL] = "doorbell", }; unsigned int hw_ip, i; int ret; @@ -86,32 +91,30 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) drm_printf(p, "pasid:\t%u\n", fpriv->vm.pasid); - for (i = 0; i < TTM_PL_PRIV; i++) + for (i = 0; i < ARRAY_SIZE(pl_name); i++) { + if (!pl_name[i]) + continue; + drm_print_memory_stats(p, &stats[i].drm, DRM_GEM_OBJECT_RESIDENT | DRM_GEM_OBJECT_PURGEABLE, pl_name[i]); + } /* Legacy amdgpu keys, alias to drm-resident-memory-: */ drm_printf(p, "drm-memory-vram:\t%llu KiB\n", - stats[TTM_PL_VRAM].total/1024UL); + stats[TTM_PL_VRAM].drm.resident/1024UL); drm_printf(p, "drm-memory-gtt: \t%llu KiB\n", - stats[TTM_PL_TT].total/1024UL); + stats[TTM_PL_TT].drm.resident/1024UL); drm_printf(p, "drm-memory-cpu: \t%llu KiB\n", - stats[TTM_PL_SYSTEM].total/1024UL); + stats[TTM_PL_SYSTEM].drm.resident/1024UL); /* Amdgpu specific memory accounting keys: */ - drm_printf(p, "amd-memory-visible-vram:\t%llu KiB\n", - stats[TTM_PL_VRAM].visible/1024UL); drm_printf(p, "amd-evicted-vram:\t%llu KiB\n", stats[TTM_PL_VRAM].evicted/1024UL); - drm_printf(p, "amd-evicted-visible-vram:\t%llu KiB\n", - stats[TTM_PL_VRAM].evicted_visible/1024UL); drm_printf(p, "amd-requested-vram:\t%llu KiB\n", stats[TTM_PL_VRAM].requested/1024UL); - drm_printf(p, "amd-requested-visible-vram:\t%llu KiB\n", - stats[TTM_PL_VRAM].requested_visible/1024UL); drm_printf(p, "amd-requested-gtt:\t%llu KiB\n", stats[TTM_PL_TT].requested/1024UL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 256b95232de5..b2033f8352f5 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -78,8 +78,9 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) if (adev->dummy_page_addr) return 0; - adev->dummy_page_addr = dma_map_page(&adev->pdev->dev, dummy_page, 0, - PAGE_SIZE, DMA_BIDIRECTIONAL); + adev->dummy_page_addr = dma_map_page_attrs(&adev->pdev->dev, dummy_page, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(&adev->pdev->dev, adev->dummy_page_addr)) { dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n"); adev->dummy_page_addr = 0; @@ -99,8 +100,9 @@ void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev) { if (!adev->dummy_page_addr) return; - dma_unmap_page(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE, - DMA_BIDIRECTIONAL); + dma_unmap_page_attrs(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE, + DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); adev->dummy_page_addr = 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index e96984c53e72..2f3f09dfb1fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1602,7 +1602,7 @@ static DEVICE_ATTR(current_compute_partition, 0644, static DEVICE_ATTR(available_compute_partition, 0444, amdgpu_gfx_get_available_compute_partition, NULL); -int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) +static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev) { struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; bool xcp_switch_supported; @@ -1629,7 +1629,7 @@ int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) return r; } -void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) +static void amdgpu_gfx_sysfs_xcp_fini(struct amdgpu_device *adev) { struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; bool xcp_switch_supported; @@ -1646,25 +1646,47 @@ void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) &dev_attr_available_compute_partition); } -int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev) +static int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev) { int r; r = device_create_file(adev->dev, &dev_attr_enforce_isolation); if (r) return r; + if (adev->gfx.enable_cleaner_shader) + r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader); - r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader); - if (r) + return r; +} + +static void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev) +{ + device_remove_file(adev->dev, &dev_attr_enforce_isolation); + if (adev->gfx.enable_cleaner_shader) + device_remove_file(adev->dev, &dev_attr_run_cleaner_shader); +} + +int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_gfx_sysfs_xcp_init(adev); + if (r) { + dev_err(adev->dev, "failed to create xcp sysfs files"); return r; + } - return 0; + r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + if (r) + dev_err(adev->dev, "failed to create isolation sysfs files"); + + return r; } -void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev) +void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) { - device_remove_file(adev->dev, &dev_attr_enforce_isolation); - device_remove_file(adev->dev, &dev_attr_run_cleaner_shader); + amdgpu_gfx_sysfs_xcp_fini(adev); + amdgpu_gfx_sysfs_isolation_shader_fini(adev); } int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, @@ -1752,7 +1774,7 @@ static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx, if 
(adev->gfx.kfd_sch_req_count[idx] == 0 && adev->gfx.kfd_sch_inactive[idx]) { schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, - GFX_SLICE_PERIOD); + msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx])); } } else { if (adev->gfx.kfd_sch_req_count[idx] == 0) { @@ -1807,8 +1829,9 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]); } if (fences) { + /* we've already had our timeslice, so let's wrap this up */ schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, - GFX_SLICE_PERIOD); + msecs_to_jiffies(1)); } else { /* Tell KFD to resume the runqueue */ if (adev->kfd.init_complete) { @@ -1821,6 +1844,51 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) mutex_unlock(&adev->enforce_isolation_mutex); } +static void +amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev, + u32 idx) +{ + unsigned long cjiffies; + bool wait = false; + + mutex_lock(&adev->enforce_isolation_mutex); + if (adev->enforce_isolation[idx]) { + /* set the initial values if nothing is set */ + if (!adev->gfx.enforce_isolation_jiffies[idx]) { + adev->gfx.enforce_isolation_jiffies[idx] = jiffies; + adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS; + } + /* Make sure KFD gets a chance to run */ + if (amdgpu_amdkfd_compute_active(adev, idx)) { + cjiffies = jiffies; + if (time_after(cjiffies, adev->gfx.enforce_isolation_jiffies[idx])) { + cjiffies -= adev->gfx.enforce_isolation_jiffies[idx]; + if ((jiffies_to_msecs(cjiffies) >= GFX_SLICE_PERIOD_MS)) { + /* if our time is up, let KGD work drain before scheduling more */ + wait = true; + /* reset the timer period */ + adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS; + } else { + /* set the timer period to what's left in our time slice */ + adev->gfx.enforce_isolation_time[idx] = + GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies); + } + } else { + /* if jiffies wrap around we will just wait a little longer */ + adev->gfx.enforce_isolation_jiffies[idx] = jiffies; + } + } else { + /* if there is no KFD work, then set the full slice period */ + adev->gfx.enforce_isolation_jiffies[idx] = jiffies; + adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS; + } + } + mutex_unlock(&adev->enforce_isolation_mutex); + + if (wait) + msleep(GFX_SLICE_PERIOD_MS); +} + void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -1837,6 +1905,9 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) if (idx >= MAX_XCP) return; + /* Don't submit more work until KFD has had some time */ + amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx); + mutex_lock(&adev->enforce_isolation_mutex); if (adev->enforce_isolation[idx]) { if (adev->kfd.init_complete) @@ -1868,3 +1939,144 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) } mutex_unlock(&adev->enforce_isolation_mutex); } + +/* + * debugfs for to enable/disable gfx job submission to specific core. 
+ */ +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + + mask = (1 << adev->gfx.num_gfx_rings) - 1; + if ((val & mask) == 0) + return -EINVAL; + + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { + ring = &adev->gfx.gfx_ring[i]; + if (val & (1 << i)) + ring->sched.ready = true; + else + ring->sched.ready = false; + } + /* publish sched.ready flag update effective immediately across smp */ + smp_rmb(); + return 0; +} + +static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { + ring = &adev->gfx.gfx_ring[i]; + if (ring->sched.ready) + mask |= 1 << i; + } + + *val = mask; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gfx_sched_mask_fops, + amdgpu_debugfs_gfx_sched_mask_get, + amdgpu_debugfs_gfx_sched_mask_set, "%llx\n"); + +#endif + +void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + if (!(adev->gfx.num_gfx_rings > 1)) + return; + sprintf(name, "amdgpu_gfx_sched_mask"); + debugfs_create_file(name, 0600, root, adev, + &amdgpu_debugfs_gfx_sched_mask_fops); +#endif +} + +/* + * debugfs for to enable/disable compute job submission to specific core. + */ +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + + mask = (1 << adev->gfx.num_compute_rings) - 1; + if ((val & mask) == 0) + return -EINVAL; + + for (i = 0; i < adev->gfx.num_compute_rings; ++i) { + ring = &adev->gfx.compute_ring[i]; + if (val & (1 << i)) + ring->sched.ready = true; + else + ring->sched.ready = false; + } + + /* publish sched.ready flag update effective immediately across smp */ + smp_rmb(); + return 0; +} + +static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + for (i = 0; i < adev->gfx.num_compute_rings; ++i) { + ring = &adev->gfx.compute_ring[i]; + if (ring->sched.ready) + mask |= 1 << i; + } + + *val = mask; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_compute_sched_mask_fops, + amdgpu_debugfs_compute_sched_mask_get, + amdgpu_debugfs_compute_sched_mask_set, "%llx\n"); + +#endif + +void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + if (!(adev->gfx.num_compute_rings > 1)) + return; + sprintf(name, "amdgpu_compute_sched_mask"); + debugfs_create_file(name, 0600, root, adev, + &amdgpu_debugfs_compute_sched_mask_fops); +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index f710178a21bc..fd73e527f446 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -472,6 +472,8 @@ struct amdgpu_gfx { struct 
mutex kfd_sch_mutex; u64 kfd_sch_req_count[MAX_XCP]; bool kfd_sch_inactive[MAX_XCP]; + unsigned long enforce_isolation_jiffies[MAX_XCP]; + unsigned long enforce_isolation_time[MAX_XCP]; }; struct amdgpu_gfx_ras_reg_entry { @@ -577,11 +579,11 @@ void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev); void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, unsigned int cleaner_shader_size, const void *cleaner_shader_ptr); -int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev); -void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev); void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work); void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring); void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); +void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev); +void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev); static inline const char *amdgpu_gfx_compute_mode_desc(int mode) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index ea2663169bf3..b9d08bc96581 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -137,6 +137,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) /* attempt a per ring reset */ if (amdgpu_gpu_recovery && ring->funcs->reset) { + dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name); /* stop the scheduler, but don't mess with the * bad job yet because if ring reset fails * we'll fall back to full GPU reset. @@ -152,6 +153,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) drm_sched_start(&ring->sched, 0); goto exit; } + dev_err(adev->dev, "Ring %s reset failure\n", ring->sched.name); } if (amdgpu_device_should_recover_gpu(ring->adev)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 18ee60378727..3ca03b5e0f91 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -348,6 +348,24 @@ static bool amdgpu_mca_bank_should_update(struct amdgpu_device *adev, enum amdgp return ret; } +static bool amdgpu_mca_bank_should_dump(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, + struct mca_bank_entry *entry) +{ + bool ret; + + switch (type) { + case AMDGPU_MCA_ERROR_TYPE_CE: + ret = amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS]); + break; + case AMDGPU_MCA_ERROR_TYPE_UE: + default: + ret = true; + break; + } + + return ret; +} + static int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set, struct ras_query_context *qctx) { @@ -373,7 +391,8 @@ static int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_mc amdgpu_mca_bank_set_add_entry(mca_set, &entry); - amdgpu_mca_smu_mca_bank_dump(adev, i, &entry, qctx); + if (amdgpu_mca_bank_should_dump(adev, type, &entry)) + amdgpu_mca_smu_mca_bank_dump(adev, i, &entry, qctx); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 6909af56fcad..b10383f83d73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1594,6 +1594,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) char ucode_prefix[30]; char fw_name[50]; bool need_retry = false; + u32 *ucode_ptr; int r; amdgpu_ucode_ip_version_decode(adev, 
GC_HWIP, ucode_prefix, @@ -1631,6 +1632,10 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) adev->mes.data_start_addr[pipe] = le32_to_cpu(mes_hdr->mes_data_start_addr_lo) | ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32); + ucode_ptr = (u32 *)(adev->mes.fw[pipe]->data + + sizeof(union amdgpu_firmware_header)); + adev->mes.fw_version[pipe] = + le32_to_cpu(ucode_ptr[24]) & AMDGPU_MES_VERSION_MASK; if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { int ucode, ucode_data; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 96788c0f42f1..0684e482a204 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -75,6 +75,7 @@ struct amdgpu_mes { uint32_t sched_version; uint32_t kiq_version; + uint32_t fw_version[AMDGPU_MAX_MES_PIPES]; bool enable_legacy_queue_map; uint32_t total_max_queue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 1e6a044e3143..b5f65ef1efcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -40,6 +40,7 @@ #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" #include "amdgpu_vram_mgr.h" +#include "amdgpu_vm.h" /** * DOC: amdgpu_object @@ -1208,23 +1209,11 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, type = res->mem_type; } - /* Squash some into 'cpu' to keep the legacy userspace view. */ - switch (type) { - case TTM_PL_VRAM: - case TTM_PL_TT: - case TTM_PL_SYSTEM: - break; - default: - type = TTM_PL_SYSTEM; - break; - } - if (drm_WARN_ON_ONCE(&adev->ddev, type >= sz)) return; /* DRM stats common fields: */ - stats[type].total += size; if (drm_gem_object_is_shared_for_memory_stats(obj)) stats[type].drm.shared += size; else @@ -1237,23 +1226,14 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, stats[type].drm.active += size; else if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) stats[type].drm.purgeable += size; - - if (type == TTM_PL_VRAM && amdgpu_res_cpu_visible(adev, res)) - stats[type].visible += size; } /* amdgpu specific stats: */ if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) { stats[TTM_PL_VRAM].requested += size; - if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) - stats[TTM_PL_VRAM].requested_visible += size; - - if (type != TTM_PL_VRAM) { + if (type != TTM_PL_VRAM) stats[TTM_PL_VRAM].evicted += size; - if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) - stats[TTM_PL_VRAM].evicted_visible += size; - } } else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) { stats[TTM_PL_TT].requested += size; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 7260349917ef..be6769852ece 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -139,17 +139,6 @@ struct amdgpu_bo_vm { struct amdgpu_vm_bo_base entries[]; }; -struct amdgpu_mem_stats { - struct drm_memory_stats drm; - - uint64_t total; - uint64_t visible; - uint64_t evicted; - uint64_t evicted_visible; - uint64_t requested; - uint64_t requested_visible; -}; - static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) { return container_of(tbo, struct amdgpu_bo, tbo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index abd5e980c9c7..17cf10c0b72b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1834,6 +1834,9 @@ int 
psp_ras_initialize(struct psp_context *psp) ras_cmd->ras_in_message.init_flags.xcc_mask = adev->gfx.xcc_mask; ras_cmd->ras_in_message.init_flags.channel_dis_num = hweight32(adev->gmc.m_half_use) * 2; + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + ras_cmd->ras_in_message.init_flags.nps_mode = + adev->gmc.gmc_funcs->query_mem_partition_mode(adev); ret = psp_ta_load(psp, &psp->ras_context.context); @@ -3563,6 +3566,36 @@ out: return err; } +static bool is_ta_fw_applicable(struct psp_context *psp, + const struct psp_fw_bin_desc *desc) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t fw_version; + + switch (desc->fw_type) { + case TA_FW_TYPE_PSP_XGMI: + case TA_FW_TYPE_PSP_XGMI_AUX: + /* for now, AUX TA only exists on 13.0.6 ta bin, + * from v20.00.0x.14 + */ + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == + IP_VERSION(13, 0, 6)) { + fw_version = le32_to_cpu(desc->fw_version); + + if (adev->flags & AMD_IS_APU && + (fw_version & 0xff) >= 0x14) + return desc->fw_type == TA_FW_TYPE_PSP_XGMI_AUX; + else + return desc->fw_type == TA_FW_TYPE_PSP_XGMI; + } + break; + default: + break; + } + + return true; +} + static int parse_ta_bin_descriptor(struct psp_context *psp, const struct psp_fw_bin_desc *desc, const struct ta_firmware_header_v2_0 *ta_hdr) @@ -3572,6 +3605,9 @@ static int parse_ta_bin_descriptor(struct psp_context *psp, if (!psp || !desc || !ta_hdr) return -EINVAL; + if (!is_ta_fw_applicable(psp, desc)) + return 0; + ucode_start_addr = (uint8_t *)ta_hdr + le32_to_cpu(desc->offset_bytes) + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); @@ -3584,6 +3620,7 @@ static int parse_ta_bin_descriptor(struct psp_context *psp, psp->asd_context.bin_desc.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_XGMI: + case TA_FW_TYPE_PSP_XGMI_AUX: psp->xgmi_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version); psp->xgmi_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes); psp->xgmi_context.context.bin_desc.start_addr = ucode_start_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 1d9eda883bb8..b772299e1067 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2605,6 +2605,7 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; reset_context.src = AMDGPU_RESET_SRC_RAS; + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); /* Perform full reset in fatal error mode */ if (!amdgpu_ras_is_poison_mode_supported(ras->adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 42f616c05f50..a6e28fe3f8d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -109,21 +109,17 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw) void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { uint32_t occupied, chunk1, chunk2; - uint32_t *dst; occupied = ring->wptr & ring->buf_mask; - dst = (void *)&ring->ring[occupied]; chunk1 = ring->buf_mask + 1 - occupied; chunk1 = (chunk1 >= count) ? 
count : chunk1; chunk2 = count - chunk1; if (chunk1) - memset32(dst, ring->funcs->nop, chunk1); + memset32(&ring->ring[occupied], ring->funcs->nop, chunk1); - if (chunk2) { - dst = (void *)ring->ring; - memset32(dst, ring->funcs->nop, chunk2); - } + if (chunk2) + memset32(ring->ring, ring->funcs->nop, chunk2); ring->wptr += count; ring->wptr &= ring->ptr_mask; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 574336d6714a..36fc9578c53c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -246,7 +246,7 @@ struct amdgpu_ring { struct drm_gpu_scheduler sched; struct amdgpu_bo *ring_obj; - volatile uint32_t *ring; + uint32_t *ring; unsigned rptr_offs; u64 rptr_gpu_addr; volatile u32 *rptr_cpu_addr; @@ -288,7 +288,7 @@ struct amdgpu_ring { u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; unsigned int set_q_mode_offs; - volatile u32 *set_q_mode_ptr; + u32 *set_q_mode_ptr; u64 set_q_mode_token; unsigned vm_hub; unsigned vm_inv_eng; @@ -386,10 +386,8 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *src, int count_dw) { unsigned occupied, chunk1, chunk2; - void *dst; occupied = ring->wptr & ring->buf_mask; - dst = (void *)&ring->ring[occupied]; chunk1 = ring->buf_mask + 1 - occupied; chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1; chunk2 = count_dw - chunk1; @@ -397,12 +395,11 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, chunk2 <<= 2; if (chunk1) - memcpy(dst, src, chunk1); + memcpy(&ring->ring[occupied], src, chunk1); if (chunk2) { src += chunk1; - dst = (void *)ring->ring; - memcpy(dst, src, chunk2); + memcpy(ring->ring, src, chunk2); } ring->wptr += count_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 183a976ba29d..5868b4a32ea6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -343,3 +343,73 @@ int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev) return 0; } + +/* + * debugfs for to enable/disable sdma job submission to specific core. 
+ */ +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_debugfs_sdma_sched_mask_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + + mask = (1 << adev->sdma.num_instances) - 1; + if ((val & mask) == 0) + return -EINVAL; + + for (i = 0; i < adev->sdma.num_instances; ++i) { + ring = &adev->sdma.instance[i].ring; + if (val & (1 << i)) + ring->sched.ready = true; + else + ring->sched.ready = false; + } + /* publish sched.ready flag update effective immediately across smp */ + smp_rmb(); + return 0; +} + +static int amdgpu_debugfs_sdma_sched_mask_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + for (i = 0; i < adev->sdma.num_instances; ++i) { + ring = &adev->sdma.instance[i].ring; + if (ring->sched.ready) + mask |= 1 << i; + } + + *val = mask; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_sdma_sched_mask_fops, + amdgpu_debugfs_sdma_sched_mask_get, + amdgpu_debugfs_sdma_sched_mask_set, "%llx\n"); + +#endif + +void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + if (!(adev->sdma.num_instances > 1)) + return; + sprintf(name, "amdgpu_sdma_sched_mask"); + debugfs_create_file(name, 0600, root, adev, + &amdgpu_debugfs_sdma_sched_mask_fops); +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 087ce0f6fa07..a37fcd9bb981 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -175,5 +175,5 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance, void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev, bool duplicate); int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev); - +void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0637414fc70e..9f922ec50ea2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1851,6 +1851,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) mutex_init(&adev->mman.gtt_window_lock); + dma_set_max_seg_size(adev->dev, UINT_MAX); /* No others user of address space so set it to 0 */ r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev, adev_to_drm(adev)->anon_inode->i_mapping, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 4e23419b92d4..4150ec0aa10d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -163,6 +163,7 @@ enum ta_fw_type { TA_FW_TYPE_PSP_DTM, TA_FW_TYPE_PSP_RAP, TA_FW_TYPE_PSP_SECUREDISPLAY, + TA_FW_TYPE_PSP_XGMI_AUX, TA_FW_TYPE_MAX_INDEX, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6b855810ee86..8d9bf7a0857f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1161,7 +1161,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, int r; amdgpu_sync_create(&sync); - if (clear || !bo) { + if (clear) { mem = NULL; /* Implicitly sync to command submissions in the same VM before @@ 
-1176,6 +1176,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, if (r) goto error_free; } + } else if (!bo) { + mem = NULL; + + /* PRT map operations don't need to sync to anything. */ } else { struct drm_gem_object *obj = &bo->tbo.base; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index c5b41e3ed14f..5d119ac26c4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -42,7 +42,6 @@ struct amdgpu_bo_va; struct amdgpu_job; struct amdgpu_bo_list_entry; struct amdgpu_bo_vm; -struct amdgpu_mem_stats; /* * GPUVM handling @@ -322,6 +321,16 @@ struct amdgpu_vm_fault_info { unsigned int vmhub; }; +struct amdgpu_mem_stats { + struct drm_memory_stats drm; + + /* buffers that requested this placement */ + uint64_t requested; + /* buffers that requested this placement + * but are currently evicted */ + uint64_t evicted; +}; + struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h index 7ac89d78a5bf..b63f53242c57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h @@ -77,6 +77,7 @@ struct amdgpu_xcp_cfg { u8 num_res; struct amdgpu_xcp_mgr *xcp_mgr; struct kobject kobj; + u16 compatible_nps_modes; }; struct amdgpu_xcp_ip_funcs { diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 890976b7ce77..e157d6d857b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -455,6 +455,7 @@ static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr, int max_res[AMDGPU_XCP_RES_MAX] = {}; bool res_lt_xcp; int num_xcp, i; + u16 nps_modes; if (!(xcp_mgr->supp_xcp_modes & BIT(mode))) return -EINVAL; @@ -467,23 +468,33 @@ static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr, switch (mode) { case AMDGPU_SPX_PARTITION_MODE: num_xcp = 1; + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE); break; case AMDGPU_DPX_PARTITION_MODE: num_xcp = 2; + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE); break; case AMDGPU_TPX_PARTITION_MODE: num_xcp = 3; + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); break; case AMDGPU_QPX_PARTITION_MODE: num_xcp = 4; + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); break; case AMDGPU_CPX_PARTITION_MODE: num_xcp = NUM_XCC(adev->gfx.xcc_mask); + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); break; default: return -EINVAL; } + xcp_cfg->compatible_nps_modes = + (adev->gmc.supported_nps_modes & nps_modes); xcp_cfg->num_res = ARRAY_SIZE(max_res); for (i = 0; i < xcp_cfg->num_res; i++) { @@ -537,7 +548,7 @@ static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr, case AMDGPU_SPX_PARTITION_MODE: return adev->gmc.num_mem_partitions == 1 && num_xcc > 0; case AMDGPU_DPX_PARTITION_MODE: - return adev->gmc.num_mem_partitions != 8 && (num_xcc % 4) == 0; + return adev->gmc.num_mem_partitions <= 2 && (num_xcc % 4) == 0; case AMDGPU_TPX_PARTITION_MODE: return (adev->gmc.num_mem_partitions == 1 || adev->gmc.num_mem_partitions == 3) && diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 9da95b25e158..d1a18ca584dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4853,9 +4853,10 @@ 
static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) gfx_v10_0_alloc_ip_dump(adev); - r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + r = amdgpu_gfx_sysfs_init(adev); if (r) return r; + return 0; } @@ -4907,7 +4908,7 @@ static int gfx_v10_0_sw_fini(struct amdgpu_ip_block *ip_block) gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev); gfx_v10_0_free_microcode(adev); - amdgpu_gfx_sysfs_isolation_shader_fini(adev); + amdgpu_gfx_sysfs_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 894fc04201c3..62e4c446793d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -46,6 +46,7 @@ #include "clearstate_gfx11.h" #include "v11_structs.h" #include "gfx_v11_0.h" +#include "gfx_v11_0_cleaner_shader.h" #include "gfx_v11_0_3.h" #include "nbio_v4_3.h" #include "mes_v11_0.h" @@ -1579,8 +1580,24 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 0, 3): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 2280 && + adev->gfx.pfp_fw_version >= 2370 && + adev->gfx.mec_fw_version >= 2450 && + adev->mes.fw_version[0] >= 99) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; + break; } /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ @@ -1708,7 +1725,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) gfx_v11_0_alloc_ip_dump(adev); - r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + r = amdgpu_gfx_sysfs_init(adev); if (r) return r; @@ -1773,7 +1790,7 @@ static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) gfx_v11_0_free_microcode(adev); - amdgpu_gfx_sysfs_isolation_shader_fini(adev); + amdgpu_gfx_sysfs_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm new file mode 100644 index 000000000000..9b90b66368c7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader. +//To turn this shader program on for complitaion change this to main and lower shader main to main_1 + +// Navi3 : Clear SGPRs, VGPRs and LDS +// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot +// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD +// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS) +// It takes 2 workgroups to use all of LDS: one on each CU of the WGP +// Each wave clears SGPRs 0 - 107 +// Each wave clears VGPRs 0 - 63 +// The first wave of the workgroup clears its 64KB of LDS +// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup +// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared. + +shader main + asic(GFX11) + type(CS) + wave_size(32) +// Note: original source code from SQ team + +// Takes about 2500 clocks to run. +// (theorhetical fastest = 1024clks vgpr + 640lds = 1660 clks) +// + S_BARRIER + + // + // CLEAR VGPRs + // + s_mov_b32 m0, 0x00000058 // Loop 96/8=12 times (loop unrolled for performance) + +label_0005: + v_movreld_b32 v0, 0 + v_movreld_b32 v1, 0 + v_movreld_b32 v2, 0 + v_movreld_b32 v3, 0 + v_movreld_b32 v4, 0 + v_movreld_b32 v5, 0 + v_movreld_b32 v6, 0 + v_movreld_b32 v7, 0 + s_sub_u32 m0, m0, 8 + s_cbranch_scc0 label_0005 + // + // + + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave + s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set + s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup + // CLEAR LDS + // + s_mov_b32 exec_lo, 0xffffffff + s_mov_b32 exec_hi, 0xffffffff + v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63) + v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63) + v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte) + s_mov_b32 s2, 0x00000003f // 64 loop iterations + s_mov_b32 m0, 0xffffffff + // Clear all of LDS space + // Each FirstWave of WorkGroup clears 64kbyte block + +label_001F: + ds_write2_b64 v1, v[2:3], v[2:3] offset1:32 + ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96 + v_add_co_u32 v1, vcc, 0x00000400, v1 + s_sub_u32 s2, s2, 1 + s_cbranch_scc0 label_001F + // + // CLEAR SGPRs + // +label_0023: + s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance) +label_sgpr_loop: + s_movreld_b32 s0, 0 + s_movreld_b32 s1, 0 + s_movreld_b32 s2, 0 + s_movreld_b32 s3, 0 + s_sub_u32 m0, m0, 4 + s_cbranch_scc0 label_sgpr_loop + + //clear vcc + s_mov_b64 vcc, 0 //clear vcc + s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR + s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR + s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1 + s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3 + s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5 + s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7 + s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9 + s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11 + s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13 + s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15 + + s_endpgm + 
+end + diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h new file mode 100644 index 000000000000..3218cc04f543 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* Define the cleaner shader gfx_11_0_3 */ +static const u32 gfx_11_0_3_cleaner_shader_hex[] = { + 0xb0804006, 0xbe8200ff, + 0x00000058, 0xbefd0080, + 0x7e008480, 0x7e028480, + 0x7e048480, 0x7e068480, + 0x7e088480, 0x7e0a8480, + 0x7e0c8480, 0x7e0e8480, + 0xbefd0002, 0x80828802, + 0xbfa1fff5, 0xbe8200ff, + 0x80000000, 0x8b020002, + 0xbfa10012, 0xbefe00c1, + 0xbeff00c1, 0xd71f0001, + 0x0001007f, 0xd7200001, + 0x0002027e, 0x16020288, + 0xbe8200bf, 0xbefd00c1, + 0xd9382000, 0x00020201, + 0xd9386040, 0x00040401, + 0xd7006a01, 0x000202ff, + 0x00000400, 0x80828102, + 0xbfa1fff7, 0xbefd00ff, + 0x00000068, 0xbe804280, + 0xbe814280, 0xbe824280, + 0xbe834280, 0x80fd847d, + 0xbfa1fffa, 0xbeea0180, + 0xbeec0180, 0xbeee0180, + 0xbef00180, 0xbef20180, + 0xbef40180, 0xbef60180, + 0xbef80180, 0xbefa0180, + 0xbfb00000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 9fec28d8a5fc..1b99f90cd193 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1466,7 +1466,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) gfx_v12_0_alloc_ip_dump(adev); - r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + r = amdgpu_gfx_sysfs_init(adev); if (r) return r; @@ -1529,7 +1529,7 @@ static int gfx_v12_0_sw_fini(struct amdgpu_ip_block *ip_block) gfx_v12_0_free_microcode(adev); - amdgpu_gfx_sysfs_isolation_shader_fini(adev); + amdgpu_gfx_sysfs_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 66947850d7e4..a880dce16ae2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2402,7 +2402,7 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) gfx_v9_0_alloc_ip_dump(adev); - r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + r = amdgpu_gfx_sysfs_init(adev); if (r) return r; @@ -2443,7 +2443,7 @@ static int gfx_v9_0_sw_fini(struct 
amdgpu_ip_block *ip_block) } gfx_v9_0_free_microcode(adev); - amdgpu_gfx_sysfs_isolation_shader_fini(adev); + amdgpu_gfx_sysfs_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); @@ -3288,8 +3288,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) * confirmed that the APU gfx10/gfx11 needn't such update. */ if (adev->flags & AMD_IS_APU && - adev->in_s3 && !adev->suspend_complete) { - DRM_INFO(" Will skip the CSB packet resubmit\n"); + adev->in_s3 && !pm_resume_via_firmware()) { + DRM_INFO("Will skip the CSB packet resubmit\n"); return 0; } r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 016290f00592..983088805c3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1171,10 +1171,6 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block) gfx_v9_4_3_alloc_ip_dump(adev); - r = amdgpu_gfx_sysfs_isolation_shader_init(adev); - if (r) - return r; - return 0; } @@ -1199,7 +1195,6 @@ static int gfx_v9_4_3_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); gfx_v9_4_3_free_microcode(adev); amdgpu_gfx_sysfs_fini(adev); - amdgpu_gfx_sysfs_isolation_shader_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 93e44e7ee3fa..533b4b2b432d 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -578,22 +578,16 @@ soc15_asic_reset_method(struct amdgpu_device *adev) static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) { - u32 sol_reg; - - sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - /* Will reset for the following suspend abort cases. - * 1) Only reset limit on APU side, dGPU hasn't checked yet. - * 2) S3 suspend abort and TOS already launched. + * 1) Only reset on APU side, dGPU hasn't checked yet. + * 2) S3 suspend aborted in the normal S3 suspend or + * performing pm core test. */ if (adev->flags & AMD_IS_APU && adev->in_s3 && - sol_reg) { - adev->suspend_complete = false; + !pm_resume_via_firmware()) return true; - } else { - adev->suspend_complete = true; + else return false; - } } static int soc15_asic_reset(struct amdgpu_device *adev) @@ -603,11 +597,17 @@ static int soc15_asic_reset(struct amdgpu_device *adev) * successfully. So now, temporarily enable it for the * S3 suspend abort case. */ - if (((adev->apu_flags & AMD_APU_IS_RAVEN) || - (adev->apu_flags & AMD_APU_IS_RAVEN2)) && - !soc15_need_reset_on_resume(adev)) + + if ((adev->apu_flags & AMD_APU_IS_PICASSO || + !(adev->apu_flags & AMD_APU_IS_RAVEN)) && + soc15_need_reset_on_resume(adev)) + goto asic_reset; + + if ((adev->apu_flags & AMD_APU_IS_RAVEN) || + (adev->apu_flags & AMD_APU_IS_RAVEN2)) return 0; +asic_reset: switch (soc15_asic_reset_method(adev)) { case AMD_RESET_METHOD_PCI: dev_info(adev->dev, "PCI reset\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 1c07ebdc0d1f..93fbb3354720 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -897,9 +897,10 @@ static bool soc21_need_reset_on_resume(struct amdgpu_device *adev) /* Will reset for the following suspend abort cases. * 1) Only reset dGPU side. * 2) S3 suspend got aborted and TOS is active. 
+ * As for dGPU suspend abort cases the SOL value + * will be kept as zero at this resume point. */ - if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && - !adev->suspend_complete) { + if (!(adev->flags & AMD_IS_APU) && adev->in_s3) { sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); msleep(100); sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 3ac56a9645eb..21b71a427b1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -113,6 +113,14 @@ enum ta_ras_address_type { TA_RAS_PA_TO_MCA, }; +enum ta_ras_nps_mode { + TA_RAS_UNKNOWN_MODE = 0, + TA_RAS_NPS1_MODE = 1, + TA_RAS_NPS2_MODE = 2, + TA_RAS_NPS4_MODE = 4, + TA_RAS_NPS8_MODE = 8, +}; + /* Input/output structures for RAS commands */ /**********************************************************/ @@ -139,6 +147,7 @@ struct ta_ras_init_flags { uint8_t dgpu_mode; uint16_t xcc_mask; uint8_t channel_dis_num; + uint8_t nps_mode; }; struct ta_ras_mca_addr { |
