Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/Makefile | 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 12
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 40
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 79
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 7
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 79
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 32
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 35
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 9
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 1
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 16
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 15
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 145
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 21
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 1
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 6
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 36
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 11
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 15
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/cik.c | 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c | 34
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h | 31
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 28
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 13
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 40
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c | 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c | 9
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/nv.c | 10
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 78
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 16
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 10
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 10
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 10
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/si.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/si_dma.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/soc15.c | 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/soc21.c | 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 31
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/umc_v8_10.c | 12
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/vi.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c | 18
55 files changed, 659 insertions(+), 333 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index d58e74ae8ade..2afecc55090f 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -104,7 +104,8 @@ amdgpu-y += \
amdgpu-y += \
df_v1_7.o \
df_v3_6.o \
- df_v4_3.o
+ df_v4_3.o \
+ df_v4_6_2.o
# add GMC block
amdgpu-y += \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8df702eaa2ad..8b4ca2576a3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -363,9 +363,6 @@ struct amdgpu_ip_block_version {
const struct amd_ip_funcs *funcs;
};
-#define HW_REV(_Major, _Minor, _Rev) \
- ((((uint32_t) (_Major)) << 16) | ((uint32_t) (_Minor) << 8) | ((uint32_t) (_Rev)))
-
struct amdgpu_ip_block {
struct amdgpu_ip_block_status status;
const struct amdgpu_ip_block_version *version;
@@ -1119,6 +1116,13 @@ static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
return adev->ip_versions[ip][inst] & ~0xFFU;
}
+static inline uint32_t amdgpu_ip_version_full(const struct amdgpu_device *adev,
+ uint8_t ip, uint8_t inst)
+{
+ /* This returns full version - major/minor/rev/variant/subrevision */
+ return adev->ip_versions[ip][inst];
+}
+
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
{
return container_of(ddev, struct amdgpu_device, ddev);
@@ -1333,9 +1337,7 @@ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev);
-bool amdgpu_device_pcie_dynamic_switching_supported(void);
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
-bool amdgpu_device_aspm_support_quirk(void);
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
u64 num_vis_bytes);
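Note on the two accessors above: amdgpu_ip_version() masks off the low byte, so it compares equal across variants and sub-revisions, while amdgpu_ip_version_full() keeps them. A minimal decode sketch, assuming the packing implied by the ~0xFFU mask (major in bits 31:24, minor in 23:16, revision in 15:8, variant in 7:4, sub-revision in 3:0); the helper names are illustrative, not part of the patch:

	static inline u32 ip_ver_major(u32 ver)   { return ver >> 24; }
	static inline u32 ip_ver_minor(u32 ver)   { return (ver >> 16) & 0xFF; }
	static inline u32 ip_ver_rev(u32 ver)     { return (ver >> 8) & 0xFF; }
	static inline u32 ip_ver_variant(u32 ver) { return (ver >> 4) & 0xF; }
	static inline u32 ip_ver_subrev(u32 ver)  { return ver & 0xF; }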
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 2bca37044ad0..d62e49758635 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -68,7 +68,7 @@ struct amdgpu_acpi_xcc_info {
struct amdgpu_acpi_dev_info {
struct list_head list;
struct list_head xcc_list;
- uint16_t bdf;
+ uint32_t sbdf;
uint16_t supp_xcp_mode;
uint16_t xcp_mode;
uint16_t mem_mode;
@@ -927,7 +927,7 @@ static acpi_status amdgpu_acpi_get_node_id(acpi_handle handle,
#endif
}
-static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u16 bdf)
+static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u32 sbdf)
{
struct amdgpu_acpi_dev_info *acpi_dev;
@@ -935,14 +935,14 @@ static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u16 bdf)
return NULL;
list_for_each_entry(acpi_dev, &amdgpu_acpi_dev_list, list)
- if (acpi_dev->bdf == bdf)
+ if (acpi_dev->sbdf == sbdf)
return acpi_dev;
return NULL;
}
static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info,
- struct amdgpu_acpi_xcc_info *xcc_info, u16 bdf)
+ struct amdgpu_acpi_xcc_info *xcc_info, u32 sbdf)
{
struct amdgpu_acpi_dev_info *tmp;
union acpi_object *obj;
@@ -955,7 +955,7 @@ static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info,
INIT_LIST_HEAD(&tmp->xcc_list);
INIT_LIST_HEAD(&tmp->list);
- tmp->bdf = bdf;
+ tmp->sbdf = sbdf;
obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
AMD_XCC_DSM_GET_SUPP_MODE, NULL,
@@ -1007,7 +1007,7 @@ static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info,
DRM_DEBUG_DRIVER(
"New dev(%x): Supported xcp mode: %x curr xcp_mode : %x mem mode : %x, tmr base: %llx tmr size: %llx ",
- tmp->bdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode,
+ tmp->sbdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode,
tmp->tmr_base, tmp->tmr_size);
list_add_tail(&tmp->list, &amdgpu_acpi_dev_list);
*dev_info = tmp;
@@ -1023,7 +1023,7 @@ out:
}
static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info,
- u16 *bdf)
+ u32 *sbdf)
{
union acpi_object *obj;
acpi_status status;
@@ -1054,8 +1054,10 @@ static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info,
xcc_info->phy_id = (obj->integer.value >> 32) & 0xFF;
/* xcp node of this xcc [47:40] */
xcc_info->xcp_node = (obj->integer.value >> 40) & 0xFF;
+ /* PF domain of this xcc [31:16] */
+ *sbdf = (obj->integer.value) & 0xFFFF0000;
/* PF bus/dev/fn of this xcc [63:48] */
- *bdf = (obj->integer.value >> 48) & 0xFFFF;
+ *sbdf |= (obj->integer.value >> 48) & 0xFFFF;
ACPI_FREE(obj);
obj = NULL;
@@ -1079,7 +1081,7 @@ static int amdgpu_acpi_enumerate_xcc(void)
struct acpi_device *acpi_dev;
char hid[ACPI_ID_LEN];
int ret, id;
- u16 bdf;
+ u32 sbdf;
INIT_LIST_HEAD(&amdgpu_acpi_dev_list);
xa_init(&numa_info_xa);
@@ -1107,16 +1109,16 @@ static int amdgpu_acpi_enumerate_xcc(void)
xcc_info->handle = acpi_device_handle(acpi_dev);
acpi_dev_put(acpi_dev);
- ret = amdgpu_acpi_get_xcc_info(xcc_info, &bdf);
+ ret = amdgpu_acpi_get_xcc_info(xcc_info, &sbdf);
if (ret) {
kfree(xcc_info);
continue;
}
- dev_info = amdgpu_acpi_get_dev(bdf);
+ dev_info = amdgpu_acpi_get_dev(sbdf);
if (!dev_info)
- ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, bdf);
+ ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, sbdf);
if (ret == -ENOMEM)
return ret;
@@ -1136,13 +1138,14 @@ int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
u64 *tmr_size)
{
struct amdgpu_acpi_dev_info *dev_info;
- u16 bdf;
+ u32 sbdf;
if (!tmr_offset || !tmr_size)
return -EINVAL;
- bdf = pci_dev_id(adev->pdev);
- dev_info = amdgpu_acpi_get_dev(bdf);
+ sbdf = (pci_domain_nr(adev->pdev->bus) << 16);
+ sbdf |= pci_dev_id(adev->pdev);
+ dev_info = amdgpu_acpi_get_dev(sbdf);
if (!dev_info)
return -ENOENT;
@@ -1157,13 +1160,14 @@ int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
{
struct amdgpu_acpi_dev_info *dev_info;
struct amdgpu_acpi_xcc_info *xcc_info;
- u16 bdf;
+ u32 sbdf;
if (!numa_info)
return -EINVAL;
- bdf = pci_dev_id(adev->pdev);
- dev_info = amdgpu_acpi_get_dev(bdf);
+ sbdf = (pci_domain_nr(adev->pdev->bus) << 16);
+ sbdf |= pci_dev_id(adev->pdev);
+ dev_info = amdgpu_acpi_get_dev(sbdf);
if (!dev_info)
return -ENOENT;
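The key change in this file: the ACPI XCC bookkeeping is now indexed by a 32-bit segment+BDF instead of a 16-bit BDF, so two GPUs with identical bus/device/function numbers on different PCI segments no longer collide in amdgpu_acpi_dev_list. The composition, condensed from the hunks above:

	/* bits 31:16 = PCI segment (domain), 15:8 = bus, 7:3 = dev, 2:0 = fn */
	u32 sbdf = (pci_domain_nr(pdev->bus) << 16) | pci_dev_id(pdev);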
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index bd1d4b2b6b16..23448359838b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -425,6 +425,32 @@ validate_fail:
return ret;
}
+static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+ uint32_t domain,
+ struct dma_fence *fence)
+{
+ int ret = amdgpu_bo_reserve(bo, false);
+
+ if (ret)
+ return ret;
+
+ ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
+ if (ret)
+ goto unreserve_out;
+
+ ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
+ if (ret)
+ goto unreserve_out;
+
+ dma_resv_add_fence(bo->tbo.base.resv, fence,
+ DMA_RESV_USAGE_BOOKKEEP);
+
+unreserve_out:
+ amdgpu_bo_unreserve(bo);
+
+ return ret;
+}
+
static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
{
return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
@@ -1784,6 +1810,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
}
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+ } else {
+ mutex_lock(&avm->process_info->lock);
+ if (avm->process_info->eviction_fence &&
+ !dma_fence_is_signaled(&avm->process_info->eviction_fence->base))
+ ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain,
+ &avm->process_info->eviction_fence->base);
+ mutex_unlock(&avm->process_info->lock);
+ if (ret)
+ goto err_validate_bo;
}
if (offset)
@@ -1793,6 +1828,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
allocate_init_user_pages_failed:
err_pin_bo:
+err_validate_bo:
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
drm_vma_node_revoke(&gobj->vma_node, drm_priv);
err_node_allow:
@@ -1866,10 +1902,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
if (unlikely(ret))
return ret;
- /* The eviction fence should be removed by the last unmap.
- * TODO: Log an error condition if the bo still has the eviction fence
- * attached
- */
amdgpu_amdkfd_remove_eviction_fence(mem->bo,
process_info->eviction_fence);
pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
@@ -1998,19 +2030,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
if (unlikely(ret))
goto out_unreserve;
- if (mem->mapped_to_gpu_memory == 0 &&
- !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
- /* Validate BO only once. The eviction fence gets added to BO
- * the first time it is mapped. Validate will wait for all
- * background evictions to complete.
- */
- ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
- if (ret) {
- pr_debug("Validate failed\n");
- goto out_unreserve;
- }
- }
-
list_for_each_entry(entry, &mem->attachments, list) {
if (entry->bo_va->base.vm != avm || entry->is_mapped)
continue;
@@ -2037,10 +2056,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
mem->mapped_to_gpu_memory);
}
- if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
- dma_resv_add_fence(bo->tbo.base.resv,
- &avm->process_info->eviction_fence->base,
- DMA_RESV_USAGE_BOOKKEEP);
ret = unreserve_bo_and_vms(&ctx, false, false);
goto out;
@@ -2074,7 +2089,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
- struct amdkfd_process_info *process_info = avm->process_info;
unsigned long bo_size = mem->bo->tbo.base.size;
struct kfd_mem_attachment *entry;
struct bo_vm_reservation_context ctx;
@@ -2115,15 +2129,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
mem->mapped_to_gpu_memory);
}
- /* If BO is unmapped from all VMs, unfence it. It can be evicted if
- * required.
- */
- if (mem->mapped_to_gpu_memory == 0 &&
- !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) &&
- !mem->bo->tbo.pin_count)
- amdgpu_amdkfd_remove_eviction_fence(mem->bo,
- process_info->eviction_fence);
-
unreserve_out:
unreserve_bo_and_vms(&ctx, false, false);
out:
@@ -2351,8 +2356,20 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
amdgpu_sync_create(&(*mem)->sync);
(*mem)->is_imported = true;
+ mutex_lock(&avm->process_info->lock);
+ if (avm->process_info->eviction_fence &&
+ !dma_fence_is_signaled(&avm->process_info->eviction_fence->base))
+ ret = amdgpu_amdkfd_bo_validate_and_fence(bo, (*mem)->domain,
+ &avm->process_info->eviction_fence->base);
+ mutex_unlock(&avm->process_info->lock);
+ if (ret)
+ goto err_remove_mem;
+
return 0;
+err_remove_mem:
+ remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+ drm_vma_node_revoke(&obj->vma_node, drm_priv);
err_free_mem:
kfree(*mem);
err_put_obj:
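Taken together, the hunks in this file tie the KFD eviction fence to the BO's lifetime instead of its map state: the fence is attached once at allocation or dma-buf import and only dropped at free, rather than being added on first map and removed on last unmap. A condensed sketch of the attach path shared by both call sites above:

	/* attach the eviction fence exactly once, while it is still live */
	mutex_lock(&process_info->lock);
	if (process_info->eviction_fence &&
	    !dma_fence_is_signaled(&process_info->eviction_fence->base))
		ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain,
				&process_info->eviction_fence->base);
	mutex_unlock(&process_info->lock);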
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 38ccec913f00..f3a09ecb7699 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -29,6 +29,7 @@
#include "amdgpu.h"
#include "atom.h"
+#include <linux/device.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/acpi.h>
@@ -287,6 +288,10 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
if (adev->flags & AMD_IS_APU)
return false;
+ /* ATRM is for on-platform devices only */
+ if (dev_is_removable(&adev->pdev->dev))
+ return false;
+
while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
dhandle = ACPI_HANDLE(&pdev->dev);
if (!dhandle)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 122472d88756..0245de81cabd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1116,6 +1116,11 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
return r;
}
+ /* FIXME: In theory this loop shouldn't be needed any more when
+ * amdgpu_vm_handle_moved handles all moved BOs that are reserved
+ * with p->ticket. But removing it caused test regressions, so I'm
+ * leaving it here for now.
+ */
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
bo_va = e->bo_va;
if (bo_va == NULL)
@@ -1130,7 +1135,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
return r;
}
- r = amdgpu_vm_handle_moved(adev, vm);
+ r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 3136a0774dd9..a53f436fa9f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -748,6 +748,9 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
ssize_t result = 0;
int r;
+ if (!adev->smc_rreg)
+ return -EPERM;
+
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
@@ -804,6 +807,9 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
ssize_t result = 0;
int r;
+ if (!adev->smc_wreg)
+ return -EPERM;
+
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9340e8dc0413..fd8cd8e2d3f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -41,6 +41,7 @@
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
+#include <linux/device.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
@@ -1073,6 +1074,8 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)
amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
amdgpu_psp_wait_for_bootloader(adev);
ret = amdgpu_atomfirmware_asic_init(adev, true);
+ /* TODO: check the return val and stop device initialization if boot fails */
+ amdgpu_psp_query_boot_status(adev);
return ret;
} else {
return amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -1456,14 +1459,14 @@ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
}
/*
- * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
- * speed switching. Until we have confirmation from Intel that a specific host
- * supports it, it's safer that we keep it disabled for all.
+ * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
+ * don't support dynamic speed switching. Until we have confirmation from Intel
+ * that a specific host supports it, it's safer that we keep it disabled for all.
*
* https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
* https://gitlab.freedesktop.org/drm/amd/-/issues/2663
*/
-bool amdgpu_device_pcie_dynamic_switching_supported(void)
+static bool amdgpu_device_pcie_dynamic_switching_supported(void)
{
#if IS_ENABLED(CONFIG_X86)
struct cpuinfo_x86 *c = &cpu_data(0);
@@ -1496,20 +1499,13 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
default:
return false;
}
+ if (adev->flags & AMD_IS_APU)
+ return false;
+ if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
+ return false;
return pcie_aspm_enabled(adev->pdev);
}
-bool amdgpu_device_aspm_support_quirk(void)
-{
-#if IS_ENABLED(CONFIG_X86)
- struct cpuinfo_x86 *c = &cpu_data(0);
-
- return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
-#else
- return true;
-#endif
-}
-
/* if we get transitioned to only one device, take VGA back */
/**
* amdgpu_device_vga_set_decode - enable/disable vga decode
@@ -2230,7 +2226,6 @@ out:
*/
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
- struct drm_device *dev = adev_to_drm(adev);
struct pci_dev *parent;
int i, r;
bool total;
@@ -2301,7 +2296,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
(amdgpu_is_atpx_hybrid() ||
amdgpu_has_atpx_dgpu_power_cntl()) &&
((adev->flags & AMD_IS_APU) == 0) &&
- !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
+ !dev_is_removable(&adev->pdev->dev))
adev->flags |= AMD_IS_PX;
if (!(adev->flags & AMD_IS_APU)) {
@@ -2315,6 +2310,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
+ if (!amdgpu_device_pcie_dynamic_switching_supported())
+ adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
total = true;
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -2492,6 +2489,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
}
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
+ DRM_SCHED_PRIORITY_COUNT,
ring->num_hw_submission, 0,
timeout, adev->reset_domain->wq,
ring->sched_score, ring->name,
@@ -2661,6 +2659,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (r)
goto init_failed;
+ if (adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+
/* Don't init kfd if whole hive need to be reset during init */
if (!adev->gmc.xgmi.pending_reset) {
kgd2kfd_init_zone_device(adev);
@@ -3258,6 +3259,8 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
amdgpu_virt_request_full_gpu(adev, false);
}
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
r = amdgpu_device_ip_suspend_phase1(adev);
if (r)
return r;
@@ -3447,6 +3450,9 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
r = amdgpu_device_ip_resume_phase2(adev);
+ if (adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+
return r;
}
@@ -3958,13 +3964,23 @@ int amdgpu_device_init(struct amdgpu_device *adev,
}
}
} else {
- tmp = amdgpu_reset_method;
- /* It should do a default reset when loading or reloading the driver,
- * regardless of the module parameter reset_method.
- */
- amdgpu_reset_method = AMD_RESET_METHOD_NONE;
- r = amdgpu_asic_reset(adev);
- amdgpu_reset_method = tmp;
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 10):
+ r = psp_gpu_reset(adev);
+ break;
+ default:
+ tmp = amdgpu_reset_method;
+ /* It should do a default reset when loading or reloading the driver,
+ * regardless of the module parameter reset_method.
+ */
+ amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+ r = amdgpu_asic_reset(adev);
+ amdgpu_reset_method = tmp;
+ break;
+ }
+
if (r) {
dev_err(adev->dev, "asic reset on init failed\n");
goto failed;
@@ -4128,7 +4144,7 @@ fence_driver_init:
px = amdgpu_device_supports_px(ddev);
- if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
+ if (px || (!dev_is_removable(&adev->pdev->dev) &&
apple_gmux_detect(NULL, NULL)))
vga_switcheroo_register_client(adev->pdev,
&amdgpu_switcheroo_ops, px);
@@ -4234,6 +4250,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
/* disable ras feature must before hw fini */
amdgpu_ras_pre_fini(adev);
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
amdgpu_device_ip_fini_early(adev);
amdgpu_irq_fini_hw(adev);
@@ -4276,7 +4294,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
px = amdgpu_device_supports_px(adev_to_drm(adev));
- if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
+ if (px || (!dev_is_removable(&adev->pdev->dev) &&
apple_gmux_detect(NULL, NULL)))
vga_switcheroo_unregister_client(adev->pdev);
@@ -4405,6 +4423,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
amdgpu_ras_suspend(adev);
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
amdgpu_device_ip_suspend_phase1(adev);
if (!adev->in_s0ix)
@@ -5176,6 +5196,9 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
if (r)
goto out;
+ if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
+
if (vram_lost)
amdgpu_device_fill_reset_magic(tmp_adev);
@@ -5555,10 +5578,6 @@ skip_hw_reset:
drm_sched_start(&ring->sched, true);
}
- if (adev->enable_mes &&
- amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))
- amdgpu_mes_self_test(tmp_adev);
-
if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
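Several hunks above replace the per-SDMA-version buffer-funcs toggling (removed from amdgpu_sdma.c and the individual SDMA/DMA IP files later in this diff) with central switch points. The resulting pattern, condensed:

	/* ip_init / ip_resume / do_asic_reset: enable once SDMA is ready */
	if (adev->mman.buffer_funcs_ring->sched.ready)
		amdgpu_ttm_set_buffer_funcs_status(adev, true);

	/* ip_suspend / fini_hw / device_suspend: disable up front */
	amdgpu_ttm_set_buffer_funcs_status(adev, false);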
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index b6cddcad122f..0431eafa86b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -35,6 +35,7 @@
#include "df_v1_7.h"
#include "df_v3_6.h"
#include "df_v4_3.h"
+#include "df_v4_6_2.h"
#include "nbio_v6_1.h"
#include "nbio_v7_0.h"
#include "nbio_v7_4.h"
@@ -98,6 +99,7 @@
MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY);
#define mmRCC_CONFIG_MEMSIZE 0xde3
+#define mmMP0_SMN_C2PMSG_33 0x16061
#define mmMM_INDEX 0x0
#define mmMM_INDEX_HI 0x6
#define mmMM_DATA 0x1
@@ -238,8 +240,26 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev,
static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
uint8_t *binary)
{
- uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
- int ret = 0;
+ uint64_t vram_size;
+ u32 msg;
+ int i, ret = 0;
+
+ /* It can take up to a second for IFWI init to complete on some dGPUs,
+ * but generally it should be in the 60-100ms range. Normally this starts
+ * as soon as the device gets power so by the time the OS loads this has long
+ * completed. However, when a card is hotplugged via e.g., USB4, we need to
+ * wait for this to complete. Once the C2PMSG is updated, we can
+ * continue.
+ */
+ if (dev_is_removable(&adev->pdev->dev)) {
+ for (i = 0; i < 1000; i++) {
+ msg = RREG32(mmMP0_SMN_C2PMSG_33);
+ if (msg & 0x80000000)
+ break;
+ msleep(1);
+ }
+ }
+ vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
if (vram_size) {
uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
@@ -1487,7 +1507,7 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
- if (le16_to_cpu(gc_info->v2.header.version_minor == 1)) {
+ if (le16_to_cpu(gc_info->v2.header.version_minor) == 1) {
adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v2_1.gc_num_tcp_per_sh);
adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v2_1.gc_tcp_size_per_cu);
adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v2_1.gc_num_sdp_interface); /* per XCD */
@@ -2448,6 +2468,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(4, 8, 0))
adev->gmc.xgmi.supported = true;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
+ adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 4, 0);
+
/* set NBIO version */
switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(6, 1, 0):
@@ -2559,6 +2582,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(4, 3, 0):
adev->df.funcs = &df_v4_3_funcs;
break;
+ case IP_VERSION(4, 6, 2):
+ adev->df.funcs = &df_v4_6_2_funcs;
+ break;
default:
break;
}
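The hotplug wait above polls a firmware mailbox rather than sleeping a fixed time. As a standalone helper it would look roughly like this — a sketch under the assumption, stated in the patch comment, that bit 31 of mmMP0_SMN_C2PMSG_33 flags IFWI init completion; the helper name is illustrative:

	static bool ifwi_init_done(struct amdgpu_device *adev, unsigned int timeout_ms)
	{
		unsigned int i;

		for (i = 0; i < timeout_ms; i++) {
			if (RREG32(mmMP0_SMN_C2PMSG_33) & 0x80000000)
				return true;
			msleep(1);
		}
		return false;
	}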
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index b5e28fa3f414..e7e87a3b2601 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -409,7 +409,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
if (!r)
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (!r)
- r = amdgpu_vm_handle_moved(adev, vm);
+ r = amdgpu_vm_handle_moved(adev, vm, ticket);
if (r && r != -EBUSY)
DRM_ERROR("Failed to invalidate VM page tables (%d))\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6cc6e3991410..3095a3a864af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2041,6 +2041,14 @@ static const struct pci_device_id pciidlist[] = {
MODULE_DEVICE_TABLE(pci, pciidlist);
+static const struct amdgpu_asic_type_quirk asic_type_quirks[] = {
+ /* differentiate between P10 and P11 asics with the same DID */
+ {0x67FF, 0xE3, CHIP_POLARIS10},
+ {0x67FF, 0xE7, CHIP_POLARIS10},
+ {0x67FF, 0xF3, CHIP_POLARIS10},
+ {0x67FF, 0xF7, CHIP_POLARIS10},
+};
+
static const struct drm_driver amdgpu_kms_driver;
static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev)
@@ -2083,6 +2091,22 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
}
}
+static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(asic_type_quirks); i++) {
+ if (pdev->device == asic_type_quirks[i].device &&
+ pdev->revision == asic_type_quirks[i].revision) {
+ flags &= ~AMD_ASIC_MASK;
+ flags |= asic_type_quirks[i].type;
+ break;
+ }
+ }
+
+ return flags;
+}
+
static int amdgpu_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
@@ -2110,15 +2134,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
"See modparam exp_hw_support\n");
return -ENODEV;
}
- /* differentiate between P10 and P11 asics with the same DID */
- if (pdev->device == 0x67FF &&
- (pdev->revision == 0xE3 ||
- pdev->revision == 0xE7 ||
- pdev->revision == 0xF3 ||
- pdev->revision == 0xF7)) {
- flags &= ~AMD_ASIC_MASK;
- flags |= CHIP_POLARIS10;
- }
+
+ flags = amdgpu_fix_asic_type(pdev, flags);
/* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping,
* however, SME requires an indirect IOMMU mapping because the encryption
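The open-coded Polaris check becomes a table lookup, so future DID/revision quirks are one-line additions to asic_type_quirks[]. The entry type is defined outside this diff; a plausible shape, consistent with how amdgpu_fix_asic_type() uses it above (treat the field layout as an assumption):

	struct amdgpu_asic_type_quirk {
		unsigned short device;   /* PCI device ID */
		u8 revision;             /* PCI revision ID */
		unsigned short type;     /* CHIP_* enum to substitute into flags */
	};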
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
index cba7e6cdc7cc..5706b282a0c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -56,21 +56,15 @@ static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = {
void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
{
- struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_mem_stats stats;
ktime_t usage[AMDGPU_HW_IP_NUM];
- uint32_t bus, dev, fn, domain;
unsigned int hw_ip;
int ret;
memset(&stats, 0, sizeof(stats));
- bus = adev->pdev->bus->number;
- domain = pci_domain_nr(adev->pdev->bus);
- dev = PCI_SLOT(adev->pdev->devfn);
- fn = PCI_FUNC(adev->pdev->devfn);
ret = amdgpu_bo_reserve(vm->root.bo, false);
if (ret)
@@ -88,9 +82,6 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
*/
drm_printf(p, "pasid:\t%u\n", fpriv->vm.pasid);
- drm_printf(p, "drm-driver:\t%s\n", file->minor->dev->driver->name);
- drm_printf(p, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn);
- drm_printf(p, "drm-client-id:\t%llu\n", vm->immediate.fence_context);
drm_printf(p, "drm-memory-vram:\t%llu KiB\n", stats.vram/1024UL);
drm_printf(p, "drm-memory-gtt: \t%llu KiB\n", stats.gtt/1024UL);
drm_printf(p, "drm-memory-cpu: \t%llu KiB\n", stats.cpu/1024UL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index c92e0aba69e1..a2a29dcb2422 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -385,9 +385,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
struct amdgpu_ring *ring = &kiq->ring;
u32 domain = AMDGPU_GEM_DOMAIN_GTT;
+#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
/* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
domain |= AMDGPU_GEM_DOMAIN_VRAM;
+#endif
/* create MQD for KIQ */
if (!adev->enable_mes_kiq && !ring->mqd_obj) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index a02992bff6af..2dce338b0f1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -786,6 +786,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
/* YELLOW_CARP*/
case IP_VERSION(10, 3, 3):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
/* Don't enable it by default yet.
*/
if (amdgpu_tmz < 1) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 78476bc75b4e..1f357198533f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -325,8 +325,8 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
int i;
/* Signal all jobs not yet scheduled */
- for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
- struct drm_sched_rq *rq = &sched->sched_rq[i];
+ for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
+ struct drm_sched_rq *rq = sched->sched_rq[i];
spin_lock(&rq->lock);
list_for_each_entry(s_entity, &rq->entities, list) {
while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 59f10b353b3a..9ddbf1494326 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -557,8 +557,20 @@ static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
mqd_prop.hqd_active = false;
+ if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
+ p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ mutex_lock(&adev->srbm_mutex);
+ amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0);
+ }
+
mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);
+ if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
+ p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
amdgpu_bo_unreserve(q->mqd_obj);
}
@@ -994,9 +1006,13 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
switch (queue_type) {
case AMDGPU_RING_TYPE_GFX:
ring->funcs = adev->gfx.gfx_ring[0].funcs;
+ ring->me = adev->gfx.gfx_ring[0].me;
+ ring->pipe = adev->gfx.gfx_ring[0].pipe;
break;
case AMDGPU_RING_TYPE_COMPUTE:
ring->funcs = adev->gfx.compute_ring[0].funcs;
+ ring->me = adev->gfx.compute_ring[0].me;
+ ring->pipe = adev->gfx.compute_ring[0].pipe;
break;
case AMDGPU_RING_TYPE_SDMA:
ring->funcs = adev->sdma.instance[0].ring.funcs;
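The MQD-init bracketing added above, in isolation: for MES-managed gfx/compute queues, the target me/pipe is selected under srbm_mutex before init_mqd() and always deselected afterwards, which suggests the MQD defaults are derived from per-pipe register state (the "why" is not spelled out in the patch):

	mutex_lock(&adev->srbm_mutex);
	amdgpu_gfx_select_me_pipe_q(adev, ring->me, ring->pipe, 0, 0, 0);
	mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);
	amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);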
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 648bd5e12830..32b701cc0376 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -2120,6 +2120,21 @@ int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev)
return ret;
}
+int amdgpu_psp_query_boot_status(struct amdgpu_device *adev)
+{
+ struct psp_context *psp = &adev->psp;
+ int ret = 0;
+
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return 0;
+
+ if (psp->funcs &&
+ psp->funcs->query_boot_status)
+ ret = psp->funcs->query_boot_status(psp);
+
+ return ret;
+}
+
static int psp_hw_start(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 7111dd32e66f..5d36ad3f48c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -134,6 +134,7 @@ struct psp_funcs {
int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
int (*vbflash_stat)(struct psp_context *psp);
int (*fatal_error_recovery_quirk)(struct psp_context *psp);
+ int (*query_boot_status)(struct psp_context *psp);
};
struct ta_funcs {
@@ -537,4 +538,6 @@ int is_psp_fw_valid(struct psp_bin_desc bin);
int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev);
+int amdgpu_psp_query_boot_status(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 3c83a2b8fb2c..b7fe5951b166 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -635,8 +635,11 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
static inline void put_obj(struct ras_manager *obj)
{
- if (obj && (--obj->use == 0))
+ if (obj && (--obj->use == 0)) {
list_del(&obj->node);
+ amdgpu_ras_error_data_fini(&obj->err_data);
+ }
+
if (obj && (obj->use < 0))
DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", get_ras_block_str(&obj->head));
}
@@ -666,6 +669,9 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
if (alive_obj(obj))
return NULL;
+ if (amdgpu_ras_error_data_init(&obj->err_data))
+ return NULL;
+
obj->head = *head;
obj->adev = adev;
list_add(&obj->node, &con->head);
@@ -1023,44 +1029,68 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d
}
static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
- struct ras_query_if *query_if,
+ struct ras_manager *ras_mgr,
struct ras_err_data *err_data,
+ const char *blk_name,
bool is_ue)
{
- struct ras_manager *ras_mgr = amdgpu_ras_find_obj(adev, &query_if->head);
- const char *blk_name = get_ras_block_str(&query_if->head);
struct amdgpu_smuio_mcm_config_info *mcm_info;
struct ras_err_node *err_node;
struct ras_err_info *err_info;
- if (is_ue)
- dev_info(adev->dev, "%ld uncorrectable hardware errors detected in %s block\n",
- ras_mgr->err_data.ue_count, blk_name);
- else
- dev_info(adev->dev, "%ld correctable hardware errors detected in %s block\n",
- ras_mgr->err_data.ce_count, blk_name);
+ if (is_ue) {
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ if (err_info->ue_count) {
+ dev_info(adev->dev, "socket: %d, die: %d, "
+ "%lld new uncorrectable hardware errors detected in %s block\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->ue_count,
+ blk_name);
+ }
+ }
- for_each_ras_error(err_node, err_data) {
- err_info = &err_node->err_info;
- mcm_info = &err_info->mcm_info;
- if (is_ue && err_info->ue_count) {
- dev_info(adev->dev, "socket: %d, die: %d "
- "%lld uncorrectable hardware errors detected in %s block\n",
- mcm_info->socket_id,
- mcm_info->die_id,
- err_info->ue_count,
- blk_name);
- } else if (!is_ue && err_info->ce_count) {
- dev_info(adev->dev, "socket: %d, die: %d "
- "%lld correctable hardware errors detected in %s block\n",
- mcm_info->socket_id,
- mcm_info->die_id,
- err_info->ce_count,
- blk_name);
+ for_each_ras_error(err_node, &ras_mgr->err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ dev_info(adev->dev, "socket: %d, die: %d, "
+ "%lld uncorrectable hardware errors detected in total in %s block\n",
+ mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name);
+ }
+
+ } else {
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ if (err_info->ce_count) {
+ dev_info(adev->dev, "socket: %d, die: %d, "
+ "%lld new correctable hardware errors detected in %s block, "
+ "no user action is needed\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->ce_count,
+ blk_name);
+ }
+ }
+
+ for_each_ras_error(err_node, &ras_mgr->err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ dev_info(adev->dev, "socket: %d, die: %d, "
+ "%lld correctable hardware errors detected in total in %s block, "
+ "no user action is needed\n",
+ mcm_info->socket_id, mcm_info->die_id, err_info->ce_count, blk_name);
}
}
}
+static inline bool err_data_has_source_info(struct ras_err_data *data)
+{
+ return !list_empty(&data->err_node_list);
+}
+
static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
struct ras_query_if *query_if,
struct ras_err_data *err_data)
@@ -1069,9 +1099,8 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
const char *blk_name = get_ras_block_str(&query_if->head);
if (err_data->ce_count) {
- if (!list_empty(&err_data->err_node_list)) {
- amdgpu_ras_error_print_error_data(adev, query_if,
- err_data, false);
+ if (err_data_has_source_info(err_data)) {
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, blk_name, false);
} else if (!adev->aid_mask &&
adev->smuio.funcs &&
adev->smuio.funcs->get_socket_id &&
@@ -1094,9 +1123,8 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
}
if (err_data->ue_count) {
- if (!list_empty(&err_data->err_node_list)) {
- amdgpu_ras_error_print_error_data(adev, query_if,
- err_data, true);
+ if (err_data_has_source_info(err_data)) {
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, blk_name, true);
} else if (!adev->aid_mask &&
adev->smuio.funcs &&
adev->smuio.funcs->get_socket_id &&
@@ -1118,6 +1146,25 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
}
+static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, struct ras_err_data *err_data)
+{
+ struct ras_err_node *err_node;
+ struct ras_err_info *err_info;
+
+ if (err_data_has_source_info(err_data)) {
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+
+ amdgpu_ras_error_statistic_ce_count(&obj->err_data, &err_info->mcm_info, err_info->ce_count);
+ amdgpu_ras_error_statistic_ue_count(&obj->err_data, &err_info->mcm_info, err_info->ue_count);
+ }
+ } else {
+ /* for legacy asic path which doesn't have error source info */
+ obj->err_data.ue_count += err_data->ue_count;
+ obj->err_data.ce_count += err_data->ce_count;
+ }
+}
+
/* query/inject/cure begin */
int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
struct ras_query_if *info)
@@ -1156,8 +1203,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
}
}
- obj->err_data.ue_count += err_data.ue_count;
- obj->err_data.ce_count += err_data.ce_count;
+ amdgpu_rasmgr_error_data_statistic_update(obj, &err_data);
info->ue_count = obj->err_data.ue_count;
info->ce_count = obj->err_data.ce_count;
@@ -1174,6 +1220,10 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
enum amdgpu_ras_block block)
{
struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+ struct amdgpu_hive_info *hive;
+ int hive_ras_recovery = 0;
if (!block_obj || !block_obj->hw_ops) {
dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
@@ -1181,7 +1231,20 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
return -EOPNOTSUPP;
}
- if (!amdgpu_ras_is_supported(adev, block))
+ if (!amdgpu_ras_is_supported(adev, block) ||
+ !amdgpu_ras_get_mca_debug_mode(adev))
+ return -EOPNOTSUPP;
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ hive_ras_recovery = atomic_read(&hive->ras_recovery);
+ amdgpu_put_xgmi_hive(hive);
+ }
+
+ /* skip ras error reset in gpu reset */
+ if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) ||
+ hive_ras_recovery) &&
+ mca_funcs && mca_funcs->mca_set_debug_mode)
return -EOPNOTSUPP;
if (block_obj->hw_ops->reset_ras_error_count)
@@ -2692,7 +2755,8 @@ static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
return;
/* Init poison supported flag, the default value is false */
- if (adev->gmc.xgmi.connected_to_cpu) {
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ adev->gmc.is_app_apu) {
/* enabled by default when GPU is connected to CPU */
con->poison_supported = true;
} else if (adev->df.funcs &&
@@ -3529,11 +3593,10 @@ static struct ras_err_node *amdgpu_ras_error_find_node_by_id(struct ras_err_data
for_each_ras_error(err_node, err_data) {
ref_id = &err_node->err_info.mcm_info;
- if ((mcm_info->socket_id >= 0 && mcm_info->socket_id != ref_id->socket_id) ||
- (mcm_info->die_id >= 0 && mcm_info->die_id != ref_id->die_id))
- continue;
- return err_node;
+ if (mcm_info->socket_id == ref_id->socket_id &&
+ mcm_info->die_id == ref_id->die_id)
+ return err_node;
}
return NULL;
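With ras_manager now holding a full struct ras_err_data (see the amdgpu_ras.h hunk below), lifetime totals are kept per (socket, die) instead of as two global counters. A consumer sketch of the aggregated data, using the field names as they appear in the hunks above:

	struct ras_err_node *node;
	struct ras_err_info *info;

	for_each_ras_error(node, &obj->err_data) {
		info = &node->err_info;
		dev_dbg(adev->dev, "socket %d die %d: ue %lld ce %lld\n",
			info->mcm_info.socket_id, info->mcm_info.die_id,
			info->ue_count, info->ce_count);
	}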
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 2fdfef62ee27..665414c22ca9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -515,10 +515,7 @@ struct ras_manager {
/* IH data */
struct ras_ih_data ih_data;
- struct {
- unsigned long ue_count;
- unsigned long ce_count;
- } err_data;
+ struct ras_err_data err_data;
};
struct ras_badpage {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index e8cbc4142d80..1d9d187de6ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -292,27 +292,6 @@ out:
return err;
}
-void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *sdma;
- int i;
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
- if (adev->sdma.has_page_queue) {
- sdma = &adev->sdma.instance[i].page;
- if (adev->mman.buffer_funcs_ring == sdma) {
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
- break;
- }
- }
- sdma = &adev->sdma.instance[i].ring;
- if (adev->mman.buffer_funcs_ring == sdma) {
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
- break;
- }
- }
-}
-
int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev)
{
int err = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 513ac22120c1..173a2a308078 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -169,7 +169,6 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance,
bool duplicate);
void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
bool duplicate);
-void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev);
int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index f74347cc087a..d65e21914d8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -166,8 +166,12 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
}
}
- if (reset)
+ if (reset) {
+ /* use mode-2 reset for poison consumption */
+ if (!entry)
+ con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET;
amdgpu_ras_reset_gpu(adev);
+ }
}
kfree(err_data->err_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f3c9f93d8899..904252456d25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -844,6 +844,7 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
* @immediate: immediate submission in a page fault
* @unlocked: unlocked invalidation during MM callback
* @flush_tlb: trigger tlb invalidation after update completed
+ * @allow_override: change MTYPE for local NUMA nodes
* @resv: fences we need to sync to
* @start: start of mapped range
* @last: last mapped entry
@@ -860,7 +861,7 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
* 0 for success, negative error code for failure.
*/
int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- bool immediate, bool unlocked, bool flush_tlb,
+ bool immediate, bool unlocked, bool flush_tlb, bool allow_override,
struct dma_resv *resv, uint64_t start, uint64_t last,
uint64_t flags, uint64_t offset, uint64_t vram_base,
struct ttm_resource *res, dma_addr_t *pages_addr,
@@ -898,6 +899,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
params.immediate = immediate;
params.pages_addr = pages_addr;
params.unlocked = unlocked;
+ params.allow_override = allow_override;
/* Implicitly sync to command submissions in the same VM before
* unmapping. Sync to moving fences before mapping.
@@ -1073,6 +1075,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
struct ttm_resource *mem;
struct dma_fence **last_update;
bool flush_tlb = clear;
+ bool uncached;
struct dma_resv *resv;
uint64_t vram_base;
uint64_t flags;
@@ -1110,9 +1113,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
vram_base = bo_adev->vm_manager.vram_base_offset;
+ uncached = (bo->flags & AMDGPU_GEM_CREATE_UNCACHED) != 0;
} else {
flags = 0x0;
vram_base = 0;
+ uncached = false;
}
if (clear || (bo && bo->tbo.base.resv ==
@@ -1146,7 +1151,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
trace_amdgpu_vm_bo_update(mapping);
r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
- resv, mapping->start, mapping->last,
+ !uncached, resv, mapping->start, mapping->last,
update_flags, mapping->offset,
vram_base, mem, pages_addr,
last_update);
@@ -1341,8 +1346,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
mapping->start < AMDGPU_GMC_HOLE_START)
init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
- r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
- mapping->start, mapping->last,
+ r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
+ resv, mapping->start, mapping->last,
init_pte_value, 0, 0, NULL, NULL,
&f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
@@ -1368,6 +1373,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
* @vm: requested vm
+ * @ticket: optional reservation ticket used to reserve the VM
*
* Make sure all BOs which are moved are updated in the PTs.
*
@@ -1377,11 +1383,12 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
* PTs have to be reserved!
*/
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
+ struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket)
{
struct amdgpu_bo_va *bo_va;
struct dma_resv *resv;
- bool clear;
+ bool clear, unlock;
int r;
spin_lock(&vm->status_lock);
@@ -1404,17 +1411,24 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
spin_unlock(&vm->status_lock);
/* Try to reserve the BO to avoid clearing its ptes */
- if (!adev->debug_vm && dma_resv_trylock(resv))
+ if (!adev->debug_vm && dma_resv_trylock(resv)) {
+ clear = false;
+ unlock = true;
+ /* The caller is already holding the reservation lock */
+ } else if (ticket && dma_resv_locking_ctx(resv) == ticket) {
clear = false;
+ unlock = false;
/* Somebody else is using the BO right now */
- else
+ } else {
clear = true;
+ unlock = false;
+ }
r = amdgpu_vm_bo_update(adev, bo_va, clear);
if (r)
return r;
- if (!clear)
+ if (unlock)
dma_resv_unlock(resv);
spin_lock(&vm->status_lock);
}
@@ -2618,8 +2632,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
goto error_unlock;
}
- r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,
- addr, flags, value, 0, NULL, NULL, NULL);
+ r = amdgpu_vm_update_range(adev, vm, true, false, false, false,
+ NULL, addr, addr, flags, value, 0, NULL, NULL, NULL);
if (r)
goto error_unlock;
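The core of the amdgpu_vm_handle_moved() change is a three-way decision on the per-BO reservation, condensed below; the new unlock flag keeps the trylock case and the caller-holds-lock case from being conflated when releasing:

	if (!adev->debug_vm && dma_resv_trylock(resv)) {
		clear = false;          /* we can update real PTEs */
		unlock = true;          /* and must drop the lock ourselves */
	} else if (ticket && dma_resv_locking_ctx(resv) == ticket) {
		clear = false;          /* caller (CS/dma-buf) already holds it */
		unlock = false;
	} else {
		clear = true;           /* contended: clear PTEs instead */
		unlock = false;
	}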
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 411d42fecfb6..2cd86d2bf73f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -246,6 +246,12 @@ struct amdgpu_vm_update_params {
* @table_freed: return true if page table is freed when updating
*/
bool table_freed;
+
+ /**
+ * @allow_override: true for memory that is not uncached: allows MTYPE
+ * to be overridden for NUMA local memory.
+ */
+ bool allow_override;
};
struct amdgpu_vm_update_funcs {
@@ -437,11 +443,12 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct dma_fence **fence);
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
+ struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket);
void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
struct amdgpu_vm *vm, struct amdgpu_bo *bo);
int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- bool immediate, bool unlocked, bool flush_tlb,
+ bool immediate, bool unlocked, bool flush_tlb, bool allow_override,
struct dma_resv *resv, uint64_t start, uint64_t last,
uint64_t flags, uint64_t offset, uint64_t vram_base,
struct ttm_resource *res, dma_addr_t *pages_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 9b025fd17b84..a2287bb25223 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -843,7 +843,7 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
*/
if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
adev->gmc.gmc_funcs->override_vm_pte_flags &&
- num_possible_nodes() > 1 && !params->pages_addr)
+ num_possible_nodes() > 1 && !params->pages_addr && params->allow_override)
amdgpu_gmc_override_vm_pte_flags(adev, params->vm, addr, &flags);
params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 18f58efc9dc7..08916538a615 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -77,7 +77,16 @@ static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
return true;
}
+static inline u64 amdgpu_vram_mgr_blocks_size(struct list_head *head)
+{
+ struct drm_buddy_block *block;
+ u64 size = 0;
+ list_for_each_entry(block, head, link)
+ size += amdgpu_vram_mgr_block_size(block);
+
+ return size;
+}
/**
* DOC: mem_info_vram_total
@@ -516,6 +525,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
mutex_unlock(&mgr->lock);
vres->base.start = 0;
+ size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
+ vres->base.size);
list_for_each_entry(block, &vres->blocks, link) {
unsigned long start;
@@ -523,8 +534,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
amdgpu_vram_mgr_block_size(block);
start >>= PAGE_SHIFT;
- if (start > PFN_UP(vres->base.size))
- start -= PFN_UP(vres->base.size);
+ if (start > PFN_UP(size))
+ start -= PFN_UP(size);
else
start = 0;
vres->base.start = max(vres->base.start, start);
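Why base.size alone was wrong here: a contiguous request is rounded up by the buddy allocator, so the block actually backing it can be larger than what was asked for. A worked example with hypothetical numbers:

	/* request 12 MiB contiguous -> buddy returns one 16 MiB block.
	 * old: start = end_of_block - PFN_UP(12 MiB)   (4 MiB too high)
	 * new: size  = max(16 MiB, 12 MiB) = 16 MiB
	 *      start = end_of_block - PFN_UP(16 MiB)   (matches the block)
	 */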
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index e63abdf52b6c..4dfaa017cf7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1709,10 +1709,6 @@ static void cik_program_aspm(struct amdgpu_device *adev)
if (pci_is_root_bus(adev->pdev->bus))
return;
- /* XXX double check APUs */
- if (adev->flags & AMD_IS_APU)
- return;
-
orig = data = RREG32_PCIE(ixPCIE_LC_N_FTS_CNTL);
data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK;
data |= (0x24 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT) |
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index ee5dce6f6043..a3fccc4c1f43 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -308,8 +308,6 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
u32 rb_cntl;
int i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
rb_cntl &= ~SDMA0_GFX_RB_CNTL__RB_ENABLE_MASK;
@@ -498,9 +496,6 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c
new file mode 100644
index 000000000000..a47960a0babd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v4_6_2.h"
+
+static bool df_v4_6_2_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ /* return true since related regs are inaccessible */
+ return true;
+}
+
+const struct amdgpu_df_funcs df_v4_6_2_funcs = {
+ .query_ras_poison_mode = df_v4_6_2_query_ras_poison_mode,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h
new file mode 100644
index 000000000000..3bc3e6d216e2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V4_6_2_H__
+#define __DF_V4_6_2_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_df_funcs df_v4_6_2_funcs;
+
+#endif
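The new DF 4.6.2 block only answers the RAS poison-mode query. A hedged sketch of how the funcs table is presumably installed during IP discovery, following the existing df_v4_3 pattern (amdgpu_discovery.c is in the diffstat, though its hunk is not shown here):

        case IP_VERSION(4, 6, 2):
                adev->df.funcs = &df_v4_6_2_funcs;
                break;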
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index d9ccacd06fba..c8a3bf01743f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3498,6 +3498,8 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
unsigned int vmid);
+static int gfx_v10_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
@@ -6465,11 +6467,18 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.me.mqd_backup[mqd_idx])
- memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
} else {
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
/* restore mqd with the backup copy */
if (adev->gfx.me.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
+ memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset the ring */
ring->wptr = 0;
*ring->wptr_cpu_addr = 0;
@@ -6743,7 +6752,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */
if (adev->gfx.kiq[0].mqd_backup)
- memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
+ memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
@@ -6766,7 +6775,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.kiq[0].mqd_backup)
- memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
+ memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
}
return 0;
@@ -6787,11 +6796,11 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
} else {
/* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
@@ -7172,6 +7181,13 @@ static int gfx_v10_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ /* Workaround for Vangogh ASICs to fix an SMU suspend failure:
+ * power gating needs to be set again during gfxoff control,
+ * otherwise disallowing gfxoff fails to take effect.
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1))
+ gfx_v10_0_set_powergating_state(handle, AMD_PG_STATE_UNGATE);
+
if (!adev->no_hw_access) {
if (amdgpu_async_gfx_ring) {
if (amdgpu_gfx_disable_kgq(adev, 0))
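Every MQD backup/restore in gfx_v10_0 (and in gfx_v11_0 below) moves from plain memcpy() to the io-safe copy helpers: the live MQD can sit in VRAM behind an __iomem mapping, which must not be dereferenced like ordinary memory. A minimal sketch of the direction convention (helper names hypothetical):

        #include <linux/io.h>

        /* device (VRAM) -> system memory: snapshot the live MQD */
        static void mqd_snapshot(void *backup, const void __iomem *mqd, size_t len)
        {
                memcpy_fromio(backup, mqd, len);
        }

        /* system memory -> device (VRAM): restore the saved MQD */
        static void mqd_restore(void __iomem *mqd, const void *backup, size_t len)
        {
                memcpy_toio(mqd, backup, len);
        }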
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index fd22943685f7..0c6133cc5e57 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -155,6 +155,7 @@ static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue
{
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
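Note that PACKET3_SET_RESOURCES_UNMAP_LATENTY is the kernel header's own (misspelled) macro name. Taking the in-line comment at face value, the field seems to count in roughly 100 ms units:

        /* assumed unit derivation from the comment above:
         * 0xa = 10 ticks, 10 ticks ~= 1 s  =>  ~100 ms per tick
         */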
@@ -3714,11 +3715,11 @@ static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.me.mqd_backup[mqd_idx])
- memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
} else {
/* restore mqd with the backup copy */
if (adev->gfx.me.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
+ memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset the ring */
ring->wptr = 0;
*ring->wptr_cpu_addr = 0;
@@ -4007,7 +4008,7 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */
if (adev->gfx.kiq[0].mqd_backup)
- memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
+ memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
@@ -4030,7 +4031,7 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.kiq[0].mqd_backup)
- memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
+ memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
}
return 0;
@@ -4051,11 +4052,11 @@ static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
} else {
/* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 362bf51ab1d2..41bbabd9ad4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -256,6 +256,7 @@ static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring *ring)
xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0);
WREG32(scratch_reg0_offset, 0xCAFEDEAD);
+ tmp = RREG32(scratch_reg0_offset);
r = amdgpu_ring_alloc(ring, 3);
if (r)
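Seeding SCRATCH_REG0 with 0xCAFEDEAD is a posted write; the added read-back presumably forces it to land on the selected XCC before the ring test starts polling for the magic value:

        WREG32(scratch_reg0_offset, 0xCAFEDEAD);
        tmp = RREG32(scratch_reg0_offset);      /* read back to flush the posted write */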
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 19eaada35ede..4713a62ad586 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -73,7 +73,8 @@ gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
* fini/suspend, so the overall state doesn't
* change over the course of suspend/resume.
*/
- if (!adev->in_s0ix)
+ if (!adev->in_s0ix && (adev->in_runpm || adev->in_suspend ||
+ amdgpu_in_reset(adev)))
amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false);
break;
case AMDGPU_IRQ_STATE_ENABLE:
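The DISABLE branch now touches the VM fault masks only on real power transitions, so a transient interrupt-state toggle during normal operation leaves them alone. The resulting guard (values straight from the hunk):

        if (!adev->in_s0ix &&
            (adev->in_runpm || adev->in_suspend || amdgpu_in_reset(adev)))
                amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false);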
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 5fed01e34928..b66c5f7e1c56 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1251,12 +1251,15 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
return;
}
- /* Only override mappings with MTYPE_NC, which is the safe default for
- * cacheable memory.
+ /* MTYPE_NC remains the default and can be overridden.
+ * MTYPE_UC will be present if the memory is extended-coherent
+ * and can also be overridden.
*/
if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
- AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) {
- dev_dbg_ratelimited(adev->dev, "MTYPE is not NC\n");
+ AMDGPU_PTE_MTYPE_VG10(MTYPE_NC) &&
+ (*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
+ AMDGPU_PTE_MTYPE_VG10(MTYPE_UC)) {
+ dev_dbg_ratelimited(adev->dev, "MTYPE is not NC or UC\n");
return;
}
@@ -1283,15 +1286,23 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
vm->mem_id, local_node, nid);
if (nid == local_node) {
uint64_t old_flags = *flags;
- unsigned int mtype_local = MTYPE_RW;
+ if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) ==
+ AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) {
+ unsigned int mtype_local = MTYPE_RW;
- if (amdgpu_mtype_local == 1)
- mtype_local = MTYPE_NC;
- else if (amdgpu_mtype_local == 2)
- mtype_local = MTYPE_CC;
+ if (amdgpu_mtype_local == 1)
+ mtype_local = MTYPE_NC;
+ else if (amdgpu_mtype_local == 2)
+ mtype_local = MTYPE_CC;
+
+ *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
+ AMDGPU_PTE_MTYPE_VG10(mtype_local);
+ } else {
+ /* MTYPE_UC case */
+ *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
+ AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
+ }
- *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
- AMDGPU_PTE_MTYPE_VG10(mtype_local);
dev_dbg_ratelimited(adev->dev, "flags updated from %llx to %llx\n",
old_flags, *flags);
}
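With the hunk above, the local-node override distinguishes two incoming memory types. A hypothetical restatement of the selection (MTYPE_* values and the amdgpu_mtype_local module parameter as used in the hunk):

        static unsigned int pick_local_mtype(unsigned int cur_mtype)
        {
                if (cur_mtype == MTYPE_UC)      /* extended-coherent memory */
                        return MTYPE_CC;
                if (amdgpu_mtype_local == 1)
                        return MTYPE_NC;
                if (amdgpu_mtype_local == 2)
                        return MTYPE_CC;
                return MTYPE_RW;                /* default for local NC mappings */
        }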
@@ -2018,11 +2029,8 @@ static int gmc_v9_0_sw_init(void *handle)
* vm size is 256TB (48bit), maximum size of Vega10,
* block size 512 (9bit)
*/
- /* sriov restrict max_pfn below AMDGPU_GMC_HOLE */
- if (amdgpu_sriov_vf(adev))
- amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
- else
- amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
adev->gmc.translate_further = adev->vm_manager.num_level > 1;
break;
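The SR-IOV special case is dropped, so virtual functions get the same 48-bit layout as bare metal. For reference, amdgpu_vm_adjust_size() takes the VM size in GB (parameter meanings assumed from the helper in amdgpu_vm.c), so the call matches the comment above exactly:

        /* 256 * 1024 GB = 256 TB (48-bit), 9-bit (512) fragment, 3 levels */
        amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);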
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index e523627cfe25..df218d5ca775 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -28,6 +28,7 @@
#include "nbio/nbio_2_3_offset.h"
#include "nbio/nbio_2_3_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
+#include <linux/device.h>
#include <linux/pci.h>
#define smnPCIE_CONFIG_CNTL 0x11180044
@@ -361,7 +362,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev,
data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
- if (pci_is_thunderbolt_attached(adev->pdev))
+ if (dev_is_removable(&adev->pdev->dev))
data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
else
data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
@@ -480,7 +481,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
- if (pci_is_thunderbolt_attached(adev->pdev))
+ if (dev_is_removable(&adev->pdev->dev))
data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
else
data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
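pci_is_thunderbolt_attached() only recognizes classic Thunderbolt hierarchies, while dev_is_removable() asks the driver core whether the device hangs off any user-removable link (USB4 enclosures included). The substitution keeps the more conservative "TBT" L1 inactivity timeout for every removable case:

        #include <linux/device.h>

        if (dev_is_removable(&adev->pdev->dev)) /* any removable link, not just TBT */
                data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
        else
                data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;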
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
index def89379b51a..4df1055e640a 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
@@ -254,7 +254,7 @@ static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t def, data;
- if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
return;
def = data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL);
@@ -283,7 +283,7 @@ static void nbio_v7_7_update_medium_grain_light_sleep(struct amdgpu_device *adev
{
uint32_t def, data;
- if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
return;
def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2);
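Previously the early return fired only on the enable path, so a disable call still read and rewrote the gating registers on parts without BIF_MGCG/BIF_LS support; now unsupported parts bail out in both directions. The corrected guard, common to both hunks:

        if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) /* AMD_CG_SUPPORT_BIF_LS in the LS hunk */
                return;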
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index eccb006e78aa..23f26f8caad4 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -56,8 +56,15 @@ static u32 nbio_v7_9_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp;
+ tmp = IP_VERSION_SUBREV(amdgpu_ip_version_full(adev, NBIO_HWIP, 0));
+ /* Use the subrevision if this is a VF or if it holds a non-zero value */
+ if (tmp || amdgpu_sriov_vf(adev))
+ return tmp;
+
+ /* If discovery subrev is not updated, use register version */
tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
- tmp = REG_GET_FIELD(tmp, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0, STRAP_ATI_REV_ID_DEV0_F0);
+ tmp = REG_GET_FIELD(tmp, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0,
+ STRAP_ATI_REV_ID_DEV0_F0);
return tmp;
}
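The revision id now prefers the subrevision carried in the IP-discovery table and falls back to the STRAP register only when the table reports zero on bare metal; VFs always take the discovery value (presumably because the strap register is not accessible to them). A sketch of the extraction, assuming the IP_VERSION_* helpers behave as their names suggest:

        u32 subrev = IP_VERSION_SUBREV(amdgpu_ip_version_full(adev, NBIO_HWIP, 0));
        if (subrev || amdgpu_sriov_vf(adev))
                return subrev;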
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 1995c7459f20..4d7976b77767 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -513,11 +513,10 @@ static int nv_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
static void nv_program_aspm(struct amdgpu_device *adev)
{
- if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk())
+ if (!amdgpu_device_should_use_aspm(adev))
return;
- if (!(adev->flags & AMD_IS_APU) &&
- (adev->nbio.funcs->program_aspm))
+ if (adev->nbio.funcs->program_aspm)
adev->nbio.funcs->program_aspm(adev);
}
@@ -609,9 +608,8 @@ static int nv_update_umd_stable_pstate(struct amdgpu_device *adev,
if (adev->gfx.funcs->update_perfmon_mgcg)
adev->gfx.funcs->update_perfmon_mgcg(adev, !enter);
- if (!(adev->flags & AMD_IS_APU) &&
- (adev->nbio.funcs->enable_aspm) &&
- amdgpu_device_should_use_aspm(adev))
+ if (adev->nbio.funcs->enable_aspm &&
+ amdgpu_device_should_use_aspm(adev))
adev->nbio.funcs->enable_aspm(adev, !enter);
return 0;
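The same simplification repeats in soc15.c, soc21.c, vi.c and si.c below: the extra amdgpu_device_aspm_support_quirk() call and the open-coded APU checks disappear, leaving one predicate plus the callback-presence test. Presumably the quirk and APU handling were folded into amdgpu_device_should_use_aspm() itself (amdgpu_device.c is in the diffstat), so each ASIC now reduces to:

        static void xx_program_aspm(struct amdgpu_device *adev) /* name hypothetical */
        {
                if (!amdgpu_device_should_use_aspm(adev))
                        return;
                if (adev->nbio.funcs->program_aspm)
                        adev->nbio.funcs->program_aspm(adev);
        }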
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 4142e2fcd866..3cf4684d0d3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -759,6 +759,83 @@ static int psp_v13_0_fatal_error_recovery_quirk(struct psp_context *psp)
return 0;
}
+
+static void psp_v13_0_boot_error_reporting(struct amdgpu_device *adev,
+ uint32_t inst,
+ uint32_t boot_error)
+{
+ uint32_t socket_id;
+ uint32_t aid_id;
+ uint32_t hbm_id;
+ uint32_t reg_data;
+
+ socket_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, SOCKET_ID);
+ aid_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, AID_ID);
+ hbm_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, HBM_ID);
+
+ reg_data = RREG32_SOC15(MP0, inst, regMP0_SMN_C2PMSG_109);
+ dev_info(adev->dev, "socket: %d, aid: %d, firmware boot failed, fw status is 0x%x\n",
+ socket_id, aid_id, reg_data);
+
+ if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_MEM_TRAINING))
+ dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, memory training failed\n",
+ socket_id, aid_id, hbm_id);
+
+ if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_FW_LOAD))
+ dev_info(adev->dev, "socket: %d, aid: %d, firmware load failed at boot time\n",
+ socket_id, aid_id);
+
+ if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_WAFL_LINK_TRAINING))
+ dev_info(adev->dev, "socket: %d, aid: %d, wafl link training failed\n",
+ socket_id, aid_id);
+
+ if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_XGMI_LINK_TRAINING))
+ dev_info(adev->dev, "socket: %d, aid: %d, xgmi link training failed\n",
+ socket_id, aid_id);
+
+ if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_USR_CP_LINK_TRAINING))
+ dev_info(adev->dev, "socket: %d, aid: %d, usr cp link training failed\n",
+ socket_id, aid_id);
+
+ if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_USR_DP_LINK_TRAINING))
+ dev_info(adev->dev, "socket: %d, aid: %d, usr dp link training failed\n",
+ socket_id, aid_id);
+
+ if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_HBM_MEM_TEST))
+ dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm memory test failed\n",
+ socket_id, aid_id, hbm_id);
+
+ if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_HBM_BIST_TEST))
+ dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm bist test failed\n",
+ socket_id, aid_id, hbm_id);
+}
+
+static int psp_v13_0_query_boot_status(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int inst_mask = adev->aid_mask;
+ uint32_t reg_data;
+ uint32_t i;
+ int ret = 0;
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))
+ return 0;
+
+ if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10007)
+ return 0;
+
+ for_each_inst(i, inst_mask) {
+ reg_data = RREG32_SOC15(MP0, i, regMP0_SMN_C2PMSG_126);
+ if (!REG_GET_FIELD(reg_data, MP0_SMN_C2PMSG_126, BOOT_STATUS)) {
+ psp_v13_0_boot_error_reporting(adev, i, reg_data);
+ ret = -EINVAL;
+ break;
+ }
+ }
+
+ return ret;
+}
+
static const struct psp_funcs psp_v13_0_funcs = {
.init_microcode = psp_v13_0_init_microcode,
.wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state,
@@ -781,6 +858,7 @@ static const struct psp_funcs psp_v13_0_funcs = {
.update_spirom = psp_v13_0_update_spirom,
.vbflash_stat = psp_v13_0_vbflash_status,
.fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk,
+ .query_boot_status = psp_v13_0_query_boot_status,
};
void psp_v13_0_set_psp_funcs(struct psp_context *psp)
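query_boot_status() only runs on MP0 13.0.6 with new enough firmware (the C2PMSG_59 check reads like a version floor of 0x00a10007) and then inspects one C2PMSG_126 status word per AID. Sketch of the per-instance walk, using the same for_each_inst() mask iterator as the function above:

        int i;
        u32 status;

        for_each_inst(i, adev->aid_mask) {      /* one bit per present AID */
                status = RREG32_SOC15(MP0, i, regMP0_SMN_C2PMSG_126);
                if (!REG_GET_FIELD(status, MP0_SMN_C2PMSG_126, BOOT_STATUS))
                        psp_v13_0_boot_error_reporting(adev, i, status);
        }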
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index b58a13bd75db..45377a175250 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -339,8 +339,6 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
u32 rb_cntl, ib_cntl;
int i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
@@ -474,9 +472,6 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index c5ea32687eb5..2ad615be4bb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -513,8 +513,6 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
u32 rb_cntl, ib_cntl;
int i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
@@ -746,9 +744,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 683d51ae4bf1..3d68dd5523c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -877,8 +877,6 @@ static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable)
u32 rb_cntl, ib_cntl;
int i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0);
@@ -913,8 +911,6 @@ static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
u32 rb_cntl, ib_cntl;
int i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
@@ -1402,13 +1398,7 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(page);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == page)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return r;
@@ -1921,11 +1911,8 @@ static int sdma_v4_0_hw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
- if (amdgpu_sriov_vf(adev)) {
- /* disable the scheduler for SDMA */
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
+ if (amdgpu_sriov_vf(adev))
return 0;
- }
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1964,7 +1951,6 @@ static int sdma_v4_0_resume(void *handle)
if (adev->in_s0ix) {
sdma_v4_0_enable(adev, true);
sdma_v4_0_gfx_enable(adev, true);
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index c1ff5eda8961..3c485e5a531a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -559,8 +559,6 @@ static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev)
u32 rb_cntl, ib_cntl;
int i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
@@ -825,9 +823,6 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -1426,11 +1421,8 @@ static int sdma_v5_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (amdgpu_sriov_vf(adev)) {
- /* disable the scheduler for SDMA */
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
+ if (amdgpu_sriov_vf(adev))
return 0;
- }
sdma_v5_0_ctx_switch_enable(adev, false);
sdma_v5_0_enable(adev, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 7d1e57189c8c..83c240f741b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -364,8 +364,6 @@ static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev)
u32 rb_cntl, ib_cntl;
int i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
@@ -625,9 +623,6 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -1284,11 +1279,8 @@ static int sdma_v5_2_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (amdgpu_sriov_vf(adev)) {
- /* disable the scheduler for SDMA */
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
+ if (amdgpu_sriov_vf(adev))
return 0;
- }
sdma_v5_2_ctx_switch_enable(adev, false);
sdma_v5_2_enable(adev, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 7e4d5188cbfa..3c7ddd219de8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -348,8 +348,6 @@ static void sdma_v6_0_gfx_stop(struct amdgpu_device *adev)
u32 rb_cntl, ib_cntl;
int i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 0);
@@ -561,9 +559,6 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -1308,11 +1303,8 @@ static int sdma_v6_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (amdgpu_sriov_vf(adev)) {
- /* disable the scheduler for SDMA */
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
+ if (amdgpu_sriov_vf(adev))
return 0;
- }
sdma_v6_0_ctxempty_int_enable(adev, false);
sdma_v6_0_enable(adev, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 4b81f29e5fd5..a757526153e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -2440,8 +2440,6 @@ static void si_program_aspm(struct amdgpu_device *adev)
if (!amdgpu_device_should_use_aspm(adev))
return;
- if (adev->flags & AMD_IS_APU)
- return;
orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
data &= ~LC_XMIT_N_FTS_MASK;
data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 42c4547f32ec..9aa0e11ee673 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -115,8 +115,6 @@ static void si_dma_stop(struct amdgpu_device *adev)
u32 rb_cntl;
unsigned i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
/* dma0 */
rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]);
@@ -177,9 +175,6 @@ static int si_dma_start(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 66ed28136bc8..d4b8d62f4294 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -646,8 +646,7 @@ static void soc15_program_aspm(struct amdgpu_device *adev)
if (!amdgpu_device_should_use_aspm(adev))
return;
- if (!(adev->flags & AMD_IS_APU) &&
- (adev->nbio.funcs->program_aspm))
+ if (adev->nbio.funcs->program_aspm)
adev->nbio.funcs->program_aspm(adev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 8c6cab641a1c..d5083c549330 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -433,8 +433,7 @@ static void soc21_program_aspm(struct amdgpu_device *adev)
if (!amdgpu_device_should_use_aspm(adev))
return;
- if (!(adev->flags & AMD_IS_APU) &&
- (adev->nbio.funcs->program_aspm))
+ if (adev->nbio.funcs->program_aspm)
adev->nbio.funcs->program_aspm(adev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 025e6aeb058d..770b4b4e3138 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -88,6 +88,26 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev)
umc_v12_0_reset_error_count_per_channel, NULL);
}
+static bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status)
+{
+ return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1));
+}
+
+static bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status)
+{
+ return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 ||
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 0) ||
+ /* Identify data parity error in replay mode */
+ ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0x5 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0xb) &&
+ !(umc_v12_0_is_uncorrectable_error(mc_umc_status)))));
+}
+
static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
uint64_t umc_reg_offset,
unsigned long *error_count)
@@ -104,10 +124,7 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
mc_umc_status =
RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 ||
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 &&
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 0)))
+ if (umc_v12_0_is_correctable_error(mc_umc_status))
*error_count += 1;
}
@@ -125,11 +142,7 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev
mc_umc_status =
RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
- if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+ if (umc_v12_0_is_uncorrectable_error(mc_umc_status))
*error_count += 1;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
index 46bfdee79bfd..c4c77257710c 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
@@ -336,7 +336,7 @@ static void umc_v8_10_ecc_info_query_correctable_error_count(struct amdgpu_devic
uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst,
unsigned long *error_count)
{
- uint64_t mc_umc_status;
+ uint16_t ecc_ce_cnt;
uint32_t eccinfo_table_idx;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
@@ -345,12 +345,10 @@ static void umc_v8_10_ecc_info_query_correctable_error_count(struct amdgpu_devic
umc_inst * adev->umc.channel_inst_num +
ch_inst;
- /* check the MCUMC_STATUS */
- mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
- *error_count += 1;
- }
+ /* Retrieve CE count */
+ ecc_ce_cnt = ras->umc_ecc.ecc[eccinfo_table_idx].ce_count_lo_chip;
+ if (ecc_ce_cnt)
+ *error_count += ecc_ce_cnt;
}
static void umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_device *adev,
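The correctable-count query now trusts the CE counter that the ECC info table already accumulates per channel, instead of inferring at most one event from the MCA status word. The new flow reduces to (field names from the hunk):

        u16 cnt = ras->umc_ecc.ecc[eccinfo_table_idx].ce_count_lo_chip;
        *error_count += cnt;    /* the if (cnt) guard above merely skips a no-op add */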
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 6a8494f98d3e..1a98812981f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1124,11 +1124,10 @@ static void vi_program_aspm(struct amdgpu_device *adev)
bool bL1SS = false;
bool bClkReqSupport = true;
- if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk())
+ if (!amdgpu_device_should_use_aspm(adev))
return;
- if (adev->flags & AMD_IS_APU ||
- adev->asic_type < CHIP_POLARIS10)
+ if (adev->asic_type < CHIP_POLARIS10)
return;
orig = data = RREG32_PCIE(ixPCIE_LC_CNTL);
diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
index 756f39348dd9..174f13eff575 100644
--- a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
@@ -205,19 +205,21 @@ static int vpe_v6_1_ring_start(struct amdgpu_vpe *vpe)
static int vpe_v_6_1_ring_stop(struct amdgpu_vpe *vpe)
{
struct amdgpu_device *adev = vpe->ring.adev;
- uint32_t rb_cntl, ib_cntl;
+ uint32_t queue_reset;
+ int ret;
- rb_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_ENABLE, 0);
- WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL), rb_cntl);
+ queue_reset = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE_RESET_REQ));
+ queue_reset = REG_SET_FIELD(queue_reset, VPEC_QUEUE_RESET_REQ, QUEUE0_RESET, 1);
+ WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE_RESET_REQ), queue_reset);
- ib_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, VPEC_QUEUE0_IB_CNTL, IB_ENABLE, 0);
- WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_IB_CNTL), ib_cntl);
+ ret = SOC15_WAIT_ON_RREG(VPE, 0, regVPEC_QUEUE_RESET_REQ, 0,
+ VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK);
+ if (ret)
+ dev_err(adev->dev, "VPE queue reset failed\n");
vpe->ring.sched.ready = false;
- return 0;
+ return ret;
}
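Stopping the VPE ring now requests a hardware queue reset instead of hand-clearing the RB/IB enable bits: the driver sets QUEUE0_RESET in VPEC_QUEUE_RESET_REQ and waits for the block to acknowledge by clearing it, propagating a timeout instead of always returning success.

        /* SOC15_WAIT_ON_RREG(ip, inst, reg, expected, mask):
         * polls until (RREG32(reg) & mask) == expected, nonzero on timeout
         */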
static int vpe_v6_1_set_trap_irq_state(struct amdgpu_device *adev,